diff --git a/.clang-format b/.clang-format deleted file mode 100644 index dcffb37fd..000000000 --- a/.clang-format +++ /dev/null @@ -1,66 +0,0 @@ -# Generated from CLion C/C++ Code Style settings -BasedOnStyle: LLVM -AccessModifierOffset: -4 -AlignAfterOpenBracket: AlwaysBreak -AlignConsecutiveAssignments: None -AlignOperands: Align -AllowAllArgumentsOnNextLine: false -AllowAllConstructorInitializersOnNextLine: false -AllowAllParametersOfDeclarationOnNextLine: false -AllowShortBlocksOnASingleLine: Always -AllowShortCaseLabelsOnASingleLine: true -AllowShortFunctionsOnASingleLine: All -AllowShortIfStatementsOnASingleLine: Always -AllowShortLambdasOnASingleLine: All -AllowShortLoopsOnASingleLine: true -BreakAfterReturnType: None -BreakBeforeBraces: Custom -BreakTemplateDeclarations: Yes -BraceWrapping: - AfterCaseLabel: false - AfterClass: false - AfterControlStatement: Never - AfterEnum: false - AfterFunction: false - AfterNamespace: false - AfterUnion: false - BeforeCatch: false - BeforeElse: false - IndentBraces: false - SplitEmptyFunction: false - SplitEmptyRecord: false -BreakBeforeBinaryOperators: None -BreakBeforeTernaryOperators: true -BreakConstructorInitializers: BeforeComma -BreakInheritanceList: BeforeComma -ColumnLimit: 0 -CompactNamespaces: false -ContinuationIndentWidth: 8 -IndentCaseLabels: true -IndentPPDirectives: BeforeHash -IndentWidth: 4 -KeepEmptyLinesAtTheStartOfBlocks: false -MaxEmptyLinesToKeep: 1 -NamespaceIndentation: None -ObjCSpaceAfterProperty: false -ObjCSpaceBeforeProtocolList: true -PointerAlignment: Left -ReflowComments: false -SpaceAfterCStyleCast: true -SpaceAfterLogicalNot: false -SpaceAfterTemplateKeyword: false -SpaceBeforeAssignmentOperators: true -SpaceBeforeCpp11BracedList: false -SpaceBeforeCtorInitializerColon: true -SpaceBeforeInheritanceColon: true -SpaceBeforeParens: ControlStatements -SpaceBeforeRangeBasedForLoopColon: true -SpaceInEmptyParentheses: false -SpacesBeforeTrailingComments: 0 -SpacesInAngles: false 
-SpacesInCStyleCastParentheses: false -SpacesInContainerLiterals: false -SpacesInParentheses: false -SpacesInSquareBrackets: false -TabWidth: 4 -UseTab: ForIndentation diff --git a/.editorconfig b/.editorconfig index dc74c0861..7dad88fd0 100644 --- a/.editorconfig +++ b/.editorconfig @@ -37,6 +37,7 @@ ij_yaml_spaces_within_braces = true ij_yaml_spaces_within_brackets = true [{*.cmake,CMakeLists.txt}] +indent_style = space ij_cmake_align_command_call_r_par = false ij_cmake_align_control_flow_r_par = false ij_cmake_align_multiline_parameters_in_calls = false diff --git a/.github/workflows/deploy_docs.yml b/.github/workflows/deploy_docs.yml index 6e691de18..c34e99bba 100644 --- a/.github/workflows/deploy_docs.yml +++ b/.github/workflows/deploy_docs.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Create & Deploy Docs - uses: DenverCoder1/doxygen-github-pages-action@v1.3.1 + uses: DenverCoder1/doxygen-github-pages-action@v2.0.0 with: github_token: ${{secrets.GITHUB_TOKEN}} branch: docs diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml new file mode 100644 index 000000000..6518ed65e --- /dev/null +++ b/.github/workflows/wheels.yml @@ -0,0 +1,65 @@ +name: Build Wheels +on: + workflow_dispatch: + inputs: + version: + type: string + required: true + default: YYYY.MM.DDdevN + description: 'The package version' + release: + type: boolean + required: true + default: false + description: 'Push a release to PyPI' + +jobs: + build_sdist: + name: Build SDist + runs-on: ubuntu-latest + defaults: + run: + working-directory: '${{github.workspace}}/lang/python' + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Configure CMake + working-directory: '${{github.workspace}}' + run: cmake -G "Unix Makefiles" -B build -DCMAKE_BUILD_TYPE=Release -DSOURCEPP_BUILD_PYTHON_WRAPPERS=ON -DSOURCEPP_PYTHON_VERSION="${{inputs.version}}" + + - name: Build SDist + run: | + pipx run build --sdist + + - 
name: Check Metadata + run: | + pipx run twine check dist/* + + - name: Upload Artifact + uses: actions/upload-artifact@v4 + with: + name: dist-sdist + path: ${{github.workspace}}/lang/python/dist/*.tar.gz + + upload_release: + name: Upload a Release + if: inputs.release + needs: [build_sdist] + runs-on: ubuntu-latest + environment: + name: pypi-release + url: https://pypi.org/p/sourcepp + permissions: + id-token: write + steps: + - name: Download Artifacts + uses: actions/download-artifact@v4 + with: + name: dist-sdist + path: dist + + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.gitignore b/.gitignore index 0c0169e33..2669bc643 100644 --- a/.gitignore +++ b/.gitignore @@ -1,15 +1,43 @@ -# IDEs +# IDE .idea/ .vs/ .vscode/ CMakeSettings.json -# Project exclude paths + +# Build build/ +dist/ cmake-build-*/ out/ +*.dll +*.ilk +*.pdb +*.pyd +*.so* -# Generated +# Docs docs/html/ + + +# Test test/res/ test/Helpers.h + + +# Python +.mypy_cache/ +.venv/ +__pycache__/ +wheelhouse/ +*.pyi +*.typed +*.whl + +lang/python/CMakeLists.txt +lang/python/pyproject.toml +lang/python/src/sourcepp/__init__.py + +lang/python/LICENSE +lang/python/README.md +lang/python/THIRDPARTY_LEGAL_NOTICES.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index b45a38798..167dff27f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,12 +3,13 @@ cmake_minimum_required(VERSION 3.25 FATAL_ERROR) # Set defaults before project call if(PROJECT_IS_TOP_LEVEL) - set(CMAKE_OSX_ARCHITECTURES "arm64;x86_64") + set(CMAKE_OSX_ARCHITECTURES "x86_64;arm64" CACHE INTERNAL "" FORCE) endif() # Create project -project(sourcepp) +project(sourcepp + DESCRIPTION "Several modern C++20 libraries for sanely parsing Valve formats.") set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) @@ -26,13 +27,15 @@ option(SOURCEPP_USE_VCRYPTPP "Build vcryptpp library" ${SOURC option(SOURCEPP_USE_VPKPP "Build vpkpp library" ${SOURCEPP_LIBS_START_ENABLED}) option(SOURCEPP_USE_VTFPP 
"Build vtfpp library" ${SOURCEPP_LIBS_START_ENABLED}) -option(SOURCEPP_BUILD_BENCHMARKS "Build benchmarks for supported libraries" OFF) -option(SOURCEPP_BUILD_C_WRAPPERS "Build C wrappers for supported libraries" OFF) -option(SOURCEPP_BUILD_WITH_OPENCL "Build with support for GPU compute" OFF) -option(SOURCEPP_BUILD_WITH_TBB "Build with support for std::execution" OFF) -option(SOURCEPP_BUILD_WITH_THREADS "Build with support for threading" ON) -option(SOURCEPP_BUILD_TESTS "Build tests for supported libraries" OFF) -option(SOURCEPP_BUILD_WIN7_COMPAT "Build with Windows 7 compatibility" OFF) +option(SOURCEPP_BUILD_BENCHMARKS "Build benchmarks for supported libraries" OFF) +option(SOURCEPP_BUILD_C_WRAPPERS "Build C wrappers for supported libraries" OFF) +option(SOURCEPP_BUILD_CSHARP_WRAPPERS "Build C# wrappers for supported libraries" OFF) +option(SOURCEPP_BUILD_PYTHON_WRAPPERS "Build Python wrappers for supported libraries" OFF) +option(SOURCEPP_BUILD_WITH_OPENCL "Build with support for GPU compute" OFF) +option(SOURCEPP_BUILD_WITH_TBB "Build with support for std::execution" OFF) +option(SOURCEPP_BUILD_WITH_THREADS "Build with support for threading" ON) +option(SOURCEPP_BUILD_TESTS "Build tests for supported libraries" OFF) +option(SOURCEPP_BUILD_WIN7_COMPAT "Build with Windows 7 compatibility" OFF) option(SOURCEPP_LINK_STATIC_MSVC_RUNTIME "Link to static MSVC runtime library" OFF) @@ -53,6 +56,9 @@ if(SOURCEPP_USE_VPKPP) set(SOURCEPP_USE_KVPP ON CACHE INTERNAL "" FORCE) endif() +if(SOURCEPP_BUILD_CSHARP_WRAPPERS) + set(SOURCEPP_BUILD_C_WRAPPERS ON CACHE INTERNAL "" FORCE) +endif() if(MSVC) # MSVC does not rely on tbb for std::execution policies, so we can force this on set(SOURCEPP_BUILD_WITH_TBB ON CACHE INTERNAL "" FORCE) @@ -73,6 +79,7 @@ endif() list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") include(AddPrettyParser) include(AddSourcePPLibrary) +include(FetchContent) include(IncludeSubdirectory) include(PrintOptions) include(TargetOptimize) @@ 
-84,6 +91,8 @@ include_subdirectory(ext) # Shared code include_subdirectory(src/sourcepp) +include_subdirectory(src/sourcepp/crypto) +include_subdirectory(src/sourcepp/parser) # Shared C code @@ -92,10 +101,48 @@ if(SOURCEPP_BUILD_C_WRAPPERS) endif() +# Python bindings, part 1 +if(SOURCEPP_BUILD_PYTHON_WRAPPERS) + set(SOURCEPP_PYTHON_NAME "${PROJECT_NAME}_python") + if(NOT SOURCEPP_PYTHON_VERSION) + set(SOURCEPP_PYTHON_VERSION "0.0.1dev1") + message(WARNING "SOURCEPP_PYTHON_VERSION is not defined, do not release this build publicly! Defaulting it to ${SOURCEPP_PYTHON_VERSION}...") + endif() + find_package(Python REQUIRED + COMPONENTS Interpreter Development.Module + OPTIONAL_COMPONENTS Development.SABIModule) + FetchContent_Declare( + nanobind + GIT_REPOSITORY "https://github.com/wjakob/nanobind.git" + GIT_TAG "origin/master") + FetchContent_MakeAvailable(nanobind) + set(${SOURCEPP_PYTHON_NAME}_SOURCES "") + set(${SOURCEPP_PYTHON_NAME}_DEFINES "") + + # Set the version and git commit hash here + find_package(Git REQUIRED) + execute_process(COMMAND ${GIT_EXECUTABLE} log -1 --format=%H + WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" + OUTPUT_VARIABLE SOURCEPP_GIT_TAG + RESULT_VARIABLE SOURCEPP_GIT_TAG_ERROR + OUTPUT_STRIP_TRAILING_WHITESPACE) + if(NOT SOURCEPP_GIT_TAG) + message(FATAL_ERROR "Failed to retrieve git commit SHA: ${SOURCEPP_GIT_TAG_ERROR}") + endif() + configure_file("${CMAKE_CURRENT_SOURCE_DIR}/lang/python/cfg/CMakeLists.txt" "${CMAKE_CURRENT_SOURCE_DIR}/lang/python/CMakeLists.txt" @ONLY) + configure_file("${CMAKE_CURRENT_SOURCE_DIR}/lang/python/cfg/pyproject.toml" "${CMAKE_CURRENT_SOURCE_DIR}/lang/python/pyproject.toml") + configure_file("${CMAKE_CURRENT_SOURCE_DIR}/lang/python/cfg/__init__.py" "${CMAKE_CURRENT_SOURCE_DIR}/lang/python/src/sourcepp/__init__.py") + + # These need to be inside the python directory, let's duplicate them! 
+ configure_file("${CMAKE_CURRENT_SOURCE_DIR}/LICENSE" "${CMAKE_CURRENT_SOURCE_DIR}/lang/python/LICENSE" COPYONLY) + configure_file("${CMAKE_CURRENT_SOURCE_DIR}/README.md" "${CMAKE_CURRENT_SOURCE_DIR}/lang/python/README.md" COPYONLY) + configure_file("${CMAKE_CURRENT_SOURCE_DIR}/THIRDPARTY_LEGAL_NOTICES.txt" "${CMAKE_CURRENT_SOURCE_DIR}/lang/python/THIRDPARTY_LEGAL_NOTICES.txt" COPYONLY) +endif() + + # Tests, part 1 if(SOURCEPP_BUILD_TESTS) set(SOURCEPP_TEST_NAME "${PROJECT_NAME}_test") - include(FetchContent) FetchContent_Declare( googletest GIT_REPOSITORY "https://github.com/google/googletest.git" @@ -123,7 +170,6 @@ endif() # Benchmarks if(SOURCEPP_BUILD_BENCHMARKS) set(SOURCEPP_BENCH_NAME "${PROJECT_NAME}_bench") - include(FetchContent) FetchContent_Declare( benchmark GIT_REPOSITORY https://github.com/google/benchmark.git @@ -138,16 +184,16 @@ endif() # Add libraries -add_sourcepp_library(bsppp NO_TEST ) # sourcepp::bsppp -add_sourcepp_library(dmxpp ) # sourcepp::dmxpp -add_sourcepp_library(gamepp ) # sourcepp::gamepp -add_sourcepp_library(kvpp BENCH) # sourcepp::kvpp -add_sourcepp_library(mdlpp ) # sourcepp::mdlpp -add_sourcepp_library(steampp C ) # sourcepp::steampp -add_sourcepp_library(toolpp ) # sourcepp::toolpp -add_sourcepp_library(vcryptpp C CSHARP ) # sourcepp::vcryptpp -add_sourcepp_library(vpkpp C CSHARP NO_TEST ) # sourcepp::vpkpp -add_sourcepp_library(vtfpp BENCH) # sourcepp::vtfpp +add_sourcepp_library(bsppp NO_TEST ) # sourcepp::bsppp +add_sourcepp_library(dmxpp ) # sourcepp::dmxpp +add_sourcepp_library(gamepp C PYTHON ) # sourcepp::gamepp +add_sourcepp_library(kvpp BENCH) # sourcepp::kvpp +add_sourcepp_library(mdlpp ) # sourcepp::mdlpp +add_sourcepp_library(steampp C PYTHON ) # sourcepp::steampp +add_sourcepp_library(toolpp PYTHON ) # sourcepp::toolpp +add_sourcepp_library(vcryptpp C CSHARP PYTHON ) # sourcepp::vcryptpp +add_sourcepp_library(vpkpp C CSHARP NO_TEST ) # sourcepp::vpkpp +add_sourcepp_library(vtfpp PYTHON BENCH) # sourcepp::vtfpp # 
Tests, part 2 @@ -160,9 +206,56 @@ if(SOURCEPP_BUILD_TESTS) endif() +# Python bindings, part 2 +if(SOURCEPP_BUILD_PYTHON_WRAPPERS) + nanobind_add_module(${SOURCEPP_PYTHON_NAME} NB_STATIC STABLE_ABI LTO + "${CMAKE_CURRENT_SOURCE_DIR}/lang/python/src/sourcepp.cpp" + ${${SOURCEPP_PYTHON_NAME}_SOURCES}) + set_target_properties(${SOURCEPP_PYTHON_NAME} PROPERTIES + OUTPUT_NAME "_${PROJECT_NAME}_impl" + LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/lang/python/src/sourcepp" + LIBRARY_OUTPUT_DIRECTORY_DEBUG "${CMAKE_CURRENT_SOURCE_DIR}/lang/python/src/sourcepp" + LIBRARY_OUTPUT_DIRECTORY_RELEASE "${CMAKE_CURRENT_SOURCE_DIR}/lang/python/src/sourcepp") + target_compile_definitions(${SOURCEPP_PYTHON_NAME} PRIVATE ${${SOURCEPP_PYTHON_NAME}_DEFINES}) + target_link_libraries(${SOURCEPP_PYTHON_NAME} PRIVATE ${${SOURCEPP_PYTHON_NAME}_DEPS}) + install(TARGETS ${SOURCEPP_PYTHON_NAME} LIBRARY DESTINATION "./${PROJECT_NAME}") + + add_custom_target(${SOURCEPP_PYTHON_NAME}_all) + add_dependencies(${SOURCEPP_PYTHON_NAME}_all ${SOURCEPP_PYTHON_NAME}) + + # We need to manually write out each module :( + set(${SOURCEPP_PYTHON_NAME}_MODULES + "sourcepp._sourcepp_impl" + "sourcepp._sourcepp_impl.gamepp" + "sourcepp._sourcepp_impl.sourcepp" + "sourcepp._sourcepp_impl.sourcepp.math" + "sourcepp._sourcepp_impl.steampp" + "sourcepp._sourcepp_impl.toolpp" + "sourcepp._sourcepp_impl.vcryptpp" + "sourcepp._sourcepp_impl.vcryptpp.VFONT" + "sourcepp._sourcepp_impl.vcryptpp.VICE" + "sourcepp._sourcepp_impl.vtfpp" + "sourcepp._sourcepp_impl.vtfpp.ImageFormatDetails" + "sourcepp._sourcepp_impl.vtfpp.ImageDimensions" + "sourcepp._sourcepp_impl.vtfpp.ImageConversion") + foreach(MODULE ${${SOURCEPP_PYTHON_NAME}_MODULES}) + string(REPLACE "." "/" MODULE_DIR "${MODULE}") + string(REPLACE "." 
"_" MODULE_NAME_NORMALIZED "${MODULE}") + set(MODULE_NAME_NORMALIZED "${MODULE_NAME_NORMALIZED}_stub") + nanobind_add_stub("${SOURCEPP_PYTHON_NAME}_stub_${MODULE_NAME_NORMALIZED}" + DEPENDS ${SOURCEPP_PYTHON_NAME} + MODULE "${MODULE}" + OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/lang/python/src/${MODULE_DIR}.pyi" + PYTHON_PATH "${CMAKE_CURRENT_SOURCE_DIR}/lang/python/src") + install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/lang/python/src/${MODULE_DIR}.pyi" DESTINATION "./${MODULE_DIR}/..") + add_dependencies(${SOURCEPP_PYTHON_NAME}_all ${SOURCEPP_PYTHON_NAME}_stub_${MODULE_NAME_NORMALIZED}) + endforeach() +endif() + + # Print options print_options(OPTIONS USE_BSPPP USE_DMXPP USE_GAMEPP USE_KVPP USE_MDLPP USE_STEAMPP USE_TOOLPP USE_VCRYPTPP USE_VPKPP USE_VTFPP - BUILD_BENCHMARKS BUILD_C_WRAPPERS BUILD_WITH_OPENCL BUILD_WITH_TBB BUILD_WITH_THREADS BUILD_TESTS BUILD_WIN7_COMPAT + BUILD_BENCHMARKS BUILD_C_WRAPPERS BUILD_CSHARP_WRAPPERS BUILD_PYTHON_WRAPPERS BUILD_WITH_OPENCL BUILD_WITH_TBB BUILD_WITH_THREADS BUILD_TESTS BUILD_WIN7_COMPAT LINK_STATIC_MSVC_RUNTIME VPKPP_SUPPORT_VPK_V54) diff --git a/README.md b/README.md index de78d5c50..55c2cc083 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ Several modern C++20 libraries for sanely parsing Valve formats, rolled into one - bsppp + bsppp* BSP v17-27 ✅ ✅ @@ -30,7 +30,7 @@ Several modern C++20 libraries for sanely parsing Valve formats, rolled into one - dmxpp + dmxpp* DMX Binary v1-5 ✅ ❌ @@ -42,7 +42,7 @@ Several modern C++20 libraries for sanely parsing Valve formats, rolled into one Get Source engine instance window title/position/size ✅ ❌ - + C
Python @@ -53,15 +53,15 @@ Several modern C++20 libraries for sanely parsing Valve formats, rolled into one kvpp - KeyValues v1* + KeyValues Text v1 ✅ ✅ - mdlpp - MDL v44-49 + mdlpp* + MDL v44-49 ✅ ❌ @@ -84,7 +84,7 @@ Several modern C++20 libraries for sanely parsing Valve formats, rolled into one Find Steam install folder ✅ - - C + C
Python @@ -102,7 +102,7 @@ Several modern C++20 libraries for sanely parsing Valve formats, rolled into one ✅ ✅ - + Python @@ -119,7 +119,7 @@ Several modern C++20 libraries for sanely parsing Valve formats, rolled into one VICE encrypted files ✅ ✅ - C
C# + C
C#
Python @@ -186,7 +186,7 @@ Several modern C++20 libraries for sanely parsing Valve formats, rolled into one - VPK v1-2, v54 + VPK pre-v1, v1-2, v54
Counter-Strike: 2 modifications
Counter-Strike: Source ClientMod modifications @@ -207,17 +207,77 @@ Several modern C++20 libraries for sanely parsing Valve formats, rolled into one - ZIP (and BZ2, GZ, XZ, ZSTD) + ZIP ✅ ✅ - vtfpp + vtfpp + BMP + ✅ + ✅ + Python + + + + EXR v1 + ✅ + ✅ + + + + GIF + ✅ + ❌ + + + + HDR + ✅ + ✅ + + + + JPEG + ✅ + ✅ + + + + PIC + ✅ + ❌ + + + + PNG + ✅ + ✅ + + + + PNM (PGM, PPM) + ✅ + ❌ + + + PPL v0 ✅ ✅ - + + + + PSD + ✅ + ❌ + + + + TGA + ✅ + ✅ @@ -230,9 +290,16 @@ Several modern C++20 libraries for sanely parsing Valve formats, rolled into one -(\*) Many text-based formats in Source are close to (if not identical to) KeyValues v1, such as [VDF](https://developer.valvesoftware.com/wiki/VDF), [VMT](https://developer.valvesoftware.com/wiki/VMT), and [VMF](https://developer.valvesoftware.com/wiki/VMF_(Valve_Map_Format)). +(\*) These libraries are incomplete and still in development. Their interfaces are unstable and will likely change in the future. +Libraries not starred should be considered stable, and their existing interfaces will not change much if at all. Note that wrappers +only exist for stable libraries. + +(†) Many text-based formats in Source are close to (if not identical to) KeyValues v1, such as [VMT](https://developer.valvesoftware.com/wiki/VMT) and [VMF](https://developer.valvesoftware.com/wiki/VMF_(Valve_Map_Format)). + +## Wrappers -(†) The MDL parser is not complete. It is usable in its current state, but it does not currently parse more complex components like animations. This parser is still in development. +Wrappers for libraries considered complete exist for C, C#, and/or Python, depending on the library. The Python wrappers can be +found on PyPI in the [sourcepp](https://pypi.org/project/sourcepp) package. 
## Special Thanks diff --git a/THIRDPARTY_LEGAL_NOTICES.txt b/THIRDPARTY_LEGAL_NOTICES.txt index cf387f125..a6d198797 100644 --- a/THIRDPARTY_LEGAL_NOTICES.txt +++ b/THIRDPARTY_LEGAL_NOTICES.txt @@ -1,4 +1,4 @@ ---------------- bufferstream --------------- +--------------- BufferStream --------------- MIT License @@ -67,16 +67,41 @@ modification, are permitted provided that the following conditions are met: contributors may be used to endorse or promote products derived from this software without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +--------------- half --------------- + +The MIT License + +Copyright (c) 2012-2021 Christian Rau + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. --------------- hat-trie --------------- @@ -119,6 +144,37 @@ NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +--------------- OpenEXR --------------- + +Copyright (c) 2002, Industrial Light & Magic, a division of Lucas Digital Ltd. LLC. +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + --------------- miniz --------------- Copyright 2013-2014 RAD Game Tools and Valve Software @@ -166,6 +222,37 @@ freely, subject to the following restrictions: 3. This notice may not be removed or altered from any source distribution. +--------------- nanobind --------------- + +Copyright (c) 2022 Wenzel Jakob <wenzel.jakob@epfl.ch>. +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + --------------- stb --------------- Copyright (c) 2017 Sean Barrett @@ -214,6 +301,37 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +--------------- TinyEXR --------------- + +Copyright (c) 2014 - 2021, Syoyo Fujita and many contributors. +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + --------------- zlib --------------- (C) 1995-2024 Jean-loup Gailly and Mark Adler @@ -244,21 +362,22 @@ BSD License For Zstandard software -Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved. +Copyright (c) Meta Platforms, Inc. and affiliates. +All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. - * Neither the name Facebook, nor Meta, nor the names of its contributors may - be used to endorse or promote products derived from this software without - specific prior written permission. +* Neither the name Facebook, nor Meta, nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED diff --git a/cmake/AddPrettyParser.cmake b/cmake/AddPrettyParser.cmake index 432c33c31..c2ba3dbc7 100644 --- a/cmake/AddPrettyParser.cmake +++ b/cmake/AddPrettyParser.cmake @@ -1,6 +1,6 @@ # Add a new parser library function(add_pretty_parser TARGET) - cmake_parse_arguments(PARSE_ARGV 1 OPTIONS "C" "" "DEPS;DEPS_INTERFACE;PRECOMPILED_HEADERS;SOURCES") + cmake_parse_arguments(PARSE_ARGV 1 OPTIONS "C" "" "DEPS;DEPS_PUBLIC;DEPS_INTERFACE;PRECOMPILED_HEADERS;SOURCES") if(OPTIONS_C) add_library(${TARGET}c SHARED ${${PROJECT_NAME}c_SOURCES} ${OPTIONS_PRECOMPILED_HEADERS} ${OPTIONS_SOURCES}) @@ -16,8 +16,8 @@ function(add_pretty_parser TARGET) if(NOT ("PRECOMPILED_HEADERS" IN_LIST OPTIONS_UNPARSED_ARGUMENTS)) target_precompile_headers(${TARGET} PUBLIC ${OPTIONS_HEADERS}) endif() - target_link_libraries(${TARGET} PUBLIC ${PROJECT_NAME}) target_link_libraries(${TARGET} PRIVATE ${OPTIONS_DEPS}) + target_link_libraries(${TARGET} PUBLIC ${PROJECT_NAME} ${OPTIONS_DEPS_PUBLIC}) target_link_libraries(${TARGET} INTERFACE ${OPTIONS_DEPS_INTERFACE}) # Define DEBUG macro diff --git a/cmake/AddSourcePPLibrary.cmake b/cmake/AddSourcePPLibrary.cmake index 18a760ed6..581dfd071 100644 --- a/cmake/AddSourcePPLibrary.cmake +++ b/cmake/AddSourcePPLibrary.cmake @@ -1,7 +1,9 @@ function(add_sourcepp_library TARGET) - cmake_parse_arguments(PARSE_ARGV 1 OPTIONS "C;CSHARP;NO_TEST;BENCH" "" "") + cmake_parse_arguments(PARSE_ARGV 1 OPTIONS "C;CSHARP;PYTHON;NO_TEST;BENCH" "" "") string(TOUPPER ${TARGET} TARGET_UPPER) if(SOURCEPP_USE_${TARGET_UPPER}) + set(PROPAGATE_VARS "") + # Add C++ include("${CMAKE_CURRENT_SOURCE_DIR}/src/${TARGET}/_${TARGET}.cmake") @@ -11,23 +13,31 @@ function(add_sourcepp_library TARGET) endif() # Add C# - if(OPTIONS_CSHARP) + if(SOURCEPP_BUILD_CSHARP_WRAPPERS AND OPTIONS_CSHARP) 
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/lang/csharp/src/sourcepp/Buffer.cs.in" "${CMAKE_CURRENT_SOURCE_DIR}/lang/csharp/src/${TARGET}/Buffer.cs") configure_file("${CMAKE_CURRENT_SOURCE_DIR}/lang/csharp/src/sourcepp/String.cs.in" "${CMAKE_CURRENT_SOURCE_DIR}/lang/csharp/src/${TARGET}/String.cs") configure_file("${CMAKE_CURRENT_SOURCE_DIR}/lang/csharp/src/sourcepp/TARGET.csproj.in" "${CMAKE_CURRENT_SOURCE_DIR}/lang/csharp/src/${TARGET}/${TARGET}.csproj") + add_custom_target(${TARGET}_csharp DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/lang/csharp/src/${TARGET}/${TARGET}.csproj") + add_dependencies(${TARGET}_csharp ${TARGET}c) endif() - set(PROPAGATE_VARS "") + # Add Python + if(SOURCEPP_BUILD_PYTHON_WRAPPERS AND OPTIONS_PYTHON) + list(APPEND ${SOURCEPP_PYTHON_NAME}_DEPS sourcepp::${TARGET}) + list(APPEND ${SOURCEPP_PYTHON_NAME}_DEFINES ${TARGET_UPPER}) + list(APPEND ${SOURCEPP_PYTHON_NAME}_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/lang/python/src/${TARGET}.h") + list(APPEND PROPAGATE_VARS ${SOURCEPP_PYTHON_NAME}_DEPS ${SOURCEPP_PYTHON_NAME}_DEFINES ${SOURCEPP_PYTHON_NAME}_SOURCES) + endif() # Add tests - if(NOT OPTIONS_NO_TEST AND SOURCEPP_BUILD_TESTS) + if(SOURCEPP_BUILD_TESTS AND NOT OPTIONS_NO_TEST) list(APPEND ${SOURCEPP_TEST_NAME}_DEPS sourcepp::${TARGET}) list(APPEND ${SOURCEPP_TEST_NAME}_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/test/${TARGET}.cpp") list(APPEND PROPAGATE_VARS ${SOURCEPP_TEST_NAME}_DEPS ${SOURCEPP_TEST_NAME}_SOURCES) endif() # Add benchmarks - if(OPTIONS_BENCH AND SOURCEPP_BUILD_BENCHMARKS) + if(SOURCEPP_BUILD_BENCHMARKS AND OPTIONS_BENCH) add_executable(${TARGET}_bench "${CMAKE_CURRENT_SOURCE_DIR}/bench/${TARGET}.cpp") target_link_libraries(${TARGET}_bench PUBLIC ${SOURCEPP_BENCH_NAME} sourcepp::${TARGET}) include("${CMAKE_CURRENT_SOURCE_DIR}/bench/${TARGET}.cmake") diff --git a/docs/index.md b/docs/index.md index 8d4ff20e3..622f7d4d8 100644 --- a/docs/index.md +++ b/docs/index.md @@ -22,14 +22,14 @@ Several modern C++20 libraries for sanely parsing Valve 
formats, rolled into one Wrappers - bsppp + bsppp* BSP v17-27 ✅ ✅ - dmxpp + dmxpp* DMX Binary v1-5 ✅ ❌ @@ -40,7 +40,7 @@ Several modern C++20 libraries for sanely parsing Valve formats, rolled into one Get Source engine instance window title/position/size ✅ ❌ - + C
Python Run commands in a Source engine instance remotely @@ -49,14 +49,14 @@ Several modern C++20 libraries for sanely parsing Valve formats, rolled into one kvpp - KeyValues v1* + KeyValues Text v1 ✅ ✅ - mdlpp - MDL v44-49 + mdlpp* + MDL v44-49 ✅ ❌ @@ -76,7 +76,7 @@ Several modern C++20 libraries for sanely parsing Valve formats, rolled into one Find Steam install folder ✅ - - C + C
Python Find installed Steam games @@ -92,7 +92,7 @@ Several modern C++20 libraries for sanely parsing Valve formats, rolled into one ✅ ✅ - + Python @@ -107,7 +107,7 @@ Several modern C++20 libraries for sanely parsing Valve formats, rolled into one VICE encrypted files ✅ ✅ - C
C# + C
C#
Python VFONT encrypted fonts @@ -163,7 +163,7 @@ Several modern C++20 libraries for sanely parsing Valve formats, rolled into one - VPK v1-2, v54 + VPK pre-v1, v1-2, v54
Counter-Strike: 2 modifications
Counter-Strike: Source ClientMod modifications @@ -181,16 +181,66 @@ Several modern C++20 libraries for sanely parsing Valve formats, rolled into one ✅ - ZIP (and BZ2, GZ, XZ, ZSTD) + ZIP ✅ ✅ - vtfpp + vtfpp + BMP + ✅ + ✅ + Python + + + EXR v1 + ✅ + ✅ + + + GIF + ✅ + ❌ + + + HDR + ✅ + ✅ + + + JPEG + ✅ + ✅ + + + PIC + ✅ + ❌ + + + PNG + ✅ + ✅ + + + PNM (PGM, PPM) + ✅ + ❌ + + PPL v0 ✅ ✅ - + + + PSD + ✅ + ❌ + + + TGA + ✅ + ✅ @@ -203,9 +253,16 @@ Several modern C++20 libraries for sanely parsing Valve formats, rolled into one \endhtmlonly -(\*) Many text-based formats in Source are close to (if not identical to) KeyValues v1, such as [VDF](https://developer.valvesoftware.com/wiki/VDF), [VMT](https://developer.valvesoftware.com/wiki/VMT), and [VMF](https://developer.valvesoftware.com/wiki/VMF_(Valve_Map_Format)). +(\*) These libraries are incomplete and still in development. Their interfaces are unstable and will likely change in the future. +Libraries not starred should be considered stable, and their existing interfaces will not change much if at all. Note that wrappers +only exist for stable libraries. + +(†) Many text-based formats in Source are close to (if not identical to) KeyValues v1, such as [VMT](https://developer.valvesoftware.com/wiki/VMT) and [VMF](https://developer.valvesoftware.com/wiki/VMF_(Valve_Map_Format)). + +## Wrappers -(†) The MDL parser is not complete. It is usable in its current state, but it does not currently parse more complex components like animations. This parser is still in development. +Wrappers for libraries considered complete exist for C, C#, and/or Python, depending on the library. The Python wrappers can be +found on PyPI in the [sourcepp](https://pypi.org/project/sourcepp) package. 
## Special Thanks diff --git a/ext/_ext.cmake b/ext/_ext.cmake index 54964b0b7..530c62412 100644 --- a/ext/_ext.cmake +++ b/ext/_ext.cmake @@ -21,6 +21,10 @@ if(NOT TARGET cryptopp::cryptopp) endif() +# half +add_subdirectory("${CMAKE_CURRENT_LIST_DIR}/half") + + # hat-trie if(NOT TARGET tsl::hat_trie) add_subdirectory("${CMAKE_CURRENT_LIST_DIR}/hat-trie") @@ -38,11 +42,11 @@ endif() # minizip-ng (guard this behind vpkpp because this is a big dependency) -if(SOURCEPP_USE_VPKPP AND NOT TARGET MINIZIP::minizip) +if((SOURCEPP_USE_VPKPP OR SOURCEPP_USE_VTFPP) AND NOT TARGET MINIZIP::minizip) set(MZ_COMPAT OFF CACHE INTERNAL "") # todo: guard liblzma/xz force-enable behind BSP compression option set(MZ_LZMA ON CACHE INTERNAL "" FORCE) - if(SOURCEPP_VPKPP_SUPPORT_VPK_V54) + if(SOURCEPP_USE_VTFPP OR SOURCEPP_VPKPP_SUPPORT_VPK_V54) set(MZ_ZSTD ON CACHE INTERNAL "" FORCE) endif() set(MZ_FETCH_LIBS ON CACHE INTERNAL "" FORCE) @@ -73,7 +77,7 @@ endif() function(sourcepp_add_opencl TARGET) if(SOURCEPP_BUILD_WITH_OPENCL) - target_compile_definitions(${TARGET} PRIVATE SOURCEPP_BUILD_WITH_OPENCL) + target_compile_definitions(${TARGET} PUBLIC SOURCEPP_BUILD_WITH_OPENCL) target_link_libraries(${TARGET} PRIVATE OpenCL::OpenCL) endif() endfunction() @@ -83,10 +87,14 @@ endfunction() add_subdirectory("${CMAKE_CURRENT_LIST_DIR}/stb") -# TBB +# tinyexr +add_subdirectory("${CMAKE_CURRENT_LIST_DIR}/tinyexr") + + +# tbb function(sourcepp_add_tbb TARGET) if(SOURCEPP_BUILD_WITH_TBB) - target_compile_definitions(${TARGET} PRIVATE SOURCEPP_BUILD_WITH_TBB) + target_compile_definitions(${TARGET} PUBLIC SOURCEPP_BUILD_WITH_TBB) if(NOT MSVC) target_link_libraries(${TARGET} PRIVATE tbb) endif() @@ -106,7 +114,7 @@ endif() function(sourcepp_add_threads TARGET) if(SOURCEPP_BUILD_WITH_THREADS) - target_compile_definitions(${TARGET} PRIVATE SOURCEPP_BUILD_WITH_THREADS) + target_compile_definitions(${TARGET} PUBLIC SOURCEPP_BUILD_WITH_THREADS) target_link_libraries(${TARGET} PRIVATE Threads::Threads) 
endif() endfunction() diff --git a/ext/bufferstream b/ext/bufferstream index 689c50d56..fa4160118 160000 --- a/ext/bufferstream +++ b/ext/bufferstream @@ -1 +1 @@ -Subproject commit 689c50d56a0eefb066209c281eac99599845edb6 +Subproject commit fa4160118b06b84706a7a4766a3b487fd5c9a6d0 diff --git a/ext/compressonator/CMakeLists.txt b/ext/compressonator/CMakeLists.txt index a4757f151..22cc64f15 100644 --- a/ext/compressonator/CMakeLists.txt +++ b/ext/compressonator/CMakeLists.txt @@ -1,4 +1,6 @@ -set(COMPRESSONATOR_DIR ${CMAKE_CURRENT_LIST_DIR} CACHE INTERNAL "") +include_guard(GLOBAL) + +set(COMPRESSONATOR_DIR "${CMAKE_CURRENT_LIST_DIR}" CACHE INTERNAL "") function(target_link_compressonator TARGET) if(WIN32) @@ -20,8 +22,8 @@ function(target_link_compressonator TARGET) endif() elseif(APPLE) target_link_libraries(${TARGET} PRIVATE - "${COMPRESSONATOR_DIR}/lib/macOS/libCompressonator$<$:d>.a" - "${COMPRESSONATOR_DIR}/lib/macOS/libCMP_Core$<$:d>.a") + "${COMPRESSONATOR_DIR}/lib/macOS_arm64/libCompressonator$<$:d>.a" + "${COMPRESSONATOR_DIR}/lib/macOS_arm64/libCMP_Core$<$:d>.a") elseif(UNIX) target_link_libraries(${TARGET} PRIVATE "${COMPRESSONATOR_DIR}/lib/linux_x86_64/libCompressonator$<$:d>.a" diff --git a/ext/compressonator/lib/macOS/libCMP_Core.a b/ext/compressonator/lib/macOS_arm64/libCMP_Core.a similarity index 98% rename from ext/compressonator/lib/macOS/libCMP_Core.a rename to ext/compressonator/lib/macOS_arm64/libCMP_Core.a index b334e5f1f..69947a390 100644 Binary files a/ext/compressonator/lib/macOS/libCMP_Core.a and b/ext/compressonator/lib/macOS_arm64/libCMP_Core.a differ diff --git a/ext/compressonator/lib/macOS/libCMP_Cored.a b/ext/compressonator/lib/macOS_arm64/libCMP_Cored.a similarity index 51% rename from ext/compressonator/lib/macOS/libCMP_Cored.a rename to ext/compressonator/lib/macOS_arm64/libCMP_Cored.a index 523764d44..68c2458ee 100644 Binary files a/ext/compressonator/lib/macOS/libCMP_Cored.a and b/ext/compressonator/lib/macOS_arm64/libCMP_Cored.a 
differ diff --git a/ext/compressonator/lib/macOS/libCompressonator.a b/ext/compressonator/lib/macOS_arm64/libCompressonator.a similarity index 89% rename from ext/compressonator/lib/macOS/libCompressonator.a rename to ext/compressonator/lib/macOS_arm64/libCompressonator.a index 6ac269e9e..2fa0c0702 100644 Binary files a/ext/compressonator/lib/macOS/libCompressonator.a and b/ext/compressonator/lib/macOS_arm64/libCompressonator.a differ diff --git a/ext/compressonator/lib/macOS/libCompressonatord.a b/ext/compressonator/lib/macOS_arm64/libCompressonatord.a similarity index 61% rename from ext/compressonator/lib/macOS/libCompressonatord.a rename to ext/compressonator/lib/macOS_arm64/libCompressonatord.a index 5c5e41bf9..203957a5b 100644 Binary files a/ext/compressonator/lib/macOS/libCompressonatord.a and b/ext/compressonator/lib/macOS_arm64/libCompressonatord.a differ diff --git a/ext/compressonator/lib/win_x86_64/CMP_Core_MD.lib b/ext/compressonator/lib/win_x86_64/CMP_Core_MD.lib index 0efe277be..9efa4b405 100644 Binary files a/ext/compressonator/lib/win_x86_64/CMP_Core_MD.lib and b/ext/compressonator/lib/win_x86_64/CMP_Core_MD.lib differ diff --git a/ext/compressonator/lib/win_x86_64/CMP_Core_MD_AVX.lib b/ext/compressonator/lib/win_x86_64/CMP_Core_MD_AVX.lib index b578fb151..4fc8a1a28 100644 Binary files a/ext/compressonator/lib/win_x86_64/CMP_Core_MD_AVX.lib and b/ext/compressonator/lib/win_x86_64/CMP_Core_MD_AVX.lib differ diff --git a/ext/compressonator/lib/win_x86_64/CMP_Core_MD_AVX512.lib b/ext/compressonator/lib/win_x86_64/CMP_Core_MD_AVX512.lib index c08519b47..25f22e752 100644 Binary files a/ext/compressonator/lib/win_x86_64/CMP_Core_MD_AVX512.lib and b/ext/compressonator/lib/win_x86_64/CMP_Core_MD_AVX512.lib differ diff --git a/ext/compressonator/lib/win_x86_64/CMP_Core_MD_AVX512d.lib b/ext/compressonator/lib/win_x86_64/CMP_Core_MD_AVX512d.lib index 97202b42f..e10065294 100644 Binary files a/ext/compressonator/lib/win_x86_64/CMP_Core_MD_AVX512d.lib and 
b/ext/compressonator/lib/win_x86_64/CMP_Core_MD_AVX512d.lib differ diff --git a/ext/compressonator/lib/win_x86_64/CMP_Core_MD_AVXd.lib b/ext/compressonator/lib/win_x86_64/CMP_Core_MD_AVXd.lib index 99a1b33cc..482bcc8e5 100644 Binary files a/ext/compressonator/lib/win_x86_64/CMP_Core_MD_AVXd.lib and b/ext/compressonator/lib/win_x86_64/CMP_Core_MD_AVXd.lib differ diff --git a/ext/compressonator/lib/win_x86_64/CMP_Core_MD_SSE.lib b/ext/compressonator/lib/win_x86_64/CMP_Core_MD_SSE.lib index 014aebdd5..de5062d49 100644 Binary files a/ext/compressonator/lib/win_x86_64/CMP_Core_MD_SSE.lib and b/ext/compressonator/lib/win_x86_64/CMP_Core_MD_SSE.lib differ diff --git a/ext/compressonator/lib/win_x86_64/CMP_Core_MD_SSEd.lib b/ext/compressonator/lib/win_x86_64/CMP_Core_MD_SSEd.lib index 61d410a9a..f3c42d72f 100644 Binary files a/ext/compressonator/lib/win_x86_64/CMP_Core_MD_SSEd.lib and b/ext/compressonator/lib/win_x86_64/CMP_Core_MD_SSEd.lib differ diff --git a/ext/compressonator/lib/win_x86_64/CMP_Core_MDd.lib b/ext/compressonator/lib/win_x86_64/CMP_Core_MDd.lib index 95c3ef771..0d6f7aa66 100644 Binary files a/ext/compressonator/lib/win_x86_64/CMP_Core_MDd.lib and b/ext/compressonator/lib/win_x86_64/CMP_Core_MDd.lib differ diff --git a/ext/compressonator/lib/win_x86_64/CMP_Core_MT.lib b/ext/compressonator/lib/win_x86_64/CMP_Core_MT.lib index 62660ca70..7a56db967 100644 Binary files a/ext/compressonator/lib/win_x86_64/CMP_Core_MT.lib and b/ext/compressonator/lib/win_x86_64/CMP_Core_MT.lib differ diff --git a/ext/compressonator/lib/win_x86_64/CMP_Core_MT_AVX.lib b/ext/compressonator/lib/win_x86_64/CMP_Core_MT_AVX.lib index 87defa8f6..f0b4cfc27 100644 Binary files a/ext/compressonator/lib/win_x86_64/CMP_Core_MT_AVX.lib and b/ext/compressonator/lib/win_x86_64/CMP_Core_MT_AVX.lib differ diff --git a/ext/compressonator/lib/win_x86_64/CMP_Core_MT_AVX512.lib b/ext/compressonator/lib/win_x86_64/CMP_Core_MT_AVX512.lib index 7a8681eff..3150b8ed9 100644 Binary files 
a/ext/compressonator/lib/win_x86_64/CMP_Core_MT_AVX512.lib and b/ext/compressonator/lib/win_x86_64/CMP_Core_MT_AVX512.lib differ diff --git a/ext/compressonator/lib/win_x86_64/CMP_Core_MT_AVX512d.lib b/ext/compressonator/lib/win_x86_64/CMP_Core_MT_AVX512d.lib index 41ef859f2..af3fbcc5e 100644 Binary files a/ext/compressonator/lib/win_x86_64/CMP_Core_MT_AVX512d.lib and b/ext/compressonator/lib/win_x86_64/CMP_Core_MT_AVX512d.lib differ diff --git a/ext/compressonator/lib/win_x86_64/CMP_Core_MT_AVXd.lib b/ext/compressonator/lib/win_x86_64/CMP_Core_MT_AVXd.lib index 9db8d8d7c..932ae1f1b 100644 Binary files a/ext/compressonator/lib/win_x86_64/CMP_Core_MT_AVXd.lib and b/ext/compressonator/lib/win_x86_64/CMP_Core_MT_AVXd.lib differ diff --git a/ext/compressonator/lib/win_x86_64/CMP_Core_MT_SSE.lib b/ext/compressonator/lib/win_x86_64/CMP_Core_MT_SSE.lib index 2bf68a0ee..05f719b66 100644 Binary files a/ext/compressonator/lib/win_x86_64/CMP_Core_MT_SSE.lib and b/ext/compressonator/lib/win_x86_64/CMP_Core_MT_SSE.lib differ diff --git a/ext/compressonator/lib/win_x86_64/CMP_Core_MT_SSEd.lib b/ext/compressonator/lib/win_x86_64/CMP_Core_MT_SSEd.lib index 538cee480..b720f1c7a 100644 Binary files a/ext/compressonator/lib/win_x86_64/CMP_Core_MT_SSEd.lib and b/ext/compressonator/lib/win_x86_64/CMP_Core_MT_SSEd.lib differ diff --git a/ext/compressonator/lib/win_x86_64/CMP_Core_MTd.lib b/ext/compressonator/lib/win_x86_64/CMP_Core_MTd.lib index 1a21c9d03..5c19763da 100644 Binary files a/ext/compressonator/lib/win_x86_64/CMP_Core_MTd.lib and b/ext/compressonator/lib/win_x86_64/CMP_Core_MTd.lib differ diff --git a/ext/compressonator/lib/win_x86_64/Compressonator_MD.lib b/ext/compressonator/lib/win_x86_64/Compressonator_MD.lib index 1c3f6b54c..f6cb96cda 100644 Binary files a/ext/compressonator/lib/win_x86_64/Compressonator_MD.lib and b/ext/compressonator/lib/win_x86_64/Compressonator_MD.lib differ diff --git a/ext/compressonator/lib/win_x86_64/Compressonator_MDd.lib 
b/ext/compressonator/lib/win_x86_64/Compressonator_MDd.lib index 46aef8db7..79d3aad1c 100644 Binary files a/ext/compressonator/lib/win_x86_64/Compressonator_MDd.lib and b/ext/compressonator/lib/win_x86_64/Compressonator_MDd.lib differ diff --git a/ext/compressonator/lib/win_x86_64/Compressonator_MT.lib b/ext/compressonator/lib/win_x86_64/Compressonator_MT.lib index 49197349a..9f7df3aae 100644 Binary files a/ext/compressonator/lib/win_x86_64/Compressonator_MT.lib and b/ext/compressonator/lib/win_x86_64/Compressonator_MT.lib differ diff --git a/ext/compressonator/lib/win_x86_64/Compressonator_MTd.lib b/ext/compressonator/lib/win_x86_64/Compressonator_MTd.lib index d863c69f8..2b3d81cc3 100644 Binary files a/ext/compressonator/lib/win_x86_64/Compressonator_MTd.lib and b/ext/compressonator/lib/win_x86_64/Compressonator_MTd.lib differ diff --git a/ext/half/CMakeLists.txt b/ext/half/CMakeLists.txt new file mode 100644 index 000000000..516cacabb --- /dev/null +++ b/ext/half/CMakeLists.txt @@ -0,0 +1,11 @@ +cmake_minimum_required(VERSION 3.16) +project(sourcepp_half) +set(CMAKE_CXX_STANDARD 20) + +# Create library +add_library(${PROJECT_NAME} INTERFACE + "${CMAKE_CURRENT_SOURCE_DIR}/include/half.hpp") + +target_include_directories(${PROJECT_NAME} INTERFACE + $ + $) diff --git a/ext/half/LICENSE b/ext/half/LICENSE new file mode 100644 index 000000000..6023222b0 --- /dev/null +++ b/ext/half/LICENSE @@ -0,0 +1,21 @@ +The MIT License + +Copyright (c) 2012-2021 Christian Rau + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be 
included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/ext/half/include/half.hpp b/ext/half/include/half.hpp new file mode 100644 index 000000000..d0a882dd6 --- /dev/null +++ b/ext/half/include/half.hpp @@ -0,0 +1,4601 @@ +// half - IEEE 754-based half-precision floating-point library. +// +// Copyright (c) 2012-2021 Christian Rau +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +// Version 2.2.0 + +/// \file +/// Main header file for half-precision functionality. 
+ +#ifndef HALF_HALF_HPP +#define HALF_HALF_HPP + +#define HALF_GCC_VERSION (__GNUC__*100+__GNUC_MINOR__) + +#if defined(__INTEL_COMPILER) + #define HALF_ICC_VERSION __INTEL_COMPILER +#elif defined(__ICC) + #define HALF_ICC_VERSION __ICC +#elif defined(__ICL) + #define HALF_ICC_VERSION __ICL +#else + #define HALF_ICC_VERSION 0 +#endif + +// check C++11 language features +#if defined(__clang__) // clang + #if __has_feature(cxx_static_assert) && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) + #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 + #endif + #if __has_feature(cxx_constexpr) && !defined(HALF_ENABLE_CPP11_CONSTEXPR) + #define HALF_ENABLE_CPP11_CONSTEXPR 1 + #endif + #if __has_feature(cxx_noexcept) && !defined(HALF_ENABLE_CPP11_NOEXCEPT) + #define HALF_ENABLE_CPP11_NOEXCEPT 1 + #endif + #if __has_feature(cxx_user_literals) && !defined(HALF_ENABLE_CPP11_USER_LITERALS) + #define HALF_ENABLE_CPP11_USER_LITERALS 1 + #endif + #if __has_feature(cxx_thread_local) && !defined(HALF_ENABLE_CPP11_THREAD_LOCAL) + #define HALF_ENABLE_CPP11_THREAD_LOCAL 1 + #endif + #if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L) && !defined(HALF_ENABLE_CPP11_LONG_LONG) + #define HALF_ENABLE_CPP11_LONG_LONG 1 + #endif +#elif HALF_ICC_VERSION && defined(__INTEL_CXX11_MODE__) // Intel C++ + #if HALF_ICC_VERSION >= 1500 && !defined(HALF_ENABLE_CPP11_THREAD_LOCAL) + #define HALF_ENABLE_CPP11_THREAD_LOCAL 1 + #endif + #if HALF_ICC_VERSION >= 1500 && !defined(HALF_ENABLE_CPP11_USER_LITERALS) + #define HALF_ENABLE_CPP11_USER_LITERALS 1 + #endif + #if HALF_ICC_VERSION >= 1400 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) + #define HALF_ENABLE_CPP11_CONSTEXPR 1 + #endif + #if HALF_ICC_VERSION >= 1400 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) + #define HALF_ENABLE_CPP11_NOEXCEPT 1 + #endif + #if HALF_ICC_VERSION >= 1110 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) + #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 + #endif + #if HALF_ICC_VERSION >= 1110 && !defined(HALF_ENABLE_CPP11_LONG_LONG) + #define 
HALF_ENABLE_CPP11_LONG_LONG 1 + #endif +#elif defined(__GNUC__) // gcc + #if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L + #if HALF_GCC_VERSION >= 408 && !defined(HALF_ENABLE_CPP11_THREAD_LOCAL) + #define HALF_ENABLE_CPP11_THREAD_LOCAL 1 + #endif + #if HALF_GCC_VERSION >= 407 && !defined(HALF_ENABLE_CPP11_USER_LITERALS) + #define HALF_ENABLE_CPP11_USER_LITERALS 1 + #endif + #if HALF_GCC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) + #define HALF_ENABLE_CPP11_CONSTEXPR 1 + #endif + #if HALF_GCC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) + #define HALF_ENABLE_CPP11_NOEXCEPT 1 + #endif + #if HALF_GCC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) + #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 + #endif + #if !defined(HALF_ENABLE_CPP11_LONG_LONG) + #define HALF_ENABLE_CPP11_LONG_LONG 1 + #endif + #endif + #define HALF_TWOS_COMPLEMENT_INT 1 +#elif defined(_MSC_VER) // Visual C++ + #if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_THREAD_LOCAL) + #define HALF_ENABLE_CPP11_THREAD_LOCAL 1 + #endif + #if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_USER_LITERALS) + #define HALF_ENABLE_CPP11_USER_LITERALS 1 + #endif + #if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) + #define HALF_ENABLE_CPP11_CONSTEXPR 1 + #endif + #if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) + #define HALF_ENABLE_CPP11_NOEXCEPT 1 + #endif + #if _MSC_VER >= 1600 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) + #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 + #endif + #if _MSC_VER >= 1310 && !defined(HALF_ENABLE_CPP11_LONG_LONG) + #define HALF_ENABLE_CPP11_LONG_LONG 1 + #endif + #define HALF_TWOS_COMPLEMENT_INT 1 + #define HALF_POP_WARNINGS 1 + #pragma warning(push) + #pragma warning(disable : 4099 4127 4146) //struct vs class, constant in if, negative unsigned +#endif + +// check C++11 library features +#include +#if defined(_LIBCPP_VERSION) // libc++ + #if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103 + #ifndef 
HALF_ENABLE_CPP11_TYPE_TRAITS + #define HALF_ENABLE_CPP11_TYPE_TRAITS 1 + #endif + #ifndef HALF_ENABLE_CPP11_CSTDINT + #define HALF_ENABLE_CPP11_CSTDINT 1 + #endif + #ifndef HALF_ENABLE_CPP11_CMATH + #define HALF_ENABLE_CPP11_CMATH 1 + #endif + #ifndef HALF_ENABLE_CPP11_HASH + #define HALF_ENABLE_CPP11_HASH 1 + #endif + #ifndef HALF_ENABLE_CPP11_CFENV + #define HALF_ENABLE_CPP11_CFENV 1 + #endif + #endif +#elif defined(__GLIBCXX__) // libstdc++ + #if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103 + #ifdef __clang__ + #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_TYPE_TRAITS) + #define HALF_ENABLE_CPP11_TYPE_TRAITS 1 + #endif + #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CSTDINT) + #define HALF_ENABLE_CPP11_CSTDINT 1 + #endif + #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CMATH) + #define HALF_ENABLE_CPP11_CMATH 1 + #endif + #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_HASH) + #define HALF_ENABLE_CPP11_HASH 1 + #endif + #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CFENV) + #define HALF_ENABLE_CPP11_CFENV 1 + #endif + #else + #if HALF_GCC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_TYPE_TRAITS) + #define HALF_ENABLE_CPP11_TYPE_TRAITS 1 + #endif + #if HALF_GCC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CSTDINT) + #define HALF_ENABLE_CPP11_CSTDINT 1 + #endif + #if HALF_GCC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CMATH) + #define HALF_ENABLE_CPP11_CMATH 1 + #endif + #if HALF_GCC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_HASH) + #define HALF_ENABLE_CPP11_HASH 1 + #endif + #if HALF_GCC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CFENV) + #define HALF_ENABLE_CPP11_CFENV 1 + #endif + #endif + #endif +#elif defined(_CPPLIB_VER) // Dinkumware/Visual C++ + #if _CPPLIB_VER >= 520 && !defined(HALF_ENABLE_CPP11_TYPE_TRAITS) + #define HALF_ENABLE_CPP11_TYPE_TRAITS 1 + #endif + #if _CPPLIB_VER >= 520 && !defined(HALF_ENABLE_CPP11_CSTDINT) + #define HALF_ENABLE_CPP11_CSTDINT 1 + #endif + #if 
_CPPLIB_VER >= 520 && !defined(HALF_ENABLE_CPP11_HASH) + #define HALF_ENABLE_CPP11_HASH 1 + #endif + #if _CPPLIB_VER >= 610 && !defined(HALF_ENABLE_CPP11_CMATH) + #define HALF_ENABLE_CPP11_CMATH 1 + #endif + #if _CPPLIB_VER >= 610 && !defined(HALF_ENABLE_CPP11_CFENV) + #define HALF_ENABLE_CPP11_CFENV 1 + #endif +#endif +#undef HALF_GCC_VERSION +#undef HALF_ICC_VERSION + +// any error throwing C++ exceptions? +#if defined(HALF_ERRHANDLING_THROW_INVALID) || defined(HALF_ERRHANDLING_THROW_DIVBYZERO) || defined(HALF_ERRHANDLING_THROW_OVERFLOW) || defined(HALF_ERRHANDLING_THROW_UNDERFLOW) || defined(HALF_ERRHANDLING_THROW_INEXACT) +#define HALF_ERRHANDLING_THROWS 1 +#endif + +// any error handling enabled? +#define HALF_ERRHANDLING (HALF_ERRHANDLING_FLAGS||HALF_ERRHANDLING_ERRNO||HALF_ERRHANDLING_FENV||HALF_ERRHANDLING_THROWS) + +#if HALF_ERRHANDLING + #define HALF_UNUSED_NOERR(name) name +#else + #define HALF_UNUSED_NOERR(name) +#endif + +// support constexpr +#if HALF_ENABLE_CPP11_CONSTEXPR + #define HALF_CONSTEXPR constexpr + #define HALF_CONSTEXPR_CONST constexpr + #if HALF_ERRHANDLING + #define HALF_CONSTEXPR_NOERR + #else + #define HALF_CONSTEXPR_NOERR constexpr + #endif +#else + #define HALF_CONSTEXPR + #define HALF_CONSTEXPR_CONST const + #define HALF_CONSTEXPR_NOERR +#endif + +// support noexcept +#if HALF_ENABLE_CPP11_NOEXCEPT + #define HALF_NOEXCEPT noexcept + #define HALF_NOTHROW noexcept +#else + #define HALF_NOEXCEPT + #define HALF_NOTHROW throw() +#endif + +// support thread storage +#if HALF_ENABLE_CPP11_THREAD_LOCAL + #define HALF_THREAD_LOCAL thread_local +#else + #define HALF_THREAD_LOCAL static +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if HALF_ENABLE_CPP11_TYPE_TRAITS + #include +#endif +#if HALF_ENABLE_CPP11_CSTDINT + #include +#endif +#if HALF_ERRHANDLING_ERRNO + #include +#endif +#if HALF_ENABLE_CPP11_CFENV + #include +#endif +#if HALF_ENABLE_CPP11_HASH + #include +#endif + + 
+#ifndef HALF_ENABLE_F16C_INTRINSICS + /// Enable F16C instruction set intrinsics. + /// Defining this to 1 enables the use of [F16C compiler intrinsics](https://en.wikipedia.org/wiki/F16C) for converting between + /// half-precision and single-precision values which may result in improved performance. This will not perform additional checks + /// for support of the F16C instruction set, so an appropriate target platform is required when enabling this feature. + /// + /// Unless predefined it will be enabled automatically when the `__F16C__` symbol is defined, which some compilers do on supporting platforms. + #define HALF_ENABLE_F16C_INTRINSICS __F16C__ +#endif +#if HALF_ENABLE_F16C_INTRINSICS + #include +#endif + +#ifdef HALF_DOXYGEN_ONLY +/// Type for internal floating-point computations. +/// This can be predefined to a built-in floating-point type (`float`, `double` or `long double`) to override the internal +/// half-precision implementation to use this type for computing arithmetic operations and mathematical function (if available). +/// This can result in improved performance for arithmetic operators and mathematical functions but might cause results to +/// deviate from the specified half-precision rounding mode and inhibits proper detection of half-precision exceptions. +#define HALF_ARITHMETIC_TYPE (undefined) + +/// Enable internal exception flags. +/// Defining this to 1 causes operations on half-precision values to raise internal floating-point exception flags according to +/// the IEEE 754 standard. These can then be cleared and checked with clearexcept(), testexcept(). +#define HALF_ERRHANDLING_FLAGS 0 + +/// Enable exception propagation to `errno`. +/// Defining this to 1 causes operations on half-precision values to propagate floating-point exceptions to +/// [errno](https://en.cppreference.com/w/cpp/error/errno) from ``. 
Specifically this will propagate domain errors as +/// [EDOM](https://en.cppreference.com/w/cpp/error/errno_macros) and pole, overflow and underflow errors as +/// [ERANGE](https://en.cppreference.com/w/cpp/error/errno_macros). Inexact errors won't be propagated. +#define HALF_ERRHANDLING_ERRNO 0 + +/// Enable exception propagation to built-in floating-point platform. +/// Defining this to 1 causes operations on half-precision values to propagate floating-point exceptions to the built-in +/// single- and double-precision implementation's exception flags using the +/// [C++11 floating-point environment control](https://en.cppreference.com/w/cpp/numeric/fenv) from ``. However, this +/// does not work in reverse and single- or double-precision exceptions will not raise the corresponding half-precision +/// exception flags, nor will explicitly clearing flags clear the corresponding built-in flags. +#define HALF_ERRHANDLING_FENV 0 + +/// Throw C++ exception on domain errors. +/// Defining this to a string literal causes operations on half-precision values to throw a +/// [std::domain_error](https://en.cppreference.com/w/cpp/error/domain_error) with the specified message on domain errors. +#define HALF_ERRHANDLING_THROW_INVALID (undefined) + +/// Throw C++ exception on pole errors. +/// Defining this to a string literal causes operations on half-precision values to throw a +/// [std::domain_error](https://en.cppreference.com/w/cpp/error/domain_error) with the specified message on pole errors. +#define HALF_ERRHANDLING_THROW_DIVBYZERO (undefined) + +/// Throw C++ exception on overflow errors. +/// Defining this to a string literal causes operations on half-precision values to throw a +/// [std::overflow_error](https://en.cppreference.com/w/cpp/error/overflow_error) with the specified message on overflows. +#define HALF_ERRHANDLING_THROW_OVERFLOW (undefined) + +/// Throw C++ exception on underflow errors. 
+/// Defining this to a string literal causes operations on half-precision values to throw a +/// [std::underflow_error](https://en.cppreference.com/w/cpp/error/underflow_error) with the specified message on underflows. +#define HALF_ERRHANDLING_THROW_UNDERFLOW (undefined) + +/// Throw C++ exception on rounding errors. +/// Defining this to a string literal causes operations on half-precision values to throw a +/// [std::range_error](https://en.cppreference.com/w/cpp/error/range_error) with the specified message on general rounding errors. +#define HALF_ERRHANDLING_THROW_INEXACT (undefined) +#endif + +#ifndef HALF_ERRHANDLING_OVERFLOW_TO_INEXACT +/// Raise INEXACT exception on overflow. +/// Defining this to 1 (default) causes overflow errors to automatically raise inexact exceptions in addition. +/// These will be raised after any possible handling of the overflow exception. +#define HALF_ERRHANDLING_OVERFLOW_TO_INEXACT 1 +#endif + +#ifndef HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT +/// Raise INEXACT exception on underflow. +/// Defining this to 1 (default) causes underflow errors to automatically raise inexact exceptions in addition. +/// These will be raised after any possible handling of the underflow exception. +/// +/// **Note:** This will actually cause underflow (and the accompanying inexact) exceptions to be raised *only* when the result +/// is inexact, while if disabled bare underflow errors will be raised for *any* (possibly exact) subnormal result. +#define HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT 1 +#endif + +/// Default rounding mode. +/// This specifies the rounding mode used for all conversions between [half](\ref half_float::half)s and more precise types +/// (unless using half_cast() and specifying the rounding mode directly) as well as in arithmetic operations and mathematical +/// functions. 
It can be redefined (before including half.hpp) to one of the standard rounding modes using their respective +/// constants or the equivalent values of +/// [std::float_round_style](https://en.cppreference.com/w/cpp/types/numeric_limits/float_round_style): +/// +/// `std::float_round_style` | value | rounding +/// ---------------------------------|-------|------------------------- +/// `std::round_indeterminate` | -1 | fastest +/// `std::round_toward_zero` | 0 | toward zero +/// `std::round_to_nearest` | 1 | to nearest (default) +/// `std::round_toward_infinity` | 2 | toward positive infinity +/// `std::round_toward_neg_infinity` | 3 | toward negative infinity +/// +/// By default this is set to `1` (`std::round_to_nearest`), which rounds results to the nearest representable value. It can even +/// be set to [std::numeric_limits::round_style](https://en.cppreference.com/w/cpp/types/numeric_limits/round_style) to synchronize +/// the rounding mode with that of the built-in single-precision implementation (which is likely `std::round_to_nearest`, though). +#ifndef HALF_ROUND_STYLE + #define HALF_ROUND_STYLE 1 // = std::round_to_nearest +#endif + +/// Value signaling overflow. +/// In correspondence with `HUGE_VAL[F|L]` from `` this symbol expands to a positive value signaling the overflow of an +/// operation, in particular it just evaluates to positive infinity. +/// +/// **See also:** Documentation for [HUGE_VAL](https://en.cppreference.com/w/cpp/numeric/math/HUGE_VAL) +#define HUGE_VALH std::numeric_limits::infinity() + +/// Fast half-precision fma function. +/// This symbol is defined if the fma() function generally executes as fast as, or faster than, a separate +/// half-precision multiplication followed by an addition, which is always the case. +/// +/// **See also:** Documentation for [FP_FAST_FMA](https://en.cppreference.com/w/cpp/numeric/math/fma) +#define FP_FAST_FMAH 1 + +/// Half rounding mode. 
+/// In correspondence with `FLT_ROUNDS` from `<cfloat>` this symbol expands to the rounding mode used for +/// half-precision operations. It is an alias for [HALF_ROUND_STYLE](\ref HALF_ROUND_STYLE). +/// +/// **See also:** Documentation for [FLT_ROUNDS](https://en.cppreference.com/w/cpp/types/climits/FLT_ROUNDS) +#define HLF_ROUNDS HALF_ROUND_STYLE + +#ifndef FP_ILOGB0 + #define FP_ILOGB0 INT_MIN +#endif +#ifndef FP_ILOGBNAN + #define FP_ILOGBNAN INT_MAX +#endif +#ifndef FP_SUBNORMAL + #define FP_SUBNORMAL 0 +#endif +#ifndef FP_ZERO + #define FP_ZERO 1 +#endif +#ifndef FP_NAN + #define FP_NAN 2 +#endif +#ifndef FP_INFINITE + #define FP_INFINITE 3 +#endif +#ifndef FP_NORMAL + #define FP_NORMAL 4 +#endif + +#if !HALF_ENABLE_CPP11_CFENV && !defined(FE_ALL_EXCEPT) + #define FE_INVALID 0x10 + #define FE_DIVBYZERO 0x08 + #define FE_OVERFLOW 0x04 + #define FE_UNDERFLOW 0x02 + #define FE_INEXACT 0x01 + #define FE_ALL_EXCEPT (FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT) +#endif + + +/// Main namespace for half-precision functionality. +/// This namespace contains all the functionality provided by the library. +namespace half_float +{ + class half; + +#if HALF_ENABLE_CPP11_USER_LITERALS + /// Library-defined half-precision literals. + /// Import this namespace to enable half-precision floating-point literals: + /// ~~~~{.cpp} + /// using namespace half_float::literal; + /// half_float::half x = 4.2_h; + /// ~~~~ + namespace literal + { + half operator "" _h(long double); + } +#endif + + /// \internal + /// \brief Implementation details. + namespace detail + { + #if HALF_ENABLE_CPP11_TYPE_TRAITS + /// Conditional type. + template struct conditional : std::conditional {}; + + /// Helper for tag dispatching. + template struct bool_type : std::integral_constant {}; + using std::true_type; + using std::false_type; + + /// Type traits for floating-point types. + template struct is_float : std::is_floating_point {}; + #else + /// Conditional type.
+ template struct conditional { typedef T type; }; + template struct conditional { typedef F type; }; + + /// Helper for tag dispatching. + template struct bool_type {}; + typedef bool_type true_type; + typedef bool_type false_type; + + /// Type traits for floating-point types. + template struct is_float : false_type {}; + template struct is_float : is_float {}; + template struct is_float : is_float {}; + template struct is_float : is_float {}; + template<> struct is_float : true_type {}; + template<> struct is_float : true_type {}; + template<> struct is_float : true_type {}; + #endif + + /// Type traits for floating-point bits. + template struct bits { typedef unsigned char type; }; + template struct bits : bits {}; + template struct bits : bits {}; + template struct bits : bits {}; + + #if HALF_ENABLE_CPP11_CSTDINT + /// Unsigned integer of (at least) 16 bits width. + typedef std::uint_least16_t uint16; + + /// Fastest unsigned integer of (at least) 32 bits width. + typedef std::uint_fast32_t uint32; + + /// Fastest signed integer of (at least) 32 bits width. + typedef std::int_fast32_t int32; + + /// Unsigned integer of (at least) 32 bits width. + template<> struct bits { typedef std::uint_least32_t type; }; + + /// Unsigned integer of (at least) 64 bits width. + template<> struct bits { typedef std::uint_least64_t type; }; + #else + /// Unsigned integer of (at least) 16 bits width. + typedef unsigned short uint16; + + /// Fastest unsigned integer of (at least) 32 bits width. + typedef unsigned long uint32; + + /// Fastest signed integer of (at least) 32 bits width. + typedef long int32; + + /// Unsigned integer of (at least) 32 bits width. + template<> struct bits : conditional::digits>=32,unsigned int,unsigned long> {}; + + #if HALF_ENABLE_CPP11_LONG_LONG + /// Unsigned integer of (at least) 64 bits width. + template<> struct bits : conditional::digits>=64,unsigned long,unsigned long long> {}; + #else + /// Unsigned integer of (at least) 64 bits width.
+ template<> struct bits { typedef unsigned long type; }; + #endif + #endif + + #ifdef HALF_ARITHMETIC_TYPE + /// Type to use for arithmetic computations and mathematic functions internally. + typedef HALF_ARITHMETIC_TYPE internal_t; + #endif + + /// Tag type for binary construction. + struct binary_t {}; + + /// Tag for binary construction. + HALF_CONSTEXPR_CONST binary_t binary = binary_t(); + + /// \name Implementation defined classification and arithmetic + /// \{ + + /// Check for infinity. + /// \tparam T argument type (builtin floating-point type) + /// \param arg value to query + /// \retval true if infinity + /// \retval false else + template bool builtin_isinf(T arg) + { + #if HALF_ENABLE_CPP11_CMATH + return std::isinf(arg); + #elif defined(_MSC_VER) + return !::_finite(static_cast(arg)) && !::_isnan(static_cast(arg)); + #else + return arg == std::numeric_limits::infinity() || arg == -std::numeric_limits::infinity(); + #endif + } + + /// Check for NaN. + /// \tparam T argument type (builtin floating-point type) + /// \param arg value to query + /// \retval true if not a number + /// \retval false else + template bool builtin_isnan(T arg) + { + #if HALF_ENABLE_CPP11_CMATH + return std::isnan(arg); + #elif defined(_MSC_VER) + return ::_isnan(static_cast(arg)) != 0; + #else + return arg != arg; + #endif + } + + /// Check sign. + /// \tparam T argument type (builtin floating-point type) + /// \param arg value to query + /// \retval true if signbit set + /// \retval false else + template bool builtin_signbit(T arg) + { + #if HALF_ENABLE_CPP11_CMATH + return std::signbit(arg); + #else + return arg < T() || (arg == T() && T(1)/arg < T()); + #endif + } + + /// Platform-independent sign mask. 
+ /// \param arg integer value in two's complement + /// \retval -1 if \a arg negative + /// \retval 0 if \a arg non-negative + inline uint32 sign_mask(uint32 arg) + { + static const int N = std::numeric_limits::digits - 1; + #if HALF_TWOS_COMPLEMENT_INT + return static_cast(arg) >> N; + #else + return -((arg>>N)&1); + #endif + } + + /// Platform-independent arithmetic right shift. + /// \param arg integer value in two's complement + /// \param i shift amount (at most 31) + /// \return \a arg right shifted for \a i bits with possible sign extension + inline uint32 arithmetic_shift(uint32 arg, int i) + { + #if HALF_TWOS_COMPLEMENT_INT + return static_cast(arg) >> i; + #else + return static_cast(arg)/(static_cast(1)<>(std::numeric_limits::digits-1))&1); + #endif + } + + /// \} + /// \name Error handling + /// \{ + + /// Internal exception flags. + /// \return reference to global exception flags + inline int& errflags() { HALF_THREAD_LOCAL int flags = 0; return flags; } + + /// Raise floating-point exception.
+ /// \param flags exceptions to raise + /// \param cond condition to raise exceptions for + inline void raise(int HALF_UNUSED_NOERR(flags), bool HALF_UNUSED_NOERR(cond) = true) + { + #if HALF_ERRHANDLING + if(!cond) + return; + #if HALF_ERRHANDLING_FLAGS + errflags() |= flags; + #endif + #if HALF_ERRHANDLING_ERRNO + if(flags & FE_INVALID) + errno = EDOM; + else if(flags & (FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW)) + errno = ERANGE; + #endif + #if HALF_ERRHANDLING_FENV && HALF_ENABLE_CPP11_CFENV + std::feraiseexcept(flags); + #endif + #ifdef HALF_ERRHANDLING_THROW_INVALID + if(flags & FE_INVALID) + throw std::domain_error(HALF_ERRHANDLING_THROW_INVALID); + #endif + #ifdef HALF_ERRHANDLING_THROW_DIVBYZERO + if(flags & FE_DIVBYZERO) + throw std::domain_error(HALF_ERRHANDLING_THROW_DIVBYZERO); + #endif + #ifdef HALF_ERRHANDLING_THROW_OVERFLOW + if(flags & FE_OVERFLOW) + throw std::overflow_error(HALF_ERRHANDLING_THROW_OVERFLOW); + #endif + #ifdef HALF_ERRHANDLING_THROW_UNDERFLOW + if(flags & FE_UNDERFLOW) + throw std::underflow_error(HALF_ERRHANDLING_THROW_UNDERFLOW); + #endif + #ifdef HALF_ERRHANDLING_THROW_INEXACT + if(flags & FE_INEXACT) + throw std::range_error(HALF_ERRHANDLING_THROW_INEXACT); + #endif + #if HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT + if((flags & FE_UNDERFLOW) && !(flags & FE_INEXACT)) + raise(FE_INEXACT); + #endif + #if HALF_ERRHANDLING_OVERFLOW_TO_INEXACT + if((flags & FE_OVERFLOW) && !(flags & FE_INEXACT)) + raise(FE_INEXACT); + #endif + #endif + } + + /// Check and signal for any NaN. 
+ /// \param x first half-precision value to check + /// \param y second half-precision value to check + /// \retval true if either \a x or \a y is NaN + /// \retval false else + /// \exception FE_INVALID if \a x or \a y is NaN + inline HALF_CONSTEXPR_NOERR bool compsignal(unsigned int x, unsigned int y) + { + #if HALF_ERRHANDLING + raise(FE_INVALID, (x&0x7FFF)>0x7C00 || (y&0x7FFF)>0x7C00); + #endif + return (x&0x7FFF) > 0x7C00 || (y&0x7FFF) > 0x7C00; + } + + /// Signal and silence signaling NaN. + /// \param nan half-precision NaN value + /// \return quiet NaN + /// \exception FE_INVALID if \a nan is signaling NaN + inline HALF_CONSTEXPR_NOERR unsigned int signal(unsigned int nan) + { + #if HALF_ERRHANDLING + raise(FE_INVALID, !(nan&0x200)); + #endif + return nan | 0x200; + } + + /// Signal and silence signaling NaNs. + /// \param x first half-precision value to check + /// \param y second half-precision value to check + /// \return quiet NaN + /// \exception FE_INVALID if \a x or \a y is signaling NaN + inline HALF_CONSTEXPR_NOERR unsigned int signal(unsigned int x, unsigned int y) + { + #if HALF_ERRHANDLING + raise(FE_INVALID, ((x&0x7FFF)>0x7C00 && !(x&0x200)) || ((y&0x7FFF)>0x7C00 && !(y&0x200))); + #endif + return ((x&0x7FFF)>0x7C00) ? (x|0x200) : (y|0x200); + } + + /// Signal and silence signaling NaNs. + /// \param x first half-precision value to check + /// \param y second half-precision value to check + /// \param z third half-precision value to check + /// \return quiet NaN + /// \exception FE_INVALID if \a x, \a y or \a z is signaling NaN + inline HALF_CONSTEXPR_NOERR unsigned int signal(unsigned int x, unsigned int y, unsigned int z) + { + #if HALF_ERRHANDLING + raise(FE_INVALID, ((x&0x7FFF)>0x7C00 && !(x&0x200)) || ((y&0x7FFF)>0x7C00 && !(y&0x200)) || ((z&0x7FFF)>0x7C00 && !(z&0x200))); + #endif + return ((x&0x7FFF)>0x7C00) ? (x|0x200) : ((y&0x7FFF)>0x7C00) ? (y|0x200) : (z|0x200); + } + + /// Select value or signaling NaN. 
+ /// \param x preferred half-precision value + /// \param y ignored half-precision value except for signaling NaN + /// \return quieted \a y if it is a signaling NaN (only when error handling is enabled), \a x otherwise + /// \exception FE_INVALID if \a y is signaling NaN + inline HALF_CONSTEXPR_NOERR unsigned int select(unsigned int x, unsigned int HALF_UNUSED_NOERR(y)) + { + #if HALF_ERRHANDLING + return (((y&0x7FFF)>0x7C00) && !(y&0x200)) ? signal(y) : x; + #else + return x; + #endif + } + + /// Raise domain error and return NaN. + /// \return quiet NaN + /// \exception FE_INVALID + inline HALF_CONSTEXPR_NOERR unsigned int invalid() + { + #if HALF_ERRHANDLING + raise(FE_INVALID); + #endif + return 0x7FFF; + } + + /// Raise pole error and return infinity. + /// \param sign half-precision value with sign bit only + /// \return half-precision infinity with sign of \a sign + /// \exception FE_DIVBYZERO + inline HALF_CONSTEXPR_NOERR unsigned int pole(unsigned int sign = 0) + { + #if HALF_ERRHANDLING + raise(FE_DIVBYZERO); + #endif + return sign | 0x7C00; + } + + /// Check value for underflow. + /// \param arg non-zero half-precision value to check + /// \return \a arg + /// \exception FE_UNDERFLOW if arg is subnormal + inline HALF_CONSTEXPR_NOERR unsigned int check_underflow(unsigned int arg) + { + #if HALF_ERRHANDLING && !HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT + raise(FE_UNDERFLOW, !(arg&0x7C00)); + #endif + return arg; + } + + /// \} + /// \name Conversion and rounding + /// \{ + + /// Half-precision overflow. + /// \tparam R rounding mode to use + /// \param sign half-precision value with sign bit only + /// \return rounded overflowing half-precision value + /// \exception FE_OVERFLOW + template HALF_CONSTEXPR_NOERR unsigned int overflow(unsigned int sign = 0) + { + #if HALF_ERRHANDLING + raise(FE_OVERFLOW); + #endif + return (R==std::round_toward_infinity) ? (sign+0x7C00-(sign>>15)) : + (R==std::round_toward_neg_infinity) ? (sign+0x7BFF+(sign>>15)) : + (R==std::round_toward_zero) ?
(sign|0x7BFF) : + (sign|0x7C00); + } + + /// Half-precision underflow. + /// \tparam R rounding mode to use + /// \param sign half-precision value with sign bit only + /// \return rounded underflowing half-precision value + /// \exception FE_UNDERFLOW + template HALF_CONSTEXPR_NOERR unsigned int underflow(unsigned int sign = 0) + { + #if HALF_ERRHANDLING + raise(FE_UNDERFLOW); + #endif + return (R==std::round_toward_infinity) ? (sign+1-(sign>>15)) : + (R==std::round_toward_neg_infinity) ? (sign+(sign>>15)) : + sign; + } + + /// Round half-precision number. + /// \tparam R rounding mode to use + /// \tparam I `true` to always raise INEXACT exception, `false` to raise only for rounded results + /// \param value finite half-precision number to round + /// \param g guard bit (most significant discarded bit) + /// \param s sticky bit (or of all but the most significant discarded bits) + /// \return rounded half-precision value + /// \exception FE_OVERFLOW on overflows + /// \exception FE_UNDERFLOW on underflows + /// \exception FE_INEXACT if value had to be rounded or \a I is `true` + template HALF_CONSTEXPR_NOERR unsigned int rounded(unsigned int value, int g, int s) + { + #if HALF_ERRHANDLING + value += (R==std::round_to_nearest) ? (g&(s|value)) : + (R==std::round_toward_infinity) ? (~(value>>15)&(g|s)) : + (R==std::round_toward_neg_infinity) ? ((value>>15)&(g|s)) : 0; + if((value&0x7C00) == 0x7C00) + raise(FE_OVERFLOW); + else if(value & 0x7C00) + raise(FE_INEXACT, I || (g|s)!=0); + else + raise(FE_UNDERFLOW, !(HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT) || I || (g|s)!=0); + return value; + #else + return (R==std::round_to_nearest) ? (value+(g&(s|value))) : + (R==std::round_toward_infinity) ? (value+(~(value>>15)&(g|s))) : + (R==std::round_toward_neg_infinity) ? (value+((value>>15)&(g|s))) : + value; + #endif + } + + /// Round half-precision number to nearest integer value. 
+ /// \tparam R rounding mode to use + /// \tparam E `true` for round to even, `false` for round away from zero + /// \tparam I `true` to raise INEXACT exception (if inexact), `false` to never raise it + /// \param value half-precision value to round + /// \return half-precision bits for nearest integral value + /// \exception FE_INVALID for signaling NaN + /// \exception FE_INEXACT if value had to be rounded and \a I is `true` + template unsigned int integral(unsigned int value) + { + unsigned int abs = value & 0x7FFF; + if(abs < 0x3C00) + { + raise(FE_INEXACT, I); + return ((R==std::round_to_nearest) ? (0x3C00&-static_cast(abs>=(0x3800+E))) : + (R==std::round_toward_infinity) ? (0x3C00&-(~(value>>15)&(abs!=0))) : + (R==std::round_toward_neg_infinity) ? (0x3C00&-static_cast(value>0x8000)) : + 0) | (value&0x8000); + } + if(abs >= 0x6400) + return (abs>0x7C00) ? signal(value) : value; + unsigned int exp = 25 - (abs>>10), mask = (1<>exp)&E)) : + (R==std::round_toward_infinity) ? (mask&((value>>15)-1)) : + (R==std::round_toward_neg_infinity) ? (mask&-(value>>15)) : + 0) + value) & ~mask; + } + + /// Convert fixed point to half-precision floating-point. 
+ /// \tparam R rounding mode to use + /// \tparam F number of fractional bits in [11,31] + /// \tparam S `true` for signed, `false` for unsigned + /// \tparam N `true` for additional normalization step, `false` if already normalized to 1.F + /// \tparam I `true` to always raise INEXACT exception, `false` to raise only for rounded results + /// \param m mantissa in Q1.F fixed point format + /// \param exp biased exponent - 1 + /// \param sign half-precision value with sign bit only + /// \param s sticky bit (or of all but the most significant already discarded bits) + /// \return value converted to half-precision + /// \exception FE_OVERFLOW on overflows + /// \exception FE_UNDERFLOW on underflows + /// \exception FE_INEXACT if value had to be rounded or \a I is `true` + template unsigned int fixed2half(uint32 m, int exp = 14, unsigned int sign = 0, int s = 0) + { + if(S) + { + uint32 msign = sign_mask(m); + m = (m^msign) - msign; + sign = msign & 0x8000; + } + if(N) + for(; m<(static_cast(1)<(sign+(m>>(F-10-exp)), (m>>(F-11-exp))&1, s|((m&((static_cast(1)<<(F-11-exp))-1))!=0)); + return rounded(sign+(exp<<10)+(m>>(F-10)), (m>>(F-11))&1, s|((m&((static_cast(1)<<(F-11))-1))!=0)); + } + + /// Convert IEEE single-precision to half-precision. + /// Credit for this goes to [Jeroen van der Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf). + /// \tparam R rounding mode to use + /// \param value single-precision value to convert + /// \return rounded half-precision value + /// \exception FE_OVERFLOW on overflows + /// \exception FE_UNDERFLOW on underflows + /// \exception FE_INEXACT if value had to be rounded + template unsigned int float2half_impl(float value, true_type) + { + #if HALF_ENABLE_F16C_INTRINSICS + return _mm_cvtsi128_si32(_mm_cvtps_ph(_mm_set_ss(value), + (R==std::round_to_nearest) ? _MM_FROUND_TO_NEAREST_INT : + (R==std::round_toward_zero) ? _MM_FROUND_TO_ZERO : + (R==std::round_toward_infinity) ? 
_MM_FROUND_TO_POS_INF : + (R==std::round_toward_neg_infinity) ? _MM_FROUND_TO_NEG_INF : + _MM_FROUND_CUR_DIRECTION)); + #else + bits::type fbits; + std::memcpy(&fbits, &value, sizeof(float)); + #if 1 + unsigned int sign = (fbits>>16) & 0x8000; + fbits &= 0x7FFFFFFF; + if(fbits >= 0x7F800000) + return sign | 0x7C00 | ((fbits>0x7F800000) ? (0x200|((fbits>>13)&0x3FF)) : 0); + if(fbits >= 0x47800000) + return overflow(sign); + if(fbits >= 0x38800000) + return rounded(sign|(((fbits>>23)-112)<<10)|((fbits>>13)&0x3FF), (fbits>>12)&1, (fbits&0xFFF)!=0); + if(fbits >= 0x33000000) + { + int i = 125 - (fbits>>23); + fbits = (fbits&0x7FFFFF) | 0x800000; + return rounded(sign|(fbits>>(i+1)), (fbits>>i)&1, (fbits&((static_cast(1)<(sign); + return sign; + #else + static const uint16 base_table[512] = { + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, + 0x0200, 0x0400, 0x0800, 0x0C00, 0x1000, 0x1400, 0x1800, 0x1C00, 0x2000, 0x2400, 0x2800, 0x2C00, 0x3000, 0x3400, 0x3800, 0x3C00, + 0x4000, 0x4400, 0x4800, 0x4C00, 0x5000, 0x5400, 0x5800, 0x5C00, 0x6000, 0x6400, 0x6800, 0x6C00, 0x7000, 0x7400, 0x7800, 0x7BFF, + 0x7BFF, 0x7BFF, 0x7BFF, 
0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, + 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, + 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, + 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, + 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, + 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, + 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7C00, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, + 0x8200, 0x8400, 0x8800, 0x8C00, 0x9000, 0x9400, 0x9800, 0x9C00, 0xA000, 0xA400, 0xA800, 0xAC00, 0xB000, 0xB400, 0xB800, 0xBC00, + 0xC000, 0xC400, 0xC800, 0xCC00, 0xD000, 0xD400, 0xD800, 0xDC00, 0xE000, 
0xE400, 0xE800, 0xEC00, 0xF000, 0xF400, 0xF800, 0xFBFF, + 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, + 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, + 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, + 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, + 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, + 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, + 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFC00 }; + static const unsigned char shift_table[256] = { + 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13 }; + int sexp = fbits >> 23, exp = sexp & 0xFF, i = shift_table[exp]; + fbits &= 0x7FFFFF; + uint32 m = (fbits|((exp!=0)<<23)) & -static_cast(exp!=0xFF); + return rounded(base_table[sexp]+(fbits>>i), (m>>(i-1))&1, (((static_cast(1)<<(i-1))-1)&m)!=0); + #endif + #endif + } + + /// Convert IEEE double-precision to half-precision. + /// \tparam R rounding mode to use + /// \param value double-precision value to convert + /// \return rounded half-precision value + /// \exception FE_OVERFLOW on overflows + /// \exception FE_UNDERFLOW on underflows + /// \exception FE_INEXACT if value had to be rounded + template unsigned int float2half_impl(double value, true_type) + { + #if HALF_ENABLE_F16C_INTRINSICS + if(R == std::round_indeterminate) + return _mm_cvtsi128_si32(_mm_cvtps_ph(_mm_cvtpd_ps(_mm_set_sd(value)), _MM_FROUND_CUR_DIRECTION)); + #endif + bits::type dbits; + std::memcpy(&dbits, &value, sizeof(double)); + uint32 hi = dbits >> 32, lo = dbits & 0xFFFFFFFF; + unsigned int sign = (hi>>16) & 0x8000; + hi &= 0x7FFFFFFF; + if(hi >= 0x7FF00000) + return sign | 0x7C00 | ((dbits&0xFFFFFFFFFFFFF) ? (0x200|((hi>>10)&0x3FF)) : 0); + if(hi >= 0x40F00000) + return overflow(sign); + if(hi >= 0x3F100000) + return rounded(sign|(((hi>>20)-1008)<<10)|((hi>>10)&0x3FF), (hi>>9)&1, ((hi&0x1FF)|lo)!=0); + if(hi >= 0x3E600000) + { + int i = 1018 - (hi>>20); + hi = (hi&0xFFFFF) | 0x100000; + return rounded(sign|(hi>>(i+1)), (hi>>i)&1, ((hi&((static_cast(1)<(sign); + return sign; + } + + /// Convert non-IEEE floating-point to half-precision. + /// \tparam R rounding mode to use + /// \tparam T source type (builtin floating-point type) + /// \param value floating-point value to convert + /// \return rounded half-precision value + /// \exception FE_OVERFLOW on overflows + /// \exception FE_UNDERFLOW on underflows + /// \exception FE_INEXACT if value had to be rounded + template unsigned int float2half_impl(T value, ...) 
+ { + unsigned int hbits = static_cast(builtin_signbit(value)) << 15; + if(value == T()) + return hbits; + if(builtin_isnan(value)) + return hbits | 0x7FFF; + if(builtin_isinf(value)) + return hbits | 0x7C00; + int exp; + std::frexp(value, &exp); + if(exp > 16) + return overflow(hbits); + if(exp < -13) + value = std::ldexp(value, 25); + else + { + value = std::ldexp(value, 12-exp); + hbits |= ((exp+13)<<10); + } + T ival, frac = std::modf(value, &ival); + int m = std::abs(static_cast(ival)); + return rounded(hbits+(m>>1), m&1, frac!=T()); + } + + /// Convert floating-point to half-precision. + /// \tparam R rounding mode to use + /// \tparam T source type (builtin floating-point type) + /// \param value floating-point value to convert + /// \return rounded half-precision value + /// \exception FE_OVERFLOW on overflows + /// \exception FE_UNDERFLOW on underflows + /// \exception FE_INEXACT if value had to be rounded + template unsigned int float2half(T value) + { + return float2half_impl(value, bool_type::is_iec559&&sizeof(typename bits::type)==sizeof(T)>()); + } + + /// Convert integer to half-precision floating-point. + /// \tparam R rounding mode to use + /// \tparam T type to convert (builtin integer type) + /// \param value integral value to convert + /// \return rounded half-precision value + /// \exception FE_OVERFLOW on overflows + /// \exception FE_INEXACT if value had to be rounded + template unsigned int int2half(T value) + { + unsigned int bits = static_cast(value<0) << 15; + if(!value) + return bits; + if(bits) + value = -value; + if(value > 0xFFFF) + return overflow(bits); + unsigned int m = static_cast(value), exp = 24; + for(; m<0x400; m<<=1,--exp) ; + for(; m>0x7FF; m>>=1,++exp) ; + bits |= (exp<<10) + m; + return (exp>24) ? rounded(bits, (value>>(exp-25))&1, (((1<<(exp-25))-1)&value)!=0) : bits; + } + + /// Convert half-precision to IEEE single-precision. 
+ /// Credit for this goes to [Jeroen van der Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf). + /// \param value half-precision value to convert + /// \return single-precision value + inline float half2float_impl(unsigned int value, float, true_type) + { + #if HALF_ENABLE_F16C_INTRINSICS + return _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(value))); + #else + #if 0 + bits::type fbits = static_cast::type>(value&0x8000) << 16; + int abs = value & 0x7FFF; + if(abs) + { + fbits |= 0x38000000 << static_cast(abs>=0x7C00); + for(; abs<0x400; abs<<=1,fbits-=0x800000) ; + fbits += static_cast::type>(abs) << 13; + } + #else + static const bits::type mantissa_table[2048] = { + 0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34A00000, 0x34C00000, 0x34E00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000, 0x35400000, 0x35500000, 0x35600000, 0x35700000, + 0x35800000, 0x35880000, 0x35900000, 0x35980000, 0x35A00000, 0x35A80000, 0x35B00000, 0x35B80000, 0x35C00000, 0x35C80000, 0x35D00000, 0x35D80000, 0x35E00000, 0x35E80000, 0x35F00000, 0x35F80000, + 0x36000000, 0x36040000, 0x36080000, 0x360C0000, 0x36100000, 0x36140000, 0x36180000, 0x361C0000, 0x36200000, 0x36240000, 0x36280000, 0x362C0000, 0x36300000, 0x36340000, 0x36380000, 0x363C0000, + 0x36400000, 0x36440000, 0x36480000, 0x364C0000, 0x36500000, 0x36540000, 0x36580000, 0x365C0000, 0x36600000, 0x36640000, 0x36680000, 0x366C0000, 0x36700000, 0x36740000, 0x36780000, 0x367C0000, + 0x36800000, 0x36820000, 0x36840000, 0x36860000, 0x36880000, 0x368A0000, 0x368C0000, 0x368E0000, 0x36900000, 0x36920000, 0x36940000, 0x36960000, 0x36980000, 0x369A0000, 0x369C0000, 0x369E0000, + 0x36A00000, 0x36A20000, 0x36A40000, 0x36A60000, 0x36A80000, 0x36AA0000, 0x36AC0000, 0x36AE0000, 0x36B00000, 0x36B20000, 0x36B40000, 0x36B60000, 0x36B80000, 0x36BA0000, 0x36BC0000, 0x36BE0000, + 0x36C00000, 0x36C20000, 0x36C40000, 0x36C60000, 0x36C80000, 0x36CA0000, 0x36CC0000, 0x36CE0000, 0x36D00000, 0x36D20000, 0x36D40000, 
0x36D60000, 0x36D80000, 0x36DA0000, 0x36DC0000, 0x36DE0000, + 0x36E00000, 0x36E20000, 0x36E40000, 0x36E60000, 0x36E80000, 0x36EA0000, 0x36EC0000, 0x36EE0000, 0x36F00000, 0x36F20000, 0x36F40000, 0x36F60000, 0x36F80000, 0x36FA0000, 0x36FC0000, 0x36FE0000, + 0x37000000, 0x37010000, 0x37020000, 0x37030000, 0x37040000, 0x37050000, 0x37060000, 0x37070000, 0x37080000, 0x37090000, 0x370A0000, 0x370B0000, 0x370C0000, 0x370D0000, 0x370E0000, 0x370F0000, + 0x37100000, 0x37110000, 0x37120000, 0x37130000, 0x37140000, 0x37150000, 0x37160000, 0x37170000, 0x37180000, 0x37190000, 0x371A0000, 0x371B0000, 0x371C0000, 0x371D0000, 0x371E0000, 0x371F0000, + 0x37200000, 0x37210000, 0x37220000, 0x37230000, 0x37240000, 0x37250000, 0x37260000, 0x37270000, 0x37280000, 0x37290000, 0x372A0000, 0x372B0000, 0x372C0000, 0x372D0000, 0x372E0000, 0x372F0000, + 0x37300000, 0x37310000, 0x37320000, 0x37330000, 0x37340000, 0x37350000, 0x37360000, 0x37370000, 0x37380000, 0x37390000, 0x373A0000, 0x373B0000, 0x373C0000, 0x373D0000, 0x373E0000, 0x373F0000, + 0x37400000, 0x37410000, 0x37420000, 0x37430000, 0x37440000, 0x37450000, 0x37460000, 0x37470000, 0x37480000, 0x37490000, 0x374A0000, 0x374B0000, 0x374C0000, 0x374D0000, 0x374E0000, 0x374F0000, + 0x37500000, 0x37510000, 0x37520000, 0x37530000, 0x37540000, 0x37550000, 0x37560000, 0x37570000, 0x37580000, 0x37590000, 0x375A0000, 0x375B0000, 0x375C0000, 0x375D0000, 0x375E0000, 0x375F0000, + 0x37600000, 0x37610000, 0x37620000, 0x37630000, 0x37640000, 0x37650000, 0x37660000, 0x37670000, 0x37680000, 0x37690000, 0x376A0000, 0x376B0000, 0x376C0000, 0x376D0000, 0x376E0000, 0x376F0000, + 0x37700000, 0x37710000, 0x37720000, 0x37730000, 0x37740000, 0x37750000, 0x37760000, 0x37770000, 0x37780000, 0x37790000, 0x377A0000, 0x377B0000, 0x377C0000, 0x377D0000, 0x377E0000, 0x377F0000, + 0x37800000, 0x37808000, 0x37810000, 0x37818000, 0x37820000, 0x37828000, 0x37830000, 0x37838000, 0x37840000, 0x37848000, 0x37850000, 0x37858000, 0x37860000, 0x37868000, 0x37870000, 0x37878000, 
+ 0x37880000, 0x37888000, 0x37890000, 0x37898000, 0x378A0000, 0x378A8000, 0x378B0000, 0x378B8000, 0x378C0000, 0x378C8000, 0x378D0000, 0x378D8000, 0x378E0000, 0x378E8000, 0x378F0000, 0x378F8000, + 0x37900000, 0x37908000, 0x37910000, 0x37918000, 0x37920000, 0x37928000, 0x37930000, 0x37938000, 0x37940000, 0x37948000, 0x37950000, 0x37958000, 0x37960000, 0x37968000, 0x37970000, 0x37978000, + 0x37980000, 0x37988000, 0x37990000, 0x37998000, 0x379A0000, 0x379A8000, 0x379B0000, 0x379B8000, 0x379C0000, 0x379C8000, 0x379D0000, 0x379D8000, 0x379E0000, 0x379E8000, 0x379F0000, 0x379F8000, + 0x37A00000, 0x37A08000, 0x37A10000, 0x37A18000, 0x37A20000, 0x37A28000, 0x37A30000, 0x37A38000, 0x37A40000, 0x37A48000, 0x37A50000, 0x37A58000, 0x37A60000, 0x37A68000, 0x37A70000, 0x37A78000, + 0x37A80000, 0x37A88000, 0x37A90000, 0x37A98000, 0x37AA0000, 0x37AA8000, 0x37AB0000, 0x37AB8000, 0x37AC0000, 0x37AC8000, 0x37AD0000, 0x37AD8000, 0x37AE0000, 0x37AE8000, 0x37AF0000, 0x37AF8000, + 0x37B00000, 0x37B08000, 0x37B10000, 0x37B18000, 0x37B20000, 0x37B28000, 0x37B30000, 0x37B38000, 0x37B40000, 0x37B48000, 0x37B50000, 0x37B58000, 0x37B60000, 0x37B68000, 0x37B70000, 0x37B78000, + 0x37B80000, 0x37B88000, 0x37B90000, 0x37B98000, 0x37BA0000, 0x37BA8000, 0x37BB0000, 0x37BB8000, 0x37BC0000, 0x37BC8000, 0x37BD0000, 0x37BD8000, 0x37BE0000, 0x37BE8000, 0x37BF0000, 0x37BF8000, + 0x37C00000, 0x37C08000, 0x37C10000, 0x37C18000, 0x37C20000, 0x37C28000, 0x37C30000, 0x37C38000, 0x37C40000, 0x37C48000, 0x37C50000, 0x37C58000, 0x37C60000, 0x37C68000, 0x37C70000, 0x37C78000, + 0x37C80000, 0x37C88000, 0x37C90000, 0x37C98000, 0x37CA0000, 0x37CA8000, 0x37CB0000, 0x37CB8000, 0x37CC0000, 0x37CC8000, 0x37CD0000, 0x37CD8000, 0x37CE0000, 0x37CE8000, 0x37CF0000, 0x37CF8000, + 0x37D00000, 0x37D08000, 0x37D10000, 0x37D18000, 0x37D20000, 0x37D28000, 0x37D30000, 0x37D38000, 0x37D40000, 0x37D48000, 0x37D50000, 0x37D58000, 0x37D60000, 0x37D68000, 0x37D70000, 0x37D78000, + 0x37D80000, 0x37D88000, 0x37D90000, 0x37D98000, 
0x37DA0000, 0x37DA8000, 0x37DB0000, 0x37DB8000, 0x37DC0000, 0x37DC8000, 0x37DD0000, 0x37DD8000, 0x37DE0000, 0x37DE8000, 0x37DF0000, 0x37DF8000, + 0x37E00000, 0x37E08000, 0x37E10000, 0x37E18000, 0x37E20000, 0x37E28000, 0x37E30000, 0x37E38000, 0x37E40000, 0x37E48000, 0x37E50000, 0x37E58000, 0x37E60000, 0x37E68000, 0x37E70000, 0x37E78000, + 0x37E80000, 0x37E88000, 0x37E90000, 0x37E98000, 0x37EA0000, 0x37EA8000, 0x37EB0000, 0x37EB8000, 0x37EC0000, 0x37EC8000, 0x37ED0000, 0x37ED8000, 0x37EE0000, 0x37EE8000, 0x37EF0000, 0x37EF8000, + 0x37F00000, 0x37F08000, 0x37F10000, 0x37F18000, 0x37F20000, 0x37F28000, 0x37F30000, 0x37F38000, 0x37F40000, 0x37F48000, 0x37F50000, 0x37F58000, 0x37F60000, 0x37F68000, 0x37F70000, 0x37F78000, + 0x37F80000, 0x37F88000, 0x37F90000, 0x37F98000, 0x37FA0000, 0x37FA8000, 0x37FB0000, 0x37FB8000, 0x37FC0000, 0x37FC8000, 0x37FD0000, 0x37FD8000, 0x37FE0000, 0x37FE8000, 0x37FF0000, 0x37FF8000, + 0x38000000, 0x38004000, 0x38008000, 0x3800C000, 0x38010000, 0x38014000, 0x38018000, 0x3801C000, 0x38020000, 0x38024000, 0x38028000, 0x3802C000, 0x38030000, 0x38034000, 0x38038000, 0x3803C000, + 0x38040000, 0x38044000, 0x38048000, 0x3804C000, 0x38050000, 0x38054000, 0x38058000, 0x3805C000, 0x38060000, 0x38064000, 0x38068000, 0x3806C000, 0x38070000, 0x38074000, 0x38078000, 0x3807C000, + 0x38080000, 0x38084000, 0x38088000, 0x3808C000, 0x38090000, 0x38094000, 0x38098000, 0x3809C000, 0x380A0000, 0x380A4000, 0x380A8000, 0x380AC000, 0x380B0000, 0x380B4000, 0x380B8000, 0x380BC000, + 0x380C0000, 0x380C4000, 0x380C8000, 0x380CC000, 0x380D0000, 0x380D4000, 0x380D8000, 0x380DC000, 0x380E0000, 0x380E4000, 0x380E8000, 0x380EC000, 0x380F0000, 0x380F4000, 0x380F8000, 0x380FC000, + 0x38100000, 0x38104000, 0x38108000, 0x3810C000, 0x38110000, 0x38114000, 0x38118000, 0x3811C000, 0x38120000, 0x38124000, 0x38128000, 0x3812C000, 0x38130000, 0x38134000, 0x38138000, 0x3813C000, + 0x38140000, 0x38144000, 0x38148000, 0x3814C000, 0x38150000, 0x38154000, 0x38158000, 0x3815C000, 0x38160000, 
0x38164000, 0x38168000, 0x3816C000, 0x38170000, 0x38174000, 0x38178000, 0x3817C000, + 0x38180000, 0x38184000, 0x38188000, 0x3818C000, 0x38190000, 0x38194000, 0x38198000, 0x3819C000, 0x381A0000, 0x381A4000, 0x381A8000, 0x381AC000, 0x381B0000, 0x381B4000, 0x381B8000, 0x381BC000, + 0x381C0000, 0x381C4000, 0x381C8000, 0x381CC000, 0x381D0000, 0x381D4000, 0x381D8000, 0x381DC000, 0x381E0000, 0x381E4000, 0x381E8000, 0x381EC000, 0x381F0000, 0x381F4000, 0x381F8000, 0x381FC000, + 0x38200000, 0x38204000, 0x38208000, 0x3820C000, 0x38210000, 0x38214000, 0x38218000, 0x3821C000, 0x38220000, 0x38224000, 0x38228000, 0x3822C000, 0x38230000, 0x38234000, 0x38238000, 0x3823C000, + 0x38240000, 0x38244000, 0x38248000, 0x3824C000, 0x38250000, 0x38254000, 0x38258000, 0x3825C000, 0x38260000, 0x38264000, 0x38268000, 0x3826C000, 0x38270000, 0x38274000, 0x38278000, 0x3827C000, + 0x38280000, 0x38284000, 0x38288000, 0x3828C000, 0x38290000, 0x38294000, 0x38298000, 0x3829C000, 0x382A0000, 0x382A4000, 0x382A8000, 0x382AC000, 0x382B0000, 0x382B4000, 0x382B8000, 0x382BC000, + 0x382C0000, 0x382C4000, 0x382C8000, 0x382CC000, 0x382D0000, 0x382D4000, 0x382D8000, 0x382DC000, 0x382E0000, 0x382E4000, 0x382E8000, 0x382EC000, 0x382F0000, 0x382F4000, 0x382F8000, 0x382FC000, + 0x38300000, 0x38304000, 0x38308000, 0x3830C000, 0x38310000, 0x38314000, 0x38318000, 0x3831C000, 0x38320000, 0x38324000, 0x38328000, 0x3832C000, 0x38330000, 0x38334000, 0x38338000, 0x3833C000, + 0x38340000, 0x38344000, 0x38348000, 0x3834C000, 0x38350000, 0x38354000, 0x38358000, 0x3835C000, 0x38360000, 0x38364000, 0x38368000, 0x3836C000, 0x38370000, 0x38374000, 0x38378000, 0x3837C000, + 0x38380000, 0x38384000, 0x38388000, 0x3838C000, 0x38390000, 0x38394000, 0x38398000, 0x3839C000, 0x383A0000, 0x383A4000, 0x383A8000, 0x383AC000, 0x383B0000, 0x383B4000, 0x383B8000, 0x383BC000, + 0x383C0000, 0x383C4000, 0x383C8000, 0x383CC000, 0x383D0000, 0x383D4000, 0x383D8000, 0x383DC000, 0x383E0000, 0x383E4000, 0x383E8000, 0x383EC000, 0x383F0000, 0x383F4000, 
0x383F8000, 0x383FC000, + 0x38400000, 0x38404000, 0x38408000, 0x3840C000, 0x38410000, 0x38414000, 0x38418000, 0x3841C000, 0x38420000, 0x38424000, 0x38428000, 0x3842C000, 0x38430000, 0x38434000, 0x38438000, 0x3843C000, + 0x38440000, 0x38444000, 0x38448000, 0x3844C000, 0x38450000, 0x38454000, 0x38458000, 0x3845C000, 0x38460000, 0x38464000, 0x38468000, 0x3846C000, 0x38470000, 0x38474000, 0x38478000, 0x3847C000, + 0x38480000, 0x38484000, 0x38488000, 0x3848C000, 0x38490000, 0x38494000, 0x38498000, 0x3849C000, 0x384A0000, 0x384A4000, 0x384A8000, 0x384AC000, 0x384B0000, 0x384B4000, 0x384B8000, 0x384BC000, + 0x384C0000, 0x384C4000, 0x384C8000, 0x384CC000, 0x384D0000, 0x384D4000, 0x384D8000, 0x384DC000, 0x384E0000, 0x384E4000, 0x384E8000, 0x384EC000, 0x384F0000, 0x384F4000, 0x384F8000, 0x384FC000, + 0x38500000, 0x38504000, 0x38508000, 0x3850C000, 0x38510000, 0x38514000, 0x38518000, 0x3851C000, 0x38520000, 0x38524000, 0x38528000, 0x3852C000, 0x38530000, 0x38534000, 0x38538000, 0x3853C000, + 0x38540000, 0x38544000, 0x38548000, 0x3854C000, 0x38550000, 0x38554000, 0x38558000, 0x3855C000, 0x38560000, 0x38564000, 0x38568000, 0x3856C000, 0x38570000, 0x38574000, 0x38578000, 0x3857C000, + 0x38580000, 0x38584000, 0x38588000, 0x3858C000, 0x38590000, 0x38594000, 0x38598000, 0x3859C000, 0x385A0000, 0x385A4000, 0x385A8000, 0x385AC000, 0x385B0000, 0x385B4000, 0x385B8000, 0x385BC000, + 0x385C0000, 0x385C4000, 0x385C8000, 0x385CC000, 0x385D0000, 0x385D4000, 0x385D8000, 0x385DC000, 0x385E0000, 0x385E4000, 0x385E8000, 0x385EC000, 0x385F0000, 0x385F4000, 0x385F8000, 0x385FC000, + 0x38600000, 0x38604000, 0x38608000, 0x3860C000, 0x38610000, 0x38614000, 0x38618000, 0x3861C000, 0x38620000, 0x38624000, 0x38628000, 0x3862C000, 0x38630000, 0x38634000, 0x38638000, 0x3863C000, + 0x38640000, 0x38644000, 0x38648000, 0x3864C000, 0x38650000, 0x38654000, 0x38658000, 0x3865C000, 0x38660000, 0x38664000, 0x38668000, 0x3866C000, 0x38670000, 0x38674000, 0x38678000, 0x3867C000, + 0x38680000, 0x38684000, 
0x38688000, 0x3868C000, 0x38690000, 0x38694000, 0x38698000, 0x3869C000, 0x386A0000, 0x386A4000, 0x386A8000, 0x386AC000, 0x386B0000, 0x386B4000, 0x386B8000, 0x386BC000, + 0x386C0000, 0x386C4000, 0x386C8000, 0x386CC000, 0x386D0000, 0x386D4000, 0x386D8000, 0x386DC000, 0x386E0000, 0x386E4000, 0x386E8000, 0x386EC000, 0x386F0000, 0x386F4000, 0x386F8000, 0x386FC000, + 0x38700000, 0x38704000, 0x38708000, 0x3870C000, 0x38710000, 0x38714000, 0x38718000, 0x3871C000, 0x38720000, 0x38724000, 0x38728000, 0x3872C000, 0x38730000, 0x38734000, 0x38738000, 0x3873C000, + 0x38740000, 0x38744000, 0x38748000, 0x3874C000, 0x38750000, 0x38754000, 0x38758000, 0x3875C000, 0x38760000, 0x38764000, 0x38768000, 0x3876C000, 0x38770000, 0x38774000, 0x38778000, 0x3877C000, + 0x38780000, 0x38784000, 0x38788000, 0x3878C000, 0x38790000, 0x38794000, 0x38798000, 0x3879C000, 0x387A0000, 0x387A4000, 0x387A8000, 0x387AC000, 0x387B0000, 0x387B4000, 0x387B8000, 0x387BC000, + 0x387C0000, 0x387C4000, 0x387C8000, 0x387CC000, 0x387D0000, 0x387D4000, 0x387D8000, 0x387DC000, 0x387E0000, 0x387E4000, 0x387E8000, 0x387EC000, 0x387F0000, 0x387F4000, 0x387F8000, 0x387FC000, + 0x38000000, 0x38002000, 0x38004000, 0x38006000, 0x38008000, 0x3800A000, 0x3800C000, 0x3800E000, 0x38010000, 0x38012000, 0x38014000, 0x38016000, 0x38018000, 0x3801A000, 0x3801C000, 0x3801E000, + 0x38020000, 0x38022000, 0x38024000, 0x38026000, 0x38028000, 0x3802A000, 0x3802C000, 0x3802E000, 0x38030000, 0x38032000, 0x38034000, 0x38036000, 0x38038000, 0x3803A000, 0x3803C000, 0x3803E000, + 0x38040000, 0x38042000, 0x38044000, 0x38046000, 0x38048000, 0x3804A000, 0x3804C000, 0x3804E000, 0x38050000, 0x38052000, 0x38054000, 0x38056000, 0x38058000, 0x3805A000, 0x3805C000, 0x3805E000, + 0x38060000, 0x38062000, 0x38064000, 0x38066000, 0x38068000, 0x3806A000, 0x3806C000, 0x3806E000, 0x38070000, 0x38072000, 0x38074000, 0x38076000, 0x38078000, 0x3807A000, 0x3807C000, 0x3807E000, + 0x38080000, 0x38082000, 0x38084000, 0x38086000, 0x38088000, 0x3808A000, 0x3808C000, 
0x3808E000, 0x38090000, 0x38092000, 0x38094000, 0x38096000, 0x38098000, 0x3809A000, 0x3809C000, 0x3809E000, + 0x380A0000, 0x380A2000, 0x380A4000, 0x380A6000, 0x380A8000, 0x380AA000, 0x380AC000, 0x380AE000, 0x380B0000, 0x380B2000, 0x380B4000, 0x380B6000, 0x380B8000, 0x380BA000, 0x380BC000, 0x380BE000, + 0x380C0000, 0x380C2000, 0x380C4000, 0x380C6000, 0x380C8000, 0x380CA000, 0x380CC000, 0x380CE000, 0x380D0000, 0x380D2000, 0x380D4000, 0x380D6000, 0x380D8000, 0x380DA000, 0x380DC000, 0x380DE000, + 0x380E0000, 0x380E2000, 0x380E4000, 0x380E6000, 0x380E8000, 0x380EA000, 0x380EC000, 0x380EE000, 0x380F0000, 0x380F2000, 0x380F4000, 0x380F6000, 0x380F8000, 0x380FA000, 0x380FC000, 0x380FE000, + 0x38100000, 0x38102000, 0x38104000, 0x38106000, 0x38108000, 0x3810A000, 0x3810C000, 0x3810E000, 0x38110000, 0x38112000, 0x38114000, 0x38116000, 0x38118000, 0x3811A000, 0x3811C000, 0x3811E000, + 0x38120000, 0x38122000, 0x38124000, 0x38126000, 0x38128000, 0x3812A000, 0x3812C000, 0x3812E000, 0x38130000, 0x38132000, 0x38134000, 0x38136000, 0x38138000, 0x3813A000, 0x3813C000, 0x3813E000, + 0x38140000, 0x38142000, 0x38144000, 0x38146000, 0x38148000, 0x3814A000, 0x3814C000, 0x3814E000, 0x38150000, 0x38152000, 0x38154000, 0x38156000, 0x38158000, 0x3815A000, 0x3815C000, 0x3815E000, + 0x38160000, 0x38162000, 0x38164000, 0x38166000, 0x38168000, 0x3816A000, 0x3816C000, 0x3816E000, 0x38170000, 0x38172000, 0x38174000, 0x38176000, 0x38178000, 0x3817A000, 0x3817C000, 0x3817E000, + 0x38180000, 0x38182000, 0x38184000, 0x38186000, 0x38188000, 0x3818A000, 0x3818C000, 0x3818E000, 0x38190000, 0x38192000, 0x38194000, 0x38196000, 0x38198000, 0x3819A000, 0x3819C000, 0x3819E000, + 0x381A0000, 0x381A2000, 0x381A4000, 0x381A6000, 0x381A8000, 0x381AA000, 0x381AC000, 0x381AE000, 0x381B0000, 0x381B2000, 0x381B4000, 0x381B6000, 0x381B8000, 0x381BA000, 0x381BC000, 0x381BE000, + 0x381C0000, 0x381C2000, 0x381C4000, 0x381C6000, 0x381C8000, 0x381CA000, 0x381CC000, 0x381CE000, 0x381D0000, 0x381D2000, 0x381D4000, 0x381D6000, 
0x381D8000, 0x381DA000, 0x381DC000, 0x381DE000, + 0x381E0000, 0x381E2000, 0x381E4000, 0x381E6000, 0x381E8000, 0x381EA000, 0x381EC000, 0x381EE000, 0x381F0000, 0x381F2000, 0x381F4000, 0x381F6000, 0x381F8000, 0x381FA000, 0x381FC000, 0x381FE000, + 0x38200000, 0x38202000, 0x38204000, 0x38206000, 0x38208000, 0x3820A000, 0x3820C000, 0x3820E000, 0x38210000, 0x38212000, 0x38214000, 0x38216000, 0x38218000, 0x3821A000, 0x3821C000, 0x3821E000, + 0x38220000, 0x38222000, 0x38224000, 0x38226000, 0x38228000, 0x3822A000, 0x3822C000, 0x3822E000, 0x38230000, 0x38232000, 0x38234000, 0x38236000, 0x38238000, 0x3823A000, 0x3823C000, 0x3823E000, + 0x38240000, 0x38242000, 0x38244000, 0x38246000, 0x38248000, 0x3824A000, 0x3824C000, 0x3824E000, 0x38250000, 0x38252000, 0x38254000, 0x38256000, 0x38258000, 0x3825A000, 0x3825C000, 0x3825E000, + 0x38260000, 0x38262000, 0x38264000, 0x38266000, 0x38268000, 0x3826A000, 0x3826C000, 0x3826E000, 0x38270000, 0x38272000, 0x38274000, 0x38276000, 0x38278000, 0x3827A000, 0x3827C000, 0x3827E000, + 0x38280000, 0x38282000, 0x38284000, 0x38286000, 0x38288000, 0x3828A000, 0x3828C000, 0x3828E000, 0x38290000, 0x38292000, 0x38294000, 0x38296000, 0x38298000, 0x3829A000, 0x3829C000, 0x3829E000, + 0x382A0000, 0x382A2000, 0x382A4000, 0x382A6000, 0x382A8000, 0x382AA000, 0x382AC000, 0x382AE000, 0x382B0000, 0x382B2000, 0x382B4000, 0x382B6000, 0x382B8000, 0x382BA000, 0x382BC000, 0x382BE000, + 0x382C0000, 0x382C2000, 0x382C4000, 0x382C6000, 0x382C8000, 0x382CA000, 0x382CC000, 0x382CE000, 0x382D0000, 0x382D2000, 0x382D4000, 0x382D6000, 0x382D8000, 0x382DA000, 0x382DC000, 0x382DE000, + 0x382E0000, 0x382E2000, 0x382E4000, 0x382E6000, 0x382E8000, 0x382EA000, 0x382EC000, 0x382EE000, 0x382F0000, 0x382F2000, 0x382F4000, 0x382F6000, 0x382F8000, 0x382FA000, 0x382FC000, 0x382FE000, + 0x38300000, 0x38302000, 0x38304000, 0x38306000, 0x38308000, 0x3830A000, 0x3830C000, 0x3830E000, 0x38310000, 0x38312000, 0x38314000, 0x38316000, 0x38318000, 0x3831A000, 0x3831C000, 0x3831E000, + 
0x38320000, 0x38322000, 0x38324000, 0x38326000, 0x38328000, 0x3832A000, 0x3832C000, 0x3832E000, 0x38330000, 0x38332000, 0x38334000, 0x38336000, 0x38338000, 0x3833A000, 0x3833C000, 0x3833E000, + 0x38340000, 0x38342000, 0x38344000, 0x38346000, 0x38348000, 0x3834A000, 0x3834C000, 0x3834E000, 0x38350000, 0x38352000, 0x38354000, 0x38356000, 0x38358000, 0x3835A000, 0x3835C000, 0x3835E000, + 0x38360000, 0x38362000, 0x38364000, 0x38366000, 0x38368000, 0x3836A000, 0x3836C000, 0x3836E000, 0x38370000, 0x38372000, 0x38374000, 0x38376000, 0x38378000, 0x3837A000, 0x3837C000, 0x3837E000, + 0x38380000, 0x38382000, 0x38384000, 0x38386000, 0x38388000, 0x3838A000, 0x3838C000, 0x3838E000, 0x38390000, 0x38392000, 0x38394000, 0x38396000, 0x38398000, 0x3839A000, 0x3839C000, 0x3839E000, + 0x383A0000, 0x383A2000, 0x383A4000, 0x383A6000, 0x383A8000, 0x383AA000, 0x383AC000, 0x383AE000, 0x383B0000, 0x383B2000, 0x383B4000, 0x383B6000, 0x383B8000, 0x383BA000, 0x383BC000, 0x383BE000, + 0x383C0000, 0x383C2000, 0x383C4000, 0x383C6000, 0x383C8000, 0x383CA000, 0x383CC000, 0x383CE000, 0x383D0000, 0x383D2000, 0x383D4000, 0x383D6000, 0x383D8000, 0x383DA000, 0x383DC000, 0x383DE000, + 0x383E0000, 0x383E2000, 0x383E4000, 0x383E6000, 0x383E8000, 0x383EA000, 0x383EC000, 0x383EE000, 0x383F0000, 0x383F2000, 0x383F4000, 0x383F6000, 0x383F8000, 0x383FA000, 0x383FC000, 0x383FE000, + 0x38400000, 0x38402000, 0x38404000, 0x38406000, 0x38408000, 0x3840A000, 0x3840C000, 0x3840E000, 0x38410000, 0x38412000, 0x38414000, 0x38416000, 0x38418000, 0x3841A000, 0x3841C000, 0x3841E000, + 0x38420000, 0x38422000, 0x38424000, 0x38426000, 0x38428000, 0x3842A000, 0x3842C000, 0x3842E000, 0x38430000, 0x38432000, 0x38434000, 0x38436000, 0x38438000, 0x3843A000, 0x3843C000, 0x3843E000, + 0x38440000, 0x38442000, 0x38444000, 0x38446000, 0x38448000, 0x3844A000, 0x3844C000, 0x3844E000, 0x38450000, 0x38452000, 0x38454000, 0x38456000, 0x38458000, 0x3845A000, 0x3845C000, 0x3845E000, + 0x38460000, 0x38462000, 0x38464000, 0x38466000, 0x38468000, 
0x3846A000, 0x3846C000, 0x3846E000, 0x38470000, 0x38472000, 0x38474000, 0x38476000, 0x38478000, 0x3847A000, 0x3847C000, 0x3847E000, + 0x38480000, 0x38482000, 0x38484000, 0x38486000, 0x38488000, 0x3848A000, 0x3848C000, 0x3848E000, 0x38490000, 0x38492000, 0x38494000, 0x38496000, 0x38498000, 0x3849A000, 0x3849C000, 0x3849E000, + 0x384A0000, 0x384A2000, 0x384A4000, 0x384A6000, 0x384A8000, 0x384AA000, 0x384AC000, 0x384AE000, 0x384B0000, 0x384B2000, 0x384B4000, 0x384B6000, 0x384B8000, 0x384BA000, 0x384BC000, 0x384BE000, + 0x384C0000, 0x384C2000, 0x384C4000, 0x384C6000, 0x384C8000, 0x384CA000, 0x384CC000, 0x384CE000, 0x384D0000, 0x384D2000, 0x384D4000, 0x384D6000, 0x384D8000, 0x384DA000, 0x384DC000, 0x384DE000, + 0x384E0000, 0x384E2000, 0x384E4000, 0x384E6000, 0x384E8000, 0x384EA000, 0x384EC000, 0x384EE000, 0x384F0000, 0x384F2000, 0x384F4000, 0x384F6000, 0x384F8000, 0x384FA000, 0x384FC000, 0x384FE000, + 0x38500000, 0x38502000, 0x38504000, 0x38506000, 0x38508000, 0x3850A000, 0x3850C000, 0x3850E000, 0x38510000, 0x38512000, 0x38514000, 0x38516000, 0x38518000, 0x3851A000, 0x3851C000, 0x3851E000, + 0x38520000, 0x38522000, 0x38524000, 0x38526000, 0x38528000, 0x3852A000, 0x3852C000, 0x3852E000, 0x38530000, 0x38532000, 0x38534000, 0x38536000, 0x38538000, 0x3853A000, 0x3853C000, 0x3853E000, + 0x38540000, 0x38542000, 0x38544000, 0x38546000, 0x38548000, 0x3854A000, 0x3854C000, 0x3854E000, 0x38550000, 0x38552000, 0x38554000, 0x38556000, 0x38558000, 0x3855A000, 0x3855C000, 0x3855E000, + 0x38560000, 0x38562000, 0x38564000, 0x38566000, 0x38568000, 0x3856A000, 0x3856C000, 0x3856E000, 0x38570000, 0x38572000, 0x38574000, 0x38576000, 0x38578000, 0x3857A000, 0x3857C000, 0x3857E000, + 0x38580000, 0x38582000, 0x38584000, 0x38586000, 0x38588000, 0x3858A000, 0x3858C000, 0x3858E000, 0x38590000, 0x38592000, 0x38594000, 0x38596000, 0x38598000, 0x3859A000, 0x3859C000, 0x3859E000, + 0x385A0000, 0x385A2000, 0x385A4000, 0x385A6000, 0x385A8000, 0x385AA000, 0x385AC000, 0x385AE000, 0x385B0000, 0x385B2000, 
0x385B4000, 0x385B6000, 0x385B8000, 0x385BA000, 0x385BC000, 0x385BE000, + 0x385C0000, 0x385C2000, 0x385C4000, 0x385C6000, 0x385C8000, 0x385CA000, 0x385CC000, 0x385CE000, 0x385D0000, 0x385D2000, 0x385D4000, 0x385D6000, 0x385D8000, 0x385DA000, 0x385DC000, 0x385DE000, + 0x385E0000, 0x385E2000, 0x385E4000, 0x385E6000, 0x385E8000, 0x385EA000, 0x385EC000, 0x385EE000, 0x385F0000, 0x385F2000, 0x385F4000, 0x385F6000, 0x385F8000, 0x385FA000, 0x385FC000, 0x385FE000, + 0x38600000, 0x38602000, 0x38604000, 0x38606000, 0x38608000, 0x3860A000, 0x3860C000, 0x3860E000, 0x38610000, 0x38612000, 0x38614000, 0x38616000, 0x38618000, 0x3861A000, 0x3861C000, 0x3861E000, + 0x38620000, 0x38622000, 0x38624000, 0x38626000, 0x38628000, 0x3862A000, 0x3862C000, 0x3862E000, 0x38630000, 0x38632000, 0x38634000, 0x38636000, 0x38638000, 0x3863A000, 0x3863C000, 0x3863E000, + 0x38640000, 0x38642000, 0x38644000, 0x38646000, 0x38648000, 0x3864A000, 0x3864C000, 0x3864E000, 0x38650000, 0x38652000, 0x38654000, 0x38656000, 0x38658000, 0x3865A000, 0x3865C000, 0x3865E000, + 0x38660000, 0x38662000, 0x38664000, 0x38666000, 0x38668000, 0x3866A000, 0x3866C000, 0x3866E000, 0x38670000, 0x38672000, 0x38674000, 0x38676000, 0x38678000, 0x3867A000, 0x3867C000, 0x3867E000, + 0x38680000, 0x38682000, 0x38684000, 0x38686000, 0x38688000, 0x3868A000, 0x3868C000, 0x3868E000, 0x38690000, 0x38692000, 0x38694000, 0x38696000, 0x38698000, 0x3869A000, 0x3869C000, 0x3869E000, + 0x386A0000, 0x386A2000, 0x386A4000, 0x386A6000, 0x386A8000, 0x386AA000, 0x386AC000, 0x386AE000, 0x386B0000, 0x386B2000, 0x386B4000, 0x386B6000, 0x386B8000, 0x386BA000, 0x386BC000, 0x386BE000, + 0x386C0000, 0x386C2000, 0x386C4000, 0x386C6000, 0x386C8000, 0x386CA000, 0x386CC000, 0x386CE000, 0x386D0000, 0x386D2000, 0x386D4000, 0x386D6000, 0x386D8000, 0x386DA000, 0x386DC000, 0x386DE000, + 0x386E0000, 0x386E2000, 0x386E4000, 0x386E6000, 0x386E8000, 0x386EA000, 0x386EC000, 0x386EE000, 0x386F0000, 0x386F2000, 0x386F4000, 0x386F6000, 0x386F8000, 0x386FA000, 0x386FC000, 
0x386FE000, + 0x38700000, 0x38702000, 0x38704000, 0x38706000, 0x38708000, 0x3870A000, 0x3870C000, 0x3870E000, 0x38710000, 0x38712000, 0x38714000, 0x38716000, 0x38718000, 0x3871A000, 0x3871C000, 0x3871E000, + 0x38720000, 0x38722000, 0x38724000, 0x38726000, 0x38728000, 0x3872A000, 0x3872C000, 0x3872E000, 0x38730000, 0x38732000, 0x38734000, 0x38736000, 0x38738000, 0x3873A000, 0x3873C000, 0x3873E000, + 0x38740000, 0x38742000, 0x38744000, 0x38746000, 0x38748000, 0x3874A000, 0x3874C000, 0x3874E000, 0x38750000, 0x38752000, 0x38754000, 0x38756000, 0x38758000, 0x3875A000, 0x3875C000, 0x3875E000, + 0x38760000, 0x38762000, 0x38764000, 0x38766000, 0x38768000, 0x3876A000, 0x3876C000, 0x3876E000, 0x38770000, 0x38772000, 0x38774000, 0x38776000, 0x38778000, 0x3877A000, 0x3877C000, 0x3877E000, + 0x38780000, 0x38782000, 0x38784000, 0x38786000, 0x38788000, 0x3878A000, 0x3878C000, 0x3878E000, 0x38790000, 0x38792000, 0x38794000, 0x38796000, 0x38798000, 0x3879A000, 0x3879C000, 0x3879E000, + 0x387A0000, 0x387A2000, 0x387A4000, 0x387A6000, 0x387A8000, 0x387AA000, 0x387AC000, 0x387AE000, 0x387B0000, 0x387B2000, 0x387B4000, 0x387B6000, 0x387B8000, 0x387BA000, 0x387BC000, 0x387BE000, + 0x387C0000, 0x387C2000, 0x387C4000, 0x387C6000, 0x387C8000, 0x387CA000, 0x387CC000, 0x387CE000, 0x387D0000, 0x387D2000, 0x387D4000, 0x387D6000, 0x387D8000, 0x387DA000, 0x387DC000, 0x387DE000, + 0x387E0000, 0x387E2000, 0x387E4000, 0x387E6000, 0x387E8000, 0x387EA000, 0x387EC000, 0x387EE000, 0x387F0000, 0x387F2000, 0x387F4000, 0x387F6000, 0x387F8000, 0x387FA000, 0x387FC000, 0x387FE000 }; + static const bits::type exponent_table[64] = { + 0x00000000, 0x00800000, 0x01000000, 0x01800000, 0x02000000, 0x02800000, 0x03000000, 0x03800000, 0x04000000, 0x04800000, 0x05000000, 0x05800000, 0x06000000, 0x06800000, 0x07000000, 0x07800000, + 0x08000000, 0x08800000, 0x09000000, 0x09800000, 0x0A000000, 0x0A800000, 0x0B000000, 0x0B800000, 0x0C000000, 0x0C800000, 0x0D000000, 0x0D800000, 0x0E000000, 0x0E800000, 0x0F000000, 
0x47800000, + 0x80000000, 0x80800000, 0x81000000, 0x81800000, 0x82000000, 0x82800000, 0x83000000, 0x83800000, 0x84000000, 0x84800000, 0x85000000, 0x85800000, 0x86000000, 0x86800000, 0x87000000, 0x87800000, + 0x88000000, 0x88800000, 0x89000000, 0x89800000, 0x8A000000, 0x8A800000, 0x8B000000, 0x8B800000, 0x8C000000, 0x8C800000, 0x8D000000, 0x8D800000, 0x8E000000, 0x8E800000, 0x8F000000, 0xC7800000 }; + static const unsigned short offset_table[64] = { + 0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, + 0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024 }; + bits::type fbits = mantissa_table[offset_table[value>>10]+(value&0x3FF)] + exponent_table[value>>10]; + #endif + float out; + std::memcpy(&out, &fbits, sizeof(float)); + return out; + #endif + } + + /// Convert half-precision to IEEE double-precision. + /// \param value half-precision value to convert + /// \return double-precision value + inline double half2float_impl(unsigned int value, double, true_type) + { + #if HALF_ENABLE_F16C_INTRINSICS + return _mm_cvtsd_f64(_mm_cvtps_pd(_mm_cvtph_ps(_mm_cvtsi32_si128(value)))); + #else + uint32 hi = static_cast(value&0x8000) << 16; + unsigned int abs = value & 0x7FFF; + if(abs) + { + hi |= 0x3F000000 << static_cast(abs>=0x7C00); + for(; abs<0x400; abs<<=1,hi-=0x100000) ; + hi += static_cast(abs) << 10; + } + bits::type dbits = static_cast::type>(hi) << 32; + double out; + std::memcpy(&out, &dbits, sizeof(double)); + return out; + #endif + } + + /// Convert half-precision to non-IEEE floating-point. + /// \tparam T type to convert to (builtin integer type) + /// \param value half-precision value to convert + /// \return floating-point value + template T half2float_impl(unsigned int value, T, ...) 
+ { + T out; + unsigned int abs = value & 0x7FFF; + if(abs > 0x7C00) + out = (std::numeric_limits::has_signaling_NaN && !(abs&0x200)) ? std::numeric_limits::signaling_NaN() : + std::numeric_limits::has_quiet_NaN ? std::numeric_limits::quiet_NaN() : T(); + else if(abs == 0x7C00) + out = std::numeric_limits::has_infinity ? std::numeric_limits::infinity() : std::numeric_limits::max(); + else if(abs > 0x3FF) + out = std::ldexp(static_cast((abs&0x3FF)|0x400), (abs>>10)-25); + else + out = std::ldexp(static_cast(abs), -24); + return (value&0x8000) ? -out : out; + } + + /// Convert half-precision to floating-point. + /// \tparam T type to convert to (builtin integer type) + /// \param value half-precision value to convert + /// \return floating-point value + template T half2float(unsigned int value) + { + return half2float_impl(value, T(), bool_type::is_iec559&&sizeof(typename bits::type)==sizeof(T)>()); + } + + /// Convert half-precision floating-point to integer. + /// \tparam R rounding mode to use + /// \tparam E `true` for round to even, `false` for round away from zero + /// \tparam I `true` to raise INEXACT exception (if inexact), `false` to never raise it + /// \tparam T type to convert to (buitlin integer type with at least 16 bits precision, excluding any implicit sign bits) + /// \param value half-precision value to convert + /// \return rounded integer value + /// \exception FE_INVALID if value is not representable in type \a T + /// \exception FE_INEXACT if value had to be rounded and \a I is `true` + template T half2int(unsigned int value) + { + unsigned int abs = value & 0x7FFF; + if(abs >= 0x7C00) + { + raise(FE_INVALID); + return (value&0x8000) ? std::numeric_limits::min() : std::numeric_limits::max(); + } + if(abs < 0x3800) + { + raise(FE_INEXACT, I); + return (R==std::round_toward_infinity) ? T(~(value>>15)&(abs!=0)) : + (R==std::round_toward_neg_infinity) ? 
-T(value>0x8000) : + T(); + } + int exp = 25 - (abs>>10); + unsigned int m = (value&0x3FF) | 0x400; + int32 i = static_cast((exp<=0) ? (m<<-exp) : ((m+( + (R==std::round_to_nearest) ? ((1<<(exp-1))-(~(m>>exp)&E)) : + (R==std::round_toward_infinity) ? (((1<>15)-1)) : + (R==std::round_toward_neg_infinity) ? (((1<>15)) : 0))>>exp)); + if((!std::numeric_limits::is_signed && (value&0x8000)) || (std::numeric_limits::digits<16 && + ((value&0x8000) ? (-i::min()) : (i>std::numeric_limits::max())))) + raise(FE_INVALID); + else if(I && exp > 0 && (m&((1<((value&0x8000) ? -i : i); + } + + /// \} + /// \name Mathematics + /// \{ + + /// upper part of 64-bit multiplication. + /// \tparam R rounding mode to use + /// \param x first factor + /// \param y second factor + /// \return upper 32 bit of \a x * \a y + template uint32 mulhi(uint32 x, uint32 y) + { + uint32 xy = (x>>16) * (y&0xFFFF), yx = (x&0xFFFF) * (y>>16), c = (xy&0xFFFF) + (yx&0xFFFF) + (((x&0xFFFF)*(y&0xFFFF))>>16); + return (x>>16)*(y>>16) + (xy>>16) + (yx>>16) + (c>>16) + + ((R==std::round_to_nearest) ? ((c>>15)&1) : (R==std::round_toward_infinity) ? ((c&0xFFFF)!=0) : 0); + } + + /// 64-bit multiplication. + /// \param x first factor + /// \param y second factor + /// \return upper 32 bit of \a x * \a y rounded to nearest + inline uint32 multiply64(uint32 x, uint32 y) + { + #if HALF_ENABLE_CPP11_LONG_LONG + return static_cast((static_cast(x)*static_cast(y)+0x80000000)>>32); + #else + return mulhi(x, y); + #endif + } + + /// 64-bit division. 
+ /// \param x upper 32 bit of dividend + /// \param y divisor + /// \param s variable to store sticky bit for rounding + /// \return (\a x << 32) / \a y + inline uint32 divide64(uint32 x, uint32 y, int &s) + { + #if HALF_ENABLE_CPP11_LONG_LONG + unsigned long long xx = static_cast(x) << 32; + return s = (xx%y!=0), static_cast(xx/y); + #else + y >>= 1; + uint32 rem = x, div = 0; + for(unsigned int i=0; i<32; ++i) + { + div <<= 1; + if(rem >= y) + { + rem -= y; + div |= 1; + } + rem <<= 1; + } + return s = rem > 1, div; + #endif + } + + /// Half precision positive modulus. + /// \tparam Q `true` to compute full quotient, `false` else + /// \tparam R `true` to compute signed remainder, `false` for positive remainder + /// \param x first operand as positive finite half-precision value + /// \param y second operand as positive finite half-precision value + /// \param quo adress to store quotient at, `nullptr` if \a Q `false` + /// \return modulus of \a x / \a y + template unsigned int mod(unsigned int x, unsigned int y, int *quo = NULL) + { + unsigned int q = 0; + if(x > y) + { + int absx = x, absy = y, expx = 0, expy = 0; + for(; absx<0x400; absx<<=1,--expx) ; + for(; absy<0x400; absy<<=1,--expy) ; + expx += absx >> 10; + expy += absy >> 10; + int mx = (absx&0x3FF) | 0x400, my = (absy&0x3FF) | 0x400; + for(int d=expx-expy; d; --d) + { + if(!Q && mx == my) + return 0; + if(mx >= my) + { + mx -= my; + q += Q; + } + mx <<= 1; + q <<= static_cast(Q); + } + if(!Q && mx == my) + return 0; + if(mx >= my) + { + mx -= my; + ++q; + } + if(Q) + { + q &= (1<<(std::numeric_limits::digits-1)) - 1; + if(!mx) + return *quo = q, 0; + } + for(; mx<0x400; mx<<=1,--expy) ; + x = (expy>0) ? ((expy<<10)|(mx&0x3FF)) : (mx>>(1-expy)); + } + if(R) + { + unsigned int a, b; + if(y < 0x800) + { + a = (x<0x400) ? 
(x<<1) : (x+0x400); + b = y; + } + else + { + a = x; + b = y - 0x400; + } + if(a > b || (a == b && (q&1))) + { + int exp = (y>>10) + (y<=0x3FF), d = exp - (x>>10) - (x<=0x3FF); + int m = (((y&0x3FF)|((y>0x3FF)<<10))<<1) - (((x&0x3FF)|((x>0x3FF)<<10))<<(1-d)); + for(; m<0x800 && exp>1; m<<=1,--exp) ; + x = 0x8000 + ((exp-1)<<10) + (m>>1); + q += Q; + } + } + if(Q) + *quo = q; + return x; + } + + /// Fixed point square root. + /// \tparam F number of fractional bits + /// \param r radicand in Q1.F fixed point format + /// \param exp exponent + /// \return square root as Q1.F/2 + template uint32 sqrt(uint32 &r, int &exp) + { + int i = exp & 1; + r <<= i; + exp = (exp-i) / 2; + uint32 m = 0; + for(uint32 bit=static_cast(1)<>=2) + { + if(r < m+bit) + m >>= 1; + else + { + r -= m + bit; + m = (m>>1) + bit; + } + } + return m; + } + + /// Fixed point binary exponential. + /// This uses the BKM algorithm in E-mode. + /// \param m exponent in [0,1) as Q0.31 + /// \param n number of iterations (at most 32) + /// \return 2 ^ \a m as Q1.31 + inline uint32 exp2(uint32 m, unsigned int n = 32) + { + static const uint32 logs[] = { + 0x80000000, 0x4AE00D1D, 0x2934F098, 0x15C01A3A, 0x0B31FB7D, 0x05AEB4DD, 0x02DCF2D1, 0x016FE50B, + 0x00B84E23, 0x005C3E10, 0x002E24CA, 0x001713D6, 0x000B8A47, 0x0005C53B, 0x0002E2A3, 0x00017153, + 0x0000B8AA, 0x00005C55, 0x00002E2B, 0x00001715, 0x00000B8B, 0x000005C5, 0x000002E3, 0x00000171, + 0x000000B9, 0x0000005C, 0x0000002E, 0x00000017, 0x0000000C, 0x00000006, 0x00000003, 0x00000001 }; + if(!m) + return 0x80000000; + uint32 mx = 0x80000000, my = 0; + for(unsigned int i=1; i> i; + } + } + return mx; + } + + /// Fixed point binary logarithm. + /// This uses the BKM algorithm in L-mode. 
+ /// \param m mantissa in [1,2) as Q1.30 + /// \param n number of iterations (at most 32) + /// \return log2(\a m) as Q0.31 + inline uint32 log2(uint32 m, unsigned int n = 32) + { + static const uint32 logs[] = { + 0x80000000, 0x4AE00D1D, 0x2934F098, 0x15C01A3A, 0x0B31FB7D, 0x05AEB4DD, 0x02DCF2D1, 0x016FE50B, + 0x00B84E23, 0x005C3E10, 0x002E24CA, 0x001713D6, 0x000B8A47, 0x0005C53B, 0x0002E2A3, 0x00017153, + 0x0000B8AA, 0x00005C55, 0x00002E2B, 0x00001715, 0x00000B8B, 0x000005C5, 0x000002E3, 0x00000171, + 0x000000B9, 0x0000005C, 0x0000002E, 0x00000017, 0x0000000C, 0x00000006, 0x00000003, 0x00000001 }; + if(m == 0x40000000) + return 0; + uint32 mx = 0x40000000, my = 0; + for(unsigned int i=1; i>i); + if(mz <= m) + { + mx = mz; + my += logs[i]; + } + } + return my; + } + + /// Fixed point sine and cosine. + /// This uses the CORDIC algorithm in rotation mode. + /// \param mz angle in [-pi/2,pi/2] as Q1.30 + /// \param n number of iterations (at most 31) + /// \return sine and cosine of \a mz as Q1.30 + inline std::pair sincos(uint32 mz, unsigned int n = 31) + { + static const uint32 angles[] = { + 0x3243F6A9, 0x1DAC6705, 0x0FADBAFD, 0x07F56EA7, 0x03FEAB77, 0x01FFD55C, 0x00FFFAAB, 0x007FFF55, + 0x003FFFEB, 0x001FFFFD, 0x00100000, 0x00080000, 0x00040000, 0x00020000, 0x00010000, 0x00008000, + 0x00004000, 0x00002000, 0x00001000, 0x00000800, 0x00000400, 0x00000200, 0x00000100, 0x00000080, + 0x00000040, 0x00000020, 0x00000010, 0x00000008, 0x00000004, 0x00000002, 0x00000001 }; + uint32 mx = 0x26DD3B6A, my = 0; + for(unsigned int i=0; i0x3FF)<<10); + int exp = (abs>>10) + (abs<=0x3FF) - 15; + if(abs < 0x3A48) + return k = 0, m << (exp+20); + #if HALF_ENABLE_CPP11_LONG_LONG + unsigned long long y = m * 0xA2F9836E4E442, mask = (1ULL<<(62-exp)) - 1, yi = (y+(mask>>1)) & ~mask, f = y - yi; + uint32 sign = -static_cast(f>>63); + k = static_cast(yi>>(62-exp)); + return (multiply64(static_cast((sign ? 
-f : f)>>(31-exp)), 0xC90FDAA2)^sign) - sign; + #else + uint32 yh = m*0xA2F98 + mulhi(m, 0x36E4E442), yl = (m*0x36E4E442) & 0xFFFFFFFF; + uint32 mask = (static_cast(1)<<(30-exp)) - 1, yi = (yh+(mask>>1)) & ~mask, sign = -static_cast(yi>yh); + k = static_cast(yi>>(30-exp)); + uint32 fh = (yh^sign) + (yi^~sign) - ~sign, fl = (yl^sign) - sign; + return (multiply64((exp>-1) ? (((fh<<(1+exp))&0xFFFFFFFF)|((fl&0xFFFFFFFF)>>(31-exp))) : fh, 0xC90FDAA2)^sign) - sign; + #endif + } + + /// Get arguments for atan2 function. + /// \param abs half-precision floating-point value + /// \return \a abs and sqrt(1 - \a abs^2) as Q0.30 + inline std::pair atan2_args(unsigned int abs) + { + int exp = -15; + for(; abs<0x400; abs<<=1,--exp) ; + exp += abs >> 10; + uint32 my = ((abs&0x3FF)|0x400) << 5, r = my * my; + int rexp = 2 * exp; + r = 0x40000000 - ((rexp>-31) ? ((r>>-rexp)|((r&((static_cast(1)<<-rexp)-1))!=0)) : 1); + for(rexp=0; r<0x40000000; r<<=1,--rexp) ; + uint32 mx = sqrt<30>(r, rexp); + int d = exp - rexp; + if(d < 0) + return std::make_pair((d<-14) ? ((my>>(-d-14))+((my>>(-d-15))&1)) : (my<<(14+d)), (mx<<14)+(r<<13)/mx); + if(d > 0) + return std::make_pair(my<<14, (d>14) ? ((mx>>(d-14))+((mx>>(d-15))&1)) : ((d==14) ? 
mx : ((mx<<(14-d))+(r<<(13-d))/mx))); + return std::make_pair(my<<13, (mx<<13)+(r<<12)/mx); + } + + /// Get exponentials for hyperbolic computation + /// \param abs half-precision floating-point value + /// \param exp variable to take unbiased exponent of larger result + /// \param n number of BKM iterations (at most 32) + /// \return exp(abs) and exp(-\a abs) as Q1.31 with same exponent + inline std::pair hyperbolic_args(unsigned int abs, int &exp, unsigned int n = 32) + { + uint32 mx = detail::multiply64(static_cast((abs&0x3FF)+((abs>0x3FF)<<10))<<21, 0xB8AA3B29), my; + int e = (abs>>10) + (abs<=0x3FF); + if(e < 14) + { + exp = 0; + mx >>= 14 - e; + } + else + { + exp = mx >> (45-e); + mx = (mx<<(e-14)) & 0x7FFFFFFF; + } + mx = exp2(mx, n); + int d = exp << 1, s; + if(mx > 0x80000000) + { + my = divide64(0x80000000, mx, s); + my |= s; + ++d; + } + else + my = mx; + return std::make_pair(mx, (d<31) ? ((my>>d)|((my&((static_cast(1)< unsigned int exp2_post(uint32 m, int exp, bool esign, unsigned int sign = 0, unsigned int n = 32) + { + if(esign) + { + exp = -exp - (m!=0); + if(exp < -25) + return underflow(sign); + else if(exp == -25) + return rounded(sign, 1, m!=0); + } + else if(exp > 15) + return overflow(sign); + if(!m) + return sign | (((exp+=15)>0) ? (exp<<10) : check_underflow(0x200>>-exp)); + m = exp2(m, n); + int s = 0; + if(esign) + m = divide64(0x80000000, m, s); + return fixed2half(m, exp+14, sign, s); + } + + /// Postprocessing for binary logarithm. 
+ /// \tparam R rounding mode to use + /// \tparam L logarithm for base transformation as Q1.31 + /// \param m fractional part of logarithm as Q0.31 + /// \param ilog signed integer part of logarithm + /// \param exp biased exponent of result + /// \param sign sign bit of result + /// \return value base-transformed and converted to half-precision + /// \exception FE_OVERFLOW on overflows + /// \exception FE_UNDERFLOW on underflows + /// \exception FE_INEXACT if no other exception occurred + template unsigned int log2_post(uint32 m, int ilog, int exp, unsigned int sign = 0) + { + uint32 msign = sign_mask(ilog); + m = (((static_cast(ilog)<<27)+(m>>4))^msign) - msign; + if(!m) + return 0; + for(; m<0x80000000; m<<=1,--exp) ; + int i = m >= L, s; + exp += i; + m >>= 1 + i; + sign ^= msign & 0x8000; + if(exp < -11) + return underflow(sign); + m = divide64(m, L, s); + return fixed2half(m, exp, sign, 1); + } + + /// Hypotenuse square root and postprocessing. + /// \tparam R rounding mode to use + /// \param r mantissa as Q2.30 + /// \param exp biased exponent + /// \return square root converted to half-precision + /// \exception FE_OVERFLOW on overflows + /// \exception FE_UNDERFLOW on underflows + /// \exception FE_INEXACT if value had to be rounded + template unsigned int hypot_post(uint32 r, int exp) + { + int i = r >> 31; + if((exp+=i) > 46) + return overflow(); + if(exp < -34) + return underflow(); + r = (r>>i) | (r&i); + uint32 m = sqrt<30>(r, exp+=15); + return fixed2half(m, exp-1, 0, r!=0); + } + + /// Division and postprocessing for tangents. 
+ /// \tparam R rounding mode to use + /// \param my dividend as Q1.31 + /// \param mx divisor as Q1.31 + /// \param exp biased exponent of result + /// \param sign sign bit of result + /// \return quotient converted to half-precision + /// \exception FE_OVERFLOW on overflows + /// \exception FE_UNDERFLOW on underflows + /// \exception FE_INEXACT if no other exception occurred + template unsigned int tangent_post(uint32 my, uint32 mx, int exp, unsigned int sign = 0) + { + int i = my >= mx, s; + exp += i; + if(exp > 29) + return overflow(sign); + if(exp < -11) + return underflow(sign); + uint32 m = divide64(my>>(i+1), mx, s); + return fixed2half(m, exp, sign, s); + } + + /// Area function and postprocessing. + /// This computes the value directly in Q2.30 using the representation `asinh|acosh(x) = log(x+sqrt(x^2+|-1))`. + /// \tparam R rounding mode to use + /// \tparam S `true` for asinh, `false` for acosh + /// \param arg half-precision argument + /// \return asinh|acosh(\a arg) converted to half-precision + /// \exception FE_OVERFLOW on overflows + /// \exception FE_UNDERFLOW on underflows + /// \exception FE_INEXACT if no other exception occurred + template unsigned int area(unsigned int arg) + { + int abs = arg & 0x7FFF, expx = (abs>>10) + (abs<=0x3FF) - 15, expy = -15, ilog, i; + uint32 mx = static_cast((abs&0x3FF)|((abs>0x3FF)<<10)) << 20, my, r; + for(; abs<0x400; abs<<=1,--expy) ; + expy += abs >> 10; + r = ((abs&0x3FF)|0x400) << 5; + r *= r; + i = r >> 31; + expy = 2*expy + i; + r >>= i; + if(S) + { + if(expy < 0) + { + r = 0x40000000 + ((expy>-30) ? 
((r>>-expy)|((r&((static_cast(1)<<-expy)-1))!=0)) : 1); + expy = 0; + } + else + { + r += 0x40000000 >> expy; + i = r >> 31; + r = (r>>i) | (r&i); + expy += i; + } + } + else + { + r -= 0x40000000 >> expy; + for(; r<0x40000000; r<<=1,--expy) ; + } + my = sqrt<30>(r, expy); + my = (my<<15) + (r<<14)/my; + if(S) + { + mx >>= expy - expx; + ilog = expy; + } + else + { + my >>= expx - expy; + ilog = expx; + } + my += mx; + i = my >> 31; + static const int G = S && (R==std::round_to_nearest); + return log2_post(log2(my>>i, 26+S+G)+(G<<3), ilog+i, 17, arg&(static_cast(S)<<15)); + } + + /// Class for 1.31 unsigned floating-point computation + struct f31 + { + /// Constructor. + /// \param mant mantissa as 1.31 + /// \param e exponent + HALF_CONSTEXPR f31(uint32 mant, int e) : m(mant), exp(e) {} + + /// Constructor. + /// \param abs unsigned half-precision value + f31(unsigned int abs) : exp(-15) + { + for(; abs<0x400; abs<<=1,--exp) ; + m = static_cast((abs&0x3FF)|0x400) << 21; + exp += (abs>>10); + } + + /// Addition operator. + /// \param a first operand + /// \param b second operand + /// \return \a a + \a b + friend f31 operator+(f31 a, f31 b) + { + if(b.exp > a.exp) + std::swap(a, b); + int d = a.exp - b.exp; + uint32 m = a.m + ((d<32) ? (b.m>>d) : 0); + int i = (m&0xFFFFFFFF) < a.m; + return f31(((m+i)>>i)|0x80000000, a.exp+i); + } + + /// Subtraction operator. + /// \param a first operand + /// \param b second operand + /// \return \a a - \a b + friend f31 operator-(f31 a, f31 b) + { + int d = a.exp - b.exp, exp = a.exp; + uint32 m = a.m - ((d<32) ? (b.m>>d) : 0); + if(!m) + return f31(0, -32); + for(; m<0x80000000; m<<=1,--exp) ; + return f31(m, exp); + } + + /// Multiplication operator. + /// \param a first operand + /// \param b second operand + /// \return \a a * \a b + friend f31 operator*(f31 a, f31 b) + { + uint32 m = multiply64(a.m, b.m); + int i = m >> 31; + return f31(m<<(1-i), a.exp + b.exp + i); + } + + /// Division operator. 
+ /// \param a first operand + /// \param b second operand + /// \return \a a / \a b + friend f31 operator/(f31 a, f31 b) + { + int i = a.m >= b.m, s; + uint32 m = divide64((a.m+i)>>i, b.m, s); + return f31(m, a.exp - b.exp + i - 1); + } + + uint32 m; ///< mantissa as 1.31. + int exp; ///< exponent. + }; + + /// Error function and postprocessing. + /// This computes the value directly in Q1.31 using the approximations given + /// [here](https://en.wikipedia.org/wiki/Error_function#Approximation_with_elementary_functions). + /// \tparam R rounding mode to use + /// \tparam C `true` for comlementary error function, `false` else + /// \param arg half-precision function argument + /// \return approximated value of error function in half-precision + /// \exception FE_OVERFLOW on overflows + /// \exception FE_UNDERFLOW on underflows + /// \exception FE_INEXACT if no other exception occurred + template unsigned int erf(unsigned int arg) + { + unsigned int abs = arg & 0x7FFF, sign = arg & 0x8000; + f31 x(abs), x2 = x * x * f31(0xB8AA3B29, 0), t = f31(0x80000000, 0) / (f31(0x80000000, 0)+f31(0xA7BA054A, -2)*x), t2 = t * t; + f31 e = ((f31(0x87DC2213, 0)*t2+f31(0xB5F0E2AE, 0))*t2+f31(0x82790637, -2)-(f31(0xBA00E2B8, 0)*t2+f31(0x91A98E62, -2))*t) * t / + ((x2.exp<0) ? f31(exp2((x2.exp>-32) ? (x2.m>>-x2.exp) : 0, 30), 0) : f31(exp2((x2.m<>(31-x2.exp))); + return (!C || sign) ? fixed2half(0x80000000-(e.m>>(C-e.exp)), 14+C, sign&(C-1U)) : + (e.exp<-25) ? underflow() : fixed2half(e.m>>1, e.exp+14, 0, e.m&1); + } + + /// Gamma function and postprocessing. + /// This approximates the value of either the gamma function or its logarithm directly in Q1.31. 
+ /// \tparam R rounding mode to use + /// \tparam L `true` for lograithm of gamma function, `false` for gamma function + /// \param arg half-precision floating-point value + /// \return lgamma/tgamma(\a arg) in half-precision + /// \exception FE_OVERFLOW on overflows + /// \exception FE_UNDERFLOW on underflows + /// \exception FE_INEXACT if \a arg is not a positive integer + template unsigned int gamma(unsigned int arg) + { +/* static const double p[] ={ 2.50662827563479526904, 225.525584619175212544, -268.295973841304927459, 80.9030806934622512966, -5.00757863970517583837, 0.0114684895434781459556 }; + double t = arg + 4.65, s = p[0]; + for(unsigned int i=0; i<5; ++i) + s += p[i+1] / (arg+i); + return std::log(s) + (arg-0.5)*std::log(t) - t; +*/ static const f31 pi(0xC90FDAA2, 1), lbe(0xB8AA3B29, 0); + unsigned int abs = arg & 0x7FFF, sign = arg & 0x8000; + bool bsign = sign != 0; + f31 z(abs), x = sign ? (z+f31(0x80000000, 0)) : z, t = x + f31(0x94CCCCCD, 2), s = + f31(0xA06C9901, 1) + f31(0xBBE654E2, -7)/(x+f31(0x80000000, 2)) + f31(0xA1CE6098, 6)/(x+f31(0x80000000, 1)) + + f31(0xE1868CB7, 7)/x - f31(0x8625E279, 8)/(x+f31(0x80000000, 0)) - f31(0xA03E158F, 2)/(x+f31(0xC0000000, 1)); + int i = (s.exp>=2) + (s.exp>=4) + (s.exp>=8) + (s.exp>=16); + s = f31((static_cast(s.exp)<<(31-i))+(log2(s.m>>1, 28)>>i), i) / lbe; + if(x.exp != -1 || x.m != 0x80000000) + { + i = (t.exp>=2) + (t.exp>=4) + (t.exp>=8); + f31 l = f31((static_cast(t.exp)<<(31-i))+(log2(t.m>>1, 30)>>i), i) / lbe; + s = (x.exp<-1) ? (s-(f31(0x80000000, -1)-x)*l) : (s+(x-f31(0x80000000, -1))*l); + } + s = x.exp ? 
(s-t) : (t-s); + if(bsign) + { + if(z.exp >= 0) + { + sign &= (L|((z.m>>(31-z.exp))&1)) - 1; + for(z=f31((z.m<<(1+z.exp))&0xFFFFFFFF, -1); z.m<0x80000000; z.m<<=1,--z.exp) ; + } + if(z.exp == -1) + z = f31(0x80000000, 0) - z; + if(z.exp < -1) + { + z = z * pi; + z.m = sincos(z.m>>(1-z.exp), 30).first; + for(z.exp=1; z.m<0x80000000; z.m<<=1,--z.exp) ; + } + else + z = f31(0x80000000, 0); + } + if(L) + { + if(bsign) + { + f31 l(0x92868247, 0); + if(z.exp < 0) + { + uint32 m = log2((z.m+1)>>1, 27); + z = f31(-((static_cast(z.exp)<<26)+(m>>5)), 5); + for(; z.m<0x80000000; z.m<<=1,--z.exp) ; + l = l + z / lbe; + } + sign = static_cast(x.exp&&(l.exp(x.exp==0) << 15; + if(s.exp < -24) + return underflow(sign); + if(s.exp > 15) + return overflow(sign); + } + } + else + { + s = s * lbe; + uint32 m; + if(s.exp < 0) + { + m = s.m >> -s.exp; + s.exp = 0; + } + else + { + m = (s.m<>(31-s.exp)); + } + s.m = exp2(m, 27); + if(!x.exp) + s = f31(0x80000000, 0) / s; + if(bsign) + { + if(z.exp < 0) + s = s * z; + s = pi / s; + if(s.exp < -24) + return underflow(sign); + } + else if(z.exp > 0 && !(z.m&((1<<(31-z.exp))-1))) + return ((s.exp+14)<<10) + (s.m>>21); + if(s.exp > 15) + return overflow(sign); + } + return fixed2half(s.m, s.exp+14, sign); + } + /// \} + + template struct half_caster; + } + + /// Half-precision floating-point type. + /// This class implements an IEEE-conformant half-precision floating-point type with the usual arithmetic + /// operators and conversions. It is implicitly convertible to single-precision floating-point, which makes artihmetic + /// expressions and functions with mixed-type operands to be of the most precise operand type. + /// + /// According to the C++98/03 definition, the half type is not a POD type. But according to C++11's less strict and + /// extended definitions it is both a standard layout type and a trivially copyable type (even if not a POD type), which + /// means it can be standard-conformantly copied using raw binary copies. 
But in this context some more words about the + /// actual size of the type. Although the half is representing an IEEE 16-bit type, it does not neccessarily have to be of + /// exactly 16-bits size. But on any reasonable implementation the actual binary representation of this type will most + /// probably not ivolve any additional "magic" or padding beyond the simple binary representation of the underlying 16-bit + /// IEEE number, even if not strictly guaranteed by the standard. But even then it only has an actual size of 16 bits if + /// your C++ implementation supports an unsigned integer type of exactly 16 bits width. But this should be the case on + /// nearly any reasonable platform. + /// + /// So if your C++ implementation is not totally exotic or imposes special alignment requirements, it is a reasonable + /// assumption that the data of a half is just comprised of the 2 bytes of the underlying IEEE representation. + class half + { + public: + /// \name Construction and assignment + /// \{ + + /// Default constructor. + /// This initializes the half to 0. Although this does not match the builtin types' default-initialization semantics + /// and may be less efficient than no initialization, it is needed to provide proper value-initialization semantics. + HALF_CONSTEXPR half() HALF_NOEXCEPT : data_() {} + + /// Conversion constructor. + /// \param rhs float to convert + /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + explicit half(float rhs) : data_(static_cast(detail::float2half(rhs))) {} + + /// Conversion to single-precision. + /// \return single precision value representing expression value + operator float() const { return detail::half2float(data_); } + + /// Assignment operator. 
+ /// \param rhs single-precision value to copy from + /// \return reference to this half + /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + half& operator=(float rhs) { data_ = static_cast(detail::float2half(rhs)); return *this; } + + /// \} + /// \name Arithmetic updates + /// \{ + + /// Arithmetic assignment. + /// \tparam T type of concrete half expression + /// \param rhs half expression to add + /// \return reference to this half + /// \exception FE_... according to operator+(half,half) + half& operator+=(half rhs) { return *this = *this + rhs; } + + /// Arithmetic assignment. + /// \tparam T type of concrete half expression + /// \param rhs half expression to subtract + /// \return reference to this half + /// \exception FE_... according to operator-(half,half) + half& operator-=(half rhs) { return *this = *this - rhs; } + + /// Arithmetic assignment. + /// \tparam T type of concrete half expression + /// \param rhs half expression to multiply with + /// \return reference to this half + /// \exception FE_... according to operator*(half,half) + half& operator*=(half rhs) { return *this = *this * rhs; } + + /// Arithmetic assignment. + /// \tparam T type of concrete half expression + /// \param rhs half expression to divide by + /// \return reference to this half + /// \exception FE_... according to operator/(half,half) + half& operator/=(half rhs) { return *this = *this / rhs; } + + /// Arithmetic assignment. + /// \param rhs single-precision value to add + /// \return reference to this half + /// \exception FE_... according to operator=() + half& operator+=(float rhs) { return *this = *this + rhs; } + + /// Arithmetic assignment. + /// \param rhs single-precision value to subtract + /// \return reference to this half + /// \exception FE_... according to operator=() + half& operator-=(float rhs) { return *this = *this - rhs; } + + /// Arithmetic assignment. 
+ /// \param rhs single-precision value to multiply with + /// \return reference to this half + /// \exception FE_... according to operator=() + half& operator*=(float rhs) { return *this = *this * rhs; } + + /// Arithmetic assignment. + /// \param rhs single-precision value to divide by + /// \return reference to this half + /// \exception FE_... according to operator=() + half& operator/=(float rhs) { return *this = *this / rhs; } + + /// \} + /// \name Increment and decrement + /// \{ + + /// Prefix increment. + /// \return incremented half value + /// \exception FE_... according to operator+(half,half) + half& operator++() { return *this = *this + half(detail::binary, 0x3C00); } + + /// Prefix decrement. + /// \return decremented half value + /// \exception FE_... according to operator-(half,half) + half& operator--() { return *this = *this + half(detail::binary, 0xBC00); } + + /// Postfix increment. + /// \return non-incremented half value + /// \exception FE_... according to operator+(half,half) + half operator++(int) { half out(*this); ++*this; return out; } + + /// Postfix decrement. + /// \return non-decremented half value + /// \exception FE_... according to operator-(half,half) + half operator--(int) { half out(*this); --*this; return out; } + /// \} + + private: + /// Rounding mode to use + static const std::float_round_style round_style = (std::float_round_style)(HALF_ROUND_STYLE); + + /// Constructor. 
+ /// \param bits binary representation to set half to + HALF_CONSTEXPR half(detail::binary_t, unsigned int bits) HALF_NOEXCEPT : data_(static_cast(bits)) {} + + /// Internal binary representation + detail::uint16 data_; + + #ifndef HALF_DOXYGEN_ONLY + friend HALF_CONSTEXPR_NOERR bool operator==(half, half); + friend HALF_CONSTEXPR_NOERR bool operator!=(half, half); + friend HALF_CONSTEXPR_NOERR bool operator<(half, half); + friend HALF_CONSTEXPR_NOERR bool operator>(half, half); + friend HALF_CONSTEXPR_NOERR bool operator<=(half, half); + friend HALF_CONSTEXPR_NOERR bool operator>=(half, half); + friend HALF_CONSTEXPR half operator-(half); + friend half operator+(half, half); + friend half operator-(half, half); + friend half operator*(half, half); + friend half operator/(half, half); + template friend std::basic_ostream& operator<<(std::basic_ostream&, half); + template friend std::basic_istream& operator>>(std::basic_istream&, half&); + friend HALF_CONSTEXPR half fabs(half); + friend half fmod(half, half); + friend half remainder(half, half); + friend half remquo(half, half, int*); + friend half fma(half, half, half); + friend HALF_CONSTEXPR_NOERR half fmax(half, half); + friend HALF_CONSTEXPR_NOERR half fmin(half, half); + friend half fdim(half, half); + friend half nanh(const char*); + friend half exp(half); + friend half exp2(half); + friend half expm1(half); + friend half log(half); + friend half log10(half); + friend half log2(half); + friend half log1p(half); + friend half sqrt(half); + friend half rsqrt(half); + friend half cbrt(half); + friend half hypot(half, half); + friend half hypot(half, half, half); + friend half pow(half, half); + friend void sincos(half, half*, half*); + friend half sin(half); + friend half cos(half); + friend half tan(half); + friend half asin(half); + friend half acos(half); + friend half atan(half); + friend half atan2(half, half); + friend half sinh(half); + friend half cosh(half); + friend half tanh(half); + friend half 
asinh(half); + friend half acosh(half); + friend half atanh(half); + friend half erf(half); + friend half erfc(half); + friend half lgamma(half); + friend half tgamma(half); + friend half ceil(half); + friend half floor(half); + friend half trunc(half); + friend half round(half); + friend long lround(half); + friend half rint(half); + friend long lrint(half); + friend half nearbyint(half); + #ifdef HALF_ENABLE_CPP11_LONG_LONG + friend long long llround(half); + friend long long llrint(half); + #endif + friend half frexp(half, int*); + friend half scalbln(half, long); + friend half modf(half, half*); + friend int ilogb(half); + friend half logb(half); + friend half nextafter(half, half); + friend half nexttoward(half, long double); + friend HALF_CONSTEXPR half copysign(half, half); + friend HALF_CONSTEXPR int fpclassify(half); + friend HALF_CONSTEXPR bool isfinite(half); + friend HALF_CONSTEXPR bool isinf(half); + friend HALF_CONSTEXPR bool isnan(half); + friend HALF_CONSTEXPR bool isnormal(half); + friend HALF_CONSTEXPR bool signbit(half); + friend HALF_CONSTEXPR bool isgreater(half, half); + friend HALF_CONSTEXPR bool isgreaterequal(half, half); + friend HALF_CONSTEXPR bool isless(half, half); + friend HALF_CONSTEXPR bool islessequal(half, half); + friend HALF_CONSTEXPR bool islessgreater(half, half); + template friend struct detail::half_caster; + friend class std::numeric_limits; + #if HALF_ENABLE_CPP11_HASH + friend struct std::hash; + #endif + #if HALF_ENABLE_CPP11_USER_LITERALS + friend half literal::operator "" _h(long double); + #endif + #endif + }; + +#if HALF_ENABLE_CPP11_USER_LITERALS + namespace literal + { + /// Half literal. + /// While this returns a properly rounded half-precision value, half literals can unfortunately not be constant + /// expressions due to rather involved conversions. So don't expect this to be a literal literal without involving + /// conversion operations at runtime. It is a convenience feature, not a performance optimization. 
+ /// \param value literal value + /// \return half with of given value (possibly rounded) + /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + inline half operator "" _h(long double value) { return half(detail::binary, detail::float2half(value)); } + } +#endif + + namespace detail + { + /// Helper class for half casts. + /// This class template has to be specialized for all valid cast arguments to define an appropriate static + /// `cast` member function and a corresponding `type` member denoting its return type. + /// \tparam T destination type + /// \tparam U source type + /// \tparam R rounding mode to use + template struct half_caster {}; + template struct half_caster + { + #if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS + static_assert(std::is_arithmetic::value, "half_cast from non-arithmetic type unsupported"); + #endif + + static half cast(U arg) { return cast_impl(arg, is_float()); }; + + private: + static half cast_impl(U arg, true_type) { return half(binary, float2half(arg)); } + static half cast_impl(U arg, false_type) { return half(binary, int2half(arg)); } + }; + template struct half_caster + { + #if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS + static_assert(std::is_arithmetic::value, "half_cast to non-arithmetic type unsupported"); + #endif + + static T cast(half arg) { return cast_impl(arg, is_float()); } + + private: + static T cast_impl(half arg, true_type) { return half2float(arg.data_); } + static T cast_impl(half arg, false_type) { return half2int(arg.data_); } + }; + template struct half_caster + { + static half cast(half arg) { return arg; } + }; + } +} + +/// Extensions to the C++ standard library. +namespace std +{ + /// Numeric limits for half-precision floats. + /// **See also:** Documentation for [std::numeric_limits](https://en.cppreference.com/w/cpp/types/numeric_limits) + template<> class numeric_limits + { + public: + /// Is template specialization. 
+ static HALF_CONSTEXPR_CONST bool is_specialized = true; + + /// Supports signed values. + static HALF_CONSTEXPR_CONST bool is_signed = true; + + /// Is not an integer type. + static HALF_CONSTEXPR_CONST bool is_integer = false; + + /// Is not exact. + static HALF_CONSTEXPR_CONST bool is_exact = false; + + /// Doesn't provide modulo arithmetic. + static HALF_CONSTEXPR_CONST bool is_modulo = false; + + /// Has a finite set of values. + static HALF_CONSTEXPR_CONST bool is_bounded = true; + + /// IEEE conformant. + static HALF_CONSTEXPR_CONST bool is_iec559 = true; + + /// Supports infinity. + static HALF_CONSTEXPR_CONST bool has_infinity = true; + + /// Supports quiet NaNs. + static HALF_CONSTEXPR_CONST bool has_quiet_NaN = true; + + /// Supports signaling NaNs. + static HALF_CONSTEXPR_CONST bool has_signaling_NaN = true; + + /// Supports subnormal values. + static HALF_CONSTEXPR_CONST float_denorm_style has_denorm = denorm_present; + + /// Supports no denormalization detection. + static HALF_CONSTEXPR_CONST bool has_denorm_loss = false; + + #if HALF_ERRHANDLING_THROWS + static HALF_CONSTEXPR_CONST bool traps = true; + #else + /// Traps only if [HALF_ERRHANDLING_THROW_...](\ref HALF_ERRHANDLING_THROW_INVALID) is acitvated. + static HALF_CONSTEXPR_CONST bool traps = false; + #endif + + /// Does not support no pre-rounding underflow detection. + static HALF_CONSTEXPR_CONST bool tinyness_before = false; + + /// Rounding mode. + static HALF_CONSTEXPR_CONST float_round_style round_style = half_float::half::round_style; + + /// Significant digits. + static HALF_CONSTEXPR_CONST int digits = 11; + + /// Significant decimal digits. + static HALF_CONSTEXPR_CONST int digits10 = 3; + + /// Required decimal digits to represent all possible values. + static HALF_CONSTEXPR_CONST int max_digits10 = 5; + + /// Number base. + static HALF_CONSTEXPR_CONST int radix = 2; + + /// One more than smallest exponent. 
+ static HALF_CONSTEXPR_CONST int min_exponent = -13; + + /// Smallest normalized representable power of 10. + static HALF_CONSTEXPR_CONST int min_exponent10 = -4; + + /// One more than largest exponent + static HALF_CONSTEXPR_CONST int max_exponent = 16; + + /// Largest finitely representable power of 10. + static HALF_CONSTEXPR_CONST int max_exponent10 = 4; + + /// Smallest positive normal value. + static HALF_CONSTEXPR half_float::half min() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x0400); } + + /// Smallest finite value. + static HALF_CONSTEXPR half_float::half lowest() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0xFBFF); } + + /// Largest finite value. + static HALF_CONSTEXPR half_float::half max() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7BFF); } + + /// Difference between 1 and next representable value. + static HALF_CONSTEXPR half_float::half epsilon() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x1400); } + + /// Maximum rounding error in ULP (units in the last place). + static HALF_CONSTEXPR half_float::half round_error() HALF_NOTHROW + { return half_float::half(half_float::detail::binary, (round_style==std::round_to_nearest) ? 0x3800 : 0x3C00); } + + /// Positive infinity. + static HALF_CONSTEXPR half_float::half infinity() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7C00); } + + /// Quiet NaN. + static HALF_CONSTEXPR half_float::half quiet_NaN() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7FFF); } + + /// Signaling NaN. + static HALF_CONSTEXPR half_float::half signaling_NaN() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7DFF); } + + /// Smallest positive subnormal value. + static HALF_CONSTEXPR half_float::half denorm_min() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x0001); } + }; + +#if HALF_ENABLE_CPP11_HASH + /// Hash function for half-precision floats. 
+ /// This is only defined if C++11 `std::hash` is supported and enabled. + /// + /// **See also:** Documentation for [std::hash](https://en.cppreference.com/w/cpp/utility/hash) + template<> struct hash + { + /// Type of function argument. + typedef half_float::half argument_type; + + /// Function return type. + typedef size_t result_type; + + /// Compute hash function. + /// \param arg half to hash + /// \return hash value + result_type operator()(argument_type arg) const { return hash()(arg.data_&-static_cast(arg.data_!=0x8000)); } + }; +#endif +} + +namespace half_float +{ + /// \anchor compop + /// \name Comparison operators + /// \{ + + /// Comparison for equality. + /// \param x first operand + /// \param y second operand + /// \retval true if operands equal + /// \retval false else + /// \exception FE_INVALID if \a x or \a y is NaN + inline HALF_CONSTEXPR_NOERR bool operator==(half x, half y) + { + return !detail::compsignal(x.data_, y.data_) && (x.data_==y.data_ || !((x.data_|y.data_)&0x7FFF)); + } + + /// Comparison for inequality. + /// \param x first operand + /// \param y second operand + /// \retval true if operands not equal + /// \retval false else + /// \exception FE_INVALID if \a x or \a y is NaN + inline HALF_CONSTEXPR_NOERR bool operator!=(half x, half y) + { + return detail::compsignal(x.data_, y.data_) || (x.data_!=y.data_ && ((x.data_|y.data_)&0x7FFF)); + } + + /// Comparison for less than. + /// \param x first operand + /// \param y second operand + /// \retval true if \a x less than \a y + /// \retval false else + /// \exception FE_INVALID if \a x or \a y is NaN + inline HALF_CONSTEXPR_NOERR bool operator<(half x, half y) + { + return !detail::compsignal(x.data_, y.data_) && + ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) < ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)); + } + + /// Comparison for greater than. 
+ /// \param x first operand + /// \param y second operand + /// \retval true if \a x greater than \a y + /// \retval false else + /// \exception FE_INVALID if \a x or \a y is NaN + inline HALF_CONSTEXPR_NOERR bool operator>(half x, half y) + { + return !detail::compsignal(x.data_, y.data_) && + ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) > ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)); + } + + /// Comparison for less equal. + /// \param x first operand + /// \param y second operand + /// \retval true if \a x less equal \a y + /// \retval false else + /// \exception FE_INVALID if \a x or \a y is NaN + inline HALF_CONSTEXPR_NOERR bool operator<=(half x, half y) + { + return !detail::compsignal(x.data_, y.data_) && + ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) <= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)); + } + + /// Comparison for greater equal. + /// \param x first operand + /// \param y second operand + /// \retval true if \a x greater equal \a y + /// \retval false else + /// \exception FE_INVALID if \a x or \a y is NaN + inline HALF_CONSTEXPR_NOERR bool operator>=(half x, half y) + { + return !detail::compsignal(x.data_, y.data_) && + ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) >= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)); + } + + /// \} + /// \anchor arithmetics + /// \name Arithmetic operators + /// \{ + + /// Identity. + /// \param arg operand + /// \return unchanged operand + inline HALF_CONSTEXPR half operator+(half arg) { return arg; } + + /// Negation. + /// \param arg operand + /// \return negated operand + inline HALF_CONSTEXPR half operator-(half arg) { return half(detail::binary, arg.data_^0x8000); } + + /// Addition. + /// This operation is exact to rounding for all rounding modes. 
+ /// \param x left operand + /// \param y right operand + /// \return sum of half expressions + /// \exception FE_INVALID if \a x and \a y are infinities with different signs or signaling NaNs + /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + inline half operator+(half x, half y) + { + #ifdef HALF_ARITHMETIC_TYPE + return half(detail::binary, detail::float2half(detail::half2float(x.data_)+detail::half2float(y.data_))); + #else + int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF; + bool sub = ((x.data_^y.data_)&0x8000) != 0; + if(absx >= 0x7C00 || absy >= 0x7C00) + return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) : (absy!=0x7C00) ? x.data_ : + (sub && absx==0x7C00) ? detail::invalid() : y.data_); + if(!absx) + return absy ? y : half(detail::binary, (half::round_style==std::round_toward_neg_infinity) ? (x.data_|y.data_) : (x.data_&y.data_)); + if(!absy) + return x; + unsigned int sign = ((sub && absy>absx) ? y.data_ : x.data_) & 0x8000; + if(absy > absx) + std::swap(absx, absy); + int exp = (absx>>10) + (absx<=0x3FF), d = exp - (absy>>10) - (absy<=0x3FF), mx = ((absx&0x3FF)|((absx>0x3FF)<<10)) << 3, my; + if(d < 13) + { + my = ((absy&0x3FF)|((absy>0x3FF)<<10)) << 3; + my = (my>>d) | ((my&((1<(half::round_style==std::round_toward_neg_infinity)<<15); + for(; mx<0x2000 && exp>1; mx<<=1,--exp) ; + } + else + { + mx += my; + int i = mx >> 14; + if((exp+=i) > 30) + return half(detail::binary, detail::overflow(sign)); + mx = (mx>>i) | (mx&i); + } + return half(detail::binary, detail::rounded(sign+((exp-1)<<10)+(mx>>3), (mx>>2)&1, (mx&0x3)!=0)); + #endif + } + + /// Subtraction. + /// This operation is exact to rounding for all rounding modes. 
+	/// \param x left operand
+	/// \param y right operand
+	/// \return difference of half expressions
+	/// \exception FE_INVALID if \a x and \a y are infinities with equal signs or signaling NaNs
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half operator-(half x, half y)
+	{
+	#ifdef HALF_ARITHMETIC_TYPE
+		return half(detail::binary, detail::float2half<half::round_style>(detail::half2float<detail::internal_t>(x.data_)-detail::half2float<detail::internal_t>(y.data_)));
+	#else
+		// implemented as addition of the negated right operand
+		return x + -y;
+	#endif
+	}
+
+	/// Multiplication.
+	/// This operation is exact to rounding for all rounding modes.
+	/// \param x left operand
+	/// \param y right operand
+	/// \return product of half expressions
+	/// \exception FE_INVALID if multiplying 0 with infinity or if \a x or \a y is signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half operator*(half x, half y)
+	{
+	#ifdef HALF_ARITHMETIC_TYPE
+		return half(detail::binary, detail::float2half<half::round_style>(detail::half2float<detail::internal_t>(x.data_)*detail::half2float<detail::internal_t>(y.data_)));
+	#else
+		int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, exp = -16;
+		// sign of the product is the XOR of the operand signs
+		unsigned int sign = (x.data_^y.data_) & 0x8000;
+		// at least one operand is an infinity or NaN
+		if(absx >= 0x7C00 || absy >= 0x7C00)
+			return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
+				((absx==0x7C00 && !absy)||(absy==0x7C00 && !absx)) ? detail::invalid() : (sign|0x7C00));
+		if(!absx || !absy)
+			return half(detail::binary, sign);
+		// normalize subnormal operands
+		for(; absx<0x400; absx<<=1,--exp) ;
+		for(; absy<0x400; absy<<=1,--exp) ;
+		detail::uint32 m = static_cast<detail::uint32>((absx&0x3FF)|0x400) * static_cast<detail::uint32>((absy&0x3FF)|0x400);
+		// i is set when the product needs one extra bit; s keeps the bit shifted out below
+		int i = m >> 21, s = m & i;
+		exp += (absx>>10) + (absy>>10) + i;
+		if(exp > 29)
+			return half(detail::binary, detail::overflow<half::round_style>(sign));
+		else if(exp < -11)
+			return half(detail::binary, detail::underflow<half::round_style>(sign));
+		return half(detail::binary, detail::fixed2half<half::round_style,20,false,false,false>(m>>i, exp, sign, s));
+	#endif
+	}
+
+	/// Division.
+	/// This operation is exact to rounding for all rounding modes.
+	/// \param x left operand
+	/// \param y right operand
+	/// \return quotient of half expressions
+	/// \exception FE_INVALID if dividing 0s or infinities with each other or if \a x or \a y is signaling NaN
+	/// \exception FE_DIVBYZERO if dividing finite value by 0
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half operator/(half x, half y)
+	{
+	#ifdef HALF_ARITHMETIC_TYPE
+		return half(detail::binary, detail::float2half<half::round_style>(detail::half2float<detail::internal_t>(x.data_)/detail::half2float<detail::internal_t>(y.data_)));
+	#else
+		int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, exp = 14;
+		// sign of the quotient is the XOR of the operand signs
+		unsigned int sign = (x.data_^y.data_) & 0x8000;
+		// at least one operand is an infinity or NaN
+		if(absx >= 0x7C00 || absy >= 0x7C00)
+			return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
+				(absx==absy) ? detail::invalid() : (sign|((absx==0x7C00) ? 0x7C00 : 0)));
+		// 0/0 is invalid, 0/y is a signed zero
+		if(!absx)
+			return half(detail::binary, absy ? sign : detail::invalid());
+		// finite non-zero value divided by zero
+		if(!absy)
+			return half(detail::binary, detail::pole(sign));
+		// normalize subnormal operands
+		for(; absx<0x400; absx<<=1,--exp) ;
+		for(; absy<0x400; absy<<=1,++exp) ;
+		detail::uint32 mx = (absx&0x3FF) | 0x400, my = (absy&0x3FF) | 0x400;
+		// when the dividend mantissa is smaller, shift it one bit further and lower the exponent
+		int i = mx < my;
+		exp += (absx>>10) - (absy>>10) - i;
+		if(exp > 29)
+			return half(detail::binary, detail::overflow<half::round_style>(sign));
+		else if(exp < -11)
+			return half(detail::binary, detail::underflow<half::round_style>(sign));
+		mx <<= 12 + i;
+		my <<= 1;
+		// a non-zero remainder is passed on as the sticky flag
+		return half(detail::binary, detail::fixed2half<half::round_style,11,false,false,false>(mx/my, exp, sign, mx%my!=0));
+	#endif
+	}
+
+	/// \}
+	/// \anchor streaming
+	/// \name Input and output
+	/// \{
+
+	/// Output operator.
+	/// This uses the built-in functionality for streaming out floating-point numbers.
+	/// \param out output stream to write into
+	/// \param arg half expression to write
+	/// \return reference to output stream
+	template<typename charT,typename traits> std::basic_ostream<charT,traits>& operator<<(std::basic_ostream<charT,traits> &out, half arg)
+	{
+	#ifdef HALF_ARITHMETIC_TYPE
+		return out << detail::half2float<detail::internal_t>(arg.data_);
+	#else
+		return out << detail::half2float<float>(arg.data_);
+	#endif
+	}
+
+	/// Input operator.
+	/// This uses the built-in functionality for streaming in floating-point numbers, specifically double precision floating
+	/// point numbers (unless overridden with [HALF_ARITHMETIC_TYPE](\ref HALF_ARITHMETIC_TYPE)). So the input string is first
+	/// rounded to double precision using the underlying platform's current floating-point rounding mode before being rounded
+	/// to half-precision using the library's half-precision rounding mode.
+	/// \param in input stream to read from
+	/// \param arg half to read into
+	/// \return reference to input stream
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	template<typename charT,typename traits> std::basic_istream<charT,traits>& operator>>(std::basic_istream<charT,traits> &in, half &arg)
+	{
+	#ifdef HALF_ARITHMETIC_TYPE
+		detail::internal_t f;
+	#else
+		double f;
+	#endif
+		// arg is left untouched when extraction fails
+		if(in >> f)
+			arg.data_ = detail::float2half<half::round_style>(f);
+		return in;
+	}
+
+	/// \}
+	/// \anchor basic
+	/// \name Basic mathematical operations
+	/// \{
+
+	/// Absolute value.
+	/// Clears bit 15 (the sign bit) of the stored pattern.
+	/// **See also:** Documentation for [std::fabs](https://en.cppreference.com/w/cpp/numeric/math/fabs).
+	/// \param arg operand
+	/// \return absolute value of \a arg
+	inline HALF_CONSTEXPR half fabs(half arg) { return half(detail::binary, arg.data_&0x7FFF); }
+
+	/// Absolute value.
+	/// Forwards to fabs().
+	/// **See also:** Documentation for [std::abs](https://en.cppreference.com/w/cpp/numeric/math/fabs).
+	/// \param arg operand
+	/// \return absolute value of \a arg
+	inline HALF_CONSTEXPR half abs(half arg) { return fabs(arg); }
+
+	/// Remainder of division.
+	/// **See also:** Documentation for [std::fmod](https://en.cppreference.com/w/cpp/numeric/math/fmod).
+	/// \param x first operand
+	/// \param y second operand
+	/// \return remainder of floating-point division.
+	/// \exception FE_INVALID if \a x is infinite or \a y is 0 or if \a x or \a y is signaling NaN
+	inline half fmod(half x, half y)
+	{
+		unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, sign = x.data_ & 0x8000;
+		// infinity or NaN operand; a finite x with infinite y is returned unchanged
+		if(absx >= 0x7C00 || absy >= 0x7C00)
+			return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
+				(absx==0x7C00) ? detail::invalid() : x.data_);
+		if(!absy)
+			return half(detail::binary, detail::invalid());
+		if(!absx)
+			return x;
+		// equal magnitudes give a zero carrying the sign of x
+		if(absx == absy)
+			return half(detail::binary, sign);
+		return half(detail::binary, sign|detail::mod<false,false>(absx, absy));
+	}
+
+	/// Remainder of division.
+	/// **See also:** Documentation for [std::remainder](https://en.cppreference.com/w/cpp/numeric/math/remainder).
+	/// \param x first operand
+	/// \param y second operand
+	/// \return remainder of floating-point division.
+	/// \exception FE_INVALID if \a x is infinite or \a y is 0 or if \a x or \a y is signaling NaN
+	inline half remainder(half x, half y)
+	{
+		unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, sign = x.data_ & 0x8000;
+		// infinity or NaN operand; a finite x with infinite y is returned unchanged
+		if(absx >= 0x7C00 || absy >= 0x7C00)
+			return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
+				(absx==0x7C00) ? detail::invalid() : x.data_);
+		if(!absy)
+			return half(detail::binary, detail::invalid());
+		// equal magnitudes give a zero carrying the sign of x
+		if(absx == absy)
+			return half(detail::binary, sign);
+		// XOR (not OR) so that a sign bit returned by mod() flips the result's sign
+		return half(detail::binary, sign^detail::mod<false,true>(absx, absy));
+	}
+
+	/// Remainder of division.
+	/// **See also:** Documentation for [std::remquo](https://en.cppreference.com/w/cpp/numeric/math/remquo).
+	/// \param x first operand
+	/// \param y second operand
+	/// \param quo address to store some bits of quotient at
+	/// \return remainder of floating-point division.
+	/// \exception FE_INVALID if \a x is infinite or \a y is 0 or if \a x or \a y is signaling NaN
+	inline half remquo(half x, half y, int *quo)
+	{
+		unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, value = x.data_ & 0x8000;
+		// infinity or NaN operand; *quo is only written (as 0) for finite x with infinite y
+		if(absx >= 0x7C00 || absy >= 0x7C00)
+			return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
+				(absx==0x7C00) ? detail::invalid() : (*quo = 0, x.data_));
+		if(!absy)
+			return half(detail::binary, detail::invalid());
+		// the quotient is negative when the operand signs differ
+		bool qsign = ((value^y.data_)&0x8000) != 0;
+		// equal magnitudes: quotient 1, remainder is a zero with the sign of x
+		int q = 1;
+		if(absx != absy)
+			value ^= detail::mod<true, true>(absx, absy, &q);
+		return *quo = qsign ? -q : q, half(detail::binary, value);
+	}
+
+	/// Fused multiply add.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::fma](https://en.cppreference.com/w/cpp/numeric/math/fma).
+	/// \param x first operand
+	/// \param y second operand
+	/// \param z third operand
+	/// \return ( \a x * \a y ) + \a z rounded as one operation.
+	/// \exception FE_INVALID according to operator*() and operator+() unless any argument is a quiet NaN and no argument is a signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding the final addition
+	inline half fma(half x, half y, half z)
+	{
+	#ifdef HALF_ARITHMETIC_TYPE
+		detail::internal_t fx = detail::half2float<detail::internal_t>(x.data_), fy = detail::half2float<detail::internal_t>(y.data_), fz = detail::half2float<detail::internal_t>(z.data_);
+		#if HALF_ENABLE_CPP11_CMATH && FP_FAST_FMA
+			return half(detail::binary, detail::float2half<half::round_style>(std::fma(fx, fy, fz)));
+		#else
+			return half(detail::binary, detail::float2half<half::round_style>(fx*fy+fz));
+		#endif
+	#else
+		int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, absz = z.data_ & 0x7FFF, exp = -15;
+		// sign of the product x*y
+		unsigned int sign = (x.data_^y.data_) & 0x8000;
+		// product and addend have different signs, so the magnitudes are subtracted
+		bool sub = ((sign^z.data_)&0x8000) != 0;
+		// at least one operand is an infinity or NaN
+		if(absx >= 0x7C00 || absy >= 0x7C00 || absz >= 0x7C00)
+			return (absx>0x7C00 || absy>0x7C00 || absz>0x7C00) ? half(detail::binary, detail::signal(x.data_, y.data_, z.data_)) :
+				(absx==0x7C00) ? half(detail::binary, (!absy || (sub && absz==0x7C00)) ? detail::invalid() : (sign|0x7C00)) :
+				(absy==0x7C00) ? half(detail::binary, (!absx || (sub && absz==0x7C00)) ? detail::invalid() : (sign|0x7C00)) : z;
+		// zero product: result is z, or (z zero too) a zero whose sign depends on the rounding mode
+		if(!absx || !absy)
+			return absz ? z : half(detail::binary, (half::round_style==std::round_toward_neg_infinity) ? (z.data_|sign) : (z.data_&sign));
+		// normalize subnormal factors
+		for(; absx<0x400; absx<<=1,--exp) ;
+		for(; absy<0x400; absy<<=1,--exp) ;
+		detail::uint32 m = static_cast<detail::uint32>((absx&0x3FF)|0x400) * static_cast<detail::uint32>((absy&0x3FF)|0x400);
+		int i = m >> 21;
+		exp += (absx>>10) + (absy>>10) + i;
+		m <<= 3 - i;
+		if(absz)
+		{
+			// normalize the addend and bring it to the same fixed-point layout as the product
+			int expz = 0;
+			for(; absz<0x400; absz<<=1,--expz) ;
+			expz += absz >> 10;
+			detail::uint32 mz = static_cast<detail::uint32>((absz&0x3FF)|0x400) << 13;
+			// keep the larger magnitude in (m, exp); the result takes its sign
+			if(expz > exp || (expz == exp && mz > m))
+			{
+				std::swap(m, mz);
+				std::swap(exp, expz);
+				if(sub)
+					sign = z.data_ & 0x8000;
+			}
+			// align the smaller mantissa; bits shifted out are folded into the lowest bit
+			int d = exp - expz;
+			mz = (d<23) ? ((mz>>d)|((mz&((static_cast<detail::uint32>(1)<<d)-1))!=0)) : 1;
+			if(sub)
+			{
+				m = m - mz;
+				// exact cancellation yields a zero whose sign depends on the rounding mode
+				if(!m)
+					return half(detail::binary, static_cast<unsigned>(half::round_style==std::round_toward_neg_infinity)<<15);
+				for(; m<0x800000; m<<=1,--exp) ;
+			}
+			else
+			{
+				m += mz;
+				i = m >> 24;
+				m = (m>>i) | (m&i);
+				exp += i;
+			}
+		}
+		if(exp > 30)
+			return half(detail::binary, detail::overflow<half::round_style>(sign));
+		else if(exp < -10)
+			return half(detail::binary, detail::underflow<half::round_style>(sign));
+		return half(detail::binary, detail::fixed2half<half::round_style,23,false,false,false>(m, exp-1, sign));
+	#endif
+	}
+
+	/// Maximum of half expressions.
+	/// **See also:** Documentation for [std::fmax](https://en.cppreference.com/w/cpp/numeric/math/fmax).
+	/// \param x first operand
+	/// \param y second operand
+	/// \return maximum of operands, ignoring quiet NaNs
+	/// \exception FE_INVALID if \a x or \a y is signaling NaN
+	inline HALF_CONSTEXPR_NOERR half fmax(half x, half y)
+	{
+		// y is chosen when it is not NaN and either x is NaN or x's ordering key is smaller than y's
+		return half(detail::binary, (!isnan(y) && (isnan(x) || (x.data_^(0x8000|(0x8000-(x.data_>>15)))) <
+			(y.data_^(0x8000|(0x8000-(y.data_>>15)))))) ? detail::select(y.data_, x.data_) : detail::select(x.data_, y.data_));
+	}
+
+	/// Minimum of half expressions.
+	/// **See also:** Documentation for [std::fmin](https://en.cppreference.com/w/cpp/numeric/math/fmin).
+	/// \param x first operand
+	/// \param y second operand
+	/// \return minimum of operands, ignoring quiet NaNs
+	/// \exception FE_INVALID if \a x or \a y is signaling NaN
+	inline HALF_CONSTEXPR_NOERR half fmin(half x, half y)
+	{
+		// y is chosen when it is not NaN and either x is NaN or x's ordering key is larger than y's
+		return half(detail::binary, (!isnan(y) && (isnan(x) || (x.data_^(0x8000|(0x8000-(x.data_>>15)))) >
+			(y.data_^(0x8000|(0x8000-(y.data_>>15)))))) ? detail::select(y.data_, x.data_) : detail::select(x.data_, y.data_));
+	}
+
+	/// Positive difference.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::fdim](https://en.cppreference.com/w/cpp/numeric/math/fdim).
+	/// \param x first operand
+	/// \param y second operand
+	/// \return \a x - \a y or 0 if difference negative
+	/// \exception FE_... according to operator-(half,half)
+	inline half fdim(half x, half y)
+	{
+		if(isnan(x) || isnan(y))
+			return half(detail::binary, detail::signal(x.data_, y.data_));
+		// positive zero when x's ordering key does not exceed y's, otherwise the rounded difference
+		return (x.data_^(0x8000|(0x8000-(x.data_>>15)))) <= (y.data_^(0x8000|(0x8000-(y.data_>>15)))) ? half(detail::binary, 0) : (x-y);
+	}
+
+	/// Get NaN value.
+	/// **See also:** Documentation for [std::nan](https://en.cppreference.com/w/cpp/numeric/math/nan).
+	/// \param arg string code
+	/// \return quiet NaN
+	inline half nanh(const char *arg)
+	{
+		unsigned int value = 0x7FFF;
+		// every character of the string is XORed into the low 8 bits of the pattern
+		while(*arg)
+			value ^= static_cast<unsigned>(*arg++) & 0xFF;
+		return half(detail::binary, value);
+	}
+
+	/// \}
+	/// \anchor exponential
+	/// \name Exponential functions
+	/// \{
+
+	/// Exponential function.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::exp](https://en.cppreference.com/w/cpp/numeric/math/exp).
+	/// \param arg function argument
+	/// \return e raised to \a arg
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half exp(half arg)
+	{
+	#ifdef HALF_ARITHMETIC_TYPE
+		return half(detail::binary, detail::float2half<half::round_style>(std::exp(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		int abs = arg.data_ & 0x7FFF, e = (abs>>10) + (abs<=0x3FF), exp;
+		// exp(+-0) is exactly 1
+		if(!abs)
+			return half(detail::binary, 0x3C00);
+		// +inf -> +inf, -inf -> +0, NaN is signaled
+		if(abs >= 0x7C00)
+			return half(detail::binary, (abs==0x7C00) ? (0x7C00&((arg.data_>>15)-1U)) : detail::signal(arg.data_));
+		// magnitude too large for a representable result
+		if(abs >= 0x4C80)
+			return half(detail::binary, (arg.data_&0x8000) ? detail::underflow<half::round_style>() : detail::overflow<half::round_style>());
+		// scale the mantissa by the fixed-point constant 0xB8AA3B29, then split into integer and fractional part
+		detail::uint32 m = detail::multiply64(static_cast<detail::uint32>((abs&0x3FF)+((abs>0x3FF)<<10))<<21, 0xB8AA3B29);
+		if(e < 14)
+		{
+			exp = 0;
+			m >>= 14 - e;
+		}
+		else
+		{
+			exp = m >> (45-e);
+			m = (m<<(e-14)) & 0x7FFFFFFF;
+		}
+		return half(detail::binary, detail::exp2_post<half::round_style>(m, exp, (arg.data_&0x8000)!=0, 0, 26));
+	#endif
+	}
+
+	/// Binary exponential.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::exp2](https://en.cppreference.com/w/cpp/numeric/math/exp2).
+	/// \param arg function argument
+	/// \return 2 raised to \a arg
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half exp2(half arg)
+	{
+	#if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
+		return half(detail::binary, detail::float2half<half::round_style>(std::exp2(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		int abs = arg.data_ & 0x7FFF, e = (abs>>10) + (abs<=0x3FF), exp = (abs&0x3FF) + ((abs>0x3FF)<<10);
+		// exp2(+-0) is exactly 1
+		if(!abs)
+			return half(detail::binary, 0x3C00);
+		// +inf -> +inf, -inf -> +0, NaN is signaled
+		if(abs >= 0x7C00)
+			return half(detail::binary, (abs==0x7C00) ? (0x7C00&((arg.data_>>15)-1U)) : detail::signal(arg.data_));
+		// magnitude too large for a representable result
+		if(abs >= 0x4E40)
+			return half(detail::binary, (arg.data_&0x8000) ? detail::underflow<half::round_style>() : detail::overflow<half::round_style>());
+		// fractional bits and integer part of the argument are passed on separately
+		return half(detail::binary, detail::exp2_post<half::round_style>(
+			(static_cast<detail::uint32>(exp)<<(6+e))&0x7FFFFFFF, exp>>(25-e), (arg.data_&0x8000)!=0, 0, 28));
+	#endif
+	}
+
+	/// Exponential minus one.
+	/// This function may be 1 ULP off the correctly rounded exact result in <0.05% of inputs for `std::round_to_nearest`
+	/// and in <1% of inputs for any other rounding mode.
+	///
+	/// **See also:** Documentation for [std::expm1](https://en.cppreference.com/w/cpp/numeric/math/expm1).
+	/// \param arg function argument
+	/// \return e raised to \a arg and subtracted by 1
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half expm1(half arg)
+	{
+	#if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
+		return half(detail::binary, detail::float2half<half::round_style>(std::expm1(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000, e = (abs>>10) + (abs<=0x3FF), exp;
+		// expm1(+-0) returns the argument unchanged
+		if(!abs)
+			return arg;
+		// +inf -> +inf (0x7C00), -inf -> -1 (0xBC00), NaN is signaled
+		if(abs >= 0x7C00)
+			return half(detail::binary, (abs==0x7C00) ? (0x7C00+(sign>>1)) : detail::signal(arg.data_));
+		// large magnitude: negative arguments round from just above -1, positive ones overflow
+		if(abs >= 0x4A00)
+			return half(detail::binary, (arg.data_&0x8000) ? detail::rounded<half::round_style,true>(0xBBFF, 1, 1) : detail::overflow<half::round_style>());
+		detail::uint32 m = detail::multiply64(static_cast<detail::uint32>((abs&0x3FF)+((abs>0x3FF)<<10))<<21, 0xB8AA3B29);
+		if(e < 14)
+		{
+			exp = 0;
+			m >>= 14 - e;
+		}
+		else
+		{
+			exp = m >> (45-e);
+			m = (m<<(e-14)) & 0x7FFFFFFF;
+		}
+		m = detail::exp2(m);
+		if(sign)
+		{
+			// negative argument: take the reciprocal, then subtract it from 1
+			int s = 0;
+			if(m > 0x80000000)
+			{
+				++exp;
+				m = detail::divide64(0x80000000, m, s);
+			}
+			m = 0x80000000 - ((m>>exp)|((m&((static_cast<detail::uint32>(1)<<exp)-1))!=0)|s);
+			exp = 0;
+		}
+		else
+			// positive argument: subtract 1 at the scale of the result
+			m -= (exp<31) ? (0x80000000>>exp) : 1;
+		// renormalize
+		for(exp+=14; m<0x80000000 && exp; m<<=1,--exp) ;
+		if(exp > 29)
+			return half(detail::binary, detail::overflow<half::round_style>());
+		return half(detail::binary, detail::rounded<half::round_style,true>(sign+(exp<<10)+(m>>21), (m>>20)&1, (m&0xFFFFF)!=0));
+	#endif
+	}
+
+	/// Natural logarithm.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::log](https://en.cppreference.com/w/cpp/numeric/math/log).
+	/// \param arg function argument
+	/// \return logarithm of \a arg to base e
+	/// \exception FE_INVALID for signaling NaN or negative argument
+	/// \exception FE_DIVBYZERO for 0
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half log(half arg)
+	{
+	#ifdef HALF_ARITHMETIC_TYPE
+		return half(detail::binary, detail::float2half<half::round_style>(std::log(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		int abs = arg.data_ & 0x7FFF, exp = -15;
+		// log(+-0) is a pole towards negative infinity
+		if(!abs)
+			return half(detail::binary, detail::pole(0x8000));
+		// negative argument: invalid for finite values and -inf, NaN is signaled
+		if(arg.data_ & 0x8000)
+			return half(detail::binary, (arg.data_<=0xFC00) ? detail::invalid() : detail::signal(arg.data_));
+		// +inf is returned unchanged, NaN is signaled
+		if(abs >= 0x7C00)
+			return (abs==0x7C00) ? arg : half(detail::binary, detail::signal(arg.data_));
+		// normalize subnormals and extract the unbiased exponent
+		for(; abs<0x400; abs<<=1,--exp) ;
+		exp += abs >> 10;
+		return half(detail::binary, detail::log2_post<half::round_style,0xB8AA3B2A>(
+			detail::log2(static_cast<detail::uint32>((abs&0x3FF)|0x400)<<20, 27)+8, exp, 17));
+	#endif
+	}
+
+	/// Common logarithm.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::log10](https://en.cppreference.com/w/cpp/numeric/math/log10).
+	/// \param arg function argument
+	/// \return logarithm of \a arg to base 10
+	/// \exception FE_INVALID for signaling NaN or negative argument
+	/// \exception FE_DIVBYZERO for 0
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half log10(half arg)
+	{
+	#ifdef HALF_ARITHMETIC_TYPE
+		return half(detail::binary, detail::float2half<half::round_style>(std::log10(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		int abs = arg.data_ & 0x7FFF, exp = -15;
+		// log10(+-0) is a pole towards negative infinity
+		if(!abs)
+			return half(detail::binary, detail::pole(0x8000));
+		// negative argument: invalid for finite values and -inf, NaN is signaled
+		if(arg.data_ & 0x8000)
+			return half(detail::binary, (arg.data_<=0xFC00) ? detail::invalid() : detail::signal(arg.data_));
+		// +inf is returned unchanged, NaN is signaled
+		if(abs >= 0x7C00)
+			return (abs==0x7C00) ? arg : half(detail::binary, detail::signal(arg.data_));
+		// arguments 10, 100, 1000 and 10000 return the exact results 1, 2, 3 and 4
+		switch(abs)
+		{
+			case 0x4900: return half(detail::binary, 0x3C00);
+			case 0x5640: return half(detail::binary, 0x4000);
+			case 0x63D0: return half(detail::binary, 0x4200);
+			case 0x70E2: return half(detail::binary, 0x4400);
+		}
+		for(; abs<0x400; abs<<=1,--exp) ;
+		exp += abs >> 10;
+		return half(detail::binary, detail::log2_post<half::round_style,0xD49A784C>(
+			detail::log2(static_cast<detail::uint32>((abs&0x3FF)|0x400)<<20, 27)+8, exp, 16));
+	#endif
+	}
+
+	/// Binary logarithm.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::log2](https://en.cppreference.com/w/cpp/numeric/math/log2).
+	/// \param arg function argument
+	/// \return logarithm of \a arg to base 2
+	/// \exception FE_INVALID for signaling NaN or negative argument
+	/// \exception FE_DIVBYZERO for 0
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half log2(half arg)
+	{
+	#if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
+		return half(detail::binary, detail::float2half<half::round_style>(std::log2(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		int abs = arg.data_ & 0x7FFF, exp = -15, s = 0;
+		// log2(+-0) is a pole towards negative infinity
+		if(!abs)
+			return half(detail::binary, detail::pole(0x8000));
+		// negative argument: invalid for finite values and -inf, NaN is signaled
+		if(arg.data_ & 0x8000)
+			return half(detail::binary, (arg.data_<=0xFC00) ? detail::invalid() : detail::signal(arg.data_));
+		// +inf is returned unchanged, NaN is signaled
+		if(abs >= 0x7C00)
+			return (abs==0x7C00) ? arg : half(detail::binary, detail::signal(arg.data_));
+		// log2(1) is exactly +0
+		if(abs == 0x3C00)
+			return half(detail::binary, 0);
+		for(; abs<0x400; abs<<=1,--exp) ;
+		exp += (abs>>10);
+		// zero mantissa bits: the argument is a power of two and the result is the integer exponent
+		if(!(abs&0x3FF))
+		{
+			unsigned int value = static_cast<unsigned>(exp<0) << 15, m = std::abs(exp) << 6;
+			for(exp=18; m<0x400; m<<=1,--exp) ;
+			return half(detail::binary, value+(exp<<10)+m);
+		}
+		// combine integer exponent and mantissa logarithm, then take the absolute value via the sign mask
+		detail::uint32 ilog = exp, sign = detail::sign_mask(ilog), m =
+			(((ilog<<27)+(detail::log2(static_cast<detail::uint32>((abs&0x3FF)|0x400)<<20, 28)>>4))^sign) - sign;
+		if(!m)
+			return half(detail::binary, 0);
+		// normalize; bits shifted out to the right are collected in the sticky flag s
+		for(exp=14; m<0x8000000 && exp; m<<=1,--exp) ;
+		for(; m>0xFFFFFFF; m>>=1,++exp)
+			s |= m & 1;
+		return half(detail::binary, detail::fixed2half<half::round_style,27,false,false,true>(m, exp, sign&0x8000, s));
+	#endif
+	}
+
+	/// Natural logarithm plus one.
+	/// This function may be 1 ULP off the correctly rounded exact result in <0.05% of inputs for `std::round_to_nearest`
+	/// and in ~1% of inputs for any other rounding mode.
+	///
+	/// **See also:** Documentation for [std::log1p](https://en.cppreference.com/w/cpp/numeric/math/log1p).
+	/// \param arg function argument
+	/// \return logarithm of \a arg plus 1 to base e
+	/// \exception FE_INVALID for signaling NaN or argument <-1
+	/// \exception FE_DIVBYZERO for -1
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half log1p(half arg)
+	{
+	#if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
+		return half(detail::binary, detail::float2half<half::round_style>(std::log1p(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		// arguments <= -1: pole at exactly -1, invalid below (including -inf), NaN is signaled
+		if(arg.data_ >= 0xBC00)
+			return half(detail::binary, (arg.data_==0xBC00) ? detail::pole(0x8000) : (arg.data_<=0xFC00) ? detail::invalid() : detail::signal(arg.data_));
+		int abs = arg.data_ & 0x7FFF, exp = -15;
+		// zeros and +inf are returned unchanged, NaN is signaled
+		if(!abs || abs >= 0x7C00)
+			return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;
+		for(; abs<0x400; abs<<=1,--exp) ;
+		exp += abs >> 10;
+		detail::uint32 m = static_cast<detail::uint32>((abs&0x3FF)|0x400) << 20;
+		if(arg.data_ & 0x8000)
+		{
+			// negative argument: form 1 - |arg| and renormalize
+			m = 0x40000000 - (m>>-exp);
+			for(exp=0; m<0x40000000; m<<=1,--exp) ;
+		}
+		else
+		{
+			if(exp < 0)
+			{
+				// |arg| < 1: form 1 + arg at the scale of 1
+				m = 0x40000000 + (m>>-exp);
+				exp = 0;
+			}
+			else
+			{
+				// arg >= 1: add 1 at the scale of arg and handle the carry
+				m += 0x40000000 >> exp;
+				int i = m >> 31;
+				m >>= i;
+				exp += i;
+			}
+		}
+		return half(detail::binary, detail::log2_post<half::round_style,0xB8AA3B2A>(detail::log2(m), exp, 17));
+	#endif
+	}
+
+	/// \}
+	/// \anchor power
+	/// \name Power functions
+	/// \{
+
+	/// Square root.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::sqrt](https://en.cppreference.com/w/cpp/numeric/math/sqrt).
+	/// \param arg function argument
+	/// \return square root of \a arg
+	/// \exception FE_INVALID for signaling NaN and negative arguments
+	/// \exception FE_INEXACT according to rounding
+	inline half sqrt(half arg)
+	{
+	#ifdef HALF_ARITHMETIC_TYPE
+		return half(detail::binary, detail::float2half<half::round_style>(std::sqrt(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		int abs = arg.data_ & 0x7FFF, exp = 15;
+		// zeros and +inf are returned unchanged, negative values are invalid, NaN is signaled
+		if(!abs || arg.data_ >= 0x7C00)
+			return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (arg.data_>0x8000) ? detail::invalid() : arg.data_);
+		for(; abs<0x400; abs<<=1,--exp) ;
+		// detail::sqrt() receives r and exp by reference; r is used afterwards for the rounding decision
+		detail::uint32 r = static_cast<detail::uint32>((abs&0x3FF)|0x400) << 10, m = detail::sqrt<20>(r, exp+=abs>>10);
+		return half(detail::binary, detail::rounded<half::round_style,false>((exp<<10)+(m&0x3FF), r>m, r!=0));
+	#endif
+	}
+
+	/// Inverse square root.
+	/// This function is exact to rounding for all rounding modes and thus generally more accurate than directly computing
+	/// 1 / sqrt(\a arg) in half-precision, in addition to also being faster.
+	/// \param arg function argument
+	/// \return reciprocal of square root of \a arg
+	/// \exception FE_INVALID for signaling NaN and negative arguments
+	/// \exception FE_INEXACT according to rounding
+	inline half rsqrt(half arg)
+	{
+	#ifdef HALF_ARITHMETIC_TYPE
+		return half(detail::binary, detail::float2half<half::round_style>(detail::internal_t(1)/std::sqrt(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		unsigned int abs = arg.data_ & 0x7FFF, bias = 0x4000;
+		// NaN is signaled, negative values are invalid, zeros give a pole, +inf gives +0
+		if(!abs || arg.data_ >= 0x7C00)
+			return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (arg.data_>0x8000) ?
+				detail::invalid() : !abs ? detail::pole(arg.data_&0x8000) : 0);
+		// normalize subnormals, compensating in the bias
+		for(; abs<0x400; abs<<=1,bias-=0x400) ;
+		unsigned int frac = (abs+=bias) & 0x7FF;
+		// this fraction pattern is answered directly from the bit pattern
+		if(frac == 0x400)
+			return half(detail::binary, 0x7A00-(abs>>1));
+		// a few specific fraction patterns are delegated to pow(arg, -0.5) depending on the rounding mode
+		if((half::round_style == std::round_to_nearest && (frac == 0x3FE || frac == 0x76C)) ||
+			(half::round_style != std::round_to_nearest && (frac == 0x15A || frac == 0x3FC || frac == 0x401 || frac == 0x402 || frac == 0x67B)))
+			return pow(arg, half(detail::binary, 0xB800));
+		// initial estimate from the bit pattern, followed by two refinement steps
+		detail::uint32 f = 0x17376 - abs, mx = (abs&0x3FF) | 0x400, my = ((f>>1)&0x3FF) | 0x400, mz = my * my;
+		int expy = (f>>11) - 31, expx = 32 - (abs>>10), i = mz >> 21;
+		for(mz=0x60000000-(((mz>>i)*mx)>>(expx-2*expy-i)); mz<0x40000000; mz<<=1,--expy) ;
+		i = (my*=mz>>10) >> 31;
+		expy += i;
+		my = (my>>(20+i)) + 1;
+		i = (mz=my*my) >> 21;
+		for(mz=0x60000000-(((mz>>i)*mx)>>(expx-2*expy-i)); mz<0x40000000; mz<<=1,--expy) ;
+		i = (my*=(mz>>10)+1) >> 31;
+		return half(detail::binary, detail::fixed2half<half::round_style,30,false,false,true>(my>>i, expy+i+14));
+	#endif
+	}
+
+	/// Cubic root.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::cbrt](https://en.cppreference.com/w/cpp/numeric/math/cbrt).
+	/// \param arg function argument
+	/// \return cubic root of \a arg
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_INEXACT according to rounding
+	inline half cbrt(half arg)
+	{
+	#if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
+		return half(detail::binary, detail::float2half<half::round_style>(std::cbrt(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		int abs = arg.data_ & 0x7FFF, exp = -15;
+		// zeros, +-1 and infinities are returned unchanged, NaN is signaled
+		if(!abs || abs == 0x3C00 || abs >= 0x7C00)
+			return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;
+		for(; abs<0x400; abs<<=1, --exp);
+		// fixed-point logarithm of |arg|, made non-negative via the sign mask
+		detail::uint32 ilog = exp + (abs>>10), sign = detail::sign_mask(ilog), f, m =
+			(((ilog<<27)+(detail::log2(static_cast<detail::uint32>((abs&0x3FF)|0x400)<<20, 24)>>4))^sign) - sign;
+		for(exp=2; m<0x80000000; m<<=1,--exp) ;
+		// 0xAAAAAAAB scales the logarithm by one third
+		m = detail::multiply64(m, 0xAAAAAAAB);
+		int i = m >> 31, s;
+		exp += i;
+		m <<= 1 - i;
+		// split into fractional part f and integer part exp
+		if(exp < 0)
+		{
+			f = m >> -exp;
+			exp = 0;
+		}
+		else
+		{
+			f = (m<<exp) & 0x7FFFFFFF;
+			exp = m >> (31-exp);
+		}
+		m = detail::exp2(f, (half::round_style==std::round_to_nearest) ? 29 : 26);
+		if(sign)
+		{
+			// negative logarithm: take the reciprocal and negate the exponent
+			if(m > 0x80000000)
+			{
+				m = detail::divide64(0x80000000, m, s);
+				++exp;
+			}
+			exp = -exp;
+		}
+		return half(detail::binary, (half::round_style==std::round_to_nearest) ?
+			detail::fixed2half<half::round_style,31,false,false,false>(m, exp+14, arg.data_&0x8000) :
+			detail::fixed2half<half::round_style,23,false,false,false>((m+0x80)>>8, exp+14, arg.data_&0x8000));
+	#endif
+	}
+
+	/// Hypotenuse function.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::hypot](https://en.cppreference.com/w/cpp/numeric/math/hypot).
+	/// \param x first argument
+	/// \param y second argument
+	/// \return square root of sum of squares without internal over- or underflows
+	/// \exception FE_INVALID if \a x or \a y is signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding of the final square root
+	inline half hypot(half x, half y)
+	{
+	#ifdef HALF_ARITHMETIC_TYPE
+		detail::internal_t fx = detail::half2float<detail::internal_t>(x.data_), fy = detail::half2float<detail::internal_t>(y.data_);
+		#if HALF_ENABLE_CPP11_CMATH
+			return half(detail::binary, detail::float2half<half::round_style>(std::hypot(fx, fy)));
+		#else
+			return half(detail::binary, detail::float2half<half::round_style>(std::sqrt(fx*fx+fy*fy)));
+		#endif
+	#else
+		int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, expx = 0, expy = 0;
+		// an infinite operand yields +inf via select(); otherwise a NaN is signaled
+		if(absx >= 0x7C00 || absy >= 0x7C00)
+			return half(detail::binary, (absx==0x7C00) ? detail::select(0x7C00, y.data_) :
+				(absy==0x7C00) ? detail::select(0x7C00, x.data_) : detail::signal(x.data_, y.data_));
+		// one operand zero: result is the magnitude of the other
+		if(!absx)
+			return half(detail::binary, absy ? detail::check_underflow(absy) : 0);
+		if(!absy)
+			return half(detail::binary, detail::check_underflow(absx));
+		// keep the larger magnitude in x
+		if(absy > absx)
+			std::swap(absx, absy);
+		for(; absx<0x400; absx<<=1,--expx) ;
+		for(; absy<0x400; absy<<=1,--expy) ;
+		// square both mantissas and bring them to a common fixed-point layout
+		detail::uint32 mx = (absx&0x3FF) | 0x400, my = (absy&0x3FF) | 0x400;
+		mx *= mx;
+		my *= my;
+		int ix = mx >> 21, iy = my >> 21;
+		expx = 2*(expx+(absx>>10)) - 15 + ix;
+		expy = 2*(expy+(absy>>10)) - 15 + iy;
+		mx <<= 10 - ix;
+		my <<= 10 - iy;
+		// align the smaller square; bits shifted out are folded into the lowest bit
+		int d = expx - expy;
+		my = (d<30) ? ((my>>d)|((my&((static_cast<detail::uint32>(1)<<d)-1))!=0)) : 1;
+		return half(detail::binary, detail::hypot_post<half::round_style>(mx+my, expx));
+	#endif
+	}
+
+	/// Hypotenuse function.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::hypot](https://en.cppreference.com/w/cpp/numeric/math/hypot).
+	/// \param x first argument
+	/// \param y second argument
+	/// \param z third argument
+	/// \return square root of sum of squares without internal over- or underflows
+	/// \exception FE_INVALID if \a x, \a y or \a z is signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding of the final square root
+	inline half hypot(half x, half y, half z)
+	{
+	#ifdef HALF_ARITHMETIC_TYPE
+		detail::internal_t fx = detail::half2float<detail::internal_t>(x.data_), fy = detail::half2float<detail::internal_t>(y.data_), fz = detail::half2float<detail::internal_t>(z.data_);
+		return half(detail::binary, detail::float2half<half::round_style>(std::sqrt(fx*fx+fy*fy+fz*fz)));
+	#else
+		int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, absz = z.data_ & 0x7FFF, expx = 0, expy = 0, expz = 0;
+		// a zero operand reduces the problem to the two-argument overload
+		if(!absx)
+			return hypot(y, z);
+		if(!absy)
+			return hypot(x, z);
+		if(!absz)
+			return hypot(x, y);
+		// an infinite operand yields +inf via select(); otherwise a NaN is signaled
+		if(absx >= 0x7C00 || absy >= 0x7C00 || absz >= 0x7C00)
+			return half(detail::binary, (absx==0x7C00) ? detail::select(0x7C00, detail::select(y.data_, z.data_)) :
+				(absy==0x7C00) ? detail::select(0x7C00, detail::select(x.data_, z.data_)) :
+				(absz==0x7C00) ? detail::select(0x7C00, detail::select(x.data_, y.data_)) :
+				detail::signal(x.data_, y.data_, z.data_));
+		// sort magnitudes so that absx >= absy >= absz
+		if(absz > absy)
+			std::swap(absy, absz);
+		if(absy > absx)
+			std::swap(absx, absy);
+		if(absz > absy)
+			std::swap(absy, absz);
+		for(; absx<0x400; absx<<=1,--expx) ;
+		for(; absy<0x400; absy<<=1,--expy) ;
+		for(; absz<0x400; absz<<=1,--expz) ;
+		// square all mantissas and bring them to a common fixed-point layout
+		detail::uint32 mx = (absx&0x3FF) | 0x400, my = (absy&0x3FF) | 0x400, mz = (absz&0x3FF) | 0x400;
+		mx *= mx;
+		my *= my;
+		mz *= mz;
+		int ix = mx >> 21, iy = my >> 21, iz = mz >> 21;
+		expx = 2*(expx+(absx>>10)) - 15 + ix;
+		expy = 2*(expy+(absy>>10)) - 15 + iy;
+		expz = 2*(expz+(absz>>10)) - 15 + iz;
+		mx <<= 10 - ix;
+		my <<= 10 - iy;
+		mz <<= 10 - iz;
+		// add the two smaller squares first, aligning z to y with a sticky bit
+		int d = expy - expz;
+		mz = (d<30) ? ((mz>>d)|((mz&((static_cast<detail::uint32>(1)<<d)-1))!=0)) : 1;
+		my += mz;
+		if(my & 0x80000000)
+		{
+			// carry: renormalize and re-establish the ordering against x
+			my = (my>>1) | (my&1);
+			if(++expy > expx)
+			{
+				std::swap(mx, my);
+				std::swap(expx, expy);
+			}
+		}
+		// then align the partial sum to the largest square
+		d = expx - expy;
+		my = (d<30) ? ((my>>d)|((my&((static_cast<detail::uint32>(1)<<d)-1))!=0)) : 1;
+		return half(detail::binary, detail::hypot_post<half::round_style>(mx+my, expx));
+	#endif
+	}
+
+	/// Power function.
+	/// This function may be 1 ULP off the correctly rounded exact result for any rounding mode in ~0.00025% of inputs.
+	///
+	/// **See also:** Documentation for [std::pow](https://en.cppreference.com/w/cpp/numeric/math/pow).
+	/// \param x base
+	/// \param y exponent
+	/// \return \a x raised to \a y
+	/// \exception FE_INVALID if \a x or \a y is signaling NaN or if \a x is finite and negative and \a y is finite and not integral
+	/// \exception FE_DIVBYZERO if \a x is 0 and \a y is negative
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half pow(half x, half y)
+	{
+	#ifdef HALF_ARITHMETIC_TYPE
+		return half(detail::binary, detail::float2half<half::round_style>(std::pow(detail::half2float<detail::internal_t>(x.data_), detail::half2float<detail::internal_t>(y.data_))));
+	#else
+		int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, exp = -15;
+		// y == +-0 or x == +1: result is 1 (select() still inspects the other operand)
+		if(!absy || x.data_ == 0x3C00)
+			return half(detail::binary, detail::select(0x3C00, (x.data_==0x3C00) ? y.data_ : x.data_));
+		// y is integral when it is large enough or has no fractional mantissa bits
+		bool is_int = absy >= 0x6400 || (absy>=0x3C00 && !(absy&((1<<(25-(absy>>10)))-1)));
+		// the result keeps x's sign only for odd integral y
+		unsigned int sign = x.data_ & (static_cast<unsigned>((absy<0x6800)&&is_int&&((absy>>(25-(absy>>10)))&1))<<15);
+		// at least one operand is an infinity or NaN
+		if(absx >= 0x7C00 || absy >= 0x7C00)
+			return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
+				(absy==0x7C00) ? ((absx==0x3C00) ? 0x3C00 : (!absx && y.data_==0xFC00) ? detail::pole() :
+				(0x7C00&-((y.data_>>15)^(absx>0x3C00)))) : (sign|(0x7C00&((y.data_>>15)-1U))));
+		// zero base: pole for negative y, signed zero otherwise
+		if(!absx)
+			return half(detail::binary, (y.data_&0x8000) ? detail::pole(sign) : sign);
+		// negative base with non-integral exponent
+		if((x.data_&0x8000) && !is_int)
+			return half(detail::binary, detail::invalid());
+		// base -1: result is +-1
+		if(x.data_ == 0xBC00)
+			return half(detail::binary, sign|0x3C00);
+		// exponents 0.5, 1, 2 and -1 are delegated to cheaper operations
+		switch(y.data_)
+		{
+			case 0x3800: return sqrt(x);
+			case 0x3C00: return half(detail::binary, detail::check_underflow(x.data_));
+			case 0x4000: return x * x;
+			case 0xBC00: return half(detail::binary, 0x3C00) / x;
+		}
+		for(; absx<0x400; absx<<=1,--exp) ;
+		// fixed-point logarithm of |x|, made non-negative via the sign mask
+		detail::uint32 ilog = exp + (absx>>10), msign = detail::sign_mask(ilog), f, m =
+			(((ilog<<27)+((detail::log2(static_cast<detail::uint32>((absx&0x3FF)|0x400)<<20)+8)>>4))^msign) - msign;
+		for(exp=-11; m<0x80000000; m<<=1,--exp) ;
+		for(; absy<0x400; absy<<=1,--exp) ;
+		// multiply the logarithm by |y|
+		m = detail::multiply64(m, static_cast<detail::uint32>((absy&0x3FF)|0x400)<<21);
+		int i = m >> 31;
+		exp += (absy>>10) + i;
+		m <<= 1 - i;
+		// split into fractional part f and integer part exp
+		if(exp < 0)
+		{
+			f = m >> -exp;
+			exp = 0;
+		}
+		else
+		{
+			f = (m<<exp) & 0x7FFFFFFF;
+			exp = m >> (31-exp);
+		}
+		return half(detail::binary, detail::exp2_post<half::round_style>(f, exp, ((msign&1)^(y.data_>>15))!=0, sign));
+	#endif
+	}
+
+	/// \}
+	/// \anchor trigonometric
+	/// \name Trigonometric functions
+	/// \{
+
+	/// Compute sine and cosine simultaneously.
+	/// This returns the same results as sin() and cos() but is faster than calling each function individually.
+	///
+	/// This function is exact to rounding for all rounding modes.
+ /// \param arg function argument + /// \param sin variable to take sine of \a arg + /// \param cos variable to take cosine of \a arg + /// \exception FE_INVALID for signaling NaN or infinity + /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + inline void sincos(half arg, half *sin, half *cos) + { + #ifdef HALF_ARITHMETIC_TYPE + detail::internal_t f = detail::half2float(arg.data_); + *sin = half(detail::binary, detail::float2half(std::sin(f))); + *cos = half(detail::binary, detail::float2half(std::cos(f))); + #else + int abs = arg.data_ & 0x7FFF, sign = arg.data_ >> 15, k; + if(abs >= 0x7C00) + *sin = *cos = half(detail::binary, (abs==0x7C00) ? detail::invalid() : detail::signal(arg.data_)); + else if(!abs) + { + *sin = arg; + *cos = half(detail::binary, 0x3C00); + } + else if(abs < 0x2500) + { + *sin = half(detail::binary, detail::rounded(arg.data_-1, 1, 1)); + *cos = half(detail::binary, detail::rounded(0x3BFF, 1, 1)); + } + else + { + if(half::round_style != std::round_to_nearest) + { + switch(abs) + { + case 0x48B7: + *sin = half(detail::binary, detail::rounded((~arg.data_&0x8000)|0x1D07, 1, 1)); + *cos = half(detail::binary, detail::rounded(0xBBFF, 1, 1)); + return; + case 0x598C: + *sin = half(detail::binary, detail::rounded((arg.data_&0x8000)|0x3BFF, 1, 1)); + *cos = half(detail::binary, detail::rounded(0x80FC, 1, 1)); + return; + case 0x6A64: + *sin = half(detail::binary, detail::rounded((~arg.data_&0x8000)|0x3BFE, 1, 1)); + *cos = half(detail::binary, detail::rounded(0x27FF, 1, 1)); + return; + case 0x6D8C: + *sin = half(detail::binary, detail::rounded((arg.data_&0x8000)|0x0FE6, 1, 1)); + *cos = half(detail::binary, detail::rounded(0x3BFF, 1, 1)); + return; + } + } + std::pair sc = detail::sincos(detail::angle_arg(abs, k), 28); + switch(k & 3) + { + case 1: sc = std::make_pair(sc.second, -sc.first); break; + case 2: sc = std::make_pair(-sc.first, -sc.second); break; + case 3: sc = std::make_pair(-sc.second, sc.first); break; + } 
+ *sin = half(detail::binary, detail::fixed2half((sc.first^-static_cast(sign))+sign)); + *cos = half(detail::binary, detail::fixed2half(sc.second)); + } + #endif + } + + /// Sine function. + /// This function is exact to rounding for all rounding modes. + /// + /// **See also:** Documentation for [std::sin](https://en.cppreference.com/w/cpp/numeric/math/sin). + /// \param arg function argument + /// \return sine value of \a arg + /// \exception FE_INVALID for signaling NaN or infinity + /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + inline half sin(half arg) + { + #ifdef HALF_ARITHMETIC_TYPE + return half(detail::binary, detail::float2half(std::sin(detail::half2float(arg.data_)))); + #else + int abs = arg.data_ & 0x7FFF, k; + if(!abs) + return arg; + if(abs >= 0x7C00) + return half(detail::binary, (abs==0x7C00) ? detail::invalid() : detail::signal(arg.data_)); + if(abs < 0x2900) + return half(detail::binary, detail::rounded(arg.data_-1, 1, 1)); + if(half::round_style != std::round_to_nearest) + switch(abs) + { + case 0x48B7: return half(detail::binary, detail::rounded((~arg.data_&0x8000)|0x1D07, 1, 1)); + case 0x6A64: return half(detail::binary, detail::rounded((~arg.data_&0x8000)|0x3BFE, 1, 1)); + case 0x6D8C: return half(detail::binary, detail::rounded((arg.data_&0x8000)|0x0FE6, 1, 1)); + } + std::pair sc = detail::sincos(detail::angle_arg(abs, k), 28); + detail::uint32 sign = -static_cast(((k>>1)&1)^(arg.data_>>15)); + return half(detail::binary, detail::fixed2half((((k&1) ? sc.second : sc.first)^sign) - sign)); + #endif + } + + /// Cosine function. + /// This function is exact to rounding for all rounding modes. + /// + /// **See also:** Documentation for [std::cos](https://en.cppreference.com/w/cpp/numeric/math/cos). 
+ /// \param arg function argument + /// \return cosine value of \a arg + /// \exception FE_INVALID for signaling NaN or infinity + /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + inline half cos(half arg) + { + #ifdef HALF_ARITHMETIC_TYPE + return half(detail::binary, detail::float2half(std::cos(detail::half2float(arg.data_)))); + #else + int abs = arg.data_ & 0x7FFF, k; + if(!abs) + return half(detail::binary, 0x3C00); + if(abs >= 0x7C00) + return half(detail::binary, (abs==0x7C00) ? detail::invalid() : detail::signal(arg.data_)); + if(abs < 0x2500) + return half(detail::binary, detail::rounded(0x3BFF, 1, 1)); + if(half::round_style != std::round_to_nearest && abs == 0x598C) + return half(detail::binary, detail::rounded(0x80FC, 1, 1)); + std::pair sc = detail::sincos(detail::angle_arg(abs, k), 28); + detail::uint32 sign = -static_cast(((k>>1)^k)&1); + return half(detail::binary, detail::fixed2half((((k&1) ? sc.first : sc.second)^sign) - sign)); + #endif + } + + /// Tangent function. + /// This function is exact to rounding for all rounding modes. + /// + /// **See also:** Documentation for [std::tan](https://en.cppreference.com/w/cpp/numeric/math/tan). + /// \param arg function argument + /// \return tangent value of \a arg + /// \exception FE_INVALID for signaling NaN or infinity + /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + inline half tan(half arg) + { + #ifdef HALF_ARITHMETIC_TYPE + return half(detail::binary, detail::float2half(std::tan(detail::half2float(arg.data_)))); + #else + int abs = arg.data_ & 0x7FFF, exp = 13, k; + if(!abs) + return arg; + if(abs >= 0x7C00) + return half(detail::binary, (abs==0x7C00) ? 
detail::invalid() : detail::signal(arg.data_)); + if(abs < 0x2700) + return half(detail::binary, detail::rounded(arg.data_, 0, 1)); + if(half::round_style != std::round_to_nearest) + switch(abs) + { + case 0x658C: return half(detail::binary, detail::rounded((arg.data_&0x8000)|0x07E6, 1, 1)); + case 0x7330: return half(detail::binary, detail::rounded((~arg.data_&0x8000)|0x4B62, 1, 1)); + } + std::pair sc = detail::sincos(detail::angle_arg(abs, k), 30); + if(k & 1) + sc = std::make_pair(-sc.second, sc.first); + detail::uint32 signy = detail::sign_mask(sc.first), signx = detail::sign_mask(sc.second); + detail::uint32 my = (sc.first^signy) - signy, mx = (sc.second^signx) - signx; + for(; my<0x80000000; my<<=1,--exp) ; + for(; mx<0x80000000; mx<<=1,++exp) ; + return half(detail::binary, detail::tangent_post(my, mx, exp, (signy^signx^arg.data_)&0x8000)); + #endif + } + + /// Arc sine. + /// This function is exact to rounding for all rounding modes. + /// + /// **See also:** Documentation for [std::asin](https://en.cppreference.com/w/cpp/numeric/math/asin). + /// \param arg function argument + /// \return arc sine value of \a arg + /// \exception FE_INVALID for signaling NaN or if abs(\a arg) > 1 + /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + inline half asin(half arg) + { + #ifdef HALF_ARITHMETIC_TYPE + return half(detail::binary, detail::float2half(std::asin(detail::half2float(arg.data_)))); + #else + unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000; + if(!abs) + return arg; + if(abs >= 0x3C00) + return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (abs>0x3C00) ? 
detail::invalid() : + detail::rounded(sign|0x3E48, 0, 1)); + if(abs < 0x2900) + return half(detail::binary, detail::rounded(arg.data_, 0, 1)); + if(half::round_style != std::round_to_nearest && (abs == 0x2B44 || abs == 0x2DC3)) + return half(detail::binary, detail::rounded(arg.data_+1, 1, 1)); + std::pair sc = detail::atan2_args(abs); + detail::uint32 m = detail::atan2(sc.first, sc.second, (half::round_style==std::round_to_nearest) ? 27 : 26); + return half(detail::binary, detail::fixed2half(m, 14, sign)); + #endif + } + + /// Arc cosine function. + /// This function is exact to rounding for all rounding modes. + /// + /// **See also:** Documentation for [std::acos](https://en.cppreference.com/w/cpp/numeric/math/acos). + /// \param arg function argument + /// \return arc cosine value of \a arg + /// \exception FE_INVALID for signaling NaN or if abs(\a arg) > 1 + /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + inline half acos(half arg) + { + #ifdef HALF_ARITHMETIC_TYPE + return half(detail::binary, detail::float2half(std::acos(detail::half2float(arg.data_)))); + #else + unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ >> 15; + if(!abs) + return half(detail::binary, detail::rounded(0x3E48, 0, 1)); + if(abs >= 0x3C00) + return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (abs>0x3C00) ? detail::invalid() : + sign ? detail::rounded(0x4248, 0, 1) : 0); + std::pair cs = detail::atan2_args(abs); + detail::uint32 m = detail::atan2(cs.second, cs.first, 28); + return half(detail::binary, detail::fixed2half(sign ? (0xC90FDAA2-m) : m, 15, 0, sign)); + #endif + } + + /// Arc tangent function. + /// This function is exact to rounding for all rounding modes. + /// + /// **See also:** Documentation for [std::atan](https://en.cppreference.com/w/cpp/numeric/math/atan). 
+ /// \param arg function argument + /// \return arc tangent value of \a arg + /// \exception FE_INVALID for signaling NaN + /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + inline half atan(half arg) + { + #ifdef HALF_ARITHMETIC_TYPE + return half(detail::binary, detail::float2half(std::atan(detail::half2float(arg.data_)))); + #else + unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000; + if(!abs) + return arg; + if(abs >= 0x7C00) + return half(detail::binary, (abs==0x7C00) ? detail::rounded(sign|0x3E48, 0, 1) : detail::signal(arg.data_)); + if(abs <= 0x2700) + return half(detail::binary, detail::rounded(arg.data_-1, 1, 1)); + int exp = (abs>>10) + (abs<=0x3FF); + detail::uint32 my = (abs&0x3FF) | ((abs>0x3FF)<<10); + detail::uint32 m = (exp>15) ? detail::atan2(my<<19, 0x20000000>>(exp-15), (half::round_style==std::round_to_nearest) ? 26 : 24) : + detail::atan2(my<<(exp+4), 0x20000000, (half::round_style==std::round_to_nearest) ? 30 : 28); + return half(detail::binary, detail::fixed2half(m, 14, sign)); + #endif + } + + /// Arc tangent function. + /// This function may be 1 ULP off the correctly rounded exact result in ~0.005% of inputs for `std::round_to_nearest`, + /// in ~0.1% of inputs for `std::round_toward_zero` and in ~0.02% of inputs for any other rounding mode. + /// + /// **See also:** Documentation for [std::atan2](https://en.cppreference.com/w/cpp/numeric/math/atan2). 
+ /// \param y numerator + /// \param x denominator + /// \return arc tangent value + /// \exception FE_INVALID if \a x or \a y is signaling NaN + /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + inline half atan2(half y, half x) + { + #ifdef HALF_ARITHMETIC_TYPE + return half(detail::binary, detail::float2half(std::atan2(detail::half2float(y.data_), detail::half2float(x.data_)))); + #else + unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, signx = x.data_ >> 15, signy = y.data_ & 0x8000; + if(absx >= 0x7C00 || absy >= 0x7C00) + { + if(absx > 0x7C00 || absy > 0x7C00) + return half(detail::binary, detail::signal(x.data_, y.data_)); + if(absy == 0x7C00) + return half(detail::binary, (absx<0x7C00) ? detail::rounded(signy|0x3E48, 0, 1) : + signx ? detail::rounded(signy|0x40B6, 0, 1) : + detail::rounded(signy|0x3A48, 0, 1)); + return (x.data_==0x7C00) ? half(detail::binary, signy) : half(detail::binary, detail::rounded(signy|0x4248, 0, 1)); + } + if(!absy) + return signx ? half(detail::binary, detail::rounded(signy|0x4248, 0, 1)) : y; + if(!absx) + return half(detail::binary, detail::rounded(signy|0x3E48, 0, 1)); + int d = (absy>>10) + (absy<=0x3FF) - (absx>>10) - (absx<=0x3FF); + if(d > (signx ? 18 : 12)) + return half(detail::binary, detail::rounded(signy|0x3E48, 0, 1)); + if(signx && d < -11) + return half(detail::binary, detail::rounded(signy|0x4248, 0, 1)); + if(!signx && d < ((half::round_style==std::round_toward_zero) ? -15 : -9)) + { + for(; absy<0x400; absy<<=1,--d) ; + detail::uint32 mx = ((absx<<1)&0x7FF) | 0x800, my = ((absy<<1)&0x7FF) | 0x800; + int i = my < mx; + d -= i; + if(d < -25) + return half(detail::binary, detail::underflow(signy)); + my <<= 11 + i; + return half(detail::binary, detail::fixed2half(my/mx, d+14, signy, my%mx!=0)); + } + detail::uint32 m = detail::atan2( ((absy&0x3FF)|((absy>0x3FF)<<10))<<(19+((d<0) ? d : (d>0) ? 0 : -1)), + ((absx&0x3FF)|((absx>0x3FF)<<10))<<(19-((d>0) ? d : (d<0) ? 
0 : 1))); + return half(detail::binary, detail::fixed2half(signx ? (0xC90FDAA2-m) : m, 15, signy, signx)); + #endif + } + + /// \} + /// \anchor hyperbolic + /// \name Hyperbolic functions + /// \{ + + /// Hyperbolic sine. + /// This function is exact to rounding for all rounding modes. + /// + /// **See also:** Documentation for [std::sinh](https://en.cppreference.com/w/cpp/numeric/math/sinh). + /// \param arg function argument + /// \return hyperbolic sine value of \a arg + /// \exception FE_INVALID for signaling NaN + /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + inline half sinh(half arg) + { + #ifdef HALF_ARITHMETIC_TYPE + return half(detail::binary, detail::float2half(std::sinh(detail::half2float(arg.data_)))); + #else + int abs = arg.data_ & 0x7FFF, exp; + if(!abs || abs >= 0x7C00) + return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg; + if(abs <= 0x2900) + return half(detail::binary, detail::rounded(arg.data_, 0, 1)); + std::pair mm = detail::hyperbolic_args(abs, exp, (half::round_style==std::round_to_nearest) ? 29 : 27); + detail::uint32 m = mm.first - mm.second; + for(exp+=13; m<0x80000000 && exp; m<<=1,--exp) ; + unsigned int sign = arg.data_ & 0x8000; + if(exp > 29) + return half(detail::binary, detail::overflow(sign)); + return half(detail::binary, detail::fixed2half(m, exp, sign)); + #endif + } + + /// Hyperbolic cosine. + /// This function is exact to rounding for all rounding modes. + /// + /// **See also:** Documentation for [std::cosh](https://en.cppreference.com/w/cpp/numeric/math/cosh). 
+ /// \param arg function argument + /// \return hyperbolic cosine value of \a arg + /// \exception FE_INVALID for signaling NaN + /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + inline half cosh(half arg) + { + #ifdef HALF_ARITHMETIC_TYPE + return half(detail::binary, detail::float2half(std::cosh(detail::half2float(arg.data_)))); + #else + int abs = arg.data_ & 0x7FFF, exp; + if(!abs) + return half(detail::binary, 0x3C00); + if(abs >= 0x7C00) + return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : 0x7C00); + std::pair mm = detail::hyperbolic_args(abs, exp, (half::round_style==std::round_to_nearest) ? 23 : 26); + detail::uint32 m = mm.first + mm.second, i = (~m&0xFFFFFFFF) >> 31; + m = (m>>i) | (m&i) | 0x80000000; + if((exp+=13+i) > 29) + return half(detail::binary, detail::overflow()); + return half(detail::binary, detail::fixed2half(m, exp)); + #endif + } + + /// Hyperbolic tangent. + /// This function is exact to rounding for all rounding modes. + /// + /// **See also:** Documentation for [std::tanh](https://en.cppreference.com/w/cpp/numeric/math/tanh). + /// \param arg function argument + /// \return hyperbolic tangent value of \a arg + /// \exception FE_INVALID for signaling NaN + /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + inline half tanh(half arg) + { + #ifdef HALF_ARITHMETIC_TYPE + return half(detail::binary, detail::float2half(std::tanh(detail::half2float(arg.data_)))); + #else + int abs = arg.data_ & 0x7FFF, exp; + if(!abs) + return arg; + if(abs >= 0x7C00) + return half(detail::binary, (abs>0x7C00) ? 
detail::signal(arg.data_) : (arg.data_-0x4000)); + if(abs >= 0x4500) + return half(detail::binary, detail::rounded((arg.data_&0x8000)|0x3BFF, 1, 1)); + if(abs < 0x2700) + return half(detail::binary, detail::rounded(arg.data_-1, 1, 1)); + if(half::round_style != std::round_to_nearest && abs == 0x2D3F) + return half(detail::binary, detail::rounded(arg.data_-3, 0, 1)); + std::pair mm = detail::hyperbolic_args(abs, exp, 27); + detail::uint32 my = mm.first - mm.second - (half::round_style!=std::round_to_nearest), mx = mm.first + mm.second, i = (~mx&0xFFFFFFFF) >> 31; + for(exp=13; my<0x80000000; my<<=1,--exp) ; + mx = (mx>>i) | 0x80000000; + return half(detail::binary, detail::tangent_post(my, mx, exp-i, arg.data_&0x8000)); + #endif + } + + /// Hyperbolic area sine. + /// This function is exact to rounding for all rounding modes. + /// + /// **See also:** Documentation for [std::asinh](https://en.cppreference.com/w/cpp/numeric/math/asinh). + /// \param arg function argument + /// \return area sine value of \a arg + /// \exception FE_INVALID for signaling NaN + /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + inline half asinh(half arg) + { + #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH + return half(detail::binary, detail::float2half(std::asinh(detail::half2float(arg.data_)))); + #else + int abs = arg.data_ & 0x7FFF; + if(!abs || abs >= 0x7C00) + return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg; + if(abs <= 0x2900) + return half(detail::binary, detail::rounded(arg.data_-1, 1, 1)); + if(half::round_style != std::round_to_nearest) + switch(abs) + { + case 0x32D4: return half(detail::binary, detail::rounded(arg.data_-13, 1, 1)); + case 0x3B5B: return half(detail::binary, detail::rounded(arg.data_-197, 1, 1)); + } + return half(detail::binary, detail::area(arg.data_)); + #endif + } + + /// Hyperbolic area cosine. + /// This function is exact to rounding for all rounding modes. 
+ /// + /// **See also:** Documentation for [std::acosh](https://en.cppreference.com/w/cpp/numeric/math/acosh). + /// \param arg function argument + /// \return area cosine value of \a arg + /// \exception FE_INVALID for signaling NaN or arguments <1 + /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + inline half acosh(half arg) + { + #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH + return half(detail::binary, detail::float2half(std::acosh(detail::half2float(arg.data_)))); + #else + int abs = arg.data_ & 0x7FFF; + if((arg.data_&0x8000) || abs < 0x3C00) + return half(detail::binary, (abs<=0x7C00) ? detail::invalid() : detail::signal(arg.data_)); + if(abs == 0x3C00) + return half(detail::binary, 0); + if(arg.data_ >= 0x7C00) + return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg; + return half(detail::binary, detail::area(arg.data_)); + #endif + } + + /// Hyperbolic area tangent. + /// This function is exact to rounding for all rounding modes. + /// + /// **See also:** Documentation for [std::atanh](https://en.cppreference.com/w/cpp/numeric/math/atanh). + /// \param arg function argument + /// \return area tangent value of \a arg + /// \exception FE_INVALID for signaling NaN or if abs(\a arg) > 1 + /// \exception FE_DIVBYZERO for +/-1 + /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + inline half atanh(half arg) + { + #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH + return half(detail::binary, detail::float2half(std::atanh(detail::half2float(arg.data_)))); + #else + int abs = arg.data_ & 0x7FFF, exp = 0; + if(!abs) + return arg; + if(abs >= 0x3C00) + return half(detail::binary, (abs==0x3C00) ? detail::pole(arg.data_&0x8000) : (abs<=0x7C00) ? 
detail::invalid() : detail::signal(arg.data_)); + if(abs < 0x2700) + return half(detail::binary, detail::rounded(arg.data_, 0, 1)); + detail::uint32 m = static_cast((abs&0x3FF)|((abs>0x3FF)<<10)) << ((abs>>10)+(abs<=0x3FF)+6), my = 0x80000000 + m, mx = 0x80000000 - m; + for(; mx<0x80000000; mx<<=1,++exp) ; + int i = my >= mx, s; + return half(detail::binary, detail::log2_post(detail::log2( + (detail::divide64(my>>i, mx, s)+1)>>1, 27)+0x10, exp+i-1, 16, arg.data_&0x8000)); + #endif + } + + /// \} + /// \anchor special + /// \name Error and gamma functions + /// \{ + + /// Error function. + /// This function may be 1 ULP off the correctly rounded exact result for any rounding mode in <0.5% of inputs. + /// + /// **See also:** Documentation for [std::erf](https://en.cppreference.com/w/cpp/numeric/math/erf). + /// \param arg function argument + /// \return error function value of \a arg + /// \exception FE_INVALID for signaling NaN + /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + inline half erf(half arg) + { + #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH + return half(detail::binary, detail::float2half(std::erf(detail::half2float(arg.data_)))); + #else + unsigned int abs = arg.data_ & 0x7FFF; + if(!abs || abs >= 0x7C00) + return (abs>=0x7C00) ? half(detail::binary, (abs==0x7C00) ? (arg.data_-0x4000) : detail::signal(arg.data_)) : arg; + if(abs >= 0x4200) + return half(detail::binary, detail::rounded((arg.data_&0x8000)|0x3BFF, 1, 1)); + return half(detail::binary, detail::erf(arg.data_)); + #endif + } + + /// Complementary error function. + /// This function may be 1 ULP off the correctly rounded exact result for any rounding mode in <0.5% of inputs. + /// + /// **See also:** Documentation for [std::erfc](https://en.cppreference.com/w/cpp/numeric/math/erfc). 
+ /// \param arg function argument + /// \return 1 minus error function value of \a arg + /// \exception FE_INVALID for signaling NaN + /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + inline half erfc(half arg) + { + #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH + return half(detail::binary, detail::float2half(std::erfc(detail::half2float(arg.data_)))); + #else + unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000; + if(abs >= 0x7C00) + return (abs>=0x7C00) ? half(detail::binary, (abs==0x7C00) ? (sign>>1) : detail::signal(arg.data_)) : arg; + if(!abs) + return half(detail::binary, 0x3C00); + if(abs >= 0x4400) + return half(detail::binary, detail::rounded((sign>>1)-(sign>>15), sign>>15, 1)); + return half(detail::binary, detail::erf(arg.data_)); + #endif + } + + /// Natural logarithm of gamma function. + /// This function may be 1 ULP off the correctly rounded exact result for any rounding mode in ~0.025% of inputs. + /// + /// **See also:** Documentation for [std::lgamma](https://en.cppreference.com/w/cpp/numeric/math/lgamma). + /// \param arg function argument + /// \return natural logarithm of gamma function for \a arg + /// \exception FE_INVALID for signaling NaN + /// \exception FE_DIVBYZERO for 0 or negative integer arguments + /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + inline half lgamma(half arg) + { + #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH + return half(detail::binary, detail::float2half(std::lgamma(detail::half2float(arg.data_)))); + #else + int abs = arg.data_ & 0x7FFF; + if(abs >= 0x7C00) + return half(detail::binary, (abs==0x7C00) ?
0x7C00 : detail::signal(arg.data_)); + if(!abs || arg.data_ >= 0xE400 || (arg.data_ >= 0xBC00 && !(abs&((1<<(25-(abs>>10)))-1)))) + return half(detail::binary, detail::pole()); + if(arg.data_ == 0x3C00 || arg.data_ == 0x4000) + return half(detail::binary, 0); + return half(detail::binary, detail::gamma(arg.data_)); + #endif + } + + /// Gamma function. + /// This function may be 1 ULP off the correctly rounded exact result for any rounding mode in <0.25% of inputs. + /// + /// **See also:** Documentation for [std::tgamma](https://en.cppreference.com/w/cpp/numeric/math/tgamma). + /// \param arg function argument + /// \return gamma function value of \a arg + /// \exception FE_INVALID for signaling NaN, negative infinity or negative integer arguments + /// \exception FE_DIVBYZERO for 0 + /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + inline half tgamma(half arg) + { + #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH + return half(detail::binary, detail::float2half(std::tgamma(detail::half2float(arg.data_)))); + #else + unsigned int abs = arg.data_ & 0x7FFF; + if(!abs) + return half(detail::binary, detail::pole(arg.data_)); + if(abs >= 0x7C00) + return (arg.data_==0x7C00) ? arg : half(detail::binary, detail::signal(arg.data_)); + if(arg.data_ >= 0xE400 || (arg.data_ >= 0xBC00 && !(abs&((1<<(25-(abs>>10)))-1)))) + return half(detail::binary, detail::invalid()); + if(arg.data_ >= 0xCA80) + return half(detail::binary, detail::underflow((1-((abs>>(25-(abs>>10)))&1))<<15)); + if(arg.data_ <= 0x100 || (arg.data_ >= 0x4900 && arg.data_ < 0x8000)) + return half(detail::binary, detail::overflow()); + if(arg.data_ == 0x3C00) + return arg; + return half(detail::binary, detail::gamma(arg.data_)); + #endif + } + + /// \} + /// \anchor rounding + /// \name Rounding + /// \{ + + /// Nearest integer not less than half value. + /// **See also:** Documentation for [std::ceil](https://en.cppreference.com/w/cpp/numeric/math/ceil). 
+ /// \param arg half to round + /// \return nearest integer not less than \a arg + /// \exception FE_INVALID for signaling NaN + /// \exception FE_INEXACT if value had to be rounded + inline half ceil(half arg) { return half(detail::binary, detail::integral(arg.data_)); } + + /// Nearest integer not greater than half value. + /// **See also:** Documentation for [std::floor](https://en.cppreference.com/w/cpp/numeric/math/floor). + /// \param arg half to round + /// \return nearest integer not greater than \a arg + /// \exception FE_INVALID for signaling NaN + /// \exception FE_INEXACT if value had to be rounded + inline half floor(half arg) { return half(detail::binary, detail::integral(arg.data_)); } + + /// Nearest integer not greater in magnitude than half value. + /// **See also:** Documentation for [std::trunc](https://en.cppreference.com/w/cpp/numeric/math/trunc). + /// \param arg half to round + /// \return nearest integer not greater in magnitude than \a arg + /// \exception FE_INVALID for signaling NaN + /// \exception FE_INEXACT if value had to be rounded + inline half trunc(half arg) { return half(detail::binary, detail::integral(arg.data_)); } + + /// Nearest integer. + /// **See also:** Documentation for [std::round](https://en.cppreference.com/w/cpp/numeric/math/round). + /// \param arg half to round + /// \return nearest integer, rounded away from zero in half-way cases + /// \exception FE_INVALID for signaling NaN + /// \exception FE_INEXACT if value had to be rounded + inline half round(half arg) { return half(detail::binary, detail::integral(arg.data_)); } + + /// Nearest integer. + /// **See also:** Documentation for [std::lround](https://en.cppreference.com/w/cpp/numeric/math/round). 
+ /// \param arg half to round + /// \return nearest integer, rounded away from zero in half-way cases + /// \exception FE_INVALID if value is not representable as `long` + inline long lround(half arg) { return detail::half2int(arg.data_); } + + /// Nearest integer using half's internal rounding mode. + /// **See also:** Documentation for [std::rint](https://en.cppreference.com/w/cpp/numeric/math/rint). + /// \param arg half expression to round + /// \return nearest integer using default rounding mode + /// \exception FE_INVALID for signaling NaN + /// \exception FE_INEXACT if value had to be rounded + inline half rint(half arg) { return half(detail::binary, detail::integral(arg.data_)); } + + /// Nearest integer using half's internal rounding mode. + /// **See also:** Documentation for [std::lrint](https://en.cppreference.com/w/cpp/numeric/math/rint). + /// \param arg half expression to round + /// \return nearest integer using default rounding mode + /// \exception FE_INVALID if value is not representable as `long` + /// \exception FE_INEXACT if value had to be rounded + inline long lrint(half arg) { return detail::half2int(arg.data_); } + + /// Nearest integer using half's internal rounding mode. + /// **See also:** Documentation for [std::nearbyint](https://en.cppreference.com/w/cpp/numeric/math/nearbyint). + /// \param arg half expression to round + /// \return nearest integer using default rounding mode + /// \exception FE_INVALID for signaling NaN + inline half nearbyint(half arg) { return half(detail::binary, detail::integral(arg.data_)); } +#if HALF_ENABLE_CPP11_LONG_LONG + /// Nearest integer. + /// **See also:** Documentation for [std::llround](https://en.cppreference.com/w/cpp/numeric/math/round). 
+ /// \param arg half to round + /// \return nearest integer, rounded away from zero in half-way cases + /// \exception FE_INVALID if value is not representable as `long long` + inline long long llround(half arg) { return detail::half2int(arg.data_); } + + /// Nearest integer using half's internal rounding mode. + /// **See also:** Documentation for [std::llrint](https://en.cppreference.com/w/cpp/numeric/math/rint). + /// \param arg half expression to round + /// \return nearest integer using default rounding mode + /// \exception FE_INVALID if value is not representable as `long long` + /// \exception FE_INEXACT if value had to be rounded + inline long long llrint(half arg) { return detail::half2int(arg.data_); } +#endif + + /// \} + /// \anchor float + /// \name Floating point manipulation + /// \{ + + /// Decompress floating-point number. + /// **See also:** Documentation for [std::frexp](https://en.cppreference.com/w/cpp/numeric/math/frexp). + /// \param arg number to decompress + /// \param exp address to store exponent at + /// \return significand in range [0.5, 1) + /// \exception FE_INVALID for signaling NaN + inline half frexp(half arg, int *exp) + { + *exp = 0; + unsigned int abs = arg.data_ & 0x7FFF; + if(abs >= 0x7C00 || !abs) + return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg; + for(; abs<0x400; abs<<=1,--*exp) ; + *exp += (abs>>10) - 14; + return half(detail::binary, (arg.data_&0x8000)|0x3800|(abs&0x3FF)); + } + + /// Multiply by power of two. + /// This function is exact to rounding for all rounding modes. + /// + /// **See also:** Documentation for [std::scalbln](https://en.cppreference.com/w/cpp/numeric/math/scalbn).
+ /// \param arg number to modify + /// \param exp power of two to multiply with + /// \return \a arg multiplied by 2 raised to \a exp + /// \exception FE_INVALID for signaling NaN + /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + inline half scalbln(half arg, long exp) + { + unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000; + if(abs >= 0x7C00 || !abs) + return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg; + for(; abs<0x400; abs<<=1,--exp) ; + exp += abs >> 10; + if(exp > 30) + return half(detail::binary, detail::overflow(sign)); + else if(exp < -10) + return half(detail::binary, detail::underflow(sign)); + else if(exp > 0) + return half(detail::binary, sign|(exp<<10)|(abs&0x3FF)); + unsigned int m = (abs&0x3FF) | 0x400; + return half(detail::binary, detail::rounded(sign|(m>>(1-exp)), (m>>-exp)&1, (m&((1<<-exp)-1))!=0)); + } + + /// Multiply by power of two. + /// This function is exact to rounding for all rounding modes. + /// + /// **See also:** Documentation for [std::scalbn](https://en.cppreference.com/w/cpp/numeric/math/scalbn). + /// \param arg number to modify + /// \param exp power of two to multiply with + /// \return \a arg multiplied by 2 raised to \a exp + /// \exception FE_INVALID for signaling NaN + /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + inline half scalbn(half arg, int exp) { return scalbln(arg, exp); } + + /// Multiply by power of two. + /// This function is exact to rounding for all rounding modes. + /// + /// **See also:** Documentation for [std::ldexp](https://en.cppreference.com/w/cpp/numeric/math/ldexp).
+ /// \param arg number to modify + /// \param exp power of two to multiply with + /// \return \a arg multiplied by 2 raised to \a exp + /// \exception FE_INVALID for signaling NaN + /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + inline half ldexp(half arg, int exp) { return scalbln(arg, exp); } + + /// Extract integer and fractional parts. + /// **See also:** Documentation for [std::modf](https://en.cppreference.com/w/cpp/numeric/math/modf). + /// \param arg number to split into integer and fractional parts + /// \param iptr address to store integer part at + /// \return fractional part + /// \exception FE_INVALID for signaling NaN + inline half modf(half arg, half *iptr) + { + unsigned int abs = arg.data_ & 0x7FFF; + if(abs > 0x7C00) + { + arg = half(detail::binary, detail::signal(arg.data_)); + return *iptr = arg, arg; + } + if(abs >= 0x6400) + return *iptr = arg, half(detail::binary, arg.data_&0x8000); + if(abs < 0x3C00) + return iptr->data_ = arg.data_ & 0x8000, arg; + unsigned int exp = abs >> 10, mask = (1<<(25-exp)) - 1, m = arg.data_ & mask; + iptr->data_ = arg.data_ & ~mask; + if(!m) + return half(detail::binary, arg.data_&0x8000); + for(; m<0x400; m<<=1,--exp) ; + return half(detail::binary, (arg.data_&0x8000)|(exp<<10)|(m&0x3FF)); + } + + /// Extract exponent. + /// **See also:** Documentation for [std::ilogb](https://en.cppreference.com/w/cpp/numeric/math/ilogb). + /// \param arg number to query + /// \return floating-point exponent + /// \retval FP_ILOGB0 for zero + /// \retval FP_ILOGBNAN for NaN + /// \retval INT_MAX for infinity + /// \exception FE_INVALID for 0, infinite or NaN values + inline int ilogb(half arg) + { + int abs = arg.data_ & 0x7FFF, exp; + if(!abs || abs >= 0x7C00) + { + detail::raise(FE_INVALID); + return !abs ? FP_ILOGB0 : (abs==0x7C00) ? INT_MAX : FP_ILOGBNAN; + } + for(exp=(abs>>10)-15; abs<0x200; abs<<=1,--exp) ; + return exp; + } + + /// Extract exponent.
+ /// **See also:** Documentation for [std::logb](https://en.cppreference.com/w/cpp/numeric/math/logb). + /// \param arg number to query + /// \return floating-point exponent + /// \exception FE_INVALID for signaling NaN + /// \exception FE_DIVBYZERO for 0 + inline half logb(half arg) + { + int abs = arg.data_ & 0x7FFF, exp; + if(!abs) + return half(detail::binary, detail::pole(0x8000)); + if(abs >= 0x7C00) + return half(detail::binary, (abs==0x7C00) ? 0x7C00 : detail::signal(arg.data_)); + for(exp=(abs>>10)-15; abs<0x200; abs<<=1,--exp) ; + unsigned int value = static_cast(exp<0) << 15; + if(exp) + { + unsigned int m = std::abs(exp) << 6; + for(exp=18; m<0x400; m<<=1,--exp) ; + value |= (exp<<10) + m; + } + return half(detail::binary, value); + } + + /// Next representable value. + /// **See also:** Documentation for [std::nextafter](https://en.cppreference.com/w/cpp/numeric/math/nextafter). + /// \param from value to compute next representable value for + /// \param to direction towards which to compute next value + /// \return next representable value after \a from in direction towards \a to + /// \exception FE_INVALID for signaling NaN + /// \exception FE_OVERFLOW for infinite result from finite argument + /// \exception FE_UNDERFLOW for subnormal result + inline half nextafter(half from, half to) + { + int fabs = from.data_ & 0x7FFF, tabs = to.data_ & 0x7FFF; + if(fabs > 0x7C00 || tabs > 0x7C00) + return half(detail::binary, detail::signal(from.data_, to.data_)); + if(from.data_ == to.data_ || !(fabs|tabs)) + return to; + if(!fabs) + { + detail::raise(FE_UNDERFLOW, !HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT); + return half(detail::binary, (to.data_&0x8000)+1); + } + unsigned int out = from.data_ + (((from.data_>>15)^static_cast( + (from.data_^(0x8000|(0x8000-(from.data_>>15))))<(to.data_^(0x8000|(0x8000-(to.data_>>15))))))<<1) - 1; + detail::raise(FE_OVERFLOW, fabs<0x7C00 && (out&0x7C00)==0x7C00); + detail::raise(FE_UNDERFLOW, 
!HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT && (out&0x7C00)<0x400); + return half(detail::binary, out); + } + + /// Next representable value. + /// **See also:** Documentation for [std::nexttoward](https://en.cppreference.com/w/cpp/numeric/math/nexttoward). + /// \param from value to compute next representable value for + /// \param to direction towards which to compute next value + /// \return next representable value after \a from in direction towards \a to + /// \exception FE_INVALID for signaling NaN + /// \exception FE_OVERFLOW for infinite result from finite argument + /// \exception FE_UNDERFLOW for subnormal result + inline half nexttoward(half from, long double to) + { + int fabs = from.data_ & 0x7FFF; + if(fabs > 0x7C00) + return half(detail::binary, detail::signal(from.data_)); + long double lfrom = static_cast(from); + if(detail::builtin_isnan(to) || lfrom == to) + return half(static_cast(to)); + if(!fabs) + { + detail::raise(FE_UNDERFLOW, !HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT); + return half(detail::binary, (static_cast(detail::builtin_signbit(to))<<15)+1); + } + unsigned int out = from.data_ + (((from.data_>>15)^static_cast(lfrom 0x7C00; } + + /// Check if normal number. + /// **See also:** Documentation for [std::isnormal](https://en.cppreference.com/w/cpp/numeric/math/isnormal). + /// \param arg number to check + /// \retval true if normal number + /// \retval false if either subnormal, zero, infinity or NaN + inline HALF_CONSTEXPR bool isnormal(half arg) { return ((arg.data_&0x7C00)!=0) & ((arg.data_&0x7C00)!=0x7C00); } + + /// Check sign. + /// **See also:** Documentation for [std::signbit](https://en.cppreference.com/w/cpp/numeric/math/signbit). + /// \param arg number to check + /// \retval true for negative number + /// \retval false for positive number + inline HALF_CONSTEXPR bool signbit(half arg) { return (arg.data_&0x8000) != 0; } + + /// \} + /// \anchor compfunc + /// \name Comparison + /// \{ + + /// Quiet comparison for greater than. 
+ /// **See also:** Documentation for [std::isgreater](https://en.cppreference.com/w/cpp/numeric/math/isgreater). + /// \param x first operand + /// \param y second operand + /// \retval true if \a x greater than \a y + /// \retval false else + inline HALF_CONSTEXPR bool isgreater(half x, half y) + { + return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) > ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y); + } + + /// Quiet comparison for greater equal. + /// **See also:** Documentation for [std::isgreaterequal](https://en.cppreference.com/w/cpp/numeric/math/isgreaterequal). + /// \param x first operand + /// \param y second operand + /// \retval true if \a x greater equal \a y + /// \retval false else + inline HALF_CONSTEXPR bool isgreaterequal(half x, half y) + { + return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) >= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y); + } + + /// Quiet comparison for less than. + /// **See also:** Documentation for [std::isless](https://en.cppreference.com/w/cpp/numeric/math/isless). + /// \param x first operand + /// \param y second operand + /// \retval true if \a x less than \a y + /// \retval false else + inline HALF_CONSTEXPR bool isless(half x, half y) + { + return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) < ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y); + } + + /// Quiet comparison for less equal. + /// **See also:** Documentation for [std::islessequal](https://en.cppreference.com/w/cpp/numeric/math/islessequal). 
+ /// \param x first operand + /// \param y second operand + /// \retval true if \a x less equal \a y + /// \retval false else + inline HALF_CONSTEXPR bool islessequal(half x, half y) + { + return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) <= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y); + } + + /// Quiet comparison for less or greater. + /// **See also:** Documentation for [std::islessgreater](https://en.cppreference.com/w/cpp/numeric/math/islessgreater). + /// \param x first operand + /// \param y second operand + /// \retval true if either less or greater + /// \retval false else + inline HALF_CONSTEXPR bool islessgreater(half x, half y) + { + return x.data_!=y.data_ && ((x.data_|y.data_)&0x7FFF) && !isnan(x) && !isnan(y); + } + + /// Quiet check if unordered. + /// **See also:** Documentation for [std::isunordered](https://en.cppreference.com/w/cpp/numeric/math/isunordered). + /// \param x first operand + /// \param y second operand + /// \retval true if unordered (one or two NaN operands) + /// \retval false else + inline HALF_CONSTEXPR bool isunordered(half x, half y) { return isnan(x) || isnan(y); } + + /// \} + /// \anchor casting + /// \name Casting + /// \{ + + /// Cast to or from half-precision floating-point number. + /// This casts between [half](\ref half_float::half) and any built-in arithmetic type. The values are converted + /// directly using the default rounding mode, without any roundtrip over `float` that a `static_cast` would otherwise do. + /// + /// Using this cast with neither of the two types being a [half](\ref half_float::half) or with any of the two types + /// not being a built-in arithmetic type (apart from [half](\ref half_float::half), of course) results in a compiler + /// error and casting between [half](\ref half_float::half)s returns the argument unmodified.
+ /// \tparam T destination type (half or built-in arithmetic type) + /// \tparam U source type (half or built-in arithmetic type) + /// \param arg value to cast + /// \return \a arg converted to destination type + /// \exception FE_INVALID if \a T is integer type and result is not representable as \a T + /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + template T half_cast(U arg) { return detail::half_caster::cast(arg); } + + /// Cast to or from half-precision floating-point number. + /// This casts between [half](\ref half_float::half) and any built-in arithmetic type. The values are converted + /// directly using the specified rounding mode, without any roundtrip over `float` that a `static_cast` would otherwise do. + /// + /// Using this cast with neither of the two types being a [half](\ref half_float::half) or with any of the two types + /// not being a built-in arithmetic type (apart from [half](\ref half_float::half), of course) results in a compiler + /// error and casting between [half](\ref half_float::half)s returns the argument unmodified. + /// \tparam T destination type (half or built-in arithmetic type) + /// \tparam R rounding mode to use. + /// \tparam U source type (half or built-in arithmetic type) + /// \param arg value to cast + /// \return \a arg converted to destination type + /// \exception FE_INVALID if \a T is integer type and result is not representable as \a T + /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + template T half_cast(U arg) { return detail::half_caster::cast(arg); } + /// \} + + /// \} + /// \anchor errors + /// \name Error handling + /// \{ + + /// Clear exception flags. + /// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled, + /// but in that case manual flag management is the only way to raise flags. 
+ /// + /// **See also:** Documentation for [std::feclearexcept](https://en.cppreference.com/w/cpp/numeric/fenv/feclearexcept). + /// \param excepts OR of exceptions to clear + /// \retval 0 all selected flags cleared successfully + inline int feclearexcept(int excepts) { detail::errflags() &= ~excepts; return 0; } + + /// Test exception flags. + /// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled, + /// but in that case manual flag management is the only way to raise flags. + /// + /// **See also:** Documentation for [std::fetestexcept](https://en.cppreference.com/w/cpp/numeric/fenv/fetestexcept). + /// \param excepts OR of exceptions to test + /// \return OR of selected exceptions if raised + inline int fetestexcept(int excepts) { return detail::errflags() & excepts; } + + /// Raise exception flags. + /// This raises the specified floating point exceptions and also invokes any additional automatic exception handling as + /// configured with the [HALF_ERRHANDLING_...](\ref HALF_ERRHANDLING_ERRNO) preprocessor symbols. + /// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled, + /// but in that case manual flag management is the only way to raise flags. + /// + /// **See also:** Documentation for [std::feraiseexcept](https://en.cppreference.com/w/cpp/numeric/fenv/feraiseexcept). + /// \param excepts OR of exceptions to raise + /// \retval 0 all selected exceptions raised successfully + inline int feraiseexcept(int excepts) { detail::errflags() |= excepts; detail::raise(excepts); return 0; } + + /// Save exception flags. + /// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled, + /// but in that case manual flag management is the only way to raise flags. + /// + /// **See also:** Documentation for [std::fegetexceptflag](https://en.cppreference.com/w/cpp/numeric/fenv/feexceptflag).
+ /// \param flagp address to store flag state at + /// \param excepts OR of flags to save + /// \retval 0 for success + inline int fegetexceptflag(int *flagp, int excepts) { *flagp = detail::errflags() & excepts; return 0; } + + /// Restore exception flags. + /// This only copies the specified exception state (including unset flags) without incurring any additional exception handling. + /// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled, + /// but in that case manual flag management is the only way to raise flags. + /// + /// **See also:** Documentation for [std::fesetexceptflag](https://en.cppreference.com/w/cpp/numeric/fenv/feexceptflag). + /// \param flagp address to take flag state from + /// \param excepts OR of flags to restore + /// \retval 0 for success + inline int fesetexceptflag(const int *flagp, int excepts) { detail::errflags() = (detail::errflags()|(*flagp&excepts)) & (*flagp|~excepts); return 0; } + + /// Throw C++ exceptions based on set exception flags. + /// This function manually throws a corresponding C++ exception if one of the specified flags is set, + /// no matter if automatic throwing (via [HALF_ERRHANDLING_THROW_...](\ref HALF_ERRHANDLING_THROW_INVALID)) is enabled or not. + /// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled, + /// but in that case manual flag management is the only way to raise flags.
+ /// \param excepts OR of exceptions to test + /// \param msg error message to use for exception description + /// \throw std::domain_error if `FE_INVALID` or `FE_DIVBYZERO` is selected and set + /// \throw std::overflow_error if `FE_OVERFLOW` is selected and set + /// \throw std::underflow_error if `FE_UNDERFLOW` is selected and set + /// \throw std::range_error if `FE_INEXACT` is selected and set + inline void fethrowexcept(int excepts, const char *msg = "") + { + excepts &= detail::errflags(); + if(excepts & (FE_INVALID|FE_DIVBYZERO)) + throw std::domain_error(msg); + if(excepts & FE_OVERFLOW) + throw std::overflow_error(msg); + if(excepts & FE_UNDERFLOW) + throw std::underflow_error(msg); + if(excepts & FE_INEXACT) + throw std::range_error(msg); + } + /// \} +} + + +#undef HALF_UNUSED_NOERR +#undef HALF_CONSTEXPR +#undef HALF_CONSTEXPR_CONST +#undef HALF_CONSTEXPR_NOERR +#undef HALF_NOEXCEPT +#undef HALF_NOTHROW +#undef HALF_THREAD_LOCAL +#undef HALF_TWOS_COMPLEMENT_INT +#ifdef HALF_POP_WARNINGS + #pragma warning(pop) + #undef HALF_POP_WARNINGS +#endif + +#endif diff --git a/ext/miniz b/ext/miniz index 1ff82be7d..35528ad76 160000 --- a/ext/miniz +++ b/ext/miniz @@ -1 +1 @@ -Subproject commit 1ff82be7d67f5c2f8b5497f538eea247861e0717 +Subproject commit 35528ad769143b9ed38a95a22d460b963e39f278 diff --git a/ext/stb/CMakeLists.txt b/ext/stb/CMakeLists.txt index 70a3ab18d..bca64d93d 100644 --- a/ext/stb/CMakeLists.txt +++ b/ext/stb/CMakeLists.txt @@ -7,3 +7,7 @@ add_library(${PROJECT_NAME} INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}/include/stb_image.h" "${CMAKE_CURRENT_SOURCE_DIR}/include/stb_image_resize2.h" "${CMAKE_CURRENT_SOURCE_DIR}/include/stb_image_write.h") + +target_include_directories(${PROJECT_NAME} INTERFACE + $ + $) diff --git a/ext/tinyexr/CMakeLists.txt b/ext/tinyexr/CMakeLists.txt new file mode 100644 index 000000000..9a766886f --- /dev/null +++ b/ext/tinyexr/CMakeLists.txt @@ -0,0 +1,11 @@ +cmake_minimum_required(VERSION 3.16) +project(sourcepp_tinyexr) 
+set(CMAKE_CXX_STANDARD 20) + +# Create library +add_library(${PROJECT_NAME} INTERFACE + "${CMAKE_CURRENT_SOURCE_DIR}/include/tinyexr.h") + +target_include_directories(${PROJECT_NAME} INTERFACE + $ + $) diff --git a/ext/tinyexr/LICENSE b/ext/tinyexr/LICENSE new file mode 100644 index 000000000..292ab32ec --- /dev/null +++ b/ext/tinyexr/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2014 - 2021, Syoyo Fujita and many contributors. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/ext/tinyexr/include/tinyexr.h b/ext/tinyexr/include/tinyexr.h new file mode 100644 index 000000000..64ee67f2c --- /dev/null +++ b/ext/tinyexr/include/tinyexr.h @@ -0,0 +1,9304 @@ +#ifndef TINYEXR_H_ +#define TINYEXR_H_ +/* +Copyright (c) 2014 - 2021, Syoyo Fujita and many contributors. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Syoyo Fujita nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +// TinyEXR contains some OpenEXR code, which is licensed under ------------ + +/////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2002, Industrial Light & Magic, a division of Lucas +// Digital Ltd. 
LLC +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Industrial Light & Magic nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////// + +// End of OpenEXR license ------------------------------------------------- + + +// +// +// Do this: +// #define TINYEXR_IMPLEMENTATION +// before you include this file in *one* C or C++ file to create the +// implementation. +// +// // i.e. it should look like this: +// #include ... +// #include ... +// #include ... 
+// #define TINYEXR_IMPLEMENTATION +// #include "tinyexr.h" +// +// + +#include // for size_t +#include // guess stdint.h is available(C99) + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \ + defined(__i386) || defined(__i486__) || defined(__i486) || \ + defined(i386) || defined(__ia64__) || defined(__x86_64__) +#define TINYEXR_X86_OR_X64_CPU 1 +#else +#define TINYEXR_X86_OR_X64_CPU 0 +#endif + +#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || TINYEXR_X86_OR_X64_CPU +#define TINYEXR_LITTLE_ENDIAN 1 +#else +#define TINYEXR_LITTLE_ENDIAN 0 +#endif + +// Use miniz or not to decode ZIP format pixel. Linking with zlib +// required if this flag is 0 and TINYEXR_USE_STB_ZLIB is 0. +#ifndef TINYEXR_USE_MINIZ +#define TINYEXR_USE_MINIZ (1) +#endif + +// Use the ZIP implementation of stb_image.h and stb_image_write.h. +#ifndef TINYEXR_USE_STB_ZLIB +#define TINYEXR_USE_STB_ZLIB (0) +#endif + +// Use nanozlib. +#ifndef TINYEXR_USE_NANOZLIB +#define TINYEXR_USE_NANOZLIB (0) +#endif + +// Disable PIZ compression when applying cpplint. +#ifndef TINYEXR_USE_PIZ +#define TINYEXR_USE_PIZ (1) +#endif + +#ifndef TINYEXR_USE_ZFP +#define TINYEXR_USE_ZFP (0) // TinyEXR extension. +// http://computation.llnl.gov/projects/floating-point-compression +#endif + +#ifndef TINYEXR_USE_THREAD +#define TINYEXR_USE_THREAD (0) // No threaded loading. 
+// http://computation.llnl.gov/projects/floating-point-compression +#endif + +#ifndef TINYEXR_USE_OPENMP +#ifdef _OPENMP +#define TINYEXR_USE_OPENMP (1) +#else +#define TINYEXR_USE_OPENMP (0) +#endif +#endif + +#define TINYEXR_SUCCESS (0) +#define TINYEXR_ERROR_INVALID_MAGIC_NUMBER (-1) +#define TINYEXR_ERROR_INVALID_EXR_VERSION (-2) +#define TINYEXR_ERROR_INVALID_ARGUMENT (-3) +#define TINYEXR_ERROR_INVALID_DATA (-4) +#define TINYEXR_ERROR_INVALID_FILE (-5) +#define TINYEXR_ERROR_INVALID_PARAMETER (-6) +#define TINYEXR_ERROR_CANT_OPEN_FILE (-7) +#define TINYEXR_ERROR_UNSUPPORTED_FORMAT (-8) +#define TINYEXR_ERROR_INVALID_HEADER (-9) +#define TINYEXR_ERROR_UNSUPPORTED_FEATURE (-10) +#define TINYEXR_ERROR_CANT_WRITE_FILE (-11) +#define TINYEXR_ERROR_SERIALIZATION_FAILED (-12) +#define TINYEXR_ERROR_LAYER_NOT_FOUND (-13) +#define TINYEXR_ERROR_DATA_TOO_LARGE (-14) + +// @note { OpenEXR file format: http://www.openexr.com/openexrfilelayout.pdf } + +// pixel type: possible values are: UINT = 0 HALF = 1 FLOAT = 2 +#define TINYEXR_PIXELTYPE_UINT (0) +#define TINYEXR_PIXELTYPE_HALF (1) +#define TINYEXR_PIXELTYPE_FLOAT (2) + +#define TINYEXR_MAX_HEADER_ATTRIBUTES (1024) +#define TINYEXR_MAX_CUSTOM_ATTRIBUTES (128) + +#define TINYEXR_COMPRESSIONTYPE_NONE (0) +#define TINYEXR_COMPRESSIONTYPE_RLE (1) +#define TINYEXR_COMPRESSIONTYPE_ZIPS (2) +#define TINYEXR_COMPRESSIONTYPE_ZIP (3) +#define TINYEXR_COMPRESSIONTYPE_PIZ (4) +#define TINYEXR_COMPRESSIONTYPE_ZFP (128) // TinyEXR extension + +#define TINYEXR_ZFP_COMPRESSIONTYPE_RATE (0) +#define TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION (1) +#define TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY (2) + +#define TINYEXR_TILE_ONE_LEVEL (0) +#define TINYEXR_TILE_MIPMAP_LEVELS (1) +#define TINYEXR_TILE_RIPMAP_LEVELS (2) + +#define TINYEXR_TILE_ROUND_DOWN (0) +#define TINYEXR_TILE_ROUND_UP (1) + +typedef struct TEXRVersion { + int version; // this must be 2 + // tile format image; + // not zero for only a single-part "normal" tiled file (according 
to spec.) + int tiled; + int long_name; // long name attribute + // deep image(EXR 2.0); + // for a multi-part file, indicates that at least one part is of type deep* (according to spec.) + int non_image; + int multipart; // multi-part(EXR 2.0) +} EXRVersion; + +typedef struct TEXRAttribute { + char name[256]; // name and type are up to 255 chars long. + char type[256]; + unsigned char *value; // uint8_t* + int size; + int pad0; +} EXRAttribute; + +typedef struct TEXRChannelInfo { + char name[256]; // less than 255 bytes long + int pixel_type; + int x_sampling; + int y_sampling; + unsigned char p_linear; + unsigned char pad[3]; +} EXRChannelInfo; + +typedef struct TEXRTile { + int offset_x; + int offset_y; + int level_x; + int level_y; + + int width; // actual width in a tile. + int height; // actual height in a tile. + + unsigned char **images; // image[channels][pixels] +} EXRTile; + +typedef struct TEXRBox2i { + int min_x; + int min_y; + int max_x; + int max_y; +} EXRBox2i; + +typedef struct TEXRHeader { + float pixel_aspect_ratio; + int line_order; + EXRBox2i data_window; + EXRBox2i display_window; + float screen_window_center[2]; + float screen_window_width; + + int chunk_count; + + // Properties for tiled format(`tiledesc`). + int tiled; + int tile_size_x; + int tile_size_y; + int tile_level_mode; + int tile_rounding_mode; + + int long_name; + // for a single-part file, agree with the version field bit 11 + // for a multi-part file, it is consistent with the type of part + int non_image; + int multipart; + unsigned int header_len; + + // Custom attributes(excludes required attributes(e.g. `channels`, + // `compression`, etc) + int num_custom_attributes; + EXRAttribute *custom_attributes; // array of EXRAttribute. size = + // `num_custom_attributes`. + + EXRChannelInfo *channels; // [num_channels] + + int *pixel_types; // Loaded pixel type(TINYEXR_PIXELTYPE_*) of `images` for + // each channel.
This is overwritten with `requested_pixel_types` when + // loading. + int num_channels; + + int compression_type; // compression type(TINYEXR_COMPRESSIONTYPE_*) + int *requested_pixel_types; // Filled initially by + // ParseEXRHeaderFrom(Memory|File), then users + // can edit it(only valid for HALF pixel type + // channel) + // name attribute required for multipart files; + // must be unique and non empty (according to spec.); + // use EXRSetNameAttr for setting value; + // max 255 character allowed - excluding terminating zero + char name[256]; +} EXRHeader; + +typedef struct TEXRMultiPartHeader { + int num_headers; + EXRHeader *headers; + +} EXRMultiPartHeader; + +typedef struct TEXRImage { + EXRTile *tiles; // Tiled pixel data. The application must reconstruct image + // from tiles manually. NULL if scanline format. + struct TEXRImage* next_level; // NULL if scanline format or image is the last level. + int level_x; // x level index + int level_y; // y level index + + unsigned char **images; // image[channels][pixels]. NULL if tiled format. + + int width; + int height; + int num_channels; + + // Properties for tile format. + int num_tiles; + +} EXRImage; + +typedef struct TEXRMultiPartImage { + int num_images; + EXRImage *images; + +} EXRMultiPartImage; + +typedef struct TDeepImage { + const char **channel_names; + float ***image; // image[channels][scanlines][samples] + int **offset_table; // offset_table[scanline][offsets] + int num_channels; + int width; + int height; + int pad0; +} DeepImage; + +// @deprecated { For backward compatibility. Not recommended to use. } +// Loads single-frame OpenEXR image. Assume EXR image contains A(single channel +// alpha) or RGB(A) channels.
+// Application must free image data as returned by `out_rgba` +// Result image format is: float x RGBA x width x height +// Returns negative value and may set error string in `err` when there's an +// error +extern int LoadEXR(float **out_rgba, int *width, int *height, + const char *filename, const char **err); + +// Loads single-frame OpenEXR image by specifying layer name. Assume EXR image +// contains A(single channel alpha) or RGB(A) channels. Application must free +// image data as returned by `out_rgba` Result image format is: float x RGBA x +// width x height Returns negative value and may set error string in `err` when +// there's an error When the specified layer name is not found in the EXR file, +// the function will return `TINYEXR_ERROR_LAYER_NOT_FOUND`. +extern int LoadEXRWithLayer(float **out_rgba, int *width, int *height, + const char *filename, const char *layer_name, + const char **err); + +// +// Get layer infos from EXR file. +// +// @param[out] layer_names List of layer names. Application must free memory +// after using this. +// @param[out] num_layers The number of layers +// @param[out] err Error string(will be filled when the function returns error +// code). Free it using FreeEXRErrorMessage after using this value. +// +// @return TINYEXR_SUCCESS upon success. +// +extern int EXRLayers(const char *filename, const char **layer_names[], + int *num_layers, const char **err); + +// @deprecated +// Simple wrapper API for ParseEXRHeaderFromFile. +// checking given file is a EXR file(by just look up header) +// @return TINYEXR_SUCCESS for EXR image, TINYEXR_ERROR_INVALID_HEADER for +// others +extern int IsEXR(const char *filename); + +// Simple wrapper API for ParseEXRHeaderFromMemory.
+// Check if given data is a EXR image(by just looking up a header section) +// @return TINYEXR_SUCCESS for EXR image, TINYEXR_ERROR_INVALID_HEADER for +// others +extern int IsEXRFromMemory(const unsigned char *memory, size_t size); + +// @deprecated +// Saves single-frame OpenEXR image to a buffer. Assume EXR image contains RGB(A) channels. +// components must be 1(Grayscale), 3(RGB) or 4(RGBA). +// Input image format is: `float x width x height`, or `float x RGB(A) x width x +// height` +// Save image as fp16(HALF) format when `save_as_fp16` is positive non-zero +// value. +// Save image as fp32(FLOAT) format when `save_as_fp16` is 0. +// Use ZIP compression by default. +// `buffer` is the pointer to write EXR data. +// Memory for `buffer` is allocated internally in SaveEXRToMemory. +// Returns the data size of EXR file when the value is positive(up to 2GB EXR data). +// Returns negative value and may set error string in `err` when there's an +// error +extern int SaveEXRToMemory(const float *data, const int width, const int height, + const int components, const int save_as_fp16, + unsigned char **buffer, const char **err); + +// @deprecated { Not recommended, but handy to use. } +// Saves single-frame OpenEXR image to a buffer. Assume EXR image contains RGB(A) channels. +// components must be 1(Grayscale), 3(RGB) or 4(RGBA). +// Input image format is: `float x width x height`, or `float x RGB(A) x width x +// height` +// Save image as fp16(HALF) format when `save_as_fp16` is positive non-zero +// value. +// Save image as fp32(FLOAT) format when `save_as_fp16` is 0. +// Use ZIP compression by default. +// Returns TINYEXR_SUCCESS(0) when success.
+// Returns negative value and may set error string in `err` when there's an +// error +extern int SaveEXR(const float *data, const int width, const int height, + const int components, const int save_as_fp16, + const char *filename, const char **err); + +// Returns the number of resolution levels of the image (including the base) +extern int EXRNumLevels(const EXRImage* exr_image); + +// Initialize EXRHeader struct +extern void InitEXRHeader(EXRHeader *exr_header); + +// Set name attribute of EXRHeader struct (it makes a copy) +extern void EXRSetNameAttr(EXRHeader *exr_header, const char* name); + +// Initialize EXRImage struct +extern void InitEXRImage(EXRImage *exr_image); + +// Frees internal data of EXRHeader struct +extern int FreeEXRHeader(EXRHeader *exr_header); + +// Frees internal data of EXRImage struct +extern int FreeEXRImage(EXRImage *exr_image); + +// Frees error message +extern void FreeEXRErrorMessage(const char *msg); + +// Parse EXR version header of a file. +extern int ParseEXRVersionFromFile(EXRVersion *version, const char *filename); + +// Parse EXR version header from memory-mapped EXR data. +extern int ParseEXRVersionFromMemory(EXRVersion *version, + const unsigned char *memory, size_t size); + +// Parse single-part OpenEXR header from a file and initialize `EXRHeader`. +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern int ParseEXRHeaderFromFile(EXRHeader *header, const EXRVersion *version, + const char *filename, const char **err); + +// Parse single-part OpenEXR header from a memory and initialize `EXRHeader`. +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern int ParseEXRHeaderFromMemory(EXRHeader *header, + const EXRVersion *version, + const unsigned char *memory, size_t size, + const char **err); + +// Parse multi-part OpenEXR headers from a file and initialize `EXRHeader*` +// array. 
+// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern int ParseEXRMultipartHeaderFromFile(EXRHeader ***headers, + int *num_headers, + const EXRVersion *version, + const char *filename, + const char **err); + +// Parse multi-part OpenEXR headers from a memory and initialize `EXRHeader*` +// array +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern int ParseEXRMultipartHeaderFromMemory(EXRHeader ***headers, + int *num_headers, + const EXRVersion *version, + const unsigned char *memory, + size_t size, const char **err); + +// Loads single-part OpenEXR image from a file. +// Application must setup `ParseEXRHeaderFromFile` before calling this function. +// Application can free EXRImage using `FreeEXRImage` +// Returns negative value and may set error string in `err` when there's an +// error +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern int LoadEXRImageFromFile(EXRImage *image, const EXRHeader *header, + const char *filename, const char **err); + +// Loads single-part OpenEXR image from a memory. +// Application must setup `EXRHeader` with +// `ParseEXRHeaderFromMemory` before calling this function. +// Application can free EXRImage using `FreeEXRImage` +// Returns negative value and may set error string in `err` when there's an +// error +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern int LoadEXRImageFromMemory(EXRImage *image, const EXRHeader *header, + const unsigned char *memory, + const size_t size, const char **err); + +// Loads multi-part OpenEXR image from a file. +// Application must setup `ParseEXRMultipartHeaderFromFile` before calling this +// function. 
+// Application can free EXRImage using `FreeEXRImage` +// Returns negative value and may set error string in `err` when there's an +// error +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern int LoadEXRMultipartImageFromFile(EXRImage *images, + const EXRHeader **headers, + unsigned int num_parts, + const char *filename, + const char **err); + +// Loads multi-part OpenEXR image from a memory. +// Application must setup `EXRHeader*` array with +// `ParseEXRMultipartHeaderFromMemory` before calling this function. +// Application can free EXRImage using `FreeEXRImage` +// Returns negative value and may set error string in `err` when there's an +// error +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern int LoadEXRMultipartImageFromMemory(EXRImage *images, + const EXRHeader **headers, + unsigned int num_parts, + const unsigned char *memory, + const size_t size, const char **err); + +// Saves multi-channel, single-frame OpenEXR image to a file. +// Returns negative value and may set error string in `err` when there's an +// error +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern int SaveEXRImageToFile(const EXRImage *image, + const EXRHeader *exr_header, const char *filename, + const char **err); + +// Saves multi-channel, single-frame OpenEXR image to a memory. +// Image is compressed using EXRImage.compression value. +// Return the number of bytes if success. +// Return zero and will set error string in `err` when there's an +// error. +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern size_t SaveEXRImageToMemory(const EXRImage *image, + const EXRHeader *exr_header, + unsigned char **memory, const char **err); + +// Saves multi-channel, multi-frame OpenEXR image to a file. +// Image is compressed using EXRImage.compression value.
+// File global attributes (eg. display_window) must be set in the first header. +// Returns negative value and may set error string in `err` when there's an +// error +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern int SaveEXRMultipartImageToFile(const EXRImage *images, + const EXRHeader **exr_headers, + unsigned int num_parts, + const char *filename, const char **err); + +// Saves multi-channel, multi-frame OpenEXR image to a memory. +// Image is compressed using EXRImage.compression value. +// File global attributes (eg. display_window) must be set in the first header. +// Return the number of bytes if success. +// Return zero and will set error string in `err` when there's an +// error. +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern size_t SaveEXRMultipartImageToMemory(const EXRImage *images, + const EXRHeader **exr_headers, + unsigned int num_parts, + unsigned char **memory, const char **err); +// Loads single-frame OpenEXR deep image. +// Application must free memory of variables in DeepImage(image, offset_table) +// Returns negative value and may set error string in `err` when there's an +// error +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern int LoadDeepEXR(DeepImage *out_image, const char *filename, + const char **err); + +// NOT YET IMPLEMENTED: +// Saves single-frame OpenEXR deep image. +// Returns negative value and may set error string in `err` when there's an +// error +// extern int SaveDeepEXR(const DeepImage *in_image, const char *filename, +// const char **err); + +// NOT YET IMPLEMENTED: +// Loads multi-part OpenEXR deep image. +// Application must free memory of variables in DeepImage(image, offset_table) +// extern int LoadMultiPartDeepEXR(DeepImage **out_image, int num_parts, const +// char *filename, +// const char **err); + +// For emscripten. 
+// Loads single-frame OpenEXR image from memory. Assume EXR image contains +// RGB(A) channels. +// Returns negative value and may set error string in `err` when there's an +// error +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern int LoadEXRFromMemory(float **out_rgba, int *width, int *height, + const unsigned char *memory, size_t size, + const char **err); + +#ifdef __cplusplus +} +#endif + +#endif // TINYEXR_H_ + +#ifdef TINYEXR_IMPLEMENTATION +#ifndef TINYEXR_IMPLEMENTATION_DEFINED +#define TINYEXR_IMPLEMENTATION_DEFINED + +#ifdef _WIN32 + +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif +#ifndef NOMINMAX +#define NOMINMAX +#endif +#include // for UTF-8 and memory-mapping + +#if !defined(WINAPI_FAMILY) || (WINAPI_FAMILY == WINAPI_FAMILY_DESKTOP_APP) +#define TINYEXR_USE_WIN32_MMAP (1) +#endif + +#elif defined(__linux__) || defined(__unix__) +#include // for open() +#include // for memory-mapping +#include // for stat +#include // for close() +#define TINYEXR_USE_POSIX_MMAP (1) +#endif + +#include +#include +#include +#include +#include + +//#include // debug + +#include +#include +#include +#include + +// https://stackoverflow.com/questions/5047971/how-do-i-check-for-c11-support +#if __cplusplus > 199711L || (defined(_MSC_VER) && _MSC_VER >= 1900) +#define TINYEXR_HAS_CXX11 (1) +// C++11 +#include + +#if TINYEXR_USE_THREAD +#include +#include +#endif + +#else // __cplusplus > 199711L +#define TINYEXR_HAS_CXX11 (0) +#endif // __cplusplus > 199711L + +#if TINYEXR_USE_OPENMP +#include +#endif + +#if defined(TINYEXR_USE_MINIZ) && (TINYEXR_USE_MINIZ==1) +#include +#else +// Issue #46. 
Please include your own zlib-compatible API header before +// including `tinyexr.h` +//#include "zlib.h" +#endif + +#if defined(TINYEXR_USE_NANOZLIB) && (TINYEXR_USE_NANOZLIB==1) +#define NANOZLIB_IMPLEMENTATION +#include "nanozlib.h" +#endif + +#if TINYEXR_USE_STB_ZLIB +// Since we don't know where a project has stb_image.h and stb_image_write.h +// and whether they are in the include path, we don't include them here, and +// instead declare the two relevant functions manually. +// from stb_image.h: +extern "C" int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); +// from stb_image_write.h: +extern "C" unsigned char *stbi_zlib_compress(unsigned char *data, int data_len, int *out_len, int quality); +#endif + + +#if TINYEXR_USE_ZFP + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Weverything" +#endif + +#include "zfp.h" + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +#endif + +// cond: conditional expression +// msg: std::string +// err: std::string* +#define TINYEXR_CHECK_AND_RETURN_MSG(cond, msg, err) do { \ + if (!(cond)) { \ + if (!err) { \ + std::ostringstream ss_e; \ + ss_e << __func__ << "():" << __LINE__ << msg << "\n"; \ + (*err) += ss_e.str(); \ + } \ + return false;\ + } \ + } while(0) + +// no error message. +#define TINYEXR_CHECK_AND_RETURN_C(cond, retcode) do { \ + if (!(cond)) { \ + return retcode; \ + } \ + } while(0) + +namespace tinyexr { + +#if __cplusplus > 199711L +// C++11 +typedef uint64_t tinyexr_uint64; +typedef int64_t tinyexr_int64; +#else +// Although `long long` is not a standard type pre C++11, assume it is defined +// as a compiler's extension. 
+#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wc++11-long-long" +#endif +typedef unsigned long long tinyexr_uint64; +typedef long long tinyexr_int64; +#ifdef __clang__ +#pragma clang diagnostic pop +#endif +#endif + +// static bool IsBigEndian(void) { +// union { +// unsigned int i; +// char c[4]; +// } bint = {0x01020304}; +// +// return bint.c[0] == 1; +//} + +static void SetErrorMessage(const std::string &msg, const char **err) { + if (err) { +#ifdef _WIN32 + (*err) = _strdup(msg.c_str()); +#else + (*err) = strdup(msg.c_str()); +#endif + } +} + +#if 0 +static void SetWarningMessage(const std::string &msg, const char **warn) { + if (warn) { +#ifdef _WIN32 + (*warn) = _strdup(msg.c_str()); +#else + (*warn) = strdup(msg.c_str()); +#endif + } +} +#endif + +static const int kEXRVersionSize = 8; + +static void cpy2(unsigned short *dst_val, const unsigned short *src_val) { + unsigned char *dst = reinterpret_cast(dst_val); + const unsigned char *src = reinterpret_cast(src_val); + + dst[0] = src[0]; + dst[1] = src[1]; +} + +static void swap2(unsigned short *val) { +#if TINYEXR_LITTLE_ENDIAN + (void)val; +#else + unsigned short tmp = *val; + unsigned char *dst = reinterpret_cast(val); + unsigned char *src = reinterpret_cast(&tmp); + + dst[0] = src[1]; + dst[1] = src[0]; +#endif +} + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-function" +#endif + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-function" +#endif +static void cpy4(int *dst_val, const int *src_val) { + unsigned char *dst = reinterpret_cast(dst_val); + const unsigned char *src = reinterpret_cast(src_val); + + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = src[3]; +} + +static void cpy4(unsigned int *dst_val, const unsigned int *src_val) { + unsigned char *dst = reinterpret_cast(dst_val); + const unsigned char *src = reinterpret_cast(src_val); + + dst[0] = src[0]; + 
dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = src[3]; +} + +static void cpy4(float *dst_val, const float *src_val) { + unsigned char *dst = reinterpret_cast(dst_val); + const unsigned char *src = reinterpret_cast(src_val); + + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = src[3]; +} +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif + +static void swap4(unsigned int *val) { +#if TINYEXR_LITTLE_ENDIAN + (void)val; +#else + unsigned int tmp = *val; + unsigned char *dst = reinterpret_cast(val); + unsigned char *src = reinterpret_cast(&tmp); + + dst[0] = src[3]; + dst[1] = src[2]; + dst[2] = src[1]; + dst[3] = src[0]; +#endif +} + +static void swap4(int *val) { +#if TINYEXR_LITTLE_ENDIAN + (void)val; +#else + int tmp = *val; + unsigned char *dst = reinterpret_cast(val); + unsigned char *src = reinterpret_cast(&tmp); + + dst[0] = src[3]; + dst[1] = src[2]; + dst[2] = src[1]; + dst[3] = src[0]; +#endif +} + +static void swap4(float *val) { +#if TINYEXR_LITTLE_ENDIAN + (void)val; +#else + float tmp = *val; + unsigned char *dst = reinterpret_cast(val); + unsigned char *src = reinterpret_cast(&tmp); + + dst[0] = src[3]; + dst[1] = src[2]; + dst[2] = src[1]; + dst[3] = src[0]; +#endif +} + +#if 0 +static void cpy8(tinyexr::tinyexr_uint64 *dst_val, const tinyexr::tinyexr_uint64 *src_val) { + unsigned char *dst = reinterpret_cast(dst_val); + const unsigned char *src = reinterpret_cast(src_val); + + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = src[3]; + dst[4] = src[4]; + dst[5] = src[5]; + dst[6] = src[6]; + dst[7] = src[7]; +} +#endif + +static void swap8(tinyexr::tinyexr_uint64 *val) { +#if TINYEXR_LITTLE_ENDIAN + (void)val; +#else + tinyexr::tinyexr_uint64 tmp = (*val); + unsigned char *dst = reinterpret_cast(val); + unsigned char *src = reinterpret_cast(&tmp); + + dst[0] = src[7]; + dst[1] = src[6]; + dst[2] = src[5]; + dst[3] = src[4]; + dst[4] = src[3]; + dst[5] = 
src[2]; + dst[6] = src[1]; + dst[7] = src[0]; +#endif +} + +// https://gist.github.com/rygorous/2156668 +union FP32 { + unsigned int u; + float f; + struct { +#if TINYEXR_LITTLE_ENDIAN + unsigned int Mantissa : 23; + unsigned int Exponent : 8; + unsigned int Sign : 1; +#else + unsigned int Sign : 1; + unsigned int Exponent : 8; + unsigned int Mantissa : 23; +#endif + } s; +}; + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wpadded" +#endif + +union FP16 { + unsigned short u; + struct { +#if TINYEXR_LITTLE_ENDIAN + unsigned int Mantissa : 10; + unsigned int Exponent : 5; + unsigned int Sign : 1; +#else + unsigned int Sign : 1; + unsigned int Exponent : 5; + unsigned int Mantissa : 10; +#endif + } s; +}; + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +static FP32 half_to_float(FP16 h) { + static const FP32 magic = {113 << 23}; + static const unsigned int shifted_exp = 0x7c00 + << 13; // exponent mask after shift + FP32 o; + + o.u = (h.u & 0x7fffU) << 13U; // exponent/mantissa bits + unsigned int exp_ = shifted_exp & o.u; // just the exponent + o.u += (127 - 15) << 23; // exponent adjust + + // handle exponent special cases + if (exp_ == shifted_exp) // Inf/NaN? + o.u += (128 - 16) << 23; // extra exp adjust + else if (exp_ == 0) // Zero/Denormal? + { + o.u += 1 << 23; // extra exp adjust + o.f -= magic.f; // renormalize + } + + o.u |= (h.u & 0x8000U) << 16U; // sign bit + return o; +} + +static FP16 float_to_half_full(FP32 f) { + FP16 o = {0}; + + // Based on ISPC reference code (with minor modifications) + if (f.s.Exponent == 0) // Signed zero/denormal (which will underflow) + o.s.Exponent = 0; + else if (f.s.Exponent == 255) // Inf or NaN (all exponent bits set) + { + o.s.Exponent = 31; + o.s.Mantissa = f.s.Mantissa ? 
0x200 : 0; // NaN->qNaN and Inf->Inf + } else // Normalized number + { + // Exponent unbias the single, then bias the halfp + int newexp = f.s.Exponent - 127 + 15; + if (newexp >= 31) // Overflow, return signed infinity + o.s.Exponent = 31; + else if (newexp <= 0) // Underflow + { + if ((14 - newexp) <= 24) // Mantissa might be non-zero + { + unsigned int mant = f.s.Mantissa | 0x800000; // Hidden 1 bit + o.s.Mantissa = mant >> (14 - newexp); + if ((mant >> (13 - newexp)) & 1) // Check for rounding + o.u++; // Round, might overflow into exp bit, but this is OK + } + } else { + o.s.Exponent = static_cast(newexp); + o.s.Mantissa = f.s.Mantissa >> 13; + if (f.s.Mantissa & 0x1000) // Check for rounding + o.u++; // Round, might overflow to inf, this is OK + } + } + + o.s.Sign = f.s.Sign; + return o; +} + +// NOTE: From OpenEXR code +// #define IMF_INCREASING_Y 0 +// #define IMF_DECREASING_Y 1 +// #define IMF_RAMDOM_Y 2 +// +// #define IMF_NO_COMPRESSION 0 +// #define IMF_RLE_COMPRESSION 1 +// #define IMF_ZIPS_COMPRESSION 2 +// #define IMF_ZIP_COMPRESSION 3 +// #define IMF_PIZ_COMPRESSION 4 +// #define IMF_PXR24_COMPRESSION 5 +// #define IMF_B44_COMPRESSION 6 +// #define IMF_B44A_COMPRESSION 7 + +#ifdef __clang__ +#pragma clang diagnostic push + +#if __has_warning("-Wzero-as-null-pointer-constant") +#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" +#endif + +#endif + +static const char *ReadString(std::string *s, const char *ptr, size_t len) { + // Read untile NULL(\0). 
+ const char *p = ptr; + const char *q = ptr; + while ((size_t(q - ptr) < len) && (*q) != 0) { + q++; + } + + if (size_t(q - ptr) >= len) { + (*s).clear(); + return NULL; + } + + (*s) = std::string(p, q); + + return q + 1; // skip '\0' +} + +static bool ReadAttribute(std::string *name, std::string *type, + std::vector *data, size_t *marker_size, + const char *marker, size_t size) { + size_t name_len = strnlen(marker, size); + if (name_len == size) { + // String does not have a terminating character. + return false; + } + *name = std::string(marker, name_len); + + marker += name_len + 1; + size -= name_len + 1; + + size_t type_len = strnlen(marker, size); + if (type_len == size) { + return false; + } + *type = std::string(marker, type_len); + + marker += type_len + 1; + size -= type_len + 1; + + if (size < sizeof(uint32_t)) { + return false; + } + + uint32_t data_len; + memcpy(&data_len, marker, sizeof(uint32_t)); + tinyexr::swap4(reinterpret_cast(&data_len)); + + if (data_len == 0) { + if ((*type).compare("string") == 0) { + // Accept empty string attribute. 
+ + marker += sizeof(uint32_t); + size -= sizeof(uint32_t); + + *marker_size = name_len + 1 + type_len + 1 + sizeof(uint32_t); + + data->resize(1); + (*data)[0] = '\0'; + + return true; + } else { + return false; + } + } + + marker += sizeof(uint32_t); + size -= sizeof(uint32_t); + + if (size < data_len) { + return false; + } + + data->resize(static_cast(data_len)); + memcpy(&data->at(0), marker, static_cast(data_len)); + + *marker_size = name_len + 1 + type_len + 1 + sizeof(uint32_t) + data_len; + return true; +} + +static void WriteAttributeToMemory(std::vector *out, + const char *name, const char *type, + const unsigned char *data, int len) { + out->insert(out->end(), name, name + strlen(name) + 1); + out->insert(out->end(), type, type + strlen(type) + 1); + + int outLen = len; + tinyexr::swap4(&outLen); + out->insert(out->end(), reinterpret_cast(&outLen), + reinterpret_cast(&outLen) + sizeof(int)); + out->insert(out->end(), data, data + len); +} + +typedef struct TChannelInfo { + std::string name; // less than 255 bytes long + int pixel_type; + int requested_pixel_type; + int x_sampling; + int y_sampling; + unsigned char p_linear; + unsigned char pad[3]; +} ChannelInfo; + +typedef struct { + int min_x; + int min_y; + int max_x; + int max_y; +} Box2iInfo; + +struct HeaderInfo { + std::vector channels; + std::vector attributes; + + Box2iInfo data_window; + int line_order; + Box2iInfo display_window; + float screen_window_center[2]; + float screen_window_width; + float pixel_aspect_ratio; + + int chunk_count; + + // Tiled format + int tiled; // Non-zero if the part is tiled. 
+ int tile_size_x; + int tile_size_y; + int tile_level_mode; + int tile_rounding_mode; + + unsigned int header_len; + + int compression_type; + + // required for multi-part or non-image files + std::string name; + // required for multi-part or non-image files + std::string type; + + void clear() { + channels.clear(); + attributes.clear(); + + data_window.min_x = 0; + data_window.min_y = 0; + data_window.max_x = 0; + data_window.max_y = 0; + line_order = 0; + display_window.min_x = 0; + display_window.min_y = 0; + display_window.max_x = 0; + display_window.max_y = 0; + screen_window_center[0] = 0.0f; + screen_window_center[1] = 0.0f; + screen_window_width = 0.0f; + pixel_aspect_ratio = 0.0f; + + chunk_count = 0; + + // Tiled format + tiled = 0; + tile_size_x = 0; + tile_size_y = 0; + tile_level_mode = 0; + tile_rounding_mode = 0; + + header_len = 0; + compression_type = 0; + + name.clear(); + type.clear(); + } +}; + +static bool ReadChannelInfo(std::vector &channels, + const std::vector &data) { + const char *p = reinterpret_cast(&data.at(0)); + + for (;;) { + if ((*p) == 0) { + break; + } + ChannelInfo info; + info.requested_pixel_type = 0; + + tinyexr_int64 data_len = static_cast(data.size()) - + (p - reinterpret_cast(data.data())); + if (data_len < 0) { + return false; + } + + p = ReadString(&info.name, p, size_t(data_len)); + if ((p == NULL) && (info.name.empty())) { + // Buffer overrun. Issue #51. 
+ return false; + } + + const unsigned char *data_end = + reinterpret_cast(p) + 16; + if (data_end >= (data.data() + data.size())) { + return false; + } + + memcpy(&info.pixel_type, p, sizeof(int)); + p += 4; + info.p_linear = static_cast(p[0]); // uchar + p += 1 + 3; // reserved: uchar[3] + memcpy(&info.x_sampling, p, sizeof(int)); // int + p += 4; + memcpy(&info.y_sampling, p, sizeof(int)); // int + p += 4; + + tinyexr::swap4(&info.pixel_type); + tinyexr::swap4(&info.x_sampling); + tinyexr::swap4(&info.y_sampling); + + channels.push_back(info); + } + + return true; +} + +static void WriteChannelInfo(std::vector &data, + const std::vector &channels) { + size_t sz = 0; + + // Calculate total size. + for (size_t c = 0; c < channels.size(); c++) { + sz += channels[c].name.length() + 1; // +1 for \0 + sz += 16; // 4 * int + } + data.resize(sz + 1); + + unsigned char *p = &data.at(0); + + for (size_t c = 0; c < channels.size(); c++) { + memcpy(p, channels[c].name.c_str(), channels[c].name.length()); + p += channels[c].name.length(); + (*p) = '\0'; + p++; + + int pixel_type = channels[c].requested_pixel_type; + int x_sampling = channels[c].x_sampling; + int y_sampling = channels[c].y_sampling; + tinyexr::swap4(&pixel_type); + tinyexr::swap4(&x_sampling); + tinyexr::swap4(&y_sampling); + + memcpy(p, &pixel_type, sizeof(int)); + p += sizeof(int); + + (*p) = channels[c].p_linear; + p += 4; + + memcpy(p, &x_sampling, sizeof(int)); + p += sizeof(int); + + memcpy(p, &y_sampling, sizeof(int)); + p += sizeof(int); + } + + (*p) = '\0'; +} + +static bool CompressZip(unsigned char *dst, + tinyexr::tinyexr_uint64 &compressedSize, + const unsigned char *src, unsigned long src_size) { + std::vector tmpBuf(src_size); + + // + // Apply EXR-specific? postprocess. Grabbed from OpenEXR's + // ImfZipCompressor.cpp + // + + // + // Reorder the pixel data. 
+ // + + const char *srcPtr = reinterpret_cast(src); + + { + char *t1 = reinterpret_cast(&tmpBuf.at(0)); + char *t2 = reinterpret_cast(&tmpBuf.at(0)) + (src_size + 1) / 2; + const char *stop = srcPtr + src_size; + + for (;;) { + if (srcPtr < stop) + *(t1++) = *(srcPtr++); + else + break; + + if (srcPtr < stop) + *(t2++) = *(srcPtr++); + else + break; + } + } + + // + // Predictor. + // + + { + unsigned char *t = &tmpBuf.at(0) + 1; + unsigned char *stop = &tmpBuf.at(0) + src_size; + int p = t[-1]; + + while (t < stop) { + int d = int(t[0]) - p + (128 + 256); + p = t[0]; + t[0] = static_cast(d); + ++t; + } + } + +#if defined(TINYEXR_USE_MINIZ) && (TINYEXR_USE_MINIZ==1) + // + // Compress the data using miniz + // + + mz_ulong outSize = mz_compressBound(src_size); + int ret = mz_compress( + dst, &outSize, static_cast(&tmpBuf.at(0)), + src_size); + if (ret != MZ_OK) { + return false; + } + + compressedSize = outSize; +#elif defined(TINYEXR_USE_STB_ZLIB) && (TINYEXR_USE_STB_ZLIB==1) + int outSize; + unsigned char* ret = stbi_zlib_compress(const_cast(&tmpBuf.at(0)), src_size, &outSize, 8); + if (!ret) { + return false; + } + memcpy(dst, ret, outSize); + free(ret); + + compressedSize = outSize; +#elif defined(TINYEXR_USE_NANOZLIB) && (TINYEXR_USE_NANOZLIB==1) + uint64_t dstSize = nanoz_compressBound(static_cast(src_size)); + int outSize{0}; + unsigned char *ret = nanoz_compress(&tmpBuf.at(0), src_size, &outSize, /* quality */8); + if (!ret) { + return false; + } + + memcpy(dst, ret, outSize); + free(ret); + + compressedSize = outSize; +#else + uLong outSize = compressBound(static_cast(src_size)); + int ret = compress(dst, &outSize, static_cast(&tmpBuf.at(0)), + src_size); + if (ret != Z_OK) { + return false; + } + + compressedSize = outSize; +#endif + + // Use uncompressed data when compressed data is larger than uncompressed. 
+ // (Issue 40) + if (compressedSize >= src_size) { + compressedSize = src_size; + memcpy(dst, src, src_size); + } + + return true; +} + +static bool DecompressZip(unsigned char *dst, + unsigned long *uncompressed_size /* inout */, + const unsigned char *src, unsigned long src_size) { + if ((*uncompressed_size) == src_size) { + // Data is not compressed(Issue 40). + memcpy(dst, src, src_size); + return true; + } + std::vector tmpBuf(*uncompressed_size); + +#if defined(TINYEXR_USE_MINIZ) && (TINYEXR_USE_MINIZ==1) + int ret = + mz_uncompress(&tmpBuf.at(0), uncompressed_size, src, src_size); + if (MZ_OK != ret) { + return false; + } +#elif TINYEXR_USE_STB_ZLIB + int ret = stbi_zlib_decode_buffer(reinterpret_cast(&tmpBuf.at(0)), + *uncompressed_size, reinterpret_cast(src), src_size); + if (ret < 0) { + return false; + } +#elif defined(TINYEXR_USE_NANOZLIB) && (TINYEXR_USE_NANOZLIB==1) + uint64_t dest_size = (*uncompressed_size); + uint64_t uncomp_size{0}; + nanoz_status_t ret = + nanoz_uncompress(src, src_size, dest_size, &tmpBuf.at(0), &uncomp_size); + if (NANOZ_SUCCESS != ret) { + return false; + } + if ((*uncompressed_size) != uncomp_size) { + return false; + } +#else + int ret = uncompress(&tmpBuf.at(0), uncompressed_size, src, src_size); + if (Z_OK != ret) { + return false; + } +#endif + + // + // Apply EXR-specific? postprocess. Grabbed from OpenEXR's + // ImfZipCompressor.cpp + // + + // Predictor. + { + unsigned char *t = &tmpBuf.at(0) + 1; + unsigned char *stop = &tmpBuf.at(0) + (*uncompressed_size); + + while (t < stop) { + int d = int(t[-1]) + int(t[0]) - 128; + t[0] = static_cast(d); + ++t; + } + } + + // Reorder the pixel data. 
+ { + const char *t1 = reinterpret_cast(&tmpBuf.at(0)); + const char *t2 = reinterpret_cast(&tmpBuf.at(0)) + + (*uncompressed_size + 1) / 2; + char *s = reinterpret_cast(dst); + char *stop = s + (*uncompressed_size); + + for (;;) { + if (s < stop) + *(s++) = *(t1++); + else + break; + + if (s < stop) + *(s++) = *(t2++); + else + break; + } + } + + return true; +} + +// RLE code from OpenEXR -------------------------------------- + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wsign-conversion" +#if __has_warning("-Wextra-semi-stmt") +#pragma clang diagnostic ignored "-Wextra-semi-stmt" +#endif +#endif + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4204) // nonstandard extension used : non-constant + // aggregate initializer (also supported by GNU + // C and C99, so no big deal) +#pragma warning(disable : 4244) // 'initializing': conversion from '__int64' to + // 'int', possible loss of data +#pragma warning(disable : 4267) // 'argument': conversion from '__int64' to + // 'int', possible loss of data +#pragma warning(disable : 4996) // 'strdup': The POSIX name for this item is + // deprecated. Instead, use the ISO C and C++ + // conformant name: _strdup. +#endif + +const int MIN_RUN_LENGTH = 3; +const int MAX_RUN_LENGTH = 127; + +// +// Compress an array of bytes, using run-length encoding, +// and return the length of the compressed data. 
+// + +static int rleCompress(int inLength, const char in[], signed char out[]) { + const char *inEnd = in + inLength; + const char *runStart = in; + const char *runEnd = in + 1; + signed char *outWrite = out; + + while (runStart < inEnd) { + while (runEnd < inEnd && *runStart == *runEnd && + runEnd - runStart - 1 < MAX_RUN_LENGTH) { + ++runEnd; + } + + if (runEnd - runStart >= MIN_RUN_LENGTH) { + // + // Compressible run + // + + *outWrite++ = static_cast(runEnd - runStart) - 1; + *outWrite++ = *(reinterpret_cast(runStart)); + runStart = runEnd; + } else { + // + // Uncompressable run + // + + while (runEnd < inEnd && + ((runEnd + 1 >= inEnd || *runEnd != *(runEnd + 1)) || + (runEnd + 2 >= inEnd || *(runEnd + 1) != *(runEnd + 2))) && + runEnd - runStart < MAX_RUN_LENGTH) { + ++runEnd; + } + + *outWrite++ = static_cast(runStart - runEnd); + + while (runStart < runEnd) { + *outWrite++ = *(reinterpret_cast(runStart++)); + } + } + + ++runEnd; + } + + return static_cast(outWrite - out); +} + +// +// Uncompress an array of bytes compressed with rleCompress(). +// Returns the length of the uncompressed data, or 0 if the +// length of the uncompressed data would be more than maxLength. +// + +static int rleUncompress(int inLength, int maxLength, const signed char in[], + char out[]) { + char *outStart = out; + + while (inLength > 0) { + if (*in < 0) { + int count = -(static_cast(*in++)); + inLength -= count + 1; + + // Fixes #116: Add bounds check to in buffer. 
+ if ((0 > (maxLength -= count)) || (inLength < 0)) return 0; + + memcpy(out, in, count); + out += count; + in += count; + } else { + int count = *in++; + inLength -= 2; + + if ((0 > (maxLength -= count + 1)) || (inLength < 0)) return 0; + + memset(out, *reinterpret_cast(in), count + 1); + out += count + 1; + + in++; + } + } + + return static_cast(out - outStart); +} + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +// End of RLE code from OpenEXR ----------------------------------- + +static bool CompressRle(unsigned char *dst, + tinyexr::tinyexr_uint64 &compressedSize, + const unsigned char *src, unsigned long src_size) { + std::vector tmpBuf(src_size); + + // + // Apply EXR-specific? postprocess. Grabbed from OpenEXR's + // ImfRleCompressor.cpp + // + + // + // Reorder the pixel data. + // + + const char *srcPtr = reinterpret_cast(src); + + { + char *t1 = reinterpret_cast(&tmpBuf.at(0)); + char *t2 = reinterpret_cast(&tmpBuf.at(0)) + (src_size + 1) / 2; + const char *stop = srcPtr + src_size; + + for (;;) { + if (srcPtr < stop) + *(t1++) = *(srcPtr++); + else + break; + + if (srcPtr < stop) + *(t2++) = *(srcPtr++); + else + break; + } + } + + // + // Predictor. + // + + { + unsigned char *t = &tmpBuf.at(0) + 1; + unsigned char *stop = &tmpBuf.at(0) + src_size; + int p = t[-1]; + + while (t < stop) { + int d = int(t[0]) - p + (128 + 256); + p = t[0]; + t[0] = static_cast(d); + ++t; + } + } + + // outSize will be (srcSiz * 3) / 2 at max. + int outSize = rleCompress(static_cast(src_size), + reinterpret_cast(&tmpBuf.at(0)), + reinterpret_cast(dst)); + TINYEXR_CHECK_AND_RETURN_C(outSize > 0, false); + + compressedSize = static_cast(outSize); + + // Use uncompressed data when compressed data is larger than uncompressed. 
+ // (Issue 40) + if (compressedSize >= src_size) { + compressedSize = src_size; + memcpy(dst, src, src_size); + } + + return true; +} + +static bool DecompressRle(unsigned char *dst, + const unsigned long uncompressed_size, + const unsigned char *src, unsigned long src_size) { + if (uncompressed_size == src_size) { + // Data is not compressed(Issue 40). + memcpy(dst, src, src_size); + return true; + } + + // Workaround for issue #112. + // TODO(syoyo): Add more robust out-of-bounds check in `rleUncompress`. + if (src_size <= 2) { + return false; + } + + std::vector tmpBuf(uncompressed_size); + + int ret = rleUncompress(static_cast(src_size), + static_cast(uncompressed_size), + reinterpret_cast(src), + reinterpret_cast(&tmpBuf.at(0))); + if (ret != static_cast(uncompressed_size)) { + return false; + } + + // + // Apply EXR-specific? postprocess. Grabbed from OpenEXR's + // ImfRleCompressor.cpp + // + + // Predictor. + { + unsigned char *t = &tmpBuf.at(0) + 1; + unsigned char *stop = &tmpBuf.at(0) + uncompressed_size; + + while (t < stop) { + int d = int(t[-1]) + int(t[0]) - 128; + t[0] = static_cast(d); + ++t; + } + } + + // Reorder the pixel data. 
+ { + const char *t1 = reinterpret_cast(&tmpBuf.at(0)); + const char *t2 = reinterpret_cast(&tmpBuf.at(0)) + + (uncompressed_size + 1) / 2; + char *s = reinterpret_cast(dst); + char *stop = s + uncompressed_size; + + for (;;) { + if (s < stop) + *(s++) = *(t1++); + else + break; + + if (s < stop) + *(s++) = *(t2++); + else + break; + } + } + + return true; +} + +#if TINYEXR_USE_PIZ + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wc++11-long-long" +#pragma clang diagnostic ignored "-Wold-style-cast" +#pragma clang diagnostic ignored "-Wpadded" +#pragma clang diagnostic ignored "-Wsign-conversion" +#pragma clang diagnostic ignored "-Wc++11-extensions" +#pragma clang diagnostic ignored "-Wconversion" +#pragma clang diagnostic ignored "-Wc++98-compat-pedantic" + +#if __has_warning("-Wcast-qual") +#pragma clang diagnostic ignored "-Wcast-qual" +#endif + +#if __has_warning("-Wextra-semi-stmt") +#pragma clang diagnostic ignored "-Wextra-semi-stmt" +#endif + +#endif + +// +// PIZ compress/uncompress, based on OpenEXR's ImfPizCompressor.cpp +// +// ----------------------------------------------------------------- +// Copyright (c) 2004, Industrial Light & Magic, a division of Lucas +// Digital Ltd. LLC) +// (3 clause BSD license) +// + +struct PIZChannelData { + unsigned short *start; + unsigned short *end; + int nx; + int ny; + int ys; + int size; +}; + +//----------------------------------------------------------------------------- +// +// 16-bit Haar Wavelet encoding and decoding +// +// The source code in this file is derived from the encoding +// and decoding routines written by Christian Rouet for his +// PIZ image file format. 
//
//-----------------------------------------------------------------------------

//
// Wavelet basis functions without modulo arithmetic; they produce
// the best compression ratios when the wavelet-transformed data are
// Huffman-encoded, but the wavelet transform works only for 14-bit
// data (untransformed data values must be less than (1 << 14)).
//

// Forward 14-bit wavelet step.
//   a, b : input sample pair
//   l    : out, (a + b) >> 1 computed on the values reinterpreted as signed
//          16-bit (the "average" term)
//   h    : out, a - b computed the same way (the "difference" term), stored
//          back as its unsigned 16-bit bit pattern
inline void wenc14(unsigned short a, unsigned short b, unsigned short &l,
                   unsigned short &h) {
  const short as = static_cast<short>(a);
  const short bs = static_cast<short>(b);

  // Integer promotion makes both expressions `int`; narrow back explicitly.
  const short ms = static_cast<short>((as + bs) >> 1);
  const short ds = static_cast<short>(as - bs);

  l = static_cast<unsigned short>(ms);
  h = static_cast<unsigned short>(ds);
}

// Inverse of wenc14(): rebuilds the sample pair (a, b) from the
// average term `l` and the difference term `h`.
//   l, h : values produced by wenc14()
//   a, b : out, reconstructed samples
inline void wdec14(unsigned short l, unsigned short h, unsigned short &a,
                   unsigned short &b) {
  const short ls = static_cast<short>(l);
  const short hs = static_cast<short>(h);

  const int hi = hs;
  // (hi & 1) + (hi >> 1) restores the bit dropped by the encoder's >> 1.
  // For negative hi this relies on >> being an arithmetic shift
  // (implementation-defined before C++20).
  const int ai = ls + (hi & 1) + (hi >> 1);

  const short as = static_cast<short>(ai);
  const short bs = static_cast<short>(ai - hi);

  a = static_cast<unsigned short>(as);
  b = static_cast<unsigned short>(bs);
}

//
// Wavelet basis functions with modulo arithmetic; they work with full
// 16-bit data, but Huffman-encoding the wavelet-transformed data doesn't
// compress the data quite as well.
+// + +const int NBITS = 16; +const int A_OFFSET = 1 << (NBITS - 1); +const int M_OFFSET = 1 << (NBITS - 1); +const int MOD_MASK = (1 << NBITS) - 1; + +inline void wenc16(unsigned short a, unsigned short b, unsigned short &l, + unsigned short &h) { + int ao = (a + A_OFFSET) & MOD_MASK; + int m = ((ao + b) >> 1); + int d = ao - b; + + if (d < 0) m = (m + M_OFFSET) & MOD_MASK; + + d &= MOD_MASK; + + l = static_cast(m); + h = static_cast(d); +} + +inline void wdec16(unsigned short l, unsigned short h, unsigned short &a, + unsigned short &b) { + int m = l; + int d = h; + int bb = (m - (d >> 1)) & MOD_MASK; + int aa = (d + bb - A_OFFSET) & MOD_MASK; + b = static_cast(bb); + a = static_cast(aa); +} + +// +// 2D Wavelet encoding: +// + +static void wav2Encode( + unsigned short *in, // io: values are transformed in place + int nx, // i : x size + int ox, // i : x offset + int ny, // i : y size + int oy, // i : y offset + unsigned short mx) // i : maximum in[x][y] value +{ + bool w14 = (mx < (1 << 14)); + int n = (nx > ny) ? 
ny : nx; + int p = 1; // == 1 << level + int p2 = 2; // == 1 << (level+1) + + // + // Hierarchical loop on smaller dimension n + // + + while (p2 <= n) { + unsigned short *py = in; + unsigned short *ey = in + oy * (ny - p2); + int oy1 = oy * p; + int oy2 = oy * p2; + int ox1 = ox * p; + int ox2 = ox * p2; + unsigned short i00, i01, i10, i11; + + // + // Y loop + // + + for (; py <= ey; py += oy2) { + unsigned short *px = py; + unsigned short *ex = py + ox * (nx - p2); + + // + // X loop + // + + for (; px <= ex; px += ox2) { + unsigned short *p01 = px + ox1; + unsigned short *p10 = px + oy1; + unsigned short *p11 = p10 + ox1; + + // + // 2D wavelet encoding + // + + if (w14) { + wenc14(*px, *p01, i00, i01); + wenc14(*p10, *p11, i10, i11); + wenc14(i00, i10, *px, *p10); + wenc14(i01, i11, *p01, *p11); + } else { + wenc16(*px, *p01, i00, i01); + wenc16(*p10, *p11, i10, i11); + wenc16(i00, i10, *px, *p10); + wenc16(i01, i11, *p01, *p11); + } + } + + // + // Encode (1D) odd column (still in Y loop) + // + + if (nx & p) { + unsigned short *p10 = px + oy1; + + if (w14) + wenc14(*px, *p10, i00, *p10); + else + wenc16(*px, *p10, i00, *p10); + + *px = i00; + } + } + + // + // Encode (1D) odd line (must loop in X) + // + + if (ny & p) { + unsigned short *px = py; + unsigned short *ex = py + ox * (nx - p2); + + for (; px <= ex; px += ox2) { + unsigned short *p01 = px + ox1; + + if (w14) + wenc14(*px, *p01, i00, *p01); + else + wenc16(*px, *p01, i00, *p01); + + *px = i00; + } + } + + // + // Next level + // + + p = p2; + p2 <<= 1; + } +} + +// +// 2D Wavelet decoding: +// + +static void wav2Decode( + unsigned short *in, // io: values are transformed in place + int nx, // i : x size + int ox, // i : x offset + int ny, // i : y size + int oy, // i : y offset + unsigned short mx) // i : maximum in[x][y] value +{ + bool w14 = (mx < (1 << 14)); + int n = (nx > ny) ? 
ny : nx; + int p = 1; + int p2; + + // + // Search max level + // + + while (p <= n) p <<= 1; + + p >>= 1; + p2 = p; + p >>= 1; + + // + // Hierarchical loop on smaller dimension n + // + + while (p >= 1) { + unsigned short *py = in; + unsigned short *ey = in + oy * (ny - p2); + int oy1 = oy * p; + int oy2 = oy * p2; + int ox1 = ox * p; + int ox2 = ox * p2; + unsigned short i00, i01, i10, i11; + + // + // Y loop + // + + for (; py <= ey; py += oy2) { + unsigned short *px = py; + unsigned short *ex = py + ox * (nx - p2); + + // + // X loop + // + + for (; px <= ex; px += ox2) { + unsigned short *p01 = px + ox1; + unsigned short *p10 = px + oy1; + unsigned short *p11 = p10 + ox1; + + // + // 2D wavelet decoding + // + + if (w14) { + wdec14(*px, *p10, i00, i10); + wdec14(*p01, *p11, i01, i11); + wdec14(i00, i01, *px, *p01); + wdec14(i10, i11, *p10, *p11); + } else { + wdec16(*px, *p10, i00, i10); + wdec16(*p01, *p11, i01, i11); + wdec16(i00, i01, *px, *p01); + wdec16(i10, i11, *p10, *p11); + } + } + + // + // Decode (1D) odd column (still in Y loop) + // + + if (nx & p) { + unsigned short *p10 = px + oy1; + + if (w14) + wdec14(*px, *p10, i00, *p10); + else + wdec16(*px, *p10, i00, *p10); + + *px = i00; + } + } + + // + // Decode (1D) odd line (must loop in X) + // + + if (ny & p) { + unsigned short *px = py; + unsigned short *ex = py + ox * (nx - p2); + + for (; px <= ex; px += ox2) { + unsigned short *p01 = px + ox1; + + if (w14) + wdec14(*px, *p01, i00, *p01); + else + wdec16(*px, *p01, i00, *p01); + + *px = i00; + } + } + + // + // Next level + // + + p2 = p; + p >>= 1; + } +} + +//----------------------------------------------------------------------------- +// +// 16-bit Huffman compression and decompression. +// +// The source code in this file is derived from the 8-bit +// Huffman compression and decompression routines written +// by Christian Rouet for his PIZ image file format. 
+// +//----------------------------------------------------------------------------- + +// Adds some modification for tinyexr. + +const int HUF_ENCBITS = 16; // literal (value) bit length +const int HUF_DECBITS = 14; // decoding bit size (>= 8) + +const int HUF_ENCSIZE = (1 << HUF_ENCBITS) + 1; // encoding table size +const int HUF_DECSIZE = 1 << HUF_DECBITS; // decoding table size +const int HUF_DECMASK = HUF_DECSIZE - 1; + +struct HufDec { // short code long code + //------------------------------- + unsigned int len : 8; // code length 0 + unsigned int lit : 24; // lit p size + unsigned int *p; // 0 lits +}; + +inline long long hufLength(long long code) { return code & 63; } + +inline long long hufCode(long long code) { return code >> 6; } + +inline void outputBits(int nBits, long long bits, long long &c, int &lc, + char *&out) { + c <<= nBits; + lc += nBits; + + c |= bits; + + while (lc >= 8) *out++ = static_cast((c >> (lc -= 8))); +} + +inline long long getBits(int nBits, long long &c, int &lc, const char *&in) { + while (lc < nBits) { + c = (c << 8) | *(reinterpret_cast(in++)); + lc += 8; + } + + lc -= nBits; + return (c >> lc) & ((1 << nBits) - 1); +} + +// +// ENCODING TABLE BUILDING & (UN)PACKING +// + +// +// Build a "canonical" Huffman code table: +// - for each (uncompressed) symbol, hcode contains the length +// of the corresponding code (in the compressed data) +// - canonical codes are computed and stored in hcode +// - the rules for constructing canonical codes are as follows: +// * shorter codes (if filled with zeroes to the right) +// have a numerically higher value than longer codes +// * for codes with the same length, numerical values +// increase with numerical symbol values +// - because the canonical code table can be constructed from +// symbol lengths alone, the code table can be transmitted +// without sending the actual code values +// - see http://www.compressconsult.com/huffman/ +// + +static void hufCanonicalCodeTable(long long 
hcode[HUF_ENCSIZE]) { + long long n[59]; + + // + // For each i from 0 through 58, count the + // number of different codes of length i, and + // store the count in n[i]. + // + + for (int i = 0; i <= 58; ++i) n[i] = 0; + + for (int i = 0; i < HUF_ENCSIZE; ++i) n[hcode[i]] += 1; + + // + // For each i from 58 through 1, compute the + // numerically lowest code with length i, and + // store that code in n[i]. + // + + long long c = 0; + + for (int i = 58; i > 0; --i) { + long long nc = ((c + n[i]) >> 1); + n[i] = c; + c = nc; + } + + // + // hcode[i] contains the length, l, of the + // code for symbol i. Assign the next available + // code of length l to the symbol and store both + // l and the code in hcode[i]. + // + + for (int i = 0; i < HUF_ENCSIZE; ++i) { + int l = static_cast(hcode[i]); + + if (l > 0) hcode[i] = l | (n[l]++ << 6); + } +} + +// +// Compute Huffman codes (based on frq input) and store them in frq: +// - code structure is : [63:lsb - 6:msb] | [5-0: bit length]; +// - max code length is 58 bits; +// - codes outside the range [im-iM] have a null length (unused values); +// - original frequencies are destroyed; +// - encoding tables are used by hufEncode() and hufBuildDecTable(); +// + +struct FHeapCompare { + bool operator()(long long *a, long long *b) { return *a > *b; } +}; + +static bool hufBuildEncTable( + long long *frq, // io: input frequencies [HUF_ENCSIZE], output table + int *im, // o: min frq index + int *iM) // o: max frq index +{ + // + // This function assumes that when it is called, array frq + // indicates the frequency of all possible symbols in the data + // that are to be Huffman-encoded. (frq[i] contains the number + // of occurrences of symbol i in the data.) 
+ // + // The loop below does three things: + // + // 1) Finds the minimum and maximum indices that point + // to non-zero entries in frq: + // + // frq[im] != 0, and frq[i] == 0 for all i < im + // frq[iM] != 0, and frq[i] == 0 for all i > iM + // + // 2) Fills array fHeap with pointers to all non-zero + // entries in frq. + // + // 3) Initializes array hlink such that hlink[i] == i + // for all array entries. + // + + std::vector hlink(HUF_ENCSIZE); + std::vector fHeap(HUF_ENCSIZE); + + *im = 0; + + while (!frq[*im]) (*im)++; + + int nf = 0; + + for (int i = *im; i < HUF_ENCSIZE; i++) { + hlink[i] = i; + + if (frq[i]) { + fHeap[nf] = &frq[i]; + nf++; + *iM = i; + } + } + + // + // Add a pseudo-symbol, with a frequency count of 1, to frq; + // adjust the fHeap and hlink array accordingly. Function + // hufEncode() uses the pseudo-symbol for run-length encoding. + // + + (*iM)++; + frq[*iM] = 1; + fHeap[nf] = &frq[*iM]; + nf++; + + // + // Build an array, scode, such that scode[i] contains the number + // of bits assigned to symbol i. Conceptually this is done by + // constructing a tree whose leaves are the symbols with non-zero + // frequency: + // + // Make a heap that contains all symbols with a non-zero frequency, + // with the least frequent symbol on top. + // + // Repeat until only one symbol is left on the heap: + // + // Take the two least frequent symbols off the top of the heap. + // Create a new node that has first two nodes as children, and + // whose frequency is the sum of the frequencies of the first + // two nodes. Put the new node back into the heap. + // + // The last node left on the heap is the root of the tree. For each + // leaf node, the distance between the root and the leaf is the length + // of the code for the corresponding symbol. + // + // The loop below doesn't actually build the tree; instead we compute + // the distances of the leaves from the root on the fly. 
When a new + // node is added to the heap, then that node's descendants are linked + // into a single linear list that starts at the new node, and the code + // lengths of the descendants (that is, their distance from the root + // of the tree) are incremented by one. + // + + std::make_heap(&fHeap[0], &fHeap[nf], FHeapCompare()); + + std::vector scode(HUF_ENCSIZE); + memset(scode.data(), 0, sizeof(long long) * HUF_ENCSIZE); + + while (nf > 1) { + // + // Find the indices, mm and m, of the two smallest non-zero frq + // values in fHeap, add the smallest frq to the second-smallest + // frq, and remove the smallest frq value from fHeap. + // + + int mm = fHeap[0] - frq; + std::pop_heap(&fHeap[0], &fHeap[nf], FHeapCompare()); + --nf; + + int m = fHeap[0] - frq; + std::pop_heap(&fHeap[0], &fHeap[nf], FHeapCompare()); + + frq[m] += frq[mm]; + std::push_heap(&fHeap[0], &fHeap[nf], FHeapCompare()); + + // + // The entries in scode are linked into lists with the + // entries in hlink serving as "next" pointers and with + // the end of a list marked by hlink[j] == j. + // + // Traverse the lists that start at scode[m] and scode[mm]. + // For each element visited, increment the length of the + // corresponding code by one bit. (If we visit scode[j] + // during the traversal, then the code for symbol j becomes + // one bit longer.) + // + // Merge the lists that start at scode[m] and scode[mm] + // into a single list that starts at scode[m]. + // + + // + // Add a bit to all codes in the first list. + // + + for (int j = m;; j = hlink[j]) { + scode[j]++; + + TINYEXR_CHECK_AND_RETURN_C(scode[j] <= 58, false); + + if (hlink[j] == j) { + // + // Merge the two lists. 
+ // + + hlink[j] = mm; + break; + } + } + + // + // Add a bit to all codes in the second list + // + + for (int j = mm;; j = hlink[j]) { + scode[j]++; + + TINYEXR_CHECK_AND_RETURN_C(scode[j] <= 58, false); + + if (hlink[j] == j) break; + } + } + + // + // Build a canonical Huffman code table, replacing the code + // lengths in scode with (code, code length) pairs. Copy the + // code table from scode into frq. + // + + hufCanonicalCodeTable(scode.data()); + memcpy(frq, scode.data(), sizeof(long long) * HUF_ENCSIZE); + + return true; +} + +// +// Pack an encoding table: +// - only code lengths, not actual codes, are stored +// - runs of zeroes are compressed as follows: +// +// unpacked packed +// -------------------------------- +// 1 zero 0 (6 bits) +// 2 zeroes 59 +// 3 zeroes 60 +// 4 zeroes 61 +// 5 zeroes 62 +// n zeroes (6 or more) 63 n-6 (6 + 8 bits) +// + +const int SHORT_ZEROCODE_RUN = 59; +const int LONG_ZEROCODE_RUN = 63; +const int SHORTEST_LONG_RUN = 2 + LONG_ZEROCODE_RUN - SHORT_ZEROCODE_RUN; +const int LONGEST_LONG_RUN = 255 + SHORTEST_LONG_RUN; + +static void hufPackEncTable( + const long long *hcode, // i : encoding table [HUF_ENCSIZE] + int im, // i : min hcode index + int iM, // i : max hcode index + char **pcode) // o: ptr to packed table (updated) +{ + char *p = *pcode; + long long c = 0; + int lc = 0; + + for (; im <= iM; im++) { + int l = hufLength(hcode[im]); + + if (l == 0) { + int zerun = 1; + + while ((im < iM) && (zerun < LONGEST_LONG_RUN)) { + if (hufLength(hcode[im + 1]) > 0) break; + im++; + zerun++; + } + + if (zerun >= 2) { + if (zerun >= SHORTEST_LONG_RUN) { + outputBits(6, LONG_ZEROCODE_RUN, c, lc, p); + outputBits(8, zerun - SHORTEST_LONG_RUN, c, lc, p); + } else { + outputBits(6, SHORT_ZEROCODE_RUN + zerun - 2, c, lc, p); + } + continue; + } + } + + outputBits(6, l, c, lc, p); + } + + if (lc > 0) *p++ = (unsigned char)(c << (8 - lc)); + + *pcode = p; +} + +// +// Unpack an encoding table packed by hufPackEncTable(): +// + 
+static bool hufUnpackEncTable( + const char **pcode, // io: ptr to packed table (updated) + int ni, // i : input size (in bytes) + int im, // i : min hcode index + int iM, // i : max hcode index + long long *hcode) // o: encoding table [HUF_ENCSIZE] +{ + memset(hcode, 0, sizeof(long long) * HUF_ENCSIZE); + + const char *p = *pcode; + long long c = 0; + int lc = 0; + + for (; im <= iM; im++) { + if (p - *pcode >= ni) { + return false; + } + + long long l = hcode[im] = getBits(6, c, lc, p); // code length + + if (l == (long long)LONG_ZEROCODE_RUN) { + if (p - *pcode > ni) { + return false; + } + + int zerun = getBits(8, c, lc, p) + SHORTEST_LONG_RUN; + + if (im + zerun > iM + 1) { + return false; + } + + while (zerun--) hcode[im++] = 0; + + im--; + } else if (l >= (long long)SHORT_ZEROCODE_RUN) { + int zerun = l - SHORT_ZEROCODE_RUN + 2; + + if (im + zerun > iM + 1) { + return false; + } + + while (zerun--) hcode[im++] = 0; + + im--; + } + } + + *pcode = const_cast(p); + + hufCanonicalCodeTable(hcode); + + return true; +} + +// +// DECODING TABLE BUILDING +// + +// +// Clear a newly allocated decoding table so that it contains only zeroes. 
+// + +static void hufClearDecTable(HufDec *hdecod) // io: (allocated by caller) +// decoding table [HUF_DECSIZE] +{ + for (int i = 0; i < HUF_DECSIZE; i++) { + hdecod[i].len = 0; + hdecod[i].lit = 0; + hdecod[i].p = NULL; + } + // memset(hdecod, 0, sizeof(HufDec) * HUF_DECSIZE); +} + +// +// Build a decoding hash table based on the encoding table hcode: +// - short codes (<= HUF_DECBITS) are resolved with a single table access; +// - long code entry allocations are not optimized, because long codes are +// unfrequent; +// - decoding tables are used by hufDecode(); +// + +static bool hufBuildDecTable(const long long *hcode, // i : encoding table + int im, // i : min index in hcode + int iM, // i : max index in hcode + HufDec *hdecod) // o: (allocated by caller) +// decoding table [HUF_DECSIZE] +{ + // + // Init hashtable & loop on all codes. + // Assumes that hufClearDecTable(hdecod) has already been called. + // + + for (; im <= iM; im++) { + long long c = hufCode(hcode[im]); + int l = hufLength(hcode[im]); + + if (c >> l) { + // + // Error: c is supposed to be an l-bit code, + // but c contains a value that is greater + // than the largest l-bit number. + // + + // invalidTableEntry(); + return false; + } + + if (l > HUF_DECBITS) { + // + // Long code: add a secondary entry + // + + HufDec *pl = hdecod + (c >> (l - HUF_DECBITS)); + + if (pl->len) { + // + // Error: a short code has already + // been stored in table entry *pl. 
+ // + + // invalidTableEntry(); + return false; + } + + pl->lit++; + + if (pl->p) { + unsigned int *p = pl->p; + pl->p = new unsigned int[pl->lit]; + + for (unsigned int i = 0; i < pl->lit - 1u; ++i) pl->p[i] = p[i]; + + delete[] p; + } else { + pl->p = new unsigned int[1]; + } + + pl->p[pl->lit - 1] = im; + } else if (l) { + // + // Short code: init all primary entries + // + + HufDec *pl = hdecod + (c << (HUF_DECBITS - l)); + + for (long long i = 1ULL << (HUF_DECBITS - l); i > 0; i--, pl++) { + if (pl->len || pl->p) { + // + // Error: a short code or a long code has + // already been stored in table entry *pl. + // + + // invalidTableEntry(); + return false; + } + + pl->len = l; + pl->lit = im; + } + } + } + + return true; +} + +// +// Free the long code entries of a decoding table built by hufBuildDecTable() +// + +static void hufFreeDecTable(HufDec *hdecod) // io: Decoding table +{ + for (int i = 0; i < HUF_DECSIZE; i++) { + if (hdecod[i].p) { + delete[] hdecod[i].p; + hdecod[i].p = 0; + } + } +} + +// +// ENCODING +// + +inline void outputCode(long long code, long long &c, int &lc, char *&out) { + outputBits(hufLength(code), hufCode(code), c, lc, out); +} + +inline void sendCode(long long sCode, int runCount, long long runCode, + long long &c, int &lc, char *&out) { + // + // Output a run of runCount instances of the symbol sCount. + // Output the symbols explicitly, or if that is shorter, output + // the sCode symbol once followed by a runCode symbol and runCount + // expressed as an 8-bit number. 
+ // + + if (hufLength(sCode) + hufLength(runCode) + 8 < hufLength(sCode) * runCount) { + outputCode(sCode, c, lc, out); + outputCode(runCode, c, lc, out); + outputBits(8, runCount, c, lc, out); + } else { + while (runCount-- >= 0) outputCode(sCode, c, lc, out); + } +} + +// +// Encode (compress) ni values based on the Huffman encoding table hcode: +// + +static int hufEncode // return: output size (in bits) + (const long long *hcode, // i : encoding table + const unsigned short *in, // i : uncompressed input buffer + const int ni, // i : input buffer size (in bytes) + int rlc, // i : rl code + char *out) // o: compressed output buffer +{ + char *outStart = out; + long long c = 0; // bits not yet written to out + int lc = 0; // number of valid bits in c (LSB) + int s = in[0]; + int cs = 0; + + // + // Loop on input values + // + + for (int i = 1; i < ni; i++) { + // + // Count same values or send code + // + + if (s == in[i] && cs < 255) { + cs++; + } else { + sendCode(hcode[s], cs, hcode[rlc], c, lc, out); + cs = 0; + } + + s = in[i]; + } + + // + // Send remaining code + // + + sendCode(hcode[s], cs, hcode[rlc], c, lc, out); + + if (lc) *out = (c << (8 - lc)) & 0xff; + + return (out - outStart) * 8 + lc; +} + +// +// DECODING +// + +// +// In order to force the compiler to inline them, +// getChar() and getCode() are implemented as macros +// instead of "inline" functions. 
+// + +#define getChar(c, lc, in) \ + { \ + c = (c << 8) | *(unsigned char *)(in++); \ + lc += 8; \ + } + +#if 0 +#define getCode(po, rlc, c, lc, in, out, ob, oe) \ + { \ + if (po == rlc) { \ + if (lc < 8) getChar(c, lc, in); \ + \ + lc -= 8; \ + \ + unsigned char cs = (c >> lc); \ + \ + if (out + cs > oe) return false; \ + \ + /* TinyEXR issue 78 */ \ + unsigned short s = out[-1]; \ + \ + while (cs-- > 0) *out++ = s; \ + } else if (out < oe) { \ + *out++ = po; \ + } else { \ + return false; \ + } \ + } +#else +static bool getCode(int po, int rlc, long long &c, int &lc, const char *&in, + const char *in_end, unsigned short *&out, + const unsigned short *ob, const unsigned short *oe) { + (void)ob; + if (po == rlc) { + if (lc < 8) { + /* TinyEXR issue 78 */ + /* TinyEXR issue 160. in + 1 -> in */ + if (in >= in_end) { + return false; + } + + getChar(c, lc, in); + } + + lc -= 8; + + unsigned char cs = (c >> lc); + + if (out + cs > oe) return false; + + // Bounds check for safety + // Issue 100. 
+ if ((out - 1) < ob) return false; + unsigned short s = out[-1]; + + while (cs-- > 0) *out++ = s; + } else if (out < oe) { + *out++ = po; + } else { + return false; + } + return true; +} +#endif + +// +// Decode (uncompress) ni bits based on encoding & decoding tables: +// + +static bool hufDecode(const long long *hcode, // i : encoding table + const HufDec *hdecod, // i : decoding table + const char *in, // i : compressed input buffer + int ni, // i : input size (in bits) + int rlc, // i : run-length code + int no, // i : expected output size (in bytes) + unsigned short *out) // o: uncompressed output buffer +{ + long long c = 0; + int lc = 0; + unsigned short *outb = out; // begin + unsigned short *oe = out + no; // end + const char *ie = in + (ni + 7) / 8; // input byte size + + // + // Loop on input bytes + // + + while (in < ie) { + getChar(c, lc, in); + + // + // Access decoding table + // + + while (lc >= HUF_DECBITS) { + const HufDec pl = hdecod[(c >> (lc - HUF_DECBITS)) & HUF_DECMASK]; + + if (pl.len) { + // + // Get short code + // + + lc -= pl.len; + // std::cout << "lit = " << pl.lit << std::endl; + // std::cout << "rlc = " << rlc << std::endl; + // std::cout << "c = " << c << std::endl; + // std::cout << "lc = " << lc << std::endl; + // std::cout << "in = " << in << std::endl; + // std::cout << "out = " << out << std::endl; + // std::cout << "oe = " << oe << std::endl; + if (!getCode(pl.lit, rlc, c, lc, in, ie, out, outb, oe)) { + return false; + } + } else { + if (!pl.p) { + return false; + } + // invalidCode(); // wrong code + + // + // Search long code + // + + unsigned int j; + + for (j = 0; j < pl.lit; j++) { + int l = hufLength(hcode[pl.p[j]]); + + while (lc < l && in < ie) // get more bits + getChar(c, lc, in); + + if (lc >= l) { + if (hufCode(hcode[pl.p[j]]) == + ((c >> (lc - l)) & (((long long)(1) << l) - 1))) { + // + // Found : get long code + // + + lc -= l; + if (!getCode(pl.p[j], rlc, c, lc, in, ie, out, outb, oe)) { + return false; + } 
+ break; + } + } + } + + if (j == pl.lit) { + return false; + // invalidCode(); // Not found + } + } + } + } + + // + // Get remaining (short) codes + // + + int i = (8 - ni) & 7; + c >>= i; + lc -= i; + + while (lc > 0) { + const HufDec pl = hdecod[(c << (HUF_DECBITS - lc)) & HUF_DECMASK]; + + if (pl.len) { + lc -= pl.len; + if (!getCode(pl.lit, rlc, c, lc, in, ie, out, outb, oe)) { + return false; + } + } else { + return false; + // invalidCode(); // wrong (long) code + } + } + + if (out - outb != no) { + return false; + } + // notEnoughData (); + + return true; +} + +static void countFrequencies(std::vector &freq, + const unsigned short data[/*n*/], int n) { + for (int i = 0; i < HUF_ENCSIZE; ++i) freq[i] = 0; + + for (int i = 0; i < n; ++i) ++freq[data[i]]; +} + +static void writeUInt(char buf[4], unsigned int i) { + unsigned char *b = (unsigned char *)buf; + + b[0] = i; + b[1] = i >> 8; + b[2] = i >> 16; + b[3] = i >> 24; +} + +static unsigned int readUInt(const char buf[4]) { + const unsigned char *b = (const unsigned char *)buf; + + return (b[0] & 0x000000ff) | ((b[1] << 8) & 0x0000ff00) | + ((b[2] << 16) & 0x00ff0000) | ((b[3] << 24) & 0xff000000); +} + +// +// EXTERNAL INTERFACE +// + +static int hufCompress(const unsigned short raw[], int nRaw, + char compressed[]) { + if (nRaw == 0) return 0; + + std::vector freq(HUF_ENCSIZE); + + countFrequencies(freq, raw, nRaw); + + int im = 0; + int iM = 0; + hufBuildEncTable(freq.data(), &im, &iM); + + char *tableStart = compressed + 20; + char *tableEnd = tableStart; + hufPackEncTable(freq.data(), im, iM, &tableEnd); + int tableLength = tableEnd - tableStart; + + char *dataStart = tableEnd; + int nBits = hufEncode(freq.data(), raw, nRaw, iM, dataStart); + int data_length = (nBits + 7) / 8; + + writeUInt(compressed, im); + writeUInt(compressed + 4, iM); + writeUInt(compressed + 8, tableLength); + writeUInt(compressed + 12, nBits); + writeUInt(compressed + 16, 0); // room for future extensions + + return dataStart + 
data_length - compressed; +} + +static bool hufUncompress(const char compressed[], int nCompressed, + std::vector *raw) { + if (nCompressed == 0) { + if (raw->size() != 0) return false; + + return false; + } + + int im = readUInt(compressed); + int iM = readUInt(compressed + 4); + // int tableLength = readUInt (compressed + 8); + int nBits = readUInt(compressed + 12); + + if (im < 0 || im >= HUF_ENCSIZE || iM < 0 || iM >= HUF_ENCSIZE) return false; + + const char *ptr = compressed + 20; + + // + // Fast decoder needs at least 2x64-bits of compressed data, and + // needs to be run-able on this platform. Otherwise, fall back + // to the original decoder + // + + // if (FastHufDecoder::enabled() && nBits > 128) + //{ + // FastHufDecoder fhd (ptr, nCompressed - (ptr - compressed), im, iM, iM); + // fhd.decode ((unsigned char*)ptr, nBits, raw, nRaw); + //} + // else + { + std::vector freq(HUF_ENCSIZE); + std::vector hdec(HUF_DECSIZE); + + hufClearDecTable(&hdec.at(0)); + + hufUnpackEncTable(&ptr, nCompressed - (ptr - compressed), im, iM, + &freq.at(0)); + + { + if (nBits > 8 * (nCompressed - (ptr - compressed))) { + return false; + } + + hufBuildDecTable(&freq.at(0), im, iM, &hdec.at(0)); + hufDecode(&freq.at(0), &hdec.at(0), ptr, nBits, iM, raw->size(), + raw->data()); + } + // catch (...) 
+ //{ + // hufFreeDecTable (hdec); + // throw; + //} + + hufFreeDecTable(&hdec.at(0)); + } + + return true; +} + +// +// Functions to compress the range of values in the pixel data +// + +const int USHORT_RANGE = (1 << 16); +const int BITMAP_SIZE = (USHORT_RANGE >> 3); + +static void bitmapFromData(const unsigned short data[/*nData*/], int nData, + unsigned char bitmap[BITMAP_SIZE], + unsigned short &minNonZero, + unsigned short &maxNonZero) { + for (int i = 0; i < BITMAP_SIZE; ++i) bitmap[i] = 0; + + for (int i = 0; i < nData; ++i) bitmap[data[i] >> 3] |= (1 << (data[i] & 7)); + + bitmap[0] &= ~1; // zero is not explicitly stored in + // the bitmap; we assume that the + // data always contain zeroes + minNonZero = BITMAP_SIZE - 1; + maxNonZero = 0; + + for (int i = 0; i < BITMAP_SIZE; ++i) { + if (bitmap[i]) { + if (minNonZero > i) minNonZero = i; + if (maxNonZero < i) maxNonZero = i; + } + } +} + +static unsigned short forwardLutFromBitmap( + const unsigned char bitmap[BITMAP_SIZE], unsigned short lut[USHORT_RANGE]) { + int k = 0; + + for (int i = 0; i < USHORT_RANGE; ++i) { + if ((i == 0) || (bitmap[i >> 3] & (1 << (i & 7)))) + lut[i] = k++; + else + lut[i] = 0; + } + + return k - 1; // maximum value stored in lut[], +} // i.e. number of ones in bitmap minus 1 + +static unsigned short reverseLutFromBitmap( + const unsigned char bitmap[BITMAP_SIZE], unsigned short lut[USHORT_RANGE]) { + int k = 0; + + for (int i = 0; i < USHORT_RANGE; ++i) { + if ((i == 0) || (bitmap[i >> 3] & (1 << (i & 7)))) lut[k++] = i; + } + + int n = k - 1; + + while (k < USHORT_RANGE) lut[k++] = 0; + + return n; // maximum k where lut[k] is non-zero, +} // i.e. 
number of ones in bitmap minus 1 + +static void applyLut(const unsigned short lut[USHORT_RANGE], + unsigned short data[/*nData*/], int nData) { + for (int i = 0; i < nData; ++i) data[i] = lut[data[i]]; +} + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif // __clang__ + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +static bool CompressPiz(unsigned char *outPtr, unsigned int *outSize, + const unsigned char *inPtr, size_t inSize, + const std::vector &channelInfo, + int data_width, int num_lines) { + std::vector bitmap(BITMAP_SIZE); + unsigned short minNonZero; + unsigned short maxNonZero; + +#if !TINYEXR_LITTLE_ENDIAN + // @todo { PIZ compression on BigEndian architecture. } + return false; +#endif + + // Assume `inSize` is multiple of 2 or 4. + std::vector tmpBuffer(inSize / sizeof(unsigned short)); + + std::vector channelData(channelInfo.size()); + unsigned short *tmpBufferEnd = &tmpBuffer.at(0); + + for (size_t c = 0; c < channelData.size(); c++) { + PIZChannelData &cd = channelData[c]; + + cd.start = tmpBufferEnd; + cd.end = cd.start; + + cd.nx = data_width; + cd.ny = num_lines; + // cd.ys = c.channel().ySampling; + + size_t pixelSize = sizeof(int); // UINT and FLOAT + if (channelInfo[c].requested_pixel_type == TINYEXR_PIXELTYPE_HALF) { + pixelSize = sizeof(short); + } + + cd.size = static_cast(pixelSize / sizeof(short)); + + tmpBufferEnd += cd.nx * cd.ny * cd.size; + } + + const unsigned char *ptr = inPtr; + for (int y = 0; y < num_lines; ++y) { + for (size_t i = 0; i < channelData.size(); ++i) { + PIZChannelData &cd = channelData[i]; + + // if (modp (y, cd.ys) != 0) + // continue; + + size_t n = static_cast(cd.nx * cd.size); + memcpy(cd.end, ptr, n * sizeof(unsigned short)); + ptr += n * sizeof(unsigned short); + cd.end += n; + } + } + + bitmapFromData(&tmpBuffer.at(0), static_cast(tmpBuffer.size()), + bitmap.data(), minNonZero, maxNonZero); + + std::vector lut(USHORT_RANGE); + unsigned short maxValue = forwardLutFromBitmap(bitmap.data(), 
lut.data()); + applyLut(lut.data(), &tmpBuffer.at(0), static_cast(tmpBuffer.size())); + + // + // Store range compression info in _outBuffer + // + + char *buf = reinterpret_cast(outPtr); + + memcpy(buf, &minNonZero, sizeof(unsigned short)); + buf += sizeof(unsigned short); + memcpy(buf, &maxNonZero, sizeof(unsigned short)); + buf += sizeof(unsigned short); + + if (minNonZero <= maxNonZero) { + memcpy(buf, reinterpret_cast(&bitmap[0] + minNonZero), + maxNonZero - minNonZero + 1); + buf += maxNonZero - minNonZero + 1; + } + + // + // Apply wavelet encoding + // + + for (size_t i = 0; i < channelData.size(); ++i) { + PIZChannelData &cd = channelData[i]; + + for (int j = 0; j < cd.size; ++j) { + wav2Encode(cd.start + j, cd.nx, cd.size, cd.ny, cd.nx * cd.size, + maxValue); + } + } + + // + // Apply Huffman encoding; append the result to _outBuffer + // + + // length header(4byte), then huff data. Initialize length header with zero, + // then later fill it by `length`. + char *lengthPtr = buf; + int zero = 0; + memcpy(buf, &zero, sizeof(int)); + buf += sizeof(int); + + int length = + hufCompress(&tmpBuffer.at(0), static_cast(tmpBuffer.size()), buf); + memcpy(lengthPtr, &length, sizeof(int)); + + (*outSize) = static_cast( + (reinterpret_cast(buf) - outPtr) + + static_cast(length)); + + // Use uncompressed data when compressed data is larger than uncompressed. + // (Issue 40) + if ((*outSize) >= inSize) { + (*outSize) = static_cast(inSize); + memcpy(outPtr, inPtr, inSize); + } + return true; +} + +static bool DecompressPiz(unsigned char *outPtr, const unsigned char *inPtr, + size_t tmpBufSizeInBytes, size_t inLen, int num_channels, + const EXRChannelInfo *channels, int data_width, + int num_lines) { + if (inLen == tmpBufSizeInBytes) { + // Data is not compressed(Issue 40). 
+ memcpy(outPtr, inPtr, inLen); + return true; + } + + std::vector bitmap(BITMAP_SIZE); + unsigned short minNonZero; + unsigned short maxNonZero; + +#if !TINYEXR_LITTLE_ENDIAN + // @todo { PIZ compression on BigEndian architecture. } + return false; +#endif + + memset(bitmap.data(), 0, BITMAP_SIZE); + + if (inLen < 4) { + return false; + } + + size_t readLen = 0; + + const unsigned char *ptr = inPtr; + // minNonZero = *(reinterpret_cast(ptr)); + tinyexr::cpy2(&minNonZero, reinterpret_cast(ptr)); + // maxNonZero = *(reinterpret_cast(ptr + 2)); + tinyexr::cpy2(&maxNonZero, reinterpret_cast(ptr + 2)); + ptr += 4; + readLen += 4; + + if (maxNonZero >= BITMAP_SIZE) { + return false; + } + + //printf("maxNonZero = %d\n", maxNonZero); + //printf("minNonZero = %d\n", minNonZero); + //printf("len = %d\n", (maxNonZero - minNonZero + 1)); + //printf("BITMAPSIZE - min = %d\n", (BITMAP_SIZE - minNonZero)); + + if (minNonZero <= maxNonZero) { + if (((maxNonZero - minNonZero + 1) + readLen) > inLen) { + // Input too short + return false; + } + + memcpy(reinterpret_cast(&bitmap[0] + minNonZero), ptr, + maxNonZero - minNonZero + 1); + ptr += maxNonZero - minNonZero + 1; + readLen += maxNonZero - minNonZero + 1; + } else { + // Issue 194 + if ((minNonZero == (BITMAP_SIZE - 1)) && (maxNonZero == 0)) { + // OK. all pixels are zero. And no need to read `bitmap` data. + } else { + // invalid minNonZero/maxNonZero combination. 
+ return false; + } + } + + std::vector lut(USHORT_RANGE); + memset(lut.data(), 0, sizeof(unsigned short) * USHORT_RANGE); + unsigned short maxValue = reverseLutFromBitmap(bitmap.data(), lut.data()); + + // + // Huffman decoding + // + + if ((readLen + 4) > inLen) { + return false; + } + + int length=0; + + // length = *(reinterpret_cast(ptr)); + tinyexr::cpy4(&length, reinterpret_cast(ptr)); + ptr += sizeof(int); + + if (size_t((ptr - inPtr) + length) > inLen) { + return false; + } + + std::vector tmpBuffer(tmpBufSizeInBytes / sizeof(unsigned short)); + hufUncompress(reinterpret_cast(ptr), length, &tmpBuffer); + + // + // Wavelet decoding + // + + std::vector channelData(static_cast(num_channels)); + + unsigned short *tmpBufferEnd = &tmpBuffer.at(0); + + for (size_t i = 0; i < static_cast(num_channels); ++i) { + const EXRChannelInfo &chan = channels[i]; + + size_t pixelSize = sizeof(int); // UINT and FLOAT + if (chan.pixel_type == TINYEXR_PIXELTYPE_HALF) { + pixelSize = sizeof(short); + } + + channelData[i].start = tmpBufferEnd; + channelData[i].end = channelData[i].start; + channelData[i].nx = data_width; + channelData[i].ny = num_lines; + // channelData[i].ys = 1; + channelData[i].size = static_cast(pixelSize / sizeof(short)); + + tmpBufferEnd += channelData[i].nx * channelData[i].ny * channelData[i].size; + } + + for (size_t i = 0; i < channelData.size(); ++i) { + PIZChannelData &cd = channelData[i]; + + for (int j = 0; j < cd.size; ++j) { + wav2Decode(cd.start + j, cd.nx, cd.size, cd.ny, cd.nx * cd.size, + maxValue); + } + } + + // + // Expand the pixel data to their original range + // + + applyLut(lut.data(), &tmpBuffer.at(0), static_cast(tmpBufSizeInBytes / sizeof(unsigned short))); + + for (int y = 0; y < num_lines; y++) { + for (size_t i = 0; i < channelData.size(); ++i) { + PIZChannelData &cd = channelData[i]; + + // if (modp (y, cd.ys) != 0) + // continue; + + size_t n = static_cast(cd.nx * cd.size); + memcpy(outPtr, cd.end, static_cast(n * 
sizeof(unsigned short))); + outPtr += n * sizeof(unsigned short); + cd.end += n; + } + } + + return true; +} +#endif // TINYEXR_USE_PIZ + +#if TINYEXR_USE_ZFP + +struct ZFPCompressionParam { + double rate; + unsigned int precision; + unsigned int __pad0; + double tolerance; + int type; // TINYEXR_ZFP_COMPRESSIONTYPE_* + unsigned int __pad1; + + ZFPCompressionParam() { + type = TINYEXR_ZFP_COMPRESSIONTYPE_RATE; + rate = 2.0; + precision = 0; + tolerance = 0.0; + } +}; + +static bool FindZFPCompressionParam(ZFPCompressionParam *param, + const EXRAttribute *attributes, + int num_attributes, std::string *err) { + bool foundType = false; + + for (int i = 0; i < num_attributes; i++) { + if ((strcmp(attributes[i].name, "zfpCompressionType") == 0)) { + if (attributes[i].size == 1) { + param->type = static_cast(attributes[i].value[0]); + foundType = true; + break; + } else { + if (err) { + (*err) += + "zfpCompressionType attribute must be uchar(1 byte) type.\n"; + } + return false; + } + } + } + + if (!foundType) { + if (err) { + (*err) += "`zfpCompressionType` attribute not found.\n"; + } + return false; + } + + if (param->type == TINYEXR_ZFP_COMPRESSIONTYPE_RATE) { + for (int i = 0; i < num_attributes; i++) { + if ((strcmp(attributes[i].name, "zfpCompressionRate") == 0) && + (attributes[i].size == 8)) { + param->rate = *(reinterpret_cast(attributes[i].value)); + return true; + } + } + + if (err) { + (*err) += "`zfpCompressionRate` attribute not found.\n"; + } + + } else if (param->type == TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION) { + for (int i = 0; i < num_attributes; i++) { + if ((strcmp(attributes[i].name, "zfpCompressionPrecision") == 0) && + (attributes[i].size == 4)) { + param->rate = *(reinterpret_cast(attributes[i].value)); + return true; + } + } + + if (err) { + (*err) += "`zfpCompressionPrecision` attribute not found.\n"; + } + + } else if (param->type == TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY) { + for (int i = 0; i < num_attributes; i++) { + if 
((strcmp(attributes[i].name, "zfpCompressionTolerance") == 0) && + (attributes[i].size == 8)) { + param->tolerance = *(reinterpret_cast(attributes[i].value)); + return true; + } + } + + if (err) { + (*err) += "`zfpCompressionTolerance` attribute not found.\n"; + } + } else { + if (err) { + (*err) += "Unknown value specified for `zfpCompressionType`.\n"; + } + } + + return false; +} + +// Assume pixel format is FLOAT for all channels. +static bool DecompressZfp(float *dst, int dst_width, int dst_num_lines, + size_t num_channels, const unsigned char *src, + unsigned long src_size, + const ZFPCompressionParam ¶m) { + size_t uncompressed_size = + size_t(dst_width) * size_t(dst_num_lines) * num_channels; + + if (uncompressed_size == src_size) { + // Data is not compressed(Issue 40). + memcpy(dst, src, src_size); + } + + zfp_stream *zfp = NULL; + zfp_field *field = NULL; + + TINYEXR_CHECK_AND_RETURN_C((dst_width % 4) == 0, false); + TINYEXR_CHECK_AND_RETURN_C((dst_num_lines % 4) == 0, false); + + if ((size_t(dst_width) & 3U) || (size_t(dst_num_lines) & 3U)) { + return false; + } + + field = + zfp_field_2d(reinterpret_cast(const_cast(src)), + zfp_type_float, static_cast(dst_width), + static_cast(dst_num_lines) * + static_cast(num_channels)); + zfp = zfp_stream_open(NULL); + + if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_RATE) { + zfp_stream_set_rate(zfp, param.rate, zfp_type_float, /* dimension */ 2, + /* write random access */ 0); + } else if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION) { + zfp_stream_set_precision(zfp, param.precision); + } else if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY) { + zfp_stream_set_accuracy(zfp, param.tolerance); + } else { + return false; + } + + size_t buf_size = zfp_stream_maximum_size(zfp, field); + std::vector buf(buf_size); + memcpy(&buf.at(0), src, src_size); + + bitstream *stream = stream_open(&buf.at(0), buf_size); + zfp_stream_set_bit_stream(zfp, stream); + zfp_stream_rewind(zfp); + + size_t image_size = 
size_t(dst_width) * size_t(dst_num_lines); + + for (size_t c = 0; c < size_t(num_channels); c++) { + // decompress 4x4 pixel block. + for (size_t y = 0; y < size_t(dst_num_lines); y += 4) { + for (size_t x = 0; x < size_t(dst_width); x += 4) { + float fblock[16]; + zfp_decode_block_float_2(zfp, fblock); + for (size_t j = 0; j < 4; j++) { + for (size_t i = 0; i < 4; i++) { + dst[c * image_size + ((y + j) * size_t(dst_width) + (x + i))] = + fblock[j * 4 + i]; + } + } + } + } + } + + zfp_field_free(field); + zfp_stream_close(zfp); + stream_close(stream); + + return true; +} + +// Assume pixel format is FLOAT for all channels. +static bool CompressZfp(std::vector *outBuf, + unsigned int *outSize, const float *inPtr, int width, + int num_lines, int num_channels, + const ZFPCompressionParam ¶m) { + zfp_stream *zfp = NULL; + zfp_field *field = NULL; + + TINYEXR_CHECK_AND_RETURN_C((width % 4) == 0, false); + TINYEXR_CHECK_AND_RETURN_C((num_lines % 4) == 0, false); + + if ((size_t(width) & 3U) || (size_t(num_lines) & 3U)) { + return false; + } + + // create input array. 
+ field = zfp_field_2d(reinterpret_cast(const_cast(inPtr)), + zfp_type_float, static_cast(width), + static_cast(num_lines * num_channels)); + + zfp = zfp_stream_open(NULL); + + if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_RATE) { + zfp_stream_set_rate(zfp, param.rate, zfp_type_float, 2, 0); + } else if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION) { + zfp_stream_set_precision(zfp, param.precision); + } else if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY) { + zfp_stream_set_accuracy(zfp, param.tolerance); + } else { + return false; + } + + size_t buf_size = zfp_stream_maximum_size(zfp, field); + + outBuf->resize(buf_size); + + bitstream *stream = stream_open(&outBuf->at(0), buf_size); + zfp_stream_set_bit_stream(zfp, stream); + zfp_field_free(field); + + size_t image_size = size_t(width) * size_t(num_lines); + + for (size_t c = 0; c < size_t(num_channels); c++) { + // compress 4x4 pixel block. + for (size_t y = 0; y < size_t(num_lines); y += 4) { + for (size_t x = 0; x < size_t(width); x += 4) { + float fblock[16]; + for (size_t j = 0; j < 4; j++) { + for (size_t i = 0; i < 4; i++) { + fblock[j * 4 + i] = + inPtr[c * image_size + ((y + j) * size_t(width) + (x + i))]; + } + } + zfp_encode_block_float_2(zfp, fblock); + } + } + } + + zfp_stream_flush(zfp); + (*outSize) = static_cast(zfp_stream_compressed_size(zfp)); + + zfp_stream_close(zfp); + + return true; +} + +#endif + +// +// ----------------------------------------------------------------- +// + +// heuristics +#define TINYEXR_DIMENSION_THRESHOLD (1024 * 8192) + +// TODO(syoyo): Refactor function arguments. 
+static bool DecodePixelData(/* out */ unsigned char **out_images, + const int *requested_pixel_types, + const unsigned char *data_ptr, size_t data_len, + int compression_type, int line_order, int width, + int height, int x_stride, int y, int line_no, + int num_lines, size_t pixel_data_size, + size_t num_attributes, + const EXRAttribute *attributes, size_t num_channels, + const EXRChannelInfo *channels, + const std::vector &channel_offset_list) { + if (compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { // PIZ +#if TINYEXR_USE_PIZ + if ((width == 0) || (num_lines == 0) || (pixel_data_size == 0)) { + // Invalid input #90 + return false; + } + + // Allocate original data size. + std::vector outBuf(static_cast( + static_cast(width * num_lines) * pixel_data_size)); + size_t tmpBufLen = outBuf.size(); + + bool ret = tinyexr::DecompressPiz( + reinterpret_cast(&outBuf.at(0)), data_ptr, tmpBufLen, + data_len, static_cast(num_channels), channels, width, num_lines); + + if (!ret) { + return false; + } + + // For PIZ_COMPRESSION: + // pixel sample data for channel 0 for scanline 0 + // pixel sample data for channel 1 for scanline 0 + // pixel sample data for channel ... for scanline 0 + // pixel sample data for channel n for scanline 0 + // pixel sample data for channel 0 for scanline 1 + // pixel sample data for channel 1 for scanline 1 + // pixel sample data for channel ... for scanline 1 + // pixel sample data for channel n for scanline 1 + // ... + for (size_t c = 0; c < static_cast(num_channels); c++) { + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { + for (size_t v = 0; v < static_cast(num_lines); v++) { + const unsigned short *line_ptr = reinterpret_cast( + &outBuf.at(v * pixel_data_size * static_cast(width) + + channel_offset_list[c] * static_cast(width))); + for (size_t u = 0; u < static_cast(width); u++) { + FP16 hf; + + // hf.u = line_ptr[u]; + // use `cpy` to avoid unaligned memory access when compiler's + // optimization is on. 
+ tinyexr::cpy2(&(hf.u), line_ptr + u); + + tinyexr::swap2(reinterpret_cast(&hf.u)); + + if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { + unsigned short *image = + reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += static_cast( + (height - 1 - (line_no + static_cast(v)))) * + static_cast(x_stride) + + u; + } + *image = hf.u; + } else { // HALF -> FLOAT + FP32 f32 = half_to_float(hf); + float *image = reinterpret_cast(out_images)[c]; + size_t offset = 0; + if (line_order == 0) { + offset = (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + offset = static_cast( + (height - 1 - (line_no + static_cast(v)))) * + static_cast(x_stride) + + u; + } + image += offset; + *image = f32.f; + } + } + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { + TINYEXR_CHECK_AND_RETURN_C(requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT, false); + + for (size_t v = 0; v < static_cast(num_lines); v++) { + const unsigned int *line_ptr = reinterpret_cast( + &outBuf.at(v * pixel_data_size * static_cast(width) + + channel_offset_list[c] * static_cast(width))); + for (size_t u = 0; u < static_cast(width); u++) { + unsigned int val; + // val = line_ptr[u]; + tinyexr::cpy4(&val, line_ptr + u); + + tinyexr::swap4(&val); + + unsigned int *image = + reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += static_cast( + (height - 1 - (line_no + static_cast(v)))) * + static_cast(x_stride) + + u; + } + *image = val; + } + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + TINYEXR_CHECK_AND_RETURN_C(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT, false); + for (size_t v = 0; v < static_cast(num_lines); v++) { + const float *line_ptr = reinterpret_cast(&outBuf.at( + v * pixel_data_size * static_cast(width) + + channel_offset_list[c] * 
static_cast(width))); + for (size_t u = 0; u < static_cast(width); u++) { + float val; + // val = line_ptr[u]; + tinyexr::cpy4(&val, line_ptr + u); + + tinyexr::swap4(reinterpret_cast(&val)); + + float *image = reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += static_cast( + (height - 1 - (line_no + static_cast(v)))) * + static_cast(x_stride) + + u; + } + *image = val; + } + } + } else { + return false; + } + } +#else + return false; +#endif + + } else if (compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS || + compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) { + // Allocate original data size. + std::vector outBuf(static_cast(width) * + static_cast(num_lines) * + pixel_data_size); + + unsigned long dstLen = static_cast(outBuf.size()); + TINYEXR_CHECK_AND_RETURN_C(dstLen > 0, false); + if (!tinyexr::DecompressZip( + reinterpret_cast(&outBuf.at(0)), &dstLen, data_ptr, + static_cast(data_len))) { + return false; + } + + // For ZIP_COMPRESSION: + // pixel sample data for channel 0 for scanline 0 + // pixel sample data for channel 1 for scanline 0 + // pixel sample data for channel ... for scanline 0 + // pixel sample data for channel n for scanline 0 + // pixel sample data for channel 0 for scanline 1 + // pixel sample data for channel 1 for scanline 1 + // pixel sample data for channel ... for scanline 1 + // pixel sample data for channel n for scanline 1 + // ... 
+ for (size_t c = 0; c < static_cast(num_channels); c++) { + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { + for (size_t v = 0; v < static_cast(num_lines); v++) { + const unsigned short *line_ptr = reinterpret_cast( + &outBuf.at(v * static_cast(pixel_data_size) * + static_cast(width) + + channel_offset_list[c] * static_cast(width))); + for (size_t u = 0; u < static_cast(width); u++) { + tinyexr::FP16 hf; + + // hf.u = line_ptr[u]; + tinyexr::cpy2(&(hf.u), line_ptr + u); + + tinyexr::swap2(reinterpret_cast(&hf.u)); + + if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { + unsigned short *image = + reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += (static_cast(height) - 1U - + (static_cast(line_no) + v)) * + static_cast(x_stride) + + u; + } + *image = hf.u; + } else { // HALF -> FLOAT + tinyexr::FP32 f32 = half_to_float(hf); + float *image = reinterpret_cast(out_images)[c]; + size_t offset = 0; + if (line_order == 0) { + offset = (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + offset = (static_cast(height) - 1U - + (static_cast(line_no) + v)) * + static_cast(x_stride) + + u; + } + image += offset; + + *image = f32.f; + } + } + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { + TINYEXR_CHECK_AND_RETURN_C(requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT, false); + + for (size_t v = 0; v < static_cast(num_lines); v++) { + const unsigned int *line_ptr = reinterpret_cast( + &outBuf.at(v * pixel_data_size * static_cast(width) + + channel_offset_list[c] * static_cast(width))); + for (size_t u = 0; u < static_cast(width); u++) { + unsigned int val; + // val = line_ptr[u]; + tinyexr::cpy4(&val, line_ptr + u); + + tinyexr::swap4(&val); + + unsigned int *image = + reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += 
(static_cast(height) - 1U - + (static_cast(line_no) + v)) * + static_cast(x_stride) + + u; + } + *image = val; + } + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + TINYEXR_CHECK_AND_RETURN_C(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT, false); + for (size_t v = 0; v < static_cast(num_lines); v++) { + const float *line_ptr = reinterpret_cast( + &outBuf.at(v * pixel_data_size * static_cast(width) + + channel_offset_list[c] * static_cast(width))); + for (size_t u = 0; u < static_cast(width); u++) { + float val; + // val = line_ptr[u]; + tinyexr::cpy4(&val, line_ptr + u); + + tinyexr::swap4(reinterpret_cast(&val)); + + float *image = reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += (static_cast(height) - 1U - + (static_cast(line_no) + v)) * + static_cast(x_stride) + + u; + } + *image = val; + } + } + } else { + return false; + } + } + } else if (compression_type == TINYEXR_COMPRESSIONTYPE_RLE) { + // Allocate original data size. + std::vector outBuf(static_cast(width) * + static_cast(num_lines) * + pixel_data_size); + + unsigned long dstLen = static_cast(outBuf.size()); + if (dstLen == 0) { + return false; + } + + if (!tinyexr::DecompressRle( + reinterpret_cast(&outBuf.at(0)), dstLen, data_ptr, + static_cast(data_len))) { + return false; + } + + // For RLE_COMPRESSION: + // pixel sample data for channel 0 for scanline 0 + // pixel sample data for channel 1 for scanline 0 + // pixel sample data for channel ... for scanline 0 + // pixel sample data for channel n for scanline 0 + // pixel sample data for channel 0 for scanline 1 + // pixel sample data for channel 1 for scanline 1 + // pixel sample data for channel ... for scanline 1 + // pixel sample data for channel n for scanline 1 + // ... 
+ for (size_t c = 0; c < static_cast(num_channels); c++) { + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { + for (size_t v = 0; v < static_cast(num_lines); v++) { + const unsigned short *line_ptr = reinterpret_cast( + &outBuf.at(v * static_cast(pixel_data_size) * + static_cast(width) + + channel_offset_list[c] * static_cast(width))); + for (size_t u = 0; u < static_cast(width); u++) { + tinyexr::FP16 hf; + + // hf.u = line_ptr[u]; + tinyexr::cpy2(&(hf.u), line_ptr + u); + + tinyexr::swap2(reinterpret_cast(&hf.u)); + + if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { + unsigned short *image = + reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += (static_cast(height) - 1U - + (static_cast(line_no) + v)) * + static_cast(x_stride) + + u; + } + *image = hf.u; + } else { // HALF -> FLOAT + tinyexr::FP32 f32 = half_to_float(hf); + float *image = reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += (static_cast(height) - 1U - + (static_cast(line_no) + v)) * + static_cast(x_stride) + + u; + } + *image = f32.f; + } + } + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { + TINYEXR_CHECK_AND_RETURN_C(requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT, false); + + for (size_t v = 0; v < static_cast(num_lines); v++) { + const unsigned int *line_ptr = reinterpret_cast( + &outBuf.at(v * pixel_data_size * static_cast(width) + + channel_offset_list[c] * static_cast(width))); + for (size_t u = 0; u < static_cast(width); u++) { + unsigned int val; + // val = line_ptr[u]; + tinyexr::cpy4(&val, line_ptr + u); + + tinyexr::swap4(&val); + + unsigned int *image = + reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += (static_cast(height) - 1U - + 
(static_cast(line_no) + v)) * + static_cast(x_stride) + + u; + } + *image = val; + } + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + TINYEXR_CHECK_AND_RETURN_C(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT, false); + for (size_t v = 0; v < static_cast(num_lines); v++) { + const float *line_ptr = reinterpret_cast( + &outBuf.at(v * pixel_data_size * static_cast(width) + + channel_offset_list[c] * static_cast(width))); + for (size_t u = 0; u < static_cast(width); u++) { + float val; + // val = line_ptr[u]; + tinyexr::cpy4(&val, line_ptr + u); + + tinyexr::swap4(reinterpret_cast(&val)); + + float *image = reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += (static_cast(height) - 1U - + (static_cast(line_no) + v)) * + static_cast(x_stride) + + u; + } + *image = val; + } + } + } else { + return false; + } + } + } else if (compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { +#if TINYEXR_USE_ZFP + tinyexr::ZFPCompressionParam zfp_compression_param; + std::string e; + if (!tinyexr::FindZFPCompressionParam(&zfp_compression_param, attributes, + int(num_attributes), &e)) { + // This code path should not be reachable. + return false; + } + + // Allocate original data size. + std::vector outBuf(static_cast(width) * + static_cast(num_lines) * + pixel_data_size); + + unsigned long dstLen = outBuf.size(); + TINYEXR_CHECK_AND_RETURN_C(dstLen > 0, false); + tinyexr::DecompressZfp(reinterpret_cast(&outBuf.at(0)), width, + num_lines, num_channels, data_ptr, + static_cast(data_len), + zfp_compression_param); + + // For ZFP_COMPRESSION: + // pixel sample data for channel 0 for scanline 0 + // pixel sample data for channel 1 for scanline 0 + // pixel sample data for channel ... 
for scanline 0 + // pixel sample data for channel n for scanline 0 + // pixel sample data for channel 0 for scanline 1 + // pixel sample data for channel 1 for scanline 1 + // pixel sample data for channel ... for scanline 1 + // pixel sample data for channel n for scanline 1 + // ... + for (size_t c = 0; c < static_cast(num_channels); c++) { + TINYEXR_CHECK_AND_RETURN_C(channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT, false); + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + TINYEXR_CHECK_AND_RETURN_C(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT, false); + for (size_t v = 0; v < static_cast(num_lines); v++) { + const float *line_ptr = reinterpret_cast( + &outBuf.at(v * pixel_data_size * static_cast(width) + + channel_offset_list[c] * static_cast(width))); + for (size_t u = 0; u < static_cast(width); u++) { + float val; + tinyexr::cpy4(&val, line_ptr + u); + + tinyexr::swap4(reinterpret_cast(&val)); + + float *image = reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += (static_cast(height) - 1U - + (static_cast(line_no) + v)) * + static_cast(x_stride) + + u; + } + *image = val; + } + } + } else { + return false; + } + } +#else + (void)attributes; + (void)num_attributes; + (void)num_channels; + return false; +#endif + } else if (compression_type == TINYEXR_COMPRESSIONTYPE_NONE) { + for (size_t c = 0; c < num_channels; c++) { + for (size_t v = 0; v < static_cast(num_lines); v++) { + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { + const unsigned short *line_ptr = + reinterpret_cast( + data_ptr + v * pixel_data_size * size_t(width) + + channel_offset_list[c] * static_cast(width)); + + if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { + unsigned short *outLine = + reinterpret_cast(out_images[c]); + if (line_order == 0) { + outLine += (size_t(y) + v) * size_t(x_stride); + } else { + outLine += + (size_t(height) - 1 - (size_t(y) + v)) * 
size_t(x_stride); + } + + for (int u = 0; u < width; u++) { + tinyexr::FP16 hf; + + // hf.u = line_ptr[u]; + tinyexr::cpy2(&(hf.u), line_ptr + u); + + tinyexr::swap2(reinterpret_cast(&hf.u)); + + outLine[u] = hf.u; + } + } else if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT) { + float *outLine = reinterpret_cast(out_images[c]); + if (line_order == 0) { + outLine += (size_t(y) + v) * size_t(x_stride); + } else { + outLine += + (size_t(height) - 1 - (size_t(y) + v)) * size_t(x_stride); + } + + if (reinterpret_cast(line_ptr + width) > + (data_ptr + data_len)) { + // Insufficient data size + return false; + } + + for (int u = 0; u < width; u++) { + tinyexr::FP16 hf; + + // address may not be aligned. use byte-wise copy for safety.#76 + // hf.u = line_ptr[u]; + tinyexr::cpy2(&(hf.u), line_ptr + u); + + tinyexr::swap2(reinterpret_cast(&hf.u)); + + tinyexr::FP32 f32 = half_to_float(hf); + + outLine[u] = f32.f; + } + } else { + return false; + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + const float *line_ptr = reinterpret_cast( + data_ptr + v * pixel_data_size * size_t(width) + + channel_offset_list[c] * static_cast(width)); + + float *outLine = reinterpret_cast(out_images[c]); + if (line_order == 0) { + outLine += (size_t(y) + v) * size_t(x_stride); + } else { + outLine += + (size_t(height) - 1 - (size_t(y) + v)) * size_t(x_stride); + } + + if (reinterpret_cast(line_ptr + width) > + (data_ptr + data_len)) { + // Insufficient data size + return false; + } + + for (int u = 0; u < width; u++) { + float val; + tinyexr::cpy4(&val, line_ptr + u); + + tinyexr::swap4(reinterpret_cast(&val)); + + outLine[u] = val; + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { + const unsigned int *line_ptr = reinterpret_cast( + data_ptr + v * pixel_data_size * size_t(width) + + channel_offset_list[c] * static_cast(width)); + + unsigned int *outLine = + reinterpret_cast(out_images[c]); + if (line_order == 0) { + outLine += (size_t(y) + v) * 
size_t(x_stride); + } else { + outLine += + (size_t(height) - 1 - (size_t(y) + v)) * size_t(x_stride); + } + + if (reinterpret_cast(line_ptr + width) > + (data_ptr + data_len)) { + // Corrupted data + return false; + } + + for (int u = 0; u < width; u++) { + + unsigned int val; + tinyexr::cpy4(&val, line_ptr + u); + + tinyexr::swap4(reinterpret_cast(&val)); + + outLine[u] = val; + } + } + } + } + } + + return true; +} + +static bool DecodeTiledPixelData( + unsigned char **out_images, int *width, int *height, + const int *requested_pixel_types, const unsigned char *data_ptr, + size_t data_len, int compression_type, int line_order, int data_width, + int data_height, int tile_offset_x, int tile_offset_y, int tile_size_x, + int tile_size_y, size_t pixel_data_size, size_t num_attributes, + const EXRAttribute *attributes, size_t num_channels, + const EXRChannelInfo *channels, + const std::vector &channel_offset_list) { + // Here, data_width and data_height are the dimensions of the current (sub)level. + if (tile_size_x * tile_offset_x > data_width || + tile_size_y * tile_offset_y > data_height) { + return false; + } + + // Compute actual image size in a tile. + if ((tile_offset_x + 1) * tile_size_x >= data_width) { + (*width) = data_width - (tile_offset_x * tile_size_x); + } else { + (*width) = tile_size_x; + } + + if ((tile_offset_y + 1) * tile_size_y >= data_height) { + (*height) = data_height - (tile_offset_y * tile_size_y); + } else { + (*height) = tile_size_y; + } + + // Image size = tile size. 
+ return DecodePixelData(out_images, requested_pixel_types, data_ptr, data_len, + compression_type, line_order, (*width), tile_size_y, + /* stride */ tile_size_x, /* y */ 0, /* line_no */ 0, + (*height), pixel_data_size, num_attributes, attributes, + num_channels, channels, channel_offset_list); +} + +static bool ComputeChannelLayout(std::vector *channel_offset_list, + int *pixel_data_size, size_t *channel_offset, + int num_channels, + const EXRChannelInfo *channels) { + channel_offset_list->resize(static_cast(num_channels)); + + (*pixel_data_size) = 0; + (*channel_offset) = 0; + + for (size_t c = 0; c < static_cast(num_channels); c++) { + (*channel_offset_list)[c] = (*channel_offset); + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { + (*pixel_data_size) += sizeof(unsigned short); + (*channel_offset) += sizeof(unsigned short); + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + (*pixel_data_size) += sizeof(float); + (*channel_offset) += sizeof(float); + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { + (*pixel_data_size) += sizeof(unsigned int); + (*channel_offset) += sizeof(unsigned int); + } else { + // ??? + return false; + } + } + return true; +} + +// TODO: Simply return nullptr when failed to allocate? +static unsigned char **AllocateImage(int num_channels, + const EXRChannelInfo *channels, + const int *requested_pixel_types, + int data_width, int data_height, bool *success) { + unsigned char **images = + reinterpret_cast(static_cast( + malloc(sizeof(float *) * static_cast(num_channels)))); + + for (size_t c = 0; c < static_cast(num_channels); c++) { + images[c] = NULL; + } + + bool valid = true; + + for (size_t c = 0; c < static_cast(num_channels); c++) { + size_t data_len = + static_cast(data_width) * static_cast(data_height); + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { + // pixel_data_size += sizeof(unsigned short); + // channel_offset += sizeof(unsigned short); + // Alloc internal image for half type. 
+ if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { + images[c] = + reinterpret_cast(static_cast( + malloc(sizeof(unsigned short) * data_len))); + } else if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT) { + images[c] = reinterpret_cast( + static_cast(malloc(sizeof(float) * data_len))); + } else { + images[c] = NULL; // just in case. + valid = false; + break; + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + // pixel_data_size += sizeof(float); + // channel_offset += sizeof(float); + images[c] = reinterpret_cast( + static_cast(malloc(sizeof(float) * data_len))); + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { + // pixel_data_size += sizeof(unsigned int); + // channel_offset += sizeof(unsigned int); + images[c] = reinterpret_cast( + static_cast(malloc(sizeof(unsigned int) * data_len))); + } else { + images[c] = NULL; // just in case. + valid = false; + break; + } + } + + if (!valid) { + for (size_t c = 0; c < static_cast(num_channels); c++) { + if (images[c]) { + free(images[c]); + images[c] = NULL; + } + } + + if (success) { + (*success) = false; + } + } else { + if (success) { + (*success) = true; + } + } + + return images; +} + +#ifdef _WIN32 +static inline std::wstring UTF8ToWchar(const std::string &str) { + int wstr_size = + MultiByteToWideChar(CP_UTF8, 0, str.data(), (int)str.size(), NULL, 0); + std::wstring wstr(wstr_size, 0); + MultiByteToWideChar(CP_UTF8, 0, str.data(), (int)str.size(), &wstr[0], + (int)wstr.size()); + return wstr; +} +#endif + + +static int ParseEXRHeader(HeaderInfo *info, bool *empty_header, + const EXRVersion *version, std::string *err, + const unsigned char *buf, size_t size) { + const char *marker = reinterpret_cast(&buf[0]); + + if (empty_header) { + (*empty_header) = false; + } + + if (version->multipart) { + if (size > 0 && marker[0] == '\0') { + // End of header list. 
+ if (empty_header) { + (*empty_header) = true; + } + return TINYEXR_SUCCESS; + } + } + + // According to the spec, the header of every OpenEXR file must contain at + // least the following attributes: + // + // channels chlist + // compression compression + // dataWindow box2i + // displayWindow box2i + // lineOrder lineOrder + // pixelAspectRatio float + // screenWindowCenter v2f + // screenWindowWidth float + bool has_channels = false; + bool has_compression = false; + bool has_data_window = false; + bool has_display_window = false; + bool has_line_order = false; + bool has_pixel_aspect_ratio = false; + bool has_screen_window_center = false; + bool has_screen_window_width = false; + bool has_name = false; + bool has_type = false; + + info->name.clear(); + info->type.clear(); + + info->data_window.min_x = 0; + info->data_window.min_y = 0; + info->data_window.max_x = 0; + info->data_window.max_y = 0; + info->line_order = 0; // @fixme + info->display_window.min_x = 0; + info->display_window.min_y = 0; + info->display_window.max_x = 0; + info->display_window.max_y = 0; + info->screen_window_center[0] = 0.0f; + info->screen_window_center[1] = 0.0f; + info->screen_window_width = -1.0f; + info->pixel_aspect_ratio = -1.0f; + + info->tiled = 0; + info->tile_size_x = -1; + info->tile_size_y = -1; + info->tile_level_mode = -1; + info->tile_rounding_mode = -1; + + info->attributes.clear(); + + // Read attributes + size_t orig_size = size; + for (size_t nattr = 0; nattr < TINYEXR_MAX_HEADER_ATTRIBUTES; nattr++) { + if (0 == size) { + if (err) { + (*err) += "Insufficient data size for attributes.\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } else if (marker[0] == '\0') { + size--; + break; + } + + std::string attr_name; + std::string attr_type; + std::vector data; + size_t marker_size; + if (!tinyexr::ReadAttribute(&attr_name, &attr_type, &data, &marker_size, + marker, size)) { + if (err) { + (*err) += "Failed to read attribute.\n"; + } + return TINYEXR_ERROR_INVALID_DATA; 
+ } + marker += marker_size; + size -= marker_size; + + // For a multipart file, the version field 9th bit is 0. + if ((version->tiled || version->multipart || version->non_image) && attr_name.compare("tiles") == 0) { + unsigned int x_size, y_size; + unsigned char tile_mode; + if (data.size() != 9) { + if (err) { + (*err) += "(ParseEXRHeader) Invalid attribute data size. Attribute data size must be 9.\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } + + memcpy(&x_size, &data.at(0), sizeof(int)); + memcpy(&y_size, &data.at(4), sizeof(int)); + tile_mode = data[8]; + tinyexr::swap4(&x_size); + tinyexr::swap4(&y_size); + + if (x_size > static_cast(std::numeric_limits::max()) || + y_size > static_cast(std::numeric_limits::max())) { + if (err) { + (*err) = "Tile sizes were invalid."; + } + return TINYEXR_ERROR_UNSUPPORTED_FORMAT; + } + + info->tile_size_x = static_cast(x_size); + info->tile_size_y = static_cast(y_size); + + // mode = levelMode + roundingMode * 16 + info->tile_level_mode = tile_mode & 0x3; + info->tile_rounding_mode = (tile_mode >> 4) & 0x1; + info->tiled = 1; + } else if (attr_name.compare("compression") == 0) { + bool ok = false; + if (data[0] < TINYEXR_COMPRESSIONTYPE_PIZ) { + ok = true; + } + + if (data[0] == TINYEXR_COMPRESSIONTYPE_PIZ) { +#if TINYEXR_USE_PIZ + ok = true; +#else + if (err) { + (*err) = "PIZ compression is not supported."; + } + return TINYEXR_ERROR_UNSUPPORTED_FORMAT; +#endif + } + + if (data[0] == TINYEXR_COMPRESSIONTYPE_ZFP) { +#if TINYEXR_USE_ZFP + ok = true; +#else + if (err) { + (*err) = "ZFP compression is not supported."; + } + return TINYEXR_ERROR_UNSUPPORTED_FORMAT; +#endif + } + + if (!ok) { + if (err) { + (*err) = "Unknown compression type."; + } + return TINYEXR_ERROR_UNSUPPORTED_FORMAT; + } + + info->compression_type = static_cast(data[0]); + has_compression = true; + + } else if (attr_name.compare("channels") == 0) { + // name: zero-terminated string, from 1 to 255 bytes long + // pixel type: int, possible values are: 
UINT = 0 HALF = 1 FLOAT = 2 + // pLinear: unsigned char, possible values are 0 and 1 + // reserved: three chars, should be zero + // xSampling: int + // ySampling: int + + if (!ReadChannelInfo(info->channels, data)) { + if (err) { + (*err) += "Failed to parse channel info.\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } + + if (info->channels.size() < 1) { + if (err) { + (*err) += "# of channels is zero.\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } + + has_channels = true; + + } else if (attr_name.compare("dataWindow") == 0) { + if (data.size() >= 16) { + memcpy(&info->data_window.min_x, &data.at(0), sizeof(int)); + memcpy(&info->data_window.min_y, &data.at(4), sizeof(int)); + memcpy(&info->data_window.max_x, &data.at(8), sizeof(int)); + memcpy(&info->data_window.max_y, &data.at(12), sizeof(int)); + tinyexr::swap4(&info->data_window.min_x); + tinyexr::swap4(&info->data_window.min_y); + tinyexr::swap4(&info->data_window.max_x); + tinyexr::swap4(&info->data_window.max_y); + has_data_window = true; + } + } else if (attr_name.compare("displayWindow") == 0) { + if (data.size() >= 16) { + memcpy(&info->display_window.min_x, &data.at(0), sizeof(int)); + memcpy(&info->display_window.min_y, &data.at(4), sizeof(int)); + memcpy(&info->display_window.max_x, &data.at(8), sizeof(int)); + memcpy(&info->display_window.max_y, &data.at(12), sizeof(int)); + tinyexr::swap4(&info->display_window.min_x); + tinyexr::swap4(&info->display_window.min_y); + tinyexr::swap4(&info->display_window.max_x); + tinyexr::swap4(&info->display_window.max_y); + + has_display_window = true; + } + } else if (attr_name.compare("lineOrder") == 0) { + if (data.size() >= 1) { + info->line_order = static_cast(data[0]); + has_line_order = true; + } + } else if (attr_name.compare("pixelAspectRatio") == 0) { + if (data.size() >= sizeof(float)) { + memcpy(&info->pixel_aspect_ratio, &data.at(0), sizeof(float)); + tinyexr::swap4(&info->pixel_aspect_ratio); + has_pixel_aspect_ratio = true; + } + } else if 
(attr_name.compare("screenWindowCenter") == 0) { + if (data.size() >= 8) { + memcpy(&info->screen_window_center[0], &data.at(0), sizeof(float)); + memcpy(&info->screen_window_center[1], &data.at(4), sizeof(float)); + tinyexr::swap4(&info->screen_window_center[0]); + tinyexr::swap4(&info->screen_window_center[1]); + has_screen_window_center = true; + } + } else if (attr_name.compare("screenWindowWidth") == 0) { + if (data.size() >= sizeof(float)) { + memcpy(&info->screen_window_width, &data.at(0), sizeof(float)); + tinyexr::swap4(&info->screen_window_width); + + has_screen_window_width = true; + } + } else if (attr_name.compare("chunkCount") == 0) { + if (data.size() >= sizeof(int)) { + memcpy(&info->chunk_count, &data.at(0), sizeof(int)); + tinyexr::swap4(&info->chunk_count); + } + } else if (attr_name.compare("name") == 0) { + if (!data.empty() && data[0]) { + data.push_back(0); + size_t len = strlen(reinterpret_cast(&data[0])); + info->name.resize(len); + info->name.assign(reinterpret_cast(&data[0]), len); + has_name = true; + } + } else if (attr_name.compare("type") == 0) { + if (!data.empty() && data[0]) { + data.push_back(0); + size_t len = strlen(reinterpret_cast(&data[0])); + info->type.resize(len); + info->type.assign(reinterpret_cast(&data[0]), len); + has_type = true; + } + } else { + // Custom attribute(up to TINYEXR_MAX_CUSTOM_ATTRIBUTES) + if (info->attributes.size() < TINYEXR_MAX_CUSTOM_ATTRIBUTES) { + EXRAttribute attrib; +#ifdef _MSC_VER + strncpy_s(attrib.name, attr_name.c_str(), 255); + strncpy_s(attrib.type, attr_type.c_str(), 255); +#else + strncpy(attrib.name, attr_name.c_str(), 255); + strncpy(attrib.type, attr_type.c_str(), 255); +#endif + attrib.name[255] = '\0'; + attrib.type[255] = '\0'; + //std::cout << "i = " << info->attributes.size() << ", dsize = " << data.size() << "\n"; + attrib.size = static_cast(data.size()); + attrib.value = static_cast(malloc(data.size())); + memcpy(reinterpret_cast(attrib.value), &data.at(0), + data.size()); + 
info->attributes.push_back(attrib); + } + } + } + + // Check if required attributes exist + { + std::stringstream ss_err; + + if (!has_compression) { + ss_err << "\"compression\" attribute not found in the header." + << std::endl; + } + + if (!has_channels) { + ss_err << "\"channels\" attribute not found in the header." << std::endl; + } + + if (!has_line_order) { + ss_err << "\"lineOrder\" attribute not found in the header." << std::endl; + } + + if (!has_display_window) { + ss_err << "\"displayWindow\" attribute not found in the header." + << std::endl; + } + + if (!has_data_window) { + ss_err << "\"dataWindow\" attribute not found in the header or invalid." + << std::endl; + } + + if (!has_pixel_aspect_ratio) { + ss_err << "\"pixelAspectRatio\" attribute not found in the header." + << std::endl; + } + + if (!has_screen_window_width) { + ss_err << "\"screenWindowWidth\" attribute not found in the header." + << std::endl; + } + + if (!has_screen_window_center) { + ss_err << "\"screenWindowCenter\" attribute not found in the header." + << std::endl; + } + + if (version->multipart || version->non_image) { + if (!has_name) { + ss_err << "\"name\" attribute not found in the header." + << std::endl; + } + if (!has_type) { + ss_err << "\"type\" attribute not found in the header." + << std::endl; + } + } + + if (!(ss_err.str().empty())) { + if (err) { + (*err) += ss_err.str(); + } + + return TINYEXR_ERROR_INVALID_HEADER; + } + } + + info->header_len = static_cast(orig_size - size); + + return TINYEXR_SUCCESS; +} + +// C++ HeaderInfo to C EXRHeader conversion. 
+static bool ConvertHeader(EXRHeader *exr_header, const HeaderInfo &info, std::string *warn, std::string *err) { + exr_header->pixel_aspect_ratio = info.pixel_aspect_ratio; + exr_header->screen_window_center[0] = info.screen_window_center[0]; + exr_header->screen_window_center[1] = info.screen_window_center[1]; + exr_header->screen_window_width = info.screen_window_width; + exr_header->chunk_count = info.chunk_count; + exr_header->display_window.min_x = info.display_window.min_x; + exr_header->display_window.min_y = info.display_window.min_y; + exr_header->display_window.max_x = info.display_window.max_x; + exr_header->display_window.max_y = info.display_window.max_y; + exr_header->data_window.min_x = info.data_window.min_x; + exr_header->data_window.min_y = info.data_window.min_y; + exr_header->data_window.max_x = info.data_window.max_x; + exr_header->data_window.max_y = info.data_window.max_y; + exr_header->line_order = info.line_order; + exr_header->compression_type = info.compression_type; + exr_header->tiled = info.tiled; + exr_header->tile_size_x = info.tile_size_x; + exr_header->tile_size_y = info.tile_size_y; + exr_header->tile_level_mode = info.tile_level_mode; + exr_header->tile_rounding_mode = info.tile_rounding_mode; + + EXRSetNameAttr(exr_header, info.name.c_str()); + + + if (!info.type.empty()) { + bool valid = true; + if (info.type == "scanlineimage") { + if (exr_header->tiled) { + if (err) { + (*err) += "(ConvertHeader) tiled bit must be off for `scanlineimage` type.\n"; + } + valid = false; + } + } else if (info.type == "tiledimage") { + if (!exr_header->tiled) { + if (err) { + (*err) += "(ConvertHeader) tiled bit must be on for `tiledimage` type.\n"; + } + valid = false; + } + } else if (info.type == "deeptile") { + exr_header->non_image = 1; + if (!exr_header->tiled) { + if (err) { + (*err) += "(ConvertHeader) tiled bit must be on for `deeptile` type.\n"; + } + valid = false; + } + } else if (info.type == "deepscanline") { + exr_header->non_image 
= 1; + if (exr_header->tiled) { + if (err) { + (*err) += "(ConvertHeader) tiled bit must be off for `deepscanline` type.\n"; + } + //valid = false; + } + } else { + if (warn) { + std::stringstream ss; + ss << "(ConvertHeader) Unsupported or unknown info.type: " << info.type << "\n"; + (*warn) += ss.str(); + } + } + + if (!valid) { + return false; + } + } + + exr_header->num_channels = static_cast(info.channels.size()); + + exr_header->channels = static_cast(malloc( + sizeof(EXRChannelInfo) * static_cast(exr_header->num_channels))); + for (size_t c = 0; c < static_cast(exr_header->num_channels); c++) { +#ifdef _MSC_VER + strncpy_s(exr_header->channels[c].name, info.channels[c].name.c_str(), 255); +#else + strncpy(exr_header->channels[c].name, info.channels[c].name.c_str(), 255); +#endif + // manually add '\0' for safety. + exr_header->channels[c].name[255] = '\0'; + + exr_header->channels[c].pixel_type = info.channels[c].pixel_type; + exr_header->channels[c].p_linear = info.channels[c].p_linear; + exr_header->channels[c].x_sampling = info.channels[c].x_sampling; + exr_header->channels[c].y_sampling = info.channels[c].y_sampling; + } + + exr_header->pixel_types = static_cast( + malloc(sizeof(int) * static_cast(exr_header->num_channels))); + for (size_t c = 0; c < static_cast(exr_header->num_channels); c++) { + exr_header->pixel_types[c] = info.channels[c].pixel_type; + } + + // Initially fill with values of `pixel_types` + exr_header->requested_pixel_types = static_cast( + malloc(sizeof(int) * static_cast(exr_header->num_channels))); + for (size_t c = 0; c < static_cast(exr_header->num_channels); c++) { + exr_header->requested_pixel_types[c] = info.channels[c].pixel_type; + } + + exr_header->num_custom_attributes = static_cast(info.attributes.size()); + + if (exr_header->num_custom_attributes > 0) { + // TODO(syoyo): Report warning when # of attributes exceeds + // `TINYEXR_MAX_CUSTOM_ATTRIBUTES` + if (exr_header->num_custom_attributes > 
TINYEXR_MAX_CUSTOM_ATTRIBUTES) { + exr_header->num_custom_attributes = TINYEXR_MAX_CUSTOM_ATTRIBUTES; + } + + exr_header->custom_attributes = static_cast(malloc( + sizeof(EXRAttribute) * size_t(exr_header->num_custom_attributes))); + + for (size_t i = 0; i < size_t(exr_header->num_custom_attributes); i++) { + memcpy(exr_header->custom_attributes[i].name, info.attributes[i].name, + 256); + memcpy(exr_header->custom_attributes[i].type, info.attributes[i].type, + 256); + exr_header->custom_attributes[i].size = info.attributes[i].size; + // Just copy pointer + exr_header->custom_attributes[i].value = info.attributes[i].value; + } + + } else { + exr_header->custom_attributes = NULL; + } + + exr_header->header_len = info.header_len; + + return true; +} + +struct OffsetData { + OffsetData() : num_x_levels(0), num_y_levels(0) {} + std::vector > > offsets; + int num_x_levels; + int num_y_levels; +}; + +// -1 = error +static int LevelIndex(int lx, int ly, int tile_level_mode, int num_x_levels) { + switch (tile_level_mode) { + case TINYEXR_TILE_ONE_LEVEL: + return 0; + + case TINYEXR_TILE_MIPMAP_LEVELS: + return lx; + + case TINYEXR_TILE_RIPMAP_LEVELS: + return lx + ly * num_x_levels; + + default: + return -1; + } + return 0; +} + +static int LevelSize(int toplevel_size, int level, int tile_rounding_mode) { + if (level < 0) { + return -1; + } + + int b = static_cast(1u << static_cast(level)); + int level_size = toplevel_size / b; + + if (tile_rounding_mode == TINYEXR_TILE_ROUND_UP && level_size * b < toplevel_size) + level_size += 1; + + return std::max(level_size, 1); +} + +static int DecodeTiledLevel(EXRImage* exr_image, const EXRHeader* exr_header, + const OffsetData& offset_data, + const std::vector& channel_offset_list, + int pixel_data_size, + const unsigned char* head, const size_t size, + std::string* err) { + int num_channels = exr_header->num_channels; + + int level_index = LevelIndex(exr_image->level_x, exr_image->level_y, exr_header->tile_level_mode, 
offset_data.num_x_levels); + int num_y_tiles = int(offset_data.offsets[size_t(level_index)].size()); + if (num_y_tiles < 1) { + return TINYEXR_ERROR_INVALID_DATA; + } + int num_x_tiles = int(offset_data.offsets[size_t(level_index)][0].size()); + if (num_x_tiles < 1) { + return TINYEXR_ERROR_INVALID_DATA; + } + int num_tiles = num_x_tiles * num_y_tiles; + + int err_code = TINYEXR_SUCCESS; + + enum { + EF_SUCCESS = 0, + EF_INVALID_DATA = 1, + EF_INSUFFICIENT_DATA = 2, + EF_FAILED_TO_DECODE = 4 + }; +#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0) + std::atomic error_flag(EF_SUCCESS); +#else + unsigned error_flag(EF_SUCCESS); +#endif + + // Although the spec says : "...the data window is subdivided into an array of smaller rectangles...", + // the IlmImf library allows the dimensions of the tile to be larger (or equal) than the dimensions of the data window. +#if 0 + if ((exr_header->tile_size_x > exr_image->width || exr_header->tile_size_y > exr_image->height) && + exr_image->level_x == 0 && exr_image->level_y == 0) { + if (err) { + (*err) += "Failed to decode tile data.\n"; + } + err_code = TINYEXR_ERROR_INVALID_DATA; + } +#endif + exr_image->tiles = static_cast( + calloc(static_cast(num_tiles), sizeof(EXRTile))); + +#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0) + std::vector workers; + std::atomic tile_count(0); + + int num_threads = std::max(1, int(std::thread::hardware_concurrency())); + if (num_threads > int(num_tiles)) { + num_threads = int(num_tiles); + } + + for (int t = 0; t < num_threads; t++) { + workers.emplace_back(std::thread([&]() + { + int tile_idx = 0; + while ((tile_idx = tile_count++) < num_tiles) { + +#else +#if TINYEXR_USE_OPENMP +#pragma omp parallel for +#endif + for (int tile_idx = 0; tile_idx < num_tiles; tile_idx++) { +#endif + // Allocate memory for each tile. 
+ bool alloc_success = false; + exr_image->tiles[tile_idx].images = tinyexr::AllocateImage( + num_channels, exr_header->channels, + exr_header->requested_pixel_types, exr_header->tile_size_x, + exr_header->tile_size_y, &alloc_success); + + if (!alloc_success) { + error_flag |= EF_INVALID_DATA; + continue; + } + + int x_tile = tile_idx % num_x_tiles; + int y_tile = tile_idx / num_x_tiles; + // 16 byte: tile coordinates + // 4 byte : data size + // ~ : data(uncompressed or compressed) + tinyexr::tinyexr_uint64 offset = offset_data.offsets[size_t(level_index)][size_t(y_tile)][size_t(x_tile)]; + if (offset + sizeof(int) * 5 > size) { + // Insufficient data size. + error_flag |= EF_INSUFFICIENT_DATA; + continue; + } + + size_t data_size = + size_t(size - (offset + sizeof(int) * 5)); + const unsigned char* data_ptr = + reinterpret_cast(head + offset); + + int tile_coordinates[4]; + memcpy(tile_coordinates, data_ptr, sizeof(int) * 4); + tinyexr::swap4(&tile_coordinates[0]); + tinyexr::swap4(&tile_coordinates[1]); + tinyexr::swap4(&tile_coordinates[2]); + tinyexr::swap4(&tile_coordinates[3]); + + if (tile_coordinates[2] != exr_image->level_x) { + // Invalid data. + error_flag |= EF_INVALID_DATA; + continue; + } + if (tile_coordinates[3] != exr_image->level_y) { + // Invalid data. + error_flag |= EF_INVALID_DATA; + continue; + } + + int data_len; + memcpy(&data_len, data_ptr + 16, + sizeof(int)); // 16 = sizeof(tile_coordinates) + tinyexr::swap4(&data_len); + + if (data_len < 2 || size_t(data_len) > data_size) { + // Insufficient data size. 
+ error_flag |= EF_INSUFFICIENT_DATA; + continue; + } + + // Move to data addr: 20 = 16 + 4; + data_ptr += 20; + bool ret = tinyexr::DecodeTiledPixelData( + exr_image->tiles[tile_idx].images, + &(exr_image->tiles[tile_idx].width), + &(exr_image->tiles[tile_idx].height), + exr_header->requested_pixel_types, data_ptr, + static_cast(data_len), exr_header->compression_type, + exr_header->line_order, + exr_image->width, exr_image->height, + tile_coordinates[0], tile_coordinates[1], exr_header->tile_size_x, + exr_header->tile_size_y, static_cast(pixel_data_size), + static_cast(exr_header->num_custom_attributes), + exr_header->custom_attributes, + static_cast(exr_header->num_channels), + exr_header->channels, channel_offset_list); + + if (!ret) { + // Failed to decode tile data. + error_flag |= EF_FAILED_TO_DECODE; + } + + exr_image->tiles[tile_idx].offset_x = tile_coordinates[0]; + exr_image->tiles[tile_idx].offset_y = tile_coordinates[1]; + exr_image->tiles[tile_idx].level_x = tile_coordinates[2]; + exr_image->tiles[tile_idx].level_y = tile_coordinates[3]; + +#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0) + } + })); + } // num_thread loop + + for (auto& t : workers) { + t.join(); + } + +#else + } // parallel for +#endif + + // Even in the event of an error, the reserved memory may be freed. 
+ exr_image->num_channels = num_channels; + exr_image->num_tiles = static_cast(num_tiles); + + if (error_flag) err_code = TINYEXR_ERROR_INVALID_DATA; + if (err) { + if (error_flag & EF_INSUFFICIENT_DATA) { + (*err) += "Insufficient data length.\n"; + } + if (error_flag & EF_FAILED_TO_DECODE) { + (*err) += "Failed to decode tile data.\n"; + } + } + return err_code; +} + +static int DecodeChunk(EXRImage *exr_image, const EXRHeader *exr_header, + const OffsetData& offset_data, + const unsigned char *head, const size_t size, + std::string *err) { + int num_channels = exr_header->num_channels; + + int num_scanline_blocks = 1; + if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) { + num_scanline_blocks = 16; + } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { + num_scanline_blocks = 32; + } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { + num_scanline_blocks = 16; + +#if TINYEXR_USE_ZFP + tinyexr::ZFPCompressionParam zfp_compression_param; + if (!FindZFPCompressionParam(&zfp_compression_param, + exr_header->custom_attributes, + int(exr_header->num_custom_attributes), err)) { + return TINYEXR_ERROR_INVALID_HEADER; + } +#endif + } + + if (exr_header->data_window.max_x < exr_header->data_window.min_x || + exr_header->data_window.max_y < exr_header->data_window.min_y) { + if (err) { + (*err) += "Invalid data window.\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } + + tinyexr_int64 data_width = + static_cast(exr_header->data_window.max_x) - static_cast(exr_header->data_window.min_x) + static_cast(1); + tinyexr_int64 data_height = + static_cast(exr_header->data_window.max_y) - static_cast(exr_header->data_window.min_y) + static_cast(1); + + if (data_width <= 0) { + if (err) { + (*err) += "Invalid data window width.\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } + + if (data_height <= 0) { + if (err) { + (*err) += "Invalid data window height.\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } + + // Do not allow 
too large data_width and data_height. header invalid? + { + if ((data_width > TINYEXR_DIMENSION_THRESHOLD) || (data_height > TINYEXR_DIMENSION_THRESHOLD)) { + if (err) { + std::stringstream ss; + ss << "data_with or data_height too large. data_width: " << data_width + << ", " + << "data_height = " << data_height << std::endl; + (*err) += ss.str(); + } + return TINYEXR_ERROR_INVALID_DATA; + } + if (exr_header->tiled) { + if ((exr_header->tile_size_x > TINYEXR_DIMENSION_THRESHOLD) || (exr_header->tile_size_y > TINYEXR_DIMENSION_THRESHOLD)) { + if (err) { + std::stringstream ss; + ss << "tile with or tile height too large. tile width: " << exr_header->tile_size_x + << ", " + << "tile height = " << exr_header->tile_size_y << std::endl; + (*err) += ss.str(); + } + return TINYEXR_ERROR_INVALID_DATA; + } + } + } + + const std::vector& offsets = offset_data.offsets[0][0]; + size_t num_blocks = offsets.size(); + + std::vector channel_offset_list; + int pixel_data_size = 0; + size_t channel_offset = 0; + if (!tinyexr::ComputeChannelLayout(&channel_offset_list, &pixel_data_size, + &channel_offset, num_channels, + exr_header->channels)) { + if (err) { + (*err) += "Failed to compute channel layout.\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } + +#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0) + std::atomic invalid_data(false); +#else + bool invalid_data(false); +#endif + + if (exr_header->tiled) { + // value check + if (exr_header->tile_size_x < 0) { + if (err) { + std::stringstream ss; + ss << "Invalid tile size x : " << exr_header->tile_size_x << "\n"; + (*err) += ss.str(); + } + return TINYEXR_ERROR_INVALID_HEADER; + } + + if (exr_header->tile_size_y < 0) { + if (err) { + std::stringstream ss; + ss << "Invalid tile size y : " << exr_header->tile_size_y << "\n"; + (*err) += ss.str(); + } + return TINYEXR_ERROR_INVALID_HEADER; + } + if (exr_header->tile_level_mode != TINYEXR_TILE_RIPMAP_LEVELS) { + EXRImage* level_image = NULL; + for (int level = 0; level < 
offset_data.num_x_levels; ++level) { + if (!level_image) { + level_image = exr_image; + } else { + level_image->next_level = new EXRImage; + InitEXRImage(level_image->next_level); + level_image = level_image->next_level; + } + level_image->width = + LevelSize(exr_header->data_window.max_x - exr_header->data_window.min_x + 1, level, exr_header->tile_rounding_mode); + if (level_image->width < 1) { + return TINYEXR_ERROR_INVALID_DATA; + } + + level_image->height = + LevelSize(exr_header->data_window.max_y - exr_header->data_window.min_y + 1, level, exr_header->tile_rounding_mode); + + if (level_image->height < 1) { + return TINYEXR_ERROR_INVALID_DATA; + } + + level_image->level_x = level; + level_image->level_y = level; + + int ret = DecodeTiledLevel(level_image, exr_header, + offset_data, + channel_offset_list, + pixel_data_size, + head, size, + err); + if (ret != TINYEXR_SUCCESS) return ret; + } + } else { + EXRImage* level_image = NULL; + for (int level_y = 0; level_y < offset_data.num_y_levels; ++level_y) + for (int level_x = 0; level_x < offset_data.num_x_levels; ++level_x) { + if (!level_image) { + level_image = exr_image; + } else { + level_image->next_level = new EXRImage; + InitEXRImage(level_image->next_level); + level_image = level_image->next_level; + } + + level_image->width = + LevelSize(exr_header->data_window.max_x - exr_header->data_window.min_x + 1, level_x, exr_header->tile_rounding_mode); + if (level_image->width < 1) { + return TINYEXR_ERROR_INVALID_DATA; + } + + level_image->height = + LevelSize(exr_header->data_window.max_y - exr_header->data_window.min_y + 1, level_y, exr_header->tile_rounding_mode); + if (level_image->height < 1) { + return TINYEXR_ERROR_INVALID_DATA; + } + + level_image->level_x = level_x; + level_image->level_y = level_y; + + int ret = DecodeTiledLevel(level_image, exr_header, + offset_data, + channel_offset_list, + pixel_data_size, + head, size, + err); + if (ret != TINYEXR_SUCCESS) return ret; + } + } + } else { // 
scanline format + // Don't allow too large image(256GB * pixel_data_size or more). Workaround + // for #104. + size_t total_data_len = + size_t(data_width) * size_t(data_height) * size_t(num_channels); + const bool total_data_len_overflown = + sizeof(void *) == 8 ? (total_data_len >= 0x4000000000) : false; + if ((total_data_len == 0) || total_data_len_overflown) { + if (err) { + std::stringstream ss; + ss << "Image data size is zero or too large: width = " << data_width + << ", height = " << data_height << ", channels = " << num_channels + << std::endl; + (*err) += ss.str(); + } + return TINYEXR_ERROR_INVALID_DATA; + } + + bool alloc_success = false; + exr_image->images = tinyexr::AllocateImage( + num_channels, exr_header->channels, exr_header->requested_pixel_types, + int(data_width), int(data_height), &alloc_success); + + if (!alloc_success) { + if (err) { + std::stringstream ss; + ss << "Failed to allocate memory for Images. Maybe EXR header is corrupted or Image data size is too large: width = " << data_width + << ", height = " << data_height << ", channels = " << num_channels + << std::endl; + (*err) += ss.str(); + } + return TINYEXR_ERROR_INVALID_DATA; + } + +#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0) + std::vector workers; + std::atomic y_count(0); + + int num_threads = std::max(1, int(std::thread::hardware_concurrency())); + if (num_threads > int(num_blocks)) { + num_threads = int(num_blocks); + } + + for (int t = 0; t < num_threads; t++) { + workers.emplace_back(std::thread([&]() { + int y = 0; + while ((y = y_count++) < int(num_blocks)) { + +#else + +#if TINYEXR_USE_OPENMP +#pragma omp parallel for +#endif + for (int y = 0; y < static_cast(num_blocks); y++) { + +#endif + size_t y_idx = static_cast(y); + + if (offsets[y_idx] + sizeof(int) * 2 > size) { + invalid_data = true; + } else { + // 4 byte: scan line + // 4 byte: data size + // ~ : pixel data(uncompressed or compressed) + size_t data_size = + size_t(size - (offsets[y_idx] + sizeof(int) * 
2)); + const unsigned char *data_ptr = + reinterpret_cast(head + offsets[y_idx]); + + int line_no; + memcpy(&line_no, data_ptr, sizeof(int)); + int data_len; + memcpy(&data_len, data_ptr + 4, sizeof(int)); + tinyexr::swap4(&line_no); + tinyexr::swap4(&data_len); + + if (size_t(data_len) > data_size) { + invalid_data = true; + + } else if ((line_no > (2 << 20)) || (line_no < -(2 << 20))) { + // Too large value. Assume this is invalid + // 2**20 = 1048576 = heuristic value. + invalid_data = true; + } else if (data_len == 0) { + // TODO(syoyo): May be ok to raise the threshold for example + // `data_len < 4` + invalid_data = true; + } else { + // line_no may be negative. + int end_line_no = (std::min)(line_no + num_scanline_blocks, + (exr_header->data_window.max_y + 1)); + + int num_lines = end_line_no - line_no; + + if (num_lines <= 0) { + invalid_data = true; + } else { + // Move to data addr: 8 = 4 + 4; + data_ptr += 8; + + // Adjust line_no with data_window.bmin.y + + // overflow check + tinyexr_int64 lno = + static_cast(line_no) - + static_cast(exr_header->data_window.min_y); + if (lno > std::numeric_limits::max()) { + line_no = -1; // invalid + } else if (lno < -std::numeric_limits::max()) { + line_no = -1; // invalid + } else { + line_no -= exr_header->data_window.min_y; + } + + if (line_no < 0) { + invalid_data = true; + } else { + if (!tinyexr::DecodePixelData( + exr_image->images, exr_header->requested_pixel_types, + data_ptr, static_cast(data_len), + exr_header->compression_type, exr_header->line_order, + int(data_width), int(data_height), int(data_width), y, line_no, + num_lines, static_cast(pixel_data_size), + static_cast( + exr_header->num_custom_attributes), + exr_header->custom_attributes, + static_cast(exr_header->num_channels), + exr_header->channels, channel_offset_list)) { + invalid_data = true; + } + } + } + } + } + +#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0) + } + })); + } + + for (auto &t : workers) { + t.join(); + } +#else + } // omp 
parallel +#endif + } + + if (invalid_data) { + if (err) { + (*err) += "Invalid/Corrupted data found when decoding pixels.\n"; + } + + // free alloced image. + for (size_t c = 0; c < static_cast(num_channels); c++) { + if (exr_image->images[c]) { + free(exr_image->images[c]); + exr_image->images[c] = NULL; + } + } + return TINYEXR_ERROR_INVALID_DATA; + } + + // Overwrite `pixel_type` with `requested_pixel_type`. + { + for (int c = 0; c < exr_header->num_channels; c++) { + exr_header->pixel_types[c] = exr_header->requested_pixel_types[c]; + } + } + + { + exr_image->num_channels = num_channels; + + exr_image->width = int(data_width); + exr_image->height = int(data_height); + } + + return TINYEXR_SUCCESS; +} + +static bool ReconstructLineOffsets( + std::vector *offsets, size_t n, + const unsigned char *head, const unsigned char *marker, const size_t size) { + if (head >= marker) { + return false; + } + if (offsets->size() != n) { + return false; + } + + for (size_t i = 0; i < n; i++) { + size_t offset = static_cast(marker - head); + // Offset should not exceed whole EXR file/data size. + if ((offset + sizeof(tinyexr::tinyexr_uint64)) >= size) { + return false; + } + + int y; + unsigned int data_len; + + memcpy(&y, marker, sizeof(int)); + memcpy(&data_len, marker + 4, sizeof(unsigned int)); + + if (data_len >= size) { + return false; + } + + tinyexr::swap4(&y); + tinyexr::swap4(&data_len); + + (*offsets)[i] = offset; + + marker += data_len + 8; // 8 = 4 bytes(y) + 4 bytes(data_len) + } + + return true; +} + + +static int FloorLog2(unsigned x) { + // + // For x > 0, floorLog2(y) returns floor(log(x)/log(2)). + // + int y = 0; + while (x > 1) { + y += 1; + x >>= 1u; + } + return y; +} + + +static int CeilLog2(unsigned x) { + // + // For x > 0, ceilLog2(y) returns ceil(log(x)/log(2)). 
+ // + int y = 0; + int r = 0; + while (x > 1) { + if (x & 1) + r = 1; + + y += 1; + x >>= 1u; + } + return y + r; +} + +static int RoundLog2(int x, int tile_rounding_mode) { + return (tile_rounding_mode == TINYEXR_TILE_ROUND_DOWN) ? FloorLog2(static_cast(x)) : CeilLog2(static_cast(x)); +} + +static int CalculateNumXLevels(const EXRHeader* exr_header) { + int min_x = exr_header->data_window.min_x; + int max_x = exr_header->data_window.max_x; + int min_y = exr_header->data_window.min_y; + int max_y = exr_header->data_window.max_y; + + int num = 0; + switch (exr_header->tile_level_mode) { + case TINYEXR_TILE_ONE_LEVEL: + + num = 1; + break; + + case TINYEXR_TILE_MIPMAP_LEVELS: + + { + int w = max_x - min_x + 1; + int h = max_y - min_y + 1; + num = RoundLog2(std::max(w, h), exr_header->tile_rounding_mode) + 1; + } + break; + + case TINYEXR_TILE_RIPMAP_LEVELS: + + { + int w = max_x - min_x + 1; + num = RoundLog2(w, exr_header->tile_rounding_mode) + 1; + } + break; + + default: + + return -1; + } + + return num; +} + +static int CalculateNumYLevels(const EXRHeader* exr_header) { + int min_x = exr_header->data_window.min_x; + int max_x = exr_header->data_window.max_x; + int min_y = exr_header->data_window.min_y; + int max_y = exr_header->data_window.max_y; + int num = 0; + + switch (exr_header->tile_level_mode) { + case TINYEXR_TILE_ONE_LEVEL: + + num = 1; + break; + + case TINYEXR_TILE_MIPMAP_LEVELS: + + { + int w = max_x - min_x + 1; + int h = max_y - min_y + 1; + num = RoundLog2(std::max(w, h), exr_header->tile_rounding_mode) + 1; + } + break; + + case TINYEXR_TILE_RIPMAP_LEVELS: + + { + int h = max_y - min_y + 1; + num = RoundLog2(h, exr_header->tile_rounding_mode) + 1; + } + break; + + default: + + return -1; + } + + return num; +} + +static bool CalculateNumTiles(std::vector& numTiles, + int toplevel_size, + int size, + int tile_rounding_mode) { + for (unsigned i = 0; i < numTiles.size(); i++) { + int l = LevelSize(toplevel_size, int(i), tile_rounding_mode); + if 
(l < 0) { + return false; + } + TINYEXR_CHECK_AND_RETURN_C(l <= std::numeric_limits::max() - size + 1, false); + + numTiles[i] = (l + size - 1) / size; + } + return true; +} + +static bool PrecalculateTileInfo(std::vector& num_x_tiles, + std::vector& num_y_tiles, + const EXRHeader* exr_header) { + int min_x = exr_header->data_window.min_x; + int max_x = exr_header->data_window.max_x; + int min_y = exr_header->data_window.min_y; + int max_y = exr_header->data_window.max_y; + + int num_x_levels = CalculateNumXLevels(exr_header); + + if (num_x_levels < 0) { + return false; + } + + int num_y_levels = CalculateNumYLevels(exr_header); + + if (num_y_levels < 0) { + return false; + } + + num_x_tiles.resize(size_t(num_x_levels)); + num_y_tiles.resize(size_t(num_y_levels)); + + if (!CalculateNumTiles(num_x_tiles, + max_x - min_x + 1, + exr_header->tile_size_x, + exr_header->tile_rounding_mode)) { + return false; + } + + if (!CalculateNumTiles(num_y_tiles, + max_y - min_y + 1, + exr_header->tile_size_y, + exr_header->tile_rounding_mode)) { + return false; + } + + return true; +} + +static void InitSingleResolutionOffsets(OffsetData& offset_data, size_t num_blocks) { + offset_data.offsets.resize(1); + offset_data.offsets[0].resize(1); + offset_data.offsets[0][0].resize(num_blocks); + offset_data.num_x_levels = 1; + offset_data.num_y_levels = 1; +} + +// Return sum of tile blocks. 
+// 0 = error +static int InitTileOffsets(OffsetData& offset_data, + const EXRHeader* exr_header, + const std::vector& num_x_tiles, + const std::vector& num_y_tiles) { + int num_tile_blocks = 0; + offset_data.num_x_levels = static_cast(num_x_tiles.size()); + offset_data.num_y_levels = static_cast(num_y_tiles.size()); + switch (exr_header->tile_level_mode) { + case TINYEXR_TILE_ONE_LEVEL: + case TINYEXR_TILE_MIPMAP_LEVELS: + TINYEXR_CHECK_AND_RETURN_C(offset_data.num_x_levels == offset_data.num_y_levels, 0); + offset_data.offsets.resize(size_t(offset_data.num_x_levels)); + + for (unsigned int l = 0; l < offset_data.offsets.size(); ++l) { + offset_data.offsets[l].resize(size_t(num_y_tiles[l])); + + for (unsigned int dy = 0; dy < offset_data.offsets[l].size(); ++dy) { + offset_data.offsets[l][dy].resize(size_t(num_x_tiles[l])); + num_tile_blocks += num_x_tiles[l]; + } + } + break; + + case TINYEXR_TILE_RIPMAP_LEVELS: + + offset_data.offsets.resize(static_cast(offset_data.num_x_levels) * static_cast(offset_data.num_y_levels)); + + for (int ly = 0; ly < offset_data.num_y_levels; ++ly) { + for (int lx = 0; lx < offset_data.num_x_levels; ++lx) { + int l = ly * offset_data.num_x_levels + lx; + offset_data.offsets[size_t(l)].resize(size_t(num_y_tiles[size_t(ly)])); + + for (size_t dy = 0; dy < offset_data.offsets[size_t(l)].size(); ++dy) { + offset_data.offsets[size_t(l)][dy].resize(size_t(num_x_tiles[size_t(lx)])); + num_tile_blocks += num_x_tiles[size_t(lx)]; + } + } + } + break; + + default: + return 0; + } + return num_tile_blocks; +} + +static bool IsAnyOffsetsAreInvalid(const OffsetData& offset_data) { + for (unsigned int l = 0; l < offset_data.offsets.size(); ++l) + for (unsigned int dy = 0; dy < offset_data.offsets[l].size(); ++dy) + for (unsigned int dx = 0; dx < offset_data.offsets[l][dy].size(); ++dx) + if (reinterpret_cast(offset_data.offsets[l][dy][dx]) <= 0) + return true; + + return false; +} + +static bool isValidTile(const EXRHeader* exr_header, + const 
OffsetData& offset_data, + int dx, int dy, int lx, int ly) { + if (lx < 0 || ly < 0 || dx < 0 || dy < 0) return false; + int num_x_levels = offset_data.num_x_levels; + int num_y_levels = offset_data.num_y_levels; + switch (exr_header->tile_level_mode) { + case TINYEXR_TILE_ONE_LEVEL: + + if (lx == 0 && + ly == 0 && + offset_data.offsets.size() > 0 && + offset_data.offsets[0].size() > static_cast(dy) && + offset_data.offsets[0][size_t(dy)].size() > static_cast(dx)) { + return true; + } + + break; + + case TINYEXR_TILE_MIPMAP_LEVELS: + + if (lx < num_x_levels && + ly < num_y_levels && + offset_data.offsets.size() > static_cast(lx) && + offset_data.offsets[size_t(lx)].size() > static_cast(dy) && + offset_data.offsets[size_t(lx)][size_t(dy)].size() > static_cast(dx)) { + return true; + } + + break; + + case TINYEXR_TILE_RIPMAP_LEVELS: + { + size_t idx = static_cast(lx) + static_cast(ly)* static_cast(num_x_levels); + if (lx < num_x_levels && + ly < num_y_levels && + (offset_data.offsets.size() > idx) && + offset_data.offsets[idx].size() > static_cast(dy) && + offset_data.offsets[idx][size_t(dy)].size() > static_cast(dx)) { + return true; + } + } + + break; + + default: + + return false; + } + + return false; +} + +static bool ReconstructTileOffsets(OffsetData& offset_data, + const EXRHeader* exr_header, + const unsigned char* head, const unsigned char* marker, const size_t size, + bool isMultiPartFile, + bool isDeep) { + int numXLevels = offset_data.num_x_levels; + for (unsigned int l = 0; l < offset_data.offsets.size(); ++l) { + for (unsigned int dy = 0; dy < offset_data.offsets[l].size(); ++dy) { + for (unsigned int dx = 0; dx < offset_data.offsets[l][dy].size(); ++dx) { + tinyexr::tinyexr_uint64 tileOffset = tinyexr::tinyexr_uint64(marker - head); + + + if (isMultiPartFile) { + if ((marker + sizeof(int)) >= (head + size)) { + return false; + } + + //int partNumber; + marker += sizeof(int); + } + + if ((marker + 4 * sizeof(int)) >= (head + size)) { + return false; + } 
+ + int tileX; + memcpy(&tileX, marker, sizeof(int)); + tinyexr::swap4(&tileX); + marker += sizeof(int); + + int tileY; + memcpy(&tileY, marker, sizeof(int)); + tinyexr::swap4(&tileY); + marker += sizeof(int); + + int levelX; + memcpy(&levelX, marker, sizeof(int)); + tinyexr::swap4(&levelX); + marker += sizeof(int); + + int levelY; + memcpy(&levelY, marker, sizeof(int)); + tinyexr::swap4(&levelY); + marker += sizeof(int); + + if (isDeep) { + if ((marker + 2 * sizeof(tinyexr::tinyexr_int64)) >= (head + size)) { + return false; + } + tinyexr::tinyexr_int64 packed_offset_table_size; + memcpy(&packed_offset_table_size, marker, sizeof(tinyexr::tinyexr_int64)); + tinyexr::swap8(reinterpret_cast(&packed_offset_table_size)); + marker += sizeof(tinyexr::tinyexr_int64); + + tinyexr::tinyexr_int64 packed_sample_size; + memcpy(&packed_sample_size, marker, sizeof(tinyexr::tinyexr_int64)); + tinyexr::swap8(reinterpret_cast(&packed_sample_size)); + marker += sizeof(tinyexr::tinyexr_int64); + + // next Int64 is unpacked sample size - skip that too + marker += packed_offset_table_size + packed_sample_size + 8; + + if (marker >= (head + size)) { + return false; + } + + } else { + + if ((marker + sizeof(uint32_t)) >= (head + size)) { + return false; + } + + uint32_t dataSize; + memcpy(&dataSize, marker, sizeof(uint32_t)); + tinyexr::swap4(&dataSize); + marker += sizeof(uint32_t); + + marker += dataSize; + + if (marker >= (head + size)) { + return false; + } + } + + if (!isValidTile(exr_header, offset_data, + tileX, tileY, levelX, levelY)) { + return false; + } + + int level_idx = LevelIndex(levelX, levelY, exr_header->tile_level_mode, numXLevels); + if (level_idx < 0) { + return false; + } + + if (size_t(level_idx) >= offset_data.offsets.size()) { + return false; + } + + if (size_t(tileY) >= offset_data.offsets[size_t(level_idx)].size()) { + return false; + } + + if (size_t(tileX) >= offset_data.offsets[size_t(level_idx)][size_t(tileY)].size()) { + return false; + } + + 
offset_data.offsets[size_t(level_idx)][size_t(tileY)][size_t(tileX)] = tileOffset; + } + } + } + return true; +} + +// marker output is also +static int ReadOffsets(OffsetData& offset_data, + const unsigned char* head, + const unsigned char*& marker, + const size_t size, + const char** err) { + for (unsigned int l = 0; l < offset_data.offsets.size(); ++l) { + for (unsigned int dy = 0; dy < offset_data.offsets[l].size(); ++dy) { + for (unsigned int dx = 0; dx < offset_data.offsets[l][dy].size(); ++dx) { + tinyexr::tinyexr_uint64 offset; + if ((marker + sizeof(tinyexr_uint64)) >= (head + size)) { + tinyexr::SetErrorMessage("Insufficient data size in offset table.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + + memcpy(&offset, marker, sizeof(tinyexr::tinyexr_uint64)); + tinyexr::swap8(&offset); + if (offset >= size) { + tinyexr::SetErrorMessage("Invalid offset value in DecodeEXRImage.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + marker += sizeof(tinyexr::tinyexr_uint64); // = 8 + offset_data.offsets[l][dy][dx] = offset; + } + } + } + return TINYEXR_SUCCESS; +} + +static int DecodeEXRImage(EXRImage *exr_image, const EXRHeader *exr_header, + const unsigned char *head, + const unsigned char *marker, const size_t size, + const char **err) { + if (exr_image == NULL || exr_header == NULL || head == NULL || + marker == NULL || (size <= tinyexr::kEXRVersionSize)) { + tinyexr::SetErrorMessage("Invalid argument for DecodeEXRImage().", err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + int num_scanline_blocks = 1; + if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) { + num_scanline_blocks = 16; + } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { + num_scanline_blocks = 32; + } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { + num_scanline_blocks = 16; + } + + if (exr_header->data_window.max_x < exr_header->data_window.min_x || + exr_header->data_window.max_x - exr_header->data_window.min_x == + 
std::numeric_limits::max()) { + // Issue 63 + tinyexr::SetErrorMessage("Invalid data width value", err); + return TINYEXR_ERROR_INVALID_DATA; + } + tinyexr_int64 data_width = + static_cast(exr_header->data_window.max_x) - static_cast(exr_header->data_window.min_x) + static_cast(1); + if (data_width <= 0) { + tinyexr::SetErrorMessage("Invalid data window width value", err); + return TINYEXR_ERROR_INVALID_DATA; + } + + if (exr_header->data_window.max_y < exr_header->data_window.min_y || + exr_header->data_window.max_y - exr_header->data_window.min_y == + std::numeric_limits::max()) { + tinyexr::SetErrorMessage("Invalid data height value", err); + return TINYEXR_ERROR_INVALID_DATA; + } + tinyexr_int64 data_height = + static_cast(exr_header->data_window.max_y) - static_cast(exr_header->data_window.min_y) + static_cast(1); + + if (data_height <= 0) { + tinyexr::SetErrorMessage("Invalid data window height value", err); + return TINYEXR_ERROR_INVALID_DATA; + } + + // Do not allow too large data_width and data_height. header invalid? + { + if (data_width > TINYEXR_DIMENSION_THRESHOLD) { + tinyexr::SetErrorMessage("data width too large.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + if (data_height > TINYEXR_DIMENSION_THRESHOLD) { + tinyexr::SetErrorMessage("data height too large.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + } + + if (exr_header->tiled) { + if (exr_header->tile_size_x > TINYEXR_DIMENSION_THRESHOLD) { + tinyexr::SetErrorMessage("tile width too large.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + if (exr_header->tile_size_y > TINYEXR_DIMENSION_THRESHOLD) { + tinyexr::SetErrorMessage("tile height too large.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + } + + // Read offset tables. + OffsetData offset_data; + size_t num_blocks = 0; + // For a multi-resolution image, the size of the offset table will be calculated from the other attributes of the header. + // If chunk_count > 0 then chunk_count must be equal to the calculated tile count. 
+ if (exr_header->tiled) { + { + std::vector num_x_tiles, num_y_tiles; + if (!PrecalculateTileInfo(num_x_tiles, num_y_tiles, exr_header)) { + tinyexr::SetErrorMessage("Failed to precalculate tile info.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + num_blocks = size_t(InitTileOffsets(offset_data, exr_header, num_x_tiles, num_y_tiles)); + if (exr_header->chunk_count > 0) { + if (exr_header->chunk_count != static_cast(num_blocks)) { + tinyexr::SetErrorMessage("Invalid offset table size.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + } + } + + int ret = ReadOffsets(offset_data, head, marker, size, err); + if (ret != TINYEXR_SUCCESS) return ret; + if (IsAnyOffsetsAreInvalid(offset_data)) { + if (!ReconstructTileOffsets(offset_data, exr_header, + head, marker, size, + exr_header->multipart, exr_header->non_image)) { + + tinyexr::SetErrorMessage("Invalid Tile Offsets data.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + } + } else if (exr_header->chunk_count > 0) { + // Use `chunkCount` attribute. 
+ num_blocks = static_cast(exr_header->chunk_count); + InitSingleResolutionOffsets(offset_data, num_blocks); + } else { + num_blocks = static_cast(data_height) / + static_cast(num_scanline_blocks); + if (num_blocks * static_cast(num_scanline_blocks) < + static_cast(data_height)) { + num_blocks++; + } + + InitSingleResolutionOffsets(offset_data, num_blocks); + } + + if (!exr_header->tiled) { + std::vector& offsets = offset_data.offsets[0][0]; + for (size_t y = 0; y < num_blocks; y++) { + tinyexr::tinyexr_uint64 offset; + // Issue #81 + if ((marker + sizeof(tinyexr_uint64)) >= (head + size)) { + tinyexr::SetErrorMessage("Insufficient data size in offset table.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + + memcpy(&offset, marker, sizeof(tinyexr::tinyexr_uint64)); + tinyexr::swap8(&offset); + if (offset >= size) { + tinyexr::SetErrorMessage("Invalid offset value in DecodeEXRImage.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + marker += sizeof(tinyexr::tinyexr_uint64); // = 8 + offsets[y] = offset; + } + + // If line offsets are invalid, we try to reconstruct it. + // See OpenEXR/IlmImf/ImfScanLineInputFile.cpp::readLineOffsets() for details. + for (size_t y = 0; y < num_blocks; y++) { + if (offsets[y] <= 0) { + // TODO(syoyo) Report as warning? + // if (err) { + // stringstream ss; + // ss << "Incomplete lineOffsets." 
<< std::endl; + // (*err) += ss.str(); + //} + bool ret = + ReconstructLineOffsets(&offsets, num_blocks, head, marker, size); + if (ret) { + // OK + break; + } else { + tinyexr::SetErrorMessage( + "Cannot reconstruct lineOffset table in DecodeEXRImage.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + } + } + } + + { + std::string e; + int ret = DecodeChunk(exr_image, exr_header, offset_data, head, size, &e); + + if (ret != TINYEXR_SUCCESS) { + if (!e.empty()) { + tinyexr::SetErrorMessage(e, err); + } + +#if 1 + FreeEXRImage(exr_image); +#else + // release memory(if exists) + if ((exr_header->num_channels > 0) && exr_image && exr_image->images) { + for (size_t c = 0; c < size_t(exr_header->num_channels); c++) { + if (exr_image->images[c]) { + free(exr_image->images[c]); + exr_image->images[c] = NULL; + } + } + free(exr_image->images); + exr_image->images = NULL; + } +#endif + } + + return ret; + } +} + +static void GetLayers(const EXRHeader &exr_header, + std::vector &layer_names) { + // Naive implementation + // Group channels by layers + // go over all channel names, split by periods + // collect unique names + layer_names.clear(); + for (int c = 0; c < exr_header.num_channels; c++) { + std::string full_name(exr_header.channels[c].name); + const size_t pos = full_name.find_last_of('.'); + if (pos != std::string::npos && pos != 0 && pos + 1 < full_name.size()) { + full_name.erase(pos); + if (std::find(layer_names.begin(), layer_names.end(), full_name) == + layer_names.end()) + layer_names.push_back(full_name); + } + } +} + +struct LayerChannel { + explicit LayerChannel(size_t i, std::string n) : index(i), name(n) {} + size_t index; + std::string name; +}; + +static void ChannelsInLayer(const EXRHeader &exr_header, + const std::string &layer_name, + std::vector &channels) { + channels.clear(); + //std::cout << "layer_name = " << layer_name << "\n"; + for (int c = 0; c < exr_header.num_channels; c++) { + //std::cout << "chan[" << c << "] = " << 
exr_header.channels[c].name << "\n"; + std::string ch_name(exr_header.channels[c].name); + if (layer_name.empty()) { + const size_t pos = ch_name.find_last_of('.'); + if (pos != std::string::npos && pos < ch_name.size()) { + if (pos != 0) continue; + ch_name = ch_name.substr(pos + 1); + } + } else { + const size_t pos = ch_name.find(layer_name + '.'); + if (pos == std::string::npos) continue; + if (pos == 0) { + ch_name = ch_name.substr(layer_name.size() + 1); + } + } + LayerChannel ch(size_t(c), ch_name); + channels.push_back(ch); + } +} + +} // namespace tinyexr + +int EXRLayers(const char *filename, const char **layer_names[], int *num_layers, + const char **err) { + EXRVersion exr_version; + EXRHeader exr_header; + InitEXRHeader(&exr_header); + + { + int ret = ParseEXRVersionFromFile(&exr_version, filename); + if (ret != TINYEXR_SUCCESS) { + tinyexr::SetErrorMessage("Invalid EXR header.", err); + return ret; + } + + if (exr_version.multipart || exr_version.non_image) { + tinyexr::SetErrorMessage( + "Loading multipart or DeepImage is not supported in LoadEXR() API", + err); + return TINYEXR_ERROR_INVALID_DATA; // @fixme. 
+ } + } + + int ret = ParseEXRHeaderFromFile(&exr_header, &exr_version, filename, err); + if (ret != TINYEXR_SUCCESS) { + FreeEXRHeader(&exr_header); + return ret; + } + + std::vector layer_vec; + tinyexr::GetLayers(exr_header, layer_vec); + + (*num_layers) = int(layer_vec.size()); + (*layer_names) = static_cast( + malloc(sizeof(const char *) * static_cast(layer_vec.size()))); + for (size_t c = 0; c < static_cast(layer_vec.size()); c++) { +#ifdef _MSC_VER + (*layer_names)[c] = _strdup(layer_vec[c].c_str()); +#else + (*layer_names)[c] = strdup(layer_vec[c].c_str()); +#endif + } + + FreeEXRHeader(&exr_header); + return TINYEXR_SUCCESS; +} + +int LoadEXR(float **out_rgba, int *width, int *height, const char *filename, + const char **err) { + return LoadEXRWithLayer(out_rgba, width, height, filename, + /* layername */ NULL, err); +} + +int LoadEXRWithLayer(float **out_rgba, int *width, int *height, + const char *filename, const char *layername, + const char **err) { + if (out_rgba == NULL) { + tinyexr::SetErrorMessage("Invalid argument for LoadEXR()", err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + EXRVersion exr_version; + EXRImage exr_image; + EXRHeader exr_header; + InitEXRHeader(&exr_header); + InitEXRImage(&exr_image); + + { + int ret = ParseEXRVersionFromFile(&exr_version, filename); + if (ret != TINYEXR_SUCCESS) { + std::stringstream ss; + ss << "Failed to open EXR file or read version info from EXR file. code(" + << ret << ")"; + tinyexr::SetErrorMessage(ss.str(), err); + return ret; + } + + if (exr_version.multipart || exr_version.non_image) { + tinyexr::SetErrorMessage( + "Loading multipart or DeepImage is not supported in LoadEXR() API", + err); + return TINYEXR_ERROR_INVALID_DATA; // @fixme. + } + } + + { + int ret = ParseEXRHeaderFromFile(&exr_header, &exr_version, filename, err); + if (ret != TINYEXR_SUCCESS) { + FreeEXRHeader(&exr_header); + return ret; + } + } + + // Read HALF channel as FLOAT. 
+ for (int i = 0; i < exr_header.num_channels; i++) { + if (exr_header.pixel_types[i] == TINYEXR_PIXELTYPE_HALF) { + exr_header.requested_pixel_types[i] = TINYEXR_PIXELTYPE_FLOAT; + } + } + + // TODO: Probably limit loading to layers (channels) selected by layer index + { + int ret = LoadEXRImageFromFile(&exr_image, &exr_header, filename, err); + if (ret != TINYEXR_SUCCESS) { + FreeEXRHeader(&exr_header); + return ret; + } + } + + // RGBA + int idxR = -1; + int idxG = -1; + int idxB = -1; + int idxA = -1; + + std::vector layer_names; + tinyexr::GetLayers(exr_header, layer_names); + + std::vector channels; + tinyexr::ChannelsInLayer( + exr_header, layername == NULL ? "" : std::string(layername), channels); + + + if (channels.size() < 1) { + if (layername == NULL) { + tinyexr::SetErrorMessage("Layer Not Found. Seems EXR contains channels with layer(e.g. `diffuse.R`). if you are using LoadEXR(), please try LoadEXRWithLayer(). LoadEXR() cannot load EXR having channels with layer.", err); + + } else { + tinyexr::SetErrorMessage("Layer Not Found", err); + } + FreeEXRHeader(&exr_header); + FreeEXRImage(&exr_image); + return TINYEXR_ERROR_LAYER_NOT_FOUND; + } + + size_t ch_count = channels.size() < 4 ? channels.size() : 4; + for (size_t c = 0; c < ch_count; c++) { + const tinyexr::LayerChannel &ch = channels[c]; + + if (ch.name == "R") { + idxR = int(ch.index); + } else if (ch.name == "G") { + idxG = int(ch.index); + } else if (ch.name == "B") { + idxB = int(ch.index); + } else if (ch.name == "A") { + idxA = int(ch.index); + } + } + + if (channels.size() == 1) { + int chIdx = int(channels.front().index); + // Grayscale channel only. 
+ + (*out_rgba) = reinterpret_cast( + malloc(4 * sizeof(float) * static_cast(exr_image.width) * + static_cast(exr_image.height))); + + if (exr_header.tiled) { + const size_t tile_size_x = static_cast(exr_header.tile_size_x); + const size_t tile_size_y = static_cast(exr_header.tile_size_y); + for (int it = 0; it < exr_image.num_tiles; it++) { + for (size_t j = 0; j < tile_size_y; j++) { + for (size_t i = 0; i < tile_size_x; i++) { + const size_t ii = + static_cast(exr_image.tiles[it].offset_x) * tile_size_x + + i; + const size_t jj = + static_cast(exr_image.tiles[it].offset_y) * tile_size_y + + j; + const size_t idx = ii + jj * static_cast(exr_image.width); + + // out of region check. + if (ii >= static_cast(exr_image.width)) { + continue; + } + if (jj >= static_cast(exr_image.height)) { + continue; + } + const size_t srcIdx = i + j * tile_size_x; + unsigned char **src = exr_image.tiles[it].images; + (*out_rgba)[4 * idx + 0] = + reinterpret_cast(src)[chIdx][srcIdx]; + (*out_rgba)[4 * idx + 1] = + reinterpret_cast(src)[chIdx][srcIdx]; + (*out_rgba)[4 * idx + 2] = + reinterpret_cast(src)[chIdx][srcIdx]; + (*out_rgba)[4 * idx + 3] = + reinterpret_cast(src)[chIdx][srcIdx]; + } + } + } + } else { + const size_t pixel_size = static_cast(exr_image.width) * + static_cast(exr_image.height); + for (size_t i = 0; i < pixel_size; i++) { + const float val = + reinterpret_cast(exr_image.images)[chIdx][i]; + (*out_rgba)[4 * i + 0] = val; + (*out_rgba)[4 * i + 1] = val; + (*out_rgba)[4 * i + 2] = val; + (*out_rgba)[4 * i + 3] = val; + } + } + } else { + // Assume RGB(A) + + if (idxR == -1) { + tinyexr::SetErrorMessage("R channel not found", err); + + FreeEXRHeader(&exr_header); + FreeEXRImage(&exr_image); + return TINYEXR_ERROR_INVALID_DATA; + } + + if (idxG == -1) { + tinyexr::SetErrorMessage("G channel not found", err); + FreeEXRHeader(&exr_header); + FreeEXRImage(&exr_image); + return TINYEXR_ERROR_INVALID_DATA; + } + + if (idxB == -1) { + tinyexr::SetErrorMessage("B channel not 
found", err); + FreeEXRHeader(&exr_header); + FreeEXRImage(&exr_image); + return TINYEXR_ERROR_INVALID_DATA; + } + + (*out_rgba) = reinterpret_cast( + malloc(4 * sizeof(float) * static_cast(exr_image.width) * + static_cast(exr_image.height))); + if (exr_header.tiled) { + const size_t tile_size_x = static_cast(exr_header.tile_size_x); + const size_t tile_size_y = static_cast(exr_header.tile_size_y); + for (int it = 0; it < exr_image.num_tiles; it++) { + for (size_t j = 0; j < tile_size_y; j++) { + for (size_t i = 0; i < tile_size_x; i++) { + const size_t ii = + static_cast(exr_image.tiles[it].offset_x) * + tile_size_x + + i; + const size_t jj = + static_cast(exr_image.tiles[it].offset_y) * + tile_size_y + + j; + const size_t idx = ii + jj * static_cast(exr_image.width); + + // out of region check. + if (ii >= static_cast(exr_image.width)) { + continue; + } + if (jj >= static_cast(exr_image.height)) { + continue; + } + const size_t srcIdx = i + j * tile_size_x; + unsigned char **src = exr_image.tiles[it].images; + (*out_rgba)[4 * idx + 0] = + reinterpret_cast(src)[idxR][srcIdx]; + (*out_rgba)[4 * idx + 1] = + reinterpret_cast(src)[idxG][srcIdx]; + (*out_rgba)[4 * idx + 2] = + reinterpret_cast(src)[idxB][srcIdx]; + if (idxA != -1) { + (*out_rgba)[4 * idx + 3] = + reinterpret_cast(src)[idxA][srcIdx]; + } else { + (*out_rgba)[4 * idx + 3] = 1.0; + } + } + } + } + } else { + const size_t pixel_size = static_cast(exr_image.width) * + static_cast(exr_image.height); + for (size_t i = 0; i < pixel_size; i++) { + (*out_rgba)[4 * i + 0] = + reinterpret_cast(exr_image.images)[idxR][i]; + (*out_rgba)[4 * i + 1] = + reinterpret_cast(exr_image.images)[idxG][i]; + (*out_rgba)[4 * i + 2] = + reinterpret_cast(exr_image.images)[idxB][i]; + if (idxA != -1) { + (*out_rgba)[4 * i + 3] = + reinterpret_cast(exr_image.images)[idxA][i]; + } else { + (*out_rgba)[4 * i + 3] = 1.0; + } + } + } + } + + (*width) = exr_image.width; + (*height) = exr_image.height; + + FreeEXRHeader(&exr_header); + 
FreeEXRImage(&exr_image); + + return TINYEXR_SUCCESS; +} + +int IsEXR(const char *filename) { + EXRVersion exr_version; + + int ret = ParseEXRVersionFromFile(&exr_version, filename); + if (ret != TINYEXR_SUCCESS) { + return ret; + } + + return TINYEXR_SUCCESS; +} + +int IsEXRFromMemory(const unsigned char *memory, size_t size) { + EXRVersion exr_version; + + int ret = ParseEXRVersionFromMemory(&exr_version, memory, size); + if (ret != TINYEXR_SUCCESS) { + return ret; + } + + return TINYEXR_SUCCESS; +} + +int ParseEXRHeaderFromMemory(EXRHeader *exr_header, const EXRVersion *version, + const unsigned char *memory, size_t size, + const char **err) { + if (memory == NULL || exr_header == NULL) { + tinyexr::SetErrorMessage( + "Invalid argument. `memory` or `exr_header` argument is null in " + "ParseEXRHeaderFromMemory()", + err); + + // Invalid argument + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + if (size < tinyexr::kEXRVersionSize) { + tinyexr::SetErrorMessage("Insufficient header/data size.\n", err); + return TINYEXR_ERROR_INVALID_DATA; + } + + const unsigned char *marker = memory + tinyexr::kEXRVersionSize; + size_t marker_size = size - tinyexr::kEXRVersionSize; + + tinyexr::HeaderInfo info; + info.clear(); + + int ret; + { + std::string err_str; + ret = ParseEXRHeader(&info, NULL, version, &err_str, marker, marker_size); + + if (ret != TINYEXR_SUCCESS) { + if (err && !err_str.empty()) { + tinyexr::SetErrorMessage(err_str, err); + } + } + } + + { + std::string warn; + std::string err_str; + + if (!ConvertHeader(exr_header, info, &warn, &err_str)) { + // release mem + for (size_t i = 0; i < info.attributes.size(); i++) { + if (info.attributes[i].value) { + free(info.attributes[i].value); + } + } + if (err && !err_str.empty()) { + tinyexr::SetErrorMessage(err_str, err); + } + ret = TINYEXR_ERROR_INVALID_HEADER; + } + } + + exr_header->multipart = version->multipart ? 1 : 0; + exr_header->non_image = version->non_image ? 
1 : 0; + + return ret; +} + +int LoadEXRFromMemory(float **out_rgba, int *width, int *height, + const unsigned char *memory, size_t size, + const char **err) { + if (out_rgba == NULL || memory == NULL) { + tinyexr::SetErrorMessage("Invalid argument for LoadEXRFromMemory", err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + EXRVersion exr_version; + EXRImage exr_image; + EXRHeader exr_header; + + InitEXRHeader(&exr_header); + + int ret = ParseEXRVersionFromMemory(&exr_version, memory, size); + if (ret != TINYEXR_SUCCESS) { + std::stringstream ss; + ss << "Failed to parse EXR version. code(" << ret << ")"; + tinyexr::SetErrorMessage(ss.str(), err); + return ret; + } + + ret = ParseEXRHeaderFromMemory(&exr_header, &exr_version, memory, size, err); + if (ret != TINYEXR_SUCCESS) { + return ret; + } + + // Read HALF channel as FLOAT. + for (int i = 0; i < exr_header.num_channels; i++) { + if (exr_header.pixel_types[i] == TINYEXR_PIXELTYPE_HALF) { + exr_header.requested_pixel_types[i] = TINYEXR_PIXELTYPE_FLOAT; + } + } + + InitEXRImage(&exr_image); + ret = LoadEXRImageFromMemory(&exr_image, &exr_header, memory, size, err); + if (ret != TINYEXR_SUCCESS) { + return ret; + } + + // RGBA + int idxR = -1; + int idxG = -1; + int idxB = -1; + int idxA = -1; + for (int c = 0; c < exr_header.num_channels; c++) { + if (strcmp(exr_header.channels[c].name, "R") == 0) { + idxR = c; + } else if (strcmp(exr_header.channels[c].name, "G") == 0) { + idxG = c; + } else if (strcmp(exr_header.channels[c].name, "B") == 0) { + idxB = c; + } else if (strcmp(exr_header.channels[c].name, "A") == 0) { + idxA = c; + } + } + + // TODO(syoyo): Refactor removing same code as used in LoadEXR(). + if (exr_header.num_channels == 1) { + // Grayscale channel only. 
+ + (*out_rgba) = reinterpret_cast( + malloc(4 * sizeof(float) * static_cast(exr_image.width) * + static_cast(exr_image.height))); + + if (exr_header.tiled) { + const size_t tile_size_x = static_cast(exr_header.tile_size_x); + const size_t tile_size_y = static_cast(exr_header.tile_size_y); + for (int it = 0; it < exr_image.num_tiles; it++) { + for (size_t j = 0; j < tile_size_y; j++) { + for (size_t i = 0; i < tile_size_x; i++) { + const size_t ii = + static_cast(exr_image.tiles[it].offset_x) * + tile_size_x + + i; + const size_t jj = + static_cast(exr_image.tiles[it].offset_y) * + tile_size_y + + j; + const size_t idx = ii + jj * static_cast(exr_image.width); + + // out of region check. + if (ii >= static_cast(exr_image.width)) { + continue; + } + if (jj >= static_cast(exr_image.height)) { + continue; + } + const size_t srcIdx = i + j * tile_size_x; + unsigned char **src = exr_image.tiles[it].images; + (*out_rgba)[4 * idx + 0] = + reinterpret_cast(src)[0][srcIdx]; + (*out_rgba)[4 * idx + 1] = + reinterpret_cast(src)[0][srcIdx]; + (*out_rgba)[4 * idx + 2] = + reinterpret_cast(src)[0][srcIdx]; + (*out_rgba)[4 * idx + 3] = + reinterpret_cast(src)[0][srcIdx]; + } + } + } + } else { + const size_t pixel_size = static_cast(exr_image.width) * + static_cast(exr_image.height); + for (size_t i = 0; i < pixel_size; i++) { + const float val = reinterpret_cast(exr_image.images)[0][i]; + (*out_rgba)[4 * i + 0] = val; + (*out_rgba)[4 * i + 1] = val; + (*out_rgba)[4 * i + 2] = val; + (*out_rgba)[4 * i + 3] = val; + } + } + + } else { + // TODO(syoyo): Support non RGBA image. 
+ + if (idxR == -1) { + tinyexr::SetErrorMessage("R channel not found", err); + + // @todo { free exr_image } + return TINYEXR_ERROR_INVALID_DATA; + } + + if (idxG == -1) { + tinyexr::SetErrorMessage("G channel not found", err); + // @todo { free exr_image } + return TINYEXR_ERROR_INVALID_DATA; + } + + if (idxB == -1) { + tinyexr::SetErrorMessage("B channel not found", err); + // @todo { free exr_image } + return TINYEXR_ERROR_INVALID_DATA; + } + + (*out_rgba) = reinterpret_cast( + malloc(4 * sizeof(float) * static_cast(exr_image.width) * + static_cast(exr_image.height))); + + if (exr_header.tiled) { + const size_t tile_size_x = static_cast(exr_header.tile_size_x); + const size_t tile_size_y = static_cast(exr_header.tile_size_y); + for (int it = 0; it < exr_image.num_tiles; it++) { + for (size_t j = 0; j < tile_size_y; j++) + for (size_t i = 0; i < tile_size_x; i++) { + const size_t ii = + static_cast(exr_image.tiles[it].offset_x) * + tile_size_x + + i; + const size_t jj = + static_cast(exr_image.tiles[it].offset_y) * + tile_size_y + + j; + const size_t idx = ii + jj * static_cast(exr_image.width); + + // out of region check. 
+ if (ii >= static_cast(exr_image.width)) { + continue; + } + if (jj >= static_cast(exr_image.height)) { + continue; + } + const size_t srcIdx = i + j * tile_size_x; + unsigned char **src = exr_image.tiles[it].images; + (*out_rgba)[4 * idx + 0] = + reinterpret_cast(src)[idxR][srcIdx]; + (*out_rgba)[4 * idx + 1] = + reinterpret_cast(src)[idxG][srcIdx]; + (*out_rgba)[4 * idx + 2] = + reinterpret_cast(src)[idxB][srcIdx]; + if (idxA != -1) { + (*out_rgba)[4 * idx + 3] = + reinterpret_cast(src)[idxA][srcIdx]; + } else { + (*out_rgba)[4 * idx + 3] = 1.0; + } + } + } + } else { + const size_t pixel_size = static_cast(exr_image.width) * + static_cast(exr_image.height); + for (size_t i = 0; i < pixel_size; i++) { + (*out_rgba)[4 * i + 0] = + reinterpret_cast(exr_image.images)[idxR][i]; + (*out_rgba)[4 * i + 1] = + reinterpret_cast(exr_image.images)[idxG][i]; + (*out_rgba)[4 * i + 2] = + reinterpret_cast(exr_image.images)[idxB][i]; + if (idxA != -1) { + (*out_rgba)[4 * i + 3] = + reinterpret_cast(exr_image.images)[idxA][i]; + } else { + (*out_rgba)[4 * i + 3] = 1.0; + } + } + } + } + + (*width) = exr_image.width; + (*height) = exr_image.height; + + FreeEXRHeader(&exr_header); + FreeEXRImage(&exr_image); + + return TINYEXR_SUCCESS; +} + +// Represents a read-only file mapped to an address space in memory. +// If no memory-mapping API is available, falls back to allocating a buffer +// with a copy of the file's data. +struct MemoryMappedFile { + unsigned char *data; // To the start of the file's data. + size_t size; // The size of the file in bytes. +#ifdef TINYEXR_USE_WIN32_MMAP + HANDLE windows_file; + HANDLE windows_file_mapping; +#elif defined(TINYEXR_USE_POSIX_MMAP) + int posix_descriptor; +#endif + + // MemoryMappedFile's constructor tries to map memory to a file. + // If this succeeds, valid() will return true and all fields + // are usable; otherwise, valid() will return false. 
+ MemoryMappedFile(const char *filename) { + data = NULL; + size = 0; +#ifdef TINYEXR_USE_WIN32_MMAP + windows_file_mapping = NULL; + windows_file = + CreateFileW(tinyexr::UTF8ToWchar(filename).c_str(), // lpFileName + GENERIC_READ, // dwDesiredAccess + FILE_SHARE_READ, // dwShareMode + NULL, // lpSecurityAttributes + OPEN_EXISTING, // dwCreationDisposition + FILE_ATTRIBUTE_READONLY, // dwFlagsAndAttributes + NULL); // hTemplateFile + if (windows_file == INVALID_HANDLE_VALUE) { + return; + } + + windows_file_mapping = CreateFileMapping(windows_file, // hFile + NULL, // lpFileMappingAttributes + PAGE_READONLY, // flProtect + 0, // dwMaximumSizeHigh + 0, // dwMaximumSizeLow + NULL); // lpName + if (windows_file_mapping == NULL) { + return; + } + + data = reinterpret_cast( + MapViewOfFile(windows_file_mapping, // hFileMappingObject + FILE_MAP_READ, // dwDesiredAccess + 0, // dwFileOffsetHigh + 0, // dwFileOffsetLow + 0)); // dwNumberOfBytesToMap + if (!data) { + return; + } + + LARGE_INTEGER windows_file_size = {}; + if (!GetFileSizeEx(windows_file, &windows_file_size) || + static_cast(windows_file_size.QuadPart) > + std::numeric_limits::max()) { + UnmapViewOfFile(data); + data = NULL; + return; + } + size = static_cast(windows_file_size.QuadPart); +#elif defined(TINYEXR_USE_POSIX_MMAP) + posix_descriptor = open(filename, O_RDONLY); + if (posix_descriptor == -1) { + return; + } + + struct stat info; + if (fstat(posix_descriptor, &info) < 0) { + return; + } + // Make sure st_size is in the valid range for a size_t. The second case + // can only fail if a POSIX implementation defines off_t to be a larger + // type than size_t - for instance, compiling with _FILE_OFFSET_BITS=64 + // on a 32-bit system. On current 64-bit systems, this check can never + // fail, so we turn off clang's Wtautological-type-limit-compare warning + // around this code. 
+#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wtautological-type-limit-compare" +#endif + if (info.st_size < 0 || + info.st_size > std::numeric_limits::max()) { + return; + } +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + size = static_cast(info.st_size); + + data = reinterpret_cast( + mmap(0, size, PROT_READ, MAP_SHARED, posix_descriptor, 0)); + if (data == MAP_FAILED) { + data = nullptr; + return; + } +#else + FILE *fp = fopen(filename, "rb"); + if (!fp) { + return; + } + + // Calling fseek(fp, 0, SEEK_END) isn't strictly-conforming C code, but + // since neither the WIN32 nor POSIX APIs are available in this branch, this + // is a reasonable fallback option. + if (fseek(fp, 0, SEEK_END) != 0) { + fclose(fp); + return; + } + const long ftell_result = ftell(fp); + if (ftell_result < 0) { + // Error from ftell + fclose(fp); + return; + } + size = static_cast(ftell_result); + if (fseek(fp, 0, SEEK_SET) != 0) { + fclose(fp); + size = 0; + return; + } + + data = reinterpret_cast(malloc(size)); + if (!data) { + size = 0; + fclose(fp); + return; + } + size_t read_bytes = fread(data, 1, size, fp); + if (read_bytes != size) { + // TODO: Try to read data until reading `size` bytes. + fclose(fp); + size = 0; + data = nullptr; + return; + } + fclose(fp); +#endif + } + + // MemoryMappedFile's destructor closes all its handles. + ~MemoryMappedFile() { +#ifdef TINYEXR_USE_WIN32_MMAP + if (data) { + (void)UnmapViewOfFile(data); + data = NULL; + } + + if (windows_file_mapping != NULL) { + (void)CloseHandle(windows_file_mapping); + } + + if (windows_file != INVALID_HANDLE_VALUE) { + (void)CloseHandle(windows_file); + } +#elif defined(TINYEXR_USE_POSIX_MMAP) + if (data) { + (void)munmap(data, size); + data = NULL; + } + + if (posix_descriptor != -1) { + (void)close(posix_descriptor); + } +#else + if (data) { + (void)free(data); + } + data = NULL; +#endif + } + + // A MemoryMappedFile cannot be copied or moved. 
+ // Only check for this when compiling with C++11 or higher, since deleted + // function definitions were added then. +#if TINYEXR_HAS_CXX11 +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wc++98-compat" +#endif + MemoryMappedFile(const MemoryMappedFile &) = delete; + MemoryMappedFile &operator=(const MemoryMappedFile &) = delete; + MemoryMappedFile(MemoryMappedFile &&other) noexcept = delete; + MemoryMappedFile &operator=(MemoryMappedFile &&other) noexcept = delete; +#ifdef __clang__ +#pragma clang diagnostic pop +#endif +#endif + + // Returns whether this was successfully opened. + bool valid() const { return data; } +}; + +int LoadEXRImageFromFile(EXRImage *exr_image, const EXRHeader *exr_header, + const char *filename, const char **err) { + if (exr_image == NULL) { + tinyexr::SetErrorMessage("Invalid argument for LoadEXRImageFromFile", err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + MemoryMappedFile file(filename); + if (!file.valid()) { + tinyexr::SetErrorMessage("Cannot read file " + std::string(filename), err); + return TINYEXR_ERROR_CANT_OPEN_FILE; + } + + if (file.size < 16) { + tinyexr::SetErrorMessage("File size too short : " + std::string(filename), + err); + return TINYEXR_ERROR_INVALID_FILE; + } + + return LoadEXRImageFromMemory(exr_image, exr_header, file.data, file.size, + err); +} + +int LoadEXRImageFromMemory(EXRImage *exr_image, const EXRHeader *exr_header, + const unsigned char *memory, const size_t size, + const char **err) { + if (exr_image == NULL || memory == NULL || + (size < tinyexr::kEXRVersionSize)) { + tinyexr::SetErrorMessage("Invalid argument for LoadEXRImageFromMemory", + err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + if (exr_header->header_len == 0) { + tinyexr::SetErrorMessage("EXRHeader variable is not initialized.", err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + const unsigned char *head = memory; + const unsigned char *marker = reinterpret_cast( + memory + 
exr_header->header_len + + 8); // +8 for magic number + version header. + return tinyexr::DecodeEXRImage(exr_image, exr_header, head, marker, size, + err); +} + +namespace tinyexr +{ + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wsign-conversion" +#endif + +// out_data must be allocated initially with the block-header size +// of the current image(-part) type +static bool EncodePixelData(/* out */ std::vector& out_data, + const unsigned char* const* images, + int compression_type, + int /*line_order*/, + int width, // for tiled : tile.width + int /*height*/, // for tiled : header.tile_size_y + int x_stride, // for tiled : header.tile_size_x + int line_no, // for tiled : 0 + int num_lines, // for tiled : tile.height + size_t pixel_data_size, + const std::vector& channels, + const std::vector& channel_offset_list, + std::string *err, + const void* compression_param = 0) // zfp compression param +{ + size_t buf_size = static_cast(width) * + static_cast(num_lines) * + static_cast(pixel_data_size); + //int last2bit = (buf_size & 3); + // buf_size must be multiple of four + //if(last2bit) buf_size += 4 - last2bit; + std::vector buf(buf_size); + + size_t start_y = static_cast(line_no); + for (size_t c = 0; c < channels.size(); c++) { + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { + if (channels[c].requested_pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + for (int y = 0; y < num_lines; y++) { + // Assume increasing Y + float *line_ptr = reinterpret_cast(&buf.at( + static_cast(pixel_data_size * size_t(y) * size_t(width)) + + channel_offset_list[c] * + static_cast(width))); + for (int x = 0; x < width; x++) { + tinyexr::FP16 h16; + h16.u = reinterpret_cast( + images)[c][(y + start_y) * size_t(x_stride) + size_t(x)]; + + tinyexr::FP32 f32 = half_to_float(h16); + + tinyexr::swap4(&f32.f); + + // line_ptr[x] = f32.f; + tinyexr::cpy4(line_ptr + x, &(f32.f)); + } + } + } else if (channels[c].requested_pixel_type == 
TINYEXR_PIXELTYPE_HALF) { + for (int y = 0; y < num_lines; y++) { + // Assume increasing Y + unsigned short *line_ptr = reinterpret_cast( + &buf.at(static_cast(pixel_data_size * y * + width) + + channel_offset_list[c] * + static_cast(width))); + for (int x = 0; x < width; x++) { + unsigned short val = reinterpret_cast( + images)[c][(y + start_y) * x_stride + x]; + + tinyexr::swap2(&val); + + // line_ptr[x] = val; + tinyexr::cpy2(line_ptr + x, &val); + } + } + } else { + if (err) { + (*err) += "Invalid requested_pixel_type.\n"; + } + return false; + } + + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + if (channels[c].requested_pixel_type == TINYEXR_PIXELTYPE_HALF) { + for (int y = 0; y < num_lines; y++) { + // Assume increasing Y + unsigned short *line_ptr = reinterpret_cast( + &buf.at(static_cast(pixel_data_size * y * + width) + + channel_offset_list[c] * + static_cast(width))); + for (int x = 0; x < width; x++) { + tinyexr::FP32 f32; + f32.f = reinterpret_cast( + images)[c][(y + start_y) * x_stride + x]; + + tinyexr::FP16 h16; + h16 = float_to_half_full(f32); + + tinyexr::swap2(reinterpret_cast(&h16.u)); + + // line_ptr[x] = h16.u; + tinyexr::cpy2(line_ptr + x, &(h16.u)); + } + } + } else if (channels[c].requested_pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + for (int y = 0; y < num_lines; y++) { + // Assume increasing Y + float *line_ptr = reinterpret_cast(&buf.at( + static_cast(pixel_data_size * y * width) + + channel_offset_list[c] * + static_cast(width))); + for (int x = 0; x < width; x++) { + float val = reinterpret_cast( + images)[c][(y + start_y) * x_stride + x]; + + tinyexr::swap4(&val); + + // line_ptr[x] = val; + tinyexr::cpy4(line_ptr + x, &val); + } + } + } else { + if (err) { + (*err) += "Invalid requested_pixel_type.\n"; + } + return false; + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { + for (int y = 0; y < num_lines; y++) { + // Assume increasing Y + unsigned int *line_ptr = reinterpret_cast(&buf.at( + 
static_cast(pixel_data_size * y * width) + + channel_offset_list[c] * static_cast(width))); + for (int x = 0; x < width; x++) { + unsigned int val = reinterpret_cast( + images)[c][(y + start_y) * x_stride + x]; + + tinyexr::swap4(&val); + + // line_ptr[x] = val; + tinyexr::cpy4(line_ptr + x, &val); + } + } + } + } + + if (compression_type == TINYEXR_COMPRESSIONTYPE_NONE) { + // 4 byte: scan line + // 4 byte: data size + // ~ : pixel data(uncompressed) + out_data.insert(out_data.end(), buf.begin(), buf.end()); + + } else if ((compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS) || + (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP)) { +#if defined(TINYEXR_USE_MINIZ) && (TINYEXR_USE_MINIZ==1) + std::vector block(mz_compressBound( + static_cast(buf.size()))); +#elif TINYEXR_USE_STB_ZLIB + // there is no compressBound() function, so we use a value that + // is grossly overestimated, but should always work + std::vector block(256 + 2 * buf.size()); +#elif defined(TINYEXR_USE_NANOZLIB) && (TINYEXR_USE_NANOZLIB == 1) + std::vector block(nanoz_compressBound( + static_cast(buf.size()))); +#else + std::vector block( + compressBound(static_cast(buf.size()))); +#endif + tinyexr::tinyexr_uint64 outSize = block.size(); + + if (!tinyexr::CompressZip(&block.at(0), outSize, + reinterpret_cast(&buf.at(0)), + static_cast(buf.size()))) { + if (err) { + (*err) += "Zip compresssion failed.\n"; + } + return false; + } + + // 4 byte: scan line + // 4 byte: data size + // ~ : pixel data(compressed) + unsigned int data_len = static_cast(outSize); // truncate + + out_data.insert(out_data.end(), block.begin(), block.begin() + data_len); + + } else if (compression_type == TINYEXR_COMPRESSIONTYPE_RLE) { + // (buf.size() * 3) / 2 would be enough. 
+ std::vector block((buf.size() * 3) / 2); + + tinyexr::tinyexr_uint64 outSize = block.size(); + + if (!tinyexr::CompressRle(&block.at(0), outSize, + reinterpret_cast(&buf.at(0)), + static_cast(buf.size()))) { + if (err) { + (*err) += "RLE compresssion failed.\n"; + } + return false; + } + + // 4 byte: scan line + // 4 byte: data size + // ~ : pixel data(compressed) + unsigned int data_len = static_cast(outSize); // truncate + out_data.insert(out_data.end(), block.begin(), block.begin() + data_len); + + } else if (compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { +#if TINYEXR_USE_PIZ + unsigned int bufLen = + 8192 + static_cast( + 2 * static_cast( + buf.size())); // @fixme { compute good bound. } + std::vector block(bufLen); + unsigned int outSize = static_cast(block.size()); + + if (!CompressPiz(&block.at(0), &outSize, + reinterpret_cast(&buf.at(0)), + buf.size(), channels, width, num_lines)) { + if (err) { + (*err) += "PIZ compresssion failed.\n"; + } + return false; + } + + // 4 byte: scan line + // 4 byte: data size + // ~ : pixel data(compressed) + unsigned int data_len = outSize; + out_data.insert(out_data.end(), block.begin(), block.begin() + data_len); + +#else + if (err) { + (*err) += "PIZ compression is disabled in this build.\n"; + } + return false; +#endif + } else if (compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { +#if TINYEXR_USE_ZFP + const ZFPCompressionParam* zfp_compression_param = reinterpret_cast(compression_param); + std::vector block; + unsigned int outSize; + + tinyexr::CompressZfp( + &block, &outSize, reinterpret_cast(&buf.at(0)), + width, num_lines, static_cast(channels.size()), *zfp_compression_param); + + // 4 byte: scan line + // 4 byte: data size + // ~ : pixel data(compressed) + unsigned int data_len = outSize; + out_data.insert(out_data.end(), block.begin(), block.begin() + data_len); + +#else + if (err) { + (*err) += "ZFP compression is disabled in this build.\n"; + } + (void)compression_param; + return false; +#endif + } else { 
+ return false; + } + + return true; +} + +static int EncodeTiledLevel(const EXRImage* level_image, const EXRHeader* exr_header, + const std::vector& channels, + std::vector >& data_list, + size_t start_index, // for data_list + int num_x_tiles, int num_y_tiles, + const std::vector& channel_offset_list, + int pixel_data_size, + const void* compression_param, // must be set if zfp compression is enabled + std::string* err) { + int num_tiles = num_x_tiles * num_y_tiles; + if (num_tiles != level_image->num_tiles) { + if (err) { + (*err) += "Invalid number of tiles in argument.\n"; + } + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + if ((exr_header->tile_size_x > level_image->width || exr_header->tile_size_y > level_image->height) && + level_image->level_x == 0 && level_image->level_y == 0) { + if (err) { + (*err) += "Failed to encode tile data.\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } + + +#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0) + std::atomic invalid_data(false); +#else + bool invalid_data(false); +#endif + +#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0) + std::vector workers; + std::atomic tile_count(0); + + int num_threads = std::max(1, int(std::thread::hardware_concurrency())); + if (num_threads > int(num_tiles)) { + num_threads = int(num_tiles); + } + + for (int t = 0; t < num_threads; t++) { + workers.emplace_back(std::thread([&]() { + int i = 0; + while ((i = tile_count++) < num_tiles) { + +#else + // Use signed int since some OpenMP compiler doesn't allow unsigned type for + // `parallel for` +#if TINYEXR_USE_OPENMP +#pragma omp parallel for +#endif + for (int i = 0; i < num_tiles; i++) { + +#endif + size_t tile_idx = static_cast(i); + size_t data_idx = tile_idx + start_index; + + int x_tile = i % num_x_tiles; + int y_tile = i / num_x_tiles; + + EXRTile& tile = level_image->tiles[tile_idx]; + + const unsigned char* const* images = + static_cast(tile.images); + + data_list[data_idx].resize(5*sizeof(int)); + size_t data_header_size = 
data_list[data_idx].size(); + bool ret = EncodePixelData(data_list[data_idx], + images, + exr_header->compression_type, + 0, // increasing y + tile.width, + exr_header->tile_size_y, + exr_header->tile_size_x, + 0, + tile.height, + pixel_data_size, + channels, + channel_offset_list, + err, compression_param); + if (!ret) { + invalid_data = true; + continue; + } + if (data_list[data_idx].size() <= data_header_size) { + invalid_data = true; + continue; + } + + int data_len = static_cast(data_list[data_idx].size() - data_header_size); + //tileX, tileY, levelX, levelY // pixel_data_size(int) + memcpy(&data_list[data_idx][0], &x_tile, sizeof(int)); + memcpy(&data_list[data_idx][4], &y_tile, sizeof(int)); + memcpy(&data_list[data_idx][8], &level_image->level_x, sizeof(int)); + memcpy(&data_list[data_idx][12], &level_image->level_y, sizeof(int)); + memcpy(&data_list[data_idx][16], &data_len, sizeof(int)); + + swap4(reinterpret_cast(&data_list[data_idx][0])); + swap4(reinterpret_cast(&data_list[data_idx][4])); + swap4(reinterpret_cast(&data_list[data_idx][8])); + swap4(reinterpret_cast(&data_list[data_idx][12])); + swap4(reinterpret_cast(&data_list[data_idx][16])); + +#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0) + } +})); + } + + for (auto &t : workers) { + t.join(); + } +#else + } // omp parallel +#endif + + if (invalid_data) { + if (err) { + (*err) += "Failed to encode tile data.\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } + return TINYEXR_SUCCESS; +} + +static int NumScanlines(int compression_type) { + int num_scanlines = 1; + if (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) { + num_scanlines = 16; + } else if (compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { + num_scanlines = 32; + } else if (compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { + num_scanlines = 16; + } + return num_scanlines; +} + +static int EncodeChunk(const EXRImage* exr_image, const EXRHeader* exr_header, + const std::vector& channels, + int num_blocks, + tinyexr_uint64 chunk_offset, 
// starting offset of current chunk + bool is_multipart, + OffsetData& offset_data, // output block offsets, must be initialized + std::vector >& data_list, // output + tinyexr_uint64& total_size, // output: ending offset of current chunk + std::string* err) { + int num_scanlines = NumScanlines(exr_header->compression_type); + + data_list.resize(num_blocks); + + std::vector channel_offset_list( + static_cast(exr_header->num_channels)); + + int pixel_data_size = 0; + { + size_t channel_offset = 0; + for (size_t c = 0; c < static_cast(exr_header->num_channels); c++) { + channel_offset_list[c] = channel_offset; + if (channels[c].requested_pixel_type == TINYEXR_PIXELTYPE_HALF) { + pixel_data_size += sizeof(unsigned short); + channel_offset += sizeof(unsigned short); + } else if (channels[c].requested_pixel_type == + TINYEXR_PIXELTYPE_FLOAT) { + pixel_data_size += sizeof(float); + channel_offset += sizeof(float); + } else if (channels[c].requested_pixel_type == TINYEXR_PIXELTYPE_UINT) { + pixel_data_size += sizeof(unsigned int); + channel_offset += sizeof(unsigned int); + } else { + if (err) { + (*err) += "Invalid requested_pixel_type.\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } + } + } + + const void* compression_param = 0; +#if TINYEXR_USE_ZFP + tinyexr::ZFPCompressionParam zfp_compression_param; + + // Use ZFP compression parameter from custom attributes(if such a parameter + // exists) + { + std::string e; + bool ret = tinyexr::FindZFPCompressionParam( + &zfp_compression_param, exr_header->custom_attributes, + exr_header->num_custom_attributes, &e); + + if (!ret) { + // Use predefined compression parameter. + zfp_compression_param.type = 0; + zfp_compression_param.rate = 2; + } + compression_param = &zfp_compression_param; + } +#endif + + tinyexr_uint64 offset = chunk_offset; + tinyexr_uint64 doffset = is_multipart ? 
4u : 0u; + + if (exr_image->tiles) { + const EXRImage* level_image = exr_image; + size_t block_idx = 0; + //tinyexr::tinyexr_uint64 block_data_size = 0; + int num_levels = (exr_header->tile_level_mode != TINYEXR_TILE_RIPMAP_LEVELS) ? + offset_data.num_x_levels : (offset_data.num_x_levels * offset_data.num_y_levels); + for (int level_index = 0; level_index < num_levels; ++level_index) { + if (!level_image) { + if (err) { + (*err) += "Invalid number of tiled levels for EncodeChunk\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } + + int level_index_from_image = LevelIndex(level_image->level_x, level_image->level_y, + exr_header->tile_level_mode, offset_data.num_x_levels); + if (level_index_from_image < 0) { + if (err) { + (*err) += "Invalid tile level mode\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } + + if (level_index_from_image != level_index) { + if (err) { + (*err) += "Incorrect level ordering in tiled image\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } + int num_y_tiles = int(offset_data.offsets[level_index].size()); + if (num_y_tiles <= 0) { + if (err) { + (*err) += "Invalid Y tile size\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } + + int num_x_tiles = int(offset_data.offsets[level_index][0].size()); + if (num_x_tiles <= 0) { + if (err) { + (*err) += "Invalid X tile size\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } + + std::string e; + int ret = EncodeTiledLevel(level_image, + exr_header, + channels, + data_list, + block_idx, + num_x_tiles, + num_y_tiles, + channel_offset_list, + pixel_data_size, + compression_param, + &e); + if (ret != TINYEXR_SUCCESS) { + if (!e.empty() && err) { + (*err) += e; + } + return ret; + } + + for (size_t j = 0; j < static_cast(num_y_tiles); ++j) + for (size_t i = 0; i < static_cast(num_x_tiles); ++i) { + offset_data.offsets[level_index][j][i] = offset; + swap8(reinterpret_cast(&offset_data.offsets[level_index][j][i])); + offset += data_list[block_idx].size() + doffset; + //block_data_size += 
data_list[block_idx].size(); + ++block_idx; + } + level_image = level_image->next_level; + } + TINYEXR_CHECK_AND_RETURN_C(static_cast(block_idx) == num_blocks, TINYEXR_ERROR_INVALID_DATA); + total_size = offset; + } else { // scanlines + std::vector& offsets = offset_data.offsets[0][0]; + +#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0) + std::atomic invalid_data(false); + std::vector workers; + std::atomic block_count(0); + + int num_threads = std::min(std::max(1, int(std::thread::hardware_concurrency())), num_blocks); + + for (int t = 0; t < num_threads; t++) { + workers.emplace_back(std::thread([&]() { + int i = 0; + while ((i = block_count++) < num_blocks) { + +#else + bool invalid_data(false); +#if TINYEXR_USE_OPENMP +#pragma omp parallel for +#endif + for (int i = 0; i < num_blocks; i++) { + +#endif + int start_y = num_scanlines * i; + int end_Y = (std::min)(num_scanlines * (i + 1), exr_image->height); + int num_lines = end_Y - start_y; + + const unsigned char* const* images = + static_cast(exr_image->images); + + data_list[i].resize(2*sizeof(int)); + size_t data_header_size = data_list[i].size(); + + bool ret = EncodePixelData(data_list[i], + images, + exr_header->compression_type, + 0, // increasing y + exr_image->width, + exr_image->height, + exr_image->width, + start_y, + num_lines, + pixel_data_size, + channels, + channel_offset_list, + err, + compression_param); + if (!ret) { + invalid_data = true; + continue; // "break" cannot be used with OpenMP + } + if (data_list[i].size() <= data_header_size) { + invalid_data = true; + continue; // "break" cannot be used with OpenMP + } + int data_len = static_cast(data_list[i].size() - data_header_size); + memcpy(&data_list[i][0], &start_y, sizeof(int)); + memcpy(&data_list[i][4], &data_len, sizeof(int)); + + swap4(reinterpret_cast(&data_list[i][0])); + swap4(reinterpret_cast(&data_list[i][4])); +#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0) + } + })); + } + + for (auto &t : workers) { + t.join(); + } +#else 
+ } // omp parallel +#endif + + if (invalid_data) { + if (err) { + (*err) += "Failed to encode scanline data.\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } + + for (size_t i = 0; i < static_cast(num_blocks); i++) { + offsets[i] = offset; + tinyexr::swap8(reinterpret_cast(&offsets[i])); + offset += data_list[i].size() + doffset; + } + + total_size = static_cast(offset); + } + return TINYEXR_SUCCESS; +} + +// can save a single or multi-part image (no deep* formats) +static size_t SaveEXRNPartImageToMemory(const EXRImage* exr_images, + const EXRHeader** exr_headers, + unsigned int num_parts, + unsigned char** memory_out, const char** err) { + if (exr_images == NULL || exr_headers == NULL || num_parts == 0 || + memory_out == NULL) { + SetErrorMessage("Invalid argument for SaveEXRNPartImageToMemory", + err); + return 0; + } + { + for (unsigned int i = 0; i < num_parts; ++i) { + if (exr_headers[i]->compression_type < 0) { + SetErrorMessage("Invalid argument for SaveEXRNPartImageToMemory", + err); + return 0; + } +#if !TINYEXR_USE_PIZ + if (exr_headers[i]->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { + SetErrorMessage("PIZ compression is not supported in this build", + err); + return 0; + } +#endif + if (exr_headers[i]->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { +#if !TINYEXR_USE_ZFP + SetErrorMessage("ZFP compression is not supported in this build", + err); + return 0; +#else + // All channels must be fp32. 
+ // No fp16 support in ZFP atm(as of 2023 June) + // https://github.com/LLNL/fpzip/issues/2 + for (int c = 0; c < exr_headers[i]->num_channels; ++c) { + if (exr_headers[i]->requested_pixel_types[c] != TINYEXR_PIXELTYPE_FLOAT) { + SetErrorMessage("Pixel type must be FLOAT for ZFP compression", + err); + return 0; + } + } +#endif + } + } + } + + std::vector memory; + + // Header + { + const char header[] = { 0x76, 0x2f, 0x31, 0x01 }; + memory.insert(memory.end(), header, header + 4); + } + + // Version + // using value from the first header + int long_name = exr_headers[0]->long_name; + { + char marker[] = { 2, 0, 0, 0 }; + /* @todo + if (exr_header->non_image) { + marker[1] |= 0x8; + } + */ + // tiled + if (num_parts == 1 && exr_images[0].tiles) { + marker[1] |= 0x2; + } + // long_name + if (long_name) { + marker[1] |= 0x4; + } + // multipart + if (num_parts > 1) { + marker[1] |= 0x10; + } + memory.insert(memory.end(), marker, marker + 4); + } + + int total_chunk_count = 0; + std::vector chunk_count(num_parts); + std::vector offset_data(num_parts); + for (unsigned int i = 0; i < num_parts; ++i) { + if (!exr_images[i].tiles) { + int num_scanlines = NumScanlines(exr_headers[i]->compression_type); + chunk_count[i] = + (exr_images[i].height + num_scanlines - 1) / num_scanlines; + InitSingleResolutionOffsets(offset_data[i], chunk_count[i]); + total_chunk_count += chunk_count[i]; + } else { + { + std::vector num_x_tiles, num_y_tiles; + if (!PrecalculateTileInfo(num_x_tiles, num_y_tiles, exr_headers[i])) { + SetErrorMessage("Failed to precalculate Tile info", + err); + return TINYEXR_ERROR_INVALID_DATA; + } + int ntiles = InitTileOffsets(offset_data[i], exr_headers[i], num_x_tiles, num_y_tiles); + if (ntiles > 0) { + chunk_count[i] = ntiles; + } else { + SetErrorMessage("Failed to compute Tile offsets", + err); + return TINYEXR_ERROR_INVALID_DATA; + + } + total_chunk_count += chunk_count[i]; + } + } + } + // Write attributes to memory buffer. 
+ std::vector< std::vector > channels(num_parts); + { + std::set partnames; + for (unsigned int i = 0; i < num_parts; ++i) { + //channels + { + std::vector data; + + for (int c = 0; c < exr_headers[i]->num_channels; c++) { + tinyexr::ChannelInfo info; + info.p_linear = 0; + info.pixel_type = exr_headers[i]->pixel_types[c]; + info.requested_pixel_type = exr_headers[i]->requested_pixel_types[c]; + info.x_sampling = 1; + info.y_sampling = 1; + info.name = std::string(exr_headers[i]->channels[c].name); + channels[i].push_back(info); + } + + tinyexr::WriteChannelInfo(data, channels[i]); + + tinyexr::WriteAttributeToMemory(&memory, "channels", "chlist", &data.at(0), + static_cast(data.size())); + } + + { + int comp = exr_headers[i]->compression_type; + swap4(&comp); + WriteAttributeToMemory( + &memory, "compression", "compression", + reinterpret_cast(&comp), 1); + } + + { + int data[4] = { 0, 0, exr_images[i].width - 1, exr_images[i].height - 1 }; + swap4(&data[0]); + swap4(&data[1]); + swap4(&data[2]); + swap4(&data[3]); + WriteAttributeToMemory( + &memory, "dataWindow", "box2i", + reinterpret_cast(data), sizeof(int) * 4); + + int data0[4] = { 0, 0, exr_images[0].width - 1, exr_images[0].height - 1 }; + swap4(&data0[0]); + swap4(&data0[1]); + swap4(&data0[2]); + swap4(&data0[3]); + // Note: must be the same across parts (currently, using value from the first header) + WriteAttributeToMemory( + &memory, "displayWindow", "box2i", + reinterpret_cast(data0), sizeof(int) * 4); + } + + { + unsigned char line_order = 0; // @fixme { read line_order from EXRHeader } + WriteAttributeToMemory(&memory, "lineOrder", "lineOrder", + &line_order, 1); + } + + { + // Note: must be the same across parts + float aspectRatio = 1.0f; + swap4(&aspectRatio); + WriteAttributeToMemory( + &memory, "pixelAspectRatio", "float", + reinterpret_cast(&aspectRatio), sizeof(float)); + } + + { + float center[2] = { 0.0f, 0.0f }; + swap4(¢er[0]); + swap4(¢er[1]); + WriteAttributeToMemory( + &memory, 
"screenWindowCenter", "v2f", + reinterpret_cast(center), 2 * sizeof(float)); + } + + { + float w = 1.0f; + swap4(&w); + WriteAttributeToMemory(&memory, "screenWindowWidth", "float", + reinterpret_cast(&w), + sizeof(float)); + } + + if (exr_images[i].tiles) { + unsigned char tile_mode = static_cast(exr_headers[i]->tile_level_mode & 0x3); + if (exr_headers[i]->tile_rounding_mode) tile_mode |= (1u << 4u); + //unsigned char data[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + unsigned int datai[3] = { 0, 0, 0 }; + unsigned char* data = reinterpret_cast(&datai[0]); + datai[0] = static_cast(exr_headers[i]->tile_size_x); + datai[1] = static_cast(exr_headers[i]->tile_size_y); + data[8] = tile_mode; + swap4(reinterpret_cast(&data[0])); + swap4(reinterpret_cast(&data[4])); + WriteAttributeToMemory( + &memory, "tiles", "tiledesc", + reinterpret_cast(data), 9); + } + + // must be present for multi-part files - according to spec. + if (num_parts > 1) { + // name + { + size_t len = 0; + if ((len = strlen(exr_headers[i]->name)) > 0) { +#if TINYEXR_HAS_CXX11 + partnames.emplace(exr_headers[i]->name); +#else + partnames.insert(std::string(exr_headers[i]->name)); +#endif + if (partnames.size() != i + 1) { + SetErrorMessage("'name' attributes must be unique for a multi-part file", err); + return 0; + } + WriteAttributeToMemory( + &memory, "name", "string", + reinterpret_cast(exr_headers[i]->name), + static_cast(len)); + } else { + SetErrorMessage("Invalid 'name' attribute for a multi-part file", err); + return 0; + } + } + // type + { + const char* type = "scanlineimage"; + if (exr_images[i].tiles) type = "tiledimage"; + WriteAttributeToMemory( + &memory, "type", "string", + reinterpret_cast(type), + static_cast(strlen(type))); + } + // chunkCount + { + WriteAttributeToMemory( + &memory, "chunkCount", "int", + reinterpret_cast(&chunk_count[i]), + 4); + } + } + + // Custom attributes + if (exr_headers[i]->num_custom_attributes > 0) { + for (int j = 0; j < exr_headers[i]->num_custom_attributes; 
j++) { + tinyexr::WriteAttributeToMemory( + &memory, exr_headers[i]->custom_attributes[j].name, + exr_headers[i]->custom_attributes[j].type, + reinterpret_cast( + exr_headers[i]->custom_attributes[j].value), + exr_headers[i]->custom_attributes[j].size); + } + } + + { // end of header + memory.push_back(0); + } + } + } + if (num_parts > 1) { + // end of header list + memory.push_back(0); + } + + tinyexr_uint64 chunk_offset = memory.size() + size_t(total_chunk_count) * sizeof(tinyexr_uint64); + + tinyexr_uint64 total_size = 0; + std::vector< std::vector< std::vector > > data_lists(num_parts); + for (unsigned int i = 0; i < num_parts; ++i) { + std::string e; + int ret = EncodeChunk(&exr_images[i], exr_headers[i], + channels[i], + chunk_count[i], + // starting offset of current chunk after part-number + chunk_offset, + num_parts > 1, + offset_data[i], // output: block offsets, must be initialized + data_lists[i], // output + total_size, // output + &e); + if (ret != TINYEXR_SUCCESS) { + if (!e.empty()) { + tinyexr::SetErrorMessage(e, err); + } + return 0; + } + chunk_offset = total_size; + } + + // Allocating required memory + if (total_size == 0) { // something went wrong + tinyexr::SetErrorMessage("Output memory size is zero", err); + return TINYEXR_ERROR_INVALID_DATA; + } + (*memory_out) = static_cast(malloc(size_t(total_size))); + + // Writing header + memcpy((*memory_out), &memory[0], memory.size()); + unsigned char* memory_ptr = *memory_out + memory.size(); + size_t sum = memory.size(); + + // Writing offset data for chunks + for (unsigned int i = 0; i < num_parts; ++i) { + if (exr_images[i].tiles) { + const EXRImage* level_image = &exr_images[i]; + int num_levels = (exr_headers[i]->tile_level_mode != TINYEXR_TILE_RIPMAP_LEVELS) ? 
+ offset_data[i].num_x_levels : (offset_data[i].num_x_levels * offset_data[i].num_y_levels); + for (int level_index = 0; level_index < num_levels; ++level_index) { + for (size_t j = 0; j < offset_data[i].offsets[level_index].size(); ++j) { + size_t num_bytes = sizeof(tinyexr_uint64) * offset_data[i].offsets[level_index][j].size(); + sum += num_bytes; + if (sum > total_size) { + tinyexr::SetErrorMessage("Invalid offset bytes in Tiled Part image.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + + memcpy(memory_ptr, + reinterpret_cast(&offset_data[i].offsets[level_index][j][0]), + num_bytes); + memory_ptr += num_bytes; + } + level_image = level_image->next_level; + } + } else { + size_t num_bytes = sizeof(tinyexr::tinyexr_uint64) * static_cast(chunk_count[i]); + sum += num_bytes; + if (sum > total_size) { + tinyexr::SetErrorMessage("Invalid offset bytes in Part image.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + std::vector& offsets = offset_data[i].offsets[0][0]; + memcpy(memory_ptr, reinterpret_cast(&offsets[0]), num_bytes); + memory_ptr += num_bytes; + } + } + + // Writing chunk data + for (unsigned int i = 0; i < num_parts; ++i) { + for (size_t j = 0; j < static_cast(chunk_count[i]); ++j) { + if (num_parts > 1) { + sum += 4; + if (sum > total_size) { + tinyexr::SetErrorMessage("Buffer overrun in reading Part image chunk data.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + unsigned int part_number = i; + swap4(&part_number); + memcpy(memory_ptr, &part_number, 4); + memory_ptr += 4; + } + sum += data_lists[i][j].size(); + if (sum > total_size) { + tinyexr::SetErrorMessage("Buffer overrun in reading Part image chunk data.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + memcpy(memory_ptr, &data_lists[i][j][0], data_lists[i][j].size()); + memory_ptr += data_lists[i][j].size(); + } + } + + if (sum != total_size) { + tinyexr::SetErrorMessage("Corrupted Part image chunk data.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + + return size_t(total_size); // 
OK +} + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +} // tinyexr + +size_t SaveEXRImageToMemory(const EXRImage* exr_image, + const EXRHeader* exr_header, + unsigned char** memory_out, const char** err) { + return tinyexr::SaveEXRNPartImageToMemory(exr_image, &exr_header, 1, memory_out, err); +} + +int SaveEXRImageToFile(const EXRImage *exr_image, const EXRHeader *exr_header, + const char *filename, const char **err) { + if (exr_image == NULL || filename == NULL || + exr_header->compression_type < 0) { + tinyexr::SetErrorMessage("Invalid argument for SaveEXRImageToFile", err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + +#if !TINYEXR_USE_PIZ + if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { + tinyexr::SetErrorMessage("PIZ compression is not supported in this build", + err); + return TINYEXR_ERROR_UNSUPPORTED_FEATURE; + } +#endif + +#if !TINYEXR_USE_ZFP + if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { + tinyexr::SetErrorMessage("ZFP compression is not supported in this build", + err); + return TINYEXR_ERROR_UNSUPPORTED_FEATURE; + } +#endif + + FILE *fp = NULL; +#ifdef _WIN32 +#if defined(_MSC_VER) || (defined(MINGW_HAS_SECURE_API) && MINGW_HAS_SECURE_API) // MSVC, MinGW GCC, or Clang + errno_t errcode = + _wfopen_s(&fp, tinyexr::UTF8ToWchar(filename).c_str(), L"wb"); + if (errcode != 0) { + tinyexr::SetErrorMessage("Cannot write a file: " + std::string(filename), + err); + return TINYEXR_ERROR_CANT_WRITE_FILE; + } +#else + // Unknown compiler or MinGW without MINGW_HAS_SECURE_API. 
+ fp = fopen(filename, "wb"); +#endif +#else + fp = fopen(filename, "wb"); +#endif + if (!fp) { + tinyexr::SetErrorMessage("Cannot write a file: " + std::string(filename), + err); + return TINYEXR_ERROR_CANT_WRITE_FILE; + } + + unsigned char *mem = NULL; + size_t mem_size = SaveEXRImageToMemory(exr_image, exr_header, &mem, err); + if (mem_size == 0) { + fclose(fp); + return TINYEXR_ERROR_SERIALIZATION_FAILED; + } + + size_t written_size = 0; + if ((mem_size > 0) && mem) { + written_size = fwrite(mem, 1, mem_size, fp); + } + free(mem); + + fclose(fp); + + if (written_size != mem_size) { + tinyexr::SetErrorMessage("Cannot write a file", err); + return TINYEXR_ERROR_CANT_WRITE_FILE; + } + + return TINYEXR_SUCCESS; +} + +size_t SaveEXRMultipartImageToMemory(const EXRImage* exr_images, + const EXRHeader** exr_headers, + unsigned int num_parts, + unsigned char** memory_out, const char** err) { + if (exr_images == NULL || exr_headers == NULL || num_parts < 2 || + memory_out == NULL) { + tinyexr::SetErrorMessage("Invalid argument for SaveEXRNPartImageToMemory", + err); + return 0; + } + return tinyexr::SaveEXRNPartImageToMemory(exr_images, exr_headers, num_parts, memory_out, err); +} + +int SaveEXRMultipartImageToFile(const EXRImage* exr_images, + const EXRHeader** exr_headers, + unsigned int num_parts, + const char* filename, + const char** err) { + if (exr_images == NULL || exr_headers == NULL || num_parts < 2) { + tinyexr::SetErrorMessage("Invalid argument for SaveEXRMultipartImageToFile", + err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + FILE *fp = NULL; +#ifdef _WIN32 +#if defined(_MSC_VER) || (defined(MINGW_HAS_SECURE_API) && MINGW_HAS_SECURE_API) // MSVC, MinGW GCC, or Clang. 
+ errno_t errcode = + _wfopen_s(&fp, tinyexr::UTF8ToWchar(filename).c_str(), L"wb"); + if (errcode != 0) { + tinyexr::SetErrorMessage("Cannot write a file: " + std::string(filename), + err); + return TINYEXR_ERROR_CANT_WRITE_FILE; + } +#else + // Unknown compiler or MinGW without MINGW_HAS_SECURE_API. + fp = fopen(filename, "wb"); +#endif +#else + fp = fopen(filename, "wb"); +#endif + if (!fp) { + tinyexr::SetErrorMessage("Cannot write a file: " + std::string(filename), + err); + return TINYEXR_ERROR_CANT_WRITE_FILE; + } + + unsigned char *mem = NULL; + size_t mem_size = SaveEXRMultipartImageToMemory(exr_images, exr_headers, num_parts, &mem, err); + if (mem_size == 0) { + fclose(fp); + return TINYEXR_ERROR_SERIALIZATION_FAILED; + } + + size_t written_size = 0; + if ((mem_size > 0) && mem) { + written_size = fwrite(mem, 1, mem_size, fp); + } + free(mem); + + fclose(fp); + + if (written_size != mem_size) { + tinyexr::SetErrorMessage("Cannot write a file", err); + return TINYEXR_ERROR_CANT_WRITE_FILE; + } + + return TINYEXR_SUCCESS; +} + +int LoadDeepEXR(DeepImage *deep_image, const char *filename, const char **err) { + if (deep_image == NULL) { + tinyexr::SetErrorMessage("Invalid argument for LoadDeepEXR", err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + MemoryMappedFile file(filename); + if (!file.valid()) { + tinyexr::SetErrorMessage("Cannot read file " + std::string(filename), err); + return TINYEXR_ERROR_CANT_OPEN_FILE; + } + + if (file.size == 0) { + tinyexr::SetErrorMessage("File size is zero : " + std::string(filename), + err); + return TINYEXR_ERROR_INVALID_FILE; + } + + const char *head = reinterpret_cast(file.data); + const char *marker = reinterpret_cast(file.data); + + // Header check. + { + const char header[] = {0x76, 0x2f, 0x31, 0x01}; + + if (memcmp(marker, header, 4) != 0) { + tinyexr::SetErrorMessage("Invalid magic number", err); + return TINYEXR_ERROR_INVALID_MAGIC_NUMBER; + } + marker += 4; + } + + // Version, scanline. 
+ { + // ver 2.0, scanline, deep bit on(0x800) + // must be [2, 0, 0, 0] + if (marker[0] != 2 || marker[1] != 8 || marker[2] != 0 || marker[3] != 0) { + tinyexr::SetErrorMessage("Unsupported version or scanline", err); + return TINYEXR_ERROR_UNSUPPORTED_FORMAT; + } + + marker += 4; + } + + int dx = -1; + int dy = -1; + int dw = -1; + int dh = -1; + int num_scanline_blocks = 1; // 16 for ZIP compression. + int compression_type = -1; + int num_channels = -1; + std::vector channels; + + // Read attributes + size_t size = file.size - tinyexr::kEXRVersionSize; + for (;;) { + if (0 == size) { + return TINYEXR_ERROR_INVALID_DATA; + } else if (marker[0] == '\0') { + marker++; + size--; + break; + } + + std::string attr_name; + std::string attr_type; + std::vector data; + size_t marker_size; + if (!tinyexr::ReadAttribute(&attr_name, &attr_type, &data, &marker_size, + marker, size)) { + std::stringstream ss; + ss << "Failed to parse attribute\n"; + tinyexr::SetErrorMessage(ss.str(), err); + return TINYEXR_ERROR_INVALID_DATA; + } + marker += marker_size; + size -= marker_size; + + if (attr_name.compare("compression") == 0) { + compression_type = data[0]; + if (compression_type > TINYEXR_COMPRESSIONTYPE_PIZ) { + std::stringstream ss; + ss << "Unsupported compression type : " << compression_type; + tinyexr::SetErrorMessage(ss.str(), err); + return TINYEXR_ERROR_UNSUPPORTED_FORMAT; + } + + if (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) { + num_scanline_blocks = 16; + } + + } else if (attr_name.compare("channels") == 0) { + // name: zero-terminated string, from 1 to 255 bytes long + // pixel type: int, possible values are: UINT = 0 HALF = 1 FLOAT = 2 + // pLinear: unsigned char, possible values are 0 and 1 + // reserved: three chars, should be zero + // xSampling: int + // ySampling: int + + if (!tinyexr::ReadChannelInfo(channels, data)) { + tinyexr::SetErrorMessage("Failed to parse channel info", err); + return TINYEXR_ERROR_INVALID_DATA; + } + + num_channels = 
static_cast(channels.size()); + + if (num_channels < 1) { + tinyexr::SetErrorMessage("Invalid channels format", err); + return TINYEXR_ERROR_INVALID_DATA; + } + + } else if (attr_name.compare("dataWindow") == 0) { + memcpy(&dx, &data.at(0), sizeof(int)); + memcpy(&dy, &data.at(4), sizeof(int)); + memcpy(&dw, &data.at(8), sizeof(int)); + memcpy(&dh, &data.at(12), sizeof(int)); + tinyexr::swap4(&dx); + tinyexr::swap4(&dy); + tinyexr::swap4(&dw); + tinyexr::swap4(&dh); + + } else if (attr_name.compare("displayWindow") == 0) { + int x; + int y; + int w; + int h; + memcpy(&x, &data.at(0), sizeof(int)); + memcpy(&y, &data.at(4), sizeof(int)); + memcpy(&w, &data.at(8), sizeof(int)); + memcpy(&h, &data.at(12), sizeof(int)); + tinyexr::swap4(&x); + tinyexr::swap4(&y); + tinyexr::swap4(&w); + tinyexr::swap4(&h); + } + } + + TINYEXR_CHECK_AND_RETURN_C(dx >= 0, TINYEXR_ERROR_INVALID_DATA); + TINYEXR_CHECK_AND_RETURN_C(dy >= 0, TINYEXR_ERROR_INVALID_DATA); + TINYEXR_CHECK_AND_RETURN_C(dw >= 0, TINYEXR_ERROR_INVALID_DATA); + TINYEXR_CHECK_AND_RETURN_C(dh >= 0, TINYEXR_ERROR_INVALID_DATA); + TINYEXR_CHECK_AND_RETURN_C(num_channels >= 1, TINYEXR_ERROR_INVALID_DATA); + + int data_width = dw - dx + 1; + int data_height = dh - dy + 1; + + // Read offset tables. 
+ int num_blocks = data_height / num_scanline_blocks; + if (num_blocks * num_scanline_blocks < data_height) { + num_blocks++; + } + + std::vector offsets(static_cast(num_blocks)); + + for (size_t y = 0; y < static_cast(num_blocks); y++) { + tinyexr::tinyexr_int64 offset; + memcpy(&offset, marker, sizeof(tinyexr::tinyexr_int64)); + tinyexr::swap8(reinterpret_cast(&offset)); + marker += sizeof(tinyexr::tinyexr_int64); // = 8 + offsets[y] = offset; + } + +#if TINYEXR_USE_PIZ + if ((compression_type == TINYEXR_COMPRESSIONTYPE_NONE) || + (compression_type == TINYEXR_COMPRESSIONTYPE_RLE) || + (compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS) || + (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) || + (compression_type == TINYEXR_COMPRESSIONTYPE_PIZ)) { +#else + if ((compression_type == TINYEXR_COMPRESSIONTYPE_NONE) || + (compression_type == TINYEXR_COMPRESSIONTYPE_RLE) || + (compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS) || + (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP)) { +#endif + // OK + } else { + tinyexr::SetErrorMessage("Unsupported compression format", err); + return TINYEXR_ERROR_UNSUPPORTED_FORMAT; + } + + deep_image->image = static_cast( + malloc(sizeof(float **) * static_cast(num_channels))); + for (int c = 0; c < num_channels; c++) { + deep_image->image[c] = static_cast( + malloc(sizeof(float *) * static_cast(data_height))); + for (int y = 0; y < data_height; y++) { + } + } + + deep_image->offset_table = static_cast( + malloc(sizeof(int *) * static_cast(data_height))); + for (int y = 0; y < data_height; y++) { + deep_image->offset_table[y] = static_cast( + malloc(sizeof(int) * static_cast(data_width))); + } + + for (size_t y = 0; y < static_cast(num_blocks); y++) { + const unsigned char *data_ptr = + reinterpret_cast(head + offsets[y]); + + // int: y coordinate + // int64: packed size of pixel offset table + // int64: packed size of sample data + // int64: unpacked size of sample data + // compressed pixel offset table + // compressed sample data + 
int line_no; + tinyexr::tinyexr_int64 packedOffsetTableSize; + tinyexr::tinyexr_int64 packedSampleDataSize; + tinyexr::tinyexr_int64 unpackedSampleDataSize; + memcpy(&line_no, data_ptr, sizeof(int)); + memcpy(&packedOffsetTableSize, data_ptr + 4, + sizeof(tinyexr::tinyexr_int64)); + memcpy(&packedSampleDataSize, data_ptr + 12, + sizeof(tinyexr::tinyexr_int64)); + memcpy(&unpackedSampleDataSize, data_ptr + 20, + sizeof(tinyexr::tinyexr_int64)); + + tinyexr::swap4(&line_no); + tinyexr::swap8( + reinterpret_cast(&packedOffsetTableSize)); + tinyexr::swap8( + reinterpret_cast(&packedSampleDataSize)); + tinyexr::swap8( + reinterpret_cast(&unpackedSampleDataSize)); + + std::vector pixelOffsetTable(static_cast(data_width)); + + // decode pixel offset table. + { + unsigned long dstLen = + static_cast(pixelOffsetTable.size() * sizeof(int)); + if (!tinyexr::DecompressZip( + reinterpret_cast(&pixelOffsetTable.at(0)), + &dstLen, data_ptr + 28, + static_cast(packedOffsetTableSize))) { + return false; + } + + TINYEXR_CHECK_AND_RETURN_C(dstLen == pixelOffsetTable.size() * sizeof(int), TINYEXR_ERROR_INVALID_DATA); + for (size_t i = 0; i < static_cast(data_width); i++) { + deep_image->offset_table[y][i] = pixelOffsetTable[i]; + } + } + + std::vector sample_data( + static_cast(unpackedSampleDataSize)); + + // decode sample data. 
+ { + unsigned long dstLen = static_cast(unpackedSampleDataSize); + if (dstLen) { + if (!tinyexr::DecompressZip( + reinterpret_cast(&sample_data.at(0)), &dstLen, + data_ptr + 28 + packedOffsetTableSize, + static_cast(packedSampleDataSize))) { + return false; + } + TINYEXR_CHECK_AND_RETURN_C(dstLen == static_cast(unpackedSampleDataSize), TINYEXR_ERROR_INVALID_DATA); + } + } + + // decode sample + int sampleSize = -1; + std::vector channel_offset_list(static_cast(num_channels)); + { + int channel_offset = 0; + for (size_t i = 0; i < static_cast(num_channels); i++) { + channel_offset_list[i] = channel_offset; + if (channels[i].pixel_type == TINYEXR_PIXELTYPE_UINT) { // UINT + channel_offset += 4; + } else if (channels[i].pixel_type == TINYEXR_PIXELTYPE_HALF) { // half + channel_offset += 2; + } else if (channels[i].pixel_type == + TINYEXR_PIXELTYPE_FLOAT) { // float + channel_offset += 4; + } else { + tinyexr::SetErrorMessage("Invalid pixel_type in chnnels.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + } + sampleSize = channel_offset; + } + TINYEXR_CHECK_AND_RETURN_C(sampleSize >= 2, TINYEXR_ERROR_INVALID_DATA); + + TINYEXR_CHECK_AND_RETURN_C(static_cast( + pixelOffsetTable[static_cast(data_width - 1)] * + sampleSize) == sample_data.size(), TINYEXR_ERROR_INVALID_DATA); + int samples_per_line = static_cast(sample_data.size()) / sampleSize; + + // + // Alloc memory + // + + // + // pixel data is stored as image[channels][pixel_samples] + // + { + tinyexr::tinyexr_uint64 data_offset = 0; + for (size_t c = 0; c < static_cast(num_channels); c++) { + deep_image->image[c][y] = static_cast( + malloc(sizeof(float) * static_cast(samples_per_line))); + + if (channels[c].pixel_type == 0) { // UINT + for (size_t x = 0; x < static_cast(samples_per_line); x++) { + unsigned int ui; + unsigned int *src_ptr = reinterpret_cast( + &sample_data.at(size_t(data_offset) + x * sizeof(int))); + tinyexr::cpy4(&ui, src_ptr); + deep_image->image[c][y][x] = static_cast(ui); // @fixme + } + 
data_offset += + sizeof(unsigned int) * static_cast(samples_per_line); + } else if (channels[c].pixel_type == 1) { // half + for (size_t x = 0; x < static_cast(samples_per_line); x++) { + tinyexr::FP16 f16; + const unsigned short *src_ptr = reinterpret_cast( + &sample_data.at(size_t(data_offset) + x * sizeof(short))); + tinyexr::cpy2(&(f16.u), src_ptr); + tinyexr::FP32 f32 = half_to_float(f16); + deep_image->image[c][y][x] = f32.f; + } + data_offset += sizeof(short) * static_cast(samples_per_line); + } else { // float + for (size_t x = 0; x < static_cast(samples_per_line); x++) { + float f; + const float *src_ptr = reinterpret_cast( + &sample_data.at(size_t(data_offset) + x * sizeof(float))); + tinyexr::cpy4(&f, src_ptr); + deep_image->image[c][y][x] = f; + } + data_offset += sizeof(float) * static_cast(samples_per_line); + } + } + } + } // y + + deep_image->width = data_width; + deep_image->height = data_height; + + deep_image->channel_names = static_cast( + malloc(sizeof(const char *) * static_cast(num_channels))); + for (size_t c = 0; c < static_cast(num_channels); c++) { +#ifdef _WIN32 + deep_image->channel_names[c] = _strdup(channels[c].name.c_str()); +#else + deep_image->channel_names[c] = strdup(channels[c].name.c_str()); +#endif + } + deep_image->num_channels = num_channels; + + return TINYEXR_SUCCESS; +} + +void InitEXRImage(EXRImage *exr_image) { + if (exr_image == NULL) { + return; + } + + exr_image->width = 0; + exr_image->height = 0; + exr_image->num_channels = 0; + + exr_image->images = NULL; + exr_image->tiles = NULL; + exr_image->next_level = NULL; + exr_image->level_x = 0; + exr_image->level_y = 0; + + exr_image->num_tiles = 0; +} + +void FreeEXRErrorMessage(const char *msg) { + if (msg) { + free(reinterpret_cast(const_cast(msg))); + } + return; +} + +void InitEXRHeader(EXRHeader *exr_header) { + if (exr_header == NULL) { + return; + } + + memset(exr_header, 0, sizeof(EXRHeader)); +} + +int FreeEXRHeader(EXRHeader *exr_header) { + if (exr_header == 
NULL) { + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + if (exr_header->channels) { + free(exr_header->channels); + } + + if (exr_header->pixel_types) { + free(exr_header->pixel_types); + } + + if (exr_header->requested_pixel_types) { + free(exr_header->requested_pixel_types); + } + + for (int i = 0; i < exr_header->num_custom_attributes; i++) { + if (exr_header->custom_attributes[i].value) { + free(exr_header->custom_attributes[i].value); + } + } + + if (exr_header->custom_attributes) { + free(exr_header->custom_attributes); + } + + EXRSetNameAttr(exr_header, NULL); + + return TINYEXR_SUCCESS; +} + +void EXRSetNameAttr(EXRHeader* exr_header, const char* name) { + if (exr_header == NULL) { + return; + } + memset(exr_header->name, 0, 256); + if (name != NULL) { + size_t len = std::min(strlen(name), size_t(255)); + if (len) { + memcpy(exr_header->name, name, len); + } + } +} + +int EXRNumLevels(const EXRImage* exr_image) { + if (exr_image == NULL) return 0; + if(exr_image->images) return 1; // scanlines + int levels = 1; + const EXRImage* level_image = exr_image; + while((level_image = level_image->next_level)) ++levels; + return levels; +} + +int FreeEXRImage(EXRImage *exr_image) { + if (exr_image == NULL) { + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + if (exr_image->next_level) { + FreeEXRImage(exr_image->next_level); + delete exr_image->next_level; + } + + for (int i = 0; i < exr_image->num_channels; i++) { + if (exr_image->images && exr_image->images[i]) { + free(exr_image->images[i]); + } + } + + if (exr_image->images) { + free(exr_image->images); + } + + if (exr_image->tiles) { + for (int tid = 0; tid < exr_image->num_tiles; tid++) { + for (int i = 0; i < exr_image->num_channels; i++) { + if (exr_image->tiles[tid].images && exr_image->tiles[tid].images[i]) { + free(exr_image->tiles[tid].images[i]); + } + } + if (exr_image->tiles[tid].images) { + free(exr_image->tiles[tid].images); + } + } + free(exr_image->tiles); + } + + return TINYEXR_SUCCESS; +} + +int 
ParseEXRHeaderFromFile(EXRHeader *exr_header, const EXRVersion *exr_version, + const char *filename, const char **err) { + if (exr_header == NULL || exr_version == NULL || filename == NULL) { + tinyexr::SetErrorMessage("Invalid argument for ParseEXRHeaderFromFile", + err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + MemoryMappedFile file(filename); + if (!file.valid()) { + tinyexr::SetErrorMessage("Cannot read file " + std::string(filename), err); + return TINYEXR_ERROR_CANT_OPEN_FILE; + } + + return ParseEXRHeaderFromMemory(exr_header, exr_version, file.data, file.size, + err); +} + +int ParseEXRMultipartHeaderFromMemory(EXRHeader ***exr_headers, + int *num_headers, + const EXRVersion *exr_version, + const unsigned char *memory, size_t size, + const char **err) { + if (memory == NULL || exr_headers == NULL || num_headers == NULL || + exr_version == NULL) { + // Invalid argument + tinyexr::SetErrorMessage( + "Invalid argument for ParseEXRMultipartHeaderFromMemory", err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + if (size < tinyexr::kEXRVersionSize) { + tinyexr::SetErrorMessage("Data size too short", err); + return TINYEXR_ERROR_INVALID_DATA; + } + + const unsigned char *marker = memory + tinyexr::kEXRVersionSize; + size_t marker_size = size - tinyexr::kEXRVersionSize; + + std::vector infos; + + for (;;) { + tinyexr::HeaderInfo info; + info.clear(); + + std::string err_str; + bool empty_header = false; + int ret = ParseEXRHeader(&info, &empty_header, exr_version, &err_str, + marker, marker_size); + + if (ret != TINYEXR_SUCCESS) { + + // Free malloc-allocated memory here. + for (size_t i = 0; i < info.attributes.size(); i++) { + if (info.attributes[i].value) { + free(info.attributes[i].value); + } + } + + tinyexr::SetErrorMessage(err_str, err); + return ret; + } + + if (empty_header) { + marker += 1; // skip '\0' + break; + } + + // `chunkCount` must exist in the header. + if (info.chunk_count == 0) { + + // Free malloc-allocated memory here. 
+ for (size_t i = 0; i < info.attributes.size(); i++) { + if (info.attributes[i].value) { + free(info.attributes[i].value); + } + } + + tinyexr::SetErrorMessage( + "`chunkCount' attribute is not found in the header.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + + infos.push_back(info); + + // move to next header. + marker += info.header_len; + size -= info.header_len; + } + + // allocate memory for EXRHeader and create array of EXRHeader pointers. + (*exr_headers) = + static_cast(malloc(sizeof(EXRHeader *) * infos.size())); + + + int retcode = TINYEXR_SUCCESS; + + for (size_t i = 0; i < infos.size(); i++) { + EXRHeader *exr_header = static_cast(malloc(sizeof(EXRHeader))); + memset(exr_header, 0, sizeof(EXRHeader)); + + std::string warn; + std::string _err; + if (!ConvertHeader(exr_header, infos[i], &warn, &_err)) { + + // Free malloc-allocated memory here. + for (size_t k = 0; k < infos[i].attributes.size(); k++) { + if (infos[i].attributes[k].value) { + free(infos[i].attributes[k].value); + } + } + + if (!_err.empty()) { + tinyexr::SetErrorMessage( + _err, err); + } + // continue to converting headers + retcode = TINYEXR_ERROR_INVALID_HEADER; + } + + exr_header->multipart = exr_version->multipart ? 
1 : 0; + + (*exr_headers)[i] = exr_header; + } + + (*num_headers) = static_cast(infos.size()); + + return retcode; +} + +int ParseEXRMultipartHeaderFromFile(EXRHeader ***exr_headers, int *num_headers, + const EXRVersion *exr_version, + const char *filename, const char **err) { + if (exr_headers == NULL || num_headers == NULL || exr_version == NULL || + filename == NULL) { + tinyexr::SetErrorMessage( + "Invalid argument for ParseEXRMultipartHeaderFromFile()", err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + MemoryMappedFile file(filename); + if (!file.valid()) { + tinyexr::SetErrorMessage("Cannot read file " + std::string(filename), err); + return TINYEXR_ERROR_CANT_OPEN_FILE; + } + + return ParseEXRMultipartHeaderFromMemory( + exr_headers, num_headers, exr_version, file.data, file.size, err); +} + +int ParseEXRVersionFromMemory(EXRVersion *version, const unsigned char *memory, + size_t size) { + if (version == NULL || memory == NULL) { + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + if (size < tinyexr::kEXRVersionSize) { + return TINYEXR_ERROR_INVALID_DATA; + } + + const unsigned char *marker = memory; + + // Header check. + { + const char header[] = {0x76, 0x2f, 0x31, 0x01}; + + if (memcmp(marker, header, 4) != 0) { + return TINYEXR_ERROR_INVALID_MAGIC_NUMBER; + } + marker += 4; + } + + version->tiled = false; + version->long_name = false; + version->non_image = false; + version->multipart = false; + + // Parse version header. 
+ { + // must be 2 + if (marker[0] != 2) { + return TINYEXR_ERROR_INVALID_EXR_VERSION; + } + + if (version == NULL) { + return TINYEXR_SUCCESS; // May OK + } + + version->version = 2; + + if (marker[1] & 0x2) { // 9th bit + version->tiled = true; + } + if (marker[1] & 0x4) { // 10th bit + version->long_name = true; + } + if (marker[1] & 0x8) { // 11th bit + version->non_image = true; // (deep image) + } + if (marker[1] & 0x10) { // 12th bit + version->multipart = true; + } + } + + return TINYEXR_SUCCESS; +} + +int ParseEXRVersionFromFile(EXRVersion *version, const char *filename) { + if (filename == NULL) { + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + FILE *fp = NULL; +#ifdef _WIN32 +#if defined(_MSC_VER) || (defined(MINGW_HAS_SECURE_API) && MINGW_HAS_SECURE_API) // MSVC, MinGW GCC, or Clang. + errno_t err = _wfopen_s(&fp, tinyexr::UTF8ToWchar(filename).c_str(), L"rb"); + if (err != 0) { + // TODO(syoyo): return wfopen_s erro code + return TINYEXR_ERROR_CANT_OPEN_FILE; + } +#else + // Unknown compiler or MinGW without MINGW_HAS_SECURE_API. + fp = fopen(filename, "rb"); +#endif +#else + fp = fopen(filename, "rb"); +#endif + if (!fp) { + return TINYEXR_ERROR_CANT_OPEN_FILE; + } + + // Try to read kEXRVersionSize bytes; if the file is shorter than + // kEXRVersionSize, this will produce an error. This avoids a call to + // fseek(fp, 0, SEEK_END), which is not required to be supported by C + // implementations. 
+ unsigned char buf[tinyexr::kEXRVersionSize]; + size_t ret = fread(&buf[0], 1, tinyexr::kEXRVersionSize, fp); + fclose(fp); + + if (ret != tinyexr::kEXRVersionSize) { + return TINYEXR_ERROR_INVALID_FILE; + } + + return ParseEXRVersionFromMemory(version, buf, tinyexr::kEXRVersionSize); +} + +int LoadEXRMultipartImageFromMemory(EXRImage *exr_images, + const EXRHeader **exr_headers, + unsigned int num_parts, + const unsigned char *memory, + const size_t size, const char **err) { + if (exr_images == NULL || exr_headers == NULL || num_parts == 0 || + memory == NULL || (size <= tinyexr::kEXRVersionSize)) { + tinyexr::SetErrorMessage( + "Invalid argument for LoadEXRMultipartImageFromMemory()", err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + // compute total header size. + size_t total_header_size = 0; + for (unsigned int i = 0; i < num_parts; i++) { + if (exr_headers[i]->header_len == 0) { + tinyexr::SetErrorMessage("EXRHeader variable is not initialized.", err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + total_header_size += exr_headers[i]->header_len; + } + + const char *marker = reinterpret_cast( + memory + total_header_size + 4 + + 4); // +8 for magic number and version header. + + marker += 1; // Skip empty header. + + // NOTE 1: + // In multipart image, There is 'part number' before chunk data. + // 4 byte : part number + // 4+ : chunk + // + // NOTE 2: + // EXR spec says 'part number' is 'unsigned long' but actually this is + // 'unsigned int(4 bytes)' in OpenEXR implementation... + // http://www.openexr.com/openexrfilelayout.pdf + + // Load chunk offset table. 
+ std::vector chunk_offset_table_list; + chunk_offset_table_list.reserve(num_parts); + for (size_t i = 0; i < static_cast(num_parts); i++) { + chunk_offset_table_list.resize(chunk_offset_table_list.size() + 1); + tinyexr::OffsetData& offset_data = chunk_offset_table_list.back(); + if (!exr_headers[i]->tiled || exr_headers[i]->tile_level_mode == TINYEXR_TILE_ONE_LEVEL) { + tinyexr::InitSingleResolutionOffsets(offset_data, size_t(exr_headers[i]->chunk_count)); + std::vector& offset_table = offset_data.offsets[0][0]; + + for (size_t c = 0; c < offset_table.size(); c++) { + tinyexr::tinyexr_uint64 offset; + memcpy(&offset, marker, 8); + tinyexr::swap8(&offset); + + if (offset >= size) { + tinyexr::SetErrorMessage("Invalid offset size in EXR header chunks.", + err); + return TINYEXR_ERROR_INVALID_DATA; + } + + offset_table[c] = offset + 4; // +4 to skip 'part number' + marker += 8; + } + } else { + { + std::vector num_x_tiles, num_y_tiles; + if (!tinyexr::PrecalculateTileInfo(num_x_tiles, num_y_tiles, exr_headers[i])) { + tinyexr::SetErrorMessage("Invalid tile info.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + int num_blocks = InitTileOffsets(offset_data, exr_headers[i], num_x_tiles, num_y_tiles); + if (num_blocks != exr_headers[i]->chunk_count) { + tinyexr::SetErrorMessage("Invalid offset table size.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + } + for (unsigned int l = 0; l < offset_data.offsets.size(); ++l) { + for (unsigned int dy = 0; dy < offset_data.offsets[l].size(); ++dy) { + for (unsigned int dx = 0; dx < offset_data.offsets[l][dy].size(); ++dx) { + tinyexr::tinyexr_uint64 offset; + memcpy(&offset, marker, sizeof(tinyexr::tinyexr_uint64)); + tinyexr::swap8(&offset); + if (offset >= size) { + tinyexr::SetErrorMessage("Invalid offset size in EXR header chunks.", + err); + return TINYEXR_ERROR_INVALID_DATA; + } + offset_data.offsets[l][dy][dx] = offset + 4; // +4 to skip 'part number' + marker += sizeof(tinyexr::tinyexr_uint64); // = 8 + } + } + } + } 
+ } + + // Decode image. + for (size_t i = 0; i < static_cast(num_parts); i++) { + tinyexr::OffsetData &offset_data = chunk_offset_table_list[i]; + + // First check 'part number' is identical to 'i' + for (unsigned int l = 0; l < offset_data.offsets.size(); ++l) + for (unsigned int dy = 0; dy < offset_data.offsets[l].size(); ++dy) + for (unsigned int dx = 0; dx < offset_data.offsets[l][dy].size(); ++dx) { + + const unsigned char *part_number_addr = + memory + offset_data.offsets[l][dy][dx] - 4; // -4 to move to 'part number' field. + unsigned int part_no; + memcpy(&part_no, part_number_addr, sizeof(unsigned int)); // 4 + tinyexr::swap4(&part_no); + + if (part_no != i) { + tinyexr::SetErrorMessage("Invalid `part number' in EXR header chunks.", + err); + return TINYEXR_ERROR_INVALID_DATA; + } + } + + std::string e; + int ret = tinyexr::DecodeChunk(&exr_images[i], exr_headers[i], offset_data, + memory, size, &e); + if (ret != TINYEXR_SUCCESS) { + if (!e.empty()) { + tinyexr::SetErrorMessage(e, err); + } + return ret; + } + } + + return TINYEXR_SUCCESS; +} + +int LoadEXRMultipartImageFromFile(EXRImage *exr_images, + const EXRHeader **exr_headers, + unsigned int num_parts, const char *filename, + const char **err) { + if (exr_images == NULL || exr_headers == NULL || num_parts == 0) { + tinyexr::SetErrorMessage( + "Invalid argument for LoadEXRMultipartImageFromFile", err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + MemoryMappedFile file(filename); + if (!file.valid()) { + tinyexr::SetErrorMessage("Cannot read file " + std::string(filename), err); + return TINYEXR_ERROR_CANT_OPEN_FILE; + } + + return LoadEXRMultipartImageFromMemory(exr_images, exr_headers, num_parts, + file.data, file.size, err); +} + +int SaveEXRToMemory(const float *data, int width, int height, int components, + const int save_as_fp16, unsigned char **outbuf, const char **err) { + + if ((components == 1) || components == 3 || components == 4) { + // OK + } else { + std::stringstream ss; + ss << 
"Unsupported component value : " << components << std::endl; + + tinyexr::SetErrorMessage(ss.str(), err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + EXRHeader header; + InitEXRHeader(&header); + + if ((width < 16) && (height < 16)) { + // No compression for small image. + header.compression_type = TINYEXR_COMPRESSIONTYPE_NONE; + } else { + header.compression_type = TINYEXR_COMPRESSIONTYPE_ZIP; + } + + EXRImage image; + InitEXRImage(&image); + + image.num_channels = components; + + std::vector images[4]; + + if (components == 1) { + images[0].resize(static_cast(width * height)); + memcpy(images[0].data(), data, sizeof(float) * size_t(width * height)); + } else { + images[0].resize(static_cast(width * height)); + images[1].resize(static_cast(width * height)); + images[2].resize(static_cast(width * height)); + images[3].resize(static_cast(width * height)); + + // Split RGB(A)RGB(A)RGB(A)... into R, G and B(and A) layers + for (size_t i = 0; i < static_cast(width * height); i++) { + images[0][i] = data[static_cast(components) * i + 0]; + images[1][i] = data[static_cast(components) * i + 1]; + images[2][i] = data[static_cast(components) * i + 2]; + if (components == 4) { + images[3][i] = data[static_cast(components) * i + 3]; + } + } + } + + float *image_ptr[4] = {0, 0, 0, 0}; + if (components == 4) { + image_ptr[0] = &(images[3].at(0)); // A + image_ptr[1] = &(images[2].at(0)); // B + image_ptr[2] = &(images[1].at(0)); // G + image_ptr[3] = &(images[0].at(0)); // R + } else if (components == 3) { + image_ptr[0] = &(images[2].at(0)); // B + image_ptr[1] = &(images[1].at(0)); // G + image_ptr[2] = &(images[0].at(0)); // R + } else if (components == 1) { + image_ptr[0] = &(images[0].at(0)); // A + } + + image.images = reinterpret_cast(image_ptr); + image.width = width; + image.height = height; + + header.num_channels = components; + header.channels = static_cast(malloc( + sizeof(EXRChannelInfo) * static_cast(header.num_channels))); + // Must be (A)BGR order, since 
most of EXR viewers expect this channel order. + if (components == 4) { +#ifdef _MSC_VER + strncpy_s(header.channels[0].name, "A", 255); + strncpy_s(header.channels[1].name, "B", 255); + strncpy_s(header.channels[2].name, "G", 255); + strncpy_s(header.channels[3].name, "R", 255); +#else + strncpy(header.channels[0].name, "A", 255); + strncpy(header.channels[1].name, "B", 255); + strncpy(header.channels[2].name, "G", 255); + strncpy(header.channels[3].name, "R", 255); +#endif + header.channels[0].name[strlen("A")] = '\0'; + header.channels[1].name[strlen("B")] = '\0'; + header.channels[2].name[strlen("G")] = '\0'; + header.channels[3].name[strlen("R")] = '\0'; + } else if (components == 3) { +#ifdef _MSC_VER + strncpy_s(header.channels[0].name, "B", 255); + strncpy_s(header.channels[1].name, "G", 255); + strncpy_s(header.channels[2].name, "R", 255); +#else + strncpy(header.channels[0].name, "B", 255); + strncpy(header.channels[1].name, "G", 255); + strncpy(header.channels[2].name, "R", 255); +#endif + header.channels[0].name[strlen("B")] = '\0'; + header.channels[1].name[strlen("G")] = '\0'; + header.channels[2].name[strlen("R")] = '\0'; + } else { +#ifdef _MSC_VER + strncpy_s(header.channels[0].name, "A", 255); +#else + strncpy(header.channels[0].name, "A", 255); +#endif + header.channels[0].name[strlen("A")] = '\0'; + } + + header.pixel_types = static_cast( + malloc(sizeof(int) * static_cast(header.num_channels))); + header.requested_pixel_types = static_cast( + malloc(sizeof(int) * static_cast(header.num_channels))); + for (int i = 0; i < header.num_channels; i++) { + header.pixel_types[i] = + TINYEXR_PIXELTYPE_FLOAT; // pixel type of input image + + if (save_as_fp16 > 0) { + header.requested_pixel_types[i] = + TINYEXR_PIXELTYPE_HALF; // save with half(fp16) pixel format + } else { + header.requested_pixel_types[i] = + TINYEXR_PIXELTYPE_FLOAT; // save with float(fp32) pixel format(i.e. 
+ // no precision reduction) + } + } + + + unsigned char *mem_buf; + size_t mem_size = SaveEXRImageToMemory(&image, &header, &mem_buf, err); + + if (mem_size == 0) { + return TINYEXR_ERROR_SERIALIZATION_FAILED; + } + + free(header.channels); + free(header.pixel_types); + free(header.requested_pixel_types); + + if (mem_size > size_t(std::numeric_limits::max())) { + free(mem_buf); + return TINYEXR_ERROR_DATA_TOO_LARGE; + } + + (*outbuf) = mem_buf; + + return int(mem_size); +} + +int SaveEXR(const float *data, int width, int height, int components, + const int save_as_fp16, const char *outfilename, const char **err) { + if ((components == 1) || components == 3 || components == 4) { + // OK + } else { + std::stringstream ss; + ss << "Unsupported component value : " << components << std::endl; + + tinyexr::SetErrorMessage(ss.str(), err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + EXRHeader header; + InitEXRHeader(&header); + + if ((width < 16) && (height < 16)) { + // No compression for small image. + header.compression_type = TINYEXR_COMPRESSIONTYPE_NONE; + } else { + header.compression_type = TINYEXR_COMPRESSIONTYPE_ZIP; + } + + EXRImage image; + InitEXRImage(&image); + + image.num_channels = components; + + std::vector images[4]; + const size_t pixel_count = + static_cast(width) * static_cast(height); + + if (components == 1) { + images[0].resize(pixel_count); + memcpy(images[0].data(), data, sizeof(float) * pixel_count); + } else { + images[0].resize(pixel_count); + images[1].resize(pixel_count); + images[2].resize(pixel_count); + images[3].resize(pixel_count); + + // Split RGB(A)RGB(A)RGB(A)... 
into R, G and B(and A) layers + for (size_t i = 0; i < pixel_count; i++) { + images[0][i] = data[static_cast(components) * i + 0]; + images[1][i] = data[static_cast(components) * i + 1]; + images[2][i] = data[static_cast(components) * i + 2]; + if (components == 4) { + images[3][i] = data[static_cast(components) * i + 3]; + } + } + } + + float *image_ptr[4] = {0, 0, 0, 0}; + if (components == 4) { + image_ptr[0] = &(images[3].at(0)); // A + image_ptr[1] = &(images[2].at(0)); // B + image_ptr[2] = &(images[1].at(0)); // G + image_ptr[3] = &(images[0].at(0)); // R + } else if (components == 3) { + image_ptr[0] = &(images[2].at(0)); // B + image_ptr[1] = &(images[1].at(0)); // G + image_ptr[2] = &(images[0].at(0)); // R + } else if (components == 1) { + image_ptr[0] = &(images[0].at(0)); // A + } + + image.images = reinterpret_cast(image_ptr); + image.width = width; + image.height = height; + + header.num_channels = components; + header.channels = static_cast(malloc( + sizeof(EXRChannelInfo) * static_cast(header.num_channels))); + // Must be (A)BGR order, since most of EXR viewers expect this channel order. 
+ if (components == 4) { +#ifdef _MSC_VER + strncpy_s(header.channels[0].name, "A", 255); + strncpy_s(header.channels[1].name, "B", 255); + strncpy_s(header.channels[2].name, "G", 255); + strncpy_s(header.channels[3].name, "R", 255); +#else + strncpy(header.channels[0].name, "A", 255); + strncpy(header.channels[1].name, "B", 255); + strncpy(header.channels[2].name, "G", 255); + strncpy(header.channels[3].name, "R", 255); +#endif + header.channels[0].name[strlen("A")] = '\0'; + header.channels[1].name[strlen("B")] = '\0'; + header.channels[2].name[strlen("G")] = '\0'; + header.channels[3].name[strlen("R")] = '\0'; + } else if (components == 3) { +#ifdef _MSC_VER + strncpy_s(header.channels[0].name, "B", 255); + strncpy_s(header.channels[1].name, "G", 255); + strncpy_s(header.channels[2].name, "R", 255); +#else + strncpy(header.channels[0].name, "B", 255); + strncpy(header.channels[1].name, "G", 255); + strncpy(header.channels[2].name, "R", 255); +#endif + header.channels[0].name[strlen("B")] = '\0'; + header.channels[1].name[strlen("G")] = '\0'; + header.channels[2].name[strlen("R")] = '\0'; + } else { +#ifdef _MSC_VER + strncpy_s(header.channels[0].name, "A", 255); +#else + strncpy(header.channels[0].name, "A", 255); +#endif + header.channels[0].name[strlen("A")] = '\0'; + } + + header.pixel_types = static_cast( + malloc(sizeof(int) * static_cast(header.num_channels))); + header.requested_pixel_types = static_cast( + malloc(sizeof(int) * static_cast(header.num_channels))); + for (int i = 0; i < header.num_channels; i++) { + header.pixel_types[i] = + TINYEXR_PIXELTYPE_FLOAT; // pixel type of input image + + if (save_as_fp16 > 0) { + header.requested_pixel_types[i] = + TINYEXR_PIXELTYPE_HALF; // save with half(fp16) pixel format + } else { + header.requested_pixel_types[i] = + TINYEXR_PIXELTYPE_FLOAT; // save with float(fp32) pixel format(i.e. 
+ // no precision reduction) + } + } + + int ret = SaveEXRImageToFile(&image, &header, outfilename, err); + + free(header.channels); + free(header.pixel_types); + free(header.requested_pixel_types); + + return ret; +} + +#ifdef __clang__ +// zero-as-null-pointer-constant +#pragma clang diagnostic pop +#endif + +#endif // TINYEXR_IMPLEMENTATION_DEFINED +#endif // TINYEXR_IMPLEMENTATION diff --git a/include/bsppp/LumpData.h b/include/bsppp/LumpData.h index df7d21986..20af29078 100644 --- a/include/bsppp/LumpData.h +++ b/include/bsppp/LumpData.h @@ -1,6 +1,6 @@ #pragma once -#include +#include namespace bsppp { diff --git a/include/dmxpp/structs/Value.h b/include/dmxpp/structs/Value.h index 31c69374b..3aab46e43 100644 --- a/include/dmxpp/structs/Value.h +++ b/include/dmxpp/structs/Value.h @@ -1,19 +1,14 @@ #pragma once #include -#include #include #include #include #include -#include #include #include -#include -#include -#include -#include +#include namespace dmxpp { @@ -49,7 +44,7 @@ using EulerAngles = sourcepp::math::EulerAngles; using Quaternion = sourcepp::math::Quat; -using Matrix4x4 = sourcepp::math::Matrix<4,4>; +using Matrix4x4 = sourcepp::math::Mat4x4f; using Generic = std::variant< Invalid, @@ -148,33 +143,20 @@ std::string IDToString(ID id); // NOLINTNEXTLINE(*-no-recursion) constexpr ID stringToID(std::string_view id) { - if (id == "element") { - return ID::ELEMENT; - } else if (id == "int") { - return ID::INT; - } else if (id == "float") { - return ID::FLOAT; - } else if (id == "bool") { - return ID::BOOL; - } else if (id == "string") { - return ID::STRING; - } else if (id == "binary") { - return ID::BYTEARRAY; - } else if (id == "time") { - return ID::TIME; - } else if (id == "color") { - return ID::COLOR; - } else if (id == "vector2") { - return ID::VECTOR2; - } else if (id == "vector3") { - return ID::VECTOR3; - } else if (id == "vector4") { - return ID::VECTOR4; - } else if (id == "quaternion") { - return ID::QUATERNION; - } else if (id == "matrix") 
{ - return ID::MATRIX_4X4; - } else if (id.ends_with("_array")) { + if (id == "element") return ID::ELEMENT; + if (id == "int") return ID::INT; + if (id == "float") return ID::FLOAT; + if (id == "bool") return ID::BOOL; + if (id == "string") return ID::STRING; + if (id == "binary") return ID::BYTEARRAY; + if (id == "time") return ID::TIME; + if (id == "color") return ID::COLOR; + if (id == "vector2") return ID::VECTOR2; + if (id == "vector3") return ID::VECTOR3; + if (id == "vector4") return ID::VECTOR4; + if (id == "quaternion") return ID::QUATERNION; + if (id == "matrix") return ID::MATRIX_4X4; + if (id.ends_with("_array")) { return innerIDToArrayID(stringToID(id.substr(0, id.length() - 6))); } return ID::INVALID; diff --git a/include/gamepp/gamepp.h b/include/gamepp/gamepp.h index f8b57e232..f2e12e869 100644 --- a/include/gamepp/gamepp.h +++ b/include/gamepp/gamepp.h @@ -4,7 +4,7 @@ #include #include -#include +#include namespace gamepp { diff --git a/include/kvpp/KV1.h b/include/kvpp/KV1.h index eb2ff3567..378bd45fe 100644 --- a/include/kvpp/KV1.h +++ b/include/kvpp/KV1.h @@ -6,7 +6,6 @@ #include #include -#include #include #include #include diff --git a/include/mdlpp/structs/Generic.h b/include/mdlpp/structs/Generic.h index 6b6bf4979..c8f5c127e 100644 --- a/include/mdlpp/structs/Generic.h +++ b/include/mdlpp/structs/Generic.h @@ -2,8 +2,8 @@ #include -#include #include +#include namespace mdlpp { diff --git a/include/mdlpp/structs/MDL.h b/include/mdlpp/structs/MDL.h index f3066a4b1..cbad48d95 100644 --- a/include/mdlpp/structs/MDL.h +++ b/include/mdlpp/structs/MDL.h @@ -6,10 +6,6 @@ #include #include -#include -#include -#include - #include "Generic.h" namespace mdlpp::MDL { @@ -24,13 +20,13 @@ struct Bone { std::string name; int32_t parent; - int32_t boneController[6]; + std::array boneController; sourcepp::math::Vec3f position; sourcepp::math::Quat rotationQuat; sourcepp::math::Vec3f rotationEuler; sourcepp::math::Vec3f positionScale; sourcepp::math::Vec3f 
rotationScale; - sourcepp::math::Matrix<3,4> poseToBose; + sourcepp::math::Mat3x4f poseToBose; sourcepp::math::Quat alignment; Flags flags; int32_t procType; diff --git a/include/sourcepp/Math.h b/include/sourcepp/Math.h new file mode 100644 index 000000000..aedcf157a --- /dev/null +++ b/include/sourcepp/Math.h @@ -0,0 +1,423 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include + +// Numeric types are intentionally outside the sourcepp namespace +using std::int8_t; +using std::int16_t; +using std::int32_t; +using std::int64_t; +using std::uint8_t; +using std::uint16_t; +using std::uint32_t; +using std::uint64_t; +using half_float::half; + +namespace sourcepp::math { + +template +concept Arithmetic = std::is_arithmetic_v || std::same_as; + +template +[[nodiscard]] constexpr T remap(T value, T l1, T h1, T l2, T h2) { + return l2 + (value - l1) * (h2 - l2) / (h1 - l1); +} + +template +[[nodiscard]] constexpr T remap(T value, T h1, T h2) { + return value * h2 / h1; +} + +[[nodiscard]] constexpr bool isPowerOf2(std::integral auto n) { + return n && !(n & (n - 1)); +} + +template +[[nodiscard]] constexpr T nearestPowerOf2(T n) { + if (isPowerOf2(n)) { + return n; + } + auto bigger = std::bit_ceil(n); + auto smaller = std::bit_floor(n); + return (n - smaller) < (bigger - n) ? smaller : bigger; +} + +[[nodiscard]] constexpr uint16_t paddingForAlignment(uint16_t alignment, uint64_t n) { + if (const auto rest = n % alignment; rest > 0) { + return alignment - rest; + } + return 0; +} + +template +struct Vec { + static_assert(S >= 2, "Vectors must have at least two values!"); + + P values[S]; + + // By defining these constructors, the type becomes nontrivial... +#if 1 + constexpr Vec() = default; + + template... Vals> + requires (sizeof...(Vals) == S) + constexpr Vec(Vals... vals) // NOLINT(*-explicit-constructor) + : values{static_cast

(vals)...} {} +#endif + + using value_type = P; + + [[nodiscard]] constexpr const P* data() const { + return this->values; + } + + [[nodiscard]] constexpr P* data() { + return this->values; + } + + [[nodiscard]] constexpr uint8_t size() const { + return S; + } + + [[nodiscard]] constexpr P& operator[](uint8_t index) { + if (index < S) { + return this->values[index]; + } + return this->operator[](index % S); + } + + [[nodiscard]] constexpr P operator[](uint8_t index) const { + if (index < S) { + return this->values[index]; + } + return this->operator[](index % S); + } + + [[nodiscard]] constexpr Vec operator+() const { + return *this; + } + + template + [[nodiscard]] constexpr Vec operator+(const Vec& other) const { + auto out = *this; + for (uint8_t i = 0; i < (S > SO ? SO : S); i++) { + out[i] += static_cast

(other[i]); + } + return out; + } + + template + constexpr void operator+=(const Vec& other) { + for (uint8_t i = 0; i < (S > SO ? SO : S); i++) { + (*this)[i] += static_cast

(other[i]); + } + } + + [[nodiscard]] constexpr Vec operator-() const { + auto out = *this; + for (uint8_t i = 0; i < S; i++) { + out[i] *= -1; + } + return out; + } + + template + [[nodiscard]] constexpr Vec operator-(const Vec& other) const { + auto out = *this; + for (uint8_t i = 0; i < (S > SO ? SO : S); i++) { + out[i] -= static_cast

(other[i]); + } + return out; + } + + template + constexpr void operator-=(const Vec& other) { + for (uint8_t i = 0; i < (S > SO ? SO : S); i++) { + (*this)[i] -= static_cast

(other[i]); + } + } + + [[nodiscard]] constexpr Vec operator*(Arithmetic auto scalar) const { + auto out = *this; + for (uint8_t i = 0; i < S; i++) { + out[i] *= static_cast

(scalar); + } + return out; + } + + constexpr void operator*=(Arithmetic auto scalar) { + for (uint8_t i = 0; i < S; i++) { + (*this)[i] *= static_cast

(scalar); + } + } + + [[nodiscard]] constexpr Vec operator/(Arithmetic auto scalar) const { + auto out = *this; + for (uint8_t i = 0; i < S; i++) { + out[i] /= static_cast

(scalar); + } + return out; + } + + constexpr void operator/=(Arithmetic auto scalar) { + for (uint8_t i = 0; i < S; i++) { + (*this)[i] /= static_cast

(scalar); + } + } + + [[nodiscard]] constexpr Vec operator%(Arithmetic auto scalar) const { + auto out = *this; + for (uint8_t i = 0; i < S; i++) { + out[i] %= static_cast

(scalar); + } + return out; + } + + constexpr void operator%=(Arithmetic auto scalar) { + for (uint8_t i = 0; i < S; i++) { + (*this)[i] %= static_cast

(scalar); + } + } + + template + [[nodiscard]] constexpr bool operator==(const Vec& other) const { + if constexpr (S != SO) { + return false; + } else { + for (uint8_t i = 0; i < S; i++) { + if ((*this)[i] != static_cast

(other[i])) { + return false; + } + } + return true; + } + } + + template + [[nodiscard]] constexpr Vec to() const { + Vec out{}; + for (uint8_t i = 0; i < (S > SO ? SO : S); i++) { + out[i] = static_cast((*this)[i]); + } + return out; + } + + template + [[nodiscard]] constexpr Vec mul(const Vec& other) const { + auto out = *this; + for (uint8_t i = 0; i < (S > SO ? SO : S); i++) { + out[i] *= static_cast

(other[i]); + } + return out; + } + + template + [[nodiscard]] constexpr Vec div(const Vec& other) const { + auto out = *this; + for (uint8_t i = 0; i < (S > SO ? SO : S); i++) { + out[i] /= static_cast

(other[i]); + } + return out; + } + + template + [[nodiscard]] constexpr Vec mod(const Vec& other) const { + auto out = *this; + for (uint8_t i = 0; i < (S > SO ? SO : S); i++) { + if constexpr ((std::floating_point

&& std::floating_point) || std::floating_point

) { + out[i] = std::fmod(out[i], static_cast

(other[i])); + } else { + out[i] %= static_cast

(other[i]); + } + } + return out; + } + + [[nodiscard]] constexpr float magf() const { + float out = 0.0; + for (uint8_t i = 0; i < S; i++) { + out += std::pow((*this)[i], 2); + } + return std::sqrt(out); + } + + [[nodiscard]] constexpr double mag() const { + double out = 0.0; + for (uint8_t i = 0; i < S; i++) { + out += std::pow((*this)[i], 2); + } + return std::sqrt(out); + } + + [[nodiscard]] constexpr P sum() const { + P out{}; + for (uint8_t i = 0; i < S; i++) { + out += (*this)[i]; + } + return out; + } + + template + [[nodiscard]] constexpr Vec scale(const Vec& other) const { + Vec out; + for (uint8_t i = 0; i < S; i++) { + out[i] = (*this)[i] * static_cast

(other[i]); + } + return out; + } + + template + [[nodiscard]] constexpr P dot(const Vec& other) const { + return this->scale(other).sum(); + } + + [[nodiscard]] constexpr Vec abs() const { + auto out = *this; + for (uint8_t i = 0; i < S; i++) { + out[i] = std::abs(out[i]); + } + return out; + } + + [[nodiscard]] static constexpr Vec zero() { + return {}; + } + + [[nodiscard]] constexpr bool isZero() const { + return *this == zero(); + } +}; +static_assert(std::is_trivially_copyable_v>); + +#define SOURCEPP_VEC_DEFINE(S) \ + template \ + using Vec##S = Vec; \ + using Vec##S##i8 = Vec##S; \ + using Vec##S##i16 = Vec##S; \ + using Vec##S##i32 = Vec##S; \ + using Vec##S##i64 = Vec##S; \ + using Vec##S##i = Vec##S##i32; \ + using Vec##S##ui8 = Vec##S; \ + using Vec##S##ui16 = Vec##S; \ + using Vec##S##ui32 = Vec##S; \ + using Vec##S##ui64 = Vec##S; \ + using Vec##S##ui = Vec##S##ui32; \ + using Vec##S##f16 = Vec##S; \ + using Vec##S##f32 = Vec##S; \ + using Vec##S##f64 = Vec##S; \ + using Vec##S##f = Vec##S##f32 + +SOURCEPP_VEC_DEFINE(2); +SOURCEPP_VEC_DEFINE(3); +SOURCEPP_VEC_DEFINE(4); + +#undef SOURCEPP_VEC_DEFINE + +using EulerAngles = Vec3f; + +using Quat = Vec4f; + +/// Lower precision Quat compressed to 6 bytes +struct QuatCompressed48 { + uint16_t x : 16; + uint16_t y : 16; + uint16_t z : 15; + uint16_t wn : 1; + + [[nodiscard]] Quat decompress() const { + // Convert from 16-bit (or 15-bit) integers to floating point values in the range [-1, 1] + const float fx = (static_cast(this->x) / 32767.5f) - 1.f; // x / ((2^16 - 1) / 2) - 1 + const float fy = (static_cast(this->y) / 32767.5f) - 1.f; // y / ((2^16 - 1) / 2) - 1 + const float fz = (static_cast(this->z) / 16383.5f) - 1.f; // z / ((2^15 - 1) / 2) - 1 + + // Recalculate w from the constraint that x^2 + y^2 + z^2 + w^2 = 1 + float fw = std::sqrt(1.f - fx * fx - fy * fy - fz * fz); + + // Adjust w based on the stored sign bit + if (this->wn) { + fw = -fw; + } + + return {fx, fy, fz, fw}; + } +}; 
+static_assert(std::is_trivially_copyable_v); + +/// Lower precision Quat compressed to 8 bytes +struct QuatCompressed64 { + uint32_t x : 21; + uint32_t y : 21; + uint32_t z : 21; + uint32_t wn : 1; + + [[nodiscard]] Quat decompress() const { + // Convert from 21-bit integers to floating point values in the range [-1, 1] + const double fx = (static_cast(this->x) / 1048575.5) - 1.0f; // x / ((2^21 - 1) / 2) - 1 + const double fy = (static_cast(this->y) / 1048575.5) - 1.0f; // y / ((2^21 - 1) / 2) - 1 + const double fz = (static_cast(this->z) / 1048575.5) - 1.0f; // z / ((2^21 - 1) / 2) - 1 + + // Recalculate w from the constraint that x^2 + y^2 + z^2 + w^2 = 1 + double fw = std::sqrt(1.0 - fx * fx - fy * fy - fz * fz); + + // Adjust w based on the stored sign bit + if (this->wn) { + fw = -fw; + } + + return {static_cast(fx), static_cast(fy), static_cast(fz), static_cast(fw)}; + } +}; +static_assert(std::is_trivially_copyable_v); + +template +class Mat { + static_assert(M >= 2, "Matrices must have at least two rows!"); + static_assert(N >= 2, "Matrices must have at least two columns!"); + +public: + [[nodiscard]] P* operator[](uint8_t i) { return this->data[i]; } + + [[nodiscard]] const P* operator[](uint8_t i) const { return this->data[i]; } + +private: + P data[M][N]; +}; +static_assert(std::is_trivially_copyable_v>); + +#define SOURCEPP_MAT_DEFINE(M, N) \ + template \ + using Mat##M##x##N = Mat; \ + using Mat##M##x##N##i8 = Mat##M##x##N; \ + using Mat##M##x##N##i16 = Mat##M##x##N; \ + using Mat##M##x##N##i32 = Mat##M##x##N; \ + using Mat##M##x##N##i64 = Mat##M##x##N; \ + using Mat##M##x##N##i = Mat##M##x##N##i32; \ + using Mat##M##x##N##ui8 = Mat##M##x##N; \ + using Mat##M##x##N##ui16 = Mat##M##x##N; \ + using Mat##M##x##N##ui32 = Mat##M##x##N; \ + using Mat##M##x##N##ui64 = Mat##M##x##N; \ + using Mat##M##x##N##ui = Mat##M##x##N##ui32; \ + using Mat##M##x##N##f16 = Mat##M##x##N; \ + using Mat##M##x##N##f32 = Mat##M##x##N; \ + using Mat##M##x##N##f64 = 
Mat##M##x##N; \ + using Mat##M##x##N##f = Mat##M##x##N##f32 + +SOURCEPP_MAT_DEFINE(2, 2); +SOURCEPP_MAT_DEFINE(3, 3); +SOURCEPP_MAT_DEFINE(4, 4); +SOURCEPP_MAT_DEFINE(2, 3); +SOURCEPP_MAT_DEFINE(3, 2); +SOURCEPP_MAT_DEFINE(2, 4); +SOURCEPP_MAT_DEFINE(4, 2); +SOURCEPP_MAT_DEFINE(3, 4); +SOURCEPP_MAT_DEFINE(4, 3); + +#undef SOURCEPP_MAT_DEFINE + +} // namespace sourcepp::math diff --git a/include/sourcepp/String.h b/include/sourcepp/String.h index 51c2703ef..78b6d93f9 100644 --- a/include/sourcepp/String.h +++ b/include/sourcepp/String.h @@ -6,7 +6,7 @@ #include #include -#include +#include namespace sourcepp::string { diff --git a/include/sourcepp/Templates.h b/include/sourcepp/Templates.h new file mode 100644 index 000000000..74a93c867 --- /dev/null +++ b/include/sourcepp/Templates.h @@ -0,0 +1,23 @@ +#pragma once + +#include + +namespace sourcepp { + +namespace detail { + +template +struct member_type_helper; + +template +struct member_type_helper { using type = T; }; + +} // namespace detail + +template +struct member_type : detail::member_type_helper> {}; + +template +using member_type_t = typename member_type::type; + +} // namespace sourcepp diff --git a/include/sourcepp/crypto/Adler32.h b/include/sourcepp/crypto/Adler32.h index 5d3824b25..e18c1674e 100644 --- a/include/sourcepp/crypto/Adler32.h +++ b/include/sourcepp/crypto/Adler32.h @@ -3,7 +3,7 @@ #include #include -#include +#include namespace sourcepp::crypto { diff --git a/include/sourcepp/crypto/CRC32.h b/include/sourcepp/crypto/CRC32.h index 964042536..96d0869cb 100644 --- a/include/sourcepp/crypto/CRC32.h +++ b/include/sourcepp/crypto/CRC32.h @@ -3,7 +3,7 @@ #include #include -#include +#include namespace sourcepp::crypto { diff --git a/include/sourcepp/crypto/MD5.h b/include/sourcepp/crypto/MD5.h index bc7b17f66..bfceb29cb 100644 --- a/include/sourcepp/crypto/MD5.h +++ b/include/sourcepp/crypto/MD5.h @@ -4,7 +4,7 @@ #include #include -#include +#include namespace sourcepp::crypto { diff --git 
a/include/sourcepp/crypto/RSA.h b/include/sourcepp/crypto/RSA.h index 44d7e913a..af3a3056e 100644 --- a/include/sourcepp/crypto/RSA.h +++ b/include/sourcepp/crypto/RSA.h @@ -6,7 +6,7 @@ #include #include -#include +#include namespace sourcepp::crypto { diff --git a/include/sourcepp/math/Angles.h b/include/sourcepp/math/Angles.h deleted file mode 100644 index 168552913..000000000 --- a/include/sourcepp/math/Angles.h +++ /dev/null @@ -1,63 +0,0 @@ -#pragma once - -#include - -#include "Vector.h" - -namespace sourcepp::math { - -using EulerAngles = Vec3f; - -using Quat = Vec4f; - -/// Lower precision Quat compressed to 6 bytes -struct QuatCompressed48 { - uint16_t x : 16; - uint16_t y : 16; - uint16_t z : 15; - uint16_t wn : 1; - - [[nodiscard]] Quat decompress() const { - // Convert from 16-bit (or 15-bit) integers to floating point values in the range [-1, 1] - float fx = (static_cast(this->x) / 32767.5f) - 1.f; // x / ((2^16 - 1) / 2) - 1 - float fy = (static_cast(this->y) / 32767.5f) - 1.f; // y / ((2^16 - 1) / 2) - 1 - float fz = (static_cast(this->z) / 16383.5f) - 1.f; // z / ((2^15 - 1) / 2) - 1 - - // Recalculate w from the constraint that x^2 + y^2 + z^2 + w^2 = 1 - float fw = std::sqrt(1.f - fx * fx - fy * fy - fz * fz); - - // Adjust w based on the stored sign bit - if (this->wn) { - fw = -fw; - } - - return {fx, fy, fz, fw}; - } -}; - -/// Lower precision Quat compressed to 8 bytes -struct QuatCompressed64 { - uint32_t x : 21; - uint32_t y : 21; - uint32_t z : 21; - uint32_t wn : 1; - - [[nodiscard]] Quat decompress() const { - // Convert from 21-bit integers to floating point values in the range [-1, 1] - double fx = (static_cast(this->x) / 1048575.5) - 1.0f; // x / ((2^21 - 1) / 2) - 1 - double fy = (static_cast(this->y) / 1048575.5) - 1.0f; // y / ((2^21 - 1) / 2) - 1 - double fz = (static_cast(this->z) / 1048575.5) - 1.0f; // z / ((2^21 - 1) / 2) - 1 - - // Recalculate w from the constraint that x^2 + y^2 + z^2 + w^2 = 1 - double fw = std::sqrt(1.0 - 
fx * fx - fy * fy - fz * fz); - - // Adjust w based on the stored sign bit - if (this->wn) { - fw = -fw; - } - - return {static_cast(fx), static_cast(fy), static_cast(fz), static_cast(fw)}; - } -}; - -} // namespace sourcepp::math diff --git a/include/sourcepp/math/Float.h b/include/sourcepp/math/Float.h deleted file mode 100644 index c2ae7d891..000000000 --- a/include/sourcepp/math/Float.h +++ /dev/null @@ -1,42 +0,0 @@ -#pragma once - -#include "Integer.h" - -namespace sourcepp::math { - -// https://stackoverflow.com/a/60047308 -class FloatCompressed16 { -public: - explicit FloatCompressed16(uint16_t in) - : data(in) {} - - // NOLINTNEXTLINE(*-explicit-constructor) - FloatCompressed16(float in) { // IEEE-754 16-bit floating-point format (without infinity): 1-5-10, exp-15, +-131008.0, +-6.1035156E-5, +-5.9604645E-8, 3.311 digits - const auto b = *reinterpret_cast(&in) + 0x00001000; // round-to-nearest-even: add last bit after truncated mantissa - const auto e = (b & 0x7F800000) >> 23; // exponent - const auto m = b & 0x007FFFFF; // mantissa; in line below: 0x007FF000 = 0x00800000-0x00001000 = decimal indicator flag - initial rounding - this->data = (b & 0x80000000) >> 16 | (e > 112) * ((((e - 112) << 10) & 0x7C00) | m >> 13) | ((e < 113) & (e > 101)) * ((((0x007FF000 + m) >> (125 - e)) + 1) >> 1) | (e > 143) * 0x7FFF; // sign : normalized : denormalized : saturate - } - - [[nodiscard]] uint16_t toFloat16() const { - return this->data; - } - - [[nodiscard]] float toFloat32() const { // IEEE-754 16-bit floating-point format (without infinity): 1-5-10, exp-15, +-131008.0, +-6.1035156E-5, +-5.9604645E-8, 3.311 digits - const uint32_t e = (this->data & 0x7C00) >> 10; // exponent - const uint32_t m = (this->data & 0x03FF) << 13; // mantissa - const auto mf = static_cast(m); - const uint32_t v = *reinterpret_cast(&mf) >> 23; // evil log2 bit hack to count leading zeros in denormalized format - const uint32_t vu = (this->data & 0x8000) << 16 | (e != 0) * ((e + 112) << 23 
| m) | ((e == 0) & (m != 0)) * ((v - 37) << 23 | ((m << (150 - v)) & 0x007FE000)); // sign : normalized : denormalized - return *reinterpret_cast(&vu); - } - - [[nodiscard]] float operator*() const { - return this->toFloat32(); - } - -private: - uint16_t data; -}; - -} // namespace sourcepp::math diff --git a/include/sourcepp/math/Integer.h b/include/sourcepp/math/Integer.h deleted file mode 100644 index d245fc014..000000000 --- a/include/sourcepp/math/Integer.h +++ /dev/null @@ -1,166 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -// Integer types are intentionally outside the sourcepp namespace -using std::int8_t; -using std::int16_t; -using std::int32_t; -using std::int64_t; -using std::uint8_t; -using std::uint16_t; -using std::uint32_t; -using std::uint64_t; - -/// 3-byte wide unsigned integer -struct uint24_t { - uint24_t() = default; - - template - constexpr uint24_t(T value) // NOLINT(*-explicit-constructor) - : bytes{ - static_cast((value >> 16) & 0xff), - static_cast((value >> 8) & 0xff), - static_cast( value & 0xff), - } {} - - template - [[nodiscard]] constexpr operator T() const { // NOLINT(*-explicit-constructor) - return static_cast((bytes[0] << 16) | (bytes[1] << 8) | bytes[2]); - } - - template - constexpr uint24_t& operator=(T value) { - *this = {value}; - return *this; - } - - template - [[nodiscard]] constexpr uint24_t operator+(T value) const { - return {uint32_t{*this} + value}; - } - - template - constexpr void operator+=(T value) const { - *this = {uint32_t{*this} + value}; - } - - constexpr uint24_t operator++() { - return *this = {uint32_t{*this} + 1}; - } - - constexpr uint24_t operator++(int) { - uint24_t out{*this}; - *this = {uint32_t{*this} + 1}; - return out; - } - - template - [[nodiscard]] constexpr uint24_t operator-(T value) const { - return {uint32_t{*this} - value}; - } - - template - constexpr void operator-=(T value) const { - return *this = {uint32_t{*this} - value}; - } - - constexpr uint24_t 
operator--() { - return *this = {uint32_t{*this} - 1}; - } - - constexpr uint24_t operator--(int) { - uint24_t out{*this}; - *this = {uint32_t{*this} - 1}; - return out; - } - - template - [[nodiscard]] constexpr uint24_t operator*(T value) const { - return {uint32_t{*this} * value}; - } - - template - constexpr void operator*=(T value) const { - *this = {uint32_t{*this} * value}; - } - - template - [[nodiscard]] constexpr uint24_t operator/(T value) const { - return {uint32_t{*this} / value}; - } - - template - constexpr void operator/=(T value) const { - *this = {uint32_t{*this} / value}; - } - - template - [[nodiscard]] constexpr uint24_t operator%(T value) const { - return {uint32_t{*this} % value}; - } - - template - constexpr void operator%=(T value) const { - *this = {uint32_t{*this} % value}; - } - - template - [[nodiscard]] constexpr bool operator==(T value) const { - return uint32_t{*this} == value; - } - - template - [[nodiscard]] constexpr auto operator<=>(T value) const { - return uint32_t{*this} <=> value; - } - - [[nodiscard]] constexpr operator bool() const { // NOLINT(*-explicit-constructor) - return static_cast(uint32_t{*this}); - } - - uint8_t bytes[3]; -}; -static_assert(sizeof(uint24_t) == 3, "uint24_t is not 3 bytes wide!"); -static_assert(std::is_trivially_copyable_v, "uint24_t is not a POD type!"); - -namespace sourcepp::math { - -template -concept Arithmetic = std::is_arithmetic_v || std::same_as; - -template -[[nodiscard]] constexpr T remap(T value, T l1, T h1, T l2, T h2) { - return l2 + (value - l1) * (h2 - l2) / (h1 - l1); -} - -template -[[nodiscard]] constexpr T remap(T value, T h1, T h2) { - return value * h2 / h1; -} - -[[nodiscard]] constexpr bool isPowerOf2(std::integral auto n) { - return n && !(n & (n - 1)); -} - -template -[[nodiscard]] constexpr T nearestPowerOf2(T n) { - if (math::isPowerOf2(n)) { - return n; - } - auto bigger = std::bit_ceil(n); - auto smaller = std::bit_floor(n); - return (n - smaller) < (bigger - n) ? 
smaller : bigger; -} - -[[nodiscard]] constexpr uint16_t getPaddingForAlignment(uint16_t alignment, uint64_t n) { - if (const auto rest = n % alignment; rest > 0) { - return alignment - rest; - } - return 0; -} - -} // namespace sourcepp::math diff --git a/include/sourcepp/math/Matrix.h b/include/sourcepp/math/Matrix.h deleted file mode 100644 index 5a03ae4da..000000000 --- a/include/sourcepp/math/Matrix.h +++ /dev/null @@ -1,18 +0,0 @@ -#pragma once - -#include "Integer.h" - -namespace sourcepp::math { - -template -class Matrix { -public: - [[nodiscard]] P* operator[](uint8_t i) { return this->data[i]; } - - [[nodiscard]] const P* operator[](uint8_t i) const { return this->data[i]; } - -private: - P data[M][N]; -}; - -} // namespace sourcepp::math diff --git a/include/sourcepp/math/Vector.h b/include/sourcepp/math/Vector.h deleted file mode 100644 index b50ad76fd..000000000 --- a/include/sourcepp/math/Vector.h +++ /dev/null @@ -1,304 +0,0 @@ -#pragma once - -#include -#include - -#include "Integer.h" - -namespace sourcepp::math { - -template -struct Vec { - static_assert(S >= 2, "Vectors must have at least two values!"); - - std::array values; - - // By defining these constructors, the type becomes nontrivial... -#if 0 - constexpr Vec() = default; - - constexpr explicit Vec(values_type vals) - : values{vals} {} - - template... Vals> - requires (sizeof...(Vals) == S) - constexpr Vec(Vals... vals) // NOLINT(*-explicit-constructor) - : values{static_cast

(vals)...} {} -#endif - - using value_type = P; - - [[nodiscard]] consteval uint8_t size() const { - return S; - } - - [[nodiscard]] constexpr P& operator[](uint8_t index) { - if (index < S) { - return this->values[index]; - } - return this->operator[](index % S); - } - - [[nodiscard]] constexpr P operator[](uint8_t index) const { - if (index < S) { - return this->values[index]; - } - return this->operator[](index % S); - } - - [[nodiscard]] constexpr Vec operator+() const { - return *this; - } - - template - [[nodiscard]] constexpr Vec operator+(const Vec& other) const { - auto out = *this; - for (uint8_t i = 0; i < (S > SO ? SO : S); i++) { - out[i] += static_cast

(other[i]); - } - return out; - } - - template - constexpr void operator+=(const Vec& other) { - for (uint8_t i = 0; i < (S > SO ? SO : S); i++) { - (*this)[i] += static_cast

(other[i]); - } - } - - [[nodiscard]] constexpr Vec operator-() const { - auto out = *this; - for (uint8_t i = 0; i < S; i++) { - out[i] *= -1; - } - return out; - } - - template - [[nodiscard]] constexpr Vec operator-(const Vec& other) const { - auto out = *this; - for (uint8_t i = 0; i < (S > SO ? SO : S); i++) { - out[i] -= static_cast

(other[i]); - } - return out; - } - - template - constexpr void operator-=(const Vec& other) { - for (uint8_t i = 0; i < (S > SO ? SO : S); i++) { - (*this)[i] -= static_cast

(other[i]); - } - } - - [[nodiscard]] constexpr Vec operator*(Arithmetic auto scalar) const { - auto out = *this; - for (uint8_t i = 0; i < S; i++) { - out[i] *= static_cast

(scalar); - } - return out; - } - - constexpr void operator*=(Arithmetic auto scalar) { - for (uint8_t i = 0; i < S; i++) { - (*this)[i] *= static_cast

(scalar); - } - } - - [[nodiscard]] constexpr Vec operator/(Arithmetic auto scalar) const { - auto out = *this; - for (uint8_t i = 0; i < S; i++) { - out[i] /= static_cast

(scalar); - } - return out; - } - - constexpr void operator/=(Arithmetic auto scalar) { - for (uint8_t i = 0; i < S; i++) { - (*this)[i] /= static_cast

(scalar); - } - } - - [[nodiscard]] constexpr Vec operator%(Arithmetic auto scalar) const { - auto out = *this; - for (uint8_t i = 0; i < S; i++) { - out[i] %= static_cast

(scalar); - } - return out; - } - - constexpr void operator%=(Arithmetic auto scalar) { - for (uint8_t i = 0; i < S; i++) { - (*this)[i] %= static_cast

(scalar); - } - } - - template - [[nodiscard]] constexpr bool operator==(const Vec& other) const { - if constexpr (S != SO) { - return false; - } else { - for (uint8_t i = 0; i < S; i++) { - if ((*this)[i] != static_cast

(other[i])) { - return false; - } - } - return true; - } - } - - template - [[nodiscard]] constexpr Vec to() const { - Vec out{}; - for (uint8_t i = 0; i < (S > SO ? SO : S); i++) { - out[i] = static_cast((*this)[i]); - } - return out; - } - - template - [[nodiscard]] constexpr Vec mul(const Vec& other) const { - auto out = *this; - for (uint8_t i = 0; i < (S > SO ? SO : S); i++) { - out[i] *= static_cast

(other[i]); - } - return out; - } - - template - [[nodiscard]] constexpr Vec div(const Vec& other) const { - auto out = *this; - for (uint8_t i = 0; i < (S > SO ? SO : S); i++) { - out[i] /= static_cast

(other[i]); - } - return out; - } - - template - [[nodiscard]] constexpr Vec mod(const Vec& other) const { - auto out = *this; - for (uint8_t i = 0; i < (S > SO ? SO : S); i++) { - if constexpr ((std::floating_point

&& std::floating_point) || std::floating_point

) { - out[i] = std::fmod(out[i], static_cast

(other[i])); - } else { - out[i] %= static_cast

(other[i]); - } - } - return out; - } - - [[nodiscard]] constexpr float magf() const { - float out = 0.0; - for (uint8_t i = 0; i < S; i++) { - out += std::pow((*this)[i], 2); - } - return std::sqrt(out); - } - - [[nodiscard]] constexpr double mag() const { - double out = 0.0; - for (uint8_t i = 0; i < S; i++) { - out += std::pow((*this)[i], 2); - } - return std::sqrt(out); - } - - [[nodiscard]] constexpr P sum() const { - P out{}; - for (uint8_t i = 0; i < S; i++) { - out += (*this)[i]; - } - return out; - } - - template - [[nodiscard]] constexpr Vec scale(const Vec& other) const { - Vec out; - for (uint8_t i = 0; i < S; i++) { - out[i] = (*this)[i] * static_cast

(other[i]); - } - return out; - } - - template - [[nodiscard]] constexpr P dot(const Vec& other) const { - return this->scale(other).sum(); - } - - [[nodiscard]] constexpr Vec abs() const { - auto out = *this; - for (uint8_t i = 0; i < S; i++) { - out[i] = std::abs(out[i]); - } - return out; - } - - [[nodiscard]] static constexpr Vec zero() { - return {}; - } - - [[nodiscard]] constexpr bool isZero() const { - return *this == zero(); - } -}; - -template -using Vec2 = Vec<2, P>; - -using Vec2i8 = Vec2; -using Vec2i16 = Vec2; -using Vec2i32 = Vec2; -using Vec2i64 = Vec2; -using Vec2i = Vec2i32; - -using Vec2ui8 = Vec2; -using Vec2ui16 = Vec2; -using Vec2ui24 = Vec2; -using Vec2ui32 = Vec2; -using Vec2ui64 = Vec2; -using Vec2ui = Vec2ui32; - -using Vec2f32 = Vec2; -using Vec2f64 = Vec2; -using Vec2f = Vec2f32; - -template -using Vec3 = Vec<3, P>; - -using Vec3i8 = Vec3; -using Vec3i16 = Vec3; -using Vec3i32 = Vec3; -using Vec3i64 = Vec3; -using Vec3i = Vec3i32; - -using Vec3ui8 = Vec3; -using Vec3ui16 = Vec3; -using Vec3ui24 = Vec3; -using Vec3ui32 = Vec3; -using Vec3ui64 = Vec3; -using Vec3ui = Vec3ui32; - -using Vec3f32 = Vec3; -using Vec3f64 = Vec3; -using Vec3f = Vec3f32; - -template -using Vec4 = Vec<4, P>; - -using Vec4i8 = Vec4; -using Vec4i16 = Vec4; -using Vec4i32 = Vec4; -using Vec4i64 = Vec4; -using Vec4i = Vec4i32; - -using Vec4ui8 = Vec4; -using Vec4ui16 = Vec4; -using Vec4ui24 = Vec4; -using Vec4ui32 = Vec4; -using Vec4ui64 = Vec4; -using Vec4ui = Vec4ui32; - -using Vec4f32 = Vec4; -using Vec4f64 = Vec4; -using Vec4f = Vec4f32; - -} // namespace sourcepp::math diff --git a/include/sourcepp/parser/Binary.h b/include/sourcepp/parser/Binary.h index 163323b99..11e5b8c16 100644 --- a/include/sourcepp/parser/Binary.h +++ b/include/sourcepp/parser/Binary.h @@ -3,9 +3,8 @@ #include #include #include -#include -#include +#include class BufferStream; diff --git a/include/steampp/steampp.h b/include/steampp/steampp.h index 374d07c2a..23b02f7d4 100644 --- 
a/include/steampp/steampp.h +++ b/include/steampp/steampp.h @@ -10,7 +10,7 @@ #include #include -#include +#include namespace steampp { diff --git a/include/toolpp/CmdSeq.h b/include/toolpp/CmdSeq.h index 8e69d9f19..695bead1f 100644 --- a/include/toolpp/CmdSeq.h +++ b/include/toolpp/CmdSeq.h @@ -1,11 +1,10 @@ #pragma once #include -#include #include #include -#include +#include namespace toolpp { @@ -20,9 +19,12 @@ class CmdSeq { COPY_FILE = 257, DELETE_FILE = 258, RENAME_FILE = 259, - COPY_FILE_IF_EXISTS_ALT = 260, + // This used to be a different thing - Strata changes it to be an alias for 261 + //COPY_FILE_IF_EXISTS_ALT = 260, COPY_FILE_IF_EXISTS = 261, } special; + static constexpr auto SPECIAL_COPY_FILE_IF_EXISTS_ALIAS = static_cast(260); + std::string executable; std::string arguments; @@ -32,6 +34,10 @@ class CmdSeq { bool useProcessWindow; bool waitForKeypress; + + [[nodiscard]] static std::string getSpecialDisplayNameFor(Special special); + + [[nodiscard]] std::string getExecutableDisplayName() const; }; struct Sequence { @@ -39,7 +45,21 @@ class CmdSeq { std::vector commands; }; - explicit CmdSeq(std::string path_); + enum class Type { + INVALID, + BINARY, + KEYVALUES_STRATA, + }; + + explicit CmdSeq(const std::string& path); + + explicit CmdSeq(Type type_); + + [[nodiscard]] explicit operator bool() const; + + [[nodiscard]] Type getType() const; + + void setType(Type type_); [[nodiscard]] float getVersion() const; @@ -51,20 +71,19 @@ class CmdSeq { [[nodiscard]] std::vector bake() const; - [[nodiscard]] std::vector bake(bool overrideUsingKeyValues) const; - - bool bake(const std::string& path_); - - bool bake(const std::string& path_, bool overrideUsingKeyValues); + bool bake(const std::string& path) const; // NOLINT(*-use-nodiscard) protected: void parseBinary(const std::string& path); - void parseKeyValues(const std::string& path); + void parseKeyValuesStrata(const std::string& path); + + [[nodiscard]] std::vector bakeBinary() const; + + [[nodiscard]] 
std::vector bakeKeyValuesStrata() const; - bool usingKeyValues = false; + Type type; float version; - std::string path; std::vector sequences; }; diff --git a/include/toolpp/FGD.h b/include/toolpp/FGD.h index bb36c53d1..28b1a4e81 100644 --- a/include/toolpp/FGD.h +++ b/include/toolpp/FGD.h @@ -7,7 +7,7 @@ #include #include -#include +#include namespace toolpp { @@ -187,11 +187,11 @@ class FGDWriter { AutoVisGroupWriter beginAutoVisGroup(const std::string& parentName); - EntityWriter beginEntity(const std::string& classType, const std::vector& classProperties, const std::string& name, const std::string& description); + EntityWriter beginEntity(const std::string& classType, const std::vector& classProperties, const std::string& name, const std::string& description = ""); - [[nodiscard]] std::string bake(); + [[nodiscard]] std::string bake() const; - bool bake(const std::string& fgdPath); + bool bake(const std::string& fgdPath) const; // NOLINT(*-use-nodiscard) protected: FGDWriter(); diff --git a/include/vcryptpp/VFONT.h b/include/vcryptpp/VFONT.h index 011a8db0f..ba2c93ab7 100644 --- a/include/vcryptpp/VFONT.h +++ b/include/vcryptpp/VFONT.h @@ -5,7 +5,7 @@ #include #include -#include +#include namespace vcryptpp::VFONT { diff --git a/include/vpkpp/Entry.h b/include/vpkpp/Entry.h index c9b09c0d8..b327d2498 100644 --- a/include/vpkpp/Entry.h +++ b/include/vpkpp/Entry.h @@ -5,7 +5,7 @@ #include #include -#include +#include namespace vpkpp { diff --git a/include/vpkpp/Options.h b/include/vpkpp/Options.h index 2e083754e..4102057b7 100644 --- a/include/vpkpp/Options.h +++ b/include/vpkpp/Options.h @@ -1,6 +1,6 @@ #pragma once -#include +#include namespace vpkpp { diff --git a/include/vpkpp/format/GCF.h b/include/vpkpp/format/GCF.h index 109e4b32a..43d0b58a8 100644 --- a/include/vpkpp/format/GCF.h +++ b/include/vpkpp/format/GCF.h @@ -118,6 +118,10 @@ class GCF : public PackFileReadOnly { [[nodiscard]] std::vector verifyEntryChecksums() const override; + [[nodiscard]] 
constexpr bool isCaseSensitive() const noexcept override { + return true; + } + [[nodiscard]] std::optional> readEntry(const std::string& path_) const override; [[nodiscard]] Attribute getSupportedEntryAttributes() const override; diff --git a/include/vpkpp/format/PCK.h b/include/vpkpp/format/PCK.h index 844c80ad7..68712d624 100644 --- a/include/vpkpp/format/PCK.h +++ b/include/vpkpp/format/PCK.h @@ -12,10 +12,16 @@ constexpr std::string_view PCK_EXTENSION = ".pck"; class PCK : public PackFile { protected: - enum FlagsV2 : uint32_t { - FLAG_NONE = 0, - FLAG_ENCRYPTED = 1 << 0, - FLAG_RELATIVE_FILE_DATA = 1 << 1, + enum FlagsDirV2 : uint32_t { + FLAG_DIR_NONE = 0, + FLAG_DIR_ENCRYPTED = 1 << 0, + FLAG_DIR_RELATIVE_FILE_DATA = 1 << 1, + }; + + enum FlagsFileV2 : uint32_t { + FLAG_FILE_NONE = 0, + FLAG_FILE_ENCRYPTED = 1 << 0, + FLAG_FILE_REMOVED = 1 << 1, }; struct Header { @@ -23,7 +29,7 @@ class PCK : public PackFile { uint32_t godotVersionMajor; uint32_t godotVersionMinor; uint32_t godotVersionPatch; - FlagsV2 flags; // packVersion >= 2 + FlagsDirV2 flags; // packVersion >= 2 }; public: diff --git a/include/vpkpp/format/ZIP.h b/include/vpkpp/format/ZIP.h index 2ac101c18..a0d7249d4 100644 --- a/include/vpkpp/format/ZIP.h +++ b/include/vpkpp/format/ZIP.h @@ -5,13 +5,9 @@ namespace vpkpp { constexpr std::string_view BMZ_EXTENSION = ".bmz"; -constexpr std::string_view BZ2_EXTENSION = ".bz2"; -constexpr std::string_view GZIP_EXTENSION = ".gz"; constexpr std::string_view PK3_EXTENSION = ".pk3"; constexpr std::string_view PK4_EXTENSION = ".pk4"; -constexpr std::string_view XZ_EXTENSION = ".xz"; constexpr std::string_view ZIP_EXTENSION = ".zip"; -constexpr std::string_view ZST_EXTENSION = ".zst"; class ZIP : public PackFile { public: @@ -67,14 +63,10 @@ class ZIP : public PackFile { bool zipOpen = false; private: - VPKPP_REGISTER_PACKFILE_OPEN(BMZ_EXTENSION, &ZIP::open); - VPKPP_REGISTER_PACKFILE_OPEN(BZ2_EXTENSION, &ZIP::open); - 
VPKPP_REGISTER_PACKFILE_OPEN(GZIP_EXTENSION, &ZIP::open); - VPKPP_REGISTER_PACKFILE_OPEN(PK3_EXTENSION, &ZIP::open); - VPKPP_REGISTER_PACKFILE_OPEN(PK4_EXTENSION, &ZIP::open); - VPKPP_REGISTER_PACKFILE_OPEN(XZ_EXTENSION, &ZIP::open); - VPKPP_REGISTER_PACKFILE_OPEN(ZIP_EXTENSION, &ZIP::open); - VPKPP_REGISTER_PACKFILE_OPEN(ZST_EXTENSION, &ZIP::open); + VPKPP_REGISTER_PACKFILE_OPEN(BMZ_EXTENSION, &ZIP::open); + VPKPP_REGISTER_PACKFILE_OPEN(PK3_EXTENSION, &ZIP::open); + VPKPP_REGISTER_PACKFILE_OPEN(PK4_EXTENSION, &ZIP::open); + VPKPP_REGISTER_PACKFILE_OPEN(ZIP_EXTENSION, &ZIP::open); }; } // namespace vpkpp diff --git a/include/vtfpp/ImageConversion.h b/include/vtfpp/ImageConversion.h index ae1f19c34..a8e65f7ff 100644 --- a/include/vtfpp/ImageConversion.h +++ b/include/vtfpp/ImageConversion.h @@ -1,10 +1,12 @@ #pragma once +#include #include #include #include -#include +#include +#include #include "ImageFormats.h" @@ -172,10 +174,10 @@ VTFPP_CHECK_SIZE(UVWQ8888); struct RGBA16161616F { static constexpr auto FORMAT = ImageFormat::RGBA16161616F; - sourcepp::math::FloatCompressed16 r; - sourcepp::math::FloatCompressed16 g; - sourcepp::math::FloatCompressed16 b; - sourcepp::math::FloatCompressed16 a; + half r; + half g; + half b; + half a; }; VTFPP_CHECK_SIZE(RGBA16161616F); @@ -222,8 +224,8 @@ VTFPP_CHECK_SIZE(RGBA32323232F); struct RG1616F { static constexpr auto FORMAT = ImageFormat::RG1616F; - sourcepp::math::FloatCompressed16 r; - sourcepp::math::FloatCompressed16 g; + half r; + half g; }; VTFPP_CHECK_SIZE(RG1616F); @@ -263,7 +265,7 @@ VTFPP_CHECK_SIZE(BGRA1010102); struct R16F { static constexpr auto FORMAT = ImageFormat::R16F; - sourcepp::math::FloatCompressed16 r; + half r; }; VTFPP_CHECK_SIZE(R16F); @@ -275,6 +277,42 @@ VTFPP_CHECK_SIZE(R8); #undef VTFPP_CHECK_SIZE +template +concept PixelType = + std::same_as || + std::same_as || + std::same_as || + std::same_as || + std::same_as || + std::same_as || + std::same_as || + std::same_as || + std::same_as || + 
std::same_as || + std::same_as || + std::same_as || + std::same_as || + std::same_as || + std::same_as || + std::same_as || + std::same_as || + std::same_as || + std::same_as || + std::same_as || + std::same_as || + std::same_as || + std::same_as || + std::same_as || + std::same_as || + std::same_as || + std::same_as || + std::same_as || + std::same_as || + std::same_as || + std::same_as || + std::same_as || + std::same_as; + } // namespace ImagePixel namespace ImageConversion { @@ -292,9 +330,13 @@ enum class FileFormat { BMP, TGA, HDR, + EXR, }; -/// Converts image data to a PNG or HDR file. HDR output will be used for floating-point formats. +/// PNG for integer formats, EXR for floating point formats +[[nodiscard]] FileFormat getDefaultFileFormatForImageFormat(ImageFormat format); + +/// Converts image data to a PNG or EXR file. EXR format will be used for floating-point image formats. [[nodiscard]] std::vector convertImageDataToFile(std::span imageData, ImageFormat format, uint16_t width, uint16_t height, FileFormat fileFormat = FileFormat::DEFAULT); [[nodiscard]] std::vector convertFileToImageData(std::span fileData, ImageFormat& format, int& width, int& height, int& frameCount); @@ -340,6 +382,47 @@ void setResizedDims(uint16_t& width, ResizeMethod widthResize, uint16_t& height, [[nodiscard]] std::vector cropImageData(const std::span full_image, uint16_t full_width, uint16_t full_height, uint16_t channels, uint16_t x, uint16_t y, uint16_t subrect_width, uint16_t subrect_height); +/// Extracts a single channel from the given image data. +/// May have unexpected behavior if called on formats that use bitfields like BGRA5551! +/// Data is packed according to pixel channel C++ type size +/// (e.g. 
in the case of BGRA5551's green channel, it'll be 2 bytes per green value despite only 5 bits being used in the original data) +template +[[nodiscard]] std::vector extractChannelFromImageData(std::span imageData, auto P::*channel) { + using C = sourcepp::member_type_t; + if (imageData.empty() || imageData.size() % sizeof(P) != 0) { + return {}; + } + + std::span pixels{reinterpret_cast(imageData.data()), imageData.size() / sizeof(P)}; + + std::vector out(imageData.size() / sizeof(P) * sizeof(C)); + BufferStream stream{out, false}; + for (const auto& pixel : pixels) { + stream << pixel.*channel; + } + return out; +} + +/// Applies a single channel to the given image data. +/// May have unexpected behavior if called on formats that use bitfields like BGRA5551! +/// Data is packed according to pixel channel C++ type size +/// (e.g. in the case of BGRA5551's green channel, it'll be 2 bytes per green value despite only 5 bits being used in the original data) +template +bool applyChannelToImageData(std::span imageData, std::span channelData, auto P::*channel) { + using C = sourcepp::member_type_t; + if (imageData.empty() || imageData.size() % sizeof(P) != 0 || channelData.empty() || channelData.size() % sizeof(C) != 0 || imageData.size() / sizeof(P) != channelData.size() / sizeof(C)) { + return false; + } + + std::span pixels{reinterpret_cast(imageData.data()), imageData.size() / sizeof(P)}; + std::span values{reinterpret_cast(channelData.data()), channelData.size() / sizeof(C)}; + + for (int i = 0; i < pixels.size(); i++) { + pixels[i].*channel = values[i]; + } + return true; +} + } // namespace ImageConversion } // namespace vtfpp diff --git a/include/vtfpp/ImageFormats.h b/include/vtfpp/ImageFormats.h index cb2c01225..f515275ed 100644 --- a/include/vtfpp/ImageFormats.h +++ b/include/vtfpp/ImageFormats.h @@ -1,6 +1,6 @@ #pragma once -#include +#include namespace vtfpp { diff --git a/include/vtfpp/PPL.h b/include/vtfpp/PPL.h index 8ed727c4c..18a8329c4 100644 --- 
a/include/vtfpp/PPL.h +++ b/include/vtfpp/PPL.h @@ -7,8 +7,6 @@ #include #include -#include - #include "ImageConversion.h" namespace vtfpp { @@ -55,7 +53,7 @@ class PPL { bool setImage(std::span imageData, ImageFormat format_, uint32_t width, uint32_t height, uint32_t resizedWidth, uint32_t resizedHeight, uint32_t lod = 0, ImageConversion::ResizeFilter filter = ImageConversion::ResizeFilter::BILINEAR); - bool setImage(const std::string& imagePath, uint32_t lod); + bool setImage(const std::string& imagePath, uint32_t lod = 0); bool setImage(const std::string& imagePath, uint32_t resizedWidth, uint32_t resizedHeight, uint32_t lod = 0, ImageConversion::ResizeFilter filter = ImageConversion::ResizeFilter::BILINEAR); diff --git a/include/vtfpp/VTF.h b/include/vtfpp/VTF.h index 0976a8ef4..a44897da3 100644 --- a/include/vtfpp/VTF.h +++ b/include/vtfpp/VTF.h @@ -9,7 +9,6 @@ #include #include -#include #include #include @@ -20,6 +19,11 @@ namespace vtfpp { constexpr uint32_t VTF_SIGNATURE = sourcepp::parser::binary::makeFourCC("VTF\0"); +enum class CompressionMethod : int16_t { + DEFLATE = 8, + ZSTD = 93, +}; + struct Resource { enum Type : uint32_t { TYPE_UNKNOWN = 0, // Unknown @@ -32,12 +36,7 @@ struct Resource { TYPE_KEYVALUES_DATA = sourcepp::parser::binary::makeFourCC("KVD\0"), TYPE_AUX_COMPRESSION = sourcepp::parser::binary::makeFourCC("AXC\0"), }; - static constexpr std::array TYPE_ARRAY_ORDER{ - // These don't really matter - Resource::TYPE_CRC, Resource::TYPE_EXTENDED_FLAGS, Resource::TYPE_LOD_CONTROL_INFO, Resource::TYPE_KEYVALUES_DATA, Resource::TYPE_PARTICLE_SHEET_DATA, - // These matter - Resource::TYPE_THUMBNAIL_DATA, Resource::TYPE_AUX_COMPRESSION, Resource::TYPE_IMAGE_DATA, - }; + static const std::array& getOrder(); enum Flags : uint8_t { FLAG_NONE = 0, @@ -73,8 +72,16 @@ struct Resource { return std::get(this->convertData()); } - [[nodiscard]] int32_t getDataAsAuxCompressionLevel() const { - return static_cast(std::get>(this->convertData())[1]); + 
[[nodiscard]] int16_t getDataAsAuxCompressionLevel() const { + return static_cast(std::get>(this->convertData())[1] & 0xffff); + } + + [[nodiscard]] CompressionMethod getDataAsAuxCompressionMethod() const { + auto method = static_cast((std::get>(this->convertData())[1] & 0xffff0000) >> 16); + if (method <= 0) { + return CompressionMethod::DEFLATE; + } + return static_cast(method); } [[nodiscard]] uint32_t getDataAsAuxCompressionLength(uint8_t mip, uint8_t mipCount, uint16_t frame, uint16_t frameCount, uint16_t face, uint16_t faceCount) const { @@ -124,7 +131,7 @@ class VTF { FLAG_NORMAL = 1 << 7, FLAG_NO_MIP = 1 << 8, // Added at VTF creation time FLAG_NO_LOD = 1 << 9, // Added at VTF creation time - FLAG_MIN_MIP = 1 << 10, + FLAG_LOAD_LOWEST_MIPS = 1 << 10, FLAG_PROCEDURAL = 1 << 11, FLAG_ONE_BIT_ALPHA = 1 << 12, // Added at VTF creation time FLAG_MULTI_BIT_ALPHA = 1 << 13, // Added at VTF creation time @@ -136,7 +143,7 @@ class VTF { FLAG_ONE_OVER_MIP_LEVEL_IN_ALPHA = 1 << 19, // Internal to vtex, removed FLAG_PREMULTIPLY_COLOR_BY_ONE_OVER_MIP_LEVEL = 1 << 20, // Internal to vtex, removed FLAG_NORMAL_TO_DUDV = 1 << 21, // Internal to vtex, removed - FLAG_ALPHA_TEST_MIP_GENERATION = 1 << 22, + FLAG_ALPHA_TEST_MIP_GENERATION = 1 << 22, // Internal to vtex, removed FLAG_NO_DEPTH_BUFFER = 1 << 23, FLAG_NICE_FILTERED = 1 << 24, // Internal to vtex, removed FLAG_CLAMP_U = 1 << 25, @@ -165,7 +172,8 @@ class VTF { bool createMips = true; bool createThumbnail = true; bool createReflectivity = true; - uint8_t compressionLevel = 6; + int16_t compressionLevel = -1; + CompressionMethod compressionMethod = CompressionMethod::ZSTD; float bumpMapScale = 1.f; }; @@ -223,6 +231,10 @@ class VTF { void setImageResizeMethods(ImageConversion::ResizeMethod imageWidthResizeMethod_, ImageConversion::ResizeMethod imageHeightResizeMethod_); + void setImageWidthResizeMethod(ImageConversion::ResizeMethod imageWidthResizeMethod_); + + void 
setImageHeightResizeMethod(ImageConversion::ResizeMethod imageHeightResizeMethod_); + [[nodiscard]] uint16_t getWidth(uint8_t mip = 0) const; [[nodiscard]] uint16_t getHeight(uint8_t mip = 0) const; @@ -305,13 +317,17 @@ class VTF { void removeExtendedFlagsResource(); - void setKeyValuesData(const std::string& value); + void setKeyValuesDataResource(const std::string& value); + + void removeKeyValuesDataResource(); - void removeKeyValuesData(); + [[nodiscard]] int16_t getCompressionLevel() const; - [[nodiscard]] uint8_t getCompressionLevel() const; + void setCompressionLevel(int16_t newCompressionLevel); - void setCompressionLevel(uint8_t newCompressionLevel); + [[nodiscard]] CompressionMethod getCompressionMethod() const; + + void setCompressionMethod(CompressionMethod newCompressionMethod); [[nodiscard]] bool hasImageData() const; @@ -342,6 +358,8 @@ class VTF { [[nodiscard]] std::vector getThumbnailDataAsRGBA8888() const; + void setThumbnail(std::span imageData_, ImageFormat format_, uint16_t width_, uint16_t height_); + void computeThumbnail(ImageConversion::ResizeFilter filter = ImageConversion::ResizeFilter::BILINEAR); void removeThumbnail(); @@ -405,7 +423,8 @@ class VTF { //uint8_t _padding3[4]; // These aren't in the header, these are for VTF modification - uint8_t compressionLevel = 0; + int16_t compressionLevel = 0; + CompressionMethod compressionMethod = CompressionMethod::ZSTD; ImageConversion::ResizeMethod imageWidthResizeMethod = ImageConversion::ResizeMethod::POWER_OF_TWO_BIGGER; ImageConversion::ResizeMethod imageHeightResizeMethod = ImageConversion::ResizeMethod::POWER_OF_TWO_BIGGER; }; diff --git a/lang/c/include/gameppc/Convert.hpp b/lang/c/include/gameppc/Convert.hpp new file mode 100644 index 000000000..ce6eb85f8 --- /dev/null +++ b/lang/c/include/gameppc/Convert.hpp @@ -0,0 +1,23 @@ +#pragma once + +/* + * This is a header designed to be included in C++ source code. + * It should not be included in applications using any C wrapper libraries! 
+ */ +#ifndef __cplusplus +#error "This header can only be used in C++!" +#endif + +#include "gamepp.h" + +namespace gamepp { + +class GameInstance; + +} // namespace gamepp + +namespace Convert { + +gamepp::GameInstance* gameInstance(gamepp_game_instance_handle_t handle); + +} // namespace Convert diff --git a/lang/c/include/gameppc/gamepp.h b/lang/c/include/gameppc/gamepp.h new file mode 100644 index 000000000..7205fc261 --- /dev/null +++ b/lang/c/include/gameppc/gamepp.h @@ -0,0 +1,45 @@ +#pragma once + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef void* gamepp_game_instance_handle_t; + +#ifdef __cplusplus +} // extern "C" +#endif + +// REQUIRES MANUAL FREE: gamepp_game_instance_free +SOURCEPP_API gamepp_game_instance_handle_t gamepp_find_game_instance(); + +// REQUIRES MANUAL FREE: gamepp_game_instance_free +SOURCEPP_API gamepp_game_instance_handle_t gamepp_find_game_instance_with_name(const char* windowNameOverride); + +SOURCEPP_API void gamepp_game_instance_free(gamepp_game_instance_handle_t* handle); + +// REQUIRES MANUAL FREE: sourcepp_string_free +SOURCEPP_API sourcepp_string_t gamepp_get_window_title(gamepp_game_instance_handle_t handle); + +SOURCEPP_API int gamepp_get_window_pos_x(gamepp_game_instance_handle_t handle); + +SOURCEPP_API int gamepp_get_window_pos_y(gamepp_game_instance_handle_t handle); + +SOURCEPP_API int gamepp_get_window_width(gamepp_game_instance_handle_t handle); + +SOURCEPP_API int gamepp_get_window_height(gamepp_game_instance_handle_t handle); + +SOURCEPP_API void gamepp_command(gamepp_game_instance_handle_t handle, const char* command); + +SOURCEPP_API void gamepp_input_begin(gamepp_game_instance_handle_t handle, const char* input); + +SOURCEPP_API void gamepp_input_end(gamepp_game_instance_handle_t handle, const char* input); + +SOURCEPP_API void gamepp_input_once(gamepp_game_instance_handle_t handle, const char* input); + +SOURCEPP_API void gamepp_input_hold(gamepp_game_instance_handle_t handle, const 
char* input, double sec); + +SOURCEPP_API void gamepp_wait(gamepp_game_instance_handle_t handle, double sec); diff --git a/lang/c/src/gameppc/Convert.cpp b/lang/c/src/gameppc/Convert.cpp new file mode 100644 index 000000000..f0a22c653 --- /dev/null +++ b/lang/c/src/gameppc/Convert.cpp @@ -0,0 +1,9 @@ +#include + +#include + +using namespace gamepp; + +GameInstance* Convert::gameInstance(gamepp_game_instance_handle_t handle) { + return static_cast(handle); +} diff --git a/lang/c/src/gameppc/_gameppc.cmake b/lang/c/src/gameppc/_gameppc.cmake new file mode 100644 index 000000000..8516bd855 --- /dev/null +++ b/lang/c/src/gameppc/_gameppc.cmake @@ -0,0 +1,6 @@ +add_pretty_parser(gamepp C + SOURCES + "${CMAKE_CURRENT_SOURCE_DIR}/lang/c/include/gameppc/Convert.hpp" + "${CMAKE_CURRENT_SOURCE_DIR}/lang/c/include/gameppc/gamepp.h" + "${CMAKE_CURRENT_LIST_DIR}/Convert.cpp" + "${CMAKE_CURRENT_LIST_DIR}/gamepp.cpp") diff --git a/lang/c/src/gameppc/gamepp.cpp b/lang/c/src/gameppc/gamepp.cpp new file mode 100644 index 000000000..12c660899 --- /dev/null +++ b/lang/c/src/gameppc/gamepp.cpp @@ -0,0 +1,98 @@ +#include + +#include + +#include +#include +#include + +using namespace gamepp; + +SOURCEPP_API gamepp_game_instance_handle_t gamepp_find_game_instance() { + auto instance = GameInstance::find(); + if (!instance) { + return nullptr; + } + return new GameInstance{*instance}; +} + +SOURCEPP_API gamepp_game_instance_handle_t gamepp_find_game_instance_with_name(const char* windowNameOverride) { + auto instance = GameInstance::find(windowNameOverride); + if (!instance) { + return nullptr; + } + return new GameInstance{*instance}; +} + +SOURCEPP_API void gamepp_game_instance_free(gamepp_game_instance_handle_t* handle) { + SOURCEPP_EARLY_RETURN(handle); + + delete Convert::gameInstance(*handle); + *handle = nullptr; +} + +SOURCEPP_API sourcepp_string_t gamepp_get_window_title(gamepp_game_instance_handle_t handle) { + SOURCEPP_EARLY_RETURN_VAL(handle, SOURCEPP_STRING_INVALID); + + 
return Convert::toString(Convert::gameInstance(handle)->getWindowTitle()); +} + +SOURCEPP_API int gamepp_get_window_pos_x(gamepp_game_instance_handle_t handle) { + SOURCEPP_EARLY_RETURN_VAL(handle, 0); + + return Convert::gameInstance(handle)->getWindowPos()[0]; +} + +SOURCEPP_API int gamepp_get_window_pos_y(gamepp_game_instance_handle_t handle) { + SOURCEPP_EARLY_RETURN_VAL(handle, 0); + + return Convert::gameInstance(handle)->getWindowPos()[1]; +} + +SOURCEPP_API int gamepp_get_window_width(gamepp_game_instance_handle_t handle) { + SOURCEPP_EARLY_RETURN_VAL(handle, 0); + + return Convert::gameInstance(handle)->getWindowSize()[0]; +} + +SOURCEPP_API int gamepp_get_window_height(gamepp_game_instance_handle_t handle) { + SOURCEPP_EARLY_RETURN_VAL(handle, 0); + + return Convert::gameInstance(handle)->getWindowSize()[1]; +} + +SOURCEPP_API void gamepp_command(gamepp_game_instance_handle_t handle, const char* command) { + SOURCEPP_EARLY_RETURN(handle); + + Convert::gameInstance(handle)->command(command); +} + +SOURCEPP_API void gamepp_input_begin(gamepp_game_instance_handle_t handle, const char* input) { + SOURCEPP_EARLY_RETURN(handle); + + Convert::gameInstance(handle)->inputBegin(input); +} + +SOURCEPP_API void gamepp_input_end(gamepp_game_instance_handle_t handle, const char* input) { + SOURCEPP_EARLY_RETURN(handle); + + Convert::gameInstance(handle)->inputEnd(input); +} + +SOURCEPP_API void gamepp_input_once(gamepp_game_instance_handle_t handle, const char* input) { + SOURCEPP_EARLY_RETURN(handle); + + Convert::gameInstance(handle)->inputOnce(input); +} + +SOURCEPP_API void gamepp_input_hold(gamepp_game_instance_handle_t handle, const char* input, double sec) { + SOURCEPP_EARLY_RETURN(handle); + + Convert::gameInstance(handle)->inputHold(input, sec); +} + +SOURCEPP_API void gamepp_wait(gamepp_game_instance_handle_t handle, double sec) { + SOURCEPP_EARLY_RETURN(handle); + + Convert::gameInstance(handle)->wait(sec); +} diff --git a/lang/python/cfg/CMakeLists.txt 
b/lang/python/cfg/CMakeLists.txt new file mode 100644 index 000000000..65080cc88 --- /dev/null +++ b/lang/python/cfg/CMakeLists.txt @@ -0,0 +1,46 @@ +# Load this to build the sourcepp Python package + +cmake_minimum_required(VERSION 3.25 FATAL_ERROR) +set(CMAKE_OSX_ARCHITECTURES "arm64" CACHE INTERNAL "" FORCE) +project(sourcepp_python) + +if (NOT SKBUILD) + message(WARNING "\ +This CMake file is meant to be executed using 'scikit-build-core'. +Running it directly will almost certainly not produce the desired +result. If you are a user trying to install this package, use the +command below, which will install all necessary build dependencies, +compile the package in an isolated environment, and then install it. +===================================================================== + $ pip install . +===================================================================== +If you are a software developer, and this is your own package, then +it is usually much more efficient to install the build dependencies +in your environment once and use the following command that avoids +a costly creation of a new virtual environment at every compilation: +===================================================================== + $ pip install nanobind scikit-build-core[pyproject] + $ pip install --no-build-isolation -ve . +===================================================================== +You may optionally add -Ceditable.rebuild=true to auto-rebuild when +the package is imported. Otherwise, you need to rerun the above +after editing C++ files.") +endif() + +set(CMAKE_POSITION_INDEPENDENT_CODE ON) + +set(SOURCEPP_BUILD_PYTHON_WRAPPERS ON CACHE INTERNAL "" FORCE) +set(SOURCEPP_PYTHON_VERSION "@SOURCEPP_PYTHON_VERSION@") + +# As weird as this looks, this is necessary for sdist wheel +set(SOURCEPP_PYTHON_IS_SUBDIR OFF) +if(SOURCEPP_PYTHON_IS_SUBDIR) + add_subdirectory("../.." 
"${CMAKE_CURRENT_BINARY_DIR}/sourcepp") +else() + include(FetchContent) + FetchContent_Declare( + sourcepp + GIT_REPOSITORY "https://github.com/craftablescience/sourcepp.git" + GIT_TAG "@SOURCEPP_GIT_TAG@") + FetchContent_MakeAvailable(sourcepp) +endif() diff --git a/lang/python/cfg/__init__.py b/lang/python/cfg/__init__.py new file mode 100644 index 000000000..75430694c --- /dev/null +++ b/lang/python/cfg/__init__.py @@ -0,0 +1,5 @@ +from ._sourcepp_impl import __doc__, gamepp, sourcepp, steampp, toolpp, vcryptpp, vtfpp + +__author__ = "craftablescience" +__version__ = "${SOURCEPP_PYTHON_VERSION}" +__all__ = ['__author__', '__doc__', '__version__', 'gamepp', 'sourcepp', 'steampp', 'toolpp', 'vcryptpp', 'vtfpp'] diff --git a/lang/python/cfg/pyproject.toml b/lang/python/cfg/pyproject.toml new file mode 100644 index 000000000..c82a7f56e --- /dev/null +++ b/lang/python/cfg/pyproject.toml @@ -0,0 +1,55 @@ +[build-system] +requires = ["scikit-build-core >=0.10.7", "nanobind >=1.3.2"] +build-backend = "scikit_build_core.build" + + +[project] +name = "sourcepp" +version = "${SOURCEPP_PYTHON_VERSION}" +authors = [{ name = "craftablescience", email = "lauralewisdev@gmail.com" }] +maintainers = [{ name = "craftablescience", email = "lauralewisdev@gmail.com" }] +description = "Several modern C++20 libraries for sanely parsing Valve formats." 
+readme = "README.md" +requires-python = ">=3.8" +classifiers = [ + "License :: OSI Approved :: MIT License", + "Intended Audience :: Developers", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", +] + +[project.urls] +"homepage" = "https://github.com/craftablescience/sourcepp" +"repository" = "https://github.com/craftablescience/sourcepp" +"issue tracker" = "https://github.com/craftablescience/sourcepp/issues" +"funding" = "https://ko-fi.com/craftablescience" + +[tool.scikit-build] +minimum-version = "build-system.requires" +build-dir = "build/{wheel_tag}" +build.targets = ["sourcepp_python_all"] +sdist.reproducible = true +sdist.include = ["src/*", "test/*", "CMakeLists.txt", "LICENSE", "pyproject.toml", "README.md", "THIRDPARTY_LEGAL_NOTICES.txt"] +sdist.exclude = ["cfg/*"] +wheel.py-api = "cp312" +wheel.license-files = ["LICENSE", "THIRDPARTY_LEGAL_NOTICES.txt"] +build.verbose = true +logging.level = "INFO" + + +[tool.cibuildwheel] +archs = ["auto64"] +build-verbosity = 1 +#test-command = "pytest {project}/test" +#test-requires = "pytest" + +[tool.cibuildwheel.macos] +archs = ["arm64"] + +[tool.cibuildwheel.macos.environment] +MACOSX_DEPLOYMENT_TARGET = "14.7" diff --git a/lang/python/src/gamepp.h b/lang/python/src/gamepp.h new file mode 100644 index 000000000..5353c4134 --- /dev/null +++ b/lang/python/src/gamepp.h @@ -0,0 +1,31 @@ +#pragma once + +#include +#include +#include +#include + +namespace py = nanobind; + +#include + +namespace gamepp { + +inline void register_python(py::module_& m) { + auto gamepp = m.def_submodule("gamepp"); + using namespace gamepp; + + py::class_(gamepp, "GameInstance") + .def_static("find", &GameInstance::find, py::arg("window_name_override") = "") + 
.def_prop_ro("window_title", &GameInstance::getWindowTitle) + .def_prop_ro("window_pos", &GameInstance::getWindowPos) + .def_prop_ro("window_size", &GameInstance::getWindowSize) + .def("command", &GameInstance::command, py::arg("command"), py::rv_policy::reference) + .def("input_begin", &GameInstance::inputBegin, py::arg("input"), py::rv_policy::reference) + .def("input_end", &GameInstance::inputEnd, py::arg("input"), py::rv_policy::reference) + .def("input_once", &GameInstance::inputOnce, py::arg("input"), py::rv_policy::reference) + .def("input_hold", &GameInstance::inputHold, py::arg("input"), py::arg("sec"), py::rv_policy::reference) + .def("wait", &GameInstance::wait, py::arg("sec"), py::rv_policy::reference); +} + +} // namespace gamepp diff --git a/lang/python/src/sourcepp.cpp b/lang/python/src/sourcepp.cpp new file mode 100644 index 000000000..35c33c697 --- /dev/null +++ b/lang/python/src/sourcepp.cpp @@ -0,0 +1,57 @@ +#include "sourcepp.h" + +#ifdef GAMEPP +#include "gamepp.h" +#endif + +#ifdef STEAMPP +#include "steampp.h" +#endif + +#ifdef TOOLPP +#include "toolpp.h" +#endif + +#ifdef VCRYPTPP +#include "vcryptpp.h" +#endif + +#ifdef VTFPP +#include "vtfpp.h" +#endif + +NB_MODULE(_sourcepp_impl, m) { + m.doc() = "SourcePP: A Python wrapper around several modern C++20 libraries for sanely parsing Valve's formats."; + + sourcepp::register_python(m); + +#ifdef GAMEPP + gamepp::register_python(m); +#else + m.def_submodule("gamepp"); +#endif + +#ifdef STEAMPP + steampp::register_python(m); +#else + m.def_submodule("steampp"); +#endif + +#ifdef TOOLPP + toolpp::register_python(m); +#else + m.def_submodule("toolpp"); +#endif + +#ifdef VCRYPTPP + vcryptpp::register_python(m); +#else + m.def_submodule("vcryptpp"); +#endif + +#ifdef VTFPP + vtfpp::register_python(m); +#else + m.def_submodule("vtfpp"); +#endif +} diff --git a/lang/python/src/sourcepp.h b/lang/python/src/sourcepp.h new file mode 100644 index 000000000..079c37ae0 --- /dev/null +++ 
b/lang/python/src/sourcepp.h @@ -0,0 +1,84 @@ +#pragma once + +#include + +#include +#include + +namespace py = nanobind; + +#include + +namespace sourcepp { + +inline void register_python(py::module_& m) { + auto sourcepp = m.def_submodule("sourcepp"); + using namespace sourcepp; + + { + auto math = sourcepp.def_submodule("math"); + using namespace math; + + const auto registerVecType = [&math](std::string_view name) { + py::class_(math, name.data()) + .def("__len__", &V::size) + .def("__setitem__", [](V& self, uint8_t index, typename V::value_type val) { self[index] = val; }) + .def("__getitem__", [](V& self, uint8_t index) { return self[index]; }) + .def_static("zero", &V::zero) + .def("is_zero", &V::isZero); + }; + + registerVecType.operator()("Vec2i8"); + registerVecType.operator()("Vec2i16"); + registerVecType.operator()("Vec2i32"); + registerVecType.operator()("Vec2i64"); + //registerVecType.operator()("Vec2i"); + + registerVecType.operator()("Vec2ui8"); + registerVecType.operator()("Vec2ui16"); + registerVecType.operator()("Vec2ui32"); + registerVecType.operator()("Vec2ui64"); + //registerVecType.operator()("Vec2ui"); + + //registerVecType.operator()("Vec2f16"); + registerVecType.operator()("Vec2f32"); + registerVecType.operator()("Vec2f64"); + //registerVecType.operator()("Vec2f"); + + registerVecType.operator()("Vec3i8"); + registerVecType.operator()("Vec3i16"); + registerVecType.operator()("Vec3i32"); + registerVecType.operator()("Vec3i64"); + //registerVecType.operator()("Vec3i"); + + registerVecType.operator()("Vec3ui8"); + registerVecType.operator()("Vec3ui16"); + registerVecType.operator()("Vec3ui32"); + registerVecType.operator()("Vec3ui64"); + //registerVecType.operator()("Vec3ui"); + + //registerVecType.operator()("Vec3f16"); + registerVecType.operator()("Vec3f32"); + registerVecType.operator()("Vec3f64"); + //registerVecType.operator()("Vec3f"); + + registerVecType.operator()("Vec4i8"); + registerVecType.operator()("Vec4i16"); + 
registerVecType.operator()("Vec4i32"); + registerVecType.operator()("Vec4i64"); + //registerVecType.operator()("Vec4i"); + + registerVecType.operator()("Vec4ui8"); + registerVecType.operator()("Vec4ui16"); + registerVecType.operator()("Vec4ui32"); + registerVecType.operator()("Vec4ui64"); + //registerVecType.operator()("Vec4ui"); + + //registerVecType.operator()("Vec4f16"); + registerVecType.operator()("Vec4f32"); + registerVecType.operator()("Vec4f64"); + //registerVecType.operator()("Vec4f"); + } +} + +} // namespace sourcepp diff --git a/lang/python/src/steampp.h b/lang/python/src/steampp.h new file mode 100644 index 000000000..bd44ebfd3 --- /dev/null +++ b/lang/python/src/steampp.h @@ -0,0 +1,36 @@ +#pragma once + +#include +#include +#include +#include + +namespace py = nanobind; + +#include + +namespace steampp { + +inline void register_python(py::module_& m) { + auto steampp = m.def_submodule("steampp"); + using namespace steampp; + + py::class_(steampp, "Steam") + .def(py::init<>()) + .def_prop_ro("install_dir", &Steam::getInstallDir) + .def_prop_ro("library_dirs", &Steam::getLibraryDirs) + .def_prop_ro("sourcemod_dir", &Steam::getSourceModDir) + .def_prop_ro("installed_apps", &Steam::getInstalledApps) + .def("is_app_installed", &Steam::isAppInstalled, py::arg("appID")) + .def("get_app_name", &Steam::getAppName, py::arg("appID")) + .def("get_app_install_dir", &Steam::getAppInstallDir, py::arg("appID")) + .def("get_app_icon_path", &Steam::getAppIconPath, py::arg("appID")) + .def("get_app_logo_path", &Steam::getAppLogoPath, py::arg("appID")) + .def("get_app_box_art_path", &Steam::getAppBoxArtPath, py::arg("appID")) + .def("get_app_store_art_path", &Steam::getAppStoreArtPath, py::arg("appID")) + .def("is_app_using_source_engine", &Steam::isAppUsingSourceEngine, py::arg("appID")) + .def("is_app_using_source_2_engine", &Steam::isAppUsingSource2Engine, py::arg("appID")) + .def("__bool__", &Steam::operator bool, py::is_operator()); +} + +} // namespace steampp 
diff --git a/lang/python/src/toolpp.h b/lang/python/src/toolpp.h new file mode 100644 index 000000000..a81a303cb --- /dev/null +++ b/lang/python/src/toolpp.h @@ -0,0 +1,169 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace py = nanobind; + +#include + +namespace toolpp { + +inline void register_python(py::module_& m) { + auto toolpp = m.def_submodule("toolpp"); + using namespace toolpp; + + py::enum_(toolpp, "CmdSeqCommandSpecial") + .value("NONE", CmdSeq::Command::Special::NONE) + .value("CHANGE_DIRECTORY", CmdSeq::Command::Special::CHANGE_DIRECTORY) + .value("COPY_FILE", CmdSeq::Command::Special::COPY_FILE) + .value("DELETE_FILE", CmdSeq::Command::Special::DELETE_FILE) + .value("RENAME_FILE", CmdSeq::Command::Special::RENAME_FILE) + .value("COPY_FILE_IF_EXISTS_ALIAS", CmdSeq::Command::SPECIAL_COPY_FILE_IF_EXISTS_ALIAS) + .value("COPY_FILE_IF_EXISTS", CmdSeq::Command::Special::COPY_FILE_IF_EXISTS) + .export_values(); + + py::class_(toolpp, "CmdSeqCommand") + .def_rw("enabled", &CmdSeq::Command::enabled) + .def_rw("executable", &CmdSeq::Command::executable) + .def_rw("arguments", &CmdSeq::Command::arguments) + .def_rw("ensure_file_exists", &CmdSeq::Command::ensureFileExists) + .def_rw("path_to_theoretically_existing_file", &CmdSeq::Command::pathToTheoreticallyExistingFile) + .def_rw("use_process_window", &CmdSeq::Command::useProcessWindow) + .def_rw("wait_for_keypress", &CmdSeq::Command::waitForKeypress) + .def_static("get_special_display_name_for", &CmdSeq::Command::getSpecialDisplayNameFor, py::arg("special")) + .def("get_executable_display_name", &CmdSeq::Command::getExecutableDisplayName); + + py::class_(toolpp, "CmdSeqSequence") + .def_rw("name", &CmdSeq::Sequence::name) + .def_rw("commands", &CmdSeq::Sequence::commands); + + py::enum_(toolpp, "CmdSeqType") + .value("INVALID", CmdSeq::Type::INVALID) + .value("BINARY", CmdSeq::Type::BINARY) + .value("KEYVALUES_STRATA", CmdSeq::Type::KEYVALUES_STRATA) + .export_values(); + + 
py::class_(toolpp, "CmdSeq") + .def(py::init(), py::arg("path")) + .def(py::init(), py::arg("type")) + .def("__bool__", &CmdSeq::operator bool, py::is_operator()) + .def_prop_rw("type", &CmdSeq::getType, &CmdSeq::setType) + .def_prop_ro("version", &CmdSeq::getVersion) + .def("set_version", &CmdSeq::setVersion, py::arg("is_v02")) + .def("sequences", py::overload_cast<>(&CmdSeq::getSequences), py::rv_policy::reference_internal) + .def("bake", [](const CmdSeq& self) { + const auto d = self.bake(); + return py::bytes{d.data(), d.size()}; + }) + .def("bake_to_file", py::overload_cast(&CmdSeq::bake, py::const_), py::arg("path")); + + py::class_(toolpp, "FGDEntityClassProperty") + .def_ro("name", &FGD::Entity::ClassProperty::name) + .def_ro("arguments", &FGD::Entity::ClassProperty::arguments); + + py::class_(toolpp, "FGDEntityField") + .def_ro("name", &FGD::Entity::Field::name) + .def_ro("value_type", &FGD::Entity::Field::valueType) + .def_ro("readonly", &FGD::Entity::Field::readonly) + .def_ro("reportable", &FGD::Entity::Field::reportable) + .def_ro("display_name", &FGD::Entity::Field::displayName) + .def_ro("value_default", &FGD::Entity::Field::valueDefault) + .def_ro("description", &FGD::Entity::Field::description); + + py::class_(toolpp, "FGDEntityFieldChoicesChoice") + .def_ro("value", &FGD::Entity::FieldChoices::Choice::value) + .def_ro("display_name", &FGD::Entity::FieldChoices::Choice::displayName); + + py::class_(toolpp, "FGDEntityFieldChoices") + .def_ro("name", &FGD::Entity::FieldChoices::name) + .def_ro("readonly", &FGD::Entity::FieldChoices::readonly) + .def_ro("reportable", &FGD::Entity::FieldChoices::reportable) + .def_ro("display_name", &FGD::Entity::FieldChoices::displayName) + .def_ro("value_default", &FGD::Entity::FieldChoices::valueDefault) + .def_ro("description", &FGD::Entity::FieldChoices::description) + .def_ro("choices", &FGD::Entity::FieldChoices::choices); + + py::class_(toolpp, "FGDEntityFieldFlagsFlag") + .def_ro("value", 
&FGD::Entity::FieldFlags::Flag::value) + .def_ro("display_name", &FGD::Entity::FieldFlags::Flag::displayName) + .def_ro("enabled_by_default", &FGD::Entity::FieldFlags::Flag::enabledByDefault) + .def_ro("description", &FGD::Entity::FieldFlags::Flag::description); + + py::class_(toolpp, "FGDEntityFieldFlags") + .def_ro("name", &FGD::Entity::FieldFlags::name) + .def_ro("readonly", &FGD::Entity::FieldFlags::readonly) + .def_ro("reportable", &FGD::Entity::FieldFlags::reportable) + .def_ro("display_name", &FGD::Entity::FieldFlags::displayName) + .def_ro("description", &FGD::Entity::FieldFlags::description) + .def_ro("flags", &FGD::Entity::FieldFlags::flags); + + py::class_(toolpp, "FGDEntityIO") + .def_ro("name", &FGD::Entity::IO::name) + .def_ro("value_type", &FGD::Entity::IO::valueType) + .def_ro("description", &FGD::Entity::IO::description); + + py::class_(toolpp, "FGDEntity") + .def_ro("class_type", &FGD::Entity::classType) + .def_ro("class_properties", &FGD::Entity::classProperties) + .def_ro("description", &FGD::Entity::description) + .def_ro("fields", &FGD::Entity::fields) + .def_ro("fields_with_choices", &FGD::Entity::fieldsWithChoices) + .def_ro("fields_with_flags", &FGD::Entity::fieldsWithFlags) + .def_ro("inputs", &FGD::Entity::inputs) + .def_ro("outputs", &FGD::Entity::outputs); + + py::class_(toolpp, "FGDAutoVisGroup") + .def_ro("parent_name", &FGD::AutoVisGroup::parentName) + .def_ro("name", &FGD::AutoVisGroup::name) + .def_ro("entities", &FGD::AutoVisGroup::entities); + + py::class_(toolpp, "FGD") + .def(py::init<>()) + .def(py::init(), py::arg("fgd_path")) + .def("load", &FGD::load, py::arg("fgd_path")) + .def_prop_ro("version", &FGD::getVersion) + .def_prop_ro("map_size", &FGD::getMapSize) + .def_prop_ro("entities", &FGD::getEntities) + .def_prop_ro("material_exclusion_dirs", &FGD::getMaterialExclusionDirs) + .def_prop_ro("auto_visgroups", &FGD::getAutoVisGroups); + + py::class_(toolpp, "FGDWriterAutoVisGroupWriter") + .def("visgroup", 
&FGDWriter::AutoVisGroupWriter::visGroup, py::arg("name"), py::arg("entities"), py::rv_policy::reference) + .def("end_auto_visgroup", &FGDWriter::AutoVisGroupWriter::endAutoVisGroup, py::rv_policy::reference); + + py::class_(toolpp, "FGDWriterEntityWriterKeyValueChoicesWriter") + .def("choice", &FGDWriter::EntityWriter::KeyValueChoicesWriter::choice, py::arg("value"), py::arg("display_name"), py::rv_policy::reference) + .def("end_key_value_choices", &FGDWriter::EntityWriter::KeyValueChoicesWriter::endKeyValueChoices, py::rv_policy::reference); + + py::class_(toolpp, "FGDWriterEntityWriterKeyValueFlagsWriter") + .def("flag", &FGDWriter::EntityWriter::KeyValueFlagsWriter::flag, py::arg("value"), py::arg("display_name"), py::arg("enabled_by_default"), py::arg("description") = "", py::rv_policy::reference) + .def("end_key_value_flags", &FGDWriter::EntityWriter::KeyValueFlagsWriter::endKeyValueFlags, py::rv_policy::reference); + + py::class_(toolpp, "FGDWriterEntityWriter") + .def("key_value", &FGDWriter::EntityWriter::keyValue, py::arg("name"), py::arg("value_type"), py::arg("display_name") = "", py::arg("value_default") = "", py::arg("description") = "", py::arg("readonly") = false, py::arg("report") = false, py::rv_policy::reference) + .def("begin_key_value_choices", &FGDWriter::EntityWriter::beginKeyValueChoices, py::arg("name"), py::arg("display_name") = "", py::arg("value_default") = "", py::arg("description") = "", py::arg("readonly") = false, py::arg("report") = false) + .def("begin_key_value_flags", &FGDWriter::EntityWriter::beginKeyValueFlags, py::arg("name"), py::arg("display_name") = "", py::arg("description") = "", py::arg("readonly") = false, py::arg("report") = false) + .def("input", &FGDWriter::EntityWriter::input, py::arg("name"), py::arg("value_type"), py::arg("description") = "", py::rv_policy::reference) + .def("output", &FGDWriter::EntityWriter::output, py::arg("name"), py::arg("value_type"), py::arg("description") = "", py::rv_policy::reference) + 
.def("end_entity", &FGDWriter::EntityWriter::endEntity, py::rv_policy::reference); + + py::class_(toolpp, "FGDWriter") + .def_static("begin", &FGDWriter::begin) + .def("include", &FGDWriter::include, py::arg("fgd_path"), py::rv_policy::reference) + .def("version", &FGDWriter::version, py::arg("version"), py::rv_policy::reference) + .def("map_size", &FGDWriter::mapSize, py::arg("map_size"), py::rv_policy::reference) + .def("material_exclusion_dirs", &FGDWriter::materialExclusionDirs, py::arg("material_exclusion_dirs"), py::rv_policy::reference) + .def("begin_auto_visgroup", &FGDWriter::beginAutoVisGroup, py::arg("parent_name")) + .def("begin_entity", &FGDWriter::beginEntity, py::arg("class_type"), py::arg("class_properties"), py::arg("name"), py::arg("description") = "") + .def("bake", [](const FGDWriter& self) { + const auto d = self.bake(); + return py::bytes{d.data(), d.size()}; + }) + .def("bake_to_file", py::overload_cast(&FGDWriter::bake, py::const_), py::arg("path")); +} + +} // namespace vcryptpp diff --git a/lang/python/src/vcryptpp.h b/lang/python/src/vcryptpp.h new file mode 100644 index 000000000..1ae642783 --- /dev/null +++ b/lang/python/src/vcryptpp.h @@ -0,0 +1,90 @@ +#pragma once + +#include +#include + +namespace py = nanobind; + +#include + +namespace vcryptpp { + +inline void register_python(py::module_& m) { + auto vcryptpp = m.def_submodule("vcryptpp"); + using namespace vcryptpp; + + { + auto VFONT = vcryptpp.def_submodule("VFONT"); + using namespace VFONT; + + VFONT.attr("IDENTIFIER") = IDENTIFIER; + + VFONT.attr("MAGIC") = MAGIC; + + VFONT.def("decrypt_bytes", [](const py::bytes& data) { + const auto d = decrypt({reinterpret_cast(data.data()), data.size()}); + return py::bytes{d.data(), d.size()}; + }, py::arg("data")); + } + + { + auto VICE = vcryptpp.def_submodule("VICE"); + using namespace VICE; + + { + auto KnownCodes = VICE.def_submodule("KnownCodes"); + using namespace KnownCodes; + + KnownCodes.attr("DEFAULT") = DEFAULT; + 
KnownCodes.attr("CONTAGION_WEAPONS") = CONTAGION_WEAPONS; + KnownCodes.attr("CONTAGION_SCRIPTS") = CONTAGION_SCRIPTS; + KnownCodes.attr("COUNTER_STRIKE_SOURCE") = COUNTER_STRIKE_SOURCE; + KnownCodes.attr("COUNTER_STRIKE_GLOBAL_OFFENSIVE") = COUNTER_STRIKE_GLOBAL_OFFENSIVE; + KnownCodes.attr("COUNTER_STRIKE_2") = COUNTER_STRIKE_2; + KnownCodes.attr("COUNTER_STRIKE_PROMOD") = COUNTER_STRIKE_PROMOD; + KnownCodes.attr("DAY_OF_DEFEAT_SOURCE") = DAY_OF_DEFEAT_SOURCE; + KnownCodes.attr("DYSTOPIA_1_2") = DYSTOPIA_1_2; + KnownCodes.attr("DYSTOPIA_1_3") = DYSTOPIA_1_3; + KnownCodes.attr("GOLDEN_EYE_SOURCE") = GOLDEN_EYE_SOURCE; + KnownCodes.attr("HALF_LIFE_2_CTF") = HALF_LIFE_2_CTF; + KnownCodes.attr("HALF_LIFE_2_DM") = HALF_LIFE_2_DM; + KnownCodes.attr("INSURGENCY") = INSURGENCY; + KnownCodes.attr("LEFT_4_DEAD_2") = LEFT_4_DEAD_2; + KnownCodes.attr("NO_MORE_ROOM_IN_HELL") = NO_MORE_ROOM_IN_HELL; + KnownCodes.attr("NUCLEAR_DAWN") = NUCLEAR_DAWN; + KnownCodes.attr("TACTICAL_INTERVENTION") = TACTICAL_INTERVENTION; + KnownCodes.attr("TEAM_FORTRESS_2") = TEAM_FORTRESS_2; + KnownCodes.attr("TEAM_FORTRESS_2_ITEMS") = TEAM_FORTRESS_2_ITEMS; + KnownCodes.attr("THE_SHIP") = THE_SHIP; + KnownCodes.attr("ZOMBIE_PANIC_SOURCE") = ZOMBIE_PANIC_SOURCE; + + KnownCodes.attr("EKV_GPU_DEFAULT") = EKV_GPU_DEFAULT; + KnownCodes.attr("EKV_GPU_ALIEN_SWARM") = EKV_GPU_ALIEN_SWARM; + KnownCodes.attr("EKV_GPU_LEFT_4_DEAD_1") = EKV_GPU_LEFT_4_DEAD_1; + KnownCodes.attr("EKV_GPU_LEFT_4_DEAD_2") = EKV_GPU_LEFT_4_DEAD_2; + KnownCodes.attr("EKV_GPU_PORTAL_2") = EKV_GPU_PORTAL_2; + } + + VICE.def("decrypt_bytes", [](const py::bytes& data, std::string_view code = KnownCodes::DEFAULT) { + const auto d = decrypt({reinterpret_cast(data.data()), data.size()}, code); + return py::bytes{d.data(), d.size()}; + }, py::arg("data"), py::arg("code") = KnownCodes::DEFAULT); + + VICE.def("decrypt_str", [](std::string_view data, std::string_view code = KnownCodes::DEFAULT) -> std::string { + const auto d = 
decrypt({reinterpret_cast(data.data()), data.size()}, code); + return {reinterpret_cast(d.data()), d.size()}; + }, py::arg("data"), py::arg("code") = KnownCodes::DEFAULT); + + VICE.def("encrypt_bytes", [](const py::bytes& data, std::string_view code = KnownCodes::DEFAULT) { + const auto d = encrypt({reinterpret_cast(data.data()), data.size()}, code); + return py::bytes{d.data(), d.size()}; + }, py::arg("data"), py::arg("code") = KnownCodes::DEFAULT); + + VICE.def("encrypt_str", [](std::string_view data, std::string_view code = KnownCodes::DEFAULT) -> std::string { + const auto d = encrypt({reinterpret_cast(data.data()), data.size()}, code); + return {reinterpret_cast(d.data()), d.size()}; + }, py::arg("data"), py::arg("code") = KnownCodes::DEFAULT); + } +} + +} // namespace vcryptpp diff --git a/lang/python/src/vtfpp.h b/lang/python/src/vtfpp.h new file mode 100644 index 000000000..92e0c6b32 --- /dev/null +++ b/lang/python/src/vtfpp.h @@ -0,0 +1,436 @@ +#pragma once + +#include + +#include +#include +#include +#include +#include + +namespace py = nanobind; + +#include + +namespace vtfpp { + +void register_python(py::module_& m) { + using namespace vtfpp; + auto vtfpp = m.def_submodule("vtfpp"); + + py::enum_(vtfpp, "ImageFormat") + .value("RGBA8888", ImageFormat::RGBA8888) + .value("ABGR8888", ImageFormat::ABGR8888) + .value("RGB888", ImageFormat::RGB888) + .value("BGR888", ImageFormat::BGR888) + .value("RGB565", ImageFormat::RGB565) + .value("I8", ImageFormat::I8) + .value("IA88", ImageFormat::IA88) + .value("P8", ImageFormat::P8) + .value("A8", ImageFormat::A8) + .value("RGB888_BLUESCREEN", ImageFormat::RGB888_BLUESCREEN) + .value("BGR888_BLUESCREEN", ImageFormat::BGR888_BLUESCREEN) + .value("ARGB8888", ImageFormat::ARGB8888) + .value("BGRA8888", ImageFormat::BGRA8888) + .value("DXT1", ImageFormat::DXT1) + .value("DXT3", ImageFormat::DXT3) + .value("DXT5", ImageFormat::DXT5) + .value("BGRX8888", ImageFormat::BGRX8888) + .value("BGR565", ImageFormat::BGR565) + 
.value("BGRX5551", ImageFormat::BGRX5551) + .value("BGRA4444", ImageFormat::BGRA4444) + .value("DXT1_ONE_BIT_ALPHA", ImageFormat::DXT1_ONE_BIT_ALPHA) + .value("BGRA5551", ImageFormat::BGRA5551) + .value("UV88", ImageFormat::UV88) + .value("UVWQ8888", ImageFormat::UVWQ8888) + .value("RGBA16161616F", ImageFormat::RGBA16161616F) + .value("RGBA16161616", ImageFormat::RGBA16161616) + .value("UVLX8888", ImageFormat::UVLX8888) + .value("R32F", ImageFormat::R32F) + .value("RGB323232F", ImageFormat::RGB323232F) + .value("RGBA32323232F", ImageFormat::RGBA32323232F) + .value("RG1616F", ImageFormat::RG1616F) + .value("RG3232F", ImageFormat::RG3232F) + .value("RGBX8888", ImageFormat::RGBX8888) + .value("EMPTY", ImageFormat::EMPTY) + .value("ATI2N", ImageFormat::ATI2N) + .value("ATI1N", ImageFormat::ATI1N) + .value("RGBA1010102", ImageFormat::RGBA1010102) + .value("BGRA1010102", ImageFormat::BGRA1010102) + .value("R16F", ImageFormat::R16F) + .value("R8", ImageFormat::R8) + .value("BC7", ImageFormat::BC7) + .value("BC6H", ImageFormat::BC6H) + .export_values(); + + { + using namespace ImageFormatDetails; + auto ImageFormatDetails = vtfpp.def_submodule("ImageFormatDetails"); + + ImageFormatDetails.def("red", &red, py::arg("format")); + ImageFormatDetails.def("decompressedRed", &decompressedRed, py::arg("format")); + ImageFormatDetails.def("green", &green, py::arg("format")); + ImageFormatDetails.def("decompressedGreen", &decompressedGreen, py::arg("format")); + ImageFormatDetails.def("blue", &blue, py::arg("format")); + ImageFormatDetails.def("decompressedBlue", &decompressedBlue, py::arg("format")); + ImageFormatDetails.def("alpha", &alpha, py::arg("format")); + ImageFormatDetails.def("decompressedAlpha", &decompressedAlpha, py::arg("format")); + ImageFormatDetails.def("bpp", &bpp, py::arg("format")); + ImageFormatDetails.def("containerFormat", &containerFormat, py::arg("format")); + ImageFormatDetails.def("large", &large, py::arg("format")); + ImageFormatDetails.def("decimal", 
&decimal, py::arg("format")); + ImageFormatDetails.def("compressed", &compressed, py::arg("format")); + ImageFormatDetails.def("transparent", &transparent, py::arg("format")); + ImageFormatDetails.def("opaque", &opaque, py::arg("format")); + + ImageFormatDetails.def("get_data_length", py::overload_cast(&getDataLength), py::arg("format"), py::arg("width"), py::arg("height"), py::arg("slice_count") = 1); + ImageFormatDetails.def("get_data_length_extended", py::overload_cast(&getDataLength), py::arg("format"), py::arg("mip_count"), py::arg("frame_count"), py::arg("face_count"), py::arg("width"), py::arg("height"), py::arg("slice_count")); + ImageFormatDetails.def("get_data_position", [](ImageFormat format, uint8_t mip, uint8_t mipCount, uint16_t frame, uint16_t frameCount, uint8_t face, uint8_t faceCount, uint16_t width, uint16_t height, uint16_t slice = 0, uint16_t sliceCount = 1) -> std::pair { + uint32_t offset, length; + if (getDataPosition(offset, length, format, mip, mipCount, frame, frameCount, face, faceCount, width, height, slice, sliceCount)) { + return {offset, length}; + } + return {0, 0}; + }, py::arg("format"), py::arg("mip"), py::arg("mip_count"), py::arg("frame"), py::arg("frame_count"), py::arg("face"), py::arg("face_count"), py::arg("width"), py::arg("height"), py::arg("slice") = 0, py::arg("slice_count") = 1); + } + + { + using namespace ImageDimensions; + auto ImageDimensions = vtfpp.def_submodule("ImageDimensions"); + + ImageDimensions.def("get_mip_dim", &getMipDim, py::arg("mip"), py::arg("dim")); + ImageDimensions.def("get_recommended_mip_count_for_dims", &getRecommendedMipCountForDims, py::arg("format"), py::arg("width"), py::arg("height")); + } + + // Skip ImagePixel, difficult to bind + + { + using namespace ImageConversion; + auto ImageConversion = vtfpp.def_submodule("ImageConversion"); + + ImageConversion.def("convert_image_data_to_format", [](const py::bytes& imageData, ImageFormat oldFormat, ImageFormat newFormat, uint16_t width, 
uint16_t height) { + const auto d = convertImageDataToFormat({reinterpret_cast(imageData.data()), imageData.size()}, oldFormat, newFormat, width, height); + return py::bytes{d.data(), d.size()}; + }, py::arg("image_data"), py::arg("old_format"), py::arg("new_format"), py::arg("width"), py::arg("height")); + + ImageConversion.def("convert_several_image_data_to_format", [](const py::bytes& imageData, ImageFormat oldFormat, ImageFormat newFormat, uint8_t mipCount, uint16_t frameCount, uint16_t faceCount, uint16_t width, uint16_t height, uint16_t sliceCount) { + const auto d = convertSeveralImageDataToFormat({reinterpret_cast(imageData.data()), imageData.size()}, oldFormat, newFormat, mipCount, frameCount, faceCount, width, height, sliceCount); + return py::bytes{d.data(), d.size()}; + }, py::arg("image_data"), py::arg("old_format"), py::arg("new_format"), py::arg("mip_count"), py::arg("frame_count"), py::arg("face_count"), py::arg("width"), py::arg("height"), py::arg("slice_count")); + + py::enum_(ImageConversion, "FileFormat") + .value("DEFAULT", FileFormat::DEFAULT) + .value("PNG", FileFormat::PNG) + .value("JPEG", FileFormat::JPEG) + .value("BMP", FileFormat::BMP) + .value("TGA", FileFormat::TGA) + .value("HDR", FileFormat::HDR) + .export_values(); + + ImageConversion.def("get_default_file_format_for_image_format", &getDefaultFileFormatForImageFormat, py::arg("format")); + + ImageConversion.def("convert_image_data_to_file", [](const py::bytes& imageData, ImageFormat format, uint16_t width, uint16_t height, FileFormat fileFormat = FileFormat::DEFAULT) { + const auto d = convertImageDataToFile({reinterpret_cast(imageData.data()), imageData.size()}, format, width, height, fileFormat); + return py::bytes{d.data(), d.size()}; + }, py::arg("image_data"), py::arg("format"), py::arg("width"), py::arg("height"), py::arg("file_format") = FileFormat::DEFAULT); + + ImageConversion.def("convert_file_to_image_data", [](const py::bytes& fileData) -> std::tuple { + ImageFormat 
format; + int width, height, frame; + const auto d = convertFileToImageData({reinterpret_cast(fileData.data()), fileData.size()}, format, width, height, frame); + return {py::bytes{d.data(), d.size()}, format, width, height, frame}; + }, py::arg("file_data")); + + py::enum_(ImageConversion, "ResizeEdge") + .value("CLAMP", ResizeEdge::CLAMP) + .value("REFLECT", ResizeEdge::REFLECT) + .value("WRAP", ResizeEdge::WRAP) + .value("ZERO", ResizeEdge::ZERO) + .export_values(); + + py::enum_(ImageConversion, "ResizeFilter") + .value("DEFAULT", ResizeFilter::DEFAULT) + .value("BOX", ResizeFilter::BOX) + .value("BILINEAR", ResizeFilter::BILINEAR) + .value("CUBIC_BSPLINE", ResizeFilter::CUBIC_BSPLINE) + .value("CATMULLROM", ResizeFilter::CATMULLROM) + .value("MITCHELL", ResizeFilter::MITCHELL) + .export_values(); + + py::enum_(ImageConversion, "ResizeMethod") + .value("NONE", ResizeMethod::NONE) + .value("POWER_OF_TWO_BIGGER", ResizeMethod::POWER_OF_TWO_BIGGER) + .value("POWER_OF_TWO_SMALLER", ResizeMethod::POWER_OF_TWO_SMALLER) + .value("POWER_OF_TWO_NEAREST", ResizeMethod::POWER_OF_TWO_NEAREST) + .export_values(); + + ImageConversion.def("get_resized_dim", &getResizedDim, py::arg("n"), py::arg("resize_method")); + ImageConversion.def("get_resized_dims", [](uint16_t width, ResizeMethod widthResize, uint16_t height, ResizeMethod heightResize) -> std::pair { + setResizedDims(width, widthResize, height, heightResize); + return {width, height}; + }, py::arg("width"), py::arg("resize_width"), py::arg("height"), py::arg("resize_height")); + + ImageConversion.def("resize_image_data", [](const py::bytes& imageData, ImageFormat format, uint16_t width, uint16_t newWidth, uint16_t height, uint16_t newHeight, bool srgb, ResizeFilter filter, ResizeEdge edge = ResizeEdge::CLAMP) { + const auto d = resizeImageData({reinterpret_cast(imageData.data()), imageData.size()}, format, width, newWidth, height, newHeight, srgb, filter, edge); + return py::bytes{d.data(), d.size()}; + }, 
py::arg("image_data"), py::arg("format"), py::arg("width"), py::arg("new_width"), py::arg("height"), py::arg("new_height"), py::arg("srgb"), py::arg("filter"), py::arg("edge") = ResizeEdge::CLAMP); + + ImageConversion.def("resize_image_data_strict", [](const py::bytes& imageData, ImageFormat format, uint16_t width, uint16_t newWidth, ResizeMethod widthResize, uint16_t height, uint16_t newHeight, ResizeMethod heightResize, bool srgb, ResizeFilter filter, ResizeEdge edge = ResizeEdge::CLAMP) -> std::tuple { + uint16_t widthOut, heightOut; + const auto d = resizeImageDataStrict({reinterpret_cast(imageData.data()), imageData.size()}, format, width, newWidth, widthOut, widthResize, height, newHeight, heightOut, heightResize, srgb, filter, edge); + return {py::bytes{d.data(), d.size()}, widthOut, heightOut}; + }, py::arg("image_data"), py::arg("format"), py::arg("width"), py::arg("new_width"), py::arg("width_resize"), py::arg("height"), py::arg("new_height"), py::arg("height_resize"), py::arg("srgb"), py::arg("filter"), py::arg("edge") = ResizeEdge::CLAMP); + + // Skip extractChannelFromImageData, difficult to bind + // Skip applyChannelToImageData, difficult to bind + } + + py::class_(vtfpp, "PPLImage") + .def_ro("width", &PPL::Image::width) + .def_ro("height", &PPL::Image::height) + .def_prop_ro("data", [](const PPL::Image& self) { + return py::bytes{self.data.data(), self.data.size()}; + }); + + py::class_(vtfpp, "PPL") + .def(py::init(), py::arg("checksum"), py::arg("format") = ImageFormat::RGB888, py::arg("version") = 0) + .def("__init__", [](PPL* self, const py::bytes& pplData) { + return new(self) PPL{{reinterpret_cast(pplData.data()), pplData.size()}}; + }, py::arg("ppl_data")) + .def(py::init(), py::arg("path")) + .def("__bool__", &PPL::operator bool, py::is_operator()) + .def_prop_rw("version", &PPL::getVersion, &PPL::setVersion) + .def_prop_rw("checksum", &PPL::getChecksum, &PPL::setChecksum) + .def_prop_rw("format", &PPL::getFormat, &PPL::setFormat) + 
.def("has_image_for_lod", &PPL::hasImageForLOD, py::arg("lod")) + .def_prop_ro("image_lods", &PPL::getImageLODs) + .def("get_image_raw", [](const PPL& self, uint32_t lod = 0) -> std::optional { + const auto* image = self.getImageRaw(lod); + if (!image) { + return std::nullopt; + } + return *image; + }, py::arg("lod")) + .def("get_image_as", &PPL::getImageAs, py::arg("new_format"), py::arg("lod")) + .def("get_image_as_rgb888", &PPL::getImageAsRGB888, py::arg("lod")) + .def("set_image", [](PPL& self, const py::bytes& imageData, ImageFormat format, uint32_t width, uint32_t height, uint32_t lod = 0) { + self.setImage({reinterpret_cast(imageData.data()), imageData.size()}, format, width, height, lod); + }, py::arg("imageData"), py::arg("format"), py::arg("width"), py::arg("height"), py::arg("lod") = 0) + .def("set_image_resized", [](PPL& self, const py::bytes& imageData, ImageFormat format, uint32_t width, uint32_t height, uint32_t resizedWidth, uint32_t resizedHeight, uint32_t lod = 0, ImageConversion::ResizeFilter filter = ImageConversion::ResizeFilter::BILINEAR) { + self.setImage({reinterpret_cast(imageData.data()), imageData.size()}, format, width, height, resizedWidth, resizedHeight, lod, filter); + }, py::arg("imageData"), py::arg("format"), py::arg("width"), py::arg("height"), py::arg("resized_width"), py::arg("resized_height"), py::arg("lod") = 0, py::arg("filter") = ImageConversion::ResizeFilter::BILINEAR) + .def("set_image_from_file", py::overload_cast(&PPL::setImage), py::arg("image_path"), py::arg("lod") = 0) + .def("set_image_resized_from_file", py::overload_cast(&PPL::setImage), py::arg("image_path"), py::arg("resized_width"), py::arg("resized_height"), py::arg("lod") = 0, py::arg("filter") = ImageConversion::ResizeFilter::BILINEAR) + .def("save_image", [](const PPL& self, uint32_t lod = 0, ImageConversion::FileFormat fileFormat = ImageConversion::FileFormat::DEFAULT) { + const auto d = self.saveImageToFile(lod, fileFormat); + return py::bytes{d.data(), 
d.size()}; + }, py::arg("lod") = 0, py::arg("file_format") = ImageConversion::FileFormat::DEFAULT) + .def("save_image_to_file", py::overload_cast(&PPL::saveImageToFile, py::const_), py::arg("image_path"), py::arg("lod") = 0, py::arg("file_format") = ImageConversion::FileFormat::DEFAULT) + .def("bake", [](PPL& self) { + const auto d = self.bake(); + return py::bytes{d.data(), d.size()}; + }) + .def("bake_to_file", py::overload_cast(&PPL::bake), py::arg("ppl_path")); + + vtfpp.attr("VTF_SIGNATURE") = VTF_SIGNATURE; + + py::enum_(vtfpp, "CompressionMethod") + .value("DEFLATE", CompressionMethod::DEFLATE) + .value("ZSTD", CompressionMethod::ZSTD) + .export_values(); + + py::enum_(vtfpp, "ResourceType") + .value("UNKNOWN", Resource::TYPE_UNKNOWN) + .value("THUMBNAIL_DATA", Resource::TYPE_THUMBNAIL_DATA) + .value("IMAGE_DATA", Resource::TYPE_IMAGE_DATA) + .value("PARTICLE_SHEET_DATA", Resource::TYPE_PARTICLE_SHEET_DATA) + .value("CRC", Resource::TYPE_CRC) + .value("LOD_CONTROL_INFO", Resource::TYPE_LOD_CONTROL_INFO) + .value("EXTENDED_FLAGS", Resource::TYPE_EXTENDED_FLAGS) + .value("KEYVALUES_DATA", Resource::TYPE_KEYVALUES_DATA) + .value("AUX_COMPRESSION", Resource::TYPE_AUX_COMPRESSION) + .export_values(); + + py::enum_(vtfpp, "ResourceFlags") + .value("NONE", Resource::FLAG_NONE) + .value("LOCAL_DATA", Resource::FLAG_LOCAL_DATA) + .export_values(); + + // Skip Resource, mostly useless outside C++ + + py::enum_(vtfpp, "VTFFlags") + .value("NONE", VTF::FLAG_NONE) + .value("POINT_SAMPLE", VTF::FLAG_POINT_SAMPLE) + .value("TRILINEAR", VTF::FLAG_TRILINEAR) + .value("CLAMP_S", VTF::FLAG_CLAMP_S) + .value("CLAMP_T", VTF::FLAG_CLAMP_T) + .value("ANISOTROPIC", VTF::FLAG_ANISOTROPIC) + .value("HINT_DXT5", VTF::FLAG_HINT_DXT5) + .value("SRGB", VTF::FLAG_SRGB) + .value("NO_COMPRESS", VTF::FLAG_NO_COMPRESS) + .value("NORMAL", VTF::FLAG_NORMAL) + .value("NO_MIP", VTF::FLAG_NO_MIP) + .value("NO_LOD", VTF::FLAG_NO_LOD) + .value("LOAD_LOWEST_MIPS", VTF::FLAG_LOAD_LOWEST_MIPS) + 
.value("PROCEDURAL", VTF::FLAG_PROCEDURAL) + .value("ONE_BIT_ALPHA", VTF::FLAG_ONE_BIT_ALPHA) + .value("MULTI_BIT_ALPHA", VTF::FLAG_MULTI_BIT_ALPHA) + .value("ENVMAP", VTF::FLAG_ENVMAP) + .value("RENDERTARGET", VTF::FLAG_RENDERTARGET) + .value("DEPTH_RENDERTARGET", VTF::FLAG_DEPTH_RENDERTARGET) + .value("NO_DEBUG_OVERRIDE", VTF::FLAG_NO_DEBUG_OVERRIDE) + .value("SINGLE_COPY", VTF::FLAG_SINGLE_COPY) + .value("ONE_OVER_MIP_LEVEL_IN_ALPHA", VTF::FLAG_ONE_OVER_MIP_LEVEL_IN_ALPHA) + .value("PREMULTIPLY_COLOR_BY_ONE_OVER_MIP_LEVEL", VTF::FLAG_PREMULTIPLY_COLOR_BY_ONE_OVER_MIP_LEVEL) + .value("NORMAL_TO_DUDV", VTF::FLAG_NORMAL_TO_DUDV) + .value("ALPHA_TEST_MIP_GENERATION", VTF::FLAG_ALPHA_TEST_MIP_GENERATION) + .value("NO_DEPTH_BUFFER", VTF::FLAG_NO_DEPTH_BUFFER) + .value("NICE_FILTERED", VTF::FLAG_NICE_FILTERED) + .value("CLAMP_U", VTF::FLAG_CLAMP_U) + .value("VERTEX_TEXTURE", VTF::FLAG_VERTEX_TEXTURE) + .value("SSBUMP", VTF::FLAG_SSBUMP) + .value("UNFILTERABLE_OK", VTF::FLAG_UNFILTERABLE_OK) + .value("BORDER", VTF::FLAG_BORDER) + .value("SPECVAR_RED", VTF::FLAG_SPECVAR_RED) + .value("SPECVAR_ALPHA", VTF::FLAG_SPECVAR_ALPHA) + .export_values(); + + py::class_(vtfpp, "VTFCreationOptions") + .def(py::init<>()) + .def_rw("major_version", &VTF::CreationOptions::majorVersion) + .def_rw("minor_version", &VTF::CreationOptions::minorVersion) + .def_rw("output_format", &VTF::CreationOptions::outputFormat) + .def_rw("width_resize_method", &VTF::CreationOptions::widthResizeMethod) + .def_rw("height_resize_method", &VTF::CreationOptions::heightResizeMethod) + .def_rw("filter", &VTF::CreationOptions::filter) + .def_rw("flags", &VTF::CreationOptions::flags) + .def_rw("initial_frame_count", &VTF::CreationOptions::initialFrameCount) + .def_rw("start_frame", &VTF::CreationOptions::startFrame) + .def_rw("is_cubemap", &VTF::CreationOptions::isCubeMap) + .def_rw("has_spheremap", &VTF::CreationOptions::hasSphereMap) + .def_rw("initial_slice_count", &VTF::CreationOptions::initialSliceCount) + 
.def_rw("create_mips", &VTF::CreationOptions::createMips) + .def_rw("create_thumbnail", &VTF::CreationOptions::createThumbnail) + .def_rw("create_reflectivity", &VTF::CreationOptions::createReflectivity) + .def_rw("compression_level", &VTF::CreationOptions::compressionLevel) + .def_rw("compression_method", &VTF::CreationOptions::compressionMethod) + .def_rw("bumpmap_scale", &VTF::CreationOptions::bumpMapScale); + + py::class_(vtfpp, "VTF") + .def_ro_static("FLAG_MASK_GENERATED", &VTF::FLAG_MASK_GENERATED) + .def_ro_static("FORMAT_UNCHANGED", &VTF::FORMAT_UNCHANGED) + .def_ro_static("FORMAT_DEFAULT", &VTF::FORMAT_DEFAULT) + .def_ro_static("MAX_RESOURCES", &VTF::MAX_RESOURCES) + .def(py::init<>()) + .def("__init__", [](VTF* self, const py::bytes& vtfData, bool parseHeaderOnly = false) { + return new(self) VTF{std::span{reinterpret_cast(vtfData.data()), vtfData.size()}, parseHeaderOnly}; + }, py::arg("vtf_data"), py::arg("parse_header_only") = false) + .def(py::init(), py::arg("vtf_path"), py::arg("parse_header_only") = false) + .def("__bool__", &VTF::operator bool, py::is_operator()) + .def_static("create_and_bake", [](const py::bytes& imageData, ImageFormat format, uint16_t width, uint16_t height, const std::string& vtfPath, VTF::CreationOptions options) { + VTF::create({reinterpret_cast(imageData.data()), imageData.size()}, format, width, height, vtfPath, options); + }, py::arg("image_data"), py::arg("format"), py::arg("width"), py::arg("height"), py::arg("vtf_path"), py::arg("creation_options") = VTF::CreationOptions{}) + .def_static("create_blank_and_bake", py::overload_cast(&VTF::create), py::arg("format"), py::arg("width"), py::arg("height"), py::arg("vtf_path"), py::arg("creation_options") = VTF::CreationOptions{}) + .def_static("create", [](const py::bytes& imageData, ImageFormat format, uint16_t width, uint16_t height, VTF::CreationOptions options) { + return VTF::create({reinterpret_cast(imageData.data()), imageData.size()}, format, width, height, options); 
+ }, py::arg("image_data"), py::arg("format"), py::arg("width"), py::arg("height"), py::arg("creation_options") = VTF::CreationOptions{}) + .def_static("create_blank", py::overload_cast(&VTF::create), py::arg("format"), py::arg("width"), py::arg("height"), py::arg("creation_options") = VTF::CreationOptions{}) + .def_static("create_from_file_and_bake", py::overload_cast(&VTF::create), py::arg("image_path"), py::arg("vtf_path"), py::arg("creation_options") = VTF::CreationOptions{}) + .def_static("create_from_file", py::overload_cast(&VTF::create), py::arg("image_path"), py::arg("creation_options") = VTF::CreationOptions{}) + .def_prop_rw("version_major", &VTF::getMajorVersion, &VTF::setMajorVersion) + .def_prop_rw("version_minor", &VTF::getMinorVersion, &VTF::setMinorVersion) + .def_prop_rw("image_width_resize_method", &VTF::getImageWidthResizeMethod, &VTF::setImageWidthResizeMethod) + .def_prop_rw("image_height_resize_method", &VTF::getImageHeightResizeMethod, &VTF::setImageHeightResizeMethod) + .def_prop_ro("width", &VTF::getWidth) + .def("width_for_mip", [](const VTF& self, uint8_t mip = 0) { return self.getWidth(mip); }, py::arg("mip") = 0) + .def_prop_ro("height", &VTF::getHeight) + .def("height_for_mip", [](const VTF& self, uint8_t mip = 0) { return self.getHeight(mip); }, py::arg("mip") = 0) + .def("set_size", &VTF::setSize, py::arg("width"), py::arg("height"), py::arg("filter")) + .def_prop_rw("flags", &VTF::getFlags, &VTF::setFlags) + .def("add_flags", &VTF::addFlags, py::arg("flags")) + .def("remove_flags", &VTF::removeFlags, py::arg("flags")) + .def_prop_ro("format", &VTF::getFormat) + .def("set_format", &VTF::setFormat, py::arg("new_format"), py::arg("filter") = ImageConversion::ResizeFilter::BILINEAR) + .def_prop_rw("mip_count", &VTF::getMipCount, &VTF::setMipCount) + .def("set_recommended_mip_count", &VTF::setRecommendedMipCount) + .def("compute_mips", &VTF::computeMips, py::arg("filter") = ImageConversion::ResizeFilter::BILINEAR) + 
.def_prop_rw("frame_count", &VTF::getFrameCount, &VTF::setFrameCount) + .def_prop_ro("face_count", &VTF::getFaceCount) + .def("set_face_count", &VTF::setFaceCount, py::arg("is_cubemap"), py::arg("has_spheremap") = false) + .def_prop_rw("slice_count", &VTF::getSliceCount, &VTF::setSliceCount) + .def("set_frame_face_and_slice_count", &VTF::setFrameFaceAndSliceCount, py::arg("new_frame_count"), py::arg("is_cubemap"), py::arg("has_spheremap") = false, py::arg("new_slice_count") = 1) + .def_prop_rw("start_frame", &VTF::getStartFrame, &VTF::setStartFrame) + .def_prop_rw("reflectivity", &VTF::getReflectivity, &VTF::setReflectivity) + .def("compute_reflectivity", &VTF::computeReflectivity) + .def_prop_rw("bumpmap_scale", &VTF::getBumpMapScale, &VTF::setBumpMapScale) + .def_prop_ro("thumbnail_format", &VTF::getThumbnailFormat) + .def_prop_ro("thumbnail_width", &VTF::getThumbnailWidth) + .def_prop_ro("thumbnail_height", &VTF::getThumbnailHeight) + // Skip getResources + // Skip getResource + .def("set_particle_sheet_resource", [](VTF& self, const py::bytes& value) { return self.setParticleSheetResource({reinterpret_cast(value.data()), value.size()}); }, py::arg("value")) + .def("remove_particle_sheet_resource", &VTF::removeParticleSheetResource) + .def("set_crc_resource", &VTF::setCRCResource, py::arg("value")) + .def("remove_crc_resource", &VTF::removeCRCResource) + .def("set_lod_resource", &VTF::setLODResource, py::arg("u"), py::arg("v")) + .def("remove_lod_resource", &VTF::removeLODResource) + .def("set_extended_flags_resource", &VTF::setExtendedFlagsResource, py::arg("value")) + .def("remove_extended_flags_resource", &VTF::removeExtendedFlagsResource) + .def("set_keyvalues_data_resource", &VTF::setKeyValuesDataResource, py::arg("value")) + .def("remove_keyvalues_data_resource", &VTF::removeKeyValuesDataResource) + .def_prop_rw("compression_level", &VTF::getCompressionLevel, &VTF::setCompressionLevel) + .def_prop_rw("compression_method", &VTF::getCompressionMethod, 
&VTF::setCompressionMethod) + .def("has_image_data", &VTF::hasImageData) + .def("image_data_is_srgb", &VTF::imageDataIsSRGB) + .def("get_image_data_raw", [](const VTF& self, uint8_t mip = 0, uint16_t frame = 0, uint8_t face = 0, uint16_t slice = 0) { + const auto d = self.getImageDataRaw(mip, frame, face, slice); + return py::bytes{d.data(), d.size()}; + }, py::arg("mip") = 0, py::arg("frame") = 0, py::arg("face") = 0, py::arg("slice") = 0) + .def("get_image_data_as", [](const VTF& self, ImageFormat newFormat, uint8_t mip = 0, uint16_t frame = 0, uint8_t face = 0, uint16_t slice = 0) { + const auto d = self.getImageDataAs(newFormat, mip, frame, face, slice); + return py::bytes{d.data(), d.size()}; + }, py::arg("new_format"), py::arg("mip") = 0, py::arg("frame") = 0, py::arg("face") = 0, py::arg("slice") = 0) + .def("get_image_data_as_rgba8888", [](const VTF& self, uint8_t mip = 0, uint16_t frame = 0, uint8_t face = 0, uint16_t slice = 0) { + const auto d = self.getImageDataAsRGBA8888(mip, frame, face, slice); + return py::bytes{d.data(), d.size()}; + }, py::arg("mip") = 0, py::arg("frame") = 0, py::arg("face") = 0, py::arg("slice") = 0) + .def("set_image", [](VTF& self, const py::bytes& imageData, ImageFormat format, uint16_t width, uint16_t height, ImageConversion::ResizeFilter filter = ImageConversion::ResizeFilter::BILINEAR, uint8_t mip = 0, uint16_t frame = 0, uint8_t face = 0, uint16_t slice = 0) { + return self.setImage({reinterpret_cast(imageData.data()), imageData.size()}, format, width, height, filter, mip, frame, face, slice); + }, py::arg("image_data"), py::arg("format"), py::arg("width"), py::arg("height"), py::arg("filter"), py::arg("mip") = 0, py::arg("frame") = 0, py::arg("face") = 0, py::arg("slice") = 0) + .def("set_image_from_file", py::overload_cast(&VTF::setImage), py::arg("image_path"), py::arg("filter") = ImageConversion::ResizeFilter::BILINEAR, py::arg("mip") = 0, py::arg("frame") = 0, py::arg("face") = 0, py::arg("slice") = 0) + 
.def("save_image", [](const VTF& self, uint8_t mip = 0, uint16_t frame = 0, uint8_t face = 0, uint16_t slice = 0, ImageConversion::FileFormat fileFormat = ImageConversion::FileFormat::DEFAULT) { + const auto d = self.saveImageToFile(mip, frame, face, slice, fileFormat); + return py::bytes{d.data(), d.size()}; + }, py::arg("mip") = 0, py::arg("frame") = 0, py::arg("face") = 0, py::arg("slice") = 0, py::arg("file_format") = ImageConversion::FileFormat::DEFAULT) + .def("save_image_to_file", py::overload_cast(&VTF::saveImageToFile, py::const_), py::arg("image_path"), py::arg("mip") = 0, py::arg("frame") = 0, py::arg("face") = 0, py::arg("slice") = 0, py::arg("file_format") = ImageConversion::FileFormat::DEFAULT) + .def("has_thumbnail_data", &VTF::hasThumbnailData) + .def("get_thumbnail_data_raw", [](const VTF& self) { + const auto d = self.getThumbnailDataRaw(); + return py::bytes{d.data(), d.size()}; + }) + .def("get_thumbnail_data_as", [](const VTF& self, ImageFormat newFormat) { + const auto d = self.getThumbnailDataAs(newFormat); + return py::bytes{d.data(), d.size()}; + }, py::arg("new_format")) + .def("get_thumbnail_data_as_rgba8888", [](const VTF& self) { + const auto d = self.getThumbnailDataAsRGBA8888(); + return py::bytes{d.data(), d.size()}; + }) + .def("set_thumbnail", [](VTF& self, const py::bytes& imageData, ImageFormat format, uint16_t width, uint16_t height) { + return self.setThumbnail({reinterpret_cast(imageData.data()), imageData.size()}, format, width, height); + }, py::arg("image_data"), py::arg("format"), py::arg("width"), py::arg("height")) + .def("compute_thumbnail", &VTF::computeThumbnail, py::arg("filter") = ImageConversion::ResizeFilter::BILINEAR) + .def("remove_thumbnail", &VTF::removeThumbnail) + .def("save_thumbnail", [](const VTF& self, ImageConversion::FileFormat fileFormat = ImageConversion::FileFormat::DEFAULT) { + const auto d = self.saveThumbnailToFile(fileFormat); + return py::bytes{d.data(), d.size()}; + }, py::arg("file_format") = 
ImageConversion::FileFormat::DEFAULT) + .def("save_thumbnail_to_file", py::overload_cast(&VTF::saveThumbnailToFile, py::const_), py::arg("image_path"), py::arg("file_format") = ImageConversion::FileFormat::DEFAULT) + .def("bake", [](const VTF& self) { + const auto d = self.bake(); + return py::bytes{d.data(), d.size()}; + }) + .def("bake_to_file", py::overload_cast(&VTF::bake, py::const_), py::arg("vtf_path")); +} + +} // namespace vtfpp diff --git a/src/bsppp/_bsppp.cmake b/src/bsppp/_bsppp.cmake index aeb0cada1..0a6fa67c2 100644 --- a/src/bsppp/_bsppp.cmake +++ b/src/bsppp/_bsppp.cmake @@ -1,4 +1,5 @@ add_pretty_parser(bsppp + DEPS sourcepp_parser SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/include/bsppp/bsppp.h" "${CMAKE_CURRENT_SOURCE_DIR}/include/bsppp/LumpData.h" diff --git a/src/bsppp/bsppp.cpp b/src/bsppp/bsppp.cpp index a6b36c79b..df0566558 100644 --- a/src/bsppp/bsppp.cpp +++ b/src/bsppp/bsppp.cpp @@ -202,7 +202,7 @@ void BSP::writeLump(BSPLump lumpIndex, std::span data, bool con // If we have the space to add padding (we should), then do so // This should never fail for well-constructed BSP files - auto padding = math::getPaddingForAlignment(4, currentOffset); + auto padding = math::paddingForAlignment(4, currentOffset); if (padding && i < lumpIDs.size() - 1 && currentOffset + padding <= this->header.lumps[lumpIDs[i + 1]].offset) { currentOffset += padding; } diff --git a/src/gamepp/gamepp.cpp b/src/gamepp/gamepp.cpp index b44892da9..d07b4aef5 100644 --- a/src/gamepp/gamepp.cpp +++ b/src/gamepp/gamepp.cpp @@ -13,7 +13,7 @@ using namespace sourcepp; #include std::optional GameInstance::find(std::string_view windowNameOverride) { - GameInstance instance; + GameInstance instance{}; if (!windowNameOverride.empty()) { instance.hwnd = FindWindowA(windowNameOverride.data(), nullptr); @@ -34,7 +34,7 @@ std::optional GameInstance::find(std::string_view windowNameOverri std::string GameInstance::getWindowTitle() const { // This should be large enough std::string 
title(512, '\0'); - if (auto size = GetWindowTextA(reinterpret_cast(this->hwnd), title.data(), title.length())) { + if (auto size = GetWindowTextA(reinterpret_cast(this->hwnd), title.data(), static_cast(title.length()))) { title.resize(size); return title; } diff --git a/src/kvpp/_kvpp.cmake b/src/kvpp/_kvpp.cmake index 559d30749..aa6f21a75 100644 --- a/src/kvpp/_kvpp.cmake +++ b/src/kvpp/_kvpp.cmake @@ -1,4 +1,5 @@ add_pretty_parser(kvpp + DEPS sourcepp_parser SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/include/kvpp/KV1.h" "${CMAKE_CURRENT_SOURCE_DIR}/include/kvpp/kvpp.h" diff --git a/src/mdlpp/_mdlpp.cmake b/src/mdlpp/_mdlpp.cmake index 86f313920..ebca313bc 100644 --- a/src/mdlpp/_mdlpp.cmake +++ b/src/mdlpp/_mdlpp.cmake @@ -1,4 +1,5 @@ add_pretty_parser(mdlpp + DEPS sourcepp_parser PRECOMPILED_HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/include/mdlpp/structs/Generic.h" "${CMAKE_CURRENT_SOURCE_DIR}/include/mdlpp/structs/MDL.h" diff --git a/src/mdlpp/structs/MDL.cpp b/src/mdlpp/structs/MDL.cpp index 53132f466..b986e986a 100644 --- a/src/mdlpp/structs/MDL.cpp +++ b/src/mdlpp/structs/MDL.cpp @@ -87,7 +87,7 @@ bool MDL::open(const std::byte* data, std::size_t size) { .read(bone.procType) .read(bone.procIndex) .read(bone.physicsBone); - parser::binary::readStringAtOffset(stream, bone.surfacePropName, std::ios::cur, sizeof(int32_t) * 12 + sizeof(math::Vec3f) * 4 + sizeof(math::Quat) * 2 + sizeof(math::Matrix<3,4>) + sizeof(Bone::Flags)); + parser::binary::readStringAtOffset(stream, bone.surfacePropName, std::ios::cur, sizeof(int32_t) * 12 + sizeof(math::Vec3f) * 4 + sizeof(math::Quat) * 2 + sizeof(math::Mat3x4f) + sizeof(Bone::Flags)); stream.read(bone.contents); // _unused0 diff --git a/src/sourcepp/_sourcepp.cmake b/src/sourcepp/_sourcepp.cmake index fd4a50f8d..25695c85b 100644 --- a/src/sourcepp/_sourcepp.cmake +++ b/src/sourcepp/_sourcepp.cmake @@ -1,34 +1,17 @@ list(APPEND ${PROJECT_NAME}_HEADERS - "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/crypto/Adler32.h" - 
"${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/crypto/CRC32.h" - "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/crypto/MD5.h" - "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/crypto/RSA.h" - "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/crypto/String.h" - "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/math/Angles.h" - "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/math/Float.h" - "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/math/Integer.h" - "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/math/Matrix.h" - "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/math/Vector.h" - "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/parser/Binary.h" - "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/parser/Text.h" "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/FS.h" "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/Macros.h" - "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/String.h") + "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/Math.h" + "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/String.h" + "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/Templates.h") add_library(${PROJECT_NAME} STATIC ${${PROJECT_NAME}_HEADERS} - "${CMAKE_CURRENT_LIST_DIR}/crypto/Adler32.cpp" - "${CMAKE_CURRENT_LIST_DIR}/crypto/CRC32.cpp" - "${CMAKE_CURRENT_LIST_DIR}/crypto/MD5.cpp" - "${CMAKE_CURRENT_LIST_DIR}/crypto/RSA.cpp" - "${CMAKE_CURRENT_LIST_DIR}/crypto/String.cpp" - "${CMAKE_CURRENT_LIST_DIR}/parser/Binary.cpp" - "${CMAKE_CURRENT_LIST_DIR}/parser/Text.cpp" "${CMAKE_CURRENT_LIST_DIR}/FS.cpp" "${CMAKE_CURRENT_LIST_DIR}/String.cpp") target_precompile_headers(${PROJECT_NAME} PUBLIC ${${PROJECT_NAME}_HEADERS}) -target_link_libraries(${PROJECT_NAME} PUBLIC bufferstream cryptopp::cryptopp) +target_link_libraries(${PROJECT_NAME} PUBLIC bufferstream sourcepp_half) target_include_directories(${PROJECT_NAME} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include") diff --git a/src/sourcepp/crypto/_crypto.cmake b/src/sourcepp/crypto/_crypto.cmake new file mode 100644 index 000000000..eab77edff --- /dev/null +++ 
b/src/sourcepp/crypto/_crypto.cmake @@ -0,0 +1,18 @@ +list(APPEND ${PROJECT_NAME}_crypto_HEADERS + "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/crypto/Adler32.h" + "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/crypto/CRC32.h" + "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/crypto/MD5.h" + "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/crypto/RSA.h" + "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/crypto/String.h") + +add_library(${PROJECT_NAME}_crypto STATIC + ${${PROJECT_NAME}_crypto_HEADERS} + "${CMAKE_CURRENT_LIST_DIR}/Adler32.cpp" + "${CMAKE_CURRENT_LIST_DIR}/CRC32.cpp" + "${CMAKE_CURRENT_LIST_DIR}/MD5.cpp" + "${CMAKE_CURRENT_LIST_DIR}/RSA.cpp" + "${CMAKE_CURRENT_LIST_DIR}/String.cpp") + +target_precompile_headers(${PROJECT_NAME}_crypto PUBLIC ${${PROJECT_NAME}_crypto_HEADERS}) + +target_link_libraries(${PROJECT_NAME}_crypto PUBLIC ${PROJECT_NAME} cryptopp::cryptopp) diff --git a/src/sourcepp/parser/_parser.cmake b/src/sourcepp/parser/_parser.cmake new file mode 100644 index 000000000..3ba7ce021 --- /dev/null +++ b/src/sourcepp/parser/_parser.cmake @@ -0,0 +1,12 @@ +list(APPEND ${PROJECT_NAME}_parser_HEADERS + "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/parser/Binary.h" + "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/parser/Text.h") + +add_library(${PROJECT_NAME}_parser STATIC + ${${PROJECT_NAME}_parser_HEADERS} + "${CMAKE_CURRENT_LIST_DIR}/Binary.cpp" + "${CMAKE_CURRENT_LIST_DIR}/Text.cpp") + +target_precompile_headers(${PROJECT_NAME}_parser PUBLIC ${${PROJECT_NAME}_parser_HEADERS}) + +target_link_libraries(${PROJECT_NAME}_parser PUBLIC ${PROJECT_NAME}) diff --git a/src/toolpp/CmdSeq.cpp b/src/toolpp/CmdSeq.cpp index a2e06a3ea..1c9f924e2 100644 --- a/src/toolpp/CmdSeq.cpp +++ b/src/toolpp/CmdSeq.cpp @@ -12,96 +12,102 @@ using namespace toolpp; namespace { -std::vector bakeBinary(const CmdSeq& cmdSeq) { - std::vector out; - BufferStream writer{out}; - - writer - .write("Worldcraft Command Sequences\r\n\x1a", 31) - .write(cmdSeq.getVersion()) - 
.write(cmdSeq.getSequences().size()); - - for (const auto& [seqName, seqCommands] : cmdSeq.getSequences()) { - writer - .write(seqName, true, 128) - .write(seqCommands.size()); +CmdSeq::Command::Special specialCmdFromString(std::string_view specialCmd) { + using enum CmdSeq::Command::Special; + if (string::iequals(specialCmd, "change_dir")) { + return CHANGE_DIRECTORY; + } + if (string::iequals(specialCmd, "copy_file")) { + return COPY_FILE; + } + if (string::iequals(specialCmd, "delete_file")) { + return DELETE_FILE; + } + if (string::iequals(specialCmd, "rename_file")) { + return RENAME_FILE; + } + if (string::iequals(specialCmd, "copy_file_if_exists")) { + return COPY_FILE_IF_EXISTS; + } + return NONE; +} - for (const auto& [enabled, special, executable, arguments, ensureFileExists, pathToTheoreticallyExistingFile, useProcessWindow, waitForKeypress] : seqCommands) { - writer - .write(enabled) - .write(special) - .write(executable, true, 260) - .write(arguments, true, 260) - .write(true) - .write(ensureFileExists) - .write(pathToTheoreticallyExistingFile, true, 260) - .write(useProcessWindow); +} // namespace - if (cmdSeq.getVersion() > 0.15f) { - writer.write(waitForKeypress); - } - } +std::string CmdSeq::Command::getSpecialDisplayNameFor(Special special) { + switch (special) { + case Special::NONE: + break; + case Special::CHANGE_DIRECTORY: + return "Change Directory"; + case Special::COPY_FILE: + return "Copy File"; + case Special::DELETE_FILE: + return "Delete File"; + case Special::RENAME_FILE: + return "Rename File"; + case Special::COPY_FILE_IF_EXISTS: + return "Copy File If It Exists"; } - - out.resize(writer.size()); - return out; + return "None"; } -std::vector bakeKeyValues(const CmdSeq& cmdSeq) { - KV1Writer kv; - auto& kvFile = kv.addChild("Command Sequences"); - for (const auto& [seqName, seqCommands] : cmdSeq.getSequences()) { - auto& kvSequence = kvFile.addChild(seqName); - for (int i = 1; i <= seqCommands.size(); i++) { - const auto& [enabled, 
special, executable, arguments, ensureFileExists, pathToTheoreticallyExistingFile, useProcessWindow, waitForKeypress] = seqCommands[i - 1]; - auto& kvCommand = kvSequence.addChild(std::to_string(i)); - kvCommand["enabled"] = enabled; - kvCommand["special_cmd"] = static_cast(special); - kvCommand["run"] = executable; - kvCommand["params"] = arguments; - kvCommand["ensure_check"] = ensureFileExists; - kvCommand["ensure_fn"] = pathToTheoreticallyExistingFile; - kvCommand["use_process_wnd"] = useProcessWindow; - kvCommand["no_wait"] = waitForKeypress; - } +std::string CmdSeq::Command::getExecutableDisplayName() const { + if (this->special != Command::Special::NONE) { + return getSpecialDisplayNameFor(this->special); } - - const auto kvStr = kv.bake(); - std::vector out; - out.resize(kvStr.length()); - std::memcpy(out.data(), kvStr.data(), kvStr.length()); - return out; + return this->executable; } -} // namespace - -CmdSeq::CmdSeq(std::string path_) - : version(0.f) - , path(std::move(path_)) { +CmdSeq::CmdSeq(const std::string& path) + : type(Type::INVALID) + , version(0.2f) { { FileStream reader{path}; if (!reader) { return; } if (auto binStr = reader.seek_in(0).read_string(10); binStr == "Worldcraft") { - this->usingKeyValues = false; + this->type = Type::BINARY; } else { auto kvStr = reader.seek_in(0).read_string(19); string::toLower(kvStr); if (kvStr == "\"command sequences\"") { - this->usingKeyValues = true; + this->type = Type::KEYVALUES_STRATA; } else { return; } } } - if (this->usingKeyValues) { - this->parseKeyValues(path); - } else { - this->parseBinary(path); + switch (this->type) { + using enum Type; + case INVALID: + break; + case BINARY: + this->parseBinary(path); + break; + case KEYVALUES_STRATA: + this->parseKeyValuesStrata(path); + break; } } +CmdSeq::CmdSeq(Type type_) + : type(type_) + , version(0.2f) {} + +CmdSeq::operator bool() const { + return this->type != Type::INVALID; +} + +CmdSeq::Type CmdSeq::getType() const { + return this->type; +} + 
+void CmdSeq::setType(Type type_) { + this->type = type_; +} + float CmdSeq::getVersion() const { return this->version; } @@ -132,6 +138,9 @@ void CmdSeq::parseBinary(const std::string& path) { auto& [enabled, special, executable, arguments, ensureFileExists, pathToTheoreticallyExistingFile, useProcessWindow, waitForKeypress] = seqCommands.emplace_back(); enabled = reader.read() & 0xFF; special = reader.read(); + if (special == static_cast(Command::SPECIAL_COPY_FILE_IF_EXISTS_ALIAS)) { + special = Command::Special::COPY_FILE_IF_EXISTS; + } executable = reader.read_string(260); arguments = reader.read_string(260); reader.skip_in(); @@ -145,7 +154,7 @@ void CmdSeq::parseBinary(const std::string& path) { } } -void CmdSeq::parseKeyValues(const std::string& path) { +void CmdSeq::parseKeyValuesStrata(const std::string& path) { this->version = 0.2f; const KV1 cmdSeq{fs::readFileText(path)}; @@ -156,7 +165,15 @@ void CmdSeq::parseKeyValues(const std::string& path) { for (const auto& kvCommand : kvSequence.getChildren()) { auto& [enabled, special, executable, arguments, ensureFileExists, pathToTheoreticallyExistingFile, useProcessWindow, waitForKeypress] = seqCommands.emplace_back(); string::toBool(kvCommand["enabled"].getValue(), enabled); - string::toInt(kvCommand["special_cmd"].getValue(), reinterpret_cast&>(special)); + const auto specialCmd = kvCommand["special_cmd"].getValue(); + if (parser::text::isNumber(specialCmd)) { + string::toInt(specialCmd, reinterpret_cast&>(special)); + if (special == Command::SPECIAL_COPY_FILE_IF_EXISTS_ALIAS) { + special = Command::Special::COPY_FILE_IF_EXISTS; + } + } else { + special = ::specialCmdFromString(specialCmd); + } executable = kvCommand["run"].getValue(); arguments = kvCommand["params"].getValue(); string::toBool(kvCommand["ensure_check"].getValue(), ensureFileExists); @@ -175,33 +192,85 @@ const std::vector& CmdSeq::getSequences() const { return this->sequences; } -std::vector CmdSeq::bake() const { - return 
this->bake(this->usingKeyValues); +std::vector CmdSeq::bakeBinary() const { + std::vector out; + BufferStream writer{out}; + + writer + .write("Worldcraft Command Sequences\r\n\x1a", 31) + .write(this->getVersion()) + .write(this->getSequences().size()); + + for (const auto& [seqName, seqCommands] : this->getSequences()) { + writer + .write(seqName, true, 128) + .write(seqCommands.size()); + + for (const auto& [enabled, special, executable, arguments, ensureFileExists, pathToTheoreticallyExistingFile, useProcessWindow, waitForKeypress] : seqCommands) { + writer + .write(enabled) + .write(special) + .write(executable, true, 260) + .write(arguments, true, 260) + .write(true) + .write(ensureFileExists) + .write(pathToTheoreticallyExistingFile, true, 260) + .write(useProcessWindow); + + if (this->getVersion() > 0.15f) { + writer.write(waitForKeypress); + } + } + } + + out.resize(writer.size()); + return out; } -std::vector CmdSeq::bake(bool overrideUsingKeyValues) const { - if (overrideUsingKeyValues) { - return ::bakeKeyValues(*this); +std::vector CmdSeq::bakeKeyValuesStrata() const { + KV1Writer kv; + auto& kvFile = kv.addChild("Command Sequences"); + for (const auto& [seqName, seqCommands] : this->getSequences()) { + auto& kvSequence = kvFile.addChild(seqName); + for (int i = 1; i <= seqCommands.size(); i++) { + const auto& [enabled, special, executable, arguments, ensureFileExists, pathToTheoreticallyExistingFile, useProcessWindow, waitForKeypress] = seqCommands[i - 1]; + auto& kvCommand = kvSequence.addChild(std::to_string(i)); + kvCommand["enabled"] = enabled; + kvCommand["special_cmd"] = static_cast(special); + kvCommand["run"] = executable; + kvCommand["params"] = arguments; + kvCommand["ensure_check"] = ensureFileExists; + kvCommand["ensure_fn"] = pathToTheoreticallyExistingFile; + kvCommand["use_process_wnd"] = useProcessWindow; + kvCommand["no_wait"] = waitForKeypress; + } } - return ::bakeBinary(*this); + + const auto kvStr = kv.bake(); + std::vector out; + 
out.resize(kvStr.length()); + std::memcpy(out.data(), kvStr.data(), kvStr.length()); + return out; } -bool CmdSeq::bake(const std::string& path_) { - return this->bake(path_, this->usingKeyValues); +std::vector CmdSeq::bake() const { + switch (this->type) { + using enum Type; + case INVALID: + return {}; + case BINARY: + return this->bakeBinary(); + case KEYVALUES_STRATA: + return this->bakeKeyValuesStrata(); + } + return {}; } -bool CmdSeq::bake(const std::string& path_, bool overrideUsingKeyValues) { - FileStream writer{path_}; +bool CmdSeq::bake(const std::string& path) const { + FileStream writer{path}; if (!writer) { return false; } - this->path = path_; - - writer.seek_out(0); - if (overrideUsingKeyValues) { - writer.write(::bakeKeyValues(*this)); - } else { - writer.write(::bakeBinary(*this)); - } + writer.seek_out(0).write(this->bake()); return true; } diff --git a/src/toolpp/FGD.cpp b/src/toolpp/FGD.cpp index f9f955ca8..d106e71c3 100644 --- a/src/toolpp/FGD.cpp +++ b/src/toolpp/FGD.cpp @@ -8,9 +8,11 @@ #include #include +#include #include using namespace sourcepp; +using namespace std::string_view_literals; using namespace toolpp; namespace { @@ -467,11 +469,11 @@ void writeOptionalKeyValueStrings(BufferStream& writer, std::initializer_listwriter - .write("@include \"", 10) + .write("@include \""sv, false) .write(fgdPath, false) - .write("\"\n\n", 3); + .write("\"\n\n"sv, false); return *this; } FGDWriter& FGDWriter::version(int version) { this->writer - .write("@version(", 9) + .write("@version("sv, false) .write(std::to_string(version), false) - .write(")\n\n", 3); + .write(")\n\n"sv, false); return *this; } FGDWriter& FGDWriter::mapSize(math::Vec2i mapSize) { this->writer - .write("@mapsize(", 9) + .write("@mapsize("sv, false) .write(std::to_string(mapSize[0]), false) - .write(", ", 2) + .write(", "sv, false) .write(std::to_string(mapSize[1]), false) - .write(")\n\n", 3); + .write(")\n\n"sv, false); return *this; } FGDWriter& 
FGDWriter::materialExclusionDirs(const std::vector& dirs) { - this->writer.write("@MaterialExclusion\n[\n", 21); + this->writer.write("@MaterialExclusion\n[\n"sv, false); for (const auto& dir : dirs) { this->writer << '\t' << '\"'; this->writer.write(dir, false); this->writer << '\"' << '\n'; } - this->writer.write("]\n\n", 3); + this->writer.write("]\n\n"sv, false); return *this; } FGDWriter::AutoVisGroupWriter FGDWriter::beginAutoVisGroup(const std::string& parentName) { this->writer - .write("@AutoVisGroup = \"", 17) + .write("@AutoVisGroup = \""sv, false) .write(parentName, false) - .write("\"\n[\n", 4); + .write("\"\n[\n"sv, false); return AutoVisGroupWriter{*this}; } FGDWriter::AutoVisGroupWriter& FGDWriter::AutoVisGroupWriter::visGroup(const std::string& name, const std::vector& entities) { this->parent.writer - .write("\t\"", 2) + .write("\t\""sv, false) .write(name, false) - .write("\"\n\t[\n", 5); + .write("\"\n\t[\n"sv, false); for (const auto& entity : entities) { this->parent.writer - .write("\t\t\"", 3) + .write("\t\t\""sv, false) .write(entity, false) - .write("\"\n", 2); + .write("\"\n"sv, false); } - this->parent.writer.write("\t]\n", 3); + this->parent.writer.write("\t]\n"sv, false); return *this; } FGDWriter& FGDWriter::AutoVisGroupWriter::endAutoVisGroup() const { - this->parent.writer.write("]\n\n", 3); + this->parent.writer.write("]\n\n"sv, false); return this->parent; } @@ -684,20 +686,20 @@ FGDWriter::EntityWriter FGDWriter::beginEntity(const std::string& classType, con } } this->writer - .write("= ", 2) + .write("= "sv, false) .write(name, false) - .write(" :", 2); + .write(" :"sv, false); // Put the description on the same line if it's short if (description.size() < 32) { this->writer - .write(" \"", 2) + .write(" \""sv, false) .write(description, false); } else { this->writer - .write("\n\t\"", 3) + .write("\n\t\""sv, false) .write(description, false); } - this->writer.write("\"\n[\n", 4); + this->writer.write("\"\n[\n"sv, false); return 
EntityWriter{*this}; } @@ -709,10 +711,10 @@ FGDWriter::EntityWriter& FGDWriter::EntityWriter::keyValue(const std::string& na .write(valueType, false) .write(')'); if (readOnly) { - this->parent.writer.write(" readonly", 9); + this->parent.writer.write(" readonly"sv, false); } if (report) { - this->parent.writer.write(" report", 7); + this->parent.writer.write(" report"sv, false); } ::writeOptionalKeyValueStrings(this->parent.writer, {displayName, valueDefault, description}); this->parent.writer << '\n'; @@ -723,30 +725,30 @@ FGDWriter::EntityWriter::KeyValueChoicesWriter FGDWriter::EntityWriter::beginKey this->parent.writer .write('\t') .write(name, false) - .write("(choices)", 9); + .write("(choices)"sv, false); if (readOnly) { - this->parent.writer.write(" readonly", 9); + this->parent.writer.write(" readonly"sv, false); } if (report) { - this->parent.writer.write(" report", 7); + this->parent.writer.write(" report"sv, false); } ::writeOptionalKeyValueStrings(this->parent.writer, {displayName, valueDefault, description}); - this->parent.writer.write(" =\n\t[\n", 6); + this->parent.writer.write(" =\n\t[\n"sv, false); return KeyValueChoicesWriter{*this}; } FGDWriter::EntityWriter::KeyValueChoicesWriter& FGDWriter::EntityWriter::KeyValueChoicesWriter::choice(const std::string& value, const std::string& displayName) { this->parent.parent.writer - .write("\t\t\"", 3) + .write("\t\t\""sv, false) .write(value, false) - .write("\" : \"", 5) + .write("\" : \""sv, false) .write(displayName, false) - .write("\"\n", 2); + .write("\"\n"sv, false); return *this; } FGDWriter::EntityWriter& FGDWriter::EntityWriter::KeyValueChoicesWriter::endKeyValueChoices() const { - this->parent.parent.writer.write("\t]\n", 3); + this->parent.parent.writer.write("\t]\n"sv, false); return this->parent; } @@ -754,29 +756,29 @@ FGDWriter::EntityWriter::KeyValueFlagsWriter FGDWriter::EntityWriter::beginKeyVa this->parent.writer .write('\t') .write(name, false) - .write("(flags)", 7); + 
.write("(flags)"sv, false); if (readOnly) { - this->parent.writer.write(" readonly", 9); + this->parent.writer.write(" readonly"sv, false); } if (report) { - this->parent.writer.write(" report", 7); + this->parent.writer.write(" report"sv, false); } ::writeOptionalKeyValueStrings(this->parent.writer, {displayName, description}); - this->parent.writer.write(" =\n\t[\n", 6); + this->parent.writer.write(" =\n\t[\n"sv, false); return KeyValueFlagsWriter{*this}; } FGDWriter::EntityWriter::KeyValueFlagsWriter& FGDWriter::EntityWriter::KeyValueFlagsWriter::flag(uint64_t value, const std::string& displayName, bool enabledByDefault, const std::string& description) { this->parent.parent.writer - .write("\t\t", 2) + .write("\t\t"sv, false) .write(std::to_string(value), false) - .write(" : \"", 4) + .write(" : \""sv, false) .write(displayName, false) - .write("\" : ", 4) + .write("\" : "sv, false) .write(std::to_string(enabledByDefault), false); if (!description.empty()) { this->parent.parent.writer - .write(" : \"", 4) + .write(" : \""sv, false) .write(description, false) .write('\"'); } @@ -785,21 +787,21 @@ FGDWriter::EntityWriter::KeyValueFlagsWriter& FGDWriter::EntityWriter::KeyValueF } FGDWriter::EntityWriter& FGDWriter::EntityWriter::KeyValueFlagsWriter::endKeyValueFlags() const { - this->parent.parent.writer.write("\t]\n", 3); + this->parent.parent.writer.write("\t]\n"sv, false); return this->parent; } FGDWriter::EntityWriter& FGDWriter::EntityWriter::input(const std::string& name, const std::string& valueType, const std::string& description) { this->parent.writer .write('\t') - .write("input ", 6) + .write("input "sv, false) .write(name, false) .write('(') .write(valueType, false) .write(')'); if (!description.empty()) { this->parent.writer - .write(" : \"", 4) + .write(" : \""sv, false) .write(description, false) .write('\"'); } @@ -810,14 +812,14 @@ FGDWriter::EntityWriter& FGDWriter::EntityWriter::input(const std::string& name, FGDWriter::EntityWriter& 
FGDWriter::EntityWriter::output(const std::string& name, const std::string& valueType, const std::string& description) { this->parent.writer .write('\t') - .write("output ", 7) + .write("output "sv, false) .write(name, false) .write('(') .write(valueType, false) .write(')'); if (!description.empty()) { this->parent.writer - .write(" : \"", 4) + .write(" : \""sv, false) .write(description, false) .write('\"'); } @@ -826,18 +828,18 @@ FGDWriter::EntityWriter& FGDWriter::EntityWriter::output(const std::string& name } FGDWriter& FGDWriter::EntityWriter::endEntity() const { - this->parent.writer.write("]\n\n", 3); + this->parent.writer.write("]\n\n"sv, false); return this->parent; } -std::string FGDWriter::bake() { - this->backingData.resize(this->writer.tell()); - if (this->backingData.ends_with("\n\n")) { - this->backingData.pop_back(); +std::string FGDWriter::bake() const { + std::string_view out{this->backingData.data(), this->writer.tell()}; + while (out.ends_with("\n\n")) { + out = out.substr(0, out.size() - 1); } - return this->backingData; + return std::string{out}; } -bool FGDWriter::bake(const std::string& fgdPath) { +bool FGDWriter::bake(const std::string& fgdPath) const { return fs::writeFileText(fgdPath, this->bake()); } diff --git a/src/toolpp/_toolpp.cmake b/src/toolpp/_toolpp.cmake index d8d9342c1..cf49ee17d 100644 --- a/src/toolpp/_toolpp.cmake +++ b/src/toolpp/_toolpp.cmake @@ -1,5 +1,5 @@ add_pretty_parser(toolpp - DEPS sourcepp::kvpp + DEPS sourcepp_parser sourcepp::kvpp SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/include/toolpp/CmdSeq.h" "${CMAKE_CURRENT_SOURCE_DIR}/include/toolpp/FGD.h" diff --git a/src/vpkpp/_vpkpp.cmake b/src/vpkpp/_vpkpp.cmake index b1746cb12..7abac1e68 100644 --- a/src/vpkpp/_vpkpp.cmake +++ b/src/vpkpp/_vpkpp.cmake @@ -1,6 +1,6 @@ add_pretty_parser(vpkpp - DEPS cryptopp::cryptopp MINIZIP::minizip sourcepp::bsppp sourcepp::kvpp - DEPS_INTERFACE tsl::hat_trie + DEPS libzstd_static MINIZIP::minizip sourcepp_crypto sourcepp_parser 
sourcepp::bsppp sourcepp::kvpp + DEPS_PUBLIC tsl::hat_trie PRECOMPILED_HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/include/vpkpp/format/BSP.h" "${CMAKE_CURRENT_SOURCE_DIR}/include/vpkpp/format/FPX.h" @@ -31,9 +31,6 @@ add_pretty_parser(vpkpp "${CMAKE_CURRENT_LIST_DIR}/format/ZIP.cpp" "${CMAKE_CURRENT_LIST_DIR}/PackFile.cpp") -target_include_directories(vpkpp PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/ext/hat-trie/include") - if(SOURCEPP_VPKPP_SUPPORT_VPK_V54) - target_link_libraries(vpkpp PRIVATE libzstd_static) target_compile_definitions(vpkpp PRIVATE VPKPP_SUPPORT_VPK_V54) endif() diff --git a/src/vpkpp/format/BSP.cpp b/src/vpkpp/format/BSP.cpp index a9f02dc6a..a0c11b928 100644 --- a/src/vpkpp/format/BSP.cpp +++ b/src/vpkpp/format/BSP.cpp @@ -113,7 +113,7 @@ bool BSP::bake(const std::string& outputDir_, BakeOptions options, const EntryCa if (!writer) { return false; } - writer.writeLump(BSPLump::PAKFILE, fs::readFileBuffer(this->tempZIPPath)); + writer.writeLump(BSPLump::PAKFILE, fs::readFileBuffer(this->tempZIPPath), false); } // Rename and reopen the ZIP diff --git a/src/vpkpp/format/PCK.cpp b/src/vpkpp/format/PCK.cpp index e3b7e21d5..0c74a5b83 100644 --- a/src/vpkpp/format/PCK.cpp +++ b/src/vpkpp/format/PCK.cpp @@ -34,7 +34,7 @@ std::unique_ptr PCK::create(const std::string& path, uint32_t version, if (version > 1) { stream - .write(FlagsV2::FLAG_NONE) + .write(FLAG_DIR_NONE) .write(0); } @@ -59,7 +59,7 @@ std::unique_ptr PCK::open(const std::string& path, const EntryCallback if (auto signature = reader.read(); signature != PCK_SIGNATURE) { // PCK might be embedded - reader.seek_in(-static_cast(sizeof(uint32_t)), std::ios::end); + reader.seek_in(sizeof(uint32_t), std::ios::end); if (auto endSignature = reader.read(); endSignature != PCK_SIGNATURE) { return nullptr; } @@ -80,20 +80,20 @@ std::unique_ptr PCK::open(const std::string& path, const EntryCallback reader.read(pck->header.godotVersionMinor); reader.read(pck->header.godotVersionPatch); - pck->header.flags = 
FLAG_NONE; + pck->header.flags = FLAG_DIR_NONE; std::size_t extraEntryContentsOffset = 0; if (pck->header.packVersion > 1) { - pck->header.flags = reader.read(); + pck->header.flags = reader.read(); extraEntryContentsOffset = reader.read(); } - if (pck->header.flags & FLAG_ENCRYPTED) { + if (pck->header.flags & FLAG_DIR_ENCRYPTED) { // File directory is encrypted return nullptr; } - if (pck->header.flags & FLAG_RELATIVE_FILE_DATA) { + if (pck->header.flags & FLAG_DIR_RELATIVE_FILE_DATA) { extraEntryContentsOffset += pck->startOffset; - pck->header.flags = static_cast(pck->header.flags & ~FLAG_RELATIVE_FILE_DATA); + pck->header.flags = static_cast(pck->header.flags & ~FLAG_DIR_RELATIVE_FILE_DATA); } // Reserved @@ -115,6 +115,9 @@ std::unique_ptr PCK::open(const std::string& path, const EntryCallback if (pck->header.packVersion > 1) { entry.flags = reader.read(); + if (entry.flags & FLAG_FILE_REMOVED) { + continue; + } } pck->entries.emplace(entryPath, entry); @@ -141,7 +144,7 @@ std::optional> PCK::readEntry(const std::string& path_) c } // It's baked into the file on disk - if (entry->flags & FLAG_ENCRYPTED) { + if (entry->flags & FLAG_FILE_ENCRYPTED) { // File is encrypted return std::nullopt; } @@ -182,7 +185,7 @@ bool PCK::bake(const std::string& outputDir_, BakeOptions options, const EntryCa entry->offset = fileData.size(); fileData.insert(fileData.end(), binData->begin(), binData->end()); - const auto padding = math::getPaddingForAlignment(PCK_FILE_DATA_PADDING, static_cast(entry->length)); + const auto padding = math::paddingForAlignment(PCK_FILE_DATA_PADDING, static_cast(entry->length)); for (int i = 0; i < padding; i++) { fileData.push_back(static_cast(0)); } @@ -236,7 +239,7 @@ bool PCK::bake(const std::string& outputDir_, BakeOptions options, const EntryCa this->dataOffset = stream.tell_out(); for (const auto& path : std::views::keys(entriesToBake)) { const auto entryPath = std::string{PCK_PATH_PREFIX} + path; - const auto padding = 
math::getPaddingForAlignment(PCK_DIRECTORY_STRING_PADDING, static_cast(entryPath.length())); + const auto padding = math::paddingForAlignment(PCK_DIRECTORY_STRING_PADDING, static_cast(entryPath.length())); this->dataOffset += sizeof(uint32_t) + // Path length entryPath.length() + padding + // Path @@ -251,7 +254,7 @@ bool PCK::bake(const std::string& outputDir_, BakeOptions options, const EntryCa // Directory for (const auto& [path, entry] : entriesToBake) { const auto entryPath = std::string{PCK_PATH_PREFIX} + path; - const auto padding = math::getPaddingForAlignment(PCK_DIRECTORY_STRING_PADDING, static_cast(entryPath.length())); + const auto padding = math::paddingForAlignment(PCK_DIRECTORY_STRING_PADDING, static_cast(entryPath.length())); stream.write(static_cast(entryPath.length() + padding)); stream.write(entryPath, false, entryPath.length() + padding); @@ -297,7 +300,7 @@ PCK::operator std::string() const { if (this->startOffset > 0) { out += " | Embedded"; } - if (this->header.flags & FLAG_ENCRYPTED) { + if (this->header.flags & FLAG_DIR_ENCRYPTED) { out += " | Encrypted"; } return out; diff --git a/src/vpkpp/format/VPK.cpp b/src/vpkpp/format/VPK.cpp index 2034a7cc9..60865f6af 100644 --- a/src/vpkpp/format/VPK.cpp +++ b/src/vpkpp/format/VPK.cpp @@ -55,20 +55,21 @@ VPK::VPK(const std::string& fullFilePath_) } std::unique_ptr VPK::create(const std::string& path, uint32_t version) { - if (version != 1 && version != 2 && version != 54) { + if (version != 0 && version != 1 && version != 2 && version != 54) { return nullptr; } { FileStream stream{path, FileStream::OPT_TRUNCATE | FileStream::OPT_CREATE_IF_NONEXISTENT}; - Header1 header1{}; - header1.signature = VPK_SIGNATURE; - header1.version = version; - header1.treeSize = 1; - stream.write(header1); - - if (version == 2 || version == 54) { + if (version > 0) { + Header1 header1{}; + header1.signature = VPK_SIGNATURE; + header1.version = version; + header1.treeSize = 1; + stream.write(header1); + } + if (version 
> 1) { Header2 header2{}; header2.fileDataSectionSize = 0; header2.archiveMD5SectionSize = 0; @@ -106,12 +107,28 @@ std::unique_ptr VPK::openInternal(const std::string& path, const Entry reader.seek_in(0); reader.read(vpk->header1); if (vpk->header1.signature != VPK_SIGNATURE) { - // File is not a VPK - return nullptr; + reader.seek_in(3, std::ios::end); + if (reader.read() == '\0' && reader.read() == '\0' && reader.read() == '\0') { + // hack: if file is 9 bytes long it's probably an empty VTMB VPK and we should bail so that code can pick it up + // either way a 9 byte long VPK should not have any files in it + if (std::filesystem::file_size(vpk->fullFilePath) == 9) { + return nullptr; + } + + // File is one of those shitty ancient VPKs + vpk->header1.signature = VPK_SIGNATURE; + vpk->header1.version = 0; + vpk->header1.treeSize = 0; + + reader.seek_in(0); + } else { + // File is not a VPK + return nullptr; + } } if (vpk->hasExtendedHeader()) { reader.read(vpk->header2); - } else if (vpk->header1.version != 1) { + } else if (vpk->header1.version != 0 && vpk->header1.version != 1) { // Apex Legends, Titanfall, etc. are not supported return nullptr; } @@ -312,8 +329,8 @@ std::optional> VPK::readEntry(const std::string& path_) c } const auto entryLength = (this->hasCompression() && entry->compressedLength) ? 
entry->compressedLength : entry->length; - if (!entryLength) { - return {}; + if (entryLength == 0) { + return std::vector{}; } std::vector out(entryLength, static_cast(0)); @@ -559,9 +576,11 @@ bool VPK::bake(const std::string& outputDir_, BakeOptions options, const EntryCa outDir.seek_out(0); // Dummy header - outDir.write(this->header1); - if (this->hasExtendedHeader()) { - outDir.write(this->header2); + if (this->header1.version > 0) { + outDir.write(this->header1); + if (this->hasExtendedHeader()) { + outDir.write(this->header2); + } } // File tree data @@ -730,6 +749,12 @@ bool VPK::bake(const std::string& outputDir_, BakeOptions options, const EntryCa this->footer2.signature.clear(); } + // Ancient crap VPK with no header + if (this->header1.version == 0) { + PackFile::setFullFilePath(outputDir); + return true; + } + // Write new headers outDir.seek_out(0); outDir.write(this->header1); @@ -861,10 +886,8 @@ uint32_t VPK::getVersion() const { } void VPK::setVersion(uint32_t version) { - if (version != 1 && version != 2 && version != 54) { - return; - } - if (::isFPX(this) || version == this->header1.version) { + // Version must be supported, we cannot be an FPX, and version must be different + if ((version != 0 && version != 1 && version != 2 && version != 54) || ::isFPX(this) || version == this->header1.version) { return; } this->header1.version = version; diff --git a/src/vpkpp/format/VPK_VTMB.cpp b/src/vpkpp/format/VPK_VTMB.cpp index 28e570123..fac822986 100644 --- a/src/vpkpp/format/VPK_VTMB.cpp +++ b/src/vpkpp/format/VPK_VTMB.cpp @@ -59,7 +59,7 @@ std::unique_ptr VPK_VTMB::open(const std::string& path, const EntryCal void VPK_VTMB::openNumbered(uint32_t archiveIndex, const std::string& path, const EntryCallback& callback) { FileStream reader{path}; - reader.seek_in(-static_cast(sizeof(uint32_t) * 2 + sizeof(uint8_t)), std::ios::end); + reader.seek_in(sizeof(uint32_t) * 2 + sizeof(uint8_t), std::ios::end); auto fileCount = reader.read(); auto dirOffset = 
reader.read(); diff --git a/src/vtfpp/ImageConversion.cpp b/src/vtfpp/ImageConversion.cpp index 30b7922a3..b12908f74 100644 --- a/src/vtfpp/ImageConversion.cpp +++ b/src/vtfpp/ImageConversion.cpp @@ -2,17 +2,21 @@ #include #include +#include #include #include +#include #include +#include +#include #ifdef SOURCEPP_BUILD_WITH_TBB #include #endif #include -#include #include +#include #define STB_IMAGE_IMPLEMENTATION #define STB_IMAGE_STATIC @@ -29,6 +33,14 @@ #define STBI_WRITE_NO_STDIO #include +#define TINYEXR_IMPLEMENTATION 1 +#ifdef SOURCEPP_BUILD_WITH_THREADS +#define TINYEXR_USE_THREAD 1 +#else +#define TINYEXR_USE_THREAD 0 +#endif +#include + using namespace sourcepp; using namespace vtfpp; @@ -284,7 +296,7 @@ namespace { return {(r), (g), (b), (a)}; \ }) #ifdef SOURCEPP_BUILD_WITH_TBB - #define VTFPP_CONVERT(InputType, r, g, b, a) VTFPP_CONVERT_DETAIL(InputType, r, g, b, a, std::execution::unseq) + #define VTFPP_CONVERT(InputType, r, g, b, a) VTFPP_CONVERT_DETAIL(InputType, r, g, b, a, std::execution::par_unseq) #else #define VTFPP_CONVERT(InputType, r, g, b, a) VTFPP_CONVERT_DETAIL(InputType, r, g, b, a) #endif @@ -345,7 +357,7 @@ namespace { #ifdef SOURCEPP_BUILD_WITH_TBB #define VTFPP_CONVERT(InputType, ...) 
\ std::span newDataSpan{reinterpret_cast(newData.data()), newData.size() / sizeof(ImagePixel::InputType)}; \ - std::transform(std::execution::unseq, imageDataSpan.begin(), imageDataSpan.end(), newDataSpan.begin(), [](ImagePixel::RGBA8888 pixel) -> ImagePixel::InputType { \ + std::transform(std::execution::par_unseq, imageDataSpan.begin(), imageDataSpan.end(), newDataSpan.begin(), [](ImagePixel::RGBA8888 pixel) -> ImagePixel::InputType { \ return __VA_ARGS__; \ }) #else @@ -414,7 +426,7 @@ namespace { return { static_cast(r), static_cast(g), static_cast(b), static_cast(a) }; \ }) #ifdef SOURCEPP_BUILD_WITH_TBB - #define VTFPP_CONVERT(InputType, r, g, b, a) VTFPP_CONVERT_DETAIL(InputType, r, g, b, a, std::execution::unseq) + #define VTFPP_CONVERT(InputType, r, g, b, a) VTFPP_CONVERT_DETAIL(InputType, r, g, b, a, std::execution::par_unseq) #else #define VTFPP_CONVERT(InputType, r, g, b, a) VTFPP_CONVERT_DETAIL(InputType, r, g, b, a) #endif @@ -484,7 +496,7 @@ namespace { #ifdef SOURCEPP_BUILD_WITH_TBB #define VTFPP_CONVERT(InputType, ...) 
\ std::span newDataSpan{reinterpret_cast(newData.data()), newData.size() / sizeof(ImagePixel::InputType)}; \ - std::transform(std::execution::unseq, imageDataSpan.begin(), imageDataSpan.end(), newDataSpan.begin(), [](ImagePixel::RGBA16161616 pixel) -> ImagePixel::InputType { \ + std::transform(std::execution::par_unseq, imageDataSpan.begin(), imageDataSpan.end(), newDataSpan.begin(), [](ImagePixel::RGBA16161616 pixel) -> ImagePixel::InputType { \ return __VA_ARGS__; \ }) #else @@ -528,7 +540,7 @@ namespace { std::span imageDataSpan{reinterpret_cast(imageData.data()), imageData.size() / sizeof(ImagePixel::InputType)}; \ std::transform(__VA_ARGS__ __VA_OPT__(,) imageDataSpan.begin(), imageDataSpan.end(), newDataSpan.begin(), [](ImagePixel::InputType pixel) -> ImagePixel::RGBA32323232F { return {(r), (g), (b), (a)}; }) #ifdef SOURCEPP_BUILD_WITH_TBB - #define VTFPP_CONVERT(InputType, r, g, b, a) VTFPP_CONVERT_DETAIL(InputType, r, g, b, a, std::execution::unseq) + #define VTFPP_CONVERT(InputType, r, g, b, a) VTFPP_CONVERT_DETAIL(InputType, r, g, b, a, std::execution::par_unseq) #else #define VTFPP_CONVERT(InputType, r, g, b, a) VTFPP_CONVERT_DETAIL(InputType, r, g, b, a) #endif @@ -537,12 +549,12 @@ namespace { switch (format) { using enum ImageFormat; - VTFPP_CASE_CONVERT_AND_BREAK(R32F, pixel.r, 0.f, 0.f, 1.f); - VTFPP_CASE_CONVERT_AND_BREAK(RG3232F, pixel.r, pixel.g, 0.f, 1.f); - VTFPP_CASE_CONVERT_AND_BREAK(RGB323232F, pixel.r, pixel.g, pixel.b, 1.f); - VTFPP_CASE_CONVERT_AND_BREAK(R16F, pixel.r.toFloat32(), 0.f, 0.f, 1.f); - VTFPP_CASE_CONVERT_AND_BREAK(RG1616F, pixel.r.toFloat32(), pixel.g.toFloat32(), 0.f, 1.f); - VTFPP_CASE_CONVERT_AND_BREAK(RGBA16161616F, pixel.r.toFloat32(), pixel.g.toFloat32(), pixel.b.toFloat32(), pixel.a.toFloat32()); + VTFPP_CASE_CONVERT_AND_BREAK(R32F, pixel.r, 0.f, 0.f, 1.f); + VTFPP_CASE_CONVERT_AND_BREAK(RG3232F, pixel.r, pixel.g, 0.f, 1.f); + VTFPP_CASE_CONVERT_AND_BREAK(RGB323232F, pixel.r, pixel.g, pixel.b, 1.f); + 
VTFPP_CASE_CONVERT_AND_BREAK(R16F, pixel.r, 0.f, 0.f, 1.f); + VTFPP_CASE_CONVERT_AND_BREAK(RG1616F, pixel.r, pixel.g, 0.f, 1.f); + VTFPP_CASE_CONVERT_AND_BREAK(RGBA16161616F, pixel.r, pixel.g, pixel.b, pixel.a); default: SOURCEPP_DEBUG_BREAK; break; } @@ -571,7 +583,7 @@ namespace { #ifdef SOURCEPP_BUILD_WITH_TBB #define VTFPP_CONVERT(InputType, ...) \ std::span newDataSpan{reinterpret_cast(newData.data()), newData.size() / sizeof(ImagePixel::InputType)}; \ - std::transform(std::execution::unseq, imageDataSpan.begin(), imageDataSpan.end(), newDataSpan.begin(), [](ImagePixel::RGBA32323232F pixel) -> ImagePixel::InputType { \ + std::transform(std::execution::par_unseq, imageDataSpan.begin(), imageDataSpan.end(), newDataSpan.begin(), [](ImagePixel::RGBA32323232F pixel) -> ImagePixel::InputType { \ return __VA_ARGS__; \ }) #else @@ -589,9 +601,9 @@ namespace { VTFPP_CASE_CONVERT_AND_BREAK(R32F, {pixel.r}); VTFPP_CASE_CONVERT_AND_BREAK(RG3232F, {pixel.r, pixel.g}); VTFPP_CASE_CONVERT_AND_BREAK(RGB323232F, {pixel.r, pixel.g, pixel.b}); - VTFPP_CASE_CONVERT_AND_BREAK(R16F, {pixel.r}); - VTFPP_CASE_CONVERT_AND_BREAK(RG1616F, {pixel.r, pixel.g}); - VTFPP_CASE_CONVERT_AND_BREAK(RGBA16161616F, {pixel.r, pixel.g, pixel.b, pixel.a}); + VTFPP_CASE_CONVERT_AND_BREAK(R16F, {half{pixel.r}}); + VTFPP_CASE_CONVERT_AND_BREAK(RG1616F, {half{pixel.r}, half{pixel.g}}); + VTFPP_CASE_CONVERT_AND_BREAK(RGBA16161616F, {half{pixel.r}, half{pixel.g}, half{pixel.b}, half{pixel.a}}); default: SOURCEPP_DEBUG_BREAK; break; } @@ -610,10 +622,10 @@ namespace { newData.resize(imageData.size() / (ImageFormatDetails::bpp(ImageFormat::RGBA8888) / 8) * (ImageFormatDetails::bpp(ImageFormat::RGBA32323232F) / 8)); std::span newDataSpan{reinterpret_cast(newData.data()), newData.size() / sizeof(ImagePixel::RGBA32323232F)}; - std::span imageDataSpan{reinterpret_cast(imageData.data()), imageData.size() / sizeof(ImagePixel::RGBA8888)}; \ + std::span imageDataSpan{reinterpret_cast(imageData.data()), 
imageData.size() / sizeof(ImagePixel::RGBA8888)}; std::transform( #ifdef SOURCEPP_BUILD_WITH_TBB - std::execution::unseq, + std::execution::par_unseq, #endif imageDataSpan.begin(), imageDataSpan.end(), newDataSpan.begin(), [](ImagePixel::RGBA8888 pixel) -> ImagePixel::RGBA32323232F { return { @@ -636,10 +648,10 @@ namespace { newData.resize(imageData.size() / (ImageFormatDetails::bpp(ImageFormat::RGBA32323232F) / 8) * (ImageFormatDetails::bpp(ImageFormat::RGBA8888) / 8)); std::span newDataSpan{reinterpret_cast(newData.data()), newData.size() / sizeof(ImagePixel::RGBA8888)}; - std::span imageDataSpan{reinterpret_cast(imageData.data()), imageData.size() / sizeof(ImagePixel::RGBA32323232F)}; \ + std::span imageDataSpan{reinterpret_cast(imageData.data()), imageData.size() / sizeof(ImagePixel::RGBA32323232F)}; std::transform( #ifdef SOURCEPP_BUILD_WITH_TBB - std::execution::unseq, + std::execution::par_unseq, #endif imageDataSpan.begin(), imageDataSpan.end(), newDataSpan.begin(), [](ImagePixel::RGBA32323232F pixel) -> ImagePixel::RGBA8888 { return { @@ -662,10 +674,10 @@ namespace { newData.resize(imageData.size() / (ImageFormatDetails::bpp(ImageFormat::RGBA8888) / 8) * (ImageFormatDetails::bpp(ImageFormat::RGBA16161616) / 8)); std::span newDataSpan{reinterpret_cast(newData.data()), newData.size() / sizeof(ImagePixel::RGBA16161616)}; - std::span imageDataSpan{reinterpret_cast(imageData.data()), imageData.size() / sizeof(ImagePixel::RGBA8888)}; \ + std::span imageDataSpan{reinterpret_cast(imageData.data()), imageData.size() / sizeof(ImagePixel::RGBA8888)}; std::transform( #ifdef SOURCEPP_BUILD_WITH_TBB - std::execution::unseq, + std::execution::par_unseq, #endif imageDataSpan.begin(), imageDataSpan.end(), newDataSpan.begin(), [](ImagePixel::RGBA8888 pixel) -> ImagePixel::RGBA16161616 { return { @@ -688,10 +700,10 @@ namespace { newData.resize(imageData.size() / (ImageFormatDetails::bpp(ImageFormat::RGBA16161616) / 8) * (ImageFormatDetails::bpp(ImageFormat::RGBA8888) / 
8)); std::span newDataSpan{reinterpret_cast(newData.data()), newData.size() / sizeof(ImagePixel::RGBA8888)}; - std::span imageDataSpan{reinterpret_cast(imageData.data()), imageData.size() / sizeof(ImagePixel::RGBA16161616)}; \ + std::span imageDataSpan{reinterpret_cast(imageData.data()), imageData.size() / sizeof(ImagePixel::RGBA16161616)}; std::transform( #ifdef SOURCEPP_BUILD_WITH_TBB - std::execution::unseq, + std::execution::par_unseq, #endif imageDataSpan.begin(), imageDataSpan.end(), newDataSpan.begin(), [](ImagePixel::RGBA16161616 pixel) -> ImagePixel::RGBA8888 { return { @@ -714,10 +726,10 @@ namespace { newData.resize(imageData.size() / (ImageFormatDetails::bpp(ImageFormat::RGBA32323232F) / 8) * (ImageFormatDetails::bpp(ImageFormat::RGBA16161616) / 8)); std::span newDataSpan{reinterpret_cast(newData.data()), newData.size() / sizeof(ImagePixel::RGBA16161616)}; - std::span imageDataSpan{reinterpret_cast(imageData.data()), imageData.size() / sizeof(ImagePixel::RGBA32323232F)}; \ + std::span imageDataSpan{reinterpret_cast(imageData.data()), imageData.size() / sizeof(ImagePixel::RGBA32323232F)}; std::transform( #ifdef SOURCEPP_BUILD_WITH_TBB - std::execution::unseq, + std::execution::par_unseq, #endif imageDataSpan.begin(), imageDataSpan.end(), newDataSpan.begin(), [](ImagePixel::RGBA32323232F pixel) -> ImagePixel::RGBA16161616 { return { @@ -740,10 +752,10 @@ namespace { newData.resize(imageData.size() / (ImageFormatDetails::bpp(ImageFormat::RGBA16161616) / 8) * (ImageFormatDetails::bpp(ImageFormat::RGBA32323232F) / 8)); std::span newDataSpan{reinterpret_cast(newData.data()), newData.size() / sizeof(ImagePixel::RGBA32323232F)}; - std::span imageDataSpan{reinterpret_cast(imageData.data()), imageData.size() / sizeof(ImagePixel::RGBA16161616)}; \ + std::span imageDataSpan{reinterpret_cast(imageData.data()), imageData.size() / sizeof(ImagePixel::RGBA16161616)}; std::transform( #ifdef SOURCEPP_BUILD_WITH_TBB - std::execution::unseq, + std::execution::par_unseq, 
#endif imageDataSpan.begin(), imageDataSpan.end(), newDataSpan.begin(), [](ImagePixel::RGBA16161616 pixel) -> ImagePixel::RGBA32323232F { return { @@ -773,14 +785,13 @@ std::vector ImageConversion::convertImageDataToFormat(std::span ImageConversion::convertImageDataToFormat(std::span ImageConversion::convertSeveralImageDataToFormat(std::spa return out; } +ImageConversion::FileFormat ImageConversion::getDefaultFileFormatForImageFormat(ImageFormat format) { + using enum FileFormat; + return ImageFormatDetails::decimal(format) ? EXR : PNG; +} + std::vector ImageConversion::convertImageDataToFile(std::span imageData, ImageFormat format, uint16_t width, uint16_t height, FileFormat fileFormat) { if (imageData.empty() || format == ImageFormat::EMPTY) { return {}; @@ -874,11 +889,7 @@ std::vector ImageConversion::convertImageDataToFile(std::span ImageConversion::convertImageDataToFile(std::span ImageConversion::convertImageDataToFile(std::span ImageConversion::convertImageDataToFile(std::span ImageConversion::convertImageDataToFile(std::span(imageData.data())); + if (format == ImageFormat::RGB323232F) { + stbi_write_hdr_to_func(stbWriteFunc, &out, width, height, ImageFormatDetails::bpp(ImageFormat::RGB323232F) / (8 * sizeof(float)), reinterpret_cast(imageData.data())); } else { - auto hdr = convertImageDataToFormat(imageData, format, ImageFormat::RGBA32323232F, width, height); - stbi_write_hdr_to_func(stbWriteFunc, &out, width, height, ImageFormatDetails::bpp(ImageFormat::RGBA32323232F) / (8 * sizeof(float)), reinterpret_cast(hdr.data())); + const auto hdr = convertImageDataToFormat(imageData, format, ImageFormat::RGB323232F, width, height); + stbi_write_hdr_to_func(stbWriteFunc, &out, width, height, ImageFormatDetails::bpp(ImageFormat::RGB323232F) / (8 * sizeof(float)), reinterpret_cast(hdr.data())); + } + break; + } + case FileFormat::EXR: { + EXRHeader header; + InitEXRHeader(&header); + + std::vector rawData; + if (!ImageFormatDetails::decimal(format) || 
ImageFormatDetails::compressed(format)) { + if (ImageFormatDetails::transparent(format)) { + rawData = convertImageDataToFormat(imageData, format, ImageFormat::RGBA32323232F, width, height); + format = ImageFormat::RGBA32323232F; + } else { + rawData = convertImageDataToFormat(imageData, format, ImageFormat::RGB323232F, width, height); + format = ImageFormat::RGB323232F; + } + } else { + rawData = {imageData.begin(), imageData.end()}; + } + + header.num_channels = (ImageFormatDetails::red(format) > 0) + (ImageFormatDetails::green(format) > 0) + (ImageFormatDetails::blue(format) > 0) + (ImageFormatDetails::alpha(format) > 0); + header.channels = static_cast(std::malloc(header.num_channels * sizeof(EXRChannelInfo))); + header.pixel_types = static_cast(malloc(header.num_channels * sizeof(int))); + header.requested_pixel_types = static_cast(malloc(header.num_channels * sizeof(int))); + + switch (header.num_channels) { + case 4: + header.channels[0].name[0] = 'A'; + header.channels[1].name[0] = 'B'; + header.channels[2].name[0] = 'G'; + header.channels[3].name[0] = 'R'; + break; + case 3: + header.channels[0].name[0] = 'B'; + header.channels[1].name[0] = 'G'; + header.channels[2].name[0] = 'R'; + break; + case 2: + header.channels[0].name[0] = 'G'; + header.channels[1].name[0] = 'R'; + break; + case 1: + header.channels[0].name[0] = 'R'; + break; + default: + FreeEXRHeader(&header); + return {}; + } + for (int i = 0; i < header.num_channels; i++) { + header.channels[i].name[1] = '\0'; + } + + int pixelType = (ImageFormatDetails::red(format) / 8) == sizeof(half) ? 
TINYEXR_PIXELTYPE_HALF : TINYEXR_PIXELTYPE_FLOAT; + for (int i = 0; i < header.num_channels; i++) { + header.pixel_types[i] = pixelType; + header.requested_pixel_types[i] = pixelType; + } + + std::vector> images(header.num_channels); + std::vector imagePtrs(header.num_channels); + switch (header.num_channels) { + case 4: + if (pixelType == TINYEXR_PIXELTYPE_HALF) { + images[0] = extractChannelFromImageData(imageData, &ImagePixel::RGBA16161616F::a); + images[1] = extractChannelFromImageData(imageData, &ImagePixel::RGBA16161616F::b); + images[2] = extractChannelFromImageData(imageData, &ImagePixel::RGBA16161616F::g); + images[3] = extractChannelFromImageData(imageData, &ImagePixel::RGBA16161616F::r); + } else { + images[0] = extractChannelFromImageData(imageData, &ImagePixel::RGBA32323232F::a); + images[1] = extractChannelFromImageData(imageData, &ImagePixel::RGBA32323232F::b); + images[2] = extractChannelFromImageData(imageData, &ImagePixel::RGBA32323232F::g); + images[3] = extractChannelFromImageData(imageData, &ImagePixel::RGBA32323232F::r); + } + break; + case 3: + if (pixelType == TINYEXR_PIXELTYPE_HALF) { + // We should not be here! 
+ FreeEXRHeader(&header); + return {}; + } + images[0] = extractChannelFromImageData(imageData, &ImagePixel::RGB323232F::b); + images[1] = extractChannelFromImageData(imageData, &ImagePixel::RGB323232F::g); + images[2] = extractChannelFromImageData(imageData, &ImagePixel::RGB323232F::r); + break; + case 2: + if (pixelType == TINYEXR_PIXELTYPE_HALF) { + images[0] = extractChannelFromImageData(imageData, &ImagePixel::RG1616F::g); + images[1] = extractChannelFromImageData(imageData, &ImagePixel::RG1616F::r); + } else { + images[0] = extractChannelFromImageData(imageData, &ImagePixel::RG3232F::g); + images[1] = extractChannelFromImageData(imageData, &ImagePixel::RG3232F::r); + } + break; + case 1: + images[0] = rawData; + break; + default: + FreeEXRHeader(&header); + return {}; + } + for (int i = 0; i < header.num_channels; i++) { + imagePtrs[i] = images[i].data(); } + + EXRImage image; + InitEXRImage(&image); + image.width = width; + image.height = height; + image.images = reinterpret_cast(imagePtrs.data()); + image.num_channels = header.num_channels; + + unsigned char* data = nullptr; + const char* err = nullptr; + + size_t size = SaveEXRImageToMemory(&image, &header, &data, &err); + if (err) { + FreeEXRErrorMessage(err); + FreeEXRHeader(&header); + return {}; + } + if (data) { + out = {reinterpret_cast(data), reinterpret_cast(data) + size}; + std::free(data); + } + + FreeEXRHeader(&header); break; } case FileFormat::DEFAULT: @@ -953,7 +1092,167 @@ std::vector ImageConversion::convertFileToImageData(std::span(fileData.data()), fileData.size()) == TINYEXR_SUCCESS) { + if (version.multipart || version.non_image) { + return {}; + } + + EXRHeader header; + InitEXRHeader(&header); + const char* err = nullptr; + if (ParseEXRHeaderFromMemory(&header, &version, reinterpret_cast(fileData.data()), fileData.size(), &err) != TINYEXR_SUCCESS) { + FreeEXRErrorMessage(err); + return {}; + } + + // Sanity check + if (header.num_channels < 1) { + FreeEXRHeader(&header); + return {}; 
+ } + + // Define the channel names we support (RGBA, greyscale) + std::unordered_map channelIndices{{"R", -1}, {"G", -1}, {"B", -1}, {"A", -1}, {"Y", -1}}; + + // Get channel type (EXR supports different types per channel, we do not) + // Rather than bailing we ask EXR to convert the lowest precision data + auto channelType = header.pixel_types[0]; + for (int i = 1; i < header.num_channels; i++) { + // UINT -> HALF -> FLOAT + if (header.pixel_types[i] > channelType && channelIndices.contains(header.channels[i].name)) { + channelType = header.pixel_types[i]; + } + } + // requested_pixel_types field only supports floats + if (channelType == TINYEXR_PIXELTYPE_UINT) { + channelType = TINYEXR_PIXELTYPE_HALF; + } + + // Determine proper format to use + for (int i = 0; i < header.num_channels; i++) { + if (channelIndices.contains(header.channels[i].name)) { + channelIndices[header.channels[i].name] = i; + } + } + if (channelIndices["Y"] >= 0) { + if (channelIndices["A"] >= 0) { + format = channelType == TINYEXR_PIXELTYPE_HALF ? ImageFormat::RGBA16161616F : ImageFormat::RGBA32323232F; + } else { + if (channelType == TINYEXR_PIXELTYPE_HALF) { + // VTF has no RGB161616F + channelType = TINYEXR_PIXELTYPE_FLOAT; + } + format = ImageFormat::RGB323232F; + } + channelIndices["R"] = channelIndices["Y"]; + channelIndices["G"] = channelIndices["Y"]; + channelIndices["B"] = channelIndices["Y"]; + } else if (channelIndices["A"] >= 0) { + format = channelType == TINYEXR_PIXELTYPE_HALF ? ImageFormat::RGBA16161616F : ImageFormat::RGBA32323232F; + } else if (channelIndices["B"] >= 0) { + if (channelType == TINYEXR_PIXELTYPE_HALF) { + // VTF has no RGB161616F + channelType = TINYEXR_PIXELTYPE_FLOAT; + } + format = ImageFormat::RGB323232F; + } else if (channelIndices["G"] >= 0) { + format = channelType == TINYEXR_PIXELTYPE_HALF ? ImageFormat::RG1616F : ImageFormat::RG3232F; + } else if (channelIndices["R"] >= 0) { + format = channelType == TINYEXR_PIXELTYPE_HALF ? 
ImageFormat::R16F : ImageFormat::R32F; + } else { + FreeEXRHeader(&header); + return {}; + } + + // Now that channelType has stopped changing, we can set it properly + for (int i = 0; i < header.num_channels; i++) { + if (header.pixel_types[i] != channelType && channelIndices.contains(header.channels[i].name)) { + header.requested_pixel_types[i] = channelType; + } + } + + EXRImage image; + InitEXRImage(&image); + if (LoadEXRImageFromMemory(&image, &header, reinterpret_cast(fileData.data()), fileData.size(), &err) != TINYEXR_SUCCESS) { + FreeEXRErrorMessage(err); + FreeEXRHeader(&header); + return {}; + } + + width = image.width; + height = image.height; + + // Merge channel data into a single buffer + std::vector combinedChannels(width * height * (ImageFormatDetails::bpp(format) / 8)); + const auto populateBuffer = [ + hasRed=ImageFormatDetails::red(format) > 0, + hasGreen=ImageFormatDetails::green(format) > 0, + hasBlue=ImageFormatDetails::blue(format) > 0, + hasAlpha=ImageFormatDetails::alpha(format) > 0, + width, + height, + &header, + r=channelIndices["R"], + g=channelIndices["G"], + b=channelIndices["B"], + a=channelIndices["A"], + &image, + &combinedChannels + ] { + const auto channelCount = hasRed + hasGreen + hasBlue + hasAlpha; + std::span out{reinterpret_cast(combinedChannels.data()), combinedChannels.size() / sizeof(C)}; + if (header.tiled) { + for (int t = 0; t < image.num_tiles; t++) { + auto** src = reinterpret_cast(image.tiles[t].images); + for (int j = 0; j < header.tile_size_y; j++) { + for (int i = 0; i < header.tile_size_x; i++) { + const auto ii = static_cast(image.tiles[t].offset_x) * header.tile_size_x + i; + const auto jj = static_cast(image.tiles[t].offset_y) * header.tile_size_y + j; + const auto idx = ii + jj * image.width; + + if (ii >= image.width || jj >= image.height) { + continue; + } + + const auto srcIdx = j * static_cast(header.tile_size_x) + i; + if (r >= 0) out[idx * channelCount + 0] = src[r][srcIdx]; + else if (hasRed) out[idx 
* channelCount + 0] = 0.f; + if (g >= 0) out[idx * channelCount + 1] = src[g][srcIdx]; + else if (hasGreen) out[idx * channelCount + 1] = 0.f; + if (b >= 0) out[idx * channelCount + 2] = src[b][srcIdx]; + else if (hasBlue) out[idx * channelCount + 2] = 0.f; + if (a >= 0) out[idx * channelCount + 3] = src[a][srcIdx]; + else if (hasAlpha) out[idx * channelCount + 3] = 1.f; + } + } + } + } else { + auto** src = reinterpret_cast(image.images); + for (uint64_t i = 0; i < width * height; i++) { + if (r >= 0) out[i * channelCount + 0] = src[r][i]; + else if (hasRed) out[i * channelCount + 0] = 0.f; + if (g >= 0) out[i * channelCount + 1] = src[g][i]; + else if (hasGreen) out[i * channelCount + 1] = 0.f; + if (b >= 0) out[i * channelCount + 2] = src[b][i]; + else if (hasBlue) out[i * channelCount + 2] = 0.f; + if (a >= 0) out[i * channelCount + 3] = src[a][i]; + else if (hasAlpha) out[i * channelCount + 3] = 1.f; + } + } + }; + if (channelType == TINYEXR_PIXELTYPE_HALF) { + populateBuffer.operator()(); + } else { + populateBuffer.operator()(); + } + + FreeEXRImage(&image); + FreeEXRHeader(&header); + return combinedChannels; + } + + // HDR if (stbi_is_hdr_from_memory(reinterpret_cast(fileData.data()), static_cast(fileData.size()))) { const std::unique_ptr stbImage{ stbi_loadf_from_memory(reinterpret_cast(fileData.data()), static_cast(fileData.size()), &width, &height, &channels, 0), @@ -965,6 +1264,7 @@ std::vector ImageConversion::convertFileToImageData(std::span ImageConversion::convertFileToImageData(std::span(stbImage.get()), reinterpret_cast(stbImage.get()) + ImageFormatDetails::getDataLength(format, width, height)}; } - // 16-bit single-frame image - if (stbi_is_16_bit_from_memory(reinterpret_cast(fileData.data()), static_cast(fileData.size()))) { - const std::unique_ptr stbImage{ - stbi_load_16_from_memory(reinterpret_cast(fileData.data()), static_cast(fileData.size()), &width, &height, &channels, 0), - &stbi_image_free, - }; - if (!stbImage) { - return {}; - } - - 
if (channels == 4) { - format = ImageFormat::RGBA16161616; - } else { - return {}; - } - - return {reinterpret_cast(stbImage.get()), reinterpret_cast(stbImage.get()) + ImageFormatDetails::getDataLength(format, width, height)}; - } - - // 8-bit or less multi-frame image + // GIF if (fileData.size() >= 3 && static_cast(fileData[0]) == 'G' && static_cast(fileData[1]) == 'I' && static_cast(fileData[2]) == 'F') { const std::unique_ptr stbImage{ stbi_load_gif_from_memory(reinterpret_cast(fileData.data()), static_cast(fileData.size()), nullptr, &width, &height, &frameCount, &channels, 0), @@ -1013,6 +1294,75 @@ std::vector ImageConversion::convertFileToImageData(std::span(stbImage.get()), reinterpret_cast(stbImage.get() + (ImageFormatDetails::getDataLength(format, width, height) * frameCount))}; } + // 16-bit single-frame image + if (stbi_is_16_bit_from_memory(reinterpret_cast(fileData.data()), static_cast(fileData.size()))) { + const std::unique_ptr stbImage{ + stbi_load_16_from_memory(reinterpret_cast(fileData.data()), static_cast(fileData.size()), &width, &height, &channels, 0), + &stbi_image_free, + }; + if (!stbImage) { + return {}; + } + + if (channels == 4) { + format = ImageFormat::RGBA16161616; + } else if (channels >= 1 && channels < 4) { + // There are no other 16-bit integer formats in Source, so we have to do a conversion here + format = ImageFormat::RGBA16161616; + + std::vector out(ImageFormatDetails::getDataLength(format, width, height)); + std::span outPixels{reinterpret_cast(out.data()), out.size() / sizeof(ImagePixel::RGBA16161616)}; + + switch (channels) { + case 1: { + std::span inPixels{reinterpret_cast(stbImage.get()), outPixels.size()}; + std::transform( +#ifdef SOURCEPP_BUILD_WITH_TBB + std::execution::par_unseq, +#endif + inPixels.begin(), inPixels.end(), outPixels.begin(), [](uint16_t pixel) -> ImagePixel::RGBA16161616 { + return {pixel, 0, 0, 0xffff}; + }); + } + case 2: { + struct RG1616 { + uint16_t r; + uint16_t g; + }; + std::span 
inPixels{reinterpret_cast(stbImage.get()), outPixels.size()}; + std::transform( +#ifdef SOURCEPP_BUILD_WITH_TBB + std::execution::par_unseq, +#endif + inPixels.begin(), inPixels.end(), outPixels.begin(), [](RG1616 pixel) -> ImagePixel::RGBA16161616 { + return {pixel.r, pixel.g, 0, 0xffff}; + }); + } + case 3: { + struct RGB161616 { + uint16_t r; + uint16_t g; + uint16_t b; + }; + std::span inPixels{reinterpret_cast(stbImage.get()), outPixels.size()}; + std::transform( +#ifdef SOURCEPP_BUILD_WITH_TBB + std::execution::par_unseq, +#endif + inPixels.begin(), inPixels.end(), outPixels.begin(), [](RGB161616 pixel) -> ImagePixel::RGBA16161616 { + return {pixel.r, pixel.g, pixel.b, 0xffff}; + }); + } + default: + return {}; + } + } else { + return {}; + } + + return {reinterpret_cast(stbImage.get()), reinterpret_cast(stbImage.get()) + ImageFormatDetails::getDataLength(format, width, height)}; + } + // 8-bit or less single frame image const std::unique_ptr stbImage{ stbi_load_from_memory(reinterpret_cast(fileData.data()), static_cast(fileData.size()), &width, &height, &channels, 0), diff --git a/src/vtfpp/PPL.cpp b/src/vtfpp/PPL.cpp index c300de2c6..2f3b45db8 100644 --- a/src/vtfpp/PPL.cpp +++ b/src/vtfpp/PPL.cpp @@ -200,7 +200,7 @@ std::vector PPL::bake() { } const auto seekPoint = writer.tell(); writer.seek_u(currentOffset).write(image.data); - const auto alignment = math::getPaddingForAlignment(ALIGNMENT, writer.tell()); + const auto alignment = math::paddingForAlignment(ALIGNMENT, writer.tell()); for (int i = 0; i < alignment; i++) { writer.write(0); } diff --git a/src/vtfpp/VTF.cpp b/src/vtfpp/VTF.cpp index 44dd6383b..f3f36c803 100644 --- a/src/vtfpp/VTF.cpp +++ b/src/vtfpp/VTF.cpp @@ -11,6 +11,7 @@ #include #include +#include #include @@ -19,25 +20,66 @@ using namespace vtfpp; namespace { -std::vector compressData(std::span data, int level) { - mz_ulong compressedSize = mz_compressBound(data.size()); - std::vector out(compressedSize); +std::vector 
compressData(std::span data, int16_t level, CompressionMethod method) { + switch (method) { + using enum CompressionMethod; + case DEFLATE: { + mz_ulong compressedSize = mz_compressBound(data.size()); + std::vector out(compressedSize); + + int status = MZ_OK; + while ((status = mz_compress2(reinterpret_cast(out.data()), &compressedSize, reinterpret_cast(data.data()), data.size(), level)) == MZ_BUF_ERROR) { + compressedSize *= 2; + out.resize(compressedSize); + } - int status = MZ_OK; - while ((status = mz_compress2(reinterpret_cast(out.data()), &compressedSize, reinterpret_cast(data.data()), data.size(), level)) == MZ_BUF_ERROR) { - compressedSize *= 2; - out.resize(compressedSize); - } + if (status != MZ_OK) { + return {}; + } + out.resize(compressedSize); + return out; + } + case ZSTD: { + if (level < 0) { + level = 6; + } - if (status != MZ_OK) { - return {}; + auto expectedSize = ZSTD_compressBound(data.size()); + std::vector out(expectedSize); + + auto compressedSize = ZSTD_compress(out.data(), expectedSize, data.data(), data.size(), level); + if (ZSTD_isError(compressedSize)) { + return {}; + } + + out.resize(compressedSize); + return out; + } } - out.resize(compressedSize); - return out; + return {}; } } // namespace +const std::array& Resource::getOrder() { + static constinit std::array typeArray{ + TYPE_THUMBNAIL_DATA, + TYPE_IMAGE_DATA, + TYPE_PARTICLE_SHEET_DATA, + TYPE_CRC, + TYPE_LOD_CONTROL_INFO, + TYPE_EXTENDED_FLAGS, + TYPE_KEYVALUES_DATA, + TYPE_AUX_COMPRESSION, + }; + static bool unsorted = true; + if (unsorted) { + std::sort(typeArray.begin(), typeArray.end()); + unsorted = false; + } + return typeArray; +} + Resource::ConvertedData Resource::convertData() const { switch (this->type) { case TYPE_CRC: @@ -146,17 +188,17 @@ VTF::VTF(std::vector&& vtfData, bool parseHeaderOnly) Resource* lastResource = nullptr; for (int i = 0; i < resourceCount; i++) { - auto& [type, flags, data] = this->resources.emplace_back(); + auto& [type, flags_, data_] = 
this->resources.emplace_back(); auto typeAndFlags = stream.read(); type = static_cast(typeAndFlags & 0xffffff); // last 3 bytes - flags = static_cast(typeAndFlags >> 24); // first byte - data = stream.read_span(4); + flags_ = static_cast(typeAndFlags >> 24); // first byte + data_ = stream.read_span(4); - if (!(flags & Resource::FLAG_LOCAL_DATA)) { + if (!(flags_ & Resource::FLAG_LOCAL_DATA)) { if (lastResource) { auto lastOffset = *reinterpret_cast(lastResource->data.data()); - auto currentOffset = *reinterpret_cast(data.data()); + auto currentOffset = *reinterpret_cast(data_.data()); auto curPos = stream.tell(); stream.seek(lastOffset); @@ -191,9 +233,20 @@ VTF::VTF(std::vector&& vtfData, bool parseHeaderOnly) if (uint32_t newOffset, newLength; ImageFormatDetails::getDataPosition(newOffset, newLength, this->format, i, this->mipCount, j, this->frameCount, k, faceCount, this->width, this->height, 0, this->getSliceCount())) { // Keep in mind that slices are compressed together mz_ulong decompressedImageDataSize = newLength * this->sliceCount; - if (mz_uncompress(reinterpret_cast(decompressedImageData.data() + newOffset), &decompressedImageDataSize, reinterpret_cast(imageResource->data.data() + oldOffset), oldLength) != MZ_OK) { - this->opened = false; - return; + switch (auxResource->getDataAsAuxCompressionMethod()) { + using enum CompressionMethod; + case DEFLATE: + if (mz_uncompress(reinterpret_cast(decompressedImageData.data() + newOffset), &decompressedImageDataSize, reinterpret_cast(imageResource->data.data() + oldOffset), oldLength) != MZ_OK) { + this->opened = false; + return; + } + break; + case ZSTD: + if (auto decompressedSize = ZSTD_decompress(reinterpret_cast(decompressedImageData.data() + newOffset), decompressedImageDataSize, reinterpret_cast(imageResource->data.data() + oldOffset), oldLength); ZSTD_isError(decompressedSize) || decompressedSize != decompressedImageDataSize) { + this->opened = false; + return; + } + break; } } oldOffset += oldLength; @@ 
-205,7 +258,7 @@ VTF::VTF(std::vector&& vtfData, bool parseHeaderOnly) } } } else { - stream.skip(math::getPaddingForAlignment(16, stream.tell())); + stream.skip(math::paddingForAlignment(16, stream.tell())); this->opened = stream.tell() == headerSize; this->resources.reserve(2); @@ -227,7 +280,8 @@ VTF::VTF(std::vector&& vtfData, bool parseHeaderOnly) } if (const auto* resource = this->getResource(Resource::TYPE_AUX_COMPRESSION)) { - this->compressionLevel = static_cast(resource->getDataAsAuxCompressionLevel()); + this->compressionLevel = resource->getDataAsAuxCompressionLevel(); + this->compressionMethod = resource->getDataAsAuxCompressionMethod(); this->removeResourceInternal(Resource::TYPE_AUX_COMPRESSION); } } @@ -263,13 +317,14 @@ VTF& VTF::operator=(const VTF& other) { this->resources.clear(); for (const auto& [otherType, otherFlags, otherData] : other.resources) { - auto& [type, flags, data] = this->resources.emplace_back(); + auto& [type, flags_, data_] = this->resources.emplace_back(); type = otherType; - flags = otherFlags; - data = {this->data.data() + (otherData.data() - other.data.data()), otherData.size()}; + flags_ = otherFlags; + data_ = {this->data.data() + (otherData.data() - other.data.data()), otherData.size()}; } this->compressionLevel = other.compressionLevel; + this->compressionMethod = other.compressionMethod; this->imageWidthResizeMethod = other.imageWidthResizeMethod; this->imageHeightResizeMethod = other.imageHeightResizeMethod; @@ -315,6 +370,7 @@ void VTF::createInternal(VTF& writer, CreationOptions options) { } writer.setFormat(options.outputFormat); writer.setCompressionLevel(options.compressionLevel); + writer.setCompressionMethod(options.compressionMethod); } void VTF::create(std::span imageData, ImageFormat format, uint16_t width, uint16_t height, const std::string& vtfPath, CreationOptions options) { @@ -410,6 +466,14 @@ void VTF::setImageResizeMethods(ImageConversion::ResizeMethod imageWidthResizeMe this->imageHeightResizeMethod 
= imageHeightResizeMethod_; } +void VTF::setImageWidthResizeMethod(ImageConversion::ResizeMethod imageWidthResizeMethod_) { + this->imageWidthResizeMethod = imageWidthResizeMethod_; +} + +void VTF::setImageHeightResizeMethod(ImageConversion::ResizeMethod imageHeightResizeMethod_) { + this->imageHeightResizeMethod = imageHeightResizeMethod_; +} + uint16_t VTF::getWidth(uint8_t mip) const { return mip > 0 ? ImageDimensions::getMipDim(mip, this->width) : this->width; } @@ -485,7 +549,7 @@ void VTF::setFormat(ImageFormat newFormat, ImageConversion::ResizeFilter filter) newMipCount = recommendedCount; } if (ImageFormatDetails::compressed(newFormat)) { - this->regenerateImageData(newFormat, this->width + math::getPaddingForAlignment(4, this->width), this->height + math::getPaddingForAlignment(4, this->height), newMipCount, this->frameCount, this->getFaceCount(), this->sliceCount, filter); + this->regenerateImageData(newFormat, this->width + math::paddingForAlignment(4, this->width), this->height + math::paddingForAlignment(4, this->height), newMipCount, this->frameCount, this->getFaceCount(), this->sliceCount, filter); } else { this->regenerateImageData(newFormat, this->width, this->height, newMipCount, this->frameCount, this->getFaceCount(), this->sliceCount, filter); } @@ -551,7 +615,7 @@ void VTF::computeMips(ImageConversion::ResizeFilter filter) { } #ifdef SOURCEPP_BUILD_WITH_THREADS })); - if (std::thread::hardware_concurrency() > 0 && futures.size() >= std::thread::hardware_concurrency() * 2) { + if (std::thread::hardware_concurrency() > 0 && futures.size() >= std::thread::hardware_concurrency()) { for (auto& future : futures) { future.get(); } @@ -689,7 +753,7 @@ void VTF::computeReflectivity() { futures.push_back(std::async(std::launch::async, [this, j, k, l] { return getReflectivityForImage(*this, j, k, l); })); - if (std::thread::hardware_concurrency() > 0 && futures.size() >= std::thread::hardware_concurrency() * 2) { + if (std::thread::hardware_concurrency() 
> 0 && futures.size() >= std::thread::hardware_concurrency()) { for (auto& future : futures) { this->reflectivity += future.get(); } @@ -784,7 +848,7 @@ void VTF::setResourceInternal(Resource::Type type, std::span da this->data.clear(); BufferStream writer{this->data}; - for (auto resourceType : Resource::TYPE_ARRAY_ORDER) { + for (auto resourceType : Resource::getOrder()) { if (!resourceData.contains(resourceType)) { continue; } @@ -933,7 +997,7 @@ void VTF::removeExtendedFlagsResource() { this->removeResourceInternal(Resource::TYPE_EXTENDED_FLAGS); } -void VTF::setKeyValuesData(const std::string& value) { +void VTF::setKeyValuesDataResource(const std::string& value) { std::vector keyValuesData; BufferStream writer{keyValuesData}; @@ -944,18 +1008,26 @@ void VTF::setKeyValuesData(const std::string& value) { this->setResourceInternal(Resource::TYPE_KEYVALUES_DATA, keyValuesData); } -void VTF::removeKeyValuesData() { +void VTF::removeKeyValuesDataResource() { this->removeResourceInternal(Resource::TYPE_KEYVALUES_DATA); } -uint8_t VTF::getCompressionLevel() const { +int16_t VTF::getCompressionLevel() const { return this->compressionLevel; } -void VTF::setCompressionLevel(uint8_t newCompressionLevel) { +void VTF::setCompressionLevel(int16_t newCompressionLevel) { this->compressionLevel = newCompressionLevel; } +CompressionMethod VTF::getCompressionMethod() const { + return this->compressionMethod; +} + +void VTF::setCompressionMethod(CompressionMethod newCompressionMethod) { + this->compressionMethod = newCompressionMethod; +} + bool VTF::hasImageData() const { return this->format != ImageFormat::EMPTY && this->width > 0 && this->height > 0; } @@ -986,12 +1058,16 @@ std::vector VTF::getImageDataAsRGBA8888(uint8_t mip, uint16_t frame, } bool VTF::setImage(std::span imageData_, ImageFormat format_, uint16_t width_, uint16_t height_, ImageConversion::ResizeFilter filter, uint8_t mip, uint16_t frame, uint8_t face, uint16_t slice) { + if (imageData_.empty()) { + return 
false; + } + if (!this->hasImageData()) { uint16_t resizedWidth = width_, resizedHeight = height_; ImageConversion::setResizedDims(resizedWidth, this->imageWidthResizeMethod, resizedHeight, this->imageHeightResizeMethod); if (ImageFormatDetails::compressed(format_)) { - resizedWidth += math::getPaddingForAlignment(4, resizedWidth); - resizedHeight += math::getPaddingForAlignment(4, resizedHeight); + resizedWidth += math::paddingForAlignment(4, resizedWidth); + resizedHeight += math::paddingForAlignment(4, resizedHeight); } if (const auto newMipCount = ImageDimensions::getRecommendedMipCountForDims(format_, resizedWidth, resizedHeight); newMipCount <= mip) { mip = newMipCount - 1; @@ -1013,11 +1089,13 @@ bool VTF::setImage(std::span imageData_, ImageFormat format_, u } if (uint32_t offset, length; ImageFormatDetails::getDataPosition(offset, length, this->format, mip, this->mipCount, frame, this->frameCount, face, faceCount, this->width, this->height, slice, this->sliceCount)) { std::vector image{imageData_.begin(), imageData_.end()}; - if (this->format != format_) { - image = ImageConversion::convertImageDataToFormat(image, format_, this->format, this->width, this->height); + const auto newWidth = ImageDimensions::getMipDim(mip, this->width); + const auto newHeight = ImageDimensions::getMipDim(mip, this->height); + if (width_ != newWidth || height_ != newHeight) { + image = ImageConversion::resizeImageData(image, format_, width_, newWidth, height_, newHeight, this->imageDataIsSRGB(), filter); } - if (width_ != ImageDimensions::getMipDim(mip, this->width) || height_ != ImageDimensions::getMipDim(mip, this->height)) { - image = ImageConversion::resizeImageData(image, this->format, width_, ImageDimensions::getMipDim(mip, this->width), height_, ImageDimensions::getMipDim(mip, this->height), this->imageDataIsSRGB(), filter); + if (format_ != this->format) { + image = ImageConversion::convertImageDataToFormat(image, format_, this->format, newWidth, newHeight); } 
std::memcpy(imageResource->data.data() + offset, image.data(), image.size()); } @@ -1030,7 +1108,7 @@ bool VTF::setImage(const std::string& imagePath, ImageConversion::ResizeFilter f auto imageData_ = ImageConversion::convertFileToImageData(fs::readFileBuffer(imagePath), inputFormat, inputWidth, inputHeight, inputFrameCount); // Unable to decode file - if (inputFormat == ImageFormat::EMPTY || !inputWidth || !inputHeight || !inputFrameCount) { + if (imageData_.empty() || inputFormat == ImageFormat::EMPTY || !inputWidth || !inputHeight || !inputFrameCount) { return false; } @@ -1088,6 +1166,16 @@ std::vector VTF::getThumbnailDataAsRGBA8888() const { return this->getThumbnailDataAs(ImageFormat::RGBA8888); } +void VTF::setThumbnail(std::span imageData_, ImageFormat format_, uint16_t width_, uint16_t height_) { + if (format_ != this->thumbnailFormat) { + this->setResourceInternal(Resource::TYPE_THUMBNAIL_DATA, ImageConversion::convertImageDataToFormat(imageData_, format_, this->thumbnailFormat, width_, height_)); + } else { + this->setResourceInternal(Resource::TYPE_THUMBNAIL_DATA, imageData_); + } + this->thumbnailWidth = width_; + this->thumbnailHeight = height_; +} + void VTF::computeThumbnail(ImageConversion::ResizeFilter filter) { if (!this->hasImageData()) { return; @@ -1145,7 +1233,7 @@ std::vector VTF::bake() const { } if (this->minorVersion < 3) { - const auto headerAlignment = math::getPaddingForAlignment(16, writer.tell()); + const auto headerAlignment = math::paddingForAlignment(16, writer.tell()); for (uint16_t i = 0; i < headerAlignment; i++) { writer.write({}); } @@ -1169,19 +1257,20 @@ std::vector VTF::bake() const { auxCompressionResourceData.resize((this->mipCount * this->frameCount * faceCount + 2) * sizeof(uint32_t)); BufferStream auxWriter{auxCompressionResourceData, false}; - // Format of aux resource is as follows, with each item being a 4 byte integer: + // Format of aux resource is as follows, with each item of unspecified type being a 4 byte 
integer: // - Size of resource in bytes, not counting this int - // - Compression level + // - Compression level, method (2 byte integers) // - (X times) Size of each mip-face-frame combo auxWriter .write(auxCompressionResourceData.size() - sizeof(uint32_t)) - .write(this->compressionLevel); + .write(this->compressionLevel) + .write(this->compressionMethod); for (int i = this->mipCount - 1; i >= 0; i--) { for (int j = 0; j < this->frameCount; j++) { for (int k = 0; k < faceCount; k++) { if (uint32_t offset, length; ImageFormatDetails::getDataPosition(offset, length, this->format, i, this->mipCount, j, this->frameCount, k, faceCount, this->width, this->height, 0, this->sliceCount)) { - auto compressedData = ::compressData({imageResource->data.data() + offset, length * this->sliceCount}, this->compressionLevel); + auto compressedData = ::compressData({imageResource->data.data() + offset, length * this->sliceCount}, this->compressionLevel, this->compressionMethod); compressedImageResourceData.insert(compressedImageResourceData.end(), compressedData.begin(), compressedData.end()); auxWriter.write(compressedData.size()); } @@ -1192,7 +1281,9 @@ std::vector VTF::bake() const { } writer - .write(0) // padding + .write(0) // padding + .write(0) // padding + .write(0) // padding .write(this->getResources().size() + hasAuxCompression) .write(0); // padding @@ -1212,7 +1303,7 @@ std::vector VTF::bake() const { writer_.write(data); writer_.seek_u(resourceOffsetPos).write(resourceOffsetValue); }; - for (const auto resourceType : Resource::TYPE_ARRAY_ORDER) { + for (const auto resourceType : Resource::getOrder()) { if (hasAuxCompression && resourceType == Resource::TYPE_AUX_COMPRESSION) { writeNonLocalResource(writer, resourceType, auxCompressionResourceData); } else if (hasAuxCompression && resourceType == Resource::TYPE_IMAGE_DATA) { diff --git a/src/vtfpp/_vtfpp.cmake b/src/vtfpp/_vtfpp.cmake index 0d3606a92..7ccabf2e1 100644 --- a/src/vtfpp/_vtfpp.cmake +++ 
b/src/vtfpp/_vtfpp.cmake @@ -1,6 +1,5 @@ add_pretty_parser(vtfpp - DEPS miniz - DEPS_INTERFACE sourcepp_stb + DEPS miniz libzstd_static sourcepp_parser sourcepp_stb sourcepp_tinyexr PRECOMPILED_HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/include/vtfpp/ImageConversion.h" "${CMAKE_CURRENT_SOURCE_DIR}/include/vtfpp/ImageFormats.h" @@ -14,8 +13,6 @@ add_pretty_parser(vtfpp "${CMAKE_CURRENT_LIST_DIR}/SHT.cpp" "${CMAKE_CURRENT_LIST_DIR}/VTF.cpp") -target_include_directories(vtfpp PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/ext/stb/include") - sourcepp_add_tbb(vtfpp) sourcepp_add_threads(vtfpp) target_link_compressonator(vtfpp) diff --git a/test/toolpp.cpp b/test/toolpp.cpp index 19ff0830c..07a9f6a13 100644 --- a/test/toolpp.cpp +++ b/test/toolpp.cpp @@ -8,23 +8,31 @@ using namespace toolpp; TEST(toolpp, cmdSeqOpenBinary) { CmdSeq cmdSeq{ASSET_ROOT "toolpp/cmdseq/binary.wc"}; + ASSERT_TRUE(cmdSeq); + ASSERT_EQ(cmdSeq.getType(), CmdSeq::Type::BINARY); ASSERT_EQ(cmdSeq.getSequences().size(), 8); } -TEST(toolpp, cmdSeqOpenKeyValues) { +TEST(toolpp, cmdSeqOpenKeyValuesStrata) { CmdSeq cmdSeq{ASSET_ROOT "toolpp/cmdseq/keyvalues.wc"}; + ASSERT_TRUE(cmdSeq); + ASSERT_EQ(cmdSeq.getType(), CmdSeq::Type::KEYVALUES_STRATA); ASSERT_EQ(cmdSeq.getSequences().size(), 4); } TEST(toolpp, cmdSeqBakeBinary) { CmdSeq cmdSeq{ASSET_ROOT "toolpp/cmdseq/binary.wc"}; + ASSERT_TRUE(cmdSeq); + ASSERT_EQ(cmdSeq.getType(), CmdSeq::Type::BINARY); auto existingData = fs::readFileBuffer(ASSET_ROOT "toolpp/cmdseq/binary.wc"); auto bakedData = cmdSeq.bake(); ASSERT_EQ(existingData, bakedData); } -TEST(toolpp, cmdSeqBakeKeyValues) { +TEST(toolpp, cmdSeqBakeKeyValuesStrata) { CmdSeq cmdSeq{ASSET_ROOT "toolpp/cmdseq/keyvalues.wc"}; + ASSERT_TRUE(cmdSeq); + ASSERT_EQ(cmdSeq.getType(), CmdSeq::Type::KEYVALUES_STRATA); auto existingData = fs::readFileBuffer(ASSET_ROOT "toolpp/cmdseq/keyvalues.wc"); auto bakedData = cmdSeq.bake(); ASSERT_EQ(existingData, bakedData); diff --git a/test/vtfpp.cpp b/test/vtfpp.cpp index 
a3b7b40f3..425be5391 100644 --- a/test/vtfpp.cpp +++ b/test/vtfpp.cpp @@ -836,6 +836,7 @@ TEST(vtfpp, read_v76_c9) { EXPECT_EQ(vtf.getThumbnailWidth(), 16); EXPECT_EQ(vtf.getThumbnailHeight(), 16); EXPECT_EQ(vtf.getCompressionLevel(), 9); + EXPECT_EQ(vtf.getCompressionMethod(), CompressionMethod::DEFLATE); // Resources EXPECT_EQ(vtf.getResources().size(), 2); @@ -876,7 +877,8 @@ TEST(vtfpp, write_v76_c6) { EXPECT_EQ(vtf.getThumbnailFormat(), ImageFormat::DXT1); EXPECT_EQ(vtf.getThumbnailWidth(), 16); EXPECT_EQ(vtf.getThumbnailHeight(), 16); - EXPECT_EQ(vtf.getCompressionLevel(), 6); + EXPECT_EQ(vtf.getCompressionLevel(), -1); + EXPECT_EQ(vtf.getCompressionMethod(), CompressionMethod::ZSTD); } TEST(vtfpp, read_v76_nomip_c9) { @@ -903,6 +905,7 @@ TEST(vtfpp, read_v76_nomip_c9) { EXPECT_EQ(vtf.getThumbnailWidth(), 16); EXPECT_EQ(vtf.getThumbnailHeight(), 16); EXPECT_EQ(vtf.getCompressionLevel(), 9); + EXPECT_EQ(vtf.getCompressionMethod(), CompressionMethod::DEFLATE); // Resources EXPECT_EQ(vtf.getResources().size(), 2);