diff --git a/.clang-format b/.clang-format
deleted file mode 100644
index dcffb37fd..000000000
--- a/.clang-format
+++ /dev/null
@@ -1,66 +0,0 @@
-# Generated from CLion C/C++ Code Style settings
-BasedOnStyle: LLVM
-AccessModifierOffset: -4
-AlignAfterOpenBracket: AlwaysBreak
-AlignConsecutiveAssignments: None
-AlignOperands: Align
-AllowAllArgumentsOnNextLine: false
-AllowAllConstructorInitializersOnNextLine: false
-AllowAllParametersOfDeclarationOnNextLine: false
-AllowShortBlocksOnASingleLine: Always
-AllowShortCaseLabelsOnASingleLine: true
-AllowShortFunctionsOnASingleLine: All
-AllowShortIfStatementsOnASingleLine: Always
-AllowShortLambdasOnASingleLine: All
-AllowShortLoopsOnASingleLine: true
-BreakAfterReturnType: None
-BreakBeforeBraces: Custom
-BreakTemplateDeclarations: Yes
-BraceWrapping:
-  AfterCaseLabel: false
-  AfterClass: false
-  AfterControlStatement: Never
-  AfterEnum: false
-  AfterFunction: false
-  AfterNamespace: false
-  AfterUnion: false
-  BeforeCatch: false
-  BeforeElse: false
-  IndentBraces: false
-  SplitEmptyFunction: false
-  SplitEmptyRecord: false
-BreakBeforeBinaryOperators: None
-BreakBeforeTernaryOperators: true
-BreakConstructorInitializers: BeforeComma
-BreakInheritanceList: BeforeComma
-ColumnLimit: 0
-CompactNamespaces: false
-ContinuationIndentWidth: 8
-IndentCaseLabels: true
-IndentPPDirectives: BeforeHash
-IndentWidth: 4
-KeepEmptyLinesAtTheStartOfBlocks: false
-MaxEmptyLinesToKeep: 1
-NamespaceIndentation: None
-ObjCSpaceAfterProperty: false
-ObjCSpaceBeforeProtocolList: true
-PointerAlignment: Left
-ReflowComments: false
-SpaceAfterCStyleCast: true
-SpaceAfterLogicalNot: false
-SpaceAfterTemplateKeyword: false
-SpaceBeforeAssignmentOperators: true
-SpaceBeforeCpp11BracedList: false
-SpaceBeforeCtorInitializerColon: true
-SpaceBeforeInheritanceColon: true
-SpaceBeforeParens: ControlStatements
-SpaceBeforeRangeBasedForLoopColon: true
-SpaceInEmptyParentheses: false
-SpacesBeforeTrailingComments: 0
-SpacesInAngles: false
-SpacesInCStyleCastParentheses: false
-SpacesInContainerLiterals: false
-SpacesInParentheses: false
-SpacesInSquareBrackets: false
-TabWidth: 4
-UseTab: ForIndentation
diff --git a/.editorconfig b/.editorconfig
index dc74c0861..7dad88fd0 100644
--- a/.editorconfig
+++ b/.editorconfig
@@ -37,6 +37,7 @@ ij_yaml_spaces_within_braces = true
 ij_yaml_spaces_within_brackets = true
 
 [{*.cmake,CMakeLists.txt}]
+indent_style = space
 ij_cmake_align_command_call_r_par = false
 ij_cmake_align_control_flow_r_par = false
 ij_cmake_align_multiline_parameters_in_calls = false
diff --git a/.github/workflows/deploy_docs.yml b/.github/workflows/deploy_docs.yml
index 6e691de18..c34e99bba 100644
--- a/.github/workflows/deploy_docs.yml
+++ b/.github/workflows/deploy_docs.yml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Create & Deploy Docs
-        uses: DenverCoder1/doxygen-github-pages-action@v1.3.1
+        uses: DenverCoder1/doxygen-github-pages-action@v2.0.0
         with:
           github_token: ${{secrets.GITHUB_TOKEN}}
           branch: docs
diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
new file mode 100644
index 000000000..6518ed65e
--- /dev/null
+++ b/.github/workflows/wheels.yml
@@ -0,0 +1,65 @@
+name: Build Wheels
+on:
+  workflow_dispatch:
+    inputs:
+      version:
+        type: string
+        required: true
+        default: YYYY.MM.DDdevN
+        description: 'The package version'
+      release:
+        type: boolean
+        required: true
+        default: false
+        description: 'Push a release to PyPI'
+
+jobs:
+  build_sdist:
+    name: Build SDist
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: '${{github.workspace}}/lang/python'
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@v4
+        with:
+          submodules: recursive
+
+      - name: Configure CMake
+        working-directory: '${{github.workspace}}'
+        run: cmake -G "Unix Makefiles" -B build -DCMAKE_BUILD_TYPE=Release -DSOURCEPP_BUILD_PYTHON_WRAPPERS=ON -DSOURCEPP_PYTHON_VERSION="${{inputs.version}}"
+
+      - name: Build SDist
+        run: |
+          pipx run build --sdist
+
+      - name: Check Metadata
+        run: |
+          pipx run twine check dist/*
+
+      - name: Upload Artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: dist-sdist
+          path: ${{github.workspace}}/lang/python/dist/*.tar.gz
+
+  upload_release:
+    name: Upload a Release
+    if: inputs.release
+    needs: [build_sdist]
+    runs-on: ubuntu-latest
+    environment:
+      name: pypi-release
+      url: https://pypi.org/p/sourcepp
+    permissions:
+      id-token: write
+    steps:
+      - name: Download Artifacts
+        uses: actions/download-artifact@v4
+        with:
+          name: dist-sdist
+          path: dist
+
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
diff --git a/.gitignore b/.gitignore
index 0c0169e33..2669bc643 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,15 +1,43 @@
-# IDEs
+# IDE
 .idea/
 .vs/
 .vscode/
 CMakeSettings.json
 
-# Project exclude paths
+
+# Build
 build/
+dist/
 cmake-build-*/
 out/
+*.dll
+*.ilk
+*.pdb
+*.pyd
+*.so*
 
-# Generated
+# Docs
 docs/html/
+
+
+# Test
 test/res/
 test/Helpers.h
+
+
+# Python
+.mypy_cache/
+.venv/
+__pycache__/
+wheelhouse/
+*.pyi
+*.typed
+*.whl
+
+lang/python/CMakeLists.txt
+lang/python/pyproject.toml
+lang/python/src/sourcepp/__init__.py
+
+lang/python/LICENSE
+lang/python/README.md
+lang/python/THIRDPARTY_LEGAL_NOTICES.txt
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b45a38798..167dff27f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,12 +3,13 @@ cmake_minimum_required(VERSION 3.25 FATAL_ERROR)
 
 # Set defaults before project call
 if(PROJECT_IS_TOP_LEVEL)
-    set(CMAKE_OSX_ARCHITECTURES "arm64;x86_64")
+    set(CMAKE_OSX_ARCHITECTURES "x86_64;arm64" CACHE INTERNAL "" FORCE)
 endif()
 
 
 # Create project
-project(sourcepp)
+project(sourcepp
+        DESCRIPTION "Several modern C++20 libraries for sanely parsing Valve formats.")
 set(CMAKE_CXX_STANDARD 20)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 
@@ -26,13 +27,15 @@ option(SOURCEPP_USE_VCRYPTPP       "Build vcryptpp library"              ${SOURC
 option(SOURCEPP_USE_VPKPP          "Build vpkpp library"                 ${SOURCEPP_LIBS_START_ENABLED})
 option(SOURCEPP_USE_VTFPP          "Build vtfpp library"                 ${SOURCEPP_LIBS_START_ENABLED})
 
-option(SOURCEPP_BUILD_BENCHMARKS   "Build benchmarks for supported libraries" OFF)
-option(SOURCEPP_BUILD_C_WRAPPERS   "Build C wrappers for supported libraries" OFF)
-option(SOURCEPP_BUILD_WITH_OPENCL  "Build with support for GPU compute"       OFF)
-option(SOURCEPP_BUILD_WITH_TBB     "Build with support for std::execution"    OFF)
-option(SOURCEPP_BUILD_WITH_THREADS "Build with support for threading"          ON)
-option(SOURCEPP_BUILD_TESTS        "Build tests for supported libraries"      OFF)
-option(SOURCEPP_BUILD_WIN7_COMPAT  "Build with Windows 7 compatibility"       OFF)
+option(SOURCEPP_BUILD_BENCHMARKS      "Build benchmarks for supported libraries"      OFF)
+option(SOURCEPP_BUILD_C_WRAPPERS      "Build C wrappers for supported libraries"      OFF)
+option(SOURCEPP_BUILD_CSHARP_WRAPPERS "Build C# wrappers for supported libraries"     OFF)
+option(SOURCEPP_BUILD_PYTHON_WRAPPERS "Build Python wrappers for supported libraries" OFF)
+option(SOURCEPP_BUILD_WITH_OPENCL     "Build with support for GPU compute"            OFF)
+option(SOURCEPP_BUILD_WITH_TBB        "Build with support for std::execution"         OFF)
+option(SOURCEPP_BUILD_WITH_THREADS    "Build with support for threading"               ON)
+option(SOURCEPP_BUILD_TESTS           "Build tests for supported libraries"           OFF)
+option(SOURCEPP_BUILD_WIN7_COMPAT     "Build with Windows 7 compatibility"            OFF)
 
 option(SOURCEPP_LINK_STATIC_MSVC_RUNTIME "Link to static MSVC runtime library" OFF)
 
@@ -53,6 +56,9 @@ if(SOURCEPP_USE_VPKPP)
     set(SOURCEPP_USE_KVPP ON CACHE INTERNAL "" FORCE)
 endif()
 
+if(SOURCEPP_BUILD_CSHARP_WRAPPERS)
+    set(SOURCEPP_BUILD_C_WRAPPERS ON CACHE INTERNAL "" FORCE)
+endif()
 if(MSVC)
     # MSVC does not rely on tbb for std::execution policies, so we can force this on
     set(SOURCEPP_BUILD_WITH_TBB ON CACHE INTERNAL "" FORCE)
@@ -73,6 +79,7 @@ endif()
 list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
 include(AddPrettyParser)
 include(AddSourcePPLibrary)
+include(FetchContent)
 include(IncludeSubdirectory)
 include(PrintOptions)
 include(TargetOptimize)
@@ -84,6 +91,8 @@ include_subdirectory(ext)
 
 # Shared code
 include_subdirectory(src/sourcepp)
+include_subdirectory(src/sourcepp/crypto)
+include_subdirectory(src/sourcepp/parser)
 
 
 # Shared C code
@@ -92,10 +101,48 @@ if(SOURCEPP_BUILD_C_WRAPPERS)
 endif()
 
 
+# Python bindings, part 1
+if(SOURCEPP_BUILD_PYTHON_WRAPPERS)
+    set(SOURCEPP_PYTHON_NAME "${PROJECT_NAME}_python")
+    if(NOT SOURCEPP_PYTHON_VERSION)
+        set(SOURCEPP_PYTHON_VERSION "0.0.1dev1")
+        message(WARNING "SOURCEPP_PYTHON_VERSION is not defined, do not release this build publicly! Defaulting it to ${SOURCEPP_PYTHON_VERSION}...")
+    endif()
+    find_package(Python REQUIRED
+            COMPONENTS Interpreter Development.Module
+            OPTIONAL_COMPONENTS Development.SABIModule)
+    FetchContent_Declare(
+            nanobind
+            GIT_REPOSITORY "https://github.com/wjakob/nanobind.git"
+            GIT_TAG "origin/master")
+    FetchContent_MakeAvailable(nanobind)
+    set(${SOURCEPP_PYTHON_NAME}_SOURCES "")
+    set(${SOURCEPP_PYTHON_NAME}_DEFINES "")
+
+    # Set the version and git commit hash here
+    find_package(Git REQUIRED)
+    execute_process(COMMAND ${GIT_EXECUTABLE} log -1 --format=%H
+            WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
+            OUTPUT_VARIABLE SOURCEPP_GIT_TAG
+            RESULT_VARIABLE SOURCEPP_GIT_TAG_ERROR
+            OUTPUT_STRIP_TRAILING_WHITESPACE)
+    if(NOT SOURCEPP_GIT_TAG)
+        message(FATAL_ERROR "Failed to retrieve git commit SHA: ${SOURCEPP_GIT_TAG_ERROR}")
+    endif()
+    configure_file("${CMAKE_CURRENT_SOURCE_DIR}/lang/python/cfg/CMakeLists.txt" "${CMAKE_CURRENT_SOURCE_DIR}/lang/python/CMakeLists.txt" @ONLY)
+    configure_file("${CMAKE_CURRENT_SOURCE_DIR}/lang/python/cfg/pyproject.toml" "${CMAKE_CURRENT_SOURCE_DIR}/lang/python/pyproject.toml")
+    configure_file("${CMAKE_CURRENT_SOURCE_DIR}/lang/python/cfg/__init__.py"    "${CMAKE_CURRENT_SOURCE_DIR}/lang/python/src/sourcepp/__init__.py")
+
+    # These need to be inside the python directory, so let's duplicate them!
+    configure_file("${CMAKE_CURRENT_SOURCE_DIR}/LICENSE"                      "${CMAKE_CURRENT_SOURCE_DIR}/lang/python/LICENSE"                      COPYONLY)
+    configure_file("${CMAKE_CURRENT_SOURCE_DIR}/README.md"                    "${CMAKE_CURRENT_SOURCE_DIR}/lang/python/README.md"                    COPYONLY)
+    configure_file("${CMAKE_CURRENT_SOURCE_DIR}/THIRDPARTY_LEGAL_NOTICES.txt" "${CMAKE_CURRENT_SOURCE_DIR}/lang/python/THIRDPARTY_LEGAL_NOTICES.txt" COPYONLY)
+endif()
+
+
 # Tests, part 1
 if(SOURCEPP_BUILD_TESTS)
     set(SOURCEPP_TEST_NAME "${PROJECT_NAME}_test")
-    include(FetchContent)
     FetchContent_Declare(
             googletest
             GIT_REPOSITORY "https://github.com/google/googletest.git"
@@ -123,7 +170,6 @@ endif()
 # Benchmarks
 if(SOURCEPP_BUILD_BENCHMARKS)
     set(SOURCEPP_BENCH_NAME "${PROJECT_NAME}_bench")
-    include(FetchContent)
     FetchContent_Declare(
             benchmark
             GIT_REPOSITORY https://github.com/google/benchmark.git
@@ -138,16 +184,16 @@ endif()
 
 
 # Add libraries
-add_sourcepp_library(bsppp             NO_TEST      ) # sourcepp::bsppp
-add_sourcepp_library(dmxpp                          ) # sourcepp::dmxpp
-add_sourcepp_library(gamepp                         ) # sourcepp::gamepp
-add_sourcepp_library(kvpp                      BENCH) # sourcepp::kvpp
-add_sourcepp_library(mdlpp                          ) # sourcepp::mdlpp
-add_sourcepp_library(steampp  C                     ) # sourcepp::steampp
-add_sourcepp_library(toolpp                         ) # sourcepp::toolpp
-add_sourcepp_library(vcryptpp C CSHARP              ) # sourcepp::vcryptpp
-add_sourcepp_library(vpkpp    C CSHARP NO_TEST      ) # sourcepp::vpkpp
-add_sourcepp_library(vtfpp                     BENCH) # sourcepp::vtfpp
+add_sourcepp_library(bsppp                    NO_TEST      ) # sourcepp::bsppp
+add_sourcepp_library(dmxpp                                 ) # sourcepp::dmxpp
+add_sourcepp_library(gamepp   C        PYTHON              ) # sourcepp::gamepp
+add_sourcepp_library(kvpp                             BENCH) # sourcepp::kvpp
+add_sourcepp_library(mdlpp                                 ) # sourcepp::mdlpp
+add_sourcepp_library(steampp  C        PYTHON              ) # sourcepp::steampp
+add_sourcepp_library(toolpp            PYTHON              ) # sourcepp::toolpp
+add_sourcepp_library(vcryptpp C CSHARP PYTHON              ) # sourcepp::vcryptpp
+add_sourcepp_library(vpkpp    C CSHARP        NO_TEST      ) # sourcepp::vpkpp
+add_sourcepp_library(vtfpp             PYTHON         BENCH) # sourcepp::vtfpp
 
 
 # Tests, part 2
@@ -160,9 +206,56 @@ if(SOURCEPP_BUILD_TESTS)
 endif()
 
 
+# Python bindings, part 2
+if(SOURCEPP_BUILD_PYTHON_WRAPPERS)
+    nanobind_add_module(${SOURCEPP_PYTHON_NAME} NB_STATIC STABLE_ABI LTO
+            "${CMAKE_CURRENT_SOURCE_DIR}/lang/python/src/sourcepp.cpp"
+            ${${SOURCEPP_PYTHON_NAME}_SOURCES})
+    set_target_properties(${SOURCEPP_PYTHON_NAME} PROPERTIES
+            OUTPUT_NAME "_${PROJECT_NAME}_impl"
+            LIBRARY_OUTPUT_DIRECTORY         "${CMAKE_CURRENT_SOURCE_DIR}/lang/python/src/sourcepp"
+            LIBRARY_OUTPUT_DIRECTORY_DEBUG   "${CMAKE_CURRENT_SOURCE_DIR}/lang/python/src/sourcepp"
+            LIBRARY_OUTPUT_DIRECTORY_RELEASE "${CMAKE_CURRENT_SOURCE_DIR}/lang/python/src/sourcepp")
+    target_compile_definitions(${SOURCEPP_PYTHON_NAME} PRIVATE ${${SOURCEPP_PYTHON_NAME}_DEFINES})
+    target_link_libraries(${SOURCEPP_PYTHON_NAME} PRIVATE ${${SOURCEPP_PYTHON_NAME}_DEPS})
+    install(TARGETS ${SOURCEPP_PYTHON_NAME} LIBRARY DESTINATION "./${PROJECT_NAME}")
+
+    add_custom_target(${SOURCEPP_PYTHON_NAME}_all)
+    add_dependencies(${SOURCEPP_PYTHON_NAME}_all ${SOURCEPP_PYTHON_NAME})
+
+    # We need to manually write out each module :(
+    set(${SOURCEPP_PYTHON_NAME}_MODULES
+            "sourcepp._sourcepp_impl"
+            "sourcepp._sourcepp_impl.gamepp"
+            "sourcepp._sourcepp_impl.sourcepp"
+            "sourcepp._sourcepp_impl.sourcepp.math"
+            "sourcepp._sourcepp_impl.steampp"
+            "sourcepp._sourcepp_impl.toolpp"
+            "sourcepp._sourcepp_impl.vcryptpp"
+            "sourcepp._sourcepp_impl.vcryptpp.VFONT"
+            "sourcepp._sourcepp_impl.vcryptpp.VICE"
+            "sourcepp._sourcepp_impl.vtfpp"
+            "sourcepp._sourcepp_impl.vtfpp.ImageFormatDetails"
+            "sourcepp._sourcepp_impl.vtfpp.ImageDimensions"
+            "sourcepp._sourcepp_impl.vtfpp.ImageConversion")
+    foreach(MODULE ${${SOURCEPP_PYTHON_NAME}_MODULES})
+        string(REPLACE "." "/" MODULE_DIR "${MODULE}")
+        string(REPLACE "." "_" MODULE_NAME_NORMALIZED "${MODULE}")
+        set(MODULE_NAME_NORMALIZED "${MODULE_NAME_NORMALIZED}_stub")
+        nanobind_add_stub("${SOURCEPP_PYTHON_NAME}_stub_${MODULE_NAME_NORMALIZED}"
+                DEPENDS ${SOURCEPP_PYTHON_NAME}
+                MODULE "${MODULE}"
+                OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/lang/python/src/${MODULE_DIR}.pyi"
+                PYTHON_PATH "${CMAKE_CURRENT_SOURCE_DIR}/lang/python/src")
+        install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/lang/python/src/${MODULE_DIR}.pyi" DESTINATION "./${MODULE_DIR}/..")
+        add_dependencies(${SOURCEPP_PYTHON_NAME}_all ${SOURCEPP_PYTHON_NAME}_stub_${MODULE_NAME_NORMALIZED})
+    endforeach()
+endif()
+
+
 # Print options
 print_options(OPTIONS
         USE_BSPPP USE_DMXPP USE_GAMEPP USE_KVPP USE_MDLPP USE_STEAMPP USE_TOOLPP USE_VCRYPTPP USE_VPKPP USE_VTFPP
-        BUILD_BENCHMARKS BUILD_C_WRAPPERS BUILD_WITH_OPENCL BUILD_WITH_TBB BUILD_WITH_THREADS BUILD_TESTS BUILD_WIN7_COMPAT
+        BUILD_BENCHMARKS BUILD_C_WRAPPERS BUILD_CSHARP_WRAPPERS BUILD_PYTHON_WRAPPERS BUILD_WITH_OPENCL BUILD_WITH_TBB BUILD_WITH_THREADS BUILD_TESTS BUILD_WIN7_COMPAT
         LINK_STATIC_MSVC_RUNTIME
         VPKPP_SUPPORT_VPK_V54)
diff --git a/README.md b/README.md
index de78d5c50..55c2cc083 100644
--- a/README.md
+++ b/README.md
@@ -22,7 +22,7 @@ Several modern C++20 libraries for sanely parsing Valve formats, rolled into one
   </tr>
   <tr><!-- empty row to disable github striped bg color --></tr>
   <tr>
-    <td rowspan="1"><code>bsppp</code></td>
+    <td rowspan="1"><code>bsppp</code><sup>*</sup></td>
     <td><a href="https://developer.valvesoftware.com/wiki/BSP_(Source)">BSP</a> v17-27</td>
     <td align="center">✅</td>
     <td align="center">✅</td>
@@ -30,7 +30,7 @@ Several modern C++20 libraries for sanely parsing Valve formats, rolled into one
   </tr>
   <tr><!-- empty row to disable github striped bg color --></tr>
   <tr>
-    <td rowspan="1"><code>dmxpp</code></td>
+    <td rowspan="1"><code>dmxpp</code><sup>*</sup></td>
     <td><a href="https://developer.valvesoftware.com/wiki/DMX">DMX</a> Binary v1-5</td>
     <td align="center">✅</td>
     <td align="center">❌</td>
@@ -42,7 +42,7 @@ Several modern C++20 libraries for sanely parsing Valve formats, rolled into one
     <td>Get Source engine instance window title/position/size</td>
     <td align="center">✅</td>
     <td align="center">❌</td>
-    <td rowspan="3" align="center"></td>
+    <td rowspan="3" align="center">C<br>Python</td>
   </tr>
   <tr><!-- empty row to disable github striped bg color --></tr>
   <tr>
@@ -53,15 +53,15 @@ Several modern C++20 libraries for sanely parsing Valve formats, rolled into one
   <tr><!-- empty row to disable github striped bg color --></tr>
   <tr>
     <td rowspan="1"><code>kvpp</code></td>
-    <td><a href="https://developer.valvesoftware.com/wiki/KeyValues">KeyValues</a> v1<sup>*</sup></td>
+    <td><a href="https://developer.valvesoftware.com/wiki/KeyValues">KeyValues</a> Text v1<sup>&dagger;</sup></td>
     <td align="center">✅</td>
     <td align="center">✅</td>
     <td rowspan="1" align="center"></td>
   </tr>
   <tr><!-- empty row to disable github striped bg color --></tr>
   <tr>
-    <td rowspan="5"><code>mdlpp</code></td>
-    <td><a href="https://developer.valvesoftware.com/wiki/MDL_(Source)">MDL</a> v44-49<sup>&dagger;</sup></td>
+    <td rowspan="5"><code>mdlpp</code><sup>*</sup></td>
+    <td><a href="https://developer.valvesoftware.com/wiki/MDL_(Source)">MDL</a> v44-49</td>
     <td align="center">✅</td>
     <td align="center">❌</td>
     <td rowspan="5" align="center"></td>
@@ -84,7 +84,7 @@ Several modern C++20 libraries for sanely parsing Valve formats, rolled into one
     <td>Find Steam install folder</td>
     <td align="center">✅</td>
     <td align="center">-</td>
-    <td rowspan="3" align="center">C</td>
+    <td rowspan="3" align="center">C<br>Python</td>
   </tr>
   <tr><!-- empty row to disable github striped bg color --></tr>
   <tr>
@@ -102,7 +102,7 @@ Several modern C++20 libraries for sanely parsing Valve formats, rolled into one
     </td>
     <td align="center">✅</td>
     <td align="center">✅</td>
-    <td rowspan="3" align="center"></td>
+    <td rowspan="3" align="center">Python</td>
   </tr>
   <tr><!-- empty row to disable github striped bg color --></tr>
   <tr>
@@ -119,7 +119,7 @@ Several modern C++20 libraries for sanely parsing Valve formats, rolled into one
     <td><a href="https://developer.valvesoftware.com/wiki/VICE">VICE</a> encrypted files</td>
     <td align="center">✅</td>
     <td align="center">✅</td>
-    <td rowspan="3" align="center">C<br>C#</td>
+    <td rowspan="3" align="center">C<br>C#<br>Python</td>
   </tr>
   <tr><!-- empty row to disable github striped bg color --></tr>
   <tr>
@@ -186,7 +186,7 @@ Several modern C++20 libraries for sanely parsing Valve formats, rolled into one
   <tr><!-- empty row to disable github striped bg color --></tr>
   <tr>
     <td>
-      <a href="https://developer.valvesoftware.com/wiki/VPK">VPK</a> v1-2, v54
+      <a href="https://developer.valvesoftware.com/wiki/VPK">VPK</a> pre-v1, v1-2, v54
       <br> &bull; <a href="https://www.counter-strike.net/cs2">Counter-Strike: 2</a> modifications
       <br> &bull; <a href="https://clientmod.ru">Counter-Strike: Source ClientMod</a> modifications
     </td>
@@ -207,17 +207,77 @@ Several modern C++20 libraries for sanely parsing Valve formats, rolled into one
   </tr>
   <tr><!-- empty row to disable github striped bg color --></tr>
   <tr>
-    <td>ZIP (and BZ2, GZ, XZ, ZSTD)</td>
+    <td>ZIP</td>
     <td align="center">✅</td>
     <td align="center">✅</td>
   </tr>
   <tr><!-- empty row to disable github striped bg color --></tr>
   <tr>
-    <td rowspan="3"><code>vtfpp</code></td>
+    <td rowspan="23"><code>vtfpp</code></td>
+    <td><a href="https://en.wikipedia.org/wiki/BMP_file_format">BMP</a></td>
+    <td align="center">✅</td>
+    <td align="center">✅</td>
+    <td rowspan="23" align="center">Python</td>
+  </tr>
+  <tr><!-- empty row to disable github striped bg color --></tr>
+  <tr>
+    <td><a href="https://openexr.com">EXR</a> v1</td>
+    <td align="center">✅</td>
+    <td align="center">✅</td>
+  </tr>
+  <tr><!-- empty row to disable github striped bg color --></tr>
+  <tr>
+    <td><a href="https://en.wikipedia.org/wiki/GIF">GIF</a></td>
+    <td align="center">✅</td>
+    <td align="center">❌</td>
+  </tr>
+  <tr><!-- empty row to disable github striped bg color --></tr>
+  <tr>
+    <td><a href="https://en.wikipedia.org/wiki/RGBE_image_format">HDR</a></td>
+    <td align="center">✅</td>
+    <td align="center">✅</td>
+  </tr>
+  <tr><!-- empty row to disable github striped bg color --></tr>
+  <tr>
+    <td><a href="https://en.wikipedia.org/wiki/JPEG">JPEG</a></td>
+    <td align="center">✅</td>
+    <td align="center">✅</td>
+  </tr>
+  <tr><!-- empty row to disable github striped bg color --></tr>
+  <tr>
+    <td>PIC</td>
+    <td align="center">✅</td>
+    <td align="center">❌</td>
+  </tr>
+  <tr><!-- empty row to disable github striped bg color --></tr>
+  <tr>
+    <td><a href="https://en.wikipedia.org/wiki/PNG">PNG</a></td>
+    <td align="center">✅</td>
+    <td align="center">✅</td>
+  </tr>
+  <tr><!-- empty row to disable github striped bg color --></tr>
+  <tr>
+    <td><a href="https://netpbm.sourceforge.net/doc/pnm.html">PNM</a> (PGM, PPM)</td>
+    <td align="center">✅</td>
+    <td align="center">❌</td>
+  </tr>
+  <tr><!-- empty row to disable github striped bg color --></tr>
+  <tr>
     <td><a href="https://developer.valvesoftware.com/wiki/PPL">PPL</a> v0</td>
     <td align="center">✅</td>
     <td align="center">✅</td>
-    <td rowspan="3" align="center"></td>
+  </tr>
+  <tr><!-- empty row to disable github striped bg color --></tr>
+  <tr>
+    <td><a href="https://www.adobe.com/creativecloud/file-types/image/raster/psd-file.html">PSD</a></td>
+    <td align="center">✅</td>
+    <td align="center">❌</td>
+  </tr>
+  <tr><!-- empty row to disable github striped bg color --></tr>
+  <tr>
+    <td><a href="https://en.wikipedia.org/wiki/Truevision_TGA">TGA</a></td>
+    <td align="center">✅</td>
+    <td align="center">✅</td>
   </tr>
   <tr><!-- empty row to disable github striped bg color --></tr>
   <tr>
@@ -230,9 +290,16 @@ Several modern C++20 libraries for sanely parsing Valve formats, rolled into one
   </tr>
 </table>
 
-(\*) Many text-based formats in Source are close to (if not identical to) KeyValues v1, such as [VDF](https://developer.valvesoftware.com/wiki/VDF), [VMT](https://developer.valvesoftware.com/wiki/VMT), and [VMF](https://developer.valvesoftware.com/wiki/VMF_(Valve_Map_Format)).
+(\*) These libraries are incomplete and still in development. Their interfaces are unstable and will likely change in the future.
+Libraries not starred should be considered stable, and their existing interfaces will not change much, if at all. Note that wrappers
+only exist for stable libraries.
+
+(&dagger;) Many text-based formats in Source are close to (if not identical to) KeyValues v1, such as [VMT](https://developer.valvesoftware.com/wiki/VMT) and [VMF](https://developer.valvesoftware.com/wiki/VMF_(Valve_Map_Format)).
+
+## Wrappers
 
-(&dagger;) The MDL parser is not complete. It is usable in its current state, but it does not currently parse more complex components like animations. This parser is still in development.
+Wrappers for libraries considered complete exist for C, C#, and/or Python, depending on the library. The Python wrappers can be
+found on PyPI in the [sourcepp](https://pypi.org/project/sourcepp) package.
 
 ## Special Thanks
 
diff --git a/THIRDPARTY_LEGAL_NOTICES.txt b/THIRDPARTY_LEGAL_NOTICES.txt
index cf387f125..a6d198797 100644
--- a/THIRDPARTY_LEGAL_NOTICES.txt
+++ b/THIRDPARTY_LEGAL_NOTICES.txt
@@ -1,4 +1,4 @@
----------------  bufferstream  ---------------
+---------------  BufferStream  ---------------
 
 MIT License
 
@@ -67,16 +67,41 @@ modification, are permitted provided that the following conditions are met:
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.
 
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+---------------      half      ---------------
+
+The MIT License
+
+Copyright (c) 2012-2021 Christian Rau
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
 
 
 ---------------    hat-trie    ---------------
@@ -119,6 +144,37 @@ NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
 CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 
 
+---------------    OpenEXR     ---------------
+
+Copyright (c) 2002, Industrial Light & Magic, a division of Lucas Digital Ltd. LLC.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
 ---------------     miniz      ---------------
 
 Copyright 2013-2014 RAD Game Tools and Valve Software
@@ -166,6 +222,37 @@ freely, subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 
 
+---------------    nanobind    ---------------
+
+Copyright (c) 2022 Wenzel Jakob <wenzel.jakob@epfl.ch>.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
 ---------------      stb       ---------------
 
 Copyright (c) 2017 Sean Barrett
@@ -214,6 +301,37 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 
 
+---------------    TinyEXR     ---------------
+
+Copyright (c) 2014 - 2021, Syoyo Fujita and many contributors.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
 ---------------      zlib      ---------------
 
 (C) 1995-2024 Jean-loup Gailly and Mark Adler
@@ -244,21 +362,22 @@ BSD License
 
 For Zstandard software
 
-Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved.
+Copyright (c) Meta Platforms, Inc. and affiliates.
+All rights reserved.
 
 Redistribution and use in source and binary forms, with or without modification,
 are permitted provided that the following conditions are met:
 
- * Redistributions of source code must retain the above copyright notice, this
-   list of conditions and the following disclaimer.
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
 
- * Redistributions in binary form must reproduce the above copyright notice,
-   this list of conditions and the following disclaimer in the documentation
-   and/or other materials provided with the distribution.
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
 
- * Neither the name Facebook, nor Meta, nor the names of its contributors may
-   be used to endorse or promote products derived from this software without
-   specific prior written permission.
+* Neither the name Facebook, nor Meta, nor the names of its contributors may
+  be used to endorse or promote products derived from this software without
+  specific prior written permission.
 
 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
diff --git a/cmake/AddPrettyParser.cmake b/cmake/AddPrettyParser.cmake
index 432c33c31..c2ba3dbc7 100644
--- a/cmake/AddPrettyParser.cmake
+++ b/cmake/AddPrettyParser.cmake
@@ -1,6 +1,6 @@
 # Add a new parser library
 function(add_pretty_parser TARGET)
-    cmake_parse_arguments(PARSE_ARGV 1 OPTIONS "C" "" "DEPS;DEPS_INTERFACE;PRECOMPILED_HEADERS;SOURCES")
+    cmake_parse_arguments(PARSE_ARGV 1 OPTIONS "C" "" "DEPS;DEPS_PUBLIC;DEPS_INTERFACE;PRECOMPILED_HEADERS;SOURCES")
 
     if(OPTIONS_C)
         add_library(${TARGET}c SHARED ${${PROJECT_NAME}c_SOURCES} ${OPTIONS_PRECOMPILED_HEADERS} ${OPTIONS_SOURCES})
@@ -16,8 +16,8 @@ function(add_pretty_parser TARGET)
     if(NOT ("PRECOMPILED_HEADERS" IN_LIST OPTIONS_UNPARSED_ARGUMENTS))
         target_precompile_headers(${TARGET} PUBLIC ${OPTIONS_HEADERS})
     endif()
-    target_link_libraries(${TARGET} PUBLIC ${PROJECT_NAME})
     target_link_libraries(${TARGET} PRIVATE ${OPTIONS_DEPS})
+    target_link_libraries(${TARGET} PUBLIC ${PROJECT_NAME} ${OPTIONS_DEPS_PUBLIC})
     target_link_libraries(${TARGET} INTERFACE ${OPTIONS_DEPS_INTERFACE})
 
     # Define DEBUG macro
diff --git a/cmake/AddSourcePPLibrary.cmake b/cmake/AddSourcePPLibrary.cmake
index 18a760ed6..581dfd071 100644
--- a/cmake/AddSourcePPLibrary.cmake
+++ b/cmake/AddSourcePPLibrary.cmake
@@ -1,7 +1,9 @@
 function(add_sourcepp_library TARGET)
-    cmake_parse_arguments(PARSE_ARGV 1 OPTIONS "C;CSHARP;NO_TEST;BENCH" "" "")
+    cmake_parse_arguments(PARSE_ARGV 1 OPTIONS "C;CSHARP;PYTHON;NO_TEST;BENCH" "" "")
     string(TOUPPER ${TARGET} TARGET_UPPER)
     if(SOURCEPP_USE_${TARGET_UPPER})
+        set(PROPAGATE_VARS "")
+
         # Add C++
         include("${CMAKE_CURRENT_SOURCE_DIR}/src/${TARGET}/_${TARGET}.cmake")
 
@@ -11,23 +13,31 @@ function(add_sourcepp_library TARGET)
         endif()
 
         # Add C#
-        if(OPTIONS_CSHARP)
+        if(SOURCEPP_BUILD_CSHARP_WRAPPERS AND OPTIONS_CSHARP)
             configure_file("${CMAKE_CURRENT_SOURCE_DIR}/lang/csharp/src/sourcepp/Buffer.cs.in"     "${CMAKE_CURRENT_SOURCE_DIR}/lang/csharp/src/${TARGET}/Buffer.cs")
             configure_file("${CMAKE_CURRENT_SOURCE_DIR}/lang/csharp/src/sourcepp/String.cs.in"     "${CMAKE_CURRENT_SOURCE_DIR}/lang/csharp/src/${TARGET}/String.cs")
             configure_file("${CMAKE_CURRENT_SOURCE_DIR}/lang/csharp/src/sourcepp/TARGET.csproj.in" "${CMAKE_CURRENT_SOURCE_DIR}/lang/csharp/src/${TARGET}/${TARGET}.csproj")
+            add_custom_target(${TARGET}_csharp DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/lang/csharp/src/${TARGET}/${TARGET}.csproj")
+            add_dependencies(${TARGET}_csharp ${TARGET}c)
         endif()
 
-        set(PROPAGATE_VARS "")
+        # Add Python
+        if(SOURCEPP_BUILD_PYTHON_WRAPPERS AND OPTIONS_PYTHON)
+            list(APPEND ${SOURCEPP_PYTHON_NAME}_DEPS sourcepp::${TARGET})
+            list(APPEND ${SOURCEPP_PYTHON_NAME}_DEFINES ${TARGET_UPPER})
+            list(APPEND ${SOURCEPP_PYTHON_NAME}_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/lang/python/src/${TARGET}.h")
+            list(APPEND PROPAGATE_VARS ${SOURCEPP_PYTHON_NAME}_DEPS ${SOURCEPP_PYTHON_NAME}_DEFINES ${SOURCEPP_PYTHON_NAME}_SOURCES)
+        endif()
 
         # Add tests
-        if(NOT OPTIONS_NO_TEST AND SOURCEPP_BUILD_TESTS)
+        if(SOURCEPP_BUILD_TESTS AND NOT OPTIONS_NO_TEST)
             list(APPEND ${SOURCEPP_TEST_NAME}_DEPS sourcepp::${TARGET})
             list(APPEND ${SOURCEPP_TEST_NAME}_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/test/${TARGET}.cpp")
             list(APPEND PROPAGATE_VARS ${SOURCEPP_TEST_NAME}_DEPS ${SOURCEPP_TEST_NAME}_SOURCES)
         endif()
 
         # Add benchmarks
-        if(OPTIONS_BENCH AND SOURCEPP_BUILD_BENCHMARKS)
+        if(SOURCEPP_BUILD_BENCHMARKS AND OPTIONS_BENCH)
             add_executable(${TARGET}_bench "${CMAKE_CURRENT_SOURCE_DIR}/bench/${TARGET}.cpp")
             target_link_libraries(${TARGET}_bench PUBLIC ${SOURCEPP_BENCH_NAME} sourcepp::${TARGET})
             include("${CMAKE_CURRENT_SOURCE_DIR}/bench/${TARGET}.cmake")
diff --git a/docs/index.md b/docs/index.md
index 8d4ff20e3..622f7d4d8 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -22,14 +22,14 @@ Several modern C++20 libraries for sanely parsing Valve formats, rolled into one
     <th>Wrappers</th>
   </tr>
   <tr>
-    <td rowspan="1"><code>bsppp</code></td>
+    <td rowspan="1"><code>bsppp</code><sup>*</sup></td>
     <td><a href="https://developer.valvesoftware.com/wiki/BSP_(Source)">BSP</a> v17-27</td>
     <td align="center">✅</td>
     <td align="center">✅</td>
     <td rowspan="1" align="center"></td>
   </tr>
   <tr style="background: none;">
-    <td rowspan="1"><code>dmxpp</code></td>
+    <td rowspan="1"><code>dmxpp</code><sup>*</sup></td>
     <td><a href="https://developer.valvesoftware.com/wiki/DMX">DMX</a> Binary v1-5</td>
     <td align="center">✅</td>
     <td align="center">❌</td>
@@ -40,7 +40,7 @@ Several modern C++20 libraries for sanely parsing Valve formats, rolled into one
     <td>Get Source engine instance window title/position/size</td>
     <td align="center">✅</td>
     <td align="center">❌</td>
-    <td rowspan="2" align="center"></td>
+    <td rowspan="2" align="center">C<br>Python</td>
   </tr>
   <tr>
     <td>Run commands in a Source engine instance remotely</td>
@@ -49,14 +49,14 @@ Several modern C++20 libraries for sanely parsing Valve formats, rolled into one
   </tr>
   <tr>
     <td rowspan="1"><code>kvpp</code></td>
-    <td><a href="https://developer.valvesoftware.com/wiki/KeyValues">KeyValues</a> v1<sup>*</sup></td>
+    <td><a href="https://developer.valvesoftware.com/wiki/KeyValues">KeyValues</a> Text v1<sup>&dagger;</sup></td>
     <td align="center">✅</td>
     <td align="center">✅</td>
     <td rowspan="1" align="center"></td>
   </tr>
   <tr>
-    <td rowspan="3"><code>mdlpp</code></td>
-    <td><a href="https://developer.valvesoftware.com/wiki/MDL_(Source)">MDL</a> v44-49<sup>&dagger;</sup></td>
+    <td rowspan="3"><code>mdlpp</code><sup>*</sup></td>
+    <td><a href="https://developer.valvesoftware.com/wiki/MDL_(Source)">MDL</a> v44-49</td>
     <td align="center">✅</td>
     <td align="center">❌</td>
     <td rowspan="3" align="center"></td>
@@ -76,7 +76,7 @@ Several modern C++20 libraries for sanely parsing Valve formats, rolled into one
     <td>Find Steam install folder</td>
     <td align="center">✅</td>
     <td align="center">-</td>
-    <td rowspan="2" align="center">C</td>
+    <td rowspan="2" align="center">C<br>Python</td>
   </tr>
   <tr>
     <td>Find installed Steam games</td>
@@ -92,7 +92,7 @@ Several modern C++20 libraries for sanely parsing Valve formats, rolled into one
     </td>
     <td align="center">✅</td>
     <td align="center">✅</td>
-    <td rowspan="2" align="center"></td>
+    <td rowspan="2" align="center">Python</td>
   </tr>
   <tr>
     <td>
@@ -107,7 +107,7 @@ Several modern C++20 libraries for sanely parsing Valve formats, rolled into one
     <td><a href="https://developer.valvesoftware.com/wiki/VICE">VICE</a> encrypted files</td>
     <td align="center">✅</td>
     <td align="center">✅</td>
-    <td rowspan="2" align="center">C<br>C#</td>
+    <td rowspan="2" align="center">C<br>C#<br>Python</td>
   </tr>
   <tr>
     <td><a href="https://developer.valvesoftware.com/wiki/Vfont">VFONT</a> encrypted fonts</td>
@@ -163,7 +163,7 @@ Several modern C++20 libraries for sanely parsing Valve formats, rolled into one
   </tr>
   <tr>
     <td>
-      <a href="https://developer.valvesoftware.com/wiki/VPK">VPK</a> v1-2, v54
+      <a href="https://developer.valvesoftware.com/wiki/VPK">VPK</a> pre-v1, v1-2, v54
       <br> &bull; <a href="https://www.counter-strike.net/cs2">Counter-Strike: 2</a> modifications
       <br> &bull; <a href="https://clientmod.ru">Counter-Strike: Source ClientMod</a> modifications
     </td>
@@ -181,16 +181,66 @@ Several modern C++20 libraries for sanely parsing Valve formats, rolled into one
     <td align="center">✅</td>
   </tr>
   <tr>
-    <td>ZIP (and BZ2, GZ, XZ, ZSTD)</td>
+    <td>ZIP</td>
     <td align="center">✅</td>
     <td align="center">✅</td>
   </tr>
   <tr>
-    <td rowspan="2"><code>vtfpp</code></td>
+    <td rowspan="12"><code>vtfpp</code></td>
+    <td><a href="https://en.wikipedia.org/wiki/BMP_file_format">BMP</a></td>
+    <td align="center">✅</td>
+    <td align="center">✅</td>
+    <td rowspan="12" align="center">Python</td>
+  </tr>
+  <tr>
+    <td><a href="https://openexr.com">EXR</a> v1</td>
+    <td align="center">✅</td>
+    <td align="center">✅</td>
+  </tr>
+  <tr>
+    <td><a href="https://en.wikipedia.org/wiki/GIF">GIF</a></td>
+    <td align="center">✅</td>
+    <td align="center">❌</td>
+  </tr>
+  <tr>
+    <td><a href="https://en.wikipedia.org/wiki/RGBE_image_format">HDR</a></td>
+    <td align="center">✅</td>
+    <td align="center">✅</td>
+  </tr>
+  <tr>
+    <td><a href="https://en.wikipedia.org/wiki/JPEG">JPEG</a></td>
+    <td align="center">✅</td>
+    <td align="center">✅</td>
+  </tr>
+  <tr>
+    <td>PIC</td>
+    <td align="center">✅</td>
+    <td align="center">❌</td>
+  </tr>
+  <tr>
+    <td><a href="https://en.wikipedia.org/wiki/PNG">PNG</a></td>
+    <td align="center">✅</td>
+    <td align="center">✅</td>
+  </tr>
+  <tr>
+    <td><a href="https://netpbm.sourceforge.net/doc/pnm.html">PNM</a> (PGM, PPM)</td>
+    <td align="center">✅</td>
+    <td align="center">❌</td>
+  </tr>
+  <tr>
     <td><a href="https://developer.valvesoftware.com/wiki/PPL">PPL</a> v0</td>
     <td align="center">✅</td>
     <td align="center">✅</td>
-    <td rowspan="2" align="center"></td>
+  </tr>
+  <tr>
+    <td><a href="https://www.adobe.com/creativecloud/file-types/image/raster/psd-file.html">PSD</a></td>
+    <td align="center">✅</td>
+    <td align="center">❌</td>
+  </tr>
+  <tr>
+    <td><a href="https://en.wikipedia.org/wiki/Truevision_TGA">TGA</a></td>
+    <td align="center">✅</td>
+    <td align="center">✅</td>
   </tr>
   <tr>
     <td>
@@ -203,9 +253,16 @@ Several modern C++20 libraries for sanely parsing Valve formats, rolled into one
 </table>
 \endhtmlonly
 
-(\*) Many text-based formats in Source are close to (if not identical to) KeyValues v1, such as [VDF](https://developer.valvesoftware.com/wiki/VDF), [VMT](https://developer.valvesoftware.com/wiki/VMT), and [VMF](https://developer.valvesoftware.com/wiki/VMF_(Valve_Map_Format)).
+(\*) These libraries are incomplete and still in development. Their interfaces are unstable and will likely change in the future.
+Libraries not starred should be considered stable, and their existing interfaces will change little, if at all. Note that wrappers
+only exist for stable libraries.
+
+(&dagger;) Many text-based formats in Source are close to (if not identical to) KeyValues Text v1, such as [VMT](https://developer.valvesoftware.com/wiki/VMT) and [VMF](https://developer.valvesoftware.com/wiki/VMF_(Valve_Map_Format)).
+
+## Wrappers
 
-(&dagger;) The MDL parser is not complete. It is usable in its current state, but it does not currently parse more complex components like animations. This parser is still in development.
+Wrappers for libraries considered complete exist for C, C#, and/or Python, depending on the library. The Python wrappers can be
+found on PyPI in the [sourcepp](https://pypi.org/project/sourcepp) package.
 
 ## Special Thanks
 
diff --git a/ext/_ext.cmake b/ext/_ext.cmake
index 54964b0b7..530c62412 100644
--- a/ext/_ext.cmake
+++ b/ext/_ext.cmake
@@ -21,6 +21,10 @@ if(NOT TARGET cryptopp::cryptopp)
 endif()
 
 
+# half
+add_subdirectory("${CMAKE_CURRENT_LIST_DIR}/half")
+
+
 # hat-trie
 if(NOT TARGET tsl::hat_trie)
     add_subdirectory("${CMAKE_CURRENT_LIST_DIR}/hat-trie")
@@ -38,11 +42,11 @@ endif()
 
 
 # minizip-ng (guard this behind vpkpp because this is a big dependency)
-if(SOURCEPP_USE_VPKPP AND NOT TARGET MINIZIP::minizip)
+if((SOURCEPP_USE_VPKPP OR SOURCEPP_USE_VTFPP) AND NOT TARGET MINIZIP::minizip)
     set(MZ_COMPAT           OFF CACHE INTERNAL "")
     # todo: guard liblzma/xz force-enable behind BSP compression option
     set(MZ_LZMA             ON  CACHE INTERNAL "" FORCE)
-    if(SOURCEPP_VPKPP_SUPPORT_VPK_V54)
+    if(SOURCEPP_USE_VTFPP OR SOURCEPP_VPKPP_SUPPORT_VPK_V54)
         set(MZ_ZSTD         ON  CACHE INTERNAL "" FORCE)
     endif()
     set(MZ_FETCH_LIBS       ON  CACHE INTERNAL "" FORCE)
@@ -73,7 +77,7 @@ endif()
 
 function(sourcepp_add_opencl TARGET)
     if(SOURCEPP_BUILD_WITH_OPENCL)
-        target_compile_definitions(${TARGET} PRIVATE SOURCEPP_BUILD_WITH_OPENCL)
+        target_compile_definitions(${TARGET} PUBLIC SOURCEPP_BUILD_WITH_OPENCL)
         target_link_libraries(${TARGET} PRIVATE OpenCL::OpenCL)
     endif()
 endfunction()
@@ -83,10 +87,14 @@ endfunction()
 add_subdirectory("${CMAKE_CURRENT_LIST_DIR}/stb")
 
 
-# TBB
+# tinyexr
+add_subdirectory("${CMAKE_CURRENT_LIST_DIR}/tinyexr")
+
+
+# tbb
 function(sourcepp_add_tbb TARGET)
     if(SOURCEPP_BUILD_WITH_TBB)
-        target_compile_definitions(${TARGET} PRIVATE SOURCEPP_BUILD_WITH_TBB)
+        target_compile_definitions(${TARGET} PUBLIC SOURCEPP_BUILD_WITH_TBB)
         if(NOT MSVC)
             target_link_libraries(${TARGET} PRIVATE tbb)
         endif()
@@ -106,7 +114,7 @@ endif()
 
 function(sourcepp_add_threads TARGET)
     if(SOURCEPP_BUILD_WITH_THREADS)
-        target_compile_definitions(${TARGET} PRIVATE SOURCEPP_BUILD_WITH_THREADS)
+        target_compile_definitions(${TARGET} PUBLIC SOURCEPP_BUILD_WITH_THREADS)
         target_link_libraries(${TARGET} PRIVATE Threads::Threads)
     endif()
 endfunction()
diff --git a/ext/bufferstream b/ext/bufferstream
index 689c50d56..fa4160118 160000
--- a/ext/bufferstream
+++ b/ext/bufferstream
@@ -1 +1 @@
-Subproject commit 689c50d56a0eefb066209c281eac99599845edb6
+Subproject commit fa4160118b06b84706a7a4766a3b487fd5c9a6d0
diff --git a/ext/compressonator/CMakeLists.txt b/ext/compressonator/CMakeLists.txt
index a4757f151..22cc64f15 100644
--- a/ext/compressonator/CMakeLists.txt
+++ b/ext/compressonator/CMakeLists.txt
@@ -1,4 +1,6 @@
-set(COMPRESSONATOR_DIR ${CMAKE_CURRENT_LIST_DIR} CACHE INTERNAL "")
+include_guard(GLOBAL)
+
+set(COMPRESSONATOR_DIR "${CMAKE_CURRENT_LIST_DIR}" CACHE INTERNAL "")
 
 function(target_link_compressonator TARGET)
     if(WIN32)
@@ -20,8 +22,8 @@ function(target_link_compressonator TARGET)
         endif()
     elseif(APPLE)
         target_link_libraries(${TARGET} PRIVATE
-                "${COMPRESSONATOR_DIR}/lib/macOS/libCompressonator$<$<CONFIG:Debug>:d>.a"
-                "${COMPRESSONATOR_DIR}/lib/macOS/libCMP_Core$<$<CONFIG:Debug>:d>.a")
+                "${COMPRESSONATOR_DIR}/lib/macOS_arm64/libCompressonator$<$<CONFIG:Debug>:d>.a"
+                "${COMPRESSONATOR_DIR}/lib/macOS_arm64/libCMP_Core$<$<CONFIG:Debug>:d>.a")
     elseif(UNIX)
         target_link_libraries(${TARGET} PRIVATE
                 "${COMPRESSONATOR_DIR}/lib/linux_x86_64/libCompressonator$<$<CONFIG:Debug>:d>.a"
diff --git a/ext/compressonator/lib/macOS/libCMP_Core.a b/ext/compressonator/lib/macOS_arm64/libCMP_Core.a
similarity index 98%
rename from ext/compressonator/lib/macOS/libCMP_Core.a
rename to ext/compressonator/lib/macOS_arm64/libCMP_Core.a
index b334e5f1f..69947a390 100644
Binary files a/ext/compressonator/lib/macOS/libCMP_Core.a and b/ext/compressonator/lib/macOS_arm64/libCMP_Core.a differ
diff --git a/ext/compressonator/lib/macOS/libCMP_Cored.a b/ext/compressonator/lib/macOS_arm64/libCMP_Cored.a
similarity index 51%
rename from ext/compressonator/lib/macOS/libCMP_Cored.a
rename to ext/compressonator/lib/macOS_arm64/libCMP_Cored.a
index 523764d44..68c2458ee 100644
Binary files a/ext/compressonator/lib/macOS/libCMP_Cored.a and b/ext/compressonator/lib/macOS_arm64/libCMP_Cored.a differ
diff --git a/ext/compressonator/lib/macOS/libCompressonator.a b/ext/compressonator/lib/macOS_arm64/libCompressonator.a
similarity index 89%
rename from ext/compressonator/lib/macOS/libCompressonator.a
rename to ext/compressonator/lib/macOS_arm64/libCompressonator.a
index 6ac269e9e..2fa0c0702 100644
Binary files a/ext/compressonator/lib/macOS/libCompressonator.a and b/ext/compressonator/lib/macOS_arm64/libCompressonator.a differ
diff --git a/ext/compressonator/lib/macOS/libCompressonatord.a b/ext/compressonator/lib/macOS_arm64/libCompressonatord.a
similarity index 61%
rename from ext/compressonator/lib/macOS/libCompressonatord.a
rename to ext/compressonator/lib/macOS_arm64/libCompressonatord.a
index 5c5e41bf9..203957a5b 100644
Binary files a/ext/compressonator/lib/macOS/libCompressonatord.a and b/ext/compressonator/lib/macOS_arm64/libCompressonatord.a differ
diff --git a/ext/compressonator/lib/win_x86_64/CMP_Core_MD.lib b/ext/compressonator/lib/win_x86_64/CMP_Core_MD.lib
index 0efe277be..9efa4b405 100644
Binary files a/ext/compressonator/lib/win_x86_64/CMP_Core_MD.lib and b/ext/compressonator/lib/win_x86_64/CMP_Core_MD.lib differ
diff --git a/ext/compressonator/lib/win_x86_64/CMP_Core_MD_AVX.lib b/ext/compressonator/lib/win_x86_64/CMP_Core_MD_AVX.lib
index b578fb151..4fc8a1a28 100644
Binary files a/ext/compressonator/lib/win_x86_64/CMP_Core_MD_AVX.lib and b/ext/compressonator/lib/win_x86_64/CMP_Core_MD_AVX.lib differ
diff --git a/ext/compressonator/lib/win_x86_64/CMP_Core_MD_AVX512.lib b/ext/compressonator/lib/win_x86_64/CMP_Core_MD_AVX512.lib
index c08519b47..25f22e752 100644
Binary files a/ext/compressonator/lib/win_x86_64/CMP_Core_MD_AVX512.lib and b/ext/compressonator/lib/win_x86_64/CMP_Core_MD_AVX512.lib differ
diff --git a/ext/compressonator/lib/win_x86_64/CMP_Core_MD_AVX512d.lib b/ext/compressonator/lib/win_x86_64/CMP_Core_MD_AVX512d.lib
index 97202b42f..e10065294 100644
Binary files a/ext/compressonator/lib/win_x86_64/CMP_Core_MD_AVX512d.lib and b/ext/compressonator/lib/win_x86_64/CMP_Core_MD_AVX512d.lib differ
diff --git a/ext/compressonator/lib/win_x86_64/CMP_Core_MD_AVXd.lib b/ext/compressonator/lib/win_x86_64/CMP_Core_MD_AVXd.lib
index 99a1b33cc..482bcc8e5 100644
Binary files a/ext/compressonator/lib/win_x86_64/CMP_Core_MD_AVXd.lib and b/ext/compressonator/lib/win_x86_64/CMP_Core_MD_AVXd.lib differ
diff --git a/ext/compressonator/lib/win_x86_64/CMP_Core_MD_SSE.lib b/ext/compressonator/lib/win_x86_64/CMP_Core_MD_SSE.lib
index 014aebdd5..de5062d49 100644
Binary files a/ext/compressonator/lib/win_x86_64/CMP_Core_MD_SSE.lib and b/ext/compressonator/lib/win_x86_64/CMP_Core_MD_SSE.lib differ
diff --git a/ext/compressonator/lib/win_x86_64/CMP_Core_MD_SSEd.lib b/ext/compressonator/lib/win_x86_64/CMP_Core_MD_SSEd.lib
index 61d410a9a..f3c42d72f 100644
Binary files a/ext/compressonator/lib/win_x86_64/CMP_Core_MD_SSEd.lib and b/ext/compressonator/lib/win_x86_64/CMP_Core_MD_SSEd.lib differ
diff --git a/ext/compressonator/lib/win_x86_64/CMP_Core_MDd.lib b/ext/compressonator/lib/win_x86_64/CMP_Core_MDd.lib
index 95c3ef771..0d6f7aa66 100644
Binary files a/ext/compressonator/lib/win_x86_64/CMP_Core_MDd.lib and b/ext/compressonator/lib/win_x86_64/CMP_Core_MDd.lib differ
diff --git a/ext/compressonator/lib/win_x86_64/CMP_Core_MT.lib b/ext/compressonator/lib/win_x86_64/CMP_Core_MT.lib
index 62660ca70..7a56db967 100644
Binary files a/ext/compressonator/lib/win_x86_64/CMP_Core_MT.lib and b/ext/compressonator/lib/win_x86_64/CMP_Core_MT.lib differ
diff --git a/ext/compressonator/lib/win_x86_64/CMP_Core_MT_AVX.lib b/ext/compressonator/lib/win_x86_64/CMP_Core_MT_AVX.lib
index 87defa8f6..f0b4cfc27 100644
Binary files a/ext/compressonator/lib/win_x86_64/CMP_Core_MT_AVX.lib and b/ext/compressonator/lib/win_x86_64/CMP_Core_MT_AVX.lib differ
diff --git a/ext/compressonator/lib/win_x86_64/CMP_Core_MT_AVX512.lib b/ext/compressonator/lib/win_x86_64/CMP_Core_MT_AVX512.lib
index 7a8681eff..3150b8ed9 100644
Binary files a/ext/compressonator/lib/win_x86_64/CMP_Core_MT_AVX512.lib and b/ext/compressonator/lib/win_x86_64/CMP_Core_MT_AVX512.lib differ
diff --git a/ext/compressonator/lib/win_x86_64/CMP_Core_MT_AVX512d.lib b/ext/compressonator/lib/win_x86_64/CMP_Core_MT_AVX512d.lib
index 41ef859f2..af3fbcc5e 100644
Binary files a/ext/compressonator/lib/win_x86_64/CMP_Core_MT_AVX512d.lib and b/ext/compressonator/lib/win_x86_64/CMP_Core_MT_AVX512d.lib differ
diff --git a/ext/compressonator/lib/win_x86_64/CMP_Core_MT_AVXd.lib b/ext/compressonator/lib/win_x86_64/CMP_Core_MT_AVXd.lib
index 9db8d8d7c..932ae1f1b 100644
Binary files a/ext/compressonator/lib/win_x86_64/CMP_Core_MT_AVXd.lib and b/ext/compressonator/lib/win_x86_64/CMP_Core_MT_AVXd.lib differ
diff --git a/ext/compressonator/lib/win_x86_64/CMP_Core_MT_SSE.lib b/ext/compressonator/lib/win_x86_64/CMP_Core_MT_SSE.lib
index 2bf68a0ee..05f719b66 100644
Binary files a/ext/compressonator/lib/win_x86_64/CMP_Core_MT_SSE.lib and b/ext/compressonator/lib/win_x86_64/CMP_Core_MT_SSE.lib differ
diff --git a/ext/compressonator/lib/win_x86_64/CMP_Core_MT_SSEd.lib b/ext/compressonator/lib/win_x86_64/CMP_Core_MT_SSEd.lib
index 538cee480..b720f1c7a 100644
Binary files a/ext/compressonator/lib/win_x86_64/CMP_Core_MT_SSEd.lib and b/ext/compressonator/lib/win_x86_64/CMP_Core_MT_SSEd.lib differ
diff --git a/ext/compressonator/lib/win_x86_64/CMP_Core_MTd.lib b/ext/compressonator/lib/win_x86_64/CMP_Core_MTd.lib
index 1a21c9d03..5c19763da 100644
Binary files a/ext/compressonator/lib/win_x86_64/CMP_Core_MTd.lib and b/ext/compressonator/lib/win_x86_64/CMP_Core_MTd.lib differ
diff --git a/ext/compressonator/lib/win_x86_64/Compressonator_MD.lib b/ext/compressonator/lib/win_x86_64/Compressonator_MD.lib
index 1c3f6b54c..f6cb96cda 100644
Binary files a/ext/compressonator/lib/win_x86_64/Compressonator_MD.lib and b/ext/compressonator/lib/win_x86_64/Compressonator_MD.lib differ
diff --git a/ext/compressonator/lib/win_x86_64/Compressonator_MDd.lib b/ext/compressonator/lib/win_x86_64/Compressonator_MDd.lib
index 46aef8db7..79d3aad1c 100644
Binary files a/ext/compressonator/lib/win_x86_64/Compressonator_MDd.lib and b/ext/compressonator/lib/win_x86_64/Compressonator_MDd.lib differ
diff --git a/ext/compressonator/lib/win_x86_64/Compressonator_MT.lib b/ext/compressonator/lib/win_x86_64/Compressonator_MT.lib
index 49197349a..9f7df3aae 100644
Binary files a/ext/compressonator/lib/win_x86_64/Compressonator_MT.lib and b/ext/compressonator/lib/win_x86_64/Compressonator_MT.lib differ
diff --git a/ext/compressonator/lib/win_x86_64/Compressonator_MTd.lib b/ext/compressonator/lib/win_x86_64/Compressonator_MTd.lib
index d863c69f8..2b3d81cc3 100644
Binary files a/ext/compressonator/lib/win_x86_64/Compressonator_MTd.lib and b/ext/compressonator/lib/win_x86_64/Compressonator_MTd.lib differ
diff --git a/ext/half/CMakeLists.txt b/ext/half/CMakeLists.txt
new file mode 100644
index 000000000..516cacabb
--- /dev/null
+++ b/ext/half/CMakeLists.txt
@@ -0,0 +1,11 @@
+cmake_minimum_required(VERSION 3.19)
+project(sourcepp_half)
+set(CMAKE_CXX_STANDARD 20)
+
+# Header-only INTERFACE target; listing the header as a source needs CMake 3.19+
+add_library(${PROJECT_NAME} INTERFACE
+        "${CMAKE_CURRENT_SOURCE_DIR}/include/half.hpp")
+
+target_include_directories(${PROJECT_NAME} INTERFACE
+        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+        $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
diff --git a/ext/half/LICENSE b/ext/half/LICENSE
new file mode 100644
index 000000000..6023222b0
--- /dev/null
+++ b/ext/half/LICENSE
@@ -0,0 +1,21 @@
+The MIT License
+
+Copyright (c) 2012-2021 Christian Rau
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/ext/half/include/half.hpp b/ext/half/include/half.hpp
new file mode 100644
index 000000000..d0a882dd6
--- /dev/null
+++ b/ext/half/include/half.hpp
@@ -0,0 +1,4601 @@
+// half - IEEE 754-based half-precision floating-point library.
+//
+// Copyright (c) 2012-2021 Christian Rau <rauy@users.sourceforge.net>
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation 
+// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
+// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the 
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE 
+// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 
+// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+// Version 2.2.0
+
+/// \file
+/// Main header file for half-precision functionality.
+
+#ifndef HALF_HALF_HPP
+#define HALF_HALF_HPP
+
+#define HALF_GCC_VERSION (__GNUC__*100+__GNUC_MINOR__)
+
+#if defined(__INTEL_COMPILER)
+	#define HALF_ICC_VERSION __INTEL_COMPILER
+#elif defined(__ICC)
+	#define HALF_ICC_VERSION __ICC
+#elif defined(__ICL)
+	#define HALF_ICC_VERSION __ICL
+#else
+	#define HALF_ICC_VERSION 0
+#endif
+
+// check C++11 language features
+#if defined(__clang__)										// clang
+	#if __has_feature(cxx_static_assert) && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT)
+		#define HALF_ENABLE_CPP11_STATIC_ASSERT 1
+	#endif
+	#if __has_feature(cxx_constexpr) && !defined(HALF_ENABLE_CPP11_CONSTEXPR)
+		#define HALF_ENABLE_CPP11_CONSTEXPR 1
+	#endif
+	#if __has_feature(cxx_noexcept) && !defined(HALF_ENABLE_CPP11_NOEXCEPT)
+		#define HALF_ENABLE_CPP11_NOEXCEPT 1
+	#endif
+	#if __has_feature(cxx_user_literals) && !defined(HALF_ENABLE_CPP11_USER_LITERALS)
+		#define HALF_ENABLE_CPP11_USER_LITERALS 1
+	#endif
+	#if __has_feature(cxx_thread_local) && !defined(HALF_ENABLE_CPP11_THREAD_LOCAL)
+		#define HALF_ENABLE_CPP11_THREAD_LOCAL 1
+	#endif
+	#if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L) && !defined(HALF_ENABLE_CPP11_LONG_LONG)
+		#define HALF_ENABLE_CPP11_LONG_LONG 1
+	#endif
+#elif HALF_ICC_VERSION && defined(__INTEL_CXX11_MODE__)		// Intel C++
+	#if HALF_ICC_VERSION >= 1500 && !defined(HALF_ENABLE_CPP11_THREAD_LOCAL)
+		#define HALF_ENABLE_CPP11_THREAD_LOCAL 1
+	#endif
+	#if HALF_ICC_VERSION >= 1500 && !defined(HALF_ENABLE_CPP11_USER_LITERALS)
+		#define HALF_ENABLE_CPP11_USER_LITERALS 1
+	#endif
+	#if HALF_ICC_VERSION >= 1400 && !defined(HALF_ENABLE_CPP11_CONSTEXPR)
+		#define HALF_ENABLE_CPP11_CONSTEXPR 1
+	#endif
+	#if HALF_ICC_VERSION >= 1400 && !defined(HALF_ENABLE_CPP11_NOEXCEPT)
+		#define HALF_ENABLE_CPP11_NOEXCEPT 1
+	#endif
+	#if HALF_ICC_VERSION >= 1110 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT)
+		#define HALF_ENABLE_CPP11_STATIC_ASSERT 1
+	#endif
+	#if HALF_ICC_VERSION >= 1110 && !defined(HALF_ENABLE_CPP11_LONG_LONG)
+		#define HALF_ENABLE_CPP11_LONG_LONG 1
+	#endif
+#elif defined(__GNUC__)										// gcc
+	#if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L
+		#if HALF_GCC_VERSION >= 408 && !defined(HALF_ENABLE_CPP11_THREAD_LOCAL)
+			#define HALF_ENABLE_CPP11_THREAD_LOCAL 1
+		#endif
+		#if HALF_GCC_VERSION >= 407 && !defined(HALF_ENABLE_CPP11_USER_LITERALS)
+			#define HALF_ENABLE_CPP11_USER_LITERALS 1
+		#endif
+		#if HALF_GCC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_CONSTEXPR)
+			#define HALF_ENABLE_CPP11_CONSTEXPR 1
+		#endif
+		#if HALF_GCC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_NOEXCEPT)
+			#define HALF_ENABLE_CPP11_NOEXCEPT 1
+		#endif
+		#if HALF_GCC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT)
+			#define HALF_ENABLE_CPP11_STATIC_ASSERT 1
+		#endif
+		#if !defined(HALF_ENABLE_CPP11_LONG_LONG)
+			#define HALF_ENABLE_CPP11_LONG_LONG 1
+		#endif
+	#endif
+	#define HALF_TWOS_COMPLEMENT_INT 1
+#elif defined(_MSC_VER)										// Visual C++
+	#if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_THREAD_LOCAL)
+		#define HALF_ENABLE_CPP11_THREAD_LOCAL 1
+	#endif
+	#if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_USER_LITERALS)
+		#define HALF_ENABLE_CPP11_USER_LITERALS 1
+	#endif
+	#if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_CONSTEXPR)
+		#define HALF_ENABLE_CPP11_CONSTEXPR 1
+	#endif
+	#if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_NOEXCEPT)
+		#define HALF_ENABLE_CPP11_NOEXCEPT 1
+	#endif
+	#if _MSC_VER >= 1600 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT)
+		#define HALF_ENABLE_CPP11_STATIC_ASSERT 1
+	#endif
+	#if _MSC_VER >= 1310 && !defined(HALF_ENABLE_CPP11_LONG_LONG)
+		#define HALF_ENABLE_CPP11_LONG_LONG 1
+	#endif
+	#define HALF_TWOS_COMPLEMENT_INT 1
+	#define HALF_POP_WARNINGS 1
+	#pragma warning(push)
+	#pragma warning(disable : 4099 4127 4146)	//struct vs class, constant in if, negative unsigned
+#endif
+
+// check C++11 library features
+#include <utility>
+#if defined(_LIBCPP_VERSION)								// libc++
+	#if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103
+		#ifndef HALF_ENABLE_CPP11_TYPE_TRAITS
+			#define HALF_ENABLE_CPP11_TYPE_TRAITS 1
+		#endif
+		#ifndef HALF_ENABLE_CPP11_CSTDINT
+			#define HALF_ENABLE_CPP11_CSTDINT 1
+		#endif
+		#ifndef HALF_ENABLE_CPP11_CMATH
+			#define HALF_ENABLE_CPP11_CMATH 1
+		#endif
+		#ifndef HALF_ENABLE_CPP11_HASH
+			#define HALF_ENABLE_CPP11_HASH 1
+		#endif
+		#ifndef HALF_ENABLE_CPP11_CFENV
+			#define HALF_ENABLE_CPP11_CFENV 1
+		#endif
+	#endif
+#elif defined(__GLIBCXX__)									// libstdc++
+	#if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103
+		#ifdef __clang__
+			#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_TYPE_TRAITS)
+				#define HALF_ENABLE_CPP11_TYPE_TRAITS 1
+			#endif
+			#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CSTDINT)
+				#define HALF_ENABLE_CPP11_CSTDINT 1
+			#endif
+			#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CMATH)
+				#define HALF_ENABLE_CPP11_CMATH 1
+			#endif
+			#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_HASH)
+				#define HALF_ENABLE_CPP11_HASH 1
+			#endif
+			#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CFENV)
+				#define HALF_ENABLE_CPP11_CFENV 1
+			#endif
+		#else
+			#if HALF_GCC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_TYPE_TRAITS)
+				#define HALF_ENABLE_CPP11_TYPE_TRAITS 1
+			#endif
+			#if HALF_GCC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CSTDINT)
+				#define HALF_ENABLE_CPP11_CSTDINT 1
+			#endif
+			#if HALF_GCC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CMATH)
+				#define HALF_ENABLE_CPP11_CMATH 1
+			#endif
+			#if HALF_GCC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_HASH)
+				#define HALF_ENABLE_CPP11_HASH 1
+			#endif
+			#if HALF_GCC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CFENV)
+				#define HALF_ENABLE_CPP11_CFENV 1
+			#endif
+		#endif
+	#endif
+#elif defined(_CPPLIB_VER)									// Dinkumware/Visual C++
+	#if _CPPLIB_VER >= 520 && !defined(HALF_ENABLE_CPP11_TYPE_TRAITS)
+		#define HALF_ENABLE_CPP11_TYPE_TRAITS 1
+	#endif
+	#if _CPPLIB_VER >= 520 && !defined(HALF_ENABLE_CPP11_CSTDINT)
+			#define HALF_ENABLE_CPP11_CSTDINT 1
+	#endif
+	#if _CPPLIB_VER >= 520 && !defined(HALF_ENABLE_CPP11_HASH)
+		#define HALF_ENABLE_CPP11_HASH 1
+	#endif
+	#if _CPPLIB_VER >= 610 && !defined(HALF_ENABLE_CPP11_CMATH)
+		#define HALF_ENABLE_CPP11_CMATH 1
+	#endif
+	#if _CPPLIB_VER >= 610 && !defined(HALF_ENABLE_CPP11_CFENV)
+		#define HALF_ENABLE_CPP11_CFENV 1
+	#endif
+#endif
+#undef HALF_GCC_VERSION
+#undef HALF_ICC_VERSION
+
+// any error throwing C++ exceptions?
+#if defined(HALF_ERRHANDLING_THROW_INVALID) || defined(HALF_ERRHANDLING_THROW_DIVBYZERO) || defined(HALF_ERRHANDLING_THROW_OVERFLOW) || defined(HALF_ERRHANDLING_THROW_UNDERFLOW) || defined(HALF_ERRHANDLING_THROW_INEXACT)
+#define HALF_ERRHANDLING_THROWS 1
+#endif
+
+// any error handling enabled?
+#define HALF_ERRHANDLING	(HALF_ERRHANDLING_FLAGS||HALF_ERRHANDLING_ERRNO||HALF_ERRHANDLING_FENV||HALF_ERRHANDLING_THROWS)
+
+#if HALF_ERRHANDLING
+	#define HALF_UNUSED_NOERR(name) name
+#else
+	#define HALF_UNUSED_NOERR(name)
+#endif
+
+// support constexpr
+#if HALF_ENABLE_CPP11_CONSTEXPR
+	#define HALF_CONSTEXPR				constexpr
+	#define HALF_CONSTEXPR_CONST		constexpr
+	#if HALF_ERRHANDLING
+		#define HALF_CONSTEXPR_NOERR
+	#else
+		#define HALF_CONSTEXPR_NOERR	constexpr
+	#endif
+#else
+	#define HALF_CONSTEXPR
+	#define HALF_CONSTEXPR_CONST		const
+	#define HALF_CONSTEXPR_NOERR
+#endif
+
+// support noexcept
+#if HALF_ENABLE_CPP11_NOEXCEPT
+	#define HALF_NOEXCEPT	noexcept
+	#define HALF_NOTHROW	noexcept
+#else
+	#define HALF_NOEXCEPT
+	#define HALF_NOTHROW	throw()
+#endif
+
+// support thread storage
+#if HALF_ENABLE_CPP11_THREAD_LOCAL
+	#define HALF_THREAD_LOCAL	thread_local
+#else
+	#define HALF_THREAD_LOCAL	static
+#endif
+
+#include <utility>
+#include <algorithm>
+#include <istream>
+#include <ostream>
+#include <limits>
+#include <stdexcept>
+#include <climits>
+#include <cmath>
+#include <cstring>
+#include <cstdlib>
+#if HALF_ENABLE_CPP11_TYPE_TRAITS
+	#include <type_traits>
+#endif
+#if HALF_ENABLE_CPP11_CSTDINT
+	#include <cstdint>
+#endif
+#if HALF_ERRHANDLING_ERRNO
+	#include <cerrno>
+#endif
+#if HALF_ENABLE_CPP11_CFENV
+	#include <cfenv>
+#endif
+#if HALF_ENABLE_CPP11_HASH
+	#include <functional>
+#endif
+
+
+#ifndef HALF_ENABLE_F16C_INTRINSICS
+	/// Enable F16C instruction set intrinsics.
+	/// Defining this to 1 enables the use of [F16C compiler intrinsics](https://en.wikipedia.org/wiki/F16C) for converting between 
+	/// half-precision and single-precision values which may result in improved performance. This will not perform additional checks 
+	/// for support of the F16C instruction set, so an appropriate target platform is required when enabling this feature.
+	///
+	/// Unless predefined it will be enabled automatically when the `__F16C__` symbol is defined, which some compilers do on supporting platforms.
+	#define HALF_ENABLE_F16C_INTRINSICS __F16C__
+#endif
+#if HALF_ENABLE_F16C_INTRINSICS
+	#include <immintrin.h>
+#endif
+
+#ifdef HALF_DOXYGEN_ONLY
+/// Type for internal floating-point computations.
+/// This can be predefined to a built-in floating-point type (`float`, `double` or `long double`) to override the internal 
+/// half-precision implementation to use this type for computing arithmetic operations and mathematical function (if available). 
+/// This can result in improved performance for arithmetic operators and mathematical functions but might cause results to 
+/// deviate from the specified half-precision rounding mode and inhibits proper detection of half-precision exceptions.
+#define HALF_ARITHMETIC_TYPE (undefined)
+
+/// Enable internal exception flags.
+/// Defining this to 1 causes operations on half-precision values to raise internal floating-point exception flags according to 
+/// the IEEE 754 standard. These can then be cleared and checked with clearexcept(), testexcept().
+#define HALF_ERRHANDLING_FLAGS	0
+
+/// Enable exception propagation to `errno`.
+/// Defining this to 1 causes operations on half-precision values to propagate floating-point exceptions to 
+/// [errno](https://en.cppreference.com/w/cpp/error/errno) from `<cerrno>`. Specifically this will propagate domain errors as 
+/// [EDOM](https://en.cppreference.com/w/cpp/error/errno_macros) and pole, overflow and underflow errors as 
+/// [ERANGE](https://en.cppreference.com/w/cpp/error/errno_macros). Inexact errors won't be propagated.
+#define HALF_ERRHANDLING_ERRNO	0
+
+/// Enable exception propagation to built-in floating-point platform.
+/// Defining this to 1 causes operations on half-precision values to propagate floating-point exceptions to the built-in 
+/// single- and double-precision implementation's exception flags using the 
+/// [C++11 floating-point environment control](https://en.cppreference.com/w/cpp/numeric/fenv) from `<cfenv>`. However, this 
+/// does not work in reverse and single- or double-precision exceptions will not raise the corresponding half-precision 
+/// exception flags, nor will explicitly clearing flags clear the corresponding built-in flags.
+#define HALF_ERRHANDLING_FENV	0
+
+/// Throw C++ exception on domain errors.
+/// Defining this to a string literal causes operations on half-precision values to throw a 
+/// [std::domain_error](https://en.cppreference.com/w/cpp/error/domain_error) with the specified message on domain errors.
+#define HALF_ERRHANDLING_THROW_INVALID		(undefined)
+
+/// Throw C++ exception on pole errors.
+/// Defining this to a string literal causes operations on half-precision values to throw a 
+/// [std::domain_error](https://en.cppreference.com/w/cpp/error/domain_error) with the specified message on pole errors.
+#define HALF_ERRHANDLING_THROW_DIVBYZERO	(undefined)
+
+/// Throw C++ exception on overflow errors.
+/// Defining this to a string literal causes operations on half-precision values to throw a 
+/// [std::overflow_error](https://en.cppreference.com/w/cpp/error/overflow_error) with the specified message on overflows.
+#define HALF_ERRHANDLING_THROW_OVERFLOW		(undefined)
+
+/// Throw C++ exception on underflow errors.
+/// Defining this to a string literal causes operations on half-precision values to throw a 
+/// [std::underflow_error](https://en.cppreference.com/w/cpp/error/underflow_error) with the specified message on underflows.
+#define HALF_ERRHANDLING_THROW_UNDERFLOW	(undefined)
+
+/// Throw C++ exception on rounding errors.
+/// Defining this to a string literal causes operations on half-precision values to throw a 
+/// [std::range_error](https://en.cppreference.com/w/cpp/error/range_error) with the specified message on general rounding errors.
+#define HALF_ERRHANDLING_THROW_INEXACT		(undefined)
+#endif
+
+#ifndef HALF_ERRHANDLING_OVERFLOW_TO_INEXACT
+/// Raise INEXACT exception on overflow.
+/// Defining this to 1 (default) causes overflow errors to automatically raise inexact exceptions in addition.
+/// These will be raised after any possible handling of the overflow exception.
+#define HALF_ERRHANDLING_OVERFLOW_TO_INEXACT	1
+#endif
+
+#ifndef HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT
+/// Raise INEXACT exception on underflow.
+/// Defining this to 1 (default) causes underflow errors to automatically raise inexact exceptions in addition.
+/// These will be raised after any possible handling of the underflow exception.
+///
+/// **Note:** This will actually cause underflow (and the accompanying inexact) exceptions to be raised *only* when the result 
+/// is inexact, while if disabled bare underflow errors will be raised for *any* (possibly exact) subnormal result.
+#define HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT	1
+#endif
+
+/// Default rounding mode.
+/// This specifies the rounding mode used for all conversions between [half](\ref half_float::half)s and more precise types 
+/// (unless using half_cast() and specifying the rounding mode directly) as well as in arithmetic operations and mathematical 
+/// functions. It can be redefined (before including half.hpp) to one of the standard rounding modes using their respective 
+/// constants or the equivalent values of 
+/// [std::float_round_style](https://en.cppreference.com/w/cpp/types/numeric_limits/float_round_style):
+///
+/// `std::float_round_style`         | value | rounding
+/// ---------------------------------|-------|-------------------------
+/// `std::round_indeterminate`       | -1    | fastest
+/// `std::round_toward_zero`         | 0     | toward zero
+/// `std::round_to_nearest`          | 1     | to nearest (default)
+/// `std::round_toward_infinity`     | 2     | toward positive infinity
+/// `std::round_toward_neg_infinity` | 3     | toward negative infinity
+///
+/// By default this is set to `1` (`std::round_to_nearest`), which rounds results to the nearest representable value. It can even 
+/// be set to [std::numeric_limits<float>::round_style](https://en.cppreference.com/w/cpp/types/numeric_limits/round_style) to synchronize 
+/// the rounding mode with that of the built-in single-precision implementation (which is likely `std::round_to_nearest`, though).
+#ifndef HALF_ROUND_STYLE
+	#define HALF_ROUND_STYLE	1		// = std::round_to_nearest
+#endif
+
+/// Value signaling overflow.
+/// In correspondence with `HUGE_VAL[F|L]` from `<cmath>` this symbol expands to a positive value signaling the overflow of an 
+/// operation, in particular it just evaluates to positive infinity.
+///
+/// **See also:** Documentation for [HUGE_VAL](https://en.cppreference.com/w/cpp/numeric/math/HUGE_VAL)
+#define HUGE_VALH	std::numeric_limits<half_float::half>::infinity()
+
+/// Fast half-precision fma function.
+/// This symbol is defined if the fma() function generally executes as fast as, or faster than, a separate 
+/// half-precision multiplication followed by an addition, which is always the case.
+///
+/// **See also:** Documentation for [FP_FAST_FMA](https://en.cppreference.com/w/cpp/numeric/math/fma)
+#define FP_FAST_FMAH	1
+
+///	Half rounding mode.
+/// In correspondence with `FLT_ROUNDS` from `<cfloat>` this symbol expands to the rounding mode used for 
+/// half-precision operations. It is an alias for [HALF_ROUND_STYLE](\ref HALF_ROUND_STYLE).
+///
+/// **See also:** Documentation for [FLT_ROUNDS](https://en.cppreference.com/w/cpp/types/climits/FLT_ROUNDS)
+#define HLF_ROUNDS	HALF_ROUND_STYLE
+
+#ifndef FP_ILOGB0
+	#define FP_ILOGB0		INT_MIN
+#endif
+#ifndef FP_ILOGBNAN
+	#define FP_ILOGBNAN		INT_MAX
+#endif
+#ifndef FP_SUBNORMAL
+	#define FP_SUBNORMAL	0
+#endif
+#ifndef FP_ZERO
+	#define FP_ZERO			1
+#endif
+#ifndef FP_NAN
+	#define FP_NAN			2
+#endif
+#ifndef FP_INFINITE
+	#define FP_INFINITE		3
+#endif
+#ifndef FP_NORMAL
+	#define FP_NORMAL		4
+#endif
+
+#if !HALF_ENABLE_CPP11_CFENV && !defined(FE_ALL_EXCEPT)
+	#define FE_INVALID		0x10
+	#define FE_DIVBYZERO	0x08
+	#define FE_OVERFLOW		0x04
+	#define FE_UNDERFLOW	0x02
+	#define FE_INEXACT		0x01
+	#define FE_ALL_EXCEPT	(FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT)
+#endif
+
+
+/// Main namespace for half-precision functionality.
+/// This namespace contains all the functionality provided by the library.
+namespace half_float
+{
+	class half;
+
+#if HALF_ENABLE_CPP11_USER_LITERALS
+	/// Library-defined half-precision literals.
+	/// Import this namespace to enable half-precision floating-point literals:
+	/// ~~~~{.cpp}
+	/// using namespace half_float::literal;
+	/// half_float::half h = 4.2_h;
+	/// ~~~~
+	namespace literal
+	{
+		half operator "" _h(long double);	// declaration only at this point; no body is given here
+	}
+#endif
+
+	/// \internal
+	/// \brief Implementation details.
+	namespace detail
+	{
+	#if HALF_ENABLE_CPP11_TYPE_TRAITS
+		/// Conditional type: member `type` is \a T if \a B is `true` and \a F otherwise (forwards to `std::conditional`).
+		template<bool B,typename T,typename F> struct conditional : std::conditional<B,T,F> {};
+
+		/// Helper for tag dispatching: a distinct type per boolean value (forwards to `std::integral_constant`).
+		template<bool B> struct bool_type : std::integral_constant<bool,B> {};
+		using std::true_type;
+		using std::false_type;
+
+		/// Type traits for floating-point types (forwards to `std::is_floating_point`).
+		template<typename T> struct is_float : std::is_floating_point<T> {};
+	#else
+		/// Conditional type: hand-written replacement used when `<type_traits>` is not enabled.
+		template<bool,typename T,typename> struct conditional { typedef T type; };	// primary template: selects T
+		template<typename T,typename F> struct conditional<false,T,F> { typedef F type; };	// false case: selects F
+
+		/// Helper for tag dispatching: empty tag types distinguished only by the boolean parameter.
+		template<bool> struct bool_type {};
+		typedef bool_type<true> true_type;
+		typedef bool_type<false> false_type;
+
+		/// Type traits for floating-point types: false by default, true for the three builtin floating-point types.
+		template<typename> struct is_float : false_type {};
+		template<typename T> struct is_float<const T> : is_float<T> {};	// cv-qualified types answer like the unqualified type
+		template<typename T> struct is_float<volatile T> : is_float<T> {};
+		template<typename T> struct is_float<const volatile T> : is_float<T> {};
+		template<> struct is_float<float> : true_type {};
+		template<> struct is_float<double> : true_type {};
+		template<> struct is_float<long double> : true_type {};
+	#endif
+
+		/// Type traits for floating-point bits: member `type` names an unsigned integer type for \a T (`unsigned char` unless specialized below).
+		template<typename T> struct bits { typedef unsigned char type; };
+		template<typename T> struct bits<const T> : bits<T> {};	// cv-qualified types answer like the unqualified type
+		template<typename T> struct bits<volatile T> : bits<T> {};
+		template<typename T> struct bits<const volatile T> : bits<T> {};
+
+	#if HALF_ENABLE_CPP11_CSTDINT
+		/// Unsigned integer of (at least) 16 bits width.
+		typedef std::uint_least16_t uint16;
+
+		/// Fastest unsigned integer of (at least) 32 bits width.
+		typedef std::uint_fast32_t uint32;
+
+		/// Fastest signed integer of (at least) 32 bits width.
+		typedef std::int_fast32_t int32;
+
+		/// Unsigned integer of (at least) 32 bits width.
+		template<> struct bits<float> { typedef std::uint_least32_t type; };
+
+		/// Unsigned integer of (at least) 64 bits width.
+		template<> struct bits<double> { typedef std::uint_least64_t type; };
+	#else
+		/// Unsigned integer of (at least) 16 bits width.
+		typedef unsigned short uint16;
+
+		/// Fastest unsigned integer of (at least) 32 bits width.
+		typedef unsigned long uint32;
+
+		/// Fastest signed integer of (at least) 32 bits width.
+		typedef long int32;
+
+		/// Unsigned integer of (at least) 32 bits width.
+		template<> struct bits<float> : conditional<std::numeric_limits<unsigned int>::digits>=32,unsigned int,unsigned long> {};	// unsigned int if it has >= 32 value bits, else unsigned long
+
+		#if HALF_ENABLE_CPP11_LONG_LONG
+			/// Unsigned integer of (at least) 64 bits width.
+			template<> struct bits<double> : conditional<std::numeric_limits<unsigned long>::digits>=64,unsigned long,unsigned long long> {};	// unsigned long if it has >= 64 value bits, else unsigned long long
+		#else
+			/// Unsigned integer of (at least) 64 bits width.
+			template<> struct bits<double> { typedef unsigned long type; };	// NOTE(review): unsigned long is only guaranteed 32 bits by the standard; this fallback assumes it is wide enough - confirm
+		#endif
+	#endif
+
+	#ifdef HALF_ARITHMETIC_TYPE
+		/// Type to use for arithmetic computations and mathematical functions internally (only exists if `HALF_ARITHMETIC_TYPE` is predefined).
+		typedef HALF_ARITHMETIC_TYPE internal_t;
+	#endif
+
+		/// Tag type for binary construction (empty type used only to select an overload).
+		struct binary_t {};
+
+		/// Tag for binary construction (`constexpr` where supported, plain `const` otherwise).
+		HALF_CONSTEXPR_CONST binary_t binary = binary_t();
+
+		/// \name Implementation defined classification and arithmetic
+		/// \{
+
+		/// Check for infinity, using `std::isinf` when C++11 `<cmath>` is enabled and a fallback otherwise.
+		/// \tparam T argument type (builtin floating-point type)
+		/// \param arg value to query
+		/// \retval true if infinity
+		/// \retval false else
+		template<typename T> bool builtin_isinf(T arg)
+		{
+		#if HALF_ENABLE_CPP11_CMATH
+			return std::isinf(arg);
+		#elif defined(_MSC_VER)
+			return !::_finite(static_cast<double>(arg)) && !::_isnan(static_cast<double>(arg));	// neither finite nor NaN
+		#else
+			return arg == std::numeric_limits<T>::infinity() || arg == -std::numeric_limits<T>::infinity();	// compare against both signed infinities
+		#endif
+		}
+
+		/// Check for NaN, using `std::isnan` when C++11 `<cmath>` is enabled and a fallback otherwise.
+		/// \tparam T argument type (builtin floating-point type)
+		/// \param arg value to query
+		/// \retval true if not a number
+		/// \retval false else
+		template<typename T> bool builtin_isnan(T arg)
+		{
+		#if HALF_ENABLE_CPP11_CMATH
+			return std::isnan(arg);
+		#elif defined(_MSC_VER)
+			return ::_isnan(static_cast<double>(arg)) != 0;	// _isnan returns int; convert to bool explicitly
+		#else
+			return arg != arg;	// relies on NaN comparing unequal to itself
+		#endif
+		}
+
+		/// Check sign, using `std::signbit` when C++11 `<cmath>` is enabled and a comparison-based fallback otherwise.
+		/// \tparam T argument type (builtin floating-point type)
+		/// \param arg value to query
+		/// \retval true if signbit set
+		/// \retval false else
+		template<typename T> bool builtin_signbit(T arg)
+		{
+		#if HALF_ENABLE_CPP11_CMATH
+			return std::signbit(arg);
+		#else
+			return arg < T() || (arg == T() && T(1)/arg < T());	// for a zero argument the sign of 1/arg tells -0 from +0
+		#endif
+		}
+
+		/// Platform-independent sign mask, built from the most significant bit of \a arg.
+		/// \param arg integer value in two's complement
+		/// \retval -1 if \a arg negative
+		/// \retval 0 if \a arg positive
+		inline uint32 sign_mask(uint32 arg)
+		{
+			static const int N = std::numeric_limits<uint32>::digits - 1;	// index of the most significant bit of uint32
+		#if HALF_TWOS_COMPLEMENT_INT
+			return static_cast<int32>(arg) >> N;	// relies on right shift of a negative signed value replicating the sign bit
+		#else
+			return -((arg>>N)&1);	// isolate the top bit and negate it as unsigned: 0 stays 0, 1 becomes all ones
+		#endif
+		}
+
+		/// Platform-independent arithmetic right shift.
+		/// \param arg integer value in two's complement
+		/// \param i shift amount (at most 31)
+		/// \return \a arg right shifted for \a i bits with possible sign extension
+		inline uint32 arithmetic_shift(uint32 arg, int i)
+		{
+		#if HALF_TWOS_COMPLEMENT_INT	// unless predefined, only the gcc and Visual C++ branches of the compiler detection set this
+			return static_cast<int32>(arg) >> i;
+		#else	// fallback: signed division truncates toward zero, so the sign bit is subtracted to move negative results downward
+			return static_cast<int32>(arg)/(static_cast<int32>(1)<<i) - ((arg>>(std::numeric_limits<uint32>::digits-1))&1);	// NOTE(review): for a negative arg exactly divisible by 2^i this appears to give one less than a true arithmetic shift - confirm callers tolerate it
+		#endif
+		}
+
+		/// \}
+		/// \name Error handling
+		/// \{
+
+		/// Internal exception flags, stored in a function-local variable that starts at 0.
+		/// \return reference to the exception flags: one instance per thread where `HALF_THREAD_LOCAL` is `thread_local`, otherwise a single function-local static
+		inline int& errflags() { HALF_THREAD_LOCAL int flags = 0; return flags; }
+
+		/// Raise floating-point exception through every error handling mechanism enabled at compile time (does nothing if none is enabled).
+		/// \param flags exceptions to raise (combination of `FE_*` bits)
+		/// \param cond condition to raise exceptions for (nothing is raised if `false`)
+		inline void raise(int HALF_UNUSED_NOERR(flags), bool HALF_UNUSED_NOERR(cond) = true)	// parameter names are dropped when error handling is disabled, leaving them unnamed
+		{
+		#if HALF_ERRHANDLING
+			if(!cond)
+				return;
+		#if HALF_ERRHANDLING_FLAGS
+			errflags() |= flags;	// accumulate into the library's own flag word
+		#endif
+		#if HALF_ERRHANDLING_ERRNO
+			if(flags & FE_INVALID)
+				errno = EDOM;	// domain error takes precedence over range errors
+			else if(flags & (FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW))
+				errno = ERANGE;	// inexact alone does not touch errno
+		#endif
+		#if HALF_ERRHANDLING_FENV && HALF_ENABLE_CPP11_CFENV
+			std::feraiseexcept(flags);	// forward to the builtin floating-point environment
+		#endif
+		#ifdef HALF_ERRHANDLING_THROW_INVALID	// each THROW macro supplies the message for the exception thrown below
+			if(flags & FE_INVALID)
+				throw std::domain_error(HALF_ERRHANDLING_THROW_INVALID);
+		#endif
+		#ifdef HALF_ERRHANDLING_THROW_DIVBYZERO
+			if(flags & FE_DIVBYZERO)
+				throw std::domain_error(HALF_ERRHANDLING_THROW_DIVBYZERO);
+		#endif
+		#ifdef HALF_ERRHANDLING_THROW_OVERFLOW
+			if(flags & FE_OVERFLOW)
+				throw std::overflow_error(HALF_ERRHANDLING_THROW_OVERFLOW);
+		#endif
+		#ifdef HALF_ERRHANDLING_THROW_UNDERFLOW
+			if(flags & FE_UNDERFLOW)
+				throw std::underflow_error(HALF_ERRHANDLING_THROW_UNDERFLOW);
+		#endif
+		#ifdef HALF_ERRHANDLING_THROW_INEXACT
+			if(flags & FE_INEXACT)
+				throw std::range_error(HALF_ERRHANDLING_THROW_INEXACT);
+		#endif
+		#if HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT
+			if((flags & FE_UNDERFLOW) && !(flags & FE_INEXACT))
+				raise(FE_INEXACT);	// recursive call so inexact goes through all the mechanisms above as well
+		#endif
+		#if HALF_ERRHANDLING_OVERFLOW_TO_INEXACT
+			if((flags & FE_OVERFLOW) && !(flags & FE_INEXACT))
+				raise(FE_INEXACT);	// only reached if handling the overflow above did not throw
+		#endif
+		#endif	// HALF_ERRHANDLING
+		}
+
+		/// Check and signal for any NaN (quiet or signaling) among two half-precision bit patterns.
+		/// \param x first half-precision value to check
+		/// \param y second half-precision value to check
+		/// \retval true if either \a x or \a y is NaN
+		/// \retval false else
+		/// \exception FE_INVALID if \a x or \a y is NaN
+		inline HALF_CONSTEXPR_NOERR bool compsignal(unsigned int x, unsigned int y)
+		{
+		#if HALF_ERRHANDLING
+			raise(FE_INVALID, (x&0x7FFF)>0x7C00 || (y&0x7FFF)>0x7C00);	// same NaN test as the return value below
+		#endif
+			return (x&0x7FFF) > 0x7C00 || (y&0x7FFF) > 0x7C00;	// sign bit masked off; anything above the infinity pattern 0x7C00 is NaN
+		}
+
+		/// Signal and silence signaling NaN.
+		/// \param nan half-precision NaN value
+		/// \return quiet NaN (\a nan with bit 0x200 set)
+		/// \exception FE_INVALID if \a nan is signaling NaN
+		inline HALF_CONSTEXPR_NOERR unsigned int signal(unsigned int nan)
+		{
+		#if HALF_ERRHANDLING
+			raise(FE_INVALID, !(nan&0x200));	// bit 0x200 clear marks the NaN as signaling
+		#endif
+			return nan | 0x200;	// setting bit 0x200 makes the NaN quiet
+		}
+
+		/// Signal and silence signaling NaNs among two values.
+		/// \param x first half-precision value to check
+		/// \param y second half-precision value to check
+		/// \return quiet NaN (taken from \a x if it is NaN, otherwise from \a y)
+		/// \exception FE_INVALID if \a x or \a y is signaling NaN
+		inline HALF_CONSTEXPR_NOERR unsigned int signal(unsigned int x, unsigned int y)
+		{
+		#if HALF_ERRHANDLING
+			raise(FE_INVALID, ((x&0x7FFF)>0x7C00 && !(x&0x200)) || ((y&0x7FFF)>0x7C00 && !(y&0x200)));	// NaN with bit 0x200 clear = signaling
+		#endif
+			return ((x&0x7FFF)>0x7C00) ? (x|0x200) : (y|0x200);	// y is quieted without a NaN check; presumably callers pass at least one NaN - confirm
+		}
+
+		/// Signal and silence signaling NaNs among three values.
+		/// \param x first half-precision value to check
+		/// \param y second half-precision value to check
+		/// \param z third half-precision value to check
+		/// \return quiet NaN (taken from the first of \a x, \a y that is NaN, otherwise from \a z)
+		/// \exception FE_INVALID if \a x, \a y or \a z is signaling NaN
+		inline HALF_CONSTEXPR_NOERR unsigned int signal(unsigned int x, unsigned int y, unsigned int z)
+		{
+		#if HALF_ERRHANDLING
+			raise(FE_INVALID, ((x&0x7FFF)>0x7C00 && !(x&0x200)) || ((y&0x7FFF)>0x7C00 && !(y&0x200)) || ((z&0x7FFF)>0x7C00 && !(z&0x200)));	// NaN with bit 0x200 clear = signaling
+		#endif
+			return ((x&0x7FFF)>0x7C00) ? (x|0x200) : ((y&0x7FFF)>0x7C00) ? (y|0x200) : (z|0x200);	// z is quieted without a NaN check; presumably callers pass at least one NaN - confirm
+		}
+
+		/// Select value or signaling NaN.
+		/// \param x preferred half-precision value
+		/// \param y ignored half-precision value except for signaling NaN
+		/// \return quieted \a y if it is a signaling NaN and error handling is enabled, \a x otherwise
+		/// \exception FE_INVALID if \a y is signaling NaN
+		inline HALF_CONSTEXPR_NOERR unsigned int select(unsigned int x, unsigned int HALF_UNUSED_NOERR(y))
+		{
+		#if HALF_ERRHANDLING
+			return (((y&0x7FFF)>0x7C00) && !(y&0x200)) ? signal(y) : x;	// signal() raises FE_INVALID and returns y with the quiet bit set
+		#else
+			return x;	// without error handling y is never inspected
+		#endif
+		}
+
+		/// Raise domain error and return NaN.
+		/// \return quiet NaN
+		/// \exception FE_INVALID
+		inline HALF_CONSTEXPR_NOERR unsigned int invalid()
+		{
+		#if HALF_ERRHANDLING
+			raise(FE_INVALID);
+		#endif
+			return 0x7FFF;	// positive NaN pattern; the quiet bit 0x200 is set
+		}
+
+		/// Raise pole error and return infinity.
+		/// \param sign half-precision value with sign bit only
+		/// \return half-precision infinity with sign of \a sign
+		/// \exception FE_DIVBYZERO
+		inline HALF_CONSTEXPR_NOERR unsigned int pole(unsigned int sign = 0)
+		{
+		#if HALF_ERRHANDLING
+			raise(FE_DIVBYZERO);
+		#endif
+			return sign | 0x7C00;	// 0x7C00 is the infinity bit pattern
+		}
+
+		/// Check value for underflow (a no-op unless error handling is enabled and `HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT` is disabled).
+		/// \param arg non-zero half-precision value to check
+		/// \return \a arg
+		/// \exception FE_UNDERFLOW if arg is subnormal
+		inline HALF_CONSTEXPR_NOERR unsigned int check_underflow(unsigned int arg)
+		{
+		#if HALF_ERRHANDLING && !HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT
+			raise(FE_UNDERFLOW, !(arg&0x7C00));	// exponent bits all zero: subnormal, given the non-zero precondition
+		#endif
+			return arg;
+		}
+
+		/// \}
+		/// \name Conversion and rounding
+		/// \{
+
+		/// Half-precision overflow: yields signed infinity or the largest finite magnitude (0x7BFF), depending on rounding mode and sign.
+		/// \tparam R rounding mode to use
+		/// \param sign half-precision value with sign bit only
+		/// \return rounded overflowing half-precision value
+		/// \exception FE_OVERFLOW
+		template<std::float_round_style R> HALF_CONSTEXPR_NOERR unsigned int overflow(unsigned int sign = 0)
+		{
+		#if HALF_ERRHANDLING
+			raise(FE_OVERFLOW);
+		#endif
+			return	(R==std::round_toward_infinity) ? (sign+0x7C00-(sign>>15)) :	// positive: +infinity, negative: 0xFBFF
+					(R==std::round_toward_neg_infinity) ? (sign+0x7BFF+(sign>>15)) :	// positive: 0x7BFF, negative: -infinity
+					(R==std::round_toward_zero) ? (sign|0x7BFF) :	// largest finite magnitude for either sign
+					(sign|0x7C00);	// all other modes: signed infinity
+		}
+
+		/// Half-precision underflow: yields signed zero or the pattern one step away from zero, depending on rounding mode and sign.
+		/// \tparam R rounding mode to use
+		/// \param sign half-precision value with sign bit only
+		/// \return rounded underflowing half-precision value
+		/// \exception FE_UNDERFLOW
+		template<std::float_round_style R> HALF_CONSTEXPR_NOERR unsigned int underflow(unsigned int sign = 0)
+		{
+		#if HALF_ERRHANDLING
+			raise(FE_UNDERFLOW);
+		#endif
+			return	(R==std::round_toward_infinity) ? (sign+1-(sign>>15)) :	// positive: 0x0001, negative: 0x8000 (-0)
+					(R==std::round_toward_neg_infinity) ? (sign+(sign>>15)) :	// positive: 0x0000 (+0), negative: 0x8001
+					sign;	// all other modes: signed zero
+		}
+
+		/// Round half-precision number by conditionally adding one to its bit pattern, based on the discarded bits.
+		/// \tparam R rounding mode to use
+		/// \tparam I `true` to always raise INEXACT exception, `false` to raise only for rounded results
+		/// \param value finite half-precision number to round
+		/// \param g guard bit (most significant discarded bit)
+		/// \param s sticky bit (or of all but the most significant discarded bits)
+		/// \return rounded half-precision value
+		/// \exception FE_OVERFLOW on overflows
+		/// \exception FE_UNDERFLOW on underflows
+		/// \exception FE_INEXACT if value had to be rounded or \a I is `true`
+		template<std::float_round_style R,bool I> HALF_CONSTEXPR_NOERR unsigned int rounded(unsigned int value, int g, int s)
+		{
+		#if HALF_ERRHANDLING
+			value +=	(R==std::round_to_nearest) ? (g&(s|value)) :	// up if guard is set and sticky or the current lowest bit is set (ties to even); assumes g and s are 0 or 1 - confirm
+						(R==std::round_toward_infinity) ? (~(value>>15)&(g|s)) :	// positive values move up when any bit was discarded
+						(R==std::round_toward_neg_infinity) ? ((value>>15)&(g|s)) : 0;	// negative values grow in magnitude when any bit was discarded
+			if((value&0x7C00) == 0x7C00)	// exponent field all ones after rounding
+				raise(FE_OVERFLOW);
+			else if(value & 0x7C00)	// non-zero exponent field
+				raise(FE_INEXACT, I || (g|s)!=0);
+			else	// exponent field zero: subnormal or zero result
+				raise(FE_UNDERFLOW, !(HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT) || I || (g|s)!=0);
+			return value;
+		#else
+			return	(R==std::round_to_nearest) ? (value+(g&(s|value))) :	// same increments as above, without raising anything
+					(R==std::round_toward_infinity) ? (value+(~(value>>15)&(g|s))) :
+					(R==std::round_toward_neg_infinity) ? (value+((value>>15)&(g|s))) :
+					value;
+		#endif
+		}
+
+		/// Round half-precision number to nearest integer value.
+		/// \tparam R rounding mode to use
+		/// \tparam E tie handling for round-to-nearest: `true` for round to even, `false` for round away from zero
+		/// \tparam I `true` to raise INEXACT exception (if inexact), `false` to never raise it
+		/// \param value half-precision value to round
+		/// \return half-precision bits for nearest integral value
+		/// \exception FE_INVALID for signaling NaN
+		/// \exception FE_INEXACT if value had to be rounded and \a I is `true` (never for a zero input, which is already integral)
+		template<std::float_round_style R,bool E,bool I> unsigned int integral(unsigned int value)
+		{
+			unsigned int abs = value & 0x7FFF;
+			if(abs < 0x3C00)	// magnitude below 1.0 (0x3C00): the result can only be a signed zero or +-1.0
+			{
+				raise(FE_INEXACT, I && abs!=0);	// a zero input is already integral, so only nonzero inputs count as inexact
+				return ((R==std::round_to_nearest) ? (0x3C00&-static_cast<unsigned>(abs>=(0x3800+E))) :	// 1.0 from 0.5 (0x3800) upwards; with E an exact 0.5 goes to 0
+						(R==std::round_toward_infinity) ? (0x3C00&-(~(value>>15)&(abs!=0))) :	// 1.0 for positive nonzero inputs
+						(R==std::round_toward_neg_infinity) ? (0x3C00&-static_cast<unsigned>(value>0x8000)) :	// 1.0 in magnitude for negative nonzero inputs
+						0) | (value&0x8000);	// other modes truncate to zero; the input sign bit is always kept
+			}
+			if(abs >= 0x6400)	// exponent field >= 25: no fractional mantissa bits left, or infinity/NaN
+				return (abs>0x7C00) ? signal(value) : value;	// above the infinity pattern means NaN, which is routed through signal()
+			unsigned int exp = 25 - (abs>>10), mask = (1<<exp) - 1;	// exp = number of fractional mantissa bits (1..10), mask selects them
+			raise(FE_INEXACT, I && (value&mask));
+			return ((	(R==std::round_to_nearest) ? ((1<<(exp-1))-(~(value>>exp)&E)) :	// add half an integer step, one less when E is set and the integer LSB is even
+						(R==std::round_toward_infinity) ? (mask&((value>>15)-1)) :	// sign bit clear: add all fraction bits so any fraction carries upwards
+						(R==std::round_toward_neg_infinity) ? (mask&-(value>>15)) :	// sign bit set: add all fraction bits so any fraction carries in magnitude
+						0) + value) & ~mask;	// clear the fraction bits after the rounding increment
+		}
+
+		/// Convert fixed point to half-precision floating-point.
+		/// \tparam R rounding mode to use
+		/// \tparam F number of fractional bits in [11,31]
+		/// \tparam S `true` for signed, `false` for unsigned
+		/// \tparam N `true` for additional normalization step, `false` if already normalized to 1.F
+		/// \tparam I `true` to always raise INEXACT exception, `false` to raise only for rounded results
+		/// \param m mantissa in Q1.F fixed point format
+		/// \param exp biased exponent - 1
+		/// \param sign half-precision value with sign bit only (overwritten from the sign of \a m when \a S is `true`)
+		/// \param s sticky bit (or of all but the most significant already discarded bits)
+		/// \return value converted to half-precision
+		/// \exception FE_OVERFLOW on overflows
+		/// \exception FE_UNDERFLOW on underflows
+		/// \exception FE_INEXACT if value had to be rounded or \a I is `true`
+		template<std::float_round_style R,unsigned int F,bool S,bool N,bool I> unsigned int fixed2half(uint32 m, int exp = 14, unsigned int sign = 0, int s = 0)
+		{
+			if(S)
+			{
+				uint32 msign = sign_mask(m);	// presumably all ones when m is negative as a signed value, 0 otherwise - confirm against sign_mask()
+				m = (m^msign) - msign;	// with an all-ones mask this negates m (two's complement); with 0 it leaves m unchanged
+				sign = msign & 0x8000;	// keep only bit 15 as the half-precision sign
+			}
+			if(N)
+				for(; m<(static_cast<uint32>(1)<<F) && exp; m<<=1,--exp) ;	// shift left until bit F is set or exp reaches 0
+			else if(exp < 0)	// only checked in the non-normalizing case: shift right by an extra -exp bits and add no exponent bits
+				return rounded<R,I>(sign+(m>>(F-10-exp)), (m>>(F-11-exp))&1, s|((m&((static_cast<uint32>(1)<<(F-11-exp))-1))!=0));
+			return rounded<R,I>(sign+(exp<<10)+(m>>(F-10)), (m>>(F-11))&1, s|((m&((static_cast<uint32>(1)<<(F-11))-1))!=0));	// bit F of m lands on bit 10 and so adds 1 to the exponent field
+		}
+
+		/// Convert IEEE single-precision to half-precision.
+		/// Credit for this goes to [Jeroen van der Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf).
+		/// \tparam R rounding mode to use
+		/// \param value single-precision value to convert
+		/// \return rounded half-precision value
+		/// \exception FE_OVERFLOW on overflows
+		/// \exception FE_UNDERFLOW on underflows
+		/// \exception FE_INEXACT if value had to be rounded
+		template<std::float_round_style R> unsigned int float2half_impl(float value, true_type)
+		{
+		#if HALF_ENABLE_F16C_INTRINSICS
+			return _mm_cvtsi128_si32(_mm_cvtps_ph(_mm_set_ss(value),	// intrinsic conversion; R is mapped to the matching _MM_FROUND_* constant below
+				(R==std::round_to_nearest) ? _MM_FROUND_TO_NEAREST_INT :
+				(R==std::round_toward_zero) ? _MM_FROUND_TO_ZERO :
+				(R==std::round_toward_infinity) ? _MM_FROUND_TO_POS_INF :
+				(R==std::round_toward_neg_infinity) ? _MM_FROUND_TO_NEG_INF :
+				_MM_FROUND_CUR_DIRECTION));	// any other mode uses the current rounding direction
+		#else
+			bits<float>::type fbits;
+			std::memcpy(&fbits, &value, sizeof(float));	// copy the raw bit pattern of the float into an integer
+		#if 1	// the bit-manipulation path below is the one compiled; the table-driven variant under #else is disabled
+			unsigned int sign = (fbits>>16) & 0x8000;	// move the sign from bit 31 to bit 15
+			fbits &= 0x7FFFFFFF;
+			if(fbits >= 0x7F800000)	// exponent field all ones: infinity or NaN
+				return sign | 0x7C00 | ((fbits>0x7F800000) ? (0x200|((fbits>>13)&0x3FF)) : 0);	// NaN keeps its top 10 mantissa bits with bit 9 forced on
+			if(fbits >= 0x47800000)	// exponent field 143, i.e. magnitude >= 2^16
+				return overflow<R>(sign);
+			if(fbits >= 0x38800000)	// exponent field 113, i.e. magnitude >= 2^-14: rebias the exponent by 112 and keep the top 10 mantissa bits
+				return rounded<R,false>(sign|(((fbits>>23)-112)<<10)|((fbits>>13)&0x3FF), (fbits>>12)&1, (fbits&0xFFF)!=0);
+			if(fbits >= 0x33000000)	// exponent field 102, i.e. magnitude >= 2^-25: result has a zero exponent field before rounding
+			{
+				int i = 125 - (fbits>>23);	// bit position of the guard bit within the 24-bit significand
+				fbits = (fbits&0x7FFFFF) | 0x800000;	// mantissa with the implicit leading one made explicit
+				return rounded<R,false>(sign|(fbits>>(i+1)), (fbits>>i)&1, (fbits&((static_cast<uint32>(1)<<i)-1))!=0);
+			}
+			if(fbits != 0)	// nonzero but below 2^-25
+				return underflow<R>(sign);
+			return sign;	// input was a (signed) zero
+		#else
+			static const uint16 base_table[512] = {
+				0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
+				0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
+				0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
+				0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
+				0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
+				0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
+				0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 
+				0x0200, 0x0400, 0x0800, 0x0C00, 0x1000, 0x1400, 0x1800, 0x1C00, 0x2000, 0x2400, 0x2800, 0x2C00, 0x3000, 0x3400, 0x3800, 0x3C00, 
+				0x4000, 0x4400, 0x4800, 0x4C00, 0x5000, 0x5400, 0x5800, 0x5C00, 0x6000, 0x6400, 0x6800, 0x6C00, 0x7000, 0x7400, 0x7800, 0x7BFF, 
+				0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 
+				0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 
+				0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 
+				0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 
+				0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 
+				0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 
+				0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7C00, 
+				0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 
+				0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 
+				0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 
+				0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 
+				0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 
+				0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 
+				0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 
+				0x8200, 0x8400, 0x8800, 0x8C00, 0x9000, 0x9400, 0x9800, 0x9C00, 0xA000, 0xA400, 0xA800, 0xAC00, 0xB000, 0xB400, 0xB800, 0xBC00, 
+				0xC000, 0xC400, 0xC800, 0xCC00, 0xD000, 0xD400, 0xD800, 0xDC00, 0xE000, 0xE400, 0xE800, 0xEC00, 0xF000, 0xF400, 0xF800, 0xFBFF, 
+				0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 
+				0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 
+				0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 
+				0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 
+				0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 
+				0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 
+				0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFC00 };
+			static const unsigned char shift_table[256] = {
+				24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 
+				25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 
+				25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 
+				25, 25, 25, 25, 25, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 
+				13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 
+				24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 
+				24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 
+				24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13 };
+			int sexp = fbits >> 23, exp = sexp & 0xFF, i = shift_table[exp];	// sexp = sign and exponent (indexes base_table), exp = exponent only (indexes shift_table)
+			fbits &= 0x7FFFFF;
+			uint32 m = (fbits|((exp!=0)<<23)) & -static_cast<uint32>(exp!=0xFF);	// mantissa with implicit one for nonzero exponents, zeroed when the exponent field is all ones
+			return rounded<R,false>(base_table[sexp]+(fbits>>i), (m>>(i-1))&1, (((static_cast<uint32>(1)<<(i-1))-1)&m)!=0);
+		#endif
+		#endif
+		}
+
+		/// Convert IEEE double-precision to half-precision.
+		/// \tparam R rounding mode to use
+		/// \param value double-precision value to convert
+		/// \return rounded half-precision value
+		/// \exception FE_OVERFLOW on overflows
+		/// \exception FE_UNDERFLOW on underflows
+		/// \exception FE_INEXACT if value had to be rounded
+		template<std::float_round_style R> unsigned int float2half_impl(double value, true_type)
+		{
+		#if HALF_ENABLE_F16C_INTRINSICS
+			if(R == std::round_indeterminate)	// only the indeterminate mode takes the intrinsic route (double -> float -> half, current rounding direction)
+				return _mm_cvtsi128_si32(_mm_cvtps_ph(_mm_cvtpd_ps(_mm_set_sd(value)), _MM_FROUND_CUR_DIRECTION));
+		#endif
+			bits<double>::type dbits;
+			std::memcpy(&dbits, &value, sizeof(double));	// copy the raw bit pattern of the double into an integer
+			uint32 hi = dbits >> 32, lo = dbits & 0xFFFFFFFF;	// upper and lower 32-bit words
+			unsigned int sign = (hi>>16) & 0x8000;	// move the sign from bit 31 of hi to bit 15
+			hi &= 0x7FFFFFFF;
+			if(hi >= 0x7FF00000)	// exponent field all ones: infinity or NaN
+				return sign | 0x7C00 | ((dbits&0xFFFFFFFFFFFFF) ? (0x200|((hi>>10)&0x3FF)) : 0);	// any of the low 52 bits set means NaN: keep the top 10 mantissa bits with bit 9 forced on
+			if(hi >= 0x40F00000)	// exponent field 1039, i.e. magnitude >= 2^16
+				return overflow<R>(sign);
+			if(hi >= 0x3F100000)	// exponent field 1009, i.e. magnitude >= 2^-14: rebias by 1008; sticky covers the rest of hi and all of lo
+				return rounded<R,false>(sign|(((hi>>20)-1008)<<10)|((hi>>10)&0x3FF), (hi>>9)&1, ((hi&0x1FF)|lo)!=0);
+			if(hi >= 0x3E600000)	// exponent field 998, i.e. magnitude >= 2^-25: result has a zero exponent field before rounding
+			{
+				int i = 1018 - (hi>>20);	// bit position of the guard bit within hi
+				hi = (hi&0xFFFFF) | 0x100000;	// upper mantissa bits with the implicit leading one made explicit
+				return rounded<R,false>(sign|(hi>>(i+1)), (hi>>i)&1, ((hi&((static_cast<uint32>(1)<<i)-1))|lo)!=0);
+			}
+			if((hi|lo) != 0)	// nonzero but below 2^-25
+				return underflow<R>(sign);
+			return sign;	// input was a (signed) zero
+		}
+
+		/// Convert non-IEEE floating-point to half-precision.
+		/// Works purely through `frexp`/`ldexp`/`modf`, without looking at the bit representation of \a T.
+		/// \tparam R rounding mode to use
+		/// \tparam T source type (builtin floating-point type)
+		/// \param value floating-point value to convert
+		/// \return rounded half-precision value
+		/// \exception FE_OVERFLOW on overflows
+		/// \exception FE_UNDERFLOW on underflows, \exception FE_INEXACT if value had to be rounded
+		template<std::float_round_style R,typename T> unsigned int float2half_impl(T value, ...)
+		{
+			const unsigned int signbit = static_cast<unsigned>(builtin_signbit(value)) << 15;
+			if(value == T())
+				return signbit;
+			if(builtin_isnan(value))
+				return signbit | 0x7FFF;
+			if(builtin_isinf(value))
+				return signbit | 0x7C00;
+			int e2;
+			std::frexp(value, &e2);
+			if(e2 > 16)
+				return overflow<R>(signbit);
+			// From e2 == -13 upwards the value gets an exponent field; below that it is scaled by a fixed 2^25 and the field stays zero.
+			const bool has_expfield = (e2 >= -13);
+			// Scale so that the integer part holds the kept mantissa bits plus one guard bit in its lowest position.
+			value = std::ldexp(value, has_expfield ? (12-e2) : 25);
+			const unsigned int packed = has_expfield ? (signbit | ((e2+13)<<10)) : signbit;
+			T whole;
+			const T frac = std::modf(value, &whole);
+			const int mant = std::abs(static_cast<int>(whole));
+			// Lowest integer bit is the guard bit, any remaining fraction is the sticky bit.
+			return rounded<R,false>(packed+(mant>>1), mant&1, frac!=T());
+		}
+
+		/// Convert floating-point to half-precision by forwarding to the float2half_impl overload selected through a compile-time tag.
+		/// \tparam R rounding mode to use
+		/// \tparam T source type (builtin floating-point type)
+		/// \param value floating-point value to convert
+		/// \return rounded half-precision value
+		/// \exception FE_OVERFLOW on overflows, FE_UNDERFLOW on underflows
+		/// \exception FE_INEXACT if value had to be rounded
+		template<std::float_round_style R,typename T> unsigned int float2half(T value)
+		{
+			typedef bool_type<std::numeric_limits<T>::is_iec559&&sizeof(typename bits<T>::type)==sizeof(T)> ieee_tag;	// true only for IEC 559 types whose bits<T>::type has the same size as T
+			return float2half_impl<R>(value, ieee_tag());
+		}
+
+		/// Convert integer to half-precision floating-point.
+		/// The range is checked before the sign is stripped, so the most negative value of a signed \a T is never negated in \a T itself.
+		/// \tparam R rounding mode to use
+		/// \tparam T type to convert (builtin integer type)
+		/// \param value integral value to convert
+		/// \return rounded half-precision value
+		/// \exception FE_OVERFLOW on overflows
+		/// \exception FE_INEXACT if value had to be rounded
+		template<std::float_round_style R,typename T> unsigned int int2half(T value)
+		{
+			unsigned int bits = static_cast<unsigned>(value<0) << 15;
+			if(!value)
+				return bits;
+			if(bits ? (value < -0xFFFF) : (value > 0xFFFF))	// same +-0xFFFF limit as before, but tested on the signed value so nothing has to be negated first
+				return overflow<R>(bits);
+			const unsigned int mag = bits ? static_cast<unsigned int>(-static_cast<long>(value)) : static_cast<unsigned int>(value);	// magnitude in [1,0xFFFF]; negation happens in long, where it cannot overflow
+			unsigned int m = mag, exp = 24;
+			for(; m<0x400; m<<=1,--exp) ;	// shift small magnitudes up until bit 10 is set
+			for(; m>0x7FF; m>>=1,++exp) ;	// shift large magnitudes down until they fit into 11 bits
+			bits |= (exp<<10) + m;	// bit 10 of m adds 1 to the exponent field
+			return (exp>24) ? rounded<R,false>(bits, (mag>>(exp-25))&1, (((1u<<(exp-25))-1)&mag)!=0) : bits;	// bits were shifted out only if exp>24: the highest one is the guard bit, the rest form the sticky bit
+		}
+
+		/// Convert half-precision to IEEE single-precision.
+		/// Credit for this goes to [Jeroen van der Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf).
+		/// \param value half-precision value to convert
+		/// \return single-precision value
+		inline float half2float_impl(unsigned int value, float, true_type)
+		{
+		#if HALF_ENABLE_F16C_INTRINSICS
+			return _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(value)));
+		#else
+		#if 0
+			bits<float>::type fbits = static_cast<bits<float>::type>(value&0x8000) << 16;
+			int abs = value & 0x7FFF;
+			if(abs)
+			{
+				fbits |= 0x38000000 << static_cast<unsigned>(abs>=0x7C00);
+				for(; abs<0x400; abs<<=1,fbits-=0x800000) ;
+				fbits += static_cast<bits<float>::type>(abs) << 13;
+			}
+		#else
+			static const bits<float>::type mantissa_table[2048] = {
+				0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34A00000, 0x34C00000, 0x34E00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000, 0x35400000, 0x35500000, 0x35600000, 0x35700000, 
+				0x35800000, 0x35880000, 0x35900000, 0x35980000, 0x35A00000, 0x35A80000, 0x35B00000, 0x35B80000, 0x35C00000, 0x35C80000, 0x35D00000, 0x35D80000, 0x35E00000, 0x35E80000, 0x35F00000, 0x35F80000, 
+				0x36000000, 0x36040000, 0x36080000, 0x360C0000, 0x36100000, 0x36140000, 0x36180000, 0x361C0000, 0x36200000, 0x36240000, 0x36280000, 0x362C0000, 0x36300000, 0x36340000, 0x36380000, 0x363C0000, 
+				0x36400000, 0x36440000, 0x36480000, 0x364C0000, 0x36500000, 0x36540000, 0x36580000, 0x365C0000, 0x36600000, 0x36640000, 0x36680000, 0x366C0000, 0x36700000, 0x36740000, 0x36780000, 0x367C0000, 
+				0x36800000, 0x36820000, 0x36840000, 0x36860000, 0x36880000, 0x368A0000, 0x368C0000, 0x368E0000, 0x36900000, 0x36920000, 0x36940000, 0x36960000, 0x36980000, 0x369A0000, 0x369C0000, 0x369E0000, 
+				0x36A00000, 0x36A20000, 0x36A40000, 0x36A60000, 0x36A80000, 0x36AA0000, 0x36AC0000, 0x36AE0000, 0x36B00000, 0x36B20000, 0x36B40000, 0x36B60000, 0x36B80000, 0x36BA0000, 0x36BC0000, 0x36BE0000, 
+				0x36C00000, 0x36C20000, 0x36C40000, 0x36C60000, 0x36C80000, 0x36CA0000, 0x36CC0000, 0x36CE0000, 0x36D00000, 0x36D20000, 0x36D40000, 0x36D60000, 0x36D80000, 0x36DA0000, 0x36DC0000, 0x36DE0000, 
+				0x36E00000, 0x36E20000, 0x36E40000, 0x36E60000, 0x36E80000, 0x36EA0000, 0x36EC0000, 0x36EE0000, 0x36F00000, 0x36F20000, 0x36F40000, 0x36F60000, 0x36F80000, 0x36FA0000, 0x36FC0000, 0x36FE0000, 
+				0x37000000, 0x37010000, 0x37020000, 0x37030000, 0x37040000, 0x37050000, 0x37060000, 0x37070000, 0x37080000, 0x37090000, 0x370A0000, 0x370B0000, 0x370C0000, 0x370D0000, 0x370E0000, 0x370F0000, 
+				0x37100000, 0x37110000, 0x37120000, 0x37130000, 0x37140000, 0x37150000, 0x37160000, 0x37170000, 0x37180000, 0x37190000, 0x371A0000, 0x371B0000, 0x371C0000, 0x371D0000, 0x371E0000, 0x371F0000, 
+				0x37200000, 0x37210000, 0x37220000, 0x37230000, 0x37240000, 0x37250000, 0x37260000, 0x37270000, 0x37280000, 0x37290000, 0x372A0000, 0x372B0000, 0x372C0000, 0x372D0000, 0x372E0000, 0x372F0000, 
+				0x37300000, 0x37310000, 0x37320000, 0x37330000, 0x37340000, 0x37350000, 0x37360000, 0x37370000, 0x37380000, 0x37390000, 0x373A0000, 0x373B0000, 0x373C0000, 0x373D0000, 0x373E0000, 0x373F0000, 
+				0x37400000, 0x37410000, 0x37420000, 0x37430000, 0x37440000, 0x37450000, 0x37460000, 0x37470000, 0x37480000, 0x37490000, 0x374A0000, 0x374B0000, 0x374C0000, 0x374D0000, 0x374E0000, 0x374F0000, 
+				0x37500000, 0x37510000, 0x37520000, 0x37530000, 0x37540000, 0x37550000, 0x37560000, 0x37570000, 0x37580000, 0x37590000, 0x375A0000, 0x375B0000, 0x375C0000, 0x375D0000, 0x375E0000, 0x375F0000, 
+				0x37600000, 0x37610000, 0x37620000, 0x37630000, 0x37640000, 0x37650000, 0x37660000, 0x37670000, 0x37680000, 0x37690000, 0x376A0000, 0x376B0000, 0x376C0000, 0x376D0000, 0x376E0000, 0x376F0000, 
+				0x37700000, 0x37710000, 0x37720000, 0x37730000, 0x37740000, 0x37750000, 0x37760000, 0x37770000, 0x37780000, 0x37790000, 0x377A0000, 0x377B0000, 0x377C0000, 0x377D0000, 0x377E0000, 0x377F0000, 
+				0x37800000, 0x37808000, 0x37810000, 0x37818000, 0x37820000, 0x37828000, 0x37830000, 0x37838000, 0x37840000, 0x37848000, 0x37850000, 0x37858000, 0x37860000, 0x37868000, 0x37870000, 0x37878000, 
+				0x37880000, 0x37888000, 0x37890000, 0x37898000, 0x378A0000, 0x378A8000, 0x378B0000, 0x378B8000, 0x378C0000, 0x378C8000, 0x378D0000, 0x378D8000, 0x378E0000, 0x378E8000, 0x378F0000, 0x378F8000, 
+				0x37900000, 0x37908000, 0x37910000, 0x37918000, 0x37920000, 0x37928000, 0x37930000, 0x37938000, 0x37940000, 0x37948000, 0x37950000, 0x37958000, 0x37960000, 0x37968000, 0x37970000, 0x37978000, 
+				0x37980000, 0x37988000, 0x37990000, 0x37998000, 0x379A0000, 0x379A8000, 0x379B0000, 0x379B8000, 0x379C0000, 0x379C8000, 0x379D0000, 0x379D8000, 0x379E0000, 0x379E8000, 0x379F0000, 0x379F8000, 
+				0x37A00000, 0x37A08000, 0x37A10000, 0x37A18000, 0x37A20000, 0x37A28000, 0x37A30000, 0x37A38000, 0x37A40000, 0x37A48000, 0x37A50000, 0x37A58000, 0x37A60000, 0x37A68000, 0x37A70000, 0x37A78000, 
+				0x37A80000, 0x37A88000, 0x37A90000, 0x37A98000, 0x37AA0000, 0x37AA8000, 0x37AB0000, 0x37AB8000, 0x37AC0000, 0x37AC8000, 0x37AD0000, 0x37AD8000, 0x37AE0000, 0x37AE8000, 0x37AF0000, 0x37AF8000, 
+				0x37B00000, 0x37B08000, 0x37B10000, 0x37B18000, 0x37B20000, 0x37B28000, 0x37B30000, 0x37B38000, 0x37B40000, 0x37B48000, 0x37B50000, 0x37B58000, 0x37B60000, 0x37B68000, 0x37B70000, 0x37B78000, 
+				0x37B80000, 0x37B88000, 0x37B90000, 0x37B98000, 0x37BA0000, 0x37BA8000, 0x37BB0000, 0x37BB8000, 0x37BC0000, 0x37BC8000, 0x37BD0000, 0x37BD8000, 0x37BE0000, 0x37BE8000, 0x37BF0000, 0x37BF8000, 
+				0x37C00000, 0x37C08000, 0x37C10000, 0x37C18000, 0x37C20000, 0x37C28000, 0x37C30000, 0x37C38000, 0x37C40000, 0x37C48000, 0x37C50000, 0x37C58000, 0x37C60000, 0x37C68000, 0x37C70000, 0x37C78000, 
+				0x37C80000, 0x37C88000, 0x37C90000, 0x37C98000, 0x37CA0000, 0x37CA8000, 0x37CB0000, 0x37CB8000, 0x37CC0000, 0x37CC8000, 0x37CD0000, 0x37CD8000, 0x37CE0000, 0x37CE8000, 0x37CF0000, 0x37CF8000, 
+				0x37D00000, 0x37D08000, 0x37D10000, 0x37D18000, 0x37D20000, 0x37D28000, 0x37D30000, 0x37D38000, 0x37D40000, 0x37D48000, 0x37D50000, 0x37D58000, 0x37D60000, 0x37D68000, 0x37D70000, 0x37D78000, 
+				0x37D80000, 0x37D88000, 0x37D90000, 0x37D98000, 0x37DA0000, 0x37DA8000, 0x37DB0000, 0x37DB8000, 0x37DC0000, 0x37DC8000, 0x37DD0000, 0x37DD8000, 0x37DE0000, 0x37DE8000, 0x37DF0000, 0x37DF8000, 
+				0x37E00000, 0x37E08000, 0x37E10000, 0x37E18000, 0x37E20000, 0x37E28000, 0x37E30000, 0x37E38000, 0x37E40000, 0x37E48000, 0x37E50000, 0x37E58000, 0x37E60000, 0x37E68000, 0x37E70000, 0x37E78000, 
+				0x37E80000, 0x37E88000, 0x37E90000, 0x37E98000, 0x37EA0000, 0x37EA8000, 0x37EB0000, 0x37EB8000, 0x37EC0000, 0x37EC8000, 0x37ED0000, 0x37ED8000, 0x37EE0000, 0x37EE8000, 0x37EF0000, 0x37EF8000, 
+				0x37F00000, 0x37F08000, 0x37F10000, 0x37F18000, 0x37F20000, 0x37F28000, 0x37F30000, 0x37F38000, 0x37F40000, 0x37F48000, 0x37F50000, 0x37F58000, 0x37F60000, 0x37F68000, 0x37F70000, 0x37F78000, 
+				0x37F80000, 0x37F88000, 0x37F90000, 0x37F98000, 0x37FA0000, 0x37FA8000, 0x37FB0000, 0x37FB8000, 0x37FC0000, 0x37FC8000, 0x37FD0000, 0x37FD8000, 0x37FE0000, 0x37FE8000, 0x37FF0000, 0x37FF8000, 
+				0x38000000, 0x38004000, 0x38008000, 0x3800C000, 0x38010000, 0x38014000, 0x38018000, 0x3801C000, 0x38020000, 0x38024000, 0x38028000, 0x3802C000, 0x38030000, 0x38034000, 0x38038000, 0x3803C000, 
+				0x38040000, 0x38044000, 0x38048000, 0x3804C000, 0x38050000, 0x38054000, 0x38058000, 0x3805C000, 0x38060000, 0x38064000, 0x38068000, 0x3806C000, 0x38070000, 0x38074000, 0x38078000, 0x3807C000, 
+				0x38080000, 0x38084000, 0x38088000, 0x3808C000, 0x38090000, 0x38094000, 0x38098000, 0x3809C000, 0x380A0000, 0x380A4000, 0x380A8000, 0x380AC000, 0x380B0000, 0x380B4000, 0x380B8000, 0x380BC000, 
+				0x380C0000, 0x380C4000, 0x380C8000, 0x380CC000, 0x380D0000, 0x380D4000, 0x380D8000, 0x380DC000, 0x380E0000, 0x380E4000, 0x380E8000, 0x380EC000, 0x380F0000, 0x380F4000, 0x380F8000, 0x380FC000, 
+				0x38100000, 0x38104000, 0x38108000, 0x3810C000, 0x38110000, 0x38114000, 0x38118000, 0x3811C000, 0x38120000, 0x38124000, 0x38128000, 0x3812C000, 0x38130000, 0x38134000, 0x38138000, 0x3813C000, 
+				0x38140000, 0x38144000, 0x38148000, 0x3814C000, 0x38150000, 0x38154000, 0x38158000, 0x3815C000, 0x38160000, 0x38164000, 0x38168000, 0x3816C000, 0x38170000, 0x38174000, 0x38178000, 0x3817C000, 
+				0x38180000, 0x38184000, 0x38188000, 0x3818C000, 0x38190000, 0x38194000, 0x38198000, 0x3819C000, 0x381A0000, 0x381A4000, 0x381A8000, 0x381AC000, 0x381B0000, 0x381B4000, 0x381B8000, 0x381BC000, 
+				0x381C0000, 0x381C4000, 0x381C8000, 0x381CC000, 0x381D0000, 0x381D4000, 0x381D8000, 0x381DC000, 0x381E0000, 0x381E4000, 0x381E8000, 0x381EC000, 0x381F0000, 0x381F4000, 0x381F8000, 0x381FC000, 
+				0x38200000, 0x38204000, 0x38208000, 0x3820C000, 0x38210000, 0x38214000, 0x38218000, 0x3821C000, 0x38220000, 0x38224000, 0x38228000, 0x3822C000, 0x38230000, 0x38234000, 0x38238000, 0x3823C000, 
+				0x38240000, 0x38244000, 0x38248000, 0x3824C000, 0x38250000, 0x38254000, 0x38258000, 0x3825C000, 0x38260000, 0x38264000, 0x38268000, 0x3826C000, 0x38270000, 0x38274000, 0x38278000, 0x3827C000, 
+				0x38280000, 0x38284000, 0x38288000, 0x3828C000, 0x38290000, 0x38294000, 0x38298000, 0x3829C000, 0x382A0000, 0x382A4000, 0x382A8000, 0x382AC000, 0x382B0000, 0x382B4000, 0x382B8000, 0x382BC000, 
+				0x382C0000, 0x382C4000, 0x382C8000, 0x382CC000, 0x382D0000, 0x382D4000, 0x382D8000, 0x382DC000, 0x382E0000, 0x382E4000, 0x382E8000, 0x382EC000, 0x382F0000, 0x382F4000, 0x382F8000, 0x382FC000, 
+				0x38300000, 0x38304000, 0x38308000, 0x3830C000, 0x38310000, 0x38314000, 0x38318000, 0x3831C000, 0x38320000, 0x38324000, 0x38328000, 0x3832C000, 0x38330000, 0x38334000, 0x38338000, 0x3833C000, 
+				0x38340000, 0x38344000, 0x38348000, 0x3834C000, 0x38350000, 0x38354000, 0x38358000, 0x3835C000, 0x38360000, 0x38364000, 0x38368000, 0x3836C000, 0x38370000, 0x38374000, 0x38378000, 0x3837C000, 
+				0x38380000, 0x38384000, 0x38388000, 0x3838C000, 0x38390000, 0x38394000, 0x38398000, 0x3839C000, 0x383A0000, 0x383A4000, 0x383A8000, 0x383AC000, 0x383B0000, 0x383B4000, 0x383B8000, 0x383BC000, 
+				0x383C0000, 0x383C4000, 0x383C8000, 0x383CC000, 0x383D0000, 0x383D4000, 0x383D8000, 0x383DC000, 0x383E0000, 0x383E4000, 0x383E8000, 0x383EC000, 0x383F0000, 0x383F4000, 0x383F8000, 0x383FC000, 
+				0x38400000, 0x38404000, 0x38408000, 0x3840C000, 0x38410000, 0x38414000, 0x38418000, 0x3841C000, 0x38420000, 0x38424000, 0x38428000, 0x3842C000, 0x38430000, 0x38434000, 0x38438000, 0x3843C000, 
+				0x38440000, 0x38444000, 0x38448000, 0x3844C000, 0x38450000, 0x38454000, 0x38458000, 0x3845C000, 0x38460000, 0x38464000, 0x38468000, 0x3846C000, 0x38470000, 0x38474000, 0x38478000, 0x3847C000, 
+				0x38480000, 0x38484000, 0x38488000, 0x3848C000, 0x38490000, 0x38494000, 0x38498000, 0x3849C000, 0x384A0000, 0x384A4000, 0x384A8000, 0x384AC000, 0x384B0000, 0x384B4000, 0x384B8000, 0x384BC000, 
+				0x384C0000, 0x384C4000, 0x384C8000, 0x384CC000, 0x384D0000, 0x384D4000, 0x384D8000, 0x384DC000, 0x384E0000, 0x384E4000, 0x384E8000, 0x384EC000, 0x384F0000, 0x384F4000, 0x384F8000, 0x384FC000, 
+				0x38500000, 0x38504000, 0x38508000, 0x3850C000, 0x38510000, 0x38514000, 0x38518000, 0x3851C000, 0x38520000, 0x38524000, 0x38528000, 0x3852C000, 0x38530000, 0x38534000, 0x38538000, 0x3853C000, 
+				0x38540000, 0x38544000, 0x38548000, 0x3854C000, 0x38550000, 0x38554000, 0x38558000, 0x3855C000, 0x38560000, 0x38564000, 0x38568000, 0x3856C000, 0x38570000, 0x38574000, 0x38578000, 0x3857C000, 
+				0x38580000, 0x38584000, 0x38588000, 0x3858C000, 0x38590000, 0x38594000, 0x38598000, 0x3859C000, 0x385A0000, 0x385A4000, 0x385A8000, 0x385AC000, 0x385B0000, 0x385B4000, 0x385B8000, 0x385BC000, 
+				0x385C0000, 0x385C4000, 0x385C8000, 0x385CC000, 0x385D0000, 0x385D4000, 0x385D8000, 0x385DC000, 0x385E0000, 0x385E4000, 0x385E8000, 0x385EC000, 0x385F0000, 0x385F4000, 0x385F8000, 0x385FC000, 
+				0x38600000, 0x38604000, 0x38608000, 0x3860C000, 0x38610000, 0x38614000, 0x38618000, 0x3861C000, 0x38620000, 0x38624000, 0x38628000, 0x3862C000, 0x38630000, 0x38634000, 0x38638000, 0x3863C000, 
+				0x38640000, 0x38644000, 0x38648000, 0x3864C000, 0x38650000, 0x38654000, 0x38658000, 0x3865C000, 0x38660000, 0x38664000, 0x38668000, 0x3866C000, 0x38670000, 0x38674000, 0x38678000, 0x3867C000, 
+				0x38680000, 0x38684000, 0x38688000, 0x3868C000, 0x38690000, 0x38694000, 0x38698000, 0x3869C000, 0x386A0000, 0x386A4000, 0x386A8000, 0x386AC000, 0x386B0000, 0x386B4000, 0x386B8000, 0x386BC000, 
+				0x386C0000, 0x386C4000, 0x386C8000, 0x386CC000, 0x386D0000, 0x386D4000, 0x386D8000, 0x386DC000, 0x386E0000, 0x386E4000, 0x386E8000, 0x386EC000, 0x386F0000, 0x386F4000, 0x386F8000, 0x386FC000, 
+				0x38700000, 0x38704000, 0x38708000, 0x3870C000, 0x38710000, 0x38714000, 0x38718000, 0x3871C000, 0x38720000, 0x38724000, 0x38728000, 0x3872C000, 0x38730000, 0x38734000, 0x38738000, 0x3873C000, 
+				0x38740000, 0x38744000, 0x38748000, 0x3874C000, 0x38750000, 0x38754000, 0x38758000, 0x3875C000, 0x38760000, 0x38764000, 0x38768000, 0x3876C000, 0x38770000, 0x38774000, 0x38778000, 0x3877C000, 
+				0x38780000, 0x38784000, 0x38788000, 0x3878C000, 0x38790000, 0x38794000, 0x38798000, 0x3879C000, 0x387A0000, 0x387A4000, 0x387A8000, 0x387AC000, 0x387B0000, 0x387B4000, 0x387B8000, 0x387BC000, 
+				0x387C0000, 0x387C4000, 0x387C8000, 0x387CC000, 0x387D0000, 0x387D4000, 0x387D8000, 0x387DC000, 0x387E0000, 0x387E4000, 0x387E8000, 0x387EC000, 0x387F0000, 0x387F4000, 0x387F8000, 0x387FC000, 
+				0x38000000, 0x38002000, 0x38004000, 0x38006000, 0x38008000, 0x3800A000, 0x3800C000, 0x3800E000, 0x38010000, 0x38012000, 0x38014000, 0x38016000, 0x38018000, 0x3801A000, 0x3801C000, 0x3801E000, 
+				0x38020000, 0x38022000, 0x38024000, 0x38026000, 0x38028000, 0x3802A000, 0x3802C000, 0x3802E000, 0x38030000, 0x38032000, 0x38034000, 0x38036000, 0x38038000, 0x3803A000, 0x3803C000, 0x3803E000, 
+				0x38040000, 0x38042000, 0x38044000, 0x38046000, 0x38048000, 0x3804A000, 0x3804C000, 0x3804E000, 0x38050000, 0x38052000, 0x38054000, 0x38056000, 0x38058000, 0x3805A000, 0x3805C000, 0x3805E000, 
+				0x38060000, 0x38062000, 0x38064000, 0x38066000, 0x38068000, 0x3806A000, 0x3806C000, 0x3806E000, 0x38070000, 0x38072000, 0x38074000, 0x38076000, 0x38078000, 0x3807A000, 0x3807C000, 0x3807E000, 
+				0x38080000, 0x38082000, 0x38084000, 0x38086000, 0x38088000, 0x3808A000, 0x3808C000, 0x3808E000, 0x38090000, 0x38092000, 0x38094000, 0x38096000, 0x38098000, 0x3809A000, 0x3809C000, 0x3809E000, 
+				0x380A0000, 0x380A2000, 0x380A4000, 0x380A6000, 0x380A8000, 0x380AA000, 0x380AC000, 0x380AE000, 0x380B0000, 0x380B2000, 0x380B4000, 0x380B6000, 0x380B8000, 0x380BA000, 0x380BC000, 0x380BE000, 
+				0x380C0000, 0x380C2000, 0x380C4000, 0x380C6000, 0x380C8000, 0x380CA000, 0x380CC000, 0x380CE000, 0x380D0000, 0x380D2000, 0x380D4000, 0x380D6000, 0x380D8000, 0x380DA000, 0x380DC000, 0x380DE000, 
+				0x380E0000, 0x380E2000, 0x380E4000, 0x380E6000, 0x380E8000, 0x380EA000, 0x380EC000, 0x380EE000, 0x380F0000, 0x380F2000, 0x380F4000, 0x380F6000, 0x380F8000, 0x380FA000, 0x380FC000, 0x380FE000, 
+				0x38100000, 0x38102000, 0x38104000, 0x38106000, 0x38108000, 0x3810A000, 0x3810C000, 0x3810E000, 0x38110000, 0x38112000, 0x38114000, 0x38116000, 0x38118000, 0x3811A000, 0x3811C000, 0x3811E000, 
+				0x38120000, 0x38122000, 0x38124000, 0x38126000, 0x38128000, 0x3812A000, 0x3812C000, 0x3812E000, 0x38130000, 0x38132000, 0x38134000, 0x38136000, 0x38138000, 0x3813A000, 0x3813C000, 0x3813E000, 
+				0x38140000, 0x38142000, 0x38144000, 0x38146000, 0x38148000, 0x3814A000, 0x3814C000, 0x3814E000, 0x38150000, 0x38152000, 0x38154000, 0x38156000, 0x38158000, 0x3815A000, 0x3815C000, 0x3815E000, 
+				0x38160000, 0x38162000, 0x38164000, 0x38166000, 0x38168000, 0x3816A000, 0x3816C000, 0x3816E000, 0x38170000, 0x38172000, 0x38174000, 0x38176000, 0x38178000, 0x3817A000, 0x3817C000, 0x3817E000, 
+				0x38180000, 0x38182000, 0x38184000, 0x38186000, 0x38188000, 0x3818A000, 0x3818C000, 0x3818E000, 0x38190000, 0x38192000, 0x38194000, 0x38196000, 0x38198000, 0x3819A000, 0x3819C000, 0x3819E000, 
+				0x381A0000, 0x381A2000, 0x381A4000, 0x381A6000, 0x381A8000, 0x381AA000, 0x381AC000, 0x381AE000, 0x381B0000, 0x381B2000, 0x381B4000, 0x381B6000, 0x381B8000, 0x381BA000, 0x381BC000, 0x381BE000, 
+				0x381C0000, 0x381C2000, 0x381C4000, 0x381C6000, 0x381C8000, 0x381CA000, 0x381CC000, 0x381CE000, 0x381D0000, 0x381D2000, 0x381D4000, 0x381D6000, 0x381D8000, 0x381DA000, 0x381DC000, 0x381DE000, 
+				0x381E0000, 0x381E2000, 0x381E4000, 0x381E6000, 0x381E8000, 0x381EA000, 0x381EC000, 0x381EE000, 0x381F0000, 0x381F2000, 0x381F4000, 0x381F6000, 0x381F8000, 0x381FA000, 0x381FC000, 0x381FE000, 
+				0x38200000, 0x38202000, 0x38204000, 0x38206000, 0x38208000, 0x3820A000, 0x3820C000, 0x3820E000, 0x38210000, 0x38212000, 0x38214000, 0x38216000, 0x38218000, 0x3821A000, 0x3821C000, 0x3821E000, 
+				0x38220000, 0x38222000, 0x38224000, 0x38226000, 0x38228000, 0x3822A000, 0x3822C000, 0x3822E000, 0x38230000, 0x38232000, 0x38234000, 0x38236000, 0x38238000, 0x3823A000, 0x3823C000, 0x3823E000, 
+				0x38240000, 0x38242000, 0x38244000, 0x38246000, 0x38248000, 0x3824A000, 0x3824C000, 0x3824E000, 0x38250000, 0x38252000, 0x38254000, 0x38256000, 0x38258000, 0x3825A000, 0x3825C000, 0x3825E000, 
+				0x38260000, 0x38262000, 0x38264000, 0x38266000, 0x38268000, 0x3826A000, 0x3826C000, 0x3826E000, 0x38270000, 0x38272000, 0x38274000, 0x38276000, 0x38278000, 0x3827A000, 0x3827C000, 0x3827E000, 
+				0x38280000, 0x38282000, 0x38284000, 0x38286000, 0x38288000, 0x3828A000, 0x3828C000, 0x3828E000, 0x38290000, 0x38292000, 0x38294000, 0x38296000, 0x38298000, 0x3829A000, 0x3829C000, 0x3829E000, 
+				0x382A0000, 0x382A2000, 0x382A4000, 0x382A6000, 0x382A8000, 0x382AA000, 0x382AC000, 0x382AE000, 0x382B0000, 0x382B2000, 0x382B4000, 0x382B6000, 0x382B8000, 0x382BA000, 0x382BC000, 0x382BE000, 
+				0x382C0000, 0x382C2000, 0x382C4000, 0x382C6000, 0x382C8000, 0x382CA000, 0x382CC000, 0x382CE000, 0x382D0000, 0x382D2000, 0x382D4000, 0x382D6000, 0x382D8000, 0x382DA000, 0x382DC000, 0x382DE000, 
+				0x382E0000, 0x382E2000, 0x382E4000, 0x382E6000, 0x382E8000, 0x382EA000, 0x382EC000, 0x382EE000, 0x382F0000, 0x382F2000, 0x382F4000, 0x382F6000, 0x382F8000, 0x382FA000, 0x382FC000, 0x382FE000, 
+				0x38300000, 0x38302000, 0x38304000, 0x38306000, 0x38308000, 0x3830A000, 0x3830C000, 0x3830E000, 0x38310000, 0x38312000, 0x38314000, 0x38316000, 0x38318000, 0x3831A000, 0x3831C000, 0x3831E000, 
+				0x38320000, 0x38322000, 0x38324000, 0x38326000, 0x38328000, 0x3832A000, 0x3832C000, 0x3832E000, 0x38330000, 0x38332000, 0x38334000, 0x38336000, 0x38338000, 0x3833A000, 0x3833C000, 0x3833E000, 
+				0x38340000, 0x38342000, 0x38344000, 0x38346000, 0x38348000, 0x3834A000, 0x3834C000, 0x3834E000, 0x38350000, 0x38352000, 0x38354000, 0x38356000, 0x38358000, 0x3835A000, 0x3835C000, 0x3835E000, 
+				0x38360000, 0x38362000, 0x38364000, 0x38366000, 0x38368000, 0x3836A000, 0x3836C000, 0x3836E000, 0x38370000, 0x38372000, 0x38374000, 0x38376000, 0x38378000, 0x3837A000, 0x3837C000, 0x3837E000, 
+				0x38380000, 0x38382000, 0x38384000, 0x38386000, 0x38388000, 0x3838A000, 0x3838C000, 0x3838E000, 0x38390000, 0x38392000, 0x38394000, 0x38396000, 0x38398000, 0x3839A000, 0x3839C000, 0x3839E000, 
+				0x383A0000, 0x383A2000, 0x383A4000, 0x383A6000, 0x383A8000, 0x383AA000, 0x383AC000, 0x383AE000, 0x383B0000, 0x383B2000, 0x383B4000, 0x383B6000, 0x383B8000, 0x383BA000, 0x383BC000, 0x383BE000, 
+				0x383C0000, 0x383C2000, 0x383C4000, 0x383C6000, 0x383C8000, 0x383CA000, 0x383CC000, 0x383CE000, 0x383D0000, 0x383D2000, 0x383D4000, 0x383D6000, 0x383D8000, 0x383DA000, 0x383DC000, 0x383DE000, 
+				0x383E0000, 0x383E2000, 0x383E4000, 0x383E6000, 0x383E8000, 0x383EA000, 0x383EC000, 0x383EE000, 0x383F0000, 0x383F2000, 0x383F4000, 0x383F6000, 0x383F8000, 0x383FA000, 0x383FC000, 0x383FE000, 
+				0x38400000, 0x38402000, 0x38404000, 0x38406000, 0x38408000, 0x3840A000, 0x3840C000, 0x3840E000, 0x38410000, 0x38412000, 0x38414000, 0x38416000, 0x38418000, 0x3841A000, 0x3841C000, 0x3841E000, 
+				0x38420000, 0x38422000, 0x38424000, 0x38426000, 0x38428000, 0x3842A000, 0x3842C000, 0x3842E000, 0x38430000, 0x38432000, 0x38434000, 0x38436000, 0x38438000, 0x3843A000, 0x3843C000, 0x3843E000, 
+				0x38440000, 0x38442000, 0x38444000, 0x38446000, 0x38448000, 0x3844A000, 0x3844C000, 0x3844E000, 0x38450000, 0x38452000, 0x38454000, 0x38456000, 0x38458000, 0x3845A000, 0x3845C000, 0x3845E000, 
+				0x38460000, 0x38462000, 0x38464000, 0x38466000, 0x38468000, 0x3846A000, 0x3846C000, 0x3846E000, 0x38470000, 0x38472000, 0x38474000, 0x38476000, 0x38478000, 0x3847A000, 0x3847C000, 0x3847E000, 
+				0x38480000, 0x38482000, 0x38484000, 0x38486000, 0x38488000, 0x3848A000, 0x3848C000, 0x3848E000, 0x38490000, 0x38492000, 0x38494000, 0x38496000, 0x38498000, 0x3849A000, 0x3849C000, 0x3849E000, 
+				0x384A0000, 0x384A2000, 0x384A4000, 0x384A6000, 0x384A8000, 0x384AA000, 0x384AC000, 0x384AE000, 0x384B0000, 0x384B2000, 0x384B4000, 0x384B6000, 0x384B8000, 0x384BA000, 0x384BC000, 0x384BE000, 
+				0x384C0000, 0x384C2000, 0x384C4000, 0x384C6000, 0x384C8000, 0x384CA000, 0x384CC000, 0x384CE000, 0x384D0000, 0x384D2000, 0x384D4000, 0x384D6000, 0x384D8000, 0x384DA000, 0x384DC000, 0x384DE000, 
+				0x384E0000, 0x384E2000, 0x384E4000, 0x384E6000, 0x384E8000, 0x384EA000, 0x384EC000, 0x384EE000, 0x384F0000, 0x384F2000, 0x384F4000, 0x384F6000, 0x384F8000, 0x384FA000, 0x384FC000, 0x384FE000, 
+				0x38500000, 0x38502000, 0x38504000, 0x38506000, 0x38508000, 0x3850A000, 0x3850C000, 0x3850E000, 0x38510000, 0x38512000, 0x38514000, 0x38516000, 0x38518000, 0x3851A000, 0x3851C000, 0x3851E000, 
+				0x38520000, 0x38522000, 0x38524000, 0x38526000, 0x38528000, 0x3852A000, 0x3852C000, 0x3852E000, 0x38530000, 0x38532000, 0x38534000, 0x38536000, 0x38538000, 0x3853A000, 0x3853C000, 0x3853E000, 
+				0x38540000, 0x38542000, 0x38544000, 0x38546000, 0x38548000, 0x3854A000, 0x3854C000, 0x3854E000, 0x38550000, 0x38552000, 0x38554000, 0x38556000, 0x38558000, 0x3855A000, 0x3855C000, 0x3855E000, 
+				0x38560000, 0x38562000, 0x38564000, 0x38566000, 0x38568000, 0x3856A000, 0x3856C000, 0x3856E000, 0x38570000, 0x38572000, 0x38574000, 0x38576000, 0x38578000, 0x3857A000, 0x3857C000, 0x3857E000, 
+				0x38580000, 0x38582000, 0x38584000, 0x38586000, 0x38588000, 0x3858A000, 0x3858C000, 0x3858E000, 0x38590000, 0x38592000, 0x38594000, 0x38596000, 0x38598000, 0x3859A000, 0x3859C000, 0x3859E000, 
+				0x385A0000, 0x385A2000, 0x385A4000, 0x385A6000, 0x385A8000, 0x385AA000, 0x385AC000, 0x385AE000, 0x385B0000, 0x385B2000, 0x385B4000, 0x385B6000, 0x385B8000, 0x385BA000, 0x385BC000, 0x385BE000, 
+				0x385C0000, 0x385C2000, 0x385C4000, 0x385C6000, 0x385C8000, 0x385CA000, 0x385CC000, 0x385CE000, 0x385D0000, 0x385D2000, 0x385D4000, 0x385D6000, 0x385D8000, 0x385DA000, 0x385DC000, 0x385DE000, 
+				0x385E0000, 0x385E2000, 0x385E4000, 0x385E6000, 0x385E8000, 0x385EA000, 0x385EC000, 0x385EE000, 0x385F0000, 0x385F2000, 0x385F4000, 0x385F6000, 0x385F8000, 0x385FA000, 0x385FC000, 0x385FE000, 
+				0x38600000, 0x38602000, 0x38604000, 0x38606000, 0x38608000, 0x3860A000, 0x3860C000, 0x3860E000, 0x38610000, 0x38612000, 0x38614000, 0x38616000, 0x38618000, 0x3861A000, 0x3861C000, 0x3861E000, 
+				0x38620000, 0x38622000, 0x38624000, 0x38626000, 0x38628000, 0x3862A000, 0x3862C000, 0x3862E000, 0x38630000, 0x38632000, 0x38634000, 0x38636000, 0x38638000, 0x3863A000, 0x3863C000, 0x3863E000, 
+				0x38640000, 0x38642000, 0x38644000, 0x38646000, 0x38648000, 0x3864A000, 0x3864C000, 0x3864E000, 0x38650000, 0x38652000, 0x38654000, 0x38656000, 0x38658000, 0x3865A000, 0x3865C000, 0x3865E000, 
+				0x38660000, 0x38662000, 0x38664000, 0x38666000, 0x38668000, 0x3866A000, 0x3866C000, 0x3866E000, 0x38670000, 0x38672000, 0x38674000, 0x38676000, 0x38678000, 0x3867A000, 0x3867C000, 0x3867E000, 
+				0x38680000, 0x38682000, 0x38684000, 0x38686000, 0x38688000, 0x3868A000, 0x3868C000, 0x3868E000, 0x38690000, 0x38692000, 0x38694000, 0x38696000, 0x38698000, 0x3869A000, 0x3869C000, 0x3869E000, 
+				0x386A0000, 0x386A2000, 0x386A4000, 0x386A6000, 0x386A8000, 0x386AA000, 0x386AC000, 0x386AE000, 0x386B0000, 0x386B2000, 0x386B4000, 0x386B6000, 0x386B8000, 0x386BA000, 0x386BC000, 0x386BE000, 
+				0x386C0000, 0x386C2000, 0x386C4000, 0x386C6000, 0x386C8000, 0x386CA000, 0x386CC000, 0x386CE000, 0x386D0000, 0x386D2000, 0x386D4000, 0x386D6000, 0x386D8000, 0x386DA000, 0x386DC000, 0x386DE000, 
+				0x386E0000, 0x386E2000, 0x386E4000, 0x386E6000, 0x386E8000, 0x386EA000, 0x386EC000, 0x386EE000, 0x386F0000, 0x386F2000, 0x386F4000, 0x386F6000, 0x386F8000, 0x386FA000, 0x386FC000, 0x386FE000, 
+				0x38700000, 0x38702000, 0x38704000, 0x38706000, 0x38708000, 0x3870A000, 0x3870C000, 0x3870E000, 0x38710000, 0x38712000, 0x38714000, 0x38716000, 0x38718000, 0x3871A000, 0x3871C000, 0x3871E000, 
+				0x38720000, 0x38722000, 0x38724000, 0x38726000, 0x38728000, 0x3872A000, 0x3872C000, 0x3872E000, 0x38730000, 0x38732000, 0x38734000, 0x38736000, 0x38738000, 0x3873A000, 0x3873C000, 0x3873E000, 
+				0x38740000, 0x38742000, 0x38744000, 0x38746000, 0x38748000, 0x3874A000, 0x3874C000, 0x3874E000, 0x38750000, 0x38752000, 0x38754000, 0x38756000, 0x38758000, 0x3875A000, 0x3875C000, 0x3875E000, 
+				0x38760000, 0x38762000, 0x38764000, 0x38766000, 0x38768000, 0x3876A000, 0x3876C000, 0x3876E000, 0x38770000, 0x38772000, 0x38774000, 0x38776000, 0x38778000, 0x3877A000, 0x3877C000, 0x3877E000, 
+				0x38780000, 0x38782000, 0x38784000, 0x38786000, 0x38788000, 0x3878A000, 0x3878C000, 0x3878E000, 0x38790000, 0x38792000, 0x38794000, 0x38796000, 0x38798000, 0x3879A000, 0x3879C000, 0x3879E000, 
+				0x387A0000, 0x387A2000, 0x387A4000, 0x387A6000, 0x387A8000, 0x387AA000, 0x387AC000, 0x387AE000, 0x387B0000, 0x387B2000, 0x387B4000, 0x387B6000, 0x387B8000, 0x387BA000, 0x387BC000, 0x387BE000, 
+				0x387C0000, 0x387C2000, 0x387C4000, 0x387C6000, 0x387C8000, 0x387CA000, 0x387CC000, 0x387CE000, 0x387D0000, 0x387D2000, 0x387D4000, 0x387D6000, 0x387D8000, 0x387DA000, 0x387DC000, 0x387DE000, 
+				0x387E0000, 0x387E2000, 0x387E4000, 0x387E6000, 0x387E8000, 0x387EA000, 0x387EC000, 0x387EE000, 0x387F0000, 0x387F2000, 0x387F4000, 0x387F6000, 0x387F8000, 0x387FA000, 0x387FC000, 0x387FE000 };
+			static const bits<float>::type exponent_table[64] = {
+				0x00000000, 0x00800000, 0x01000000, 0x01800000, 0x02000000, 0x02800000, 0x03000000, 0x03800000, 0x04000000, 0x04800000, 0x05000000, 0x05800000, 0x06000000, 0x06800000, 0x07000000, 0x07800000, 
+				0x08000000, 0x08800000, 0x09000000, 0x09800000, 0x0A000000, 0x0A800000, 0x0B000000, 0x0B800000, 0x0C000000, 0x0C800000, 0x0D000000, 0x0D800000, 0x0E000000, 0x0E800000, 0x0F000000, 0x47800000, 
+				0x80000000, 0x80800000, 0x81000000, 0x81800000, 0x82000000, 0x82800000, 0x83000000, 0x83800000, 0x84000000, 0x84800000, 0x85000000, 0x85800000, 0x86000000, 0x86800000, 0x87000000, 0x87800000, 
+				0x88000000, 0x88800000, 0x89000000, 0x89800000, 0x8A000000, 0x8A800000, 0x8B000000, 0x8B800000, 0x8C000000, 0x8C800000, 0x8D000000, 0x8D800000, 0x8E000000, 0x8E800000, 0x8F000000, 0xC7800000 };
+			static const unsigned short offset_table[64] = {
+				0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 
+				0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024 };
+			bits<float>::type fbits = mantissa_table[offset_table[value>>10]+(value&0x3FF)] + exponent_table[value>>10];
+		#endif
+			float out;
+			std::memcpy(&out, &fbits, sizeof(float));
+			return out;
+		#endif
+		}
+
+		/// Convert half-precision to IEEE double-precision.
+		/// \param value half-precision value to convert
+		/// \return double-precision value
+		inline double half2float_impl(unsigned int value, double, true_type)
+		{
+		#if HALF_ENABLE_F16C_INTRINSICS
+			return _mm_cvtsd_f64(_mm_cvtps_pd(_mm_cvtph_ps(_mm_cvtsi32_si128(value))));
+		#else
+			uint32 hi = static_cast<uint32>(value&0x8000) << 16; // half sign bit (bit 15) moved to bit 31 of the upper double word
+			unsigned int abs = value & 0x7FFF; // magnitude bits without the sign
+			if(abs) // zero keeps only the sign bit
+			{
+				hi |= 0x3F000000 << static_cast<unsigned>(abs>=0x7C00); // exponent rebias (1008<<20); doubled to 0x7E000000 for Inf/NaN so the field ends up all ones
+				for(; abs<0x400; abs<<=1,hi-=0x100000) ; // normalize subnormals: each mantissa shift lowers the exponent field (bit 20) by one
+				hi += static_cast<uint32>(abs) << 10; // add half exponent and mantissa, aligned 10 bits higher
+			}
+			bits<double>::type dbits = static_cast<bits<double>::type>(hi) << 32; // lower 32 bits of the pattern stay zero
+			double out;
+			std::memcpy(&out, &dbits, sizeof(double)); // bit-copy the pattern into the result
+			return out;
+		#endif
+		}
+
+		/// Convert half-precision to non-IEEE floating-point.
+		/// \tparam T type to convert to (builtin floating-point type)
+		/// \param value half-precision value to convert
+		/// \return floating-point value
+		template<typename T> T half2float_impl(unsigned int value, T, ...)
+		{
+			T out;
+			unsigned int abs = value & 0x7FFF; // magnitude bits without the sign
+			if(abs > 0x7C00) // NaN: signaling if bit 9 is clear and T supports it, else quiet, else T()
+				out = (std::numeric_limits<T>::has_signaling_NaN && !(abs&0x200)) ? std::numeric_limits<T>::signaling_NaN() :
+					std::numeric_limits<T>::has_quiet_NaN ? std::numeric_limits<T>::quiet_NaN() : T();
+			else if(abs == 0x7C00) // infinity, falling back to max() when T has no infinity
+				out = std::numeric_limits<T>::has_infinity ? std::numeric_limits<T>::infinity() : std::numeric_limits<T>::max();
+			else if(abs > 0x3FF) // normal: restore implicit bit 0x400 and scale by 2^(exponent field - 25)
+				out = std::ldexp(static_cast<T>((abs&0x3FF)|0x400), (abs>>10)-25);
+			else // subnormal or zero: mantissa * 2^-24
+				out = std::ldexp(static_cast<T>(abs), -24);
+			return (value&0x8000) ? -out : out; // apply the sign bit
+		}
+
+		/// Convert half-precision to floating-point.
+		/// \tparam T type to convert to (builtin floating-point type)
+		/// \param value half-precision value to convert
+		/// \return floating-point value
+		template<typename T> T half2float(unsigned int value)
+		{
+			return half2float_impl(value, T(), bool_type<std::numeric_limits<T>::is_iec559&&sizeof(typename bits<T>::type)==sizeof(T)>()); // tag selects the IEEE bit-level overload only if T is IEC 559 and bits<T>::type has the same size as T
+		}
+
+		/// Convert half-precision floating-point to integer.
+		/// \tparam R rounding mode to use
+		/// \tparam E `true` for round to even, `false` for round away from zero
+		/// \tparam I `true` to raise INEXACT exception (if inexact), `false` to never raise it
+		/// \tparam T type to convert to (builtin integer type with at least 16 bits precision, excluding any implicit sign bits)
+		/// \param value half-precision value to convert
+		/// \return rounded integer value
+		/// \exception FE_INVALID if value is not representable in type \a T
+		/// \exception FE_INEXACT if value had to be rounded and \a I is `true`
+		template<std::float_round_style R,bool E,bool I,typename T> T half2int(unsigned int value)
+		{
+			unsigned int abs = value & 0x7FFF; // magnitude bits without the sign
+			if(abs >= 0x7C00) // infinity or NaN: not representable
+			{
+				raise(FE_INVALID);
+				return (value&0x8000) ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
+			}
+			if(abs < 0x3800) // magnitude below 0.5: result is 0 unless a directed mode rounds away from zero
+			{
+				raise(FE_INEXACT, I);
+				return	(R==std::round_toward_infinity) ? T(~(value>>15)&(abs!=0)) : // 1 for positive nonzero values
+						(R==std::round_toward_neg_infinity) ? -T(value>0x8000) : // -1 for negative nonzero values
+						T();
+			}
+			int exp = 25 - (abs>>10); // right-shift count that removes the fractional bits (<=0 means shift left instead)
+			unsigned int m = (value&0x3FF) | 0x400; // mantissa with implicit leading bit
+			int32 i = static_cast<int32>((exp<=0) ? (m<<-exp) : ((m+( // rounding bias is added before the fraction is shifted out
+				(R==std::round_to_nearest) ? ((1<<(exp-1))-(~(m>>exp)&E)) : // half ULP, minus one when E is set and the truncated result is even
+				(R==std::round_toward_infinity) ? (((1<<exp)-1)&((value>>15)-1)) : // ULP-1 for positive values only
+				(R==std::round_toward_neg_infinity) ? (((1<<exp)-1)&-(value>>15)) : 0))>>exp)); // ULP-1 for negative values only
+			if((!std::numeric_limits<T>::is_signed && (value&0x8000)) || (std::numeric_limits<T>::digits<16 && // negative value into unsigned T, or narrow T out of range
+				((value&0x8000) ? (-i<std::numeric_limits<T>::min()) : (i>std::numeric_limits<T>::max()))))
+				raise(FE_INVALID);
+			else if(I && exp > 0 && (m&((1<<exp)-1))) // discarded fractional bits were nonzero
+				raise(FE_INEXACT);
+			return static_cast<T>((value&0x8000) ? -i : i); // apply the sign bit
+		}
+
+		/// \}
+		/// \name Mathematics
+		/// \{
+
+		/// Upper part of 64-bit multiplication, built from 16-bit partial products.
+		/// \tparam R rounding mode to use
+		/// \param x first factor
+		/// \param y second factor
+		/// \return upper 32 bit of \a x * \a y
+		template<std::float_round_style R> uint32 mulhi(uint32 x, uint32 y)
+		{
+			uint32 xy = (x>>16) * (y&0xFFFF), yx = (x&0xFFFF) * (y>>16), c = (xy&0xFFFF) + (yx&0xFFFF) + (((x&0xFFFF)*(y&0xFFFF))>>16); // cross products; c sums the parts that land in bits 16..31 of the product
+			return (x>>16)*(y>>16) + (xy>>16) + (yx>>16) + (c>>16) + // high product plus upper halves of the cross terms plus carry out of c
+				((R==std::round_to_nearest) ? ((c>>15)&1) : (R==std::round_toward_infinity) ? ((c&0xFFFF)!=0) : 0); // rounding increment derived from the low 16 bits of c
+		}
+
+		/// 64-bit multiplication.
+		/// \param x first factor
+		/// \param y second factor
+		/// \return upper 32 bit of \a x * \a y rounded to nearest
+		inline uint32 multiply64(uint32 x, uint32 y)
+		{
+		#if HALF_ENABLE_CPP11_LONG_LONG
+			return static_cast<uint32>((static_cast<unsigned long long>(x)*static_cast<unsigned long long>(y)+0x80000000)>>32); // adding 2^31 before the shift rounds the discarded low word to nearest
+		#else
+			return mulhi<std::round_to_nearest>(x, y); // fallback without a 64-bit type
+		#endif
+		}
+
+		/// 64-bit division.
+		/// \param x upper 32 bit of dividend
+		/// \param y divisor
+		/// \param s variable to store sticky bit for rounding
+		/// \return (\a x << 32) / \a y
+		inline uint32 divide64(uint32 x, uint32 y, int &s)
+		{
+		#if HALF_ENABLE_CPP11_LONG_LONG
+			unsigned long long xx = static_cast<unsigned long long>(x) << 32;
+			return s = (xx%y!=0), static_cast<uint32>(xx/y); // sticky bit is set when the division leaves a remainder
+		#else
+			y >>= 1; // NOTE(review): divisor is halved before the loop, dropping its lowest bit - presumably to keep rem<<1 within 32 bits; confirm
+			uint32 rem = x, div = 0;
+			for(unsigned int i=0; i<32; ++i) // shift-subtract division, one quotient bit per iteration
+			{
+				div <<= 1;
+				if(rem >= y)
+				{
+					rem -= y;
+					div |= 1;
+				}
+				rem <<= 1;
+			}
+			return s = rem > 1, div; // sticky bit derived from what is left of the remainder
+		#endif
+		}
+
+		/// Half precision positive modulus.
+		/// \tparam Q `true` to compute full quotient, `false` else
+		/// \tparam R `true` to compute signed remainder, `false` for positive remainder
+		/// \param x first operand as positive finite half-precision value
+		/// \param y second operand as positive finite half-precision value
+		/// \param quo address to store quotient at, `nullptr` if \a Q `false`
+		/// \return modulus of \a x / \a y
+		template<bool Q,bool R> unsigned int mod(unsigned int x, unsigned int y, int *quo = NULL)
+		{
+			unsigned int q = 0; // quotient bits (only meaningful when Q is true)
+			if(x > y) // reduction is only needed when x exceeds y
+			{
+				int absx = x, absy = y, expx = 0, expy = 0;
+				for(; absx<0x400; absx<<=1,--expx) ; // normalize subnormal x
+				for(; absy<0x400; absy<<=1,--expy) ; // normalize subnormal y
+				expx += absx >> 10; // biased exponents after normalization
+				expy += absy >> 10;
+				int mx = (absx&0x3FF) | 0x400, my = (absy&0x3FF) | 0x400; // 11-bit mantissas with implicit bit
+				for(int d=expx-expy; d; --d) // one shift-subtract step per unit of exponent difference
+				{
+					if(!Q && mx == my) // x is an exact multiple of y and no quotient is wanted
+						return 0;
+					if(mx >= my)
+					{
+						mx -= my;
+						q += Q;
+					}
+					mx <<= 1;
+					q <<= static_cast<int>(Q); // quotient only shifts when Q is true
+				}
+				if(!Q && mx == my)
+					return 0;
+				if(mx >= my) // final step at equal exponents
+				{
+					mx -= my;
+					++q;
+				}
+				if(Q)
+				{
+					q &= (1<<(std::numeric_limits<int>::digits-1)) - 1; // keep only the low digits-1 bits of the quotient
+					if(!mx) // zero remainder: store quotient and return +0
+						return *quo = q, 0;
+				}
+				for(; mx<0x400; mx<<=1,--expy) ; // renormalize the remainder mantissa
+				x = (expy>0) ? ((expy<<10)|(mx&0x3FF)) : (mx>>(1-expy)); // repack as normal or subnormal half
+			}
+			if(R)
+			{
+				unsigned int a, b; // chosen so that a vs. b compares 2*x against y
+				if(y < 0x800)
+				{
+					a = (x<0x400) ? (x<<1) : (x+0x400); // double x (mantissa shift for subnormals, exponent increment otherwise)
+					b = y;
+				}
+				else
+				{
+					a = x;
+					b = y - 0x400; // halve y by decrementing its exponent
+				}
+				if(a > b || (a == b && (q&1))) // x above y/2, or exactly y/2 with an odd quotient
+				{
+					int exp = (y>>10) + (y<=0x3FF), d = exp - (x>>10) - (x<=0x3FF); // exponent of y (subnormals count as 1) and its distance to x
+					int m = (((y&0x3FF)|((y>0x3FF)<<10))<<1) - (((x&0x3FF)|((x>0x3FF)<<10))<<(1-d)); // mantissa of y - x, kept with one extra low bit
+					for(; m<0x800 && exp>1; m<<=1,--exp) ; // normalize while the exponent allows
+					x = 0x8000 + ((exp-1)<<10) + (m>>1); // pack with the sign bit set (negative remainder)
+					q += Q;
+				}
+			}
+			if(Q)
+				*quo = q;
+			return x;
+		}
+
+		/// Fixed point square root.
+		/// \tparam F number of fractional bits
+		/// \param r radicand in Q1.F fixed point format (modified in place, holds the leftover remainder on return)
+		/// \param exp exponent (modified in place, halved on return)
+		/// \return square root as Q1.F/2
+		template<unsigned int F> uint32 sqrt(uint32 &r, int &exp)
+		{
+			int i = exp & 1; // odd exponent: move one factor of two into the radicand
+			r <<= i;
+			exp = (exp-i) / 2; // exponent of the root
+			uint32 m = 0;
+			for(uint32 bit=static_cast<uint32>(1)<<F; bit; bit>>=2) // bit-by-bit root extraction, two radicand bits per step
+			{
+				if(r < m+bit)
+					m >>= 1;
+				else
+				{
+					r -= m + bit;
+					m = (m>>1) + bit;
+				}
+			}
+			return m;
+		}
+
+		/// Fixed point binary exponential.
+		/// This uses the BKM algorithm in E-mode.
+		/// \param m exponent in [0,1) as Q0.31
+		/// \param n number of iterations (at most 32)
+		/// \return 2 ^ \a m as Q1.31
+		inline uint32 exp2(uint32 m, unsigned int n = 32)
+		{
+			static const uint32 logs[] = { // logs[i] = log2(1 + 2^-i) scaled by 2^31
+				0x80000000, 0x4AE00D1D, 0x2934F098, 0x15C01A3A, 0x0B31FB7D, 0x05AEB4DD, 0x02DCF2D1, 0x016FE50B,
+				0x00B84E23, 0x005C3E10, 0x002E24CA, 0x001713D6, 0x000B8A47, 0x0005C53B, 0x0002E2A3, 0x00017153,
+				0x0000B8AA, 0x00005C55, 0x00002E2B, 0x00001715, 0x00000B8B, 0x000005C5, 0x000002E3, 0x00000171,
+				0x000000B9, 0x0000005C, 0x0000002E, 0x00000017, 0x0000000C, 0x00000006, 0x00000003, 0x00000001 };
+			if(!m) // 2^0 = 1.0
+				return 0x80000000;
+			uint32 mx = 0x80000000, my = 0; // mx: running product (starts at 1.0), my: sum of logs consumed so far
+			for(unsigned int i=1; i<n; ++i) // starts at 1, so logs[0] is never read
+			{
+				uint32 mz = my + logs[i];
+				if(mz <= m) // this factor still fits below the target exponent
+				{
+					my = mz;
+					mx += mx >> i; // multiply by (1 + 2^-i)
+				}
+			}
+			return mx;
+		}
+
+		/// Fixed point binary logarithm.
+		/// This uses the BKM algorithm in L-mode.
+		/// \param m mantissa in [1,2) as Q1.30
+		/// \param n number of iterations (at most 32)
+		/// \return log2(\a m) as Q0.31
+		inline uint32 log2(uint32 m, unsigned int n = 32)
+		{
+			static const uint32 logs[] = { // logs[i] = log2(1 + 2^-i) scaled by 2^31
+				0x80000000, 0x4AE00D1D, 0x2934F098, 0x15C01A3A, 0x0B31FB7D, 0x05AEB4DD, 0x02DCF2D1, 0x016FE50B,
+				0x00B84E23, 0x005C3E10, 0x002E24CA, 0x001713D6, 0x000B8A47, 0x0005C53B, 0x0002E2A3, 0x00017153,
+				0x0000B8AA, 0x00005C55, 0x00002E2B, 0x00001715, 0x00000B8B, 0x000005C5, 0x000002E3, 0x00000171,
+				0x000000B9, 0x0000005C, 0x0000002E, 0x00000017, 0x0000000C, 0x00000006, 0x00000003, 0x00000001 };
+			if(m == 0x40000000) // log2(1.0) = 0
+				return 0;
+			uint32 mx = 0x40000000, my = 0; // mx: running product (starts at 1.0 in Q1.30), my: accumulated logarithm
+			for(unsigned int i=1; i<n; ++i) // starts at 1, so logs[0] is never read
+			{
+				uint32 mz = mx + (mx>>i); // candidate product mx * (1 + 2^-i)
+				if(mz <= m) // factor accepted while the product stays at or below m
+				{
+					mx = mz;
+					my += logs[i];
+				}
+			}
+			return my;
+		}
+
+		/// Fixed point sine and cosine.
+		/// This uses the CORDIC algorithm in rotation mode.
+		/// \param mz angle in [-pi/2,pi/2] as Q1.30
+		/// \param n number of iterations (at most 31)
+		/// \return sine and cosine of \a mz as Q1.30
+		inline std::pair<uint32,uint32> sincos(uint32 mz, unsigned int n = 31)
+		{
+			static const uint32 angles[] = { // angles[i] = atan(2^-i) scaled by 2^30 (first entry is pi/4)
+				0x3243F6A9, 0x1DAC6705, 0x0FADBAFD, 0x07F56EA7, 0x03FEAB77, 0x01FFD55C, 0x00FFFAAB, 0x007FFF55,
+				0x003FFFEB, 0x001FFFFD, 0x00100000, 0x00080000, 0x00040000, 0x00020000, 0x00010000, 0x00008000,
+				0x00004000, 0x00002000, 0x00001000, 0x00000800, 0x00000400, 0x00000200, 0x00000100, 0x00000080,
+				0x00000040, 0x00000020, 0x00000010, 0x00000008, 0x00000004, 0x00000002, 0x00000001 };
+			uint32 mx = 0x26DD3B6A, my = 0; // start vector (0x26DD3B6A/2^30 is about 0.60725, 0); presumably pre-scaled to cancel the CORDIC gain
+			for(unsigned int i=0; i<n; ++i)
+			{
+				uint32 sign = sign_mask(mz); // helper defined elsewhere; presumably all ones when the remaining angle is negative
+				uint32 tx = mx - (arithmetic_shift(my, i)^sign) + sign; // (v^sign)-sign negates v when sign is all ones
+				uint32 ty = my + (arithmetic_shift(mx, i)^sign) - sign;
+				mx = tx; my = ty; mz -= (angles[i]^sign) - sign; // rotate and consume the matching angle
+			}
+			return std::make_pair(my, mx); // (sine, cosine)
+		}
+
+		/// Fixed point arc tangent.
+		/// This uses the CORDIC algorithm in vectoring mode.
+		/// \param my y coordinate as Q0.30
+		/// \param mx x coordinate as Q0.30
+		/// \param n number of iterations (at most 31)
+		/// \return arc tangent of \a my / \a mx as Q1.30
+		inline uint32 atan2(uint32 my, uint32 mx, unsigned int n = 31)
+		{
+			static const uint32 angles[] = { // angles[i] = atan(2^-i) scaled by 2^30 (first entry is pi/4)
+				0x3243F6A9, 0x1DAC6705, 0x0FADBAFD, 0x07F56EA7, 0x03FEAB77, 0x01FFD55C, 0x00FFFAAB, 0x007FFF55,
+				0x003FFFEB, 0x001FFFFD, 0x00100000, 0x00080000, 0x00040000, 0x00020000, 0x00010000, 0x00008000,
+				0x00004000, 0x00002000, 0x00001000, 0x00000800, 0x00000400, 0x00000200, 0x00000100, 0x00000080,
+				0x00000040, 0x00000020, 0x00000010, 0x00000008, 0x00000004, 0x00000002, 0x00000001 };
+			uint32 mz = 0; // accumulated angle
+			for(unsigned int i=0; i<n; ++i)
+			{
+				uint32 sign = sign_mask(my); // helper defined elsewhere; presumably all ones when y is currently negative
+				uint32 tx = mx + (arithmetic_shift(my, i)^sign) - sign; // (v^sign)-sign negates v when sign is all ones
+				uint32 ty = my - (arithmetic_shift(mx, i)^sign) + sign;
+				mx = tx; my = ty; mz += (angles[i]^sign) - sign; // rotate towards the x axis and record the angle
+			}
+			return mz;
+		}
+
+		/// Reduce argument for trigonometric functions.
+		/// \param abs half-precision floating-point value
+		/// \param k value to take quarter period
+		/// \return \a abs reduced to [-pi/4,pi/4] as Q0.30
+		inline uint32 angle_arg(unsigned int abs, int &k)
+		{
+			uint32 m = (abs&0x3FF) | ((abs>0x3FF)<<10); // mantissa, with implicit bit for normal values
+			int exp = (abs>>10) + (abs<=0x3FF) - 15; // unbiased exponent, subnormals treated as exponent field 1
+			if(abs < 0x3A48) // 0x3A48 is about 0.785, just under pi/4: no reduction needed
+				return k = 0, m << (exp+20);
+		#if HALF_ENABLE_CPP11_LONG_LONG
+			unsigned long long y = m * 0xA2F9836E4E442, mask = (1ULL<<(62-exp)) - 1, yi = (y+(mask>>1)) & ~mask, f = y - yi; // constant holds the leading hex digits of 2/pi; yi is y rounded to a whole quarter period, f the signed rest
+			uint32 sign = -static_cast<uint32>(f>>63); // all ones when the rest is negative
+			k = static_cast<int>(yi>>(62-exp));
+			return (multiply64(static_cast<uint32>((sign ? -f : f)>>(31-exp)), 0xC90FDAA2)^sign) - sign; // scale |f| by the pi constant (0xC90FDAA2 = pi/4 * 2^32), then restore the sign
+		#else
+			uint32 yh = m*0xA2F98 + mulhi<std::round_toward_zero>(m, 0x36E4E442), yl = (m*0x36E4E442) & 0xFFFFFFFF; // same product split into high and low 32-bit words
+			uint32 mask = (static_cast<uint32>(1)<<(30-exp)) - 1, yi = (yh+(mask>>1)) & ~mask, sign = -static_cast<uint32>(yi>yh); // sign is all ones when rounding went up
+			k = static_cast<int>(yi>>(30-exp));
+			uint32 fh = (yh^sign) + (yi^~sign) - ~sign, fl = (yl^sign) - sign; // magnitude of the rest, high and low words
+			return (multiply64((exp>-1) ? (((fh<<(1+exp))&0xFFFFFFFF)|((fl&0xFFFFFFFF)>>(31-exp))) : fh, 0xC90FDAA2)^sign) - sign;
+		#endif
+		}
+
+		/// Get arguments for atan2 function.
+		/// \param abs half-precision floating-point value
+		/// \return \a abs and sqrt(1 - \a abs^2) as Q0.30
+		inline std::pair<uint32,uint32> atan2_args(unsigned int abs)
+		{
+			int exp = -15;
+			for(; abs<0x400; abs<<=1,--exp) ; // normalize subnormals
+			exp += abs >> 10; // unbiased exponent
+			uint32 my = ((abs&0x3FF)|0x400) << 5, r = my * my; // mantissa with implicit bit at bit 15, and its square
+			int rexp = 2 * exp; // exponent of the square; the negative shifts below presumably rely on abs < 1 - confirm with callers
+			r = 0x40000000 - ((rexp>-31) ? ((r>>-rexp)|((r&((static_cast<uint32>(1)<<-rexp)-1))!=0)) : 1); // 1 - abs^2, shifted-out bits folded into a sticky low bit
+			for(rexp=0; r<0x40000000; r<<=1,--rexp) ; // renormalize; NOTE(review): r == 0 (abs == 1.0) would never leave this loop - verify callers exclude it
+			uint32 mx = sqrt<30>(r, rexp); // r and rexp are updated in place by sqrt (remainder, halved exponent)
+			int d = exp - rexp; // exponent gap between abs and the root
+			if(d < 0) // abs has the smaller exponent: shift my down (with rounding) or up to align
+				return std::make_pair((d<-14) ? ((my>>(-d-14))+((my>>(-d-15))&1)) : (my<<(14+d)), (mx<<14)+(r<<13)/mx);
+			if(d > 0) // root has the smaller exponent: align mx instead
+				return std::make_pair(my<<14, (d>14) ? ((mx>>(d-14))+((mx>>(d-15))&1)) : ((d==14) ? mx : ((mx<<(14-d))+(r<<(13-d))/mx)));
+			return std::make_pair(my<<13, (mx<<13)+(r<<12)/mx); // equal exponents; the r/mx term extends the root using the sqrt remainder
+		}
+
+		/// Get exponentials for hyperbolic computation
+		/// \param abs half-precision floating-point value
+		/// \param exp variable to take unbiased exponent of larger result
+		/// \param n number of BKM iterations (at most 32)
+		/// \return exp(abs) and exp(-\a abs) as Q1.31 with same exponent
+		inline std::pair<uint32,uint32> hyperbolic_args(unsigned int abs, int &exp, unsigned int n = 32)
+		{
+			uint32 mx = detail::multiply64(static_cast<uint32>((abs&0x3FF)+((abs>0x3FF)<<10))<<21, 0xB8AA3B29), my; // mantissa times log2(e) (0xB8AA3B29 / 2^31 is about 1.4427)
+			int e = (abs>>10) + (abs<=0x3FF); // biased exponent, subnormals counted as 1
+			if(e < 14) // product has no integer part: only align the fraction
+			{
+				exp = 0;
+				mx >>= 14 - e;
+			}
+			else
+			{
+				exp = mx >> (45-e); // integer part of the base-2 exponent
+				mx = (mx<<(e-14)) & 0x7FFFFFFF; // fractional part as Q0.31
+			}
+			mx = exp2(mx, n); // 2^fraction as Q1.31
+			int d = exp << 1, s; // d: right shift that brings the reciprocal to the exponent of mx
+			if(mx > 0x80000000) // mantissa above 1.0: form the reciprocal by division
+			{
+				my = divide64(0x80000000, mx, s);
+				my |= s; // fold the division remainder into a sticky low bit
+				++d;
+			}
+			else
+				my = mx; // mantissa is exactly 1.0, so its reciprocal is the same value
+			return std::make_pair(mx, (d<31) ? ((my>>d)|((my&((static_cast<uint32>(1)<<d)-1))!=0)) : 1); // second value aligned with a sticky bit, or just 1 when shifted out entirely
+		}
+
+		/// Postprocessing for binary exponential.
+		/// \tparam R rounding mode to use
+		/// \param m fractional part of exponent as Q0.31
+		/// \param exp absolute value of unbiased exponent
+		/// \param esign sign of actual exponent
+		/// \param sign sign bit of result
+		/// \param n number of BKM iterations (at most 32)
+		/// \return value converted to half-precision
+		/// \exception FE_OVERFLOW on overflows
+		/// \exception FE_UNDERFLOW on underflows
+		/// \exception FE_INEXACT if value had to be rounded or \a I is `true`
+		template<std::float_round_style R> unsigned int exp2_post(uint32 m, int exp, bool esign, unsigned int sign = 0, unsigned int n = 32)
+		{
+			if(esign) // negative exponent
+			{
+				exp = -exp - (m!=0); // negate, stepping one lower when there is a fractional part
+				if(exp < -25)
+					return underflow<R>(sign);
+				else if(exp == -25)
+					return rounded<R,false>(sign, 1, m!=0);
+			}
+			else if(exp > 15)
+				return overflow<R>(sign);
+			if(!m) // exact power of two: encode directly as a normal or subnormal value
+				return sign | (((exp+=15)>0) ? (exp<<10) : check_underflow(0x200>>-exp));
+			m = exp2(m, n); // 2^fraction as Q1.31
+			int s = 0; // sticky bit, only set by the division below
+			if(esign)
+				m = divide64(0x80000000, m, s); // reciprocal for negative exponents
+			return fixed2half<R,31,false,false,true>(m, exp+14, sign, s);
+		}
+
+		/// Postprocessing for binary logarithm.
+		/// \tparam R rounding mode to use
+		/// \tparam L logarithm for base transformation as Q1.31
+		/// \param m fractional part of logarithm as Q0.31
+		/// \param ilog signed integer part of logarithm
+		/// \param exp biased exponent of result
+		/// \param sign sign bit of result
+		/// \return value base-transformed and converted to half-precision
+		/// \exception FE_OVERFLOW on overflows
+		/// \exception FE_UNDERFLOW on underflows
+		/// \exception FE_INEXACT if no other exception occurred
+		template<std::float_round_style R,uint32 L> unsigned int log2_post(uint32 m, int ilog, int exp, unsigned int sign = 0)
+		{
+			uint32 msign = sign_mask(ilog); // helper defined elsewhere; presumably all ones for negative ilog
+			m = (((static_cast<uint32>(ilog)<<27)+(m>>4))^msign) - msign; // integer part above 27 fractional bits, then conditionally negated via msign
+			if(!m) // logarithm is exactly zero
+				return 0;
+			for(; m<0x80000000; m<<=1,--exp) ; // normalize so the top bit is set
+			int i = m >= L, s; // quotient m/L will be at least 1
+			exp += i;
+			m >>= 1 + i;
+			sign ^= msign & 0x8000; // flip the result sign when msign is set
+			if(exp < -11)
+				return underflow<R>(sign);
+			m = divide64(m, L, s); // base transformation: divide by L
+			return fixed2half<R,30,false,false,true>(m, exp, sign, 1); // NOTE(review): s from divide64 is not forwarded; the sticky argument is the constant 1
+		}
+
+		/// Hypotenuse square root and postprocessing.
+		/// \tparam R rounding mode to use
+		/// \param r mantissa as Q2.30
+		/// \param exp biased exponent
+		/// \return square root converted to half-precision
+		/// \exception FE_OVERFLOW on overflows
+		/// \exception FE_UNDERFLOW on underflows
+		/// \exception FE_INEXACT if value had to be rounded
+		template<std::float_round_style R> unsigned int hypot_post(uint32 r, int exp)
+		{
+			int i = r >> 31; // 1 when the top bit of r is set
+			if((exp+=i) > 46)
+				return overflow<R>();
+			if(exp < -34)
+				return underflow<R>();
+			r = (r>>i) | (r&i); // shift down by i, keeping the dropped bit as a sticky low bit
+			uint32 m = sqrt<30>(r, exp+=15); // exp is adjusted by 15 first, then sqrt halves it and leaves the remainder in r
+			return fixed2half<R,15,false,false,false>(m, exp-1, 0, r!=0); // nonzero remainder acts as the sticky bit
+		}
+
+		/// Division and postprocessing for tangents.
+		/// \tparam R rounding mode to use
+		/// \param my dividend as Q1.31
+		/// \param mx divisor as Q1.31
+		/// \param exp biased exponent of result
+		/// \param sign sign bit of result
+		/// \return quotient converted to half-precision
+		/// \exception FE_OVERFLOW on overflows
+		/// \exception FE_UNDERFLOW on underflows
+		/// \exception FE_INEXACT if no other exception occurred
+		template<std::float_round_style R> unsigned int tangent_post(uint32 my, uint32 mx, int exp, unsigned int sign = 0)
+		{
+			int i = my >= mx, s; // quotient is at least 1: raise the exponent by one
+			exp += i;
+			if(exp > 29)
+				return overflow<R>(sign);
+			if(exp < -11)
+				return underflow<R>(sign);
+			uint32 m = divide64(my>>(i+1), mx, s); // dividend pre-shifted by i+1 bits; s receives the sticky bit
+			return fixed2half<R,30,false,false,true>(m, exp, sign, s);
+		}
+
+		/// Area function and postprocessing.
+		/// This computes the value directly in Q2.30 using the representation `asinh|acosh(x) = log(x+sqrt(x^2+|-1))`.
+		/// \tparam R rounding mode to use
+		/// \tparam S `true` for asinh, `false` for acosh
+		/// \param arg half-precision argument
+		/// \return asinh|acosh(\a arg) converted to half-precision
+		/// \exception FE_OVERFLOW on overflows
+		/// \exception FE_UNDERFLOW on underflows
+		/// \exception FE_INEXACT if no other exception occurred
+		template<std::float_round_style R,bool S> unsigned int area(unsigned int arg)
+		{
+			int abs = arg & 0x7FFF, expx = (abs>>10) + (abs<=0x3FF) - 15, expy = -15, ilog, i; // expx: unbiased exponent of |arg|
+			uint32 mx = static_cast<uint32>((abs&0x3FF)|((abs>0x3FF)<<10)) << 20, my, r; // mx: mantissa of |arg| with its leading bit at bit 30
+			for(; abs<0x400; abs<<=1,--expy) ; // normalize subnormals
+			expy += abs >> 10;
+			r = ((abs&0x3FF)|0x400) << 5;
+			r *= r; // squared mantissa
+			i = r >> 31; // 1 when the square has its top bit set
+			expy = 2*expy + i; // exponent of the square
+			r >>= i;
+			if(S) // asinh: x^2 + 1
+			{
+				if(expy < 0) // square is below 1: align it to the constant 1.0
+				{
+					r = 0x40000000 + ((expy>-30) ? ((r>>-expy)|((r&((static_cast<uint32>(1)<<-expy)-1))!=0)) : 1); // shifted-out bits folded into a sticky low bit
+					expy = 0;
+				}
+				else // square is at least 1: align the constant 1.0 to it
+				{
+					r += 0x40000000 >> expy;
+					i = r >> 31;
+					r = (r>>i) | (r&i); // renormalize after a carry, keeping the dropped bit sticky
+					expy += i;
+				}
+			}
+			else // acosh: x^2 - 1
+			{
+				r -= 0x40000000 >> expy;
+				for(; r<0x40000000; r<<=1,--expy) ; // renormalize after cancellation
+			}
+			my = sqrt<30>(r, expy); // r and expy are updated in place by sqrt (remainder, halved exponent)
+			my = (my<<15) + (r<<14)/my; // widen the root and extend it using the sqrt remainder
+			if(S)
+			{
+				mx >>= expy - expx; // align x to the exponent of the root
+				ilog = expy;
+			}
+			else
+			{
+				my >>= expx - expy; // align the root to the exponent of x
+				ilog = expx;
+			}
+			my += mx; // x + sqrt(x^2 +|- 1)
+			i = my >> 31; // 1 when the sum has its top bit set
+			static const int G = S && (R==std::round_to_nearest); // 1 only for asinh with round-to-nearest; adds one log2 iteration and the (G<<3) offset below
+			return log2_post<R,0xB8AA3B2A>(log2(my>>i, 26+S+G)+(G<<3), ilog+i, 17, arg&(static_cast<unsigned>(S)<<15)); // divide by log2(e) (0xB8AA3B2A as Q1.31); sign of arg is kept only for asinh
+		}
+
+		/// Class for 1.31 unsigned floating-point computation
+		struct f31
+		{
+			/// Constructor.
+			/// \param mant mantissa as 1.31
+			/// \param e exponent
+			HALF_CONSTEXPR f31(uint32 mant, int e) : m(mant), exp(e) {}
+
+			/// Constructor.
+			/// \param abs unsigned half-precision value
+			f31(unsigned int abs) : exp(-15)
+			{
+				for(; abs<0x400; abs<<=1,--exp) ; // normalize subnormals
+				m = static_cast<uint32>((abs&0x3FF)|0x400) << 21; // implicit bit lands at bit 31
+				exp += (abs>>10); // unbiased exponent
+			}
+
+			/// Addition operator.
+			/// \param a first operand
+			/// \param b second operand
+			/// \return \a a + \a b
+			friend f31 operator+(f31 a, f31 b)
+			{
+				if(b.exp > a.exp) // make a the operand with the larger exponent
+					std::swap(a, b);
+				int d = a.exp - b.exp;
+				uint32 m = a.m + ((d<32) ? (b.m>>d) : 0); // align b to a; b is dropped when 32 or more bits apart
+				int i = (m&0xFFFFFFFF) < a.m; // 1 when the sum wrapped around 32 bits
+				return f31(((m+i)>>i)|0x80000000, a.exp+i); // on wrap: shift down one bit with rounding and restore the top bit
+			}
+
+			/// Subtraction operator.
+			/// \param a first operand
+			/// \param b second operand
+			/// \return \a a - \a b
+			friend f31 operator-(f31 a, f31 b)
+			{
+				int d = a.exp - b.exp, exp = a.exp; // NOTE(review): d is not checked for negative values; presumably callers guarantee a >= b
+				uint32 m = a.m - ((d<32) ? (b.m>>d) : 0);
+				if(!m) // exact cancellation
+					return f31(0, -32);
+				for(; m<0x80000000; m<<=1,--exp) ; // renormalize so the top bit is set
+				return f31(m, exp);
+			}
+
+			/// Multiplication operator.
+			/// \param a first operand
+			/// \param b second operand
+			/// \return \a a * \a b
+			friend f31 operator*(f31 a, f31 b)
+			{
+				uint32 m = multiply64(a.m, b.m); // upper 32 bits of the product, rounded to nearest
+				int i = m >> 31; // 1 when the product already has its top bit set
+				return f31(m<<(1-i), a.exp + b.exp + i); // otherwise shift up by one to normalize
+			}
+
+			/// Division operator.
+			/// \param a first operand
+			/// \param b second operand
+			/// \return \a a / \a b
+			friend f31 operator/(f31 a, f31 b)
+			{
+				int i = a.m >= b.m, s; // quotient of mantissas is at least 1; s receives a sticky bit that is not used here
+				uint32 m = divide64((a.m+i)>>i, b.m, s); // dividend pre-shifted (with rounding) so the quotient fits in 32 bits
+				return f31(m, a.exp - b.exp + i - 1);
+			}
+
+			uint32 m;			///< mantissa as 1.31.
+			int exp;			///< exponent.
+		};
+
+		/// Error function and postprocessing.
+		/// This computes the value directly in Q1.31 using the approximations given
+		/// [here](https://en.wikipedia.org/wiki/Error_function#Approximation_with_elementary_functions).
+		/// \tparam R rounding mode to use
+		/// \tparam C `true` for complementary error function, `false` else
+		/// \param arg half-precision function argument
+		/// \return approximated value of error function in half-precision
+		/// \exception FE_OVERFLOW on overflows
+		/// \exception FE_UNDERFLOW on underflows
+		/// \exception FE_INEXACT if no other exception occurred
+		template<std::float_round_style R,bool C> unsigned int erf(unsigned int arg)
+		{
+			unsigned int abs = arg & 0x7FFF, sign = arg & 0x8000;	// magnitude bits and sign bit of the argument
+			f31 x(abs), x2 = x * x * f31(0xB8AA3B29, 0), t = f31(0x80000000, 0) / (f31(0x80000000, 0)+f31(0xA7BA054A, -2)*x), t2 = t * t;	// x2 = x*x scaled by the constant gamma() names lbe; t = 1/(1+p*x)
+			f31 e = ((f31(0x87DC2213, 0)*t2+f31(0xB5F0E2AE, 0))*t2+f31(0x82790637, -2)-(f31(0xBA00E2B8, 0)*t2+f31(0x91A98E62, -2))*t) * t /	// five-term polynomial in t, divided by ...
+					((x2.exp<0) ? f31(exp2((x2.exp>-32) ? (x2.m>>-x2.exp) : 0, 30), 0) : f31(exp2((x2.m<<x2.exp)&0x7FFFFFFF, 22), x2.m>>(31-x2.exp)));	// ... exp2 of x2: fractional bits go through exp2(), integer bits become the exponent
+			return (!C || sign) ? fixed2half<R,31,false,true,true>(0x80000000-(e.m>>(C-e.exp)), 14+C, sign&(C-1U)) :	// erf: 1-e with the argument's sign; erfc of a negative argument: 2-e, sign cleared
+					(e.exp<-25) ? underflow<R>() : fixed2half<R,30,false,false,true>(e.m>>1, e.exp+14, 0, e.m&1);	// erfc of a non-negative argument: e itself, underflow when e.exp < -25
+		}
+
+		/// Gamma function and postprocessing.
+		/// This approximates the value of either the gamma function or its logarithm directly in Q1.31.
+		/// \tparam R rounding mode to use
+		/// \tparam L `true` for logarithm of gamma function, `false` for gamma function
+		/// \param arg half-precision floating-point value
+		/// \return lgamma/tgamma(\a arg) in half-precision
+		/// \exception FE_OVERFLOW on overflows
+		/// \exception FE_UNDERFLOW on underflows
+		/// \exception FE_INEXACT if \a arg is not a positive integer
+		template<std::float_round_style R,bool L> unsigned int gamma(unsigned int arg)
+		{
+/*			static const double p[] ={ 2.50662827563479526904, 225.525584619175212544, -268.295973841304927459, 80.9030806934622512966, -5.00757863970517583837, 0.0114684895434781459556 };
+			double t = arg + 4.65, s = p[0];
+			for(unsigned int i=0; i<5; ++i)
+				s += p[i+1] / (arg+i);
+			return std::log(s) + (arg-0.5)*std::log(t) - t;
+*/			static const f31 pi(0xC90FDAA2, 1), lbe(0xB8AA3B29, 0);	// as 1.31 values: pi ~ 3.14159 and lbe ~ 1.4427 (log2 of e)
+			unsigned int abs = arg & 0x7FFF, sign = arg & 0x8000;	// magnitude bits and sign bit of the argument
+			bool bsign = sign != 0;
+			f31 z(abs), x = sign ? (z+f31(0x80000000, 0)) : z, t = x + f31(0x94CCCCCD, 2), s =	// x = |arg|+1 for negative arguments, else |arg|; t = x + 4.65 as in the reference above
+				f31(0xA06C9901, 1) + f31(0xBBE654E2, -7)/(x+f31(0x80000000, 2)) + f31(0xA1CE6098, 6)/(x+f31(0x80000000, 1))
+				+ f31(0xE1868CB7, 7)/x - f31(0x8625E279, 8)/(x+f31(0x80000000, 0)) - f31(0xA03E158F, 2)/(x+f31(0xC0000000, 1));	// fixed-point form of the partial-fraction sum s in the reference above
+			int i = (s.exp>=2) + (s.exp>=4) + (s.exp>=8) + (s.exp>=16);
+			s = f31((static_cast<uint32>(s.exp)<<(31-i))+(log2(s.m>>1, 28)>>i), i) / lbe;	// s = log2(s)/lbe; NOTE(review): i appears to size the integer field holding s.exp - confirm
+			if(x.exp != -1 || x.m != 0x80000000)	// skipped only when x is exactly 0.5, where the (x-0.5) factor vanishes
+			{
+				i = (t.exp>=2) + (t.exp>=4) + (t.exp>=8);
+				f31 l = f31((static_cast<uint32>(t.exp)<<(31-i))+(log2(t.m>>1, 30)>>i), i) / lbe;
+				s = (x.exp<-1) ? (s-(f31(0x80000000, -1)-x)*l) : (s+(x-f31(0x80000000, -1))*l);	// s += (x-0.5)*l, ordered so that each f31 subtraction has a non-negative result
+			}
+			s = x.exp ? (s-t) : (t-s);
+			if(bsign)	// negative argument: prepare z for the sine-based correction used further below
+			{
+				if(z.exp >= 0)
+				{
+					sign &= (L|((z.m>>(31-z.exp))&1)) - 1;
+					for(z=f31((z.m<<(1+z.exp))&0xFFFFFFFF, -1); z.m<0x80000000; z.m<<=1,--z.exp) ;	// drop the integer bits of z and renormalize the fraction
+				}
+				if(z.exp == -1)
+					z = f31(0x80000000, 0) - z;
+				if(z.exp < -1)
+				{
+					z = z * pi;
+					z.m = sincos(z.m>>(1-z.exp), 30).first;
+					for(z.exp=1; z.m<0x80000000; z.m<<=1,--z.exp) ;
+				}
+				else
+					z = f31(0x80000000, 0);
+			}
+			if(L)	// logarithm of gamma requested
+			{
+				if(bsign)
+				{
+					f31 l(0x92868247, 0);
+					if(z.exp < 0)
+					{
+						uint32 m = log2((z.m+1)>>1, 27);
+						z = f31(-((static_cast<uint32>(z.exp)<<26)+(m>>5)), 5);
+						for(; z.m<0x80000000; z.m<<=1,--z.exp) ;
+						l = l + z / lbe;
+					}
+					sign = static_cast<unsigned>(x.exp&&(l.exp<s.exp||(l.exp==s.exp&&l.m<s.m))) << 15;	// result is negative when x.exp is nonzero and l < s
+					s = sign ? (s-l) : x.exp ? (l-s) : (l+s);
+				}
+				else
+				{
+					sign = static_cast<unsigned>(x.exp==0) << 15;
+					if(s.exp < -24)
+						return underflow<R>(sign);
+					if(s.exp > 15)
+						return overflow<R>(sign);
+				}
+			}
+			else	// gamma function itself: exponentiate the logarithm computed above
+			{
+				s = s * lbe;
+				uint32 m;
+				if(s.exp < 0)
+				{
+					m = s.m >> -s.exp;
+					s.exp = 0;
+				}
+				else
+				{
+					m = (s.m<<s.exp) & 0x7FFFFFFF;	// fractional bits of s
+					s.exp = (s.m>>(31-s.exp));	// integer bits of s become the result exponent
+				}
+				s.m = exp2(m, 27);
+				if(!x.exp)
+					s = f31(0x80000000, 0) / s;
+				if(bsign)
+				{
+					if(z.exp < 0)
+						s = s * z;
+					s = pi / s;
+					if(s.exp < -24)
+						return underflow<R>(sign);
+				}
+				else if(z.exp > 0 && !(z.m&((1<<(31-z.exp))-1)))	// no fractional mantissa bits set: integral argument
+					return ((s.exp+14)<<10) + (s.m>>21);	// pack exponent and mantissa directly, bypassing fixed2half
+				if(s.exp > 15)
+					return overflow<R>(sign);
+			}
+			return fixed2half<R,31,false,false,true>(s.m, s.exp+14, sign);
+		}
+		/// \}
+
+		template<typename,typename,std::float_round_style> struct half_caster;
+	}
+
+	/// Half-precision floating-point type.
+	/// This class implements an IEEE-conformant half-precision floating-point type with the usual arithmetic
+	/// operators and conversions. It is implicitly convertible to single-precision floating-point, which makes arithmetic
+	/// expressions and functions with mixed-type operands to be of the most precise operand type.
+	///
+	/// According to the C++98/03 definition, the half type is not a POD type. But according to C++11's less strict and
+	/// extended definitions it is both a standard layout type and a trivially copyable type (even if not a POD type), which
+	/// means it can be standard-conformantly copied using raw binary copies. But in this context some more words about the
+	/// actual size of the type. Although the half is representing an IEEE 16-bit type, it does not necessarily have to be of
+	/// exactly 16-bits size. But on any reasonable implementation the actual binary representation of this type will most
+	/// probably not involve any additional "magic" or padding beyond the simple binary representation of the underlying 16-bit
+	/// IEEE number, even if not strictly guaranteed by the standard. But even then it only has an actual size of 16 bits if
+	/// your C++ implementation supports an unsigned integer type of exactly 16 bits width. But this should be the case on
+	/// nearly any reasonable platform.
+	///
+	/// So if your C++ implementation is not totally exotic or imposes special alignment requirements, it is a reasonable
+	/// assumption that the data of a half is just comprised of the 2 bytes of the underlying IEEE representation.
+	class half
+	{
+	public:
+		/// \name Construction and assignment
+		/// \{
+
+		/// Default constructor.
+		/// This initializes the half to 0. Although this does not match the builtin types' default-initialization semantics
+		/// and may be less efficient than no initialization, it is needed to provide proper value-initialization semantics.
+		HALF_CONSTEXPR half() HALF_NOEXCEPT : data_() {}
+
+		/// Conversion constructor.
+		/// \param rhs float to convert
+		/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+		explicit half(float rhs) : data_(static_cast<detail::uint16>(detail::float2half<round_style>(rhs))) {}
+	
+		/// Conversion to single-precision.
+		/// \return single-precision value representing this half
+		operator float() const { return detail::half2float<float>(data_); }
+
+		/// Assignment operator.
+		/// \param rhs single-precision value to copy from
+		/// \return reference to this half
+		/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+		half& operator=(float rhs) { data_ = static_cast<detail::uint16>(detail::float2half<round_style>(rhs)); return *this; }
+
+		/// \}
+		/// \name Arithmetic updates
+		/// \{
+
+		/// Arithmetic assignment.
+		/// Equivalent to `*this = *this + rhs`.
+		/// \param rhs half expression to add
+		/// \return reference to this half
+		/// \exception FE_... according to operator+(half,half)
+		half& operator+=(half rhs) { return *this = *this + rhs; }
+
+		/// Arithmetic assignment.
+		/// Equivalent to `*this = *this - rhs`.
+		/// \param rhs half expression to subtract
+		/// \return reference to this half
+		/// \exception FE_... according to operator-(half,half)
+		half& operator-=(half rhs) { return *this = *this - rhs; }
+
+		/// Arithmetic assignment.
+		/// Equivalent to `*this = *this * rhs`.
+		/// \param rhs half expression to multiply with
+		/// \return reference to this half
+		/// \exception FE_... according to operator*(half,half)
+		half& operator*=(half rhs) { return *this = *this * rhs; }
+
+		/// Arithmetic assignment.
+		/// Equivalent to `*this = *this / rhs`.
+		/// \param rhs half expression to divide by
+		/// \return reference to this half
+		/// \exception FE_... according to operator/(half,half)
+		half& operator/=(half rhs) { return *this = *this / rhs; }
+
+		/// Arithmetic assignment.
+		/// \param rhs single-precision value to add
+		/// \return reference to this half
+		/// \exception FE_... according to operator=()
+		half& operator+=(float rhs) { return *this = *this + rhs; }
+
+		/// Arithmetic assignment.
+		/// \param rhs single-precision value to subtract
+		/// \return reference to this half
+		/// \exception FE_... according to operator=()
+		half& operator-=(float rhs) { return *this = *this - rhs; }
+
+		/// Arithmetic assignment.
+		/// \param rhs single-precision value to multiply with
+		/// \return reference to this half
+		/// \exception FE_... according to operator=()
+		half& operator*=(float rhs) { return *this = *this * rhs; }
+
+		/// Arithmetic assignment.
+		/// \param rhs single-precision value to divide by
+		/// \return reference to this half
+		/// \exception FE_... according to operator=()
+		half& operator/=(float rhs) { return *this = *this / rhs; }
+
+		/// \}
+		/// \name Increment and decrement
+		/// \{
+
+		/// Prefix increment (adds the half with bit pattern 0x3C00).
+		/// \return incremented half value
+		/// \exception FE_... according to operator+(half,half)
+		half& operator++() { return *this = *this + half(detail::binary, 0x3C00); }
+
+		/// Prefix decrement (adds the half with bit pattern 0xBC00, i.e. 0x3C00 with the sign bit set).
+		/// \return decremented half value
+		/// \exception FE_... according to operator+(half,half)
+		half& operator--() { return *this = *this + half(detail::binary, 0xBC00); }
+
+		/// Postfix increment.
+		/// \return non-incremented half value
+		/// \exception FE_... according to operator+(half,half)
+		half operator++(int) { half out(*this); ++*this; return out; }
+
+		/// Postfix decrement.
+		/// \return non-decremented half value
+		/// \exception FE_... according to operator+(half,half)
+		half operator--(int) { half out(*this); --*this; return out; }
+		/// \}
+	
+	private:
+		/// Rounding mode to use
+		static const std::float_round_style round_style = (std::float_round_style)(HALF_ROUND_STYLE);
+
+		/// Constructor.
+		/// \param bits binary representation to set half to
+		HALF_CONSTEXPR half(detail::binary_t, unsigned int bits) HALF_NOEXCEPT : data_(static_cast<detail::uint16>(bits)) {}
+
+		/// Internal binary representation
+		detail::uint16 data_;
+
+	#ifndef HALF_DOXYGEN_ONLY
+		friend HALF_CONSTEXPR_NOERR bool operator==(half, half);
+		friend HALF_CONSTEXPR_NOERR bool operator!=(half, half);
+		friend HALF_CONSTEXPR_NOERR bool operator<(half, half);
+		friend HALF_CONSTEXPR_NOERR bool operator>(half, half);
+		friend HALF_CONSTEXPR_NOERR bool operator<=(half, half);
+		friend HALF_CONSTEXPR_NOERR bool operator>=(half, half);
+		friend HALF_CONSTEXPR half operator-(half);
+		friend half operator+(half, half);
+		friend half operator-(half, half);
+		friend half operator*(half, half);
+		friend half operator/(half, half);
+		template<typename charT,typename traits> friend std::basic_ostream<charT,traits>& operator<<(std::basic_ostream<charT,traits>&, half);
+		template<typename charT,typename traits> friend std::basic_istream<charT,traits>& operator>>(std::basic_istream<charT,traits>&, half&);
+		friend HALF_CONSTEXPR half fabs(half);
+		friend half fmod(half, half);
+		friend half remainder(half, half);
+		friend half remquo(half, half, int*);
+		friend half fma(half, half, half);
+		friend HALF_CONSTEXPR_NOERR half fmax(half, half);
+		friend HALF_CONSTEXPR_NOERR half fmin(half, half);
+		friend half fdim(half, half);
+		friend half nanh(const char*);
+		friend half exp(half);
+		friend half exp2(half);
+		friend half expm1(half);
+		friend half log(half);
+		friend half log10(half);
+		friend half log2(half);
+		friend half log1p(half);
+		friend half sqrt(half);
+		friend half rsqrt(half);
+		friend half cbrt(half);
+		friend half hypot(half, half);
+		friend half hypot(half, half, half);
+		friend half pow(half, half);
+		friend void sincos(half, half*, half*);
+		friend half sin(half);
+		friend half cos(half);
+		friend half tan(half);
+		friend half asin(half);
+		friend half acos(half);
+		friend half atan(half);
+		friend half atan2(half, half);
+		friend half sinh(half);
+		friend half cosh(half);
+		friend half tanh(half);
+		friend half asinh(half);
+		friend half acosh(half);
+		friend half atanh(half);
+		friend half erf(half);
+		friend half erfc(half);
+		friend half lgamma(half);
+		friend half tgamma(half);
+		friend half ceil(half);
+		friend half floor(half);
+		friend half trunc(half);
+		friend half round(half);
+		friend long lround(half);
+		friend half rint(half);
+		friend long lrint(half);
+		friend half nearbyint(half);
+	#ifdef HALF_ENABLE_CPP11_LONG_LONG
+		friend long long llround(half);
+		friend long long llrint(half);
+	#endif
+		friend half frexp(half, int*);
+		friend half scalbln(half, long);
+		friend half modf(half, half*);
+		friend int ilogb(half);
+		friend half logb(half);
+		friend half nextafter(half, half);
+		friend half nexttoward(half, long double);
+		friend HALF_CONSTEXPR half copysign(half, half);
+		friend HALF_CONSTEXPR int fpclassify(half);
+		friend HALF_CONSTEXPR bool isfinite(half);
+		friend HALF_CONSTEXPR bool isinf(half);
+		friend HALF_CONSTEXPR bool isnan(half);
+		friend HALF_CONSTEXPR bool isnormal(half);
+		friend HALF_CONSTEXPR bool signbit(half);
+		friend HALF_CONSTEXPR bool isgreater(half, half);
+		friend HALF_CONSTEXPR bool isgreaterequal(half, half);
+		friend HALF_CONSTEXPR bool isless(half, half);
+		friend HALF_CONSTEXPR bool islessequal(half, half);
+		friend HALF_CONSTEXPR bool islessgreater(half, half);
+		template<typename,typename,std::float_round_style> friend struct detail::half_caster;
+		friend class std::numeric_limits<half>;
+	#if HALF_ENABLE_CPP11_HASH
+		friend struct std::hash<half>;
+	#endif
+	#if HALF_ENABLE_CPP11_USER_LITERALS
+		friend half literal::operator "" _h(long double);
+	#endif
+	#endif
+	};
+
+#if HALF_ENABLE_CPP11_USER_LITERALS
+	namespace literal
+	{
+		/// Half literal.
+		/// While this returns a properly rounded half-precision value, half literals can unfortunately not be constant
+		/// expressions due to rather involved conversions. So don't expect this to be a literal literal without involving
+		/// conversion operations at runtime. It is a convenience feature, not a performance optimization.
+		/// \param value literal value
+		/// \return half of the given value (possibly rounded)
+		/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+		inline half operator "" _h(long double value) { return half(detail::binary, detail::float2half<half::round_style>(value)); }
+	}
+#endif
+
+	namespace detail
+	{
+		/// Helper class for half casts.
+		/// This class template has to be specialized for all valid cast arguments to define an appropriate static
+		/// `cast` member function and a corresponding `type` member denoting its return type.
+		/// \tparam T destination type
+		/// \tparam U source type
+		/// \tparam R rounding mode to use
+		template<typename T,typename U,std::float_round_style R=(std::float_round_style)(HALF_ROUND_STYLE)> struct half_caster {};
+		template<typename U,std::float_round_style R> struct half_caster<half,U,R>	// cast from arithmetic type U to half
+		{
+		#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS
+			static_assert(std::is_arithmetic<U>::value, "half_cast from non-arithmetic type unsupported");
+		#endif
+
+			static half cast(U arg) { return cast_impl(arg, is_float<U>()); }	// tag-dispatch on whether U is floating-point
+
+		private:
+			static half cast_impl(U arg, true_type) { return half(binary, float2half<R>(arg)); }	// floating-point source
+			static half cast_impl(U arg, false_type) { return half(binary, int2half<R>(arg)); }	// integer source
+		};
+		template<typename T,std::float_round_style R> struct half_caster<T,half,R>	// cast from half to arithmetic type T
+		{
+		#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS
+			static_assert(std::is_arithmetic<T>::value, "half_cast to non-arithmetic type unsupported");
+		#endif
+
+			static T cast(half arg) { return cast_impl(arg, is_float<T>()); }	// tag-dispatch on whether T is floating-point
+
+		private:
+			static T cast_impl(half arg, true_type) { return half2float<T>(arg.data_); }	// floating-point destination
+			static T cast_impl(half arg, false_type) { return half2int<R,true,true,T>(arg.data_); }	// integer destination
+		};
+		template<std::float_round_style R> struct half_caster<half,half,R>	// half to half: identity
+		{
+			static half cast(half arg) { return arg; }
+		};
+	}
+}
+
+/// Extensions to the C++ standard library.
+namespace std
+{
+	/// Numeric limits for half-precision floats.
+	/// **See also:** Documentation for [std::numeric_limits](https://en.cppreference.com/w/cpp/types/numeric_limits)
+	template<> class numeric_limits<half_float::half>
+	{
+	public:
+		/// Is template specialization.
+		static HALF_CONSTEXPR_CONST bool is_specialized = true;
+
+		/// Supports signed values.
+		static HALF_CONSTEXPR_CONST bool is_signed = true;
+
+		/// Is not an integer type.
+		static HALF_CONSTEXPR_CONST bool is_integer = false;
+
+		/// Is not exact.
+		static HALF_CONSTEXPR_CONST bool is_exact = false;
+
+		/// Doesn't provide modulo arithmetic.
+		static HALF_CONSTEXPR_CONST bool is_modulo = false;
+
+		/// Has a finite set of values.
+		static HALF_CONSTEXPR_CONST bool is_bounded = true;
+
+		/// IEEE conformant.
+		static HALF_CONSTEXPR_CONST bool is_iec559 = true;
+
+		/// Supports infinity.
+		static HALF_CONSTEXPR_CONST bool has_infinity = true;
+
+		/// Supports quiet NaNs.
+		static HALF_CONSTEXPR_CONST bool has_quiet_NaN = true;
+
+		/// Supports signaling NaNs.
+		static HALF_CONSTEXPR_CONST bool has_signaling_NaN = true;
+
+		/// Supports subnormal values.
+		static HALF_CONSTEXPR_CONST float_denorm_style has_denorm = denorm_present;
+
+		/// Supports no denormalization detection.
+		static HALF_CONSTEXPR_CONST bool has_denorm_loss = false;
+
+	#if HALF_ERRHANDLING_THROWS
+		static HALF_CONSTEXPR_CONST bool traps = true;	// selected when HALF_ERRHANDLING_THROWS is nonzero
+	#else
+		/// Traps only if [HALF_ERRHANDLING_THROW_...](\ref HALF_ERRHANDLING_THROW_INVALID) is activated.
+		static HALF_CONSTEXPR_CONST bool traps = false;
+	#endif
+
+		/// Does not support pre-rounding underflow detection.
+		static HALF_CONSTEXPR_CONST bool tinyness_before = false;
+
+		/// Rounding mode.
+		static HALF_CONSTEXPR_CONST float_round_style round_style = half_float::half::round_style;
+
+		/// Significant digits.
+		static HALF_CONSTEXPR_CONST int digits = 11;
+
+		/// Significant decimal digits.
+		static HALF_CONSTEXPR_CONST int digits10 = 3;
+
+		/// Required decimal digits to represent all possible values.
+		static HALF_CONSTEXPR_CONST int max_digits10 = 5;
+
+		/// Number base.
+		static HALF_CONSTEXPR_CONST int radix = 2;
+
+		/// One more than smallest exponent.
+		static HALF_CONSTEXPR_CONST int min_exponent = -13;
+
+		/// Smallest normalized representable power of 10.
+		static HALF_CONSTEXPR_CONST int min_exponent10 = -4;
+
+		/// One more than largest exponent.
+		static HALF_CONSTEXPR_CONST int max_exponent = 16;
+
+		/// Largest finitely representable power of 10.
+		static HALF_CONSTEXPR_CONST int max_exponent10 = 4;
+
+		/// Smallest positive normal value (bit pattern 0x0400).
+		static HALF_CONSTEXPR half_float::half min() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x0400); }
+
+		/// Smallest finite value, i.e. max() with the sign bit set (bit pattern 0xFBFF).
+		static HALF_CONSTEXPR half_float::half lowest() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0xFBFF); }
+
+		/// Largest finite value (bit pattern 0x7BFF).
+		static HALF_CONSTEXPR half_float::half max() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7BFF); }
+
+		/// Difference between 1 and next representable value (bit pattern 0x1400).
+		static HALF_CONSTEXPR half_float::half epsilon() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x1400); }
+
+		/// Maximum rounding error in ULP (units in the last place): 0x3800 for round-to-nearest, 0x3C00 otherwise.
+		static HALF_CONSTEXPR half_float::half round_error() HALF_NOTHROW
+			{ return half_float::half(half_float::detail::binary, (round_style==std::round_to_nearest) ? 0x3800 : 0x3C00); }
+
+		/// Positive infinity (bit pattern 0x7C00).
+		static HALF_CONSTEXPR half_float::half infinity() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7C00); }
+
+		/// Quiet NaN (bit pattern 0x7FFF).
+		static HALF_CONSTEXPR half_float::half quiet_NaN() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7FFF); }
+
+		/// Signaling NaN (bit pattern 0x7DFF).
+		static HALF_CONSTEXPR half_float::half signaling_NaN() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7DFF); }
+
+		/// Smallest positive subnormal value (bit pattern 0x0001).
+		static HALF_CONSTEXPR half_float::half denorm_min() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x0001); }
+	};
+
+#if HALF_ENABLE_CPP11_HASH
+	/// Specialization of `std::hash` for half-precision floats.
+	/// Only available when C++11 `std::hash` is supported and enabled.
+	///
+	/// **See also:** Documentation for [std::hash](https://en.cppreference.com/w/cpp/utility/hash)
+	template<> struct hash<half_float::half>
+	{
+		/// Function return type.
+		typedef size_t result_type;
+
+		/// Type of function argument.
+		typedef half_float::half argument_type;
+
+		/// Hash a half; the bit pattern 0x8000 (negative zero) is hashed as 0 so that both zeros share one hash.
+		/// \param arg half to hash
+		/// \return hash value
+		result_type operator()(argument_type arg) const { return hash<half_float::detail::uint16>()((arg.data_==0x8000) ? 0 : arg.data_); }
+	};
+#endif
+}
+
+namespace half_float
+{
+	/// \anchor compop
+	/// \name Comparison operators
+	/// \{
+
+	/// Equality comparison; zeros of either sign compare equal.
+	/// \param x first operand
+	/// \param y second operand
+	/// \retval true if operands equal
+	/// \retval false else
+	/// \exception FE_INVALID if \a x or \a y is NaN
+	inline HALF_CONSTEXPR_NOERR bool operator==(half x, half y)
+	{
+		return !(detail::compsignal(x.data_, y.data_) || (x.data_!=y.data_ && ((x.data_|y.data_)&0x7FFF)));
+	}
+
+	/// Inequality comparison; zeros of either sign are not considered different.
+	/// \param x first operand
+	/// \param y second operand
+	/// \retval true if operands not equal
+	/// \retval false else
+	/// \exception FE_INVALID if \a x or \a y is NaN
+	inline HALF_CONSTEXPR_NOERR bool operator!=(half x, half y)
+	{
+		return !(!detail::compsignal(x.data_, y.data_) && (x.data_==y.data_ || !((x.data_|y.data_)&0x7FFF)));
+	}
+
+	/// Comparison for less than. Each operand is mapped from sign-magnitude to an ordered integer key (0x8000+magnitude if positive, 0x8000-magnitude if negative).
+	/// \param x first operand
+	/// \param y second operand
+	/// \retval true if \a x less than \a y
+	/// \retval false else
+	/// \exception FE_INVALID if \a x or \a y is NaN
+	inline HALF_CONSTEXPR_NOERR bool operator<(half x, half y)
+	{
+		return !detail::compsignal(x.data_, y.data_) &&
+			((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) < ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15));
+	}
+
+	/// Comparison for greater than. Each operand is mapped from sign-magnitude to an ordered integer key (0x8000+magnitude if positive, 0x8000-magnitude if negative).
+	/// \param x first operand
+	/// \param y second operand
+	/// \retval true if \a x greater than \a y
+	/// \retval false else
+	/// \exception FE_INVALID if \a x or \a y is NaN
+	inline HALF_CONSTEXPR_NOERR bool operator>(half x, half y)
+	{
+		return !detail::compsignal(x.data_, y.data_) &&
+			((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) > ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15));
+	}
+
+	/// Comparison for less equal. Each operand is mapped from sign-magnitude to an ordered integer key (0x8000+magnitude if positive, 0x8000-magnitude if negative).
+	/// \param x first operand
+	/// \param y second operand
+	/// \retval true if \a x less equal \a y
+	/// \retval false else
+	/// \exception FE_INVALID if \a x or \a y is NaN
+	inline HALF_CONSTEXPR_NOERR bool operator<=(half x, half y)
+	{
+		return !detail::compsignal(x.data_, y.data_) &&
+			((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) <= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15));
+	}
+
+	/// Comparison for greater equal. Each operand is mapped from sign-magnitude to an ordered integer key (0x8000+magnitude if positive, 0x8000-magnitude if negative).
+	/// \param x first operand
+	/// \param y second operand
+	/// \retval true if \a x greater equal \a y
+	/// \retval false else
+	/// \exception FE_INVALID if \a x or \a y is NaN
+	inline HALF_CONSTEXPR_NOERR bool operator>=(half x, half y)
+	{
+		return !detail::compsignal(x.data_, y.data_) &&
+			((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) >= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15));
+	}
+
+	/// \}
+	/// \anchor arithmetics
+	/// \name Arithmetic operators
+	/// \{
+
+	/// Identity (unary plus).
+	/// \param arg operand
+	/// \return the operand, unchanged
+	inline HALF_CONSTEXPR half operator+(half arg) { return arg; }
+
+	/// Negation (flips the sign bit 0x8000 of the binary representation).
+	/// \param arg operand
+	/// \return negated operand
+	inline HALF_CONSTEXPR half operator-(half arg) { return half(detail::binary, arg.data_^0x8000); }
+
+	/// Addition.
+	/// This operation is exact to rounding for all rounding modes.
+	/// \param x left operand
+	/// \param y right operand
+	/// \return sum of half expressions
+	/// \exception FE_INVALID if \a x and \a y are infinities with different signs or signaling NaNs
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half operator+(half x, half y)
+	{
+	#ifdef HALF_ARITHMETIC_TYPE
+		return half(detail::binary, detail::float2half<half::round_style>(detail::half2float<detail::internal_t>(x.data_)+detail::half2float<detail::internal_t>(y.data_)));
+	#else
+		int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF;	// magnitudes without sign bit
+		bool sub = ((x.data_^y.data_)&0x8000) != 0;	// signs differ: effectively a subtraction of magnitudes
+		if(absx >= 0x7C00 || absy >= 0x7C00)	// at least one operand is infinity or NaN
+			return half(detail::binary,	(absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) : (absy!=0x7C00) ? x.data_ :
+										(sub && absx==0x7C00) ? detail::invalid() : y.data_);
+		if(!absx)	// x is zero; for two zeros the sign bits are OR-ed when rounding toward negative infinity, AND-ed otherwise
+			return absy ? y : half(detail::binary, (half::round_style==std::round_toward_neg_infinity) ? (x.data_|y.data_) : (x.data_&y.data_));
+		if(!absy)
+			return x;
+		unsigned int sign = ((sub && absy>absx) ? y.data_ : x.data_) & 0x8000;	// result takes the sign of the operand with the larger magnitude
+		if(absy > absx)
+			std::swap(absx, absy);
+		int exp = (absx>>10) + (absx<=0x3FF), d = exp - (absy>>10) - (absy<=0x3FF), mx = ((absx&0x3FF)|((absx>0x3FF)<<10)) << 3, my;	// subnormals use exponent 1 and no implicit bit; mantissas carry 3 extra low bits
+		if(d < 13)
+		{
+			my = ((absy&0x3FF)|((absy>0x3FF)<<10)) << 3;
+			my = (my>>d) | ((my&((1<<d)-1))!=0);	// align to the larger exponent, folding shifted-out bits into the lowest bit
+		}
+		else
+			my = 1;	// exponent difference of 13 or more: y contributes only the lowest (sticky) bit
+		if(sub)
+		{
+			if(!(mx-=my))	// exact cancellation: zero, with the sign bit set only when rounding toward negative infinity
+				return half(detail::binary, static_cast<unsigned>(half::round_style==std::round_toward_neg_infinity)<<15);
+			for(; mx<0x2000 && exp>1; mx<<=1,--exp) ;	// renormalize, but never below exponent 1
+		}
+		else
+		{
+			mx += my;
+			int i = mx >> 14;	// 1 if the sum carried into bit 14
+			if((exp+=i) > 30)
+				return half(detail::binary, detail::overflow<half::round_style>(sign));
+			mx = (mx>>i) | (mx&i);	// shift out the carry while keeping the dropped bit in the lowest bit
+		}
+		return half(detail::binary, detail::rounded<half::round_style,false>(sign+((exp-1)<<10)+(mx>>3), (mx>>2)&1, (mx&0x3)!=0));	// bit 2 and "any of bits 0-1 set" are handed to rounded()
+	#endif
+	}
+
+	/// Subtraction.
+	/// This operation is exact to rounding for all rounding modes.
+	/// \param x left operand
+	/// \param y right operand
+	/// \return difference of half expressions
+	/// \exception FE_INVALID if \a x and \a y are infinities with equal signs or signaling NaNs
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half operator-(half x, half y)
+	{
+	#ifdef HALF_ARITHMETIC_TYPE
+		return half(detail::binary, detail::float2half<half::round_style>(detail::half2float<detail::internal_t>(x.data_)-detail::half2float<detail::internal_t>(y.data_)));
+	#else
+		return x + -y;	// implemented as addition of the negated right operand
+	#endif
+	}
+
+	/// Multiplication.
+	/// This operation is exact to rounding for all rounding modes.
+	/// \param x left operand
+	/// \param y right operand
+	/// \return product of half expressions
+	/// \exception FE_INVALID if multiplying 0 with infinity or if \a x or \a y is signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half operator*(half x, half y)
+	{
+	#ifdef HALF_ARITHMETIC_TYPE
+		return half(detail::binary, detail::float2half<half::round_style>(detail::half2float<detail::internal_t>(x.data_)*detail::half2float<detail::internal_t>(y.data_)));
+	#else
+		int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, exp = -16;
+		unsigned int sign = (x.data_^y.data_) & 0x8000;	// result sign is the XOR of the operand signs
+		if(absx >= 0x7C00 || absy >= 0x7C00)	// NaN operands are signalled; infinity times zero is invalid; otherwise signed infinity
+			return half(detail::binary,	(absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
+										((absx==0x7C00 && !absy)||(absy==0x7C00 && !absx)) ? detail::invalid() : (sign|0x7C00));
+		if(!absx || !absy)
+			return half(detail::binary, sign);	// signed zero
+		for(; absx<0x400; absx<<=1,--exp) ;	// normalize subnormal x
+		for(; absy<0x400; absy<<=1,--exp) ;	// normalize subnormal y
+		detail::uint32 m = static_cast<detail::uint32>((absx&0x3FF)|0x400) * static_cast<detail::uint32>((absy&0x3FF)|0x400);	// product of the two 11-bit mantissas
+		int i = m >> 21, s = m & i;	// i: product reached bit 21; s: the low bit that the shift by i below drops
+		exp += (absx>>10) + (absy>>10) + i;
+		if(exp > 29)
+			return half(detail::binary, detail::overflow<half::round_style>(sign));
+		else if(exp < -11)
+			return half(detail::binary, detail::underflow<half::round_style>(sign));
+		return half(detail::binary, detail::fixed2half<half::round_style,20,false,false,false>(m>>i, exp, sign, s));
+	#endif
+	}
+
+	/// Division.
+	/// This operation is exact to rounding for all rounding modes.
+	/// \param x left operand
+	/// \param y right operand
+	/// \return quotient of half expressions
+	/// \exception FE_INVALID if dividing 0s or infinities with each other or if \a x or \a y is signaling NaN
+	/// \exception FE_DIVBYZERO if dividing finite value by 0
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half operator/(half x, half y)
+	{
+	#ifdef HALF_ARITHMETIC_TYPE
+		return half(detail::binary, detail::float2half<half::round_style>(detail::half2float<detail::internal_t>(x.data_)/detail::half2float<detail::internal_t>(y.data_)));
+	#else
+		int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, exp = 14;
+		unsigned int sign = (x.data_^y.data_) & 0x8000;	// result sign is the XOR of the operand signs
+		if(absx >= 0x7C00 || absy >= 0x7C00)	// NaN operands are signalled; inf/inf is invalid; inf/finite gives infinity, finite/inf gives zero
+			return half(detail::binary,	(absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
+										(absx==absy) ? detail::invalid() : (sign|((absx==0x7C00) ? 0x7C00 : 0)));
+		if(!absx)
+			return half(detail::binary, absy ? sign : detail::invalid());	// 0/y is a signed zero, 0/0 is invalid
+		if(!absy)
+			return half(detail::binary, detail::pole(sign));	// nonzero finite value divided by zero
+		for(; absx<0x400; absx<<=1,--exp) ;	// normalize subnormal dividend
+		for(; absy<0x400; absy<<=1,++exp) ;	// normalize subnormal divisor
+		detail::uint32 mx = (absx&0x3FF) | 0x400, my = (absy&0x3FF) | 0x400;
+		int i = mx < my;	// mantissa quotient below 1: use one extra bit of shift and a smaller exponent
+		exp += (absx>>10) - (absy>>10) - i;
+		if(exp > 29)
+			return half(detail::binary, detail::overflow<half::round_style>(sign));
+		else if(exp < -11)
+			return half(detail::binary, detail::underflow<half::round_style>(sign));
+		mx <<= 12 + i;
+		my <<= 1;
+		return half(detail::binary, detail::fixed2half<half::round_style,11,false,false,false>(mx/my, exp, sign, mx%my!=0));	// a nonzero remainder is passed as the last argument
+	#endif
+	}
+
+	/// \}
+	/// \anchor streaming
+	/// \name Input and output
+	/// \{
+
+	/// Output operator: widens the half to a built-in floating-point type and streams it with that type's formatting.
+	/// \param out output stream to write into
+	/// \param arg half expression to write
+	/// \return reference to output stream
+	template<typename charT,typename traits> std::basic_ostream<charT,traits>& operator<<(std::basic_ostream<charT,traits> &out, half arg)
+	{
+	#ifndef HALF_ARITHMETIC_TYPE
+		typedef float widened_t;	// default: stream as single precision
+	#else
+		typedef detail::internal_t widened_t;	// stream as the configured arithmetic type
+	#endif
+		return out << detail::half2float<widened_t>(arg.data_);
+	}
+
+	/// Input operator.
+	///	Parsing is delegated to the stream's built-in floating-point extraction, reading a double precision value (or a 
+	/// [HALF_ARITHMETIC_TYPE](\ref HALF_ARITHMETIC_TYPE) value if that is defined). The text is therefore rounded twice: first to that type using the 
+	/// platform's current floating-point rounding mode, then to half-precision using the library's rounding mode. If extraction fails, \a arg is left untouched.
+	/// \param in input stream to read from
+	/// \param arg half to read into
+	/// \return reference to input stream
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	template<typename charT,typename traits> std::basic_istream<charT,traits>& operator>>(std::basic_istream<charT,traits> &in, half &arg)
+	{
+	#ifndef HALF_ARITHMETIC_TYPE
+		double parsed;
+	#else
+		detail::internal_t parsed;
+	#endif
+		if(!(in >> parsed))
+			return in;
+		arg.data_ = detail::float2half<half::round_style>(parsed);
+		return in;
+	}
+
+	/// \}
+	/// \anchor basic
+	/// \name Basic mathematical operations
+	/// \{
+
+	/// Absolute value, obtained by masking the sign bit out of the binary representation.
+	/// **See also:** Documentation for [std::fabs](https://en.cppreference.com/w/cpp/numeric/math/fabs).
+	/// \param arg operand
+	/// \return absolute value of \a arg
+	inline HALF_CONSTEXPR half fabs(half arg) { return half(detail::binary, 0x7FFF&arg.data_); }
+
+	/// Absolute value (same computation as fabs(): the sign bit is masked out).
+	/// **See also:** Documentation for [std::abs](https://en.cppreference.com/w/cpp/numeric/math/fabs).
+	/// \param arg operand
+	/// \return absolute value of \a arg
+	inline HALF_CONSTEXPR half abs(half arg) { return half(detail::binary, arg.data_&0x7FFF); }
+
+	/// Remainder of division. NaN operands are signaled, an infinite \a x or zero \a y is invalid, and \a x is returned unchanged for infinite \a y or zero \a x.
+	/// **See also:** Documentation for [std::fmod](https://en.cppreference.com/w/cpp/numeric/math/fmod).
+	/// \param x first operand
+	/// \param y second operand
+	/// \return remainder of floating-point division, with the sign bit of \a x OR-ed onto it.
+	/// \exception FE_INVALID if \a x is infinite or \a y is 0 or if \a x or \a y is signaling NaN
+	inline half fmod(half x, half y)
+	{
+		const unsigned int mag_x = x.data_ & 0x7FFF, mag_y = y.data_ & 0x7FFF, sign_x = x.data_ & 0x8000;
+		if(mag_x > 0x7C00 || mag_y > 0x7C00)
+			return half(detail::binary, detail::signal(x.data_, y.data_));
+		if(mag_x == 0x7C00 || !mag_y)
+			return half(detail::binary, detail::invalid());
+		if(mag_y == 0x7C00)
+			return half(detail::binary, x.data_);
+		if(!mag_x)
+			return x;
+		const unsigned int rem = (mag_x==mag_y) ? 0 : detail::mod<false,false>(mag_x, mag_y);	// equal magnitudes leave a zero remainder
+		return half(detail::binary, sign_x|rem);
+	}
+
+	/// Remainder of division. NaN operands are signaled, an infinite \a x or zero \a y is invalid, and \a x is returned unchanged for infinite \a y.
+	/// **See also:** Documentation for [std::remainder](https://en.cppreference.com/w/cpp/numeric/math/remainder).
+	/// \param x first operand
+	/// \param y second operand
+	/// \return remainder of floating-point division, XOR-ed with the sign bit of \a x.
+	/// \exception FE_INVALID if \a x is infinite or \a y is 0 or if \a x or \a y is signaling NaN
+	inline half remainder(half x, half y)
+	{
+		const unsigned int mag_x = x.data_ & 0x7FFF, mag_y = y.data_ & 0x7FFF, sign_x = x.data_ & 0x8000;
+		if(mag_x > 0x7C00 || mag_y > 0x7C00)
+			return half(detail::binary, detail::signal(x.data_, y.data_));
+		if(mag_x == 0x7C00 || !mag_y)
+			return half(detail::binary, detail::invalid());
+		if(mag_y == 0x7C00)
+			return half(detail::binary, x.data_);
+		const unsigned int rem = (mag_x==mag_y) ? 0 : detail::mod<false,true>(mag_x, mag_y);	// equal magnitudes leave a zero remainder
+		return half(detail::binary, sign_x^rem);
+	}
+
+	/// Remainder of division together with a signed quotient value.
+	/// **See also:** Documentation for [std::remquo](https://en.cppreference.com/w/cpp/numeric/math/remquo).
+	/// \param x first operand
+	/// \param y second operand
+	/// \param quo address to store some bits of quotient at (set to 0 for finite \a x and infinite \a y; not written when NaN or an invalid result is returned)
+	/// \return remainder of floating-point division.
+	/// \exception FE_INVALID if \a x is infinite or \a y is 0 or if \a x or \a y is signaling NaN
+	inline half remquo(half x, half y, int *quo)
+	{
+		const unsigned int mag_x = x.data_ & 0x7FFF, mag_y = y.data_ & 0x7FFF;
+		if(mag_x > 0x7C00 || mag_y > 0x7C00)
+			return half(detail::binary, detail::signal(x.data_, y.data_));
+		if(mag_x == 0x7C00 || !mag_y)
+			return half(detail::binary, detail::invalid());
+		if(mag_y == 0x7C00)
+			return *quo = 0, half(detail::binary, x.data_);
+		unsigned int bits = x.data_ & 0x8000;
+		int q = 1;	// quotient magnitude for equal operand magnitudes
+		if(mag_x != mag_y)
+			bits ^= detail::mod<true,true>(mag_x, mag_y, &q);
+		return *quo = (((x.data_^y.data_)&0x8000) ? -q : q), half(detail::binary, bits);	// quotient is negated when the operand signs differ
+	}
+
+	/// Fused multiply add.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::fma](https://en.cppreference.com/w/cpp/numeric/math/fma).
+	/// \param x first operand
+	/// \param y second operand
+	/// \param z third operand
+	/// \return ( \a x * \a y ) + \a z rounded as one operation.
+	/// \exception FE_INVALID according to operator*() and operator+() unless any argument is a quiet NaN and no argument is a signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding the final addition
+	inline half fma(half x, half y, half z)
+	{
+	#ifdef HALF_ARITHMETIC_TYPE
+		detail::internal_t fx = detail::half2float<detail::internal_t>(x.data_), fy = detail::half2float<detail::internal_t>(y.data_), fz = detail::half2float<detail::internal_t>(z.data_);
+		#if HALF_ENABLE_CPP11_CMATH && FP_FAST_FMA
+			return half(detail::binary, detail::float2half<half::round_style>(std::fma(fx, fy, fz)));
+		#else
+			return half(detail::binary, detail::float2half<half::round_style>(fx*fy+fz));	// separate multiply and add in internal_t when std::fma is not selected
+		#endif
+	#else
+		int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, absz = z.data_ & 0x7FFF, exp = -15;	// operand bit patterns with the sign bit masked off
+		unsigned int sign = (x.data_^y.data_) & 0x8000;	// sign bit of the product x*y
+		bool sub = ((sign^z.data_)&0x8000) != 0;	// true if z and the product have opposite signs, i.e. magnitudes get subtracted
+		if(absx >= 0x7C00 || absy >= 0x7C00 || absz >= 0x7C00)	// some operand is infinite or NaN
+			return	(absx>0x7C00 || absy>0x7C00 || absz>0x7C00) ? half(detail::binary, detail::signal(x.data_, y.data_, z.data_)) :
+					(absx==0x7C00) ? half(detail::binary, (!absy || (sub && absz==0x7C00)) ? detail::invalid() : (sign|0x7C00)) :	// inf*0 and inf-inf are invalid
+					(absy==0x7C00) ? half(detail::binary, (!absx || (sub && absz==0x7C00)) ? detail::invalid() : (sign|0x7C00)) : z;	// only z is infinite: result is z
+		if(!absx || !absy)	// product is zero
+			return absz ? z : half(detail::binary, (half::round_style==std::round_toward_neg_infinity) ? (z.data_|sign) : (z.data_&sign));	// for z == 0 the sign bits are OR-ed when rounding toward -inf, AND-ed otherwise
+		for(; absx<0x400; absx<<=1,--exp) ;	// shift a subnormal x up until bit 10 is set
+		for(; absy<0x400; absy<<=1,--exp) ;	// same for y
+		detail::uint32 m = static_cast<detail::uint32>((absx&0x3FF)|0x400) * static_cast<detail::uint32>((absy&0x3FF)|0x400);	// product of the two 11-bit significands
+		int i = m >> 21;	// 1 if the product reached bit 21
+		exp += (absx>>10) + (absy>>10) + i;
+		m <<= 3 - i;	// move the leading bit of the product to bit 23
+		if(absz)	// a zero addend leaves the product as is
+		{
+			int expz = 0;
+			for(; absz<0x400; absz<<=1,--expz) ;
+			expz += absz >> 10;
+			detail::uint32 mz = static_cast<detail::uint32>((absz&0x3FF)|0x400) << 13;	// significand of z with its leading bit at bit 23
+			if(expz > exp || (expz == exp && mz > m))	// keep the operand of larger magnitude in (m, exp)
+			{
+				std::swap(m, mz);
+				std::swap(exp, expz);
+				if(sub)
+					sign = z.data_ & 0x8000;	// when subtracting, the larger operand z determines the result sign
+			}
+			int d = exp - expz;
+			mz = (d<23) ? ((mz>>d)|((mz&((static_cast<detail::uint32>(1)<<d)-1))!=0)) : 1;	// align the smaller operand; any bits shifted out are OR-ed into bit 0
+			if(sub)
+			{
+				m = m - mz;
+				if(!m)
+					return half(detail::binary, static_cast<unsigned>(half::round_style==std::round_toward_neg_infinity)<<15);	// exact cancellation: -0 when rounding toward -inf, +0 otherwise
+				for(; m<0x800000; m<<=1,--exp) ;	// renormalize so the leading bit is back at bit 23
+			}
+			else
+			{
+				m += mz;
+				i = m >> 24;	// 1 if the sum carried into bit 24
+				m = (m>>i) | (m&i);	// shift the carry back down, keeping the dropped bit OR-ed into bit 0
+				exp += i;
+			}
+		}
+		if(exp > 30)
+			return half(detail::binary, detail::overflow<half::round_style>(sign));
+		else if(exp < -10)
+			return half(detail::binary, detail::underflow<half::round_style>(sign));
+		return half(detail::binary, detail::fixed2half<half::round_style,23,false,false,false>(m, exp-1, sign));
+	#endif
+	}
+
+	/// Maximum of half expressions. Operands are compared through an integer key derived from their bit patterns; \a y is selected only if it is not NaN and \a x is NaN or has the smaller key.
+	/// **See also:** Documentation for [std::fmax](https://en.cppreference.com/w/cpp/numeric/math/fmax).
+	/// \param x first operand
+	/// \param y second operand
+	/// \return maximum of operands, ignoring quiet NaNs
+	/// \exception FE_INVALID if \a x or \a y is signaling NaN
+	inline HALF_CONSTEXPR_NOERR half fmax(half x, half y)
+	{
+		return half(detail::binary, (isnan(y) || (!isnan(x) && (x.data_^(0x8000|(0x8000-(x.data_>>15)))) >=
+			(y.data_^(0x8000|(0x8000-(y.data_>>15)))))) ? detail::select(x.data_, y.data_) : detail::select(y.data_, x.data_));
+	}
+
+	/// Minimum of half expressions. Operands are compared through an integer key derived from their bit patterns; \a y is selected only if it is not NaN and \a x is NaN or has the larger key.
+	/// **See also:** Documentation for [std::fmin](https://en.cppreference.com/w/cpp/numeric/math/fmin).
+	/// \param x first operand
+	/// \param y second operand
+	/// \return minimum of operands, ignoring quiet NaNs
+	/// \exception FE_INVALID if \a x or \a y is signaling NaN
+	inline HALF_CONSTEXPR_NOERR half fmin(half x, half y)
+	{
+		return half(detail::binary, (isnan(y) || (!isnan(x) && (x.data_^(0x8000|(0x8000-(x.data_>>15)))) <=
+			(y.data_^(0x8000|(0x8000-(y.data_>>15)))))) ? detail::select(x.data_, y.data_) : detail::select(y.data_, x.data_));
+	}
+
+	/// Positive difference.
+	/// This function is exact to rounding for all rounding modes. NaN operands are passed to detail::signal() before any comparison takes place.
+	///
+	/// **See also:** Documentation for [std::fdim](https://en.cppreference.com/w/cpp/numeric/math/fdim).
+	/// \param x first operand
+	/// \param y second operand
+	/// \return \a x - \a y if \a x compares greater than \a y, positive 0 otherwise
+	/// \exception FE_... according to operator-(half,half)
+	inline half fdim(half x, half y)
+	{
+		return	(isnan(x) || isnan(y)) ? half(detail::binary, detail::signal(x.data_, y.data_)) :
+				((x.data_^(0x8000|(0x8000-(x.data_>>15)))) > (y.data_^(0x8000|(0x8000-(y.data_>>15))))) ? (x-y) :
+				half(detail::binary, 0);
+	}
+
+	/// Get NaN value. Starting from the bit pattern 0x7FFF, the low byte of every character of \a arg is XOR-ed into the low 8 bits.
+	/// **See also:** Documentation for [std::nan](https://en.cppreference.com/w/cpp/numeric/math/nan).
+	/// \param arg string code (must point to a null-terminated string)
+	/// \return quiet NaN
+	inline half nanh(const char *arg)
+	{
+		unsigned int bits = 0x7FFF;
+		for(const char *p=arg; *p; ++p)
+			bits ^= static_cast<unsigned>(*p) & 0xFF;
+		return half(detail::binary, bits);
+	}
+
+	/// \}
+	/// \anchor exponential
+	/// \name Exponential functions
+	/// \{
+
+	/// Exponential function.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::exp](https://en.cppreference.com/w/cpp/numeric/math/exp).
+	/// \param arg function argument
+	/// \return e raised to \a arg
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half exp(half arg)
+	{
+	#ifdef HALF_ARITHMETIC_TYPE
+		return half(detail::binary, detail::float2half<half::round_style>(std::exp(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		int abs = arg.data_ & 0x7FFF, e = (abs>>10) + (abs<=0x3FF), exp;	// e is the exponent field, with subnormals counted as exponent 1
+		if(!abs)
+			return half(detail::binary, 0x3C00);	// zero argument: result is exactly 1
+		if(abs >= 0x7C00)
+			return half(detail::binary, (abs==0x7C00) ? (0x7C00&((arg.data_>>15)-1U)) : detail::signal(arg.data_));	// +inf gives +inf, -inf gives 0, NaN is signaled
+		if(abs >= 0x4C80)
+			return half(detail::binary, (arg.data_&0x8000) ? detail::underflow<half::round_style>() : detail::overflow<half::round_style>());	// magnitude too large: underflow for negative, overflow for positive arguments
+		detail::uint32 m = detail::multiply64(static_cast<detail::uint32>((abs&0x3FF)+((abs>0x3FF)<<10))<<21, 0xB8AA3B29);	// significand times 0xB8AA3B29, which is log2(e) scaled by 2^31
+		if(e < 14)
+		{
+			exp = 0;	// NOTE(review): presumably the scaled argument is below 1 here, so its integer part is 0 - confirm
+			m >>= 14 - e;
+		}
+		else
+		{
+			exp = m >> (45-e);	// upper bits of the scaled argument go to exp
+			m = (m<<(e-14)) & 0x7FFFFFFF;	// the remaining low 31 bits stay in m
+		}
+		return half(detail::binary, detail::exp2_post<half::round_style>(m, exp, (arg.data_&0x8000)!=0, 0, 26));	// third argument is true for a negative arg
+	#endif
+	}
+
+	/// Binary exponential.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::exp2](https://en.cppreference.com/w/cpp/numeric/math/exp2).
+	/// \param arg function argument
+	/// \return 2 raised to \a arg
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half exp2(half arg)
+	{
+	#if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
+		return half(detail::binary, detail::float2half<half::round_style>(std::exp2(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		int abs = arg.data_ & 0x7FFF, e = (abs>>10) + (abs<=0x3FF), exp = (abs&0x3FF) + ((abs>0x3FF)<<10);	// e: exponent field (subnormals counted as 1); exp initially holds the significand incl. the implicit bit of normal numbers
+		if(!abs)
+			return half(detail::binary, 0x3C00);	// zero argument: result is exactly 1
+		if(abs >= 0x7C00)
+			return half(detail::binary, (abs==0x7C00) ? (0x7C00&((arg.data_>>15)-1U)) : detail::signal(arg.data_));	// +inf gives +inf, -inf gives 0, NaN is signaled
+		if(abs >= 0x4E40)
+			return half(detail::binary, (arg.data_&0x8000) ? detail::underflow<half::round_style>() : detail::overflow<half::round_style>());	// magnitude too large: underflow for negative, overflow for positive arguments
+		return half(detail::binary, detail::exp2_post<half::round_style>(
+			(static_cast<detail::uint32>(exp)<<(6+e))&0x7FFFFFFF, exp>>(25-e), (arg.data_&0x8000)!=0, 0, 28));	// low 31 bits and upper bits of the shifted significand are passed separately; third argument is true for a negative arg
+	#endif
+	}
+
+	/// Exponential minus one.
+	/// This function may be 1 ULP off the correctly rounded exact result in <0.05% of inputs for `std::round_to_nearest` 
+	/// and in <1% of inputs for any other rounding mode.
+	///
+	/// **See also:** Documentation for [std::expm1](https://en.cppreference.com/w/cpp/numeric/math/expm1).
+	/// \param arg function argument
+	/// \return e raised to \a arg and subtracted by 1
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half expm1(half arg)
+	{
+	#if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
+		return half(detail::binary, detail::float2half<half::round_style>(std::expm1(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000, e = (abs>>10) + (abs<=0x3FF), exp;	// e is the exponent field, with subnormals counted as exponent 1
+		if(!abs)
+			return arg;	// zero is returned unchanged, sign included
+		if(abs >= 0x7C00)
+			return half(detail::binary, (abs==0x7C00) ? (0x7C00+(sign>>1)) : detail::signal(arg.data_));	// +inf gives 0x7C00 (+inf), -inf gives 0x7C00+0x4000 = 0xBC00 (-1), NaN is signaled
+		if(abs >= 0x4A00)
+			return half(detail::binary, (arg.data_&0x8000) ? detail::rounded<half::round_style,true>(0xBBFF, 1, 1) : detail::overflow<half::round_style>());	// large magnitude: positive overflows, negative rounds from 0xBBFF (the value just above -1)
+		detail::uint32 m = detail::multiply64(static_cast<detail::uint32>((abs&0x3FF)+((abs>0x3FF)<<10))<<21, 0xB8AA3B29);	// significand times 0xB8AA3B29, which is log2(e) scaled by 2^31
+		if(e < 14)
+		{
+			exp = 0;
+			m >>= 14 - e;
+		}
+		else
+		{
+			exp = m >> (45-e);	// upper bits of the scaled argument go to exp
+			m = (m<<(e-14)) & 0x7FFFFFFF;	// the remaining low 31 bits stay in m
+		}
+		m = detail::exp2(m);
+		if(sign)	// negative argument
+		{
+			int s = 0;
+			if(m > 0x80000000)
+			{
+				++exp;
+				m = detail::divide64(0x80000000, m, s);
+			}
+			m = 0x80000000 - ((m>>exp)|((m&((static_cast<detail::uint32>(1)<<exp)-1))!=0)|s);	// subtract from 0x80000000; bits shifted out and s are OR-ed into bit 0
+			exp = 0;
+		}
+		else
+			m -= (exp<31) ? (0x80000000>>exp) : 1;
+		for(exp+=14; m<0x80000000 && exp; m<<=1,--exp) ;	// shift left until bit 31 is set or exp reaches 0
+		if(exp > 29)
+			return half(detail::binary, detail::overflow<half::round_style>());
+		return half(detail::binary, detail::rounded<half::round_style,true>(sign+(exp<<10)+(m>>21), (m>>20)&1, (m&0xFFFFF)!=0));	// assemble sign, exponent and top bits; bit 20 and "any lower bit set" are passed separately for rounding
+	#endif
+	}
+
+	/// Natural logarithm.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::log](https://en.cppreference.com/w/cpp/numeric/math/log).
+	/// \param arg function argument
+	/// \return logarithm of \a arg to base e (positive infinity is returned unchanged)
+	/// \exception FE_INVALID for signaling NaN or negative argument
+	/// \exception FE_DIVBYZERO for 0
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half log(half arg)
+	{
+	#ifndef HALF_ARITHMETIC_TYPE
+		int mag = arg.data_ & 0x7FFF, ilog = -15;
+		if(!mag)
+			return half(detail::binary, detail::pole(0x8000));
+		if(arg.data_ & 0x8000)
+			return half(detail::binary, (arg.data_>0xFC00) ? detail::signal(arg.data_) : detail::invalid());
+		if(mag >= 0x7C00)
+			return (mag>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;
+		for(; mag<0x400; --ilog) mag <<= 1;	// bring a subnormal up until bit 10 is set
+		ilog += mag >> 10;
+		return half(detail::binary, detail::log2_post<half::round_style,0xB8AA3B2A>(
+			detail::log2(static_cast<detail::uint32>((mag&0x3FF)|0x400)<<20, 27)+8, ilog, 17));
+	#else
+		return half(detail::binary, detail::float2half<half::round_style>(std::log(detail::half2float<detail::internal_t>(arg.data_))));
+	#endif
+	}
+
+	/// Common logarithm.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::log10](https://en.cppreference.com/w/cpp/numeric/math/log10).
+	/// \param arg function argument
+	/// \return logarithm of \a arg to base 10 (positive infinity is returned unchanged)
+	/// \exception FE_INVALID for signaling NaN or negative argument
+	/// \exception FE_DIVBYZERO for 0
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half log10(half arg)
+	{
+	#ifndef HALF_ARITHMETIC_TYPE
+		int mag = arg.data_ & 0x7FFF, ilog = -15;
+		if(!mag)
+			return half(detail::binary, detail::pole(0x8000));
+		if(arg.data_ & 0x8000)
+			return half(detail::binary, (arg.data_>0xFC00) ? detail::signal(arg.data_) : detail::invalid());
+		if(mag > 0x7C00)
+			return half(detail::binary, detail::signal(arg.data_));
+		if(mag == 0x7C00)
+			return arg;
+		const int exact =	(mag==0x4900) ? 0x3C00 : (mag==0x5640) ? 0x4000 :	// 10 -> 1, 100 -> 2
+							(mag==0x63D0) ? 0x4200 : (mag==0x70E2) ? 0x4400 : 0;	// 1000 -> 3, 10000 -> 4, otherwise no shortcut
+		if(exact)
+			return half(detail::binary, exact);
+		while(mag < 0x400)	// bring a subnormal up until bit 10 is set
+			mag <<= 1, --ilog;
+		ilog += mag >> 10;
+		return half(detail::binary, detail::log2_post<half::round_style,0xD49A784C>(
+			detail::log2(static_cast<detail::uint32>((mag&0x3FF)|0x400)<<20, 27)+8, ilog, 16));
+	#else
+		return half(detail::binary, detail::float2half<half::round_style>(std::log10(detail::half2float<detail::internal_t>(arg.data_))));
+	#endif
+	}
+
+	/// Binary logarithm.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::log2](https://en.cppreference.com/w/cpp/numeric/math/log2).
+	/// \param arg function argument
+	/// \return logarithm of \a arg to base 2
+	/// \exception FE_INVALID for signaling NaN or negative argument
+	/// \exception FE_DIVBYZERO for 0
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half log2(half arg)
+	{
+	#if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
+		return half(detail::binary, detail::float2half<half::round_style>(std::log2(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		int abs = arg.data_ & 0x7FFF, exp = -15, s = 0;
+		if(!abs)
+			return half(detail::binary, detail::pole(0x8000));	// zero argument: pole with the sign bit set
+		if(arg.data_ & 0x8000)
+			return half(detail::binary, (arg.data_<=0xFC00) ? detail::invalid() : detail::signal(arg.data_));	// negative numbers (incl. -inf) are invalid, negative NaNs are signaled
+		if(abs >= 0x7C00)
+			return (abs==0x7C00) ? arg : half(detail::binary, detail::signal(arg.data_));	// +inf is returned unchanged, NaN is signaled
+		if(abs == 0x3C00)
+			return half(detail::binary, 0);	// argument 1: result is exactly 0
+		for(; abs<0x400; abs<<=1,--exp) ;	// shift a subnormal up until bit 10 is set
+		exp += (abs>>10);
+		if(!(abs&0x3FF))	// no fraction bits set: the argument is a power of two and the result is the integer exp
+		{
+			unsigned int value = static_cast<unsigned>(exp<0) << 15, m = std::abs(exp) << 6;	// sign bit of the result and |exp| as an integer significand
+			for(exp=18; m<0x400; m<<=1,--exp) ;	// normalize |exp| into half-precision exponent and fraction
+			return half(detail::binary, value+(exp<<10)+m);
+		}
+		detail::uint32 ilog = exp, sign = detail::sign_mask(ilog), m = 
+			(((ilog<<27)+(detail::log2(static_cast<detail::uint32>((abs&0x3FF)|0x400)<<20, 28)>>4))^sign) - sign;	// combine exponent and significand logarithm; "(v^sign)-sign" presumably takes the absolute value if sign_mask yields all ones for negatives - confirm
+		if(!m)
+			return half(detail::binary, 0);
+		for(exp=14; m<0x8000000 && exp; m<<=1,--exp) ;	// shift left until bit 27 is set or exp reaches 0
+		for(; m>0xFFFFFFF; m>>=1,++exp)
+			s |= m & 1;	// collect bits shifted out on the right
+		return half(detail::binary, detail::fixed2half<half::round_style,27,false,false,true>(m, exp, sign&0x8000, s));
+	#endif
+	}
+
+	/// Natural logarithm plus one.
+	/// This function may be 1 ULP off the correctly rounded exact result in <0.05% of inputs for `std::round_to_nearest` 
+	/// and in ~1% of inputs for any other rounding mode.
+	///
+	/// **See also:** Documentation for [std::log1p](https://en.cppreference.com/w/cpp/numeric/math/log1p).
+	/// \param arg function argument
+	/// \return logarithm of \a arg plus 1 to base e
+	/// \exception FE_INVALID for signaling NaN or argument <-1
+	/// \exception FE_DIVBYZERO for -1
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half log1p(half arg)
+	{
+	#if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
+		return half(detail::binary, detail::float2half<half::round_style>(std::log1p(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		if(arg.data_ >= 0xBC00)	// bit patterns from -1 (0xBC00) upwards: -1, values below -1, -inf and negative NaNs
+			return half(detail::binary, (arg.data_==0xBC00) ? detail::pole(0x8000) : (arg.data_<=0xFC00) ? detail::invalid() : detail::signal(arg.data_));
+		int abs = arg.data_ & 0x7FFF, exp = -15;
+		if(!abs || abs >= 0x7C00)
+			return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;	// zeros and +inf are returned unchanged, NaN is signaled
+		for(; abs<0x400; abs<<=1,--exp) ;	// shift a subnormal up until bit 10 is set
+		exp += abs >> 10;
+		detail::uint32 m = static_cast<detail::uint32>((abs&0x3FF)|0x400) << 20;	// significand with its leading bit at bit 30
+		if(arg.data_ & 0x8000)	// negative argument (magnitude below 1 at this point)
+		{
+			m = 0x40000000 - (m>>-exp);	// 1 - |arg|, with 0x40000000 standing for 1
+			for(exp=0; m<0x40000000; m<<=1,--exp) ;	// renormalize the difference
+		}
+		else
+		{
+			if(exp < 0)
+			{
+				m = 0x40000000 + (m>>-exp);	// 1 + arg for arg below 1
+				exp = 0;
+			}
+			else
+			{
+				m += 0x40000000 >> exp;	// arg + 1 for arg of at least 1
+				int i = m >> 31;	// 1 if the sum carried into bit 31
+				m >>= i;
+				exp += i;
+			}
+		}
+		return half(detail::binary, detail::log2_post<half::round_style,0xB8AA3B2A>(detail::log2(m), exp, 17));
+	#endif
+	}
+
+	/// \}
+	/// \anchor power
+	/// \name Power functions
+	/// \{
+
+	/// Square root.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::sqrt](https://en.cppreference.com/w/cpp/numeric/math/sqrt).
+	/// \param arg function argument
+	/// \return square root of \a arg (zeros and positive infinity are returned unchanged)
+	/// \exception FE_INVALID for signaling NaN and negative arguments
+	/// \exception FE_INEXACT according to rounding
+	inline half sqrt(half arg)
+	{
+	#ifdef HALF_ARITHMETIC_TYPE
+		return half(detail::binary, detail::float2half<half::round_style>(std::sqrt(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		int abs = arg.data_ & 0x7FFF, exp = 15;
+		if(!abs || arg.data_ >= 0x7C00)	// zeros, +inf, NaNs and every negative value
+			return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (arg.data_>0x8000) ? detail::invalid() : arg.data_);	// NaN is signaled, negative nonzero is invalid, +-0 and +inf pass through
+		for(; abs<0x400; abs<<=1,--exp) ;	// shift a subnormal up until bit 10 is set
+		detail::uint32 r = static_cast<detail::uint32>((abs&0x3FF)|0x400) << 10, m = detail::sqrt<20>(r, exp+=abs>>10);	// NOTE(review): r and exp are read again below, so detail::sqrt presumably updates them in place - confirm
+		return half(detail::binary, detail::rounded<half::round_style,false>((exp<<10)+(m&0x3FF), r>m, r!=0));
+	#endif
+	}
+
+	/// Inverse square root.
+	/// This function is exact to rounding for all rounding modes and thus generally more accurate than directly computing 
+	/// 1 / sqrt(\a arg) in half-precision, in addition to also being faster.
+	/// \param arg function argument
+	/// \return reciprocal of square root of \a arg
+	/// \exception FE_INVALID for signaling NaN and negative arguments
+	/// \exception FE_INEXACT according to rounding
+	inline half rsqrt(half arg)
+	{
+	#ifdef HALF_ARITHMETIC_TYPE
+		return half(detail::binary, detail::float2half<half::round_style>(detail::internal_t(1)/std::sqrt(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		unsigned int abs = arg.data_ & 0x7FFF, bias = 0x4000;
+		if(!abs || arg.data_ >= 0x7C00)	// zeros, +inf, NaNs and every negative value
+			return half(detail::binary,	(abs>0x7C00) ? detail::signal(arg.data_) : (arg.data_>0x8000) ?
+										detail::invalid() : !abs ? detail::pole(arg.data_&0x8000) : 0);	// NaN is signaled, negative nonzero is invalid, +-0 is a pole, +inf gives 0
+		for(; abs<0x400; abs<<=1,bias-=0x400) ;	// shift a subnormal up until bit 10 is set, lowering the bias once per shift
+		unsigned int frac = (abs+=bias) & 0x7FF;	// fraction bits plus the lowest exponent bit of the biased value
+		if(frac == 0x400)
+			return half(detail::binary, 0x7A00-(abs>>1));	// NOTE(review): presumably the case with an exactly representable result - confirm
+		if((half::round_style == std::round_to_nearest && (frac == 0x3FE || frac == 0x76C)) ||
+		   (half::round_style != std::round_to_nearest && (frac == 0x15A || frac == 0x3FC || frac == 0x401 || frac == 0x402 || frac == 0x67B)))
+			return pow(arg, half(detail::binary, 0xB800));	// listed fraction patterns are delegated to pow(arg, -0.5) instead of the iteration below
+		detail::uint32 f = 0x17376 - abs, mx = (abs&0x3FF) | 0x400, my = ((f>>1)&0x3FF) | 0x400, mz = my * my;	// NOTE(review): looks like an initial estimate derived from a magic constant - confirm
+		int expy = (f>>11) - 31, expx = 32 - (abs>>10), i = mz >> 21;
+		for(mz=0x60000000-(((mz>>i)*mx)>>(expx-2*expy-i)); mz<0x40000000; mz<<=1,--expy) ;	// first refinement step, then renormalize mz
+		i = (my*=mz>>10) >> 31;
+		expy += i;
+		my = (my>>(20+i)) + 1;
+		i = (mz=my*my) >> 21;
+		for(mz=0x60000000-(((mz>>i)*mx)>>(expx-2*expy-i)); mz<0x40000000; mz<<=1,--expy) ;	// second refinement step of the same form
+		i = (my*=(mz>>10)+1) >> 31;
+		return half(detail::binary, detail::fixed2half<half::round_style,30,false,false,true>(my>>i, expy+i+14));
+	#endif
+	}
+
+	/// Cubic root.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::cbrt](https://en.cppreference.com/w/cpp/numeric/math/cbrt).
+	/// \param arg function argument
+	/// \return cubic root of \a arg
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_INEXACT according to rounding
+	inline half cbrt(half arg)
+	{
+	#if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
+		return half(detail::binary, detail::float2half<half::round_style>(std::cbrt(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		int abs = arg.data_ & 0x7FFF, exp = -15;
+		if(!abs || abs == 0x3C00 || abs >= 0x7C00)
+			return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;	// zeros, +-1 and infinities are returned unchanged, NaN is signaled
+		for(; abs<0x400; abs<<=1, --exp);	// shift a subnormal up until bit 10 is set
+		detail::uint32 ilog = exp + (abs>>10), sign = detail::sign_mask(ilog), f, m = 
+			(((ilog<<27)+(detail::log2(static_cast<detail::uint32>((abs&0x3FF)|0x400)<<20, 24)>>4))^sign) - sign;	// base-2 logarithm of |arg| in fixed point; "(v^sign)-sign" presumably takes its absolute value - confirm against sign_mask
+		for(exp=2; m<0x80000000; m<<=1,--exp) ;	// normalize so bit 31 is set
+		m = detail::multiply64(m, 0xAAAAAAAB);	// NOTE(review): 0xAAAAAAAB is about 2/3 of 2^32; together with the shift below this presumably scales the logarithm by 1/3 - confirm
+		int i = m >> 31, s;
+		exp += i;
+		m <<= 1 - i;
+		if(exp < 0)
+		{
+			f = m >> -exp;
+			exp = 0;
+		}
+		else
+		{
+			f = (m<<exp) & 0x7FFFFFFF;	// low 31 bits go to f
+			exp = m >> (31-exp);	// upper bits go to exp
+		}
+		m = detail::exp2(f, (half::round_style==std::round_to_nearest) ? 29 : 26);
+		if(sign)	// logarithm was negative
+		{
+			if(m > 0x80000000)
+			{
+				m = detail::divide64(0x80000000, m, s);
+				++exp;
+			}
+			exp = -exp;
+		}
+		return half(detail::binary, (half::round_style==std::round_to_nearest) ?
+			detail::fixed2half<half::round_style,31,false,false,false>(m, exp+14, arg.data_&0x8000) :	// the result carries the sign bit of arg
+			detail::fixed2half<half::round_style,23,false,false,false>((m+0x80)>>8, exp+14, arg.data_&0x8000));
+	#endif
+	}
+
+	/// Hypotenuse function.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::hypot](https://en.cppreference.com/w/cpp/numeric/math/hypot).
+	/// \param x first argument
+	/// \param y second argument
+	/// \return square root of sum of squares without internal over- or underflows
+	/// \exception FE_INVALID if \a x or \a y is signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding of the final square root
+	inline half hypot(half x, half y)
+	{
+	#ifdef HALF_ARITHMETIC_TYPE
+		detail::internal_t fx = detail::half2float<detail::internal_t>(x.data_), fy = detail::half2float<detail::internal_t>(y.data_);
+		#if HALF_ENABLE_CPP11_CMATH
+			return half(detail::binary, detail::float2half<half::round_style>(std::hypot(fx, fy)));
+		#else
+			return half(detail::binary, detail::float2half<half::round_style>(std::sqrt(fx*fx+fy*fy)));
+		#endif
+	#else
+		int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, expx = 0, expy = 0;
+		if(absx >= 0x7C00 || absy >= 0x7C00)	// some operand is infinite or NaN
+			return half(detail::binary,	(absx==0x7C00) ? detail::select(0x7C00, y.data_) :
+				(absy==0x7C00) ? detail::select(0x7C00, x.data_) : detail::signal(x.data_, y.data_));	// an infinite operand takes precedence: 0x7C00 (+inf) is passed to select() together with the other operand
+		if(!absx)
+			return half(detail::binary, absy ? detail::check_underflow(absy) : 0);	// x is zero: result is |y|
+		if(!absy)
+			return half(detail::binary, detail::check_underflow(absx));	// y is zero: result is |x|
+		if(absy > absx)
+			std::swap(absx, absy);	// make x the operand of larger magnitude
+		for(; absx<0x400; absx<<=1,--expx) ;	// shift subnormals up until bit 10 is set
+		for(; absy<0x400; absy<<=1,--expy) ;
+		detail::uint32 mx = (absx&0x3FF) | 0x400, my = (absy&0x3FF) | 0x400;	// 11-bit significands with bit 10 forced on
+		mx *= mx;	// square both significands
+		my *= my;
+		int ix = mx >> 21, iy = my >> 21;	// 1 if the respective square reached bit 21
+		expx = 2*(expx+(absx>>10)) - 15 + ix;
+		expy = 2*(expy+(absy>>10)) - 15 + iy;
+		mx <<= 10 - ix;	// move the leading bit of each square to bit 30
+		my <<= 10 - iy;
+		int d = expx - expy;
+		my = (d<30) ? ((my>>d)|((my&((static_cast<detail::uint32>(1)<<d)-1))!=0)) : 1;	// align the smaller square; any bits shifted out are OR-ed into bit 0
+		return half(detail::binary, detail::hypot_post<half::round_style>(mx+my, expx));
+	#endif
+	}
+
+	/// Hypotenuse function.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::hypot](https://en.cppreference.com/w/cpp/numeric/math/hypot).
+	/// \param x first argument
+	/// \param y second argument
+	/// \param z third argument
+	/// \return square root of sum of squares without internal over- or underflows
+	/// \exception FE_INVALID if \a x, \a y or \a z is signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding of the final square root
+	inline half hypot(half x, half y, half z)
+	{
+	#ifdef HALF_ARITHMETIC_TYPE
+		detail::internal_t fx = detail::half2float<detail::internal_t>(x.data_), fy = detail::half2float<detail::internal_t>(y.data_), fz = detail::half2float<detail::internal_t>(z.data_);
+		return half(detail::binary, detail::float2half<half::round_style>(std::sqrt(fx*fx+fy*fy+fz*fz)));
+	#else
+		int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, absz = z.data_ & 0x7FFF, expx = 0, expy = 0, expz = 0;
+		if(!absx)
+			return hypot(y, z);	// a zero operand reduces the problem to the two-argument overload
+		if(!absy)
+			return hypot(x, z);
+		if(!absz)
+			return hypot(x, y);
+		if(absx >= 0x7C00 || absy >= 0x7C00 || absz >= 0x7C00)	// some operand is infinite or NaN
+			return half(detail::binary,	(absx==0x7C00) ? detail::select(0x7C00, detail::select(y.data_, z.data_)) :
+										(absy==0x7C00) ? detail::select(0x7C00, detail::select(x.data_, z.data_)) :
+										(absz==0x7C00) ? detail::select(0x7C00, detail::select(x.data_, y.data_)) :
+										detail::signal(x.data_, y.data_, z.data_));	// no infinite operand: at least one NaN, which is signaled
+		if(absz > absy)
+			std::swap(absy, absz);	// three conditional swaps order the magnitudes as absx >= absy >= absz
+		if(absy > absx)
+			std::swap(absx, absy);
+		if(absz > absy)
+			std::swap(absy, absz);
+		for(; absx<0x400; absx<<=1,--expx) ;	// shift subnormals up until bit 10 is set
+		for(; absy<0x400; absy<<=1,--expy) ;
+		for(; absz<0x400; absz<<=1,--expz) ;
+		detail::uint32 mx = (absx&0x3FF) | 0x400, my = (absy&0x3FF) | 0x400, mz = (absz&0x3FF) | 0x400;	// 11-bit significands with bit 10 forced on
+		mx *= mx;	// square all three significands
+		my *= my;
+		mz *= mz;
+		int ix = mx >> 21, iy = my >> 21, iz = mz >> 21;	// 1 if the respective square reached bit 21
+		expx = 2*(expx+(absx>>10)) - 15 + ix;
+		expy = 2*(expy+(absy>>10)) - 15 + iy;
+		expz = 2*(expz+(absz>>10)) - 15 + iz;
+		mx <<= 10 - ix;	// move the leading bit of each square to bit 30
+		my <<= 10 - iy;
+		mz <<= 10 - iz;
+		int d = expy - expz;
+		mz = (d<30) ? ((mz>>d)|((mz&((static_cast<detail::uint32>(1)<<d)-1))!=0)) : 1;	// align the smallest square to the middle one; bits shifted out are OR-ed into bit 0
+		my += mz;	// sum of the two smaller squares
+		if(my & 0x80000000)	// the partial sum carried into bit 31
+		{
+			my = (my>>1) | (my&1);	// shift back down, keeping the dropped bit OR-ed into bit 0
+			if(++expy > expx)
+			{
+				std::swap(mx, my);	// the partial sum now has the larger exponent, so it takes the place of x
+				std::swap(expx, expy);
+			}
+		}
+		d = expx - expy;
+		my = (d<30) ? ((my>>d)|((my&((static_cast<detail::uint32>(1)<<d)-1))!=0)) : 1;	// align the smaller term to the larger one
+		return half(detail::binary, detail::hypot_post<half::round_style>(mx+my, expx));
+	#endif
+	}
+
+	/// Power function.
+	/// This function may be 1 ULP off the correctly rounded exact result for any rounding mode in ~0.00025% of inputs.
+	///
+	/// **See also:** Documentation for [std::pow](https://en.cppreference.com/w/cpp/numeric/math/pow).
+	/// \param x base
+	/// \param y exponent
+	/// \return \a x raised to \a y
+	/// \exception FE_INVALID if \a x or \a y is signaling NaN or if \a x is finite and negative and \a y is finite and not integral
+	/// \exception FE_DIVBYZERO if \a x is 0 and \a y is negative
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half pow(half x, half y)
+	{
+	#ifdef HALF_ARITHMETIC_TYPE
+		return half(detail::binary, detail::float2half<half::round_style>(std::pow(detail::half2float<detail::internal_t>(x.data_), detail::half2float<detail::internal_t>(y.data_))));
+	#else
+		int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, exp = -15;
+		if(!absy || x.data_ == 0x3C00)	// zero exponent or base of exactly +1
+			return half(detail::binary, detail::select(0x3C00, (x.data_==0x3C00) ? y.data_ : x.data_));	// 0x3C00 (1) is passed to select() together with the other operand
+		bool is_int = absy >= 0x6400 || (absy>=0x3C00 && !(absy&((1<<(25-(absy>>10)))-1)));	// y is integral: magnitude at least 1024, or at least 1 with no fractional bits set
+		unsigned int sign = x.data_ & (static_cast<unsigned>((absy<0x6800)&&is_int&&((absy>>(25-(absy>>10)))&1))<<15);	// sign bit of x, kept only if y is an odd integer
+		if(absx >= 0x7C00 || absy >= 0x7C00)	// some operand is infinite or NaN
+			return half(detail::binary,	(absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
+										(absy==0x7C00) ? ((absx==0x3C00) ? 0x3C00 : (!absx && y.data_==0xFC00) ? detail::pole() :
+										(0x7C00&-((y.data_>>15)^(absx>0x3C00)))) : (sign|(0x7C00&((y.data_>>15)-1U))));	// infinite y: 1 for |x| == 1, pole for 0^-inf, else inf or 0; infinite x with finite y: signed inf for positive y, signed 0 for negative y
+		if(!absx)
+			return half(detail::binary, (y.data_&0x8000) ? detail::pole(sign) : sign);	// zero base: pole for negative y, signed zero otherwise
+		if((x.data_&0x8000) && !is_int)
+			return half(detail::binary, detail::invalid());	// negative base with non-integral exponent
+		if(x.data_ == 0xBC00)
+			return half(detail::binary, sign|0x3C00);	// base -1 with integral exponent: -1 for odd y, +1 otherwise
+		switch(y.data_)	// exponents handled by cheaper exact operations
+		{
+			case 0x3800: return sqrt(x);	// y == 0.5
+			case 0x3C00: return half(detail::binary, detail::check_underflow(x.data_));	// y == 1
+			case 0x4000: return x * x;	// y == 2
+			case 0xBC00: return half(detail::binary, 0x3C00) / x;	// y == -1
+		}
+		for(; absx<0x400; absx<<=1,--exp) ;	// shift a subnormal base up until bit 10 is set
+		detail::uint32 ilog = exp + (absx>>10), msign = detail::sign_mask(ilog), f, m = 
+			(((ilog<<27)+((detail::log2(static_cast<detail::uint32>((absx&0x3FF)|0x400)<<20)+8)>>4))^msign) - msign;	// base-2 logarithm of |x| in fixed point; "(v^msign)-msign" presumably takes its absolute value - confirm against sign_mask
+		for(exp=-11; m<0x80000000; m<<=1,--exp) ;	// normalize the logarithm so bit 31 is set
+		for(; absy<0x400; absy<<=1,--exp) ;	// shift a subnormal exponent operand up until bit 10 is set
+		m = detail::multiply64(m, static_cast<detail::uint32>((absy&0x3FF)|0x400)<<21);	// multiply the logarithm by the significand of y
+		int i = m >> 31;
+		exp += (absy>>10) + i;
+		m <<= 1 - i;
+		if(exp < 0)
+		{
+			f = m >> -exp;
+			exp = 0;
+		}
+		else
+		{
+			f = (m<<exp) & 0x7FFFFFFF;	// low 31 bits go to f
+			exp = m >> (31-exp);	// upper bits go to exp
+		}
+		return half(detail::binary, detail::exp2_post<half::round_style>(f, exp, ((msign&1)^(y.data_>>15))!=0, sign));	// third argument combines the sign of the logarithm with the sign of y
+	#endif
+	}
+
+	/// \}
+	/// \anchor trigonometric
+	/// \name Trigonometric functions
+	/// \{
+
+	/// Compute sine and cosine simultaneously.
+	/// This returns the same results as sin() and cos() but is faster than calling each function individually.
+	///
+	/// This function is exact to rounding for all rounding modes.
+	/// \param arg function argument
+	/// \param sin variable to take sine of \a arg
+	/// \param cos variable to take cosine of \a arg
+	/// \exception FE_INVALID for signaling NaN or infinity
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline void sincos(half arg, half *sin, half *cos)
+	{
+	#ifdef HALF_ARITHMETIC_TYPE
+		detail::internal_t f = detail::half2float<detail::internal_t>(arg.data_); // built-in path: convert once, call std::sin and std::cos separately
+		*sin = half(detail::binary, detail::float2half<half::round_style>(std::sin(f)));
+		*cos = half(detail::binary, detail::float2half<half::round_style>(std::cos(f)));
+	#else
+		int abs = arg.data_ & 0x7FFF, sign = arg.data_ >> 15, k; // magnitude bits, sign as 0/1, k is filled in by detail::angle_arg
+		if(abs >= 0x7C00) // infinity or NaN: both outputs get the same value
+			*sin = *cos = half(detail::binary, (abs==0x7C00) ? detail::invalid() : detail::signal(arg.data_)); // infinity -> detail::invalid(), NaN -> detail::signal
+		else if(!abs) // +/-0
+		{
+			*sin = arg; // zero is returned unchanged, including its sign
+			*cos = half(detail::binary, 0x3C00); // 1
+		}
+		else if(abs < 0x2500) // small arguments: results are built directly from bit patterns
+		{
+			*sin = half(detail::binary, detail::rounded<half::round_style,true>(arg.data_-1, 1, 1)); // one bit pattern below arg, with both trailing flags set for detail::rounded
+			*cos = half(detail::binary, detail::rounded<half::round_style,true>(0x3BFF, 1, 1)); // pattern just below 1 (0x3C00), with both trailing flags set
+		}
+		else
+		{
+			if(half::round_style != std::round_to_nearest) // hard-coded results for four arguments, used only outside round-to-nearest
+			{
+				switch(abs)
+				{
+				case 0x48B7:
+					*sin = half(detail::binary, detail::rounded<half::round_style,true>((~arg.data_&0x8000)|0x1D07, 1, 1)); // sign opposite to arg
+					*cos = half(detail::binary, detail::rounded<half::round_style,true>(0xBBFF, 1, 1));
+					return;
+				case 0x598C:
+					*sin = half(detail::binary, detail::rounded<half::round_style,true>((arg.data_&0x8000)|0x3BFF, 1, 1)); // sign of arg
+					*cos = half(detail::binary, detail::rounded<half::round_style,true>(0x80FC, 1, 1));
+					return;
+				case 0x6A64:
+					*sin = half(detail::binary, detail::rounded<half::round_style,true>((~arg.data_&0x8000)|0x3BFE, 1, 1)); // sign opposite to arg
+					*cos = half(detail::binary, detail::rounded<half::round_style,true>(0x27FF, 1, 1));
+					return;
+				case 0x6D8C:
+					*sin = half(detail::binary, detail::rounded<half::round_style,true>((arg.data_&0x8000)|0x0FE6, 1, 1)); // sign of arg
+					*cos = half(detail::binary, detail::rounded<half::round_style,true>(0x3BFF, 1, 1));
+					return;
+				}
+			}
+			std::pair<detail::uint32,detail::uint32> sc = detail::sincos(detail::angle_arg(abs, k), 28); // (sine, cosine) of the reduced angle; k presumably counts quadrants - confirm in detail::angle_arg
+			switch(k & 3) // rotate the pair according to the low two bits of k (case 0 leaves it as is)
+			{
+				case 1: sc = std::make_pair(sc.second, -sc.first); break;
+				case 2: sc = std::make_pair(-sc.first, -sc.second); break;
+				case 3: sc = std::make_pair(-sc.second, sc.first); break;
+			}
+			*sin = half(detail::binary, detail::fixed2half<half::round_style,30,true,true,true>((sc.first^-static_cast<detail::uint32>(sign))+sign)); // two's-complement negation of the sine when arg is negative
+			*cos = half(detail::binary, detail::fixed2half<half::round_style,30,true,true,true>(sc.second)); // cosine does not depend on the sign of arg
+		}
+	#endif
+	}
+
+	/// Sine function.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::sin](https://en.cppreference.com/w/cpp/numeric/math/sin).
+	/// \param arg function argument
+	/// \return sine value of \a arg
+	/// \exception FE_INVALID for signaling NaN or infinity
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half sin(half arg)
+	{
+	#ifdef HALF_ARITHMETIC_TYPE
+		return half(detail::binary, detail::float2half<half::round_style>(std::sin(detail::half2float<detail::internal_t>(arg.data_)))); // built-in path via std::sin
+	#else
+		int abs = arg.data_ & 0x7FFF, k; // magnitude bits; k is filled in by detail::angle_arg
+		if(!abs)
+			return arg; // +/-0 is returned unchanged
+		if(abs >= 0x7C00)
+			return half(detail::binary, (abs==0x7C00) ? detail::invalid() : detail::signal(arg.data_)); // infinity -> detail::invalid(), NaN -> detail::signal
+		if(abs < 0x2900) // small arguments: result built from the bit pattern just below arg
+			return half(detail::binary, detail::rounded<half::round_style,true>(arg.data_-1, 1, 1));
+		if(half::round_style != std::round_to_nearest) // hard-coded results for three arguments, used only outside round-to-nearest
+			switch(abs)
+			{
+				case 0x48B7: return half(detail::binary, detail::rounded<half::round_style,true>((~arg.data_&0x8000)|0x1D07, 1, 1));
+				case 0x6A64: return half(detail::binary, detail::rounded<half::round_style,true>((~arg.data_&0x8000)|0x3BFE, 1, 1));
+				case 0x6D8C: return half(detail::binary, detail::rounded<half::round_style,true>((arg.data_&0x8000)|0x0FE6, 1, 1));
+			}
+		std::pair<detail::uint32,detail::uint32> sc = detail::sincos(detail::angle_arg(abs, k), 28); // (sine, cosine) of the reduced angle
+		detail::uint32 sign = -static_cast<detail::uint32>(((k>>1)&1)^(arg.data_>>15)); // all-ones mask when bit 1 of k differs from the sign bit of arg
+		return half(detail::binary, detail::fixed2half<half::round_style,30,true,true,true>((((k&1) ? sc.second : sc.first)^sign) - sign)); // odd k selects the cosine component; (v^sign)-sign negates when the mask is set
+	#endif
+	}
+
+	/// Cosine function.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::cos](https://en.cppreference.com/w/cpp/numeric/math/cos).
+	/// \param arg function argument
+	/// \return cosine value of \a arg
+	/// \exception FE_INVALID for signaling NaN or infinity
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half cos(half arg)
+	{
+	#ifdef HALF_ARITHMETIC_TYPE
+		return half(detail::binary, detail::float2half<half::round_style>(std::cos(detail::half2float<detail::internal_t>(arg.data_)))); // built-in path via std::cos
+	#else
+		int abs = arg.data_ & 0x7FFF, k; // magnitude bits; k is filled in by detail::angle_arg
+		if(!abs)
+			return half(detail::binary, 0x3C00); // cos(+/-0) = 1
+		if(abs >= 0x7C00)
+			return half(detail::binary, (abs==0x7C00) ? detail::invalid() : detail::signal(arg.data_)); // infinity -> detail::invalid(), NaN -> detail::signal
+		if(abs < 0x2500) // small arguments: result built from the pattern just below 1 (0x3C00)
+			return half(detail::binary, detail::rounded<half::round_style,true>(0x3BFF, 1, 1));
+		if(half::round_style != std::round_to_nearest && abs == 0x598C) // single hard-coded argument, used only outside round-to-nearest
+			return half(detail::binary, detail::rounded<half::round_style,true>(0x80FC, 1, 1));
+		std::pair<detail::uint32,detail::uint32> sc = detail::sincos(detail::angle_arg(abs, k), 28); // (sine, cosine) of the reduced angle
+		detail::uint32 sign = -static_cast<detail::uint32>(((k>>1)^k)&1); // all-ones mask when bits 0 and 1 of k differ (k&3 is 1 or 2)
+		return half(detail::binary, detail::fixed2half<half::round_style,30,true,true,true>((((k&1) ? sc.first : sc.second)^sign) - sign)); // odd k selects the sine component; (v^sign)-sign negates when the mask is set
+	#endif
+	}
+
+	/// Tangent function.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::tan](https://en.cppreference.com/w/cpp/numeric/math/tan).
+	/// \param arg function argument
+	/// \return tangent value of \a arg
+	/// \exception FE_INVALID for signaling NaN or infinity
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half tan(half arg)
+	{
+	#ifdef HALF_ARITHMETIC_TYPE
+		return half(detail::binary, detail::float2half<half::round_style>(std::tan(detail::half2float<detail::internal_t>(arg.data_)))); // built-in path via std::tan
+	#else
+		int abs = arg.data_ & 0x7FFF, exp = 13, k; // magnitude bits, starting exponent for the quotient, k filled in by detail::angle_arg
+		if(!abs)
+			return arg; // +/-0 is returned unchanged
+		if(abs >= 0x7C00)
+			return half(detail::binary, (abs==0x7C00) ? detail::invalid() : detail::signal(arg.data_)); // infinity -> detail::invalid(), NaN -> detail::signal
+		if(abs < 0x2700) // small arguments: result built from the bit pattern of arg itself
+			return half(detail::binary, detail::rounded<half::round_style,true>(arg.data_, 0, 1));
+		if(half::round_style != std::round_to_nearest) // hard-coded results for two arguments, used only outside round-to-nearest
+			switch(abs)
+			{
+				case 0x658C: return half(detail::binary, detail::rounded<half::round_style,true>((arg.data_&0x8000)|0x07E6, 1, 1));
+				case 0x7330: return half(detail::binary, detail::rounded<half::round_style,true>((~arg.data_&0x8000)|0x4B62, 1, 1));
+			}
+		std::pair<detail::uint32,detail::uint32> sc = detail::sincos(detail::angle_arg(abs, k), 30); // (sine, cosine) of the reduced angle
+		if(k & 1)
+			sc = std::make_pair(-sc.second, sc.first); // odd k: numerator becomes -cosine, denominator becomes sine
+		detail::uint32 signy = detail::sign_mask(sc.first), signx = detail::sign_mask(sc.second); // sign masks of numerator and denominator
+		detail::uint32 my = (sc.first^signy) - signy, mx = (sc.second^signx) - signx; // (v^mask)-mask: absolute values when the mask is all ones for negative v
+		for(; my<0x80000000; my<<=1,--exp) ; // normalize numerator, lowering exp
+		for(; mx<0x80000000; mx<<=1,++exp) ; // normalize denominator, raising exp
+		return half(detail::binary, detail::tangent_post<half::round_style>(my, mx, exp, (signy^signx^arg.data_)&0x8000)); // result sign bit combines both masks with the sign of arg; the division is done by detail::tangent_post
+	#endif
+	}
+
+	/// Arc sine.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::asin](https://en.cppreference.com/w/cpp/numeric/math/asin).
+	/// \param arg function argument
+	/// \return arc sine value of \a arg
+	/// \exception FE_INVALID for signaling NaN or if abs(\a arg) > 1
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half asin(half arg)
+	{
+	#ifdef HALF_ARITHMETIC_TYPE
+		return half(detail::binary, detail::float2half<half::round_style>(std::asin(detail::half2float<detail::internal_t>(arg.data_)))); // built-in path via std::asin
+	#else
+		unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000; // magnitude bits and isolated sign bit
+		if(!abs)
+			return arg; // +/-0 is returned unchanged
+		if(abs >= 0x3C00) // |arg| >= 1, infinity or NaN
+			return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (abs>0x3C00) ? detail::invalid() : // NaN -> detail::signal, |arg| > 1 -> detail::invalid()
+										detail::rounded<half::round_style,true>(sign|0x3E48, 0, 1)); // |arg| == 1: 0x3E48 is pi/2 truncated to half, with the sign of arg
+		if(abs < 0x2900) // small arguments: result built from the bit pattern of arg itself
+			return half(detail::binary, detail::rounded<half::round_style,true>(arg.data_, 0, 1));
+		if(half::round_style != std::round_to_nearest && (abs == 0x2B44 || abs == 0x2DC3)) // two hard-coded arguments, used only outside round-to-nearest
+			return half(detail::binary, detail::rounded<half::round_style,true>(arg.data_+1, 1, 1));
+		std::pair<detail::uint32,detail::uint32> sc = detail::atan2_args(abs); // pair of fixed-point operands derived from |arg| by detail::atan2_args
+		detail::uint32 m = detail::atan2(sc.first, sc.second, (half::round_style==std::round_to_nearest) ? 27 : 26); // third argument depends on the rounding mode
+		return half(detail::binary, detail::fixed2half<half::round_style,30,false,true,true>(m, 14, sign)); // convert the fixed-point result and attach the sign of arg
+	#endif
+	}
+
+	/// Arc cosine function.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::acos](https://en.cppreference.com/w/cpp/numeric/math/acos).
+	/// \param arg function argument
+	/// \return arc cosine value of \a arg
+	/// \exception FE_INVALID for signaling NaN or if abs(\a arg) > 1
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half acos(half arg)
+	{
+	#ifdef HALF_ARITHMETIC_TYPE
+		return half(detail::binary, detail::float2half<half::round_style>(std::acos(detail::half2float<detail::internal_t>(arg.data_)))); // built-in path via std::acos
+	#else
+		unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ >> 15; // magnitude bits and sign as 0/1
+		if(!abs)
+			return half(detail::binary, detail::rounded<half::round_style,true>(0x3E48, 0, 1)); // acos(+/-0): 0x3E48 is pi/2 truncated to half
+		if(abs >= 0x3C00) // |arg| >= 1, infinity or NaN
+			return half(detail::binary,	(abs>0x7C00) ? detail::signal(arg.data_) : (abs>0x3C00) ? detail::invalid() : // NaN -> detail::signal, |arg| > 1 -> detail::invalid()
+										sign ? detail::rounded<half::round_style,true>(0x4248, 0, 1) : 0); // acos(-1): 0x4248 is pi truncated to half; acos(+1) = 0
+		std::pair<detail::uint32,detail::uint32> cs = detail::atan2_args(abs); // same helper as asin; the pair is passed to detail::atan2 in swapped order below
+		detail::uint32 m = detail::atan2(cs.second, cs.first, 28);
+		return half(detail::binary, detail::fixed2half<half::round_style,31,false,true,true>(sign ? (0xC90FDAA2-m) : m, 15, 0, sign)); // 0xC90FDAA2 is pi scaled by 2^30; negative arguments use pi - m
+	#endif
+	}
+
+	/// Arc tangent function.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::atan](https://en.cppreference.com/w/cpp/numeric/math/atan).
+	/// \param arg function argument
+	/// \return arc tangent value of \a arg
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half atan(half arg)
+	{
+	#ifdef HALF_ARITHMETIC_TYPE
+		return half(detail::binary, detail::float2half<half::round_style>(std::atan(detail::half2float<detail::internal_t>(arg.data_)))); // built-in path via std::atan
+	#else
+		unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000; // magnitude bits and isolated sign bit
+		if(!abs)
+			return arg; // +/-0 is returned unchanged
+		if(abs >= 0x7C00)
+			return half(detail::binary, (abs==0x7C00) ? detail::rounded<half::round_style,true>(sign|0x3E48, 0, 1) : detail::signal(arg.data_)); // +/-infinity -> +/-pi/2 (0x3E48 truncated), NaN -> detail::signal
+		if(abs <= 0x2700) // small arguments: result built from the bit pattern just below arg
+			return half(detail::binary, detail::rounded<half::round_style,true>(arg.data_-1, 1, 1));
+		int exp = (abs>>10) + (abs<=0x3FF); // biased exponent field; subnormals are treated as exponent 1
+		detail::uint32 my = (abs&0x3FF) | ((abs>0x3FF)<<10); // mantissa, with the implicit bit added for normal numbers
+		detail::uint32 m = (exp>15) ?	detail::atan2(my<<19, 0x20000000>>(exp-15), (half::round_style==std::round_to_nearest) ? 26 : 24) : // exponent above 15 (|arg| >= 2): the second operand is shifted down instead of shifting my up
+										detail::atan2(my<<(exp+4), 0x20000000, (half::round_style==std::round_to_nearest) ? 30 : 28); // otherwise my is shifted by its exponent against a fixed second operand
+		return half(detail::binary, detail::fixed2half<half::round_style,30,false,true,true>(m, 14, sign)); // convert the fixed-point result and attach the sign of arg
+	#endif
+	}
+
+	/// Arc tangent function.
+	/// This function may be 1 ULP off the correctly rounded exact result in ~0.005% of inputs for `std::round_to_nearest`, 
+	/// in ~0.1% of inputs for `std::round_toward_zero` and in ~0.02% of inputs for any other rounding mode.
+	///
+	/// **See also:** Documentation for [std::atan2](https://en.cppreference.com/w/cpp/numeric/math/atan2).
+	/// \param y numerator
+	/// \param x denominator
+	/// \return arc tangent value
+	/// \exception FE_INVALID if \a x or \a y is signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half atan2(half y, half x)
+	{
+	#ifdef HALF_ARITHMETIC_TYPE
+		return half(detail::binary, detail::float2half<half::round_style>(std::atan2(detail::half2float<detail::internal_t>(y.data_), detail::half2float<detail::internal_t>(x.data_)))); // built-in path via std::atan2
+	#else
+		unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, signx = x.data_ >> 15, signy = y.data_ & 0x8000; // signx is 0/1, signy keeps the sign bit in place for OR-ing into results
+		if(absx >= 0x7C00 || absy >= 0x7C00) // at least one operand is infinity or NaN
+		{
+			if(absx > 0x7C00 || absy > 0x7C00)
+				return half(detail::binary, detail::signal(x.data_, y.data_)); // any NaN operand
+			if(absy == 0x7C00) // y is infinite
+				return half(detail::binary, (absx<0x7C00) ?	detail::rounded<half::round_style,true>(signy|0x3E48, 0, 1) : // finite x: +/-pi/2
+													signx ?	detail::rounded<half::round_style,true>(signy|0x40B6, 0, 1) : // x == -inf: +/-3pi/4 (0x40B6 truncated)
+															detail::rounded<half::round_style,true>(signy|0x3A48, 0, 1)); // x == +inf: +/-pi/4 (0x3A48 truncated)
+			return (x.data_==0x7C00) ? half(detail::binary, signy) : half(detail::binary, detail::rounded<half::round_style,true>(signy|0x4248, 0, 1)); // finite y: x == +inf gives signed zero, x == -inf gives +/-pi (0x4248 truncated)
+		}
+		if(!absy)
+			return signx ? half(detail::binary, detail::rounded<half::round_style,true>(signy|0x4248, 0, 1)) : y; // y == +/-0: +/-pi for negative x, otherwise y unchanged
+		if(!absx)
+			return half(detail::binary, detail::rounded<half::round_style,true>(signy|0x3E48, 0, 1)); // x == +/-0 with non-zero y: +/-pi/2
+		int d = (absy>>10) + (absy<=0x3FF) - (absx>>10) - (absx<=0x3FF); // difference of the exponent fields of y and x (subnormals count as exponent 1)
+		if(d > (signx ? 18 : 12)) // |y| far larger than |x|: result is +/-pi/2
+			return half(detail::binary, detail::rounded<half::round_style,true>(signy|0x3E48, 0, 1));
+		if(signx && d < -11) // negative x with |y| far smaller than |x|: result is +/-pi
+			return half(detail::binary, detail::rounded<half::round_style,true>(signy|0x4248, 0, 1));
+		if(!signx && d < ((half::round_style==std::round_toward_zero) ? -15 : -9)) // positive x with |y| far smaller: the result is computed as the quotient y/x
+		{
+			for(; absy<0x400; absy<<=1,--d) ; // normalize a subnormal y, adjusting d
+			detail::uint32 mx = ((absx<<1)&0x7FF) | 0x800, my = ((absy<<1)&0x7FF) | 0x800; // mantissas shifted left by one with the leading bit at 0x800
+			int i = my < mx; // 1 when the quotient would be below 1 and needs one more shift
+			d -= i;
+			if(d < -25)
+				return half(detail::binary, detail::underflow<half::round_style>(signy)); // quotient too small: handled by detail::underflow
+			my <<= 11 + i;
+			return half(detail::binary, detail::fixed2half<half::round_style,11,false,false,true>(my/mx, d+14, signy, my%mx!=0)); // integer quotient; a non-zero remainder is passed as the last argument
+		}
+		detail::uint32 m = detail::atan2(	((absy&0x3FF)|((absy>0x3FF)<<10))<<(19+((d<0) ? d : (d>0) ? 0 : -1)), // y mantissa, shifted down by the exponent gap when y has the smaller exponent
+											((absx&0x3FF)|((absx>0x3FF)<<10))<<(19-((d>0) ? d : (d<0) ? 0 : 1))); // x mantissa, shifted down by the exponent gap when x has the smaller exponent
+		return half(detail::binary, detail::fixed2half<half::round_style,31,false,true,true>(signx ? (0xC90FDAA2-m) : m, 15, signy, signx)); // 0xC90FDAA2 is pi scaled by 2^30; negative x uses pi - m
+	#endif
+	}
+
+	/// \}
+	/// \anchor hyperbolic
+	/// \name Hyperbolic functions
+	/// \{
+
+	/// Hyperbolic sine.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::sinh](https://en.cppreference.com/w/cpp/numeric/math/sinh).
+	/// \param arg function argument
+	/// \return hyperbolic sine value of \a arg
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half sinh(half arg)
+	{
+	#ifdef HALF_ARITHMETIC_TYPE
+		return half(detail::binary, detail::float2half<half::round_style>(std::sinh(detail::half2float<detail::internal_t>(arg.data_)))); // built-in path via std::sinh
+	#else
+		int abs = arg.data_ & 0x7FFF, exp; // magnitude bits; exp is filled in by detail::hyperbolic_args
+		if(!abs || abs >= 0x7C00)
+			return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg; // NaN -> detail::signal; zero and infinity are returned unchanged
+		if(abs <= 0x2900) // small arguments: result built from the bit pattern of arg itself
+			return half(detail::binary, detail::rounded<half::round_style,true>(arg.data_, 0, 1));
+		std::pair<detail::uint32,detail::uint32> mm = detail::hyperbolic_args(abs, exp, (half::round_style==std::round_to_nearest) ? 29 : 27); // two fixed-point terms, presumably the growing and decaying exponentials - confirm in detail::hyperbolic_args
+		detail::uint32 m = mm.first - mm.second; // difference of the two terms
+		for(exp+=13; m<0x80000000 && exp; m<<=1,--exp) ; // normalize m, stopping if exp reaches 0
+		unsigned int sign = arg.data_ & 0x8000;
+		if(exp > 29)
+			return half(detail::binary, detail::overflow<half::round_style>(sign)); // too large for half: signed overflow
+		return half(detail::binary, detail::fixed2half<half::round_style,31,false,false,true>(m, exp, sign)); // convert the fixed-point result and attach the sign of arg
+	#endif
+	}
+
+	/// Hyperbolic cosine.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::cosh](https://en.cppreference.com/w/cpp/numeric/math/cosh).
+	/// \param arg function argument
+	/// \return hyperbolic cosine value of \a arg
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half cosh(half arg)
+	{
+	#ifdef HALF_ARITHMETIC_TYPE
+		return half(detail::binary, detail::float2half<half::round_style>(std::cosh(detail::half2float<detail::internal_t>(arg.data_)))); // built-in path via std::cosh
+	#else
+		int abs = arg.data_ & 0x7FFF, exp; // magnitude bits; exp is filled in by detail::hyperbolic_args
+		if(!abs)
+			return half(detail::binary, 0x3C00); // cosh(+/-0) = 1
+		if(abs >= 0x7C00)
+			return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : 0x7C00); // NaN -> detail::signal; +/-infinity -> +infinity
+		std::pair<detail::uint32,detail::uint32> mm = detail::hyperbolic_args(abs, exp, (half::round_style==std::round_to_nearest) ? 23 : 26); // two fixed-point terms from detail::hyperbolic_args
+		detail::uint32 m = mm.first + mm.second, i = (~m&0xFFFFFFFF) >> 31; // sum of the two terms; i is 1 when bit 31 of the sum is clear
+		m = (m>>i) | (m&i) | 0x80000000; // when i is 1: shift right by one, keep the dropped bit in the lowest position, force the top bit
+		if((exp+=13+i) > 29)
+			return half(detail::binary, detail::overflow<half::round_style>()); // too large for half
+		return half(detail::binary, detail::fixed2half<half::round_style,31,false,false,true>(m, exp)); // no sign argument is passed
+	#endif
+	}
+
+	/// Hyperbolic tangent.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::tanh](https://en.cppreference.com/w/cpp/numeric/math/tanh).
+	/// \param arg function argument
+	/// \return hyperbolic tangent value of \a arg
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half tanh(half arg)
+	{
+	#ifdef HALF_ARITHMETIC_TYPE
+		return half(detail::binary, detail::float2half<half::round_style>(std::tanh(detail::half2float<detail::internal_t>(arg.data_)))); // built-in path via std::tanh
+	#else
+		int abs = arg.data_ & 0x7FFF, exp; // magnitude bits; exp is filled in by detail::hyperbolic_args and then reset below
+		if(!abs)
+			return arg; // +/-0 is returned unchanged
+		if(abs >= 0x7C00)
+			return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (arg.data_-0x4000)); // NaN -> detail::signal; +/-infinity -> +/-1 (0x7C00-0x4000 = 0x3C00, sign bit kept)
+		if(abs >= 0x4500) // |arg| >= 5: result built from the pattern just below 1, with the sign of arg
+			return half(detail::binary, detail::rounded<half::round_style,true>((arg.data_&0x8000)|0x3BFF, 1, 1));
+		if(abs < 0x2700) // small arguments: result built from the bit pattern just below arg
+			return half(detail::binary, detail::rounded<half::round_style,true>(arg.data_-1, 1, 1));
+		if(half::round_style != std::round_to_nearest && abs == 0x2D3F) // single hard-coded argument, used only outside round-to-nearest
+			return half(detail::binary, detail::rounded<half::round_style,true>(arg.data_-3, 0, 1));
+		std::pair<detail::uint32,detail::uint32> mm = detail::hyperbolic_args(abs, exp, 27); // two fixed-point terms from detail::hyperbolic_args
+		detail::uint32 my = mm.first - mm.second - (half::round_style!=std::round_to_nearest), mx = mm.first + mm.second, i = (~mx&0xFFFFFFFF) >> 31; // numerator = difference (minus 1 outside round-to-nearest), denominator = sum; i is 1 when bit 31 of the sum is clear
+		for(exp=13; my<0x80000000; my<<=1,--exp) ; // normalize the numerator, lowering exp from 13
+		mx = (mx>>i) | 0x80000000; // when i is 1: shift the denominator right by one and force the top bit
+		return half(detail::binary, detail::tangent_post<half::round_style>(my, mx, exp-i, arg.data_&0x8000)); // the division is done by detail::tangent_post; the result takes the sign of arg
+	#endif
+	}
+
+	/// Hyperbolic area sine.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::asinh](https://en.cppreference.com/w/cpp/numeric/math/asinh).
+	/// \param arg function argument
+	/// \return area sine value of \a arg
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half asinh(half arg)
+	{
+	#if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
+		return half(detail::binary, detail::float2half<half::round_style>(std::asinh(detail::half2float<detail::internal_t>(arg.data_)))); // built-in path, only when std::asinh is enabled via HALF_ENABLE_CPP11_CMATH
+	#else
+		int abs = arg.data_ & 0x7FFF; // magnitude bits
+		if(!abs || abs >= 0x7C00)
+			return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg; // NaN -> detail::signal; zero and infinity are returned unchanged
+		if(abs <= 0x2900) // small arguments: result built from the bit pattern just below arg
+			return half(detail::binary, detail::rounded<half::round_style,true>(arg.data_-1, 1, 1));
+		if(half::round_style != std::round_to_nearest) // hard-coded results for two arguments, used only outside round-to-nearest
+			switch(abs)
+			{
+				case 0x32D4: return half(detail::binary, detail::rounded<half::round_style,true>(arg.data_-13, 1, 1));
+				case 0x3B5B: return half(detail::binary, detail::rounded<half::round_style,true>(arg.data_-197, 1, 1));
+			}
+		return half(detail::binary, detail::area<half::round_style,true>(arg.data_)); // general case: detail::area with its bool template flag set to true (acosh uses false)
+	#endif
+	}
+
+	/// Hyperbolic area cosine.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::acosh](https://en.cppreference.com/w/cpp/numeric/math/acosh).
+	/// \param arg function argument
+	/// \return area cosine value of \a arg
+	/// \exception FE_INVALID for signaling NaN or arguments <1
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half acosh(half arg)
+	{
+	#if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
+		return half(detail::binary, detail::float2half<half::round_style>(std::acosh(detail::half2float<detail::internal_t>(arg.data_)))); // built-in path, only when std::acosh is enabled via HALF_ENABLE_CPP11_CMATH
+	#else
+		int abs = arg.data_ & 0x7FFF; // magnitude bits
+		if((arg.data_&0x8000) || abs < 0x3C00) // sign bit set, or magnitude below 1
+			return half(detail::binary, (abs<=0x7C00) ? detail::invalid() : detail::signal(arg.data_)); // non-NaN -> detail::invalid(); negative-signed NaN -> detail::signal
+		if(abs == 0x3C00)
+			return half(detail::binary, 0); // acosh(1) = +0
+		if(arg.data_ >= 0x7C00) // only positive values reach this point
+			return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg; // NaN -> detail::signal; +infinity is returned unchanged
+		return half(detail::binary, detail::area<half::round_style,false>(arg.data_)); // general case: detail::area with its bool template flag set to false (asinh uses true)
+	#endif
+	}
+
+	/// Hyperbolic area tangent.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::atanh](https://en.cppreference.com/w/cpp/numeric/math/atanh).
+	/// \param arg function argument
+	/// \return area tangent value of \a arg
+	/// \exception FE_INVALID for signaling NaN or if abs(\a arg) > 1
+	/// \exception FE_DIVBYZERO for +/-1
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half atanh(half arg)
+	{
+	#if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
+		return half(detail::binary, detail::float2half<half::round_style>(std::atanh(detail::half2float<detail::internal_t>(arg.data_)))); // built-in path, only when std::atanh is enabled via HALF_ENABLE_CPP11_CMATH
+	#else
+		int abs = arg.data_ & 0x7FFF, exp = 0; // magnitude bits; exp counts the normalization shifts of the denominator
+		if(!abs)
+			return arg; // +/-0 is returned unchanged
+		if(abs >= 0x3C00) // |arg| >= 1, infinity or NaN
+			return half(detail::binary, (abs==0x3C00) ? detail::pole(arg.data_&0x8000) : (abs<=0x7C00) ? detail::invalid() : detail::signal(arg.data_)); // +/-1 -> detail::pole with the sign of arg; larger magnitudes incl. infinity -> detail::invalid(); NaN -> detail::signal
+		if(abs < 0x2700) // small arguments: result built from the bit pattern of arg itself
+			return half(detail::binary, detail::rounded<half::round_style,true>(arg.data_, 0, 1));
+		detail::uint32 m = static_cast<detail::uint32>((abs&0x3FF)|((abs>0x3FF)<<10)) << ((abs>>10)+(abs<=0x3FF)+6), my = 0x80000000 + m, mx = 0x80000000 - m; // m is |arg| in fixed point where 0x80000000 represents 1, so my = 1+|arg| and mx = 1-|arg|
+		for(; mx<0x80000000; mx<<=1,++exp) ; // normalize the denominator, counting shifts in exp
+		int i = my >= mx, s; // i is 1 when the numerator must be halved before dividing; s is an output of detail::divide64 that is not read here
+		return half(detail::binary, detail::log2_post<half::round_style,0xB8AA3B2A>(detail::log2( // 0xB8AA3B2A is presumably a fixed-point scale converting log2 to the natural log - confirm in detail::log2_post
+			(detail::divide64(my>>i, mx, s)+1)>>1, 27)+0x10, exp+i-1, 16, arg.data_&0x8000)); // detail::log2 of the quotient (1+|arg|)/(1-|arg|); the result takes the sign of arg
+	#endif
+	}
+
+	/// \}
+	/// \anchor special
+	/// \name Error and gamma functions
+	/// \{
+
+	/// Error function.
+	/// This function may be 1 ULP off the correctly rounded exact result for any rounding mode in <0.5% of inputs.
+	///
+	/// **See also:** Documentation for [std::erf](https://en.cppreference.com/w/cpp/numeric/math/erf).
+	/// \param arg function argument
+	/// \return error function value of \a arg
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half erf(half arg)
+	{
+	#if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
+		return half(detail::binary, detail::float2half<half::round_style>(std::erf(detail::half2float<detail::internal_t>(arg.data_)))); // built-in path, only when std::erf is enabled via HALF_ENABLE_CPP11_CMATH
+	#else
+		unsigned int abs = arg.data_ & 0x7FFF; // magnitude bits
+		if(!abs || abs >= 0x7C00)
+			return (abs>=0x7C00) ? half(detail::binary, (abs==0x7C00) ? (arg.data_-0x4000) : detail::signal(arg.data_)) : arg; // +/-infinity -> +/-1 (0x7C00-0x4000 = 0x3C00, sign bit kept); NaN -> detail::signal; +/-0 unchanged
+		if(abs >= 0x4200) // |arg| >= 3: result built from the pattern just below 1, with the sign of arg
+			return half(detail::binary, detail::rounded<half::round_style,true>((arg.data_&0x8000)|0x3BFF, 1, 1));
+		return half(detail::binary, detail::erf<half::round_style,false>(arg.data_)); // general case: detail::erf with its bool template flag set to false (erfc uses true)
+	#endif
+	}
+
+	/// Complementary error function.
+	/// This function may be 1 ULP off the correctly rounded exact result for any rounding mode in <0.5% of inputs.
+	///
+	/// **See also:** Documentation for [std::erfc](https://en.cppreference.com/w/cpp/numeric/math/erfc).
+	/// \param arg function argument
+	/// \return 1 minus error function value of \a arg
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half erfc(half arg)
+	{
+	#if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
+		return half(detail::binary, detail::float2half<half::round_style>(std::erfc(detail::half2float<detail::internal_t>(arg.data_)))); // built-in path, only when std::erfc is enabled via HALF_ENABLE_CPP11_CMATH
+	#else
+		unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000; // magnitude bits and isolated sign bit
+		if(abs >= 0x7C00) // infinity or NaN
+			return half(detail::binary, (abs==0x7C00) ? (sign>>1) : detail::signal(arg.data_)); // +infinity -> 0x0000 (0), -infinity -> 0x4000 (2), NaN -> detail::signal
+		if(!abs)
+			return half(detail::binary, 0x3C00); // erfc(+/-0) = 1
+		if(abs >= 0x4400) // |arg| >= 4: result is built directly from a bit pattern
+			return half(detail::binary, detail::rounded<half::round_style,true>((sign>>1)-(sign>>15), sign>>15, 1)); // positive arg: pattern 0 with only the last flag set; negative arg: pattern 0x3FFF (just below 2) with both flags set
+		return half(detail::binary, detail::erf<half::round_style,true>(arg.data_)); // general case: detail::erf with its bool template flag set to true (erf uses false)
+	#endif
+	}
+
+	/// Natural logarithm of gamma function.
+	/// This function may be 1 ULP off the correctly rounded exact result for any rounding mode in ~0.025% of inputs.
+	///
+	/// **See also:** Documentation for [std::lgamma](https://en.cppreference.com/w/cpp/numeric/math/lgamma).
+	/// \param arg function argument
+	/// \return natural logarithm of gamma function for \a arg
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_DIVBYZERO for 0 or negative integer arguments
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half lgamma(half arg)
+	{
+	#if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
+		return half(detail::binary, detail::float2half<half::round_style>(std::lgamma(detail::half2float<detail::internal_t>(arg.data_)))); // built-in path, only when std::lgamma is enabled via HALF_ENABLE_CPP11_CMATH
+	#else
+		int abs = arg.data_ & 0x7FFF; // magnitude bits
+		if(abs >= 0x7C00)
+			return half(detail::binary, (abs==0x7C00) ? 0x7C00 : detail::signal(arg.data_)); // +/-infinity -> +infinity; NaN -> detail::signal
+		if(!abs || arg.data_ >= 0xE400 || (arg.data_ >= 0xBC00 && !(abs&((1<<(25-(abs>>10)))-1)))) // zero, negative values from -1024 on (all integral), or negative integers from -1 down
+			return half(detail::binary, detail::pole());
+		if(arg.data_ == 0x3C00 || arg.data_ == 0x4000)
+			return half(detail::binary, 0); // lgamma(1) = lgamma(2) = +0
+		return half(detail::binary, detail::gamma<half::round_style,true>(arg.data_)); // general case: detail::gamma with its bool template flag set to true (tgamma uses false)
+	#endif
+	}
+
+	/// Gamma function.
+	/// This function may be 1 ULP off the correctly rounded exact result for any rounding mode in <0.25% of inputs.
+	///
+	/// **See also:** Documentation for [std::tgamma](https://en.cppreference.com/w/cpp/numeric/math/tgamma).
+	/// \param arg function argument
+	/// \return gamma function value of \a arg
+	/// \exception FE_INVALID for signaling NaN, negative infinity or negative integer arguments
+	/// \exception FE_DIVBYZERO for 0
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half tgamma(half arg)
+	{
+	#if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
+		return half(detail::binary, detail::float2half<half::round_style>(std::tgamma(detail::half2float<detail::internal_t>(arg.data_)))); // built-in path, only when std::tgamma is enabled via HALF_ENABLE_CPP11_CMATH
+	#else
+		unsigned int abs = arg.data_ & 0x7FFF; // magnitude bits
+		if(!abs)
+			return half(detail::binary, detail::pole(arg.data_)); // +/-0: detail::pole receives the signed-zero pattern
+		if(abs >= 0x7C00)
+			return (arg.data_==0x7C00) ? arg : half(detail::binary, detail::signal(arg.data_)); // +infinity is returned unchanged; -infinity and NaN go to detail::signal
+		if(arg.data_ >= 0xE400 || (arg.data_ >= 0xBC00 && !(abs&((1<<(25-(abs>>10)))-1)))) // negative values from -1024 on (all integral), or negative integers from -1 down
+			return half(detail::binary, detail::invalid());
+		if(arg.data_ >= 0xCA80) // remaining negative values with bit pattern >= 0xCA80 (arg <= -13): result underflows
+			return half(detail::binary, detail::underflow<half::round_style>((1-((abs>>(25-(abs>>10)))&1))<<15)); // sign bit is set when the integer part of |arg| is even
+		if(arg.data_ <= 0x100 || (arg.data_ >= 0x4900 && arg.data_ < 0x8000)) // tiny positive values (pattern <= 0x100) or positive values from 0x4900 (10) up
+			return half(detail::binary, detail::overflow<half::round_style>());
+		if(arg.data_ == 0x3C00)
+			return arg; // tgamma(1) = 1
+		return half(detail::binary, detail::gamma<half::round_style,false>(arg.data_)); // general case: detail::gamma with its bool template flag set to false (lgamma uses true)
+	#endif
+	}
+
+	/// \}
+	/// \anchor rounding
+	/// \name Rounding
+	/// \{
+
+	/// Nearest integer not less than half value.
+	/// **See also:** Documentation for [std::ceil](https://en.cppreference.com/w/cpp/numeric/math/ceil).
+	/// \param arg half to round
+	/// \return nearest integer not less than \a arg
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_INEXACT if value had to be rounded
+	inline half ceil(half arg) { return half(detail::binary, detail::integral<std::round_toward_infinity,true,true>(arg.data_)); } // rounding direction is fixed to round_toward_infinity, independent of half::round_style
+
+	/// Nearest integer not greater than half value.
+	/// **See also:** Documentation for [std::floor](https://en.cppreference.com/w/cpp/numeric/math/floor).
+	/// \param arg half to round
+	/// \return nearest integer not greater than \a arg
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_INEXACT if value had to be rounded
+	inline half floor(half arg) { return half(detail::binary, detail::integral<std::round_toward_neg_infinity,true,true>(arg.data_)); } // rounding direction is fixed to round_toward_neg_infinity, independent of half::round_style
+
+	/// Nearest integer not greater in magnitude than half value.
+	/// **See also:** Documentation for [std::trunc](https://en.cppreference.com/w/cpp/numeric/math/trunc).
+	/// \param arg half to round
+	/// \return nearest integer not greater in magnitude than \a arg
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_INEXACT if value had to be rounded
+	inline half trunc(half arg) { return half(detail::binary, detail::integral<std::round_toward_zero,true,true>(arg.data_)); } // rounding direction is fixed to round_toward_zero, independent of half::round_style
+
+	/// Nearest integer.
+	/// **See also:** Documentation for [std::round](https://en.cppreference.com/w/cpp/numeric/math/round).
+	/// \param arg half to round
+	/// \return nearest integer, rounded away from zero in half-way cases
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_INEXACT if value had to be rounded
+	inline half round(half arg) { return half(detail::binary, detail::integral<std::round_to_nearest,false,true>(arg.data_)); } // fixed round_to_nearest; second template flag is false here (true in rint), presumably selecting ties away from zero - confirm in detail::integral
+
+	/// Round to the nearest integer and return it as `long`.
+	/// **See also:** Documentation for [std::lround](https://en.cppreference.com/w/cpp/numeric/math/round).
+	/// \param arg half to round
+	/// \return nearest integer as `long`, rounded away from zero in half-way cases
+	/// \exception FE_INVALID if value is not representable as `long`
+	inline long lround(half arg) { return detail::half2int<std::round_to_nearest,false,false,long>(arg.data_); }
+
+	/// Round to an integral value using half's internal rounding mode.
+	/// **See also:** Documentation for [std::rint](https://en.cppreference.com/w/cpp/numeric/math/rint).
+	/// \param arg half to round
+	/// \return nearest integer as a half, rounded according to `half::round_style`
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_INEXACT if value had to be rounded
+	inline half rint(half arg) { return half(detail::binary, detail::integral<half::round_style,true,true>(arg.data_)); }
+
+	/// Round to an integer using half's internal rounding mode and return it as `long`.
+	/// **See also:** Documentation for [std::lrint](https://en.cppreference.com/w/cpp/numeric/math/rint).
+	/// \param arg half to round
+	/// \return nearest integer as `long`, rounded according to `half::round_style`
+	/// \exception FE_INVALID if value is not representable as `long`
+	/// \exception FE_INEXACT if value had to be rounded
+	inline long lrint(half arg) { return detail::half2int<half::round_style,true,true,long>(arg.data_); }
+
+	/// Round to an integral value using half's internal rounding mode (unlike rint(), no FE_INEXACT is documented here).
+	/// **See also:** Documentation for [std::nearbyint](https://en.cppreference.com/w/cpp/numeric/math/nearbyint).
+	/// \param arg half to round
+	/// \return nearest integer as a half, rounded according to `half::round_style`
+	/// \exception FE_INVALID for signaling NaN
+	inline half nearbyint(half arg) { return half(detail::binary, detail::integral<half::round_style,true,false>(arg.data_)); }
+#if HALF_ENABLE_CPP11_LONG_LONG
+	/// Round to the nearest integer and return it as `long long` (only declared if `HALF_ENABLE_CPP11_LONG_LONG` is set).
+	/// **See also:** Documentation for [std::llround](https://en.cppreference.com/w/cpp/numeric/math/round).
+	/// \param arg half to round
+	/// \return nearest integer as `long long`, rounded away from zero in half-way cases
+	/// \exception FE_INVALID if value is not representable as `long long`
+	inline long long llround(half arg) { return detail::half2int<std::round_to_nearest,false,false,long long>(arg.data_); }
+
+	/// Round to an integer using half's internal rounding mode, as `long long` (needs `HALF_ENABLE_CPP11_LONG_LONG`).
+	/// **See also:** Documentation for [std::llrint](https://en.cppreference.com/w/cpp/numeric/math/rint).
+	/// \param arg half to round
+	/// \return nearest integer as `long long`, rounded according to `half::round_style`
+	/// \exception FE_INVALID if value is not representable as `long long`
+	/// \exception FE_INEXACT if value had to be rounded
+	inline long long llrint(half arg) { return detail::half2int<half::round_style,true,true,long long>(arg.data_); }
+#endif
+
+	/// \}
+	/// \anchor float
+	/// \name Floating point manipulation
+	/// \{
+
+	/// Split a half into a normalized significand and a power-of-two exponent.
+	/// **See also:** Documentation for [std::frexp](https://en.cppreference.com/w/cpp/numeric/math/frexp).
+	/// \param arg number to decompose
+	/// \param exp address to store exponent at (0 is stored for zero, infinity and NaN)
+	/// \return significand with magnitude in range [0.5, 1); zero and infinity are returned unchanged
+	/// \exception FE_INVALID for signaling NaN
+	inline half frexp(half arg, int *exp)
+	{
+		*exp = 0;
+		unsigned int mag = arg.data_ & 0x7FFF;
+		if(!mag || mag >= 0x7C00)
+			return (mag<=0x7C00) ? arg : half(detail::binary, detail::signal(arg.data_));
+		while(mag < 0x400) { mag <<= 1; --*exp; }	// bring subnormals up to a set bit 10
+		*exp += (mag>>10) - 14;
+		return half(detail::binary, (arg.data_&0x8000)|0x3800|(mag&0x3FF));
+	}
+
+	/// Multiply by power of two.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::scalbln](https://en.cppreference.com/w/cpp/numeric/math/scalbn).
+	/// \param arg number to modify
+	/// \param exp power of two to multiply with (any `long` value is accepted)
+	/// \return \a arg multiplied by 2 raised to \a exp; zero, infinity and NaN do not depend on \a exp
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half scalbln(half arg, long exp)
+	{
+		unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000;
+		if(abs >= 0x7C00 || !abs)
+			return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;
+		// Clamp first: beyond +-64 every finite nonzero input over-/underflows anyway, and exp stays far from LONG_MAX/LONG_MIN below.
+		exp = (exp>64) ? 64 : (exp<-64) ? -64 : exp;
+		for(; abs<0x400; abs<<=1,--exp) ;
+		exp += abs >> 10;
+		if(exp > 30 || exp < -10)
+			return half(detail::binary, (exp>30) ? detail::overflow<half::round_style>(sign) : detail::underflow<half::round_style>(sign));
+		else if(exp > 0)
+			return half(detail::binary, sign|(exp<<10)|(abs&0x3FF));
+		unsigned int m = (abs&0x3FF) | 0x400;
+		return half(detail::binary, detail::rounded<half::round_style,false>(sign|(m>>(1-exp)), (m>>-exp)&1, (m&((1<<-exp)-1))!=0));
+	}
+
+	/// Multiply by power of two.
+	/// This function is exact to rounding for all rounding modes. It forwards to scalbln().
+	///
+	/// **See also:** Documentation for [std::scalbn](https://en.cppreference.com/w/cpp/numeric/math/scalbn).
+	/// \param arg number to modify
+	/// \param exp power of two to multiply with
+	/// \return \a arg multiplied by 2 raised to \a exp
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half scalbn(half arg, int exp) { return scalbln(arg, exp); }
+
+	/// Multiply by power of two.
+	/// This function is exact to rounding for all rounding modes. It forwards to scalbln().
+	///
+	/// **See also:** Documentation for [std::ldexp](https://en.cppreference.com/w/cpp/numeric/math/ldexp).
+	/// \param arg number to modify
+	/// \param exp power of two to multiply with
+	/// \return \a arg multiplied by 2 raised to \a exp
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half ldexp(half arg, int exp) { return scalbln(arg, exp); }
+
+	/// Extract integer and fractional parts.
+	/// **See also:** Documentation for [std::modf](https://en.cppreference.com/w/cpp/numeric/math/modf).
+	/// \param arg number to decompose
+	/// \param iptr address to store integer part at (it keeps the sign bit of \a arg)
+	/// \return fractional part, carrying the sign bit of \a arg
+	/// \exception FE_INVALID for signaling NaN
+	inline half modf(half arg, half *iptr)
+	{
+		unsigned int abs = arg.data_ & 0x7FFF;
+		if(abs > 0x7C00)	// NaN: integer part and result both become the NaN returned by detail::signal
+		{
+			arg = half(detail::binary, detail::signal(arg.data_));
+			return *iptr = arg, arg;
+		}
+		if(abs >= 0x6400)	// exponent field >= 25 (includes infinity): no fractional bits, fraction is a signed zero
+			return *iptr = arg, half(detail::binary, arg.data_&0x8000);
+		if(abs < 0x3C00)	// exponent field < 15: no integer bits, integer part is a signed zero
+			return iptr->data_ = arg.data_ & 0x8000, arg;
+		unsigned int exp = abs >> 10, mask = (1<<(25-exp)) - 1, m = arg.data_ & mask;	// mask covers the low 25-exp mantissa bits
+		iptr->data_ = arg.data_ & ~mask;
+		if(!m)
+			return half(detail::binary, arg.data_&0x8000);
+		for(; m<0x400; m<<=1,--exp) ;	// shift the remaining bits up until bit 10 is set, lowering the exponent once per shift
+		return half(detail::binary, (arg.data_&0x8000)|(exp<<10)|(m&0x3FF));
+	}
+
+	/// Extract exponent as an `int`.
+	/// **See also:** Documentation for [std::ilogb](https://en.cppreference.com/w/cpp/numeric/math/ilogb).
+	/// \param arg number to query
+	/// \return floating-point exponent
+	/// \retval FP_ILOGB0 for zero
+	/// \retval FP_ILOGBNAN for NaN
+	/// \retval INT_MAX for infinity
+	/// \exception FE_INVALID for 0, infinite or NaN values
+	inline int ilogb(half arg)
+	{
+		int abs = arg.data_ & 0x7FFF, exp;
+		if(!abs || abs >= 0x7C00)	// zero, infinity and NaN all pass FE_INVALID to detail::raise
+		{
+			detail::raise(FE_INVALID);
+			return !abs ? FP_ILOGB0 : (abs==0x7C00) ? INT_MAX : FP_ILOGBNAN;
+		}
+		for(exp=(abs>>10)-15; abs<0x200; abs<<=1,--exp) ;	// the loop only runs for subnormals whose bit 9 is clear
+		return exp;
+	}
+
+	/// Extract exponent as a half.
+	/// **See also:** Documentation for [std::logb](https://en.cppreference.com/w/cpp/numeric/math/logb).
+	/// \param arg number to query
+	/// \return floating-point exponent (bit pattern 0x7C00, i.e. positive infinity, for infinite \a arg)
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_DIVBYZERO for 0
+	inline half logb(half arg)
+	{
+		int abs = arg.data_ & 0x7FFF, exp;
+		if(!abs)
+			return half(detail::binary, detail::pole(0x8000));	// NOTE(review): presumably negative infinity plus FE_DIVBYZERO - confirm in detail::pole
+		if(abs >= 0x7C00)
+			return half(detail::binary, (abs==0x7C00) ? 0x7C00 : detail::signal(arg.data_));
+		for(exp=(abs>>10)-15; abs<0x200; abs<<=1,--exp) ;	// same exponent extraction loop as ilogb()
+		unsigned int value = static_cast<unsigned>(exp<0) << 15;	// sign bit of the result
+		if(exp)
+		{
+			unsigned int m = std::abs(exp) << 6;	// encode |exp| as a half: normalize until bit 10 is set
+			for(exp=18; m<0x400; m<<=1,--exp) ;
+			value |= (exp<<10) + m;	// bit 10 of m carries into the exponent field
+		}
+		return half(detail::binary, value);
+	}
+
+	/// Next representable value.
+	/// **See also:** Documentation for [std::nextafter](https://en.cppreference.com/w/cpp/numeric/math/nextafter).
+	/// \param from value to compute next representable value for
+	/// \param to direction towards which to compute next value
+	/// \return next representable value after \a from in direction towards \a to (\a to itself if bits match or both are zero)
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_OVERFLOW for infinite result from finite argument
+	/// \exception FE_UNDERFLOW for subnormal result
+	inline half nextafter(half from, half to)
+	{
+		int fabs = from.data_ & 0x7FFF, tabs = to.data_ & 0x7FFF;
+		if(fabs > 0x7C00 || tabs > 0x7C00)	// either operand is NaN
+			return half(detail::binary, detail::signal(from.data_, to.data_));
+		if(from.data_ == to.data_ || !(fabs|tabs))	// identical bits, or both operands are zeros of either sign
+			return to;
+		if(!fabs)	// leaving zero: result is the smallest subnormal carrying the sign of to
+		{
+			detail::raise(FE_UNDERFLOW, !HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT);
+			return half(detail::binary, (to.data_&0x8000)+1);
+		}
+		unsigned int out = from.data_ + (((from.data_>>15)^static_cast<unsigned>(	// +1 when stepping away from zero, -1 when stepping toward it
+			(from.data_^(0x8000|(0x8000-(from.data_>>15))))<(to.data_^(0x8000|(0x8000-(to.data_>>15))))))<<1) - 1;
+		detail::raise(FE_OVERFLOW, fabs<0x7C00 && (out&0x7C00)==0x7C00);
+		detail::raise(FE_UNDERFLOW, !HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT && (out&0x7C00)<0x400);
+		return half(detail::binary, out);
+	}
+
+	/// Next representable value, with the direction given as `long double`.
+	/// **See also:** Documentation for [std::nexttoward](https://en.cppreference.com/w/cpp/numeric/math/nexttoward).
+	/// \param from value to compute next representable value for
+	/// \param to direction towards which to compute next value
+	/// \return next representable value after \a from in direction towards \a to (\a to converted to half if equal or NaN)
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_OVERFLOW for infinite result from finite argument
+	/// \exception FE_UNDERFLOW for subnormal result
+	inline half nexttoward(half from, long double to)
+	{
+		int fabs = from.data_ & 0x7FFF;
+		if(fabs > 0x7C00)	// from is NaN
+			return half(detail::binary, detail::signal(from.data_));
+		long double lfrom = static_cast<long double>(from);
+		if(detail::builtin_isnan(to) || lfrom == to)	// NaN or equal target: the result is to converted to half via float
+			return half(static_cast<float>(to));
+		if(!fabs)	// leaving zero: result is the smallest subnormal carrying the sign of to
+		{
+			detail::raise(FE_UNDERFLOW, !HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT);
+			return half(detail::binary, (static_cast<unsigned>(detail::builtin_signbit(to))<<15)+1);
+		}
+		unsigned int out = from.data_ + (((from.data_>>15)^static_cast<unsigned>(lfrom<to))<<1) - 1;	// +1 away from zero, -1 toward zero
+		detail::raise(FE_OVERFLOW, (out&0x7FFF)==0x7C00);
+		detail::raise(FE_UNDERFLOW, !HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT && (out&0x7FFF)<0x400);
+		return half(detail::binary, out);
+	}
+
+	/// Combine the magnitude of one value with the sign of another.
+	/// **See also:** Documentation for [std::copysign](https://en.cppreference.com/w/cpp/numeric/math/copysign).
+	/// \param x value to take the magnitude from
+	/// \param y value to take the sign from
+	/// \return value equal to \a x in magnitude and to \a y in sign
+	inline HALF_CONSTEXPR half copysign(half x, half y) { return half(detail::binary, (x.data_&~0x8000)|(y.data_&0x8000)); }
+
+	/// \}
+	/// \anchor classification
+	/// \name Floating point classification
+	/// \{
+
+	/// Determine the floating-point category of a half.
+	/// **See also:** Documentation for [std::fpclassify](https://en.cppreference.com/w/cpp/numeric/math/fpclassify).
+	/// \param arg number to classify
+	/// \retval FP_ZERO for positive and negative zero
+	/// \retval FP_SUBNORMAL for subnormal numbers
+	/// \retval FP_INFINITE for positive and negative infinity
+	/// \retval FP_NAN for NaNs
+	/// \retval FP_NORMAL for all other (normal) values
+	inline HALF_CONSTEXPR int fpclassify(half arg)
+	{
+		return	((arg.data_&0x7FFF)>0x7C00) ? FP_NAN :
+				((arg.data_&0x7FFF)==0x7C00) ? FP_INFINITE :
+				((arg.data_&0x7FFF)>=0x400) ? FP_NORMAL :
+				(arg.data_&0x7FFF) ? FP_SUBNORMAL :
+				FP_ZERO;
+	}
+
+	/// Check whether the exponent field is not all ones, i.e. the value is finite.
+	/// **See also:** Documentation for [std::isfinite](https://en.cppreference.com/w/cpp/numeric/math/isfinite).
+	/// \param arg number to check
+	/// \retval true if neither infinity nor NaN
+	/// \retval false else
+	inline HALF_CONSTEXPR bool isfinite(half arg) { return (arg.data_&0x7C00) < 0x7C00; }
+
+	/// Check whether the value is an infinity of either sign.
+	/// **See also:** Documentation for [std::isinf](https://en.cppreference.com/w/cpp/numeric/math/isinf).
+	/// \param arg number to check
+	/// \retval true for positive or negative infinity
+	/// \retval false else
+	inline HALF_CONSTEXPR bool isinf(half arg) { return ((arg.data_&0x7FFF)^0x7C00) == 0; }
+
+	/// Check whether the value is a NaN (exponent field all ones and a nonzero mantissa).
+	/// **See also:** Documentation for [std::isnan](https://en.cppreference.com/w/cpp/numeric/math/isnan).
+	/// \param arg number to check
+	/// \retval true for NaNs
+	/// \retval false else
+	inline HALF_CONSTEXPR bool isnan(half arg) { return (arg.data_&0x7C00)==0x7C00 && (arg.data_&0x3FF)!=0; }
+
+	/// Check whether the exponent field is neither all zeros nor all ones.
+	/// **See also:** Documentation for [std::isnormal](https://en.cppreference.com/w/cpp/numeric/math/isnormal).
+	/// \param arg number to check
+	/// \retval true if normal number
+	/// \retval false if either subnormal, zero, infinity or NaN
+	inline HALF_CONSTEXPR bool isnormal(half arg) { return (arg.data_&0x7C00)!=0 && (arg.data_&0x7C00)!=0x7C00; }
+
+	/// Check whether the sign bit is set.
+	/// **See also:** Documentation for [std::signbit](https://en.cppreference.com/w/cpp/numeric/math/signbit).
+	/// \param arg number to check
+	/// \retval true if the sign bit is set (negative numbers, including negative zero)
+	/// \retval false if the sign bit is clear
+	inline HALF_CONSTEXPR bool signbit(half arg) { return (arg.data_&0x8000) == 0x8000; }
+
+	/// \}
+	/// \anchor compfunc
+	/// \name Comparison
+	/// \{
+
+	/// Quiet comparison for greater than.
+	/// **See also:** Documentation for [std::isgreater](https://en.cppreference.com/w/cpp/numeric/math/isgreater).
+	/// \param x first operand
+	/// \param y second operand
+	/// \retval true if \a x greater than \a y
+	/// \retval false else (in particular whenever \a x or \a y is NaN)
+	inline HALF_CONSTEXPR bool isgreater(half x, half y)
+	{
+		return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) > ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y);	// for 16-bit data_ each side is 0x8000+magnitude (sign clear) or 0x8000-magnitude (sign set)
+	}
+
+	/// Quiet comparison for greater equal.
+	/// **See also:** Documentation for [std::isgreaterequal](https://en.cppreference.com/w/cpp/numeric/math/isgreaterequal).
+	/// \param x first operand
+	/// \param y second operand
+	/// \retval true if \a x greater equal \a y (positive and negative zero count as equal)
+	/// \retval false else (in particular whenever \a x or \a y is NaN)
+	inline HALF_CONSTEXPR bool isgreaterequal(half x, half y)
+	{
+		return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) >= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y);	// for 16-bit data_ each side is 0x8000+magnitude (sign clear) or 0x8000-magnitude (sign set)
+	}
+
+	/// Quiet comparison for less than.
+	/// **See also:** Documentation for [std::isless](https://en.cppreference.com/w/cpp/numeric/math/isless).
+	/// \param x first operand
+	/// \param y second operand
+	/// \retval true if \a x less than \a y
+	/// \retval false else (in particular whenever \a x or \a y is NaN)
+	inline HALF_CONSTEXPR bool isless(half x, half y)
+	{
+		return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) < ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y);	// for 16-bit data_ each side is 0x8000+magnitude (sign clear) or 0x8000-magnitude (sign set)
+	}
+
+	/// Quiet comparison for less equal.
+	/// **See also:** Documentation for [std::islessequal](https://en.cppreference.com/w/cpp/numeric/math/islessequal).
+	/// \param x first operand
+	/// \param y second operand
+	/// \retval true if \a x less equal \a y (positive and negative zero count as equal)
+	/// \retval false else (in particular whenever \a x or \a y is NaN)
+	inline HALF_CONSTEXPR bool islessequal(half x, half y)
+	{
+		return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) <= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y);	// for 16-bit data_ each side is 0x8000+magnitude (sign clear) or 0x8000-magnitude (sign set)
+	}
+
+	/// Quiet comparison for less or greater.
+	/// **See also:** Documentation for [std::islessgreater](https://en.cppreference.com/w/cpp/numeric/math/islessgreater).
+	/// \param x first operand
+	/// \param y second operand
+	/// \retval true if either less or greater
+	/// \retval false else (equal bit patterns, two zeros of any sign, or a NaN operand)
+	inline HALF_CONSTEXPR bool islessgreater(half x, half y)
+	{
+		return !isnan(x) && !isnan(y) && x.data_!=y.data_ && ((x.data_|y.data_)&0x7FFF)!=0;
+	}
+
+	/// Quiet check whether two values are unordered, i.e. whether at least one of them is NaN.
+	/// **See also:** Documentation for [std::isunordered](https://en.cppreference.com/w/cpp/numeric/math/isunordered).
+	/// \param x first operand
+	/// \param y second operand
+	/// \retval true if unordered (one or two NaN operands)
+	/// \retval false else
+	inline HALF_CONSTEXPR bool isunordered(half x, half y) { return isnan(x) || isnan(y); }
+
+	/// \}
+	/// \anchor casting
+	/// \name Casting
+	/// \{
+
+	/// Cast to or from half-precision floating-point number.
+	/// This casts between [half](\ref half_float::half) and any built-in arithmetic type. The values are converted 
+	/// directly using the default rounding mode, without any roundtrip over `float` that a `static_cast` would otherwise do.
+	///
+	/// The cast fails to compile unless at least one of the two types is a [half](\ref half_float::half) and the other 
+	/// one is a [half](\ref half_float::half) or a built-in arithmetic type. Casting between 
+	/// [half](\ref half_float::half)s returns the argument unmodified.
+	/// \tparam T destination type (half or built-in arithmetic type)
+	/// \tparam U source type (half or built-in arithmetic type)
+	/// \param arg value to cast
+	/// \return \a arg converted to destination type
+	/// \exception FE_INVALID if \a T is integer type and result is not representable as \a T
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	template<typename T,typename U> T half_cast(U arg) { return detail::half_caster<T,U>::cast(arg); }
+
+	/// Cast to or from half-precision floating-point number with an explicit rounding mode.
+	/// This casts between [half](\ref half_float::half) and any built-in arithmetic type. The values are converted 
+	/// directly using the specified rounding mode, without any roundtrip over `float` that a `static_cast` would otherwise do.
+	///
+	/// The cast fails to compile unless at least one of the two types is a [half](\ref half_float::half) and the other 
+	/// one is a [half](\ref half_float::half) or a built-in arithmetic type. Casting between 
+	/// [half](\ref half_float::half)s returns the argument unmodified.
+	/// \tparam T destination type (half or built-in arithmetic type)
+	/// \tparam R rounding mode to use
+	/// \tparam U source type (half or built-in arithmetic type)
+	/// \param arg value to cast
+	/// \return \a arg converted to destination type
+	/// \exception FE_INVALID if \a T is integer type and result is not representable as \a T
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	template<typename T,std::float_round_style R,typename U> T half_cast(U arg) { return detail::half_caster<T,U,R>::cast(arg); }
+	/// \}
+
+	/// \}
+	/// \anchor errors
+	/// \name Error handling
+	/// \{
+
+	/// Clear exception flags.
+	/// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled, 
+	/// but in that case manual flag management is the only way to raise flags.
+	///
+	/// **See also:** Documentation for [std::feclearexcept](https://en.cppreference.com/w/cpp/numeric/fenv/feclearexcept).
+	/// \param excepts OR of exceptions to clear
+	/// \retval 0 all selected flags cleared successfully (this implementation always returns 0)
+	inline int feclearexcept(int excepts) { detail::errflags() &= ~excepts; return 0; }
+
+	/// Test exception flags.
+	/// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled, 
+	/// but in that case manual flag management is the only way to raise flags.
+	///
+	/// **See also:** Documentation for [std::fetestexcept](https://en.cppreference.com/w/cpp/numeric/fenv/fetestexcept).
+	/// \param excepts OR of exceptions to test
+	/// \return OR of those selected exceptions whose flags are currently set (0 if none)
+	inline int fetestexcept(int excepts) { return detail::errflags() & excepts; }
+
+	/// Raise exception flags.
+	/// This sets the specified floating point exception flags and then invokes any additional automatic exception handling as 
+	/// configured with the [HALF_ERRHANDLING_...](\ref HALF_ERRHANDLING_ERRNO) preprocessor symbols.
+	/// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled, 
+	/// but in that case manual flag management is the only way to raise flags.
+	///
+	/// **See also:** Documentation for [std::feraiseexcept](https://en.cppreference.com/w/cpp/numeric/fenv/feraiseexcept).
+	/// \param excepts OR of exceptions to raise
+	/// \retval 0 all selected exceptions raised successfully (this implementation always returns 0)
+	inline int feraiseexcept(int excepts) { detail::errflags() |= excepts; detail::raise(excepts); return 0; }
+
+	/// Save exception flags.
+	/// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled, 
+	/// but in that case manual flag management is the only way to raise flags.
+	///
+	/// **See also:** Documentation for [std::fegetexceptflag](https://en.cppreference.com/w/cpp/numeric/fenv/feexceptflag).
+	/// \param flagp address to store flag state at (receives the current flags masked by \a excepts)
+	/// \param excepts OR of flags to save
+	/// \retval 0 for success
+	inline int fegetexceptflag(int *flagp, int excepts) { *flagp = detail::errflags() & excepts; return 0; }
+
+	/// Restore exception flags.
+	/// Each flag selected by \a excepts is overwritten with its state in \a *flagp (set or unset); no additional exception handling runs.
+	/// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled, 
+	/// but in that case manual flag management is the only way to raise flags.
+	///
+	/// **See also:** Documentation for [std::fesetexceptflag](https://en.cppreference.com/w/cpp/numeric/fenv/feexceptflag).
+	/// \param flagp address to take flag state from
+	/// \param excepts OR of flags to restore
+	/// \retval 0 for success
+	inline int fesetexceptflag(const int *flagp, int excepts) { detail::errflags() = (detail::errflags()&~excepts) | (*flagp&excepts); return 0; }
+
+	/// Throw a C++ exception for the first selected exception flag that is currently set.
+	/// This function manually throws a corresponding C++ exception if one of the specified flags is set, 
+	/// no matter if automatic throwing (via [HALF_ERRHANDLING_THROW_...](\ref HALF_ERRHANDLING_THROW_INVALID)) is enabled or not.
+	/// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled, 
+	/// but in that case manual flag management is the only way to raise flags.
+	/// \param excepts OR of exceptions to test
+	/// \param msg error message to use for exception description
+	/// \throw std::domain_error if `FE_INVALID` or `FE_DIVBYZERO` is selected and set
+	/// \throw std::overflow_error if `FE_OVERFLOW` is selected and set
+	/// \throw std::underflow_error if `FE_UNDERFLOW` is selected and set
+	/// \throw std::range_error if `FE_INEXACT` is selected and set
+	inline void fethrowexcept(int excepts, const char *msg = "")
+	{
+		const int pending = detail::errflags() & excepts;
+		if((pending&FE_INVALID) || (pending&FE_DIVBYZERO))
+			throw std::domain_error(msg);
+		else if(pending & FE_OVERFLOW)
+			throw std::overflow_error(msg);
+		else if(pending & FE_UNDERFLOW)
+			throw std::underflow_error(msg);
+		else if(pending & FE_INEXACT)
+			throw std::range_error(msg);
+	}
+	/// \}
+}
+
+
+#undef HALF_UNUSED_NOERR
+#undef HALF_CONSTEXPR
+#undef HALF_CONSTEXPR_CONST
+#undef HALF_CONSTEXPR_NOERR
+#undef HALF_NOEXCEPT
+#undef HALF_NOTHROW
+#undef HALF_THREAD_LOCAL
+#undef HALF_TWOS_COMPLEMENT_INT
+#ifdef HALF_POP_WARNINGS
+	#pragma warning(pop)
+	#undef HALF_POP_WARNINGS
+#endif
+
+#endif
diff --git a/ext/miniz b/ext/miniz
index 1ff82be7d..35528ad76 160000
--- a/ext/miniz
+++ b/ext/miniz
@@ -1 +1 @@
-Subproject commit 1ff82be7d67f5c2f8b5497f538eea247861e0717
+Subproject commit 35528ad769143b9ed38a95a22d460b963e39f278
diff --git a/ext/stb/CMakeLists.txt b/ext/stb/CMakeLists.txt
index 70a3ab18d..bca64d93d 100644
--- a/ext/stb/CMakeLists.txt
+++ b/ext/stb/CMakeLists.txt
@@ -7,3 +7,7 @@ add_library(${PROJECT_NAME} INTERFACE
         "${CMAKE_CURRENT_SOURCE_DIR}/include/stb_image.h"
         "${CMAKE_CURRENT_SOURCE_DIR}/include/stb_image_resize2.h"
         "${CMAKE_CURRENT_SOURCE_DIR}/include/stb_image_write.h")
+
+target_include_directories(${PROJECT_NAME} INTERFACE  # expose the header directory to targets that link this INTERFACE library
+        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+        $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)  # NOTE(review): CMAKE_INSTALL_INCLUDEDIR comes from GNUInstallDirs - confirm it is included upstream
diff --git a/ext/tinyexr/CMakeLists.txt b/ext/tinyexr/CMakeLists.txt
new file mode 100644
index 000000000..9a766886f
--- /dev/null
+++ b/ext/tinyexr/CMakeLists.txt
@@ -0,0 +1,11 @@
+cmake_minimum_required(VERSION 3.16)
+project(sourcepp_tinyexr)
+set(CMAKE_CXX_STANDARD 20)
+
+# Header-only wrapper: an INTERFACE library that lists tinyexr.h and carries its include path
+add_library(${PROJECT_NAME} INTERFACE
+        "${CMAKE_CURRENT_SOURCE_DIR}/include/tinyexr.h")
+
+target_include_directories(${PROJECT_NAME} INTERFACE  # expose the header directory to targets that link this library
+        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+        $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)  # NOTE(review): CMAKE_INSTALL_INCLUDEDIR comes from GNUInstallDirs - confirm it is included upstream
diff --git a/ext/tinyexr/LICENSE b/ext/tinyexr/LICENSE
new file mode 100644
index 000000000..292ab32ec
--- /dev/null
+++ b/ext/tinyexr/LICENSE
@@ -0,0 +1,27 @@
+Copyright (c) 2014 - 2021, Syoyo Fujita and many contributors.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/ext/tinyexr/include/tinyexr.h b/ext/tinyexr/include/tinyexr.h
new file mode 100644
index 000000000..64ee67f2c
--- /dev/null
+++ b/ext/tinyexr/include/tinyexr.h
@@ -0,0 +1,9304 @@
+#ifndef TINYEXR_H_
+#define TINYEXR_H_
+/*
+Copyright (c) 2014 - 2021, Syoyo Fujita and many contributors.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Syoyo Fujita nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+// TinyEXR contains some OpenEXR code, which is licensed under ------------
+
+///////////////////////////////////////////////////////////////////////////
+//
+// Copyright (c) 2002, Industrial Light & Magic, a division of Lucas
+// Digital Ltd. LLC
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+// *       Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// *       Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// *       Neither the name of Industrial Light & Magic nor the names of
+// its contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+///////////////////////////////////////////////////////////////////////////
+
+// End of OpenEXR license -------------------------------------------------
+
+
+//
+//
+//   Do this:
+//    #define TINYEXR_IMPLEMENTATION
+//   before you include this file in *one* C or C++ file to create the
+//   implementation.
+//
+//   // i.e. it should look like this:
+//   #include ...
+//   #include ...
+//   #include ...
+//   #define TINYEXR_IMPLEMENTATION
+//   #include "tinyexr.h"
+//
+//
+
+#include <stddef.h>  // for size_t
+#include <stdint.h>  // guess stdint.h is available(C99)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \
+    defined(__i386) || defined(__i486__) || defined(__i486) ||  \
+    defined(i386) || defined(__ia64__) || defined(__x86_64__)
+#define TINYEXR_X86_OR_X64_CPU 1
+#else
+#define TINYEXR_X86_OR_X64_CPU 0
+#endif
+
+#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || TINYEXR_X86_OR_X64_CPU
+#define TINYEXR_LITTLE_ENDIAN 1
+#else
+#define TINYEXR_LITTLE_ENDIAN 0
+#endif
+
+// Use miniz or not to decode ZIP format pixel. Linking with zlib
+// required if this flag is 0 and TINYEXR_USE_STB_ZLIB is 0.
+#ifndef TINYEXR_USE_MINIZ
+#define TINYEXR_USE_MINIZ (1)
+#endif
+
+// Use the ZIP implementation of stb_image.h and stb_image_write.h.
+#ifndef TINYEXR_USE_STB_ZLIB
+#define TINYEXR_USE_STB_ZLIB (0)
+#endif
+
+// Use nanozlib.
+#ifndef TINYEXR_USE_NANOZLIB
+#define TINYEXR_USE_NANOZLIB (0)
+#endif
+
+// Disable PIZ compression when applying cpplint.
+#ifndef TINYEXR_USE_PIZ
+#define TINYEXR_USE_PIZ (1)
+#endif
+
+#ifndef TINYEXR_USE_ZFP
+#define TINYEXR_USE_ZFP (0)  // TinyEXR extension.
+// http://computation.llnl.gov/projects/floating-point-compression
+#endif
+
+#ifndef TINYEXR_USE_THREAD
+#define TINYEXR_USE_THREAD (0)  // No threaded loading.
+// When enabled with a C++11 compiler, <atomic> and <thread> are included.
+#endif
+
+#ifndef TINYEXR_USE_OPENMP
+#ifdef _OPENMP
+#define TINYEXR_USE_OPENMP (1)
+#else
+#define TINYEXR_USE_OPENMP (0)
+#endif
+#endif
+
+#define TINYEXR_SUCCESS (0)
+#define TINYEXR_ERROR_INVALID_MAGIC_NUMBER (-1)
+#define TINYEXR_ERROR_INVALID_EXR_VERSION (-2)
+#define TINYEXR_ERROR_INVALID_ARGUMENT (-3)
+#define TINYEXR_ERROR_INVALID_DATA (-4)
+#define TINYEXR_ERROR_INVALID_FILE (-5)
+#define TINYEXR_ERROR_INVALID_PARAMETER (-6)
+#define TINYEXR_ERROR_CANT_OPEN_FILE (-7)
+#define TINYEXR_ERROR_UNSUPPORTED_FORMAT (-8)
+#define TINYEXR_ERROR_INVALID_HEADER (-9)
+#define TINYEXR_ERROR_UNSUPPORTED_FEATURE (-10)
+#define TINYEXR_ERROR_CANT_WRITE_FILE (-11)
+#define TINYEXR_ERROR_SERIALIZATION_FAILED (-12)
+#define TINYEXR_ERROR_LAYER_NOT_FOUND (-13)
+#define TINYEXR_ERROR_DATA_TOO_LARGE (-14)
+
+// @note { OpenEXR file format: http://www.openexr.com/openexrfilelayout.pdf }
+
+// pixel type: possible values are: UINT = 0 HALF = 1 FLOAT = 2
+#define TINYEXR_PIXELTYPE_UINT (0)
+#define TINYEXR_PIXELTYPE_HALF (1)
+#define TINYEXR_PIXELTYPE_FLOAT (2)
+
+#define TINYEXR_MAX_HEADER_ATTRIBUTES (1024)
+#define TINYEXR_MAX_CUSTOM_ATTRIBUTES (128)
+
+#define TINYEXR_COMPRESSIONTYPE_NONE (0)
+#define TINYEXR_COMPRESSIONTYPE_RLE (1)
+#define TINYEXR_COMPRESSIONTYPE_ZIPS (2)
+#define TINYEXR_COMPRESSIONTYPE_ZIP (3)
+#define TINYEXR_COMPRESSIONTYPE_PIZ (4)
+#define TINYEXR_COMPRESSIONTYPE_ZFP (128)  // TinyEXR extension
+
+#define TINYEXR_ZFP_COMPRESSIONTYPE_RATE (0)
+#define TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION (1)
+#define TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY (2)
+
+#define TINYEXR_TILE_ONE_LEVEL (0)
+#define TINYEXR_TILE_MIPMAP_LEVELS (1)
+#define TINYEXR_TILE_RIPMAP_LEVELS (2)
+
+#define TINYEXR_TILE_ROUND_DOWN (0)
+#define TINYEXR_TILE_ROUND_UP (1)
+
+typedef struct TEXRVersion {
+  int version;    // this must be 2
+  // tile format image;
+  // not zero for only a single-part "normal" tiled file (according to spec.)
+  int tiled;
+  int long_name;  // long name attribute
+  // deep image(EXR 2.0);
+  // for a multi-part file, indicates that at least one part is of type deep* (according to spec.)
+  int non_image;
+  int multipart;  // multi-part(EXR 2.0)
+} EXRVersion;
+
+typedef struct TEXRAttribute {
+  char name[256];  // name and type are up to 255 chars long.
+  char type[256];
+  unsigned char *value;  // uint8_t*
+  int size;
+  int pad0;
+} EXRAttribute;
+
+typedef struct TEXRChannelInfo {
+  char name[256];  // less than 255 bytes long
+  int pixel_type;
+  int x_sampling;
+  int y_sampling;
+  unsigned char p_linear;
+  unsigned char pad[3];
+} EXRChannelInfo;
+
+typedef struct TEXRTile {
+  int offset_x;
+  int offset_y;
+  int level_x;
+  int level_y;
+
+  int width;   // actual width in a tile.
+  int height;  // actual height in a tile.
+
+  unsigned char **images;  // image[channels][pixels]
+} EXRTile;
+
+typedef struct TEXRBox2i {
+  int min_x;
+  int min_y;
+  int max_x;
+  int max_y;
+} EXRBox2i;
+
+typedef struct TEXRHeader {
+  float pixel_aspect_ratio;
+  int line_order;
+  EXRBox2i data_window;
+  EXRBox2i display_window;
+  float screen_window_center[2];
+  float screen_window_width;
+
+  int chunk_count;
+
+  // Properties for tiled format(`tiledesc`).
+  int tiled;
+  int tile_size_x;
+  int tile_size_y;
+  int tile_level_mode;
+  int tile_rounding_mode;
+
+  int long_name;
+  // for a single-part file, agree with the version field bit 11
+  // for a multi-part file, it is consistent with the type of part
+  int non_image;
+  int multipart;
+  unsigned int header_len;
+
+  // Custom attributes (excludes required attributes, e.g. `channels`,
+  // `compression`, etc.)
+  int num_custom_attributes;
+  EXRAttribute *custom_attributes;  // array of EXRAttribute. size =
+                                    // `num_custom_attributes`.
+
+  EXRChannelInfo *channels;  // [num_channels]
+
+  int *pixel_types;  // Loaded pixel type(TINYEXR_PIXELTYPE_*) of `images` for
+  // each channel. This is overwritten with `requested_pixel_types` when
+  // loading.
+  int num_channels;
+
+  int compression_type;        // compression type(TINYEXR_COMPRESSIONTYPE_*)
+  int *requested_pixel_types;  // Filled initially by
+                               // ParseEXRHeaderFrom(Memory|File), then users
+                               // can edit it(only valid for HALF pixel type
+                               // channel)
+  // name attribute required for multipart files;
+  // must be unique and non empty (according to spec.);
+  // use EXRSetNameAttr for setting value;
+  // max 255 character allowed - excluding terminating zero
+  char name[256];
+} EXRHeader;
+
+typedef struct TEXRMultiPartHeader {
+  int num_headers;
+  EXRHeader *headers;
+
+} EXRMultiPartHeader;
+
+typedef struct TEXRImage {
+  EXRTile *tiles;  // Tiled pixel data. The application must reconstruct image
+                   // from tiles manually. NULL if scanline format.
+  struct TEXRImage* next_level; // NULL if scanline format or image is the last level.
+  int level_x; // x level index
+  int level_y; // y level index
+
+  unsigned char **images;  // image[channels][pixels]. NULL if tiled format.
+
+  int width;
+  int height;
+  int num_channels;
+
+  // Properties for tile format.
+  int num_tiles;
+
+} EXRImage;
+
+typedef struct TEXRMultiPartImage {
+  int num_images;
+  EXRImage *images;
+
+} EXRMultiPartImage;
+
+typedef struct TDeepImage {
+  const char **channel_names;
+  float ***image;      // image[channels][scanlines][samples]
+  int **offset_table;  // offset_table[scanline][offsets]
+  int num_channels;
+  int width;
+  int height;
+  int pad0;
+} DeepImage;
+
+// @deprecated { For backward compatibility. Not recommended to use. }
+// Loads single-frame OpenEXR image. Assume EXR image contains A(single channel
+// alpha) or RGB(A) channels.
+// Application must free image data as returned by `out_rgba`
+// Result image format is: float x RGBA x width x height
+// Returns negative value and may set error string in `err` when there's an
+// error
+extern int LoadEXR(float **out_rgba, int *width, int *height,
+                   const char *filename, const char **err);
+
+// Loads single-frame OpenEXR image by specifying layer name. Assume EXR image
+// contains A(single channel alpha) or RGB(A) channels. Application must free
+// image data as returned by `out_rgba`. Result image format is: float x RGBA x
+// width x height. Returns negative value and may set error string in `err` when
+// there's an error. When the specified layer name is not found in the EXR file,
+// the function will return `TINYEXR_ERROR_LAYER_NOT_FOUND`.
+extern int LoadEXRWithLayer(float **out_rgba, int *width, int *height,
+                            const char *filename, const char *layer_name,
+                            const char **err);
+
+//
+// Get layer infos from EXR file.
+//
+// @param[out] layer_names List of layer names. Application must free memory
+// after using this.
+// @param[out] num_layers The number of layers
+// @param[out] err Error string(will be filled when the function returns error
+// code). Free it using FreeEXRErrorMessage after using this value.
+//
+// @return TINYEXR_SUCCESS upon success.
+//
+extern int EXRLayers(const char *filename, const char **layer_names[],
+                     int *num_layers, const char **err);
+
+// @deprecated
+// Simple wrapper API for ParseEXRHeaderFromFile.
+// Checks whether the given file is an EXR file (by just looking at the header).
+// @return TINYEXR_SUCCESS for EXR image, TINYEXR_ERROR_INVALID_HEADER for
+// others
+extern int IsEXR(const char *filename);
+
+// Simple wrapper API for ParseEXRHeaderFromMemory.
+// Check if given data is an EXR image (by just looking at the header section).
+// @return TINYEXR_SUCCESS for EXR image, TINYEXR_ERROR_INVALID_HEADER for
+// others
+extern int IsEXRFromMemory(const unsigned char *memory, size_t size);
+
+// @deprecated
+// Saves single-frame OpenEXR image to a buffer. Assume EXR image contains RGB(A) channels.
+// components must be 1(Grayscale), 3(RGB) or 4(RGBA).
+// Input image format is: `float x width x height`, or `float x RGB(A) x width x
+// height`
+// Save image as fp16(HALF) format when `save_as_fp16` is positive non-zero
+// value.
+// Save image as fp32(FLOAT) format when `save_as_fp16` is 0.
+// Use ZIP compression by default.
+// `buffer` is the pointer to write EXR data.
+// Memory for `buffer` is allocated internally in SaveEXRToMemory.
+// Returns the data size of EXR file when the value is positive(up to 2GB EXR data).
+// Returns negative value and may set error string in `err` when there's an
+// error
+extern int SaveEXRToMemory(const float *data, const int width, const int height,
+                   const int components, const int save_as_fp16,
+                   unsigned char **buffer, const char **err);
+
+// @deprecated { Not recommended, but handy to use. }
+// Saves single-frame OpenEXR image to a file. Assume EXR image contains RGB(A) channels.
+// components must be 1(Grayscale), 3(RGB) or 4(RGBA).
+// Input image format is: `float x width x height`, or `float x RGB(A) x width x
+// height`
+// Save image as fp16(HALF) format when `save_as_fp16` is positive non-zero
+// value.
+// Save image as fp32(FLOAT) format when `save_as_fp16` is 0.
+// Use ZIP compression by default.
+// Returns TINYEXR_SUCCESS(0) when success.
+// Returns negative value and may set error string in `err` when there's an
+// error
+extern int SaveEXR(const float *data, const int width, const int height,
+                   const int components, const int save_as_fp16,
+                   const char *filename, const char **err);
+
+// Returns the number of resolution levels of the image (including the base)
+extern int EXRNumLevels(const EXRImage* exr_image);
+
+// Initialize EXRHeader struct
+extern void InitEXRHeader(EXRHeader *exr_header);
+
+// Set name attribute of EXRHeader struct (it makes a copy)
+extern void EXRSetNameAttr(EXRHeader *exr_header, const char* name);
+
+// Initialize EXRImage struct
+extern void InitEXRImage(EXRImage *exr_image);
+
+// Frees internal data of EXRHeader struct
+extern int FreeEXRHeader(EXRHeader *exr_header);
+
+// Frees internal data of EXRImage struct
+extern int FreeEXRImage(EXRImage *exr_image);
+
+// Frees error message
+extern void FreeEXRErrorMessage(const char *msg);
+
+// Parse EXR version header of a file.
+extern int ParseEXRVersionFromFile(EXRVersion *version, const char *filename);
+
+// Parse EXR version header from memory-mapped EXR data.
+extern int ParseEXRVersionFromMemory(EXRVersion *version,
+                                     const unsigned char *memory, size_t size);
+
+// Parse single-part OpenEXR header from a file and initialize `EXRHeader`.
+// When there was an error message, Application must free `err` with
+// FreeEXRErrorMessage()
+extern int ParseEXRHeaderFromFile(EXRHeader *header, const EXRVersion *version,
+                                  const char *filename, const char **err);
+
+// Parse single-part OpenEXR header from a memory and initialize `EXRHeader`.
+// When there was an error message, Application must free `err` with
+// FreeEXRErrorMessage()
+extern int ParseEXRHeaderFromMemory(EXRHeader *header,
+                                    const EXRVersion *version,
+                                    const unsigned char *memory, size_t size,
+                                    const char **err);
+
+// Parse multi-part OpenEXR headers from a file and initialize `EXRHeader*`
+// array.
+// When there was an error message, Application must free `err` with
+// FreeEXRErrorMessage()
+extern int ParseEXRMultipartHeaderFromFile(EXRHeader ***headers,
+                                           int *num_headers,
+                                           const EXRVersion *version,
+                                           const char *filename,
+                                           const char **err);
+
+// Parse multi-part OpenEXR headers from a memory and initialize `EXRHeader*`
+// array
+// When there was an error message, Application must free `err` with
+// FreeEXRErrorMessage()
+extern int ParseEXRMultipartHeaderFromMemory(EXRHeader ***headers,
+                                             int *num_headers,
+                                             const EXRVersion *version,
+                                             const unsigned char *memory,
+                                             size_t size, const char **err);
+
+// Loads single-part OpenEXR image from a file.
+// Application must call `ParseEXRHeaderFromFile` before calling this function.
+// Application can free EXRImage using `FreeEXRImage`
+// Returns negative value and may set error string in `err` when there's an
+// error
+// When there was an error message, Application must free `err` with
+// FreeEXRErrorMessage()
+extern int LoadEXRImageFromFile(EXRImage *image, const EXRHeader *header,
+                                const char *filename, const char **err);
+
+// Loads single-part OpenEXR image from a memory.
+// Application must setup `EXRHeader` with
+// `ParseEXRHeaderFromMemory` before calling this function.
+// Application can free EXRImage using `FreeEXRImage`
+// Returns negative value and may set error string in `err` when there's an
+// error
+// When there was an error message, Application must free `err` with
+// FreeEXRErrorMessage()
+extern int LoadEXRImageFromMemory(EXRImage *image, const EXRHeader *header,
+                                  const unsigned char *memory,
+                                  const size_t size, const char **err);
+
+// Loads multi-part OpenEXR image from a file.
+// Application must call `ParseEXRMultipartHeaderFromFile` before calling this
+// function.
+// Application can free EXRImage using `FreeEXRImage`
+// Returns negative value and may set error string in `err` when there's an
+// error
+// When there was an error message, Application must free `err` with
+// FreeEXRErrorMessage()
+extern int LoadEXRMultipartImageFromFile(EXRImage *images,
+                                         const EXRHeader **headers,
+                                         unsigned int num_parts,
+                                         const char *filename,
+                                         const char **err);
+
+// Loads multi-part OpenEXR image from a memory.
+// Application must setup `EXRHeader*` array with
+// `ParseEXRMultipartHeaderFromMemory` before calling this function.
+// Application can free EXRImage using `FreeEXRImage`
+// Returns negative value and may set error string in `err` when there's an
+// error
+// When there was an error message, Application must free `err` with
+// FreeEXRErrorMessage()
+extern int LoadEXRMultipartImageFromMemory(EXRImage *images,
+                                           const EXRHeader **headers,
+                                           unsigned int num_parts,
+                                           const unsigned char *memory,
+                                           const size_t size, const char **err);
+
+// Saves multi-channel, single-frame OpenEXR image to a file.
+// Returns negative value and may set error string in `err` when there's an
+// error
+// When there was an error message, Application must free `err` with
+// FreeEXRErrorMessage()
+extern int SaveEXRImageToFile(const EXRImage *image,
+                              const EXRHeader *exr_header, const char *filename,
+                              const char **err);
+
+// Saves multi-channel, single-frame OpenEXR image to a memory.
+// Image is compressed using the `compression_type` value of `exr_header`.
+// Return the number of bytes if success.
+// Return zero and will set error string in `err` when there's an
+// error.
+// When there was an error message, Application must free `err` with
+// FreeEXRErrorMessage()
+extern size_t SaveEXRImageToMemory(const EXRImage *image,
+                                   const EXRHeader *exr_header,
+                                   unsigned char **memory, const char **err);
+
+// Saves multi-channel, multi-frame OpenEXR image to a file.
+// Images are compressed using the `compression_type` value in `exr_headers`.
+// File global attributes (eg. display_window) must be set in the first header.
+// Returns negative value and may set error string in `err` when there's an
+// error
+// When there was an error message, Application must free `err` with
+// FreeEXRErrorMessage()
+extern int SaveEXRMultipartImageToFile(const EXRImage *images,
+                                       const EXRHeader **exr_headers,
+                                       unsigned int num_parts,
+                                       const char *filename, const char **err);
+
+// Saves multi-channel, multi-frame OpenEXR image to a memory.
+// Images are compressed using the `compression_type` value in `exr_headers`.
+// File global attributes (eg. display_window) must be set in the first header.
+// Return the number of bytes if success.
+// Return zero and will set error string in `err` when there's an
+// error.
+// When there was an error message, Application must free `err` with
+// FreeEXRErrorMessage()
+extern size_t SaveEXRMultipartImageToMemory(const EXRImage *images,
+                                            const EXRHeader **exr_headers,
+                                            unsigned int num_parts,
+                                            unsigned char **memory, const char **err);
+// Loads single-frame OpenEXR deep image.
+// Application must free memory of variables in DeepImage(image, offset_table)
+// Returns negative value and may set error string in `err` when there's an
+// error
+// When there was an error message, Application must free `err` with
+// FreeEXRErrorMessage()
+extern int LoadDeepEXR(DeepImage *out_image, const char *filename,
+                       const char **err);
+
+// NOT YET IMPLEMENTED:
+// Saves single-frame OpenEXR deep image.
+// Returns negative value and may set error string in `err` when there's an
+// error
+// extern int SaveDeepEXR(const DeepImage *in_image, const char *filename,
+//                       const char **err);
+
+// NOT YET IMPLEMENTED:
+// Loads multi-part OpenEXR deep image.
+// Application must free memory of variables in DeepImage(image, offset_table)
+// extern int LoadMultiPartDeepEXR(DeepImage **out_image, int num_parts, const
+// char *filename,
+//                       const char **err);
+
+// For emscripten.
+// Loads single-frame OpenEXR image from memory. Assume EXR image contains
+// RGB(A) channels.
+// Returns negative value and may set error string in `err` when there's an
+// error
+// When there was an error message, Application must free `err` with
+// FreeEXRErrorMessage()
+extern int LoadEXRFromMemory(float **out_rgba, int *width, int *height,
+                             const unsigned char *memory, size_t size,
+                             const char **err);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // TINYEXR_H_
+
+#ifdef TINYEXR_IMPLEMENTATION
+#ifndef TINYEXR_IMPLEMENTATION_DEFINED
+#define TINYEXR_IMPLEMENTATION_DEFINED
+
+#ifdef _WIN32
+
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#endif
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif
+#include <windows.h>  // for UTF-8 and memory-mapping
+
+#if !defined(WINAPI_FAMILY) || (WINAPI_FAMILY == WINAPI_FAMILY_DESKTOP_APP)
+#define TINYEXR_USE_WIN32_MMAP (1)
+#endif
+
+#elif defined(__linux__) || defined(__unix__)
+#include <fcntl.h>     // for open()
+#include <sys/mman.h>  // for memory-mapping
+#include <sys/stat.h>  // for stat
+#include <unistd.h>    // for close()
+#define TINYEXR_USE_POSIX_MMAP (1)
+#endif
+
+#include <algorithm>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <sstream>
+
+//#include <iostream> // debug
+
+#include <limits>
+#include <string>
+#include <vector>
+#include <set>
+
+// https://stackoverflow.com/questions/5047971/how-do-i-check-for-c11-support
+#if __cplusplus > 199711L || (defined(_MSC_VER) && _MSC_VER >= 1900)
+#define TINYEXR_HAS_CXX11 (1)
+// C++11
+#include <cstdint>
+
+#if TINYEXR_USE_THREAD
+#include <atomic>
+#include <thread>
+#endif
+
+#else  // __cplusplus > 199711L
+#define TINYEXR_HAS_CXX11 (0)
+#endif  // __cplusplus > 199711L
+
+#if TINYEXR_USE_OPENMP
+#include <omp.h>
+#endif
+
+#if defined(TINYEXR_USE_MINIZ) && (TINYEXR_USE_MINIZ==1)
+#include <miniz.h>
+#else
+//  Issue #46. Please include your own zlib-compatible API header before
+//  including `tinyexr.h`
+//#include "zlib.h"
+#endif
+
+#if defined(TINYEXR_USE_NANOZLIB) && (TINYEXR_USE_NANOZLIB==1)
+#define NANOZLIB_IMPLEMENTATION
+#include "nanozlib.h"
+#endif
+
+#if TINYEXR_USE_STB_ZLIB
+// Since we don't know where a project has stb_image.h and stb_image_write.h
+// and whether they are in the include path, we don't include them here, and
+// instead declare the two relevant functions manually.
+// from stb_image.h:
+extern "C" int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
+// from stb_image_write.h:
+extern "C" unsigned char *stbi_zlib_compress(unsigned char *data, int data_len, int *out_len, int quality);
+#endif
+
+
+#if TINYEXR_USE_ZFP
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Weverything"
+#endif
+
+#include "zfp.h"
+
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
+#endif
+
+// Returns false from the enclosing function when `cond` does not hold.
+// msg: streamable text appended to the report; err: std::string* (may be NULL).
+// The report is appended to `*err` only when `err` is non-NULL.
+#define TINYEXR_CHECK_AND_RETURN_MSG(cond, msg, err) do { \
+  if (!(cond)) { \
+    if (err) { \
+      std::ostringstream ss_e; \
+      ss_e << __func__ << "():" << __LINE__ << msg << "\n"; \
+      (*err) += ss_e.str(); \
+    } \
+    return false;\
+  } \
+  } while(0)
+
+// no error message.
+#define TINYEXR_CHECK_AND_RETURN_C(cond, retcode) do { \
+  if (!(cond)) { \
+    return retcode; \
+  } \
+  } while(0)
+
+namespace tinyexr {
+
+#if __cplusplus > 199711L
+// C++11
+typedef uint64_t tinyexr_uint64;
+typedef int64_t tinyexr_int64;
+#else
+// Although `long long` is not a standard type pre C++11, assume it is defined
+// as a compiler's extension.
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wc++11-long-long"
+#endif
+typedef unsigned long long tinyexr_uint64;
+typedef long long tinyexr_int64;
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+#endif
+
+// static bool IsBigEndian(void) {
+//  union {
+//    unsigned int i;
+//    char c[4];
+//  } bint = {0x01020304};
+//
+//  return bint.c[0] == 1;
+//}
+
+// Stores a strdup()'ed copy of `msg` in `*err`; does nothing if `err` is NULL.
+static void SetErrorMessage(const std::string &msg, const char **err) {
+  if (!err) return;
+#ifdef _WIN32
+  *err = _strdup(msg.c_str());
+#else
+  *err = strdup(msg.c_str());
+#endif
+}
+
+#if 0
+static void SetWarningMessage(const std::string &msg, const char **warn) {
+  if (warn) {
+#ifdef _WIN32
+    (*warn) = _strdup(msg.c_str());
+#else
+    (*warn) = strdup(msg.c_str());
+#endif
+  }
+}
+#endif
+
+static const int kEXRVersionSize = 8;
+
+// Copies 2 bytes from src_val to dst_val through unsigned char pointers.
+static void cpy2(unsigned short *dst_val, const unsigned short *src_val) {
+  const unsigned char *in = reinterpret_cast<const unsigned char *>(src_val);
+  unsigned char *out = reinterpret_cast<unsigned char *>(dst_val);
+  out[0] = in[0];
+  out[1] = in[1];
+}
+
+// Swaps the 2 bytes of *val when TINYEXR_LITTLE_ENDIAN is 0; no-op otherwise.
+static void swap2(unsigned short *val) {
+#if TINYEXR_LITTLE_ENDIAN
+  (void)val;
+#else
+  const unsigned short saved = *val;
+  const unsigned char *in = reinterpret_cast<const unsigned char *>(&saved);
+  unsigned char *out = reinterpret_cast<unsigned char *>(val);
+  out[1] = in[0];
+  out[0] = in[1];
+#endif
+}
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-function"
+#endif
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-function"
+#endif
+// Copies 4 bytes from src_val to dst_val through unsigned char pointers.
+static void cpy4(int *dst_val, const int *src_val) {
+  const unsigned char *in = reinterpret_cast<const unsigned char *>(src_val);
+  unsigned char *out = reinterpret_cast<unsigned char *>(dst_val);
+
+  for (int i = 0; i < 4; ++i) {
+    out[i] = in[i];
+  }
+}
+
+// Copies 4 bytes from src_val to dst_val through unsigned char pointers.
+static void cpy4(unsigned int *dst_val, const unsigned int *src_val) {
+  const unsigned char *in = reinterpret_cast<const unsigned char *>(src_val);
+  unsigned char *out = reinterpret_cast<unsigned char *>(dst_val);
+
+  for (int i = 0; i < 4; ++i) {
+    out[i] = in[i];
+  }
+}
+
+// Copies 4 bytes from src_val to dst_val through unsigned char pointers.
+static void cpy4(float *dst_val, const float *src_val) {
+  const unsigned char *in = reinterpret_cast<const unsigned char *>(src_val);
+  unsigned char *out = reinterpret_cast<unsigned char *>(dst_val);
+
+  for (int i = 0; i < 4; ++i) {
+    out[i] = in[i];
+  }
+}
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+// Reverses the 4 bytes of *val when TINYEXR_LITTLE_ENDIAN is 0; else a no-op.
+static void swap4(unsigned int *val) {
+#if TINYEXR_LITTLE_ENDIAN
+  (void)val;
+#else
+  const unsigned int saved = *val;
+  const unsigned char *in = reinterpret_cast<const unsigned char *>(&saved);
+  unsigned char *out = reinterpret_cast<unsigned char *>(val);
+
+  for (int i = 0; i < 4; ++i) {
+    out[i] = in[3 - i];
+  }
+#endif
+}
+
+// Reverses the 4 bytes of *val when TINYEXR_LITTLE_ENDIAN is 0; else a no-op.
+static void swap4(int *val) {
+#if TINYEXR_LITTLE_ENDIAN
+  (void)val;
+#else
+  const int saved = *val;
+  const unsigned char *in = reinterpret_cast<const unsigned char *>(&saved);
+  unsigned char *out = reinterpret_cast<unsigned char *>(val);
+
+  for (int i = 0; i < 4; ++i) {
+    out[i] = in[3 - i];
+  }
+#endif
+}
+
+// Reverses the 4 bytes of *val when TINYEXR_LITTLE_ENDIAN is 0; else a no-op.
+static void swap4(float *val) {
+#if TINYEXR_LITTLE_ENDIAN
+  (void)val;
+#else
+  const float saved = *val;
+  const unsigned char *in = reinterpret_cast<const unsigned char *>(&saved);
+  unsigned char *out = reinterpret_cast<unsigned char *>(val);
+
+  for (int i = 0; i < 4; ++i) {
+    out[i] = in[3 - i];
+  }
+#endif
+}
+
+#if 0
+static void cpy8(tinyexr::tinyexr_uint64 *dst_val, const tinyexr::tinyexr_uint64 *src_val) {
+  unsigned char *dst = reinterpret_cast<unsigned char *>(dst_val);
+  const unsigned char *src = reinterpret_cast<const unsigned char *>(src_val);
+
+  dst[0] = src[0];
+  dst[1] = src[1];
+  dst[2] = src[2];
+  dst[3] = src[3];
+  dst[4] = src[4];
+  dst[5] = src[5];
+  dst[6] = src[6];
+  dst[7] = src[7];
+}
+#endif
+
+// Reverses the 8 bytes of *val in place when TINYEXR_LITTLE_ENDIAN is 0.
+// When TINYEXR_LITTLE_ENDIAN is non-zero the value is left untouched and the
+// parameter is only referenced to silence unused-parameter warnings.
+static void swap8(tinyexr::tinyexr_uint64 *val) {
+#if TINYEXR_LITTLE_ENDIAN
+  (void)val;
+#else
+  // Work from a snapshot so that reads are not clobbered by the writes.
+  const tinyexr::tinyexr_uint64 saved = (*val);
+  const unsigned char *in = reinterpret_cast<const unsigned char *>(&saved);
+  unsigned char *out = reinterpret_cast<unsigned char *>(val);
+
+  for (int i = 0; i < 8; ++i) {
+    // byte i of the result is byte (7 - i) of the snapshot
+    out[i] = in[7 - i];
+  }
+#endif
+}
+
+// https://gist.github.com/rygorous/2156668
+union FP32 {  // One 32-bit value viewed three ways: raw bits (u), float (f), or named bit-fields (s).
+  unsigned int u;  // raw bit pattern
+  float f;         // the same storage read as a float
+  struct {         // 23 + 8 + 1 bit split; declaration order is flipped by TINYEXR_LITTLE_ENDIAN
+#if TINYEXR_LITTLE_ENDIAN
+    unsigned int Mantissa : 23;
+    unsigned int Exponent : 8;
+    unsigned int Sign : 1;
+#else
+    unsigned int Sign : 1;
+    unsigned int Exponent : 8;
+    unsigned int Mantissa : 23;
+#endif
+  } s;  // NOTE(review): bit-field placement is implementation-defined; presumably Sign is meant to land on the top bit of u — confirm per compiler
+};
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wpadded"
+#endif
+
+union FP16 {  // A 16-bit half pattern viewed as raw bits (u) or as named bit-fields (s).
+  unsigned short u;  // raw bit pattern
+  struct {           // 10 + 5 + 1 bit split; note the fields are declared as unsigned int while u is unsigned short
+#if TINYEXR_LITTLE_ENDIAN
+    unsigned int Mantissa : 10;
+    unsigned int Exponent : 5;
+    unsigned int Sign : 1;
+#else
+    unsigned int Sign : 1;
+    unsigned int Exponent : 5;
+    unsigned int Mantissa : 10;
+#endif
+  } s;  // NOTE(review): overlap of s with u relies on implementation-defined bit-field layout — confirm per compiler
+};
+
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
+static FP32 half_to_float(FP16 h) {  // Expands the 16-bit pattern in h.u to a 32-bit float pattern; the result is returned as an FP32.
+  static const FP32 magic = {113 << 23};  // initializes u: exponent field 113, zero mantissa; subtracted below to renormalize
+  static const unsigned int shifted_exp = 0x7c00
+                                          << 13;  // exponent mask after shift
+  FP32 o;
+
+  o.u = (h.u & 0x7fffU) << 13U;           // exponent/mantissa bits
+  unsigned int exp_ = shifted_exp & o.u;  // just the exponent
+  o.u += (127 - 15) << 23;                // exponent adjust
+
+  // handle exponent special cases
+  if (exp_ == shifted_exp)    // Inf/NaN?
+    o.u += (128 - 16) << 23;  // extra exp adjust
+  else if (exp_ == 0)         // Zero/Denormal?
+  {
+    o.u += 1 << 23;  // extra exp adjust
+    o.f -= magic.f;  // renormalize
+  }
+
+  o.u |= (h.u & 0x8000U) << 16U;  // sign bit
+  return o;
+}
+
+static FP16 float_to_half_full(FP32 f) {  // Converts the float bit-fields in f to a half (FP16), with explicit zero/denormal, Inf/NaN, overflow and underflow cases.
+  FP16 o = {0};  // all fields start at zero; each branch fills in only what it needs
+
+  // Based on ISPC reference code (with minor modifications)
+  if (f.s.Exponent == 0)  // Signed zero/denormal (which will underflow)
+    o.s.Exponent = 0;
+  else if (f.s.Exponent == 255)  // Inf or NaN (all exponent bits set)
+  {
+    o.s.Exponent = 31;
+    o.s.Mantissa = f.s.Mantissa ? 0x200 : 0;  // NaN->qNaN and Inf->Inf
+  } else                                      // Normalized number
+  {
+    // Exponent unbias the single, then bias the halfp
+    int newexp = f.s.Exponent - 127 + 15;
+    if (newexp >= 31)  // Overflow, return signed infinity
+      o.s.Exponent = 31;
+    else if (newexp <= 0)  // Underflow
+    {
+      if ((14 - newexp) <= 24)  // Mantissa might be non-zero
+      {
+        unsigned int mant = f.s.Mantissa | 0x800000;  // Hidden 1 bit
+        o.s.Mantissa = mant >> (14 - newexp);
+        if ((mant >> (13 - newexp)) & 1)  // Check for rounding
+          o.u++;  // Round, might overflow into exp bit, but this is OK
+      }
+    } else {
+      o.s.Exponent = static_cast<unsigned int>(newexp);
+      o.s.Mantissa = f.s.Mantissa >> 13;
+      if (f.s.Mantissa & 0x1000)  // Check for rounding
+        o.u++;                    // Round, might overflow to inf, this is OK
+    }
+  }
+  // The sign is copied last, after any o.u++ rounding above.
+  o.s.Sign = f.s.Sign;
+  return o;
+}
+
+// NOTE: From OpenEXR code
+// #define IMF_INCREASING_Y  0
+// #define IMF_DECREASING_Y  1
+// #define IMF_RAMDOM_Y    2
+//
+// #define IMF_NO_COMPRESSION  0
+// #define IMF_RLE_COMPRESSION 1
+// #define IMF_ZIPS_COMPRESSION  2
+// #define IMF_ZIP_COMPRESSION 3
+// #define IMF_PIZ_COMPRESSION 4
+// #define IMF_PXR24_COMPRESSION 5
+// #define IMF_B44_COMPRESSION 6
+// #define IMF_B44A_COMPRESSION  7
+
+#ifdef __clang__
+#pragma clang diagnostic push
+
+#if __has_warning("-Wzero-as-null-pointer-constant")
+#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
+#endif
+
+#endif
+
+static const char *ReadString(std::string *s, const char *ptr, size_t len) {  // Copies the NUL-terminated string at ptr into *s, scanning at most len bytes.
+  // Scan forward for the terminating NUL ('\0'), never looking past ptr + len.
+  const char *p = ptr;
+  const char *q = ptr;
+  while ((size_t(q - ptr) < len) && (*q) != 0) {
+    q++;
+  }
+  // No terminator inside the window: clear *s and signal failure with NULL.
+  if (size_t(q - ptr) >= len) {
+    (*s).clear();
+    return NULL;
+  }
+  // [p, q) is the string without its terminator.
+  (*s) = std::string(p, q);
+
+  return q + 1;  // skip '\0'
+}
+
+static bool ReadAttribute(std::string *name, std::string *type,  // out: attribute name and type strings
+                          std::vector<unsigned char> *data, size_t *marker_size,  // out: payload bytes; out: total bytes consumed from marker
+                          const char *marker, size_t size) {  // in: start of the attribute and the number of readable bytes; returns false on any malformed input
+  size_t name_len = strnlen(marker, size);
+  if (name_len == size) {
+    // String does not have a terminating character.
+    return false;
+  }
+  *name = std::string(marker, name_len);
+  // Step over the name and its terminator.
+  marker += name_len + 1;
+  size -= name_len + 1;
+
+  size_t type_len = strnlen(marker, size);
+  if (type_len == size) {  // same rule as for the name: the type must be terminated inside the buffer
+    return false;
+  }
+  *type = std::string(marker, type_len);
+  // Step over the type and its terminator.
+  marker += type_len + 1;
+  size -= type_len + 1;
+
+  if (size < sizeof(uint32_t)) {  // not enough bytes left for the 4-byte length field
+    return false;
+  }
+
+  uint32_t data_len;
+  memcpy(&data_len, marker, sizeof(uint32_t));  // memcpy rather than a cast: marker has no alignment guarantee
+  tinyexr::swap4(reinterpret_cast<unsigned int *>(&data_len));
+
+  if (data_len == 0) {  // a zero-length payload is accepted only for type "string"
+    if ((*type).compare("string") == 0) {
+      // Accept empty string attribute.
+
+      marker += sizeof(uint32_t);  // (marker and size are not read again on this path)
+      size -= sizeof(uint32_t);
+
+      *marker_size = name_len + 1 + type_len + 1 + sizeof(uint32_t);
+
+      data->resize(1);  // the empty string is returned as a single '\0' byte
+      (*data)[0] = '\0';
+
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  marker += sizeof(uint32_t);
+  size -= sizeof(uint32_t);
+
+  if (size < data_len) {  // declared payload length exceeds what is left in the buffer
+    return false;
+  }
+
+  data->resize(static_cast<size_t>(data_len));
+  memcpy(&data->at(0), marker, static_cast<size_t>(data_len));  // data_len > 0 here, so at(0) is valid
+
+  *marker_size = name_len + 1 + type_len + 1 + sizeof(uint32_t) + data_len;
+  return true;
+}
+
+static void WriteAttributeToMemory(std::vector<unsigned char> *out,  // in/out: buffer the serialized attribute is appended to
+                                   const char *name, const char *type,  // in: NUL-terminated name and type
+                                   const unsigned char *data, int len) {  // in: payload and its byte count (NOTE(review): a negative len is not checked)
+  out->insert(out->end(), name, name + strlen(name) + 1);  // +1 copies the terminator too
+  out->insert(out->end(), type, type + strlen(type) + 1);
+  // The length field goes through swap4 and is then appended as sizeof(int) raw bytes.
+  int outLen = len;
+  tinyexr::swap4(&outLen);
+  out->insert(out->end(), reinterpret_cast<unsigned char *>(&outLen),
+              reinterpret_cast<unsigned char *>(&outLen) + sizeof(int));
+  out->insert(out->end(), data, data + len);
+}
+
+typedef struct TChannelInfo {  // One entry of a channel list, as filled by ReadChannelInfo and serialized by WriteChannelInfo.
+  std::string name;  // less than 255 bytes long
+  int pixel_type;            // value read from the file by ReadChannelInfo
+  int requested_pixel_type;  // value written out by WriteChannelInfo; ReadChannelInfo resets it to 0
+  int x_sampling;
+  int y_sampling;
+  unsigned char p_linear;  // single byte stored right after the pixel type
+  unsigned char pad[3];    // presumably padding matching the 3 reserved bytes in the file — confirm
+} ChannelInfo;
+
+typedef struct {  // Integer 2D box given by its minimum and maximum x/y coordinates.
+  int min_x;
+  int min_y;
+  int max_x;
+  int max_y;  // NOTE(review): whether the max corner is inclusive is not visible here — confirm against users
+} Box2iInfo;
+
+struct HeaderInfo {  // Parsed header fields of one image part; no constructor is declared, so clear() is the only reset.
+  std::vector<tinyexr::ChannelInfo> channels;
+  std::vector<EXRAttribute> attributes;
+
+  Box2iInfo data_window;
+  int line_order;
+  Box2iInfo display_window;
+  float screen_window_center[2];
+  float screen_window_width;
+  float pixel_aspect_ratio;
+
+  int chunk_count;
+
+  // Tiled format
+  int tiled; // Non-zero if the part is tiled.
+  int tile_size_x;
+  int tile_size_y;
+  int tile_level_mode;
+  int tile_rounding_mode;
+
+  unsigned int header_len;
+
+  int compression_type;
+
+  // required for multi-part or non-image files
+  std::string name;
+  // required for multi-part or non-image files
+  std::string type;
+  // Resets every member: containers and strings are emptied, all scalar fields are set to zero.
+  void clear() {
+    channels.clear();
+    attributes.clear();
+
+    data_window.min_x = 0;
+    data_window.min_y = 0;
+    data_window.max_x = 0;
+    data_window.max_y = 0;
+    line_order = 0;
+    display_window.min_x = 0;
+    display_window.min_y = 0;
+    display_window.max_x = 0;
+    display_window.max_y = 0;
+    screen_window_center[0] = 0.0f;
+    screen_window_center[1] = 0.0f;
+    screen_window_width = 0.0f;
+    pixel_aspect_ratio = 0.0f;
+
+    chunk_count = 0;
+
+    // Tiled format
+    tiled = 0;
+    tile_size_x = 0;
+    tile_size_y = 0;
+    tile_level_mode = 0;
+    tile_rounding_mode = 0;
+
+    header_len = 0;
+    compression_type = 0;
+
+    name.clear();
+    type.clear();
+  }
+};
+
+static bool ReadChannelInfo(std::vector<ChannelInfo> &channels,  // out: one entry is appended per parsed channel
+                            const std::vector<unsigned char> &data) {  // in: raw channel-list bytes; returns false if they are truncated
+  if (data.empty()) return false;  // nothing to parse, not even the list terminator
+  const char *p = reinterpret_cast<const char *>(data.data());
+  for (;;) {
+    if ((*p) == 0) {  // a zero byte where a channel name would start ends the list
+      break;
+    }
+    ChannelInfo info;
+    info.requested_pixel_type = 0;
+
+    tinyexr_int64 data_len = static_cast<tinyexr_int64>(data.size()) -
+                             (p - reinterpret_cast<const char *>(data.data()));
+    if (data_len < 0) {
+      return false;
+    }
+
+    p = ReadString(&info.name, p, size_t(data_len));
+    if (p == NULL) {
+      // Buffer overrun. Issue #51.
+      return false;
+    }
+
+    const tinyexr_int64 remaining =  // bytes left after the name, computed without forming a pointer past the buffer
+        (reinterpret_cast<const char *>(data.data()) + data.size()) - p;
+    if (remaining <= 16) {  // need the 16 field bytes plus at least one more byte for the next name or the terminator
+      return false;
+    }
+
+    memcpy(&info.pixel_type, p, sizeof(int));
+    p += 4;
+    info.p_linear = static_cast<unsigned char>(p[0]);  // uchar
+    p += 1 + 3;                                        // reserved: uchar[3]
+    memcpy(&info.x_sampling, p, sizeof(int));          // int
+    p += 4;
+    memcpy(&info.y_sampling, p, sizeof(int));  // int
+    p += 4;
+
+    tinyexr::swap4(&info.pixel_type);
+    tinyexr::swap4(&info.x_sampling);
+    tinyexr::swap4(&info.y_sampling);
+
+    channels.push_back(info);
+  }
+
+  return true;
+}
+
+static void WriteChannelInfo(std::vector<unsigned char> &data,  // out: resized and filled with the serialized channel list
+                             const std::vector<ChannelInfo> &channels) {  // in: channels to serialize, in order
+  size_t sz = 0;
+
+  // Calculate total size.
+  for (size_t c = 0; c < channels.size(); c++) {
+    sz += channels[c].name.length() + 1;  // +1 for \0
+    sz += 16;                                    // 4 * int
+  }
+  data.resize(sz + 1);  // +1 for the zero byte that terminates the list
+
+  unsigned char *p = &data.at(0);
+
+  for (size_t c = 0; c < channels.size(); c++) {
+    memcpy(p, channels[c].name.c_str(), channels[c].name.length());
+    p += channels[c].name.length();
+    (*p) = '\0';
+    p++;
+    // Note: requested_pixel_type, not pixel_type, is what gets written.
+    int pixel_type = channels[c].requested_pixel_type;
+    int x_sampling = channels[c].x_sampling;
+    int y_sampling = channels[c].y_sampling;
+    tinyexr::swap4(&pixel_type);
+    tinyexr::swap4(&x_sampling);
+    tinyexr::swap4(&y_sampling);
+
+    memcpy(p, &pixel_type, sizeof(int));
+    p += sizeof(int);
+
+    (*p) = channels[c].p_linear;
+    p += 4;  // p_linear plus 3 bytes that are skipped, not written; they keep whatever resize() left there
+
+    memcpy(p, &x_sampling, sizeof(int));
+    p += sizeof(int);
+
+    memcpy(p, &y_sampling, sizeof(int));
+    p += sizeof(int);
+  }
+
+  (*p) = '\0';
+}
+
+static bool CompressZip(unsigned char *dst,  // out: receives the compressed (or raw fallback) bytes; capacity is not checked here
+                        tinyexr::tinyexr_uint64 &compressedSize,  // out: number of bytes written to dst
+                        const unsigned char *src, unsigned long src_size) {  // in: input bytes and their count; returns false if the compressor fails
+  std::vector<unsigned char> tmpBuf(src_size);  // scratch copy that is reordered and delta-coded before compression
+  // NOTE(review): every tmpBuf.at(0) below throws std::out_of_range when src_size is 0 — confirm callers never pass an empty buffer.
+  //
+  // EXR-specific transform applied before compression. Grabbed from OpenEXR's
+  // ImfZipCompressor.cpp
+  //
+
+  //
+  // Reorder the pixel data.
+  //
+
+  const char *srcPtr = reinterpret_cast<const char *>(src);
+
+  {
+    char *t1 = reinterpret_cast<char *>(&tmpBuf.at(0));  // first half receives src bytes 0, 2, 4, ...
+    char *t2 = reinterpret_cast<char *>(&tmpBuf.at(0)) + (src_size + 1) / 2;  // second half receives src bytes 1, 3, 5, ...
+    const char *stop = srcPtr + src_size;
+
+    for (;;) {
+      if (srcPtr < stop)
+        *(t1++) = *(srcPtr++);
+      else
+        break;
+
+      if (srcPtr < stop)
+        *(t2++) = *(srcPtr++);
+      else
+        break;
+    }
+  }
+
+  //
+  // Predictor.
+  //
+
+  {
+    unsigned char *t = &tmpBuf.at(0) + 1;
+    unsigned char *stop = &tmpBuf.at(0) + src_size;
+    int p = t[-1];  // previous original byte; the first byte itself is left unchanged
+
+    while (t < stop) {
+      int d = int(t[0]) - p + (128 + 256);  // difference to the previous byte, biased by 128; +256 keeps d positive before truncation
+      p = t[0];
+      t[0] = static_cast<unsigned char>(d);
+      ++t;
+    }
+  }
+
+#if defined(TINYEXR_USE_MINIZ) && (TINYEXR_USE_MINIZ==1)
+  //
+  // Compress the data using miniz
+  //
+
+  mz_ulong outSize = mz_compressBound(src_size);  // passed to mz_compress as the capacity of dst — assumes dst is at least this large, TODO confirm
+  int ret = mz_compress(
+      dst, &outSize, static_cast<const unsigned char *>(&tmpBuf.at(0)),
+      src_size);
+  if (ret != MZ_OK) {
+    return false;
+  }
+
+  compressedSize = outSize;
+#elif defined(TINYEXR_USE_STB_ZLIB) && (TINYEXR_USE_STB_ZLIB==1)
+  int outSize;
+  unsigned char* ret = stbi_zlib_compress(const_cast<unsigned char*>(&tmpBuf.at(0)), src_size, &outSize, 8);
+  if (!ret) {
+    return false;
+  }
+  memcpy(dst, ret, outSize);  // copies without knowing dst's capacity
+  free(ret);
+
+  compressedSize = outSize;
+#elif defined(TINYEXR_USE_NANOZLIB) && (TINYEXR_USE_NANOZLIB==1)
+  uint64_t dstSize = nanoz_compressBound(static_cast<uint64_t>(src_size));  // NOTE(review): computed but never used in this branch
+  int outSize{0};
+  unsigned char *ret = nanoz_compress(&tmpBuf.at(0), src_size, &outSize, /* quality */8);
+  if (!ret) {
+    return false;
+  }
+
+  memcpy(dst, ret, outSize);
+  free(ret);
+  
+  compressedSize = outSize;
+#else
+  uLong outSize = compressBound(static_cast<uLong>(src_size));  // passed to compress() as the capacity of dst — assumes dst is at least this large, TODO confirm
+  int ret = compress(dst, &outSize, static_cast<const Bytef *>(&tmpBuf.at(0)),
+                     src_size);
+  if (ret != Z_OK) {
+    return false;
+  }
+
+  compressedSize = outSize;
+#endif
+
+  // Use uncompressed data when compressed data is larger than uncompressed.
+  // (Issue 40)
+  if (compressedSize >= src_size) {
+    compressedSize = src_size;
+    memcpy(dst, src, src_size);  // raw src, without the reorder/predictor transform
+  }
+
+  return true;
+}
+
+static bool DecompressZip(unsigned char *dst,  // out: receives *uncompressed_size decoded bytes
+                          unsigned long *uncompressed_size /* inout */,
+                          const unsigned char *src, unsigned long src_size) {  // in: stored chunk and its length; returns false on malformed input
+  if ((*uncompressed_size) == src_size) {
+    // Data is not compressed(Issue 40).
+    memcpy(dst, src, src_size);
+    return true;
+  }
+  std::vector<unsigned char> tmpBuf(*uncompressed_size);
+  if (tmpBuf.empty()) return false;  // zero expected output with non-empty input is malformed; tmpBuf.at(0) below would throw
+#if defined(TINYEXR_USE_MINIZ) && (TINYEXR_USE_MINIZ==1)
+  int ret =
+      mz_uncompress(&tmpBuf.at(0), uncompressed_size, src, src_size);
+  if (MZ_OK != ret) {
+    return false;
+  }
+#elif defined(TINYEXR_USE_STB_ZLIB) && (TINYEXR_USE_STB_ZLIB==1)
+  int ret = stbi_zlib_decode_buffer(reinterpret_cast<char*>(&tmpBuf.at(0)),
+      *uncompressed_size, reinterpret_cast<const char*>(src), src_size);
+  if (ret < 0) {
+    return false;
+  }
+#elif defined(TINYEXR_USE_NANOZLIB) && (TINYEXR_USE_NANOZLIB==1)
+  uint64_t dest_size = (*uncompressed_size);
+  uint64_t uncomp_size{0};
+  nanoz_status_t ret =
+      nanoz_uncompress(src, src_size, dest_size, &tmpBuf.at(0), &uncomp_size);
+  if (NANOZ_SUCCESS != ret) {
+    return false;
+  }
+  if ((*uncompressed_size) != uncomp_size) {
+    return false;
+  }
+#else
+  int ret = uncompress(&tmpBuf.at(0), uncompressed_size, src, src_size);
+  if (Z_OK != ret) {
+    return false;
+  }
+#endif
+
+  //
+  // Undo the EXR-specific transform applied before compression. Grabbed from
+  // OpenEXR's ImfZipCompressor.cpp
+  //
+
+  // Predictor.
+  {
+    unsigned char *t = &tmpBuf.at(0) + 1;
+    unsigned char *stop = &tmpBuf.at(0) + (*uncompressed_size);
+
+    while (t < stop) {
+      int d = int(t[-1]) + int(t[0]) - 128;  // running sum: t[-1] is already decoded, so this reverses the biased delta
+      t[0] = static_cast<unsigned char>(d);
+      ++t;
+    }
+  }
+
+  // Reorder the pixel data.
+  {
+    const char *t1 = reinterpret_cast<const char *>(&tmpBuf.at(0));  // first half holds output bytes 0, 2, 4, ...
+    const char *t2 = reinterpret_cast<const char *>(&tmpBuf.at(0)) +
+                     (*uncompressed_size + 1) / 2;  // second half holds output bytes 1, 3, 5, ...
+    char *s = reinterpret_cast<char *>(dst);
+    char *stop = s + (*uncompressed_size);
+
+    for (;;) {
+      if (s < stop)
+        *(s++) = *(t1++);
+      else
+        break;
+
+      if (s < stop)
+        *(s++) = *(t2++);
+      else
+        break;
+    }
+  }
+
+  return true;
+}
+
+// RLE code from OpenEXR --------------------------------------
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wsign-conversion"
+#if __has_warning("-Wextra-semi-stmt")
+#pragma clang diagnostic ignored "-Wextra-semi-stmt"
+#endif
+#endif
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4204)  // nonstandard extension used : non-constant
+                                 // aggregate initializer (also supported by GNU
+                                 // C and C99, so no big deal)
+#pragma warning(disable : 4244)  // 'initializing': conversion from '__int64' to
+                                 // 'int', possible loss of data
+#pragma warning(disable : 4267)  // 'argument': conversion from '__int64' to
+                                 // 'int', possible loss of data
+#pragma warning(disable : 4996)  // 'strdup': The POSIX name for this item is
+                                 // deprecated. Instead, use the ISO C and C++
+                                 // conformant name: _strdup.
+#endif
+
+const int MIN_RUN_LENGTH = 3;    // rleCompress encodes a span as a run only when it is at least this long
+const int MAX_RUN_LENGTH = 127;  // limit rleCompress applies when growing a run or a literal span
+
+//
+// Compress an array of bytes, using run-length encoding,
+// and return the length of the compressed data.
+// The capacity of out[] is not checked: no output length is passed in.
+
+static int rleCompress(int inLength, const char in[], signed char out[]) {
+  const char *inEnd = in + inLength;
+  const char *runStart = in;
+  const char *runEnd = in + 1;
+  signed char *outWrite = out;
+
+  while (runStart < inEnd) {
+    while (runEnd < inEnd && *runStart == *runEnd &&
+           runEnd - runStart - 1 < MAX_RUN_LENGTH) {
+      ++runEnd;
+    }
+
+    if (runEnd - runStart >= MIN_RUN_LENGTH) {
+      //
+      // Compressible run
+      //
+
+      *outWrite++ = static_cast<char>(runEnd - runStart) - 1;  // non-negative count byte: run length minus one
+      *outWrite++ = *(reinterpret_cast<const signed char *>(runStart));  // the repeated byte value
+      runStart = runEnd;
+    } else {
+      //
+      // Uncompressable run
+      //
+
+      while (runEnd < inEnd &&
+             ((runEnd + 1 >= inEnd || *runEnd != *(runEnd + 1)) ||
+              (runEnd + 2 >= inEnd || *(runEnd + 1) != *(runEnd + 2))) &&
+             runEnd - runStart < MAX_RUN_LENGTH) {
+        ++runEnd;  // keep extending the literal span until three equal bytes in a row start at runEnd
+      }
+
+      *outWrite++ = static_cast<char>(runStart - runEnd);  // negative count byte: minus the number of literal bytes that follow
+
+      while (runStart < runEnd) {
+        *outWrite++ = *(reinterpret_cast<const signed char *>(runStart++));
+      }
+    }
+
+    ++runEnd;
+  }
+
+  return static_cast<int>(outWrite - out);
+}
+
+//
+// Uncompress an array of bytes compressed with rleCompress().
+// Returns the length of the uncompressed data, or 0 if the output would
+// exceed maxLength or a record claims more input than inLength provides.
+//
+
+static int rleUncompress(int inLength, int maxLength, const signed char in[],
+                         char out[]) {
+  char *outStart = out;
+
+  while (inLength > 0) {
+    if (*in < 0) {  // negative count byte: a literal span of -(*in) bytes follows
+      int count = -(static_cast<int>(*in++));
+      inLength -= count + 1;
+
+      // Fixes #116: Add bounds check to in buffer.
+      if ((0 > (maxLength -= count)) || (inLength < 0)) return 0;
+
+      memcpy(out, in, count);
+      out += count;
+      in += count;
+    } else {  // non-negative count byte: the next byte is repeated count + 1 times
+      int count = *in++;
+      inLength -= 2;
+
+      if ((0 > (maxLength -= count + 1)) || (inLength < 0)) return 0;
+
+      memset(out, *reinterpret_cast<const char *>(in), count + 1);
+      out += count + 1;
+
+      in++;
+    }
+  }
+
+  return static_cast<int>(out - outStart);
+}
+
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
+// End of RLE code from OpenEXR -----------------------------------
+
+static bool CompressRle(unsigned char *dst,  // out: receives the RLE (or raw fallback) bytes; capacity is not checked here
+                        tinyexr::tinyexr_uint64 &compressedSize,  // out: number of bytes written to dst
+                        const unsigned char *src, unsigned long src_size) {  // in: input bytes and their count
+  std::vector<unsigned char> tmpBuf(src_size);  // scratch copy that is reordered and delta-coded before encoding
+  // NOTE(review): every tmpBuf.at(0) below throws std::out_of_range when src_size is 0 — confirm callers never pass an empty buffer.
+  //
+  // EXR-specific transform applied before compression. Grabbed from OpenEXR's
+  // ImfRleCompressor.cpp
+  //
+
+  //
+  // Reorder the pixel data.
+  //
+
+  const char *srcPtr = reinterpret_cast<const char *>(src);
+
+  {
+    char *t1 = reinterpret_cast<char *>(&tmpBuf.at(0));  // first half receives src bytes 0, 2, 4, ...
+    char *t2 = reinterpret_cast<char *>(&tmpBuf.at(0)) + (src_size + 1) / 2;  // second half receives src bytes 1, 3, 5, ...
+    const char *stop = srcPtr + src_size;
+
+    for (;;) {
+      if (srcPtr < stop)
+        *(t1++) = *(srcPtr++);
+      else
+        break;
+
+      if (srcPtr < stop)
+        *(t2++) = *(srcPtr++);
+      else
+        break;
+    }
+  }
+
+  //
+  // Predictor.
+  //
+
+  {
+    unsigned char *t = &tmpBuf.at(0) + 1;
+    unsigned char *stop = &tmpBuf.at(0) + src_size;
+    int p = t[-1];  // previous original byte; the first byte itself is left unchanged
+
+    while (t < stop) {
+      int d = int(t[0]) - p + (128 + 256);  // difference to the previous byte, biased by 128; +256 keeps d positive before truncation
+      p = t[0];
+      t[0] = static_cast<unsigned char>(d);
+      ++t;
+    }
+  }
+
+  // Original note: outSize will be (src_size * 3) / 2 at max; rleCompress itself does not check dst's capacity.
+  int outSize = rleCompress(static_cast<int>(src_size),
+                            reinterpret_cast<const char *>(&tmpBuf.at(0)),
+                            reinterpret_cast<signed char *>(dst));
+  TINYEXR_CHECK_AND_RETURN_C(outSize > 0, false);
+
+  compressedSize = static_cast<tinyexr::tinyexr_uint64>(outSize);
+
+  // Use uncompressed data when compressed data is larger than uncompressed.
+  // (Issue 40)
+  if (compressedSize >= src_size) {
+    compressedSize = src_size;
+    memcpy(dst, src, src_size);  // raw src, without the reorder/predictor transform
+  }
+
+  return true;
+}
+
+static bool DecompressRle(unsigned char *dst,  // out: receives uncompressed_size decoded bytes
+                          const unsigned long uncompressed_size,  // in: expected decoded byte count
+                          const unsigned char *src, unsigned long src_size) {  // in: stored chunk and its length; returns false on malformed input
+  if (uncompressed_size == src_size) {
+    // Data is not compressed(Issue 40).
+    memcpy(dst, src, src_size);
+    return true;
+  }
+
+  // Workaround for issue #112.
+  // TODO(syoyo): Add more robust out-of-bounds check in `rleUncompress`.
+  if (src_size <= 2 || uncompressed_size == 0) {  // a zero-sized output would make tmpBuf.at(0) below throw
+    return false;
+  }
+
+  std::vector<unsigned char> tmpBuf(uncompressed_size);
+
+  int ret = rleUncompress(static_cast<int>(src_size),
+                          static_cast<int>(uncompressed_size),
+                          reinterpret_cast<const signed char *>(src),
+                          reinterpret_cast<char *>(&tmpBuf.at(0)));
+  if (ret != static_cast<int>(uncompressed_size)) {  // decoded length must match exactly; rleUncompress returns 0 on overrun
+    return false;
+  }
+
+  //
+  // Undo the EXR-specific transform applied before compression. Grabbed from
+  // OpenEXR's ImfRleCompressor.cpp
+  //
+
+  // Predictor.
+  {
+    unsigned char *t = &tmpBuf.at(0) + 1;
+    unsigned char *stop = &tmpBuf.at(0) + uncompressed_size;
+
+    while (t < stop) {
+      int d = int(t[-1]) + int(t[0]) - 128;  // running sum: t[-1] is already decoded, so this reverses the biased delta
+      t[0] = static_cast<unsigned char>(d);
+      ++t;
+    }
+  }
+
+  // Reorder the pixel data.
+  {
+    const char *t1 = reinterpret_cast<const char *>(&tmpBuf.at(0));  // first half holds output bytes 0, 2, 4, ...
+    const char *t2 = reinterpret_cast<const char *>(&tmpBuf.at(0)) +
+                     (uncompressed_size + 1) / 2;  // second half holds output bytes 1, 3, 5, ...
+    char *s = reinterpret_cast<char *>(dst);
+    char *stop = s + uncompressed_size;
+
+    for (;;) {
+      if (s < stop)
+        *(s++) = *(t1++);
+      else
+        break;
+
+      if (s < stop)
+        *(s++) = *(t2++);
+      else
+        break;
+    }
+  }
+
+  return true;
+}
+
+#if TINYEXR_USE_PIZ
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wc++11-long-long"
+#pragma clang diagnostic ignored "-Wold-style-cast"
+#pragma clang diagnostic ignored "-Wpadded"
+#pragma clang diagnostic ignored "-Wsign-conversion"
+#pragma clang diagnostic ignored "-Wc++11-extensions"
+#pragma clang diagnostic ignored "-Wconversion"
+#pragma clang diagnostic ignored "-Wc++98-compat-pedantic"
+
+#if __has_warning("-Wcast-qual")
+#pragma clang diagnostic ignored "-Wcast-qual"
+#endif
+
+#if __has_warning("-Wextra-semi-stmt")
+#pragma clang diagnostic ignored "-Wextra-semi-stmt"
+#endif
+
+#endif
+
+//
+// PIZ compress/uncompress, based on OpenEXR's ImfPizCompressor.cpp
+//
+// -----------------------------------------------------------------
+// Copyright (c) 2004, Industrial Light & Magic, a division of Lucas
+// Digital Ltd. LLC)
+// (3 clause BSD license)
+//
+
+struct PIZChannelData {  // Per-channel bookkeeping for the PIZ code; field meanings below are inferred from names — confirm against users
+  unsigned short *start;  // presumably the first 16-bit sample of the channel
+  unsigned short *end;    // presumably one past the channel's last sample
+  int nx;                 // presumably sample count in x
+  int ny;                 // presumably sample count in y
+  int ys;                 // presumably the y sampling rate
+  int size;               // presumably the number of 16-bit words per sample
+};
+
+//-----------------------------------------------------------------------------
+//
+//  16-bit Haar Wavelet encoding and decoding
+//
+//  The source code in this file is derived from the encoding
+//  and decoding routines written by Christian Rouet for his
+//  PIZ image file format.
+//
+//-----------------------------------------------------------------------------
+
+//
+// Wavelet basis functions without modulo arithmetic; they produce
+// the best compression ratios when the wavelet-transformed data are
+// Huffman-encoded, but the wavelet transform works only for 14-bit
+// data (untransformed data values must be less than (1 << 14)).
+//
+
+inline void wenc14(unsigned short a, unsigned short b, unsigned short &l,  // in: the value pair (a, b); out: l = their average
+                   unsigned short &h) {  // out: h = their difference; l and h are written only after a and b are read
+  short as = static_cast<short>(a);
+  short bs = static_cast<short>(b);
+
+  short ms = (as + bs) >> 1;  // average, rounded by the arithmetic shift
+  short ds = as - bs;         // difference
+
+  l = static_cast<unsigned short>(ms);
+  h = static_cast<unsigned short>(ds);
+}
+
+inline void wdec14(unsigned short l, unsigned short h, unsigned short &a,  // in: average l and difference h; out: a
+                   unsigned short &b) {  // out: b; a and b are written only after l and h are read
+  short ls = static_cast<short>(l);
+  short hs = static_cast<short>(h);
+
+  int hi = hs;
+  int ai = ls + (hi & 1) + (hi >> 1);  // a = average + half the difference; (hi & 1) restores the bit dropped by the encoder's shift
+
+  short as = static_cast<short>(ai);
+  short bs = static_cast<short>(ai - hi);  // b = a - difference
+
+  a = static_cast<unsigned short>(as);
+  b = static_cast<unsigned short>(bs);
+}
+
+//
+// Wavelet basis functions with modulo arithmetic; they work with full
+// 16-bit data, but Huffman-encoding the wavelet-transformed data doesn't
+// compress the data quite as well.
+//
+
+const int NBITS = 16;                    // width of the values handled by wenc16/wdec16
+const int A_OFFSET = 1 << (NBITS - 1);   // 0x8000, added to a before encoding and removed after decoding
+const int M_OFFSET = 1 << (NBITS - 1);   // 0x8000, added to the average when the difference is negative
+const int MOD_MASK = (1 << NBITS) - 1;   // 0xFFFF, reduces intermediate results modulo 2^16
+
+inline void wenc16(unsigned short a, unsigned short b, unsigned short &l,  // in: the value pair (a, b); out: l = modular average
+                   unsigned short &h) {  // out: h = modular difference; l and h are written only after a and b are read
+  int ao = (a + A_OFFSET) & MOD_MASK;  // a shifted by half the 16-bit range, modulo 2^16
+  int m = ((ao + b) >> 1);
+  int d = ao - b;
+
+  if (d < 0) m = (m + M_OFFSET) & MOD_MASK;  // correct the average when the difference is negative
+
+  d &= MOD_MASK;
+
+  l = static_cast<unsigned short>(m);
+  h = static_cast<unsigned short>(d);
+}
+
+inline void wdec16(unsigned short l, unsigned short h, unsigned short &a,  // in: modular average l and difference h; out: a
+                   unsigned short &b) {  // out: b; a and b are written only after l and h are read
+  int m = l;
+  int d = h;
+  int bb = (m - (d >> 1)) & MOD_MASK;       // b = average minus half the difference, modulo 2^16
+  int aa = (d + bb - A_OFFSET) & MOD_MASK;  // a = b plus the difference, with the encoder's offset removed
+  b = static_cast<unsigned short>(bb);
+  a = static_cast<unsigned short>(aa);
+}
+
+//
+// 2D Wavelet encoding: transforms the nx-by-ny grid at `in` in place, level by
+// level, pairing elements p apart until 2 * p exceeds the smaller dimension.
+
+static void wav2Encode(
+    unsigned short *in,  // io: values are transformed in place
+    int nx,              // i : x size
+    int ox,              // i : x offset
+    int ny,              // i : y size
+    int oy,              // i : y offset
+    unsigned short mx)   // i : maximum in[x][y] value
+{
+  bool w14 = (mx < (1 << 14));  // values below 2^14 use the wenc14 basis, otherwise the modular wenc16 one
+  int n = (nx > ny) ? ny : nx;
+  int p = 1;   // == 1 <<  level
+  int p2 = 2;  // == 1 << (level+1)
+
+  //
+  // Hierarchical loop on smaller dimension n
+  //
+
+  while (p2 <= n) {
+    unsigned short *py = in;
+    unsigned short *ey = in + oy * (ny - p2);  // last row start that still has a full p2 block below it
+    int oy1 = oy * p;
+    int oy2 = oy * p2;
+    int ox1 = ox * p;
+    int ox2 = ox * p2;
+    unsigned short i00, i01, i10, i11;
+
+    //
+    // Y loop
+    //
+
+    for (; py <= ey; py += oy2) {
+      unsigned short *px = py;
+      unsigned short *ex = py + ox * (nx - p2);
+
+      //
+      // X loop
+      //
+
+      for (; px <= ex; px += ox2) {
+        unsigned short *p01 = px + ox1;
+        unsigned short *p10 = px + oy1;
+        unsigned short *p11 = p10 + ox1;
+
+        //
+        // 2D wavelet encoding
+        //
+
+        if (w14) {
+          wenc14(*px, *p01, i00, i01);
+          wenc14(*p10, *p11, i10, i11);
+          wenc14(i00, i10, *px, *p10);
+          wenc14(i01, i11, *p01, *p11);
+        } else {
+          wenc16(*px, *p01, i00, i01);
+          wenc16(*p10, *p11, i10, i11);
+          wenc16(i00, i10, *px, *p10);
+          wenc16(i01, i11, *p01, *p11);
+        }
+      }
+
+      //
+      // Encode (1D) odd column (still in Y loop)
+      //
+
+      if (nx & p) {
+        unsigned short *p10 = px + oy1;
+
+        if (w14)
+          wenc14(*px, *p10, i00, *p10);
+        else
+          wenc16(*px, *p10, i00, *p10);
+
+        *px = i00;
+      }
+    }
+
+    //
+    // Encode (1D) odd line (must loop in X)
+    //
+
+    if (ny & p) {
+      unsigned short *px = py;
+      unsigned short *ex = py + ox * (nx - p2);
+
+      for (; px <= ex; px += ox2) {
+        unsigned short *p01 = px + ox1;
+
+        if (w14)
+          wenc14(*px, *p01, i00, *p01);
+        else
+          wenc16(*px, *p01, i00, *p01);
+
+        *px = i00;
+      }
+    }
+
+    //
+    // Next level
+    //
+
+    p = p2;
+    p2 <<= 1;
+  }
+}
+
+//
+// 2D Wavelet decoding: reverses the level loop of wav2Encode in place,
+// starting at the coarsest level and halving p until it reaches 1.
+
+static void wav2Decode(
+    unsigned short *in,  // io: values are transformed in place
+    int nx,              // i : x size
+    int ox,              // i : x offset
+    int ny,              // i : y size
+    int oy,              // i : y offset
+    unsigned short mx)   // i : maximum in[x][y] value
+{
+  bool w14 = (mx < (1 << 14));  // must match the basis chosen by the encoder for the same mx
+  int n = (nx > ny) ? ny : nx;
+  int p = 1;
+  int p2;
+
+  //
+  // Search max level
+  //
+
+  while (p <= n) p <<= 1;  // first power of two above n
+
+  p >>= 1;  // largest power of two not exceeding n
+  p2 = p;
+  p >>= 1;  // p is half of p2 from here on
+
+  //
+  // Hierarchical loop on smaller dimension n
+  //
+
+  while (p >= 1) {
+    unsigned short *py = in;
+    unsigned short *ey = in + oy * (ny - p2);  // last row start that still has a full p2 block below it
+    int oy1 = oy * p;
+    int oy2 = oy * p2;
+    int ox1 = ox * p;
+    int ox2 = ox * p2;
+    unsigned short i00, i01, i10, i11;
+
+    //
+    // Y loop
+    //
+
+    for (; py <= ey; py += oy2) {
+      unsigned short *px = py;
+      unsigned short *ex = py + ox * (nx - p2);
+
+      //
+      // X loop
+      //
+
+      for (; px <= ex; px += ox2) {
+        unsigned short *p01 = px + ox1;
+        unsigned short *p10 = px + oy1;
+        unsigned short *p11 = p10 + ox1;
+
+        //
+        // 2D wavelet decoding
+        //
+
+        if (w14) {
+          wdec14(*px, *p10, i00, i10);
+          wdec14(*p01, *p11, i01, i11);
+          wdec14(i00, i01, *px, *p01);
+          wdec14(i10, i11, *p10, *p11);
+        } else {
+          wdec16(*px, *p10, i00, i10);
+          wdec16(*p01, *p11, i01, i11);
+          wdec16(i00, i01, *px, *p01);
+          wdec16(i10, i11, *p10, *p11);
+        }
+      }
+
+      //
+      // Decode (1D) odd column (still in Y loop)
+      //
+
+      if (nx & p) {
+        unsigned short *p10 = px + oy1;
+
+        if (w14)
+          wdec14(*px, *p10, i00, *p10);
+        else
+          wdec16(*px, *p10, i00, *p10);
+
+        *px = i00;
+      }
+    }
+
+    //
+    // Decode (1D) odd line (must loop in X)
+    //
+
+    if (ny & p) {
+      unsigned short *px = py;
+      unsigned short *ex = py + ox * (nx - p2);
+
+      for (; px <= ex; px += ox2) {
+        unsigned short *p01 = px + ox1;
+
+        if (w14)
+          wdec14(*px, *p01, i00, *p01);
+        else
+          wdec16(*px, *p01, i00, *p01);
+
+        *px = i00;
+      }
+    }
+
+    //
+    // Next level
+    //
+
+    p2 = p;
+    p >>= 1;
+  }
+}
+
+//-----------------------------------------------------------------------------
+//
+//  16-bit Huffman compression and decompression.
+//
+//  The source code in this file is derived from the 8-bit
+//  Huffman compression and decompression routines written
+//  by Christian Rouet for his PIZ image file format.
+//
+//-----------------------------------------------------------------------------
+
+// Adds some modification for tinyexr.
+
+const int HUF_ENCBITS = 16;  // literal (value) bit length
+const int HUF_DECBITS = 14;  // decoding bit size (>= 8)
+// The encoding table has one slot per 16-bit value plus one extra slot.
+const int HUF_ENCSIZE = (1 << HUF_ENCBITS) + 1;  // encoding table size
+const int HUF_DECSIZE = 1 << HUF_DECBITS;        // decoding table size
+const int HUF_DECMASK = HUF_DECSIZE - 1;         // mask selecting the low HUF_DECBITS bits
+
+struct HufDec {  // Decoding-table entry; per the original layout note each field means something different for short and long codes
+  //-------------------------------
+  unsigned int len : 8;   // short code: code length; long code: 0
+  unsigned int lit : 24;  // short code: the literal; long code: size of p
+  unsigned int *p;        // short code: 0 (null); long code: the literals
+};
+
+inline long long hufLength(long long code) { return code & 63; }  // low 6 bits of a packed table entry: the code length
+
+inline long long hufCode(long long code) { return code >> 6; }  // bits above the 6-bit length field: the code value
+
+inline void outputBits(int nBits, long long bits, long long &c, int &lc,  // in: bit count and the bits; io: accumulator c holding lc pending bits
+                       char *&out) {  // io: write cursor, advanced by one per byte emitted; no output bound is checked here
+  c <<= nBits;
+  lc += nBits;
+
+  c |= bits;  // new bits go below the ones already pending
+
+  while (lc >= 8) *out++ = static_cast<char>((c >> (lc -= 8)));  // flush whole bytes, oldest bits first
+}
+
+inline long long getBits(int nBits, long long &c, int &lc, const char *&in) {  // Returns the next nBits bits; c/lc are the accumulator and its bit count, in is the read cursor
+  while (lc < nBits) {  // refill one byte at a time; no input bound is checked here
+    c = (c << 8) | *(reinterpret_cast<const unsigned char *>(in++));
+    lc += 8;
+  }
+
+  lc -= nBits;
+  return (c >> lc) & ((1 << nBits) - 1);  // mask is built in int arithmetic, so nBits must stay below the width of int
+}
+
+//
+// ENCODING TABLE BUILDING & (UN)PACKING
+//
+
+//
+// Build a "canonical" Huffman code table:
+//  - for each (uncompressed) symbol, hcode contains the length
+//    of the corresponding code (in the compressed data)
+//  - canonical codes are computed and stored in hcode
+//  - the rules for constructing canonical codes are as follows:
+//    * shorter codes (if filled with zeroes to the right)
+//      have a numerically higher value than longer codes
+//    * for codes with the same length, numerical values
+//      increase with numerical symbol values
+//  - because the canonical code table can be constructed from
+//    symbol lengths alone, the code table can be transmitted
+//    without sending the actual code values
+//  - see http://www.compressconsult.com/huffman/
+//
+
+// Converts a table of code lengths into canonical Huffman codes, in place.
+//
+// On entry hcode[i] holds the code length of symbol i (0 means unused).
+// Each length is used directly as an index into a 59-entry array, so the
+// caller must guarantee lengths in the range 0..58.  On return every used
+// entry holds (code << 6) | length; unused entries are left untouched.
+static void hufCanonicalCodeTable(long long hcode[HUF_ENCSIZE]) {
+  // nextCode[len] first counts the symbols of each length and afterwards
+  // holds the next code value to hand out for that length.
+  long long nextCode[59];
+
+  for (int len = 0; len < 59; ++len) nextCode[len] = 0;
+
+  for (int sym = 0; sym < HUF_ENCSIZE; ++sym) ++nextCode[hcode[sym]];
+
+  // Walk from the longest length down to 1, replacing each count by the
+  // numerically lowest code of that length.
+  long long lowest = 0;
+
+  for (int len = 58; len >= 1; --len) {
+    const long long count = nextCode[len];
+    nextCode[len] = lowest;
+    lowest = (lowest + count) >> 1;
+  }
+
+  // Hand out consecutive codes per length, in increasing symbol order, and
+  // store them together with the length.
+  for (int sym = 0; sym < HUF_ENCSIZE; ++sym) {
+    const int len = static_cast<int>(hcode[sym]);
+
+    if (len <= 0) continue;
+
+    hcode[sym] = (nextCode[len] << 6) | len;
+    ++nextCode[len];
+  }
+}
+
+//
+// Compute Huffman codes (based on frq input) and store them in frq:
+//  - code structure is : [63:lsb - 6:msb] | [5-0: bit length];
+//  - max code length is 58 bits;
+//  - codes outside the range [im-iM] have a null length (unused values);
+//  - original frequencies are destroyed;
+//  - encoding tables are used by hufEncode() and hufBuildDecTable();
+//
+
+// Heap ordering on pointed-to frequencies: reports true when the first
+// pointee is larger, which puts the smallest frequency on top of a std heap.
+struct FHeapCompare {
+  bool operator()(long long *lhs, long long *rhs) { return *rhs < *lhs; }
+};
+
+// Builds the Huffman encoding table for the symbol frequencies in frq.
+//
+// On the successful path frq[] is overwritten with the canonical table
+// produced by hufCanonicalCodeTable(), *im receives the lowest index with a
+// non-zero frequency and *iM the index of the run-length pseudo-symbol that
+// is appended one past the highest used symbol; the function then returns
+// true.  The TINYEXR_CHECK_AND_RETURN_C checks below presumably make the
+// function return false as soon as a code would need more than 58 bits
+// (macro defined elsewhere -- confirm); frq is partially modified in that
+// case.
+//
+// At least one entry of frq must be non-zero on entry: the initial scan for
+// *im does not test the array bound.
+static bool hufBuildEncTable(
+    long long *frq,  // io: input frequencies [HUF_ENCSIZE], output table
+    int *im,         //  o: min frq index
+    int *iM)         //  o: max frq index
+{
+  //
+  // This function assumes that when it is called, array frq
+  // indicates the frequency of all possible symbols in the data
+  // that are to be Huffman-encoded.  (frq[i] contains the number
+  // of occurrences of symbol i in the data.)
+  //
+  // The loop below does three things:
+  //
+  // 1) Finds the minimum and maximum indices that point
+  //    to non-zero entries in frq:
+  //
+  //     frq[im] != 0, and frq[i] == 0 for all i < im
+  //     frq[iM] != 0, and frq[i] == 0 for all i > iM
+  //
+  // 2) Fills array fHeap with pointers to all non-zero
+  //    entries in frq.
+  //
+  // 3) Initializes array hlink such that hlink[i] == i
+  //    for all array entries.
+  //
+
+  std::vector<int> hlink(HUF_ENCSIZE);
+  std::vector<long long *> fHeap(HUF_ENCSIZE);
+
+  *im = 0;
+
+  // No bound check: relies on frq containing at least one non-zero entry.
+  while (!frq[*im]) (*im)++;
+
+  int nf = 0;
+
+  for (int i = *im; i < HUF_ENCSIZE; i++) {
+    hlink[i] = i;
+
+    if (frq[i]) {
+      fHeap[nf] = &frq[i];
+      nf++;
+      *iM = i;
+    }
+  }
+
+  //
+  // Add a pseudo-symbol, with a frequency count of 1, to frq;
+  // adjust the fHeap and hlink array accordingly.  Function
+  // hufEncode() uses the pseudo-symbol for run-length encoding.
+  //
+
+  (*iM)++;
+  frq[*iM] = 1;
+  fHeap[nf] = &frq[*iM];
+  nf++;
+
+  //
+  // Build an array, scode, such that scode[i] contains the number
+  // of bits assigned to symbol i.  Conceptually this is done by
+  // constructing a tree whose leaves are the symbols with non-zero
+  // frequency:
+  //
+  //     Make a heap that contains all symbols with a non-zero frequency,
+  //     with the least frequent symbol on top.
+  //
+  //     Repeat until only one symbol is left on the heap:
+  //
+  //         Take the two least frequent symbols off the top of the heap.
+  //         Create a new node that has first two nodes as children, and
+  //         whose frequency is the sum of the frequencies of the first
+  //         two nodes.  Put the new node back into the heap.
+  //
+  // The last node left on the heap is the root of the tree.  For each
+  // leaf node, the distance between the root and the leaf is the length
+  // of the code for the corresponding symbol.
+  //
+  // The loop below doesn't actually build the tree; instead we compute
+  // the distances of the leaves from the root on the fly.  When a new
+  // node is added to the heap, then that node's descendants are linked
+  // into a single linear list that starts at the new node, and the code
+  // lengths of the descendants (that is, their distance from the root
+  // of the tree) are incremented by one.
+  //
+
+  std::make_heap(&fHeap[0], &fHeap[nf], FHeapCompare());
+
+  std::vector<long long> scode(HUF_ENCSIZE);
+  memset(scode.data(), 0, sizeof(long long) * HUF_ENCSIZE);
+
+  while (nf > 1) {
+    //
+    // Find the indices, mm and m, of the two smallest non-zero frq
+    // values in fHeap, add the smallest frq to the second-smallest
+    // frq, and remove the smallest frq value from fHeap.
+    //
+
+    // Heap entries point into frq, so the pointer offset is the symbol index.
+    int mm = fHeap[0] - frq;
+    std::pop_heap(&fHeap[0], &fHeap[nf], FHeapCompare());
+    --nf;
+
+    int m = fHeap[0] - frq;
+    std::pop_heap(&fHeap[0], &fHeap[nf], FHeapCompare());
+
+    // pop_heap moved the entry for m to fHeap[nf - 1]; after its frequency
+    // is updated, push_heap re-inserts it over the same range.
+    frq[m] += frq[mm];
+    std::push_heap(&fHeap[0], &fHeap[nf], FHeapCompare());
+
+    //
+    // The entries in scode are linked into lists with the
+    // entries in hlink serving as "next" pointers and with
+    // the end of a list marked by hlink[j] == j.
+    //
+    // Traverse the lists that start at scode[m] and scode[mm].
+    // For each element visited, increment the length of the
+    // corresponding code by one bit. (If we visit scode[j]
+    // during the traversal, then the code for symbol j becomes
+    // one bit longer.)
+    //
+    // Merge the lists that start at scode[m] and scode[mm]
+    // into a single list that starts at scode[m].
+    //
+
+    //
+    // Add a bit to all codes in the first list.
+    //
+
+    for (int j = m;; j = hlink[j]) {
+      scode[j]++;
+
+      TINYEXR_CHECK_AND_RETURN_C(scode[j] <= 58, false);
+
+      if (hlink[j] == j) {
+        //
+        // Merge the two lists.
+        //
+
+        hlink[j] = mm;
+        break;
+      }
+    }
+
+    //
+    // Add a bit to all codes in the second list
+    //
+
+    for (int j = mm;; j = hlink[j]) {
+      scode[j]++;
+
+      TINYEXR_CHECK_AND_RETURN_C(scode[j] <= 58, false);
+
+      if (hlink[j] == j) break;
+    }
+  }
+
+  //
+  // Build a canonical Huffman code table, replacing the code
+  // lengths in scode with (code, code length) pairs.  Copy the
+  // code table from scode into frq.
+  //
+
+  hufCanonicalCodeTable(scode.data());
+  memcpy(frq, scode.data(), sizeof(long long) * HUF_ENCSIZE);
+
+  return true;
+}
+
+//
+// Pack an encoding table:
+//  - only code lengths, not actual codes, are stored
+//  - runs of zeroes are compressed as follows:
+//
+//    unpacked    packed
+//    --------------------------------
+//    1 zero    0  (6 bits)
+//    2 zeroes    59
+//    3 zeroes    60
+//    4 zeroes    61
+//    5 zeroes    62
+//    n zeroes (6 or more)  63 n-6  (6 + 8 bits)
+//
+
+const int SHORT_ZEROCODE_RUN = 59;  // 6-bit value that stands for 2 zeroes
+const int LONG_ZEROCODE_RUN = 63;   // 6-bit value followed by an 8-bit count
+// Shortest run stored in the long form (values 59..62 cover runs of 2..5).
+const int SHORTEST_LONG_RUN = LONG_ZEROCODE_RUN - SHORT_ZEROCODE_RUN + 2;
+// Longest run one long-form code can express with its 8-bit count.
+const int LONGEST_LONG_RUN = SHORTEST_LONG_RUN + 255;
+
+// Writes the code lengths of hcode[im..iM] to *pcode as packed 6-bit values,
+// run-length encoding runs of zero lengths, and advances *pcode past the
+// bytes written.  The output buffer is not bounds-checked here.
+static void hufPackEncTable(
+    const long long *hcode,  // i : encoding table [HUF_ENCSIZE]
+    int im,                  // i : min hcode index
+    int iM,                  // i : max hcode index
+    char **pcode)            //  o: ptr to packed table (updated)
+{
+  char *dst = *pcode;
+  long long acc = 0;  // bits not yet written to dst
+  int accBits = 0;    // number of valid bits in acc
+
+  while (im <= iM) {
+    const int len = hufLength(hcode[im]);
+
+    if (len != 0) {
+      outputBits(6, len, acc, accBits, dst);
+      ++im;
+      continue;
+    }
+
+    // Extend the run of zero lengths as far as a single code can express.
+    int run = 1;
+
+    while (im < iM && run < LONGEST_LONG_RUN &&
+           hufLength(hcode[im + 1]) == 0) {
+      ++im;
+      ++run;
+    }
+
+    if (run >= SHORTEST_LONG_RUN) {
+      outputBits(6, LONG_ZEROCODE_RUN, acc, accBits, dst);
+      outputBits(8, run - SHORTEST_LONG_RUN, acc, accBits, dst);
+    } else if (run >= 2) {
+      outputBits(6, SHORT_ZEROCODE_RUN + run - 2, acc, accBits, dst);
+    } else {
+      // A lone zero is stored as a plain 6-bit zero length.
+      outputBits(6, 0, acc, accBits, dst);
+    }
+
+    ++im;
+  }
+
+  // Flush the leftover bits, left-aligned in one final byte.
+  if (accBits > 0) *dst++ = (unsigned char)(acc << (8 - accBits));
+
+  *pcode = dst;
+}
+
+//
+// Unpack an encoding table packed by hufPackEncTable():
+//
+
+// Reads a table packed by hufPackEncTable() from *pcode into hcode[im..iM]
+// and converts the code lengths into canonical codes.
+//
+// Returns false when the packed table would run past its ni input bytes or
+// when a zero run would extend beyond index iM.  In that case *pcode is not
+// updated and hcode holds a partially unpacked table of raw lengths.
+static bool hufUnpackEncTable(
+    const char **pcode,  // io: ptr to packed table (updated)
+    int ni,              // i : input size (in bytes)
+    int im,              // i : min hcode index
+    int iM,              // i : max hcode index
+    long long *hcode)    //  o: encoding table [HUF_ENCSIZE]
+{
+  memset(hcode, 0, sizeof(long long) * HUF_ENCSIZE);
+
+  const char *p = *pcode;
+  long long c = 0;
+  int lc = 0;
+
+  for (; im <= iM; im++) {
+    if (p - *pcode >= ni) {
+      return false;
+    }
+
+    long long l = hcode[im] = getBits(6, c, lc, p);  // code length
+
+    int zerun = 0;
+
+    if (l == (long long)LONG_ZEROCODE_RUN) {
+      // At most 7 bits are buffered after a 6-bit read, so the 8-bit run
+      // length always consumes one more input byte.  Refuse to read it
+      // when the input is already exhausted (the former `>` test allowed a
+      // read one byte past the end).
+      if (p - *pcode >= ni) {
+        return false;
+      }
+
+      zerun = static_cast<int>(getBits(8, c, lc, p)) + SHORTEST_LONG_RUN;
+    } else if (l >= (long long)SHORT_ZEROCODE_RUN) {
+      zerun = static_cast<int>(l) - SHORT_ZEROCODE_RUN + 2;
+    } else {
+      // Ordinary code length: already stored in hcode[im].
+      continue;
+    }
+
+    if (im + zerun > iM + 1) {
+      return false;
+    }
+
+    while (zerun--) hcode[im++] = 0;
+
+    // The loop header increments im again.
+    im--;
+  }
+
+  *pcode = p;
+
+  hufCanonicalCodeTable(hcode);
+
+  return true;
+}
+
+//
+// DECODING TABLE BUILDING
+//
+
+//
+// Clear a newly allocated decoding table so that it contains only zeroes.
+//
+
+// Resets all HUF_DECSIZE slots of a caller-allocated decoding table: no
+// code length, no literal and no long-code list.
+static void hufClearDecTable(HufDec *hdecod)  // io: (allocated by caller)
+//     decoding table [HUF_DECSIZE]
+{
+  HufDec *const tableEnd = hdecod + HUF_DECSIZE;
+
+  for (HufDec *slot = hdecod; slot != tableEnd; ++slot) {
+    slot->p = NULL;
+    slot->lit = 0;
+    slot->len = 0;
+  }
+}
+
+//
+// Build a decoding hash table based on the encoding table hcode:
+//  - short codes (<= HUF_DECBITS) are resolved with a single table access;
+//  - long code entry allocations are not optimized, because long codes are
+//    infrequent;
+//  - decoding tables are used by hufDecode();
+//
+
+// Fills the caller-allocated decoding table hdecod from hcode[im..iM].
+// Assumes that hufClearDecTable(hdecod) has already been called.
+//
+// Codes of at most HUF_DECBITS bits occupy every primary slot whose index
+// starts with the code.  Longer codes are appended to a heap-allocated list
+// attached to the slot selected by their first HUF_DECBITS bits; the lists
+// are released by hufFreeDecTable().  Returns false as soon as an entry is
+// malformed or collides with a slot already in use; lists allocated up to
+// that point stay attached to the table.
+static bool hufBuildDecTable(const long long *hcode,  // i : encoding table
+                             int im,                  // i : min index in hcode
+                             int iM,                  // i : max index in hcode
+                             HufDec *hdecod)  //  o: (allocated by caller)
+//     decoding table [HUF_DECSIZE]
+{
+  for (int sym = im; sym <= iM; ++sym) {
+    const long long code = hufCode(hcode[sym]);
+    const int len = hufLength(hcode[sym]);
+
+    // The code is supposed to fit in len bits.
+    if (code >> len) return false;
+
+    // Unused symbol: nothing to store.
+    if (len == 0) continue;
+
+    if (len <= HUF_DECBITS) {
+      // Short code: claim every primary slot that starts with this code.
+      HufDec *slot = hdecod + (code << (HUF_DECBITS - len));
+
+      for (long long left = 1ULL << (HUF_DECBITS - len); left > 0;
+           --left, ++slot) {
+        // A short or long code is already stored in this slot.
+        if (slot->len || slot->p) return false;
+
+        slot->len = len;
+        slot->lit = sym;
+      }
+
+      continue;
+    }
+
+    // Long code: append the symbol to the list of the slot addressed by the
+    // leading HUF_DECBITS bits of the code.
+    HufDec &slot = hdecod[code >> (len - HUF_DECBITS)];
+
+    // A short code is already stored in this slot.
+    if (slot.len) return false;
+
+    slot.lit++;
+
+    if (slot.p) {
+      // Grow the list by one element, keeping the existing entries.
+      unsigned int *previous = slot.p;
+      slot.p = new unsigned int[slot.lit];
+
+      for (unsigned int k = 0; k < slot.lit - 1u; ++k) slot.p[k] = previous[k];
+
+      delete[] previous;
+    } else {
+      slot.p = new unsigned int[1];
+    }
+
+    slot.p[slot.lit - 1] = sym;
+  }
+
+  return true;
+}
+
+//
+// Free the long code entries of a decoding table built by hufBuildDecTable()
+//
+
+// Releases every long-code list attached to the HUF_DECSIZE slots of the
+// table and clears the corresponding pointers.
+static void hufFreeDecTable(HufDec *hdecod)  // io: Decoding table
+{
+  HufDec *const tableEnd = hdecod + HUF_DECSIZE;
+
+  for (HufDec *slot = hdecod; slot != tableEnd; ++slot) {
+    if (!slot->p) continue;
+
+    delete[] slot->p;
+    slot->p = 0;
+  }
+}
+
+//
+// ENCODING
+//
+
+// Writes one packed table entry to the bit stream: its code bits, using as
+// many bits as the entry's length field says.
+inline void outputCode(long long code, long long &c, int &lc, char *&out) {
+  const long long codeBits = hufCode(code);
+  const long long codeLength = hufLength(code);
+
+  outputBits(codeLength, codeBits, c, lc, out);
+}
+
+// Emits the symbol sCode together with runCount repetitions of it.
+//
+// Two encodings are possible: the run-length form (sCode once, then
+// runCode, then runCount as an 8-bit number) or the explicit form, which
+// writes sCode runCount + 1 times.  The run-length form is chosen when its
+// bit count is smaller than hufLength(sCode) * runCount.
+inline void sendCode(long long sCode, int runCount, long long runCode,
+                     long long &c, int &lc, char *&out) {
+  const long long symbolBits = hufLength(sCode);
+  const long long runFormBits = symbolBits + hufLength(runCode) + 8;
+  const long long explicitBits = symbolBits * runCount;
+
+  if (runFormBits < explicitBits) {
+    outputCode(sCode, c, lc, out);
+    outputCode(runCode, c, lc, out);
+    outputBits(8, runCount, c, lc, out);
+    return;
+  }
+
+  for (int k = 0; k <= runCount; ++k) outputCode(sCode, c, lc, out);
+}
+
+//
+// Encode (compress) ni values based on the Huffman encoding table hcode:
+//
+
+// Huffman-encodes the ni values of `in` with the table hcode, collapsing
+// repeats of a value (at most 255 per group) through sendCode().  Reads
+// in[0] unconditionally, so ni must be at least 1.  The output buffer is
+// not bounds-checked here.
+static int hufEncode            // return: output size (in bits)
+    (const long long *hcode,    // i : encoding table
+     const unsigned short *in,  // i : uncompressed input buffer
+     const int ni,              // i : number of values in the input buffer
+     int rlc,                   // i : rl code
+     char *out)                 //  o: compressed output buffer
+{
+  char *const first = out;
+  long long pending = 0;  // bits not yet written to out
+  int pendingBits = 0;    // number of valid bits in pending (LSB)
+  int current = in[0];    // value of the group being collected
+  int repeats = 0;        // repetitions of current seen so far
+
+  for (int i = 1; i < ni; ++i) {
+    const int next = in[i];
+
+    if (next != current || repeats >= 255) {
+      // The group ends here: emit it and start a new one.
+      sendCode(hcode[current], repeats, hcode[rlc], pending, pendingBits, out);
+      repeats = 0;
+    } else {
+      ++repeats;
+    }
+
+    current = next;
+  }
+
+  // Emit the final group.
+  sendCode(hcode[current], repeats, hcode[rlc], pending, pendingBits, out);
+
+  // Store the leftover bits left-aligned; out is not advanced past them.
+  if (pendingBits != 0) *out = (pending << (8 - pendingBits)) & 0xff;
+
+  return (out - first) * 8 + pendingBits;
+}
+
+//
+// DECODING
+//
+
+//
+// In order to force the compiler to inline them,
+// getChar() and getCode() are implemented as macros
+// instead of "inline" functions.
+//
+
+// getChar(c, lc, in): shifts the byte at `in` into the low 8 bits of the bit
+// accumulator c, advances `in` by one byte and adds 8 to the bit count lc.
+// No bounds check is performed here; callers must make sure that `in` still
+// points at readable input.  All three arguments are modified.
+#define getChar(c, lc, in)                   \
+  {                                          \
+    c = (c << 8) | *(unsigned char *)(in++); \
+    lc += 8;                                 \
+  }
+
+#if 0
+#define getCode(po, rlc, c, lc, in, out, ob, oe) \
+  {                                              \
+    if (po == rlc) {                             \
+      if (lc < 8) getChar(c, lc, in);            \
+                                                 \
+      lc -= 8;                                   \
+                                                 \
+      unsigned char cs = (c >> lc);              \
+                                                 \
+      if (out + cs > oe) return false;           \
+                                                 \
+      /* TinyEXR issue 78 */                     \
+      unsigned short s = out[-1];                \
+                                                 \
+      while (cs-- > 0) *out++ = s;               \
+    } else if (out < oe) {                       \
+      *out++ = po;                               \
+    } else {                                     \
+      return false;                              \
+    }                                            \
+  }
+#else
+// Stores one decoded symbol in the output.
+//
+// If po differs from the run-length code rlc, po is appended to the output.
+// Otherwise an 8-bit repeat count is taken from the bit stream (reading one
+// more input byte when fewer than 8 bits are buffered) and the previously
+// written value is repeated that many times.
+//
+//   c, lc      bit accumulator and number of buffered bits (updated)
+//   in/in_end  current position and end of the compressed input
+//   out        current output position (updated)
+//   ob/oe      begin and end of the output buffer
+//
+// Returns false when the input is exhausted, when the output would
+// overflow, or when a run is requested before any value was written.
+static bool getCode(int po, int rlc, long long &c, int &lc, const char *&in,
+                    const char *in_end, unsigned short *&out,
+                    const unsigned short *ob, const unsigned short *oe) {
+  if (po != rlc) {
+    if (out >= oe) return false;
+
+    *out++ = po;
+    return true;
+  }
+
+  if (lc < 8) {
+    /* TinyEXR issue 78 */
+    /* TinyEXR issue 160. in + 1 -> in */
+    if (in >= in_end) {
+      return false;
+    }
+
+    // Same effect as getChar(c, lc, in).
+    c = (c << 8) | *reinterpret_cast<const unsigned char *>(in++);
+    lc += 8;
+  }
+
+  lc -= 8;
+
+  unsigned char cs = (c >> lc);
+
+  // Compare distances instead of forming out + cs or out - 1, which would
+  // be pointers outside the buffer.
+  if (cs > oe - out) return false;
+
+  // Issue 100: a run needs a previously written value to repeat.
+  if (out <= ob) return false;
+
+  const unsigned short s = out[-1];
+
+  while (cs-- > 0) *out++ = s;
+
+  return true;
+}
+#endif
+
+//
+// Decode (uncompress) ni bits based on encoding & decoding tables:
+//
+
+// Decodes ni bits of Huffman-coded input into `out`.
+//
+// Returns true only if every code is valid and exactly `no` values were
+// produced.  Returns false on an unknown code, on input or output
+// exhaustion, and when the trailing bits hold a code longer than the bits
+// that remain.
+static bool hufDecode(const long long *hcode,  // i : encoding table
+                      const HufDec *hdecod,    // i : decoding table
+                      const char *in,          // i : compressed input buffer
+                      int ni,                  // i : input size (in bits)
+                      int rlc,                 // i : run-length code
+                      int no,  // i : expected output size (in 16-bit values)
+                      unsigned short *out)  //  o: uncompressed output buffer
+{
+  long long c = 0;
+  int lc = 0;
+  unsigned short *outb = out;          // begin
+  unsigned short *oe = out + no;       // end
+  const char *ie = in + (ni + 7) / 8;  // input byte size
+
+  //
+  // Loop on input bytes
+  //
+
+  while (in < ie) {
+    getChar(c, lc, in);
+
+    //
+    // Access decoding table
+    //
+
+    while (lc >= HUF_DECBITS) {
+      const HufDec pl = hdecod[(c >> (lc - HUF_DECBITS)) & HUF_DECMASK];
+
+      if (pl.len) {
+        //
+        // Get short code
+        //
+
+        lc -= pl.len;
+        if (!getCode(pl.lit, rlc, c, lc, in, ie, out, outb, oe)) {
+          return false;
+        }
+      } else {
+        if (!pl.p) {
+          return false;  // wrong code
+        }
+
+        //
+        // Search long code
+        //
+
+        unsigned int j;
+
+        for (j = 0; j < pl.lit; j++) {
+          int l = hufLength(hcode[pl.p[j]]);
+
+          while (lc < l && in < ie)  // get more bits
+            getChar(c, lc, in);
+
+          if (lc >= l) {
+            if (hufCode(hcode[pl.p[j]]) ==
+                ((c >> (lc - l)) & (((long long)(1) << l) - 1))) {
+              //
+              // Found : get long code
+              //
+
+              lc -= l;
+              if (!getCode(pl.p[j], rlc, c, lc, in, ie, out, outb, oe)) {
+                return false;
+              }
+              break;
+            }
+          }
+        }
+
+        if (j == pl.lit) {
+          return false;  // not found
+        }
+      }
+    }
+  }
+
+  //
+  // Get remaining (short) codes
+  //
+
+  // Drop the padding bits of the last input byte.
+  int i = (8 - ni) & 7;
+  c >>= i;
+  lc -= i;
+
+  while (lc > 0) {
+    const HufDec pl = hdecod[(c << (HUF_DECBITS - lc)) & HUF_DECMASK];
+
+    if (!pl.len) {
+      return false;  // wrong (long) code
+    }
+
+    lc -= pl.len;
+
+    // The matched code is longer than the bits that are left.  Without this
+    // check getCode() could shift by a negative bit count.
+    if (lc < 0) {
+      return false;
+    }
+
+    if (!getCode(pl.lit, rlc, c, lc, in, ie, out, outb, oe)) {
+      return false;
+    }
+  }
+
+  // Not enough data was decoded.
+  if (out - outb != no) {
+    return false;
+  }
+
+  return true;
+}
+
+// Clears the first HUF_ENCSIZE entries of freq and then counts how often
+// each value occurs in data[0..n).  freq must already have at least
+// HUF_ENCSIZE elements; it is not resized here.
+static void countFrequencies(std::vector<long long> &freq,
+                             const unsigned short data[/*n*/], int n) {
+  for (int sym = 0; sym < HUF_ENCSIZE; ++sym) {
+    freq[sym] = 0;
+  }
+
+  int i = 0;
+  while (i < n) {
+    freq[data[i]] += 1;
+    ++i;
+  }
+}
+
+// Stores i in buf as four bytes, least significant byte first.
+static void writeUInt(char buf[4], unsigned int i) {
+  unsigned char *bytes = reinterpret_cast<unsigned char *>(buf);
+
+  for (int k = 0; k < 4; ++k) {
+    bytes[k] = static_cast<unsigned char>(i >> (8 * k));
+  }
+}
+
+// Assembles an unsigned 32-bit value from four bytes stored least
+// significant byte first.
+static unsigned int readUInt(const char buf[4]) {
+  const unsigned char *bytes = reinterpret_cast<const unsigned char *>(buf);
+  unsigned int value = 0;
+
+  // Start with the most significant byte and shift the others in below it.
+  for (int k = 3; k >= 0; --k) {
+    value = (value << 8) | bytes[k];
+  }
+
+  return value;
+}
+
+//
+// EXTERNAL INTERFACE
+//
+
+// Huffman-compresses nRaw values into `compressed` and returns the number
+// of bytes written (0 when nRaw is 0).
+//
+// Output layout: a 20-byte header (im, iM, packed table length, number of
+// data bits, and a reserved zero word, each written with writeUInt),
+// followed by the packed encoding table and then the encoded data.  The
+// output buffer is not bounds-checked here.
+// NOTE(review): the result of hufBuildEncTable() is not checked.
+static int hufCompress(const unsigned short raw[], int nRaw,
+                       char compressed[]) {
+  if (nRaw == 0) return 0;
+
+  const int kHeaderBytes = 20;
+
+  // Holds the frequencies first and the encoding table afterwards.
+  std::vector<long long> table(HUF_ENCSIZE);
+  countFrequencies(table, raw, nRaw);
+
+  int minIndex = 0;
+  int maxIndex = 0;
+  hufBuildEncTable(table.data(), &minIndex, &maxIndex);
+
+  // The packed table goes right behind the header.
+  char *const packedTable = compressed + kHeaderBytes;
+  char *packedEnd = packedTable;
+  hufPackEncTable(table.data(), minIndex, maxIndex, &packedEnd);
+  const int tableLength = packedEnd - packedTable;
+
+  // The encoded data follows the packed table; maxIndex is the run-length
+  // code.
+  char *const encoded = packedEnd;
+  const int nBits = hufEncode(table.data(), raw, nRaw, maxIndex, encoded);
+  const int encodedBytes = (nBits + 7) / 8;
+
+  writeUInt(compressed + 0, minIndex);
+  writeUInt(compressed + 4, maxIndex);
+  writeUInt(compressed + 8, tableLength);
+  writeUInt(compressed + 12, nBits);
+  writeUInt(compressed + 16, 0);  // room for future extensions
+
+  return encoded + encodedBytes - compressed;
+}
+
+// Decodes a buffer produced by hufCompress() into *raw.
+//
+// *raw must already be sized to the expected number of values; it is filled
+// in place.  The input starts with a 20-byte header: im at offset 0, iM at
+// offset 4, the packed table length at offset 8 (not used here), the number
+// of data bits at offset 12 and a reserved word at offset 16.
+//
+// Returns true only when the header is plausible, the encoding table
+// unpacks, the decoding table builds, and the data decodes to exactly
+// raw->size() values.  Empty input returns false.
+static bool hufUncompress(const char compressed[], int nCompressed,
+                          std::vector<unsigned short> *raw) {
+  const int kHeaderBytes = 20;
+
+  // Also rejects empty and negative sizes, so the header reads below stay
+  // inside the input.
+  if (nCompressed < kHeaderBytes) {
+    return false;
+  }
+
+  int im = readUInt(compressed);
+  int iM = readUInt(compressed + 4);
+  // int tableLength = readUInt (compressed + 8);
+  int nBits = readUInt(compressed + 12);
+
+  if (im < 0 || im >= HUF_ENCSIZE || iM < 0 || iM >= HUF_ENCSIZE) return false;
+
+  // The bit count is stored unsigned; a value that turns negative as int
+  // cannot describe real data.
+  if (nBits < 0) return false;
+
+  const char *ptr = compressed + kHeaderBytes;
+
+  std::vector<long long> freq(HUF_ENCSIZE);
+  std::vector<HufDec> hdec(HUF_DECSIZE);
+
+  hufClearDecTable(&hdec.at(0));
+
+  // Nothing has been allocated in hdec yet, so a plain return is enough.
+  if (!hufUnpackEncTable(&ptr,
+                         static_cast<int>(nCompressed - (ptr - compressed)),
+                         im, iM, &freq.at(0))) {
+    return false;
+  }
+
+  if (nBits > 8 * (nCompressed - (ptr - compressed))) {
+    return false;
+  }
+
+  // hufBuildDecTable() may have attached long-code lists even when it
+  // fails, so the table is always freed before returning.
+  bool ok = hufBuildDecTable(&freq.at(0), im, iM, &hdec.at(0));
+
+  if (ok) {
+    ok = hufDecode(&freq.at(0), &hdec.at(0), ptr, nBits, iM,
+                   static_cast<int>(raw->size()), raw->data());
+  }
+
+  hufFreeDecTable(&hdec.at(0));
+
+  return ok;
+}
+
+//
+// Functions to compress the range of values in the pixel data
+//
+
+const int USHORT_RANGE = 1 << 16;            // count of distinct 16-bit values
+const int BITMAP_SIZE = USHORT_RANGE / 8;    // bytes needed for one bit per value
+
+// Builds a bitmap with one bit per 16-bit value that occurs in data[0..nData)
+// and reports the lowest and highest bitmap byte that is non-zero.
+//
+// The bit for value 0 is always cleared.  If no byte is set, minNonZero is
+// BITMAP_SIZE - 1 and maxNonZero is 0.
+static void bitmapFromData(const unsigned short data[/*nData*/], int nData,
+                           unsigned char bitmap[BITMAP_SIZE],
+                           unsigned short &minNonZero,
+                           unsigned short &maxNonZero) {
+  for (int byteIndex = 0; byteIndex < BITMAP_SIZE; ++byteIndex) {
+    bitmap[byteIndex] = 0;
+  }
+
+  for (int i = 0; i < nData; ++i) {
+    const unsigned short value = data[i];
+    bitmap[value >> 3] |= (1 << (value & 7));
+  }
+
+  // Zero is not stored explicitly; the data is assumed to always contain
+  // zeroes.
+  bitmap[0] &= ~1;
+
+  minNonZero = BITMAP_SIZE - 1;
+  maxNonZero = 0;
+
+  for (int byteIndex = 0; byteIndex < BITMAP_SIZE; ++byteIndex) {
+    if (!bitmap[byteIndex]) continue;
+
+    if (byteIndex < minNonZero) minNonZero = byteIndex;
+    if (byteIndex > maxNonZero) maxNonZero = byteIndex;
+  }
+}
+
+// Fills lut so that every value marked in bitmap (and the value 0) maps to
+// its rank among the marked values; unmarked values map to 0.  Returns the
+// largest rank handed out, i.e. the number of marked values minus 1.
+static unsigned short forwardLutFromBitmap(
+    const unsigned char bitmap[BITMAP_SIZE], unsigned short lut[USHORT_RANGE]) {
+  int nextRank = 0;
+
+  for (int value = 0; value < USHORT_RANGE; ++value) {
+    const bool marked =
+        (value == 0) || (bitmap[value >> 3] & (1 << (value & 7)));
+
+    if (marked) {
+      lut[value] = nextRank;
+      ++nextRank;
+    } else {
+      lut[value] = 0;
+    }
+  }
+
+  return nextRank - 1;
+}
+
+// Fills lut so that lut[k] is the k-th value marked in bitmap (the value 0
+// always counts as marked); the remaining entries are set to 0.  Returns
+// the index of the last entry filled from the bitmap, i.e. the number of
+// marked values minus 1.
+static unsigned short reverseLutFromBitmap(
+    const unsigned char bitmap[BITMAP_SIZE], unsigned short lut[USHORT_RANGE]) {
+  int used = 0;
+
+  for (int value = 0; value < USHORT_RANGE; ++value) {
+    const bool marked =
+        (value == 0) || (bitmap[value >> 3] & (1 << (value & 7)));
+
+    if (marked) {
+      lut[used] = value;
+      ++used;
+    }
+  }
+
+  const int lastUsed = used - 1;
+
+  // Clear the unused tail of the table.
+  for (int k = used; k < USHORT_RANGE; ++k) {
+    lut[k] = 0;
+  }
+
+  return lastUsed;
+}
+
+// Replaces each of the nData values in data by its entry in lut, in place.
+static void applyLut(const unsigned short lut[USHORT_RANGE],
+                     unsigned short data[/*nData*/], int nData) {
+  for (int i = 0; i < nData; ++i) {
+    unsigned short &value = data[i];
+    value = lut[value];
+  }
+}
+
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif  // __clang__
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+// PIZ-compresses inSize bytes of scanline data from inPtr into outPtr and
+// stores the number of bytes produced in *outSize.
+//
+// Steps: regroup the scanlines per channel as 16-bit values, remap the
+// values through a lookup table built from the bitmap of used values,
+// wavelet-encode each channel, then Huffman-encode the result.  Output
+// layout: minNonZero, maxNonZero (2 bytes each), the bitmap bytes between
+// them, a 4-byte length, and the Huffman data.  If the result is not smaller
+// than the input, the input is copied to outPtr unchanged (Issue 40).
+//
+// Returns true, except on builds where TINYEXR_LITTLE_ENDIAN is false, which
+// return false without compressing.  The capacity of outPtr is not checked
+// here.
+static bool CompressPiz(unsigned char *outPtr, unsigned int *outSize,
+                        const unsigned char *inPtr, size_t inSize,
+                        const std::vector<ChannelInfo> &channelInfo,
+                        int data_width, int num_lines) {
+#if !TINYEXR_LITTLE_ENDIAN
+  // @todo { PIZ compression on BigEndian architecture. }
+  return false;
+#endif
+
+  // Working copy of the pixel data as 16-bit values.
+  // Assume `inSize` is multiple of 2 or 4.
+  std::vector<unsigned short> tmpBuffer(inSize / sizeof(unsigned short));
+  unsigned short *const tmpBegin = &tmpBuffer.at(0);
+  const int tmpCount = static_cast<int>(tmpBuffer.size());
+
+  //
+  // Lay the channels out back to back inside tmpBuffer
+  //
+
+  std::vector<PIZChannelData> channelData(channelInfo.size());
+  unsigned short *nextChannel = tmpBegin;
+
+  for (size_t c = 0; c < channelData.size(); ++c) {
+    PIZChannelData &cd = channelData[c];
+
+    // HALF pixels take one 16-bit value, UINT and FLOAT take two.
+    const bool isHalf =
+        channelInfo[c].requested_pixel_type == TINYEXR_PIXELTYPE_HALF;
+    const size_t pixelSize = isHalf ? sizeof(short) : sizeof(int);
+
+    cd.start = nextChannel;
+    cd.end = nextChannel;
+    cd.nx = data_width;
+    cd.ny = num_lines;
+    cd.size = static_cast<int>(pixelSize / sizeof(short));
+
+    nextChannel += cd.nx * cd.ny * cd.size;
+  }
+
+  //
+  // Copy every scanline of every channel into that channel's region
+  //
+
+  const unsigned char *src = inPtr;
+
+  for (int y = 0; y < num_lines; ++y) {
+    for (size_t c = 0; c < channelData.size(); ++c) {
+      PIZChannelData &cd = channelData[c];
+
+      const size_t count = static_cast<size_t>(cd.nx * cd.size);
+      const size_t bytes = count * sizeof(unsigned short);
+
+      memcpy(cd.end, src, bytes);
+      src += bytes;
+      cd.end += count;
+    }
+  }
+
+  //
+  // Compress the range of values through a lookup table
+  //
+
+  std::vector<unsigned char> bitmap(BITMAP_SIZE);
+  unsigned short minNonZero;
+  unsigned short maxNonZero;
+
+  bitmapFromData(tmpBegin, tmpCount, bitmap.data(), minNonZero, maxNonZero);
+
+  std::vector<unsigned short> lut(USHORT_RANGE);
+  const unsigned short maxValue =
+      forwardLutFromBitmap(bitmap.data(), lut.data());
+  applyLut(lut.data(), tmpBegin, tmpCount);
+
+  //
+  // Store range compression info in the output buffer
+  //
+
+  char *dst = reinterpret_cast<char *>(outPtr);
+
+  memcpy(dst, &minNonZero, sizeof(unsigned short));
+  dst += sizeof(unsigned short);
+  memcpy(dst, &maxNonZero, sizeof(unsigned short));
+  dst += sizeof(unsigned short);
+
+  if (minNonZero <= maxNonZero) {
+    const size_t bitmapBytes =
+        static_cast<size_t>(maxNonZero - minNonZero + 1);
+
+    memcpy(dst, reinterpret_cast<char *>(&bitmap[0] + minNonZero),
+           bitmapBytes);
+    dst += bitmapBytes;
+  }
+
+  //
+  // Apply wavelet encoding, one pass per 16-bit component of each channel
+  //
+
+  for (size_t c = 0; c < channelData.size(); ++c) {
+    PIZChannelData &cd = channelData[c];
+
+    for (int component = 0; component < cd.size; ++component) {
+      wav2Encode(cd.start + component, cd.nx, cd.size, cd.ny, cd.nx * cd.size,
+                 maxValue);
+    }
+  }
+
+  //
+  // Apply Huffman encoding; append the result to the output buffer
+  //
+
+  // A 4-byte length header precedes the Huffman data.  It is written as zero
+  // first and overwritten once the length is known.
+  char *const lengthField = dst;
+  const int zero = 0;
+  memcpy(lengthField, &zero, sizeof(int));
+  dst += sizeof(int);
+
+  const int length = hufCompress(tmpBegin, tmpCount, dst);
+  memcpy(lengthField, &length, sizeof(int));
+
+  (*outSize) = static_cast<unsigned int>(
+      (reinterpret_cast<unsigned char *>(dst) - outPtr) +
+      static_cast<unsigned int>(length));
+
+  // Use uncompressed data when compressed data is larger than uncompressed.
+  // (Issue 40)
+  const bool noGain = (*outSize) >= inSize;
+
+  if (noGain) {
+    (*outSize) = static_cast<unsigned int>(inSize);
+    memcpy(outPtr, inPtr, inSize);
+  }
+
+  return true;
+}
+
+// Decompresses one PIZ-compressed block of inLen bytes from inPtr into
+// outPtr.
+//
+// tmpBufSizeInBytes is the size of the uncompressed block.  When inLen
+// equals it, the data is stored uncompressed and copied as is (Issue 40).
+// Otherwise the block is read as: minNonZero, maxNonZero (2 bytes each), the
+// bitmap bytes between them, a 4-byte Huffman data length, and the Huffman
+// data.  The decoded values are wavelet-decoded per channel, mapped back
+// through the lookup table and written to outPtr scanline by scanline.
+//
+// Returns false on truncated or inconsistent input, when Huffman decoding
+// fails, when the channel layout does not fit the uncompressed size, and on
+// builds where TINYEXR_LITTLE_ENDIAN is false.  The capacity of outPtr is
+// not checked here.
+static bool DecompressPiz(unsigned char *outPtr, const unsigned char *inPtr,
+                          size_t tmpBufSizeInBytes, size_t inLen, int num_channels,
+                          const EXRChannelInfo *channels, int data_width,
+                          int num_lines) {
+  if (inLen == tmpBufSizeInBytes) {
+    // Data is not compressed(Issue 40).
+    memcpy(outPtr, inPtr, inLen);
+    return true;
+  }
+
+  // The vector constructor zero-fills the bitmap.
+  std::vector<unsigned char> bitmap(BITMAP_SIZE);
+  unsigned short minNonZero;
+  unsigned short maxNonZero;
+
+#if !TINYEXR_LITTLE_ENDIAN
+  // @todo { PIZ compression on BigEndian architecture. }
+  return false;
+#endif
+
+  if (inLen < 4) {
+    return false;
+  }
+
+  if (num_channels < 0 || data_width < 0 || num_lines < 0) {
+    return false;
+  }
+
+  size_t readLen = 0;
+
+  const unsigned char *ptr = inPtr;
+  tinyexr::cpy2(&minNonZero, reinterpret_cast<const unsigned short *>(ptr));
+  tinyexr::cpy2(&maxNonZero, reinterpret_cast<const unsigned short *>(ptr + 2));
+  ptr += 4;
+  readLen += 4;
+
+  if (maxNonZero >= BITMAP_SIZE) {
+    return false;
+  }
+
+  if (minNonZero <= maxNonZero) {
+    const size_t bitmapLen = static_cast<size_t>(maxNonZero - minNonZero + 1);
+
+    if ((bitmapLen + readLen) > inLen) {
+      // Input too short
+      return false;
+    }
+
+    memcpy(reinterpret_cast<char *>(&bitmap[0] + minNonZero), ptr, bitmapLen);
+    ptr += bitmapLen;
+    readLen += bitmapLen;
+  } else if (!((minNonZero == (BITMAP_SIZE - 1)) && (maxNonZero == 0))) {
+    // Issue 194: the only valid min > max combination is the one that means
+    // "all pixels are zero", for which no bitmap bytes are stored.
+    return false;
+  }
+
+  // The vector constructor zero-fills the table.
+  std::vector<unsigned short> lut(USHORT_RANGE);
+  unsigned short maxValue = reverseLutFromBitmap(bitmap.data(), lut.data());
+
+  //
+  // Huffman decoding
+  //
+
+  if ((readLen + 4) > inLen) {
+    return false;
+  }
+
+  int length = 0;
+
+  tinyexr::cpy4(&length, reinterpret_cast<const int *>(ptr));
+  ptr += sizeof(int);
+  readLen += sizeof(int);
+
+  // The length comes from the file: reject negative values and values that
+  // reach past the end of the input.
+  if (length < 0 || static_cast<size_t>(length) > inLen - readLen) {
+    return false;
+  }
+
+  std::vector<unsigned short> tmpBuffer(tmpBufSizeInBytes / sizeof(unsigned short));
+
+  if (tmpBuffer.empty()) {
+    return false;
+  }
+
+  if (!hufUncompress(reinterpret_cast<const char *>(ptr), length, &tmpBuffer)) {
+    return false;
+  }
+
+  //
+  // Wavelet decoding
+  //
+
+  std::vector<PIZChannelData> channelData(static_cast<size_t>(num_channels));
+
+  unsigned short *tmpBufferEnd = &tmpBuffer.at(0);
+  size_t usedValues = 0;  // 16-bit values already assigned to channels
+
+  for (size_t i = 0; i < static_cast<size_t>(num_channels); ++i) {
+    const EXRChannelInfo &chan = channels[i];
+
+    size_t pixelSize = sizeof(int);  // UINT and FLOAT
+    if (chan.pixel_type == TINYEXR_PIXELTYPE_HALF) {
+      pixelSize = sizeof(short);
+    }
+
+    channelData[i].start = tmpBufferEnd;
+    channelData[i].end = channelData[i].start;
+    channelData[i].nx = data_width;
+    channelData[i].ny = num_lines;
+    // channelData[i].ys = 1;
+    channelData[i].size = static_cast<int>(pixelSize / sizeof(short));
+
+    // Make sure the channel fits into what is left of tmpBuffer before the
+    // wavelet decoder touches it.  The division avoids overflow in the
+    // product.
+    const size_t perLine = static_cast<size_t>(data_width) *
+                           static_cast<size_t>(channelData[i].size);
+    const size_t remaining = tmpBuffer.size() - usedValues;
+
+    if (num_lines > 0 && perLine > remaining / static_cast<size_t>(num_lines)) {
+      return false;
+    }
+
+    const size_t channelValues = perLine * static_cast<size_t>(num_lines);
+
+    usedValues += channelValues;
+    tmpBufferEnd += channelValues;
+  }
+
+  for (size_t i = 0; i < channelData.size(); ++i) {
+    PIZChannelData &cd = channelData[i];
+
+    for (int j = 0; j < cd.size; ++j) {
+      wav2Decode(cd.start + j, cd.nx, cd.size, cd.ny, cd.nx * cd.size,
+                 maxValue);
+    }
+  }
+
+  //
+  // Expand the pixel data to their original range
+  //
+
+  applyLut(lut.data(), &tmpBuffer.at(0), static_cast<int>(tmpBuffer.size()));
+
+  //
+  // Interleave the channels back into scanline order
+  //
+
+  for (int y = 0; y < num_lines; y++) {
+    for (size_t i = 0; i < channelData.size(); ++i) {
+      PIZChannelData &cd = channelData[i];
+
+      // if (modp (y, cd.ys) != 0)
+      //    continue;
+
+      size_t n = static_cast<size_t>(cd.nx * cd.size);
+      memcpy(outPtr, cd.end, static_cast<size_t>(n * sizeof(unsigned short)));
+      outPtr += n * sizeof(unsigned short);
+      cd.end += n;
+    }
+  }
+
+  return true;
+}
+#endif  // TINYEXR_USE_PIZ
+
+#if TINYEXR_USE_ZFP
+
+// Parameters selecting how pixel data is compressed/decompressed with ZFP.
+// `type` picks the mode, and each mode uses exactly one of the value fields:
+//   TINYEXR_ZFP_COMPRESSIONTYPE_RATE      -> rate      (zfp_stream_set_rate)
+//   TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION -> precision (zfp_stream_set_precision)
+//   TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY  -> tolerance (zfp_stream_set_accuracy)
+struct ZFPCompressionParam {
+  double rate;
+  unsigned int precision;
+  unsigned int __pad0;
+  double tolerance;
+  int type;  // TINYEXR_ZFP_COMPRESSIONTYPE_*
+  unsigned int __pad1;
+
+  // Defaults to fixed-rate mode with rate 2.0. Every member, including the
+  // explicit padding fields, is initialized so that a default-constructed
+  // object never carries indeterminate values.
+  ZFPCompressionParam()
+      : rate(2.0),
+        precision(0),
+        __pad0(0),
+        tolerance(0.0),
+        type(TINYEXR_ZFP_COMPRESSIONTYPE_RATE),
+        __pad1(0) {}
+};
+
+// Fills `param` from the ZFP-related entries of `attributes`.
+//
+// The mode comes from the 1-byte `zfpCompressionType` attribute. The value
+// attribute that belongs to that mode must then be present with the expected
+// size:
+//   TINYEXR_ZFP_COMPRESSIONTYPE_RATE
+//       `zfpCompressionRate`      (8 bytes, double) -> param->rate
+//   TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION
+//       `zfpCompressionPrecision` (4 bytes, int)    -> param->precision
+//   TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY
+//       `zfpCompressionTolerance` (8 bytes, double) -> param->tolerance
+//
+// Returns true when the type and its value were both read. Otherwise returns
+// false and, when `err` is non-NULL, appends a description to `*err`.
+// `param->type` may already have been overwritten when false is returned.
+//
+// Values are copied out with memcpy rather than by dereferencing a casted
+// pointer, so the read does not depend on how the attribute bytes are aligned.
+// NOTE(review): the bytes are interpreted in host byte order, exactly as the
+// previous pointer-cast reads did -- confirm for big-endian targets.
+static bool FindZFPCompressionParam(ZFPCompressionParam *param,
+                                    const EXRAttribute *attributes,
+                                    int num_attributes, std::string *err) {
+  // Step 1: locate the compression type. The first attribute carrying this
+  // name decides the outcome, even when its size is wrong.
+  bool foundType = false;
+
+  for (int i = 0; i < num_attributes; i++) {
+    if (strcmp(attributes[i].name, "zfpCompressionType") != 0) {
+      continue;
+    }
+
+    if (attributes[i].size != 1) {
+      if (err) {
+        (*err) +=
+            "zfpCompressionType attribute must be uchar(1 byte) type.\n";
+      }
+      return false;
+    }
+
+    param->type = static_cast<int>(attributes[i].value[0]);
+    foundType = true;
+    break;
+  }
+
+  if (!foundType) {
+    if (err) {
+      (*err) += "`zfpCompressionType` attribute not found.\n";
+    }
+    return false;
+  }
+
+  // Step 2: read the value attribute that belongs to the selected mode.
+  if (param->type == TINYEXR_ZFP_COMPRESSIONTYPE_RATE) {
+    for (int i = 0; i < num_attributes; i++) {
+      if ((strcmp(attributes[i].name, "zfpCompressionRate") == 0) &&
+          (attributes[i].size == 8)) {
+        memcpy(&param->rate, attributes[i].value, sizeof(param->rate));
+        return true;
+      }
+    }
+
+    if (err) {
+      (*err) += "`zfpCompressionRate` attribute not found.\n";
+    }
+
+  } else if (param->type == TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION) {
+    for (int i = 0; i < num_attributes; i++) {
+      if ((strcmp(attributes[i].name, "zfpCompressionPrecision") == 0) &&
+          (attributes[i].size == 4)) {
+        // The codec functions read this mode's value from `precision`, so
+        // that is the field to fill (it used to be written to `rate`, which
+        // left `precision` at its default).
+        int precision = 0;
+        memcpy(&precision, attributes[i].value, sizeof(precision));
+        param->precision = static_cast<unsigned int>(precision);
+        return true;
+      }
+    }
+
+    if (err) {
+      (*err) += "`zfpCompressionPrecision` attribute not found.\n";
+    }
+
+  } else if (param->type == TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY) {
+    for (int i = 0; i < num_attributes; i++) {
+      if ((strcmp(attributes[i].name, "zfpCompressionTolerance") == 0) &&
+          (attributes[i].size == 8)) {
+        memcpy(&param->tolerance, attributes[i].value,
+               sizeof(param->tolerance));
+        return true;
+      }
+    }
+
+    if (err) {
+      (*err) += "`zfpCompressionTolerance` attribute not found.\n";
+    }
+  } else {
+    if (err) {
+      (*err) += "Unknown value specified for `zfpCompressionType`.\n";
+    }
+  }
+
+  return false;
+}
+
+// Decodes a ZFP stream into planar float data.
+//
+// Assume pixel format is FLOAT for all channels.
+//
+//   dst            Output. Written as dst[c * W * H + y * W + x] for every
+//                  channel c, so it must hold W * H * num_channels floats
+//                  (W = dst_width, H = dst_num_lines).
+//   src, src_size  Compressed bytes and their count.
+//   param          Mode (`type`) plus the value field belonging to that mode.
+//
+// Returns false, releasing every zfp object it created, when a dimension is
+// not a positive multiple of 4, a zfp object cannot be created, `param.type`
+// is unknown, or `src_size` exceeds zfp's maximum stream size for this image.
+static bool DecompressZfp(float *dst, int dst_width, int dst_num_lines,
+                          size_t num_channels, const unsigned char *src,
+                          unsigned long src_size,
+                          const ZFPCompressionParam &param) {
+  // The loops below index with size_t, so a zero or negative size must never
+  // get that far.
+  if ((dst_width <= 0) || (dst_num_lines <= 0)) {
+    return false;
+  }
+
+  size_t uncompressed_size =
+      size_t(dst_width) * size_t(dst_num_lines) * num_channels;
+
+  if (uncompressed_size == src_size) {
+    // Data is not compressed(Issue 40).
+    // NOTE(review): this copy does not return, so the decoding below still
+    // runs and overwrites `dst`. It is deliberately left unchanged: the
+    // comparison is element count vs. byte count, and a fixed-rate stream
+    // can presumably have exactly this size, so returning early here could
+    // misread valid compressed data. Confirm the intended behaviour.
+    memcpy(dst, src, src_size);
+  }
+
+  // ZFP works on 4x4 blocks. The macro is defined outside this section; the
+  // explicit test is kept so the rejection does not depend on its definition.
+  TINYEXR_CHECK_AND_RETURN_C((dst_width % 4) == 0, false);
+  TINYEXR_CHECK_AND_RETURN_C((dst_num_lines % 4) == 0, false);
+
+  if ((size_t(dst_width) & 3U) || (size_t(dst_num_lines) & 3U)) {
+    return false;
+  }
+
+  // All channels are stacked vertically into one 2D field. The field is only
+  // used to ask zfp for the maximum stream size.
+  zfp_field *field =
+      zfp_field_2d(reinterpret_cast<void *>(const_cast<unsigned char *>(src)),
+                   zfp_type_float, static_cast<unsigned int>(dst_width),
+                   static_cast<unsigned int>(dst_num_lines) *
+                       static_cast<unsigned int>(num_channels));
+  zfp_stream *zfp = zfp_stream_open(NULL);
+
+  if ((field == NULL) || (zfp == NULL)) {
+    if (field != NULL) {
+      zfp_field_free(field);
+    }
+    if (zfp != NULL) {
+      zfp_stream_close(zfp);
+    }
+    return false;
+  }
+
+  bool known_type = true;
+  if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_RATE) {
+    zfp_stream_set_rate(zfp, param.rate, zfp_type_float, /* dimension */ 2,
+                        /* write random access */ 0);
+  } else if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION) {
+    zfp_stream_set_precision(zfp, param.precision);
+  } else if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY) {
+    zfp_stream_set_accuracy(zfp, param.tolerance);
+  } else {
+    known_type = false;
+  }
+
+  // The work buffer is sized to zfp's upper bound. Input longer than that
+  // cannot be a valid stream for this image and would overflow the buffer.
+  const size_t buf_size = known_type ? zfp_stream_maximum_size(zfp, field) : 0;
+  if ((buf_size == 0) || (src_size > buf_size)) {
+    zfp_field_free(field);
+    zfp_stream_close(zfp);
+    return false;
+  }
+
+  // Zero-filled copy of the input, padded up to the maximum stream size.
+  std::vector<unsigned char> buf(buf_size);
+  if (src_size > 0) {
+    memcpy(&buf.at(0), src, src_size);
+  }
+
+  bitstream *stream = stream_open(&buf.at(0), buf_size);
+  if (stream == NULL) {
+    zfp_field_free(field);
+    zfp_stream_close(zfp);
+    return false;
+  }
+  zfp_stream_set_bit_stream(zfp, stream);
+  zfp_stream_rewind(zfp);
+
+  size_t image_size = size_t(dst_width) * size_t(dst_num_lines);
+
+  for (size_t c = 0; c < size_t(num_channels); c++) {
+    // decompress 4x4 pixel block.
+    for (size_t y = 0; y < size_t(dst_num_lines); y += 4) {
+      for (size_t x = 0; x < size_t(dst_width); x += 4) {
+        float fblock[16];
+        zfp_decode_block_float_2(zfp, fblock);
+        for (size_t j = 0; j < 4; j++) {
+          for (size_t i = 0; i < 4; i++) {
+            dst[c * image_size + ((y + j) * size_t(dst_width) + (x + i))] =
+                fblock[j * 4 + i];
+          }
+        }
+      }
+    }
+  }
+
+  zfp_field_free(field);
+  zfp_stream_close(zfp);
+  stream_close(stream);
+
+  return true;
+}
+
+// Compresses planar float data with ZFP.
+//
+// Assume pixel format is FLOAT for all channels.
+//
+//   outBuf   Resized to zfp's maximum stream size for this image and filled
+//            with the compressed stream. Only the first `*outSize` bytes are
+//            meaningful.
+//   outSize  Receives the compressed size in bytes.
+//   inPtr    Input, read as inPtr[c * W * H + y * W + x] for every channel c
+//            (W = width, H = num_lines).
+//   param    Mode (`type`) plus the value field belonging to that mode.
+//
+// Returns false, releasing every zfp object it created, when a dimension is
+// not a positive multiple of 4, `num_channels` is negative, a zfp object
+// cannot be created, or `param.type` is unknown.
+static bool CompressZfp(std::vector<unsigned char> *outBuf,
+                        unsigned int *outSize, const float *inPtr, int width,
+                        int num_lines, int num_channels,
+                        const ZFPCompressionParam &param) {
+  // The loops below index with size_t, so negative values must be rejected
+  // before they are converted into huge unsigned counts.
+  if ((width <= 0) || (num_lines <= 0) || (num_channels < 0)) {
+    return false;
+  }
+
+  // ZFP works on 4x4 blocks. The macro is defined outside this section; the
+  // explicit test is kept so the rejection does not depend on its definition.
+  TINYEXR_CHECK_AND_RETURN_C((width % 4) == 0, false);
+  TINYEXR_CHECK_AND_RETURN_C((num_lines % 4) == 0, false);
+
+  if ((size_t(width) & 3U) || (size_t(num_lines) & 3U)) {
+    return false;
+  }
+
+  // create input array.
+  // All channels are stacked vertically into one 2D field. The product is
+  // formed in unsigned arithmetic to avoid signed overflow. The field is only
+  // used to ask zfp for the maximum stream size.
+  zfp_field *field =
+      zfp_field_2d(reinterpret_cast<void *>(const_cast<float *>(inPtr)),
+                   zfp_type_float, static_cast<unsigned int>(width),
+                   static_cast<unsigned int>(num_lines) *
+                       static_cast<unsigned int>(num_channels));
+  zfp_stream *zfp = zfp_stream_open(NULL);
+
+  if ((field == NULL) || (zfp == NULL)) {
+    if (field != NULL) {
+      zfp_field_free(field);
+    }
+    if (zfp != NULL) {
+      zfp_stream_close(zfp);
+    }
+    return false;
+  }
+
+  bool known_type = true;
+  if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_RATE) {
+    zfp_stream_set_rate(zfp, param.rate, zfp_type_float, 2, 0);
+  } else if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION) {
+    zfp_stream_set_precision(zfp, param.precision);
+  } else if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY) {
+    zfp_stream_set_accuracy(zfp, param.tolerance);
+  } else {
+    known_type = false;
+  }
+
+  const size_t buf_size = known_type ? zfp_stream_maximum_size(zfp, field) : 0;
+
+  // The field is not needed once the size is known.
+  zfp_field_free(field);
+
+  if (buf_size == 0) {
+    zfp_stream_close(zfp);
+    return false;
+  }
+
+  outBuf->resize(buf_size);
+
+  bitstream *stream = stream_open(&outBuf->at(0), buf_size);
+  if (stream == NULL) {
+    zfp_stream_close(zfp);
+    return false;
+  }
+  zfp_stream_set_bit_stream(zfp, stream);
+
+  size_t image_size = size_t(width) * size_t(num_lines);
+
+  for (size_t c = 0; c < size_t(num_channels); c++) {
+    // compress 4x4 pixel block.
+    for (size_t y = 0; y < size_t(num_lines); y += 4) {
+      for (size_t x = 0; x < size_t(width); x += 4) {
+        float fblock[16];
+        for (size_t j = 0; j < 4; j++) {
+          for (size_t i = 0; i < 4; i++) {
+            fblock[j * 4 + i] =
+                inPtr[c * image_size + ((y + j) * size_t(width) + (x + i))];
+          }
+        }
+        zfp_encode_block_float_2(zfp, fblock);
+      }
+    }
+  }
+
+  zfp_stream_flush(zfp);
+  (*outSize) = static_cast<unsigned int>(zfp_stream_compressed_size(zfp));
+
+  zfp_stream_close(zfp);
+  // The bit stream is a separate object from the zfp stream and was never
+  // released before. It only wraps `outBuf`, whose contents stay valid.
+  stream_close(stream);
+
+  return true;
+}
+
+#endif
+
+//
+// -----------------------------------------------------------------
+//
+
+// heuristics: limit of 1024 * 8192 (= 8388608). NOTE(review): presumably a sanity bound on image dimensions -- its use is outside this section, confirm.
+#define TINYEXR_DIMENSION_THRESHOLD (1024 * 8192)
+
+// TODO(syoyo): Refactor function arguments.
+static bool DecodePixelData(/* out */ unsigned char **out_images,
+                            const int *requested_pixel_types,
+                            const unsigned char *data_ptr, size_t data_len,
+                            int compression_type, int line_order, int width,
+                            int height, int x_stride, int y, int line_no,
+                            int num_lines, size_t pixel_data_size,
+                            size_t num_attributes,
+                            const EXRAttribute *attributes, size_t num_channels,
+                            const EXRChannelInfo *channels,
+                            const std::vector<size_t> &channel_offset_list) {
+  if (compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) {  // PIZ
+#if TINYEXR_USE_PIZ
+    if ((width == 0) || (num_lines == 0) || (pixel_data_size == 0)) {
+      // Invalid input #90
+      return false;
+    }
+
+    // Allocate original data size.
+    std::vector<unsigned char> outBuf(static_cast<size_t>(
+        static_cast<size_t>(width * num_lines) * pixel_data_size));
+    size_t tmpBufLen = outBuf.size();
+
+    bool ret = tinyexr::DecompressPiz(
+        reinterpret_cast<unsigned char *>(&outBuf.at(0)), data_ptr, tmpBufLen,
+        data_len, static_cast<int>(num_channels), channels, width, num_lines);
+
+    if (!ret) {
+      return false;
+    }
+
+    // For PIZ_COMPRESSION:
+    //   pixel sample data for channel 0 for scanline 0
+    //   pixel sample data for channel 1 for scanline 0
+    //   pixel sample data for channel ... for scanline 0
+    //   pixel sample data for channel n for scanline 0
+    //   pixel sample data for channel 0 for scanline 1
+    //   pixel sample data for channel 1 for scanline 1
+    //   pixel sample data for channel ... for scanline 1
+    //   pixel sample data for channel n for scanline 1
+    //   ...
+    for (size_t c = 0; c < static_cast<size_t>(num_channels); c++) {
+      if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) {
+        for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
+          const unsigned short *line_ptr = reinterpret_cast<unsigned short *>(
+              &outBuf.at(v * pixel_data_size * static_cast<size_t>(width) +
+                         channel_offset_list[c] * static_cast<size_t>(width)));
+          for (size_t u = 0; u < static_cast<size_t>(width); u++) {
+            FP16 hf;
+
+            // hf.u = line_ptr[u];
+            // use `cpy` to avoid unaligned memory access when compiler's
+            // optimization is on.
+            tinyexr::cpy2(&(hf.u), line_ptr + u);
+
+            tinyexr::swap2(reinterpret_cast<unsigned short *>(&hf.u));
+
+            if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) {
+              unsigned short *image =
+                  reinterpret_cast<unsigned short **>(out_images)[c];
+              if (line_order == 0) {
+                image += (static_cast<size_t>(line_no) + v) *
+                             static_cast<size_t>(x_stride) +
+                         u;
+              } else {
+                image += static_cast<size_t>(
+                             (height - 1 - (line_no + static_cast<int>(v)))) *
+                             static_cast<size_t>(x_stride) +
+                         u;
+              }
+              *image = hf.u;
+            } else {  // HALF -> FLOAT
+              FP32 f32 = half_to_float(hf);
+              float *image = reinterpret_cast<float **>(out_images)[c];
+              size_t offset = 0;
+              if (line_order == 0) {
+                offset = (static_cast<size_t>(line_no) + v) *
+                             static_cast<size_t>(x_stride) +
+                         u;
+              } else {
+                offset = static_cast<size_t>(
+                             (height - 1 - (line_no + static_cast<int>(v)))) *
+                             static_cast<size_t>(x_stride) +
+                         u;
+              }
+              image += offset;
+              *image = f32.f;
+            }
+          }
+        }
+      } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) {
+        TINYEXR_CHECK_AND_RETURN_C(requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT, false);
+
+        for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
+          const unsigned int *line_ptr = reinterpret_cast<unsigned int *>(
+              &outBuf.at(v * pixel_data_size * static_cast<size_t>(width) +
+                         channel_offset_list[c] * static_cast<size_t>(width)));
+          for (size_t u = 0; u < static_cast<size_t>(width); u++) {
+            unsigned int val;
+            // val = line_ptr[u];
+            tinyexr::cpy4(&val, line_ptr + u);
+
+            tinyexr::swap4(&val);
+
+            unsigned int *image =
+                reinterpret_cast<unsigned int **>(out_images)[c];
+            if (line_order == 0) {
+              image += (static_cast<size_t>(line_no) + v) *
+                           static_cast<size_t>(x_stride) +
+                       u;
+            } else {
+              image += static_cast<size_t>(
+                           (height - 1 - (line_no + static_cast<int>(v)))) *
+                           static_cast<size_t>(x_stride) +
+                       u;
+            }
+            *image = val;
+          }
+        }
+      } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) {
+        TINYEXR_CHECK_AND_RETURN_C(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT, false);
+        for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
+          const float *line_ptr = reinterpret_cast<float *>(&outBuf.at(
+              v * pixel_data_size * static_cast<size_t>(width) +
+              channel_offset_list[c] * static_cast<size_t>(width)));
+          for (size_t u = 0; u < static_cast<size_t>(width); u++) {
+            float val;
+            // val = line_ptr[u];
+            tinyexr::cpy4(&val, line_ptr + u);
+
+            tinyexr::swap4(reinterpret_cast<unsigned int *>(&val));
+
+            float *image = reinterpret_cast<float **>(out_images)[c];
+            if (line_order == 0) {
+              image += (static_cast<size_t>(line_no) + v) *
+                           static_cast<size_t>(x_stride) +
+                       u;
+            } else {
+              image += static_cast<size_t>(
+                           (height - 1 - (line_no + static_cast<int>(v)))) *
+                           static_cast<size_t>(x_stride) +
+                       u;
+            }
+            *image = val;
+          }
+        }
+      } else {
+        return false;
+      }
+    }
+#else
+    return false;
+#endif
+
+  } else if (compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS ||
+             compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) {
+    // Allocate original data size.
+    std::vector<unsigned char> outBuf(static_cast<size_t>(width) *
+                                      static_cast<size_t>(num_lines) *
+                                      pixel_data_size);
+
+    unsigned long dstLen = static_cast<unsigned long>(outBuf.size());
+    TINYEXR_CHECK_AND_RETURN_C(dstLen > 0, false);
+    if (!tinyexr::DecompressZip(
+            reinterpret_cast<unsigned char *>(&outBuf.at(0)), &dstLen, data_ptr,
+            static_cast<unsigned long>(data_len))) {
+      return false;
+    }
+
+    // For ZIP_COMPRESSION:
+    //   pixel sample data for channel 0 for scanline 0
+    //   pixel sample data for channel 1 for scanline 0
+    //   pixel sample data for channel ... for scanline 0
+    //   pixel sample data for channel n for scanline 0
+    //   pixel sample data for channel 0 for scanline 1
+    //   pixel sample data for channel 1 for scanline 1
+    //   pixel sample data for channel ... for scanline 1
+    //   pixel sample data for channel n for scanline 1
+    //   ...
+    for (size_t c = 0; c < static_cast<size_t>(num_channels); c++) {
+      if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) {
+        for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
+          const unsigned short *line_ptr = reinterpret_cast<unsigned short *>(
+              &outBuf.at(v * static_cast<size_t>(pixel_data_size) *
+                             static_cast<size_t>(width) +
+                         channel_offset_list[c] * static_cast<size_t>(width)));
+          for (size_t u = 0; u < static_cast<size_t>(width); u++) {
+            tinyexr::FP16 hf;
+
+            // hf.u = line_ptr[u];
+            tinyexr::cpy2(&(hf.u), line_ptr + u);
+
+            tinyexr::swap2(reinterpret_cast<unsigned short *>(&hf.u));
+
+            if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) {
+              unsigned short *image =
+                  reinterpret_cast<unsigned short **>(out_images)[c];
+              if (line_order == 0) {
+                image += (static_cast<size_t>(line_no) + v) *
+                             static_cast<size_t>(x_stride) +
+                         u;
+              } else {
+                image += (static_cast<size_t>(height) - 1U -
+                          (static_cast<size_t>(line_no) + v)) *
+                             static_cast<size_t>(x_stride) +
+                         u;
+              }
+              *image = hf.u;
+            } else {  // HALF -> FLOAT
+              tinyexr::FP32 f32 = half_to_float(hf);
+              float *image = reinterpret_cast<float **>(out_images)[c];
+              size_t offset = 0;
+              if (line_order == 0) {
+                offset = (static_cast<size_t>(line_no) + v) *
+                             static_cast<size_t>(x_stride) +
+                         u;
+              } else {
+                offset = (static_cast<size_t>(height) - 1U -
+                          (static_cast<size_t>(line_no) + v)) *
+                             static_cast<size_t>(x_stride) +
+                         u;
+              }
+              image += offset;
+
+              *image = f32.f;
+            }
+          }
+        }
+      } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) {
+        TINYEXR_CHECK_AND_RETURN_C(requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT, false);
+
+        for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
+          const unsigned int *line_ptr = reinterpret_cast<unsigned int *>(
+              &outBuf.at(v * pixel_data_size * static_cast<size_t>(width) +
+                         channel_offset_list[c] * static_cast<size_t>(width)));
+          for (size_t u = 0; u < static_cast<size_t>(width); u++) {
+            unsigned int val;
+            // val = line_ptr[u];
+            tinyexr::cpy4(&val, line_ptr + u);
+
+            tinyexr::swap4(&val);
+
+            unsigned int *image =
+                reinterpret_cast<unsigned int **>(out_images)[c];
+            if (line_order == 0) {
+              image += (static_cast<size_t>(line_no) + v) *
+                           static_cast<size_t>(x_stride) +
+                       u;
+            } else {
+              image += (static_cast<size_t>(height) - 1U -
+                        (static_cast<size_t>(line_no) + v)) *
+                           static_cast<size_t>(x_stride) +
+                       u;
+            }
+            *image = val;
+          }
+        }
+      } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) {
+        TINYEXR_CHECK_AND_RETURN_C(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT, false);
+        for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
+          const float *line_ptr = reinterpret_cast<float *>(
+              &outBuf.at(v * pixel_data_size * static_cast<size_t>(width) +
+                         channel_offset_list[c] * static_cast<size_t>(width)));
+          for (size_t u = 0; u < static_cast<size_t>(width); u++) {
+            float val;
+            // val = line_ptr[u];
+            tinyexr::cpy4(&val, line_ptr + u);
+
+            tinyexr::swap4(reinterpret_cast<unsigned int *>(&val));
+
+            float *image = reinterpret_cast<float **>(out_images)[c];
+            if (line_order == 0) {
+              image += (static_cast<size_t>(line_no) + v) *
+                           static_cast<size_t>(x_stride) +
+                       u;
+            } else {
+              image += (static_cast<size_t>(height) - 1U -
+                        (static_cast<size_t>(line_no) + v)) *
+                           static_cast<size_t>(x_stride) +
+                       u;
+            }
+            *image = val;
+          }
+        }
+      } else {
+        return false;
+      }
+    }
+  } else if (compression_type == TINYEXR_COMPRESSIONTYPE_RLE) {
+    // Allocate original data size.
+    std::vector<unsigned char> outBuf(static_cast<size_t>(width) *
+                                      static_cast<size_t>(num_lines) *
+                                      pixel_data_size);
+
+    unsigned long dstLen = static_cast<unsigned long>(outBuf.size());
+    if (dstLen == 0) {
+      return false;
+    }
+
+    if (!tinyexr::DecompressRle(
+            reinterpret_cast<unsigned char *>(&outBuf.at(0)), dstLen, data_ptr,
+            static_cast<unsigned long>(data_len))) {
+      return false;
+    }
+
+    // For RLE_COMPRESSION:
+    //   pixel sample data for channel 0 for scanline 0
+    //   pixel sample data for channel 1 for scanline 0
+    //   pixel sample data for channel ... for scanline 0
+    //   pixel sample data for channel n for scanline 0
+    //   pixel sample data for channel 0 for scanline 1
+    //   pixel sample data for channel 1 for scanline 1
+    //   pixel sample data for channel ... for scanline 1
+    //   pixel sample data for channel n for scanline 1
+    //   ...
+    for (size_t c = 0; c < static_cast<size_t>(num_channels); c++) {
+      if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) {
+        for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
+          const unsigned short *line_ptr = reinterpret_cast<unsigned short *>(
+              &outBuf.at(v * static_cast<size_t>(pixel_data_size) *
+                             static_cast<size_t>(width) +
+                         channel_offset_list[c] * static_cast<size_t>(width)));
+          for (size_t u = 0; u < static_cast<size_t>(width); u++) {
+            tinyexr::FP16 hf;
+
+            // hf.u = line_ptr[u];
+            tinyexr::cpy2(&(hf.u), line_ptr + u);
+
+            tinyexr::swap2(reinterpret_cast<unsigned short *>(&hf.u));
+
+            if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) {
+              unsigned short *image =
+                  reinterpret_cast<unsigned short **>(out_images)[c];
+              if (line_order == 0) {
+                image += (static_cast<size_t>(line_no) + v) *
+                             static_cast<size_t>(x_stride) +
+                         u;
+              } else {
+                image += (static_cast<size_t>(height) - 1U -
+                          (static_cast<size_t>(line_no) + v)) *
+                             static_cast<size_t>(x_stride) +
+                         u;
+              }
+              *image = hf.u;
+            } else {  // HALF -> FLOAT
+              tinyexr::FP32 f32 = half_to_float(hf);
+              float *image = reinterpret_cast<float **>(out_images)[c];
+              if (line_order == 0) {
+                image += (static_cast<size_t>(line_no) + v) *
+                             static_cast<size_t>(x_stride) +
+                         u;
+              } else {
+                image += (static_cast<size_t>(height) - 1U -
+                          (static_cast<size_t>(line_no) + v)) *
+                             static_cast<size_t>(x_stride) +
+                         u;
+              }
+              *image = f32.f;
+            }
+          }
+        }
+      } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) {
+        TINYEXR_CHECK_AND_RETURN_C(requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT, false);
+
+        for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
+          const unsigned int *line_ptr = reinterpret_cast<unsigned int *>(
+              &outBuf.at(v * pixel_data_size * static_cast<size_t>(width) +
+                         channel_offset_list[c] * static_cast<size_t>(width)));
+          for (size_t u = 0; u < static_cast<size_t>(width); u++) {
+            unsigned int val;
+            // val = line_ptr[u];
+            tinyexr::cpy4(&val, line_ptr + u);
+
+            tinyexr::swap4(&val);
+
+            unsigned int *image =
+                reinterpret_cast<unsigned int **>(out_images)[c];
+            if (line_order == 0) {
+              image += (static_cast<size_t>(line_no) + v) *
+                           static_cast<size_t>(x_stride) +
+                       u;
+            } else {
+              image += (static_cast<size_t>(height) - 1U -
+                        (static_cast<size_t>(line_no) + v)) *
+                           static_cast<size_t>(x_stride) +
+                       u;
+            }
+            *image = val;
+          }
+        }
+      } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) {
+        TINYEXR_CHECK_AND_RETURN_C(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT, false);
+        for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
+          const float *line_ptr = reinterpret_cast<float *>(
+              &outBuf.at(v * pixel_data_size * static_cast<size_t>(width) +
+                         channel_offset_list[c] * static_cast<size_t>(width)));
+          for (size_t u = 0; u < static_cast<size_t>(width); u++) {
+            float val;
+            // val = line_ptr[u];
+            tinyexr::cpy4(&val, line_ptr + u);
+
+            tinyexr::swap4(reinterpret_cast<unsigned int *>(&val));
+
+            float *image = reinterpret_cast<float **>(out_images)[c];
+            if (line_order == 0) {
+              image += (static_cast<size_t>(line_no) + v) *
+                           static_cast<size_t>(x_stride) +
+                       u;
+            } else {
+              image += (static_cast<size_t>(height) - 1U -
+                        (static_cast<size_t>(line_no) + v)) *
+                           static_cast<size_t>(x_stride) +
+                       u;
+            }
+            *image = val;
+          }
+        }
+      } else {
+        return false;
+      }
+    }
+  } else if (compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) {
+#if TINYEXR_USE_ZFP
+    tinyexr::ZFPCompressionParam zfp_compression_param;
+    std::string e;
+    if (!tinyexr::FindZFPCompressionParam(&zfp_compression_param, attributes,
+                                          int(num_attributes), &e)) {
+      // This code path should not be reachable.
+      return false;
+    }
+
+    // Allocate original data size.
+    std::vector<unsigned char> outBuf(static_cast<size_t>(width) *
+                                      static_cast<size_t>(num_lines) *
+                                      pixel_data_size);
+
+    unsigned long dstLen = outBuf.size();
+    TINYEXR_CHECK_AND_RETURN_C(dstLen > 0, false);
+    tinyexr::DecompressZfp(reinterpret_cast<float *>(&outBuf.at(0)), width,
+                           num_lines, num_channels, data_ptr,
+                           static_cast<unsigned long>(data_len),
+                           zfp_compression_param);
+
+    // For ZFP_COMPRESSION:
+    //   pixel sample data for channel 0 for scanline 0
+    //   pixel sample data for channel 1 for scanline 0
+    //   pixel sample data for channel ... for scanline 0
+    //   pixel sample data for channel n for scanline 0
+    //   pixel sample data for channel 0 for scanline 1
+    //   pixel sample data for channel 1 for scanline 1
+    //   pixel sample data for channel ... for scanline 1
+    //   pixel sample data for channel n for scanline 1
+    //   ...
+    for (size_t c = 0; c < static_cast<size_t>(num_channels); c++) {
+      TINYEXR_CHECK_AND_RETURN_C(channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT, false);
+      if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) {
+        TINYEXR_CHECK_AND_RETURN_C(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT, false);
+        for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
+          const float *line_ptr = reinterpret_cast<float *>(
+              &outBuf.at(v * pixel_data_size * static_cast<size_t>(width) +
+                         channel_offset_list[c] * static_cast<size_t>(width)));
+          for (size_t u = 0; u < static_cast<size_t>(width); u++) {
+            float val;
+            tinyexr::cpy4(&val, line_ptr + u);
+
+            tinyexr::swap4(reinterpret_cast<unsigned int *>(&val));
+
+            float *image = reinterpret_cast<float **>(out_images)[c];
+            if (line_order == 0) {
+              image += (static_cast<size_t>(line_no) + v) *
+                           static_cast<size_t>(x_stride) +
+                       u;
+            } else {
+              image += (static_cast<size_t>(height) - 1U -
+                        (static_cast<size_t>(line_no) + v)) *
+                           static_cast<size_t>(x_stride) +
+                       u;
+            }
+            *image = val;
+          }
+        }
+      } else {
+        return false;
+      }
+    }
+#else
+    (void)attributes;
+    (void)num_attributes;
+    (void)num_channels;
+    return false;
+#endif
+  } else if (compression_type == TINYEXR_COMPRESSIONTYPE_NONE) {
+    for (size_t c = 0; c < num_channels; c++) {
+      for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
+        if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) {
+          const unsigned short *line_ptr =
+              reinterpret_cast<const unsigned short *>(
+                  data_ptr + v * pixel_data_size * size_t(width) +
+                  channel_offset_list[c] * static_cast<size_t>(width));
+
+          if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) {
+            unsigned short *outLine =
+                reinterpret_cast<unsigned short *>(out_images[c]);
+            if (line_order == 0) {
+              outLine += (size_t(y) + v) * size_t(x_stride);
+            } else {
+              outLine +=
+                  (size_t(height) - 1 - (size_t(y) + v)) * size_t(x_stride);
+            }
+
+            for (int u = 0; u < width; u++) {
+              tinyexr::FP16 hf;
+
+              // hf.u = line_ptr[u];
+              tinyexr::cpy2(&(hf.u), line_ptr + u);
+
+              tinyexr::swap2(reinterpret_cast<unsigned short *>(&hf.u));
+
+              outLine[u] = hf.u;
+            }
+          } else if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT) {
+            float *outLine = reinterpret_cast<float *>(out_images[c]);
+            if (line_order == 0) {
+              outLine += (size_t(y) + v) * size_t(x_stride);
+            } else {
+              outLine +=
+                  (size_t(height) - 1 - (size_t(y) + v)) * size_t(x_stride);
+            }
+
+            if (reinterpret_cast<const unsigned char *>(line_ptr + width) >
+                (data_ptr + data_len)) {
+              // Insufficient data size
+              return false;
+            }
+
+            for (int u = 0; u < width; u++) {
+              tinyexr::FP16 hf;
+
+              // address may not be aligned. use byte-wise copy for safety.#76
+              // hf.u = line_ptr[u];
+              tinyexr::cpy2(&(hf.u), line_ptr + u);
+
+              tinyexr::swap2(reinterpret_cast<unsigned short *>(&hf.u));
+
+              tinyexr::FP32 f32 = half_to_float(hf);
+
+              outLine[u] = f32.f;
+            }
+          } else {
+            return false;
+          }
+        } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) {
+          const float *line_ptr = reinterpret_cast<const float *>(
+              data_ptr + v * pixel_data_size * size_t(width) +
+              channel_offset_list[c] * static_cast<size_t>(width));
+
+          float *outLine = reinterpret_cast<float *>(out_images[c]);
+          if (line_order == 0) {
+            outLine += (size_t(y) + v) * size_t(x_stride);
+          } else {
+            outLine +=
+                (size_t(height) - 1 - (size_t(y) + v)) * size_t(x_stride);
+          }
+
+          if (reinterpret_cast<const unsigned char *>(line_ptr + width) >
+              (data_ptr + data_len)) {
+            // Insufficient data size
+            return false;
+          }
+
+          for (int u = 0; u < width; u++) {
+            float val;
+            tinyexr::cpy4(&val, line_ptr + u);
+
+            tinyexr::swap4(reinterpret_cast<unsigned int *>(&val));
+
+            outLine[u] = val;
+          }
+        } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) {
+          const unsigned int *line_ptr = reinterpret_cast<const unsigned int *>(
+              data_ptr + v * pixel_data_size * size_t(width) +
+              channel_offset_list[c] * static_cast<size_t>(width));
+
+          unsigned int *outLine =
+              reinterpret_cast<unsigned int *>(out_images[c]);
+          if (line_order == 0) {
+            outLine += (size_t(y) + v) * size_t(x_stride);
+          } else {
+            outLine +=
+                (size_t(height) - 1 - (size_t(y) + v)) * size_t(x_stride);
+          }
+
+          if (reinterpret_cast<const unsigned char *>(line_ptr + width) >
+              (data_ptr + data_len)) {
+            // Corrupted data
+            return false;
+          }
+
+          for (int u = 0; u < width; u++) {
+
+            unsigned int val;
+            tinyexr::cpy4(&val, line_ptr + u);
+
+            tinyexr::swap4(reinterpret_cast<unsigned int *>(&val));
+
+            outLine[u] = val;
+          }
+        }
+      }
+    }
+  }
+
+  return true;
+}
+
+// Decodes one tile into `out_images` and stores its real extent in
+// `*width`/`*height` (border tiles are clipped to the (sub)level size
+// `data_width` x `data_height`). `tile_offset_x`/`tile_offset_y` are tile
+// indices, not pixel positions. Returns false for tile coordinates outside
+// the level, non-positive tile sizes, or when DecodePixelData fails.
+static bool DecodeTiledPixelData(
+    unsigned char **out_images, int *width, int *height,
+    const int *requested_pixel_types, const unsigned char *data_ptr,
+    size_t data_len, int compression_type, int line_order, int data_width,
+    int data_height, int tile_offset_x, int tile_offset_y, int tile_size_x,
+    int tile_size_y, size_t pixel_data_size, size_t num_attributes,
+    const EXRAttribute *attributes, size_t num_channels,
+    const EXRChannelInfo *channels,
+    const std::vector<size_t> &channel_offset_list) {
+  // The tile indices are read from the file. Reject negative values first and
+  // then compare by division, so `tile_size * tile_offset` can neither
+  // overflow `int` nor slip past the range check.
+  if (tile_size_x <= 0 || tile_size_y <= 0 || tile_offset_x < 0 ||
+      tile_offset_y < 0 || data_width < 0 || data_height < 0 ||
+      tile_offset_x > data_width / tile_size_x ||
+      tile_offset_y > data_height / tile_size_y) {
+    return false;
+  }
+  // Pixels between the tile origin and the level border; a border tile only
+  // covers what is left, any other tile is full size.
+  (*width) = std::min(data_width - tile_offset_x * tile_size_x, tile_size_x);
+  (*height) = std::min(data_height - tile_offset_y * tile_size_y, tile_size_y);
+
+  // The output buffers keep the full tile size as row stride.
+  return DecodePixelData(out_images, requested_pixel_types, data_ptr, data_len,
+                         compression_type, line_order, (*width), tile_size_y,
+                         /* stride */ tile_size_x, /* y */ 0, /* line_no */ 0,
+                         (*height), pixel_data_size, num_attributes, attributes,
+                         num_channels, channels, channel_offset_list);
+}
+
+// For each channel stores the summed sample size (in bytes) of all channels
+// before it; the totals cover every channel. Returns false on an unknown type.
+static bool ComputeChannelLayout(std::vector<size_t> *channel_offset_list,
+                                 int *pixel_data_size, size_t *channel_offset,
+                                 int num_channels,
+                                 const EXRChannelInfo *channels) {
+  const size_t channel_count = static_cast<size_t>(num_channels);
+  channel_offset_list->resize(channel_count);
+  *pixel_data_size = 0;
+  *channel_offset = 0;
+  for (size_t idx = 0; idx < channel_count; ++idx) {
+    (*channel_offset_list)[idx] = *channel_offset;
+    size_t sample_bytes;
+    if (channels[idx].pixel_type == TINYEXR_PIXELTYPE_HALF) {
+      sample_bytes = sizeof(unsigned short);
+    } else if (channels[idx].pixel_type == TINYEXR_PIXELTYPE_FLOAT) {
+      sample_bytes = sizeof(float);
+    } else if (channels[idx].pixel_type == TINYEXR_PIXELTYPE_UINT) {
+      sample_bytes = sizeof(unsigned int);
+    } else {
+      return false;  // unknown pixel type
+    }
+    *pixel_data_size += sample_bytes;
+    *channel_offset += sample_bytes;
+  }
+  return true;
+}
+
+// Allocates one `data_width` x `data_height` plane per channel.
+// A plane's element type follows the channel's pixel type, except that a HALF
+// channel may be widened to FLOAT through `requested_pixel_types`.
+// Returns the malloc'ed array of `num_channels` plane pointers; the caller owns
+// the array and every non-NULL plane in it. `*success` (when `success` is not
+// NULL) becomes false if a pixel type combination is unsupported, the byte
+// size would overflow `size_t`, or an allocation fails; all planes are then
+// released and left NULL. If the pointer array itself cannot be allocated,
+// NULL is returned.
+static unsigned char **AllocateImage(int num_channels,
+                                     const EXRChannelInfo *channels,
+                                     const int *requested_pixel_types,
+                                     int data_width, int data_height, bool *success) {
+  const size_t channel_count = static_cast<size_t>(num_channels);
+  unsigned char **images = static_cast<unsigned char **>(
+      malloc(sizeof(unsigned char *) * channel_count));
+  if (!images && channel_count != 0) {
+    // Out of memory: report it instead of writing through a NULL array.
+    if (success) {
+      (*success) = false;
+    }
+    return NULL;
+  }
+
+  for (size_t c = 0; c < channel_count; c++) {
+    images[c] = NULL;
+  }
+
+  const size_t num_pixels =
+      static_cast<size_t>(data_width) * static_cast<size_t>(data_height);
+  bool valid = true;
+  for (size_t c = 0; c < channel_count; c++) {
+    // Size in bytes of one stored sample; 0 marks an unsupported combination.
+    size_t sample_size = 0;
+    if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) {
+      if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) {
+        sample_size = sizeof(unsigned short);
+      } else if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT) {
+        sample_size = sizeof(float);
+      }
+    } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) {
+      sample_size = sizeof(float);
+    } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) {
+      sample_size = sizeof(unsigned int);
+    }
+    // Also refuse sizes whose byte count would wrap around `size_t`.
+    if (sample_size == 0 ||
+        (num_pixels != 0 &&
+         sample_size > std::numeric_limits<size_t>::max() / num_pixels)) {
+      valid = false;
+      break;
+    }
+    images[c] = static_cast<unsigned char *>(malloc(sample_size * num_pixels));
+    if (!images[c] && num_pixels != 0) {
+      valid = false;  // an allocation failure must not be reported as success
+      break;
+    }
+  }
+
+  if (!valid) {
+    for (size_t c = 0; c < channel_count; c++) {
+      free(images[c]);  // free(NULL) is a no-op
+      images[c] = NULL;
+    }
+  }
+  if (success) {
+    (*success) = valid;
+  }
+  return images;
+}
+
+#ifdef _WIN32
+// Converts the UTF-8 bytes of `str` to a wide string via MultiByteToWideChar.
+static inline std::wstring UTF8ToWchar(const std::string &str) {
+  const int src_len = static_cast<int>(str.size());
+  const int dst_len = MultiByteToWideChar(CP_UTF8, 0, str.data(), src_len, NULL, 0);
+  std::wstring wide(dst_len, 0);
+  MultiByteToWideChar(CP_UTF8, 0, str.data(), src_len, &wide[0], static_cast<int>(wide.size()));
+  return wide;
+}
+#endif
+
+
+// Parses one EXR header (a list of attributes ended by a NUL byte) from `buf`.
+//
+// For multipart files a leading NUL byte marks the end of the header list; this
+// is reported through `empty_header` together with TINYEXR_SUCCESS.
+// The attributes compression, channels, lineOrder, displayWindow, dataWindow,
+// pixelAspectRatio, screenWindowWidth and screenWindowCenter must be present
+// (plus name and type for multipart / non-image files); a missing one yields
+// TINYEXR_ERROR_INVALID_HEADER. Attributes that are not recognized are kept as
+// custom attributes, whose `value` buffers are malloc'ed here. Malformed data
+// yields TINYEXR_ERROR_INVALID_DATA or TINYEXR_ERROR_UNSUPPORTED_FORMAT.
+// On success `info->header_len` holds the number of bytes consumed.
+static int ParseEXRHeader(HeaderInfo *info, bool *empty_header,
+                          const EXRVersion *version, std::string *err,
+                          const unsigned char *buf, size_t size) {
+  const char *marker = reinterpret_cast<const char *>(&buf[0]);
+
+  if (empty_header) {
+    (*empty_header) = false;
+  }
+
+  if (version->multipart) {
+    if (size > 0 && marker[0] == '\0') {
+      // End of header list.
+      if (empty_header) {
+        (*empty_header) = true;
+      }
+      return TINYEXR_SUCCESS;
+    }
+  }
+
+  bool has_channels = false;
+  bool has_compression = false;
+  bool has_data_window = false;
+  bool has_display_window = false;
+  bool has_line_order = false;
+  bool has_pixel_aspect_ratio = false;
+  bool has_screen_window_center = false;
+  bool has_screen_window_width = false;
+  bool has_name = false;
+  bool has_type = false;
+
+  info->name.clear();
+  info->type.clear();
+
+  info->data_window.min_x = 0;
+  info->data_window.min_y = 0;
+  info->data_window.max_x = 0;
+  info->data_window.max_y = 0;
+  info->line_order = 0;  // @fixme
+  info->display_window.min_x = 0;
+  info->display_window.min_y = 0;
+  info->display_window.max_x = 0;
+  info->display_window.max_y = 0;
+  info->screen_window_center[0] = 0.0f;
+  info->screen_window_center[1] = 0.0f;
+  info->screen_window_width = -1.0f;
+  info->pixel_aspect_ratio = -1.0f;
+
+  info->tiled = 0;
+  info->tile_size_x = -1;
+  info->tile_size_y = -1;
+  info->tile_level_mode = -1;
+  info->tile_rounding_mode = -1;
+
+  info->attributes.clear();
+
+  // Read attributes
+  size_t orig_size = size;
+  for (size_t nattr = 0; nattr < TINYEXR_MAX_HEADER_ATTRIBUTES; nattr++) {
+    if (0 == size) {
+      if (err) {
+        (*err) += "Insufficient data size for attributes.\n";
+      }
+      return TINYEXR_ERROR_INVALID_DATA;
+    } else if (marker[0] == '\0') {
+      size--;
+      break;
+    }
+
+    std::string attr_name;
+    std::string attr_type;
+    std::vector<unsigned char> data;
+    size_t marker_size;
+    if (!tinyexr::ReadAttribute(&attr_name, &attr_type, &data, &marker_size,
+                                marker, size)) {
+      if (err) {
+        (*err) += "Failed to read attribute.\n";
+      }
+      return TINYEXR_ERROR_INVALID_DATA;
+    }
+    marker += marker_size;
+    size -= marker_size;
+
+    // For a multipart file, the version field 9th bit is 0.
+    if ((version->tiled || version->multipart || version->non_image) && attr_name.compare("tiles") == 0) {
+      unsigned int x_size, y_size;
+      unsigned char tile_mode;
+      if (data.size() != 9) {
+        if (err) {
+          (*err) += "(ParseEXRHeader) Invalid attribute data size. Attribute data size must be 9.\n";
+        }
+        return TINYEXR_ERROR_INVALID_DATA;
+      }
+
+      memcpy(&x_size, &data.at(0), sizeof(int));
+      memcpy(&y_size, &data.at(4), sizeof(int));
+      tile_mode = data[8];
+      tinyexr::swap4(&x_size);
+      tinyexr::swap4(&y_size);
+
+      if (x_size > static_cast<unsigned int>(std::numeric_limits<int>::max()) ||
+          y_size > static_cast<unsigned int>(std::numeric_limits<int>::max())) {
+        if (err) {
+          (*err) = "Tile sizes were invalid.";
+        }
+        return TINYEXR_ERROR_UNSUPPORTED_FORMAT;
+      }
+
+      info->tile_size_x = static_cast<int>(x_size);
+      info->tile_size_y = static_cast<int>(y_size);
+
+      // mode = levelMode + roundingMode * 16
+      info->tile_level_mode = tile_mode & 0x3;
+      info->tile_rounding_mode = (tile_mode >> 4) & 0x1;
+      info->tiled = 1;
+    } else if (attr_name.compare("compression") == 0) {
+      if (data.empty()) {
+        if (err) (*err) += "Invalid \"compression\" attribute data size.\n";
+        return TINYEXR_ERROR_INVALID_DATA;
+      }
+      bool ok = false;
+      if (data[0] < TINYEXR_COMPRESSIONTYPE_PIZ) {
+        ok = true;
+      }
+
+      if (data[0] == TINYEXR_COMPRESSIONTYPE_PIZ) {
+#if TINYEXR_USE_PIZ
+        ok = true;
+#else
+        if (err) {
+          (*err) = "PIZ compression is not supported.";
+        }
+        return TINYEXR_ERROR_UNSUPPORTED_FORMAT;
+#endif
+      }
+
+      if (data[0] == TINYEXR_COMPRESSIONTYPE_ZFP) {
+#if TINYEXR_USE_ZFP
+        ok = true;
+#else
+        if (err) {
+          (*err) = "ZFP compression is not supported.";
+        }
+        return TINYEXR_ERROR_UNSUPPORTED_FORMAT;
+#endif
+      }
+
+      if (!ok) {
+        if (err) {
+          (*err) = "Unknown compression type.";
+        }
+        return TINYEXR_ERROR_UNSUPPORTED_FORMAT;
+      }
+
+      info->compression_type = static_cast<int>(data[0]);
+      has_compression = true;
+    } else if (attr_name.compare("channels") == 0) {
+      // name: zero-terminated string, from 1 to 255 bytes long
+      // pixel type: int, possible values are: UINT = 0 HALF = 1 FLOAT = 2
+      // pLinear: unsigned char, possible values are 0 and 1
+      // reserved: three chars, should be zero
+      // xSampling: int
+      // ySampling: int
+
+      if (!ReadChannelInfo(info->channels, data)) {
+        if (err) {
+          (*err) += "Failed to parse channel info.\n";
+        }
+        return TINYEXR_ERROR_INVALID_DATA;
+      }
+
+      if (info->channels.size() < 1) {
+        if (err) {
+          (*err) += "# of channels is zero.\n";
+        }
+        return TINYEXR_ERROR_INVALID_DATA;
+      }
+
+      has_channels = true;
+    } else if (attr_name.compare("dataWindow") == 0) {
+      if (data.size() >= 16) {
+        memcpy(&info->data_window.min_x, &data.at(0), sizeof(int));
+        memcpy(&info->data_window.min_y, &data.at(4), sizeof(int));
+        memcpy(&info->data_window.max_x, &data.at(8), sizeof(int));
+        memcpy(&info->data_window.max_y, &data.at(12), sizeof(int));
+        tinyexr::swap4(&info->data_window.min_x);
+        tinyexr::swap4(&info->data_window.min_y);
+        tinyexr::swap4(&info->data_window.max_x);
+        tinyexr::swap4(&info->data_window.max_y);
+        has_data_window = true;
+      }
+    } else if (attr_name.compare("displayWindow") == 0) {
+      if (data.size() >= 16) {
+        memcpy(&info->display_window.min_x, &data.at(0), sizeof(int));
+        memcpy(&info->display_window.min_y, &data.at(4), sizeof(int));
+        memcpy(&info->display_window.max_x, &data.at(8), sizeof(int));
+        memcpy(&info->display_window.max_y, &data.at(12), sizeof(int));
+        tinyexr::swap4(&info->display_window.min_x);
+        tinyexr::swap4(&info->display_window.min_y);
+        tinyexr::swap4(&info->display_window.max_x);
+        tinyexr::swap4(&info->display_window.max_y);
+        has_display_window = true;
+      }
+    } else if (attr_name.compare("lineOrder") == 0) {
+      if (data.size() >= 1) {
+        info->line_order = static_cast<int>(data[0]);
+        has_line_order = true;
+      }
+    } else if (attr_name.compare("pixelAspectRatio") == 0) {
+      if (data.size() >= sizeof(float)) {
+        memcpy(&info->pixel_aspect_ratio, &data.at(0), sizeof(float));
+        tinyexr::swap4(&info->pixel_aspect_ratio);
+        has_pixel_aspect_ratio = true;
+      }
+    } else if (attr_name.compare("screenWindowCenter") == 0) {
+      if (data.size() >= 8) {
+        memcpy(&info->screen_window_center[0], &data.at(0), sizeof(float));
+        memcpy(&info->screen_window_center[1], &data.at(4), sizeof(float));
+        tinyexr::swap4(&info->screen_window_center[0]);
+        tinyexr::swap4(&info->screen_window_center[1]);
+        has_screen_window_center = true;
+      }
+    } else if (attr_name.compare("screenWindowWidth") == 0) {
+      if (data.size() >= sizeof(float)) {
+        memcpy(&info->screen_window_width, &data.at(0), sizeof(float));
+        tinyexr::swap4(&info->screen_window_width);
+        has_screen_window_width = true;
+      }
+    } else if (attr_name.compare("chunkCount") == 0) {
+      if (data.size() >= sizeof(int)) {
+        memcpy(&info->chunk_count, &data.at(0), sizeof(int));
+        tinyexr::swap4(&info->chunk_count);
+      }
+    } else if (attr_name.compare("name") == 0) {
+      if (!data.empty() && data[0]) {
+        data.push_back(0);
+        size_t len = strlen(reinterpret_cast<const char*>(&data[0]));
+        info->name.assign(reinterpret_cast<const char*>(&data[0]), len);
+        has_name = true;
+      }
+    } else if (attr_name.compare("type") == 0) {
+      if (!data.empty() && data[0]) {
+        data.push_back(0);
+        size_t len = strlen(reinterpret_cast<const char*>(&data[0]));
+        info->type.assign(reinterpret_cast<const char*>(&data[0]), len);
+        has_type = true;
+      }
+    } else {
+      // Custom attribute(up to TINYEXR_MAX_CUSTOM_ATTRIBUTES)
+      if (info->attributes.size() < TINYEXR_MAX_CUSTOM_ATTRIBUTES) {
+        EXRAttribute attrib;
+#ifdef _MSC_VER
+        strncpy_s(attrib.name, attr_name.c_str(), 255);
+        strncpy_s(attrib.type, attr_type.c_str(), 255);
+#else
+        strncpy(attrib.name, attr_name.c_str(), 255);
+        strncpy(attrib.type, attr_type.c_str(), 255);
+#endif
+        attrib.name[255] = '\0';
+        attrib.type[255] = '\0';
+        attrib.size = static_cast<int>(data.size());
+        attrib.value = static_cast<unsigned char *>(malloc(data.size()));
+        if (!attrib.value && !data.empty()) {
+          if (err) (*err) += "Failed to allocate memory for an attribute.\n";
+          return TINYEXR_ERROR_INVALID_DATA;
+        }
+        if (!data.empty()) memcpy(attrib.value, &data.at(0), data.size());
+        info->attributes.push_back(attrib);
+      }
+    }
+  }
+
+  // Check if required attributes exist
+  {
+    std::stringstream ss_err;
+
+    if (!has_compression) {
+      ss_err << "\"compression\" attribute not found in the header."
+             << std::endl;
+    }
+
+    if (!has_channels) {
+      ss_err << "\"channels\" attribute not found in the header." << std::endl;
+    }
+
+    if (!has_line_order) {
+      ss_err << "\"lineOrder\" attribute not found in the header." << std::endl;
+    }
+
+    if (!has_display_window) {
+      ss_err << "\"displayWindow\" attribute not found in the header."
+             << std::endl;
+    }
+
+    if (!has_data_window) {
+      ss_err << "\"dataWindow\" attribute not found in the header or invalid."
+             << std::endl;
+    }
+
+    if (!has_pixel_aspect_ratio) {
+      ss_err << "\"pixelAspectRatio\" attribute not found in the header."
+             << std::endl;
+    }
+
+    if (!has_screen_window_width) {
+      ss_err << "\"screenWindowWidth\" attribute not found in the header."
+             << std::endl;
+    }
+
+    if (!has_screen_window_center) {
+      ss_err << "\"screenWindowCenter\" attribute not found in the header."
+             << std::endl;
+    }
+
+    if (version->multipart || version->non_image) {
+      if (!has_name) {
+        ss_err << "\"name\" attribute not found in the header."
+          << std::endl;
+      }
+      if (!has_type) {
+        ss_err << "\"type\" attribute not found in the header."
+          << std::endl;
+      }
+    }
+
+    if (!(ss_err.str().empty())) {
+      if (err) {
+        (*err) += ss_err.str();
+      }
+
+      return TINYEXR_ERROR_INVALID_HEADER;
+    }
+  }
+
+  info->header_len = static_cast<unsigned int>(orig_size - size);
+
+  return TINYEXR_SUCCESS;
+}
+
+// C++ HeaderInfo to C EXRHeader conversion.
+// Copies the scalar fields of `info` into `exr_header` and allocates (malloc)
+// the `channels`, `pixel_types`, `requested_pixel_types` and
+// `custom_attributes` arrays. Custom attribute payloads are not duplicated:
+// the `value` pointers of `info.attributes` are shared with `exr_header`.
+// Returns false, with a message appended to `err`, when the part type
+// contradicts the tiled flag or when an allocation fails; in the latter case
+// every array allocated here is released and reset to NULL. An unknown part
+// type only adds a note to `warn`.
+static bool ConvertHeader(EXRHeader *exr_header, const HeaderInfo &info, std::string *warn, std::string *err) {
+  exr_header->pixel_aspect_ratio = info.pixel_aspect_ratio;
+  exr_header->screen_window_center[0] = info.screen_window_center[0];
+  exr_header->screen_window_center[1] = info.screen_window_center[1];
+  exr_header->screen_window_width = info.screen_window_width;
+  exr_header->chunk_count = info.chunk_count;
+  exr_header->display_window.min_x = info.display_window.min_x;
+  exr_header->display_window.min_y = info.display_window.min_y;
+  exr_header->display_window.max_x = info.display_window.max_x;
+  exr_header->display_window.max_y = info.display_window.max_y;
+  exr_header->data_window.min_x = info.data_window.min_x;
+  exr_header->data_window.min_y = info.data_window.min_y;
+  exr_header->data_window.max_x = info.data_window.max_x;
+  exr_header->data_window.max_y = info.data_window.max_y;
+  exr_header->line_order = info.line_order;
+  exr_header->compression_type = info.compression_type;
+  exr_header->tiled = info.tiled;
+  exr_header->tile_size_x = info.tile_size_x;
+  exr_header->tile_size_y = info.tile_size_y;
+  exr_header->tile_level_mode = info.tile_level_mode;
+  exr_header->tile_rounding_mode = info.tile_rounding_mode;
+
+  EXRSetNameAttr(exr_header, info.name.c_str());
+
+  // Cross-check the part type against the tiled flag.
+  bool valid = true;
+  if (info.type == "scanlineimage") {
+    if (exr_header->tiled) {
+      if (err) (*err) += "(ConvertHeader) tiled bit must be off for `scanlineimage` type.\n";
+      valid = false;
+    }
+  } else if (info.type == "tiledimage") {
+    if (!exr_header->tiled) {
+      if (err) (*err) += "(ConvertHeader) tiled bit must be on for `tiledimage` type.\n";
+      valid = false;
+    }
+  } else if (info.type == "deeptile") {
+    exr_header->non_image = 1;
+    if (!exr_header->tiled) {
+      if (err) (*err) += "(ConvertHeader) tiled bit must be on for `deeptile` type.\n";
+      valid = false;
+    }
+  } else if (info.type == "deepscanline") {
+    exr_header->non_image = 1;
+    // A set tiled bit is only reported here; it is not treated as fatal.
+    if (exr_header->tiled && err) {
+      (*err) += "(ConvertHeader) tiled bit must be off for `deepscanline` type.\n";
+    }
+  } else if (!info.type.empty() && warn) {
+    (*warn) += "(ConvertHeader) Unsupported or unknown info.type: " + info.type + "\n";
+  }
+  if (!valid) {
+    return false;
+  }
+
+  // Per-channel arrays. All allocations are checked together further down so
+  // that a failed malloc is reported instead of being written through.
+  const size_t num_channels = info.channels.size();
+  exr_header->num_channels = static_cast<int>(num_channels);
+  exr_header->channels = static_cast<EXRChannelInfo *>(
+      malloc(sizeof(EXRChannelInfo) * num_channels));
+  exr_header->pixel_types =
+      static_cast<int *>(malloc(sizeof(int) * num_channels));
+  // Initially filled with the values of `pixel_types`.
+  exr_header->requested_pixel_types =
+      static_cast<int *>(malloc(sizeof(int) * num_channels));
+  bool alloc_ok = (num_channels == 0) ||
+                  (exr_header->channels && exr_header->pixel_types &&
+                   exr_header->requested_pixel_types);
+  exr_header->num_custom_attributes = static_cast<int>(info.attributes.size());
+  // TODO(syoyo): Report warning when # of attributes exceeds
+  // `TINYEXR_MAX_CUSTOM_ATTRIBUTES`
+  if (exr_header->num_custom_attributes > TINYEXR_MAX_CUSTOM_ATTRIBUTES) {
+    exr_header->num_custom_attributes = TINYEXR_MAX_CUSTOM_ATTRIBUTES;
+  }
+  const size_t num_attributes = size_t(exr_header->num_custom_attributes);
+  exr_header->custom_attributes = NULL;
+  if (num_attributes > 0) {
+    exr_header->custom_attributes = static_cast<EXRAttribute *>(
+        malloc(sizeof(EXRAttribute) * num_attributes));
+    alloc_ok = alloc_ok && (exr_header->custom_attributes != NULL);
+  }
+
+  if (!alloc_ok) {
+    free(exr_header->channels);
+    free(exr_header->pixel_types);
+    free(exr_header->requested_pixel_types);
+    free(exr_header->custom_attributes);
+    exr_header->channels = NULL;
+    exr_header->pixel_types = NULL;
+    exr_header->requested_pixel_types = NULL;
+    exr_header->custom_attributes = NULL;
+    exr_header->num_channels = 0;
+    exr_header->num_custom_attributes = 0;
+    if (err) (*err) += "(ConvertHeader) Failed to allocate memory.\n";
+    return false;
+  }
+
+  for (size_t c = 0; c < num_channels; c++) {
+#ifdef _MSC_VER
+    strncpy_s(exr_header->channels[c].name, info.channels[c].name.c_str(), 255);
+#else
+    strncpy(exr_header->channels[c].name, info.channels[c].name.c_str(), 255);
+#endif
+    // manually add '\0' for safety.
+    exr_header->channels[c].name[255] = '\0';
+    exr_header->channels[c].pixel_type = info.channels[c].pixel_type;
+    exr_header->channels[c].p_linear = info.channels[c].p_linear;
+    exr_header->channels[c].x_sampling = info.channels[c].x_sampling;
+    exr_header->channels[c].y_sampling = info.channels[c].y_sampling;
+    exr_header->pixel_types[c] = info.channels[c].pixel_type;
+    exr_header->requested_pixel_types[c] = info.channels[c].pixel_type;
+  }
+
+  for (size_t i = 0; i < num_attributes; i++) {
+    memcpy(exr_header->custom_attributes[i].name, info.attributes[i].name, 256);
+    memcpy(exr_header->custom_attributes[i].type, info.attributes[i].type, 256);
+    exr_header->custom_attributes[i].size = info.attributes[i].size;
+    // Just copy pointer
+    exr_header->custom_attributes[i].value = info.attributes[i].value;
+  }
+  exr_header->header_len = info.header_len;
+  return true;
+}
+
+struct OffsetData {  // chunk offset table, indexed offsets[level][tile_y][tile_x]
+  std::vector<std::vector<std::vector<tinyexr::tinyexr_uint64> > > offsets;
+  int num_x_levels;  // both level counts start at zero
+  int num_y_levels;
+  OffsetData() : num_x_levels(0), num_y_levels(0) {}
+};
+
+// Maps a level pair (lx, ly) to a flat level index. ONE_LEVEL always yields 0,
+// MIPMAP levels are indexed by `lx` alone and RIPMAP levels are laid out
+// row-major with `num_x_levels` entries per row.
+// Returns -1 for an unknown `tile_level_mode`.
+static int LevelIndex(int lx, int ly, int tile_level_mode, int num_x_levels) {
+  if (tile_level_mode == TINYEXR_TILE_ONE_LEVEL) {
+    return 0;
+  }
+  if (tile_level_mode == TINYEXR_TILE_MIPMAP_LEVELS) {
+    return lx;
+  }
+  if (tile_level_mode == TINYEXR_TILE_RIPMAP_LEVELS) {
+    return lx + ly * num_x_levels;
+  }
+
+  return -1;
+}
+
+// Size of mip/rip level `level` for a level-0 size of `toplevel_size`:
+// toplevel_size / 2^level, rounded as requested, at least 1; -1 if level < 0.
+static int LevelSize(int toplevel_size, int level, int tile_rounding_mode) {
+  if (level < 0) return -1;
+  // From this level on 2^level does not fit in a positive int (and the shift
+  // itself becomes undefined), while the level has long shrunk to one pixel.
+  if (level >= std::numeric_limits<int>::digits) return 1;
+  const int b = static_cast<int>(1u << static_cast<unsigned int>(level));
+  int level_size = toplevel_size / b;
+  if (tile_rounding_mode == TINYEXR_TILE_ROUND_UP && level_size * b < toplevel_size)
+    level_size += 1;
+  return std::max(level_size, 1);
+}
+
+// Decodes every tile of the level given by `exr_image->level_x`/`level_y` into
+// a freshly calloc'ed `exr_image->tiles` array. `head`/`size` span the file
+// data the offsets in `offset_data` refer to. Tiles are processed by
+// std::thread workers, an OpenMP loop or a plain loop depending on the build
+// flags. A failing tile does not stop the others: failures are collected and
+// reported at the end as TINYEXR_ERROR_INVALID_DATA, with details in `err`.
+static int DecodeTiledLevel(EXRImage* exr_image, const EXRHeader* exr_header,
+  const OffsetData& offset_data,
+  const std::vector<size_t>& channel_offset_list,
+  int pixel_data_size,
+  const unsigned char* head, const size_t size,
+  std::string* err) {
+  int num_channels = exr_header->num_channels;
+
+  int level_index = LevelIndex(exr_image->level_x, exr_image->level_y, exr_header->tile_level_mode, offset_data.num_x_levels);
+  // LevelIndex() returns -1 for an unknown level mode; never index with it.
+  if (level_index < 0 || size_t(level_index) >= offset_data.offsets.size()) {
+    return TINYEXR_ERROR_INVALID_DATA;
+  }
+  int num_y_tiles = int(offset_data.offsets[size_t(level_index)].size());
+  if (num_y_tiles < 1) {
+    return TINYEXR_ERROR_INVALID_DATA;
+  }
+  int num_x_tiles = int(offset_data.offsets[size_t(level_index)][0].size());
+  if (num_x_tiles < 1) {
+    return TINYEXR_ERROR_INVALID_DATA;
+  }
+  int num_tiles = num_x_tiles * num_y_tiles;
+
+  int err_code = TINYEXR_SUCCESS;
+
+  enum {
+    EF_SUCCESS = 0,
+    EF_INVALID_DATA = 1,
+    EF_INSUFFICIENT_DATA = 2,
+    EF_FAILED_TO_DECODE = 4
+  };
+#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0)
+  std::atomic<unsigned> error_flag(EF_SUCCESS);
+#else
+  unsigned error_flag(EF_SUCCESS);
+#endif
+
+  // Although the spec says : "...the data window is subdivided into an array of smaller rectangles...",
+  // the IlmImf library allows the dimensions of the tile to be larger (or equal) than the dimensions of the data window.
+  exr_image->tiles = static_cast<EXRTile*>(
+    calloc(static_cast<size_t>(num_tiles), sizeof(EXRTile)));
+  if (!exr_image->tiles) {
+    if (err) {
+      (*err) += "Failed to allocate memory for tiles.\n";
+    }
+    return TINYEXR_ERROR_INVALID_DATA;
+  }
+
+#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0)
+  std::vector<std::thread> workers;
+  std::atomic<int> tile_count(0);
+
+  int num_threads = std::max(1, int(std::thread::hardware_concurrency()));
+  if (num_threads > int(num_tiles)) {
+    num_threads = int(num_tiles);
+  }
+
+  for (int t = 0; t < num_threads; t++) {
+    workers.emplace_back(std::thread([&]()
+      {
+        int tile_idx = 0;
+        while ((tile_idx = tile_count++) < num_tiles) {
+
+#else
+#if TINYEXR_USE_OPENMP
+#pragma omp parallel for
+#endif
+  for (int tile_idx = 0; tile_idx < num_tiles; tile_idx++) {
+#endif
+    // Allocate memory for each tile.
+    bool alloc_success = false;
+    exr_image->tiles[tile_idx].images = tinyexr::AllocateImage(
+      num_channels, exr_header->channels,
+      exr_header->requested_pixel_types, exr_header->tile_size_x,
+      exr_header->tile_size_y, &alloc_success);
+    if (!alloc_success) {
+      error_flag |= EF_INVALID_DATA;
+      continue;
+    }
+
+    int x_tile = tile_idx % num_x_tiles;
+    int y_tile = tile_idx / num_x_tiles;
+    // 16 byte: tile coordinates
+    // 4 byte : data size
+    // ~      : data(uncompressed or compressed)
+    tinyexr::tinyexr_uint64 offset = offset_data.offsets[size_t(level_index)][size_t(y_tile)][size_t(x_tile)];
+    // Compare against `size - header` so that a huge offset cannot wrap around.
+    if (size < sizeof(int) * 5 || offset > size - sizeof(int) * 5) {
+      error_flag |= EF_INSUFFICIENT_DATA;
+      continue;
+    }
+
+    size_t data_size =
+      size_t(size - (offset + sizeof(int) * 5));
+    const unsigned char* data_ptr =
+      reinterpret_cast<const unsigned char*>(head + offset);
+
+    int tile_coordinates[4];
+    memcpy(tile_coordinates, data_ptr, sizeof(int) * 4);
+    tinyexr::swap4(&tile_coordinates[0]);
+    tinyexr::swap4(&tile_coordinates[1]);
+    tinyexr::swap4(&tile_coordinates[2]);
+    tinyexr::swap4(&tile_coordinates[3]);
+
+    if (tile_coordinates[2] != exr_image->level_x ||
+        tile_coordinates[3] != exr_image->level_y) {
+      // Invalid data: the tile belongs to another level.
+      error_flag |= EF_INVALID_DATA;
+      continue;
+    }
+
+    int data_len;
+    memcpy(&data_len, data_ptr + 16,
+      sizeof(int));  // 16 = sizeof(tile_coordinates)
+    tinyexr::swap4(&data_len);
+    if (data_len < 2 || size_t(data_len) > data_size) {
+      // Insufficient data size.
+      error_flag |= EF_INSUFFICIENT_DATA;
+      continue;
+    }
+
+    // Move to data addr: 20 = 16 + 4;
+    data_ptr += 20;
+    bool ret = tinyexr::DecodeTiledPixelData(
+      exr_image->tiles[tile_idx].images,
+      &(exr_image->tiles[tile_idx].width),
+      &(exr_image->tiles[tile_idx].height),
+      exr_header->requested_pixel_types, data_ptr,
+      static_cast<size_t>(data_len), exr_header->compression_type,
+      exr_header->line_order,
+      exr_image->width, exr_image->height,
+      tile_coordinates[0], tile_coordinates[1], exr_header->tile_size_x,
+      exr_header->tile_size_y, static_cast<size_t>(pixel_data_size),
+      static_cast<size_t>(exr_header->num_custom_attributes),
+      exr_header->custom_attributes,
+      static_cast<size_t>(exr_header->num_channels),
+      exr_header->channels, channel_offset_list);
+    if (!ret) {
+      // Failed to decode tile data.
+      error_flag |= EF_FAILED_TO_DECODE;
+    }
+
+    exr_image->tiles[tile_idx].offset_x = tile_coordinates[0];
+    exr_image->tiles[tile_idx].offset_y = tile_coordinates[1];
+    exr_image->tiles[tile_idx].level_x = tile_coordinates[2];
+    exr_image->tiles[tile_idx].level_y = tile_coordinates[3];
+
+#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0)
+  }
+        }));
+    }  // num_thread loop
+
+    for (auto& t : workers) {
+      t.join();
+    }
+
+#else
+  } // parallel for
+#endif
+
+  // Even in the event of an error, the reserved memory may be freed.
+  exr_image->num_channels = num_channels;
+  exr_image->num_tiles = static_cast<int>(num_tiles);
+
+  if (error_flag)  err_code = TINYEXR_ERROR_INVALID_DATA;
+  if (err) {
+    if (error_flag & EF_INSUFFICIENT_DATA) {
+      (*err) += "Insufficient data length.\n";
+    }
+    if (error_flag & EF_FAILED_TO_DECODE) {
+      (*err) += "Failed to decode tile data.\n";
+    }
+  }
+  return err_code;
+}
+// Decodes the pixel data described by exr_header/offset_data into exr_image: tiled images are handed to DecodeTiledLevel level by level, scanline images are decoded block by block here. Returns a TINYEXR_* code; messages are appended to *err when err is non-NULL.
+static int DecodeChunk(EXRImage *exr_image, const EXRHeader *exr_header,
+                       const OffsetData& offset_data,
+                       const unsigned char *head, const size_t size,
+                       std::string *err) {
+  int num_channels = exr_header->num_channels;
+  // Scanlines per block for this compression type (1 unless listed below).
+  int num_scanline_blocks = 1;
+  if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) {
+    num_scanline_blocks = 16;
+  } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) {
+    num_scanline_blocks = 32;
+  } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) {
+    num_scanline_blocks = 16;
+    // With ZFP support compiled in, failing to find the ZFP parameters in the custom attributes is a header error.
+#if TINYEXR_USE_ZFP
+    tinyexr::ZFPCompressionParam zfp_compression_param;
+    if (!FindZFPCompressionParam(&zfp_compression_param,
+                                 exr_header->custom_attributes,
+                                 int(exr_header->num_custom_attributes), err)) {
+      return TINYEXR_ERROR_INVALID_HEADER;
+    }
+#endif
+  }
+  // An inverted data window (max < min) is rejected outright.
+  if (exr_header->data_window.max_x < exr_header->data_window.min_x ||
+      exr_header->data_window.max_y < exr_header->data_window.min_y) {
+    if (err) {
+      (*err) += "Invalid data window.\n";
+    }
+    return TINYEXR_ERROR_INVALID_DATA;
+  }
+  // Extents are computed in 64 bit so that max - min + 1 cannot overflow int.
+  tinyexr_int64 data_width =
+      static_cast<tinyexr_int64>(exr_header->data_window.max_x) - static_cast<tinyexr_int64>(exr_header->data_window.min_x) + static_cast<tinyexr_int64>(1);
+  tinyexr_int64 data_height =
+      static_cast<tinyexr_int64>(exr_header->data_window.max_y) - static_cast<tinyexr_int64>(exr_header->data_window.min_y) + static_cast<tinyexr_int64>(1);
+
+  if (data_width <= 0) {
+    if (err) {
+      (*err) += "Invalid data window width.\n";
+    }
+    return TINYEXR_ERROR_INVALID_DATA;
+  }
+
+  if (data_height <= 0) {
+    if (err) {
+      (*err) += "Invalid data window height.\n";
+    }
+    return TINYEXR_ERROR_INVALID_DATA;
+  }
+
+  // Do not allow too large data_width and data_height. header invalid?
+  {
+    if ((data_width > TINYEXR_DIMENSION_THRESHOLD) || (data_height > TINYEXR_DIMENSION_THRESHOLD)) {
+      if (err) {
+        std::stringstream ss;
+        ss << "data_with or data_height too large. data_width: " << data_width
+           << ", "
+           << "data_height = " << data_height << std::endl;
+        (*err) += ss.str();
+      }
+      return TINYEXR_ERROR_INVALID_DATA;
+    }
+    if (exr_header->tiled) {
+      if ((exr_header->tile_size_x > TINYEXR_DIMENSION_THRESHOLD) || (exr_header->tile_size_y > TINYEXR_DIMENSION_THRESHOLD)) {
+        if (err) {
+          std::stringstream ss;
+          ss << "tile with or tile height too large. tile width: " << exr_header->tile_size_x
+            << ", "
+            << "tile height = " << exr_header->tile_size_y << std::endl;
+          (*err) += ss.str();
+        }
+        return TINYEXR_ERROR_INVALID_DATA;
+      }
+    }
+  }
+  // Only the scanline branch below reads this table (level 0, row 0 of the offsets).
+  const std::vector<tinyexr::tinyexr_uint64>& offsets = offset_data.offsets[0][0];
+  size_t num_blocks = offsets.size();
+
+  std::vector<size_t> channel_offset_list;
+  int pixel_data_size = 0;
+  size_t channel_offset = 0;
+  if (!tinyexr::ComputeChannelLayout(&channel_offset_list, &pixel_data_size,
+                                     &channel_offset, num_channels,
+                                     exr_header->channels)) {
+    if (err) {
+      (*err) += "Failed to compute channel layout.\n";
+    }
+    return TINYEXR_ERROR_INVALID_DATA;
+  }
+  // Raised by the scanline decoding loop on any bad block; atomic when std::thread workers are used.
+#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0)
+  std::atomic<bool> invalid_data(false);
+#else
+  bool invalid_data(false);
+#endif
+
+  if (exr_header->tiled) {
+    // value check
+    if (exr_header->tile_size_x < 0) {
+      if (err) {
+        std::stringstream ss;
+        ss << "Invalid tile size x : " << exr_header->tile_size_x << "\n";
+        (*err) += ss.str();
+      }
+      return TINYEXR_ERROR_INVALID_HEADER;
+    }
+
+    if (exr_header->tile_size_y < 0) {
+      if (err) {
+        std::stringstream ss;
+        ss << "Invalid tile size y : " << exr_header->tile_size_y << "\n";
+        (*err) += ss.str();
+      }
+      return TINYEXR_ERROR_INVALID_HEADER;
+    }
+    if (exr_header->tile_level_mode != TINYEXR_TILE_RIPMAP_LEVELS) {
+      EXRImage* level_image = NULL;
+      for (int level = 0; level < offset_data.num_x_levels; ++level) {
+        if (!level_image) {
+          level_image = exr_image;
+        } else {
+          level_image->next_level = new EXRImage;
+          InitEXRImage(level_image->next_level);
+          level_image = level_image->next_level;
+        }
+        level_image->width =
+          LevelSize(exr_header->data_window.max_x - exr_header->data_window.min_x + 1, level, exr_header->tile_rounding_mode);
+        if (level_image->width < 1) {
+          return TINYEXR_ERROR_INVALID_DATA;
+        }
+
+        level_image->height =
+          LevelSize(exr_header->data_window.max_y - exr_header->data_window.min_y + 1, level, exr_header->tile_rounding_mode);
+
+        if (level_image->height < 1) {
+          return TINYEXR_ERROR_INVALID_DATA;
+        }
+
+        level_image->level_x = level;
+        level_image->level_y = level;
+
+        int ret = DecodeTiledLevel(level_image, exr_header,
+          offset_data,
+          channel_offset_list,
+          pixel_data_size,
+          head, size,
+          err);
+        if (ret != TINYEXR_SUCCESS) return ret;
+      }
+    } else {
+      EXRImage* level_image = NULL;
+      for (int level_y = 0; level_y < offset_data.num_y_levels; ++level_y)
+        for (int level_x = 0; level_x < offset_data.num_x_levels; ++level_x) {
+          if (!level_image) {
+            level_image = exr_image;
+          } else {
+            level_image->next_level = new EXRImage;
+            InitEXRImage(level_image->next_level);
+            level_image = level_image->next_level;
+          }
+
+          level_image->width =
+            LevelSize(exr_header->data_window.max_x - exr_header->data_window.min_x + 1, level_x, exr_header->tile_rounding_mode);
+          if (level_image->width < 1) {
+            return TINYEXR_ERROR_INVALID_DATA;
+          }
+
+          level_image->height =
+            LevelSize(exr_header->data_window.max_y - exr_header->data_window.min_y + 1, level_y, exr_header->tile_rounding_mode);
+          if (level_image->height < 1) {
+            return TINYEXR_ERROR_INVALID_DATA;
+          }
+
+          level_image->level_x = level_x;
+          level_image->level_y = level_y;
+
+          int ret = DecodeTiledLevel(level_image, exr_header,
+            offset_data,
+            channel_offset_list,
+            pixel_data_size,
+            head, size,
+            err);
+          if (ret != TINYEXR_SUCCESS) return ret;
+        }
+    }
+  } else {  // scanline format
+    // Don't allow too large image(256GB * pixel_data_size or more). Workaround
+    // for #104.
+    size_t total_data_len =
+        size_t(data_width) * size_t(data_height) * size_t(num_channels);
+    const bool total_data_len_overflown =
+        sizeof(void *) == 8 ? (total_data_len >= 0x4000000000) : false;
+    if ((total_data_len == 0) || total_data_len_overflown) {
+      if (err) {
+        std::stringstream ss;
+        ss << "Image data size is zero or too large: width = " << data_width
+           << ", height = " << data_height << ", channels = " << num_channels
+           << std::endl;
+        (*err) += ss.str();
+      }
+      return TINYEXR_ERROR_INVALID_DATA;
+    }
+    // AllocateImage reports failure through alloc_success.
+    bool alloc_success = false;
+    exr_image->images = tinyexr::AllocateImage(
+        num_channels, exr_header->channels, exr_header->requested_pixel_types,
+        int(data_width), int(data_height), &alloc_success);
+
+    if (!alloc_success) {
+      if (err) {
+        std::stringstream ss;
+        ss << "Failed to allocate memory for Images. Maybe EXR header is corrupted or Image data size is too large: width = " << data_width
+           << ", height = " << data_height << ", channels = " << num_channels
+           << std::endl;
+        (*err) += ss.str();
+      }
+      return TINYEXR_ERROR_INVALID_DATA;
+    }
+    // Threaded build: each worker claims the next block index from the shared counter y_count.
+#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0)
+    std::vector<std::thread> workers;
+    std::atomic<int> y_count(0);
+
+    int num_threads = std::max(1, int(std::thread::hardware_concurrency()));
+    if (num_threads > int(num_blocks)) {
+      num_threads = int(num_blocks);
+    }
+
+    for (int t = 0; t < num_threads; t++) {
+      workers.emplace_back(std::thread([&]() {
+        int y = 0;
+        while ((y = y_count++) < int(num_blocks)) {
+
+#else
+
+#if TINYEXR_USE_OPENMP
+#pragma omp parallel for
+#endif
+    for (int y = 0; y < static_cast<int>(num_blocks); y++) {
+
+#endif
+          size_t y_idx = static_cast<size_t>(y);
+          // The 8-byte block header (line number + data length) must lie inside the buffer.
+          if (offsets[y_idx] + sizeof(int) * 2 > size) {
+            invalid_data = true;
+          } else {
+            // 4 byte: scan line
+            // 4 byte: data size
+            // ~     : pixel data(uncompressed or compressed)
+            size_t data_size =
+                size_t(size - (offsets[y_idx] + sizeof(int) * 2));
+            const unsigned char *data_ptr =
+                reinterpret_cast<const unsigned char *>(head + offsets[y_idx]);
+
+            int line_no;
+            memcpy(&line_no, data_ptr, sizeof(int));
+            int data_len;
+            memcpy(&data_len, data_ptr + 4, sizeof(int));
+            tinyexr::swap4(&line_no);
+            tinyexr::swap4(&data_len);
+
+            if (size_t(data_len) > data_size) {
+              invalid_data = true;
+
+            } else if ((line_no > (2 << 20)) || (line_no < -(2 << 20))) {
+              // Too large value. Assume this is invalid.
+              // The bound (2 << 20) = 2**21 = 2097152 is a heuristic value.
+              invalid_data = true;
+            } else if (data_len == 0) {
+              // TODO(syoyo): May be ok to raise the threshold for example
+              // `data_len < 4`
+              invalid_data = true;
+            } else {
+              // line_no may be negative.
+              int end_line_no = (std::min)(line_no + num_scanline_blocks,
+                                           (exr_header->data_window.max_y + 1));
+
+              int num_lines = end_line_no - line_no;
+
+              if (num_lines <= 0) {
+                invalid_data = true;
+              } else {
+                // Move to data addr: 8 = 4 + 4;
+                data_ptr += 8;
+
+                // Make line_no relative to data_window.min_y.
+
+                // overflow check: do the subtraction in 64 bit first
+                tinyexr_int64 lno =
+                    static_cast<tinyexr_int64>(line_no) -
+                    static_cast<tinyexr_int64>(exr_header->data_window.min_y);
+                if (lno > std::numeric_limits<int>::max()) {
+                  line_no = -1;  // invalid
+                } else if (lno < -std::numeric_limits<int>::max()) {
+                  line_no = -1;  // invalid
+                } else {
+                  line_no -= exr_header->data_window.min_y;
+                }
+
+                if (line_no < 0) {
+                  invalid_data = true;
+                } else {
+                  if (!tinyexr::DecodePixelData(
+                          exr_image->images, exr_header->requested_pixel_types,
+                          data_ptr, static_cast<size_t>(data_len),
+                          exr_header->compression_type, exr_header->line_order,
+                          int(data_width), int(data_height), int(data_width), y, line_no,
+                          num_lines, static_cast<size_t>(pixel_data_size),
+                          static_cast<size_t>(
+                              exr_header->num_custom_attributes),
+                          exr_header->custom_attributes,
+                          static_cast<size_t>(exr_header->num_channels),
+                          exr_header->channels, channel_offset_list)) {
+                    invalid_data = true;
+                  }
+                }
+              }
+            }
+          }
+
+#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0)
+        }
+      }));
+    }
+
+    for (auto &t : workers) {
+      t.join();
+    }
+#else
+    }  // omp parallel
+#endif
+  }
+
+  if (invalid_data) {
+    if (err) {
+      (*err) += "Invalid/Corrupted data found when decoding pixels.\n";
+    }
+
+    // Free the per-channel buffers; the images pointer array itself is not freed here.
+    for (size_t c = 0; c < static_cast<size_t>(num_channels); c++) {
+      if (exr_image->images[c]) {
+        free(exr_image->images[c]);
+        exr_image->images[c] = NULL;
+      }
+    }
+    return TINYEXR_ERROR_INVALID_DATA;
+  }
+
+  // Overwrite `pixel_type` with `requested_pixel_type`.
+  {
+    for (int c = 0; c < exr_header->num_channels; c++) {
+      exr_header->pixel_types[c] = exr_header->requested_pixel_types[c];
+    }
+  }
+
+  {
+    exr_image->num_channels = num_channels;
+
+    exr_image->width = int(data_width);
+    exr_image->height = int(data_height);
+  }
+
+  return TINYEXR_SUCCESS;
+}
+
+// Rebuilds the scanline offset table by walking chunk headers from `marker`.
+// Each chunk is [int32 y][uint32 data_len][data_len bytes]; returns false when
+// the arguments are inconsistent or a header does not fit inside `size` bytes.
+static bool ReconstructLineOffsets(
+    std::vector<tinyexr::tinyexr_uint64> *offsets, size_t n,
+    const unsigned char *head, const unsigned char *marker, const size_t size) {
+  if (head >= marker) {
+    return false;
+  }
+  if (offsets->size() != n) {
+    return false;
+  }
+
+  for (size_t i = 0; i < n; i++) {
+    size_t offset = static_cast<size_t>(marker - head);
+    // Offset should not exceed whole EXR file/data size.
+    if ((offset + sizeof(tinyexr::tinyexr_uint64)) >= size) {
+      return false;
+    }
+
+    // The scanline number stored at `marker` is not needed; only the length is read.
+    unsigned int data_len;
+    memcpy(&data_len, marker + 4, sizeof(unsigned int));
+    // Byte-swap BEFORE validating, so the bound is applied to the same value
+    // that advances `marker` below (the old code checked the raw bytes).
+    tinyexr::swap4(&data_len);
+
+    if (data_len >= size) {
+      return false;
+    }
+
+    (*offsets)[i] = offset;
+    marker += data_len + 8;  // 8 = 4 bytes(y) + 4 bytes(data_len)
+  }
+
+  return true;
+}
+
+
+static int FloorLog2(unsigned x) {
+  // Returns floor(log2(x)) for x > 0, counting how many times x can be
+  // halved before it reaches 1. For x == 0 the loop never runs and the
+  // result is 0.
+  int shifts = 0;
+  for (unsigned rest = x; rest > 1; rest >>= 1u) {
+    ++shifts;
+  }
+
+  return shifts;
+}
+
+
+static int CeilLog2(unsigned x) {
+  // Returns ceil(log2(x)) for x > 0: the floor of the logarithm, plus one
+  // when any bit shifted out along the way was set (x is not a power of
+  // two). For x == 0 the loop never runs and the result is 0.
+  int shifts = 0;
+  bool inexact = false;
+  for (unsigned rest = x; rest > 1; rest >>= 1u) {
+    if ((rest & 1u) != 0u) {
+      inexact = true;
+    }
+    ++shifts;
+  }
+
+  return inexact ? shifts + 1 : shifts;
+}
+// log2(x) rounded per tile_rounding_mode: floor for TINYEXR_TILE_ROUND_DOWN, ceiling for anything else.
+static int RoundLog2(int x, int tile_rounding_mode) {
+  return (tile_rounding_mode != TINYEXR_TILE_ROUND_DOWN) ? CeilLog2(static_cast<unsigned>(x)) : FloorLog2(static_cast<unsigned>(x));
+}
+
+static int CalculateNumXLevels(const EXRHeader* exr_header) {
+  // Number of resolution levels along x, chosen by tile_level_mode:
+  //   ONE_LEVEL     -> 1
+  //   MIPMAP_LEVELS -> RoundLog2(max(width, height)) + 1
+  //   RIPMAP_LEVELS -> RoundLog2(width) + 1
+  // where width/height are the data window extents (max - min + 1).
+  // An unrecognised mode yields -1.
+  const int rounding = exr_header->tile_rounding_mode;
+  const int mode = exr_header->tile_level_mode;
+
+  if (mode == TINYEXR_TILE_ONE_LEVEL) {
+    // A single level.
+    return 1;
+  }
+
+  if (mode == TINYEXR_TILE_MIPMAP_LEVELS) {
+    // The level count is driven by the longer side of the data window.
+    const int width =
+        exr_header->data_window.max_x - exr_header->data_window.min_x + 1;
+    const int height =
+        exr_header->data_window.max_y - exr_header->data_window.min_y + 1;
+    const int longest = std::max(width, height);
+
+    return RoundLog2(longest, rounding) + 1;
+  }
+
+  if (mode == TINYEXR_TILE_RIPMAP_LEVELS) {
+    // Only the width matters for the x axis.
+    const int width =
+        exr_header->data_window.max_x - exr_header->data_window.min_x + 1;
+
+    return RoundLog2(width, rounding) + 1;
+  }
+
+  // Unknown tile level mode.
+  return -1;
+}
+
+static int CalculateNumYLevels(const EXRHeader* exr_header) {
+  // Number of resolution levels along y, chosen by tile_level_mode:
+  //   ONE_LEVEL     -> 1
+  //   MIPMAP_LEVELS -> RoundLog2(max(width, height)) + 1
+  //   RIPMAP_LEVELS -> RoundLog2(height) + 1
+  // where width/height are the data window extents (max - min + 1).
+  // An unrecognised mode yields -1.
+  const int rounding = exr_header->tile_rounding_mode;
+  const int mode = exr_header->tile_level_mode;
+
+  if (mode == TINYEXR_TILE_ONE_LEVEL) {
+    // A single level.
+    return 1;
+  }
+
+  if (mode == TINYEXR_TILE_MIPMAP_LEVELS) {
+    // The level count is driven by the longer side of the data window.
+    const int width =
+        exr_header->data_window.max_x - exr_header->data_window.min_x + 1;
+    const int height =
+        exr_header->data_window.max_y - exr_header->data_window.min_y + 1;
+    const int longest = std::max(width, height);
+
+    return RoundLog2(longest, rounding) + 1;
+  }
+
+  if (mode == TINYEXR_TILE_RIPMAP_LEVELS) {
+    // Only the height matters for the y axis.
+    const int height =
+        exr_header->data_window.max_y - exr_header->data_window.min_y + 1;
+
+    return RoundLog2(height, rounding) + 1;
+  }
+
+  // Unknown tile level mode.
+  return -1;
+}
+
+static bool CalculateNumTiles(std::vector<int>& numTiles,
+  int toplevel_size,
+  int size,
+  int tile_rounding_mode) {
+  // Fills numTiles[i] with ceil(LevelSize(toplevel_size, i, tile_rounding_mode) / size); false on a negative level size or when the TINYEXR_CHECK_AND_RETURN_C guard on l + size - 1 trips.
+  // A non-positive tile size is rejected up front: it would divide by zero (size == 0) or overflow the guard expression (size < 0) below.
+  if (size <= 0) return false;
+  for (unsigned i = 0; i < numTiles.size(); i++) {
+    const int l = LevelSize(toplevel_size, int(i), tile_rounding_mode);
+    if (l < 0) return false;
+    TINYEXR_CHECK_AND_RETURN_C(l <= std::numeric_limits<int>::max() - size + 1, false);
+    numTiles[i] = (l + size - 1) / size;
+  }
+  return true;
+}
+
+static bool PrecalculateTileInfo(std::vector<int>& num_x_tiles,
+  std::vector<int>& num_y_tiles,
+  const EXRHeader* exr_header) {
+  // Sizes num_x_tiles / num_y_tiles to one entry per resolution level and
+  // fills each entry with the tile count along that axis. Returns false
+  // when a level count or a tile count cannot be computed; the vectors
+  // may already have been resized at that point.
+  const int x_level_count = CalculateNumXLevels(exr_header);
+  if (x_level_count < 0) {
+    return false;
+  }
+
+  const int y_level_count = CalculateNumYLevels(exr_header);
+  if (y_level_count < 0) {
+    return false;
+  }
+
+  num_x_tiles.resize(static_cast<size_t>(x_level_count));
+  num_y_tiles.resize(static_cast<size_t>(y_level_count));
+
+  // Tile counts along x, from the top-level data window width.
+  const int full_width =
+      exr_header->data_window.max_x - exr_header->data_window.min_x + 1;
+  const bool x_ok = CalculateNumTiles(num_x_tiles, full_width,
+                                      exr_header->tile_size_x,
+                                      exr_header->tile_rounding_mode);
+  if (!x_ok) {
+    return false;
+  }
+
+  // Same along y, from the top-level data window height.
+  const int full_height =
+      exr_header->data_window.max_y - exr_header->data_window.min_y + 1;
+  const bool y_ok = CalculateNumTiles(num_y_tiles, full_height,
+                                      exr_header->tile_size_y,
+                                      exr_header->tile_rounding_mode);
+
+  return y_ok;
+}
+// Shapes offset_data as one level holding one row of num_blocks offsets (used for the non-tiled case).
+static void InitSingleResolutionOffsets(OffsetData& offset_data, size_t num_blocks) {
+  offset_data.offsets.resize(1);
+  offset_data.offsets.front().resize(1);
+  offset_data.offsets.front().front().resize(num_blocks);
+  offset_data.num_x_levels = 1;
+  offset_data.num_y_levels = 1;
+}
+
+// Allocates the per-level offset tables for a tiled image.
+// Returns the total number of tile blocks; 0 = error.
+static int InitTileOffsets(OffsetData& offset_data,
+  const EXRHeader* exr_header,
+  const std::vector<int>& num_x_tiles,
+  const std::vector<int>& num_y_tiles) {
+  int total_blocks = 0;
+  offset_data.num_x_levels = static_cast<int>(num_x_tiles.size());
+  offset_data.num_y_levels = static_cast<int>(num_y_tiles.size());
+  const int x_levels = offset_data.num_x_levels;
+  const int y_levels = offset_data.num_y_levels;
+  switch (exr_header->tile_level_mode) {
+  case TINYEXR_TILE_ONE_LEVEL:
+  case TINYEXR_TILE_MIPMAP_LEVELS:
+    // One table per level; both axes must agree on the level count.
+    TINYEXR_CHECK_AND_RETURN_C(offset_data.num_x_levels == offset_data.num_y_levels, 0);
+    offset_data.offsets.resize(size_t(x_levels));
+    for (size_t level = 0; level < offset_data.offsets.size(); ++level) {
+      const size_t rows = size_t(num_y_tiles[level]);
+      offset_data.offsets[level].resize(rows);
+      for (size_t row = 0; row < rows; ++row) {
+        offset_data.offsets[level][row].resize(size_t(num_x_tiles[level]));
+        total_blocks += num_x_tiles[level];
+      }
+    }
+    break;
+
+  case TINYEXR_TILE_RIPMAP_LEVELS:
+    // One table per (lx, ly) pair, stored at index ly * x_levels + lx.
+    offset_data.offsets.resize(static_cast<size_t>(x_levels) * static_cast<size_t>(y_levels));
+    for (int ly = 0; ly < y_levels; ++ly) {
+      for (int lx = 0; lx < x_levels; ++lx) {
+        const size_t table = size_t(ly * x_levels + lx);
+        const size_t rows = size_t(num_y_tiles[size_t(ly)]);
+        offset_data.offsets[table].resize(rows);
+        for (size_t row = 0; row < rows; ++row) {
+          offset_data.offsets[table][row].resize(size_t(num_x_tiles[size_t(lx)]));
+          total_blocks += num_x_tiles[size_t(lx)];
+        }
+      }
+    }
+    break;
+  default:
+    return 0;
+  }
+  return total_blocks;
+}
+// True when any stored offset, viewed as a signed 64-bit value, is zero or negative.
+static bool IsAnyOffsetsAreInvalid(const OffsetData& offset_data) {
+  for (size_t level = 0; level < offset_data.offsets.size(); ++level) {
+    for (size_t row = 0; row < offset_data.offsets[level].size(); ++row) {
+      for (size_t col = 0; col < offset_data.offsets[level][row].size(); ++col)
+        if (reinterpret_cast<const tinyexr::tinyexr_int64&>(offset_data.offsets[level][row][col]) <= 0) return true;
+    }
+  }
+  return false;
+}
+
+static bool isValidTile(const EXRHeader* exr_header,
+                        const OffsetData& offset_data,
+                        int dx, int dy, int lx, int ly) {
+  // Checks that tile (dx, dy) at level (lx, ly) addresses an existing slot
+  // of offset_data.offsets for the header's tile level mode:
+  //   ONE_LEVEL     -> only level (0, 0), stored in table 0
+  //   MIPMAP_LEVELS -> table lx (ly only has to be below num_y_levels)
+  //   RIPMAP_LEVELS -> table lx + ly * num_x_levels
+  // Negative coordinates and unknown modes are never valid.
+  if (dx < 0 || dy < 0 || lx < 0 || ly < 0) {
+    return false;
+  }
+
+  const int x_levels = offset_data.num_x_levels;
+  const int y_levels = offset_data.num_y_levels;
+  // Step 1: pick the per-level table for this mode.
+  size_t table = 0;
+  switch (exr_header->tile_level_mode) {
+  case TINYEXR_TILE_ONE_LEVEL:
+    if (lx != 0 || ly != 0) {
+      return false;
+    }
+    break;
+
+  case TINYEXR_TILE_MIPMAP_LEVELS:
+    if (lx >= x_levels || ly >= y_levels) {
+      return false;
+    }
+    table = static_cast<size_t>(lx);
+    break;
+
+  case TINYEXR_TILE_RIPMAP_LEVELS:
+    if (lx >= x_levels || ly >= y_levels) {
+      return false;
+    }
+    table = static_cast<size_t>(lx) +
+            static_cast<size_t>(ly) * static_cast<size_t>(x_levels);
+    break;
+
+  default:
+    return false;
+  }
+
+  // Step 2: the table, its row dy and that row's column dx must all exist.
+  if (table >= offset_data.offsets.size()) {
+    return false;
+  }
+  if (static_cast<size_t>(dy) >= offset_data.offsets[table].size()) {
+    return false;
+  }
+  return static_cast<size_t>(dx) < offset_data.offsets[table][static_cast<size_t>(dy)].size();
+}
+
+static bool ReconstructTileOffsets(OffsetData& offset_data,
+                                   const EXRHeader* exr_header,
+                                   const unsigned char* head, const unsigned char* marker, const size_t size,
+                                   bool isMultiPartFile,
+                                   bool isDeep) {
+  // Rebuilds the tile offset table by walking the chunks that start at
+  // `marker`; one chunk is parsed per slot of offset_data.offsets:
+  //   [int32 part number]                       (multipart files only)
+  //   [int32 tileX][int32 tileY][int32 levelX][int32 levelY]
+  //   deep:     [int64 packed offset table size][int64 packed sample size]
+  //             [int64 unpacked sample size][both packed payloads]
+  //   non-deep: [uint32 data size][payload]
+  // The chunk's start offset (relative to `head`) is stored in the slot
+  // named by the chunk's own tile/level coordinates. Returns false when a
+  // chunk does not end strictly before head + size, or when its
+  // coordinates do not name an existing slot.
+  const unsigned char* const end = head + size;
+  int numXLevels = offset_data.num_x_levels;
+  for (unsigned int l = 0; l < offset_data.offsets.size(); ++l) {
+    for (unsigned int dy = 0; dy < offset_data.offsets[l].size(); ++dy) {
+      for (unsigned int dx = 0; dx < offset_data.offsets[l][dy].size(); ++dx) {
+        // Count the bytes left instead of comparing advanced pointers, so
+        // `marker` never leaves [head, end).
+        if (marker < head || marker >= end) return false;
+        tinyexr::tinyexr_uint64 remaining = tinyexr::tinyexr_uint64(end - marker);
+        tinyexr::tinyexr_uint64 tileOffset = tinyexr::tinyexr_uint64(marker - head);
+
+        if (isMultiPartFile) {
+          if (sizeof(int) >= remaining) return false;
+
+          //int partNumber;
+          marker += sizeof(int);
+          remaining -= sizeof(int);
+        }
+
+        if (4 * sizeof(int) >= remaining) return false;
+
+        int tileX;
+        memcpy(&tileX, marker, sizeof(int));
+        tinyexr::swap4(&tileX);
+        marker += sizeof(int);
+
+        int tileY;
+        memcpy(&tileY, marker, sizeof(int));
+        tinyexr::swap4(&tileY);
+        marker += sizeof(int);
+
+        int levelX;
+        memcpy(&levelX, marker, sizeof(int));
+        tinyexr::swap4(&levelX);
+        marker += sizeof(int);
+
+        int levelY;
+        memcpy(&levelY, marker, sizeof(int));
+        tinyexr::swap4(&levelY);
+        marker += sizeof(int);
+        remaining -= 4 * sizeof(int);
+
+        if (isDeep) {
+          if (2 * sizeof(tinyexr::tinyexr_int64) >= remaining) return false;
+
+          tinyexr::tinyexr_int64 packed_offset_table_size;
+          memcpy(&packed_offset_table_size, marker, sizeof(tinyexr::tinyexr_int64));
+          tinyexr::swap8(reinterpret_cast<tinyexr::tinyexr_uint64*>(&packed_offset_table_size));
+          marker += sizeof(tinyexr::tinyexr_int64);
+
+          tinyexr::tinyexr_int64 packed_sample_size;
+          memcpy(&packed_sample_size, marker, sizeof(tinyexr::tinyexr_int64));
+          tinyexr::swap8(reinterpret_cast<tinyexr::tinyexr_uint64*>(&packed_sample_size));
+          marker += sizeof(tinyexr::tinyexr_int64);
+          remaining -= 2 * sizeof(tinyexr::tinyexr_int64);
+
+          // Both sizes come from the file. A negative value would move
+          // `marker` backwards and a huge one would wrap the sum, so each
+          // term is checked against the bytes that are still left.
+          if (packed_offset_table_size < 0 || packed_sample_size < 0) return false;
+          if (tinyexr::tinyexr_uint64(packed_offset_table_size) >= remaining) return false;
+          remaining -= tinyexr::tinyexr_uint64(packed_offset_table_size);
+          if (tinyexr::tinyexr_uint64(packed_sample_size) >= remaining) return false;
+          remaining -= tinyexr::tinyexr_uint64(packed_sample_size);
+
+          // next Int64 is unpacked sample size - skip that too
+          if (remaining <= 8) return false;
+          marker += packed_offset_table_size + packed_sample_size + 8;
+        } else {
+          if (sizeof(uint32_t) >= remaining) return false;
+
+          uint32_t dataSize;
+          memcpy(&dataSize, marker, sizeof(uint32_t));
+          tinyexr::swap4(&dataSize);
+          marker += sizeof(uint32_t);
+          remaining -= sizeof(uint32_t);
+
+          // The payload has to end before `end`.
+          if (dataSize >= remaining) return false;
+          marker += dataSize;
+        }
+
+        if (!isValidTile(exr_header, offset_data, tileX, tileY, levelX, levelY)) return false;
+
+        int level_idx = LevelIndex(levelX, levelY, exr_header->tile_level_mode, numXLevels);
+        if (level_idx < 0) return false;
+        if (size_t(level_idx) >= offset_data.offsets.size()) return false;
+        if (size_t(tileY) >= offset_data.offsets[size_t(level_idx)].size()) return false;
+        if (size_t(tileX) >= offset_data.offsets[size_t(level_idx)][size_t(tileY)].size()) return false;
+
+        offset_data.offsets[size_t(level_idx)][size_t(tileY)][size_t(tileX)] = tileOffset;
+      }
+    }
+  }
+  return true;
+}
+
+// Reads one 64-bit offset per slot of offset_data.offsets; `marker` is advanced past each one read, so it is an output too.
+static int ReadOffsets(OffsetData& offset_data,
+                       const unsigned char* head,
+                       const unsigned char*& marker,
+                       const size_t size,
+                       const char** err) {
+  const unsigned char* const buffer_end = head + size;
+  for (size_t level = 0; level < offset_data.offsets.size(); ++level) {
+    for (size_t row = 0; row < offset_data.offsets[level].size(); ++row) {
+      for (size_t col = 0; col < offset_data.offsets[level][row].size(); ++col) {
+        if ((marker + sizeof(tinyexr_uint64)) >= buffer_end) {
+          tinyexr::SetErrorMessage("Insufficient data size in offset table.", err);
+          return TINYEXR_ERROR_INVALID_DATA;
+        }
+        tinyexr::tinyexr_uint64 value;
+        memcpy(&value, marker, sizeof(tinyexr::tinyexr_uint64));
+        tinyexr::swap8(&value);
+        if (value >= size) {
+          tinyexr::SetErrorMessage("Invalid offset value in DecodeEXRImage.", err);
+          return TINYEXR_ERROR_INVALID_DATA;
+        }
+        marker += sizeof(tinyexr::tinyexr_uint64);  // = 8
+        offset_data.offsets[level][row][col] = value;
+      }
+    }
+  }
+  return TINYEXR_SUCCESS;
+}
+
+static int DecodeEXRImage(EXRImage *exr_image, const EXRHeader *exr_header,
+                          const unsigned char *head,
+                          const unsigned char *marker, const size_t size,
+                          const char **err) {
+  if (exr_image == NULL || exr_header == NULL || head == NULL ||
+      marker == NULL || (size <= tinyexr::kEXRVersionSize)) {
+    tinyexr::SetErrorMessage("Invalid argument for DecodeEXRImage().", err);
+    return TINYEXR_ERROR_INVALID_ARGUMENT;
+  }
+
+  int num_scanline_blocks = 1;
+  if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) {
+    num_scanline_blocks = 16;
+  } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) {
+    num_scanline_blocks = 32;
+  } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) {
+    num_scanline_blocks = 16;
+  }
+
+  if (exr_header->data_window.max_x < exr_header->data_window.min_x ||
+      exr_header->data_window.max_x - exr_header->data_window.min_x ==
+          std::numeric_limits<int>::max()) {
+    // Issue 63
+    tinyexr::SetErrorMessage("Invalid data width value", err);
+    return TINYEXR_ERROR_INVALID_DATA;
+  }
+  tinyexr_int64 data_width =
+      static_cast<tinyexr_int64>(exr_header->data_window.max_x) - static_cast<tinyexr_int64>(exr_header->data_window.min_x) + static_cast<tinyexr_int64>(1);
+  if (data_width <= 0) {
+    tinyexr::SetErrorMessage("Invalid data window width value", err);
+    return TINYEXR_ERROR_INVALID_DATA;
+  }
+
+  if (exr_header->data_window.max_y < exr_header->data_window.min_y ||
+      exr_header->data_window.max_y - exr_header->data_window.min_y ==
+          std::numeric_limits<int>::max()) {
+    tinyexr::SetErrorMessage("Invalid data height value", err);
+    return TINYEXR_ERROR_INVALID_DATA;
+  }
+  tinyexr_int64 data_height =
+      static_cast<tinyexr_int64>(exr_header->data_window.max_y) - static_cast<tinyexr_int64>(exr_header->data_window.min_y) + static_cast<tinyexr_int64>(1);
+
+  if (data_height <= 0) {
+    tinyexr::SetErrorMessage("Invalid data window height value", err);
+    return TINYEXR_ERROR_INVALID_DATA;
+  }
+
+  // Do not allow too large data_width and data_height. header invalid?
+  {
+    if (data_width > TINYEXR_DIMENSION_THRESHOLD) {
+      tinyexr::SetErrorMessage("data width too large.", err);
+      return TINYEXR_ERROR_INVALID_DATA;
+    }
+    if (data_height > TINYEXR_DIMENSION_THRESHOLD) {
+      tinyexr::SetErrorMessage("data height too large.", err);
+      return TINYEXR_ERROR_INVALID_DATA;
+    }
+  }
+
+  if (exr_header->tiled) {
+    if (exr_header->tile_size_x > TINYEXR_DIMENSION_THRESHOLD) {
+      tinyexr::SetErrorMessage("tile width too large.", err);
+      return TINYEXR_ERROR_INVALID_DATA;
+    }
+    if (exr_header->tile_size_y > TINYEXR_DIMENSION_THRESHOLD) {
+      tinyexr::SetErrorMessage("tile height too large.", err);
+      return TINYEXR_ERROR_INVALID_DATA;
+    }
+  }
+
+  // Read offset tables.
+  OffsetData offset_data;
+  size_t num_blocks = 0;
+  // For a multi-resolution image, the size of the offset table will be calculated from the other attributes of the header.
+  // If chunk_count > 0 then chunk_count must be equal to the calculated tile count.
+  if (exr_header->tiled) {
+    {
+      std::vector<int> num_x_tiles, num_y_tiles;
+      if (!PrecalculateTileInfo(num_x_tiles, num_y_tiles, exr_header)) {
+        tinyexr::SetErrorMessage("Failed to precalculate tile info.", err);
+        return TINYEXR_ERROR_INVALID_DATA;
+      }
+      num_blocks = size_t(InitTileOffsets(offset_data, exr_header, num_x_tiles, num_y_tiles));
+      if (exr_header->chunk_count > 0) {
+        if (exr_header->chunk_count != static_cast<int>(num_blocks)) {
+          tinyexr::SetErrorMessage("Invalid offset table size.", err);
+          return TINYEXR_ERROR_INVALID_DATA;
+        }
+      }
+    }
+
+    int ret = ReadOffsets(offset_data, head, marker, size, err);
+    if (ret != TINYEXR_SUCCESS) return ret;
+    if (IsAnyOffsetsAreInvalid(offset_data)) {
+      if (!ReconstructTileOffsets(offset_data, exr_header,
+        head, marker, size,
+        exr_header->multipart, exr_header->non_image)) {
+
+          tinyexr::SetErrorMessage("Invalid Tile Offsets data.", err);
+          return TINYEXR_ERROR_INVALID_DATA;
+      }
+    }
+  } else if (exr_header->chunk_count > 0) {
+    // Use `chunkCount` attribute.
+    num_blocks = static_cast<size_t>(exr_header->chunk_count);
+    InitSingleResolutionOffsets(offset_data, num_blocks);
+  } else {
+    num_blocks = static_cast<size_t>(data_height) /
+      static_cast<size_t>(num_scanline_blocks);
+    if (num_blocks * static_cast<size_t>(num_scanline_blocks) <
+      static_cast<size_t>(data_height)) {
+      num_blocks++;
+    }
+
+    InitSingleResolutionOffsets(offset_data, num_blocks);
+  }
+
+  if (!exr_header->tiled) {
+    std::vector<tinyexr::tinyexr_uint64>& offsets = offset_data.offsets[0][0];
+    for (size_t y = 0; y < num_blocks; y++) {
+      tinyexr::tinyexr_uint64 offset;
+      // Issue #81
+      if ((marker + sizeof(tinyexr_uint64)) >= (head + size)) {
+        tinyexr::SetErrorMessage("Insufficient data size in offset table.", err);
+        return TINYEXR_ERROR_INVALID_DATA;
+      }
+
+      memcpy(&offset, marker, sizeof(tinyexr::tinyexr_uint64));
+      tinyexr::swap8(&offset);
+      if (offset >= size) {
+        tinyexr::SetErrorMessage("Invalid offset value in DecodeEXRImage.", err);
+        return TINYEXR_ERROR_INVALID_DATA;
+      }
+      marker += sizeof(tinyexr::tinyexr_uint64);  // = 8
+      offsets[y] = offset;
+    }
+
+    // If line offsets are invalid, we try to reconstruct it.
+    // See OpenEXR/IlmImf/ImfScanLineInputFile.cpp::readLineOffsets() for details.
+    for (size_t y = 0; y < num_blocks; y++) {
+      if (offsets[y] <= 0) {
+        // TODO(syoyo) Report as warning?
+        // if (err) {
+        //  stringstream ss;
+        //  ss << "Incomplete lineOffsets." << std::endl;
+        //  (*err) += ss.str();
+        //}
+        bool ret =
+          ReconstructLineOffsets(&offsets, num_blocks, head, marker, size);
+        if (ret) {
+          // OK
+          break;
+        } else {
+          tinyexr::SetErrorMessage(
+            "Cannot reconstruct lineOffset table in DecodeEXRImage.", err);
+          return TINYEXR_ERROR_INVALID_DATA;
+        }
+      }
+    }
+  }
+
+  {
+    std::string e;
+    int ret = DecodeChunk(exr_image, exr_header, offset_data, head, size, &e);
+
+    if (ret != TINYEXR_SUCCESS) {
+      if (!e.empty()) {
+        tinyexr::SetErrorMessage(e, err);
+      }
+
+#if 1
+      FreeEXRImage(exr_image);
+#else
+      // release memory(if exists)
+      if ((exr_header->num_channels > 0) && exr_image && exr_image->images) {
+        for (size_t c = 0; c < size_t(exr_header->num_channels); c++) {
+          if (exr_image->images[c]) {
+            free(exr_image->images[c]);
+            exr_image->images[c] = NULL;
+          }
+        }
+        free(exr_image->images);
+        exr_image->images = NULL;
+      }
+#endif
+    }
+
+    return ret;
+  }
+}
+
+// Collects the distinct layer names of `exr_header` into `layer_names`. A
+// channel "<layer>.<name>" contributes "<layer>" (text before the last '.').
+static void GetLayers(const EXRHeader &exr_header,
+                      std::vector<std::string> &layer_names) {
+  layer_names.clear();
+  for (int ch = 0; ch < exr_header.num_channels; ++ch) {
+    const std::string channel_name(exr_header.channels[ch].name);
+    const size_t dot = channel_name.rfind('.');
+    // Skip names with no '.', a leading '.', or nothing after the last '.'.
+    if (dot == std::string::npos || dot == 0) continue;
+    if (dot + 1 >= channel_name.size()) continue;
+    const std::string layer = channel_name.substr(0, dot);
+    const bool already_listed =
+        std::find(layer_names.begin(), layer_names.end(), layer) !=
+        layer_names.end();
+    if (!already_listed) layer_names.push_back(layer);
+  }
+}
+
+struct LayerChannel {  // A channel's header index plus its name within a layer.
+  explicit LayerChannel(size_t i, const std::string &n) : index(i), name(n) {}
+  size_t index;      // Index of the channel in EXRHeader::channels.
+  std::string name;  // Channel name, normally with the layer prefix stripped.
+};
+
+// Lists the channels of `exr_header` that belong to `layer_name`. With an
+// empty `layer_name`, only channels whose name has no layer part are listed.
+static void ChannelsInLayer(const EXRHeader &exr_header,
+                            const std::string &layer_name,
+                            std::vector<LayerChannel> &channels) {
+  channels.clear();
+  const bool want_default_layer = layer_name.empty();
+  const std::string prefix = layer_name + '.';
+  for (int idx = 0; idx < exr_header.num_channels; ++idx) {
+    std::string short_name(exr_header.channels[idx].name);
+    if (want_default_layer) {
+      const size_t last_dot = short_name.rfind('.');
+      // A '.' anywhere but the first character means the channel is layered.
+      if (last_dot != std::string::npos && last_dot != 0) continue;
+      // A lone leading '.' is dropped from the reported name.
+      if (last_dot == 0) short_name.erase(0, 1);
+    } else {
+      const size_t where = short_name.find(prefix);
+      if (where == std::string::npos) continue;
+      // Strip the prefix only when the name starts with it; otherwise keep it.
+      if (where == 0) short_name.erase(0, prefix.size());
+    }
+    channels.push_back(LayerChannel(size_t(idx), short_name));
+  }
+}
+
+}  // namespace tinyexr
+
+// Lists the layer names found in the EXR file `filename`. On success,
+// `*layer_names` is a malloc'ed array of `*num_layers` strdup'ed strings.
+int EXRLayers(const char *filename, const char **layer_names[], int *num_layers,
+              const char **err) {
+  if (layer_names == NULL || num_layers == NULL) {
+    tinyexr::SetErrorMessage("Invalid argument for EXRLayers()", err);
+    return TINYEXR_ERROR_INVALID_ARGUMENT;
+  }
+  EXRVersion exr_version;
+  EXRHeader exr_header;
+  InitEXRHeader(&exr_header);
+  {
+    int ret = ParseEXRVersionFromFile(&exr_version, filename);
+    if (ret != TINYEXR_SUCCESS) {
+      tinyexr::SetErrorMessage("Invalid EXR header.", err);
+      return ret;
+    }
+    if (exr_version.multipart || exr_version.non_image) {
+      tinyexr::SetErrorMessage(
+          "Loading multipart or DeepImage is not supported  in EXRLayers() API",
+          err);
+      return TINYEXR_ERROR_INVALID_DATA;  // @fixme.
+    }
+  }
+  int ret = ParseEXRHeaderFromFile(&exr_header, &exr_version, filename, err);
+  if (ret != TINYEXR_SUCCESS) {
+    FreeEXRHeader(&exr_header);
+    return ret;
+  }
+  std::vector<std::string> layer_vec;
+  tinyexr::GetLayers(exr_header, layer_vec);
+  (*num_layers) = int(layer_vec.size());
+  (*layer_names) = static_cast<const char **>(
+      malloc(sizeof(const char *) * static_cast<size_t>(layer_vec.size())));
+  for (size_t c = 0; c < static_cast<size_t>(layer_vec.size()); c++) {
+#ifdef _MSC_VER
+    (*layer_names)[c] = _strdup(layer_vec[c].c_str());
+#else
+    (*layer_names)[c] = strdup(layer_vec[c].c_str());
+#endif
+  }
+  FreeEXRHeader(&exr_header);
+  return TINYEXR_SUCCESS;
+}
+
+int LoadEXR(float **out_rgba, int *width, int *height, const char *filename,
+            const char **err) {
+  const char *const no_layer = NULL;  // NULL: use channels without a layer prefix.
+  return LoadEXRWithLayer(out_rgba, width, height, filename, no_layer, err);
+}
+
+// Loads one layer of an EXR file as interleaved RGBA floats into a malloc'ed
+// `*out_rgba`; a NULL `layername` selects channels without a layer prefix.
+int LoadEXRWithLayer(float **out_rgba, int *width, int *height,
+                     const char *filename, const char *layername,
+                     const char **err) {
+  if (out_rgba == NULL) {
+    tinyexr::SetErrorMessage("Invalid argument for LoadEXR()", err);
+    return TINYEXR_ERROR_INVALID_ARGUMENT;
+  }
+
+  EXRVersion exr_version;
+  EXRImage exr_image;
+  EXRHeader exr_header;
+  InitEXRHeader(&exr_header);
+  InitEXRImage(&exr_image);
+
+  {
+    int ret = ParseEXRVersionFromFile(&exr_version, filename);
+    if (ret != TINYEXR_SUCCESS) {
+      std::stringstream ss;
+      ss << "Failed to open EXR file or read version info from EXR file. code("
+         << ret << ")";
+      tinyexr::SetErrorMessage(ss.str(), err);
+      return ret;
+    }
+
+    if (exr_version.multipart || exr_version.non_image) {
+      tinyexr::SetErrorMessage(
+          "Loading multipart or DeepImage is not supported  in LoadEXR() API",
+          err);
+      return TINYEXR_ERROR_INVALID_DATA;  // @fixme.
+    }
+  }
+
+  {
+    int ret = ParseEXRHeaderFromFile(&exr_header, &exr_version, filename, err);
+    if (ret != TINYEXR_SUCCESS) {
+      FreeEXRHeader(&exr_header);
+      return ret;
+    }
+  }
+
+  // Read HALF channel as FLOAT.
+  for (int i = 0; i < exr_header.num_channels; i++) {
+    if (exr_header.pixel_types[i] == TINYEXR_PIXELTYPE_HALF) {
+      exr_header.requested_pixel_types[i] = TINYEXR_PIXELTYPE_FLOAT;
+    }
+  }
+
+  // TODO: Probably limit loading to layers (channels) selected by layer index
+  {
+    int ret = LoadEXRImageFromFile(&exr_image, &exr_header, filename, err);
+    if (ret != TINYEXR_SUCCESS) {
+      FreeEXRHeader(&exr_header);
+      return ret;
+    }
+  }
+
+  // Header indices of the R, G, B and A channels (-1 while not found).
+  int idxR = -1;
+  int idxG = -1;
+  int idxB = -1;
+  int idxA = -1;
+
+  std::vector<std::string> layer_names;
+  tinyexr::GetLayers(exr_header, layer_names);
+
+  std::vector<tinyexr::LayerChannel> channels;
+  tinyexr::ChannelsInLayer(
+      exr_header, layername == NULL ? "" : std::string(layername), channels);
+
+  if (channels.size() < 1) {
+    if (layername == NULL) {
+      tinyexr::SetErrorMessage("Layer Not Found. Seems EXR contains channels with layer(e.g. `diffuse.R`). if you are using LoadEXR(), please try LoadEXRWithLayer(). LoadEXR() cannot load EXR having channels with layer.", err);
+    } else {
+      tinyexr::SetErrorMessage("Layer Not Found", err);
+    }
+    FreeEXRHeader(&exr_header);
+    FreeEXRImage(&exr_image);
+    return TINYEXR_ERROR_LAYER_NOT_FOUND;
+  }
+
+  // Scan every channel of the layer (not just the first four) for R/G/B/A.
+  for (size_t c = 0; c < channels.size(); c++) {
+    const tinyexr::LayerChannel &ch = channels[c];
+
+    if (ch.name == "R") {
+      idxR = int(ch.index);
+    } else if (ch.name == "G") {
+      idxG = int(ch.index);
+    } else if (ch.name == "B") {
+      idxB = int(ch.index);
+    } else if (ch.name == "A") {
+      idxA = int(ch.index);
+    }
+  }
+
+  if (channels.size() == 1) {
+    int chIdx = int(channels.front().index);
+    // Grayscale channel only.
+    // The single channel is copied to all four RGBA components.
+    (*out_rgba) = reinterpret_cast<float *>(
+        malloc(4 * sizeof(float) * static_cast<size_t>(exr_image.width) *
+               static_cast<size_t>(exr_image.height)));
+
+    if (exr_header.tiled) {
+      const size_t tile_size_x = static_cast<size_t>(exr_header.tile_size_x);
+      const size_t tile_size_y = static_cast<size_t>(exr_header.tile_size_y);
+      for (int it = 0; it < exr_image.num_tiles; it++) {
+        for (size_t j = 0; j < tile_size_y; j++) {
+          for (size_t i = 0; i < tile_size_x; i++) {
+            const size_t ii =
+              static_cast<size_t>(exr_image.tiles[it].offset_x) * tile_size_x +
+              i;
+            const size_t jj =
+              static_cast<size_t>(exr_image.tiles[it].offset_y) * tile_size_y +
+              j;
+            const size_t idx = ii + jj * static_cast<size_t>(exr_image.width);
+
+            // out of region check.
+            if (ii >= static_cast<size_t>(exr_image.width)) {
+              continue;
+            }
+            if (jj >= static_cast<size_t>(exr_image.height)) {
+              continue;
+            }
+            const size_t srcIdx = i + j * tile_size_x;
+            unsigned char **src = exr_image.tiles[it].images;
+            (*out_rgba)[4 * idx + 0] =
+                reinterpret_cast<float **>(src)[chIdx][srcIdx];
+            (*out_rgba)[4 * idx + 1] =
+                reinterpret_cast<float **>(src)[chIdx][srcIdx];
+            (*out_rgba)[4 * idx + 2] =
+                reinterpret_cast<float **>(src)[chIdx][srcIdx];
+            (*out_rgba)[4 * idx + 3] =
+                reinterpret_cast<float **>(src)[chIdx][srcIdx];
+          }
+        }
+      }
+    } else {
+      const size_t pixel_size = static_cast<size_t>(exr_image.width) *
+        static_cast<size_t>(exr_image.height);
+      for (size_t i = 0; i < pixel_size; i++) {
+        const float val =
+            reinterpret_cast<float **>(exr_image.images)[chIdx][i];
+        (*out_rgba)[4 * i + 0] = val;
+        (*out_rgba)[4 * i + 1] = val;
+        (*out_rgba)[4 * i + 2] = val;
+        (*out_rgba)[4 * i + 3] = val;
+      }
+    }
+  } else {
+    // Assume RGB(A)
+    // R, G and B are mandatory; a missing A is filled with 1.0.
+    if (idxR == -1) {
+      tinyexr::SetErrorMessage("R channel not found", err);
+
+      FreeEXRHeader(&exr_header);
+      FreeEXRImage(&exr_image);
+      return TINYEXR_ERROR_INVALID_DATA;
+    }
+
+    if (idxG == -1) {
+      tinyexr::SetErrorMessage("G channel not found", err);
+      FreeEXRHeader(&exr_header);
+      FreeEXRImage(&exr_image);
+      return TINYEXR_ERROR_INVALID_DATA;
+    }
+
+    if (idxB == -1) {
+      tinyexr::SetErrorMessage("B channel not found", err);
+      FreeEXRHeader(&exr_header);
+      FreeEXRImage(&exr_image);
+      return TINYEXR_ERROR_INVALID_DATA;
+    }
+
+    (*out_rgba) = reinterpret_cast<float *>(
+        malloc(4 * sizeof(float) * static_cast<size_t>(exr_image.width) *
+               static_cast<size_t>(exr_image.height)));
+    if (exr_header.tiled) {
+      const size_t tile_size_x = static_cast<size_t>(exr_header.tile_size_x);
+      const size_t tile_size_y = static_cast<size_t>(exr_header.tile_size_y);
+      for (int it = 0; it < exr_image.num_tiles; it++) {
+        for (size_t j = 0; j < tile_size_y; j++) {
+          for (size_t i = 0; i < tile_size_x; i++) {
+            const size_t ii =
+                static_cast<size_t>(exr_image.tiles[it].offset_x) *
+                    tile_size_x +
+                i;
+            const size_t jj =
+                static_cast<size_t>(exr_image.tiles[it].offset_y) *
+                    tile_size_y +
+                j;
+            const size_t idx = ii + jj * static_cast<size_t>(exr_image.width);
+
+            // out of region check.
+            if (ii >= static_cast<size_t>(exr_image.width)) {
+              continue;
+            }
+            if (jj >= static_cast<size_t>(exr_image.height)) {
+              continue;
+            }
+            const size_t srcIdx = i + j * tile_size_x;
+            unsigned char **src = exr_image.tiles[it].images;
+            (*out_rgba)[4 * idx + 0] =
+                reinterpret_cast<float **>(src)[idxR][srcIdx];
+            (*out_rgba)[4 * idx + 1] =
+                reinterpret_cast<float **>(src)[idxG][srcIdx];
+            (*out_rgba)[4 * idx + 2] =
+                reinterpret_cast<float **>(src)[idxB][srcIdx];
+            if (idxA != -1) {
+              (*out_rgba)[4 * idx + 3] =
+                  reinterpret_cast<float **>(src)[idxA][srcIdx];
+            } else {
+              (*out_rgba)[4 * idx + 3] = 1.0;
+            }
+          }
+        }
+      }
+    } else {
+      const size_t pixel_size = static_cast<size_t>(exr_image.width) *
+        static_cast<size_t>(exr_image.height);
+      for (size_t i = 0; i < pixel_size; i++) {
+        (*out_rgba)[4 * i + 0] =
+            reinterpret_cast<float **>(exr_image.images)[idxR][i];
+        (*out_rgba)[4 * i + 1] =
+            reinterpret_cast<float **>(exr_image.images)[idxG][i];
+        (*out_rgba)[4 * i + 2] =
+            reinterpret_cast<float **>(exr_image.images)[idxB][i];
+        if (idxA != -1) {
+          (*out_rgba)[4 * i + 3] =
+              reinterpret_cast<float **>(exr_image.images)[idxA][i];
+        } else {
+          (*out_rgba)[4 * i + 3] = 1.0;
+        }
+      }
+    }
+  }
+
+  (*width) = exr_image.width;
+  (*height) = exr_image.height;
+
+  FreeEXRHeader(&exr_header);
+  FreeEXRImage(&exr_image);
+
+  return TINYEXR_SUCCESS;
+}
+
+// Checks whether `filename` looks like an EXR file by parsing its version info.
+// Returns TINYEXR_SUCCESS when ParseEXRVersionFromFile succeeds; otherwise
+// that call's error code is returned unchanged.
+int IsEXR(const char *filename) {
+  // Only the status matters here; the parsed version fields are discarded.
+  EXRVersion parsed_version;
+  const int status = ParseEXRVersionFromFile(&parsed_version, filename);
+
+  return (status == TINYEXR_SUCCESS) ? TINYEXR_SUCCESS : status;
+}
+
+// Checks whether the `size` bytes at `memory` look like EXR data by parsing
+// the version info. Returns TINYEXR_SUCCESS when ParseEXRVersionFromMemory
+// succeeds; otherwise that call's error code is returned unchanged.
+int IsEXRFromMemory(const unsigned char *memory, size_t size) {
+  // Only the status matters here; the parsed version fields are discarded.
+  EXRVersion parsed_version;
+  const int status = ParseEXRVersionFromMemory(&parsed_version, memory, size);
+
+  return (status == TINYEXR_SUCCESS) ? TINYEXR_SUCCESS : status;
+}
+
+// Parses the EXR header that follows the version block in `memory` and fills
+// `exr_header`; returns TINYEXR_SUCCESS or an error code (text goes to `err`).
+int ParseEXRHeaderFromMemory(EXRHeader *exr_header, const EXRVersion *version,
+                             const unsigned char *memory, size_t size,
+                             const char **err) {
+  if (memory == NULL || exr_header == NULL || version == NULL) {
+    tinyexr::SetErrorMessage(
+        "Invalid argument. `memory`, `exr_header` or `version` argument is "
+        "null in ParseEXRHeaderFromMemory()",
+        err);
+    return TINYEXR_ERROR_INVALID_ARGUMENT;
+  }
+
+  if (size < tinyexr::kEXRVersionSize) {
+    tinyexr::SetErrorMessage("Insufficient header/data size.\n", err);
+    return TINYEXR_ERROR_INVALID_DATA;
+  }
+  // Header attributes start right after the fixed-size version block.
+  const unsigned char *marker = memory + tinyexr::kEXRVersionSize;
+  size_t marker_size = size - tinyexr::kEXRVersionSize;
+
+  tinyexr::HeaderInfo info;
+  info.clear();
+
+  int ret;
+  {
+    std::string err_str;
+    ret = ParseEXRHeader(&info, NULL, version, &err_str, marker, marker_size);
+
+    if (ret != TINYEXR_SUCCESS) {
+      if (err && !err_str.empty()) {
+        tinyexr::SetErrorMessage(err_str, err);
+      }
+    }
+  }
+  // NOTE(review): conversion still runs when parsing failed above; confirm.
+  {
+    std::string warn;
+    std::string err_str;
+
+    if (!ConvertHeader(exr_header, info, &warn, &err_str)) {
+      // release mem
+      for (size_t i = 0; i < info.attributes.size(); i++) {
+        if (info.attributes[i].value) {
+          free(info.attributes[i].value);
+        }
+      }
+      if (err && !err_str.empty()) {
+        tinyexr::SetErrorMessage(err_str, err);
+      }
+      ret = TINYEXR_ERROR_INVALID_HEADER;
+    }
+  }
+
+  exr_header->multipart = version->multipart ? 1 : 0;
+  exr_header->non_image = version->non_image ? 1 : 0;
+
+  return ret;
+}
+
+// Decodes an in-memory EXR file into interleaved RGBA floats in a malloc'ed
+// `*out_rgba`; header/image state is released on failure as in LoadEXRWithLayer().
+int LoadEXRFromMemory(float **out_rgba, int *width, int *height,
+                      const unsigned char *memory, size_t size,
+                      const char **err) {
+  if (out_rgba == NULL || memory == NULL) {
+    tinyexr::SetErrorMessage("Invalid argument for LoadEXRFromMemory", err);
+    return TINYEXR_ERROR_INVALID_ARGUMENT;
+  }
+
+  EXRVersion exr_version;
+  EXRImage exr_image;
+  EXRHeader exr_header;
+  InitEXRHeader(&exr_header);
+
+  int ret = ParseEXRVersionFromMemory(&exr_version, memory, size);
+  if (ret != TINYEXR_SUCCESS) {
+    std::stringstream ss;
+    ss << "Failed to parse EXR version. code(" << ret << ")";
+    tinyexr::SetErrorMessage(ss.str(), err);
+    return ret;
+  }
+
+  ret = ParseEXRHeaderFromMemory(&exr_header, &exr_version, memory, size, err);
+  if (ret != TINYEXR_SUCCESS) {
+    FreeEXRHeader(&exr_header);
+    return ret;
+  }
+
+  // Read HALF channel as FLOAT.
+  for (int i = 0; i < exr_header.num_channels; i++) {
+    if (exr_header.pixel_types[i] == TINYEXR_PIXELTYPE_HALF) {
+      exr_header.requested_pixel_types[i] = TINYEXR_PIXELTYPE_FLOAT;
+    }
+  }
+
+  InitEXRImage(&exr_image);
+  ret = LoadEXRImageFromMemory(&exr_image, &exr_header, memory, size, err);
+  if (ret != TINYEXR_SUCCESS) {
+    FreeEXRHeader(&exr_header);
+    return ret;
+  }
+
+  // RGBA
+  int idxR = -1;
+  int idxG = -1;
+  int idxB = -1;
+  int idxA = -1;
+  for (int c = 0; c < exr_header.num_channels; c++) {
+    if (strcmp(exr_header.channels[c].name, "R") == 0) {
+      idxR = c;
+    } else if (strcmp(exr_header.channels[c].name, "G") == 0) {
+      idxG = c;
+    } else if (strcmp(exr_header.channels[c].name, "B") == 0) {
+      idxB = c;
+    } else if (strcmp(exr_header.channels[c].name, "A") == 0) {
+      idxA = c;
+    }
+  }
+
+  // TODO(syoyo): Refactor removing same code as used in LoadEXR().
+  if (exr_header.num_channels == 1) {
+    // Grayscale channel only.
+    (*out_rgba) = reinterpret_cast<float *>(
+        malloc(4 * sizeof(float) * static_cast<size_t>(exr_image.width) *
+               static_cast<size_t>(exr_image.height)));
+    if (exr_header.tiled) {
+      const size_t tile_size_x = static_cast<size_t>(exr_header.tile_size_x);
+      const size_t tile_size_y = static_cast<size_t>(exr_header.tile_size_y);
+      for (int it = 0; it < exr_image.num_tiles; it++) {
+        for (size_t j = 0; j < tile_size_y; j++) {
+          for (size_t i = 0; i < tile_size_x; i++) {
+            const size_t ii =
+                static_cast<size_t>(exr_image.tiles[it].offset_x) *
+                    tile_size_x +
+                i;
+            const size_t jj =
+                static_cast<size_t>(exr_image.tiles[it].offset_y) *
+                    tile_size_y +
+                j;
+            const size_t idx = ii + jj * static_cast<size_t>(exr_image.width);
+
+            // out of region check.
+            if (ii >= static_cast<size_t>(exr_image.width)) {
+              continue;
+            }
+            if (jj >= static_cast<size_t>(exr_image.height)) {
+              continue;
+            }
+            const size_t srcIdx = i + j * tile_size_x;
+            unsigned char **src = exr_image.tiles[it].images;
+            (*out_rgba)[4 * idx + 0] =
+                reinterpret_cast<float **>(src)[0][srcIdx];
+            (*out_rgba)[4 * idx + 1] =
+                reinterpret_cast<float **>(src)[0][srcIdx];
+            (*out_rgba)[4 * idx + 2] =
+                reinterpret_cast<float **>(src)[0][srcIdx];
+            (*out_rgba)[4 * idx + 3] =
+                reinterpret_cast<float **>(src)[0][srcIdx];
+          }
+        }
+      }
+    } else {
+      const size_t pixel_size = static_cast<size_t>(exr_image.width) *
+        static_cast<size_t>(exr_image.height);
+      for (size_t i = 0; i < pixel_size; i++) {
+        const float val = reinterpret_cast<float **>(exr_image.images)[0][i];
+        (*out_rgba)[4 * i + 0] = val;
+        (*out_rgba)[4 * i + 1] = val;
+        (*out_rgba)[4 * i + 2] = val;
+        (*out_rgba)[4 * i + 3] = val;
+      }
+    }
+  } else {
+    // TODO(syoyo): Support non RGBA image.
+    if (idxR == -1) {
+      tinyexr::SetErrorMessage("R channel not found", err);
+      FreeEXRHeader(&exr_header);
+      FreeEXRImage(&exr_image);
+      return TINYEXR_ERROR_INVALID_DATA;
+    }
+
+    if (idxG == -1) {
+      tinyexr::SetErrorMessage("G channel not found", err);
+      FreeEXRHeader(&exr_header);
+      FreeEXRImage(&exr_image);
+      return TINYEXR_ERROR_INVALID_DATA;
+    }
+
+    if (idxB == -1) {
+      tinyexr::SetErrorMessage("B channel not found", err);
+      FreeEXRHeader(&exr_header);
+      FreeEXRImage(&exr_image);
+      return TINYEXR_ERROR_INVALID_DATA;
+    }
+
+    (*out_rgba) = reinterpret_cast<float *>(
+        malloc(4 * sizeof(float) * static_cast<size_t>(exr_image.width) *
+               static_cast<size_t>(exr_image.height)));
+    if (exr_header.tiled) {
+      const size_t tile_size_x = static_cast<size_t>(exr_header.tile_size_x);
+      const size_t tile_size_y = static_cast<size_t>(exr_header.tile_size_y);
+      for (int it = 0; it < exr_image.num_tiles; it++) {
+        for (size_t j = 0; j < tile_size_y; j++)
+          for (size_t i = 0; i < tile_size_x; i++) {
+            const size_t ii =
+                static_cast<size_t>(exr_image.tiles[it].offset_x) *
+                    tile_size_x +
+                i;
+            const size_t jj =
+                static_cast<size_t>(exr_image.tiles[it].offset_y) *
+                    tile_size_y +
+                j;
+            const size_t idx = ii + jj * static_cast<size_t>(exr_image.width);
+
+            // out of region check.
+            if (ii >= static_cast<size_t>(exr_image.width)) {
+              continue;
+            }
+            if (jj >= static_cast<size_t>(exr_image.height)) {
+              continue;
+            }
+            const size_t srcIdx = i + j * tile_size_x;
+            unsigned char **src = exr_image.tiles[it].images;
+            (*out_rgba)[4 * idx + 0] =
+                reinterpret_cast<float **>(src)[idxR][srcIdx];
+            (*out_rgba)[4 * idx + 1] =
+                reinterpret_cast<float **>(src)[idxG][srcIdx];
+            (*out_rgba)[4 * idx + 2] =
+                reinterpret_cast<float **>(src)[idxB][srcIdx];
+            if (idxA != -1) {
+              (*out_rgba)[4 * idx + 3] =
+                  reinterpret_cast<float **>(src)[idxA][srcIdx];
+            } else {
+              (*out_rgba)[4 * idx + 3] = 1.0;
+            }
+          }
+      }
+    } else {
+      const size_t pixel_size = static_cast<size_t>(exr_image.width) *
+        static_cast<size_t>(exr_image.height);
+      for (size_t i = 0; i < pixel_size; i++) {
+        (*out_rgba)[4 * i + 0] =
+            reinterpret_cast<float **>(exr_image.images)[idxR][i];
+        (*out_rgba)[4 * i + 1] =
+            reinterpret_cast<float **>(exr_image.images)[idxG][i];
+        (*out_rgba)[4 * i + 2] =
+            reinterpret_cast<float **>(exr_image.images)[idxB][i];
+        if (idxA != -1) {
+          (*out_rgba)[4 * i + 3] =
+              reinterpret_cast<float **>(exr_image.images)[idxA][i];
+        } else {
+          (*out_rgba)[4 * i + 3] = 1.0;
+        }
+      }
+    }
+  }
+
+  (*width) = exr_image.width;
+  (*height) = exr_image.height;
+
+  FreeEXRHeader(&exr_header);
+  FreeEXRImage(&exr_image);
+
+  return TINYEXR_SUCCESS;
+}
+
+// Represents a read-only file mapped to an address space in memory.
+// If no memory-mapping API is available, falls back to allocating a buffer
+// with a copy of the file's data.
+struct MemoryMappedFile {
+  unsigned char *data;  // To the start of the file's data.
+  size_t size;          // The size of the file in bytes.
+#ifdef TINYEXR_USE_WIN32_MMAP
+  HANDLE windows_file;
+  HANDLE windows_file_mapping;
+#elif defined(TINYEXR_USE_POSIX_MMAP)
+  int posix_descriptor;
+#endif
+
+  // MemoryMappedFile's constructor tries to map memory to a file.
+  // If this succeeds, valid() will return true and all fields
+  // are usable; otherwise, valid() will return false.
+  MemoryMappedFile(const char *filename) {
+    data = NULL;
+    size = 0;
+#ifdef TINYEXR_USE_WIN32_MMAP
+    windows_file_mapping = NULL;
+    windows_file =
+        CreateFileW(tinyexr::UTF8ToWchar(filename).c_str(),  // lpFileName
+                    GENERIC_READ,                            // dwDesiredAccess
+                    FILE_SHARE_READ,                         // dwShareMode
+                    NULL,                     // lpSecurityAttributes
+                    OPEN_EXISTING,            // dwCreationDisposition
+                    FILE_ATTRIBUTE_READONLY,  // dwFlagsAndAttributes
+                    NULL);                    // hTemplateFile
+    if (windows_file == INVALID_HANDLE_VALUE) {
+      return;
+    }
+
+    windows_file_mapping = CreateFileMapping(windows_file,  // hFile
+                                             NULL,  // lpFileMappingAttributes
+                                             PAGE_READONLY,  // flProtect
+                                             0,      // dwMaximumSizeHigh
+                                             0,      // dwMaximumSizeLow
+                                             NULL);  // lpName
+    if (windows_file_mapping == NULL) {
+      return;
+    }
+
+    data = reinterpret_cast<unsigned char *>(
+        MapViewOfFile(windows_file_mapping,  // hFileMappingObject
+                      FILE_MAP_READ,         // dwDesiredAccess
+                      0,                     // dwFileOffsetHigh
+                      0,                     // dwFileOffsetLow
+                      0));                   // dwNumberOfBytesToMap
+    if (!data) {
+      return;
+    }
+
+    LARGE_INTEGER windows_file_size = {};
+    if (!GetFileSizeEx(windows_file, &windows_file_size) ||
+        static_cast<ULONGLONG>(windows_file_size.QuadPart) >
+            std::numeric_limits<size_t>::max()) {
+      UnmapViewOfFile(data);
+      data = NULL;
+      return;
+    }
+    size = static_cast<size_t>(windows_file_size.QuadPart);
+#elif defined(TINYEXR_USE_POSIX_MMAP)
+    posix_descriptor = open(filename, O_RDONLY);
+    if (posix_descriptor == -1) {
+      return;
+    }
+
+    struct stat info;
+    if (fstat(posix_descriptor, &info) < 0) {
+      return;
+    }
+    // Make sure st_size is in the valid range for a size_t. The second case
+    // can only fail if a POSIX implementation defines off_t to be a larger
+    // type than size_t - for instance, compiling with _FILE_OFFSET_BITS=64
+    // on a 32-bit system. On current 64-bit systems, this check can never
+    // fail, so we turn off clang's Wtautological-type-limit-compare warning
+    // around this code.
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wtautological-type-limit-compare"
+#endif
+    if (info.st_size < 0 ||
+        info.st_size > std::numeric_limits<ssize_t>::max()) {
+      return;
+    }
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+    size = static_cast<size_t>(info.st_size);
+
+    data = reinterpret_cast<unsigned char *>(
+        mmap(0, size, PROT_READ, MAP_SHARED, posix_descriptor, 0));
+    if (data == MAP_FAILED) {
+      data = NULL;  // NULL rather than nullptr: this struct also builds as C++98.
+      return;
+    }
+#else
+    FILE *fp = fopen(filename, "rb");
+    if (!fp) {
+      return;
+    }
+
+    // Calling fseek(fp, 0, SEEK_END) isn't strictly-conforming C code, but
+    // since neither the WIN32 nor POSIX APIs are available in this branch, this
+    // is a reasonable fallback option.
+    if (fseek(fp, 0, SEEK_END) != 0) {
+      fclose(fp);
+      return;
+    }
+    const long ftell_result = ftell(fp);
+    if (ftell_result < 0) {
+      // Error from ftell
+      fclose(fp);
+      return;
+    }
+    size = static_cast<size_t>(ftell_result);
+    if (fseek(fp, 0, SEEK_SET) != 0) {
+      fclose(fp);
+      size = 0;
+      return;
+    }
+    data = reinterpret_cast<unsigned char *>(malloc(size));
+    if (!data) {
+      size = 0;
+      fclose(fp);
+      return;
+    }
+    size_t read_bytes = fread(data, 1, size, fp);
+    if (read_bytes != size) {
+      // TODO: Try to read data until reading `size` bytes.
+      fclose(fp);
+      free(data);  // Do not leak the partially filled buffer.
+      size = 0;
+      data = NULL;
+      return;
+    }
+    fclose(fp);
+#endif
+  }
+
+  // MemoryMappedFile's destructor closes all its handles.
+  ~MemoryMappedFile() {
+#ifdef TINYEXR_USE_WIN32_MMAP
+    if (data) {
+      (void)UnmapViewOfFile(data);
+      data = NULL;
+    }
+
+    if (windows_file_mapping != NULL) {
+      (void)CloseHandle(windows_file_mapping);
+    }
+
+    if (windows_file != INVALID_HANDLE_VALUE) {
+      (void)CloseHandle(windows_file);
+    }
+#elif defined(TINYEXR_USE_POSIX_MMAP)
+    if (data) {
+      (void)munmap(data, size);
+      data = NULL;
+    }
+
+    if (posix_descriptor != -1) {
+      (void)close(posix_descriptor);
+    }
+#else
+    if (data) {
+      (void)free(data);
+    }
+    data = NULL;
+#endif
+  }
+
+  // A MemoryMappedFile cannot be copied or moved.
+  // Only check for this when compiling with C++11 or higher, since deleted
+  // function definitions were added then.
+#if TINYEXR_HAS_CXX11
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wc++98-compat"
+#endif
+  MemoryMappedFile(const MemoryMappedFile &) = delete;
+  MemoryMappedFile &operator=(const MemoryMappedFile &) = delete;
+  MemoryMappedFile(MemoryMappedFile &&other) noexcept = delete;
+  MemoryMappedFile &operator=(MemoryMappedFile &&other) noexcept = delete;
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+#endif
+
+  // Returns whether this was successfully opened.
+  bool valid() const { return data; }
+};
+
+// Opens `filename` through MemoryMappedFile and decodes it with
+// LoadEXRImageFromMemory(). Fails early for a NULL `exr_image`, an unreadable
+// file, or a file shorter than 16 bytes.
+int LoadEXRImageFromFile(EXRImage *exr_image, const EXRHeader *exr_header,
+                         const char *filename, const char **err) {
+  if (!exr_image) {
+    tinyexr::SetErrorMessage("Invalid argument for LoadEXRImageFromFile", err);
+    return TINYEXR_ERROR_INVALID_ARGUMENT;
+  }
+  MemoryMappedFile mapping(filename);
+  const bool readable = mapping.valid();
+  if (!readable || mapping.size < 16) {
+    // Unreadable files are reported before the minimum-size check applies.
+    std::string message(readable ? "File size too short : "
+                                 : "Cannot read file ");
+    message += std::string(filename);
+    tinyexr::SetErrorMessage(message, err);
+    return readable ? TINYEXR_ERROR_INVALID_FILE : TINYEXR_ERROR_CANT_OPEN_FILE;
+  }
+  const unsigned char *bytes = mapping.data;
+  return LoadEXRImageFromMemory(exr_image, exr_header, bytes, mapping.size, err);
+}
+
+// Decodes image data from an in-memory EXR file using the parsed `exr_header`.
+int LoadEXRImageFromMemory(EXRImage *exr_image, const EXRHeader *exr_header,
+                           const unsigned char *memory, const size_t size,
+                           const char **err) {
+  // `exr_header` is dereferenced below, so reject NULL along with the rest.
+  if (exr_image == NULL || exr_header == NULL || memory == NULL ||
+      (size < tinyexr::kEXRVersionSize)) {
+    tinyexr::SetErrorMessage("Invalid argument for LoadEXRImageFromMemory",
+                             err);
+    return TINYEXR_ERROR_INVALID_ARGUMENT;
+  }
+  if (exr_header->header_len == 0) {
+    tinyexr::SetErrorMessage("EXRHeader variable is not initialized.", err);
+    return TINYEXR_ERROR_INVALID_ARGUMENT;
+  }
+  const unsigned char *head = memory;
+  const unsigned char *marker = reinterpret_cast<const unsigned char *>(
+      memory + exr_header->header_len +
+      8);  // +8 for magic number + version header.
+  return tinyexr::DecodeEXRImage(exr_image, exr_header, head, marker, size,
+                                 err);
+}
+
+namespace tinyexr
+{
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wsign-conversion"
+#endif
+
+// Converts `num_lines` rows of every channel to its requested pixel type, packs them scanline by scanline,
+// compresses the block and appends it to out_data (which must already hold the block header of the image(-part) type).
+static bool EncodePixelData(/* out */ std::vector<unsigned char>& out_data,
+                            const unsigned char* const* images,  // one source plane per channel
+                            int compression_type,
+                            int /*line_order*/,
+                            int width, // for tiled : tile.width
+                            int /*height*/, // for tiled : header.tile_size_y
+                            int x_stride, // for tiled : header.tile_size_x
+                            int line_no, // for tiled : 0
+                            int num_lines, // for tiled : tile.height
+                            size_t pixel_data_size,  // bytes of one packed pixel, summed over all channels
+                            const std::vector<ChannelInfo>& channels,
+                            const std::vector<size_t>& channel_offset_list,  // per-channel byte offset inside one packed pixel
+                            std::string *err,  // may be NULL; error text is appended when false is returned
+                            const void* compression_param = 0) // zfp compression param
+{
+  if (width <= 0 || num_lines <= 0 || pixel_data_size == 0) {  // an empty block would make `&buf.at(0)` below throw
+    if (err) { (*err) += "Invalid block size for pixel data.\n"; }
+    return false;
+  }
+  size_t buf_size = static_cast<size_t>(width) *
+                  static_cast<size_t>(num_lines) *
+                  static_cast<size_t>(pixel_data_size);
+  std::vector<unsigned char> buf(buf_size);
+  size_t start_y = static_cast<size_t>(line_no);
+  for (size_t c = 0; c < channels.size(); c++) {
+    if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) {  // source samples are 16-bit
+      if (channels[c].requested_pixel_type == TINYEXR_PIXELTYPE_FLOAT) {  // half -> float
+        for (int y = 0; y < num_lines; y++) {
+          // Assume increasing Y
+          float *line_ptr = reinterpret_cast<float *>(&buf.at(
+            static_cast<size_t>(pixel_data_size * size_t(y) * size_t(width)) +
+            channel_offset_list[c] *
+            static_cast<size_t>(width)));
+          for (int x = 0; x < width; x++) {
+            tinyexr::FP16 h16;
+            h16.u = reinterpret_cast<const unsigned short * const *>(
+              images)[c][(y + start_y) * size_t(x_stride) + size_t(x)];
+
+            tinyexr::FP32 f32 = half_to_float(h16);
+
+            tinyexr::swap4(&f32.f);
+
+            // line_ptr[x] = f32.f;
+            tinyexr::cpy4(line_ptr + x, &(f32.f));
+          }
+        }
+      } else if (channels[c].requested_pixel_type == TINYEXR_PIXELTYPE_HALF) {  // half -> half
+        for (int y = 0; y < num_lines; y++) {
+          // Assume increasing Y
+          unsigned short *line_ptr = reinterpret_cast<unsigned short *>(
+            &buf.at(static_cast<size_t>(pixel_data_size * y *
+                                        width) +
+                    channel_offset_list[c] *
+                    static_cast<size_t>(width)));
+          for (int x = 0; x < width; x++) {
+            unsigned short val = reinterpret_cast<const unsigned short * const *>(
+              images)[c][(y + start_y) * x_stride + x];
+
+            tinyexr::swap2(&val);
+
+            // line_ptr[x] = val;
+            tinyexr::cpy2(line_ptr + x, &val);
+          }
+        }
+      } else {
+        if (err) {
+          (*err) += "Invalid requested_pixel_type.\n";
+        }
+        return false;
+      }
+
+    } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) {  // source samples are 32-bit float
+      if (channels[c].requested_pixel_type == TINYEXR_PIXELTYPE_HALF) {  // float -> half
+        for (int y = 0; y < num_lines; y++) {
+          // Assume increasing Y
+          unsigned short *line_ptr = reinterpret_cast<unsigned short *>(
+            &buf.at(static_cast<size_t>(pixel_data_size * y *
+                                        width) +
+                    channel_offset_list[c] *
+                    static_cast<size_t>(width)));
+          for (int x = 0; x < width; x++) {
+            tinyexr::FP32 f32;
+            f32.f = reinterpret_cast<const float * const *>(
+              images)[c][(y + start_y) * x_stride + x];
+
+            tinyexr::FP16 h16;
+            h16 = float_to_half_full(f32);
+
+            tinyexr::swap2(reinterpret_cast<unsigned short *>(&h16.u));
+
+            // line_ptr[x] = h16.u;
+            tinyexr::cpy2(line_ptr + x, &(h16.u));
+          }
+        }
+      } else if (channels[c].requested_pixel_type == TINYEXR_PIXELTYPE_FLOAT) {  // float -> float
+        for (int y = 0; y < num_lines; y++) {
+          // Assume increasing Y
+          float *line_ptr = reinterpret_cast<float *>(&buf.at(
+            static_cast<size_t>(pixel_data_size * y * width) +
+            channel_offset_list[c] *
+            static_cast<size_t>(width)));
+          for (int x = 0; x < width; x++) {
+            float val = reinterpret_cast<const float * const *>(
+              images)[c][(y + start_y) * x_stride + x];
+
+            tinyexr::swap4(&val);
+
+            // line_ptr[x] = val;
+            tinyexr::cpy4(line_ptr + x, &val);
+          }
+        }
+      } else {
+        if (err) {
+          (*err) += "Invalid requested_pixel_type.\n";
+        }
+        return false;
+      }
+    } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) {  // uint samples are always copied as 4-byte values
+      for (int y = 0; y < num_lines; y++) {
+        // Assume increasing Y
+        unsigned int *line_ptr = reinterpret_cast<unsigned int *>(&buf.at(
+          static_cast<size_t>(pixel_data_size * y * width) +
+          channel_offset_list[c] * static_cast<size_t>(width)));
+        for (int x = 0; x < width; x++) {
+          unsigned int val = reinterpret_cast<const unsigned int * const *>(
+            images)[c][(y + start_y) * x_stride + x];
+
+          tinyexr::swap4(&val);
+
+          // line_ptr[x] = val;
+          tinyexr::cpy4(line_ptr + x, &val);
+        }
+      }
+    }
+  }
+
+  if (compression_type == TINYEXR_COMPRESSIONTYPE_NONE) {
+    // 4 byte: scan line
+    // 4 byte: data size
+    // ~     : pixel data(uncompressed)
+    out_data.insert(out_data.end(), buf.begin(), buf.end());
+
+  } else if ((compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS) ||
+    (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP)) {
+#if defined(TINYEXR_USE_MINIZ) && (TINYEXR_USE_MINIZ==1)
+    std::vector<unsigned char> block(mz_compressBound(
+      static_cast<unsigned long>(buf.size())));
+#elif TINYEXR_USE_STB_ZLIB
+    // there is no compressBound() function, so we use a value that
+    // is grossly overestimated, but should always work
+    std::vector<unsigned char> block(256 + 2 * buf.size());
+#elif defined(TINYEXR_USE_NANOZLIB) && (TINYEXR_USE_NANOZLIB == 1)
+    std::vector<unsigned char> block(nanoz_compressBound(
+      static_cast<unsigned long>(buf.size())));
+#else
+    std::vector<unsigned char> block(
+      compressBound(static_cast<uLong>(buf.size())));
+#endif
+    tinyexr::tinyexr_uint64 outSize = block.size();
+
+    if (!tinyexr::CompressZip(&block.at(0), outSize,
+                         reinterpret_cast<const unsigned char *>(&buf.at(0)),
+                         static_cast<unsigned long>(buf.size()))) {
+      if (err) {
+        (*err) += "Zip compression failed.\n";
+      }
+      return false;
+    }
+
+    // 4 byte: scan line
+    // 4 byte: data size
+    // ~     : pixel data(compressed)
+    unsigned int data_len = static_cast<unsigned int>(outSize);  // truncate
+
+    out_data.insert(out_data.end(), block.begin(), block.begin() + data_len);
+
+  } else if (compression_type == TINYEXR_COMPRESSIONTYPE_RLE) {
+    // (buf.size() * 3) / 2 would be enough.
+    std::vector<unsigned char> block((buf.size() * 3) / 2);
+
+    tinyexr::tinyexr_uint64 outSize = block.size();
+
+    if (!tinyexr::CompressRle(&block.at(0), outSize,
+                         reinterpret_cast<const unsigned char *>(&buf.at(0)),
+                         static_cast<unsigned long>(buf.size()))) {
+      if (err) {
+        (*err) += "RLE compression failed.\n";
+      }
+      return false;
+    }
+
+    // 4 byte: scan line
+    // 4 byte: data size
+    // ~     : pixel data(compressed)
+    unsigned int data_len = static_cast<unsigned int>(outSize);  // truncate
+    out_data.insert(out_data.end(), block.begin(), block.begin() + data_len);
+
+  } else if (compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) {
+#if TINYEXR_USE_PIZ
+    unsigned int bufLen =
+      8192 + static_cast<unsigned int>(
+        2 * static_cast<unsigned int>(
+          buf.size()));  // @fixme { compute good bound. }
+    std::vector<unsigned char> block(bufLen);
+    unsigned int outSize = static_cast<unsigned int>(block.size());
+
+    if (!CompressPiz(&block.at(0), &outSize,
+                reinterpret_cast<const unsigned char *>(&buf.at(0)),
+                buf.size(), channels, width, num_lines)) {
+      if (err) {
+        (*err) += "PIZ compression failed.\n";
+      }
+      return false;
+    }
+
+    // 4 byte: scan line
+    // 4 byte: data size
+    // ~     : pixel data(compressed)
+    unsigned int data_len = outSize;
+    out_data.insert(out_data.end(), block.begin(), block.begin() + data_len);
+
+#else
+    if (err) {
+      (*err) += "PIZ compression is disabled in this build.\n";
+    }
+    return false;
+#endif
+  } else if (compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) {
+#if TINYEXR_USE_ZFP
+    const ZFPCompressionParam* zfp_compression_param = reinterpret_cast<const ZFPCompressionParam*>(compression_param);
+    if (zfp_compression_param == NULL) {  // compression_param defaults to 0, so it must be checked before the dereference below
+      if (err) { (*err) += "ZFP compression parameter is missing.\n"; }
+      return false;
+    }
+    std::vector<unsigned char> block;
+    unsigned int outSize = 0;  // stays 0 if CompressZfp writes nothing, so no indeterminate length is used below
+    tinyexr::CompressZfp(
+      &block, &outSize, reinterpret_cast<const float *>(&buf.at(0)),
+      width, num_lines, static_cast<int>(channels.size()), *zfp_compression_param);
+    // 4 byte: scan line
+    // 4 byte: data size
+    // ~     : pixel data(compressed)
+    out_data.insert(out_data.end(), block.begin(), block.begin() + outSize);
+#else
+    if (err) {
+      (*err) += "ZFP compression is disabled in this build.\n";
+    }
+    (void)compression_param;
+    return false;
+#endif
+  } else {
+    if (err) { (*err) += "Unsupported compression type.\n"; }
+    return false;
+  }
+  return true;
+}
+
+// Encodes every tile of one level of a tiled image. Tile i is written to data_list[start_index + i] as a
+// 5-int tile header (tileX, tileY, levelX, levelY, payload size) followed by the encoded pixel data.
+// Returns TINYEXR_SUCCESS or a TINYEXR_ERROR_* code; error text is appended to *err when err is non-NULL.
+static int EncodeTiledLevel(const EXRImage* level_image, const EXRHeader* exr_header,
+                            const std::vector<tinyexr::ChannelInfo>& channels,
+                            std::vector<std::vector<unsigned char> >& data_list,
+                            size_t start_index, // for data_list
+                            int num_x_tiles, int num_y_tiles,
+                            const std::vector<size_t>& channel_offset_list,
+                            int pixel_data_size,
+                            const void* compression_param, // must be set if zfp compression is enabled
+                            std::string* err) {
+  int num_tiles = num_x_tiles * num_y_tiles;
+  if (num_tiles != level_image->num_tiles) {
+    if (err) {
+      (*err) += "Invalid number of tiles in argument.\n";
+    }
+    return TINYEXR_ERROR_INVALID_ARGUMENT;
+  }
+  if ((exr_header->tile_size_x > level_image->width || exr_header->tile_size_y > level_image->height) &&
+      level_image->level_x == 0 && level_image->level_y == 0) {
+      if (err) {
+        (*err) += "Failed to encode tile data.\n";
+      }
+      return TINYEXR_ERROR_INVALID_DATA;
+  }
+  std::vector<std::string> tile_errs(num_tiles > 0 ? static_cast<size_t>(num_tiles) : 0);  // one error buffer per tile: workers must not append to the shared *err concurrently
+#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0)
+  std::atomic<bool> invalid_data(false);
+#else
+  bool invalid_data(false);
+#endif
+#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0)
+  std::vector<std::thread> workers;
+  std::atomic<int> tile_count(0);
+
+  int num_threads = std::max(1, int(std::thread::hardware_concurrency()));
+  if (num_threads > int(num_tiles)) {
+    num_threads = int(num_tiles);
+  }
+
+  for (int t = 0; t < num_threads; t++) {
+    workers.emplace_back(std::thread([&]() {
+      int i = 0;
+      while ((i = tile_count++) < num_tiles) {  // each worker claims the next unprocessed tile index
+
+#else
+  // Use signed int since some OpenMP compiler doesn't allow unsigned type for
+  // `parallel for`
+#if TINYEXR_USE_OPENMP
+#pragma omp parallel for
+#endif
+  for (int i = 0; i < num_tiles; i++) {
+
+#endif
+    size_t tile_idx = static_cast<size_t>(i);
+    size_t data_idx = tile_idx + start_index;
+
+    int x_tile = i % num_x_tiles;  // tiles are numbered row by row, x varying fastest
+    int y_tile = i / num_x_tiles;
+
+    EXRTile& tile = level_image->tiles[tile_idx];
+
+    const unsigned char* const* images =
+      static_cast<const unsigned char* const*>(tile.images);
+
+    data_list[data_idx].resize(5*sizeof(int));  // reserve room for the tile header filled in below
+    size_t data_header_size = data_list[data_idx].size();
+    bool ret = EncodePixelData(data_list[data_idx],
+                               images,
+                               exr_header->compression_type,
+                               0, // increasing y
+                               tile.width,
+                               exr_header->tile_size_y,
+                               exr_header->tile_size_x,
+                               0,
+                               tile.height,
+                               pixel_data_size,
+                               channels,
+                               channel_offset_list,
+                               &tile_errs[tile_idx], compression_param);
+    if (!ret) {
+      invalid_data = true;
+      continue;
+    }
+    if (data_list[data_idx].size() <= data_header_size) {  // nothing was appended after the header
+      invalid_data = true;
+      continue;
+    }
+
+    int data_len = static_cast<int>(data_list[data_idx].size() - data_header_size);
+    // Header layout: tileX, tileY, levelX, levelY, payload size in bytes (one int each).
+    memcpy(&data_list[data_idx][0], &x_tile, sizeof(int));
+    memcpy(&data_list[data_idx][4], &y_tile, sizeof(int));
+    memcpy(&data_list[data_idx][8], &level_image->level_x, sizeof(int));
+    memcpy(&data_list[data_idx][12], &level_image->level_y, sizeof(int));
+    memcpy(&data_list[data_idx][16], &data_len, sizeof(int));
+
+    swap4(reinterpret_cast<int*>(&data_list[data_idx][0]));
+    swap4(reinterpret_cast<int*>(&data_list[data_idx][4]));
+    swap4(reinterpret_cast<int*>(&data_list[data_idx][8]));
+    swap4(reinterpret_cast<int*>(&data_list[data_idx][12]));
+    swap4(reinterpret_cast<int*>(&data_list[data_idx][16]));
+
+#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0)
+  }
+}));
+    }
+
+    for (auto &t : workers) {
+      t.join();
+    }
+#else
+    }  // omp parallel
+#endif
+  if (invalid_data) {
+    if (err) {
+      for (size_t k = 0; k < tile_errs.size(); k++) { (*err) += tile_errs[k]; }  // merge per-tile messages in tile order
+      (*err) += "Failed to encode tile data.\n";
+    }
+    return TINYEXR_ERROR_INVALID_DATA;
+  }
+  return TINYEXR_SUCCESS;
+}
+
+// Number of scanlines packed into one chunk for `compression_type`.
+// Compression types not tested below are written one scanline per chunk.
+static int NumScanlines(int compression_type) {
+  if (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) {
+    return 16;
+  }
+  if (compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) {
+    return 32;
+  }
+  return (compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) ? 16 : 1;
+}
+
+// Encodes all chunks of one image part (every tile of every level, or every scanline block) into data_list,
+// stores the file offset of each chunk in offset_data, and reports the end offset of the part in total_size.
+// Returns TINYEXR_SUCCESS or a TINYEXR_ERROR_* code; error text is appended to *err when err is non-NULL.
+static int EncodeChunk(const EXRImage* exr_image, const EXRHeader* exr_header,
+                       const std::vector<ChannelInfo>& channels,
+                       int num_blocks,
+                       tinyexr_uint64 chunk_offset, // starting offset of current chunk
+                       bool is_multipart,
+                       OffsetData& offset_data, // output block offsets, must be initialized
+                       std::vector<std::vector<unsigned char> >& data_list, // output
+                       tinyexr_uint64& total_size, // output: ending offset of current chunk
+                       std::string* err) {
+  int num_scanlines = NumScanlines(exr_header->compression_type);
+  data_list.resize(num_blocks);
+  std::vector<size_t> channel_offset_list(
+    static_cast<size_t>(exr_header->num_channels));
+  int pixel_data_size = 0;  // bytes of one packed pixel; channel_offset_list[c] is channel c's byte offset inside it
+  {
+    size_t channel_offset = 0;
+    for (size_t c = 0; c < static_cast<size_t>(exr_header->num_channels); c++) {
+      channel_offset_list[c] = channel_offset;
+      if (channels[c].requested_pixel_type == TINYEXR_PIXELTYPE_HALF) {
+        pixel_data_size += sizeof(unsigned short);
+        channel_offset += sizeof(unsigned short);
+      } else if (channels[c].requested_pixel_type ==
+                 TINYEXR_PIXELTYPE_FLOAT) {
+        pixel_data_size += sizeof(float);
+        channel_offset += sizeof(float);
+      } else if (channels[c].requested_pixel_type == TINYEXR_PIXELTYPE_UINT) {
+        pixel_data_size += sizeof(unsigned int);
+        channel_offset += sizeof(unsigned int);
+      } else {
+        if (err) {
+          (*err) += "Invalid requested_pixel_type.\n";
+        }
+        return TINYEXR_ERROR_INVALID_DATA;
+      }
+    }
+  }
+
+  const void* compression_param = 0;
+#if TINYEXR_USE_ZFP
+  tinyexr::ZFPCompressionParam zfp_compression_param;
+
+  // Use ZFP compression parameter from custom attributes(if such a parameter
+  // exists)
+  {
+    std::string e;
+    bool ret = tinyexr::FindZFPCompressionParam(
+      &zfp_compression_param, exr_header->custom_attributes,
+      exr_header->num_custom_attributes, &e);
+
+    if (!ret) {
+      // Use predefined compression parameter.
+      zfp_compression_param.type = 0;
+      zfp_compression_param.rate = 2;
+    }
+    compression_param = &zfp_compression_param;
+  }
+#endif
+
+  tinyexr_uint64 offset = chunk_offset;
+  tinyexr_uint64 doffset = is_multipart ? 4u : 0u;  // extra bytes per chunk in multipart files (presumably the part number - confirm)
+
+  if (exr_image->tiles) {
+    const EXRImage* level_image = exr_image;
+    size_t block_idx = 0;
+    //tinyexr::tinyexr_uint64 block_data_size = 0;
+    int num_levels = (exr_header->tile_level_mode != TINYEXR_TILE_RIPMAP_LEVELS) ?
+      offset_data.num_x_levels : (offset_data.num_x_levels * offset_data.num_y_levels);
+    for (int level_index = 0; level_index < num_levels; ++level_index) {
+      if (!level_image) {
+        if (err) {
+          (*err) += "Invalid number of tiled levels for EncodeChunk\n";
+        }
+        return TINYEXR_ERROR_INVALID_DATA;
+      }
+
+      int level_index_from_image = LevelIndex(level_image->level_x, level_image->level_y,
+                                    exr_header->tile_level_mode, offset_data.num_x_levels);
+      if (level_index_from_image < 0) {
+        if (err) {
+          (*err) += "Invalid tile level mode\n";
+        }
+        return TINYEXR_ERROR_INVALID_DATA;
+      }
+
+      if (level_index_from_image != level_index) {  // the next_level chain must follow LevelIndex() order
+        if (err) {
+          (*err) += "Incorrect level ordering in tiled image\n";
+        }
+        return TINYEXR_ERROR_INVALID_DATA;
+      }
+      int num_y_tiles = int(offset_data.offsets[level_index].size());
+      if (num_y_tiles <= 0) {
+        if (err) {
+          (*err) += "Invalid Y tile size\n";
+        }
+        return TINYEXR_ERROR_INVALID_DATA;
+      }
+
+      int num_x_tiles = int(offset_data.offsets[level_index][0].size());
+      if (num_x_tiles <= 0) {
+        if (err) {
+          (*err) += "Invalid X tile size\n";
+        }
+        return TINYEXR_ERROR_INVALID_DATA;
+      }
+
+      std::string e;
+      int ret = EncodeTiledLevel(level_image,
+                                  exr_header,
+                                  channels,
+                                  data_list,
+                                  block_idx,
+                                  num_x_tiles,
+                                  num_y_tiles,
+                                  channel_offset_list,
+                                  pixel_data_size,
+                                  compression_param,
+                                  &e);
+      if (ret != TINYEXR_SUCCESS) {
+        if (!e.empty() && err) {
+          (*err) += e;
+        }
+        return ret;
+      }
+
+      for (size_t j = 0; j < static_cast<size_t>(num_y_tiles); ++j)
+        for (size_t i = 0; i < static_cast<size_t>(num_x_tiles); ++i) {
+          offset_data.offsets[level_index][j][i] = offset;  // where this tile chunk starts
+          swap8(reinterpret_cast<tinyexr_uint64*>(&offset_data.offsets[level_index][j][i]));
+          offset += data_list[block_idx].size() + doffset;
+          //block_data_size += data_list[block_idx].size();
+          ++block_idx;
+        }
+      level_image = level_image->next_level;
+    }
+    TINYEXR_CHECK_AND_RETURN_C(static_cast<int>(block_idx) == num_blocks, TINYEXR_ERROR_INVALID_DATA);
+    total_size = offset;
+  } else { // scanlines
+    std::vector<tinyexr::tinyexr_uint64>& offsets = offset_data.offsets[0][0];
+    std::vector<std::string> block_errs(data_list.size());  // one error buffer per block: workers must not append to the shared *err concurrently
+#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0)
+    std::atomic<bool> invalid_data(false);
+    std::vector<std::thread> workers;
+    std::atomic<int> block_count(0);
+
+    int num_threads = std::min(std::max(1, int(std::thread::hardware_concurrency())), num_blocks);
+
+    for (int t = 0; t < num_threads; t++) {
+      workers.emplace_back(std::thread([&]() {
+        int i = 0;
+        while ((i = block_count++) < num_blocks) {  // each worker claims the next unprocessed block index
+
+#else
+    bool invalid_data(false);
+#if TINYEXR_USE_OPENMP
+#pragma omp parallel for
+#endif
+    for (int i = 0; i < num_blocks; i++) {
+
+#endif
+      int start_y = num_scanlines * i;  // first scanline of block i
+      int end_Y = (std::min)(num_scanlines * (i + 1), exr_image->height);  // the last block may hold fewer lines
+      int num_lines = end_Y - start_y;
+
+      const unsigned char* const* images =
+        static_cast<const unsigned char* const*>(exr_image->images);
+
+      data_list[i].resize(2*sizeof(int));  // reserve the block header: start scanline + payload size
+      size_t data_header_size = data_list[i].size();
+
+      bool ret = EncodePixelData(data_list[i],
+                                 images,
+                                 exr_header->compression_type,
+                                 0, // increasing y
+                                 exr_image->width,
+                                 exr_image->height,
+                                 exr_image->width,
+                                 start_y,
+                                 num_lines,
+                                 pixel_data_size,
+                                 channels,
+                                 channel_offset_list,
+                                 &block_errs[i],
+                                 compression_param);
+      if (!ret) {
+        invalid_data = true;
+        continue; // "break" cannot be used with OpenMP
+      }
+      if (data_list[i].size() <= data_header_size) {
+        invalid_data = true;
+        continue; // "break" cannot be used with OpenMP
+      }
+      int data_len = static_cast<int>(data_list[i].size() - data_header_size);
+      memcpy(&data_list[i][0], &start_y, sizeof(int));
+      memcpy(&data_list[i][4], &data_len, sizeof(int));
+
+      swap4(reinterpret_cast<int*>(&data_list[i][0]));
+      swap4(reinterpret_cast<int*>(&data_list[i][4]));
+#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0)
+        }
+                                       }));
+    }
+
+    for (auto &t : workers) {
+      t.join();
+    }
+#else
+    }  // omp parallel
+#endif
+    if (invalid_data) {
+      if (err) {
+        for (size_t k = 0; k < block_errs.size(); k++) { (*err) += block_errs[k]; }  // merge per-block messages in block order
+        (*err) += "Failed to encode scanline data.\n";
+      }
+      return TINYEXR_ERROR_INVALID_DATA;
+    }
+
+    for (size_t i = 0; i < static_cast<size_t>(num_blocks); i++) {
+      offsets[i] = offset;
+      tinyexr::swap8(reinterpret_cast<tinyexr::tinyexr_uint64 *>(&offsets[i]));
+      offset += data_list[i].size() + doffset;
+    }
+
+    total_size = offset;  // keep the full 64-bit value, as the tiled branch does (a size_t cast would narrow on 32-bit targets)
+  }
+  return TINYEXR_SUCCESS;
+}
+
+// can save a single or multi-part image (no deep* formats)
+static size_t SaveEXRNPartImageToMemory(const EXRImage* exr_images,
+                                        const EXRHeader** exr_headers,
+                                        unsigned int num_parts,
+                                        unsigned char** memory_out, const char** err) {
+  if (exr_images == NULL || exr_headers == NULL || num_parts == 0 ||
+      memory_out == NULL) {
+    SetErrorMessage("Invalid argument for SaveEXRNPartImageToMemory",
+                    err);
+    return 0;
+  }
+  {
+    for (unsigned int i = 0; i < num_parts; ++i) {
+      if (exr_headers[i]->compression_type < 0) {
+        SetErrorMessage("Invalid argument for SaveEXRNPartImageToMemory",
+                        err);
+        return 0;
+      }
+#if !TINYEXR_USE_PIZ
+      if (exr_headers[i]->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) {
+        SetErrorMessage("PIZ compression is not supported in this build",
+                        err);
+        return 0;
+      }
+#endif
+      if (exr_headers[i]->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) {
+#if !TINYEXR_USE_ZFP
+        SetErrorMessage("ZFP compression is not supported in this build",
+                        err);
+        return 0;
+#else
+        // All channels must be fp32.
+        // No fp16 support in ZFP atm(as of 2023 June)
+        // https://github.com/LLNL/fpzip/issues/2
+        for (int c = 0; c < exr_headers[i]->num_channels; ++c) {
+          if (exr_headers[i]->requested_pixel_types[c] != TINYEXR_PIXELTYPE_FLOAT) {
+            SetErrorMessage("Pixel type must be FLOAT for ZFP compression",
+                            err);
+            return 0;
+          }
+        }
+#endif
+      }
+    }
+  }
+
+  std::vector<unsigned char> memory;
+
+  // Header
+  {
+    const char header[] = { 0x76, 0x2f, 0x31, 0x01 };
+    memory.insert(memory.end(), header, header + 4);
+  }
+
+  // Version
+  // using value from the first header
+  int long_name = exr_headers[0]->long_name;
+  {
+    char marker[] = { 2, 0, 0, 0 };
+    /* @todo
+    if (exr_header->non_image) {
+    marker[1] |= 0x8;
+    }
+    */
+    // tiled
+    if (num_parts == 1 && exr_images[0].tiles) {
+      marker[1] |= 0x2;
+    }
+    // long_name
+    if (long_name) {
+      marker[1] |= 0x4;
+    }
+    // multipart
+    if (num_parts > 1) {
+      marker[1] |= 0x10;
+    }
+    memory.insert(memory.end(), marker, marker + 4);
+  }
+
+  int total_chunk_count = 0;
+  std::vector<int> chunk_count(num_parts);
+  std::vector<OffsetData> offset_data(num_parts);
+  for (unsigned int i = 0; i < num_parts; ++i) {
+    if (!exr_images[i].tiles) {
+      int num_scanlines = NumScanlines(exr_headers[i]->compression_type);
+      chunk_count[i] =
+        (exr_images[i].height + num_scanlines - 1) / num_scanlines;
+      InitSingleResolutionOffsets(offset_data[i], chunk_count[i]);
+      total_chunk_count += chunk_count[i];
+    } else {
+      {
+        std::vector<int> num_x_tiles, num_y_tiles;
+        if (!PrecalculateTileInfo(num_x_tiles, num_y_tiles, exr_headers[i])) {
+          SetErrorMessage("Failed to precalculate Tile info",
+                          err);
+          return TINYEXR_ERROR_INVALID_DATA;
+        }
+        int ntiles = InitTileOffsets(offset_data[i], exr_headers[i], num_x_tiles, num_y_tiles);
+        if (ntiles > 0) {
+          chunk_count[i] = ntiles;
+        } else {
+          SetErrorMessage("Failed to compute Tile offsets",
+                          err);
+          return TINYEXR_ERROR_INVALID_DATA;
+          
+        }
+        total_chunk_count += chunk_count[i];
+      }
+    }
+  }
+  // Write attributes to memory buffer.
+  std::vector< std::vector<tinyexr::ChannelInfo> > channels(num_parts);
+  {
+    std::set<std::string> partnames;
+    for (unsigned int i = 0; i < num_parts; ++i) {
+      //channels
+      {
+        std::vector<unsigned char> data;
+
+        for (int c = 0; c < exr_headers[i]->num_channels; c++) {
+          tinyexr::ChannelInfo info;
+          info.p_linear = 0;
+          info.pixel_type = exr_headers[i]->pixel_types[c];
+          info.requested_pixel_type = exr_headers[i]->requested_pixel_types[c];
+          info.x_sampling = 1;
+          info.y_sampling = 1;
+          info.name = std::string(exr_headers[i]->channels[c].name);
+          channels[i].push_back(info);
+        }
+
+        tinyexr::WriteChannelInfo(data, channels[i]);
+
+        tinyexr::WriteAttributeToMemory(&memory, "channels", "chlist", &data.at(0),
+                                        static_cast<int>(data.size()));
+      }
+
+      {
+        int comp = exr_headers[i]->compression_type;
+        swap4(&comp);
+        WriteAttributeToMemory(
+          &memory, "compression", "compression",
+          reinterpret_cast<const unsigned char*>(&comp), 1);
+      }
+
+      {
+        int data[4] = { 0, 0, exr_images[i].width - 1, exr_images[i].height - 1 };
+        swap4(&data[0]);
+        swap4(&data[1]);
+        swap4(&data[2]);
+        swap4(&data[3]);
+        WriteAttributeToMemory(
+          &memory, "dataWindow", "box2i",
+          reinterpret_cast<const unsigned char*>(data), sizeof(int) * 4);
+
+        int data0[4] = { 0, 0, exr_images[0].width - 1, exr_images[0].height - 1 };
+        swap4(&data0[0]);
+        swap4(&data0[1]);
+        swap4(&data0[2]);
+        swap4(&data0[3]);
+        // Note: must be the same across parts (currently, using value from the first header)
+        WriteAttributeToMemory(
+          &memory, "displayWindow", "box2i",
+          reinterpret_cast<const unsigned char*>(data0), sizeof(int) * 4);
+      }
+
+      {
+        unsigned char line_order = 0;  // @fixme { read line_order from EXRHeader }
+        WriteAttributeToMemory(&memory, "lineOrder", "lineOrder",
+                               &line_order, 1);
+      }
+
+      {
+        // Note: must be the same across parts
+        float aspectRatio = 1.0f;
+        swap4(&aspectRatio);
+        WriteAttributeToMemory(
+          &memory, "pixelAspectRatio", "float",
+          reinterpret_cast<const unsigned char*>(&aspectRatio), sizeof(float));
+      }
+
+      {
+        float center[2] = { 0.0f, 0.0f };
+        swap4(&center[0]);
+        swap4(&center[1]);
+        WriteAttributeToMemory(
+          &memory, "screenWindowCenter", "v2f",
+          reinterpret_cast<const unsigned char*>(center), 2 * sizeof(float));
+      }
+
+      {
+        float w = 1.0f;
+        swap4(&w);
+        WriteAttributeToMemory(&memory, "screenWindowWidth", "float",
+                               reinterpret_cast<const unsigned char*>(&w),
+                               sizeof(float));
+      }
+
+      if (exr_images[i].tiles) {
+        unsigned char tile_mode = static_cast<unsigned char>(exr_headers[i]->tile_level_mode & 0x3);
+        if (exr_headers[i]->tile_rounding_mode) tile_mode |= (1u << 4u);
+        //unsigned char data[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+        unsigned int datai[3] = { 0, 0, 0 };
+        unsigned char* data = reinterpret_cast<unsigned char*>(&datai[0]);
+        datai[0] = static_cast<unsigned int>(exr_headers[i]->tile_size_x);
+        datai[1] = static_cast<unsigned int>(exr_headers[i]->tile_size_y);
+        data[8] = tile_mode;
+        swap4(reinterpret_cast<unsigned int*>(&data[0]));
+        swap4(reinterpret_cast<unsigned int*>(&data[4]));
+        WriteAttributeToMemory(
+          &memory, "tiles", "tiledesc",
+          reinterpret_cast<const unsigned char*>(data), 9);
+      }
+
+      // must be present for multi-part files - according to spec.
+      if (num_parts > 1) {
+        // name
+        {
+          size_t len = 0;
+          if ((len = strlen(exr_headers[i]->name)) > 0) {
+#if TINYEXR_HAS_CXX11
+            partnames.emplace(exr_headers[i]->name);
+#else
+            partnames.insert(std::string(exr_headers[i]->name));
+#endif
+            if (partnames.size() != i + 1) {
+              SetErrorMessage("'name' attributes must be unique for a multi-part file", err);
+              return 0;
+            }
+            WriteAttributeToMemory(
+              &memory, "name", "string",
+              reinterpret_cast<const unsigned char*>(exr_headers[i]->name),
+              static_cast<int>(len));
+          } else {
+            SetErrorMessage("Invalid 'name' attribute for a multi-part file", err);
+            return 0;
+          }
+        }
+        // type
+        {
+          const char* type = "scanlineimage";
+          if (exr_images[i].tiles) type = "tiledimage";
+          WriteAttributeToMemory(
+            &memory, "type", "string",
+            reinterpret_cast<const unsigned char*>(type),
+            static_cast<int>(strlen(type)));
+        }
+        // chunkCount
+        {
+          WriteAttributeToMemory(
+            &memory, "chunkCount", "int",
+            reinterpret_cast<const unsigned char*>(&chunk_count[i]),
+            4);
+        }
+      }
+
+      // Custom attributes
+      if (exr_headers[i]->num_custom_attributes > 0) {
+        for (int j = 0; j < exr_headers[i]->num_custom_attributes; j++) {
+          tinyexr::WriteAttributeToMemory(
+            &memory, exr_headers[i]->custom_attributes[j].name,
+            exr_headers[i]->custom_attributes[j].type,
+            reinterpret_cast<const unsigned char*>(
+              exr_headers[i]->custom_attributes[j].value),
+            exr_headers[i]->custom_attributes[j].size);
+        }
+      }
+
+      {  // end of header
+        memory.push_back(0);
+      }
+    }
+  }
+  if (num_parts > 1) {
+    // end of header list
+    memory.push_back(0);
+  }
+
+  tinyexr_uint64 chunk_offset = memory.size() + size_t(total_chunk_count) * sizeof(tinyexr_uint64);
+
+  tinyexr_uint64 total_size = 0;
+  std::vector< std::vector< std::vector<unsigned char> > > data_lists(num_parts);
+  for (unsigned int i = 0; i < num_parts; ++i) {
+    std::string e;
+    int ret = EncodeChunk(&exr_images[i], exr_headers[i],
+                          channels[i],
+                          chunk_count[i],
+                          // starting offset of current chunk after part-number
+                          chunk_offset,
+                          num_parts > 1,
+                          offset_data[i], // output: block offsets, must be initialized
+                          data_lists[i], // output
+                          total_size, // output
+                          &e);
+    if (ret != TINYEXR_SUCCESS) {
+      if (!e.empty()) {
+        tinyexr::SetErrorMessage(e, err);
+      }
+      return 0;
+    }
+    chunk_offset = total_size;
+  }
+
+  // Allocating required memory
+  if (total_size == 0) { // something went wrong
+    tinyexr::SetErrorMessage("Output memory size is zero", err);
+    return TINYEXR_ERROR_INVALID_DATA;
+  }
+  (*memory_out) = static_cast<unsigned char*>(malloc(size_t(total_size)));
+
+  // Writing header
+  memcpy((*memory_out), &memory[0], memory.size());
+  unsigned char* memory_ptr = *memory_out + memory.size();
+  size_t sum = memory.size();
+
+  // Writing offset data for chunks
+  for (unsigned int i = 0; i < num_parts; ++i) {
+    if (exr_images[i].tiles) {
+      const EXRImage* level_image = &exr_images[i];
+      int num_levels = (exr_headers[i]->tile_level_mode != TINYEXR_TILE_RIPMAP_LEVELS) ?
+        offset_data[i].num_x_levels : (offset_data[i].num_x_levels * offset_data[i].num_y_levels);
+      for (int level_index = 0; level_index < num_levels; ++level_index) {
+        for (size_t j = 0; j < offset_data[i].offsets[level_index].size(); ++j) {
+          size_t num_bytes = sizeof(tinyexr_uint64) * offset_data[i].offsets[level_index][j].size();
+          sum += num_bytes;
+          if (sum > total_size) {
+            tinyexr::SetErrorMessage("Invalid offset bytes in Tiled Part image.", err);
+            return TINYEXR_ERROR_INVALID_DATA;
+          }
+
+          memcpy(memory_ptr,
+                 reinterpret_cast<unsigned char*>(&offset_data[i].offsets[level_index][j][0]),
+                 num_bytes);
+          memory_ptr += num_bytes;
+        }
+        level_image = level_image->next_level;
+      }
+    } else {
+      size_t num_bytes = sizeof(tinyexr::tinyexr_uint64) * static_cast<size_t>(chunk_count[i]);
+      sum += num_bytes;
+      if (sum > total_size) {
+        tinyexr::SetErrorMessage("Invalid offset bytes in Part image.", err);
+        return TINYEXR_ERROR_INVALID_DATA;
+      }
+      std::vector<tinyexr::tinyexr_uint64>& offsets = offset_data[i].offsets[0][0];
+      memcpy(memory_ptr, reinterpret_cast<unsigned char*>(&offsets[0]), num_bytes);
+      memory_ptr += num_bytes;
+    }
+  }
+
+  // Writing chunk data
+  for (unsigned int i = 0; i < num_parts; ++i) {
+    for (size_t j = 0; j < static_cast<size_t>(chunk_count[i]); ++j) {
+      if (num_parts > 1) {
+        sum += 4;
+        if (sum > total_size) {
+          tinyexr::SetErrorMessage("Buffer overrun in reading Part image chunk data.", err);
+          return TINYEXR_ERROR_INVALID_DATA;
+        }
+        unsigned int part_number = i;
+        swap4(&part_number);
+        memcpy(memory_ptr, &part_number, 4);
+        memory_ptr += 4;
+      }
+      sum += data_lists[i][j].size();
+      if (sum > total_size) {
+        tinyexr::SetErrorMessage("Buffer overrun in reading Part image chunk data.", err);
+        return TINYEXR_ERROR_INVALID_DATA;
+      }
+      memcpy(memory_ptr, &data_lists[i][j][0], data_lists[i][j].size());
+      memory_ptr += data_lists[i][j].size();
+    }
+  }
+
+  if (sum != total_size) {
+    tinyexr::SetErrorMessage("Corrupted Part image chunk data.", err);
+    return TINYEXR_ERROR_INVALID_DATA;
+  }
+
+  return size_t(total_size);  // OK
+}
+
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
+} // tinyexr
+
+// Encodes a single-part EXR image into memory by forwarding to
+// tinyexr::SaveEXRNPartImageToMemory with a part count of one. The address of
+// the local `exr_header` pointer is handed over as a one-element header array.
+// The return value is whatever the N-part routine returns.
+size_t SaveEXRImageToMemory(const EXRImage* exr_image,
+                             const EXRHeader* exr_header,
+                             unsigned char** memory_out, const char** err) {
+  const EXRHeader** single_header_list = &exr_header;
+  return tinyexr::SaveEXRNPartImageToMemory(exr_image, single_header_list, 1,
+                                            memory_out, err);
+}
+
+// Serializes a single-part EXR image with SaveEXRImageToMemory and writes the
+// resulting buffer to `filename`.
+//
+// Parameters:
+//   exr_image  - image to encode; must not be NULL.
+//   exr_header - header for the image; must not be NULL and must carry a
+//                non-negative compression_type.
+//   filename   - destination path; must not be NULL. On Windows builds that
+//                have _wfopen_s it is passed through tinyexr::UTF8ToWchar.
+//   err        - error message sink used via tinyexr::SetErrorMessage.
+//
+// Returns TINYEXR_SUCCESS, or one of TINYEXR_ERROR_INVALID_ARGUMENT,
+// TINYEXR_ERROR_UNSUPPORTED_FEATURE (PIZ/ZFP disabled at build time),
+// TINYEXR_ERROR_CANT_WRITE_FILE or TINYEXR_ERROR_SERIALIZATION_FAILED.
+int SaveEXRImageToFile(const EXRImage *exr_image, const EXRHeader *exr_header,
+                       const char *filename, const char **err) {
+  // exr_header is dereferenced right here and again below, so it has to be
+  // validated together with the other pointer arguments.
+  if (exr_image == NULL || exr_header == NULL || filename == NULL ||
+      exr_header->compression_type < 0) {
+    tinyexr::SetErrorMessage("Invalid argument for SaveEXRImageToFile", err);
+    return TINYEXR_ERROR_INVALID_ARGUMENT;
+  }
+
+#if !TINYEXR_USE_PIZ
+  if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) {
+    tinyexr::SetErrorMessage("PIZ compression is not supported in this build",
+                             err);
+    return TINYEXR_ERROR_UNSUPPORTED_FEATURE;
+  }
+#endif
+
+#if !TINYEXR_USE_ZFP
+  if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) {
+    tinyexr::SetErrorMessage("ZFP compression is not supported in this build",
+                             err);
+    return TINYEXR_ERROR_UNSUPPORTED_FEATURE;
+  }
+#endif
+
+  FILE *fp = NULL;
+#ifdef _WIN32
+#if defined(_MSC_VER) || (defined(MINGW_HAS_SECURE_API) && MINGW_HAS_SECURE_API) // MSVC, MinGW GCC, or Clang
+  errno_t errcode =
+      _wfopen_s(&fp, tinyexr::UTF8ToWchar(filename).c_str(), L"wb");
+  if (errcode != 0) {
+    tinyexr::SetErrorMessage("Cannot write a file: " + std::string(filename),
+                             err);
+    return TINYEXR_ERROR_CANT_WRITE_FILE;
+  }
+#else
+  // Unknown compiler or MinGW without MINGW_HAS_SECURE_API.
+  fp = fopen(filename, "wb");
+#endif
+#else
+  fp = fopen(filename, "wb");
+#endif
+  if (!fp) {
+    tinyexr::SetErrorMessage("Cannot write a file: " + std::string(filename),
+                             err);
+    return TINYEXR_ERROR_CANT_WRITE_FILE;
+  }
+
+  // Encode into a malloc'ed buffer; a size of zero signals failure.
+  unsigned char *mem = NULL;
+  size_t mem_size = SaveEXRImageToMemory(exr_image, exr_header, &mem, err);
+  if (mem_size == 0) {
+    fclose(fp);
+    return TINYEXR_ERROR_SERIALIZATION_FAILED;
+  }
+
+  size_t written_size = 0;
+  if ((mem_size > 0) && mem) {
+    written_size = fwrite(mem, 1, mem_size, fp);
+  }
+  free(mem);
+
+  fclose(fp);
+
+  // A short write (or a NULL buffer with a non-zero size) is a write failure.
+  if (written_size != mem_size) {
+    tinyexr::SetErrorMessage("Cannot write a file", err);
+    return TINYEXR_ERROR_CANT_WRITE_FILE;
+  }
+
+  return TINYEXR_SUCCESS;
+}
+
+// Public entry point for encoding a multi-part EXR into memory. Rejects NULL
+// pointers and part counts below two (reporting the problem through `err` and
+// returning 0); otherwise forwards to tinyexr::SaveEXRNPartImageToMemory and
+// returns its result.
+size_t SaveEXRMultipartImageToMemory(const EXRImage* exr_images,
+                                     const EXRHeader** exr_headers,
+                                     unsigned int num_parts,
+                                     unsigned char** memory_out, const char** err) {
+  const bool args_ok = (exr_images != NULL) && (exr_headers != NULL) &&
+                       (num_parts >= 2) && (memory_out != NULL);
+  if (args_ok) {
+    return tinyexr::SaveEXRNPartImageToMemory(exr_images, exr_headers,
+                                              num_parts, memory_out, err);
+  }
+
+  tinyexr::SetErrorMessage("Invalid argument for SaveEXRNPartImageToMemory",
+                            err);
+  return 0;
+}
+
+// Serializes a multi-part EXR with SaveEXRMultipartImageToMemory and writes
+// the resulting buffer to `filename`.
+//
+// Parameters:
+//   exr_images  - array of part images; must not be NULL.
+//   exr_headers - array of header pointers; must not be NULL.
+//   num_parts   - number of parts; must be at least 2.
+//   filename    - destination path; must not be NULL. On Windows builds that
+//                 have _wfopen_s it is passed through tinyexr::UTF8ToWchar.
+//   err         - error message sink used via tinyexr::SetErrorMessage.
+//
+// Returns TINYEXR_SUCCESS, or one of TINYEXR_ERROR_INVALID_ARGUMENT,
+// TINYEXR_ERROR_CANT_WRITE_FILE or TINYEXR_ERROR_SERIALIZATION_FAILED.
+int SaveEXRMultipartImageToFile(const EXRImage* exr_images,
+                                const EXRHeader** exr_headers,
+                                unsigned int num_parts,
+                                const char* filename,
+                                const char** err) {
+  // filename is handed to fopen/_wfopen_s and to std::string below, so NULL
+  // must be rejected up front just like the other pointer arguments.
+  if (exr_images == NULL || exr_headers == NULL || num_parts < 2 ||
+      filename == NULL) {
+    tinyexr::SetErrorMessage("Invalid argument for SaveEXRMultipartImageToFile",
+                              err);
+    return TINYEXR_ERROR_INVALID_ARGUMENT;
+  }
+
+  FILE *fp = NULL;
+#ifdef _WIN32
+#if defined(_MSC_VER) || (defined(MINGW_HAS_SECURE_API) && MINGW_HAS_SECURE_API) // MSVC, MinGW GCC, or Clang.
+  errno_t errcode =
+    _wfopen_s(&fp, tinyexr::UTF8ToWchar(filename).c_str(), L"wb");
+  if (errcode != 0) {
+    tinyexr::SetErrorMessage("Cannot write a file: " + std::string(filename),
+                             err);
+    return TINYEXR_ERROR_CANT_WRITE_FILE;
+  }
+#else
+  // Unknown compiler or MinGW without MINGW_HAS_SECURE_API.
+  fp = fopen(filename, "wb");
+#endif
+#else
+  fp = fopen(filename, "wb");
+#endif
+  if (!fp) {
+    tinyexr::SetErrorMessage("Cannot write a file: " + std::string(filename),
+                             err);
+    return TINYEXR_ERROR_CANT_WRITE_FILE;
+  }
+
+  // Encode into a malloc'ed buffer; a size of zero signals failure.
+  unsigned char *mem = NULL;
+  size_t mem_size = SaveEXRMultipartImageToMemory(exr_images, exr_headers, num_parts, &mem, err);
+  if (mem_size == 0) {
+    fclose(fp);
+    return TINYEXR_ERROR_SERIALIZATION_FAILED;
+  }
+
+  size_t written_size = 0;
+  if ((mem_size > 0) && mem) {
+    written_size = fwrite(mem, 1, mem_size, fp);
+  }
+  free(mem);
+
+  fclose(fp);
+
+  // A short write (or a NULL buffer with a non-zero size) is a write failure.
+  if (written_size != mem_size) {
+    tinyexr::SetErrorMessage("Cannot write a file", err);
+    return TINYEXR_ERROR_CANT_WRITE_FILE;
+  }
+
+  return TINYEXR_SUCCESS;
+}
+
+// Loads a deep scanline EXR file into `deep_image`.
+//
+// The file is mapped with MemoryMappedFile, the magic number and version
+// field are verified, the header attributes are scanned for `compression`,
+// `channels`, `dataWindow` and `displayWindow`, the chunk offset table is
+// read, and every chunk is decoded with tinyexr::DecompressZip into
+// per-channel float sample arrays.
+//
+// Parameters:
+//   deep_image - output; image, offset_table, channel_names, width, height
+//                and num_channels are filled in. Must not be NULL.
+//   filename   - path of the file to read. Must not be NULL.
+//   err        - error message sink used via tinyexr::SetErrorMessage.
+//
+// Returns TINYEXR_SUCCESS or a TINYEXR_ERROR_* code.
+//
+// NOTE(review): buffers malloc'ed into `deep_image` before a later failure
+// are not released on the early-return paths below; confirm how callers are
+// expected to clean up after an error.
+int LoadDeepEXR(DeepImage *deep_image, const char *filename, const char **err) {
+  if (deep_image == NULL || filename == NULL) {
+    tinyexr::SetErrorMessage("Invalid argument for LoadDeepEXR", err);
+    return TINYEXR_ERROR_INVALID_ARGUMENT;
+  }
+
+  MemoryMappedFile file(filename);
+  if (!file.valid()) {
+    tinyexr::SetErrorMessage("Cannot read file " + std::string(filename), err);
+    return TINYEXR_ERROR_CANT_OPEN_FILE;
+  }
+
+  if (file.size == 0) {
+    tinyexr::SetErrorMessage("File size is zero : " + std::string(filename),
+                             err);
+    return TINYEXR_ERROR_INVALID_FILE;
+  }
+
+  // The 4-byte magic number and the 4-byte version field are read
+  // unconditionally below, and kEXRVersionSize is subtracted from the file
+  // size, so anything shorter cannot be parsed safely.
+  if (file.size < tinyexr::kEXRVersionSize || file.size < 8u) {
+    tinyexr::SetErrorMessage("File size too short : " + std::string(filename),
+                             err);
+    return TINYEXR_ERROR_INVALID_FILE;
+  }
+
+  const char *head = reinterpret_cast<const char *>(file.data);
+  const char *marker = reinterpret_cast<const char *>(file.data);
+
+  // Header check.
+  {
+    const char header[] = {0x76, 0x2f, 0x31, 0x01};
+
+    if (memcmp(marker, header, 4) != 0) {
+      tinyexr::SetErrorMessage("Invalid magic number", err);
+      return TINYEXR_ERROR_INVALID_MAGIC_NUMBER;
+    }
+    marker += 4;
+  }
+
+  // Version, scanline.
+  {
+    // ver 2.0, scanline, deep bit on(0x800)
+    // must be [2, 8, 0, 0]
+    if (marker[0] != 2 || marker[1] != 8 || marker[2] != 0 || marker[3] != 0) {
+      tinyexr::SetErrorMessage("Unsupported version or scanline", err);
+      return TINYEXR_ERROR_UNSUPPORTED_FORMAT;
+    }
+
+    marker += 4;
+  }
+
+  int dx = -1;
+  int dy = -1;
+  int dw = -1;
+  int dh = -1;
+  int num_scanline_blocks = 1;  // 16 for ZIP compression.
+  int compression_type = -1;
+  int num_channels = -1;
+  std::vector<tinyexr::ChannelInfo> channels;
+
+  // Read attributes
+  size_t size = file.size - tinyexr::kEXRVersionSize;
+  for (;;) {
+    if (0 == size) {
+      return TINYEXR_ERROR_INVALID_DATA;
+    } else if (marker[0] == '\0') {
+      marker++;
+      size--;
+      break;
+    }
+
+    std::string attr_name;
+    std::string attr_type;
+    std::vector<unsigned char> data;
+    size_t marker_size;
+    if (!tinyexr::ReadAttribute(&attr_name, &attr_type, &data, &marker_size,
+                                marker, size)) {
+      std::stringstream ss;
+      ss << "Failed to parse attribute\n";
+      tinyexr::SetErrorMessage(ss.str(), err);
+      return TINYEXR_ERROR_INVALID_DATA;
+    }
+    marker += marker_size;
+    size -= marker_size;
+
+    if (attr_name.compare("compression") == 0) {
+      // The attribute payload comes from the file; do not index an empty one.
+      if (data.empty()) {
+        tinyexr::SetErrorMessage("Invalid compression attribute", err);
+        return TINYEXR_ERROR_INVALID_DATA;
+      }
+      compression_type = data[0];
+      if (compression_type > TINYEXR_COMPRESSIONTYPE_PIZ) {
+        std::stringstream ss;
+        ss << "Unsupported compression type : " << compression_type;
+        tinyexr::SetErrorMessage(ss.str(), err);
+        return TINYEXR_ERROR_UNSUPPORTED_FORMAT;
+      }
+
+      if (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) {
+        num_scanline_blocks = 16;
+      }
+
+    } else if (attr_name.compare("channels") == 0) {
+      // name: zero-terminated string, from 1 to 255 bytes long
+      // pixel type: int, possible values are: UINT = 0 HALF = 1 FLOAT = 2
+      // pLinear: unsigned char, possible values are 0 and 1
+      // reserved: three chars, should be zero
+      // xSampling: int
+      // ySampling: int
+
+      if (!tinyexr::ReadChannelInfo(channels, data)) {
+        tinyexr::SetErrorMessage("Failed to parse channel info", err);
+        return TINYEXR_ERROR_INVALID_DATA;
+      }
+
+      num_channels = static_cast<int>(channels.size());
+
+      if (num_channels < 1) {
+        tinyexr::SetErrorMessage("Invalid channels format", err);
+        return TINYEXR_ERROR_INVALID_DATA;
+      }
+
+    } else if (attr_name.compare("dataWindow") == 0) {
+      // Four ints are read; reject short payloads instead of letting
+      // vector::at() throw out of this function.
+      if (data.size() < 4 * sizeof(int)) {
+        tinyexr::SetErrorMessage("Invalid dataWindow attribute", err);
+        return TINYEXR_ERROR_INVALID_DATA;
+      }
+      memcpy(&dx, &data.at(0), sizeof(int));
+      memcpy(&dy, &data.at(4), sizeof(int));
+      memcpy(&dw, &data.at(8), sizeof(int));
+      memcpy(&dh, &data.at(12), sizeof(int));
+      tinyexr::swap4(&dx);
+      tinyexr::swap4(&dy);
+      tinyexr::swap4(&dw);
+      tinyexr::swap4(&dh);
+
+    } else if (attr_name.compare("displayWindow") == 0) {
+      if (data.size() < 4 * sizeof(int)) {
+        tinyexr::SetErrorMessage("Invalid displayWindow attribute", err);
+        return TINYEXR_ERROR_INVALID_DATA;
+      }
+      // Parsed for validation only; the values are not stored anywhere.
+      int x;
+      int y;
+      int w;
+      int h;
+      memcpy(&x, &data.at(0), sizeof(int));
+      memcpy(&y, &data.at(4), sizeof(int));
+      memcpy(&w, &data.at(8), sizeof(int));
+      memcpy(&h, &data.at(12), sizeof(int));
+      tinyexr::swap4(&x);
+      tinyexr::swap4(&y);
+      tinyexr::swap4(&w);
+      tinyexr::swap4(&h);
+    }
+  }
+
+  TINYEXR_CHECK_AND_RETURN_C(dx >= 0, TINYEXR_ERROR_INVALID_DATA);
+  TINYEXR_CHECK_AND_RETURN_C(dy >= 0, TINYEXR_ERROR_INVALID_DATA);
+  TINYEXR_CHECK_AND_RETURN_C(dw >= 0, TINYEXR_ERROR_INVALID_DATA);
+  TINYEXR_CHECK_AND_RETURN_C(dh >= 0, TINYEXR_ERROR_INVALID_DATA);
+  TINYEXR_CHECK_AND_RETURN_C(num_channels >= 1, TINYEXR_ERROR_INVALID_DATA);
+  // An inverted window would give a non-positive width/height, which is later
+  // cast to size_t for allocations and indexed with `data_width - 1`.
+  TINYEXR_CHECK_AND_RETURN_C(dw >= dx, TINYEXR_ERROR_INVALID_DATA);
+  TINYEXR_CHECK_AND_RETURN_C(dh >= dy, TINYEXR_ERROR_INVALID_DATA);
+
+  int data_width = dw - dx + 1;
+  int data_height = dh - dy + 1;
+
+  // Read offset tables.
+  int num_blocks = data_height / num_scanline_blocks;
+  if (num_blocks * num_scanline_blocks < data_height) {
+    num_blocks++;
+  }
+
+  // The offset table (8 bytes per block) must fit in what is left of the file.
+  {
+    const size_t consumed = static_cast<size_t>(marker - head);
+    if (consumed > file.size ||
+        (file.size - consumed) / sizeof(tinyexr::tinyexr_int64) <
+            static_cast<size_t>(num_blocks)) {
+      tinyexr::SetErrorMessage("Offset table exceeds the file size", err);
+      return TINYEXR_ERROR_INVALID_DATA;
+    }
+  }
+
+  std::vector<tinyexr::tinyexr_int64> offsets(static_cast<size_t>(num_blocks));
+
+  for (size_t y = 0; y < static_cast<size_t>(num_blocks); y++) {
+    tinyexr::tinyexr_int64 offset;
+    memcpy(&offset, marker, sizeof(tinyexr::tinyexr_int64));
+    tinyexr::swap8(reinterpret_cast<tinyexr::tinyexr_uint64 *>(&offset));
+    marker += sizeof(tinyexr::tinyexr_int64);  // = 8
+    offsets[y] = offset;
+  }
+
+#if TINYEXR_USE_PIZ
+  if ((compression_type == TINYEXR_COMPRESSIONTYPE_NONE) ||
+      (compression_type == TINYEXR_COMPRESSIONTYPE_RLE) ||
+      (compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS) ||
+      (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) ||
+      (compression_type == TINYEXR_COMPRESSIONTYPE_PIZ)) {
+#else
+  if ((compression_type == TINYEXR_COMPRESSIONTYPE_NONE) ||
+      (compression_type == TINYEXR_COMPRESSIONTYPE_RLE) ||
+      (compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS) ||
+      (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP)) {
+#endif
+    // OK
+  } else {
+    tinyexr::SetErrorMessage("Unsupported compression format", err);
+    return TINYEXR_ERROR_UNSUPPORTED_FORMAT;
+  }
+
+  // image[channel][row] -> sample array; the innermost arrays are allocated
+  // per chunk in the decode loop below.
+  // NOTE(review): only rows 0..num_blocks-1 are filled in below, so when
+  // num_scanline_blocks is 16 the remaining row pointers stay uninitialized.
+  deep_image->image = static_cast<float ***>(
+      malloc(sizeof(float **) * static_cast<size_t>(num_channels)));
+  for (int c = 0; c < num_channels; c++) {
+    deep_image->image[c] = static_cast<float **>(
+        malloc(sizeof(float *) * static_cast<size_t>(data_height)));
+  }
+
+  deep_image->offset_table = static_cast<int **>(
+      malloc(sizeof(int *) * static_cast<size_t>(data_height)));
+  for (int y = 0; y < data_height; y++) {
+    deep_image->offset_table[y] = static_cast<int *>(
+        malloc(sizeof(int) * static_cast<size_t>(data_width)));
+  }
+
+  const tinyexr::tinyexr_uint64 file_size =
+      static_cast<tinyexr::tinyexr_uint64>(file.size);
+  // int y + three int64 sizes precede the packed payload of every chunk.
+  const tinyexr::tinyexr_uint64 kChunkHeaderSize = 28;
+
+  for (size_t y = 0; y < static_cast<size_t>(num_blocks); y++) {
+    // The chunk offset comes from the file: make sure the fixed-size chunk
+    // header lies inside the mapping before touching it.
+    if (offsets[y] < 0 ||
+        static_cast<tinyexr::tinyexr_uint64>(offsets[y]) > file_size ||
+        file_size - static_cast<tinyexr::tinyexr_uint64>(offsets[y]) <
+            kChunkHeaderSize) {
+      tinyexr::SetErrorMessage("Invalid chunk offset", err);
+      return TINYEXR_ERROR_INVALID_DATA;
+    }
+
+    const unsigned char *data_ptr =
+        reinterpret_cast<const unsigned char *>(head + offsets[y]);
+
+    // int: y coordinate
+    // int64: packed size of pixel offset table
+    // int64: packed size of sample data
+    // int64: unpacked size of sample data
+    // compressed pixel offset table
+    // compressed sample data
+    int line_no;
+    tinyexr::tinyexr_int64 packedOffsetTableSize;
+    tinyexr::tinyexr_int64 packedSampleDataSize;
+    tinyexr::tinyexr_int64 unpackedSampleDataSize;
+    memcpy(&line_no, data_ptr, sizeof(int));
+    memcpy(&packedOffsetTableSize, data_ptr + 4,
+           sizeof(tinyexr::tinyexr_int64));
+    memcpy(&packedSampleDataSize, data_ptr + 12,
+           sizeof(tinyexr::tinyexr_int64));
+    memcpy(&unpackedSampleDataSize, data_ptr + 20,
+           sizeof(tinyexr::tinyexr_int64));
+
+    tinyexr::swap4(&line_no);
+    tinyexr::swap8(
+        reinterpret_cast<tinyexr::tinyexr_uint64 *>(&packedOffsetTableSize));
+    tinyexr::swap8(
+        reinterpret_cast<tinyexr::tinyexr_uint64 *>(&packedSampleDataSize));
+    tinyexr::swap8(
+        reinterpret_cast<tinyexr::tinyexr_uint64 *>(&unpackedSampleDataSize));
+
+    // Both packed payloads must lie inside the file, and no size may be
+    // negative (they are cast to unsigned types below).
+    {
+      const tinyexr::tinyexr_uint64 payload_room =
+          file_size - static_cast<tinyexr::tinyexr_uint64>(offsets[y]) -
+          kChunkHeaderSize;
+      if (packedOffsetTableSize < 0 || packedSampleDataSize < 0 ||
+          unpackedSampleDataSize < 0 ||
+          static_cast<tinyexr::tinyexr_uint64>(packedOffsetTableSize) >
+              payload_room ||
+          static_cast<tinyexr::tinyexr_uint64>(packedSampleDataSize) >
+              payload_room -
+                  static_cast<tinyexr::tinyexr_uint64>(packedOffsetTableSize)) {
+        tinyexr::SetErrorMessage("Invalid chunk size in deep image", err);
+        return TINYEXR_ERROR_INVALID_DATA;
+      }
+    }
+
+    std::vector<int> pixelOffsetTable(static_cast<size_t>(data_width));
+
+    // decode pixel offset table.
+    {
+      unsigned long dstLen =
+          static_cast<unsigned long>(pixelOffsetTable.size() * sizeof(int));
+      if (!tinyexr::DecompressZip(
+              reinterpret_cast<unsigned char *>(&pixelOffsetTable.at(0)),
+              &dstLen, data_ptr + 28,
+              static_cast<unsigned long>(packedOffsetTableSize))) {
+        // Must be a real error code: `false` would be 0 == TINYEXR_SUCCESS.
+        tinyexr::SetErrorMessage("Failed to decompress pixel offset table",
+                                 err);
+        return TINYEXR_ERROR_INVALID_DATA;
+      }
+
+      TINYEXR_CHECK_AND_RETURN_C(dstLen == pixelOffsetTable.size() * sizeof(int), TINYEXR_ERROR_INVALID_DATA);
+      for (size_t i = 0; i < static_cast<size_t>(data_width); i++) {
+        deep_image->offset_table[y][i] = pixelOffsetTable[i];
+      }
+    }
+
+    std::vector<unsigned char> sample_data(
+        static_cast<size_t>(unpackedSampleDataSize));
+
+    // decode sample data.
+    {
+      unsigned long dstLen = static_cast<unsigned long>(unpackedSampleDataSize);
+      if (dstLen) {
+        if (!tinyexr::DecompressZip(
+                reinterpret_cast<unsigned char *>(&sample_data.at(0)), &dstLen,
+                data_ptr + 28 + packedOffsetTableSize,
+                static_cast<unsigned long>(packedSampleDataSize))) {
+          // Must be a real error code: `false` would be 0 == TINYEXR_SUCCESS.
+          tinyexr::SetErrorMessage("Failed to decompress sample data", err);
+          return TINYEXR_ERROR_INVALID_DATA;
+        }
+        TINYEXR_CHECK_AND_RETURN_C(dstLen == static_cast<unsigned long>(unpackedSampleDataSize), TINYEXR_ERROR_INVALID_DATA);
+      }
+    }
+
+    // decode sample
+    // channel_offset_list[i] is the byte offset of channel i inside one
+    // sample; sampleSize is the total byte size of one sample.
+    int sampleSize = -1;
+    std::vector<int> channel_offset_list(static_cast<size_t>(num_channels));
+    {
+      int channel_offset = 0;
+      for (size_t i = 0; i < static_cast<size_t>(num_channels); i++) {
+        channel_offset_list[i] = channel_offset;
+        if (channels[i].pixel_type == TINYEXR_PIXELTYPE_UINT) {  // UINT
+          channel_offset += 4;
+        } else if (channels[i].pixel_type == TINYEXR_PIXELTYPE_HALF) {  // half
+          channel_offset += 2;
+        } else if (channels[i].pixel_type ==
+                   TINYEXR_PIXELTYPE_FLOAT) {  // float
+          channel_offset += 4;
+        } else {
+          tinyexr::SetErrorMessage("Invalid pixel_type in chnnels.", err);
+          return TINYEXR_ERROR_INVALID_DATA;
+        }
+      }
+      sampleSize = channel_offset;
+    }
+    TINYEXR_CHECK_AND_RETURN_C(sampleSize >= 2, TINYEXR_ERROR_INVALID_DATA);
+
+    // The last entry of the pixel offset table times the sample size must
+    // match the unpacked sample data size.
+    TINYEXR_CHECK_AND_RETURN_C(static_cast<size_t>(
+               pixelOffsetTable[static_cast<size_t>(data_width - 1)] *
+               sampleSize) == sample_data.size(), TINYEXR_ERROR_INVALID_DATA);
+    int samples_per_line = static_cast<int>(sample_data.size()) / sampleSize;
+
+    //
+    // Alloc memory
+    //
+
+    //
+    // pixel data is stored as image[channels][pixel_samples]
+    //
+    {
+      tinyexr::tinyexr_uint64 data_offset = 0;
+      for (size_t c = 0; c < static_cast<size_t>(num_channels); c++) {
+        deep_image->image[c][y] = static_cast<float *>(
+            malloc(sizeof(float) * static_cast<size_t>(samples_per_line)));
+
+        if (channels[c].pixel_type == 0) {  // UINT
+          for (size_t x = 0; x < static_cast<size_t>(samples_per_line); x++) {
+            unsigned int ui;
+            unsigned int *src_ptr = reinterpret_cast<unsigned int *>(
+                &sample_data.at(size_t(data_offset) + x * sizeof(int)));
+            tinyexr::cpy4(&ui, src_ptr);
+            deep_image->image[c][y][x] = static_cast<float>(ui);  // @fixme
+          }
+          data_offset +=
+              sizeof(unsigned int) * static_cast<size_t>(samples_per_line);
+        } else if (channels[c].pixel_type == 1) {  // half
+          for (size_t x = 0; x < static_cast<size_t>(samples_per_line); x++) {
+            tinyexr::FP16 f16;
+            const unsigned short *src_ptr = reinterpret_cast<unsigned short *>(
+                &sample_data.at(size_t(data_offset) + x * sizeof(short)));
+            tinyexr::cpy2(&(f16.u), src_ptr);
+            tinyexr::FP32 f32 = half_to_float(f16);
+            deep_image->image[c][y][x] = f32.f;
+          }
+          data_offset += sizeof(short) * static_cast<size_t>(samples_per_line);
+        } else {  // float
+          for (size_t x = 0; x < static_cast<size_t>(samples_per_line); x++) {
+            float f;
+            const float *src_ptr = reinterpret_cast<float *>(
+                &sample_data.at(size_t(data_offset) + x * sizeof(float)));
+            tinyexr::cpy4(&f, src_ptr);
+            deep_image->image[c][y][x] = f;
+          }
+          data_offset += sizeof(float) * static_cast<size_t>(samples_per_line);
+        }
+      }
+    }
+  }  // y
+
+  deep_image->width = data_width;
+  deep_image->height = data_height;
+
+  // Channel names are duplicated so they outlive the local `channels` vector.
+  deep_image->channel_names = static_cast<const char **>(
+      malloc(sizeof(const char *) * static_cast<size_t>(num_channels)));
+  for (size_t c = 0; c < static_cast<size_t>(num_channels); c++) {
+#ifdef _WIN32
+    deep_image->channel_names[c] = _strdup(channels[c].name.c_str());
+#else
+    deep_image->channel_names[c] = strdup(channels[c].name.c_str());
+#endif
+  }
+  deep_image->num_channels = num_channels;
+
+  return TINYEXR_SUCCESS;
+}
+
+// Puts *exr_image into its empty state: all pointer members become NULL and
+// all dimension/level/count members become zero. Does nothing when given NULL.
+void InitEXRImage(EXRImage *exr_image) {
+  if (!exr_image) return;
+
+  // Pointer members.
+  exr_image->images = NULL;
+  exr_image->tiles = NULL;
+  exr_image->next_level = NULL;
+
+  // Scalar members.
+  exr_image->width = 0;
+  exr_image->height = 0;
+  exr_image->num_channels = 0;
+  exr_image->num_tiles = 0;
+  exr_image->level_x = 0;
+  exr_image->level_y = 0;
+}
+
+// Releases an error message string with free(). A NULL message is ignored.
+void FreeEXRErrorMessage(const char *msg) {
+  if (msg == NULL) return;
+
+  // The const qualifier has to be cast away because free() takes a mutable
+  // pointer.
+  char *writable = const_cast<char *>(msg);
+  free(reinterpret_cast<void *>(writable));
+}
+
+// Zero-fills the whole EXRHeader structure. Does nothing when given NULL.
+void InitEXRHeader(EXRHeader *exr_header) {
+  if (exr_header != NULL) {
+    memset(exr_header, 0, sizeof(*exr_header));
+  }
+}
+
+// Frees the heap buffers referenced by *exr_header (channels, pixel_types,
+// requested_pixel_types, each custom attribute value and the custom attribute
+// array itself), then clears the name via EXRSetNameAttr(exr_header, NULL).
+// The header structure itself is not freed.
+//
+// Returns TINYEXR_ERROR_INVALID_ARGUMENT for a NULL header, otherwise
+// TINYEXR_SUCCESS.
+int FreeEXRHeader(EXRHeader *exr_header) {
+  if (!exr_header) {
+    return TINYEXR_ERROR_INVALID_ARGUMENT;
+  }
+
+  // free(NULL) is a no-op, so unset members need no explicit guard.
+  free(exr_header->channels);
+  free(exr_header->pixel_types);
+  free(exr_header->requested_pixel_types);
+
+  const int attr_count = exr_header->num_custom_attributes;
+  for (int k = 0; k < attr_count; ++k) {
+    free(exr_header->custom_attributes[k].value);
+  }
+  free(exr_header->custom_attributes);
+
+  EXRSetNameAttr(exr_header, NULL);
+
+  return TINYEXR_SUCCESS;
+}
+
+// Stores `name` in exr_header->name. The 256-byte name field is zeroed first
+// and at most 255 bytes of `name` are copied, so the stored string always
+// stays NUL-terminated. A NULL `name` just clears the field; a NULL header is
+// ignored.
+void EXRSetNameAttr(EXRHeader* exr_header, const char* name) {
+  if (!exr_header) return;
+
+  memset(exr_header->name, 0, 256);
+  if (!name) return;
+
+  const size_t name_len = strlen(name);
+  const size_t copy_len = (name_len < size_t(255)) ? name_len : size_t(255);
+  if (copy_len > 0) {
+    memcpy(exr_header->name, name, copy_len);
+  }
+}
+
+// Returns the number of levels reachable from `exr_image`: 0 for NULL, 1 when
+// the `images` member is set (scanline image), otherwise the length of the
+// `next_level` chain including the image itself.
+int EXRNumLevels(const EXRImage* exr_image) {
+  if (!exr_image) return 0;
+  if (exr_image->images) return 1;  // scanlines
+
+  int count = 0;
+  for (const EXRImage* lvl = exr_image; lvl != NULL; lvl = lvl->next_level) {
+    ++count;
+  }
+  return count;
+}
+
+// Releases everything owned by *exr_image: the chain of `next_level` images
+// (recursively, each node released with delete), the per-channel scanline
+// buffers, and the per-tile channel buffers plus the tile array. The EXRImage
+// structure passed in is not freed itself.
+//
+// Returns TINYEXR_ERROR_INVALID_ARGUMENT for a NULL image, otherwise
+// TINYEXR_SUCCESS.
+int FreeEXRImage(EXRImage *exr_image) {
+  if (!exr_image) {
+    return TINYEXR_ERROR_INVALID_ARGUMENT;
+  }
+
+  // Deeper levels first.
+  EXRImage *child = exr_image->next_level;
+  if (child) {
+    FreeEXRImage(child);
+    delete child;
+  }
+
+  const int channel_count = exr_image->num_channels;
+
+  // Scanline storage. free(NULL) is a no-op, so unset channels are harmless.
+  if (exr_image->images) {
+    for (int c = 0; c < channel_count; ++c) {
+      free(exr_image->images[c]);
+    }
+    free(exr_image->images);
+  }
+
+  // Tiled storage.
+  if (exr_image->tiles) {
+    const int tile_count = exr_image->num_tiles;
+    for (int t = 0; t < tile_count; ++t) {
+      if (!exr_image->tiles[t].images) continue;
+      for (int c = 0; c < channel_count; ++c) {
+        free(exr_image->tiles[t].images[c]);
+      }
+      free(exr_image->tiles[t].images);
+    }
+    free(exr_image->tiles);
+  }
+
+  return TINYEXR_SUCCESS;
+}
+
+// Maps `filename` with MemoryMappedFile and parses its EXR header through
+// ParseEXRHeaderFromMemory.
+//
+// Returns TINYEXR_ERROR_INVALID_ARGUMENT when any of exr_header, exr_version
+// or filename is NULL, TINYEXR_ERROR_CANT_OPEN_FILE when the mapping is not
+// valid, and otherwise the result of ParseEXRHeaderFromMemory.
+int ParseEXRHeaderFromFile(EXRHeader *exr_header, const EXRVersion *exr_version,
+                           const char *filename, const char **err) {
+  const bool have_all_args =
+      (exr_header != NULL) && (exr_version != NULL) && (filename != NULL);
+  if (!have_all_args) {
+    tinyexr::SetErrorMessage("Invalid argument for ParseEXRHeaderFromFile",
+                             err);
+    return TINYEXR_ERROR_INVALID_ARGUMENT;
+  }
+
+  MemoryMappedFile mapped(filename);
+  if (mapped.valid()) {
+    return ParseEXRHeaderFromMemory(exr_header, exr_version, mapped.data,
+                                    mapped.size, err);
+  }
+
+  tinyexr::SetErrorMessage("Cannot read file " + std::string(filename), err);
+  return TINYEXR_ERROR_CANT_OPEN_FILE;
+}
+
+// Parses every part header of a multi-part EXR held in `memory`.
+//
+// Headers are parsed one after another with ParseEXRHeader until it reports
+// an empty header (the end-of-header-list marker). Each parsed header is then
+// converted with ConvertHeader into a malloc'ed EXRHeader.
+//
+// Parameters:
+//   exr_headers - output; receives a malloc'ed array of malloc'ed EXRHeader
+//                 pointers. Must not be NULL.
+//   num_headers - output; receives the number of entries in *exr_headers.
+//                 Must not be NULL.
+//   exr_version - version info of the file. Must not be NULL.
+//   memory/size - the file contents, starting at the magic number.
+//   err         - error message sink used via tinyexr::SetErrorMessage.
+//
+// Returns TINYEXR_SUCCESS, TINYEXR_ERROR_INVALID_ARGUMENT,
+// TINYEXR_ERROR_INVALID_DATA, the error returned by ParseEXRHeader, or
+// TINYEXR_ERROR_INVALID_HEADER when at least one ConvertHeader call failed
+// (the outputs are still populated in that last case).
+int ParseEXRMultipartHeaderFromMemory(EXRHeader ***exr_headers,
+                                      int *num_headers,
+                                      const EXRVersion *exr_version,
+                                      const unsigned char *memory, size_t size,
+                                      const char **err) {
+  if (memory == NULL || exr_headers == NULL || num_headers == NULL ||
+      exr_version == NULL) {
+    // Invalid argument
+    tinyexr::SetErrorMessage(
+        "Invalid argument for ParseEXRMultipartHeaderFromMemory", err);
+    return TINYEXR_ERROR_INVALID_ARGUMENT;
+  }
+
+  if (size < tinyexr::kEXRVersionSize) {
+    tinyexr::SetErrorMessage("Data size too short", err);
+    return TINYEXR_ERROR_INVALID_DATA;
+  }
+
+  // `marker` walks the header list; `marker_size` is the number of bytes left
+  // in the buffer starting at `marker`.
+  const unsigned char *marker = memory + tinyexr::kEXRVersionSize;
+  size_t marker_size = size - tinyexr::kEXRVersionSize;
+
+  std::vector<tinyexr::HeaderInfo> infos;
+
+  for (;;) {
+    tinyexr::HeaderInfo info;
+    info.clear();
+
+    std::string err_str;
+    bool empty_header = false;
+    int ret = ParseEXRHeader(&info, &empty_header, exr_version, &err_str,
+                             marker, marker_size);
+
+    if (ret != TINYEXR_SUCCESS) {
+
+      // Free malloc-allocated memory here.
+      for (size_t i = 0; i < info.attributes.size(); i++) {
+        if (info.attributes[i].value) {
+          free(info.attributes[i].value);
+        }
+      }
+
+      tinyexr::SetErrorMessage(err_str, err);
+      return ret;
+    }
+
+    if (empty_header) {
+      marker += 1;  // skip '\0'
+      break;
+    }
+
+    // `chunkCount` must exist in the header.
+    if (info.chunk_count == 0) {
+
+      // Free malloc-allocated memory here.
+      for (size_t i = 0; i < info.attributes.size(); i++) {
+        if (info.attributes[i].value) {
+          free(info.attributes[i].value);
+        }
+      }
+
+      tinyexr::SetErrorMessage(
+          "`chunkCount' attribute is not found in the header.", err);
+      return TINYEXR_ERROR_INVALID_DATA;
+    }
+
+    infos.push_back(info);
+
+    // move to next header.
+    // The byte budget handed to ParseEXRHeader has to shrink together with
+    // `marker`; otherwise every header after the first would be parsed
+    // against a length that extends past the end of `memory`. Clamp at zero
+    // so a bogus header_len cannot wrap the unsigned counter.
+    const size_t consumed = static_cast<size_t>(info.header_len);
+    marker += info.header_len;
+    marker_size = (consumed < marker_size) ? (marker_size - consumed) : 0;
+  }
+
+  // allocate memory for EXRHeader and create array of EXRHeader pointers.
+  (*exr_headers) =
+      static_cast<EXRHeader **>(malloc(sizeof(EXRHeader *) * infos.size()));
+
+
+  int retcode = TINYEXR_SUCCESS;
+
+  for (size_t i = 0; i < infos.size(); i++) {
+    EXRHeader *exr_header = static_cast<EXRHeader *>(malloc(sizeof(EXRHeader)));
+    memset(exr_header, 0, sizeof(EXRHeader));
+
+    std::string warn;
+    std::string _err;
+    if (!ConvertHeader(exr_header, infos[i], &warn, &_err)) {
+
+      // Free malloc-allocated memory here.
+      for (size_t k = 0; k < infos[i].attributes.size(); k++) {
+        if (infos[i].attributes[k].value) {
+          free(infos[i].attributes[k].value);
+        }
+      }
+
+      if (!_err.empty()) {
+        tinyexr::SetErrorMessage(
+            _err, err);
+      }
+      // continue to converting headers
+      retcode = TINYEXR_ERROR_INVALID_HEADER;
+    }
+
+    exr_header->multipart = exr_version->multipart ? 1 : 0;
+
+    // The header is stored even when conversion failed, so the caller always
+    // receives infos.size() entries.
+    (*exr_headers)[i] = exr_header;
+  }
+
+  (*num_headers) = static_cast<int>(infos.size());
+
+  return retcode;
+}
+
+// Maps `filename` with MemoryMappedFile and parses all part headers through
+// ParseEXRMultipartHeaderFromMemory.
+//
+// Returns TINYEXR_ERROR_INVALID_ARGUMENT when any pointer argument other than
+// `err` is NULL, TINYEXR_ERROR_CANT_OPEN_FILE when the mapping is not valid,
+// and otherwise the result of ParseEXRMultipartHeaderFromMemory.
+int ParseEXRMultipartHeaderFromFile(EXRHeader ***exr_headers, int *num_headers,
+                                    const EXRVersion *exr_version,
+                                    const char *filename, const char **err) {
+  const bool have_all_args = (exr_headers != NULL) && (num_headers != NULL) &&
+                             (exr_version != NULL) && (filename != NULL);
+  if (!have_all_args) {
+    tinyexr::SetErrorMessage(
+        "Invalid argument for ParseEXRMultipartHeaderFromFile()", err);
+    return TINYEXR_ERROR_INVALID_ARGUMENT;
+  }
+
+  MemoryMappedFile mapped(filename);
+  if (mapped.valid()) {
+    return ParseEXRMultipartHeaderFromMemory(
+        exr_headers, num_headers, exr_version, mapped.data, mapped.size, err);
+  }
+
+  tinyexr::SetErrorMessage("Cannot read file " + std::string(filename), err);
+  return TINYEXR_ERROR_CANT_OPEN_FILE;
+}
+
+// Parses the magic number and version field at the start of an EXR buffer.
+//
+// Parameters:
+//   version - output; `version` is set to 2 and the tiled / long_name /
+//             non_image / multipart flags are set from the second version
+//             byte. Must not be NULL.
+//   memory  - start of the file contents. Must not be NULL.
+//   size    - number of bytes available; must be at least
+//             tinyexr::kEXRVersionSize.
+//
+// Returns TINYEXR_SUCCESS, TINYEXR_ERROR_INVALID_ARGUMENT,
+// TINYEXR_ERROR_INVALID_DATA (buffer too short),
+// TINYEXR_ERROR_INVALID_MAGIC_NUMBER or TINYEXR_ERROR_INVALID_EXR_VERSION.
+int ParseEXRVersionFromMemory(EXRVersion *version, const unsigned char *memory,
+                              size_t size) {
+  if (version == NULL || memory == NULL) {
+    return TINYEXR_ERROR_INVALID_ARGUMENT;
+  }
+
+  if (size < tinyexr::kEXRVersionSize) {
+    return TINYEXR_ERROR_INVALID_DATA;
+  }
+
+  const unsigned char *marker = memory;
+
+  // Header check.
+  {
+    const char header[] = {0x76, 0x2f, 0x31, 0x01};
+
+    if (memcmp(marker, header, 4) != 0) {
+      return TINYEXR_ERROR_INVALID_MAGIC_NUMBER;
+    }
+    marker += 4;
+  }
+
+  // Flags default to false; note they are already reset when an unsupported
+  // version number is reported below.
+  version->tiled = false;
+  version->long_name = false;
+  version->non_image = false;
+  version->multipart = false;
+
+  // Parse version header.
+  {
+    // must be 2
+    if (marker[0] != 2) {
+      return TINYEXR_ERROR_INVALID_EXR_VERSION;
+    }
+
+    // `version` was validated at the top of the function, so no further NULL
+    // check is needed here.
+    version->version = 2;
+
+    if (marker[1] & 0x2) {  // 9th bit
+      version->tiled = true;
+    }
+    if (marker[1] & 0x4) {  // 10th bit
+      version->long_name = true;
+    }
+    if (marker[1] & 0x8) {        // 11th bit
+      version->non_image = true;  // (deep image)
+    }
+    if (marker[1] & 0x10) {  // 12th bit
+      version->multipart = true;
+    }
+  }
+
+  return TINYEXR_SUCCESS;
+}
+
+// Reads the first tinyexr::kEXRVersionSize bytes of `filename` and parses
+// them with ParseEXRVersionFromMemory.
+//
+// Returns TINYEXR_ERROR_INVALID_ARGUMENT for a NULL filename,
+// TINYEXR_ERROR_CANT_OPEN_FILE when the file cannot be opened,
+// TINYEXR_ERROR_INVALID_FILE when fewer bytes than required could be read,
+// and otherwise the result of ParseEXRVersionFromMemory.
+int ParseEXRVersionFromFile(EXRVersion *version, const char *filename) {
+  if (!filename) {
+    return TINYEXR_ERROR_INVALID_ARGUMENT;
+  }
+
+  FILE *stream = NULL;
+#ifdef _WIN32
+#if defined(_MSC_VER) || (defined(MINGW_HAS_SECURE_API) && MINGW_HAS_SECURE_API) // MSVC, MinGW GCC, or Clang.
+  const errno_t open_status =
+      _wfopen_s(&stream, tinyexr::UTF8ToWchar(filename).c_str(), L"rb");
+  if (open_status != 0) {
+    // TODO(syoyo): return wfopen_s error code
+    return TINYEXR_ERROR_CANT_OPEN_FILE;
+  }
+#else
+  // Unknown compiler or MinGW without MINGW_HAS_SECURE_API.
+  stream = fopen(filename, "rb");
+#endif
+#else
+  stream = fopen(filename, "rb");
+#endif
+  if (stream == NULL) {
+    return TINYEXR_ERROR_CANT_OPEN_FILE;
+  }
+
+  // A plain fread of the fixed-size prefix doubles as the "file too short"
+  // test, which avoids fseek(fp, 0, SEEK_END) -- something C implementations
+  // are not required to support.
+  unsigned char prefix[tinyexr::kEXRVersionSize];
+  const size_t bytes_read =
+      fread(&prefix[0], 1, tinyexr::kEXRVersionSize, stream);
+  fclose(stream);
+
+  if (bytes_read == tinyexr::kEXRVersionSize) {
+    return ParseEXRVersionFromMemory(version, prefix, tinyexr::kEXRVersionSize);
+  }
+  return TINYEXR_ERROR_INVALID_FILE;
+}
+
+// Decodes every part of a multipart EXR file held in `memory` into exr_images[i],
+// using the already parsed exr_headers[i]. Returns a TINYEXR_* status code.
+int LoadEXRMultipartImageFromMemory(EXRImage *exr_images,
+                                    const EXRHeader **exr_headers,
+                                    unsigned int num_parts,
+                                    const unsigned char *memory,
+                                    const size_t size, const char **err) {
+  if (exr_images == NULL || exr_headers == NULL || num_parts == 0 ||
+      memory == NULL || (size <= tinyexr::kEXRVersionSize)) {
+    tinyexr::SetErrorMessage("Invalid argument for LoadEXRMultipartImageFromMemory()", err);
+    return TINYEXR_ERROR_INVALID_ARGUMENT;
+  }
+
+  // compute total header size.
+  size_t total_header_size = 0;
+  for (unsigned int i = 0; i < num_parts; i++) {
+    if (exr_headers[i]->header_len == 0) {
+      tinyexr::SetErrorMessage("EXRHeader variable is not initialized.", err);
+      return TINYEXR_ERROR_INVALID_ARGUMENT;
+    }
+
+    total_header_size += exr_headers[i]->header_len;
+  }
+
+  if (size < 9 || total_header_size > size - 9) {  // 4 (magic) + 4 (version) + 1 (empty header)
+    tinyexr::SetErrorMessage("Header size exceeds EXR data size.", err);
+    return TINYEXR_ERROR_INVALID_DATA;
+  }
+  const char *marker = reinterpret_cast<const char *>(memory) + total_header_size + 9;
+  const char *const table_end = reinterpret_cast<const char *>(memory) + size;
+
+  // NOTE 1:
+  //   In multipart image, There is 'part number' before chunk data.
+  //   4 byte : part number
+  //   4+     : chunk
+  //
+  // NOTE 2:
+  //   EXR spec says 'part number' is 'unsigned long' but actually this is
+  //   'unsigned int(4 bytes)' in OpenEXR implementation...
+  //   http://www.openexr.com/openexrfilelayout.pdf
+
+  // Load chunk offset table.
+  std::vector<tinyexr::OffsetData> chunk_offset_table_list;
+  chunk_offset_table_list.reserve(num_parts);
+  for (size_t i = 0; i < static_cast<size_t>(num_parts); i++) {
+    chunk_offset_table_list.resize(chunk_offset_table_list.size() + 1);
+    tinyexr::OffsetData& offset_data = chunk_offset_table_list.back();
+    if (!exr_headers[i]->tiled || exr_headers[i]->tile_level_mode == TINYEXR_TILE_ONE_LEVEL) {
+      tinyexr::InitSingleResolutionOffsets(offset_data, size_t(exr_headers[i]->chunk_count));
+      std::vector<tinyexr::tinyexr_uint64>& offset_table = offset_data.offsets[0][0];
+
+      for (size_t c = 0; c < offset_table.size(); c++) {
+        tinyexr::tinyexr_uint64 offset = size;  // stays invalid if the table is truncated
+        if (static_cast<size_t>(table_end - marker) >= 8) {
+          memcpy(&offset, marker, 8);
+          tinyexr::swap8(&offset);
+        }
+        if (offset >= size || size - offset < 4) {  // the 4-byte 'part number' must fit too
+          tinyexr::SetErrorMessage("Invalid offset size in EXR header chunks.", err);
+          return TINYEXR_ERROR_INVALID_DATA;
+        }
+
+        offset_table[c] = offset + 4;  // +4 to skip 'part number'
+        marker += 8;
+      }
+    } else {
+      std::vector<int> num_x_tiles, num_y_tiles;
+      if (!tinyexr::PrecalculateTileInfo(num_x_tiles, num_y_tiles, exr_headers[i])) {
+        tinyexr::SetErrorMessage("Invalid tile info.", err);
+        return TINYEXR_ERROR_INVALID_DATA;
+      }
+      int num_blocks = InitTileOffsets(offset_data, exr_headers[i], num_x_tiles, num_y_tiles);
+      if (num_blocks != exr_headers[i]->chunk_count) {
+        tinyexr::SetErrorMessage("Invalid offset table size.", err);
+        return TINYEXR_ERROR_INVALID_DATA;
+      }
+      for (unsigned int l = 0; l < offset_data.offsets.size(); ++l) {
+        for (unsigned int dy = 0; dy < offset_data.offsets[l].size(); ++dy) {
+          for (unsigned int dx = 0; dx < offset_data.offsets[l][dy].size(); ++dx) {
+            tinyexr::tinyexr_uint64 offset = size;  // stays invalid if the table is truncated
+            if (static_cast<size_t>(table_end - marker) >= sizeof(tinyexr::tinyexr_uint64)) {
+              memcpy(&offset, marker, sizeof(tinyexr::tinyexr_uint64));
+              tinyexr::swap8(&offset);
+            }
+            if (offset >= size || size - offset < 4) {  // the 4-byte 'part number' must fit too
+              tinyexr::SetErrorMessage("Invalid offset size in EXR header chunks.", err);
+              return TINYEXR_ERROR_INVALID_DATA;
+            }
+            offset_data.offsets[l][dy][dx] = offset + 4; // +4 to skip 'part number'
+            marker += sizeof(tinyexr::tinyexr_uint64);  // = 8
+          }
+        }
+      }
+    }
+  }
+
+  // Decode image.
+  for (size_t i = 0; i < static_cast<size_t>(num_parts); i++) {
+    tinyexr::OffsetData &offset_data = chunk_offset_table_list[i];
+
+    // First check 'part number' is identical to 'i'
+    for (unsigned int l = 0; l < offset_data.offsets.size(); ++l)
+      for (unsigned int dy = 0; dy < offset_data.offsets[l].size(); ++dy)
+        for (unsigned int dx = 0; dx < offset_data.offsets[l][dy].size(); ++dx) {
+
+          const unsigned char *part_number_addr =
+              memory + offset_data.offsets[l][dy][dx] - 4;  // -4 to move to 'part number' field.
+          unsigned int part_no;
+          memcpy(&part_no, part_number_addr, sizeof(unsigned int));  // 4
+          tinyexr::swap4(&part_no);
+
+          if (part_no != i) {
+            tinyexr::SetErrorMessage("Invalid `part number' in EXR header chunks.", err);
+            return TINYEXR_ERROR_INVALID_DATA;
+          }
+        }
+
+    std::string e;
+    int ret = tinyexr::DecodeChunk(&exr_images[i], exr_headers[i], offset_data,
+                                   memory, size, &e);
+    if (ret != TINYEXR_SUCCESS) {
+      if (!e.empty()) {
+        tinyexr::SetErrorMessage(e, err);
+      }
+      return ret;
+    }
+  }
+
+  return TINYEXR_SUCCESS;
+}
+
+int LoadEXRMultipartImageFromFile(EXRImage *exr_images,
+                                  const EXRHeader **exr_headers,
+                                  unsigned int num_parts, const char *filename,
+                                  const char **err) {
+  if (exr_images == NULL || exr_headers == NULL || num_parts == 0 ||
+      filename == NULL) {  // filename is dereferenced below, so reject NULL here
+    tinyexr::SetErrorMessage("Invalid argument for LoadEXRMultipartImageFromFile", err);
+    return TINYEXR_ERROR_INVALID_ARGUMENT;
+  }
+  // Load the whole file through MemoryMappedFile and decode it with the in-memory loader.
+  MemoryMappedFile file(filename);
+  if (!file.valid()) {
+    tinyexr::SetErrorMessage("Cannot read file " + std::string(filename), err);
+    return TINYEXR_ERROR_CANT_OPEN_FILE;
+  }
+
+  return LoadEXRMultipartImageFromMemory(exr_images, exr_headers, num_parts,
+                                         file.data, file.size, err);
+}
+
+int SaveEXRToMemory(const float *data, int width, int height, int components,
+            const int save_as_fp16, unsigned char **outbuf, const char **err) {
+  // Encodes interleaved float pixels as EXR; returns the byte size stored in *outbuf or a TINYEXR_ERROR_* code.
+  if ((components == 1) || components == 3 || components == 4) {
+    // OK
+  } else {
+    std::stringstream ss;
+    ss << "Unsupported component value : " << components << std::endl;
+
+    tinyexr::SetErrorMessage(ss.str(), err);
+    return TINYEXR_ERROR_INVALID_ARGUMENT;
+  }
+
+  EXRHeader header;
+  InitEXRHeader(&header);
+
+  if ((width < 16) && (height < 16)) {
+    // No compression for small image.
+    header.compression_type = TINYEXR_COMPRESSIONTYPE_NONE;
+  } else {
+    header.compression_type = TINYEXR_COMPRESSIONTYPE_ZIP;
+  }
+
+  EXRImage image;
+  InitEXRImage(&image);
+
+  image.num_channels = components;
+
+  std::vector<float> images[4];
+  const size_t pixel_count = static_cast<size_t>(width) * static_cast<size_t>(height);  // multiply as size_t: no int overflow
+  if (components == 1) {
+    images[0].resize(pixel_count);
+    memcpy(images[0].data(), data, sizeof(float) * pixel_count);
+  } else {
+    images[0].resize(pixel_count);
+    images[1].resize(pixel_count);
+    images[2].resize(pixel_count);
+    images[3].resize(pixel_count);
+
+    // Split RGB(A)RGB(A)RGB(A)... into R, G and B(and A) layers
+    for (size_t i = 0; i < pixel_count; i++) {
+      images[0][i] = data[static_cast<size_t>(components) * i + 0];
+      images[1][i] = data[static_cast<size_t>(components) * i + 1];
+      images[2][i] = data[static_cast<size_t>(components) * i + 2];
+      if (components == 4) {
+        images[3][i] = data[static_cast<size_t>(components) * i + 3];
+      }
+    }
+  }
+
+  float *image_ptr[4] = {0, 0, 0, 0};
+  if (components == 4) {
+    image_ptr[0] = &(images[3].at(0));  // A
+    image_ptr[1] = &(images[2].at(0));  // B
+    image_ptr[2] = &(images[1].at(0));  // G
+    image_ptr[3] = &(images[0].at(0));  // R
+  } else if (components == 3) {
+    image_ptr[0] = &(images[2].at(0));  // B
+    image_ptr[1] = &(images[1].at(0));  // G
+    image_ptr[2] = &(images[0].at(0));  // R
+  } else if (components == 1) {
+    image_ptr[0] = &(images[0].at(0));  // A
+  }
+
+  image.images = reinterpret_cast<unsigned char **>(image_ptr);
+  image.width = width;
+  image.height = height;
+
+  header.num_channels = components;
+  header.channels = static_cast<EXRChannelInfo *>(malloc(
+      sizeof(EXRChannelInfo) * static_cast<size_t>(header.num_channels)));
+  // Must be (A)BGR order, since most of EXR viewers expect this channel order.
+  if (components == 4) {
+#ifdef _MSC_VER
+    strncpy_s(header.channels[0].name, "A", 255);
+    strncpy_s(header.channels[1].name, "B", 255);
+    strncpy_s(header.channels[2].name, "G", 255);
+    strncpy_s(header.channels[3].name, "R", 255);
+#else
+    strncpy(header.channels[0].name, "A", 255);
+    strncpy(header.channels[1].name, "B", 255);
+    strncpy(header.channels[2].name, "G", 255);
+    strncpy(header.channels[3].name, "R", 255);
+#endif
+    header.channels[0].name[strlen("A")] = '\0';
+    header.channels[1].name[strlen("B")] = '\0';
+    header.channels[2].name[strlen("G")] = '\0';
+    header.channels[3].name[strlen("R")] = '\0';
+  } else if (components == 3) {
+#ifdef _MSC_VER
+    strncpy_s(header.channels[0].name, "B", 255);
+    strncpy_s(header.channels[1].name, "G", 255);
+    strncpy_s(header.channels[2].name, "R", 255);
+#else
+    strncpy(header.channels[0].name, "B", 255);
+    strncpy(header.channels[1].name, "G", 255);
+    strncpy(header.channels[2].name, "R", 255);
+#endif
+    header.channels[0].name[strlen("B")] = '\0';
+    header.channels[1].name[strlen("G")] = '\0';
+    header.channels[2].name[strlen("R")] = '\0';
+  } else {
+#ifdef _MSC_VER
+    strncpy_s(header.channels[0].name, "A", 255);
+#else
+    strncpy(header.channels[0].name, "A", 255);
+#endif
+    header.channels[0].name[strlen("A")] = '\0';
+  }
+
+  header.pixel_types = static_cast<int *>(
+      malloc(sizeof(int) * static_cast<size_t>(header.num_channels)));
+  header.requested_pixel_types = static_cast<int *>(
+      malloc(sizeof(int) * static_cast<size_t>(header.num_channels)));
+  for (int i = 0; i < header.num_channels; i++) {
+    header.pixel_types[i] =
+        TINYEXR_PIXELTYPE_FLOAT;  // pixel type of input image
+
+    if (save_as_fp16 > 0) {
+      header.requested_pixel_types[i] =
+          TINYEXR_PIXELTYPE_HALF;  // save with half(fp16) pixel format
+    } else {
+      header.requested_pixel_types[i] =
+          TINYEXR_PIXELTYPE_FLOAT;  // save with float(fp32) pixel format(i.e.
+                                    // no precision reduction)
+    }
+  }
+
+  // Serialize, then release the malloc'ed header arrays on success and failure alike.
+  unsigned char *mem_buf = NULL;
+  size_t mem_size = SaveEXRImageToMemory(&image, &header, &mem_buf, err);
+
+  free(header.channels);
+  free(header.pixel_types);
+  free(header.requested_pixel_types);
+
+  if (mem_size == 0) {
+    return TINYEXR_ERROR_SERIALIZATION_FAILED;
+  }
+
+  if (mem_size > size_t(std::numeric_limits<int>::max())) {
+    free(mem_buf);
+    return TINYEXR_ERROR_DATA_TOO_LARGE;
+  }
+
+  (*outbuf) = mem_buf;
+
+  return int(mem_size);
+}
+
+int SaveEXR(const float *data, int width, int height, int components,
+            const int save_as_fp16, const char *outfilename, const char **err) {
+  if ((components == 1) || components == 3 || components == 4) {
+    // OK
+  } else {
+    std::stringstream ss;
+    ss << "Unsupported component value : " << components << std::endl;
+
+    tinyexr::SetErrorMessage(ss.str(), err);
+    return TINYEXR_ERROR_INVALID_ARGUMENT;
+  }
+  // `data` is read as width*height pixels of `components` interleaved floats each.
+  EXRHeader header;
+  InitEXRHeader(&header);
+
+  if ((width < 16) && (height < 16)) {
+    // No compression for small image.
+    header.compression_type = TINYEXR_COMPRESSIONTYPE_NONE;
+  } else {
+    header.compression_type = TINYEXR_COMPRESSIONTYPE_ZIP;
+  }
+
+  EXRImage image;
+  InitEXRImage(&image);
+
+  image.num_channels = components;
+
+  std::vector<float> images[4];
+  const size_t pixel_count =
+      static_cast<size_t>(width) * static_cast<size_t>(height);
+
+  if (components == 1) {
+    images[0].resize(pixel_count);
+    memcpy(images[0].data(), data, sizeof(float) * pixel_count);
+  } else {
+    images[0].resize(pixel_count);
+    images[1].resize(pixel_count);
+    images[2].resize(pixel_count);
+    images[3].resize(pixel_count);
+
+    // Split RGB(A)RGB(A)RGB(A)... into R, G and B(and A) layers
+    for (size_t i = 0; i < pixel_count; i++) {
+      images[0][i] = data[static_cast<size_t>(components) * i + 0];
+      images[1][i] = data[static_cast<size_t>(components) * i + 1];
+      images[2][i] = data[static_cast<size_t>(components) * i + 2];
+      if (components == 4) {
+        images[3][i] = data[static_cast<size_t>(components) * i + 3];
+      }
+    }
+  }
+
+  float *image_ptr[4] = {0, 0, 0, 0};  // NOTE(review): the at(0) calls below throw when pixel_count == 0
+  if (components == 4) {
+    image_ptr[0] = &(images[3].at(0));  // A
+    image_ptr[1] = &(images[2].at(0));  // B
+    image_ptr[2] = &(images[1].at(0));  // G
+    image_ptr[3] = &(images[0].at(0));  // R
+  } else if (components == 3) {
+    image_ptr[0] = &(images[2].at(0));  // B
+    image_ptr[1] = &(images[1].at(0));  // G
+    image_ptr[2] = &(images[0].at(0));  // R
+  } else if (components == 1) {
+    image_ptr[0] = &(images[0].at(0));  // A
+  }
+
+  image.images = reinterpret_cast<unsigned char **>(image_ptr);
+  image.width = width;
+  image.height = height;
+
+  header.num_channels = components;
+  header.channels = static_cast<EXRChannelInfo *>(malloc(
+      sizeof(EXRChannelInfo) * static_cast<size_t>(header.num_channels)));
+  // Must be (A)BGR order, since most of EXR viewers expect this channel order.
+  if (components == 4) {
+#ifdef _MSC_VER
+    strncpy_s(header.channels[0].name, "A", 255);
+    strncpy_s(header.channels[1].name, "B", 255);
+    strncpy_s(header.channels[2].name, "G", 255);
+    strncpy_s(header.channels[3].name, "R", 255);
+#else
+    strncpy(header.channels[0].name, "A", 255);
+    strncpy(header.channels[1].name, "B", 255);
+    strncpy(header.channels[2].name, "G", 255);
+    strncpy(header.channels[3].name, "R", 255);
+#endif
+    header.channels[0].name[strlen("A")] = '\0';
+    header.channels[1].name[strlen("B")] = '\0';
+    header.channels[2].name[strlen("G")] = '\0';
+    header.channels[3].name[strlen("R")] = '\0';
+  } else if (components == 3) {
+#ifdef _MSC_VER
+    strncpy_s(header.channels[0].name, "B", 255);
+    strncpy_s(header.channels[1].name, "G", 255);
+    strncpy_s(header.channels[2].name, "R", 255);
+#else
+    strncpy(header.channels[0].name, "B", 255);
+    strncpy(header.channels[1].name, "G", 255);
+    strncpy(header.channels[2].name, "R", 255);
+#endif
+    header.channels[0].name[strlen("B")] = '\0';
+    header.channels[1].name[strlen("G")] = '\0';
+    header.channels[2].name[strlen("R")] = '\0';
+  } else {
+#ifdef _MSC_VER
+    strncpy_s(header.channels[0].name, "A", 255);
+#else
+    strncpy(header.channels[0].name, "A", 255);
+#endif
+    header.channels[0].name[strlen("A")] = '\0';
+  }
+
+  header.pixel_types = static_cast<int *>(
+      malloc(sizeof(int) * static_cast<size_t>(header.num_channels)));
+  header.requested_pixel_types = static_cast<int *>(
+      malloc(sizeof(int) * static_cast<size_t>(header.num_channels)));
+  for (int i = 0; i < header.num_channels; i++) {
+    header.pixel_types[i] =
+        TINYEXR_PIXELTYPE_FLOAT;  // pixel type of input image
+
+    if (save_as_fp16 > 0) {
+      header.requested_pixel_types[i] =
+          TINYEXR_PIXELTYPE_HALF;  // save with half(fp16) pixel format
+    } else {
+      header.requested_pixel_types[i] =
+          TINYEXR_PIXELTYPE_FLOAT;  // save with float(fp32) pixel format(i.e.
+                                    // no precision reduction)
+    }
+  }
+  // Write the file, then free the header arrays before returning the writer's status.
+  int ret = SaveEXRImageToFile(&image, &header, outfilename, err);
+
+  free(header.channels);
+  free(header.pixel_types);
+  free(header.requested_pixel_types);
+
+  return ret;
+}
+
+#ifdef __clang__
+// zero-as-null-pointer-constant
+#pragma clang diagnostic pop
+#endif
+
+#endif  // TINYEXR_IMPLEMENTATION_DEFINED
+#endif  // TINYEXR_IMPLEMENTATION
diff --git a/include/bsppp/LumpData.h b/include/bsppp/LumpData.h
index df7d21986..20af29078 100644
--- a/include/bsppp/LumpData.h
+++ b/include/bsppp/LumpData.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#include <sourcepp/math/Vector.h>
+#include <sourcepp/Math.h>
 
 namespace bsppp {
 
diff --git a/include/dmxpp/structs/Value.h b/include/dmxpp/structs/Value.h
index 31c69374b..3aab46e43 100644
--- a/include/dmxpp/structs/Value.h
+++ b/include/dmxpp/structs/Value.h
@@ -1,19 +1,14 @@
 #pragma once
 
 #include <array>
-#include <concepts>
 #include <cstddef>
 #include <cstdint>
 #include <string>
 #include <string_view>
-#include <type_traits>
 #include <variant>
 #include <vector>
 
-#include <sourcepp/math/Angles.h>
-#include <sourcepp/math/Integer.h>
-#include <sourcepp/math/Matrix.h>
-#include <sourcepp/math/Vector.h>
+#include <sourcepp/Math.h>
 
 namespace dmxpp {
 
@@ -49,7 +44,7 @@ using EulerAngles = sourcepp::math::EulerAngles;
 
 using Quaternion = sourcepp::math::Quat;
 
-using Matrix4x4 = sourcepp::math::Matrix<4,4>;
+using Matrix4x4 = sourcepp::math::Mat4x4f;
 
 using Generic = std::variant<
 	Invalid,
@@ -148,33 +143,20 @@ std::string IDToString(ID id);
 
 // NOLINTNEXTLINE(*-no-recursion)
 constexpr ID stringToID(std::string_view id) {
-	if (id == "element") {
-		return ID::ELEMENT;
-	} else if (id == "int") {
-		return ID::INT;
-	} else if (id == "float") {
-		return ID::FLOAT;
-	} else if (id == "bool") {
-		return ID::BOOL;
-	} else if (id == "string") {
-		return ID::STRING;
-	} else if (id == "binary") {
-		return ID::BYTEARRAY;
-	} else if (id == "time") {
-		return ID::TIME;
-	} else if (id == "color") {
-		return ID::COLOR;
-	} else if (id == "vector2") {
-		return ID::VECTOR2;
-	} else if (id == "vector3") {
-		return ID::VECTOR3;
-	} else if (id == "vector4") {
-		return ID::VECTOR4;
-	} else if (id == "quaternion") {
-		return ID::QUATERNION;
-	} else if (id == "matrix") {
-		return ID::MATRIX_4X4;
-	} else if (id.ends_with("_array")) {
+	if (id == "element")    return ID::ELEMENT;
+	if (id == "int")        return ID::INT;
+	if (id == "float")      return ID::FLOAT;
+	if (id == "bool")       return ID::BOOL;
+	if (id == "string")     return ID::STRING;
+	if (id == "binary")     return ID::BYTEARRAY;
+	if (id == "time")       return ID::TIME;
+	if (id == "color")      return ID::COLOR;
+	if (id == "vector2")    return ID::VECTOR2;
+	if (id == "vector3")    return ID::VECTOR3;
+	if (id == "vector4")    return ID::VECTOR4;
+	if (id == "quaternion") return ID::QUATERNION;
+	if (id == "matrix")     return ID::MATRIX_4X4;
+	if (id.ends_with("_array")) {
 		return innerIDToArrayID(stringToID(id.substr(0, id.length() - 6)));
 	}
 	return ID::INVALID;
diff --git a/include/gamepp/gamepp.h b/include/gamepp/gamepp.h
index f8b57e232..f2e12e869 100644
--- a/include/gamepp/gamepp.h
+++ b/include/gamepp/gamepp.h
@@ -4,7 +4,7 @@
 #include <string>
 #include <string_view>
 
-#include <sourcepp/math/Vector.h>
+#include <sourcepp/Math.h>
 
 namespace gamepp {
 
diff --git a/include/kvpp/KV1.h b/include/kvpp/KV1.h
index eb2ff3567..378bd45fe 100644
--- a/include/kvpp/KV1.h
+++ b/include/kvpp/KV1.h
@@ -6,7 +6,6 @@
 #include <vector>
 
 #include <BufferStream.h>
-#include <sourcepp/math/Integer.h>
 #include <sourcepp/parser/Text.h>
 #include <sourcepp/FS.h>
 #include <sourcepp/String.h>
diff --git a/include/mdlpp/structs/Generic.h b/include/mdlpp/structs/Generic.h
index 6b6bf4979..c8f5c127e 100644
--- a/include/mdlpp/structs/Generic.h
+++ b/include/mdlpp/structs/Generic.h
@@ -2,8 +2,8 @@
 
 #include <string>
 
-#include <sourcepp/math/Vector.h>
 #include <sourcepp/Macros.h>
+#include <sourcepp/Math.h>
 
 namespace mdlpp {
 
diff --git a/include/mdlpp/structs/MDL.h b/include/mdlpp/structs/MDL.h
index f3066a4b1..cbad48d95 100644
--- a/include/mdlpp/structs/MDL.h
+++ b/include/mdlpp/structs/MDL.h
@@ -6,10 +6,6 @@
 #include <string>
 #include <vector>
 
-#include <sourcepp/math/Angles.h>
-#include <sourcepp/math/Matrix.h>
-#include <sourcepp/math/Vector.h>
-
 #include "Generic.h"
 
 namespace mdlpp::MDL {
@@ -24,13 +20,13 @@ struct Bone {
 	std::string name;
 
 	int32_t parent;
-	int32_t boneController[6];
+	std::array<int32_t, 6> boneController;
 	sourcepp::math::Vec3f position;
 	sourcepp::math::Quat rotationQuat;
 	sourcepp::math::Vec3f rotationEuler;
 	sourcepp::math::Vec3f positionScale;
 	sourcepp::math::Vec3f rotationScale;
-	sourcepp::math::Matrix<3,4> poseToBose;
+	sourcepp::math::Mat3x4f poseToBose;
 	sourcepp::math::Quat alignment;
 	Flags flags;
 	int32_t procType;
diff --git a/include/sourcepp/Math.h b/include/sourcepp/Math.h
new file mode 100644
index 000000000..aedcf157a
--- /dev/null
+++ b/include/sourcepp/Math.h
@@ -0,0 +1,423 @@
+#pragma once
+
+#include <bit>
+#include <concepts>
+#include <cmath>
+#include <cstdint>
+#include <type_traits>
+
+#include <half.hpp>
+
+// Numeric types are intentionally outside the sourcepp namespace
+using std::int8_t;
+using std::int16_t;
+using std::int32_t;
+using std::int64_t;
+using std::uint8_t;
+using std::uint16_t;
+using std::uint32_t;
+using std::uint64_t;
+using half_float::half;
+
+namespace sourcepp::math {
+
+template<typename T>
+concept Arithmetic = std::is_arithmetic_v<T> || std::same_as<T, half>;
+
+template<Arithmetic T> // Linearly maps value from the range [l1, h1] onto the range [l2, h2]
+[[nodiscard]] constexpr T remap(T value, T l1, T h1, T l2, T h2) {
+	return l2 + (value - l1) * (h2 - l2) / (h1 - l1); // NOTE(review): divides by zero when h1 == l1
+}
+
+template<Arithmetic T> // Rescales value from the range [0, h1] onto the range [0, h2]
+[[nodiscard]] constexpr T remap(T value, T h1, T h2) {
+	return value * h2 / h1; // NOTE(review): divides by zero when h1 == 0
+}
+
+[[nodiscard]] constexpr bool isPowerOf2(std::integral auto n) {
+	return n != 0 && (n & (n - 1)) == 0; // non-zero with exactly one bit set
+}
+
+template<std::integral T>
+[[nodiscard]] constexpr T nearestPowerOf2(T n) { // power of two closest to n; n itself when it already is one
+	if (isPowerOf2(n)) {
+		return n;
+	}
+	auto bigger = std::bit_ceil(n); // NOTE(review): <bit> functions only accept unsigned types, so a signed T will not compile
+	auto smaller = std::bit_floor(n); // bit_floor(0) is 0, so n == 0 yields 0
+	return (n - smaller) < (bigger - n) ? smaller : bigger; // a tie goes to the larger power
+}
+
+[[nodiscard]] constexpr uint16_t paddingForAlignment(uint16_t alignment, uint64_t n) {
+	// Number of bytes that rounds n up to the next multiple of alignment (0 when already aligned)
+	if (alignment == 0) {
+		return 0; // no alignment requested; also avoids a modulo by zero
+	}
+	return static_cast<uint16_t>((alignment - n % alignment) % alignment);
+}
+
+/// Fixed-size vector of S arithmetic components stored contiguously in `values`.
+/// Operations between vectors of different sizes only touch the first min(S, SO) components.
+template<uint8_t S, Arithmetic P>
+struct Vec {
+	static_assert(S >= 2, "Vectors must have at least two values!");
+
+	P values[S];
+
+	// By defining these constructors, the type becomes nontrivial...
+#if 1
+	constexpr Vec() = default;
+
+	template<std::convertible_to<P>... Vals>
+	requires (sizeof...(Vals) == S)
+	constexpr Vec(Vals... vals) // NOLINT(*-explicit-constructor)
+			: values{static_cast<P>(vals)...} {}
+#endif
+
+	using value_type = P;
+
+	[[nodiscard]] constexpr const P* data() const {
+		return this->values;
+	}
+
+	[[nodiscard]] constexpr P* data() {
+		return this->values;
+	}
+
+	[[nodiscard]] constexpr uint8_t size() const {
+		return S;
+	}
+
+	/// Element access; an out-of-range index wraps around (index % S) instead of reading out of bounds
+	[[nodiscard]] constexpr P& operator[](uint8_t index) {
+		return this->values[index % S];
+	}
+
+	[[nodiscard]] constexpr P operator[](uint8_t index) const {
+		return this->values[index % S];
+	}
+
+	[[nodiscard]] constexpr Vec operator+() const {
+		return *this;
+	}
+
+	template<uint8_t SO, Arithmetic PO>
+	[[nodiscard]] constexpr Vec operator+(const Vec<SO, PO>& other) const {
+		auto out = *this;
+		for (uint8_t i = 0; i < (S > SO ? SO : S); i++) {
+			out[i] += static_cast<P>(other[i]);
+		}
+		return out;
+	}
+
+	template<uint8_t SO, Arithmetic PO>
+	constexpr void operator+=(const Vec<SO, PO>& other) {
+		for (uint8_t i = 0; i < (S > SO ? SO : S); i++) {
+			(*this)[i] += static_cast<P>(other[i]);
+		}
+	}
+
+	[[nodiscard]] constexpr Vec operator-() const {
+		auto out = *this;
+		for (uint8_t i = 0; i < S; i++) {
+			out[i] *= -1;
+		}
+		return out;
+	}
+
+	template<uint8_t SO, Arithmetic PO>
+	[[nodiscard]] constexpr Vec operator-(const Vec<SO, PO>& other) const {
+		auto out = *this;
+		for (uint8_t i = 0; i < (S > SO ? SO : S); i++) {
+			out[i] -= static_cast<P>(other[i]);
+		}
+		return out;
+	}
+
+	template<uint8_t SO, Arithmetic PO>
+	constexpr void operator-=(const Vec<SO, PO>& other) {
+		for (uint8_t i = 0; i < (S > SO ? SO : S); i++) {
+			(*this)[i] -= static_cast<P>(other[i]);
+		}
+	}
+
+	[[nodiscard]] constexpr Vec operator*(Arithmetic auto scalar) const {
+		auto out = *this;
+		for (uint8_t i = 0; i < S; i++) {
+			out[i] *= static_cast<P>(scalar);
+		}
+		return out;
+	}
+
+	constexpr void operator*=(Arithmetic auto scalar) {
+		for (uint8_t i = 0; i < S; i++) {
+			(*this)[i] *= static_cast<P>(scalar);
+		}
+	}
+
+	[[nodiscard]] constexpr Vec operator/(Arithmetic auto scalar) const {
+		auto out = *this;
+		for (uint8_t i = 0; i < S; i++) {
+			out[i] /= static_cast<P>(scalar);
+		}
+		return out;
+	}
+
+	constexpr void operator/=(Arithmetic auto scalar) {
+		for (uint8_t i = 0; i < S; i++) {
+			(*this)[i] /= static_cast<P>(scalar);
+		}
+	}
+
+	[[nodiscard]] constexpr Vec operator%(Arithmetic auto scalar) const {
+		auto out = *this;
+		out %= scalar;
+		return out;
+	}
+
+	/// Component-wise remainder; floating-point components use std::fmod (like mod()) because % is integer-only
+	constexpr void operator%=(Arithmetic auto scalar) {
+		for (uint8_t i = 0; i < S; i++) {
+			if constexpr (std::floating_point<P>) {
+				(*this)[i] = std::fmod((*this)[i], static_cast<P>(scalar));
+			} else {
+				(*this)[i] %= static_cast<P>(scalar);
+			}
+		}
+	}
+
+	template<uint8_t SO, Arithmetic PO>
+	[[nodiscard]] constexpr bool operator==(const Vec<SO, PO>& other) const {
+		if constexpr (S != SO) {
+			return false;
+		} else {
+			for (uint8_t i = 0; i < S; i++) {
+				if ((*this)[i] != static_cast<P>(other[i])) {
+					return false;
+				}
+			}
+			return true;
+		}
+	}
+
+	template<uint8_t SO, Arithmetic PO = P>
+	[[nodiscard]] constexpr Vec<SO, PO> to() const {
+		Vec<SO, PO> out{};
+		for (uint8_t i = 0; i < (S > SO ? SO : S); i++) {
+			out[i] = static_cast<PO>((*this)[i]);
+		}
+		return out;
+	}
+
+	template<uint8_t SO, Arithmetic PO>
+	[[nodiscard]] constexpr Vec mul(const Vec<SO, PO>& other) const {
+		auto out = *this;
+		for (uint8_t i = 0; i < (S > SO ? SO : S); i++) {
+			out[i] *= static_cast<P>(other[i]);
+		}
+		return out;
+	}
+
+	template<uint8_t SO, Arithmetic PO>
+	[[nodiscard]] constexpr Vec div(const Vec<SO, PO>& other) const {
+		auto out = *this;
+		for (uint8_t i = 0; i < (S > SO ? SO : S); i++) {
+			out[i] /= static_cast<P>(other[i]);
+		}
+		return out;
+	}
+
+	template<uint8_t SO, Arithmetic PO>
+	[[nodiscard]] constexpr Vec mod(const Vec<SO, PO>& other) const {
+		auto out = *this;
+		for (uint8_t i = 0; i < (S > SO ? SO : S); i++) {
+			if constexpr (std::floating_point<P>) {
+				out[i] = std::fmod(out[i], static_cast<P>(other[i]));
+			} else {
+				out[i] %= static_cast<P>(other[i]);
+			}
+		}
+		return out;
+	}
+
+	[[nodiscard]] constexpr float magf() const {
+		float out = 0.0;
+		for (uint8_t i = 0; i < S; i++) {
+			out += std::pow((*this)[i], 2);
+		}
+		return std::sqrt(out);
+	}
+
+	[[nodiscard]] constexpr double mag() const {
+		double out = 0.0;
+		for (uint8_t i = 0; i < S; i++) {
+			out += std::pow((*this)[i], 2);
+		}
+		return std::sqrt(out);
+	}
+
+	[[nodiscard]] constexpr P sum() const {
+		P out{};
+		for (uint8_t i = 0; i < S; i++) {
+			out += (*this)[i];
+		}
+		return out;
+	}
+
+	template<Arithmetic PO>
+	[[nodiscard]] constexpr Vec scale(const Vec<S, PO>& other) const {
+		Vec out;
+		for (uint8_t i = 0; i < S; i++) {
+			out[i] = (*this)[i] * static_cast<P>(other[i]);
+		}
+		return out;
+	}
+
+	template<Arithmetic PO>
+	[[nodiscard]] constexpr P dot(const Vec<S, PO>& other) const {
+		return this->scale(other).sum();
+	}
+
+	[[nodiscard]] constexpr Vec abs() const {
+		auto out = *this;
+		for (uint8_t i = 0; i < S; i++) {
+			out[i] = std::abs(out[i]);
+		}
+		return out;
+	}
+
+	[[nodiscard]] static constexpr Vec zero() {
+		return {};
+	}
+
+	[[nodiscard]] constexpr bool isZero() const {
+		return *this == zero();
+	}
+};
+static_assert(std::is_trivially_copyable_v<Vec<2, float>>);
+
+#define SOURCEPP_VEC_DEFINE(S) \
+	template<Arithmetic P> \
+	using Vec##S       = Vec<S, P>; \
+	using Vec##S##i8   = Vec##S<int8_t>; \
+	using Vec##S##i16  = Vec##S<int16_t>; \
+	using Vec##S##i32  = Vec##S<int32_t>; \
+	using Vec##S##i64  = Vec##S<int64_t>; \
+	using Vec##S##i    = Vec##S##i32; \
+	using Vec##S##ui8  = Vec##S<uint8_t>; \
+	using Vec##S##ui16 = Vec##S<uint16_t>; \
+	using Vec##S##ui32 = Vec##S<uint32_t>; \
+	using Vec##S##ui64 = Vec##S<uint64_t>; \
+	using Vec##S##ui   = Vec##S##ui32; \
+	using Vec##S##f16  = Vec##S<half>; \
+	using Vec##S##f32  = Vec##S<float>; \
+	using Vec##S##f64  = Vec##S<double>; \
+	using Vec##S##f    = Vec##S##f32
+
+SOURCEPP_VEC_DEFINE(2);
+SOURCEPP_VEC_DEFINE(3);
+SOURCEPP_VEC_DEFINE(4);
+
+#undef SOURCEPP_VEC_DEFINE
+
+using EulerAngles = Vec3f;
+
+using Quat = Vec4f;
+
+/// Lower precision Quat compressed to 6 bytes
+struct QuatCompressed48 {
+	uint16_t x : 16;
+	uint16_t y : 16;
+	uint16_t z : 15;
+	uint16_t wn : 1;
+
+	[[nodiscard]] Quat decompress() const {
+		// Convert from 16-bit (or 15-bit) integers to floating point values in the range [-1, 1]
+		const float fx = (static_cast<float>(this->x) / 32767.5f) - 1.f; // x / ((2^16 - 1) / 2) - 1
+		const float fy = (static_cast<float>(this->y) / 32767.5f) - 1.f; // y / ((2^16 - 1) / 2) - 1
+		const float fz = (static_cast<float>(this->z) / 16383.5f) - 1.f; // z / ((2^15 - 1) / 2) - 1
+
+		// Recalculate w from x^2 + y^2 + z^2 + w^2 = 1; clamp so quantization error cannot make sqrt return NaN
+		float fw = std::sqrt(std::fmax(0.f, 1.f - fx * fx - fy * fy - fz * fz));
+
+		// Adjust w based on the stored sign bit
+		if (this->wn) {
+			fw = -fw;
+		}
+
+		return {fx, fy, fz, fw};
+	}
+};
+static_assert(std::is_trivially_copyable_v<QuatCompressed48> && sizeof(QuatCompressed48) == 6);
+
+/// Lower precision Quat compressed to 8 bytes
+struct QuatCompressed64 {
+	uint64_t x : 21; // 64-bit storage unit: with uint32_t fields the 21-bit members cannot share a unit and the struct grows to 12 bytes
+	uint64_t y : 21;
+	uint64_t z : 21;
+	uint64_t wn : 1;
+
+	[[nodiscard]] Quat decompress() const {
+		// Convert from 21-bit integers to floating point values in the range [-1, 1]
+		const double fx = (static_cast<double>(this->x) / 1048575.5) - 1.0; // x / ((2^21 - 1) / 2) - 1
+		const double fy = (static_cast<double>(this->y) / 1048575.5) - 1.0; // y / ((2^21 - 1) / 2) - 1
+		const double fz = (static_cast<double>(this->z) / 1048575.5) - 1.0; // z / ((2^21 - 1) / 2) - 1
+
+		// Recalculate w from x^2 + y^2 + z^2 + w^2 = 1; clamp so quantization error cannot make sqrt return NaN
+		double fw = std::sqrt(std::fmax(0.0, 1.0 - fx * fx - fy * fy - fz * fz));
+
+		// Adjust w based on the stored sign bit
+		if (this->wn) {
+			fw = -fw;
+		}
+
+		return {static_cast<float>(fx), static_cast<float>(fy), static_cast<float>(fz), static_cast<float>(fw)};
+	}
+};
+static_assert(std::is_trivially_copyable_v<QuatCompressed64> && sizeof(QuatCompressed64) == 8);
+
+template<uint8_t M, uint8_t N, Arithmetic P>
+class Mat { // M x N matrix of P, stored as M rows of N values
+	static_assert(M >= 2, "Matrices must have at least two rows!");
+	static_assert(N >= 2, "Matrices must have at least two columns!");
+
+public:
+	[[nodiscard]] P* operator[](uint8_t i) { return this->data[i]; } // pointer to row i; i is not range-checked
+
+	[[nodiscard]] const P* operator[](uint8_t i) const { return this->data[i]; } // read-only counterpart
+
+private:
+	P data[M][N];
+};
+static_assert(std::is_trivially_copyable_v<Mat<2, 2, float>>);
+
+#define SOURCEPP_MAT_DEFINE(M, N) \
+	template<Arithmetic P> \
+	using Mat##M##x##N       = Mat<M, N, P>; \
+	using Mat##M##x##N##i8   = Mat##M##x##N<int8_t>; \
+	using Mat##M##x##N##i16  = Mat##M##x##N<int16_t>; \
+	using Mat##M##x##N##i32  = Mat##M##x##N<int32_t>; \
+	using Mat##M##x##N##i64  = Mat##M##x##N<int64_t>; \
+	using Mat##M##x##N##i    = Mat##M##x##N##i32; \
+	using Mat##M##x##N##ui8  = Mat##M##x##N<uint8_t>; \
+	using Mat##M##x##N##ui16 = Mat##M##x##N<uint16_t>; \
+	using Mat##M##x##N##ui32 = Mat##M##x##N<uint32_t>; \
+	using Mat##M##x##N##ui64 = Mat##M##x##N<uint64_t>; \
+	using Mat##M##x##N##ui   = Mat##M##x##N##ui32; \
+	using Mat##M##x##N##f16  = Mat##M##x##N<half>; \
+	using Mat##M##x##N##f32  = Mat##M##x##N<float>; \
+	using Mat##M##x##N##f64  = Mat##M##x##N<double>; \
+	using Mat##M##x##N##f    = Mat##M##x##N##f32
+
+SOURCEPP_MAT_DEFINE(2, 2);
+SOURCEPP_MAT_DEFINE(3, 3);
+SOURCEPP_MAT_DEFINE(4, 4);
+SOURCEPP_MAT_DEFINE(2, 3);
+SOURCEPP_MAT_DEFINE(3, 2);
+SOURCEPP_MAT_DEFINE(2, 4);
+SOURCEPP_MAT_DEFINE(4, 2);
+SOURCEPP_MAT_DEFINE(3, 4);
+SOURCEPP_MAT_DEFINE(4, 3);
+
+#undef SOURCEPP_MAT_DEFINE
+
+} // namespace sourcepp::math
diff --git a/include/sourcepp/String.h b/include/sourcepp/String.h
index 51c2703ef..78b6d93f9 100644
--- a/include/sourcepp/String.h
+++ b/include/sourcepp/String.h
@@ -6,7 +6,7 @@
 #include <string_view>
 #include <vector>
 
-#include <sourcepp/math/Integer.h>
+#include <sourcepp/Math.h>
 
 namespace sourcepp::string {
 
diff --git a/include/sourcepp/Templates.h b/include/sourcepp/Templates.h
new file mode 100644
index 000000000..74a93c867
--- /dev/null
+++ b/include/sourcepp/Templates.h
@@ -0,0 +1,23 @@
+#pragma once
+
+#include <type_traits>
+
+namespace sourcepp {
+
+namespace detail {
+
+template<typename T>
+struct member_type_helper; // primary template is intentionally left undefined: only the pointer-to-member specialization below has a ::type
+
+template<typename C, typename T>
+struct member_type_helper<T C::*> { using type = T; }; // for a pointer to a member of class C, type is the member's type T
+
+} // namespace detail
+
+template<typename T>
+struct member_type : detail::member_type_helper<std::remove_cvref_t<T>> {}; // strips cv-qualifiers and references from T before matching
+
+template<typename T>
+using member_type_t = typename member_type<T>::type; // e.g. member_type_t<int Foo::*> is int
+
+} // namespace sourcepp
diff --git a/include/sourcepp/crypto/Adler32.h b/include/sourcepp/crypto/Adler32.h
index 5d3824b25..e18c1674e 100644
--- a/include/sourcepp/crypto/Adler32.h
+++ b/include/sourcepp/crypto/Adler32.h
@@ -3,7 +3,7 @@
 #include <cstddef>
 #include <vector>
 
-#include <sourcepp/math/Integer.h>
+#include <sourcepp/Math.h>
 
 namespace sourcepp::crypto {
 
diff --git a/include/sourcepp/crypto/CRC32.h b/include/sourcepp/crypto/CRC32.h
index 964042536..96d0869cb 100644
--- a/include/sourcepp/crypto/CRC32.h
+++ b/include/sourcepp/crypto/CRC32.h
@@ -3,7 +3,7 @@
 #include <cstddef>
 #include <vector>
 
-#include <sourcepp/math/Integer.h>
+#include <sourcepp/Math.h>
 
 namespace sourcepp::crypto {
 
diff --git a/include/sourcepp/crypto/MD5.h b/include/sourcepp/crypto/MD5.h
index bc7b17f66..bfceb29cb 100644
--- a/include/sourcepp/crypto/MD5.h
+++ b/include/sourcepp/crypto/MD5.h
@@ -4,7 +4,7 @@
 #include <cstddef>
 #include <vector>
 
-#include <sourcepp/math/Integer.h>
+#include <sourcepp/Math.h>
 
 namespace sourcepp::crypto {
 
diff --git a/include/sourcepp/crypto/RSA.h b/include/sourcepp/crypto/RSA.h
index 44d7e913a..af3a3056e 100644
--- a/include/sourcepp/crypto/RSA.h
+++ b/include/sourcepp/crypto/RSA.h
@@ -6,7 +6,7 @@
 #include <tuple>
 #include <vector>
 
-#include <sourcepp/math/Integer.h>
+#include <sourcepp/Math.h>
 
 namespace sourcepp::crypto {
 
diff --git a/include/sourcepp/math/Angles.h b/include/sourcepp/math/Angles.h
deleted file mode 100644
index 168552913..000000000
--- a/include/sourcepp/math/Angles.h
+++ /dev/null
@@ -1,63 +0,0 @@
-#pragma once
-
-#include <cmath>
-
-#include "Vector.h"
-
-namespace sourcepp::math {
-
-using EulerAngles = Vec3f;
-
-using Quat = Vec4f;
-
-/// Lower precision Quat compressed to 6 bytes
-struct QuatCompressed48 {
-	uint16_t x : 16;
-	uint16_t y : 16;
-	uint16_t z : 15;
-	uint16_t wn : 1;
-
-	[[nodiscard]] Quat decompress() const {
-		// Convert from 16-bit (or 15-bit) integers to floating point values in the range [-1, 1]
-		float fx = (static_cast<float>(this->x) / 32767.5f) - 1.f; // x / ((2^16 - 1) / 2) - 1
-		float fy = (static_cast<float>(this->y) / 32767.5f) - 1.f; // y / ((2^16 - 1) / 2) - 1
-		float fz = (static_cast<float>(this->z) / 16383.5f) - 1.f; // z / ((2^15 - 1) / 2) - 1
-
-		// Recalculate w from the constraint that x^2 + y^2 + z^2 + w^2 = 1
-		float fw = std::sqrt(1.f - fx * fx - fy * fy - fz * fz);
-
-		// Adjust w based on the stored sign bit
-		if (this->wn) {
-			fw = -fw;
-		}
-
-		return {fx, fy, fz, fw};
-	}
-};
-
-/// Lower precision Quat compressed to 8 bytes
-struct QuatCompressed64 {
-	uint32_t x : 21;
-	uint32_t y : 21;
-	uint32_t z : 21;
-	uint32_t wn : 1;
-
-	[[nodiscard]] Quat decompress() const {
-		// Convert from 21-bit integers to floating point values in the range [-1, 1]
-		double fx = (static_cast<double>(this->x) / 1048575.5) - 1.0f; // x / ((2^21 - 1) / 2) - 1
-		double fy = (static_cast<double>(this->y) / 1048575.5) - 1.0f; // y / ((2^21 - 1) / 2) - 1
-		double fz = (static_cast<double>(this->z) / 1048575.5) - 1.0f; // z / ((2^21 - 1) / 2) - 1
-
-		// Recalculate w from the constraint that x^2 + y^2 + z^2 + w^2 = 1
-		double fw = std::sqrt(1.0 - fx * fx - fy * fy - fz * fz);
-
-		// Adjust w based on the stored sign bit
-		if (this->wn) {
-			fw = -fw;
-		}
-
-		return {static_cast<float>(fx), static_cast<float>(fy), static_cast<float>(fz), static_cast<float>(fw)};
-	}
-};
-
-} // namespace sourcepp::math
diff --git a/include/sourcepp/math/Float.h b/include/sourcepp/math/Float.h
deleted file mode 100644
index c2ae7d891..000000000
--- a/include/sourcepp/math/Float.h
+++ /dev/null
@@ -1,42 +0,0 @@
-#pragma once
-
-#include "Integer.h"
-
-namespace sourcepp::math {
-
-// https://stackoverflow.com/a/60047308
-class FloatCompressed16 {
-public:
-	explicit FloatCompressed16(uint16_t in)
-			: data(in) {}
-
-	// NOLINTNEXTLINE(*-explicit-constructor)
-	FloatCompressed16(float in) { // IEEE-754 16-bit floating-point format (without infinity): 1-5-10, exp-15, +-131008.0, +-6.1035156E-5, +-5.9604645E-8, 3.311 digits
-		const auto b = *reinterpret_cast<uint32_t*>(&in) + 0x00001000; // round-to-nearest-even: add last bit after truncated mantissa
-		const auto e = (b & 0x7F800000) >> 23; // exponent
-		const auto m = b & 0x007FFFFF; // mantissa; in line below: 0x007FF000 = 0x00800000-0x00001000 = decimal indicator flag - initial rounding
-		this->data = (b & 0x80000000) >> 16 | (e > 112) * ((((e - 112) << 10) & 0x7C00) | m >> 13) | ((e < 113) & (e > 101)) * ((((0x007FF000 + m) >> (125 - e)) + 1) >> 1) | (e > 143) * 0x7FFF; // sign : normalized : denormalized : saturate
-	}
-
-	[[nodiscard]] uint16_t toFloat16() const {
-		return this->data;
-	}
-
-	[[nodiscard]] float toFloat32() const { // IEEE-754 16-bit floating-point format (without infinity): 1-5-10, exp-15, +-131008.0, +-6.1035156E-5, +-5.9604645E-8, 3.311 digits
-		const uint32_t e = (this->data & 0x7C00) >> 10; // exponent
-		const uint32_t m = (this->data & 0x03FF) << 13; // mantissa
-		const auto mf = static_cast<float>(m);
-		const uint32_t v = *reinterpret_cast<const uint32_t*>(&mf) >> 23; // evil log2 bit hack to count leading zeros in denormalized format
-		const uint32_t vu = (this->data & 0x8000) << 16 | (e != 0) * ((e + 112) << 23 | m) | ((e == 0) & (m != 0)) * ((v - 37) << 23 | ((m << (150 - v)) & 0x007FE000)); // sign : normalized : denormalized
-		return *reinterpret_cast<const float*>(&vu);
-	}
-
-	[[nodiscard]] float operator*() const {
-		return this->toFloat32();
-	}
-
-private:
-	uint16_t data;
-};
-
-} // namespace sourcepp::math
diff --git a/include/sourcepp/math/Integer.h b/include/sourcepp/math/Integer.h
deleted file mode 100644
index d245fc014..000000000
--- a/include/sourcepp/math/Integer.h
+++ /dev/null
@@ -1,166 +0,0 @@
-#pragma once
-
-#include <bit>
-#include <concepts>
-#include <cstdint>
-#include <type_traits>
-
-// Integer types are intentionally outside the sourcepp namespace
-using std::int8_t;
-using std::int16_t;
-using std::int32_t;
-using std::int64_t;
-using std::uint8_t;
-using std::uint16_t;
-using std::uint32_t;
-using std::uint64_t;
-
-/// 3-byte wide unsigned integer
-struct uint24_t {
-	uint24_t() = default;
-
-	template<std::integral T>
-	constexpr uint24_t(T value) // NOLINT(*-explicit-constructor)
-			: bytes{
-				static_cast<uint8_t>((value >> 16) & 0xff),
-				static_cast<uint8_t>((value >> 8)  & 0xff),
-				static_cast<uint8_t>( value        & 0xff),
-			} {}
-
-	template<std::integral T>
-	[[nodiscard]] constexpr operator T() const { // NOLINT(*-explicit-constructor)
-		return static_cast<T>((bytes[0] << 16) | (bytes[1] << 8) | bytes[2]);
-	}
-
-	template<std::integral T>
-	constexpr uint24_t& operator=(T value) {
-		*this = {value};
-		return *this;
-	}
-
-	template<std::integral T>
-	[[nodiscard]] constexpr uint24_t operator+(T value) const {
-		return {uint32_t{*this} + value};
-	}
-
-	template<std::integral T>
-	constexpr void operator+=(T value) const {
-		*this = {uint32_t{*this} + value};
-	}
-
-	constexpr uint24_t operator++() {
-		return *this = {uint32_t{*this} + 1};
-	}
-
-	constexpr uint24_t operator++(int) {
-		uint24_t out{*this};
-		*this = {uint32_t{*this} + 1};
-		return out;
-	}
-
-	template<std::integral T>
-	[[nodiscard]] constexpr uint24_t operator-(T value) const {
-		return {uint32_t{*this} - value};
-	}
-
-	template<std::integral T>
-	constexpr void operator-=(T value) const {
-		return *this = {uint32_t{*this} - value};
-	}
-
-	constexpr uint24_t operator--() {
-		return *this = {uint32_t{*this} - 1};
-	}
-
-	constexpr uint24_t operator--(int) {
-		uint24_t out{*this};
-		*this = {uint32_t{*this} - 1};
-		return out;
-	}
-
-	template<std::integral T>
-	[[nodiscard]] constexpr uint24_t operator*(T value) const {
-		return {uint32_t{*this} * value};
-	}
-
-	template<std::integral T>
-	constexpr void operator*=(T value) const {
-		*this = {uint32_t{*this} * value};
-	}
-
-	template<std::integral T>
-	[[nodiscard]] constexpr uint24_t operator/(T value) const {
-		return {uint32_t{*this} / value};
-	}
-
-	template<std::integral T>
-	constexpr void operator/=(T value) const {
-		*this = {uint32_t{*this} / value};
-	}
-
-	template<std::integral T>
-	[[nodiscard]] constexpr uint24_t operator%(T value) const {
-		return {uint32_t{*this} % value};
-	}
-
-	template<std::integral T>
-	constexpr void operator%=(T value) const {
-		*this = {uint32_t{*this} % value};
-	}
-
-	template<std::integral T>
-	[[nodiscard]] constexpr bool operator==(T value) const {
-		return uint32_t{*this} == value;
-	}
-
-	template<std::integral T>
-	[[nodiscard]] constexpr auto operator<=>(T value) const {
-		return uint32_t{*this} <=> value;
-	}
-
-	[[nodiscard]] constexpr operator bool() const { // NOLINT(*-explicit-constructor)
-		return static_cast<bool>(uint32_t{*this});
-	}
-
-	uint8_t bytes[3];
-};
-static_assert(sizeof(uint24_t) == 3, "uint24_t is not 3 bytes wide!");
-static_assert(std::is_trivially_copyable_v<uint24_t>, "uint24_t is not a POD type!");
-
-namespace sourcepp::math {
-
-template<typename T>
-concept Arithmetic = std::is_arithmetic_v<T> || std::same_as<T, uint24_t>;
-
-template<Arithmetic T>
-[[nodiscard]] constexpr T remap(T value, T l1, T h1, T l2, T h2) {
-	return l2 + (value - l1) * (h2 - l2) / (h1 - l1);
-}
-
-template<Arithmetic T>
-[[nodiscard]] constexpr T remap(T value, T h1, T h2) {
-	return value * h2 / h1;
-}
-
-[[nodiscard]] constexpr bool isPowerOf2(std::integral auto n) {
-	return n && !(n & (n - 1));
-}
-
-template<std::integral T>
-[[nodiscard]] constexpr T nearestPowerOf2(T n) {
-	if (math::isPowerOf2(n)) {
-		return n;
-	}
-	auto bigger = std::bit_ceil(n);
-	auto smaller = std::bit_floor(n);
-	return (n - smaller) < (bigger - n) ? smaller : bigger;
-}
-
-[[nodiscard]] constexpr uint16_t getPaddingForAlignment(uint16_t alignment, uint64_t n) {
-	if (const auto rest = n % alignment; rest > 0) {
-		return alignment - rest;
-	}
-	return 0;
-}
-
-} // namespace sourcepp::math
diff --git a/include/sourcepp/math/Matrix.h b/include/sourcepp/math/Matrix.h
deleted file mode 100644
index 5a03ae4da..000000000
--- a/include/sourcepp/math/Matrix.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#pragma once
-
-#include "Integer.h"
-
-namespace sourcepp::math {
-
-template<uint8_t M, uint8_t N, Arithmetic P = float>
-class Matrix {
-public:
-	[[nodiscard]] P* operator[](uint8_t i) { return this->data[i]; }
-
-	[[nodiscard]] const P* operator[](uint8_t i) const { return this->data[i]; }
-
-private:
-	P data[M][N];
-};
-
-} // namespace sourcepp::math
diff --git a/include/sourcepp/math/Vector.h b/include/sourcepp/math/Vector.h
deleted file mode 100644
index b50ad76fd..000000000
--- a/include/sourcepp/math/Vector.h
+++ /dev/null
@@ -1,304 +0,0 @@
-#pragma once
-
-#include <array>
-#include <cmath>
-
-#include "Integer.h"
-
-namespace sourcepp::math {
-
-template<uint8_t S, Arithmetic P>
-struct Vec {
-	static_assert(S >= 2, "Vectors must have at least two values!");
-
-	std::array<P, S> values;
-
-	// By defining these constructors, the type becomes nontrivial...
-#if 0
-	constexpr Vec() = default;
-
-	constexpr explicit Vec(values_type vals)
-			: values{vals} {}
-
-	template<std::convertible_to<P>... Vals>
-	requires (sizeof...(Vals) == S)
-	constexpr Vec(Vals... vals) // NOLINT(*-explicit-constructor)
-			: values{static_cast<P>(vals)...} {}
-#endif
-
-	using value_type = P;
-
-	[[nodiscard]] consteval uint8_t size() const {
-		return S;
-	}
-
-	[[nodiscard]] constexpr P& operator[](uint8_t index) {
-		if (index < S) {
-			return this->values[index];
-		}
-		return this->operator[](index % S);
-	}
-
-	[[nodiscard]] constexpr P operator[](uint8_t index) const {
-		if (index < S) {
-			return this->values[index];
-		}
-		return this->operator[](index % S);
-	}
-
-	[[nodiscard]] constexpr Vec operator+() const {
-		return *this;
-	}
-
-	template<uint8_t SO, Arithmetic PO>
-	[[nodiscard]] constexpr Vec operator+(const Vec<SO, PO>& other) const {
-		auto out = *this;
-		for (uint8_t i = 0; i < (S > SO ? SO : S); i++) {
-			out[i] += static_cast<P>(other[i]);
-		}
-		return out;
-	}
-
-	template<uint8_t SO, Arithmetic PO>
-	constexpr void operator+=(const Vec<SO, PO>& other) {
-		for (uint8_t i = 0; i < (S > SO ? SO : S); i++) {
-			(*this)[i] += static_cast<P>(other[i]);
-		}
-	}
-
-	[[nodiscard]] constexpr Vec operator-() const {
-		auto out = *this;
-		for (uint8_t i = 0; i < S; i++) {
-			out[i] *= -1;
-		}
-		return out;
-	}
-
-	template<uint8_t SO, Arithmetic PO>
-	[[nodiscard]] constexpr Vec operator-(const Vec<SO, PO>& other) const {
-		auto out = *this;
-		for (uint8_t i = 0; i < (S > SO ? SO : S); i++) {
-			out[i] -= static_cast<P>(other[i]);
-		}
-		return out;
-	}
-
-	template<uint8_t SO, Arithmetic PO>
-	constexpr void operator-=(const Vec<SO, PO>& other) {
-		for (uint8_t i = 0; i < (S > SO ? SO : S); i++) {
-			(*this)[i] -= static_cast<P>(other[i]);
-		}
-	}
-
-	[[nodiscard]] constexpr Vec operator*(Arithmetic auto scalar) const {
-		auto out = *this;
-		for (uint8_t i = 0; i < S; i++) {
-			out[i] *= static_cast<P>(scalar);
-		}
-		return out;
-	}
-
-	constexpr void operator*=(Arithmetic auto scalar) {
-		for (uint8_t i = 0; i < S; i++) {
-			(*this)[i] *= static_cast<P>(scalar);
-		}
-	}
-
-	[[nodiscard]] constexpr Vec operator/(Arithmetic auto scalar) const {
-		auto out = *this;
-		for (uint8_t i = 0; i < S; i++) {
-			out[i] /= static_cast<P>(scalar);
-		}
-		return out;
-	}
-
-	constexpr void operator/=(Arithmetic auto scalar) {
-		for (uint8_t i = 0; i < S; i++) {
-			(*this)[i] /= static_cast<P>(scalar);
-		}
-	}
-
-	[[nodiscard]] constexpr Vec operator%(Arithmetic auto scalar) const {
-		auto out = *this;
-		for (uint8_t i = 0; i < S; i++) {
-			out[i] %= static_cast<P>(scalar);
-		}
-		return out;
-	}
-
-	constexpr void operator%=(Arithmetic auto scalar) {
-		for (uint8_t i = 0; i < S; i++) {
-			(*this)[i] %= static_cast<P>(scalar);
-		}
-	}
-
-	template<uint8_t SO, Arithmetic PO>
-	[[nodiscard]] constexpr bool operator==(const Vec<SO, PO>& other) const {
-		if constexpr (S != SO) {
-			return false;
-		} else {
-			for (uint8_t i = 0; i < S; i++) {
-				if ((*this)[i] != static_cast<P>(other[i])) {
-					return false;
-				}
-			}
-			return true;
-		}
-	}
-
-	template<uint8_t SO, Arithmetic PO = P>
-	[[nodiscard]] constexpr Vec<SO, PO> to() const {
-		Vec<SO, PO> out{};
-		for (uint8_t i = 0; i < (S > SO ? SO : S); i++) {
-			out[i] = static_cast<PO>((*this)[i]);
-		}
-		return out;
-	}
-
-	template<uint8_t SO, Arithmetic PO>
-	[[nodiscard]] constexpr Vec mul(const Vec<SO, PO>& other) const {
-		auto out = *this;
-		for (uint8_t i = 0; i < (S > SO ? SO : S); i++) {
-			out[i] *= static_cast<P>(other[i]);
-		}
-		return out;
-	}
-
-	template<uint8_t SO, Arithmetic PO>
-	[[nodiscard]] constexpr Vec div(const Vec<SO, PO>& other) const {
-		auto out = *this;
-		for (uint8_t i = 0; i < (S > SO ? SO : S); i++) {
-			out[i] /= static_cast<P>(other[i]);
-		}
-		return out;
-	}
-
-	template<uint8_t SO, Arithmetic PO>
-	[[nodiscard]] constexpr Vec mod(const Vec<SO, PO>& other) const {
-		auto out = *this;
-		for (uint8_t i = 0; i < (S > SO ? SO : S); i++) {
-			if constexpr ((std::floating_point<P> && std::floating_point<PO>) || std::floating_point<P>) {
-				out[i] = std::fmod(out[i], static_cast<P>(other[i]));
-			} else {
-				out[i] %= static_cast<P>(other[i]);
-			}
-		}
-		return out;
-	}
-
-	[[nodiscard]] constexpr float magf() const {
-		float out = 0.0;
-		for (uint8_t i = 0; i < S; i++) {
-			out += std::pow((*this)[i], 2);
-		}
-		return std::sqrt(out);
-	}
-
-	[[nodiscard]] constexpr double mag() const {
-		double out = 0.0;
-		for (uint8_t i = 0; i < S; i++) {
-			out += std::pow((*this)[i], 2);
-		}
-		return std::sqrt(out);
-	}
-
-	[[nodiscard]] constexpr P sum() const {
-		P out{};
-		for (uint8_t i = 0; i < S; i++) {
-			out += (*this)[i];
-		}
-		return out;
-	}
-
-	template<Arithmetic PO>
-	[[nodiscard]] constexpr Vec scale(const Vec<S, PO>& other) const {
-		Vec out;
-		for (uint8_t i = 0; i < S; i++) {
-			out[i] = (*this)[i] * static_cast<P>(other[i]);
-		}
-		return out;
-	}
-
-	template<Arithmetic PO>
-	[[nodiscard]] constexpr P dot(const Vec<S, PO>& other) const {
-		return this->scale(other).sum();
-	}
-
-	[[nodiscard]] constexpr Vec abs() const {
-		auto out = *this;
-		for (uint8_t i = 0; i < S; i++) {
-			out[i] = std::abs(out[i]);
-		}
-		return out;
-	}
-
-	[[nodiscard]] static constexpr Vec zero() {
-		return {};
-	}
-
-	[[nodiscard]] constexpr bool isZero() const {
-		return *this == zero();
-	}
-};
-
-template<Arithmetic P>
-using Vec2 = Vec<2, P>;
-
-using Vec2i8 = Vec2<int8_t>;
-using Vec2i16 = Vec2<int16_t>;
-using Vec2i32 = Vec2<int32_t>;
-using Vec2i64 = Vec2<int64_t>;
-using Vec2i = Vec2i32;
-
-using Vec2ui8 = Vec2<uint8_t>;
-using Vec2ui16 = Vec2<uint16_t>;
-using Vec2ui24 = Vec2<uint24_t>;
-using Vec2ui32 = Vec2<uint32_t>;
-using Vec2ui64 = Vec2<uint64_t>;
-using Vec2ui = Vec2ui32;
-
-using Vec2f32 = Vec2<float>;
-using Vec2f64 = Vec2<double>;
-using Vec2f = Vec2f32;
-
-template<Arithmetic P>
-using Vec3 = Vec<3, P>;
-
-using Vec3i8 = Vec3<int8_t>;
-using Vec3i16 = Vec3<int16_t>;
-using Vec3i32 = Vec3<int32_t>;
-using Vec3i64 = Vec3<int64_t>;
-using Vec3i = Vec3i32;
-
-using Vec3ui8 = Vec3<uint8_t>;
-using Vec3ui16 = Vec3<uint16_t>;
-using Vec3ui24 = Vec3<uint24_t>;
-using Vec3ui32 = Vec3<uint32_t>;
-using Vec3ui64 = Vec3<uint64_t>;
-using Vec3ui = Vec3ui32;
-
-using Vec3f32 = Vec3<float>;
-using Vec3f64 = Vec3<double>;
-using Vec3f = Vec3f32;
-
-template<Arithmetic P>
-using Vec4 = Vec<4, P>;
-
-using Vec4i8 = Vec4<int8_t>;
-using Vec4i16 = Vec4<int16_t>;
-using Vec4i32 = Vec4<int32_t>;
-using Vec4i64 = Vec4<int64_t>;
-using Vec4i = Vec4i32;
-
-using Vec4ui8 = Vec4<uint8_t>;
-using Vec4ui16 = Vec4<uint16_t>;
-using Vec4ui24 = Vec4<uint24_t>;
-using Vec4ui32 = Vec4<uint32_t>;
-using Vec4ui64 = Vec4<uint64_t>;
-using Vec4ui = Vec4ui32;
-
-using Vec4f32 = Vec4<float>;
-using Vec4f64 = Vec4<double>;
-using Vec4f = Vec4f32;
-
-} // namespace sourcepp::math
diff --git a/include/sourcepp/parser/Binary.h b/include/sourcepp/parser/Binary.h
index 163323b99..11e5b8c16 100644
--- a/include/sourcepp/parser/Binary.h
+++ b/include/sourcepp/parser/Binary.h
@@ -3,9 +3,8 @@
 #include <cstddef>
 #include <fstream>
 #include <string>
-#include <string_view>
 
-#include <sourcepp/math/Integer.h>
+#include <sourcepp/Math.h>
 
 class BufferStream;
 
diff --git a/include/steampp/steampp.h b/include/steampp/steampp.h
index 374d07c2a..23b02f7d4 100644
--- a/include/steampp/steampp.h
+++ b/include/steampp/steampp.h
@@ -10,7 +10,7 @@
 #include <unordered_map>
 #include <vector>
 
-#include <sourcepp/math/Integer.h>
+#include <sourcepp/Math.h>
 
 namespace steampp {
 
diff --git a/include/toolpp/CmdSeq.h b/include/toolpp/CmdSeq.h
index 8e69d9f19..695bead1f 100644
--- a/include/toolpp/CmdSeq.h
+++ b/include/toolpp/CmdSeq.h
@@ -1,11 +1,10 @@
 #pragma once
 
 #include <cstddef>
-#include <optional>
 #include <string>
 #include <vector>
 
-#include <sourcepp/math/Integer.h>
+#include <sourcepp/Math.h>
 
 namespace toolpp {
 
@@ -20,9 +19,12 @@ class CmdSeq {
 			COPY_FILE = 257,
 			DELETE_FILE = 258,
 			RENAME_FILE = 259,
-			COPY_FILE_IF_EXISTS_ALT = 260,
+			// This used to be a different thing - Strata changes it to be an alias for 261
+			//COPY_FILE_IF_EXISTS_ALT = 260,
 			COPY_FILE_IF_EXISTS = 261,
 		} special;
+		static constexpr auto SPECIAL_COPY_FILE_IF_EXISTS_ALIAS = static_cast<Special>(260);
+
 		std::string executable;
 		std::string arguments;
 
@@ -32,6 +34,10 @@ class CmdSeq {
 		bool useProcessWindow;
 
 		bool waitForKeypress;
+
+		[[nodiscard]] static std::string getSpecialDisplayNameFor(Special special);
+
+		[[nodiscard]] std::string getExecutableDisplayName() const;
 	};
 
 	struct Sequence {
@@ -39,7 +45,21 @@ class CmdSeq {
 		std::vector<Command> commands;
 	};
 
-	explicit CmdSeq(std::string path_);
+	enum class Type {
+		INVALID,
+		BINARY,
+		KEYVALUES_STRATA,
+	};
+
+	explicit CmdSeq(const std::string& path);
+
+	explicit CmdSeq(Type type_);
+
+	[[nodiscard]] explicit operator bool() const;
+
+	[[nodiscard]] Type getType() const;
+
+	void setType(Type type_);
 
 	[[nodiscard]] float getVersion() const;
 
@@ -51,20 +71,19 @@ class CmdSeq {
 
 	[[nodiscard]] std::vector<std::byte> bake() const;
 
-	[[nodiscard]] std::vector<std::byte> bake(bool overrideUsingKeyValues) const;
-
-	bool bake(const std::string& path_);
-
-	bool bake(const std::string& path_, bool overrideUsingKeyValues);
+	bool bake(const std::string& path) const; // NOLINT(*-use-nodiscard)
 
 protected:
 	void parseBinary(const std::string& path);
 
-	void parseKeyValues(const std::string& path);
+	void parseKeyValuesStrata(const std::string& path);
+
+	[[nodiscard]] std::vector<std::byte> bakeBinary() const;
+
+	[[nodiscard]] std::vector<std::byte> bakeKeyValuesStrata() const;
 
-	bool usingKeyValues = false;
+	Type type;
 	float version;
-	std::string path;
 	std::vector<Sequence> sequences;
 };
 
diff --git a/include/toolpp/FGD.h b/include/toolpp/FGD.h
index bb36c53d1..28b1a4e81 100644
--- a/include/toolpp/FGD.h
+++ b/include/toolpp/FGD.h
@@ -7,7 +7,7 @@
 #include <vector>
 
 #include <BufferStream.h>
-#include <sourcepp/math/Vector.h>
+#include <sourcepp/Math.h>
 
 namespace toolpp {
 
@@ -187,11 +187,11 @@ class FGDWriter {
 
 	AutoVisGroupWriter beginAutoVisGroup(const std::string& parentName);
 
-	EntityWriter beginEntity(const std::string& classType, const std::vector<std::string>& classProperties, const std::string& name, const std::string& description);
+	EntityWriter beginEntity(const std::string& classType, const std::vector<std::string>& classProperties, const std::string& name, const std::string& description = "");
 
-	[[nodiscard]] std::string bake();
+	[[nodiscard]] std::string bake() const;
 
-	bool bake(const std::string& fgdPath);
+	bool bake(const std::string& fgdPath) const; // NOLINT(*-use-nodiscard)
 
 protected:
 	FGDWriter();
diff --git a/include/vcryptpp/VFONT.h b/include/vcryptpp/VFONT.h
index 011a8db0f..ba2c93ab7 100644
--- a/include/vcryptpp/VFONT.h
+++ b/include/vcryptpp/VFONT.h
@@ -5,7 +5,7 @@
 #include <string_view>
 #include <vector>
 
-#include <sourcepp/math/Integer.h>
+#include <sourcepp/Math.h>
 
 namespace vcryptpp::VFONT {
 
diff --git a/include/vpkpp/Entry.h b/include/vpkpp/Entry.h
index c9b09c0d8..b327d2498 100644
--- a/include/vpkpp/Entry.h
+++ b/include/vpkpp/Entry.h
@@ -5,7 +5,7 @@
 #include <variant>
 #include <vector>
 
-#include <sourcepp/math/Integer.h>
+#include <sourcepp/Math.h>
 
 namespace vpkpp {
 
diff --git a/include/vpkpp/Options.h b/include/vpkpp/Options.h
index 2e083754e..4102057b7 100644
--- a/include/vpkpp/Options.h
+++ b/include/vpkpp/Options.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#include <sourcepp/math/Integer.h>
+#include <sourcepp/Math.h>
 
 namespace vpkpp {
 
diff --git a/include/vpkpp/format/GCF.h b/include/vpkpp/format/GCF.h
index 109e4b32a..43d0b58a8 100644
--- a/include/vpkpp/format/GCF.h
+++ b/include/vpkpp/format/GCF.h
@@ -118,6 +118,10 @@ class GCF : public PackFileReadOnly {
 
 	[[nodiscard]] std::vector<std::string> verifyEntryChecksums() const override;
 
+	[[nodiscard]] constexpr bool isCaseSensitive() const noexcept override {
+		return true; // this override unconditionally reports true for GCF
+	}
+
 	[[nodiscard]] std::optional<std::vector<std::byte>> readEntry(const std::string& path_) const override;
 
 	[[nodiscard]] Attribute getSupportedEntryAttributes() const override;
diff --git a/include/vpkpp/format/PCK.h b/include/vpkpp/format/PCK.h
index 844c80ad7..68712d624 100644
--- a/include/vpkpp/format/PCK.h
+++ b/include/vpkpp/format/PCK.h
@@ -12,10 +12,16 @@ constexpr std::string_view PCK_EXTENSION = ".pck";
 
 class PCK : public PackFile {
 protected:
-	enum FlagsV2 : uint32_t {
-		FLAG_NONE                    = 0,
-		FLAG_ENCRYPTED          = 1 << 0,
-		FLAG_RELATIVE_FILE_DATA = 1 << 1,
+	enum FlagsDirV2 : uint32_t {
+		FLAG_DIR_NONE               = 0,
+		FLAG_DIR_ENCRYPTED          = 1 << 0,
+		FLAG_DIR_RELATIVE_FILE_DATA = 1 << 1,
+	};
+
+	enum FlagsFileV2 : uint32_t {
+		FLAG_FILE_NONE      = 0,
+		FLAG_FILE_ENCRYPTED = 1 << 0,
+		FLAG_FILE_REMOVED   = 1 << 1,
 	};
 
 	struct Header {
@@ -23,7 +29,7 @@ class PCK : public PackFile {
 		uint32_t godotVersionMajor;
 		uint32_t godotVersionMinor;
 		uint32_t godotVersionPatch;
-		FlagsV2 flags; // packVersion >= 2
+		FlagsDirV2 flags; // packVersion >= 2
 	};
 
 public:
diff --git a/include/vpkpp/format/ZIP.h b/include/vpkpp/format/ZIP.h
index 2ac101c18..a0d7249d4 100644
--- a/include/vpkpp/format/ZIP.h
+++ b/include/vpkpp/format/ZIP.h
@@ -5,13 +5,9 @@
 namespace vpkpp {
 
 constexpr std::string_view BMZ_EXTENSION  = ".bmz";
-constexpr std::string_view BZ2_EXTENSION  = ".bz2";
-constexpr std::string_view GZIP_EXTENSION = ".gz";
 constexpr std::string_view PK3_EXTENSION  = ".pk3";
 constexpr std::string_view PK4_EXTENSION  = ".pk4";
-constexpr std::string_view XZ_EXTENSION   = ".xz";
 constexpr std::string_view ZIP_EXTENSION  = ".zip";
-constexpr std::string_view ZST_EXTENSION  = ".zst";
 
 class ZIP : public PackFile {
 public:
@@ -67,14 +63,10 @@ class ZIP : public PackFile {
 	bool zipOpen = false;
 
 private:
-	VPKPP_REGISTER_PACKFILE_OPEN(BMZ_EXTENSION,  &ZIP::open);
-	VPKPP_REGISTER_PACKFILE_OPEN(BZ2_EXTENSION,  &ZIP::open);
-	VPKPP_REGISTER_PACKFILE_OPEN(GZIP_EXTENSION, &ZIP::open);
-	VPKPP_REGISTER_PACKFILE_OPEN(PK3_EXTENSION,  &ZIP::open);
-	VPKPP_REGISTER_PACKFILE_OPEN(PK4_EXTENSION,  &ZIP::open);
-	VPKPP_REGISTER_PACKFILE_OPEN(XZ_EXTENSION,   &ZIP::open);
-	VPKPP_REGISTER_PACKFILE_OPEN(ZIP_EXTENSION,  &ZIP::open);
-	VPKPP_REGISTER_PACKFILE_OPEN(ZST_EXTENSION,  &ZIP::open);
+	VPKPP_REGISTER_PACKFILE_OPEN(BMZ_EXTENSION, &ZIP::open);
+	VPKPP_REGISTER_PACKFILE_OPEN(PK3_EXTENSION, &ZIP::open);
+	VPKPP_REGISTER_PACKFILE_OPEN(PK4_EXTENSION, &ZIP::open);
+	VPKPP_REGISTER_PACKFILE_OPEN(ZIP_EXTENSION, &ZIP::open);
 };
 
 } // namespace vpkpp
diff --git a/include/vtfpp/ImageConversion.h b/include/vtfpp/ImageConversion.h
index ae1f19c34..a8e65f7ff 100644
--- a/include/vtfpp/ImageConversion.h
+++ b/include/vtfpp/ImageConversion.h
@@ -1,10 +1,12 @@
 #pragma once
 
+#include <concepts>
 #include <cstddef>
 #include <span>
 #include <vector>
 
-#include <sourcepp/math/Float.h>
+#include <BufferStream.h>
+#include <sourcepp/Templates.h>
 
 #include "ImageFormats.h"
 
@@ -172,10 +174,10 @@ VTFPP_CHECK_SIZE(UVWQ8888);
 
 struct RGBA16161616F {
 	static constexpr auto FORMAT = ImageFormat::RGBA16161616F;
-	sourcepp::math::FloatCompressed16 r;
-	sourcepp::math::FloatCompressed16 g;
-	sourcepp::math::FloatCompressed16 b;
-	sourcepp::math::FloatCompressed16 a;
+	half r;
+	half g;
+	half b;
+	half a;
 };
 VTFPP_CHECK_SIZE(RGBA16161616F);
 
@@ -222,8 +224,8 @@ VTFPP_CHECK_SIZE(RGBA32323232F);
 
 struct RG1616F {
 	static constexpr auto FORMAT = ImageFormat::RG1616F;
-	sourcepp::math::FloatCompressed16 r;
-	sourcepp::math::FloatCompressed16 g;
+	half r;
+	half g;
 };
 VTFPP_CHECK_SIZE(RG1616F);
 
@@ -263,7 +265,7 @@ VTFPP_CHECK_SIZE(BGRA1010102);
 
 struct R16F {
 	static constexpr auto FORMAT = ImageFormat::R16F;
-	sourcepp::math::FloatCompressed16 r;
+	half r;
 };
 VTFPP_CHECK_SIZE(R16F);
 
@@ -275,6 +277,42 @@ VTFPP_CHECK_SIZE(R8);
 
 #undef VTFPP_CHECK_SIZE
 
+template<typename T>
+concept PixelType = // satisfied only when T is exactly one of the pixel structs listed below (no cv/ref variants, since same_as is an exact match)
+		std::same_as<T, RGBA8888> ||
+		std::same_as<T, ABGR8888> ||
+		std::same_as<T, RGB888> ||
+		std::same_as<T, BGR888> ||
+		std::same_as<T, RGB565> ||
+		std::same_as<T, I8> ||
+		std::same_as<T, IA88> ||
+		std::same_as<T, P8> ||
+		std::same_as<T, A8> ||
+		std::same_as<T, RGB888_BLUESCREEN> ||
+		std::same_as<T, BGR888_BLUESCREEN> ||
+		std::same_as<T, ARGB8888> ||
+		std::same_as<T, BGRA8888> ||
+		std::same_as<T, BGRX8888> ||
+		std::same_as<T, BGR565> ||
+		std::same_as<T, BGRX5551> ||
+		std::same_as<T, BGRA4444> ||
+		std::same_as<T, BGRA5551> ||
+		std::same_as<T, UV88> ||
+		std::same_as<T, UVWQ8888> ||
+		std::same_as<T, RGBA16161616F> ||
+		std::same_as<T, RGBA16161616> ||
+		std::same_as<T, UVLX8888> ||
+		std::same_as<T, R32F> ||
+		std::same_as<T, RGB323232F> ||
+		std::same_as<T, RGBA32323232F> ||
+		std::same_as<T, RG1616F> ||
+		std::same_as<T, RG3232F> ||
+		std::same_as<T, RGBX8888> ||
+		std::same_as<T, RGBA1010102> ||
+		std::same_as<T, BGRA1010102> ||
+		std::same_as<T, R16F> ||
+		std::same_as<T, R8>;
+
 } // namespace ImagePixel
 
 namespace ImageConversion {
@@ -292,9 +330,13 @@ enum class FileFormat {
 	BMP,
 	TGA,
 	HDR,
+	EXR,
 };
 
-/// Converts image data to a PNG or HDR file. HDR output will be used for floating-point formats.
+/// PNG for integer formats, EXR for floating point formats
+[[nodiscard]] FileFormat getDefaultFileFormatForImageFormat(ImageFormat format);
+
+/// Converts image data to a PNG or EXR file. EXR format will be used for floating-point image formats.
 [[nodiscard]] std::vector<std::byte> convertImageDataToFile(std::span<const std::byte> imageData, ImageFormat format, uint16_t width, uint16_t height, FileFormat fileFormat = FileFormat::DEFAULT);
 
 [[nodiscard]] std::vector<std::byte> convertFileToImageData(std::span<const std::byte> fileData, ImageFormat& format, int& width, int& height, int& frameCount);
@@ -340,6 +382,47 @@ void setResizedDims(uint16_t& width, ResizeMethod widthResize, uint16_t& height,
 
 [[nodiscard]] std::vector<std::byte> cropImageData(const std::span<const std::byte> full_image, uint16_t full_width, uint16_t full_height, uint16_t channels, uint16_t x, uint16_t y, uint16_t subrect_width, uint16_t subrect_height);
 
+/// Copies one channel out of every pixel in the given image data, in pixel order.
+/// May have unexpected behavior if called on formats that use bitfields like BGRA5551!
+/// The output buffer is sized as one slot of the channel member's C++ type per pixel (sizeof of that type, not the bit width used by the format).
+/// Returns an empty vector when imageData is empty or its size is not a whole multiple of sizeof(P).
+template<ImagePixel::PixelType P>
+[[nodiscard]] std::vector<std::byte> extractChannelFromImageData(std::span<const std::byte> imageData, auto P::*channel) {
+	using C = sourcepp::member_type_t<decltype(channel)>;
+	if (const auto bytes = imageData.size(); bytes == 0 || bytes % sizeof(P) != 0) {
+		return {};
+	}
+	const auto pixelCount = imageData.size() / sizeof(P);
+	const auto* firstPixel = reinterpret_cast<const P*>(imageData.data());
+
+	std::vector<std::byte> out(pixelCount * sizeof(C));
+	BufferStream stream{out, false};
+	for (std::size_t i = 0; i < pixelCount; i++) {
+		stream << firstPixel[i].*channel;
+	}
+	return out;
+}
+
+/// Overwrites a single channel of every pixel in the given image data with the values in channelData.
+/// May have unexpected behavior if called on formats that use bitfields like BGRA5551!
+/// channelData is read as consecutive values of the channel member's C++ type, one per pixel, in pixel order.
+/// Returns false without touching imageData if either buffer is empty, is not a whole number of elements, or the element counts differ; true otherwise.
+template<ImagePixel::PixelType P>
+bool applyChannelToImageData(std::span<std::byte> imageData, std::span<const std::byte> channelData, auto P::*channel) {
+	using C = sourcepp::member_type_t<decltype(channel)>;
+	if (imageData.empty() || imageData.size() % sizeof(P) != 0 || channelData.empty() || channelData.size() % sizeof(C) != 0 || imageData.size() / sizeof(P) != channelData.size() / sizeof(C)) {
+		return false;
+	}
+
+	std::span pixels{reinterpret_cast<P*>(imageData.data()), imageData.size() / sizeof(P)};
+	// channelData is a span of const bytes, so it must be viewed as const C: reinterpret_cast cannot cast away const
+	std::span values{reinterpret_cast<const C*>(channelData.data()), channelData.size() / sizeof(C)};
+	for (std::size_t i = 0; i < pixels.size(); i++) { // size_t matches span::size(), avoiding a signed/unsigned comparison
+		pixels[i].*channel = values[i];
+	}
+	return true;
+}
+
 } // namespace ImageConversion
 
 } // namespace vtfpp
diff --git a/include/vtfpp/ImageFormats.h b/include/vtfpp/ImageFormats.h
index cb2c01225..f515275ed 100644
--- a/include/vtfpp/ImageFormats.h
+++ b/include/vtfpp/ImageFormats.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#include <sourcepp/math/Integer.h>
+#include <sourcepp/Math.h>
 
 namespace vtfpp {
 
diff --git a/include/vtfpp/PPL.h b/include/vtfpp/PPL.h
index 8ed727c4c..18a8329c4 100644
--- a/include/vtfpp/PPL.h
+++ b/include/vtfpp/PPL.h
@@ -7,8 +7,6 @@
 #include <unordered_map>
 #include <vector>
 
-#include <sourcepp/math/Integer.h>
-
 #include "ImageConversion.h"
 
 namespace vtfpp {
@@ -55,7 +53,7 @@ class PPL {
 
 	bool setImage(std::span<const std::byte> imageData, ImageFormat format_, uint32_t width, uint32_t height, uint32_t resizedWidth, uint32_t resizedHeight, uint32_t lod = 0, ImageConversion::ResizeFilter filter = ImageConversion::ResizeFilter::BILINEAR);
 
-	bool setImage(const std::string& imagePath, uint32_t lod);
+	bool setImage(const std::string& imagePath, uint32_t lod = 0);
 
 	bool setImage(const std::string& imagePath, uint32_t resizedWidth, uint32_t resizedHeight, uint32_t lod = 0, ImageConversion::ResizeFilter filter = ImageConversion::ResizeFilter::BILINEAR);
 
diff --git a/include/vtfpp/VTF.h b/include/vtfpp/VTF.h
index 0976a8ef4..a44897da3 100644
--- a/include/vtfpp/VTF.h
+++ b/include/vtfpp/VTF.h
@@ -9,7 +9,6 @@
 #include <variant>
 #include <vector>
 
-#include <sourcepp/math/Vector.h>
 #include <sourcepp/parser/Binary.h>
 #include <sourcepp/Macros.h>
 
@@ -20,6 +19,11 @@ namespace vtfpp {
 
 constexpr uint32_t VTF_SIGNATURE = sourcepp::parser::binary::makeFourCC("VTF\0");
 
+enum class CompressionMethod : int16_t {
+	DEFLATE = 8,
+	ZSTD = 93,
+};
+
 struct Resource {
 	enum Type : uint32_t {
 		TYPE_UNKNOWN             = 0,   // Unknown
@@ -32,12 +36,7 @@ struct Resource {
 		TYPE_KEYVALUES_DATA      = sourcepp::parser::binary::makeFourCC("KVD\0"),
 		TYPE_AUX_COMPRESSION     = sourcepp::parser::binary::makeFourCC("AXC\0"),
 	};
-	static constexpr std::array<Resource::Type, 8> TYPE_ARRAY_ORDER{
-		// These don't really matter
-		Resource::TYPE_CRC, Resource::TYPE_EXTENDED_FLAGS, Resource::TYPE_LOD_CONTROL_INFO, Resource::TYPE_KEYVALUES_DATA, Resource::TYPE_PARTICLE_SHEET_DATA,
-		// These matter
-		Resource::TYPE_THUMBNAIL_DATA, Resource::TYPE_AUX_COMPRESSION, Resource::TYPE_IMAGE_DATA,
-	};
+	static const std::array<Type, 8>& getOrder();
 
 	enum Flags : uint8_t {
 		FLAG_NONE       = 0,
@@ -73,8 +72,16 @@ struct Resource {
 		return std::get<std::string>(this->convertData());
 	}
 
-	[[nodiscard]] int32_t getDataAsAuxCompressionLevel() const {
-		return static_cast<int32_t>(std::get<std::span<uint32_t>>(this->convertData())[1]);
+	[[nodiscard]] int16_t getDataAsAuxCompressionLevel() const {
+		return static_cast<int16_t>(std::get<std::span<uint32_t>>(this->convertData())[1] & 0xffff);
+	}
+
+	[[nodiscard]] CompressionMethod getDataAsAuxCompressionMethod() const {
+		// The method is stored in the upper 16 bits of the second 32-bit word of the resource data
+		const uint32_t packed = std::get<std::span<uint32_t>>(this->convertData())[1];
+		const auto method = static_cast<int16_t>(packed >> 16);
+		// A non-positive value is reported as DEFLATE; anything else is passed through unchecked
+		return method <= 0 ? CompressionMethod::DEFLATE : static_cast<CompressionMethod>(method);
 	}
 
 	[[nodiscard]] uint32_t getDataAsAuxCompressionLength(uint8_t mip, uint8_t mipCount, uint16_t frame, uint16_t frameCount, uint16_t face, uint16_t faceCount) const {
@@ -124,7 +131,7 @@ class VTF {
 		FLAG_NORMAL                                  = 1 <<  7,
 		FLAG_NO_MIP                                  = 1 <<  8, // Added at VTF creation time
 		FLAG_NO_LOD                                  = 1 <<  9, // Added at VTF creation time
-		FLAG_MIN_MIP                                 = 1 << 10,
+		FLAG_LOAD_LOWEST_MIPS                        = 1 << 10,
 		FLAG_PROCEDURAL                              = 1 << 11,
 		FLAG_ONE_BIT_ALPHA                           = 1 << 12, // Added at VTF creation time
 		FLAG_MULTI_BIT_ALPHA                         = 1 << 13, // Added at VTF creation time
@@ -136,7 +143,7 @@ class VTF {
 		FLAG_ONE_OVER_MIP_LEVEL_IN_ALPHA             = 1 << 19, // Internal to vtex, removed
 		FLAG_PREMULTIPLY_COLOR_BY_ONE_OVER_MIP_LEVEL = 1 << 20, // Internal to vtex, removed
 		FLAG_NORMAL_TO_DUDV                          = 1 << 21, // Internal to vtex, removed
-		FLAG_ALPHA_TEST_MIP_GENERATION               = 1 << 22,
+		FLAG_ALPHA_TEST_MIP_GENERATION               = 1 << 22, // Internal to vtex, removed
 		FLAG_NO_DEPTH_BUFFER                         = 1 << 23,
 		FLAG_NICE_FILTERED                           = 1 << 24, // Internal to vtex, removed
 		FLAG_CLAMP_U                                 = 1 << 25,
@@ -165,7 +172,8 @@ class VTF {
 		bool createMips = true;
 		bool createThumbnail = true;
 		bool createReflectivity = true;
-		uint8_t compressionLevel = 6;
+		int16_t compressionLevel = -1;
+		CompressionMethod compressionMethod = CompressionMethod::ZSTD;
 		float bumpMapScale = 1.f;
 	};
 
@@ -223,6 +231,10 @@ class VTF {
 
 	void setImageResizeMethods(ImageConversion::ResizeMethod imageWidthResizeMethod_, ImageConversion::ResizeMethod imageHeightResizeMethod_);
 
+	void setImageWidthResizeMethod(ImageConversion::ResizeMethod imageWidthResizeMethod_);
+
+	void setImageHeightResizeMethod(ImageConversion::ResizeMethod imageHeightResizeMethod_);
+
 	[[nodiscard]] uint16_t getWidth(uint8_t mip = 0) const;
 
 	[[nodiscard]] uint16_t getHeight(uint8_t mip = 0) const;
@@ -305,13 +317,17 @@ class VTF {
 
 	void removeExtendedFlagsResource();
 
-	void setKeyValuesData(const std::string& value);
+	void setKeyValuesDataResource(const std::string& value);
+
+	void removeKeyValuesDataResource();
 
-	void removeKeyValuesData();
+	[[nodiscard]] int16_t getCompressionLevel() const;
 
-	[[nodiscard]] uint8_t getCompressionLevel() const;
+	void setCompressionLevel(int16_t newCompressionLevel);
 
-	void setCompressionLevel(uint8_t newCompressionLevel);
+	[[nodiscard]] CompressionMethod getCompressionMethod() const;
+
+	void setCompressionMethod(CompressionMethod newCompressionMethod);
 
 	[[nodiscard]] bool hasImageData() const;
 
@@ -342,6 +358,8 @@ class VTF {
 
 	[[nodiscard]] std::vector<std::byte> getThumbnailDataAsRGBA8888() const;
 
+	void setThumbnail(std::span<const std::byte> imageData_, ImageFormat format_, uint16_t width_, uint16_t height_);
+
 	void computeThumbnail(ImageConversion::ResizeFilter filter = ImageConversion::ResizeFilter::BILINEAR);
 
 	void removeThumbnail();
@@ -405,7 +423,8 @@ class VTF {
 	//uint8_t _padding3[4];
 
 	// These aren't in the header, these are for VTF modification
-	uint8_t compressionLevel = 0;
+	int16_t compressionLevel = 0;
+	CompressionMethod compressionMethod = CompressionMethod::ZSTD;
 	ImageConversion::ResizeMethod imageWidthResizeMethod  = ImageConversion::ResizeMethod::POWER_OF_TWO_BIGGER;
 	ImageConversion::ResizeMethod imageHeightResizeMethod = ImageConversion::ResizeMethod::POWER_OF_TWO_BIGGER;
 };
diff --git a/lang/c/include/gameppc/Convert.hpp b/lang/c/include/gameppc/Convert.hpp
new file mode 100644
index 000000000..ce6eb85f8
--- /dev/null
+++ b/lang/c/include/gameppc/Convert.hpp
@@ -0,0 +1,23 @@
+#pragma once
+
+/*
+ * This is a header designed to be included in C++ source code.
+ * It should not be included in applications using any C wrapper libraries!
+ */
+#ifndef __cplusplus
+#error "This header can only be used in C++!"
+#endif
+
+#include "gamepp.h"
+
+namespace gamepp {
+
+class GameInstance;
+
+} // namespace gamepp
+
+namespace Convert {
+
+gamepp::GameInstance* gameInstance(gamepp_game_instance_handle_t handle);
+
+} // namespace Convert
diff --git a/lang/c/include/gameppc/gamepp.h b/lang/c/include/gameppc/gamepp.h
new file mode 100644
index 000000000..7205fc261
--- /dev/null
+++ b/lang/c/include/gameppc/gamepp.h
@@ -0,0 +1,45 @@
+#pragma once
+
+#include <sourceppc/Buffer.h>
+#include <sourceppc/String.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef void* gamepp_game_instance_handle_t;
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+// REQUIRES MANUAL FREE: gamepp_game_instance_free. Returns NULL when no game instance is found. Declared with (void) so C callers get a real prototype.
+SOURCEPP_API gamepp_game_instance_handle_t gamepp_find_game_instance(void);
+
+// REQUIRES MANUAL FREE: gamepp_game_instance_free
+SOURCEPP_API gamepp_game_instance_handle_t gamepp_find_game_instance_with_name(const char* windowNameOverride);
+
+SOURCEPP_API void gamepp_game_instance_free(gamepp_game_instance_handle_t* handle);
+
+// REQUIRES MANUAL FREE: sourcepp_string_free
+SOURCEPP_API sourcepp_string_t gamepp_get_window_title(gamepp_game_instance_handle_t handle);
+
+SOURCEPP_API int gamepp_get_window_pos_x(gamepp_game_instance_handle_t handle);
+
+SOURCEPP_API int gamepp_get_window_pos_y(gamepp_game_instance_handle_t handle);
+
+SOURCEPP_API int gamepp_get_window_width(gamepp_game_instance_handle_t handle);
+
+SOURCEPP_API int gamepp_get_window_height(gamepp_game_instance_handle_t handle);
+
+SOURCEPP_API void gamepp_command(gamepp_game_instance_handle_t handle, const char* command);
+
+SOURCEPP_API void gamepp_input_begin(gamepp_game_instance_handle_t handle, const char* input);
+
+SOURCEPP_API void gamepp_input_end(gamepp_game_instance_handle_t handle, const char* input);
+
+SOURCEPP_API void gamepp_input_once(gamepp_game_instance_handle_t handle, const char* input);
+
+SOURCEPP_API void gamepp_input_hold(gamepp_game_instance_handle_t handle, const char* input, double sec);
+
+SOURCEPP_API void gamepp_wait(gamepp_game_instance_handle_t handle, double sec);
diff --git a/lang/c/src/gameppc/Convert.cpp b/lang/c/src/gameppc/Convert.cpp
new file mode 100644
index 000000000..f0a22c653
--- /dev/null
+++ b/lang/c/src/gameppc/Convert.cpp
@@ -0,0 +1,9 @@
+#include <gameppc/Convert.hpp>
+
+#include <gamepp/gamepp.h>
+
+using namespace gamepp;
+
+GameInstance* Convert::gameInstance(gamepp_game_instance_handle_t handle) { // Recovers the C++ GameInstance pointer from an opaque C handle
+	return static_cast<GameInstance*>(handle); // No validation: the handle type is a void* typedef, so a null or foreign pointer is passed through as-is
+}
diff --git a/lang/c/src/gameppc/_gameppc.cmake b/lang/c/src/gameppc/_gameppc.cmake
new file mode 100644
index 000000000..8516bd855
--- /dev/null
+++ b/lang/c/src/gameppc/_gameppc.cmake
@@ -0,0 +1,6 @@
+add_pretty_parser(gamepp C
+        SOURCES
+        "${CMAKE_CURRENT_SOURCE_DIR}/lang/c/include/gameppc/Convert.hpp"
+        "${CMAKE_CURRENT_SOURCE_DIR}/lang/c/include/gameppc/gamepp.h"
+        "${CMAKE_CURRENT_LIST_DIR}/Convert.cpp"
+        "${CMAKE_CURRENT_LIST_DIR}/gamepp.cpp")
diff --git a/lang/c/src/gameppc/gamepp.cpp b/lang/c/src/gameppc/gamepp.cpp
new file mode 100644
index 000000000..12c660899
--- /dev/null
+++ b/lang/c/src/gameppc/gamepp.cpp
@@ -0,0 +1,98 @@
+#include <gameppc/gamepp.h>
+
+#include <gamepp/gamepp.h>
+
+#include <gameppc/Convert.hpp>
+#include <sourceppc/Convert.hpp>
+#include <sourceppc/Helpers.h>
+
+using namespace gamepp;
+
+SOURCEPP_API gamepp_game_instance_handle_t gamepp_find_game_instance() {
+	// Heap-copy the located instance so the handle outlives this call; a failed lookup yields a null handle
+	if (auto found = GameInstance::find()) {
+		return new GameInstance{*found};
+	}
+	return nullptr;
+}
+
+SOURCEPP_API gamepp_game_instance_handle_t gamepp_find_game_instance_with_name(const char* windowNameOverride) {
+	// A null override must not be converted to a C++ string type, so it is treated as the empty name
+	// (presumably "no override", matching the empty default the Python binding uses - confirm against GameInstance::find)
+	const char* name = windowNameOverride ? windowNameOverride : "";
+	auto instance = GameInstance::find(name);
+	return instance ? new GameInstance{*instance} : nullptr;
+}
+
+SOURCEPP_API void gamepp_game_instance_free(gamepp_game_instance_handle_t* handle) {
+	SOURCEPP_EARLY_RETURN(handle);
+
+	delete Convert::gameInstance(*handle);
+	*handle = nullptr;
+}
+
+SOURCEPP_API sourcepp_string_t gamepp_get_window_title(gamepp_game_instance_handle_t handle) {
+	SOURCEPP_EARLY_RETURN_VAL(handle, SOURCEPP_STRING_INVALID);
+
+	return Convert::toString(Convert::gameInstance(handle)->getWindowTitle());
+}
+
+SOURCEPP_API int gamepp_get_window_pos_x(gamepp_game_instance_handle_t handle) {
+	SOURCEPP_EARLY_RETURN_VAL(handle, 0);
+
+	return Convert::gameInstance(handle)->getWindowPos()[0];
+}
+
+SOURCEPP_API int gamepp_get_window_pos_y(gamepp_game_instance_handle_t handle) {
+	SOURCEPP_EARLY_RETURN_VAL(handle, 0);
+
+	return Convert::gameInstance(handle)->getWindowPos()[1];
+}
+
+SOURCEPP_API int gamepp_get_window_width(gamepp_game_instance_handle_t handle) {
+	SOURCEPP_EARLY_RETURN_VAL(handle, 0);
+
+	return Convert::gameInstance(handle)->getWindowSize()[0];
+}
+
+SOURCEPP_API int gamepp_get_window_height(gamepp_game_instance_handle_t handle) {
+	SOURCEPP_EARLY_RETURN_VAL(handle, 0);
+
+	return Convert::gameInstance(handle)->getWindowSize()[1];
+}
+
+SOURCEPP_API void gamepp_command(gamepp_game_instance_handle_t handle, const char* command) {
+	SOURCEPP_EARLY_RETURN(handle); // nothing to do without an instance
+	SOURCEPP_EARLY_RETURN(command); // a null string must not reach the C++ API, which expects a valid string
+	Convert::gameInstance(handle)->command(command);
+}
+
+SOURCEPP_API void gamepp_input_begin(gamepp_game_instance_handle_t handle, const char* input) {
+	SOURCEPP_EARLY_RETURN(handle); // nothing to do without an instance
+	SOURCEPP_EARLY_RETURN(input); // a null string must not reach the C++ API, which expects a valid string
+	Convert::gameInstance(handle)->inputBegin(input);
+}
+
+SOURCEPP_API void gamepp_input_end(gamepp_game_instance_handle_t handle, const char* input) {
+	SOURCEPP_EARLY_RETURN(handle); // nothing to do without an instance
+	SOURCEPP_EARLY_RETURN(input); // a null string must not reach the C++ API, which expects a valid string
+	Convert::gameInstance(handle)->inputEnd(input);
+}
+
+SOURCEPP_API void gamepp_input_once(gamepp_game_instance_handle_t handle, const char* input) {
+	SOURCEPP_EARLY_RETURN(handle); // nothing to do without an instance
+	SOURCEPP_EARLY_RETURN(input); // a null string must not reach the C++ API, which expects a valid string
+	Convert::gameInstance(handle)->inputOnce(input);
+}
+
+SOURCEPP_API void gamepp_input_hold(gamepp_game_instance_handle_t handle, const char* input, double sec) {
+	SOURCEPP_EARLY_RETURN(handle); // nothing to do without an instance
+	SOURCEPP_EARLY_RETURN(input); // a null string must not reach the C++ API, which expects a valid string
+	Convert::gameInstance(handle)->inputHold(input, sec);
+}
+
+SOURCEPP_API void gamepp_wait(gamepp_game_instance_handle_t handle, double sec) {
+	SOURCEPP_EARLY_RETURN(handle);
+
+	Convert::gameInstance(handle)->wait(sec);
+}
diff --git a/lang/python/cfg/CMakeLists.txt b/lang/python/cfg/CMakeLists.txt
new file mode 100644
index 000000000..65080cc88
--- /dev/null
+++ b/lang/python/cfg/CMakeLists.txt
@@ -0,0 +1,46 @@
+# Load this to build the sourcepp Python package
+
+cmake_minimum_required(VERSION 3.25 FATAL_ERROR)
+set(CMAKE_OSX_ARCHITECTURES "arm64" CACHE INTERNAL "" FORCE)
+project(sourcepp_python)
+
+if (NOT SKBUILD)
+    message(WARNING "\
+This CMake file is meant to be executed using 'scikit-build-core'.
+Running it directly will almost certainly not produce the desired
+result. If you are a user trying to install this package, use the
+command below, which will install all necessary build dependencies,
+compile the package in an isolated environment, and then install it.
+=====================================================================
+ $ pip install .
+=====================================================================
+If you are a software developer, and this is your own package, then
+it is usually much more efficient to install the build dependencies
+in your environment once and use the following command that avoids
+a costly creation of a new virtual environment at every compilation:
+=====================================================================
+ $ pip install nanobind scikit-build-core[pyproject]
+ $ pip install --no-build-isolation -ve .
+=====================================================================
+You may optionally add -Ceditable.rebuild=true to auto-rebuild when
+the package is imported. Otherwise, you need to rerun the above
+after editing C++ files.")
+endif()
+
+set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+
+set(SOURCEPP_BUILD_PYTHON_WRAPPERS ON CACHE INTERNAL "" FORCE)
+set(SOURCEPP_PYTHON_VERSION "@SOURCEPP_PYTHON_VERSION@")
+
+# As weird as this looks, this is necessary for sdist wheel
+set(SOURCEPP_PYTHON_IS_SUBDIR OFF)
+if(SOURCEPP_PYTHON_IS_SUBDIR)
+    add_subdirectory("../.." "${CMAKE_CURRENT_BINARY_DIR}/sourcepp")
+else()
+    include(FetchContent)
+    FetchContent_Declare(
+            sourcepp
+            GIT_REPOSITORY "https://github.com/craftablescience/sourcepp.git"
+            GIT_TAG "@SOURCEPP_GIT_TAG@")
+    FetchContent_MakeAvailable(sourcepp)
+endif()
diff --git a/lang/python/cfg/__init__.py b/lang/python/cfg/__init__.py
new file mode 100644
index 000000000..75430694c
--- /dev/null
+++ b/lang/python/cfg/__init__.py
@@ -0,0 +1,5 @@
+from ._sourcepp_impl import __doc__, gamepp, sourcepp, steampp, toolpp, vcryptpp, vtfpp
+
+__author__ = "craftablescience"
+__version__ = "${SOURCEPP_PYTHON_VERSION}"
+__all__ = ['__author__', '__doc__', '__version__', 'gamepp', 'sourcepp', 'steampp', 'toolpp', 'vcryptpp', 'vtfpp']
diff --git a/lang/python/cfg/pyproject.toml b/lang/python/cfg/pyproject.toml
new file mode 100644
index 000000000..c82a7f56e
--- /dev/null
+++ b/lang/python/cfg/pyproject.toml
@@ -0,0 +1,55 @@
+[build-system]
+requires = ["scikit-build-core >=0.10.7", "nanobind >=1.3.2"]
+build-backend = "scikit_build_core.build"
+
+
+[project]
+name = "sourcepp"
+version = "${SOURCEPP_PYTHON_VERSION}"
+authors = [{ name = "craftablescience", email = "lauralewisdev@gmail.com" }]
+maintainers = [{ name = "craftablescience", email = "lauralewisdev@gmail.com" }]
+description = "Several modern C++20 libraries for sanely parsing Valve formats."
+readme = "README.md"
+requires-python = ">=3.8"
+classifiers = [
+	"License :: OSI Approved :: MIT License",
+	"Intended Audience :: Developers",
+	"Programming Language :: Python :: 3 :: Only",
+	"Programming Language :: Python :: 3.8",
+	"Programming Language :: Python :: 3.9",
+	"Programming Language :: Python :: 3.10",
+	"Programming Language :: Python :: 3.11",
+	"Programming Language :: Python :: 3.12",
+	"Programming Language :: Python :: 3.13",
+]
+
+[project.urls]
+"homepage" = "https://github.com/craftablescience/sourcepp"
+"repository" = "https://github.com/craftablescience/sourcepp"
+"issue tracker" = "https://github.com/craftablescience/sourcepp/issues"
+"funding" = "https://ko-fi.com/craftablescience"
+
+[tool.scikit-build]
+minimum-version = "build-system.requires"
+build-dir = "build/{wheel_tag}"
+build.targets = ["sourcepp_python_all"]
+sdist.reproducible = true
+sdist.include = ["src/*", "test/*", "CMakeLists.txt", "LICENSE", "pyproject.toml", "README.md", "THIRDPARTY_LEGAL_NOTICES.txt"]
+sdist.exclude = ["cfg/*"]
+wheel.py-api = "cp312"
+wheel.license-files = ["LICENSE", "THIRDPARTY_LEGAL_NOTICES.txt"]
+build.verbose = true
+logging.level = "INFO"
+
+
+[tool.cibuildwheel]
+archs = ["auto64"]
+build-verbosity = 1
+#test-command = "pytest {project}/test"
+#test-requires = "pytest"
+
+[tool.cibuildwheel.macos]
+archs = ["arm64"]
+
+[tool.cibuildwheel.macos.environment]
+MACOSX_DEPLOYMENT_TARGET = "14.7"
diff --git a/lang/python/src/gamepp.h b/lang/python/src/gamepp.h
new file mode 100644
index 000000000..5353c4134
--- /dev/null
+++ b/lang/python/src/gamepp.h
@@ -0,0 +1,31 @@
+#pragma once
+
+#include <nanobind/nanobind.h>
+#include <nanobind/stl/optional.h>
+#include <nanobind/stl/string.h>
+#include <nanobind/stl/string_view.h>
+
+namespace py = nanobind;
+
+#include <gamepp/gamepp.h>
+
+namespace gamepp {
+
+inline void register_python(py::module_& m) {
+	auto gamepp = m.def_submodule("gamepp");
+	using namespace gamepp;
+
+	py::class_<GameInstance>(gamepp, "GameInstance")
+			.def_static("find",          &GameInstance::find,       py::arg("window_name_override") = "")
+			.def_prop_ro("window_title", &GameInstance::getWindowTitle)
+			.def_prop_ro("window_pos",   &GameInstance::getWindowPos)
+			.def_prop_ro("window_size",  &GameInstance::getWindowSize)
+			.def("command",              &GameInstance::command,    py::arg("command"),               py::rv_policy::reference)
+			.def("input_begin",          &GameInstance::inputBegin, py::arg("input"),                 py::rv_policy::reference)
+			.def("input_end",            &GameInstance::inputEnd,   py::arg("input"),                 py::rv_policy::reference)
+			.def("input_once",           &GameInstance::inputOnce,  py::arg("input"),                 py::rv_policy::reference)
+			.def("input_hold",           &GameInstance::inputHold,  py::arg("input"), py::arg("sec"), py::rv_policy::reference)
+			.def("wait",                 &GameInstance::wait,       py::arg("sec"),                   py::rv_policy::reference);
+}
+
+} // namespace gamepp
diff --git a/lang/python/src/sourcepp.cpp b/lang/python/src/sourcepp.cpp
new file mode 100644
index 000000000..35c33c697
--- /dev/null
+++ b/lang/python/src/sourcepp.cpp
@@ -0,0 +1,57 @@
+#include "sourcepp.h"
+
+#ifdef GAMEPP
+#include "gamepp.h"
+#endif
+
+#ifdef STEAMPP
+#include "steampp.h"
+#endif
+
+#ifdef TOOLPP
+#include "toolpp.h"
+#endif
+
+#ifdef VCRYPTPP
+#include "vcryptpp.h"
+#endif
+
+#ifdef VTFPP
+#include "vtfpp.h"
+#endif
+
+NB_MODULE(_sourcepp_impl, m) {
+	m.doc() = "SourcePP: A Python wrapper around several modern C++20 libraries for sanely parsing Valve's formats.";
+
+	sourcepp::register_python(m);
+
+#ifdef GAMEPP
+	gamepp::register_python(m);
+#else
+	m.def_submodule("gamepp");
+#endif
+
+#ifdef STEAMPP
+	steampp::register_python(m);
+#else
+	m.def_submodule("steampp");
+#endif
+
+#ifdef TOOLPP
+	toolpp::register_python(m);
+#else
+	m.def_submodule("toolpp");
+#endif
+
+#ifdef VCRYPTPP
+	vcryptpp::register_python(m);
+#else
+	m.def_submodule("vcryptpp");
+#endif
+
+#ifdef VTFPP
+	vtfpp::register_python(m);
+#else
+	m.def_submodule("vtfpp");
+#endif
+}
diff --git a/lang/python/src/sourcepp.h b/lang/python/src/sourcepp.h
new file mode 100644
index 000000000..079c37ae0
--- /dev/null
+++ b/lang/python/src/sourcepp.h
@@ -0,0 +1,84 @@
+#pragma once
+
+#include <string_view>
+
+#include <nanobind/nanobind.h>
+#include <nanobind/operators.h>
+
+namespace py = nanobind;
+
+#include <sourcepp/Math.h>
+
+namespace sourcepp {
+
+inline void register_python(py::module_& m) {
+	auto sourcepp = m.def_submodule("sourcepp");
+	using namespace sourcepp;
+
+	{
+		auto math = sourcepp.def_submodule("math");
+		using namespace math;
+
+		const auto registerVecType = [&math]<typename V>(std::string_view name) {
+			py::class_<V>(math, name.data()) // name must be null-terminated (every visible caller passes a string literal)
+					.def("__len__", &V::size)
+					.def("__setitem__", [](V& self, uint8_t index, typename V::value_type val) { if (index >= self.size()) throw py::index_error(); self[index] = val; })
+					.def("__getitem__", [](V& self, uint8_t index) { if (index >= self.size()) throw py::index_error(); return self[index]; }) // IndexError also ends Python's __getitem__-based iteration
+					.def_static("zero", &V::zero)
+					.def("is_zero", &V::isZero);
+		};
+
+		registerVecType.operator()<Vec2i8>("Vec2i8");
+		registerVecType.operator()<Vec2i16>("Vec2i16");
+		registerVecType.operator()<Vec2i32>("Vec2i32");
+		registerVecType.operator()<Vec2i64>("Vec2i64");
+		//registerVecType.operator()<Vec2i>("Vec2i");
+
+		registerVecType.operator()<Vec2ui8>("Vec2ui8");
+		registerVecType.operator()<Vec2ui16>("Vec2ui16");
+		registerVecType.operator()<Vec2ui32>("Vec2ui32");
+		registerVecType.operator()<Vec2ui64>("Vec2ui64");
+		//registerVecType.operator()<Vec2ui>("Vec2ui");
+
+		//registerVecType.operator()<Vec2f16>("Vec2f16");
+		registerVecType.operator()<Vec2f32>("Vec2f32");
+		registerVecType.operator()<Vec2f64>("Vec2f64");
+		//registerVecType.operator()<Vec2f>("Vec2f");
+
+		registerVecType.operator()<Vec3i8>("Vec3i8");
+		registerVecType.operator()<Vec3i16>("Vec3i16");
+		registerVecType.operator()<Vec3i32>("Vec3i32");
+		registerVecType.operator()<Vec3i64>("Vec3i64");
+		//registerVecType.operator()<Vec3i>("Vec3i");
+
+		registerVecType.operator()<Vec3ui8>("Vec3ui8");
+		registerVecType.operator()<Vec3ui16>("Vec3ui16");
+		registerVecType.operator()<Vec3ui32>("Vec3ui32");
+		registerVecType.operator()<Vec3ui64>("Vec3ui64");
+		//registerVecType.operator()<Vec3ui>("Vec3ui");
+
+		//registerVecType.operator()<Vec3f16>("Vec3f16");
+		registerVecType.operator()<Vec3f32>("Vec3f32");
+		registerVecType.operator()<Vec3f64>("Vec3f64");
+		//registerVecType.operator()<Vec3f>("Vec3f");
+
+		registerVecType.operator()<Vec4i8>("Vec4i8");
+		registerVecType.operator()<Vec4i16>("Vec4i16");
+		registerVecType.operator()<Vec4i32>("Vec4i32");
+		registerVecType.operator()<Vec4i64>("Vec4i64");
+		//registerVecType.operator()<Vec4i>("Vec4i");
+
+		registerVecType.operator()<Vec4ui8>("Vec4ui8");
+		registerVecType.operator()<Vec4ui16>("Vec4ui16");
+		registerVecType.operator()<Vec4ui32>("Vec4ui32");
+		registerVecType.operator()<Vec4ui64>("Vec4ui64");
+		//registerVecType.operator()<Vec4ui>("Vec4ui");
+
+		//registerVecType.operator()<Vec4f16>("Vec4f16");
+		registerVecType.operator()<Vec4f32>("Vec4f32");
+		registerVecType.operator()<Vec4f64>("Vec4f64");
+		//registerVecType.operator()<Vec4f>("Vec4f");
+	}
+}
+
+} // namespace sourcepp
diff --git a/lang/python/src/steampp.h b/lang/python/src/steampp.h
new file mode 100644
index 000000000..bd44ebfd3
--- /dev/null
+++ b/lang/python/src/steampp.h
@@ -0,0 +1,36 @@
+#pragma once
+
+#include <nanobind/nanobind.h>
+#include <nanobind/stl/string_view.h>
+#include <nanobind/stl/string.h>
+#include <nanobind/stl/vector.h>
+
+namespace py = nanobind;
+
+#include <steampp/steampp.h>
+
+namespace steampp {
+
+inline void register_python(py::module_& m) {
+	auto steampp = m.def_submodule("steampp");
+	using namespace steampp;
+
+	py::class_<Steam>(steampp, "Steam")
+			.def(py::init<>())
+			.def_prop_ro("install_dir",          &Steam::getInstallDir)
+			.def_prop_ro("library_dirs",         &Steam::getLibraryDirs)
+			.def_prop_ro("sourcemod_dir",        &Steam::getSourceModDir)
+			.def_prop_ro("installed_apps",       &Steam::getInstalledApps)
+			.def("is_app_installed",             &Steam::isAppInstalled,          py::arg("appID"))
+			.def("get_app_name",                 &Steam::getAppName,              py::arg("appID"))
+			.def("get_app_install_dir",          &Steam::getAppInstallDir,        py::arg("appID"))
+			.def("get_app_icon_path",            &Steam::getAppIconPath,          py::arg("appID"))
+			.def("get_app_logo_path",            &Steam::getAppLogoPath,          py::arg("appID"))
+			.def("get_app_box_art_path",         &Steam::getAppBoxArtPath,        py::arg("appID"))
+			.def("get_app_store_art_path",       &Steam::getAppStoreArtPath,      py::arg("appID"))
+			.def("is_app_using_source_engine",   &Steam::isAppUsingSourceEngine,  py::arg("appID"))
+			.def("is_app_using_source_2_engine", &Steam::isAppUsingSource2Engine, py::arg("appID"))
+			.def("__bool__",                     &Steam::operator bool,           py::is_operator());
+}
+
+} // namespace steampp
diff --git a/lang/python/src/toolpp.h b/lang/python/src/toolpp.h
new file mode 100644
index 000000000..a81a303cb
--- /dev/null
+++ b/lang/python/src/toolpp.h
@@ -0,0 +1,169 @@
+#pragma once
+
+#include <nanobind/nanobind.h>
+#include <nanobind/stl/string.h>
+#include <nanobind/stl/string_view.h>
+#include <nanobind/stl/unordered_map.h>
+#include <nanobind/stl/vector.h>
+
+namespace py = nanobind;
+
+#include <toolpp/toolpp.h>
+
+namespace toolpp {
+
+inline void register_python(py::module_& m) {
+	auto toolpp = m.def_submodule("toolpp");
+	using namespace toolpp;
+
+	py::enum_<CmdSeq::Command::Special>(toolpp, "CmdSeqCommandSpecial")
+			.value("NONE",                      CmdSeq::Command::Special::NONE)
+			.value("CHANGE_DIRECTORY",          CmdSeq::Command::Special::CHANGE_DIRECTORY)
+			.value("COPY_FILE",                 CmdSeq::Command::Special::COPY_FILE)
+			.value("DELETE_FILE",               CmdSeq::Command::Special::DELETE_FILE)
+			.value("RENAME_FILE",               CmdSeq::Command::Special::RENAME_FILE)
+			.value("COPY_FILE_IF_EXISTS_ALIAS", CmdSeq::Command::SPECIAL_COPY_FILE_IF_EXISTS_ALIAS)
+			.value("COPY_FILE_IF_EXISTS",       CmdSeq::Command::Special::COPY_FILE_IF_EXISTS)
+			.export_values();
+
+	py::class_<CmdSeq::Command>(toolpp, "CmdSeqCommand")
+			.def_rw("enabled",                             &CmdSeq::Command::enabled)
+			.def_rw("executable",                          &CmdSeq::Command::executable)
+			.def_rw("arguments",                           &CmdSeq::Command::arguments)
+			.def_rw("ensure_file_exists",                  &CmdSeq::Command::ensureFileExists)
+			.def_rw("path_to_theoretically_existing_file", &CmdSeq::Command::pathToTheoreticallyExistingFile)
+			.def_rw("use_process_window",                  &CmdSeq::Command::useProcessWindow)
+			.def_rw("wait_for_keypress",                   &CmdSeq::Command::waitForKeypress)
+			.def_static("get_special_display_name_for", &CmdSeq::Command::getSpecialDisplayNameFor, py::arg("special"))
+			.def("get_executable_display_name", &CmdSeq::Command::getExecutableDisplayName);
+
+	py::class_<CmdSeq::Sequence>(toolpp, "CmdSeqSequence")
+			.def_rw("name",     &CmdSeq::Sequence::name)
+			.def_rw("commands", &CmdSeq::Sequence::commands);
+
+	py::enum_<CmdSeq::Type>(toolpp, "CmdSeqType")
+			.value("INVALID",          CmdSeq::Type::INVALID)
+			.value("BINARY",           CmdSeq::Type::BINARY)
+			.value("KEYVALUES_STRATA", CmdSeq::Type::KEYVALUES_STRATA)
+			.export_values();
+
+	py::class_<CmdSeq>(toolpp, "CmdSeq")
+			.def(py::init<const std::string&>(), py::arg("path"))
+			.def(py::init<CmdSeq::Type>(), py::arg("type"))
+			.def("__bool__", &CmdSeq::operator bool, py::is_operator())
+			.def_prop_rw("type", &CmdSeq::getType, &CmdSeq::setType)
+			.def_prop_ro("version", &CmdSeq::getVersion)
+			.def("set_version", &CmdSeq::setVersion, py::arg("is_v02"))
+			.def("sequences", py::overload_cast<>(&CmdSeq::getSequences), py::rv_policy::reference_internal)
+			.def("bake", [](const CmdSeq& self) {
+				const auto d = self.bake();
+				return py::bytes{d.data(), d.size()};
+			})
+			.def("bake_to_file", py::overload_cast<const std::string&>(&CmdSeq::bake, py::const_), py::arg("path"));
+
+	py::class_<FGD::Entity::ClassProperty>(toolpp, "FGDEntityClassProperty")
+			.def_ro("name",      &FGD::Entity::ClassProperty::name)
+			.def_ro("arguments", &FGD::Entity::ClassProperty::arguments);
+
+	py::class_<FGD::Entity::Field>(toolpp, "FGDEntityField")
+			.def_ro("name",          &FGD::Entity::Field::name)
+			.def_ro("value_type",    &FGD::Entity::Field::valueType)
+			.def_ro("readonly",      &FGD::Entity::Field::readonly)
+			.def_ro("reportable",    &FGD::Entity::Field::reportable)
+			.def_ro("display_name",  &FGD::Entity::Field::displayName)
+			.def_ro("value_default", &FGD::Entity::Field::valueDefault)
+			.def_ro("description",   &FGD::Entity::Field::description);
+
+	py::class_<FGD::Entity::FieldChoices::Choice>(toolpp, "FGDEntityFieldChoicesChoice")
+			.def_ro("value",        &FGD::Entity::FieldChoices::Choice::value)
+			.def_ro("display_name", &FGD::Entity::FieldChoices::Choice::displayName);
+
+	py::class_<FGD::Entity::FieldChoices>(toolpp, "FGDEntityFieldChoices")
+			.def_ro("name",          &FGD::Entity::FieldChoices::name)
+			.def_ro("readonly",      &FGD::Entity::FieldChoices::readonly)
+			.def_ro("reportable",    &FGD::Entity::FieldChoices::reportable)
+			.def_ro("display_name",  &FGD::Entity::FieldChoices::displayName)
+			.def_ro("value_default", &FGD::Entity::FieldChoices::valueDefault)
+			.def_ro("description",   &FGD::Entity::FieldChoices::description)
+			.def_ro("choices",       &FGD::Entity::FieldChoices::choices);
+
+	py::class_<FGD::Entity::FieldFlags::Flag>(toolpp, "FGDEntityFieldFlagsFlag")
+			.def_ro("value",              &FGD::Entity::FieldFlags::Flag::value)
+			.def_ro("display_name",       &FGD::Entity::FieldFlags::Flag::displayName)
+			.def_ro("enabled_by_default", &FGD::Entity::FieldFlags::Flag::enabledByDefault)
+			.def_ro("description",        &FGD::Entity::FieldFlags::Flag::description);
+
+	py::class_<FGD::Entity::FieldFlags>(toolpp, "FGDEntityFieldFlags")
+			.def_ro("name",          &FGD::Entity::FieldFlags::name)
+			.def_ro("readonly",      &FGD::Entity::FieldFlags::readonly)
+			.def_ro("reportable",    &FGD::Entity::FieldFlags::reportable)
+			.def_ro("display_name",  &FGD::Entity::FieldFlags::displayName)
+			.def_ro("description",   &FGD::Entity::FieldFlags::description)
+			.def_ro("flags",         &FGD::Entity::FieldFlags::flags);
+
+	py::class_<FGD::Entity::IO>(toolpp, "FGDEntityIO")
+			.def_ro("name",        &FGD::Entity::IO::name)
+			.def_ro("value_type",  &FGD::Entity::IO::valueType)
+			.def_ro("description", &FGD::Entity::IO::description);
+
+	py::class_<FGD::Entity>(toolpp, "FGDEntity")
+			.def_ro("class_type",          &FGD::Entity::classType)
+			.def_ro("class_properties",    &FGD::Entity::classProperties)
+			.def_ro("description",         &FGD::Entity::description)
+			.def_ro("fields",              &FGD::Entity::fields)
+			.def_ro("fields_with_choices", &FGD::Entity::fieldsWithChoices)
+			.def_ro("fields_with_flags",   &FGD::Entity::fieldsWithFlags)
+			.def_ro("inputs",              &FGD::Entity::inputs)
+			.def_ro("outputs",             &FGD::Entity::outputs);
+
+	py::class_<FGD::AutoVisGroup>(toolpp, "FGDAutoVisGroup")
+			.def_ro("parent_name", &FGD::AutoVisGroup::parentName)
+			.def_ro("name",        &FGD::AutoVisGroup::name)
+			.def_ro("entities",    &FGD::AutoVisGroup::entities);
+
+	py::class_<FGD>(toolpp, "FGD")
+			.def(py::init<>())
+			.def(py::init<const std::string&>(), py::arg("fgd_path"))
+			.def("load", &FGD::load, py::arg("fgd_path"))
+			.def_prop_ro("version", &FGD::getVersion)
+			.def_prop_ro("map_size", &FGD::getMapSize)
+			.def_prop_ro("entities", &FGD::getEntities)
+			.def_prop_ro("material_exclusion_dirs", &FGD::getMaterialExclusionDirs)
+			.def_prop_ro("auto_visgroups", &FGD::getAutoVisGroups);
+
+	py::class_<FGDWriter::AutoVisGroupWriter>(toolpp, "FGDWriterAutoVisGroupWriter")
+	        .def("visgroup", &FGDWriter::AutoVisGroupWriter::visGroup, py::arg("name"), py::arg("entities"), py::rv_policy::reference)
+			.def("end_auto_visgroup", &FGDWriter::AutoVisGroupWriter::endAutoVisGroup,                       py::rv_policy::reference);
+
+	py::class_<FGDWriter::EntityWriter::KeyValueChoicesWriter>(toolpp, "FGDWriterEntityWriterKeyValueChoicesWriter")
+	        .def("choice", &FGDWriter::EntityWriter::KeyValueChoicesWriter::choice, py::arg("value"), py::arg("display_name"), py::rv_policy::reference)
+			.def("end_key_value_choices", &FGDWriter::EntityWriter::KeyValueChoicesWriter::endKeyValueChoices,                 py::rv_policy::reference);
+
+	py::class_<FGDWriter::EntityWriter::KeyValueFlagsWriter>(toolpp, "FGDWriterEntityWriterKeyValueFlagsWriter")
+	        .def("flag", &FGDWriter::EntityWriter::KeyValueFlagsWriter::flag, py::arg("value"), py::arg("display_name"), py::arg("enabled_by_default"), py::arg("description") = "", py::rv_policy::reference)
+			.def("end_key_value_flags", &FGDWriter::EntityWriter::KeyValueFlagsWriter::endKeyValueFlags,                                                                             py::rv_policy::reference);
+
+	py::class_<FGDWriter::EntityWriter>(toolpp, "FGDWriterEntityWriter")
+	        .def("key_value", &FGDWriter::EntityWriter::keyValue, py::arg("name"), py::arg("value_type"), py::arg("display_name") = "", py::arg("value_default") = "", py::arg("description") = "", py::arg("readonly") = false, py::arg("report") = false, py::rv_policy::reference)
+			.def("begin_key_value_choices", &FGDWriter::EntityWriter::beginKeyValueChoices, py::arg("name"), py::arg("display_name") = "", py::arg("value_default") = "", py::arg("description") = "", py::arg("readonly") = false, py::arg("report") = false)
+			.def("begin_key_value_flags", &FGDWriter::EntityWriter::beginKeyValueFlags, py::arg("name"), py::arg("display_name") = "", py::arg("description") = "", py::arg("readonly") = false, py::arg("report") = false)
+			.def("input", &FGDWriter::EntityWriter::input, py::arg("name"), py::arg("value_type"), py::arg("description") = "", py::rv_policy::reference)
+			.def("output", &FGDWriter::EntityWriter::output, py::arg("name"), py::arg("value_type"), py::arg("description") = "", py::rv_policy::reference)
+			.def("end_entity", &FGDWriter::EntityWriter::endEntity, py::rv_policy::reference);
+
+	py::class_<FGDWriter>(toolpp, "FGDWriter")
+	        .def_static("begin", &FGDWriter::begin)
+			.def("include", &FGDWriter::include, py::arg("fgd_path"), py::rv_policy::reference)
+			.def("version", &FGDWriter::version, py::arg("version"), py::rv_policy::reference)
+			.def("map_size", &FGDWriter::mapSize, py::arg("map_size"), py::rv_policy::reference)
+			.def("material_exclusion_dirs", &FGDWriter::materialExclusionDirs, py::arg("material_exclusion_dirs"), py::rv_policy::reference)
+			.def("begin_auto_visgroup", &FGDWriter::beginAutoVisGroup, py::arg("parent_name"))
+			.def("begin_entity", &FGDWriter::beginEntity, py::arg("class_type"), py::arg("class_properties"), py::arg("name"), py::arg("description") = "")
+	        .def("bake", [](const FGDWriter& self) {
+		        const auto d = self.bake();
+		        return py::bytes{d.data(), d.size()};
+	        })
+	        .def("bake_to_file", py::overload_cast<const std::string&>(&FGDWriter::bake, py::const_), py::arg("path"));
+}
+
+} // namespace vcryptpp
diff --git a/lang/python/src/vcryptpp.h b/lang/python/src/vcryptpp.h
new file mode 100644
index 000000000..1ae642783
--- /dev/null
+++ b/lang/python/src/vcryptpp.h
@@ -0,0 +1,90 @@
+#pragma once
+
+#include <nanobind/nanobind.h>
+#include <nanobind/stl/string_view.h>
+
+namespace py = nanobind;
+
+#include <vcryptpp/vcryptpp.h>
+
+namespace vcryptpp {
+/// Registers the `vcryptpp` Python submodule on `m`, with nested `VFONT` and `VICE` submodules wrapping the decrypt/encrypt helpers.
+inline void register_python(py::module_& m) {
+	auto vcryptpp = m.def_submodule("vcryptpp");
+	using namespace vcryptpp;
+	// VFONT submodule: exposes the IDENTIFIER and MAGIC constants plus a bytes-in/bytes-out decrypt wrapper.
+	{
+		auto VFONT = vcryptpp.def_submodule("VFONT");
+		using namespace VFONT;
+
+		VFONT.attr("IDENTIFIER") = IDENTIFIER;
+
+		VFONT.attr("MAGIC") = MAGIC;
+		// Hands the Python bytes buffer to decrypt() as a std::byte pointer/length pair, then builds a new py::bytes from the result's data()/size().
+		VFONT.def("decrypt_bytes", [](const py::bytes& data) {
+			const auto d = decrypt({reinterpret_cast<const std::byte*>(data.data()), data.size()});
+			return py::bytes{d.data(), d.size()};
+		}, py::arg("data"));
+	}
+	// VICE submodule: exposes the KnownCodes constants and decrypt/encrypt wrappers that take an additional `code` argument.
+	{
+		auto VICE = vcryptpp.def_submodule("VICE");
+		using namespace VICE;
+		// Each KnownCodes constant is set as an attribute of the same name on a nested KnownCodes submodule.
+		{
+			auto KnownCodes = VICE.def_submodule("KnownCodes");
+			using namespace KnownCodes;
+
+			KnownCodes.attr("DEFAULT")                         = DEFAULT;
+			KnownCodes.attr("CONTAGION_WEAPONS")               = CONTAGION_WEAPONS;
+			KnownCodes.attr("CONTAGION_SCRIPTS")               = CONTAGION_SCRIPTS;
+			KnownCodes.attr("COUNTER_STRIKE_SOURCE")           = COUNTER_STRIKE_SOURCE;
+			KnownCodes.attr("COUNTER_STRIKE_GLOBAL_OFFENSIVE") = COUNTER_STRIKE_GLOBAL_OFFENSIVE;
+			KnownCodes.attr("COUNTER_STRIKE_2")                = COUNTER_STRIKE_2;
+			KnownCodes.attr("COUNTER_STRIKE_PROMOD")           = COUNTER_STRIKE_PROMOD;
+			KnownCodes.attr("DAY_OF_DEFEAT_SOURCE")            = DAY_OF_DEFEAT_SOURCE;
+			KnownCodes.attr("DYSTOPIA_1_2")                    = DYSTOPIA_1_2;
+			KnownCodes.attr("DYSTOPIA_1_3")                    = DYSTOPIA_1_3;
+			KnownCodes.attr("GOLDEN_EYE_SOURCE")               = GOLDEN_EYE_SOURCE;
+			KnownCodes.attr("HALF_LIFE_2_CTF")                 = HALF_LIFE_2_CTF;
+			KnownCodes.attr("HALF_LIFE_2_DM")                  = HALF_LIFE_2_DM;
+			KnownCodes.attr("INSURGENCY")                      = INSURGENCY;
+			KnownCodes.attr("LEFT_4_DEAD_2")                   = LEFT_4_DEAD_2;
+			KnownCodes.attr("NO_MORE_ROOM_IN_HELL")            = NO_MORE_ROOM_IN_HELL;
+			KnownCodes.attr("NUCLEAR_DAWN")                    = NUCLEAR_DAWN;
+			KnownCodes.attr("TACTICAL_INTERVENTION")           = TACTICAL_INTERVENTION;
+			KnownCodes.attr("TEAM_FORTRESS_2")                 = TEAM_FORTRESS_2;
+			KnownCodes.attr("TEAM_FORTRESS_2_ITEMS")           = TEAM_FORTRESS_2_ITEMS;
+			KnownCodes.attr("THE_SHIP")                        = THE_SHIP;
+			KnownCodes.attr("ZOMBIE_PANIC_SOURCE")             = ZOMBIE_PANIC_SOURCE;
+
+			KnownCodes.attr("EKV_GPU_DEFAULT")       = EKV_GPU_DEFAULT;
+			KnownCodes.attr("EKV_GPU_ALIEN_SWARM")   = EKV_GPU_ALIEN_SWARM;
+			KnownCodes.attr("EKV_GPU_LEFT_4_DEAD_1") = EKV_GPU_LEFT_4_DEAD_1;
+			KnownCodes.attr("EKV_GPU_LEFT_4_DEAD_2") = EKV_GPU_LEFT_4_DEAD_2;
+			KnownCodes.attr("EKV_GPU_PORTAL_2")      = EKV_GPU_PORTAL_2;
+		}
+		// In each wrapper below, `code` is declared to Python with py::arg("code") = KnownCodes::DEFAULT, so callers may omit it.
+		VICE.def("decrypt_bytes", [](const py::bytes& data, std::string_view code = KnownCodes::DEFAULT) {
+			const auto d = decrypt({reinterpret_cast<const std::byte*>(data.data()), data.size()}, code);
+			return py::bytes{d.data(), d.size()};
+		}, py::arg("data"), py::arg("code") = KnownCodes::DEFAULT);
+		// NOTE(review): the *_str wrappers return std::string, but this header only includes nanobind/stl/string_view.h — confirm nanobind/stl/string.h is in scope wherever this header is included.
+		VICE.def("decrypt_str", [](std::string_view data, std::string_view code = KnownCodes::DEFAULT) -> std::string {
+			const auto d = decrypt({reinterpret_cast<const std::byte*>(data.data()), data.size()}, code);
+			return {reinterpret_cast<const char*>(d.data()), d.size()};
+		}, py::arg("data"), py::arg("code") = KnownCodes::DEFAULT);
+
+		VICE.def("encrypt_bytes", [](const py::bytes& data, std::string_view code = KnownCodes::DEFAULT) {
+			const auto d = encrypt({reinterpret_cast<const std::byte*>(data.data()), data.size()}, code);
+			return py::bytes{d.data(), d.size()};
+		}, py::arg("data"), py::arg("code") = KnownCodes::DEFAULT);
+		// NOTE(review): encrypt_str returns the raw encrypt() output as std::string; presumably that is not valid text in general — confirm how nanobind converts it to a Python str.
+		VICE.def("encrypt_str", [](std::string_view data, std::string_view code = KnownCodes::DEFAULT) -> std::string {
+			const auto d = encrypt({reinterpret_cast<const std::byte*>(data.data()), data.size()}, code);
+			return {reinterpret_cast<const char*>(d.data()), d.size()};
+		}, py::arg("data"), py::arg("code") = KnownCodes::DEFAULT);
+	}
+}
+
+} // namespace vcryptpp
diff --git a/lang/python/src/vtfpp.h b/lang/python/src/vtfpp.h
new file mode 100644
index 000000000..92e0c6b32
--- /dev/null
+++ b/lang/python/src/vtfpp.h
@@ -0,0 +1,436 @@
+#pragma once
+
+#include <tuple>
+
+#include <nanobind/nanobind.h>
+#include <nanobind/stl/pair.h>
+#include <nanobind/stl/string.h>
+#include <nanobind/stl/string_view.h>
+#include <nanobind/stl/tuple.h>
+
+namespace py = nanobind;
+
+#include <vtfpp/vtfpp.h>
+
+namespace vtfpp {
+
+void register_python(py::module_& m) {
+	using namespace vtfpp;
+	auto vtfpp = m.def_submodule("vtfpp");
+
+	py::enum_<ImageFormat>(vtfpp, "ImageFormat")
+			.value("RGBA8888",           ImageFormat::RGBA8888)
+			.value("ABGR8888",           ImageFormat::ABGR8888)
+			.value("RGB888",             ImageFormat::RGB888)
+			.value("BGR888",             ImageFormat::BGR888)
+			.value("RGB565",             ImageFormat::RGB565)
+			.value("I8",                 ImageFormat::I8)
+			.value("IA88",               ImageFormat::IA88)
+			.value("P8",                 ImageFormat::P8)
+			.value("A8",                 ImageFormat::A8)
+			.value("RGB888_BLUESCREEN",  ImageFormat::RGB888_BLUESCREEN)
+			.value("BGR888_BLUESCREEN",  ImageFormat::BGR888_BLUESCREEN)
+			.value("ARGB8888",           ImageFormat::ARGB8888)
+			.value("BGRA8888",           ImageFormat::BGRA8888)
+			.value("DXT1",               ImageFormat::DXT1)
+			.value("DXT3",               ImageFormat::DXT3)
+			.value("DXT5",               ImageFormat::DXT5)
+			.value("BGRX8888",           ImageFormat::BGRX8888)
+			.value("BGR565",             ImageFormat::BGR565)
+			.value("BGRX5551",           ImageFormat::BGRX5551)
+			.value("BGRA4444",           ImageFormat::BGRA4444)
+			.value("DXT1_ONE_BIT_ALPHA", ImageFormat::DXT1_ONE_BIT_ALPHA)
+			.value("BGRA5551",           ImageFormat::BGRA5551)
+			.value("UV88",               ImageFormat::UV88)
+			.value("UVWQ8888",           ImageFormat::UVWQ8888)
+			.value("RGBA16161616F",      ImageFormat::RGBA16161616F)
+			.value("RGBA16161616",       ImageFormat::RGBA16161616)
+			.value("UVLX8888",           ImageFormat::UVLX8888)
+			.value("R32F",               ImageFormat::R32F)
+			.value("RGB323232F",         ImageFormat::RGB323232F)
+			.value("RGBA32323232F",      ImageFormat::RGBA32323232F)
+			.value("RG1616F",            ImageFormat::RG1616F)
+			.value("RG3232F",            ImageFormat::RG3232F)
+			.value("RGBX8888",           ImageFormat::RGBX8888)
+			.value("EMPTY",              ImageFormat::EMPTY)
+			.value("ATI2N",              ImageFormat::ATI2N)
+			.value("ATI1N",              ImageFormat::ATI1N)
+			.value("RGBA1010102",        ImageFormat::RGBA1010102)
+			.value("BGRA1010102",        ImageFormat::BGRA1010102)
+			.value("R16F",               ImageFormat::R16F)
+			.value("R8",                 ImageFormat::R8)
+			.value("BC7",                ImageFormat::BC7)
+			.value("BC6H",               ImageFormat::BC6H)
+			.export_values();
+
+	{
+		using namespace ImageFormatDetails;
+		auto ImageFormatDetails = vtfpp.def_submodule("ImageFormatDetails");
+
+		ImageFormatDetails.def("red",               &red,               py::arg("format"));
+		ImageFormatDetails.def("decompressedRed",   &decompressedRed,   py::arg("format"));
+		ImageFormatDetails.def("green",             &green,             py::arg("format"));
+		ImageFormatDetails.def("decompressedGreen", &decompressedGreen, py::arg("format"));
+		ImageFormatDetails.def("blue",              &blue,              py::arg("format"));
+		ImageFormatDetails.def("decompressedBlue",  &decompressedBlue,  py::arg("format"));
+		ImageFormatDetails.def("alpha",             &alpha,             py::arg("format"));
+		ImageFormatDetails.def("decompressedAlpha", &decompressedAlpha, py::arg("format"));
+		ImageFormatDetails.def("bpp",               &bpp,               py::arg("format"));
+		ImageFormatDetails.def("containerFormat",   &containerFormat,   py::arg("format"));
+		ImageFormatDetails.def("large",             &large,             py::arg("format"));
+		ImageFormatDetails.def("decimal",           &decimal,           py::arg("format"));
+		ImageFormatDetails.def("compressed",        &compressed,        py::arg("format"));
+		ImageFormatDetails.def("transparent",       &transparent,       py::arg("format"));
+		ImageFormatDetails.def("opaque",            &opaque,            py::arg("format"));
+
+		ImageFormatDetails.def("get_data_length", py::overload_cast<ImageFormat, uint16_t, uint16_t, uint16_t>(&getDataLength), py::arg("format"), py::arg("width"), py::arg("height"), py::arg("slice_count") = 1);
+		ImageFormatDetails.def("get_data_length_extended", py::overload_cast<ImageFormat, uint8_t, uint16_t, uint8_t, uint16_t, uint16_t, uint16_t>(&getDataLength), py::arg("format"), py::arg("mip_count"), py::arg("frame_count"), py::arg("face_count"), py::arg("width"), py::arg("height"), py::arg("slice_count"));
+		ImageFormatDetails.def("get_data_position", [](ImageFormat format, uint8_t mip, uint8_t mipCount, uint16_t frame, uint16_t frameCount, uint8_t face, uint8_t faceCount, uint16_t width, uint16_t height, uint16_t slice = 0, uint16_t sliceCount = 1) -> std::pair<uint32_t, uint32_t> {
+			uint32_t offset, length;
+			if (getDataPosition(offset, length, format, mip, mipCount, frame, frameCount, face, faceCount, width, height, slice, sliceCount)) {
+				return {offset, length};
+			}
+			return {0, 0};
+		}, py::arg("format"), py::arg("mip"), py::arg("mip_count"), py::arg("frame"), py::arg("frame_count"), py::arg("face"), py::arg("face_count"), py::arg("width"), py::arg("height"), py::arg("slice") = 0, py::arg("slice_count") = 1);
+	}
+
+	{
+		using namespace ImageDimensions;
+		auto ImageDimensions = vtfpp.def_submodule("ImageDimensions");
+
+		ImageDimensions.def("get_mip_dim", &getMipDim, py::arg("mip"), py::arg("dim"));
+		ImageDimensions.def("get_recommended_mip_count_for_dims", &getRecommendedMipCountForDims, py::arg("format"), py::arg("width"), py::arg("height"));
+	}
+
+	// Skip ImagePixel, difficult to bind
+
+	{
+		using namespace ImageConversion;
+		auto ImageConversion = vtfpp.def_submodule("ImageConversion");
+
+		ImageConversion.def("convert_image_data_to_format", [](const py::bytes& imageData, ImageFormat oldFormat, ImageFormat newFormat, uint16_t width, uint16_t height) {
+			const auto d = convertImageDataToFormat({reinterpret_cast<const std::byte*>(imageData.data()), imageData.size()}, oldFormat, newFormat, width, height);
+			return py::bytes{d.data(), d.size()};
+		}, py::arg("image_data"), py::arg("old_format"), py::arg("new_format"), py::arg("width"), py::arg("height"));
+
+		ImageConversion.def("convert_several_image_data_to_format", [](const py::bytes& imageData, ImageFormat oldFormat, ImageFormat newFormat, uint8_t mipCount, uint16_t frameCount, uint16_t faceCount, uint16_t width, uint16_t height, uint16_t sliceCount) {
+			const auto d = convertSeveralImageDataToFormat({reinterpret_cast<const std::byte*>(imageData.data()), imageData.size()}, oldFormat, newFormat, mipCount, frameCount, faceCount, width, height, sliceCount);
+			return py::bytes{d.data(), d.size()};
+		}, py::arg("image_data"), py::arg("old_format"), py::arg("new_format"), py::arg("mip_count"), py::arg("frame_count"), py::arg("face_count"), py::arg("width"), py::arg("height"), py::arg("slice_count"));
+
+		py::enum_<FileFormat>(ImageConversion, "FileFormat")
+				.value("DEFAULT", FileFormat::DEFAULT)
+				.value("PNG",     FileFormat::PNG)
+				.value("JPEG",    FileFormat::JPEG)
+				.value("BMP",     FileFormat::BMP)
+				.value("TGA",     FileFormat::TGA)
+				.value("HDR",     FileFormat::HDR)
+				.export_values();
+
+		ImageConversion.def("get_default_file_format_for_image_format", &getDefaultFileFormatForImageFormat, py::arg("format"));
+
+		ImageConversion.def("convert_image_data_to_file", [](const py::bytes& imageData, ImageFormat format, uint16_t width, uint16_t height, FileFormat fileFormat = FileFormat::DEFAULT) {
+			const auto d = convertImageDataToFile({reinterpret_cast<const std::byte*>(imageData.data()), imageData.size()}, format, width, height, fileFormat);
+			return py::bytes{d.data(), d.size()};
+		}, py::arg("image_data"), py::arg("format"), py::arg("width"), py::arg("height"), py::arg("file_format") = FileFormat::DEFAULT);
+
+		ImageConversion.def("convert_file_to_image_data", [](const py::bytes& fileData) -> std::tuple<py::bytes, ImageFormat, int, int, int> {
+			ImageFormat format;
+			int width, height, frame;
+			const auto d = convertFileToImageData({reinterpret_cast<const std::byte*>(fileData.data()), fileData.size()}, format, width, height, frame);
+			return {py::bytes{d.data(), d.size()}, format, width, height, frame};
+		}, py::arg("file_data"));
+
+		py::enum_<ResizeEdge>(ImageConversion, "ResizeEdge")
+				.value("CLAMP",   ResizeEdge::CLAMP)
+				.value("REFLECT", ResizeEdge::REFLECT)
+				.value("WRAP",    ResizeEdge::WRAP)
+				.value("ZERO",    ResizeEdge::ZERO)
+				.export_values();
+
+		py::enum_<ResizeFilter>(ImageConversion, "ResizeFilter")
+				.value("DEFAULT",       ResizeFilter::DEFAULT)
+				.value("BOX",           ResizeFilter::BOX)
+				.value("BILINEAR",      ResizeFilter::BILINEAR)
+				.value("CUBIC_BSPLINE", ResizeFilter::CUBIC_BSPLINE)
+				.value("CATMULLROM",    ResizeFilter::CATMULLROM)
+				.value("MITCHELL",      ResizeFilter::MITCHELL)
+				.export_values();
+
+		py::enum_<ResizeMethod>(ImageConversion, "ResizeMethod")
+				.value("NONE",                 ResizeMethod::NONE)
+				.value("POWER_OF_TWO_BIGGER",  ResizeMethod::POWER_OF_TWO_BIGGER)
+				.value("POWER_OF_TWO_SMALLER", ResizeMethod::POWER_OF_TWO_SMALLER)
+				.value("POWER_OF_TWO_NEAREST", ResizeMethod::POWER_OF_TWO_NEAREST)
+				.export_values();
+
+		ImageConversion.def("get_resized_dim", &getResizedDim, py::arg("n"), py::arg("resize_method"));
+		ImageConversion.def("get_resized_dims", [](uint16_t width, ResizeMethod widthResize, uint16_t height, ResizeMethod heightResize) -> std::pair<uint16_t, uint16_t> {
+			setResizedDims(width, widthResize, height, heightResize);
+			return {width, height};
+		}, py::arg("width"), py::arg("resize_width"), py::arg("height"), py::arg("resize_height"));
+
+		ImageConversion.def("resize_image_data", [](const py::bytes& imageData, ImageFormat format, uint16_t width, uint16_t newWidth, uint16_t height, uint16_t newHeight, bool srgb, ResizeFilter filter, ResizeEdge edge = ResizeEdge::CLAMP) {
+			const auto d = resizeImageData({reinterpret_cast<const std::byte*>(imageData.data()), imageData.size()}, format, width, newWidth, height, newHeight, srgb, filter, edge);
+			return py::bytes{d.data(), d.size()};
+		}, py::arg("image_data"), py::arg("format"), py::arg("width"), py::arg("new_width"), py::arg("height"), py::arg("new_height"), py::arg("srgb"), py::arg("filter"), py::arg("edge") = ResizeEdge::CLAMP);
+
+		ImageConversion.def("resize_image_data_strict", [](const py::bytes& imageData, ImageFormat format, uint16_t width, uint16_t newWidth, ResizeMethod widthResize, uint16_t height, uint16_t newHeight, ResizeMethod heightResize, bool srgb, ResizeFilter filter, ResizeEdge edge = ResizeEdge::CLAMP) -> std::tuple<py::bytes, int, int> {
+			uint16_t widthOut, heightOut;
+			const auto d = resizeImageDataStrict({reinterpret_cast<const std::byte*>(imageData.data()), imageData.size()}, format, width, newWidth, widthOut, widthResize, height, newHeight, heightOut, heightResize, srgb, filter, edge);
+			return {py::bytes{d.data(), d.size()}, widthOut, heightOut};
+		}, py::arg("image_data"), py::arg("format"), py::arg("width"), py::arg("new_width"), py::arg("width_resize"), py::arg("height"), py::arg("new_height"), py::arg("height_resize"), py::arg("srgb"), py::arg("filter"), py::arg("edge") = ResizeEdge::CLAMP);
+
+		// Skip extractChannelFromImageData, difficult to bind
+		// Skip applyChannelToImageData, difficult to bind
+	}
+
+	py::class_<PPL::Image>(vtfpp, "PPLImage")
+			.def_ro("width",  &PPL::Image::width)
+			.def_ro("height", &PPL::Image::height)
+			.def_prop_ro("data", [](const PPL::Image& self) {
+				return py::bytes{self.data.data(), self.data.size()};
+			});
+
+	py::class_<PPL>(vtfpp, "PPL")
+			.def(py::init<uint32_t, ImageFormat, uint32_t>(), py::arg("checksum"), py::arg("format") = ImageFormat::RGB888, py::arg("version") = 0)
+			.def("__init__", [](PPL* self, const py::bytes& pplData) {
+				return new(self) PPL{{reinterpret_cast<const std::byte*>(pplData.data()), pplData.size()}};
+			}, py::arg("ppl_data"))
+			.def(py::init<const std::string&>(), py::arg("path"))
+			.def("__bool__", &PPL::operator bool, py::is_operator())
+			.def_prop_rw("version", &PPL::getVersion, &PPL::setVersion)
+			.def_prop_rw("checksum", &PPL::getChecksum, &PPL::setChecksum)
+			.def_prop_rw("format", &PPL::getFormat, &PPL::setFormat)
+			.def("has_image_for_lod", &PPL::hasImageForLOD, py::arg("lod"))
+			.def_prop_ro("image_lods", &PPL::getImageLODs)
+			.def("get_image_raw", [](const PPL& self, uint32_t lod = 0) -> std::optional<PPL::Image> {
+				const auto* image = self.getImageRaw(lod);
+				if (!image) {
+					return std::nullopt;
+				}
+				return *image;
+			}, py::arg("lod"))
+			.def("get_image_as", &PPL::getImageAs, py::arg("new_format"), py::arg("lod"))
+			.def("get_image_as_rgb888", &PPL::getImageAsRGB888, py::arg("lod"))
+			.def("set_image", [](PPL& self, const py::bytes& imageData, ImageFormat format, uint32_t width, uint32_t height, uint32_t lod = 0) {
+				self.setImage({reinterpret_cast<const std::byte*>(imageData.data()), imageData.size()}, format, width, height, lod);
+			}, py::arg("imageData"), py::arg("format"), py::arg("width"), py::arg("height"), py::arg("lod") = 0)
+			.def("set_image_resized", [](PPL& self, const py::bytes& imageData, ImageFormat format, uint32_t width, uint32_t height, uint32_t resizedWidth, uint32_t resizedHeight, uint32_t lod = 0, ImageConversion::ResizeFilter filter = ImageConversion::ResizeFilter::BILINEAR) {
+				self.setImage({reinterpret_cast<const std::byte*>(imageData.data()), imageData.size()}, format, width, height, resizedWidth, resizedHeight, lod, filter);
+			}, py::arg("imageData"), py::arg("format"), py::arg("width"), py::arg("height"), py::arg("resized_width"), py::arg("resized_height"), py::arg("lod") = 0, py::arg("filter") = ImageConversion::ResizeFilter::BILINEAR)
+			.def("set_image_from_file", py::overload_cast<const std::string&, uint32_t>(&PPL::setImage), py::arg("image_path"), py::arg("lod") = 0)
+			.def("set_image_resized_from_file", py::overload_cast<const std::string&, uint32_t, uint32_t, uint32_t, ImageConversion::ResizeFilter>(&PPL::setImage), py::arg("image_path"), py::arg("resized_width"), py::arg("resized_height"), py::arg("lod") = 0, py::arg("filter") = ImageConversion::ResizeFilter::BILINEAR)
+			.def("save_image", [](const PPL& self, uint32_t lod = 0, ImageConversion::FileFormat fileFormat = ImageConversion::FileFormat::DEFAULT) {
+				const auto d = self.saveImageToFile(lod, fileFormat);
+				return py::bytes{d.data(), d.size()};
+			}, py::arg("lod") = 0, py::arg("file_format") = ImageConversion::FileFormat::DEFAULT)
+			.def("save_image_to_file", py::overload_cast<const std::string&, uint32_t, ImageConversion::FileFormat>(&PPL::saveImageToFile, py::const_), py::arg("image_path"), py::arg("lod") = 0, py::arg("file_format") = ImageConversion::FileFormat::DEFAULT)
+			.def("bake", [](PPL& self) {
+				const auto d = self.bake();
+				return py::bytes{d.data(), d.size()};
+			})
+			.def("bake_to_file", py::overload_cast<const std::string&>(&PPL::bake), py::arg("ppl_path"));
+
+	vtfpp.attr("VTF_SIGNATURE") = VTF_SIGNATURE;
+
+	py::enum_<CompressionMethod>(vtfpp, "CompressionMethod")
+			.value("DEFLATE", CompressionMethod::DEFLATE)
+			.value("ZSTD",    CompressionMethod::ZSTD)
+			.export_values();
+
+	py::enum_<Resource::Type>(vtfpp, "ResourceType")
+			.value("UNKNOWN",             Resource::TYPE_UNKNOWN)
+			.value("THUMBNAIL_DATA",      Resource::TYPE_THUMBNAIL_DATA)
+			.value("IMAGE_DATA",          Resource::TYPE_IMAGE_DATA)
+			.value("PARTICLE_SHEET_DATA", Resource::TYPE_PARTICLE_SHEET_DATA)
+			.value("CRC",                 Resource::TYPE_CRC)
+			.value("LOD_CONTROL_INFO",    Resource::TYPE_LOD_CONTROL_INFO)
+			.value("EXTENDED_FLAGS",      Resource::TYPE_EXTENDED_FLAGS)
+			.value("KEYVALUES_DATA",      Resource::TYPE_KEYVALUES_DATA)
+			.value("AUX_COMPRESSION",     Resource::TYPE_AUX_COMPRESSION)
+			.export_values();
+
+	py::enum_<Resource::Flags>(vtfpp, "ResourceFlags")
+			.value("NONE",       Resource::FLAG_NONE)
+			.value("LOCAL_DATA", Resource::FLAG_LOCAL_DATA)
+			.export_values();
+
+	// Skip Resource, mostly useless outside C++
+
+	py::enum_<VTF::Flags>(vtfpp, "VTFFlags")
+			.value("NONE",                                    VTF::FLAG_NONE)
+			.value("POINT_SAMPLE",                            VTF::FLAG_POINT_SAMPLE)
+			.value("TRILINEAR",                               VTF::FLAG_TRILINEAR)
+			.value("CLAMP_S",                                 VTF::FLAG_CLAMP_S)
+			.value("CLAMP_T",                                 VTF::FLAG_CLAMP_T)
+			.value("ANISOTROPIC",                             VTF::FLAG_ANISOTROPIC)
+			.value("HINT_DXT5",                               VTF::FLAG_HINT_DXT5)
+			.value("SRGB",                                    VTF::FLAG_SRGB)
+			.value("NO_COMPRESS",                             VTF::FLAG_NO_COMPRESS)
+			.value("NORMAL",                                  VTF::FLAG_NORMAL)
+			.value("NO_MIP",                                  VTF::FLAG_NO_MIP)
+			.value("NO_LOD",                                  VTF::FLAG_NO_LOD)
+			.value("LOAD_LOWEST_MIPS",                        VTF::FLAG_LOAD_LOWEST_MIPS)
+			.value("PROCEDURAL",                              VTF::FLAG_PROCEDURAL)
+			.value("ONE_BIT_ALPHA",                           VTF::FLAG_ONE_BIT_ALPHA)
+			.value("MULTI_BIT_ALPHA",                         VTF::FLAG_MULTI_BIT_ALPHA)
+			.value("ENVMAP",                                  VTF::FLAG_ENVMAP)
+			.value("RENDERTARGET",                            VTF::FLAG_RENDERTARGET)
+			.value("DEPTH_RENDERTARGET",                      VTF::FLAG_DEPTH_RENDERTARGET)
+			.value("NO_DEBUG_OVERRIDE",                       VTF::FLAG_NO_DEBUG_OVERRIDE)
+			.value("SINGLE_COPY",                             VTF::FLAG_SINGLE_COPY)
+			.value("ONE_OVER_MIP_LEVEL_IN_ALPHA",             VTF::FLAG_ONE_OVER_MIP_LEVEL_IN_ALPHA)
+			.value("PREMULTIPLY_COLOR_BY_ONE_OVER_MIP_LEVEL", VTF::FLAG_PREMULTIPLY_COLOR_BY_ONE_OVER_MIP_LEVEL)
+			.value("NORMAL_TO_DUDV",                          VTF::FLAG_NORMAL_TO_DUDV)
+			.value("ALPHA_TEST_MIP_GENERATION",               VTF::FLAG_ALPHA_TEST_MIP_GENERATION)
+			.value("NO_DEPTH_BUFFER",                         VTF::FLAG_NO_DEPTH_BUFFER)
+			.value("NICE_FILTERED",                           VTF::FLAG_NICE_FILTERED)
+			.value("CLAMP_U",                                 VTF::FLAG_CLAMP_U)
+			.value("VERTEX_TEXTURE",                          VTF::FLAG_VERTEX_TEXTURE)
+			.value("SSBUMP",                                  VTF::FLAG_SSBUMP)
+			.value("UNFILTERABLE_OK",                         VTF::FLAG_UNFILTERABLE_OK)
+			.value("BORDER",                                  VTF::FLAG_BORDER)
+			.value("SPECVAR_RED",                             VTF::FLAG_SPECVAR_RED)
+			.value("SPECVAR_ALPHA",                           VTF::FLAG_SPECVAR_ALPHA)
+			.export_values();
+
+	py::class_<VTF::CreationOptions>(vtfpp, "VTFCreationOptions")
+			.def(py::init<>())
+			.def_rw("major_version",        &VTF::CreationOptions::majorVersion)
+			.def_rw("minor_version",        &VTF::CreationOptions::minorVersion)
+			.def_rw("output_format",        &VTF::CreationOptions::outputFormat)
+			.def_rw("width_resize_method",  &VTF::CreationOptions::widthResizeMethod)
+			.def_rw("height_resize_method", &VTF::CreationOptions::heightResizeMethod)
+			.def_rw("filter",               &VTF::CreationOptions::filter)
+			.def_rw("flags",                &VTF::CreationOptions::flags)
+			.def_rw("initial_frame_count",  &VTF::CreationOptions::initialFrameCount)
+			.def_rw("start_frame",          &VTF::CreationOptions::startFrame)
+			.def_rw("is_cubemap",           &VTF::CreationOptions::isCubeMap)
+			.def_rw("has_spheremap",        &VTF::CreationOptions::hasSphereMap)
+			.def_rw("initial_slice_count",  &VTF::CreationOptions::initialSliceCount)
+			.def_rw("create_mips",          &VTF::CreationOptions::createMips)
+			.def_rw("create_thumbnail",     &VTF::CreationOptions::createThumbnail)
+			.def_rw("create_reflectivity",  &VTF::CreationOptions::createReflectivity)
+			.def_rw("compression_level",    &VTF::CreationOptions::compressionLevel)
+			.def_rw("compression_method",   &VTF::CreationOptions::compressionMethod)
+			.def_rw("bumpmap_scale",        &VTF::CreationOptions::bumpMapScale);
+
+	py::class_<VTF>(vtfpp, "VTF")
+			.def_ro_static("FLAG_MASK_GENERATED", &VTF::FLAG_MASK_GENERATED)
+			.def_ro_static("FORMAT_UNCHANGED",    &VTF::FORMAT_UNCHANGED)
+			.def_ro_static("FORMAT_DEFAULT",      &VTF::FORMAT_DEFAULT)
+			.def_ro_static("MAX_RESOURCES",       &VTF::MAX_RESOURCES)
+			.def(py::init<>())
+			.def("__init__", [](VTF* self, const py::bytes& vtfData, bool parseHeaderOnly = false) {
+				return new(self) VTF{std::span{reinterpret_cast<const std::byte*>(vtfData.data()), vtfData.size()}, parseHeaderOnly};
+			}, py::arg("vtf_data"), py::arg("parse_header_only") = false)
+			.def(py::init<const std::string&, bool>(), py::arg("vtf_path"), py::arg("parse_header_only") = false)
+			.def("__bool__", &VTF::operator bool, py::is_operator())
+			.def_static("create_and_bake", [](const py::bytes& imageData, ImageFormat format, uint16_t width, uint16_t height, const std::string& vtfPath, VTF::CreationOptions options) {
+				VTF::create({reinterpret_cast<const std::byte*>(imageData.data()), imageData.size()}, format, width, height, vtfPath, options);
+			}, py::arg("image_data"), py::arg("format"), py::arg("width"), py::arg("height"), py::arg("vtf_path"), py::arg("creation_options") = VTF::CreationOptions{})
+			.def_static("create_blank_and_bake", py::overload_cast<ImageFormat, uint16_t, uint16_t, const std::string&, VTF::CreationOptions>(&VTF::create), py::arg("format"), py::arg("width"), py::arg("height"), py::arg("vtf_path"), py::arg("creation_options") = VTF::CreationOptions{})
+			.def_static("create", [](const py::bytes& imageData, ImageFormat format, uint16_t width, uint16_t height, VTF::CreationOptions options) {
+				return VTF::create({reinterpret_cast<const std::byte*>(imageData.data()), imageData.size()}, format, width, height, options);
+			}, py::arg("image_data"), py::arg("format"), py::arg("width"), py::arg("height"), py::arg("creation_options") = VTF::CreationOptions{})
+			.def_static("create_blank", py::overload_cast<ImageFormat, uint16_t, uint16_t, VTF::CreationOptions>(&VTF::create), py::arg("format"), py::arg("width"), py::arg("height"), py::arg("creation_options") = VTF::CreationOptions{})
+			.def_static("create_from_file_and_bake", py::overload_cast<const std::string&, const std::string&, VTF::CreationOptions>(&VTF::create), py::arg("image_path"), py::arg("vtf_path"), py::arg("creation_options") = VTF::CreationOptions{})
+			.def_static("create_from_file", py::overload_cast<const std::string&, VTF::CreationOptions>(&VTF::create), py::arg("image_path"), py::arg("creation_options") = VTF::CreationOptions{})
+			.def_prop_rw("version_major", &VTF::getMajorVersion, &VTF::setMajorVersion)
+			.def_prop_rw("version_minor", &VTF::getMinorVersion, &VTF::setMinorVersion)
+			.def_prop_rw("image_width_resize_method", &VTF::getImageWidthResizeMethod, &VTF::setImageWidthResizeMethod)
+			.def_prop_rw("image_height_resize_method", &VTF::getImageHeightResizeMethod, &VTF::setImageHeightResizeMethod)
+			.def_prop_ro("width", &VTF::getWidth)
+			.def("width_for_mip", [](const VTF& self, uint8_t mip = 0) { return self.getWidth(mip); }, py::arg("mip") = 0)
+			.def_prop_ro("height", &VTF::getHeight)
+			.def("height_for_mip", [](const VTF& self, uint8_t mip = 0) { return self.getHeight(mip); }, py::arg("mip") = 0)
+			.def("set_size", &VTF::setSize, py::arg("width"), py::arg("height"), py::arg("filter"))
+			.def_prop_rw("flags", &VTF::getFlags, &VTF::setFlags)
+			.def("add_flags", &VTF::addFlags, py::arg("flags"))
+			.def("remove_flags", &VTF::removeFlags, py::arg("flags"))
+			.def_prop_ro("format", &VTF::getFormat)
+			.def("set_format", &VTF::setFormat, py::arg("new_format"), py::arg("filter") = ImageConversion::ResizeFilter::BILINEAR)
+			.def_prop_rw("mip_count", &VTF::getMipCount, &VTF::setMipCount)
+			.def("set_recommended_mip_count", &VTF::setRecommendedMipCount)
+			.def("compute_mips", &VTF::computeMips, py::arg("filter") = ImageConversion::ResizeFilter::BILINEAR)
+			.def_prop_rw("frame_count", &VTF::getFrameCount, &VTF::setFrameCount)
+			.def_prop_ro("face_count", &VTF::getFaceCount)
+			.def("set_face_count", &VTF::setFaceCount, py::arg("is_cubemap"), py::arg("has_spheremap") = false)
+			.def_prop_rw("slice_count", &VTF::getSliceCount, &VTF::setSliceCount)
+			.def("set_frame_face_and_slice_count", &VTF::setFrameFaceAndSliceCount, py::arg("new_frame_count"), py::arg("is_cubemap"), py::arg("has_spheremap") = false, py::arg("new_slice_count") = 1)
+			.def_prop_rw("start_frame", &VTF::getStartFrame, &VTF::setStartFrame)
+			.def_prop_rw("reflectivity", &VTF::getReflectivity, &VTF::setReflectivity)
+			.def("compute_reflectivity", &VTF::computeReflectivity)
+			.def_prop_rw("bumpmap_scale", &VTF::getBumpMapScale, &VTF::setBumpMapScale)
+			.def_prop_ro("thumbnail_format", &VTF::getThumbnailFormat)
+			.def_prop_ro("thumbnail_width", &VTF::getThumbnailWidth)
+			.def_prop_ro("thumbnail_height", &VTF::getThumbnailHeight)
+			// Skip getResources
+			// Skip getResource
+			.def("set_particle_sheet_resource", [](VTF& self, const py::bytes& value) { return self.setParticleSheetResource({reinterpret_cast<const std::byte*>(value.data()), value.size()}); }, py::arg("value"))
+			.def("remove_particle_sheet_resource", &VTF::removeParticleSheetResource)
+			.def("set_crc_resource", &VTF::setCRCResource, py::arg("value"))
+			.def("remove_crc_resource", &VTF::removeCRCResource)
+			.def("set_lod_resource", &VTF::setLODResource, py::arg("u"), py::arg("v"))
+			.def("remove_lod_resource", &VTF::removeLODResource)
+			.def("set_extended_flags_resource", &VTF::setExtendedFlagsResource, py::arg("value"))
+			.def("remove_extended_flags_resource", &VTF::removeExtendedFlagsResource)
+			.def("set_keyvalues_data_resource", &VTF::setKeyValuesDataResource, py::arg("value"))
+			.def("remove_keyvalues_data_resource", &VTF::removeKeyValuesDataResource)
+			.def_prop_rw("compression_level", &VTF::getCompressionLevel, &VTF::setCompressionLevel)
+			.def_prop_rw("compression_method", &VTF::getCompressionMethod, &VTF::setCompressionMethod)
+			.def("has_image_data", &VTF::hasImageData)
+			.def("image_data_is_srgb", &VTF::imageDataIsSRGB)
+			.def("get_image_data_raw", [](const VTF& self, uint8_t mip = 0, uint16_t frame = 0, uint8_t face = 0, uint16_t slice = 0) {
+				const auto d = self.getImageDataRaw(mip, frame, face, slice);
+				return py::bytes{d.data(), d.size()};
+			}, py::arg("mip") = 0, py::arg("frame") = 0, py::arg("face") = 0, py::arg("slice") = 0)
+			.def("get_image_data_as", [](const VTF& self, ImageFormat newFormat, uint8_t mip = 0, uint16_t frame = 0, uint8_t face = 0, uint16_t slice = 0) {
+				const auto d = self.getImageDataAs(newFormat, mip, frame, face, slice);
+				return py::bytes{d.data(), d.size()};
+			}, py::arg("new_format"), py::arg("mip") = 0, py::arg("frame") = 0, py::arg("face") = 0, py::arg("slice") = 0)
+			.def("get_image_data_as_rgba8888", [](const VTF& self, uint8_t mip = 0, uint16_t frame = 0, uint8_t face = 0, uint16_t slice = 0) {
+				const auto d = self.getImageDataAsRGBA8888(mip, frame, face, slice);
+				return py::bytes{d.data(), d.size()};
+			}, py::arg("mip") = 0, py::arg("frame") = 0, py::arg("face") = 0, py::arg("slice") = 0)
+			.def("set_image", [](VTF& self, const py::bytes& imageData, ImageFormat format, uint16_t width, uint16_t height, ImageConversion::ResizeFilter filter = ImageConversion::ResizeFilter::BILINEAR, uint8_t mip = 0, uint16_t frame = 0, uint8_t face = 0, uint16_t slice = 0) {
+				return self.setImage({reinterpret_cast<const std::byte*>(imageData.data()), imageData.size()}, format, width, height, filter, mip, frame, face, slice);
+			}, py::arg("image_data"), py::arg("format"), py::arg("width"), py::arg("height"), py::arg("filter"), py::arg("mip") = 0, py::arg("frame") = 0, py::arg("face") = 0, py::arg("slice") = 0)
+			.def("set_image_from_file", py::overload_cast<const std::string&, ImageConversion::ResizeFilter, uint8_t, uint16_t, uint8_t, uint16_t>(&VTF::setImage), py::arg("image_path"), py::arg("filter") = ImageConversion::ResizeFilter::BILINEAR, py::arg("mip") = 0, py::arg("frame") = 0, py::arg("face") = 0, py::arg("slice") = 0)
+			.def("save_image", [](const VTF& self, uint8_t mip = 0, uint16_t frame = 0, uint8_t face = 0, uint16_t slice = 0, ImageConversion::FileFormat fileFormat = ImageConversion::FileFormat::DEFAULT) {
+				const auto d = self.saveImageToFile(mip, frame, face, slice, fileFormat);
+				return py::bytes{d.data(), d.size()};
+			}, py::arg("mip") = 0, py::arg("frame") = 0, py::arg("face") = 0, py::arg("slice") = 0, py::arg("file_format") = ImageConversion::FileFormat::DEFAULT)
+			.def("save_image_to_file", py::overload_cast<const std::string&, uint8_t, uint16_t, uint8_t, uint16_t, ImageConversion::FileFormat>(&VTF::saveImageToFile, py::const_), py::arg("image_path"), py::arg("mip") = 0, py::arg("frame") = 0, py::arg("face") = 0, py::arg("slice") = 0, py::arg("file_format") = ImageConversion::FileFormat::DEFAULT)
+			.def("has_thumbnail_data", &VTF::hasThumbnailData)
+			.def("get_thumbnail_data_raw", [](const VTF& self) {
+				const auto d = self.getThumbnailDataRaw();
+				return py::bytes{d.data(), d.size()};
+			})
+			.def("get_thumbnail_data_as", [](const VTF& self, ImageFormat newFormat) {
+				const auto d = self.getThumbnailDataAs(newFormat);
+				return py::bytes{d.data(), d.size()};
+			}, py::arg("new_format"))
+			.def("get_thumbnail_data_as_rgba8888", [](const VTF& self) {
+				const auto d = self.getThumbnailDataAsRGBA8888();
+				return py::bytes{d.data(), d.size()};
+			})
+			.def("set_thumbnail", [](VTF& self, const py::bytes& imageData, ImageFormat format, uint16_t width, uint16_t height) {
+				return self.setThumbnail({reinterpret_cast<const std::byte*>(imageData.data()), imageData.size()}, format, width, height);
+			}, py::arg("image_data"), py::arg("format"), py::arg("width"), py::arg("height"))
+			.def("compute_thumbnail", &VTF::computeThumbnail, py::arg("filter") = ImageConversion::ResizeFilter::BILINEAR)
+			.def("remove_thumbnail", &VTF::removeThumbnail)
+			.def("save_thumbnail", [](const VTF& self, ImageConversion::FileFormat fileFormat = ImageConversion::FileFormat::DEFAULT) {
+				const auto d = self.saveThumbnailToFile(fileFormat);
+				return py::bytes{d.data(), d.size()};
+			}, py::arg("file_format") = ImageConversion::FileFormat::DEFAULT)
+			.def("save_thumbnail_to_file", py::overload_cast<const std::string&, ImageConversion::FileFormat>(&VTF::saveThumbnailToFile, py::const_), py::arg("image_path"), py::arg("file_format") = ImageConversion::FileFormat::DEFAULT)
+			.def("bake", [](const VTF& self) {
+				const auto d = self.bake();
+				return py::bytes{d.data(), d.size()};
+			})
+			.def("bake_to_file", py::overload_cast<const std::string&>(&VTF::bake, py::const_), py::arg("vtf_path"));
+}
+
+} // namespace vtfpp
diff --git a/src/bsppp/_bsppp.cmake b/src/bsppp/_bsppp.cmake
index aeb0cada1..0a6fa67c2 100644
--- a/src/bsppp/_bsppp.cmake
+++ b/src/bsppp/_bsppp.cmake
@@ -1,4 +1,5 @@
 add_pretty_parser(bsppp
+        DEPS sourcepp_parser
         SOURCES
         "${CMAKE_CURRENT_SOURCE_DIR}/include/bsppp/bsppp.h"
         "${CMAKE_CURRENT_SOURCE_DIR}/include/bsppp/LumpData.h"
diff --git a/src/bsppp/bsppp.cpp b/src/bsppp/bsppp.cpp
index a6b36c79b..df0566558 100644
--- a/src/bsppp/bsppp.cpp
+++ b/src/bsppp/bsppp.cpp
@@ -202,7 +202,7 @@ void BSP::writeLump(BSPLump lumpIndex, std::span<const std::byte> data, bool con
 
 			// If we have the space to add padding (we should), then do so
 			// This should never fail for well-constructed BSP files
-			auto padding = math::getPaddingForAlignment(4, currentOffset);
+			auto padding = math::paddingForAlignment(4, currentOffset);
 			if (padding && i < lumpIDs.size() - 1 && currentOffset + padding <= this->header.lumps[lumpIDs[i + 1]].offset) {
 				currentOffset += padding;
 			}
diff --git a/src/gamepp/gamepp.cpp b/src/gamepp/gamepp.cpp
index b44892da9..d07b4aef5 100644
--- a/src/gamepp/gamepp.cpp
+++ b/src/gamepp/gamepp.cpp
@@ -13,7 +13,7 @@ using namespace sourcepp;
 #include <Windows.h>
 
 std::optional<GameInstance> GameInstance::find(std::string_view windowNameOverride) {
-	GameInstance instance;
+	GameInstance instance{};
 
 	if (!windowNameOverride.empty()) {
 		instance.hwnd = FindWindowA(windowNameOverride.data(), nullptr);
@@ -34,7 +34,7 @@ std::optional<GameInstance> GameInstance::find(std::string_view windowNameOverri
 std::string GameInstance::getWindowTitle() const {
 	// This should be large enough
 	std::string title(512, '\0');
-	if (auto size = GetWindowTextA(reinterpret_cast<HWND>(this->hwnd), title.data(), title.length())) {
+	if (auto size = GetWindowTextA(reinterpret_cast<HWND>(this->hwnd), title.data(), static_cast<int>(title.length()))) {
 		title.resize(size);
 		return title;
 	}
diff --git a/src/kvpp/_kvpp.cmake b/src/kvpp/_kvpp.cmake
index 559d30749..aa6f21a75 100644
--- a/src/kvpp/_kvpp.cmake
+++ b/src/kvpp/_kvpp.cmake
@@ -1,4 +1,5 @@
 add_pretty_parser(kvpp
+        DEPS sourcepp_parser
         SOURCES
         "${CMAKE_CURRENT_SOURCE_DIR}/include/kvpp/KV1.h"
         "${CMAKE_CURRENT_SOURCE_DIR}/include/kvpp/kvpp.h"
diff --git a/src/mdlpp/_mdlpp.cmake b/src/mdlpp/_mdlpp.cmake
index 86f313920..ebca313bc 100644
--- a/src/mdlpp/_mdlpp.cmake
+++ b/src/mdlpp/_mdlpp.cmake
@@ -1,4 +1,5 @@
 add_pretty_parser(mdlpp
+        DEPS sourcepp_parser
         PRECOMPILED_HEADERS
         "${CMAKE_CURRENT_SOURCE_DIR}/include/mdlpp/structs/Generic.h"
         "${CMAKE_CURRENT_SOURCE_DIR}/include/mdlpp/structs/MDL.h"
diff --git a/src/mdlpp/structs/MDL.cpp b/src/mdlpp/structs/MDL.cpp
index 53132f466..b986e986a 100644
--- a/src/mdlpp/structs/MDL.cpp
+++ b/src/mdlpp/structs/MDL.cpp
@@ -87,7 +87,7 @@ bool MDL::open(const std::byte* data, std::size_t size) {
 			.read(bone.procType)
 			.read(bone.procIndex)
 			.read(bone.physicsBone);
-		parser::binary::readStringAtOffset(stream, bone.surfacePropName, std::ios::cur, sizeof(int32_t) * 12 + sizeof(math::Vec3f) * 4 + sizeof(math::Quat) * 2 + sizeof(math::Matrix<3,4>) + sizeof(Bone::Flags));
+		parser::binary::readStringAtOffset(stream, bone.surfacePropName, std::ios::cur, sizeof(int32_t) * 12 + sizeof(math::Vec3f) * 4 + sizeof(math::Quat) * 2 + sizeof(math::Mat3x4f) + sizeof(Bone::Flags));
 		stream.read(bone.contents);
 
 		// _unused0
diff --git a/src/sourcepp/_sourcepp.cmake b/src/sourcepp/_sourcepp.cmake
index fd4a50f8d..25695c85b 100644
--- a/src/sourcepp/_sourcepp.cmake
+++ b/src/sourcepp/_sourcepp.cmake
@@ -1,34 +1,17 @@
 list(APPEND ${PROJECT_NAME}_HEADERS
-        "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/crypto/Adler32.h"
-        "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/crypto/CRC32.h"
-        "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/crypto/MD5.h"
-        "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/crypto/RSA.h"
-        "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/crypto/String.h"
-        "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/math/Angles.h"
-        "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/math/Float.h"
-        "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/math/Integer.h"
-        "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/math/Matrix.h"
-        "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/math/Vector.h"
-        "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/parser/Binary.h"
-        "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/parser/Text.h"
         "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/FS.h"
         "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/Macros.h"
-        "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/String.h")
+        "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/Math.h" # NOTE(review): presumably consolidates the math/*.h headers dropped above - confirm
+        "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/String.h"
+        "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/Templates.h")
 
 add_library(${PROJECT_NAME} STATIC
         ${${PROJECT_NAME}_HEADERS}
-        "${CMAKE_CURRENT_LIST_DIR}/crypto/Adler32.cpp"
-        "${CMAKE_CURRENT_LIST_DIR}/crypto/CRC32.cpp"
-        "${CMAKE_CURRENT_LIST_DIR}/crypto/MD5.cpp"
-        "${CMAKE_CURRENT_LIST_DIR}/crypto/RSA.cpp"
-        "${CMAKE_CURRENT_LIST_DIR}/crypto/String.cpp"
-        "${CMAKE_CURRENT_LIST_DIR}/parser/Binary.cpp"
-        "${CMAKE_CURRENT_LIST_DIR}/parser/Text.cpp"
         "${CMAKE_CURRENT_LIST_DIR}/FS.cpp"
         "${CMAKE_CURRENT_LIST_DIR}/String.cpp")
 
 target_precompile_headers(${PROJECT_NAME} PUBLIC ${${PROJECT_NAME}_HEADERS})
 
-target_link_libraries(${PROJECT_NAME} PUBLIC bufferstream cryptopp::cryptopp)
+target_link_libraries(${PROJECT_NAME} PUBLIC bufferstream sourcepp_half) # cryptopp is now linked by the ${PROJECT_NAME}_crypto target instead; NOTE(review): sourcepp_half is defined outside this file - confirm it exists before this file is included
 
 target_include_directories(${PROJECT_NAME} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")
diff --git a/src/sourcepp/crypto/_crypto.cmake b/src/sourcepp/crypto/_crypto.cmake
new file mode 100644
index 000000000..eab77edff
--- /dev/null
+++ b/src/sourcepp/crypto/_crypto.cmake
@@ -0,0 +1,18 @@
+list(APPEND ${PROJECT_NAME}_crypto_HEADERS # headers belonging to the crypto sub-library
+        "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/crypto/Adler32.h"
+        "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/crypto/CRC32.h"
+        "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/crypto/MD5.h"
+        "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/crypto/RSA.h"
+        "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/crypto/String.h")
+
+add_library(${PROJECT_NAME}_crypto STATIC # static library: the headers listed above plus the sources located next to this file
+        ${${PROJECT_NAME}_crypto_HEADERS}
+        "${CMAKE_CURRENT_LIST_DIR}/Adler32.cpp"
+        "${CMAKE_CURRENT_LIST_DIR}/CRC32.cpp"
+        "${CMAKE_CURRENT_LIST_DIR}/MD5.cpp"
+        "${CMAKE_CURRENT_LIST_DIR}/RSA.cpp"
+        "${CMAKE_CURRENT_LIST_DIR}/String.cpp")
+
+target_precompile_headers(${PROJECT_NAME}_crypto PUBLIC ${${PROJECT_NAME}_crypto_HEADERS}) # PUBLIC: also applied to targets that link this one
+
+target_link_libraries(${PROJECT_NAME}_crypto PUBLIC ${PROJECT_NAME} cryptopp::cryptopp) # builds on the base library and Crypto++
diff --git a/src/sourcepp/parser/_parser.cmake b/src/sourcepp/parser/_parser.cmake
new file mode 100644
index 000000000..3ba7ce021
--- /dev/null
+++ b/src/sourcepp/parser/_parser.cmake
@@ -0,0 +1,12 @@
+list(APPEND ${PROJECT_NAME}_parser_HEADERS # headers belonging to the parser sub-library
+        "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/parser/Binary.h"
+        "${CMAKE_CURRENT_SOURCE_DIR}/include/sourcepp/parser/Text.h")
+
+add_library(${PROJECT_NAME}_parser STATIC # static library: the headers listed above plus the sources located next to this file
+        ${${PROJECT_NAME}_parser_HEADERS}
+        "${CMAKE_CURRENT_LIST_DIR}/Binary.cpp"
+        "${CMAKE_CURRENT_LIST_DIR}/Text.cpp")
+
+target_precompile_headers(${PROJECT_NAME}_parser PUBLIC ${${PROJECT_NAME}_parser_HEADERS}) # PUBLIC: also applied to targets that link this one
+
+target_link_libraries(${PROJECT_NAME}_parser PUBLIC ${PROJECT_NAME}) # builds on the base library only
diff --git a/src/toolpp/CmdSeq.cpp b/src/toolpp/CmdSeq.cpp
index a2e06a3ea..1c9f924e2 100644
--- a/src/toolpp/CmdSeq.cpp
+++ b/src/toolpp/CmdSeq.cpp
@@ -12,96 +12,102 @@ using namespace toolpp;
 
 namespace {
 
-std::vector<std::byte> bakeBinary(const CmdSeq& cmdSeq) {
-	std::vector<std::byte> out;
-	BufferStream writer{out};
-
-	writer
-		.write("Worldcraft Command Sequences\r\n\x1a", 31)
-		.write<float>(cmdSeq.getVersion())
-		.write<uint32_t>(cmdSeq.getSequences().size());
-
-	for (const auto& [seqName, seqCommands] : cmdSeq.getSequences()) {
-		writer
-			.write(seqName, true, 128)
-			.write<uint32_t>(seqCommands.size());
+CmdSeq::Command::Special specialCmdFromString(std::string_view specialCmd) { // Translates a textual special-command name into its Special enum value.
+	using enum CmdSeq::Command::Special; // lets the enumerators below be written unqualified
+	if (string::iequals(specialCmd, "change_dir")) { // NOTE(review): string::iequals is presumably a case-insensitive comparison - confirm in sourcepp/String.h
+		return CHANGE_DIRECTORY;
+	}
+	if (string::iequals(specialCmd, "copy_file")) {
+		return COPY_FILE;
+	}
+	if (string::iequals(specialCmd, "delete_file")) {
+		return DELETE_FILE;
+	}
+	if (string::iequals(specialCmd, "rename_file")) {
+		return RENAME_FILE;
+	}
+	if (string::iequals(specialCmd, "copy_file_if_exists")) {
+		return COPY_FILE_IF_EXISTS;
+	}
+	return NONE; // any name that matched none of the checks above
+}
 
-		for (const auto& [enabled, special, executable, arguments, ensureFileExists, pathToTheoreticallyExistingFile, useProcessWindow, waitForKeypress] : seqCommands) {
-			writer
-				.write<uint32_t>(enabled)
-				.write(special)
-				.write(executable, true, 260)
-				.write(arguments, true, 260)
-				.write<uint32_t>(true)
-				.write<uint32_t>(ensureFileExists)
-				.write(pathToTheoreticallyExistingFile, true, 260)
-				.write<uint32_t>(useProcessWindow);
+} // namespace
 
-			if (cmdSeq.getVersion() > 0.15f) {
-				writer.write<uint32_t>(waitForKeypress);
-			}
-		}
+std::string CmdSeq::Command::getSpecialDisplayNameFor(Special special) { // Returns the human-readable label for a special command value.
+	switch (special) {
+		case Special::NONE:
+			break; // leaves the switch so NONE uses the shared "None" return at the end
+		case Special::CHANGE_DIRECTORY:
+			return "Change Directory";
+		case Special::COPY_FILE:
+			return "Copy File";
+		case Special::DELETE_FILE:
+			return "Delete File";
+		case Special::RENAME_FILE:
+			return "Rename File";
+		case Special::COPY_FILE_IF_EXISTS:
+			return "Copy File If It Exists";
 	}
-
-	out.resize(writer.size());
-	return out;
+	return "None"; // reached for NONE and for any value that has no case above
 }
 
-std::vector<std::byte> bakeKeyValues(const CmdSeq& cmdSeq) {
-	KV1Writer kv;
-	auto& kvFile = kv.addChild("Command Sequences");
-	for (const auto& [seqName, seqCommands] : cmdSeq.getSequences()) {
-		auto& kvSequence = kvFile.addChild(seqName);
-		for (int i = 1; i <= seqCommands.size(); i++) {
-			const auto& [enabled, special, executable, arguments, ensureFileExists, pathToTheoreticallyExistingFile, useProcessWindow, waitForKeypress] = seqCommands[i - 1];
-			auto& kvCommand = kvSequence.addChild(std::to_string(i));
-			kvCommand["enabled"] = enabled;
-			kvCommand["special_cmd"] = static_cast<int>(special);
-			kvCommand["run"] = executable;
-			kvCommand["params"] = arguments;
-			kvCommand["ensure_check"] = ensureFileExists;
-			kvCommand["ensure_fn"] = pathToTheoreticallyExistingFile;
-			kvCommand["use_process_wnd"] = useProcessWindow;
-			kvCommand["no_wait"] = waitForKeypress;
-		}
+std::string CmdSeq::Command::getExecutableDisplayName() const { // Label for this command: the special command's display name when one is set, otherwise the executable string.
+	if (this->special != Command::Special::NONE) {
+		return getSpecialDisplayNameFor(this->special);
 	}
-
-	const auto kvStr = kv.bake();
-	std::vector<std::byte> out;
-	out.resize(kvStr.length());
-	std::memcpy(out.data(), kvStr.data(), kvStr.length());
-	return out;
+	return this->executable; // no special command set, so the executable string itself is the label
 }
 
-} // namespace
-
-CmdSeq::CmdSeq(std::string path_)
-		: version(0.f)
-		, path(std::move(path_)) {
+CmdSeq::CmdSeq(const std::string& path) // Loads a command sequence file, detecting its format from the leading bytes; type stays INVALID if the file cannot be opened or is not recognized.
+		: type(Type::INVALID)
+		, version(0.2f) {
 	{
 		FileStream reader{path};
 		if (!reader) {
 			return;
 		}
 		if (auto binStr = reader.seek_in(0).read_string(10); binStr == "Worldcraft") {
-			this->usingKeyValues = false;
+			this->type = Type::BINARY; // the file begins with the 10 characters "Worldcraft"
 		} else {
 			auto kvStr = reader.seek_in(0).read_string(19);
 			string::toLower(kvStr);
 			if (kvStr == "\"command sequences\"") {
-				this->usingKeyValues = true;
+				this->type = Type::KEYVALUES_STRATA; // the first 19 characters, after string::toLower, equal the quoted root key
 			} else {
 				return;
 			}
 		}
 	}
-	if (this->usingKeyValues) {
-		this->parseKeyValues(path);
-	} else {
-		this->parseBinary(path);
+	switch (this->type) { // the detection reader is out of scope here; each parser is handed the path rather than the stream
+		using enum Type;
+		case INVALID:
+			break; // unrecognized files already returned above, so this case only keeps the switch exhaustive
+		case BINARY:
+			this->parseBinary(path);
+			break;
+		case KEYVALUES_STRATA:
+			this->parseKeyValuesStrata(path);
+			break;
 	}
 }
 
+CmdSeq::CmdSeq(Type type_) // Creates a command sequence object of the given type without reading any file.
+		: type(type_)
+		, version(0.2f) {} // same initial version value as the path-based constructor
+
+CmdSeq::operator bool() const { // True unless the type is INVALID.
+	return this->type != Type::INVALID;
+}
+
+CmdSeq::Type CmdSeq::getType() const { // Returns the format type currently stored in this object.
+	return this->type;
+}
+
+void CmdSeq::setType(Type type_) { // Replaces the stored format type; bake() selects its output format from this value.
+	this->type = type_;
+}
+
 float CmdSeq::getVersion() const {
 	return this->version;
 }
@@ -132,6 +138,9 @@ void CmdSeq::parseBinary(const std::string& path) {
 			auto& [enabled, special, executable, arguments, ensureFileExists, pathToTheoreticallyExistingFile, useProcessWindow, waitForKeypress] = seqCommands.emplace_back();
 			enabled = reader.read<int32_t>() & 0xFF;
 			special = reader.read<Command::Special>();
+			if (special == static_cast<Command::Special>(Command::SPECIAL_COPY_FILE_IF_EXISTS_ALIAS)) {
+				special = Command::Special::COPY_FILE_IF_EXISTS;
+			}
 			executable = reader.read_string(260);
 			arguments = reader.read_string(260);
 			reader.skip_in<int32_t>();
@@ -145,7 +154,7 @@ void CmdSeq::parseBinary(const std::string& path) {
 	}
 }
 
-void CmdSeq::parseKeyValues(const std::string& path) {
+void CmdSeq::parseKeyValuesStrata(const std::string& path) {
 	this->version = 0.2f;
 
 	const KV1 cmdSeq{fs::readFileText(path)};
@@ -156,7 +165,15 @@ void CmdSeq::parseKeyValues(const std::string& path) {
 		for (const auto& kvCommand : kvSequence.getChildren()) {
 			auto& [enabled, special, executable, arguments, ensureFileExists, pathToTheoreticallyExistingFile, useProcessWindow, waitForKeypress] = seqCommands.emplace_back();
 			string::toBool(kvCommand["enabled"].getValue(), enabled);
-			string::toInt(kvCommand["special_cmd"].getValue(), reinterpret_cast<std::underlying_type_t<Command::Special>&>(special));
+			const auto specialCmd = kvCommand["special_cmd"].getValue();
+			if (parser::text::isNumber(specialCmd)) {
+				string::toInt(specialCmd, reinterpret_cast<std::underlying_type_t<Command::Special>&>(special));
+				if (special == Command::SPECIAL_COPY_FILE_IF_EXISTS_ALIAS) {
+					special = Command::Special::COPY_FILE_IF_EXISTS;
+				}
+			} else {
+				special = ::specialCmdFromString(specialCmd);
+			}
 			executable = kvCommand["run"].getValue();
 			arguments = kvCommand["params"].getValue();
 			string::toBool(kvCommand["ensure_check"].getValue(), ensureFileExists);
@@ -175,33 +192,85 @@ const std::vector<CmdSeq::Sequence>& CmdSeq::getSequences() const {
 	return this->sequences;
 }
 
-std::vector<std::byte> CmdSeq::bake() const {
-	return this->bake(this->usingKeyValues);
+std::vector<std::byte> CmdSeq::bakeBinary() const { // Serializes every sequence in the binary layout and returns the resulting bytes.
+	std::vector<std::byte> out;
+	BufferStream writer{out};
+
+	writer
+		.write("Worldcraft Command Sequences\r\n\x1a", 31) // 31-character signature; its first 10 characters are what the path constructor checks for
+		.write<float>(this->getVersion())
+		.write<uint32_t>(this->getSequences().size()); // number of sequences that follow
+
+	for (const auto& [seqName, seqCommands] : this->getSequences()) {
+		writer
+			.write(seqName, true, 128) // NOTE(review): presumably a fixed 128-byte, null-terminated field - confirm BufferStream::write semantics
+			.write<uint32_t>(seqCommands.size()); // number of commands in this sequence
+
+		for (const auto& [enabled, special, executable, arguments, ensureFileExists, pathToTheoreticallyExistingFile, useProcessWindow, waitForKeypress] : seqCommands) {
+			writer
+				.write<uint32_t>(enabled)
+				.write(special)
+				.write(executable, true, 260)
+				.write(arguments, true, 260)
+				.write<uint32_t>(true) // constant field; parseBinary skips an int32 at this position
+				.write<uint32_t>(ensureFileExists)
+				.write(pathToTheoreticallyExistingFile, true, 260)
+				.write<uint32_t>(useProcessWindow);
+
+			if (this->getVersion() > 0.15f) { // the trailing field is only emitted for versions above 0.15
+				writer.write<uint32_t>(waitForKeypress);
+			}
+		}
+	}
+
+	out.resize(writer.size()); // NOTE(review): presumably trims the vector to the bytes actually written - confirm against BufferStream
+	return out;
 }
 
-std::vector<std::byte> CmdSeq::bake(bool overrideUsingKeyValues) const {
-	if (overrideUsingKeyValues) {
-		return ::bakeKeyValues(*this);
+std::vector<std::byte> CmdSeq::bakeKeyValuesStrata() const { // Serializes every sequence as KeyValues text under a "Command Sequences" root and returns that text's bytes.
+	KV1Writer kv;
+	auto& kvFile = kv.addChild("Command Sequences");
+	for (const auto& [seqName, seqCommands] : this->getSequences()) {
+		auto& kvSequence = kvFile.addChild(seqName);
+		for (std::size_t i = 1; i <= seqCommands.size(); i++) { // 1-based so the child keys are "1", "2", ...; size_t matches size() and avoids a signed/unsigned comparison
+			const auto& [enabled, special, executable, arguments, ensureFileExists, pathToTheoreticallyExistingFile, useProcessWindow, waitForKeypress] = seqCommands[i - 1];
+			auto& kvCommand = kvSequence.addChild(std::to_string(i));
+			kvCommand["enabled"] = enabled;
+			kvCommand["special_cmd"] = static_cast<int>(special); // written as a number; parseKeyValuesStrata accepts numbers as well as names
+			kvCommand["run"] = executable;
+			kvCommand["params"] = arguments;
+			kvCommand["ensure_check"] = ensureFileExists;
+			kvCommand["ensure_fn"] = pathToTheoreticallyExistingFile;
+			kvCommand["use_process_wnd"] = useProcessWindow;
+			kvCommand["no_wait"] = waitForKeypress;
+		}
 	}
-	return ::bakeBinary(*this);
+
+	const auto kvStr = kv.bake();
+	std::vector<std::byte> out;
+	out.resize(kvStr.length());
+	std::memcpy(out.data(), kvStr.data(), kvStr.length()); // copies exactly length() bytes, so no terminator is appended
+	return out;
 }
 
-bool CmdSeq::bake(const std::string& path_) {
-	return this->bake(path_, this->usingKeyValues);
+std::vector<std::byte> CmdSeq::bake() const { // Serializes in the format selected by the stored type; INVALID yields an empty vector.
+	switch (this->type) {
+		using enum Type;
+		case INVALID:
+			return {};
+		case BINARY:
+			return this->bakeBinary();
+		case KEYVALUES_STRATA:
+			return this->bakeKeyValuesStrata();
+	}
+	return {}; // fallback for a type value that matches none of the cases above
 }
 
-bool CmdSeq::bake(const std::string& path_, bool overrideUsingKeyValues) {
-	FileStream writer{path_};
+bool CmdSeq::bake(const std::string& path) const { // Writes the result of bake() to the file at `path`; returns false only when the stream fails to open.
+	FileStream writer{path}; // NOTE(review): opened with FileStream's default options - confirm they allow writing, creating a missing file, and truncating a longer existing one
 	if (!writer) {
 		return false;
 	}
-	this->path = path_;
-
-	writer.seek_out(0);
-	if (overrideUsingKeyValues) {
-		writer.write(::bakeKeyValues(*this));
-	} else {
-		writer.write(::bakeBinary(*this));
-	}
+	writer.seek_out(0).write(this->bake()); // for an INVALID type bake() is empty, yet true is still returned below
 	return true;
 }
diff --git a/src/toolpp/FGD.cpp b/src/toolpp/FGD.cpp
index f9f955ca8..d106e71c3 100644
--- a/src/toolpp/FGD.cpp
+++ b/src/toolpp/FGD.cpp
@@ -8,9 +8,11 @@
 
 #include <sourcepp/parser/Text.h>
 #include <sourcepp/FS.h>
+#include <sourcepp/Math.h>
 #include <sourcepp/String.h>
 
 using namespace sourcepp;
+using namespace std::string_view_literals;
 using namespace toolpp;
 
 namespace {
@@ -467,11 +469,11 @@ void writeOptionalKeyValueStrings(BufferStream& writer, std::initializer_list<st
 	static constexpr auto writeOptionalString = [](BufferStream& stream, const std::string& str) {
 		if (!str.empty()) {
 			stream
-				.write(" : \"", 4)
+				.write(" : \""sv, false)
 				.write(str, false)
 				.write('\"');
 		} else {
-			stream.write(" :", 2);
+			stream.write(" :"sv, false);
 		}
 	};
 	for (auto revString = std::rbegin(strings); revString != std::rend(strings); ++revString) {
@@ -605,66 +607,66 @@ FGDWriter FGDWriter::begin() {
 
 FGDWriter& FGDWriter::include(const std::string& fgdPath) {
 	this->writer
-		.write("@include \"", 10)
+		.write("@include \""sv, false) // emits: @include "<fgdPath>" followed by a blank line; the sv literal carries its own length, replacing the hand-counted size argument
 		.write(fgdPath, false)
-		.write("\"\n\n", 3);
+		.write("\"\n\n"sv, false); // NOTE(review): `false` presumably means no null terminator is appended - confirm in BufferStream
 	return *this;
 }
 
 FGDWriter& FGDWriter::version(int version) {
 	this->writer
-		.write("@version(", 9)
+		.write("@version("sv, false) // emits: @version(<version>) followed by a blank line; the sv literal replaces the hand-counted size argument
 		.write(std::to_string(version), false)
-		.write(")\n\n", 3);
+		.write(")\n\n"sv, false);
 	return *this;
 }
 
 FGDWriter& FGDWriter::mapSize(math::Vec2i mapSize) {
 	this->writer
-	    .write("@mapsize(", 9)
+	    .write("@mapsize("sv, false) // emits: @mapsize(<mapSize[0]>, <mapSize[1]>) followed by a blank line
 	    .write(std::to_string(mapSize[0]), false)
-	    .write(", ", 2)
+	    .write(", "sv, false)
 	    .write(std::to_string(mapSize[1]), false)
-	    .write(")\n\n", 3);
+	    .write(")\n\n"sv, false);
 	return *this;
 }
 
 FGDWriter& FGDWriter::materialExclusionDirs(const std::vector<std::string>& dirs) {
-	this->writer.write("@MaterialExclusion\n[\n", 21);
+	this->writer.write("@MaterialExclusion\n[\n"sv, false); // opens the block; each directory is then written tab-indented and quoted on its own line
 	for (const auto& dir : dirs) {
 		this->writer << '\t' << '\"';
 		this->writer.write(dir, false);
 		this->writer << '\"' << '\n';
 	}
-	this->writer.write("]\n\n", 3);
+	this->writer.write("]\n\n"sv, false); // closes the block and leaves a blank line after it
 	return *this;
 }
 
 FGDWriter::AutoVisGroupWriter FGDWriter::beginAutoVisGroup(const std::string& parentName) {
 	this->writer
-		.write("@AutoVisGroup = \"", 17)
+		.write("@AutoVisGroup = \""sv, false) // emits: @AutoVisGroup = "<parentName>" and, on the next line, the opening bracket
 		.write(parentName, false)
-		.write("\"\n[\n", 4);
+		.write("\"\n[\n"sv, false); // the block stays open; the returned AutoVisGroupWriter wraps this FGDWriter
 	return AutoVisGroupWriter{*this};
 }
 
 FGDWriter::AutoVisGroupWriter& FGDWriter::AutoVisGroupWriter::visGroup(const std::string& name, const std::vector<std::string>& entities) {
 	this->parent.writer
-	    .write("\t\"", 2)
+	    .write("\t\""sv, false) // emits the quoted group name indented by one tab, then an opening bracket on the next line
 	    .write(name, false)
-	    .write("\"\n\t[\n", 5);
+	    .write("\"\n\t[\n"sv, false);
 	for (const auto& entity : entities) {
 		this->parent.writer
-			.write("\t\t\"", 3)
+			.write("\t\t\""sv, false) // one quoted entity name per line, indented by two tabs
 			.write(entity, false)
-			.write("\"\n", 2);
+			.write("\"\n"sv, false);
 	}
-	this->parent.writer.write("\t]\n", 3);
+	this->parent.writer.write("\t]\n"sv, false); // closes this group's bracket
 	return *this;
 }
 
 FGDWriter& FGDWriter::AutoVisGroupWriter::endAutoVisGroup() const {
-	this->parent.writer.write("]\n\n", 3);
+	this->parent.writer.write("]\n\n"sv, false); // closing bracket plus a blank line, then control returns to the parent FGDWriter
 	return this->parent;
 }
 
@@ -684,20 +686,20 @@ FGDWriter::EntityWriter FGDWriter::beginEntity(const std::string& classType, con
 		}
 	}
 	this->writer
-		.write("= ", 2)
+		.write("= "sv, false)
 		.write(name, false)
-		.write(" :", 2);
+		.write(" :"sv, false);
 	// Put the description on the same line if it's short
 	if (description.size() < 32) {
 		this->writer
-			.write(" \"", 2)
+			.write(" \""sv, false)
 			.write(description, false);
 	} else {
 		this->writer
-			.write("\n\t\"", 3)
+			.write("\n\t\""sv, false)
 			.write(description, false);
 	}
-	this->writer.write("\"\n[\n", 4);
+	this->writer.write("\"\n[\n"sv, false);
 	return EntityWriter{*this};
 }
 
@@ -709,10 +711,10 @@ FGDWriter::EntityWriter& FGDWriter::EntityWriter::keyValue(const std::string& na
 		.write(valueType, false)
 		.write(')');
 	if (readOnly) {
-		this->parent.writer.write(" readonly", 9);
+		this->parent.writer.write(" readonly"sv, false);
 	}
 	if (report) {
-		this->parent.writer.write(" report", 7);
+		this->parent.writer.write(" report"sv, false);
 	}
 	::writeOptionalKeyValueStrings(this->parent.writer, {displayName, valueDefault, description});
 	this->parent.writer << '\n';
@@ -723,30 +725,30 @@ FGDWriter::EntityWriter::KeyValueChoicesWriter FGDWriter::EntityWriter::beginKey
 	this->parent.writer
 	    .write('\t')
 	    .write(name, false)
-	    .write("(choices)", 9);
+	    .write("(choices)"sv, false);
 	if (readOnly) {
-		this->parent.writer.write(" readonly", 9);
+		this->parent.writer.write(" readonly"sv, false);
 	}
 	if (report) {
-		this->parent.writer.write(" report", 7);
+		this->parent.writer.write(" report"sv, false);
 	}
 	::writeOptionalKeyValueStrings(this->parent.writer, {displayName, valueDefault, description});
-	this->parent.writer.write(" =\n\t[\n", 6);
+	this->parent.writer.write(" =\n\t[\n"sv, false);
 	return KeyValueChoicesWriter{*this};
 }
 
 FGDWriter::EntityWriter::KeyValueChoicesWriter& FGDWriter::EntityWriter::KeyValueChoicesWriter::choice(const std::string& value, const std::string& displayName) {
 	this->parent.parent.writer
-		.write("\t\t\"", 3)
+		.write("\t\t\""sv, false) // emits one line, indented by two tabs: "<value>" : "<displayName>"
 		.write(value, false)
-		.write("\" : \"", 5)
+		.write("\" : \""sv, false)
 		.write(displayName, false)
-		.write("\"\n", 2);
+		.write("\"\n"sv, false);
 	return *this;
 }
 
 FGDWriter::EntityWriter& FGDWriter::EntityWriter::KeyValueChoicesWriter::endKeyValueChoices() const {
-	this->parent.parent.writer.write("\t]\n", 3);
+	this->parent.parent.writer.write("\t]\n"sv, false); // tab-indented closing bracket, then control returns to the enclosing EntityWriter
 	return this->parent;
 }
 
@@ -754,29 +756,29 @@ FGDWriter::EntityWriter::KeyValueFlagsWriter FGDWriter::EntityWriter::beginKeyVa
 	this->parent.writer
 	    .write('\t')
 	    .write(name, false)
-	    .write("(flags)", 7);
+	    .write("(flags)"sv, false);
 	if (readOnly) {
-		this->parent.writer.write(" readonly", 9);
+		this->parent.writer.write(" readonly"sv, false);
 	}
 	if (report) {
-		this->parent.writer.write(" report", 7);
+		this->parent.writer.write(" report"sv, false);
 	}
 	::writeOptionalKeyValueStrings(this->parent.writer, {displayName, description});
-	this->parent.writer.write(" =\n\t[\n", 6);
+	this->parent.writer.write(" =\n\t[\n"sv, false);
 	return KeyValueFlagsWriter{*this};
 }
 
 FGDWriter::EntityWriter::KeyValueFlagsWriter& FGDWriter::EntityWriter::KeyValueFlagsWriter::flag(uint64_t value, const std::string& displayName, bool enabledByDefault, const std::string& description) {
 	this->parent.parent.writer
-		.write("\t\t", 2)
+		.write("\t\t"sv, false)
 	    .write(std::to_string(value), false)
-	    .write(" : \"", 4)
+	    .write(" : \""sv, false)
 	    .write(displayName, false)
-	    .write("\" : ", 4)
+	    .write("\" : "sv, false)
 	    .write(std::to_string(enabledByDefault), false);
 	if (!description.empty()) {
 		this->parent.parent.writer
-		    .write(" : \"", 4)
+		    .write(" : \""sv, false)
 		    .write(description, false)
 		    .write('\"');
 	}
@@ -785,21 +787,21 @@ FGDWriter::EntityWriter::KeyValueFlagsWriter& FGDWriter::EntityWriter::KeyValueF
 }
 
 FGDWriter::EntityWriter& FGDWriter::EntityWriter::KeyValueFlagsWriter::endKeyValueFlags() const {
-	this->parent.parent.writer.write("\t]\n", 3);
+	this->parent.parent.writer.write("\t]\n"sv, false);
 	return this->parent;
 }
 
 FGDWriter::EntityWriter& FGDWriter::EntityWriter::input(const std::string& name, const std::string& valueType, const std::string& description) {
 	this->parent.writer
 	    .write('\t')
-		.write("input ", 6)
+		.write("input "sv, false)
 	    .write(name, false)
 	    .write('(')
 	    .write(valueType, false)
 	    .write(')');
 	if (!description.empty()) {
 		this->parent.writer
-		    .write(" : \"", 4)
+		    .write(" : \""sv, false)
 		    .write(description, false)
 		    .write('\"');
 	}
@@ -810,14 +812,14 @@ FGDWriter::EntityWriter& FGDWriter::EntityWriter::input(const std::string& name,
 FGDWriter::EntityWriter& FGDWriter::EntityWriter::output(const std::string& name, const std::string& valueType, const std::string& description) {
 	this->parent.writer
 	    .write('\t')
-	    .write("output ", 7)
+	    .write("output "sv, false)
 	    .write(name, false)
 	    .write('(')
 	    .write(valueType, false)
 	    .write(')');
 	if (!description.empty()) {
 		this->parent.writer
-		    .write(" : \"", 4)
+		    .write(" : \""sv, false)
 		    .write(description, false)
 		    .write('\"');
 	}
@@ -826,18 +828,18 @@ FGDWriter::EntityWriter& FGDWriter::EntityWriter::output(const std::string& name
 }
 
 FGDWriter& FGDWriter::EntityWriter::endEntity() const {
-	this->parent.writer.write("]\n\n", 3);
+	this->parent.writer.write("]\n\n"sv, false);
 	return this->parent;
 }
 
-std::string FGDWriter::bake() {
-	this->backingData.resize(this->writer.tell());
-	if (this->backingData.ends_with("\n\n")) {
-		this->backingData.pop_back();
+std::string FGDWriter::bake() const {
+	std::string_view out{this->backingData.data(), this->writer.tell()};
+	while (out.ends_with("\n\n")) {
+		out = out.substr(0, out.size() - 1);
 	}
-	return this->backingData;
+	return std::string{out};
 }
 
-bool FGDWriter::bake(const std::string& fgdPath) {
+bool FGDWriter::bake(const std::string& fgdPath) const {
 	return fs::writeFileText(fgdPath, this->bake());
 }
diff --git a/src/toolpp/_toolpp.cmake b/src/toolpp/_toolpp.cmake
index d8d9342c1..cf49ee17d 100644
--- a/src/toolpp/_toolpp.cmake
+++ b/src/toolpp/_toolpp.cmake
@@ -1,5 +1,5 @@
 add_pretty_parser(toolpp
-        DEPS sourcepp::kvpp
+        DEPS sourcepp_parser sourcepp::kvpp
         SOURCES
         "${CMAKE_CURRENT_SOURCE_DIR}/include/toolpp/CmdSeq.h"
         "${CMAKE_CURRENT_SOURCE_DIR}/include/toolpp/FGD.h"
diff --git a/src/vpkpp/_vpkpp.cmake b/src/vpkpp/_vpkpp.cmake
index b1746cb12..7abac1e68 100644
--- a/src/vpkpp/_vpkpp.cmake
+++ b/src/vpkpp/_vpkpp.cmake
@@ -1,6 +1,6 @@
 add_pretty_parser(vpkpp
-        DEPS cryptopp::cryptopp MINIZIP::minizip sourcepp::bsppp sourcepp::kvpp
-        DEPS_INTERFACE tsl::hat_trie
+        DEPS libzstd_static MINIZIP::minizip sourcepp_crypto sourcepp_parser sourcepp::bsppp sourcepp::kvpp
+        DEPS_PUBLIC tsl::hat_trie
         PRECOMPILED_HEADERS
         "${CMAKE_CURRENT_SOURCE_DIR}/include/vpkpp/format/BSP.h"
         "${CMAKE_CURRENT_SOURCE_DIR}/include/vpkpp/format/FPX.h"
@@ -31,9 +31,6 @@ add_pretty_parser(vpkpp
         "${CMAKE_CURRENT_LIST_DIR}/format/ZIP.cpp"
         "${CMAKE_CURRENT_LIST_DIR}/PackFile.cpp")
 
-target_include_directories(vpkpp PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/ext/hat-trie/include")
-
 if(SOURCEPP_VPKPP_SUPPORT_VPK_V54)
-    target_link_libraries(vpkpp PRIVATE libzstd_static)
     target_compile_definitions(vpkpp PRIVATE VPKPP_SUPPORT_VPK_V54)
 endif()
diff --git a/src/vpkpp/format/BSP.cpp b/src/vpkpp/format/BSP.cpp
index a9f02dc6a..a0c11b928 100644
--- a/src/vpkpp/format/BSP.cpp
+++ b/src/vpkpp/format/BSP.cpp
@@ -113,7 +113,7 @@ bool BSP::bake(const std::string& outputDir_, BakeOptions options, const EntryCa
 		if (!writer) {
 			return false;
 		}
-		writer.writeLump(BSPLump::PAKFILE, fs::readFileBuffer(this->tempZIPPath));
+		writer.writeLump(BSPLump::PAKFILE, fs::readFileBuffer(this->tempZIPPath), false);
 	}
 
 	// Rename and reopen the ZIP
diff --git a/src/vpkpp/format/PCK.cpp b/src/vpkpp/format/PCK.cpp
index e3b7e21d5..0c74a5b83 100644
--- a/src/vpkpp/format/PCK.cpp
+++ b/src/vpkpp/format/PCK.cpp
@@ -34,7 +34,7 @@ std::unique_ptr<PackFile> PCK::create(const std::string& path, uint32_t version,
 
 		if (version > 1) {
 			stream
-				.write(FlagsV2::FLAG_NONE)
+				.write(FLAG_DIR_NONE)
 				.write<uint64_t>(0);
 		}
 
@@ -59,7 +59,7 @@ std::unique_ptr<PackFile> PCK::open(const std::string& path, const EntryCallback
 
 	if (auto signature = reader.read<uint32_t>(); signature != PCK_SIGNATURE) {
 		// PCK might be embedded
-		reader.seek_in(-static_cast<int64_t>(sizeof(uint32_t)), std::ios::end);
+		reader.seek_in(sizeof(uint32_t), std::ios::end);
 		if (auto endSignature = reader.read<uint32_t>(); endSignature != PCK_SIGNATURE) {
 			return nullptr;
 		}
@@ -80,20 +80,20 @@ std::unique_ptr<PackFile> PCK::open(const std::string& path, const EntryCallback
 	reader.read(pck->header.godotVersionMinor);
 	reader.read(pck->header.godotVersionPatch);
 
-	pck->header.flags = FLAG_NONE;
+	pck->header.flags = FLAG_DIR_NONE;
 	std::size_t extraEntryContentsOffset = 0;
 	if (pck->header.packVersion > 1) {
-		pck->header.flags = reader.read<FlagsV2>();
+		pck->header.flags = reader.read<FlagsDirV2>();
 		extraEntryContentsOffset = reader.read<uint64_t>();
 	}
 
-	if (pck->header.flags & FLAG_ENCRYPTED) {
+	if (pck->header.flags & FLAG_DIR_ENCRYPTED) {
 		// File directory is encrypted
 		return nullptr;
 	}
-	if (pck->header.flags & FLAG_RELATIVE_FILE_DATA) {
+	if (pck->header.flags & FLAG_DIR_RELATIVE_FILE_DATA) {
 		extraEntryContentsOffset += pck->startOffset;
-		pck->header.flags = static_cast<FlagsV2>(pck->header.flags & ~FLAG_RELATIVE_FILE_DATA);
+		pck->header.flags = static_cast<FlagsDirV2>(pck->header.flags & ~FLAG_DIR_RELATIVE_FILE_DATA);
 	}
 
 	// Reserved
@@ -115,6 +115,9 @@ std::unique_ptr<PackFile> PCK::open(const std::string& path, const EntryCallback
 
 		if (pck->header.packVersion > 1) {
 			entry.flags = reader.read<uint32_t>();
+			if (entry.flags & FLAG_FILE_REMOVED) {
+				continue;
+			}
 		}
 
 		pck->entries.emplace(entryPath, entry);
@@ -141,7 +144,7 @@ std::optional<std::vector<std::byte>> PCK::readEntry(const std::string& path_) c
 	}
 
 	// It's baked into the file on disk
-	if (entry->flags & FLAG_ENCRYPTED) {
+	if (entry->flags & FLAG_FILE_ENCRYPTED) {
 		// File is encrypted
 		return std::nullopt;
 	}
@@ -182,7 +185,7 @@ bool PCK::bake(const std::string& outputDir_, BakeOptions options, const EntryCa
 			entry->offset = fileData.size();
 
 			fileData.insert(fileData.end(), binData->begin(), binData->end());
-			const auto padding = math::getPaddingForAlignment(PCK_FILE_DATA_PADDING, static_cast<int>(entry->length));
+			const auto padding = math::paddingForAlignment(PCK_FILE_DATA_PADDING, static_cast<int>(entry->length));
 			for (int i = 0; i < padding; i++) {
 				fileData.push_back(static_cast<std::byte>(0));
 			}
@@ -236,7 +239,7 @@ bool PCK::bake(const std::string& outputDir_, BakeOptions options, const EntryCa
 		this->dataOffset = stream.tell_out();
 		for (const auto& path : std::views::keys(entriesToBake)) {
 			const auto entryPath = std::string{PCK_PATH_PREFIX} + path;
-			const auto padding = math::getPaddingForAlignment(PCK_DIRECTORY_STRING_PADDING, static_cast<int>(entryPath.length()));
+			const auto padding = math::paddingForAlignment(PCK_DIRECTORY_STRING_PADDING, static_cast<int>(entryPath.length()));
 			this->dataOffset +=
 					sizeof(uint32_t) +             // Path length
 					entryPath.length() + padding + // Path
@@ -251,7 +254,7 @@ bool PCK::bake(const std::string& outputDir_, BakeOptions options, const EntryCa
 		// Directory
 		for (const auto& [path, entry] : entriesToBake) {
 			const auto entryPath = std::string{PCK_PATH_PREFIX} + path;
-			const auto padding = math::getPaddingForAlignment(PCK_DIRECTORY_STRING_PADDING, static_cast<int>(entryPath.length()));
+			const auto padding = math::paddingForAlignment(PCK_DIRECTORY_STRING_PADDING, static_cast<int>(entryPath.length()));
 			stream.write(static_cast<uint32_t>(entryPath.length() + padding));
 			stream.write(entryPath, false, entryPath.length() + padding);
 
@@ -297,7 +300,7 @@ PCK::operator std::string() const {
 	if (this->startOffset > 0) {
 		out += " | Embedded";
 	}
-	if (this->header.flags & FLAG_ENCRYPTED) {
+	if (this->header.flags & FLAG_DIR_ENCRYPTED) {
 		out += " | Encrypted";
 	}
 	return out;
diff --git a/src/vpkpp/format/VPK.cpp b/src/vpkpp/format/VPK.cpp
index 2034a7cc9..60865f6af 100644
--- a/src/vpkpp/format/VPK.cpp
+++ b/src/vpkpp/format/VPK.cpp
@@ -55,20 +55,21 @@ VPK::VPK(const std::string& fullFilePath_)
 }
 
 std::unique_ptr<PackFile> VPK::create(const std::string& path, uint32_t version) {
-	if (version != 1 && version != 2 && version != 54) {
+	if (version != 0 && version != 1 && version != 2 && version != 54) {
 		return nullptr;
 	}
 
 	{
 		FileStream stream{path, FileStream::OPT_TRUNCATE | FileStream::OPT_CREATE_IF_NONEXISTENT};
 
-		Header1 header1{};
-		header1.signature = VPK_SIGNATURE;
-		header1.version = version;
-		header1.treeSize = 1;
-		stream.write(header1);
-
-		if (version == 2 || version == 54) {
+		if (version > 0) {
+			Header1 header1{};
+			header1.signature = VPK_SIGNATURE;
+			header1.version = version;
+			header1.treeSize = 1;
+			stream.write(header1);
+		}
+		if (version > 1) {
 			Header2 header2{};
 			header2.fileDataSectionSize = 0;
 			header2.archiveMD5SectionSize = 0;
@@ -106,12 +107,28 @@ std::unique_ptr<PackFile> VPK::openInternal(const std::string& path, const Entry
 	reader.seek_in(0);
 	reader.read(vpk->header1);
 	if (vpk->header1.signature != VPK_SIGNATURE) {
-		// File is not a VPK
-		return nullptr;
+		reader.seek_in(3, std::ios::end);
+		if (reader.read<char>() == '\0' && reader.read<char>() == '\0' && reader.read<char>() == '\0') {
+			// HACK: a file that is exactly 9 bytes long is probably an empty VTMB VPK, so bail out here and let the VTMB loader pick it up.
+			// Either way, a 9-byte VPK should not contain any files.
+			if (std::filesystem::file_size(vpk->fullFilePath) == 9) {
+				return nullptr;
+			}
+
+			// File is a legacy headerless VPK: synthesize a version 0 header for it
+			vpk->header1.signature = VPK_SIGNATURE;
+			vpk->header1.version = 0;
+			vpk->header1.treeSize = 0;
+
+			reader.seek_in(0);
+		} else {
+			// File is not a VPK
+			return nullptr;
+		}
 	}
 	if (vpk->hasExtendedHeader()) {
 		reader.read(vpk->header2);
-	} else if (vpk->header1.version != 1) {
+	} else if (vpk->header1.version != 0 && vpk->header1.version != 1) {
 		// Apex Legends, Titanfall, etc. are not supported
 		return nullptr;
 	}
@@ -312,8 +329,8 @@ std::optional<std::vector<std::byte>> VPK::readEntry(const std::string& path_) c
 	}
 
 	const auto entryLength = (this->hasCompression() && entry->compressedLength) ? entry->compressedLength : entry->length;
-	if (!entryLength) {
-		return {};
+	if (entryLength == 0) {
+		return std::vector<std::byte>{};
 	}
 	std::vector out(entryLength, static_cast<std::byte>(0));
 
@@ -559,9 +576,11 @@ bool VPK::bake(const std::string& outputDir_, BakeOptions options, const EntryCa
 	outDir.seek_out(0);
 
 	// Dummy header
-	outDir.write(this->header1);
-	if (this->hasExtendedHeader()) {
-		outDir.write(this->header2);
+	if (this->header1.version > 0) {
+		outDir.write(this->header1);
+		if (this->hasExtendedHeader()) {
+			outDir.write(this->header2);
+		}
 	}
 
 	// File tree data
@@ -730,6 +749,12 @@ bool VPK::bake(const std::string& outputDir_, BakeOptions options, const EntryCa
 		this->footer2.signature.clear();
 	}
 
+	// Legacy version 0 VPK: it has no header, so there is nothing left to rewrite
+	if (this->header1.version == 0) {
+		PackFile::setFullFilePath(outputDir);
+		return true;
+	}
+
 	// Write new headers
 	outDir.seek_out(0);
 	outDir.write(this->header1);
@@ -861,10 +886,8 @@ uint32_t VPK::getVersion() const {
 }
 
 void VPK::setVersion(uint32_t version) {
-	if (version != 1 && version != 2 && version != 54) {
-		return;
-	}
-	if (::isFPX(this) || version == this->header1.version) {
+	// Version must be supported, we cannot be an FPX, and version must be different
+	if ((version != 0 && version != 1 && version != 2 && version != 54) || ::isFPX(this) || version == this->header1.version) {
 		return;
 	}
 	this->header1.version = version;
diff --git a/src/vpkpp/format/VPK_VTMB.cpp b/src/vpkpp/format/VPK_VTMB.cpp
index 28e570123..fac822986 100644
--- a/src/vpkpp/format/VPK_VTMB.cpp
+++ b/src/vpkpp/format/VPK_VTMB.cpp
@@ -59,7 +59,7 @@ std::unique_ptr<PackFile> VPK_VTMB::open(const std::string& path, const EntryCal
 
 void VPK_VTMB::openNumbered(uint32_t archiveIndex, const std::string& path, const EntryCallback& callback) {
 	FileStream reader{path};
-	reader.seek_in(-static_cast<int64_t>(sizeof(uint32_t) * 2 + sizeof(uint8_t)), std::ios::end);
+	reader.seek_in(sizeof(uint32_t) * 2 + sizeof(uint8_t), std::ios::end);
 
 	auto fileCount = reader.read<uint32_t>();
 	auto dirOffset = reader.read<uint32_t>();
diff --git a/src/vtfpp/ImageConversion.cpp b/src/vtfpp/ImageConversion.cpp
index 30b7922a3..b12908f74 100644
--- a/src/vtfpp/ImageConversion.cpp
+++ b/src/vtfpp/ImageConversion.cpp
@@ -2,17 +2,21 @@
 
 #include <algorithm>
 #include <bit>
+#include <cstdlib>
 #include <cstring>
 #include <memory>
+#include <ranges>
 #include <span>
+#include <string_view>
+#include <unordered_map>
 
 #ifdef SOURCEPP_BUILD_WITH_TBB
 #include <execution>
 #endif
 
 #include <Compressonator.h>
-#include <sourcepp/math/Float.h>
 #include <sourcepp/Macros.h>
+#include <sourcepp/Math.h>
 
 #define STB_IMAGE_IMPLEMENTATION
 #define STB_IMAGE_STATIC
@@ -29,6 +33,14 @@
 #define STBI_WRITE_NO_STDIO
 #include <stb_image_write.h>
 
+#define TINYEXR_IMPLEMENTATION 1
+#ifdef SOURCEPP_BUILD_WITH_THREADS
+#define TINYEXR_USE_THREAD 1
+#else
+#define TINYEXR_USE_THREAD 0
+#endif
+#include <tinyexr.h>
+
 using namespace sourcepp;
 using namespace vtfpp;
 
@@ -284,7 +296,7 @@ namespace {
 			return {(r), (g), (b), (a)}; \
 		})
 #ifdef SOURCEPP_BUILD_WITH_TBB
-	#define VTFPP_CONVERT(InputType, r, g, b, a) VTFPP_CONVERT_DETAIL(InputType, r, g, b, a, std::execution::unseq)
+	#define VTFPP_CONVERT(InputType, r, g, b, a) VTFPP_CONVERT_DETAIL(InputType, r, g, b, a, std::execution::par_unseq)
 #else
 	#define VTFPP_CONVERT(InputType, r, g, b, a) VTFPP_CONVERT_DETAIL(InputType, r, g, b, a)
 #endif
@@ -345,7 +357,7 @@ namespace {
 #ifdef SOURCEPP_BUILD_WITH_TBB
 	#define VTFPP_CONVERT(InputType, ...) \
 		std::span<ImagePixel::InputType> newDataSpan{reinterpret_cast<ImagePixel::InputType*>(newData.data()), newData.size() / sizeof(ImagePixel::InputType)}; \
-		std::transform(std::execution::unseq, imageDataSpan.begin(), imageDataSpan.end(), newDataSpan.begin(), [](ImagePixel::RGBA8888 pixel) -> ImagePixel::InputType { \
+		std::transform(std::execution::par_unseq, imageDataSpan.begin(), imageDataSpan.end(), newDataSpan.begin(), [](ImagePixel::RGBA8888 pixel) -> ImagePixel::InputType { \
 			return __VA_ARGS__; \
 		})
 #else
@@ -414,7 +426,7 @@ namespace {
 			return { static_cast<uint16_t>(r), static_cast<uint16_t>(g), static_cast<uint16_t>(b), static_cast<uint16_t>(a) }; \
 		})
 #ifdef SOURCEPP_BUILD_WITH_TBB
-	#define VTFPP_CONVERT(InputType, r, g, b, a) VTFPP_CONVERT_DETAIL(InputType, r, g, b, a, std::execution::unseq)
+	#define VTFPP_CONVERT(InputType, r, g, b, a) VTFPP_CONVERT_DETAIL(InputType, r, g, b, a, std::execution::par_unseq)
 #else
 	#define VTFPP_CONVERT(InputType, r, g, b, a) VTFPP_CONVERT_DETAIL(InputType, r, g, b, a)
 #endif
@@ -484,7 +496,7 @@ namespace {
 #ifdef SOURCEPP_BUILD_WITH_TBB
 	#define VTFPP_CONVERT(InputType, ...) \
 		std::span<ImagePixel::InputType> newDataSpan{reinterpret_cast<ImagePixel::InputType*>(newData.data()), newData.size() / sizeof(ImagePixel::InputType)}; \
-		std::transform(std::execution::unseq, imageDataSpan.begin(), imageDataSpan.end(), newDataSpan.begin(), [](ImagePixel::RGBA16161616 pixel) -> ImagePixel::InputType { \
+		std::transform(std::execution::par_unseq, imageDataSpan.begin(), imageDataSpan.end(), newDataSpan.begin(), [](ImagePixel::RGBA16161616 pixel) -> ImagePixel::InputType { \
 			return __VA_ARGS__; \
 		})
 #else
@@ -528,7 +540,7 @@ namespace {
 		std::span<const ImagePixel::InputType> imageDataSpan{reinterpret_cast<const ImagePixel::InputType*>(imageData.data()), imageData.size() / sizeof(ImagePixel::InputType)}; \
 		std::transform(__VA_ARGS__ __VA_OPT__(,) imageDataSpan.begin(), imageDataSpan.end(), newDataSpan.begin(), [](ImagePixel::InputType pixel) -> ImagePixel::RGBA32323232F { return {(r), (g), (b), (a)}; })
 #ifdef SOURCEPP_BUILD_WITH_TBB
-	#define VTFPP_CONVERT(InputType, r, g, b, a) VTFPP_CONVERT_DETAIL(InputType, r, g, b, a, std::execution::unseq)
+	#define VTFPP_CONVERT(InputType, r, g, b, a) VTFPP_CONVERT_DETAIL(InputType, r, g, b, a, std::execution::par_unseq)
 #else
 	#define VTFPP_CONVERT(InputType, r, g, b, a) VTFPP_CONVERT_DETAIL(InputType, r, g, b, a)
 #endif
@@ -537,12 +549,12 @@ namespace {
 
 	switch (format) {
 		using enum ImageFormat;
-		VTFPP_CASE_CONVERT_AND_BREAK(R32F,          pixel.r,             0.f,                 0.f,                 1.f);
-		VTFPP_CASE_CONVERT_AND_BREAK(RG3232F,       pixel.r,             pixel.g,             0.f,                 1.f);
-		VTFPP_CASE_CONVERT_AND_BREAK(RGB323232F,    pixel.r,             pixel.g,             pixel.b,             1.f);
-		VTFPP_CASE_CONVERT_AND_BREAK(R16F,          pixel.r.toFloat32(), 0.f,                 0.f,                 1.f);
-		VTFPP_CASE_CONVERT_AND_BREAK(RG1616F,       pixel.r.toFloat32(), pixel.g.toFloat32(), 0.f,                 1.f);
-		VTFPP_CASE_CONVERT_AND_BREAK(RGBA16161616F, pixel.r.toFloat32(), pixel.g.toFloat32(), pixel.b.toFloat32(), pixel.a.toFloat32());
+		VTFPP_CASE_CONVERT_AND_BREAK(R32F,          pixel.r, 0.f,     0.f,     1.f);
+		VTFPP_CASE_CONVERT_AND_BREAK(RG3232F,       pixel.r, pixel.g, 0.f,     1.f);
+		VTFPP_CASE_CONVERT_AND_BREAK(RGB323232F,    pixel.r, pixel.g, pixel.b, 1.f);
+		VTFPP_CASE_CONVERT_AND_BREAK(R16F,          pixel.r, 0.f,     0.f,     1.f);
+		VTFPP_CASE_CONVERT_AND_BREAK(RG1616F,       pixel.r, pixel.g, 0.f,     1.f);
+		VTFPP_CASE_CONVERT_AND_BREAK(RGBA16161616F, pixel.r, pixel.g, pixel.b, pixel.a);
 		default: SOURCEPP_DEBUG_BREAK; break;
 	}
 
@@ -571,7 +583,7 @@ namespace {
 #ifdef SOURCEPP_BUILD_WITH_TBB
 	#define VTFPP_CONVERT(InputType, ...) \
 		std::span<ImagePixel::InputType> newDataSpan{reinterpret_cast<ImagePixel::InputType*>(newData.data()), newData.size() / sizeof(ImagePixel::InputType)}; \
-		std::transform(std::execution::unseq, imageDataSpan.begin(), imageDataSpan.end(), newDataSpan.begin(), [](ImagePixel::RGBA32323232F pixel) -> ImagePixel::InputType { \
+		std::transform(std::execution::par_unseq, imageDataSpan.begin(), imageDataSpan.end(), newDataSpan.begin(), [](ImagePixel::RGBA32323232F pixel) -> ImagePixel::InputType { \
 			return __VA_ARGS__; \
 		})
 #else
@@ -589,9 +601,9 @@ namespace {
 		VTFPP_CASE_CONVERT_AND_BREAK(R32F,          {pixel.r});
 		VTFPP_CASE_CONVERT_AND_BREAK(RG3232F,       {pixel.r, pixel.g});
 		VTFPP_CASE_CONVERT_AND_BREAK(RGB323232F,    {pixel.r, pixel.g, pixel.b});
-		VTFPP_CASE_CONVERT_AND_BREAK(R16F,          {pixel.r});
-		VTFPP_CASE_CONVERT_AND_BREAK(RG1616F,       {pixel.r, pixel.g});
-		VTFPP_CASE_CONVERT_AND_BREAK(RGBA16161616F, {pixel.r, pixel.g, pixel.b, pixel.a});
+		VTFPP_CASE_CONVERT_AND_BREAK(R16F,          {half{pixel.r}});
+		VTFPP_CASE_CONVERT_AND_BREAK(RG1616F,       {half{pixel.r}, half{pixel.g}});
+		VTFPP_CASE_CONVERT_AND_BREAK(RGBA16161616F, {half{pixel.r}, half{pixel.g}, half{pixel.b}, half{pixel.a}});
 		default: SOURCEPP_DEBUG_BREAK; break;
 	}
 
@@ -610,10 +622,10 @@ namespace {
 	newData.resize(imageData.size() / (ImageFormatDetails::bpp(ImageFormat::RGBA8888) / 8) * (ImageFormatDetails::bpp(ImageFormat::RGBA32323232F) / 8));
 	std::span newDataSpan{reinterpret_cast<ImagePixel::RGBA32323232F*>(newData.data()), newData.size() / sizeof(ImagePixel::RGBA32323232F)};
 
-	std::span imageDataSpan{reinterpret_cast<const ImagePixel::RGBA8888*>(imageData.data()), imageData.size() / sizeof(ImagePixel::RGBA8888)}; \
+	std::span imageDataSpan{reinterpret_cast<const ImagePixel::RGBA8888*>(imageData.data()), imageData.size() / sizeof(ImagePixel::RGBA8888)};
 	std::transform(
 #ifdef SOURCEPP_BUILD_WITH_TBB
-			std::execution::unseq,
+			std::execution::par_unseq,
 #endif
 			imageDataSpan.begin(), imageDataSpan.end(), newDataSpan.begin(), [](ImagePixel::RGBA8888 pixel) -> ImagePixel::RGBA32323232F {
 		return {
@@ -636,10 +648,10 @@ namespace {
 	newData.resize(imageData.size() / (ImageFormatDetails::bpp(ImageFormat::RGBA32323232F) / 8) * (ImageFormatDetails::bpp(ImageFormat::RGBA8888) / 8));
 	std::span newDataSpan{reinterpret_cast<ImagePixel::RGBA8888*>(newData.data()), newData.size() / sizeof(ImagePixel::RGBA8888)};
 
-	std::span imageDataSpan{reinterpret_cast<const ImagePixel::RGBA32323232F*>(imageData.data()), imageData.size() / sizeof(ImagePixel::RGBA32323232F)}; \
+	std::span imageDataSpan{reinterpret_cast<const ImagePixel::RGBA32323232F*>(imageData.data()), imageData.size() / sizeof(ImagePixel::RGBA32323232F)};
 	std::transform(
 #ifdef SOURCEPP_BUILD_WITH_TBB
-			std::execution::unseq,
+			std::execution::par_unseq,
 #endif
 			imageDataSpan.begin(), imageDataSpan.end(), newDataSpan.begin(), [](ImagePixel::RGBA32323232F pixel) -> ImagePixel::RGBA8888 {
 		return {
@@ -662,10 +674,10 @@ namespace {
 	newData.resize(imageData.size() / (ImageFormatDetails::bpp(ImageFormat::RGBA8888) / 8) * (ImageFormatDetails::bpp(ImageFormat::RGBA16161616) / 8));
 	std::span newDataSpan{reinterpret_cast<ImagePixel::RGBA16161616*>(newData.data()), newData.size() / sizeof(ImagePixel::RGBA16161616)};
 
-	std::span imageDataSpan{reinterpret_cast<const ImagePixel::RGBA8888*>(imageData.data()), imageData.size() / sizeof(ImagePixel::RGBA8888)}; \
+	std::span imageDataSpan{reinterpret_cast<const ImagePixel::RGBA8888*>(imageData.data()), imageData.size() / sizeof(ImagePixel::RGBA8888)};
 	std::transform(
 #ifdef SOURCEPP_BUILD_WITH_TBB
-			std::execution::unseq,
+			std::execution::par_unseq,
 #endif
 			imageDataSpan.begin(), imageDataSpan.end(), newDataSpan.begin(), [](ImagePixel::RGBA8888 pixel) -> ImagePixel::RGBA16161616 {
 		return {
@@ -688,10 +700,10 @@ namespace {
 	newData.resize(imageData.size() / (ImageFormatDetails::bpp(ImageFormat::RGBA16161616) / 8) * (ImageFormatDetails::bpp(ImageFormat::RGBA8888) / 8));
 	std::span newDataSpan{reinterpret_cast<ImagePixel::RGBA8888*>(newData.data()), newData.size() / sizeof(ImagePixel::RGBA8888)};
 
-	std::span imageDataSpan{reinterpret_cast<const ImagePixel::RGBA16161616*>(imageData.data()), imageData.size() / sizeof(ImagePixel::RGBA16161616)}; \
+	std::span imageDataSpan{reinterpret_cast<const ImagePixel::RGBA16161616*>(imageData.data()), imageData.size() / sizeof(ImagePixel::RGBA16161616)};
 	std::transform(
 #ifdef SOURCEPP_BUILD_WITH_TBB
-			std::execution::unseq,
+			std::execution::par_unseq,
 #endif
 			imageDataSpan.begin(), imageDataSpan.end(), newDataSpan.begin(), [](ImagePixel::RGBA16161616 pixel) -> ImagePixel::RGBA8888 {
 		return {
@@ -714,10 +726,10 @@ namespace {
 	newData.resize(imageData.size() / (ImageFormatDetails::bpp(ImageFormat::RGBA32323232F) / 8) * (ImageFormatDetails::bpp(ImageFormat::RGBA16161616) / 8));
 	std::span newDataSpan{reinterpret_cast<ImagePixel::RGBA16161616*>(newData.data()), newData.size() / sizeof(ImagePixel::RGBA16161616)};
 
-	std::span imageDataSpan{reinterpret_cast<const ImagePixel::RGBA32323232F*>(imageData.data()), imageData.size() / sizeof(ImagePixel::RGBA32323232F)}; \
+	std::span imageDataSpan{reinterpret_cast<const ImagePixel::RGBA32323232F*>(imageData.data()), imageData.size() / sizeof(ImagePixel::RGBA32323232F)};
 	std::transform(
 #ifdef SOURCEPP_BUILD_WITH_TBB
-			std::execution::unseq,
+			std::execution::par_unseq,
 #endif
 			imageDataSpan.begin(), imageDataSpan.end(), newDataSpan.begin(), [](ImagePixel::RGBA32323232F pixel) -> ImagePixel::RGBA16161616 {
 		return {
@@ -740,10 +752,10 @@ namespace {
 	newData.resize(imageData.size() / (ImageFormatDetails::bpp(ImageFormat::RGBA16161616) / 8) * (ImageFormatDetails::bpp(ImageFormat::RGBA32323232F) / 8));
 	std::span newDataSpan{reinterpret_cast<ImagePixel::RGBA32323232F*>(newData.data()), newData.size() / sizeof(ImagePixel::RGBA32323232F)};
 
-	std::span imageDataSpan{reinterpret_cast<const ImagePixel::RGBA16161616*>(imageData.data()), imageData.size() / sizeof(ImagePixel::RGBA16161616)}; \
+	std::span imageDataSpan{reinterpret_cast<const ImagePixel::RGBA16161616*>(imageData.data()), imageData.size() / sizeof(ImagePixel::RGBA16161616)};
 	std::transform(
 #ifdef SOURCEPP_BUILD_WITH_TBB
-			std::execution::unseq,
+			std::execution::par_unseq,
 #endif
 			imageDataSpan.begin(), imageDataSpan.end(), newDataSpan.begin(), [](ImagePixel::RGBA16161616 pixel) -> ImagePixel::RGBA32323232F {
 		return {
@@ -773,14 +785,13 @@ std::vector<std::byte> ImageConversion::convertImageDataToFormat(std::span<const
 	const ImageFormat intermediaryOldFormat = ImageFormatDetails::containerFormat(oldFormat);
 	if (ImageFormatDetails::compressed(oldFormat)) {
 		newData = ::convertImageDataUsingCompressonator(imageData, oldFormat, intermediaryOldFormat, width, height);
-	} else if (intermediaryOldFormat == ImageFormat::RGBA8888) {
-		newData = ::convertImageDataToRGBA8888(imageData, oldFormat);
-	} else if (intermediaryOldFormat == ImageFormat::RGBA16161616) {
-		newData = ::convertImageDataToRGBA16161616(imageData, oldFormat);
-	} else if (intermediaryOldFormat == ImageFormat::RGBA32323232F) {
-		newData = ::convertImageDataToRGBA32323232F(imageData, oldFormat);
 	} else {
-		return {};
+		switch (intermediaryOldFormat) {
+			case ImageFormat::RGBA8888:      newData = ::convertImageDataToRGBA8888(imageData, oldFormat);      break;
+			case ImageFormat::RGBA16161616:  newData = ::convertImageDataToRGBA16161616(imageData, oldFormat);  break;
+			case ImageFormat::RGBA32323232F: newData = ::convertImageDataToRGBA32323232F(imageData, oldFormat); break;
+			default:                         return {};
+		}
 	}
 
 	if (intermediaryOldFormat == newFormat) {
@@ -824,14 +835,13 @@ std::vector<std::byte> ImageConversion::convertImageDataToFormat(std::span<const
 
 	if (ImageFormatDetails::compressed(newFormat)) {
 		newData = ::convertImageDataUsingCompressonator(newData, intermediaryNewFormat, newFormat, width, height);
-	} else if (intermediaryNewFormat == ImageFormat::RGBA8888) {
-		newData = ::convertImageDataFromRGBA8888(newData, newFormat);
-	} else if (intermediaryNewFormat == ImageFormat::RGBA16161616) {
-		newData = ::convertImageDataFromRGBA16161616(newData, newFormat);
-	} else if (intermediaryNewFormat == ImageFormat::RGBA32323232F) {
-		newData = ::convertImageDataFromRGBA32323232F(newData, newFormat);
 	} else {
-		return {};
+		switch (intermediaryNewFormat) {
+			case ImageFormat::RGBA8888:      newData = ::convertImageDataFromRGBA8888(newData, newFormat);      break;
+			case ImageFormat::RGBA16161616:  newData = ::convertImageDataFromRGBA16161616(newData, newFormat);  break;
+			case ImageFormat::RGBA32323232F: newData = ::convertImageDataFromRGBA32323232F(newData, newFormat); break;
+			default:                         return {};
+		}
 	}
 
 	return newData;
@@ -864,6 +874,11 @@ std::vector<std::byte> ImageConversion::convertSeveralImageDataToFormat(std::spa
 	return out;
 }
 
+ImageConversion::FileFormat ImageConversion::getDefaultFileFormatForImageFormat(ImageFormat format) {
+	using enum FileFormat;
+	return ImageFormatDetails::decimal(format) ? EXR : PNG;
+}
+
 std::vector<std::byte> ImageConversion::convertImageDataToFile(std::span<const std::byte> imageData, ImageFormat format, uint16_t width, uint16_t height, FileFormat fileFormat) {
 	if (imageData.empty() || format == ImageFormat::EMPTY) {
 		return {};
@@ -874,11 +889,7 @@ std::vector<std::byte> ImageConversion::convertImageDataToFile(std::span<const s
 	};
 
 	if (fileFormat == FileFormat::DEFAULT) {
-		if (ImageFormatDetails::decimal(format)) {
-			fileFormat = FileFormat::HDR;
-		} else {
-			fileFormat = FileFormat::PNG;
-		}
+		fileFormat = getDefaultFileFormatForImageFormat(format);
 	}
 	switch (fileFormat) {
 		case FileFormat::PNG: {
@@ -886,9 +897,9 @@ std::vector<std::byte> ImageConversion::convertImageDataToFile(std::span<const s
 				stbi_write_png_to_func(stbWriteFunc, &out, width, height, ImageFormatDetails::bpp(ImageFormat::RGB888) / 8, imageData.data(), 0);
 			} else if (format == ImageFormat::RGBA8888) {
 				stbi_write_png_to_func(stbWriteFunc, &out, width, height, ImageFormatDetails::bpp(ImageFormat::RGBA8888) / 8, imageData.data(), 0);
-			} else if (ImageFormatDetails::large(format)) {
-				const auto rgba = convertImageDataToFormat(imageData, format, ImageFormat::RGBA16161616, width, height);
-				stbi_write_png_to_func(stbWriteFunc, &out, width, height, ImageFormatDetails::bpp(ImageFormat::RGBA16161616) / (8 * sizeof(uint16_t)), rgba.data(), 0);
+			} else if (ImageFormatDetails::opaque(format)) {
+				const auto rgb = convertImageDataToFormat(imageData, format, ImageFormat::RGB888, width, height);
+				stbi_write_png_to_func(stbWriteFunc, &out, width, height, ImageFormatDetails::bpp(ImageFormat::RGB888) / 8, rgb.data(), 0);
 			} else {
 				const auto rgba = convertImageDataToFormat(imageData, format, ImageFormat::RGBA8888, width, height);
 				stbi_write_png_to_func(stbWriteFunc, &out, width, height, ImageFormatDetails::bpp(ImageFormat::RGBA8888) / 8, rgba.data(), 0);
@@ -909,6 +920,9 @@ std::vector<std::byte> ImageConversion::convertImageDataToFile(std::span<const s
 				stbi_write_bmp_to_func(stbWriteFunc, &out, width, height, ImageFormatDetails::bpp(ImageFormat::RGB888) / 8, imageData.data());
 			} else if (format == ImageFormat::RGBA8888) {
 				stbi_write_bmp_to_func(stbWriteFunc, &out, width, height, ImageFormatDetails::bpp(ImageFormat::RGBA8888) / 8, imageData.data());
+			} else if (ImageFormatDetails::opaque(format)) {
+				const auto rgb = convertImageDataToFormat(imageData, format, ImageFormat::RGB888, width, height);
+				stbi_write_bmp_to_func(stbWriteFunc, &out, width, height, ImageFormatDetails::bpp(ImageFormat::RGB888) / 8, rgb.data());
 			} else {
 				const auto rgba = convertImageDataToFormat(imageData, format, ImageFormat::RGBA8888, width, height);
 				stbi_write_bmp_to_func(stbWriteFunc, &out, width, height, ImageFormatDetails::bpp(ImageFormat::RGBA8888) / 8, rgba.data());
@@ -920,9 +934,9 @@ std::vector<std::byte> ImageConversion::convertImageDataToFile(std::span<const s
 				stbi_write_tga_to_func(stbWriteFunc, &out, width, height, ImageFormatDetails::bpp(ImageFormat::RGB888) / 8, imageData.data());
 			} else if (format == ImageFormat::RGBA8888) {
 				stbi_write_tga_to_func(stbWriteFunc, &out, width, height, ImageFormatDetails::bpp(ImageFormat::RGBA8888) / 8, imageData.data());
-			} else if (ImageFormatDetails::large(format)) {
-				const auto rgba = convertImageDataToFormat(imageData, format, ImageFormat::RGBA16161616, width, height);
-				stbi_write_tga_to_func(stbWriteFunc, &out, width, height, ImageFormatDetails::bpp(ImageFormat::RGBA16161616) / (8 * sizeof(uint16_t)), rgba.data());
+			} else if (ImageFormatDetails::opaque(format)) {
+				const auto rgb = convertImageDataToFormat(imageData, format, ImageFormat::RGB888, width, height);
+				stbi_write_tga_to_func(stbWriteFunc, &out, width, height, ImageFormatDetails::bpp(ImageFormat::RGB888) / 8, rgb.data());
 			} else {
 				const auto rgba = convertImageDataToFormat(imageData, format, ImageFormat::RGBA8888, width, height);
 				stbi_write_tga_to_func(stbWriteFunc, &out, width, height, ImageFormatDetails::bpp(ImageFormat::RGBA8888) / 8, rgba.data());
@@ -930,12 +944,148 @@ std::vector<std::byte> ImageConversion::convertImageDataToFile(std::span<const s
 			break;
 		}
 		case FileFormat::HDR: {
-			if (format == ImageFormat::RGBA32323232F) {
-				stbi_write_hdr_to_func(stbWriteFunc, &out, width, height, ImageFormatDetails::bpp(ImageFormat::RGBA32323232F) / (8 * sizeof(float)), reinterpret_cast<const float*>(imageData.data()));
+			if (format == ImageFormat::RGB323232F) {
+				stbi_write_hdr_to_func(stbWriteFunc, &out, width, height, ImageFormatDetails::bpp(ImageFormat::RGB323232F) / (8 * sizeof(float)), reinterpret_cast<const float*>(imageData.data()));
 			} else {
-				auto hdr = convertImageDataToFormat(imageData, format, ImageFormat::RGBA32323232F, width, height);
-				stbi_write_hdr_to_func(stbWriteFunc, &out, width, height, ImageFormatDetails::bpp(ImageFormat::RGBA32323232F) / (8 * sizeof(float)), reinterpret_cast<float*>(hdr.data()));
+				const auto hdr = convertImageDataToFormat(imageData, format, ImageFormat::RGB323232F, width, height);
+				stbi_write_hdr_to_func(stbWriteFunc, &out, width, height, ImageFormatDetails::bpp(ImageFormat::RGB323232F) / (8 * sizeof(float)), reinterpret_cast<const float*>(hdr.data()));
+			}
+			break;
+		}
+		case FileFormat::EXR: {
+			// Encode as an OpenEXR file through tinyexr, handing it one buffer per channel.
+			// Yields an empty buffer when the channel layout is unsupported or tinyexr reports an error.
+			EXRHeader header;
+			InitEXRHeader(&header);
+
+			// Formats that are not "decimal", or that are compressed, are first converted to 32-bit float
+			// (keeping alpha only when the source is transparent); other formats are copied through unchanged.
+			std::vector<std::byte> rawData;
+			if (!ImageFormatDetails::decimal(format) || ImageFormatDetails::compressed(format)) {
+				if (ImageFormatDetails::transparent(format)) {
+					rawData = convertImageDataToFormat(imageData, format, ImageFormat::RGBA32323232F, width, height);
+					format = ImageFormat::RGBA32323232F;
+				} else {
+					rawData = convertImageDataToFormat(imageData, format, ImageFormat::RGB323232F, width, height);
+					format = ImageFormat::RGB323232F;
+				}
+			} else {
+				rawData = {imageData.begin(), imageData.end()};
+			}
+			// Every channel below has to be read from rawData: after a conversion, imageData is still
+			// laid out in the caller's original format and no longer matches `format`.
+			const std::span<const std::byte> pixelData{rawData};
+
+			header.num_channels = (ImageFormatDetails::red(format) > 0) + (ImageFormatDetails::green(format) > 0) + (ImageFormatDetails::blue(format) > 0) + (ImageFormatDetails::alpha(format) > 0);
+			// These arrays are handed to FreeEXRHeader(), which is called on every exit path below.
+			header.channels = static_cast<EXRChannelInfo*>(std::malloc(header.num_channels * sizeof(EXRChannelInfo)));
+			header.pixel_types = static_cast<int*>(std::malloc(header.num_channels * sizeof(int)));
+			header.requested_pixel_types = static_cast<int*>(std::malloc(header.num_channels * sizeof(int)));
+
+			// Single-letter channel names, listed in alphabetical order (A, B, G, R).
+			switch (header.num_channels) {
+				case 4:
+					header.channels[0].name[0] = 'A';
+					header.channels[1].name[0] = 'B';
+					header.channels[2].name[0] = 'G';
+					header.channels[3].name[0] = 'R';
+					break;
+				case 3:
+					header.channels[0].name[0] = 'B';
+					header.channels[1].name[0] = 'G';
+					header.channels[2].name[0] = 'R';
+					break;
+				case 2:
+					header.channels[0].name[0] = 'G';
+					header.channels[1].name[0] = 'R';
+					break;
+				case 1:
+					header.channels[0].name[0] = 'R';
+					break;
+				default:
+					FreeEXRHeader(&header);
+					return {};
+			}
+			for (int i = 0; i < header.num_channels; i++) {
+				header.channels[i].name[1] = '\0';
+			}
+
+			int pixelType = (ImageFormatDetails::red(format) / 8) == sizeof(half) ? TINYEXR_PIXELTYPE_HALF : TINYEXR_PIXELTYPE_FLOAT;
+			for (int i = 0; i < header.num_channels; i++) {
+				header.pixel_types[i] = pixelType;
+				header.requested_pixel_types[i] = pixelType;
+			}
+
+			// Split the pixels into one buffer per channel, in the same order as the names above.
+			std::vector<std::vector<std::byte>> images(header.num_channels);
+			std::vector<void*> imagePtrs(header.num_channels);
+			switch (header.num_channels) {
+				case 4:
+					if (pixelType == TINYEXR_PIXELTYPE_HALF) {
+						images[0] = extractChannelFromImageData(pixelData, &ImagePixel::RGBA16161616F::a);
+						images[1] = extractChannelFromImageData(pixelData, &ImagePixel::RGBA16161616F::b);
+						images[2] = extractChannelFromImageData(pixelData, &ImagePixel::RGBA16161616F::g);
+						images[3] = extractChannelFromImageData(pixelData, &ImagePixel::RGBA16161616F::r);
+					} else {
+						images[0] = extractChannelFromImageData(pixelData, &ImagePixel::RGBA32323232F::a);
+						images[1] = extractChannelFromImageData(pixelData, &ImagePixel::RGBA32323232F::b);
+						images[2] = extractChannelFromImageData(pixelData, &ImagePixel::RGBA32323232F::g);
+						images[3] = extractChannelFromImageData(pixelData, &ImagePixel::RGBA32323232F::r);
+					}
+					break;
+				case 3:
+					if (pixelType == TINYEXR_PIXELTYPE_HALF) {
+						// We should not be here!
+						FreeEXRHeader(&header);
+						return {};
+					}
+					images[0] = extractChannelFromImageData(pixelData, &ImagePixel::RGB323232F::b);
+					images[1] = extractChannelFromImageData(pixelData, &ImagePixel::RGB323232F::g);
+					images[2] = extractChannelFromImageData(pixelData, &ImagePixel::RGB323232F::r);
+					break;
+				case 2:
+					if (pixelType == TINYEXR_PIXELTYPE_HALF) {
+						images[0] = extractChannelFromImageData(pixelData, &ImagePixel::RG1616F::g);
+						images[1] = extractChannelFromImageData(pixelData, &ImagePixel::RG1616F::r);
+					} else {
+						images[0] = extractChannelFromImageData(pixelData, &ImagePixel::RG3232F::g);
+						images[1] = extractChannelFromImageData(pixelData, &ImagePixel::RG3232F::r);
+					}
+					break;
+				case 1:
+					images[0] = rawData;
+					break;
+				default:
+					FreeEXRHeader(&header);
+					return {};
+			}
+			for (int i = 0; i < header.num_channels; i++) {
+				imagePtrs[i] = images[i].data();
 			}
+
+			EXRImage image;
+			InitEXRImage(&image);
+			image.width = width;
+			image.height = height;
+			image.images = reinterpret_cast<unsigned char**>(imagePtrs.data());
+			image.num_channels = header.num_channels;
+
+			unsigned char* data = nullptr;
+			const char* err = nullptr;
+
+			// On success the encoded bytes are copied into `out` and tinyexr's buffer is released with free().
+			size_t size = SaveEXRImageToMemory(&image, &header, &data, &err);
+			if (err) {
+				FreeEXRErrorMessage(err);
+				FreeEXRHeader(&header);
+				return {};
+			}
+			if (data) {
+				out = {reinterpret_cast<std::byte*>(data), reinterpret_cast<std::byte*>(data) + size};
+				std::free(data);
+			}
+
+			FreeEXRHeader(&header);
 			break;
 		}
 		case FileFormat::DEFAULT:
@@ -953,7 +1092,171 @@ std::vector<std::byte> ImageConversion::convertFileToImageData(std::span<const s
 	int channels = 0;
 	frameCount = 1;
 
-	// Floating point single frame image
+	// EXR (multipart and non-image files are rejected)
+	if (EXRVersion version; ParseEXRVersionFromMemory(&version, reinterpret_cast<const unsigned char*>(fileData.data()), fileData.size()) == TINYEXR_SUCCESS) {
+		if (version.multipart || version.non_image) {
+			return {};
+		}
+
+		EXRHeader header;
+		InitEXRHeader(&header);
+		const char* err = nullptr;
+		if (ParseEXRHeaderFromMemory(&header, &version, reinterpret_cast<const unsigned char*>(fileData.data()), fileData.size(), &err) != TINYEXR_SUCCESS) {
+			FreeEXRErrorMessage(err);
+			// tinyexr may have partially filled the header before failing, so release it like every other exit path
+			FreeEXRHeader(&header);
+			return {};
+		}
+
+		// Sanity check
+		if (header.num_channels < 1) {
+			FreeEXRHeader(&header);
+			return {};
+		}
+
+		// Define the channel names we support (RGBA, greyscale); -1 means "not present in this file"
+		std::unordered_map<std::string_view, int> channelIndices{{"R", -1}, {"G", -1}, {"B", -1}, {"A", -1}, {"Y", -1}};
+
+		// Get channel type (EXR supports different types per channel, we do not)
+		// Rather than bailing we pick the widest type and ask EXR to convert the lower precision data
+		auto channelType = header.pixel_types[0];
+		for (int i = 1; i < header.num_channels; i++) {
+			// UINT -> HALF -> FLOAT
+			if (header.pixel_types[i] > channelType && channelIndices.contains(header.channels[i].name)) {
+				channelType = header.pixel_types[i];
+			}
+		}
+		// requested_pixel_types field only supports floats
+		if (channelType == TINYEXR_PIXELTYPE_UINT) {
+			channelType = TINYEXR_PIXELTYPE_HALF;
+		}
+
+		// Determine proper format to use
+		for (int i = 0; i < header.num_channels; i++) {
+			if (channelIndices.contains(header.channels[i].name)) {
+				channelIndices[header.channels[i].name] = i;
+			}
+		}
+		if (channelIndices["Y"] >= 0) {
+			if (channelIndices["A"] >= 0) {
+				format = channelType == TINYEXR_PIXELTYPE_HALF ? ImageFormat::RGBA16161616F : ImageFormat::RGBA32323232F;
+			} else {
+				if (channelType == TINYEXR_PIXELTYPE_HALF) {
+					// VTF has no RGB161616F
+					channelType = TINYEXR_PIXELTYPE_FLOAT;
+				}
+				format = ImageFormat::RGB323232F;
+			}
+			// Greyscale: the single Y channel is replicated into R, G and B
+			channelIndices["R"] = channelIndices["Y"];
+			channelIndices["G"] = channelIndices["Y"];
+			channelIndices["B"] = channelIndices["Y"];
+		} else if (channelIndices["A"] >= 0) {
+			format = channelType == TINYEXR_PIXELTYPE_HALF ? ImageFormat::RGBA16161616F : ImageFormat::RGBA32323232F;
+		} else if (channelIndices["B"] >= 0) {
+			if (channelType == TINYEXR_PIXELTYPE_HALF) {
+				// VTF has no RGB161616F
+				channelType = TINYEXR_PIXELTYPE_FLOAT;
+			}
+			format = ImageFormat::RGB323232F;
+		} else if (channelIndices["G"] >= 0) {
+			format = channelType == TINYEXR_PIXELTYPE_HALF ? ImageFormat::RG1616F : ImageFormat::RG3232F;
+		} else if (channelIndices["R"] >= 0) {
+			format = channelType == TINYEXR_PIXELTYPE_HALF ? ImageFormat::R16F : ImageFormat::R32F;
+		} else {
+			FreeEXRHeader(&header);
+			return {};
+		}
+
+		// Now that channelType has stopped changing, we can set it properly
+		for (int i = 0; i < header.num_channels; i++) {
+			if (header.pixel_types[i] != channelType && channelIndices.contains(header.channels[i].name)) {
+				header.requested_pixel_types[i] = channelType;
+			}
+		}
+
+		EXRImage image;
+		InitEXRImage(&image);
+		if (LoadEXRImageFromMemory(&image, &header, reinterpret_cast<const unsigned char*>(fileData.data()), fileData.size(), &err) != TINYEXR_SUCCESS) {
+			FreeEXRErrorMessage(err);
+			FreeEXRHeader(&header);
+			return {};
+		}
+
+		width = image.width;
+		height = image.height;
+
+		// Merge channel data into a single buffer
+		// The pixel count is computed in 64 bits so large images cannot overflow int arithmetic
+		const auto pixelCount = static_cast<uint64_t>(width) * static_cast<uint64_t>(height);
+		std::vector<std::byte> combinedChannels(pixelCount * (ImageFormatDetails::bpp(format) / 8));
+		// C is the sample type (half or float) used for both the tinyexr channel buffers and the output buffer
+		const auto populateBuffer = [
+			hasRed=ImageFormatDetails::red(format) > 0,
+			hasGreen=ImageFormatDetails::green(format) > 0,
+			hasBlue=ImageFormatDetails::blue(format) > 0,
+			hasAlpha=ImageFormatDetails::alpha(format) > 0,
+			pixelCount,
+			&header,
+			r=channelIndices["R"],
+			g=channelIndices["G"],
+			b=channelIndices["B"],
+			a=channelIndices["A"],
+			&image,
+			&combinedChannels
+		]<typename C> {
+			const auto channelCount = hasRed + hasGreen + hasBlue + hasAlpha;
+			std::span out{reinterpret_cast<C*>(combinedChannels.data()), combinedChannels.size() / sizeof(C)};
+			if (header.tiled) {
+				for (int t = 0; t < image.num_tiles; t++) {
+					auto** src = reinterpret_cast<C**>(image.tiles[t].images);
+					for (int j = 0; j < header.tile_size_y; j++) {
+						for (int i = 0; i < header.tile_size_x; i++) {
+							const auto ii = static_cast<uint64_t>(image.tiles[t].offset_x) * header.tile_size_x + i;
+							const auto jj = static_cast<uint64_t>(image.tiles[t].offset_y) * header.tile_size_y + j;
+							const auto idx = ii + jj * image.width;
+
+							// Tile pixels that fall outside the image bounds are skipped
+							if (ii >= image.width || jj >= image.height) {
+								continue;
+							}
+
+							// Channels missing from the file are filled with 0, except alpha which is filled with 1
+							const auto srcIdx = j * static_cast<uint64_t>(header.tile_size_x) + i;
+							if (r >= 0)        out[idx * channelCount + 0] = src[r][srcIdx];
+							else if (hasRed)   out[idx * channelCount + 0] = 0.f;
+							if (g >= 0)        out[idx * channelCount + 1] = src[g][srcIdx];
+							else if (hasGreen) out[idx * channelCount + 1] = 0.f;
+							if (b >= 0)        out[idx * channelCount + 2] = src[b][srcIdx];
+							else if (hasBlue)  out[idx * channelCount + 2] = 0.f;
+							if (a >= 0)        out[idx * channelCount + 3] = src[a][srcIdx];
+							else if (hasAlpha) out[idx * channelCount + 3] = 1.f;
+						}
+					}
+				}
+			} else {
+				auto** src = reinterpret_cast<C**>(image.images);
+				for (uint64_t i = 0; i < pixelCount; i++) {
+					if (r >= 0)        out[i * channelCount + 0] = src[r][i];
+					else if (hasRed)   out[i * channelCount + 0] = 0.f;
+					if (g >= 0)        out[i * channelCount + 1] = src[g][i];
+					else if (hasGreen) out[i * channelCount + 1] = 0.f;
+					if (b >= 0)        out[i * channelCount + 2] = src[b][i];
+					else if (hasBlue)  out[i * channelCount + 2] = 0.f;
+					if (a >= 0)        out[i * channelCount + 3] = src[a][i];
+					else if (hasAlpha) out[i * channelCount + 3] = 1.f;
+				}
+			}
+		};
+		if (channelType == TINYEXR_PIXELTYPE_HALF) {
+			populateBuffer.operator()<half>();
+		} else {
+			populateBuffer.operator()<float>();
+		}
+
+		FreeEXRImage(&image);
+		FreeEXRHeader(&header);
+		return combinedChannels;
+	}
+
+	// HDR
 	if (stbi_is_hdr_from_memory(reinterpret_cast<const stbi_uc*>(fileData.data()), static_cast<int>(fileData.size()))) {
 		const std::unique_ptr<float, void(*)(void*)> stbImage{
 			stbi_loadf_from_memory(reinterpret_cast<const stbi_uc*>(fileData.data()), static_cast<int>(fileData.size()), &width, &height, &channels, 0),
@@ -965,6 +1264,7 @@ std::vector<std::byte> ImageConversion::convertFileToImageData(std::span<const s
 
 		switch (channels) {
 			case 1:  format = ImageFormat::R32F;          break;
+			case 2:  format = ImageFormat::RG3232F;       break;
 			case 3:  format = ImageFormat::RGB323232F;    break;
 			case 4:  format = ImageFormat::RGBA32323232F; break;
 			default: return {};
@@ -973,26 +1273,7 @@ std::vector<std::byte> ImageConversion::convertFileToImageData(std::span<const s
 		return {reinterpret_cast<std::byte*>(stbImage.get()), reinterpret_cast<std::byte*>(stbImage.get()) + ImageFormatDetails::getDataLength(format, width, height)};
 	}
 
-	// 16-bit single-frame image
-	if (stbi_is_16_bit_from_memory(reinterpret_cast<const stbi_uc*>(fileData.data()), static_cast<int>(fileData.size()))) {
-		const std::unique_ptr<stbi_us, void(*)(void*)> stbImage{
-			stbi_load_16_from_memory(reinterpret_cast<const stbi_uc*>(fileData.data()), static_cast<int>(fileData.size()), &width, &height, &channels, 0),
-			&stbi_image_free,
-		};
-		if (!stbImage) {
-			return {};
-		}
-
-		if (channels == 4) {
-			format = ImageFormat::RGBA16161616;
-		} else {
-			return {};
-		}
-
-		return {reinterpret_cast<std::byte*>(stbImage.get()), reinterpret_cast<std::byte*>(stbImage.get()) + ImageFormatDetails::getDataLength(format, width, height)};
-	}
-
-	// 8-bit or less multi-frame image
+	// GIF
 	if (fileData.size() >= 3 && static_cast<char>(fileData[0]) == 'G' && static_cast<char>(fileData[1]) == 'I' && static_cast<char>(fileData[2]) == 'F') {
 		const std::unique_ptr<stbi_uc, void(*)(void*)> stbImage{
 			stbi_load_gif_from_memory(reinterpret_cast<const stbi_uc*>(fileData.data()), static_cast<int>(fileData.size()), nullptr, &width, &height, &frameCount, &channels, 0),
@@ -1013,6 +1294,82 @@ std::vector<std::byte> ImageConversion::convertFileToImageData(std::span<const s
 		return {reinterpret_cast<std::byte*>(stbImage.get()), reinterpret_cast<std::byte*>(stbImage.get() + (ImageFormatDetails::getDataLength(format, width, height) * frameCount))};
 	}
 
+	// 16-bit single-frame image
+	if (stbi_is_16_bit_from_memory(reinterpret_cast<const stbi_uc*>(fileData.data()), static_cast<int>(fileData.size()))) {
+		const std::unique_ptr<stbi_us, void(*)(void*)> stbImage{
+			stbi_load_16_from_memory(reinterpret_cast<const stbi_uc*>(fileData.data()), static_cast<int>(fileData.size()), &width, &height, &channels, 0),
+			&stbi_image_free,
+		};
+		if (!stbImage) {
+			return {};
+		}
+
+		if (channels == 4) {
+			format = ImageFormat::RGBA16161616;
+		} else if (channels >= 1 && channels < 4) {
+			// There are no other 16-bit integer formats in Source, so we have to do a conversion here
+			// NOTE(review): stb documents 1/2 channels as grey/grey+alpha; here they land in R and RG - confirm intended
+			format = ImageFormat::RGBA16161616;
+
+			std::vector<std::byte> out(ImageFormatDetails::getDataLength(format, width, height));
+			std::span<ImagePixel::RGBA16161616> outPixels{reinterpret_cast<ImagePixel::RGBA16161616*>(out.data()), out.size() / sizeof(ImagePixel::RGBA16161616)};
+
+			switch (channels) {
+				case 1: {
+					std::span<uint16_t> inPixels{reinterpret_cast<uint16_t*>(stbImage.get()), outPixels.size()};
+					std::transform(
+#ifdef SOURCEPP_BUILD_WITH_TBB
+						std::execution::par_unseq,
+#endif
+						inPixels.begin(), inPixels.end(), outPixels.begin(), [](uint16_t pixel) -> ImagePixel::RGBA16161616 {
+						return {pixel, 0, 0, 0xffff};
+					});
+					break;
+				}
+				case 2: {
+					struct RG1616 {
+						uint16_t r;
+						uint16_t g;
+					};
+					std::span<RG1616> inPixels{reinterpret_cast<RG1616*>(stbImage.get()), outPixels.size()};
+					std::transform(
+#ifdef SOURCEPP_BUILD_WITH_TBB
+						std::execution::par_unseq,
+#endif
+						inPixels.begin(), inPixels.end(), outPixels.begin(), [](RG1616 pixel) -> ImagePixel::RGBA16161616 {
+						return {pixel.r, pixel.g, 0, 0xffff};
+					});
+					break;
+				}
+				case 3: {
+					struct RGB161616 {
+						uint16_t r;
+						uint16_t g;
+						uint16_t b;
+					};
+					std::span<RGB161616> inPixels{reinterpret_cast<RGB161616*>(stbImage.get()), outPixels.size()};
+					std::transform(
+#ifdef SOURCEPP_BUILD_WITH_TBB
+						std::execution::par_unseq,
+#endif
+						inPixels.begin(), inPixels.end(), outPixels.begin(), [](RGB161616 pixel) -> ImagePixel::RGBA16161616 {
+						return {pixel.r, pixel.g, pixel.b, 0xffff};
+					});
+					break;
+				}
+				default:
+					return {};
+			}
+			// Return the widened copy: the stb buffer only holds `channels` samples per pixel, not four
+			return out;
+		} else {
+			return {};
+		}
+
+		// Already four channels: copy stb's buffer out as-is
+		return {reinterpret_cast<std::byte*>(stbImage.get()), reinterpret_cast<std::byte*>(stbImage.get()) + ImageFormatDetails::getDataLength(format, width, height)};
+	}
+
 	// 8-bit or less single frame image
 	const std::unique_ptr<stbi_uc, void(*)(void*)> stbImage{
 		stbi_load_from_memory(reinterpret_cast<const stbi_uc*>(fileData.data()), static_cast<int>(fileData.size()), &width, &height, &channels, 0),
diff --git a/src/vtfpp/PPL.cpp b/src/vtfpp/PPL.cpp
index c300de2c6..2f3b45db8 100644
--- a/src/vtfpp/PPL.cpp
+++ b/src/vtfpp/PPL.cpp
@@ -200,7 +200,7 @@ std::vector<std::byte> PPL::bake() {
 		}
 		const auto seekPoint = writer.tell();
 		writer.seek_u(currentOffset).write(image.data);
-		const auto alignment = math::getPaddingForAlignment(ALIGNMENT, writer.tell());
+		const auto alignment = math::paddingForAlignment(ALIGNMENT, writer.tell());
 		for (int i = 0; i < alignment; i++) {
 			writer.write<uint8_t>(0);
 		}
diff --git a/src/vtfpp/VTF.cpp b/src/vtfpp/VTF.cpp
index 44dd6383b..f3f36c803 100644
--- a/src/vtfpp/VTF.cpp
+++ b/src/vtfpp/VTF.cpp
@@ -11,6 +11,7 @@
 
 #include <BufferStream.h>
 #include <miniz.h>
+#include <zstd.h>
 
 #include <vtfpp/ImageConversion.h>
 
@@ -19,25 +20,66 @@ using namespace vtfpp;
 
 namespace {
 
-std::vector<std::byte> compressData(std::span<const std::byte> data, int level) {
-	mz_ulong compressedSize = mz_compressBound(data.size());
-	std::vector<std::byte> out(compressedSize);
+std::vector<std::byte> compressData(std::span<const std::byte> data, int16_t level, CompressionMethod method) { // Compresses data with the chosen method; returns an empty vector on failure or for an unhandled method
+	switch (method) {
+		using enum CompressionMethod;
+		case DEFLATE: {
+			mz_ulong compressedSize = mz_compressBound(data.size()); // Start from the bound miniz reports for this input size
+			std::vector<std::byte> out(compressedSize);
+
+			int status = MZ_OK;
+			while ((status = mz_compress2(reinterpret_cast<unsigned char*>(out.data()), &compressedSize, reinterpret_cast<const unsigned char*>(data.data()), data.size(), level)) == MZ_BUF_ERROR) { // Retry with a larger buffer while miniz reports MZ_BUF_ERROR
+				compressedSize *= 2;
+				out.resize(compressedSize);
+			}
 
-	int status = MZ_OK;
-	while ((status = mz_compress2(reinterpret_cast<unsigned char*>(out.data()), &compressedSize, reinterpret_cast<const unsigned char*>(data.data()), data.size(), level)) == MZ_BUF_ERROR) {
-		compressedSize *= 2;
-		out.resize(compressedSize);
-	}
+			if (status != MZ_OK) {
+				return {};
+			}
+			out.resize(compressedSize); // Shrink to the size passed back through mz_compress2's length argument
+			return out;
+		}
+		case ZSTD: {
+			if (level < 0) { // A negative level is replaced with level 6
+				level = 6;
+			}
 
-	if (status != MZ_OK) {
-		return {};
+			auto expectedSize = ZSTD_compressBound(data.size());
+			std::vector<std::byte> out(expectedSize);
+
+			auto compressedSize = ZSTD_compress(out.data(), expectedSize, data.data(), data.size(), level);
+			if (ZSTD_isError(compressedSize)) {
+				return {};
+			}
+
+			out.resize(compressedSize); // Shrink to the size ZSTD_compress returned
+			return out;
+		}
 	}
-	out.resize(compressedSize);
-	return out;
+	return {}; // Reached only when method matches neither case above
 }
 
 } // namespace
 
+// Resource types sorted ascending by enum value. Built once: a function-local static is initialized thread-safely.
+const std::array<Resource::Type, 8>& Resource::getOrder() {
+	static const std::array<Type, 8> typeArray = [] {
+		std::array<Type, 8> types{
+			TYPE_THUMBNAIL_DATA,
+			TYPE_IMAGE_DATA,
+			TYPE_PARTICLE_SHEET_DATA,
+			TYPE_CRC,
+			TYPE_LOD_CONTROL_INFO,
+			TYPE_EXTENDED_FLAGS,
+			TYPE_KEYVALUES_DATA,
+			TYPE_AUX_COMPRESSION,
+		};
+		std::sort(types.begin(), types.end());
+		return types;
+	}();
+	return typeArray;
+}
+
 Resource::ConvertedData Resource::convertData() const {
 	switch (this->type) {
 		case TYPE_CRC:
@@ -146,17 +188,17 @@ VTF::VTF(std::vector<std::byte>&& vtfData, bool parseHeaderOnly)
 
 		Resource* lastResource = nullptr;
 		for (int i = 0; i < resourceCount; i++) {
-			auto& [type, flags, data] = this->resources.emplace_back();
+			auto& [type, flags_, data_] = this->resources.emplace_back();
 
 			auto typeAndFlags = stream.read<uint32_t>();
 			type = static_cast<Resource::Type>(typeAndFlags & 0xffffff); // last 3 bytes
-			flags = static_cast<Resource::Flags>(typeAndFlags >> 24); // first byte
-			data = stream.read_span<std::byte>(4);
+			flags_ = static_cast<Resource::Flags>(typeAndFlags >> 24); // first byte
+			data_ = stream.read_span<std::byte>(4);
 
-			if (!(flags & Resource::FLAG_LOCAL_DATA)) {
+			if (!(flags_ & Resource::FLAG_LOCAL_DATA)) {
 				if (lastResource) {
 					auto lastOffset = *reinterpret_cast<uint32_t*>(lastResource->data.data());
-					auto currentOffset = *reinterpret_cast<uint32_t*>(data.data());
+					auto currentOffset = *reinterpret_cast<uint32_t*>(data_.data());
 
 					auto curPos = stream.tell();
 					stream.seek(lastOffset);
@@ -191,9 +233,20 @@ VTF::VTF(std::vector<std::byte>&& vtfData, bool parseHeaderOnly)
 								if (uint32_t newOffset, newLength; ImageFormatDetails::getDataPosition(newOffset, newLength, this->format, i, this->mipCount, j, this->frameCount, k, faceCount, this->width, this->height, 0, this->getSliceCount())) {
 									// Keep in mind that slices are compressed together
 									mz_ulong decompressedImageDataSize = newLength * this->sliceCount;
-									if (mz_uncompress(reinterpret_cast<unsigned char*>(decompressedImageData.data() + newOffset), &decompressedImageDataSize, reinterpret_cast<const unsigned char*>(imageResource->data.data() + oldOffset), oldLength) != MZ_OK) {
-										this->opened = false;
-										return;
+									switch (auxResource->getDataAsAuxCompressionMethod()) {
+										using enum CompressionMethod;
+										case DEFLATE:
+											if (mz_uncompress(reinterpret_cast<unsigned char*>(decompressedImageData.data() + newOffset), &decompressedImageDataSize, reinterpret_cast<const unsigned char*>(imageResource->data.data() + oldOffset), oldLength) != MZ_OK) {
+												this->opened = false;
+												return;
+											}
+											break;
+										case ZSTD:
+											if (auto decompressedSize = ZSTD_decompress(reinterpret_cast<unsigned char*>(decompressedImageData.data() + newOffset), decompressedImageDataSize, reinterpret_cast<const unsigned char*>(imageResource->data.data() + oldOffset), oldLength); ZSTD_isError(decompressedSize) || decompressedSize != decompressedImageDataSize) {
+												this->opened = false;
+												return;
+											}
+											break;
 									}
 								}
 								oldOffset += oldLength;
@@ -205,7 +258,7 @@ VTF::VTF(std::vector<std::byte>&& vtfData, bool parseHeaderOnly)
 			}
 		}
 	} else {
-		stream.skip(math::getPaddingForAlignment(16, stream.tell()));
+		stream.skip(math::paddingForAlignment(16, stream.tell()));
 		this->opened = stream.tell() == headerSize;
 
 		this->resources.reserve(2);
@@ -227,7 +280,8 @@ VTF::VTF(std::vector<std::byte>&& vtfData, bool parseHeaderOnly)
 	}
 
 	if (const auto* resource = this->getResource(Resource::TYPE_AUX_COMPRESSION)) {
-		this->compressionLevel = static_cast<int8_t>(resource->getDataAsAuxCompressionLevel());
+		this->compressionLevel = resource->getDataAsAuxCompressionLevel();
+		this->compressionMethod = resource->getDataAsAuxCompressionMethod();
 		this->removeResourceInternal(Resource::TYPE_AUX_COMPRESSION);
 	}
 }
@@ -263,13 +317,14 @@ VTF& VTF::operator=(const VTF& other) {
 
 	this->resources.clear();
 	for (const auto& [otherType, otherFlags, otherData] : other.resources) {
-		auto& [type, flags, data] = this->resources.emplace_back();
+		auto& [type, flags_, data_] = this->resources.emplace_back();
 		type = otherType;
-		flags = otherFlags;
-		data = {this->data.data() + (otherData.data() - other.data.data()), otherData.size()};
+		flags_ = otherFlags;
+		data_ = {this->data.data() + (otherData.data() - other.data.data()), otherData.size()};
 	}
 
 	this->compressionLevel = other.compressionLevel;
+	this->compressionMethod = other.compressionMethod;
 	this->imageWidthResizeMethod = other.imageWidthResizeMethod;
 	this->imageHeightResizeMethod = other.imageHeightResizeMethod;
 
@@ -315,6 +370,7 @@ void VTF::createInternal(VTF& writer, CreationOptions options) {
 	}
 	writer.setFormat(options.outputFormat);
 	writer.setCompressionLevel(options.compressionLevel);
+	writer.setCompressionMethod(options.compressionMethod);
 }
 
 void VTF::create(std::span<const std::byte> imageData, ImageFormat format, uint16_t width, uint16_t height, const std::string& vtfPath, CreationOptions options) {
@@ -410,6 +466,14 @@ void VTF::setImageResizeMethods(ImageConversion::ResizeMethod imageWidthResizeMe
 	this->imageHeightResizeMethod = imageHeightResizeMethod_;
 }
 
+void VTF::setImageWidthResizeMethod(ImageConversion::ResizeMethod imageWidthResizeMethod_) {
+	this->imageWidthResizeMethod = imageWidthResizeMethod_;
+}
+
+void VTF::setImageHeightResizeMethod(ImageConversion::ResizeMethod imageHeightResizeMethod_) {
+	this->imageHeightResizeMethod = imageHeightResizeMethod_;
+}
+
 uint16_t VTF::getWidth(uint8_t mip) const {
 	return mip > 0 ? ImageDimensions::getMipDim(mip, this->width) : this->width;
 }
@@ -485,7 +549,7 @@ void VTF::setFormat(ImageFormat newFormat, ImageConversion::ResizeFilter filter)
 		newMipCount = recommendedCount;
 	}
 	if (ImageFormatDetails::compressed(newFormat)) {
-		this->regenerateImageData(newFormat, this->width + math::getPaddingForAlignment(4, this->width), this->height + math::getPaddingForAlignment(4, this->height), newMipCount, this->frameCount, this->getFaceCount(), this->sliceCount, filter);
+		this->regenerateImageData(newFormat, this->width + math::paddingForAlignment(4, this->width), this->height + math::paddingForAlignment(4, this->height), newMipCount, this->frameCount, this->getFaceCount(), this->sliceCount, filter);
 	} else {
 		this->regenerateImageData(newFormat, this->width, this->height, newMipCount, this->frameCount, this->getFaceCount(), this->sliceCount, filter);
 	}
@@ -551,7 +615,7 @@ void VTF::computeMips(ImageConversion::ResizeFilter filter) {
 					}
 #ifdef SOURCEPP_BUILD_WITH_THREADS
 				}));
-				if (std::thread::hardware_concurrency() > 0 && futures.size() >= std::thread::hardware_concurrency() * 2) {
+				if (std::thread::hardware_concurrency() > 0 && futures.size() >= std::thread::hardware_concurrency()) {
 					for (auto& future : futures) {
 						future.get();
 					}
@@ -689,7 +753,7 @@ void VTF::computeReflectivity() {
 					futures.push_back(std::async(std::launch::async, [this, j, k, l] {
 						return getReflectivityForImage(*this, j, k, l);
 					}));
-					if (std::thread::hardware_concurrency() > 0 && futures.size() >= std::thread::hardware_concurrency() * 2) {
+					if (std::thread::hardware_concurrency() > 0 && futures.size() >= std::thread::hardware_concurrency()) {
 						for (auto& future : futures) {
 							this->reflectivity += future.get();
 						}
@@ -784,7 +848,7 @@ void VTF::setResourceInternal(Resource::Type type, std::span<const std::byte> da
 	this->data.clear();
 	BufferStream writer{this->data};
 
-	for (auto resourceType : Resource::TYPE_ARRAY_ORDER) {
+	for (auto resourceType : Resource::getOrder()) {
 		if (!resourceData.contains(resourceType)) {
 			continue;
 		}
@@ -933,7 +997,7 @@ void VTF::removeExtendedFlagsResource() {
 	this->removeResourceInternal(Resource::TYPE_EXTENDED_FLAGS);
 }
 
-void VTF::setKeyValuesData(const std::string& value) {
+void VTF::setKeyValuesDataResource(const std::string& value) {
 	std::vector<std::byte> keyValuesData;
 	BufferStream writer{keyValuesData};
 
@@ -944,18 +1008,26 @@ void VTF::setKeyValuesData(const std::string& value) {
 	this->setResourceInternal(Resource::TYPE_KEYVALUES_DATA, keyValuesData);
 }
 
-void VTF::removeKeyValuesData() {
+void VTF::removeKeyValuesDataResource() {
 	this->removeResourceInternal(Resource::TYPE_KEYVALUES_DATA);
 }
 
-uint8_t VTF::getCompressionLevel() const {
+int16_t VTF::getCompressionLevel() const {
 	return this->compressionLevel;
 }
 
-void VTF::setCompressionLevel(uint8_t newCompressionLevel) {
+void VTF::setCompressionLevel(int16_t newCompressionLevel) {
 	this->compressionLevel = newCompressionLevel;
 }
 
+CompressionMethod VTF::getCompressionMethod() const {
+	return this->compressionMethod;
+}
+
+void VTF::setCompressionMethod(CompressionMethod newCompressionMethod) {
+	this->compressionMethod = newCompressionMethod;
+}
+
 bool VTF::hasImageData() const {
 	return this->format != ImageFormat::EMPTY && this->width > 0 && this->height > 0;
 }
@@ -986,12 +1058,16 @@ std::vector<std::byte> VTF::getImageDataAsRGBA8888(uint8_t mip, uint16_t frame,
 }
 
 bool VTF::setImage(std::span<const std::byte> imageData_, ImageFormat format_, uint16_t width_, uint16_t height_, ImageConversion::ResizeFilter filter, uint8_t mip, uint16_t frame, uint8_t face, uint16_t slice) {
+	if (imageData_.empty()) {
+		return false;
+	}
+
 	if (!this->hasImageData()) {
 		uint16_t resizedWidth = width_, resizedHeight = height_;
 		ImageConversion::setResizedDims(resizedWidth, this->imageWidthResizeMethod, resizedHeight, this->imageHeightResizeMethod);
 		if (ImageFormatDetails::compressed(format_)) {
-			resizedWidth += math::getPaddingForAlignment(4, resizedWidth);
-			resizedHeight += math::getPaddingForAlignment(4, resizedHeight);
+			resizedWidth += math::paddingForAlignment(4, resizedWidth);
+			resizedHeight += math::paddingForAlignment(4, resizedHeight);
 		}
 		if (const auto newMipCount = ImageDimensions::getRecommendedMipCountForDims(format_, resizedWidth, resizedHeight); newMipCount <= mip) {
 			mip = newMipCount - 1;
@@ -1013,11 +1089,13 @@ bool VTF::setImage(std::span<const std::byte> imageData_, ImageFormat format_, u
 	}
 	if (uint32_t offset, length; ImageFormatDetails::getDataPosition(offset, length, this->format, mip, this->mipCount, frame, this->frameCount, face, faceCount, this->width, this->height, slice, this->sliceCount)) {
 		std::vector<std::byte> image{imageData_.begin(), imageData_.end()};
-		if (this->format != format_) {
-			image = ImageConversion::convertImageDataToFormat(image, format_, this->format, this->width, this->height);
+		const auto newWidth = ImageDimensions::getMipDim(mip, this->width);
+		const auto newHeight = ImageDimensions::getMipDim(mip, this->height);
+		if (width_ != newWidth || height_ != newHeight) {
+			image = ImageConversion::resizeImageData(image, format_, width_, newWidth, height_, newHeight, this->imageDataIsSRGB(), filter);
 		}
-		if (width_ != ImageDimensions::getMipDim(mip, this->width) || height_ != ImageDimensions::getMipDim(mip, this->height)) {
-			image = ImageConversion::resizeImageData(image, this->format, width_, ImageDimensions::getMipDim(mip, this->width), height_, ImageDimensions::getMipDim(mip, this->height), this->imageDataIsSRGB(), filter);
+		if (format_ != this->format) {
+			image = ImageConversion::convertImageDataToFormat(image, format_, this->format, newWidth, newHeight);
 		}
 		std::memcpy(imageResource->data.data() + offset, image.data(), image.size());
 	}
@@ -1030,7 +1108,7 @@ bool VTF::setImage(const std::string& imagePath, ImageConversion::ResizeFilter f
 	auto imageData_ = ImageConversion::convertFileToImageData(fs::readFileBuffer(imagePath), inputFormat, inputWidth, inputHeight, inputFrameCount);
 
 	// Unable to decode file
-	if (inputFormat == ImageFormat::EMPTY || !inputWidth || !inputHeight || !inputFrameCount) {
+	if (imageData_.empty() || inputFormat == ImageFormat::EMPTY || !inputWidth || !inputHeight || !inputFrameCount) {
 		return false;
 	}
 
@@ -1088,6 +1166,16 @@ std::vector<std::byte> VTF::getThumbnailDataAsRGBA8888() const {
 	return this->getThumbnailDataAs(ImageFormat::RGBA8888);
 }
 
+void VTF::setThumbnail(std::span<const std::byte> imageData_, ImageFormat format_, uint16_t width_, uint16_t height_) { // Replaces the thumbnail resource with caller-supplied pixel data and records its dimensions.
+	if (format_ != this->thumbnailFormat) { // Input format differs: convert to the existing thumbnailFormat before storing.
+		this->setResourceInternal(Resource::TYPE_THUMBNAIL_DATA, ImageConversion::convertImageDataToFormat(imageData_, format_, this->thumbnailFormat, width_, height_));
+	} else { // Formats already match: store the bytes unchanged.
+		this->setResourceInternal(Resource::TYPE_THUMBNAIL_DATA, imageData_);
+	}
+	this->thumbnailWidth = width_; // Dimensions are taken from the caller without resizing; thumbnailFormat itself is left untouched.
+	this->thumbnailHeight = height_; // NOTE(review): imageData_ is not checked for emptiness here, unlike the span overload of setImage — confirm whether an empty span should be rejected.
+}
+
 void VTF::computeThumbnail(ImageConversion::ResizeFilter filter) {
 	if (!this->hasImageData()) {
 		return;
@@ -1145,7 +1233,7 @@ std::vector<std::byte> VTF::bake() const {
 	}
 
 	if (this->minorVersion < 3) {
-		const auto headerAlignment = math::getPaddingForAlignment(16, writer.tell());
+		const auto headerAlignment = math::paddingForAlignment(16, writer.tell());
 		for (uint16_t i = 0; i < headerAlignment; i++) {
 			writer.write<std::byte>({});
 		}
@@ -1169,19 +1257,20 @@ std::vector<std::byte> VTF::bake() const {
 				auxCompressionResourceData.resize((this->mipCount * this->frameCount * faceCount + 2) * sizeof(uint32_t));
 				BufferStream auxWriter{auxCompressionResourceData, false};
 
-				// Format of aux resource is as follows, with each item being a 4 byte integer:
+				// Format of aux resource is as follows; each item is a 4-byte integer unless a different size is noted:
 				// - Size of resource in bytes, not counting this int
-				// - Compression level
+				// - Compression level, then compression method (2-byte integers each)
 				// - (X times) Size of each mip-face-frame combo
 				auxWriter
 					.write<uint32_t>(auxCompressionResourceData.size() - sizeof(uint32_t))
-					.write<uint32_t>(this->compressionLevel);
+					.write(this->compressionLevel)
+					.write(this->compressionMethod);
 
 				for (int i = this->mipCount - 1; i >= 0; i--) {
 					for (int j = 0; j < this->frameCount; j++) {
 						for (int k = 0; k < faceCount; k++) {
 							if (uint32_t offset, length; ImageFormatDetails::getDataPosition(offset, length, this->format, i, this->mipCount, j, this->frameCount, k, faceCount, this->width, this->height, 0, this->sliceCount)) {
-								auto compressedData = ::compressData({imageResource->data.data() + offset, length * this->sliceCount}, this->compressionLevel);
+								auto compressedData = ::compressData({imageResource->data.data() + offset, length * this->sliceCount}, this->compressionLevel, this->compressionMethod);
 								compressedImageResourceData.insert(compressedImageResourceData.end(), compressedData.begin(), compressedData.end());
 								auxWriter.write<uint32_t>(compressedData.size());
 							}
@@ -1192,7 +1281,9 @@ std::vector<std::byte> VTF::bake() const {
 		}
 
 		writer
-			.write<uint24_t>(0) // padding
+			.write<uint8_t>(0) // padding
+			.write<uint8_t>(0) // padding
+			.write<uint8_t>(0) // padding
 			.write<uint32_t>(this->getResources().size() + hasAuxCompression)
 			.write<uint64_t>(0); // padding
 
@@ -1212,7 +1303,7 @@ std::vector<std::byte> VTF::bake() const {
 			writer_.write(data);
 			writer_.seek_u(resourceOffsetPos).write<uint32_t>(resourceOffsetValue);
 		};
-		for (const auto resourceType : Resource::TYPE_ARRAY_ORDER) {
+		for (const auto resourceType : Resource::getOrder()) {
 			if (hasAuxCompression && resourceType == Resource::TYPE_AUX_COMPRESSION) {
 				writeNonLocalResource(writer, resourceType, auxCompressionResourceData);
 			} else if (hasAuxCompression && resourceType == Resource::TYPE_IMAGE_DATA) {
diff --git a/src/vtfpp/_vtfpp.cmake b/src/vtfpp/_vtfpp.cmake
index 0d3606a92..7ccabf2e1 100644
--- a/src/vtfpp/_vtfpp.cmake
+++ b/src/vtfpp/_vtfpp.cmake
@@ -1,6 +1,5 @@
 add_pretty_parser(vtfpp
-        DEPS miniz
-        DEPS_INTERFACE sourcepp_stb
+        DEPS miniz libzstd_static sourcepp_parser sourcepp_stb sourcepp_tinyexr
         PRECOMPILED_HEADERS
         "${CMAKE_CURRENT_SOURCE_DIR}/include/vtfpp/ImageConversion.h"
         "${CMAKE_CURRENT_SOURCE_DIR}/include/vtfpp/ImageFormats.h"
@@ -14,8 +13,6 @@ add_pretty_parser(vtfpp
         "${CMAKE_CURRENT_LIST_DIR}/SHT.cpp"
         "${CMAKE_CURRENT_LIST_DIR}/VTF.cpp")
 
-target_include_directories(vtfpp PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/ext/stb/include")
-
 sourcepp_add_tbb(vtfpp)
 sourcepp_add_threads(vtfpp)
 target_link_compressonator(vtfpp)
diff --git a/test/toolpp.cpp b/test/toolpp.cpp
index 19ff0830c..07a9f6a13 100644
--- a/test/toolpp.cpp
+++ b/test/toolpp.cpp
@@ -8,23 +8,31 @@ using namespace toolpp;
 
 TEST(toolpp, cmdSeqOpenBinary) {
 	CmdSeq cmdSeq{ASSET_ROOT "toolpp/cmdseq/binary.wc"};
+	ASSERT_TRUE(cmdSeq); // Abort the test if the CmdSeq converts to false — presumably a failed load; confirm against CmdSeq's bool conversion.
+	ASSERT_EQ(cmdSeq.getType(), CmdSeq::Type::BINARY); // The binary.wc fixture must be reported as Type::BINARY.
 	ASSERT_EQ(cmdSeq.getSequences().size(), 8);
 }
 
-TEST(toolpp, cmdSeqOpenKeyValues) {
+TEST(toolpp, cmdSeqOpenKeyValuesStrata) { // Opens the keyvalues.wc fixture and checks its reported type and sequence count.
 	CmdSeq cmdSeq{ASSET_ROOT "toolpp/cmdseq/keyvalues.wc"};
+	ASSERT_TRUE(cmdSeq); // Abort the test if the CmdSeq converts to false — presumably a failed load; confirm against CmdSeq's bool conversion.
+	ASSERT_EQ(cmdSeq.getType(), CmdSeq::Type::KEYVALUES_STRATA); // The keyvalues.wc fixture must be reported as Type::KEYVALUES_STRATA.
 	ASSERT_EQ(cmdSeq.getSequences().size(), 4);
 }
 
 TEST(toolpp, cmdSeqBakeBinary) {
 	CmdSeq cmdSeq{ASSET_ROOT "toolpp/cmdseq/binary.wc"};
+	ASSERT_TRUE(cmdSeq); // Abort before the round-trip comparison if the CmdSeq converts to false — presumably a failed load; confirm against CmdSeq's bool conversion.
+	ASSERT_EQ(cmdSeq.getType(), CmdSeq::Type::BINARY); // The fixture must be reported as Type::BINARY before bake() output is compared with the file on disk.
 	auto existingData = fs::readFileBuffer(ASSET_ROOT "toolpp/cmdseq/binary.wc");
 	auto bakedData = cmdSeq.bake();
 	ASSERT_EQ(existingData, bakedData);
 }
 
-TEST(toolpp, cmdSeqBakeKeyValues) {
+TEST(toolpp, cmdSeqBakeKeyValuesStrata) {
 	CmdSeq cmdSeq{ASSET_ROOT "toolpp/cmdseq/keyvalues.wc"};
+	ASSERT_TRUE(cmdSeq);
+	ASSERT_EQ(cmdSeq.getType(), CmdSeq::Type::KEYVALUES_STRATA);
 	auto existingData = fs::readFileBuffer(ASSET_ROOT "toolpp/cmdseq/keyvalues.wc");
 	auto bakedData = cmdSeq.bake();
 	ASSERT_EQ(existingData, bakedData);
diff --git a/test/vtfpp.cpp b/test/vtfpp.cpp
index a3b7b40f3..425be5391 100644
--- a/test/vtfpp.cpp
+++ b/test/vtfpp.cpp
@@ -836,6 +836,7 @@ TEST(vtfpp, read_v76_c9) {
 	EXPECT_EQ(vtf.getThumbnailWidth(), 16);
 	EXPECT_EQ(vtf.getThumbnailHeight(), 16);
 	EXPECT_EQ(vtf.getCompressionLevel(), 9);
+	EXPECT_EQ(vtf.getCompressionMethod(), CompressionMethod::DEFLATE);
 
 	// Resources
 	EXPECT_EQ(vtf.getResources().size(), 2);
@@ -876,7 +877,8 @@ TEST(vtfpp, write_v76_c6) {
 	EXPECT_EQ(vtf.getThumbnailFormat(), ImageFormat::DXT1);
 	EXPECT_EQ(vtf.getThumbnailWidth(), 16);
 	EXPECT_EQ(vtf.getThumbnailHeight(), 16);
-	EXPECT_EQ(vtf.getCompressionLevel(), 6);
+	EXPECT_EQ(vtf.getCompressionLevel(), -1);
+	EXPECT_EQ(vtf.getCompressionMethod(), CompressionMethod::ZSTD);
 }
 
 TEST(vtfpp, read_v76_nomip_c9) {
@@ -903,6 +905,7 @@ TEST(vtfpp, read_v76_nomip_c9) {
 	EXPECT_EQ(vtf.getThumbnailWidth(), 16);
 	EXPECT_EQ(vtf.getThumbnailHeight(), 16);
 	EXPECT_EQ(vtf.getCompressionLevel(), 9);
+	EXPECT_EQ(vtf.getCompressionMethod(), CompressionMethod::DEFLATE);
 
 	// Resources
 	EXPECT_EQ(vtf.getResources().size(), 2);