Improve gamma transfer, add Rec. 709 handling, change libwebp version

awxkee · Oct 12, 2023 · c29f126 · c29f126
1 parent 3b55b4c
commit c29f126
Show file tree

Hide file tree

Showing 7 changed files with 67 additions and 49 deletions.
diff --git a/Package.swift b/Package.swift
@@ -18,7 +18,7 @@ let package = Package(
         .package(url: "https://github.com/awxkee/libaom.swift.git", "1.0.0"..<"1.1.0"),
         .package(url: "https://github.com/awxkee/libdav1d.swift.git", "1.0.0"..<"1.1.0"),
         .package(url: "https://github.com/awxkee/libyuv.swift.git", "1.0.0"..<"1.1.0"),
-        .package(url: "https://github.com/awxkee/libwebp-ios.git", "1.0.0"..<"1.1.0"),
+        .package(url: "https://github.com/awxkee/libwebp-ios.git", "1.1.0"..<"1.2.0"),
         .package(url: "https://github.com/kean/Nuke.git", "12.0.0"..<"13.0.0")
     ],
     targets: [
@@ -39,7 +39,7 @@ let package = Package(
                     .product(name: "libaom", package: "libaom.swift"),
                                .product(name: "libdav1d", package: "libdav1d.swift"),
                                .product(name: "libyuv", package: "libyuv.swift"),
-                    .product(name: "libwebp", package: "libwebp-ios")],
+                    .product(name: "libwebp-ios", package: "libwebp-ios")],
                 publicHeadersPath: "include",
                 cSettings: [
                     .define("AVIF_CODEC_AOM_ENCODE", to: "1"),

diff --git a/Sources/avifc/AVIFImageXForm.mm b/Sources/avifc/AVIFImageXForm.mm
@@ -222,6 +222,26 @@ - (_Nullable CGImageRef)formCGImage:(nonnull avifDecoder*)decoder scale:(CGFloat
                                    U16:depth > 8 depth:depth half:depth > 8
                              primaries:lumaPrimaries components:components
                        gammaCorrection:gamma function:function matrix:nullptr];
+        } else if (colorPrimaries == AVIF_COLOR_PRIMARIES_BT709 /* Rec 709 */ &&
+                   (transferCharacteristics == AVIF_TRANSFER_CHARACTERISTICS_SMPTE2084
+                    || transferCharacteristics == AVIF_TRANSFER_CHARACTERISTICS_HLG
+                    || transferCharacteristics == AVIF_TRANSFER_CHARACTERISTICS_SMPTE428)) {
+            float lumaPrimaries[3] = { 0.2627f, 0.6780f, 0.0593f };
+            ColorGammaCorrection gamma = Rec709;
+            TransferFunction function;
+            if (transferCharacteristics == AVIF_TRANSFER_CHARACTERISTICS_SMPTE2084) {
+                function = PQ;
+            } else if (transferCharacteristics == AVIF_TRANSFER_CHARACTERISTICS_HLG) {
+                function = HLG;
+            } else {
+                function = SMPTE428;
+            }
+            colorSpace = CGColorSpaceCreateWithName(kCGColorSpaceITUR_709);
+            [HDRColorTransfer transfer:reinterpret_cast<uint8_t*>(pixelsData)
+                                stride:stride width:newWidth height:newHeight
+                                   U16:depth > 8 depth:depth half:depth > 8
+                             primaries:lumaPrimaries components:components
+                       gammaCorrection:gamma function:function matrix:nullptr];
         } else if (colorPrimaries == AVIF_COLOR_PRIMARIES_SMPTE432 /* Display P3 */ &&
                  transferCharacteristics == AVIF_TRANSFER_CHARACTERISTICS_LINEAR) {
             CGColorSpaceRef p3linear = NULL;
@@ -234,7 +254,7 @@ - (_Nullable CGImageRef)formCGImage:(nonnull avifDecoder*)decoder scale:(CGFloat
         } else if (transferCharacteristics == AVIF_TRANSFER_CHARACTERISTICS_SMPTE2084
                    || transferCharacteristics == AVIF_TRANSFER_CHARACTERISTICS_HLG
                    || transferCharacteristics == AVIF_TRANSFER_CHARACTERISTICS_SMPTE428) {
-            // IF Transfer function but we don't know the color space the we will convert it always to display P3
+            // If one of these transfer functions is signalled but the color space is not one handled above, always convert to Display P3
             float lumaPrimaries[3] = { 0.2627f, 0.6780f, 0.0593f };
             ColorGammaCorrection gamma = DisplayP3;
             TransferFunction function;

diff --git a/Sources/avifc/Color/Gamma.cpp b/Sources/avifc/Color/Gamma.cpp
@@ -30,6 +30,14 @@
 #pragma clang fp contract(on) exceptions(ignore) reassociate(on)
 #endif
 
+float LinearITUR709ToITUR709(const float linear) { // Scalar linear -> Rec. 709 gamma-encoded value; piecewise curve, no clamping is done here
+    if (linear <= 0.018f) { // at or below the 0.018 knee (negative inputs included): straight-line segment
+        return 4.5f * linear;
+    } else { // above the knee: power segment
+        return 1.099f * powf_c(linear, 0.45f) - 0.099f; // powf_c is declared elsewhere; presumably a powf equivalent (confirm)
+    }
+}
+
 float LinearSRGBToSRGB(const float linearValue) {
     if (linearValue <= 0.0031308) {
         return 12.92f * linearValue;

diff --git a/Sources/avifc/Color/Gamma.hpp b/Sources/avifc/Color/Gamma.hpp
@@ -41,6 +41,7 @@ constexpr float alphaRec2020 = 1.09929682680944f;
 
 float LinearSRGBToSRGB(const float linearValue);
 float LinearRec2020ToRec2020(const float linear);
+float LinearITUR709ToITUR709(const float linear);
 float dciP3PQGammaCorrection(const float linear);
 
 #if __arm64__
@@ -51,61 +52,52 @@ float dciP3PQGammaCorrection(const float linear);
 static inline float32x4_t LinearITUR709ToITUR709(const float32x4_t linear) {
     const float32x4_t level = vdupq_n_f32(0.018);
 
-    uint32x4_t mask = vcgtq_f32(linear, level);
-    uint32x4_t maskHigh = vcltq_f32(linear, level);
+    const uint32x4_t mask = vcgtq_f32(linear, level); // lanes above the knee: the linear segment is cleared for these
+    const uint32x4_t maskHigh = vcleq_f32(linear, level); // lanes at or below the knee: the power segment is cleared; <= (not <) so a lane equal to the knee is not summed from both segments, matching the scalar `<=` branch
 
-    float32x4_t low = vbslq_f32(mask, vdupq_n_f32(0), linear);
-    float32x4_t high = vbslq_f32(maskHigh, vdupq_n_f32(0), linear);
+    float32x4_t low = linear;
+    float32x4_t high = linear;
     low = vmulq_n_f32(low, 4.5f);
 
     high = vsubq_f32(vmulq_n_f32(vpowq_f32(high, 0.45f), 1.099f), vdupq_n_f32(0.099f));
+
+    low = vbslq_f32(mask, vdupq_n_f32(0), low); // both segments are computed for every lane, then the unused one is zeroed
+    high = vbslq_f32(maskHigh, vdupq_n_f32(0), high); // bitwise select, so whatever vpowq_f32 produced in an unused lane is discarded
+
     float32x4_t result = vmaxq_f32(vaddq_f32(low, high), vdupq_n_f32(0));
     return result;
 }
 
 static inline float32x4_t LinearSRGBToSRGB(const float32x4_t linear) {
     const float32x4_t level = vdupq_n_f32(0.0031308);
 
-    uint32x4_t mask = vcgtq_f32(linear, level);
-    uint32x4_t maskHigh = vcltq_f32(linear, level);
+    const uint32x4_t mask = vcgtq_f32(linear, level); // lanes above the threshold: the linear segment is cleared for these
+    const uint32x4_t maskHigh = vcleq_f32(linear, level); // lanes at or below the threshold: the power segment is cleared; <= (not <) so a lane equal to the threshold is not summed from both segments, matching the scalar `<=` branch
 
-    float32x4_t low = vbslq_f32(mask, vdupq_n_f32(0), linear);
-    float32x4_t high = vbslq_f32(maskHigh, vdupq_n_f32(0), linear);
+    float32x4_t low = linear;
+    float32x4_t high = linear;
     low = vmulq_n_f32(low, 12.92f);
 
     high = vsubq_f32(vmulq_n_f32(vpowq_f32(high, 1.0f/2.4f), 1.055f), vdupq_n_f32(0.055f));
+    low = vbslq_f32(mask, vdupq_n_f32(0), low); // both segments are computed for every lane, then the unused one is zeroed
+    high = vbslq_f32(maskHigh, vdupq_n_f32(0), high); // bitwise select, so whatever vpowq_f32 produced in an unused lane is discarded
     float32x4_t result = vmaxq_f32(vaddq_f32(low, high), vdupq_n_f32(0));
     return result;
 }
 
 static inline float32x4_t LinearRec2020ToRec2020(const float32x4_t linear) {
-    uint32x4_t mask = vcgtq_f32(linear, vdupq_n_f32(betaRec2020));
-    uint32x4_t maskHigh = vcltq_f32(linear, vdupq_n_f32(betaRec2020));
-
-    float32x4_t low = vbslq_f32(mask, vdupq_n_f32(0), linear);
-    float32x4_t high = vbslq_f32(maskHigh, vdupq_n_f32(0), linear);
+    const uint32x4_t mask = vcgtq_f32(linear, vdupq_n_f32(betaRec2020)); // lanes above betaRec2020: the linear segment is cleared for these
+    const uint32x4_t maskHigh = vcleq_f32(linear, vdupq_n_f32(betaRec2020)); // lanes at or below betaRec2020: the power segment is cleared; <= (not <) keeps the two masks complementary so a lane equal to betaRec2020 is not summed from both segments
 
+    float32x4_t low = linear;
+    float32x4_t high = linear;
     low = vmulq_n_f32(low, 4.5f);
     constexpr float fk = alphaRec2020 - 1;
     high = vsubq_f32(vmulq_n_f32(vpowq_f32(high, 0.45f), alphaRec2020), vdupq_n_f32(fk));
-
+    low = vbslq_f32(mask, vdupq_n_f32(0), low); // both segments are computed for every lane, then the unused one is zeroed
+    high = vbslq_f32(maskHigh, vdupq_n_f32(0), high); // bitwise select, so whatever vpowq_f32 produced in an unused lane is discarded
     return vaddq_f32(low, high);
 }
-
-__attribute__((always_inline))
-static inline float32x4_t applyMatrixNEON(vector<vector<float>> matrix, const float32x4_t v) {
-    const float32x4_t row1 = { matrix[0][0], matrix[0][1], matrix[0][2], 0.0f };
-    const float32x4_t row2 = { matrix[1][0], matrix[1][1], matrix[1][2], 0.0f };
-    const float32x4_t row3 = { matrix[2][0], matrix[2][1], matrix[2][2], 0.0f };
-    const float32x4_t v1 = vmulq_f32(v, row1);
-    const float32x4_t v2 = vmulq_f32(v, row2);
-    const float32x4_t v3 = vmulq_f32(v, row3);
-    const float r = vsumq_f32(v1);
-    const float g = vsumq_f32(v2);
-    const float b = vsumq_f32(v3);
-    const float32x4_t res = { r, g, b, 0.0f };
-    return res;
-}
 #endif
 
 #endif /* Gamma_hpp */
diff --git a/Sources/avifc/HDRColorTransfer.h b/Sources/avifc/HDRColorTransfer.h
@@ -29,7 +29,7 @@
 #import "Colorspace.h"
 
 enum ColorGammaCorrection {
-    Linear, Rec2020, DisplayP3
+    Linear, Rec2020, DisplayP3, Rec709 // Rec709 makes the transfer routines encode output with LinearITUR709ToITUR709
 };
 
 enum TransferFunction {

diff --git a/Sources/avifc/HDRColorTransfer.mm b/Sources/avifc/HDRColorTransfer.mm
@@ -133,6 +133,10 @@ void TransferROW_U16HFloats(uint16_t *data, ColorGammaCorrection gammaCorrection
         data[0] = half(clamp(LinearSRGBToSRGB(r), 0.0f, 1.0f)).data_;
         data[1] = half(clamp(LinearSRGBToSRGB(g), 0.0f, 1.0f)).data_;
         data[2] = half(clamp(LinearSRGBToSRGB(b), 0.0f, 1.0f)).data_;
+    } else if (gammaCorrection == Rec709) {
+        data[0] = half(clamp(LinearITUR709ToITUR709(r), 0.0f, 1.0f)).data_;
+        data[1] = half(clamp(LinearITUR709ToITUR709(g), 0.0f, 1.0f)).data_;
+        data[2] = half(clamp(LinearITUR709ToITUR709(b), 0.0f, 1.0f)).data_;
     } else {
         data[0] = half(clamp(r, 0.0f, 1.0f)).data_;
         data[1] = half(clamp(g, 0.0f, 1.0f)).data_;
@@ -142,11 +146,6 @@ void TransferROW_U16HFloats(uint16_t *data, ColorGammaCorrection gammaCorrection
 
 #if __arm64__
 
-__attribute__((always_inline))
-inline float32x4_t dcpi3GammaCorrection(float32x4_t linear) {
-    return vpowq_f32(linear, 1.0f/2.6f);
-}
-
 __attribute__((always_inline))
 inline void SetPixelsRGB(float16x4_t rgb, uint16_t *vector, int components) {
     uint16x4_t t = vreinterpret_u16_f16(rgb);
@@ -219,6 +218,11 @@ inline float32x4x4_t Transfer(float32x4_t rChan, float32x4_t gChan,
         r.val[1] = vclampq_n_f32(LinearSRGBToSRGB(r.val[1]), 0.0f, 1.0f);
         r.val[2] = vclampq_n_f32(LinearSRGBToSRGB(r.val[2]), 0.0f, 1.0f);
         r.val[3] = vclampq_n_f32(LinearSRGBToSRGB(r.val[3]), 0.0f, 1.0f);
+    } else if (gammaCorrection == Rec709) {
+        r.val[0] = vclampq_n_f32(LinearITUR709ToITUR709(r.val[0]), 0.0f, 1.0f);
+        r.val[1] = vclampq_n_f32(LinearITUR709ToITUR709(r.val[1]), 0.0f, 1.0f);
+        r.val[2] = vclampq_n_f32(LinearITUR709ToITUR709(r.val[2]), 0.0f, 1.0f);
+        r.val[3] = vclampq_n_f32(LinearITUR709ToITUR709(r.val[3]), 0.0f, 1.0f);
     } else {
         r.val[0] = vclampq_n_f32(r.val[0], 0.0f, 1.0f);
         r.val[1] = vclampq_n_f32(r.val[1], 0.0f, 1.0f);
@@ -280,6 +284,10 @@ void TransferROW_U8(uint8_t *data, float maxColors,
         r = LinearSRGBToSRGB(r);
         g = LinearSRGBToSRGB(g);
         b = LinearSRGBToSRGB(b);
+    } else if (gammaCorrection == Rec709) {
+        r = LinearITUR709ToITUR709(r);
+        g = LinearITUR709ToITUR709(g);
+        b = LinearITUR709ToITUR709(b);
     }
 
     data[0] = (uint8_t) clamp((float) round(r * maxColors), 0.0f, maxColors);

diff --git a/Sources/avifc/NEMath.h b/Sources/avifc/NEMath.h
@@ -200,9 +200,6 @@ static inline float32x4_t vlog10q_f32(float32x4_t x)
     poly = vmlaq_f32(poly, vcvtq_f32_s32(m), CONST_LOG10);
 
     return poly;
-//    static const float32x4_t CONST_LN10 = vdupq_n_f32(2.30258509299); // ln(2)
-//    const float32x4_t v = vlogq_f32(x);
-//    return vdivq_f32(v, CONST_LN10);
 }
 
 __attribute__((always_inline))
@@ -230,8 +227,7 @@ static inline float16x8_t vclampq_n_f16(const float16x8_t t, const float16_t min
     const float vMax = max;
     const float32x4_t low = vclampq_n_f32(vcvt_f32_f16(vget_low_f16(t)), vMin, vMax);
     const float32x4_t high = vclampq_n_f32(vcvt_f32_f16(vget_high_f16(t)), vMin, vMax);
-    const float16x8_t r = vcombine_f16(vcvt_f16_f32(low), vcvt_f16_f32(high));
-    return r;
+    return vcombine_f16(vcvt_f16_f32(low), vcvt_f16_f32(high));
 #endif
 }
 
@@ -255,12 +251,6 @@ static inline float32x4x4_t MatTransponseQF32(const float32x4x4_t matrix)
     return r;
 }
 
-__attribute__((always_inline))
-static inline float32x4_t vreinhardq_f32(const float32x4_t t) {
-    float32x4_t v = vaddq_f32(t, vdupq_n_f32(1.0f));
-    return vdivq_f32(t, v);
-}
-
 __attribute__((always_inline))
 static inline uint32x4_t vhtonlq_u32(const uint32x4_t hostlong) {
     uint8x8_t low = vreinterpret_u8_u32(vget_low_u32(hostlong));