Skip to content

Commit

Permalink
some HDR improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
awxkee committed Oct 13, 2023
1 parent b6bfcc4 commit 45548d6
Show file tree
Hide file tree
Showing 15 changed files with 79 additions and 111 deletions.
28 changes: 17 additions & 11 deletions Sources/avifc/Color/Colorspace.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,10 @@ static const float DisplayP3Primaries[3][2] = { { 0.740, 0.270 }, { 0.220, 0.780

// Luma weights per colour space; each triple sums to 1 (presumably in R, G, B
// order — confirm against the ColorSpaceProfile constructor).
static const float Rec2020LumaPrimaries[3] = {0.2627f, 0.6780f, 0.0593f};
static const float Rec709LumaPrimaries[3] = {0.2126f, 0.7152f, 0.0722f};
// NOTE(review): the next two declarations clash with the ones that follow them
// (DisplayP3LumaPrimaries is a scalar here and a 3-element array below, and
// Rec709WhitePointNits is declared twice). They look like the pre-change side
// of this diff hunk — confirm before treating this listing as compilable source.
static const float DisplayP3LumaPrimaries = 80;
static const float Rec709WhitePointNits = 100;
// NOTE(review): 0.299 / 0.587 / 0.114 are the BT.601 luma weights; confirm they
// are really the intended weights for Display P3.
static const float DisplayP3LumaPrimaries[3] = {0.299f, 0.587f, 0.114f};
// White-point levels per colour space (in nits, going by the names).
static const float DisplayP3WhitePointNits = 80;
static const float Rec709WhitePointNits = 100;
static const float Rec2020WhitePointNits = 203;

// D65 white point as a pair of chromaticity coordinates (presumably CIE xy).
static const float IlluminantD65[2] = { 0.3127, 0.3290 };

Expand Down Expand Up @@ -157,22 +158,24 @@ class ColorSpaceMatrix {
const float32x4_t row2 = { matrix[3], matrix[4], matrix[5], 0.0f };
const float32x4_t row3 = { matrix[6], matrix[7], matrix[8], 0.0f };

return vaddq_f32(vaddq_f32(vmulq_f32(v, row1), vmulq_f32(v, row2)), vmulq_f32(v, row3));
float32x4_t r = { vaddvq_f32(vmulq_f32(v, row1)), vaddvq_f32(vmulq_f32(v, row2)), vaddvq_f32(vmulq_f32(v, row3)), 0.0f };
return r;
}

// Applies the 3x3 `matrix` (read as three consecutive 3-element rows, each
// padded with a 0 in lane 3) to the vector v.
// NOTE(review): two return paths are shown back to back without diff markers;
// they look like the pre- and post-change variants of this hunk. As literal
// code, everything after the first `return` is unreachable.
inline float32x4_t operator*(const float32x4_t v) {
const float32x4_t row1 = { matrix[0], matrix[1], matrix[2], 0.0f };
const float32x4_t row2 = { matrix[3], matrix[4], matrix[5], 0.0f };
const float32x4_t row3 = { matrix[6], matrix[7], matrix[8], 0.0f };

// Variant A: lane-wise sum of the three products, i.e. lane i is
// v[i] * (row1[i] + row2[i] + row3[i]) — not a row-by-row dot product.
return vaddq_f32(vaddq_f32(vmulq_f32(v, row1), vmulq_f32(v, row2)), vmulq_f32(v, row3));
// Variant B: lane k is the across-lane sum (vaddvq_f32) of v * row(k+1),
// i.e. the dot product of v with that row; lane 3 is set to 0.
float32x4_t r = { vaddvq_f32(vmulq_f32(v, row1)), vaddvq_f32(vmulq_f32(v, row2)), vaddvq_f32(vmulq_f32(v, row3)), 0.0f };
return r;
}

// Applies the matrix rows to each of the four vectors in v and returns the
// four results packed in a float32x4x4_t.
// NOTE(review): row1..row3 are not declared inside this function in the lines
// shown — presumably they come from outside this view; confirm.
// NOTE(review): r1..r4 are each declared twice without diff markers; the two
// groups look like the pre- and post-change variants of this hunk.
inline float32x4x4_t operator*(const float32x4x4_t v) {
// Variant A: lane-wise sum of the three products per input vector.
// NOTE(review): the r1 line multiplies row3 by v.val[1] while its other two
// terms use v.val[0]; r2..r4 use a single index throughout.
const float32x4_t r1 = vaddq_f32(vaddq_f32(vmulq_f32(v.val[0], row1), vmulq_f32(v.val[0], row2)), vmulq_f32(v.val[1], row3));
const float32x4_t r2 = vaddq_f32(vaddq_f32(vmulq_f32(v.val[1], row1), vmulq_f32(v.val[1], row2)), vmulq_f32(v.val[1], row3));
const float32x4_t r3 = vaddq_f32(vaddq_f32(vmulq_f32(v.val[2], row1), vmulq_f32(v.val[2], row2)), vmulq_f32(v.val[2], row3));
const float32x4_t r4 = vaddq_f32(vaddq_f32(vmulq_f32(v.val[3], row1), vmulq_f32(v.val[3], row2)), vmulq_f32(v.val[3], row3));
// Variant B: lane k of each result is the across-lane sum (vaddvq_f32) of
// v.val[i] * row(k+1), i.e. a per-row dot product; lane 3 is set to 0.
const float32x4_t r1 = { vaddvq_f32(vmulq_f32(v.val[0], row1)), vaddvq_f32(vmulq_f32(v.val[0], row2)), vaddvq_f32(vmulq_f32(v.val[0], row3)), 0.0f };
const float32x4_t r2 = { vaddvq_f32(vmulq_f32(v.val[1], row1)), vaddvq_f32(vmulq_f32(v.val[1], row2)), vaddvq_f32(vmulq_f32(v.val[1], row3)), 0.0f };
const float32x4_t r3= { vaddvq_f32(vmulq_f32(v.val[2], row1)), vaddvq_f32(vmulq_f32(v.val[2], row2)), vaddvq_f32(vmulq_f32(v.val[2], row3)), 0.0f };
const float32x4_t r4 = { vaddvq_f32(vmulq_f32(v.val[3], row1)), vaddvq_f32(vmulq_f32(v.val[3], row2)), vaddvq_f32(vmulq_f32(v.val[3], row3)), 0.0f };
float32x4x4_t r = { r1, r2, r3, r4 };
return r;
}
Expand Down Expand Up @@ -360,9 +363,12 @@ class ColorSpaceProfile {
float whitePointNits;
};

// File-scope colour-space profile instances. Each is heap-allocated with `new`;
// no matching `delete` is visible in this view.
// Arguments as passed here: primaries table, white-point chromaticity, luma
// weights, white-point level — presumably in that order; confirm against the
// ColorSpaceProfile constructor.
// NOTE(review): the three names are each declared twice without diff markers;
// the first group (literal 203 / 100 / 80) looks like the pre-change variant.
// In that group displayP3Profile is given Rec709LumaPrimaries, whereas the
// second group passes DisplayP3LumaPrimaries and the named *WhitePointNits
// constants.
static ColorSpaceProfile* rec2020Profile = new ColorSpaceProfile(Rec2020Primaries, IlluminantD65, Rec2020LumaPrimaries, 203);
static ColorSpaceProfile* rec709Profile = new ColorSpaceProfile(Rec709Primaries, IlluminantD65, Rec709LumaPrimaries, 100);
static ColorSpaceProfile* displayP3Profile = new ColorSpaceProfile(DisplayP3Primaries, IlluminantD65, Rec709LumaPrimaries, 80);
static ColorSpaceProfile* rec2020Profile = new ColorSpaceProfile(Rec2020Primaries, IlluminantD65,
Rec2020LumaPrimaries, Rec2020WhitePointNits);
static ColorSpaceProfile* rec709Profile = new ColorSpaceProfile(Rec709Primaries, IlluminantD65,
Rec709LumaPrimaries, Rec709WhitePointNits);
static ColorSpaceProfile* displayP3Profile = new ColorSpaceProfile(DisplayP3Primaries, IlluminantD65,
DisplayP3LumaPrimaries, DisplayP3WhitePointNits);

template <typename T>
T lerp(const T& a, const T& b, float t) {
Expand Down
2 changes: 1 addition & 1 deletion Sources/avifc/Color/HLG.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ static inline float32x4_t HLGToLinear(const float32x4_t v) {
const float32x4_t vDivVec = vrecpeq_f32(vdupq_n_f32(a));

float32x4_t high = vdivq_f32(vaddq_f32(vexpq_f32(vmulq_f32(vsubq_f32(v, vdupq_n_f32(c)), vDivVec)), vdupq_n_f32(b)), vdupq_n_f32(12.0f));
float32x4_t low = vmulq_f32(vmulq_f32(v, v), vdupq_n_f32(1.0f/3.0f));
float32x4_t low = vmulq_n_f32(vmulq_f32(v, v), 1.0f/3.0f);

low = vbslq_f32(mask, vdupq_n_f32(0), low);
high = vbslq_f32(maskHigh, vdupq_n_f32(0), high);
Expand Down
5 changes: 2 additions & 3 deletions Sources/avifc/Color/PQ.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ const static float m1 = (2610.0f / 4096.0f) / 4.0f;
// Transfer-curve constants; the values match the SMPTE ST 2084 (PQ) parameters.
const static float m2 = (2523.0f / 4096.0f) * 128.0f;
// c1 and c2 are broadcast to all four lanes so they can be used directly in
// vector arithmetic.
const static float32x4_t c1 = vdupq_n_f32(3424.0f / 4096.0f);
const static float32x4_t c2 = vdupq_n_f32((2413.0f / 4096.0f) * 32.0f);
// NOTE(review): c3 is declared twice without diff markers — first as a
// broadcast vector, then as a scalar. These look like the pre- and post-change
// variants of this hunk; the scalar form is what a vmlsq_n_f32 call would take.
const static float32x4_t c3 = vdupq_n_f32((2392.0f / 4096.0f) * 32.0f);
const static float c3 = (2392.0f / 4096.0f) * 32.0f;
// Reciprocals of m2 and m1, passed as exponents to vpowq_f32 in ToLinearPQ.
const static float m2Power = 1.0f / m2;
const static float m1Power = 1.0f / m1;

Expand All @@ -46,10 +46,9 @@ static inline float32x4_t ToLinearPQ(const float32x4_t v, const float sdrReferen
const float32x4_t rv = vmaxq_f32(v, zeros);
float32x4_t p = vpowq_f32(rv, m2Power);
const float lumaScale = 10000.0f / sdrReferencePoint;
return vcopysignq_f32(vmulq_n_f32(vpowq_f32(vdivq_f32(vmaxq_f32(vsubq_f32(p, c1), zeros), vmlsq_f32(c2, c3, p)), m1Power),
return vcopysignq_f32(vmulq_n_f32(vpowq_f32(vmulq_f32(vmaxq_f32(vsubq_f32(p, c1), zeros), vrecpeq_f32(vmlsq_n_f32(c2, p, c3))), m1Power),
lumaScale), rv);
}

#endif

static float ToLinearPQ(float v, const float sdrReferencePoint) {
Expand Down
4 changes: 3 additions & 1 deletion Sources/avifc/HDRColorTransfer.mm
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@
#import "ToneMap/ReinhardToneMapper.hpp"
#import "ToneMap/ClampToneMapper.hpp"
#import "ToneMap/ReinhardJodieToneMapper.hpp"
#import "ToneMap/HableToneMapper.hpp"
#import "ToneMap/DragoToneMapper.hpp"
#import "half.hpp"
#import "Color/Gamma.hpp"
#import "Color/PQ.hpp"
Expand Down Expand Up @@ -308,7 +310,7 @@ +(void)transferNEONF16:(nonnull uint8_t*)data stride:(int)stride width:(int)widt

auto ptr16 = reinterpret_cast<uint16_t *>(ptr + y * stride);
int x;
for (x = 0; x + 8 < width / 2; x += 8) {
for (x = 0; x + 8 < width; x += 8) {
if (components == 4) {
float16x8x4_t rgbVector = vld4q_f16(reinterpret_cast<const float16_t *>(ptr16));

Expand Down
9 changes: 1 addition & 8 deletions Sources/avifc/NEMath.h
Original file line number Diff line number Diff line change
Expand Up @@ -271,13 +271,6 @@ static inline float32x4_t vcopysignq_f32(const float32x4_t dst, const float32x4_
return vbslq_f32(mask, vnegq_f32(dst), dst);
}

/// Returns the sum of all four lanes of `v`, computed with the across-vector
/// add intrinsic vaddvq_f32.
__attribute__((always_inline))
static inline float vsumq_f32(const float32x4_t v) {
    const float laneTotal = vaddvq_f32(v);
    return laneTotal;
}

__attribute__((always_inline))
static inline float32x2_t vsumq_f32x2(const float32x4_t v, const float32x4_t v1) {
// float32x2_t r = vadd_f32(vget_high_f32(v), vget_low_f32(v));
Expand Down Expand Up @@ -325,7 +318,7 @@ __attribute__((always_inline))
// Sums the eight half-precision lanes of v: the low and high halves are
// widened to float32, added lane-wise, and the four resulting lanes are
// reduced to a single float.
static inline float vsumq_f16(const float16x8_t v) {
const float32x4_t low = vcvt_f32_f16(vget_low_f16(v));
const float32x4_t high = vcvt_f32_f16(vget_high_f16(v));
// NOTE(review): two return statements are shown without diff markers; they
// look like the pre- and post-change variants. The first goes through the
// vsumq_f32 helper, the second calls vaddvq_f32 directly. As literal code the
// second is unreachable.
return vsumq_f32(vaddq_f32(high, low));
return vaddvq_f32(vaddq_f32(high, low));
}

__attribute__((always_inline))
Expand Down
10 changes: 5 additions & 5 deletions Sources/avifc/ToneMap/ClampToneMapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ void ClampToneMapper::Execute(float& r, float& g, float &b) {

float32x4_t ClampToneMapper::Execute(const float32x4_t m) {
const float32x4_t v = vmulq_n_f32(m, exposure);
const float Lin = vsumq_f32(vmulq_f32(v, vLumaVec));
const float Lin = vaddvq_f32(vmulq_f32(v, vLumaVec));
if (Lin == 0) {
return v;
}
Expand Down Expand Up @@ -89,10 +89,10 @@ float32x4x4_t ClampToneMapper::Execute(const float32x4x4_t m) {
const float32x4_t Lout = vclampq_n_f32(vmulq_f32(Lin, vrecpeq_f32(vdupq_n_f32(Lmax_))), 0.0f, 1.0f);
const float32x4_t scale = vdivq_f32(Lout, Lin);
const float32x4x4_t r = {
vmulq_n_f32(exposured.val[0], vgetq_lane_f32(scale, 0)),
vmulq_n_f32(exposured.val[1], vgetq_lane_f32(scale, 1)),
vmulq_n_f32(exposured.val[2], vgetq_lane_f32(scale, 2)),
vmulq_n_f32(exposured.val[3], vgetq_lane_f32(scale, 3))
vmulq_laneq_f32(exposured.val[0], scale, 0),
vmulq_laneq_f32(exposured.val[1], scale, 1),
vmulq_laneq_f32(exposured.val[2], scale, 2),
vmulq_laneq_f32(exposured.val[3], scale, 3)
};
return r;
}
Expand Down
8 changes: 1 addition & 7 deletions Sources/avifc/ToneMap/DragoToneMapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,9 @@ void DragoToneMapper::Execute(float& r, float& g, float &b) {

#if __arm64__

__attribute__((always_inline))
static inline float vsumq_f32Drago(const float32x4_t v) {
float32x2_t r = vadd_f32(vget_high_f32(v), vget_low_f32(v));
return vget_lane_f32(vpadd_f32(r, r), 0);
}

float32x4_t DragoToneMapper::Execute(const float32x4_t m) {
const float32x4_t v = vmulq_n_f32(m, exposure);
const float Lin = vsumq_f32Drago(vmulq_n_f32(vmulq_f32(v, vLumaVec), exposure));
const float Lin = vaddvq_f32(vmulq_n_f32(vmulq_f32(v, vLumaVec), exposure));
if (Lin == 0) {
return v;
}
Expand Down
1 change: 1 addition & 0 deletions Sources/avifc/ToneMap/HableToneMapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
//

#include "HableToneMapper.hpp"
#include "NEMath.h"

#if defined(__clang__)
#pragma clang fp contract(fast) exceptions(ignore) reassociate(on)
Expand Down
22 changes: 11 additions & 11 deletions Sources/avifc/ToneMap/LogarithmicToneMapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ void LogarithmicToneMapper::Execute(float& r, float& g, float &b) {

float32x4_t LogarithmicToneMapper::Execute(const float32x4_t m) {
const float32x4_t v = vmulq_n_f32(m, exposure);
const float Lin = vsumq_f32(vmulq_f32(v, vLumaVec));
const float Lout = vgetq_lane_f32(vdivq_f32(vlog10q_f32(vdupq_n_f32(fabsf_c(1.0 + curve * Lin))), vDenVec), 0);
const float Lin = vaddvq_f32(vmulq_f32(v, vLumaVec));
const float Lout = vgetq_lane_f32(vmulq_f32(vlog10q_f32(vdupq_n_f32(fabsf_c(1.0 + curve * Lin))), vDenVec), 0);
const float scale = Lout / Lin;
if (scale == 1) {
return v;
Expand All @@ -76,21 +76,21 @@ float32x4x4_t LogarithmicToneMapper::Execute(const float32x4x4_t m) {
vmulq_n_f32(m.val[3], exposure),
};
float32x4_t Lin = {
vsumq_f32(vmulq_f32(exposured.val[0], vLumaVec)),
vsumq_f32(vmulq_f32(exposured.val[1], vLumaVec)),
vsumq_f32(vmulq_f32(exposured.val[2], vLumaVec)),
vsumq_f32(vmulq_f32(exposured.val[3], vLumaVec)),
vaddvq_f32(vmulq_f32(exposured.val[0], vLumaVec)),
vaddvq_f32(vmulq_f32(exposured.val[1], vLumaVec)),
vaddvq_f32(vmulq_f32(exposured.val[2], vLumaVec)),
vaddvq_f32(vmulq_f32(exposured.val[3], vLumaVec)),
};
Lin = vsetq_if_f32(Lin, 0.0f, 1.0f);
const float32x4_t Lout = vsetq_if_f32(
vdivq_f32(vlog10q_f32(vabsq_f32(vmlaq_f32(vdupq_n_f32(1.0f), vdupq_n_f32(curve), Lin))), vDenVec),
vmulq_f32(vlog10q_f32(vabsq_f32(vmlaq_f32(vdupq_n_f32(1.0f), vdupq_n_f32(curve), Lin))), vDenVec),
0.0f, 1.0f);
const float32x4_t scale = vdivq_f32(Lout, Lin);
float32x4x4_t r = {
vmulq_n_f32(exposured.val[0], vgetq_lane_f32(scale, 0)),
vmulq_n_f32(exposured.val[1], vgetq_lane_f32(scale, 1)),
vmulq_n_f32(exposured.val[2], vgetq_lane_f32(scale, 2)),
vmulq_n_f32(exposured.val[3], vgetq_lane_f32(scale, 3))
vmulq_laneq_f32(exposured.val[0], scale, 0),
vmulq_laneq_f32(exposured.val[1], scale, 1),
vmulq_laneq_f32(exposured.val[2], scale, 2),
vmulq_laneq_f32(exposured.val[3], scale, 3)
};
return r;
}
Expand Down
4 changes: 2 additions & 2 deletions Sources/avifc/ToneMap/LogarithmicToneMapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class LogarithmicToneMapper: public ToneMapper {
den = log10(1.0 + curve * Lmax_);
#if __arm64__
vLumaVec = { lumaVec[0], lumaVec[1], lumaVec[2], 0.0f };
vDenVec = vdupq_n_f32(den);
vDenVec = vdupq_n_f32(1.0f /den);
#endif
}

Expand All @@ -56,7 +56,7 @@ class LogarithmicToneMapper: public ToneMapper {
den = log10(1.0 + curve * Lmax_);
#if __arm64__
vLumaVec = { lumaVec[0], lumaVec[1], lumaVec[2], 0.0f };
vDenVec = vdupq_n_f32(den);
vDenVec = vdupq_n_f32(1.0f / den);
#endif
}

Expand Down
50 changes: 15 additions & 35 deletions Sources/avifc/ToneMap/Rec2408ToneMapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,49 +31,29 @@ using namespace std;

#if __arm64__

/// Applies a PQ-shaped (SMPTE ST 2084) encoding curve to a scalar value.
///
/// The input is divided by 10000, raised to m1, passed through the rational
/// term (c1 + c2 * v) / (1 + c3 * v), and raised to m2.
///
/// @param Lin value to encode (presumably linear luminance in nits, given the
///            division by 10000 — confirm against callers).
/// @return the encoded value.
float Rec2408ToneMapper::SDR(float Lin) {
    // The constants must be formed with floating-point division. With integer
    // operands `107 / 128` and `1305 / 8192` truncate to 0 and the others lose
    // their fractional part, which makes the result independent of Lin.
    const float c1 = 107.0f / 128.0f;
    const float c2 = 2413.0f / 128.0f;
    const float c3 = 2392.0f / 128.0f;
    const float m1 = 1305.0f / 8192.0f;
    const float m2 = 2523.0f / 32.0f;
    const float v = pow(Lin / 10000, m1);
    return pow((c1 + c2 * v) / (1 + c3 * v), m2);
}

/// Four-lane version of the PQ-shaped (SMPTE ST 2084) encoding curve.
///
/// Each lane of the input is divided by 10000, raised to m1, passed through
/// the rational term (c1 + c2 * v) / (1 + c3 * v), and raised to m2.
///
/// @param Lin four values to encode (presumably linear luminance in nits,
///            given the division by 10000 — confirm against callers).
/// @return the four encoded values.
float32x4_t Rec2408ToneMapper::SDR(float32x4_t Lin) {
    // The constants must be formed with floating-point division. With integer
    // operands `107 / 128` and `1305 / 8192` truncate to 0 and the others lose
    // their fractional part, which makes the result independent of Lin.
    const float c1 = 107.0f / 128.0f;
    const float c2 = 2413.0f / 128.0f;
    const float c3 = 2392.0f / 128.0f;
    const float m1 = 1305.0f / 8192.0f;
    const float m2 = 2523.0f / 32.0f;
    const float32x4_t v = vpowq_f32(vdivq_f32(Lin, vdupq_n_f32(10000)), m1);
    // vmlaq_f32(a, b, c) computes a + b * c per lane.
    const float32x4_t numerator = vmlaq_f32(vdupq_n_f32(c1), vdupq_n_f32(c2), v);
    const float32x4_t denominator = vmlaq_f32(vdupq_n_f32(1), vdupq_n_f32(c3), v);
    return vpowq_f32(vdivq_f32(numerator, denominator), m2);
}

// Tone-maps four input vectors at once: one luminance value is computed per
// input vector, a gain Lout = (1 + a * Lin) / (1 + b * Lin) is derived from it,
// and each input vector is scaled by its own gain.
// NOTE(review): this listing interleaves what look like the pre- and
// post-change lines of the diff hunk without +/- markers (`Lin`, `Lout` and the
// initialisers of `r` each appear in two variants), so it is not compilable as
// shown — confirm which lines are live.
float32x4x4_t Rec2408ToneMapper::Execute(const float32x4x4_t m) {
// Variant A: lane-wise products of each input vector with the luma weights.
const float32x4x4_t lumas = {
vmulq_f32(m.val[0], luma),
vmulq_f32(m.val[1], luma),
vmulq_f32(m.val[2], luma),
vmulq_f32(m.val[3], luma),
// Variant B: lane i of Lin is the across-lane sum of m.val[i] * luma.
const float32x4_t lc = luma;
const float32x4_t Lin = {
vaddvq_f32(vmulq_f32(m.val[0], lc)),
vaddvq_f32(vmulq_f32(m.val[1], lc)),
vaddvq_f32(vmulq_f32(m.val[2], lc)),
vaddvq_f32(vmulq_f32(m.val[3], lc)),
};
// Variant A: reduces the four product vectors through vsumq_f32x4 (helper
// defined outside this view).
const float32x4_t Lin = vsumq_f32x4(lumas.val[0], lumas.val[1], lumas.val[2], lumas.val[3]);
// Variant A gain: (ones + aVec * Lin) / (ones + bVec * Lin) with a full
// vector divide.
const float32x4_t Lout = vdivq_f32(vmlaq_f32(this->ones, this->aVec, Lin),
vmlaq_f32(this->ones, this->bVec, Lin));

// Variant B gain: (1 + Lin * a) multiplied by a reciprocal of (1 + Lin * b).
// NOTE(review): vrecpeq_f32 is the reciprocal *estimate* intrinsic, so this
// form is lower precision than the vdivq_f32 form above — confirm that is
// acceptable here.
const float32x4_t ones = vdupq_n_f32(1.f);
const float32x4_t Lout = vmulq_f32(vmlaq_n_f32(ones, Lin, this->a),
vrecpeq_f32(vmlaq_n_f32(ones, Lin, this->b)));
// Both groups of initialisers below multiply m.val[i] by lane i of Lout; the
// first extracts the lane to a scalar, the second uses the by-lane multiply.
float32x4x4_t r = {
vmulq_n_f32(m.val[0], vgetq_lane_f32(Lout, 0)),
vmulq_n_f32(m.val[1], vgetq_lane_f32(Lout, 1)),
vmulq_n_f32(m.val[2], vgetq_lane_f32(Lout, 2)),
vmulq_n_f32(m.val[3], vgetq_lane_f32(Lout, 3))
vmulq_laneq_f32(m.val[0], Lout, 0),
vmulq_laneq_f32(m.val[1], Lout, 1),
vmulq_laneq_f32(m.val[2], Lout, 2),
vmulq_laneq_f32(m.val[3], Lout, 3)
};

return r;
}

float32x4_t Rec2408ToneMapper::Execute(const float32x4_t m) {
const float Lin = vsumq_f32(vmulq_f32(m, this->luma));
const float Lin = vaddvq_f32(vmulq_f32(m, this->luma));
if (Lin == 0) {
return m;
}
Expand Down
8 changes: 0 additions & 8 deletions Sources/avifc/ToneMap/Rec2408ToneMapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,6 @@ class Rec2408ToneMapper: public ToneMapper {
this->b = 1.0f / (displayMaxBrightness/whitePoint);
memcpy(this->lumaCoefficients, lumaCoefficients, sizeof(float)*3);
#if __arm64__
this->aVec = vdupq_n_f32(a);
this->bVec = vdupq_n_f32(b);
this->ones = vdupq_n_f32(1.f);
this->luma = { lumaCoefficients[0], lumaCoefficients[1], lumaCoefficients[2], 0.0f };
#endif
}
Expand All @@ -63,13 +60,8 @@ class Rec2408ToneMapper: public ToneMapper {
float Ld;
float a;
float b;
float SDR(float Lin);
float lumaCoefficients[3];
#if __arm64__
float32x4_t SDR(float32x4_t Lin);
float32x4_t aVec;
float32x4_t bVec;
float32x4_t ones;
float32x4_t luma;
#endif
};
Expand Down
19 changes: 10 additions & 9 deletions Sources/avifc/ToneMap/ReinhardJodieToneMapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ void ReinhardJodieToneMapper::Execute(float& r, float& g, float& b) {

float32x4_t ReinhardJodieToneMapper::Execute(const float32x4_t m) {
const float32x4_t v = vmulq_n_f32(m, exposure);
const float luma = vsumq_f32(vmulq_f32(v, vLumaVec));
const float luma = vaddvq_f32(vmulq_f32(v, vLumaVec));

const float32x4_t tv = vdivq_f32(v, vaddq_f32(vdupq_n_f32(1.0f), v));
const float32x4_t in = vdivq_f32(v, vdupq_n_f32(1.0f + luma));
Expand All @@ -94,24 +94,25 @@ float32x4x4_t ReinhardJodieToneMapper::Execute(const float32x4x4_t m) {
vmulq_n_f32(m.val[3], exposure),
};
float32x4_t Lin = {
vsumq_f32(vmulq_f32(exposured.val[0], vLumaVec)),
vsumq_f32(vmulq_f32(exposured.val[1], vLumaVec)),
vsumq_f32(vmulq_f32(exposured.val[2], vLumaVec)),
vsumq_f32(vmulq_f32(exposured.val[3], vLumaVec)),
vaddvq_f32(vmulq_f32(exposured.val[0], vLumaVec)),
vaddvq_f32(vmulq_f32(exposured.val[1], vLumaVec)),
vaddvq_f32(vmulq_f32(exposured.val[2], vLumaVec)),
vaddvq_f32(vmulq_f32(exposured.val[3], vLumaVec)),
};
Lin = vaddq_f32(Lin, vdupq_n_f32(1.0f));
Lin = vrecpeq_f32(Lin);

const float32x4_t tv1 = vdivq_f32(exposured.val[0], vaddq_f32(vdupq_n_f32(1.0f), exposured.val[0]));
const float32x4_t in1 = vdivq_f32(exposured.val[0], vdupq_n_f32(vgetq_lane_f32(Lin, 0)));
const float32x4_t in1 = vmulq_laneq_f32(exposured.val[0], Lin, 0);

const float32x4_t tv2 = vdivq_f32(exposured.val[1], vaddq_f32(vdupq_n_f32(1.0f), exposured.val[1]));
const float32x4_t in2 = vdivq_f32(exposured.val[1], vdupq_n_f32(vgetq_lane_f32(Lin, 1)));
const float32x4_t in2 = vmulq_laneq_f32(exposured.val[1], Lin, 1);

const float32x4_t tv3 = vdivq_f32(exposured.val[2], vaddq_f32(vdupq_n_f32(1.0f), exposured.val[2]));
const float32x4_t in3 = vdivq_f32(exposured.val[2], vdupq_n_f32(vgetq_lane_f32(Lin, 2)));
const float32x4_t in3 = vmulq_laneq_f32(exposured.val[2], Lin, 2);

const float32x4_t tv4 = vdivq_f32(exposured.val[3], vaddq_f32(vdupq_n_f32(1.0f), exposured.val[3]));
const float32x4_t in4 = vdivq_f32(exposured.val[3], vdupq_n_f32(vgetq_lane_f32(Lin, 3)));
const float32x4_t in4 = vmulq_laneq_f32(exposured.val[3], Lin, 3);

const float32x4x4_t res = {
lerpNEON(in1, tv1, tv1),
Expand Down
2 changes: 1 addition & 1 deletion Sources/avifc/ToneMap/ReinhardJodieToneMapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

class ReinhardJodieToneMapper: public ToneMapper {
public:
ReinhardJodieToneMapper(const bool extended = true): lumaVec { 0.2126, 0.7152, 0.0722 }, lumaMaximum(1.0f), exposure(1.2f) {
ReinhardJodieToneMapper(const bool extended = true): lumaVec { 0.2126, 0.7152, 0.0722 }, lumaMaximum(1.0f), exposure(1.0f) {
useExtended = extended;
#if __arm64__
vLumaVec = { lumaVec[0], lumaVec[1], lumaVec[2], 0.0f };
Expand Down
Loading

0 comments on commit 45548d6

Please sign in to comment.