From 5069e5039b5015f9d99eb75f299345eff3b6b900 Mon Sep 17 00:00:00 2001
From: Damian Schneider <daedae@gmx.ch>
Date: Thu, 21 Nov 2024 18:48:49 +0100
Subject: [PATCH] updated blend function to optimized 8bit calculation

- efficient color blend calculation is possible in just a few operations
- omitting min / max checks makes it faster on average
- using 8bit for the "blend" variable does not significantly influence the resulting color: the transition points are only slightly shifted and the results are very good (better than the original 16bit version, which used the old FastLED math with improper rounding)
- updated drawCircle and drawLine to use 8bit directly instead of 16bit with a shift
---
 wled00/FX_2Dfcn.cpp  | 42 +++++++++++++++++++++---------------------
 wled00/colors.cpp    | 30 +++++++++++-------------------
 wled00/fcn_declare.h |  5 ++---
 3 files changed, 34 insertions(+), 43 deletions(-)

diff --git a/wled00/FX_2Dfcn.cpp b/wled00/FX_2Dfcn.cpp
index a8aa1e0f16..1d54ef4a40 100644
--- a/wled00/FX_2Dfcn.cpp
+++ b/wled00/FX_2Dfcn.cpp
@@ -513,25 +513,25 @@ void Segment::drawCircle(uint16_t cx, uint16_t cy, uint8_t radius, uint32_t col,
     unsigned oldFade = 0;
     while (x < y) {
       float yf = sqrtf(float(rsq - x*x)); // needs to be floating point
-      uint16_t fade = float(0xFFFF) * (ceilf(yf) - yf); // how much color to keep
+      uint16_t fade = float(0xFF) * (ceilf(yf) - yf); // how much color to keep
       if (oldFade > fade) y--;
       oldFade = fade;
-      setPixelColorXY(cx+x, cy+y, color_blend16(col, getPixelColorXY(cx+x, cy+y), fade));
-      setPixelColorXY(cx-x, cy+y, color_blend16(col, getPixelColorXY(cx-x, cy+y), fade));
-      setPixelColorXY(cx+x, cy-y, color_blend16(col, getPixelColorXY(cx+x, cy-y), fade));
-      setPixelColorXY(cx-x, cy-y, color_blend16(col, getPixelColorXY(cx-x, cy-y), fade));
-      setPixelColorXY(cx+y, cy+x, color_blend16(col, getPixelColorXY(cx+y, cy+x), fade));
-      setPixelColorXY(cx-y, cy+x, color_blend16(col, getPixelColorXY(cx-y, cy+x), fade));
-      setPixelColorXY(cx+y, cy-x, color_blend16(col, getPixelColorXY(cx+y, cy-x), fade));
-      setPixelColorXY(cx-y, cy-x, color_blend16(col, getPixelColorXY(cx-y, cy-x), fade));
-      setPixelColorXY(cx+x, cy+y-1, color_blend16(getPixelColorXY(cx+x, cy+y-1), col, fade));
-      setPixelColorXY(cx-x, cy+y-1, color_blend16(getPixelColorXY(cx-x, cy+y-1), col, fade));
-      setPixelColorXY(cx+x, cy-y+1, color_blend16(getPixelColorXY(cx+x, cy-y+1), col, fade));
-      setPixelColorXY(cx-x, cy-y+1, color_blend16(getPixelColorXY(cx-x, cy-y+1), col, fade));
-      setPixelColorXY(cx+y-1, cy+x, color_blend16(getPixelColorXY(cx+y-1, cy+x), col, fade));
-      setPixelColorXY(cx-y+1, cy+x, color_blend16(getPixelColorXY(cx-y+1, cy+x), col, fade));
-      setPixelColorXY(cx+y-1, cy-x, color_blend16(getPixelColorXY(cx+y-1, cy-x), col, fade));
-      setPixelColorXY(cx-y+1, cy-x, color_blend16(getPixelColorXY(cx-y+1, cy-x), col, fade));
+      setPixelColorXY(cx+x, cy+y, color_blend(col, getPixelColorXY(cx+x, cy+y), fade));
+      setPixelColorXY(cx-x, cy+y, color_blend(col, getPixelColorXY(cx-x, cy+y), fade));
+      setPixelColorXY(cx+x, cy-y, color_blend(col, getPixelColorXY(cx+x, cy-y), fade));
+      setPixelColorXY(cx-x, cy-y, color_blend(col, getPixelColorXY(cx-x, cy-y), fade));
+      setPixelColorXY(cx+y, cy+x, color_blend(col, getPixelColorXY(cx+y, cy+x), fade));
+      setPixelColorXY(cx-y, cy+x, color_blend(col, getPixelColorXY(cx-y, cy+x), fade));
+      setPixelColorXY(cx+y, cy-x, color_blend(col, getPixelColorXY(cx+y, cy-x), fade));
+      setPixelColorXY(cx-y, cy-x, color_blend(col, getPixelColorXY(cx-y, cy-x), fade));
+      setPixelColorXY(cx+x, cy+y-1, color_blend(getPixelColorXY(cx+x, cy+y-1), col, fade));
+      setPixelColorXY(cx-x, cy+y-1, color_blend(getPixelColorXY(cx-x, cy+y-1), col, fade));
+      setPixelColorXY(cx+x, cy-y+1, color_blend(getPixelColorXY(cx+x, cy-y+1), col, fade));
+      setPixelColorXY(cx-x, cy-y+1, color_blend(getPixelColorXY(cx-x, cy-y+1), col, fade));
+      setPixelColorXY(cx+y-1, cy+x, color_blend(getPixelColorXY(cx+y-1, cy+x), col, fade));
+      setPixelColorXY(cx-y+1, cy+x, color_blend(getPixelColorXY(cx-y+1, cy+x), col, fade));
+      setPixelColorXY(cx+y-1, cy-x, color_blend(getPixelColorXY(cx+y-1, cy-x), col, fade));
+      setPixelColorXY(cx-y+1, cy-x, color_blend(getPixelColorXY(cx-y+1, cy-x), col, fade));
       x++;
     }
   } else {
@@ -608,13 +608,13 @@ void Segment::drawLine(uint16_t x0, uint16_t y0, uint16_t x1, uint16_t y1, uint3
     float gradient = x1-x0 == 0 ? 1.0f : float(y1-y0) / float(x1-x0);
     float intersectY = y0;
     for (int x = x0; x <= x1; x++) {
-      uint16_t keep = float(0xFFFF) * (intersectY-int(intersectY)); // how much color to keep
-      uint16_t seep = 0xFFFF - keep; // how much background to keep
+      uint8_t keep = float(0xFF) * (intersectY-int(intersectY)); // how much color to keep
+      uint8_t seep = 0xFF - keep; // how much background to keep
       int y = int(intersectY);
       if (steep) std::swap(x,y);  // temporaryly swap if steep
       // pixel coverage is determined by fractional part of y co-ordinate
-      setPixelColorXY(x, y, color_blend16(c, getPixelColorXY(x, y), keep));
-      setPixelColorXY(x+int(steep), y+int(!steep), color_blend16(c, getPixelColorXY(x+int(steep), y+int(!steep)), seep));
+      setPixelColorXY(x, y, color_blend(c, getPixelColorXY(x, y), keep));
+      setPixelColorXY(x+int(steep), y+int(!steep), color_blend(c, getPixelColorXY(x+int(steep), y+int(!steep)), seep));
       intersectY += gradient;
       if (steep) std::swap(x,y);  // restore if steep
     }
diff --git a/wled00/colors.cpp b/wled00/colors.cpp
index d7a3585791..8393d9b870 100644
--- a/wled00/colors.cpp
+++ b/wled00/colors.cpp
@@ -5,26 +5,18 @@
  */
 
 /*
- * base color blend function, used for 8bit and 16bit
+ * color blend function, based on FastLED blend function
+ * the calculation for each color channel is: result = (A*amountOfA + A + B*amountOfB + B) / 256, where A is the channel of color1, B is the channel of color2, amountOfB = blend and amountOfA = 255 - blend
  */
-uint32_t color_blend_base(uint32_t color1, uint32_t color2, uint16_t blend) {
-  if(blend == 0) return color1;
-  uint32_t w1 = W(color1);
-  uint32_t r1 = R(color1);
-  uint32_t g1 = G(color1);
-  uint32_t b1 = B(color1);
-
-  uint32_t w2 = W(color2);
-  uint32_t r2 = R(color2);
-  uint32_t g2 = G(color2);
-  uint32_t b2 = B(color2);
-
-  uint32_t w3 = ((w2 * blend) + (w1 * (0xFFFF - blend))) >> 16;
-  uint32_t r3 = ((r2 * blend) + (r1 * (0xFFFF - blend))) >> 16;
-  uint32_t g3 = ((g2 * blend) + (g1 * (0xFFFF - blend))) >> 16;
-  uint32_t b3 = ((b2 * blend) + (b1 * (0xFFFF - blend))) >> 16;
-
-  return RGBW32(r3, g3, b3, w3);
+uint32_t color_blend(uint32_t color1, uint32_t color2, uint8_t blend) { // blends two packed 32bit colors: blend 0 yields color1, blend 255 yields color2
+  // min / max blend checking is omitted: calls with 0 or 255 are rare, checking lowers overall performance
+  uint32_t rb1 = color1 & 0x00FF00FF; // bytes 0 and 2 of color1, each in the low byte of its own 16bit lane
+  uint32_t wg1 = (color1>>8) & 0x00FF00FF; // bytes 1 and 3 of color1, shifted down into the same lane layout
+  uint32_t rb2 = color2 & 0x00FF00FF; // bytes 0 and 2 of color2
+  uint32_t wg2 = (color2>>8) & 0x00FF00FF; // bytes 1 and 3 of color2
+  uint32_t rb3 = ((((rb1 << 8) | rb2) + (rb2 * blend) - (rb1 * blend)) >> 8) & 0x00FF00FF; // per lane: c1*256 + c2 + c2*blend - c1*blend = c1*(256-blend) + c2*(blend+1), never above 65535; >>8 moves each lane's high byte (the result) back to bytes 0 and 2
+  uint32_t wg3 = ((((wg1 << 8) | wg2) + (wg2 * blend) - (wg1 * blend))) & 0xFF00FF00; // same lane math; each lane's high byte already sits at bytes 1 and 3, so no shift is needed
+  return rb3 | wg3; // the two masks are disjoint, so OR recombines all four channels
 }
 
 /*
diff --git a/wled00/fcn_declare.h b/wled00/fcn_declare.h
index a1ee22a9d0..14b480c28d 100644
--- a/wled00/fcn_declare.h
+++ b/wled00/fcn_declare.h
@@ -78,9 +78,8 @@ class NeoGammaWLEDMethod {
 };
 #define gamma32(c) NeoGammaWLEDMethod::Correct32(c)
 #define gamma8(c)  NeoGammaWLEDMethod::rawGamma8(c)
-[[gnu::hot]] uint32_t color_blend_base(uint32_t c1, uint32_t c2 , uint16_t blend);
-inline uint32_t color_blend16(uint32_t c1, uint32_t c2, uint16_t b) { if (b == 0xFFFF) return c2; return color_blend_base(c1, c2, b); };
-inline uint32_t color_blend(uint32_t c1, uint32_t c2, uint8_t b) { if (b == 0xFF) return c2; return color_blend_base(c1, c2, (((uint16_t)b << 8))); };
+[[gnu::hot]] uint32_t color_blend(uint32_t c1, uint32_t c2 , uint8_t blend);
+inline uint32_t color_blend16(uint32_t c1, uint32_t c2, uint16_t b) { return color_blend(c1, c2, b >> 8); }; // 16bit wrapper: only the upper 8 bits of b are passed on as the blend amount
 [[gnu::hot]] uint32_t color_add(uint32_t,uint32_t, bool fast=false);
 [[gnu::hot]] uint32_t color_fade(uint32_t c1, uint8_t amount, bool video=false);
 CRGBPalette16 generateHarmonicRandomPalette(CRGBPalette16 &basepalette);