From be9da14ed8090487a724df153edec38b9c2f6b38 Mon Sep 17 00:00:00 2001 From: Peter Marcisovsky Date: Fri, 26 Jul 2024 10:50:17 +0200 Subject: [PATCH] feature(lvgl_port): Initial support for SIMD rendering in LVGL - Assembly source files for LVGL blend API integrated into lvgl_port - Initial assembly implementation of: - ARGB8888 simple fill for esp32s3 and esp32 - RGB565 simple fill for esp32 - Functionality and benchmark test app --- .gitignore | 1 + components/esp_lvgl_port/CMakeLists.txt | 21 + .../include/esp_lvgl_port_lv_blend.h | 90 ++ .../simd/lv_color_blend_to_argb8888_esp32.S | 81 ++ .../simd/lv_color_blend_to_argb8888_esp32s3.S | 328 ++++++ .../simd/lv_color_blend_to_rgb565_esp32.S | 149 +++ .../simd/lv_color_blend_to_rgb565_esp32s3.S | 149 +++ .../test_apps/{ => lvgl_port}/CMakeLists.txt | 0 .../{ => lvgl_port}/main/CMakeLists.txt | 0 .../{ => lvgl_port}/main/idf_component.yml | 4 +- .../test_apps/{ => lvgl_port}/main/test.c | 0 .../lvgl_port/sdkconfig.ci.asm_render | 6 + .../{ => lvgl_port}/sdkconfig.defaults | 0 .../test_apps/simd/CMakeLists.txt | 7 + .../esp_lvgl_port/test_apps/simd/README.md | 109 ++ .../test_apps/simd/main/CMakeLists.txt | 21 + .../test_apps/simd/main/Kconfig.projbuild | 5 + .../simd/main/lv_blend/include/lv_assert.h | 60 ++ .../simd/main/lv_blend/include/lv_color.h | 272 +++++ .../simd/main/lv_blend/include/lv_color_op.h | 93 ++ .../main/lv_blend/include/lv_draw_sw_blend.h | 74 ++ .../include/lv_draw_sw_blend_to_argb8888.h | 51 + .../include/lv_draw_sw_blend_to_rgb565.h | 51 + .../simd/main/lv_blend/include/lv_log.h | 45 + .../simd/main/lv_blend/include/lv_math.h | 56 + .../simd/main/lv_blend/include/lv_style.h | 48 + .../simd/main/lv_blend/include/lv_types.h | 51 + .../simd/main/lv_blend/src/lv_color.c | 66 ++ .../src/lv_draw_sw_blend_to_argb8888.c | 911 +++++++++++++++++ .../lv_blend/src/lv_draw_sw_blend_to_rgb565.c | 960 ++++++++++++++++++ .../test_apps/simd/main/lv_fill_common.h | 73 ++
.../test_apps/simd/main/test_app_main.c | 50 + .../simd/main/test_lv_fill_benchmark.c | 176 ++++ .../simd/main/test_lv_fill_functionality.c | 311 ++++++ .../test_apps/simd/sdkconfig.defaults | 3 + 35 files changed, 4320 insertions(+), 2 deletions(-) create mode 100644 components/esp_lvgl_port/include/esp_lvgl_port_lv_blend.h create mode 100644 components/esp_lvgl_port/src/lvgl9/simd/lv_color_blend_to_argb8888_esp32.S create mode 100644 components/esp_lvgl_port/src/lvgl9/simd/lv_color_blend_to_argb8888_esp32s3.S create mode 100644 components/esp_lvgl_port/src/lvgl9/simd/lv_color_blend_to_rgb565_esp32.S create mode 100644 components/esp_lvgl_port/src/lvgl9/simd/lv_color_blend_to_rgb565_esp32s3.S rename components/esp_lvgl_port/test_apps/{ => lvgl_port}/CMakeLists.txt (100%) rename components/esp_lvgl_port/test_apps/{ => lvgl_port}/main/CMakeLists.txt (100%) rename components/esp_lvgl_port/test_apps/{ => lvgl_port}/main/idf_component.yml (59%) rename components/esp_lvgl_port/test_apps/{ => lvgl_port}/main/test.c (100%) create mode 100644 components/esp_lvgl_port/test_apps/lvgl_port/sdkconfig.ci.asm_render rename components/esp_lvgl_port/test_apps/{ => lvgl_port}/sdkconfig.defaults (100%) create mode 100644 components/esp_lvgl_port/test_apps/simd/CMakeLists.txt create mode 100644 components/esp_lvgl_port/test_apps/simd/README.md create mode 100644 components/esp_lvgl_port/test_apps/simd/main/CMakeLists.txt create mode 100644 components/esp_lvgl_port/test_apps/simd/main/Kconfig.projbuild create mode 100644 components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_assert.h create mode 100644 components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_color.h create mode 100644 components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_color_op.h create mode 100644 components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_draw_sw_blend.h create mode 100644 components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_draw_sw_blend_to_argb8888.h 
create mode 100644 components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_draw_sw_blend_to_rgb565.h create mode 100644 components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_log.h create mode 100644 components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_math.h create mode 100644 components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_style.h create mode 100644 components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_types.h create mode 100644 components/esp_lvgl_port/test_apps/simd/main/lv_blend/src/lv_color.c create mode 100644 components/esp_lvgl_port/test_apps/simd/main/lv_blend/src/lv_draw_sw_blend_to_argb8888.c create mode 100644 components/esp_lvgl_port/test_apps/simd/main/lv_blend/src/lv_draw_sw_blend_to_rgb565.c create mode 100644 components/esp_lvgl_port/test_apps/simd/main/lv_fill_common.h create mode 100644 components/esp_lvgl_port/test_apps/simd/main/test_app_main.c create mode 100644 components/esp_lvgl_port/test_apps/simd/main/test_lv_fill_benchmark.c create mode 100644 components/esp_lvgl_port/test_apps/simd/main/test_lv_fill_functionality.c create mode 100644 components/esp_lvgl_port/test_apps/simd/sdkconfig.defaults diff --git a/.gitignore b/.gitignore index e8989d81..89afabd6 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ dependencies.lock doxygen_output/** dist __pycache__ +gdbinit diff --git a/components/esp_lvgl_port/CMakeLists.txt b/components/esp_lvgl_port/CMakeLists.txt index c9948677..8dc53693 100644 --- a/components/esp_lvgl_port/CMakeLists.txt +++ b/components/esp_lvgl_port/CMakeLists.txt @@ -76,6 +76,27 @@ if("usb_host_hid" IN_LIST build_components) list(APPEND ADD_LIBS idf::usb_host_hid) endif() +# Include SIMD assembly source code for rendering, only for (9.1.0 <= LVG_version < 9.2.0) and only for esp32 and esp32s3 +if((lvgl_ver VERSION_GREATER_EQUAL "9.1.0") AND (lvgl_ver VERSION_LESS "9.2.0")) + if(CONFIG_IDF_TARGET_ESP32 OR CONFIG_IDF_TARGET_ESP32S3) + message(VERBOSE 
"Compiling SIMD") + if(CONFIG_IDF_TARGET_ESP32S3) + file(GLOB_RECURSE ASM_SRCS ${PORT_PATH}/simd/*_esp32s3.S) # Select only esp32s3 related files + else() + file(GLOB_RECURSE ASM_SRCS ${PORT_PATH}/simd/*_esp32.S) # Select only esp32 related files + endif() + list(APPEND ADD_SRCS ${ASM_SRCS}) + + # Include component libraries, so lvgl component would see lvgl_port includes + idf_component_get_property(lvgl_lib ${lvgl_name} COMPONENT_LIB) + target_include_directories(${lvgl_lib} PRIVATE "include") + + # Force link .S files + set_property(TARGET ${COMPONENT_LIB} APPEND PROPERTY INTERFACE_LINK_LIBRARIES "-u lv_color_blend_to_argb8888_esp") + set_property(TARGET ${COMPONENT_LIB} APPEND PROPERTY INTERFACE_LINK_LIBRARIES "-u lv_color_blend_to_rgb565_esp") + endif() +endif() + # Here we create the real lvgl_port_lib add_library(lvgl_port_lib STATIC ${PORT_PATH}/esp_lvgl_port.c diff --git a/components/esp_lvgl_port/include/esp_lvgl_port_lv_blend.h b/components/esp_lvgl_port/include/esp_lvgl_port_lv_blend.h new file mode 100644 index 00000000..c00de1c0 --- /dev/null +++ b/components/esp_lvgl_port/include/esp_lvgl_port_lv_blend.h @@ -0,0 +1,90 @@ +/* + * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +/********************* + * INCLUDES + *********************/ + +#if !CONFIG_LV_DRAW_SW_ASM_CUSTOM +#warning "esp_lvgl_port_lv_blend.h included, but CONFIG_LV_DRAW_SW_ASM_CUSTOM not set. 
Assembly rendering not used" +#else + +/********************* + * DEFINES + *********************/ + +#ifndef LV_DRAW_SW_COLOR_BLEND_TO_ARGB8888 +#define LV_DRAW_SW_COLOR_BLEND_TO_ARGB8888(dsc) \ + _lv_color_blend_to_argb8888_esp(dsc) +#endif + +#ifndef LV_DRAW_SW_COLOR_BLEND_TO_RGB565 +#define LV_DRAW_SW_COLOR_BLEND_TO_RGB565(dsc) \ + _lv_color_blend_to_rgb565_esp(dsc) +#endif + + +/********************** + * TYPEDEFS + **********************/ + +typedef struct { + uint32_t opa; /* byte offset 0; not set by the fill wrappers in this header */ + void *dst_buf; /* byte offset 4; destination buffer written by the assembly */ + uint32_t dst_w; /* byte offset 8; width in pixels */ + uint32_t dst_h; /* byte offset 12; number of rows */ + uint32_t dst_stride; /* byte offset 16; destination row stride in bytes */ + const void *src_buf; /* byte offset 20; the fill wrappers point this at the fill color */ + uint32_t src_stride; /* byte offset 24; not set by the fill wrappers in this header */ + const lv_opa_t *mask_buf; /* byte offset 28; not set by the fill wrappers in this header */ + uint32_t mask_stride; /* byte offset 32; not set by the fill wrappers in this header */ +} asm_dsc_t; + +/********************** + * GLOBAL PROTOTYPES + **********************/ + +extern int lv_color_blend_to_argb8888_esp(asm_dsc_t *asm_dsc); + +static inline lv_result_t _lv_color_blend_to_argb8888_esp(_lv_draw_sw_blend_fill_dsc_t *dsc) +{ + asm_dsc_t asm_dsc = { + .dst_buf = dsc->dest_buf, + .dst_w = dsc->dest_w, + .dst_h = dsc->dest_h, + .dst_stride = dsc->dest_stride, + .src_buf = &dsc->color, + }; + + return lv_color_blend_to_argb8888_esp(&asm_dsc); +} + +extern int lv_color_blend_to_rgb565_esp(asm_dsc_t *asm_dsc); + +static inline lv_result_t _lv_color_blend_to_rgb565_esp(_lv_draw_sw_blend_fill_dsc_t *dsc) +{ + asm_dsc_t asm_dsc = { + .dst_buf = dsc->dest_buf, + .dst_w = dsc->dest_w, + .dst_h = dsc->dest_h, + .dst_stride = dsc->dest_stride, + .src_buf = &dsc->color, + }; + + return lv_color_blend_to_rgb565_esp(&asm_dsc); +} + +#endif // CONFIG_LV_DRAW_SW_ASM_CUSTOM + +#ifdef __cplusplus +} /*extern "C"*/ +#endif diff --git a/components/esp_lvgl_port/src/lvgl9/simd/lv_color_blend_to_argb8888_esp32.S b/components/esp_lvgl_port/src/lvgl9/simd/lv_color_blend_to_argb8888_esp32.S new file mode 100644 index 00000000..7d060675 --- /dev/null +++ b/components/esp_lvgl_port/src/lvgl9/simd/lv_color_blend_to_argb8888_esp32.S @@ -0,0 +1,81 @@ +/* + * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +// This is LVGL ARGB8888 simple fill for ESP32 processor + + .section .text + .align 4 + .global lv_color_blend_to_argb8888_esp + .type lv_color_blend_to_argb8888_esp,@function + +// The function implements the following C prototype: +// int lv_color_blend_to_argb8888_esp(asm_dsc_t * asm_dsc); + +// Input params +// +// asm_dsc - a2 + +// typedef struct { +// uint32_t opa; l32i 0 +// void * dst_buf; l32i 4 +// uint32_t dst_w; l32i 8 +// uint32_t dst_h; l32i 12 +// uint32_t dst_stride; l32i 16 +// const void * src_buf; l32i 20 +// uint32_t src_stride; l32i 24 +// const lv_opa_t * mask_buf; l32i 28 +// uint32_t mask_stride; l32i 32 +// } asm_dsc_t; + +lv_color_blend_to_argb8888_esp: + + entry a1, 32 + + l32i.n a3, a2, 4 // a3 - dest_buff + l32i.n a4, a2, 8 // a4 - dest_w in pixels (uint32_t each) + l32i.n a5, a2, 12 // a5 - dest_h in rows + l32i.n a6, a2, 16 // a6 - dest_stride in bytes + l32i.n a7, a2, 20 // a7 - src_buff (color) + l32i.n a8, a7, 0 // a8 - color as value + slli a11, a4, 2 // a11 - dest_w_bytes = sizeof(uint32_t) * dest_w + + movi a7, 0xff000000 // opacity mask + or a10, a7, a8 // apply opacity (force the top byte of the color to 0xff) + + srli a9, a4, 2 // a9 - loop_len = dest_w / 4 + sub a6, a6, a11 // dest_stride = dest_stride - dest_w_bytes + + .outer_loop: + + // Run main loop which sets 16 bytes in one loop run + loopnez a9, ._main_loop + s32i.n a10, a3, 0 // save 32 bits from a10 to dest_buff a3, offset 0 bytes + s32i.n a10, a3, 4 // save 32 bits from a10 to dest_buff a3, offset 4 bytes + s32i.n a10, a3, 8 // save 32 bits from a10 to dest_buff a3, offset 8 bytes + s32i.n a10, a3, 12 // save 32 bits from a10 to dest_buff a3, offset 12 bytes + addi.n a3, a3, 16 // increment dest_buff pointer by 16 bytes + ._main_loop: + + // Finish the remaining bytes out of the loop + // Check modulo 8 of the dest_w_bytes, if - then set 8 bytes + bbci a11, 3, _mod_8_check // branch if 3-rd bit of dest_w_bytes is clear + s32i.n a10, a3, 0 // save 32 bits from a10 to dest_buff a3, offset 0 bytes + s32i.n a10, a3, 4 //
save 32 bits from a10 to dest_buff a3, offset 4 bytes + addi.n a3, a3, 8 // increment dest_buff pointer by 8 bytes + _mod_8_check: + + // Check modulo 4 of the dest_w_bytes, if - then set 4 bytes + bbci a11, 2, _mod_4_check // branch if 2-nd bit of dest_w_bytes is clear + s32i.n a10, a3, 0 // save 32 bits from a10 to dest_buff a3, offset 0 bytes + addi.n a3, a3, 4 // increment dest_buff pointer by 4 bytes + _mod_4_check: + + add a3, a3, a6 // dest_buff + dest_stride + addi.n a5, a5, -1 // decrease the outer loop + bnez a5, .outer_loop + + movi.n a2, 1 // return LV_RESULT_OK = 1 + retw.n // return diff --git a/components/esp_lvgl_port/src/lvgl9/simd/lv_color_blend_to_argb8888_esp32s3.S b/components/esp_lvgl_port/src/lvgl9/simd/lv_color_blend_to_argb8888_esp32s3.S new file mode 100644 index 00000000..4d9f84f1 --- /dev/null +++ b/components/esp_lvgl_port/src/lvgl9/simd/lv_color_blend_to_argb8888_esp32s3.S @@ -0,0 +1,328 @@ +/* + * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + */ + +// This is LVGL ARGB8888 simple fill for ESP32S3 processor + + .section .text + .align 4 + .global lv_color_blend_to_argb8888_esp + .type lv_color_blend_to_argb8888_esp,@function +// The function implements the following C prototype: +// int lv_color_blend_to_argb8888_esp(asm_dsc_t * asm_dsc); + +// Input params +// +// asm_dsc - a2 + +// typedef struct { +// uint32_t opa; l32i 0 +// void * dst_buf; l32i 4 +// uint32_t dst_w; l32i 8 +// uint32_t dst_h; l32i 12 +// uint32_t dst_stride; l32i 16 +// const void * src_buf; l32i 20 +// uint32_t src_stride; l32i 24 +// const lv_opa_t * mask_buf; l32i 28 +// uint32_t mask_stride; l32i 32 +// } asm_dsc_t; + + +lv_color_blend_to_argb8888_esp: + + entry a1, 32 + ee.zero.q q0 // dummy TIE instruction, to enable the TIE + + l32i.n a3, a2, 4 // a3 - dest_buff + l32i.n a4, a2, 8 // a4 - dest_w in pixels (uint32_t each) + l32i.n a5, a2, 12 // a5 - dest_h in rows + l32i.n a6, a2, 16 // a6 -
dest_stride in bytes + l32i.n a7, a2, 20 // a7 - src_buff (color) + l32i.n a8, a7, 0 // a8 - color as value + slli a11, a4, 2 // a11 - dest_w_bytes = sizeof(uint32_t) * dest_w + + movi a7, 0xff000000 // opacity mask + or a10, a7, a8 // apply opacity (force the top byte of the color to 0xff) + + // Check for short lengths + // dest_w should be at least 8, otherwise it's not worth using esp32s3 TIE + bgei a4, 8, _esp32s3_implementation // Branch if dest_w is greater than or equal to 8 + j .lv_color_blend_to_argb8888_esp32_body // Jump to esp32 implementation + + _esp32s3_implementation: + + ee.movi.32.q q0, a10, 0 // fill q0 register from a10 by 32 bits + ee.movi.32.q q0, a10, 1 + ee.movi.32.q q0, a10, 2 + ee.movi.32.q q0, a10, 3 + + // Check dest_buff alignment + movi.n a7, 0xf // 0xf alignment mask (16-byte alignment) + and a15, a7, a3 // 16-byte alignment mask AND dest_buff pointer + bnez a15, _unaligned_by_4byte // branch if a15 not equals to zero + + // Check dest_stride alignment + and a15, a7, a6 // 16-byte alignment mask AND dest_stride + bnez a15, _unaligned_by_4byte // branch if a15 not equals to zero + + // Check dest_w_bytes alignment + and a15, a7, a11 // 16-byte alignment mask AND dest_w_bytes + bnez a15, _unaligned_by_4byte // branch if a15 not equals to zero + +//********************************************************************************************************************** + + // all aligned, the most ideal case + + // dest_buff (a3) - 16-byte aligned + // dest_stride (a6) - 16-byte multiple + // dest_w (a4) - 16-byte multiple + + srli a9, a4, 2 // a9 - loop_len = dest_w / 4 + sub a6, a6, a11 // dest_stride = dest_stride - dest_w_bytes + + .outer_loop_aligned: + + loopnez a9, ._main_loop_aligned // 16 bytes (4 argb8888) in one loop + ee.vst.128.ip q0, a3, 16 // store 16 bytes from q0 to dest_buff a3 + ._main_loop_aligned: + + add a3, a3, a6 // dest_buff + dest_stride + addi.n a5, a5, -1 // decrease the outer loop + bnez a5, .outer_loop_aligned + + movi.n a2, 1 // return LV_RESULT_OK =
1 + retw.n // return + + _unaligned_by_4byte: + + // Check dest_buff alignment + movi.n a7, 0x3 // 0x3 alignment mask (4-byte alignment) + and a15, a7, a3 // 4-byte alignment mask AND dest_buff pointer + bnez a15, _unaligned_by_1byte // branch if a15 not equals to zero + + // Check dest_stride alignment + and a15, a7, a6 // 4-byte alignment mask AND dest_stride + bnez a15, _unaligned_by_1byte // branch if a15 not equals to zero + +//********************************************************************************************************************** + + // either dest_buff or dest_stride is not 16-byte aligned + // dest_w is always 4-byte multiple + // all of the following are 4-byte aligned + + // dest_buff (a3) - 16-byte, or 4-byte aligned + // dest_stride (a6) - 16-byte, or 4-byte multiple + // dest_w (a4) - 4-byte multiple + + sub a6, a6, a11 // dest_stride = dest_stride - dest_w_bytes + movi.n a7, 0xf // 0xf alignment mask + + .outer_loop_aligned_by_4byte: + + // alignment check + and a15, a7, a3 // 0xf (alignment mask) AND dest_buff pointer + mov a12, a11 // a12 - local_dest_w_bytes = dest_w_bytes + beqz a15, _dest_buff_aligned_by_4byte // branch if a15 equals to zero + + + movi.n a14, 16 // a14 - 16 + sub a15, a14, a15 // a15 = 16 - unalignment (lower 4 bits of dest_buff address) + sub a12, a12, a15 // local_dest_w_bytes = len - (16 - unalignment) + + // keep setting until dest_buff is aligned + // Check modulo 8 of the unalignment, if - then set 8 bytes + bbci a15, 3, _aligning_mod_8_check_4byte // branch if 3-rd bit of unalignment a15 is clear + s32i.n a10, a3, 0 // save 32 bits from a10 to dest_buff a3, offset 0 bytes + s32i.n a10, a3, 4 // save 32 bits from a10 to dest_buff a3, offset 4 bytes + addi.n a3, a3, 8 // increment dest_buff pointer by 8 bytes + _aligning_mod_8_check_4byte: + + // Check modulo 4 of the unalignment, if - then set 4 bytes + bbci a15, 2, _aligning_mod_4_check_4byte // branch if 2-nd bit unalignment a15 is clear + s32i.n
a10, a3, 0 // save 32 bits from a10 to dest_buff a3, offset 0 bytes + addi.n a3, a3, 4 // increment dest_buff pointer by 4 bytes + _aligning_mod_4_check_4byte: + + _dest_buff_aligned_by_4byte: + // Calculate main loop_len + srli a9, a12, 4 // a9 - loop_len = local_dest_w_bytes / 16 + + // Main loop + loopnez a9, ._main_loop_unaligned_by_4byte // 16 bytes (4 argb8888) in one loop + ee.vst.128.ip q0, a3, 16 // store 16 bytes from q0 to dest_buff a3 + ._main_loop_unaligned_by_4byte: + + // Check modulo 8 of the dest_w, if - then set 8 bytes + bbci a12, 3, _aligned_mod_8_check_4byte // branch if 3-rd bit of local_dest_w_bytes a12 is clear + ee.vst.l.64.ip q0, a3, 8 // save lower 64 bits from q0 to dest_buff a3, increase dest_buff pointer by 8 bytes + _aligned_mod_8_check_4byte: + + // Check modulo 4 of the dest_w, if - then set 4 bytes + bbci a12, 2, _aligned_mod_4_check_4byte // branch if 2-nd bit of local_dest_w_bytes a12 is clear + s32i.n a10, a3, 0 // save 32 bits from a10 to dest_buff a3, offset 0 bytes + addi.n a3, a3, 4 // increment dest_buff pointer by 4 bytes + _aligned_mod_4_check_4byte: + + add a3, a3, a6 // dest_buff + dest_stride + addi.n a5, a5, -1 // decrease the outer loop + bnez a5, .outer_loop_aligned_by_4byte + + movi.n a2, 1 // return LV_RESULT_OK = 1 + retw.n // return + + _unaligned_by_1byte: + +//********************************************************************************************************************** + + // either dest_buff or dest_stride is not 4-byte aligned + // dest_w is always 4-byte multiple + + // dest_buff (a3) - 4-byte, or 1-byte aligned + // dest_stride (a6) - 4-byte, or 1-byte multiple + // dest_w (a4) - 4-byte multiple + + + mov a13, a3 + + ee.zero.q q1 // clear q1 + ee.orq q1, q1, q0 // copy q0 to q1 + sub a6, a6, a11 // dest_stride = dest_stride - dest_w_bytes + movi.n a7, 0xf // 0xf alignment mask + + .outer_loop_aligned_by_1byte: + + // alignment check + and a15, a7, a3 // 0xf (alignment mask) AND dest_buff pointer +
mov a12, a11 // a12 - local_dest_w_bytes = dest_w_bytes + beqz a15, _dest_buff_aligned_by_1byte // branch if a15 equals to zero + + + movi.n a14, 16 // a14 - 16 + sub a15, a14, a15 // a15 = 16 - unalignment (lower 4 bits of dest_buff address) + sub a12, a12, a15 // local_dest_w_bytes = len - (16 - unalignment) + + // keep setting until dest_buff is aligned + // Check modulo 8 of the unalignment, if - then set 8 bytes + bbci a15, 3, _aligning_mod_8_check_1byte// branch if 3-rd bit of unalignment a15 is clear + s32i.n a10, a3, 0 // save 32 bits from a10 to dest_buff a3, offset 0 bytes + s32i.n a10, a3, 4 // save 32 bits from a10 to dest_buff a3, offset 4 bytes + addi.n a3, a3, 8 // increment dest_buff pointer by 8 bytes + _aligning_mod_8_check_1byte: + + // Check modulo 4 of the unalignment, if - then set 4 bytes + bbci a15, 2, _aligning_mod_4_check_1byte // branch if 2-nd bit unalignment a15 is clear + s32i.n a10, a3, 0 // save 32 bits from a10 to dest_buff a3, offset 0 bytes + addi.n a3, a3, 4 // increment dest_buff pointer by 4 bytes + _aligning_mod_4_check_1byte: + + // Check modulo 2 and 1 (the following 2 ifs do the same correction) + // modulo 2 and modulo 1 requires the same action, just once + bbci a15, 1, _aligning_mod_2_check_1byte + s32i.n a10, a3, 0 // save 32 bits from a10 to dest_buff a3, offset 0 bytes + addi.n a3, a3, 4 // increment dest_buff pointer by 4 bytes + j _dest_buff_aligned_by_1byte + _aligning_mod_2_check_1byte: + + bbci a15, 0, _dest_buff_aligned_by_1byte + s32i.n a10, a3, 0 // save 32 bits from a10 to dest_buff a3, offset 0 bytes + addi.n a3, a3, 4 // increment dest_buff pointer by 4 bytes + _dest_buff_aligned_by_1byte: + + // Shift q reg, allowing to set 16-byte unaligned data + wur.sar_byte a15 // apply unalignment to the SAR_BYTE + ee.src.q q2, q0, q1 // shift concat.
of q0 and q1 to q2 by SAR_BYTE amount + + // Calculate main loop_len + srli a9, a12, 4 // a9 - loop_len = local_dest_w_bytes / 16 + + // Main loop + loopnez a9, ._main_loop_unaligned_by_1byte // 16 bytes (4 argb8888) in one loop + ee.vst.128.ip q2, a3, 16 // store 16 bytes from q2 to dest_buff a3 + ._main_loop_unaligned_by_1byte: + + // Firstly check mod 0 and mod 1 - correcting the aligned memory access + // Go back by 4 bytes, allow to correct after ee.vst.128.ip aligned access + addi a3, a3, -4 + + // Check modulo 2 of the local_dest_w_bytes, if - then set 2 bytes + // set SSSS in 0xSSSS0000 + bbci a12, 1, _aligned_mod_2_check_1byte // branch if 1-st bit of local_dest_w_bytes a12 is clear + srli a14, a10, 16 // shift a10 in 16, allowing s16i (saving of lower 16 bits) + s16i a14, a3, 2 // save 16 bits from a14 to dest_buff a3, offset 2 bytes + + // Check modulo 1 of the local_dest_w_bytes, if - then set 1 byte + // additionally set SS in 0x0000SS00 + bbci a12, 0, _aligned_end // branch if 0-th bit of local_dest_w_bytes a12 is clear + srli a14, a10, 8 // shift a10 in 8, allowing s8i + s8i a14, a3, 1 // save 8 bits from a14 to dest_buff a3, offset 1 byte + j _aligned_end + _aligned_mod_2_check_1byte: + + // Check modulo 1 of the local_dest_w_bytes, if - then set 1 byte + // set SS in 0xSS000000 + bbci a12, 0, _aligned_end // branch if 0-th bit of local_dest_w_bytes a12 is clear + srli a14, a10, 24 // shift a10 in 24, allowing s8i (saving of lower 8 bits) + s8i a14, a3, 3 // save 8 bits from a14 to dest_buff a3, offset 3 bytes + _aligned_end: + + addi a3, a3, 4 // Increase the pointer back, correction for addi a3, a3, -4 + + // Check modulo 8 of the local_dest_w_bytes, if - then set 8 bytes + bbci a12, 3, _aligned_mod_8_check_1byte // branch if 3-rd bit of local_dest_w_bytes a12 is clear + s32i.n a10, a3, 0 // save 32 bits from a10 to dest_buff a3, offset 0 bytes + s32i.n a10, a3, 4 // save 32 bits from a10 to dest_buff a3, offset 4 bytes + addi.n a3, a3, 8 // increment dest_buff pointer by 8 bytes + //ee.vst.l.64.ip q2, a3, 8 // save lower 64
bits from q0 to dest_buff a3, increase dest_buff pointer by 8 bytes + _aligned_mod_8_check_1byte: + + // Check modulo 4 of the dest_w, if - then set 4 bytes + bbci a12, 2, _aligned_mod_4_check_1byte // branch if 2-nd bit of local_dest_w_bytes a12 is clear + s32i.n a10, a3, 0 // save 32 bits from a10 to dest_buff a3, offset 0 bytes + addi.n a3, a3, 4 // increment dest_buff pointer by 4 bytes + _aligned_mod_4_check_1byte: + + add a3, a3, a6 // dest_buff + dest_stride + addi.n a5, a5, -1 // decrease the outer loop + bnez a5, .outer_loop_aligned_by_1byte + + movi.n a2, 1 // return LV_RESULT_OK = 1 + retw.n // return + + .lv_color_blend_to_argb8888_esp32_body: + + srli a9, a4, 2 // a9 - loop_len = dest_w / 4 + sub a6, a6, a11 // dest_stride = dest_stride - dest_w_bytes + + .outer_loop: + + // Run main loop which sets 16 bytes in one loop run + loopnez a9, ._main_loop + s32i.n a10, a3, 0 // save 32 bits from a10 to dest_buff a3, offset 0 bytes + s32i.n a10, a3, 4 // save 32 bits from a10 to dest_buff a3, offset 4 bytes + s32i.n a10, a3, 8 // save 32 bits from a10 to dest_buff a3, offset 8 bytes + s32i.n a10, a3, 12 // save 32 bits from a10 to dest_buff a3, offset 12 bytes + addi.n a3, a3, 16 // increment dest_buff pointer by 16 bytes + ._main_loop: + + // Finish the remaining bytes out of the loop + // Check modulo 8 of the dest_w_bytes, if - then set 8 bytes + bbci a11, 3, _mod_8_check // branch if 3-rd bit of dest_w_bytes is clear + s32i.n a10, a3, 0 // save 32 bits from a10 to dest_buff a3, offset 0 bytes + s32i.n a10, a3, 4 // save 32 bits from a10 to dest_buff a3, offset 4 bytes + addi.n a3, a3, 8 // increment dest_buff pointer by 8 bytes + _mod_8_check: + + // Check modulo 4 of the dest_w_bytes, if - then set 4 bytes + bbci a11, 2, _mod_4_check // branch if 2-nd bit of dest_w_bytes is clear + s32i.n a10, a3, 0 // save 32 bits from a10 to dest_buff a3, offset 0 bytes + addi.n a3, a3, 4 // increment dest_buff pointer by 4 bytes + _mod_4_check: + + add a3, a3, a6 // dest_buff + dest_stride + addi.n a5, a5, -1 // decrease the outer
loop + bnez a5, .outer_loop + + movi.n a2, 1 // return LV_RESULT_OK = 1 + retw.n // return diff --git a/components/esp_lvgl_port/src/lvgl9/simd/lv_color_blend_to_rgb565_esp32.S b/components/esp_lvgl_port/src/lvgl9/simd/lv_color_blend_to_rgb565_esp32.S new file mode 100644 index 00000000..07b5aa11 --- /dev/null +++ b/components/esp_lvgl_port/src/lvgl9/simd/lv_color_blend_to_rgb565_esp32.S @@ -0,0 +1,149 @@ +/* + * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + */ + +// This is LVGL RGB565 simple fill for ESP32 processor + + .section .text + .align 4 + .global lv_color_blend_to_rgb565_esp + .type lv_color_blend_to_rgb565_esp,@function +// The function implements the following C prototype: +// int lv_color_blend_to_rgb565_esp(asm_dsc_t * asm_dsc); + +// Input params +// +// asm_dsc - a2 + +// typedef struct { +// uint32_t opa; l32i 0 +// void * dst_buf; l32i 4 +// uint32_t dst_w; l32i 8 +// uint32_t dst_h; l32i 12 +// uint32_t dst_stride; l32i 16 +// const void * src_buf; l32i 20 +// uint32_t src_stride; l32i 24 +// const lv_opa_t * mask_buf; l32i 28 +// uint32_t mask_stride; l32i 32 +// } asm_dsc_t; + +lv_color_blend_to_rgb565_esp: + + entry a1, 32 + + l32i.n a3, a2, 4 // a3 - dest_buff + l32i.n a4, a2, 8 // a4 - dest_w in pixels (uint16_t each) + l32i.n a5, a2, 12 // a5 - dest_h in rows + l32i.n a6, a2, 16 // a6 - dest_stride in bytes + l32i.n a7, a2, 20 // a7 - src_buff (color) + l32i.n a8, a7, 0 // a8 - color as value + slli a11, a4, 1 // a11 - dest_w_bytes = sizeof(uint16_t) * dest_w + + // Convert color to rgb565 + l8ui a15, a7, 2 // red + movi.n a14, 0xf8 + and a13, a15, a14 + slli a10, a13, 8 + + l8ui a15, a7, 0 // blue + and a13, a15, a14 + srli a12, a13, 3 + add a10, a10, a12 + + l8ui a15, a7, 1 // green + movi.n a14, 0xfc + and a13, a15, a14 + slli a12, a13, 3 + add a12, a10, a12 // a12 = 16-bit color + + slli a10, a12, 16 + movi.n a13, 0xFFFF0000 + and a10, a10, a13 + or a10, a10, a12 // a10 = 32-bit
color (16bit + (16bit << 16)) + + movi.n a8, 0x3 // a8 = 0x3, dest_buff align mask + sub a6, a6, a11 // dest_stride = dest_stride - dest_w_bytes + + // cache init + // Prepare main loop length and dest_w_bytes + srli a9, a4, 4 // a9 = loop_len = dest_w / 16 (one main loop run stores 32 bytes = 16 pixels), for original dest_w + slli a11, a4, 1 // a11 = dest_w_bytes = sizeof(uint16_t) * dest_w + addi a4, a4, -1 // a4-- (decrement a4) + s32i.n a9, a1, 0 // cache.orig.loop_len + s32i.n a11, a1, 4 // cache.orig.dest_w_bytes + + // Prepare decreased main loop length and dest_w_bytes + srli a9, a4, 4 // a9 = loop_len = (dest_w - 1) / 16, calculate main loop_len for dest_w - 1 + slli a11, a4, 1 // a11 = dest_w_bytes = sizeof(uint16_t) * (dest_w - 1) + s32i.n a9, a1, 8 // cache.decr.loop_len + s32i.n a11, a1, 12 // cache.decr.dest_w_bytes + and a7, a8, a3 // a7 = dest_buff AND 0x3 (check if the address is 4-byte aligned) + + .outer_loop: + + // Check if the dest_buff is 4-byte aligned, if not - set one leading 16-bit pixel first + beqz a7, _dest_buff_2_byte_aligned // branch if a7 is equal to zero + s16i a12, a3, 0 // save 16 bits from 16-bit color a12 to dest_buff a3, offset 0 + l32i.n a9, a1, 8 // a9 = load cache.decr.loop_len + l32i.n a11, a1, 12 // a11 = load cache.decr.dest_w_bytes + addi.n a3, a3, 2 // increment dest_buff pointer by 2 + j _dest_buff_unaligned + _dest_buff_2_byte_aligned: + + l32i.n a9, a1, 0 // a9 = load cache.orig.loop_len + l32i.n a11, a1, 4 // a11 = load cache.orig.dest_w_bytes + + _dest_buff_unaligned: + + // Run main loop which sets 32 bytes in one loop run + loopnez a9, ._main_loop + s32i.n a10, a3, 0 // save 32 bits from 32-bit color a10 to dest_buff a3, offset 0 + s32i.n a10, a3, 4 // save 32 bits from 32-bit color a10 to dest_buff a3, offset 4 + s32i.n a10, a3, 8 // save 32 bits from 32-bit color a10 to dest_buff a3, offset 8 + s32i.n a10, a3, 12 // save 32 bits from 32-bit color a10 to dest_buff a3, offset 12 + s32i.n a10, a3, 16 // save 32 bits from 32-bit color a10 to dest_buff a3, offset 16 + s32i.n a10, a3, 20 //
save 32 bits from 32-bit color a10 to dest_buff a3, offset 20 + s32i.n a10, a3, 24 // save 32 bits from 32-bit color a10 to dest_buff a3, offset 24 + s32i.n a10, a3, 28 // save 32 bits from 32-bit color a10 to dest_buff a3, offset 28 + addi.n a3, a3, 32 // increment dest_buff pointer by 32 + ._main_loop: + + // Finish the remaining bytes out of the loop + // Check modulo 16 of the dest_w_bytes, if - then set 16 bytes + bbci a11, 4, _mod_16_check // branch if 4-th bit of dest_w_bytes is clear + s32i.n a10, a3, 0 // save 32 bits from 32-bit color a10 to dest_buff a3, offset 0 + s32i.n a10, a3, 4 // save 32 bits from 32-bit color a10 to dest_buff a3, offset 4 + s32i.n a10, a3, 8 // save 32 bits from 32-bit color a10 to dest_buff a3, offset 8 + s32i.n a10, a3, 12 // save 32 bits from 32-bit color a10 to dest_buff a3, offset 12 + addi.n a3, a3, 16 // increment dest_buff pointer by 16 + _mod_16_check: + + // Finish the remaining bytes out of the loop + // Check modulo 8 of the dest_w_bytes, if - then set 8 bytes + bbci a11, 3, _mod_8_check // branch if 3-rd bit of dest_w_bytes is clear + s32i.n a10, a3, 0 // save 32 bits from 32-bit color a10 to dest_buff a3, offset 0 + s32i.n a10, a3, 4 // save 32 bits from 32-bit color a10 to dest_buff a3, offset 4 + addi.n a3, a3, 8 // increment dest_buff pointer by 8 bytes + _mod_8_check: + + // Check modulo 4 of the dest_w_bytes, if - then set 4 bytes + bbci a11, 2, _mod_4_check // branch if 2-nd bit of dest_w_bytes is clear + s32i.n a10, a3, 0 // save 32 bits from 32-bit color a10 to dest_buff a3, offset 0 + addi.n a3, a3, 4 // increment dest_buff pointer by 4 + _mod_4_check: + + // Check modulo 2 of the dest_w_bytes, if - then set 2 bytes + bbci a11, 1, _mod_2_check // branch if 1-st bit of dest_w_bytes is clear + s16i a12, a3, 0 // save 16 bits from 16-bit color a12 to dest_buff a3, offset 0 + addi.n a3, a3, 2 // increment dest_buff pointer by 2 bytes + _mod_2_check: + + add a3, a3, a6 // dest_buff + dest_stride + addi.n a5, a5,
-1 // decrease the outer loop + and a7, a8, a3 // a7 = dest_buff AND 0x3 (check if the address is 4-byte aligned) + bnez a5, .outer_loop + + movi.n a2, 1 // return LV_RESULT_OK = 1 + retw.n // return diff --git a/components/esp_lvgl_port/src/lvgl9/simd/lv_color_blend_to_rgb565_esp32s3.S b/components/esp_lvgl_port/src/lvgl9/simd/lv_color_blend_to_rgb565_esp32s3.S new file mode 100644 index 00000000..07b5aa11 --- /dev/null +++ b/components/esp_lvgl_port/src/lvgl9/simd/lv_color_blend_to_rgb565_esp32s3.S @@ -0,0 +1,149 @@ +/* + * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + */ + +// This is LVGL RGB565 simple fill for ESP32 processor + + .section .text + .align 4 + .global lv_color_blend_to_rgb565_esp + .type lv_color_blend_to_rgb565_esp,@function +// The function implements the following C prototype: +// int lv_color_blend_to_rgb565_esp(asm_dsc_t * asm_dsc); + +// Input params +// +// asm_dsc - a2 + +// typedef struct { +// uint32_t opa; l32i 0 +// void * dst_buf; l32i 4 +// uint32_t dst_w; l32i 8 +// uint32_t dst_h; l32i 12 +// uint32_t dst_stride; l32i 16 +// const void * src_buf; l32i 20 +// uint32_t src_stride; l32i 24 +// const lv_opa_t * mask_buf; l32i 28 +// uint32_t mask_stride; l32i 32 +// } asm_dsc_t; + +lv_color_blend_to_rgb565_esp: + + entry a1, 32 + + l32i.n a3, a2, 4 // a3 - dest_buff + l32i.n a4, a2, 8 // a4 - dest_w in pixels (uint16_t each) + l32i.n a5, a2, 12 // a5 - dest_h in rows + l32i.n a6, a2, 16 // a6 - dest_stride in bytes + l32i.n a7, a2, 20 // a7 - src_buff (color) + l32i.n a8, a7, 0 // a8 - color as value + slli a11, a4, 1 // a11 - dest_w_bytes = sizeof(uint16_t) * dest_w + + // Convert color to rgb565 + l8ui a15, a7, 2 // red + movi.n a14, 0xf8 + and a13, a15, a14 + slli a10, a13, 8 + + l8ui a15, a7, 0 // blue + and a13, a15, a14 + srli a12, a13, 3 + add a10, a10, a12 + + l8ui a15, a7, 1 // green + movi.n a14, 0xfc + and a13, a15, a14 + slli a12, a13, 3 + add a12, a10, a12 // a12 =
16-bit color + + slli a10, a12, 16 + movi.n a13, 0xFFFF0000 + and a10, a10, a13 + or a10, a10, a12 // a10 = 32-bit color (16bit + (16bit << 16)) + + movi.n a8, 0x3 // a8 = 0x3, dest_buff align mask + sub a6, a6, a11 // dest_stride = dest_stride - dest_w_bytes + + // cache init + // Prepare main loop length and dest_w_bytes + srli a9, a4, 4 // a9 = loop_len = dest_w / 8, calculate main loop_len for original dest_w + slli a11, a4, 1 // a11 = dest_w_bytes = sizeof(uint16_t) * dest_w + addi a4, a4, -1 // a4-- (decrement a4) + s32i.n a9, a1, 0 // cache.orig.loop_len + s32i.n a11, a1, 4 // cache.orig.dest_w_bytes + + // Prepare decreased main loop length and dest_w_bytes + srli a9, a4, 4 // a9 = loop_len = dest_w / 8, calculate main loop_len for dest_w - 1 + slli a11, a4, 1 // a11 = dest_w_bytes = sizeof(uint16_t) * (dest_w - 1) + s32i.n a9, a1, 8 // cache.decr.loop_len + s32i.n a11, a1, 12 // cache.decr.dest_w_bytes + and a7, a8, a3 // a7 = dest_buff AND 0x3 (chck if the address is 4-byte aligned) + + .outer_loop: + + // Check if the des_buff is 2-byte aligned + beqz a7, _dest_buff_2_byte_aligned // branch if a7 is equal to zero + s16i a12, a3, 0 // save 16 bits from 16-bit color a12 to dest_buff a3, offset 0 + l32i.n a9, a1, 8 // a9 = load cache.decr.loop_len + l32i.n a11, a1, 12 // a11 = load cache.decr.dest_w_bytes + addi.n a3, a3, 2 // increment dest_buff pointer by 2 + j _dest_buff_unaligned + _dest_buff_2_byte_aligned: + + l32i.n a9, a1, 0 // a11 = load cache.orig.loop_len + l32i.n a11, a1, 4 // a11 = load cache.orig.dest_w_bytes + + _dest_buff_unaligned: + + // Run main loop which sets 16 bytes in one loop run + loopnez a9, ._main_loop + s32i.n a10, a3, 0 // save 32 bits from 32-bit color a10 to dest_buff a3, offset 0 + s32i.n a10, a3, 4 // save 32 bits from 32-bit color a10 to dest_buff a3, offset 4 + s32i.n a10, a3, 8 // save 32 bits from 32-bit color a10 to dest_buff a3, offset 8 + s32i.n a10, a3, 12 // save 32 bits from 32-bit color a10 to dest_buff a3, 
offset 12 + s32i.n a10, a3, 16 // save 32 bits from 32-bit color a10 to dest_buff a3, offset 16 + s32i.n a10, a3, 20 // save 32 bits from 32-bit color a10 to dest_buff a3, offset 20 + s32i.n a10, a3, 24 // save 32 bits from 32-bit color a10 to dest_buff a3, offset 24 + s32i.n a10, a3, 28 // save 32 bits from 32-bit color a10 to dest_buff a3, offset 28 + addi.n a3, a3, 32 // increment dest_buff pointer by 32 + ._main_loop: + + // Finish the remaining bytes out of the loop + // Check modulo 8 of the dest_w_bytes, if - then set 16 bytes + bbci a11, 4, _mod_16_check // branch if 2-nd bit of dest_w_bytes is clear + s32i.n a10, a3, 0 // save 32 bits from 32-bit color a10 to dest_buff a3, offset 0 + s32i.n a10, a3, 4 // save 32 bits from 32-bit color a10 to dest_buff a3, offset 4 + s32i.n a10, a3, 8 // save 32 bits from 32-bit color a10 to dest_buff a3, offset 8 + s32i.n a10, a3, 12 // save 32 bits from 32-bit color a10 to dest_buff a3, offset 12 + addi.n a3, a3, 16 // increment dest_buff pointer by 16 + _mod_16_check: + + // Finish the remaining bytes out of the loop + // Check modulo 8 of the dest_w_bytes, if - then set 8 bytes + bbci a11, 3, _mod_8_check // branch if 2-nd bit of dest_w_bytes is clear + s32i.n a10, a3, 0 // save 32 bits from 32-bit color a10 to dest_buff a3, offset 0 + s32i.n a10, a3, 4 // save 32 bits from 32-bit color a10 to dest_buff a3, offset 4 + addi.n a3, a3, 8 // increment dest_buff pointer by 8 bytes + _mod_8_check: + + // Check modulo 4 of the dest_w_bytes, if - then set 4 bytes + bbci a11, 2, _mod_4_check // branch if 2-nd bit of dest_w_bytes is clear + s32i.n a10, a3, 0 // save 32 bits from 32-bit color a10 to dest_buff a3, offset 0 + addi.n a3, a3, 4 // increment dest_buff pointer by 4 + _mod_4_check: + + // Check modulo 2 of the dest_w_bytes, if - then set 2 bytes + bbci a11, 1, _mod_2_check // branch if 1-st bit of dest_w_bytes is clear + s16i a12, a3, 0 // save 16 bits from 16-bit color a12 to dest_buff a3, offset 0 + addi.n a3, a3, 2 // 
increment dest_buff pointer by 2 bytes + _mod_2_check: + + add a3, a3, a6 // dest_buff + dest_stride + addi.n a5, a5, -1 // decrease the outer loop + and a7, a8, a3 // a7 = dest_buff AND 0x3 (chck if the address is 4-byte aligned) + bnez a5, .outer_loop + + movi.n a2, 1 // return LV_RESULT_OK = 1 + retw.n // return diff --git a/components/esp_lvgl_port/test_apps/CMakeLists.txt b/components/esp_lvgl_port/test_apps/lvgl_port/CMakeLists.txt similarity index 100% rename from components/esp_lvgl_port/test_apps/CMakeLists.txt rename to components/esp_lvgl_port/test_apps/lvgl_port/CMakeLists.txt diff --git a/components/esp_lvgl_port/test_apps/main/CMakeLists.txt b/components/esp_lvgl_port/test_apps/lvgl_port/main/CMakeLists.txt similarity index 100% rename from components/esp_lvgl_port/test_apps/main/CMakeLists.txt rename to components/esp_lvgl_port/test_apps/lvgl_port/main/CMakeLists.txt diff --git a/components/esp_lvgl_port/test_apps/main/idf_component.yml b/components/esp_lvgl_port/test_apps/lvgl_port/main/idf_component.yml similarity index 59% rename from components/esp_lvgl_port/test_apps/main/idf_component.yml rename to components/esp_lvgl_port/test_apps/lvgl_port/main/idf_component.yml index 1f1a2d1a..39731a01 100644 --- a/components/esp_lvgl_port/test_apps/main/idf_component.yml +++ b/components/esp_lvgl_port/test_apps/lvgl_port/main/idf_component.yml @@ -3,7 +3,7 @@ dependencies: idf: ">=4.4" esp_lcd_touch_tt21100: version: "^1" - override_path: "../../../lcd_touch/esp_lcd_touch_tt21100/" + override_path: "../../../../lcd_touch/esp_lcd_touch_tt21100/" esp_lvgl_port: version: "*" - override_path: "../../" + override_path: "../../../" diff --git a/components/esp_lvgl_port/test_apps/main/test.c b/components/esp_lvgl_port/test_apps/lvgl_port/main/test.c similarity index 100% rename from components/esp_lvgl_port/test_apps/main/test.c rename to components/esp_lvgl_port/test_apps/lvgl_port/main/test.c diff --git 
a/components/esp_lvgl_port/test_apps/lvgl_port/sdkconfig.ci.asm_render b/components/esp_lvgl_port/test_apps/lvgl_port/sdkconfig.ci.asm_render new file mode 100644 index 00000000..30815ee1 --- /dev/null +++ b/components/esp_lvgl_port/test_apps/lvgl_port/sdkconfig.ci.asm_render @@ -0,0 +1,6 @@ +# sdkconfig to enable the SIMD in the lvgl_port + +# Set custom ASM render and provide a header file with function prototypes +CONFIG_LV_DRAW_SW_ASM_CUSTOM=y +CONFIG_LV_USE_DRAW_SW_ASM=255 +CONFIG_LV_DRAW_SW_ASM_CUSTOM_INCLUDE="esp_lvgl_port_lv_blend.h" diff --git a/components/esp_lvgl_port/test_apps/sdkconfig.defaults b/components/esp_lvgl_port/test_apps/lvgl_port/sdkconfig.defaults similarity index 100% rename from components/esp_lvgl_port/test_apps/sdkconfig.defaults rename to components/esp_lvgl_port/test_apps/lvgl_port/sdkconfig.defaults diff --git a/components/esp_lvgl_port/test_apps/simd/CMakeLists.txt b/components/esp_lvgl_port/test_apps/simd/CMakeLists.txt new file mode 100644 index 00000000..735c48dc --- /dev/null +++ b/components/esp_lvgl_port/test_apps/simd/CMakeLists.txt @@ -0,0 +1,7 @@ +# The following lines of boilerplate have to be in your project's +# CMakeLists in this exact order for cmake to work correctly +cmake_minimum_required(VERSION 3.16) + +include($ENV{IDF_PATH}/tools/cmake/project.cmake) + +project(test_lvgl_simd) \ No newline at end of file diff --git a/components/esp_lvgl_port/test_apps/simd/README.md b/components/esp_lvgl_port/test_apps/simd/README.md new file mode 100644 index 00000000..7c579c4f --- /dev/null +++ b/components/esp_lvgl_port/test_apps/simd/README.md @@ -0,0 +1,109 @@ +# HW Acceleration using SIMD assembly instructions + +Test app accommodates two types of tests: [`functionality test`](#Functionality-test) and [`benchmark test`](#Benchmark-test). Both tests are provided per each function written in assembly (typically per each assembly file). 
Both test apps use a hard copy of the LVGL blending API, representing an ANSI implementation of the LVGL blending functions. The hard copy is present in the [`lv_blend`](main/lv_blend/) folder. + +Assembly source files can be found in the [`lvgl_port`](../../src/lvgl9/simd/) component. The header file with the assembly function prototypes is provided to LVGL using the Kconfig option `LV_DRAW_SW_ASM_CUSTOM_INCLUDE` and can be found in [`lvgl_port/include`](../../include/esp_lvgl_port_lv_blend.h) + +## Functionality test +* Tests whether the HW accelerated assembly version of an LVGL function provides the same results as the ANSI version +* A top-level flow of the functionality test: + * generate a test matrix with test parameters (matrix width, matrix height, memory alignment.. ) + * run an ANSI version of a DUT function with the generated input parameters + * run an assembly version of a DUT function with the same input parameters + * compare the results given by the ANSI and the assembly DUTs + * the results shall be the same + * repeat all the steps for a set of different input parameters, checking different matrix heights, widths.. + +## Benchmark test +* Tests whether the HW accelerated assembly version of an LVGL function provides a performance increase over the ANSI version +* A top-level flow of the benchmark test: + * generate a test matrix with test parameters (matrix width, matrix height, memory alignment.. 
) + * run an ANSI version of a DUT function with the generated input parameters multiple times (1000 times for example), while counting CPU cycles + * run an assembly version of a DUT function with the generated input parameters multiple times (1000 times for example), while counting CPU cycles + * compare the results given by the ANSI and the assembly DUTs + * the assembly version of the DUT function shall be faster than the ANSI version of the DUT function + +## Run the test app + +The test app is intended to be used only with esp32 and esp32s3 + + idf.py build + +## Example output + +``` +I (302) main_task: Started on CPU0 +I (322) main_task: Calling app_main() +______ _____ ______ _ _ +| _ \/ ___|| ___ \ | | | | +| | | |\ `--. | |_/ / | |_ ___ ___ | |_ +| | | | `--. \| __/ | __| / _ \/ __|| __| +| |/ / /\__/ /| | | |_ | __/\__ \| |_ +|___/ \____/ \_| \__| \___||___/ \__| + + +Press ENTER to see the list of tests. + + + +Here's the test menu, pick your combo: +(1) "Test fill functionality ARGB8888" [fill][functionality][ARGB8888] +(2) "Test fill functionality RGB565" [fill][functionality][RGB565] +(3) "LV Fill benchmark ARGB8888" [fill][benchmark][ARGB8888] +(4) "LV Fill benchmark RGB565" [fill][benchmark][RGB565] + +Enter test for running. +``` + +### Example of a functionality test run + +``` +Running Test fill functionality ARGB8888... +I (81512) LV Fill Functionality: running test for ARGB8888 color format +I (84732) LV Fill Functionality: test combinations: 31824 + +MALLOC_CAP_8BIT usage: Free memory delta: 0 Leak threshold: -800 +MALLOC_CAP_32BIT usage: Free memory delta: 0 Leak threshold: -800 +./main/test_lv_fill_functionality.c:102:Test fill functionality ARGB8888:PASS +Test ran in 3242ms +``` +The test gives a simple FAIL/PASS result after comparing the results of the two DUTs. +It also reports how many combinations of input parameters the functionality test ran with, `31824` in this case. 
+ +### Example of a benchmark test run + +``` +Running LV Fill benchmark ARGB8888... +I (163492) LV Fill Benchmark: running test for ARGB8888 color format +I (163522) LV Fill Benchmark: ASM ideal case: 5363.123 cycles for 128x128 matrix, 0.327 cycles per sample +I (163572) LV Fill Benchmark: ASM corner case: 7868.724 cycles for 127x127 matrix, 0.488 cycles per sample + +I (163732) LV Fill Benchmark: ANSI ideal case: 26219.137 cycles for 128x128 matrix, 1.600 cycles per sample +I (163902) LV Fill Benchmark: ANSI corner case: 25762.178 cycles for 127x127 matrix, 1.597 cycles per sample + +MALLOC_CAP_8BIT usage: Free memory delta: -220 Leak threshold: -800 +MALLOC_CAP_8BIT potential leak: Before 393820 bytes free, After 393600 bytes free (delta 220) +MALLOC_CAP_32BIT usage: Free memory delta: -220 Leak threshold: -800 +MALLOC_CAP_32BIT potential leak: Before 393820 bytes free, After 393600 bytes free (delta 220) +./main/test_lv_fill_benchmark.c:69:LV Fill benchmark ARGB8888:PASS +Test ran in 458ms +``` + +The test provides several pieces of information: +* Total number of CPU cycles for the whole DUT function + * `5363.123` cycles for the assembly DUT function + * `26219.137` cycles for the ANSI DUT function +* Number of CPU cycles per sample, which is basically the total number of CPU cycles divided by the test matrix area + * `0.327` cycles per sample for the assembly DUT + * `1.6` cycles per sample for the ANSI DUT + * In this case, the assembly implementation achieved a performance increase of around 4.9 times compared to the ANSI implementation. 
+* Range of the CPU cycles (a best case and a corner case scenarios) into which, the DUT functions are expected to fit into + * The execution time of those function highly depends on the input parameters, thus a boundary scenarios for input parameters shall be set + * An example of such a boundaries is in a table below + * The benchmark boundary would help us to get an performance expectations of the real scenarios + +Example of an best and corner case input parameters for benchmark test, for a color format `ARGB8888` +| Test matrix params | Memory alignment | Width | Height | Stride | +| :----------------- | :--------------- | :------------- | :------------- | :------------- | +| Best case | 16-byte aligned | Multiple of 8 | Multiple of 8 | Multiple of 8 | +| Corner case | 1-byte aligned | Not power of 2 | Not power of 2 | Not power of 2 | diff --git a/components/esp_lvgl_port/test_apps/simd/main/CMakeLists.txt b/components/esp_lvgl_port/test_apps/simd/main/CMakeLists.txt new file mode 100644 index 00000000..0a6d5da4 --- /dev/null +++ b/components/esp_lvgl_port/test_apps/simd/main/CMakeLists.txt @@ -0,0 +1,21 @@ +# Include SIMD assembly source code for rendering +if(CONFIG_IDF_TARGET_ESP32 OR CONFIG_IDF_TARGET_ESP32S3) + message(VERBOSE "Compiling SIMD") + set(PORT_PATH "../../../src/lvgl9") + + if(CONFIG_IDF_TARGET_ESP32S3) + file(GLOB_RECURSE ASM_SOURCES ${PORT_PATH}/simd/*_esp32s3.S) # Select only esp32s3 related files + else() + file(GLOB_RECURSE ASM_SOURCES ${PORT_PATH}/simd/*_esp32.S) # Select only esp32 related files + endif() +else() + message(WARNING "This test app is intended only for esp32 and esp32s3") +endif() + +# Hard copy of LV files +file(GLOB_RECURSE BLEND_SRCS lv_blend/src/*.c) + +idf_component_register(SRCS "test_app_main.c" "test_lv_fill_functionality.c" "test_lv_fill_benchmark.c" ${BLEND_SRCS} ${ASM_SOURCES} + INCLUDE_DIRS "lv_blend/include" "../../../include" + REQUIRES unity + WHOLE_ARCHIVE) diff --git 
a/components/esp_lvgl_port/test_apps/simd/main/Kconfig.projbuild b/components/esp_lvgl_port/test_apps/simd/main/Kconfig.projbuild new file mode 100644 index 00000000..8d2c596c --- /dev/null +++ b/components/esp_lvgl_port/test_apps/simd/main/Kconfig.projbuild @@ -0,0 +1,5 @@ +# Define CONFIG_LV_DRAW_SW_ASM_CUSTOM (available in the lvgl Kconfig) to enable the assembly source files by default + +config LV_DRAW_SW_ASM_CUSTOM + bool + default y \ No newline at end of file diff --git a/components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_assert.h b/components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_assert.h new file mode 100644 index 00000000..6fe5589d --- /dev/null +++ b/components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_assert.h @@ -0,0 +1,60 @@ +/* + * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + * + * This file is derived from the LVGL project. + * See https://github.com/lvgl/lvgl for details. 
+ */ + +/** + * @file lv_assert.h + * + */ + +#ifndef LV_ASSERT_H +#define LV_ASSERT_H + +#ifdef __cplusplus +extern "C" { +#endif + +/********************* + * INCLUDES + *********************/ +#include "lv_log.h" + +/********************* + * DEFINES + *********************/ + +/********************** + * TYPEDEFS + **********************/ + +/********************** + * GLOBAL PROTOTYPES + **********************/ + +/********************** + * MACROS + **********************/ +#define LV_ASSERT_HANDLER while(1); /*Halt by default*/ + +#define LV_ASSERT(expr) \ + do { \ + if(!(expr)) { \ + LV_LOG_ERROR("Asserted at expression: %s", #expr); \ + LV_ASSERT_HANDLER \ + } \ + } while(0) + +/*----------------- + * ASSERTS + *-----------------*/ + +#ifdef __cplusplus +} /*extern "C"*/ +#endif + +#endif /*LV_ASSERT_H*/ diff --git a/components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_color.h b/components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_color.h new file mode 100644 index 00000000..ecb6017e --- /dev/null +++ b/components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_color.h @@ -0,0 +1,272 @@ +/* + * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + * + * This file is derived from the LVGL project. + * See https://github.com/lvgl/lvgl for details. + */ + +/** + * @file lv_color.h + * + */ + +#ifndef LV_COLOR_H +#define LV_COLOR_H + +#ifdef __cplusplus +extern "C" { +#endif + +/********************* + * INCLUDES + *********************/ +#include "stdint.h" +#include "stdbool.h" +#include "sdkconfig.h" + +/********************* + * DEFINES + *********************/ +#define LV_ATTRIBUTE_FAST_MEM + +#ifndef LV_COLOR_MIX_ROUND_OFS +#ifdef CONFIG_LV_COLOR_MIX_ROUND_OFS +#define LV_COLOR_MIX_ROUND_OFS CONFIG_LV_COLOR_MIX_ROUND_OFS +#else +#define LV_COLOR_MIX_ROUND_OFS 0 +#endif +#endif + +/** + * Opacity percentages. 
+ */ + +typedef enum { + LV_OPA_TRANSP = 0, + LV_OPA_0 = 0, + LV_OPA_10 = 25, + LV_OPA_20 = 51, + LV_OPA_30 = 76, + LV_OPA_40 = 102, + LV_OPA_50 = 127, + LV_OPA_60 = 153, + LV_OPA_70 = 178, + LV_OPA_80 = 204, + LV_OPA_90 = 229, + LV_OPA_100 = 255, + LV_OPA_COVER = 255, +} lv_opa_t; + +#define LV_OPA_MIN 2 /*Opacities below this will be transparent*/ +#define LV_OPA_MAX 253 /*Opacities above this will fully cover*/ + +#define LV_COLOR_FORMAT_GET_BPP(cf) ( \ + (cf) == LV_COLOR_FORMAT_I1 ? 1 : \ + (cf) == LV_COLOR_FORMAT_A1 ? 1 : \ + (cf) == LV_COLOR_FORMAT_I2 ? 2 : \ + (cf) == LV_COLOR_FORMAT_A2 ? 2 : \ + (cf) == LV_COLOR_FORMAT_I4 ? 4 : \ + (cf) == LV_COLOR_FORMAT_A4 ? 4 : \ + (cf) == LV_COLOR_FORMAT_L8 ? 8 : \ + (cf) == LV_COLOR_FORMAT_A8 ? 8 : \ + (cf) == LV_COLOR_FORMAT_I8 ? 8 : \ + (cf) == LV_COLOR_FORMAT_AL88 ? 16 : \ + (cf) == LV_COLOR_FORMAT_RGB565 ? 16 : \ + (cf) == LV_COLOR_FORMAT_RGB565A8 ? 16 : \ + (cf) == LV_COLOR_FORMAT_ARGB8565 ? 24 : \ + (cf) == LV_COLOR_FORMAT_RGB888 ? 24 : \ + (cf) == LV_COLOR_FORMAT_ARGB8888 ? 32 : \ + (cf) == LV_COLOR_FORMAT_XRGB8888 ? 
32 : \ + 0 \ + ) + +/********************** + * TYPEDEFS + **********************/ + +typedef struct { + uint8_t blue; + uint8_t green; + uint8_t red; +} lv_color_t; + +typedef struct { + uint16_t blue : 5; + uint16_t green : 6; + uint16_t red : 5; +} lv_color16_t; + +typedef struct { + uint8_t blue; + uint8_t green; + uint8_t red; + uint8_t alpha; +} lv_color32_t; + +typedef struct { + uint16_t h; + uint8_t s; + uint8_t v; +} lv_color_hsv_t; + +typedef struct { + uint8_t lumi; + uint8_t alpha; +} lv_color16a_t; + +typedef enum { + LV_COLOR_FORMAT_UNKNOWN = 0, + + LV_COLOR_FORMAT_RAW = 0x01, + LV_COLOR_FORMAT_RAW_ALPHA = 0x02, + + /*<=1 byte (+alpha) formats*/ + LV_COLOR_FORMAT_L8 = 0x06, + LV_COLOR_FORMAT_I1 = 0x07, + LV_COLOR_FORMAT_I2 = 0x08, + LV_COLOR_FORMAT_I4 = 0x09, + LV_COLOR_FORMAT_I8 = 0x0A, + LV_COLOR_FORMAT_A8 = 0x0E, + + /*2 byte (+alpha) formats*/ + LV_COLOR_FORMAT_RGB565 = 0x12, + LV_COLOR_FORMAT_ARGB8565 = 0x13, /**< Not supported by sw renderer yet. */ + LV_COLOR_FORMAT_RGB565A8 = 0x14, /**< Color array followed by Alpha array*/ + LV_COLOR_FORMAT_AL88 = 0x15, /**< L8 with alpha >*/ + + /*3 byte (+alpha) formats*/ + LV_COLOR_FORMAT_RGB888 = 0x0F, + LV_COLOR_FORMAT_ARGB8888 = 0x10, + LV_COLOR_FORMAT_XRGB8888 = 0x11, + + /*Formats not supported by software renderer but kept here so GPU can use it*/ + LV_COLOR_FORMAT_A1 = 0x0B, + LV_COLOR_FORMAT_A2 = 0x0C, + LV_COLOR_FORMAT_A4 = 0x0D, + + /* reference to https://wiki.videolan.org/YUV/ */ + /*YUV planar formats*/ + LV_COLOR_FORMAT_YUV_START = 0x20, + LV_COLOR_FORMAT_I420 = LV_COLOR_FORMAT_YUV_START, /*YUV420 planar(3 plane)*/ + LV_COLOR_FORMAT_I422 = 0x21, /*YUV422 planar(3 plane)*/ + LV_COLOR_FORMAT_I444 = 0x22, /*YUV444 planar(3 plane)*/ + LV_COLOR_FORMAT_I400 = 0x23, /*YUV400 no chroma channel*/ + LV_COLOR_FORMAT_NV21 = 0x24, /*YUV420 planar(2 plane), UV plane in 'V, U, V, U'*/ + LV_COLOR_FORMAT_NV12 = 0x25, /*YUV420 planar(2 plane), UV plane in 'U, V, U, V'*/ + + /*YUV packed formats*/ + 
LV_COLOR_FORMAT_YUY2 = 0x26, /*YUV422 packed like 'Y U Y V'*/ + LV_COLOR_FORMAT_UYVY = 0x27, /*YUV422 packed like 'U Y V Y'*/ + + LV_COLOR_FORMAT_YUV_END = LV_COLOR_FORMAT_UYVY, + + /*Color formats in which LVGL can render*/ +#if LV_COLOR_DEPTH == 8 + LV_COLOR_FORMAT_NATIVE = LV_COLOR_FORMAT_L8, + LV_COLOR_FORMAT_NATIVE_WITH_ALPHA = LV_COLOR_FORMAT_AL88, +#elif LV_COLOR_DEPTH == 16 + LV_COLOR_FORMAT_NATIVE = LV_COLOR_FORMAT_RGB565, + LV_COLOR_FORMAT_NATIVE_WITH_ALPHA = LV_COLOR_FORMAT_RGB565A8, +#elif LV_COLOR_DEPTH == 24 + LV_COLOR_FORMAT_NATIVE = LV_COLOR_FORMAT_RGB888, + LV_COLOR_FORMAT_NATIVE_WITH_ALPHA = LV_COLOR_FORMAT_ARGB8888, +#elif LV_COLOR_DEPTH == 32 + LV_COLOR_FORMAT_NATIVE = LV_COLOR_FORMAT_XRGB8888, + LV_COLOR_FORMAT_NATIVE_WITH_ALPHA = LV_COLOR_FORMAT_ARGB8888, +#endif +} lv_color_format_t; + +/********************** + * MACROS + **********************/ + +#define LV_COLOR_MAKE(r8, g8, b8) {b8, g8, r8} + +#define LV_OPA_MIX2(a1, a2) (((int32_t)(a1) * (a2)) >> 8) +#define LV_OPA_MIX3(a1, a2, a3) (((int32_t)(a1) * (a2) * (a3)) >> 16) + +/********************** + * GLOBAL PROTOTYPES + **********************/ + +/** + * Create an ARGB8888 color from RGB888 + alpha + * @param color an RGB888 color + * @param opa the alpha value + * @return the ARGB8888 color + */ +lv_color32_t lv_color_to_32(lv_color_t color, lv_opa_t opa); + +/** + * Convert am RGB888 color to RGB565 stored in `uint16_t` + * @param color and RGB888 color + * @return `color` as RGB565 on `uin16_t` + */ +uint16_t lv_color_to_u16(lv_color_t color); + +/** + * Convert am RGB888 color to XRGB8888 stored in `uint32_t` + * @param color and RGB888 color + * @return `color` as XRGB8888 on `uin32_t` (the alpha channel is always set to 0xFF) + */ +uint32_t lv_color_to_u32(lv_color_t color); + +/** + * Mix two RGB565 colors + * @param c1 the first color (typically the foreground color) + * @param c2 the second color (typically the background color) + * @param mix 0..255, or LV_OPA_0/10/20... 
+ * @return mix == 0: c2 + * mix == 255: c1 + * mix == 128: 0.5 x c1 + 0.5 x c2 + */ +static inline uint16_t LV_ATTRIBUTE_FAST_MEM lv_color_16_16_mix(uint16_t c1, uint16_t c2, uint8_t mix) +{ + if (mix == 255) { + return c1; + } + if (mix == 0) { + return c2; + } + if (c1 == c2) { + return c1; + } + + uint16_t ret; + + /* Source: https://stackoverflow.com/a/50012418/1999969*/ + mix = (uint32_t)((uint32_t)mix + 4) >> 3; + + /*0x7E0F81F = 0b00000111111000001111100000011111*/ + uint32_t bg = (uint32_t)(c2 | ((uint32_t)c2 << 16)) & 0x7E0F81F; + uint32_t fg = (uint32_t)(c1 | ((uint32_t)c1 << 16)) & 0x7E0F81F; + uint32_t result = ((((fg - bg) * mix) >> 5) + bg) & 0x7E0F81F; + ret = (uint16_t)(result >> 16) | result; + + return ret; +} + +/** + * Check if two ARGB8888 color are equal + * @param c1 the first color + * @param c2 the second color + * @return true: equal + */ +static inline bool lv_color32_eq(lv_color32_t c1, lv_color32_t c2) +{ + return *((uint32_t *)&c1) == *((uint32_t *)&c2); +} + +/********************** + * MACROS + **********************/ + +#include "lv_color_op.h" + +#ifdef __cplusplus +} /*extern "C"*/ +#endif + +#endif /*LV_COLOR_H*/ diff --git a/components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_color_op.h b/components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_color_op.h new file mode 100644 index 00000000..083fcd83 --- /dev/null +++ b/components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_color_op.h @@ -0,0 +1,93 @@ +/* + * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + * + * This file is derived from the LVGL project. + * See https://github.com/lvgl/lvgl for details. 
+ */ + +/** + * @file lv_color_op.h + * + */ + +#ifndef LV_COLOR_OP_H +#define LV_COLOR_OP_H + +#ifdef __cplusplus +extern "C" { +#endif + +/********************* + * INCLUDES + *********************/ +#include "lv_math.h" +#include "lv_color.h" +#include "lv_types.h" + +/********************* + * DEFINES + *********************/ + +/********************** + * TYPEDEFS + **********************/ + +/********************** + * GLOBAL PROTOTYPES + **********************/ + +/** + * Mix two colors with a given ratio. + * @param c1 the first color to mix (usually the foreground) + * @param c2 the second color to mix (usually the background) + * @param mix The ratio of the colors. 0: full `c2`, 255: full `c1`, 127: half `c1` and half`c2` + * @return the mixed color + */ +static inline lv_color_t LV_ATTRIBUTE_FAST_MEM lv_color_mix(lv_color_t c1, lv_color_t c2, uint8_t mix) +{ + lv_color_t ret; + + ret.red = LV_UDIV255((uint16_t)c1.red * mix + c2.red * (255 - mix) + LV_COLOR_MIX_ROUND_OFS); + ret.green = LV_UDIV255((uint16_t)c1.green * mix + c2.green * (255 - mix) + LV_COLOR_MIX_ROUND_OFS); + ret.blue = LV_UDIV255((uint16_t)c1.blue * mix + c2.blue * (255 - mix) + LV_COLOR_MIX_ROUND_OFS); + return ret; +} + +/** + * Mix two ARGB8888 colors, using the alpha of the foreground color as the mix ratio. + * @param fg the foreground color; `fg.alpha >= LV_OPA_MAX` returns `fg` as is, `fg.alpha <= LV_OPA_MIN` returns `bg` as is + * @param bg the background color + * @return the mixed color; each channel is `(fg * fg.alpha + bg * (255 - fg.alpha)) >> 8` + * @note Use bg.alpha in the return value + * @note Use fg.alpha as mix ratio + */ +static inline lv_color32_t lv_color_mix32(lv_color32_t fg, lv_color32_t bg) +{ + if (fg.alpha >= LV_OPA_MAX) { + fg.alpha = bg.alpha; + return fg; + } + if (fg.alpha <= LV_OPA_MIN) { + return bg; + } + bg.red = (uint32_t)((uint32_t)fg.red * fg.alpha + (uint32_t)bg.red * (255 - fg.alpha)) >> 8; + bg.green = (uint32_t)((uint32_t)fg.green * fg.alpha + (uint32_t)bg.green * (255 - fg.alpha)) >> 8; + bg.blue = (uint32_t)((uint32_t)fg.blue * fg.alpha + (uint32_t)bg.blue * (255 - fg.alpha)) >> 8; + return bg; +} + +/********************** + * PREDEFINED COLORS + **********************/ + +/********************** + * MACROS + 
**********************/ + +#ifdef __cplusplus +} /*extern "C"*/ +#endif + +#endif /*LV_COLOR_OP_H*/ diff --git a/components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_draw_sw_blend.h b/components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_draw_sw_blend.h new file mode 100644 index 00000000..01c5f769 --- /dev/null +++ b/components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_draw_sw_blend.h @@ -0,0 +1,74 @@ +/* + * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + * + * This file is derived from the LVGL project. + * See https://github.com/lvgl/lvgl for details. + */ + +/** + * @file lv_draw_sw_blend.h + * + */ + +#ifndef LV_DRAW_SW_BLEND_H +#define LV_DRAW_SW_BLEND_H + +#ifdef __cplusplus +extern "C" { +#endif + +/********************* + * INCLUDES + *********************/ +#include "lv_style.h" +#include "lv_color.h" + +/********************* + * DEFINES + *********************/ + +/********************** + * TYPEDEFS + **********************/ + +typedef struct { + void *dest_buf; + int32_t dest_w; + int32_t dest_h; + int32_t dest_stride; + const lv_opa_t *mask_buf; + int32_t mask_stride; + lv_color_t color; + lv_opa_t opa; + bool use_asm; +} _lv_draw_sw_blend_fill_dsc_t; + +typedef struct { + void *dest_buf; + int32_t dest_w; + int32_t dest_h; + int32_t dest_stride; + const lv_opa_t *mask_buf; + int32_t mask_stride; + const void *src_buf; + int32_t src_stride; + lv_color_format_t src_color_format; + lv_opa_t opa; + lv_blend_mode_t blend_mode; +} _lv_draw_sw_blend_image_dsc_t; + +/********************** + * GLOBAL PROTOTYPES + **********************/ + +/********************** + * MACROS + **********************/ + +#ifdef __cplusplus +} /*extern "C"*/ +#endif + +#endif /*LV_DRAW_SW_BLEND_H*/ diff --git a/components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_draw_sw_blend_to_argb8888.h 
b/components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_draw_sw_blend_to_argb8888.h new file mode 100644 index 00000000..c6c94880 --- /dev/null +++ b/components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_draw_sw_blend_to_argb8888.h @@ -0,0 +1,51 @@ +/* + * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + * + * This file is derived from the LVGL project. + * See https://github.com/lvgl/lvgl for details. + */ + +/** + * @file lv_draw_sw_blend_argb8888.h + * + */ + +#ifndef LV_DRAW_SW_BLEND_ARGB8888_H +#define LV_DRAW_SW_BLEND_ARGB8888_H + +#ifdef __cplusplus +extern "C" { +#endif + +/********************* + * INCLUDES + *********************/ +#include "lv_draw_sw_blend.h" + +/********************* + * DEFINES + *********************/ + +/********************** + * TYPEDEFS + **********************/ + +/********************** + * GLOBAL PROTOTYPES + **********************/ + +void /* LV_ATTRIBUTE_FAST_MEM */ lv_draw_sw_blend_color_to_argb8888(_lv_draw_sw_blend_fill_dsc_t *dsc); + +void /* LV_ATTRIBUTE_FAST_MEM */ lv_draw_sw_blend_image_to_argb8888(_lv_draw_sw_blend_image_dsc_t *dsc); + +/********************** + * MACROS + **********************/ + +#ifdef __cplusplus +} /*extern "C"*/ +#endif + +#endif /*LV_DRAW_SW_BLEND_ARGB8888_H*/ diff --git a/components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_draw_sw_blend_to_rgb565.h b/components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_draw_sw_blend_to_rgb565.h new file mode 100644 index 00000000..e8c1873d --- /dev/null +++ b/components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_draw_sw_blend_to_rgb565.h @@ -0,0 +1,51 @@ +/* + * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + * + * This file is derived from the LVGL project. + * See https://github.com/lvgl/lvgl for details. 
+ */ + +/** + * @file lv_draw_sw_blend_rgb565.h + * + */ + +#ifndef LV_DRAW_SW_BLEND_RGB565_H +#define LV_DRAW_SW_BLEND_RGB565_H + +#ifdef __cplusplus +extern "C" { +#endif + +/********************* + * INCLUDES + *********************/ +#include "lv_draw_sw_blend.h" + +/********************* + * DEFINES + *********************/ + +/********************** + * TYPEDEFS + **********************/ + +/********************** + * GLOBAL PROTOTYPES + **********************/ + +void /* LV_ATTRIBUTE_FAST_MEM */ lv_draw_sw_blend_color_to_rgb565(_lv_draw_sw_blend_fill_dsc_t *dsc); + +void /* LV_ATTRIBUTE_FAST_MEM */ lv_draw_sw_blend_image_to_rgb565(_lv_draw_sw_blend_image_dsc_t *dsc); + +/********************** + * MACROS + **********************/ + +#ifdef __cplusplus +} /*extern "C"*/ +#endif + +#endif /*LV_DRAW_SW_BLEND_RGB565_H*/ diff --git a/components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_log.h b/components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_log.h new file mode 100644 index 00000000..c7250c96 --- /dev/null +++ b/components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_log.h @@ -0,0 +1,45 @@ +/* + * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + * + * This file is derived from the LVGL project. + * See https://github.com/lvgl/lvgl for details. + */ + +/** + * @file lv_log.h + * + */ + +#ifndef LV_LOG_H +#define LV_LOG_H + +#ifdef __cplusplus +extern "C" { +#endif + +/********************* + * INCLUDES + *********************/ +#include "lv_types.h" + +/********************* + * DEFINES + *********************/ + +/*Do nothing if `LV_USE_LOG 0`*/ +#define _lv_log_add(level, file, line, ...) +#define LV_LOG_TRACE(...) do {}while(0) +#define LV_LOG_INFO(...) do {}while(0) +#define LV_LOG_WARN(...) do {}while(0) +#define LV_LOG_ERROR(...) do {}while(0) +#define LV_LOG_USER(...) do {}while(0) +#define LV_LOG(...) 
do {}while(0) + + +#ifdef __cplusplus +} /*extern "C"*/ +#endif + +#endif /*LV_LOG_H*/ diff --git a/components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_math.h b/components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_math.h new file mode 100644 index 00000000..52508a8e --- /dev/null +++ b/components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_math.h @@ -0,0 +1,56 @@ +/* + * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + * + * This file is derived from the LVGL project. + * See https://github.com/lvgl/lvgl for details. + */ + +/** + * @file lv_math.h + * + */ + +#ifndef LV_MATH_H +#define LV_MATH_H + +#ifdef __cplusplus +extern "C" { +#endif + +/********************* + * INCLUDES + *********************/ +#include "lv_types.h" + +/********************* + * DEFINES + *********************/ + +/********************** + * MACROS + **********************/ +#define LV_MIN(a, b) ((a) < (b) ? (a) : (b)) +#define LV_MIN3(a, b, c) (LV_MIN(LV_MIN(a,b), c)) +#define LV_MIN4(a, b, c, d) (LV_MIN(LV_MIN(a,b), LV_MIN(c,d))) + +#define LV_MAX(a, b) ((a) > (b) ? (a) : (b)) +#define LV_MAX3(a, b, c) (LV_MAX(LV_MAX(a,b), c)) +#define LV_MAX4(a, b, c, d) (LV_MAX(LV_MAX(a,b), LV_MAX(c,d))) + +#define LV_CLAMP(min, val, max) (LV_MAX(min, (LV_MIN(val, max)))) + +#define LV_ABS(x) ((x) > 0 ? (x) : (-(x))) +#define LV_UDIV255(x) (((x) * 0x8081U) >> 0x17) + +#define LV_IS_SIGNED(t) (((t)(-1)) < ((t)0)) +#define LV_UMAX_OF(t) (((0x1ULL << ((sizeof(t) * 8ULL) - 1ULL)) - 1ULL) | (0xFULL << ((sizeof(t) * 8ULL) - 4ULL))) +#define LV_SMAX_OF(t) (((0x1ULL << ((sizeof(t) * 8ULL) - 1ULL)) - 1ULL) | (0x7ULL << ((sizeof(t) * 8ULL) - 4ULL))) +#define LV_MAX_OF(t) ((unsigned long)(LV_IS_SIGNED(t) ? 
LV_SMAX_OF(t) : LV_UMAX_OF(t))) + +#ifdef __cplusplus +} /*extern "C"*/ +#endif + +#endif diff --git a/components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_style.h b/components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_style.h new file mode 100644 index 00000000..dd813add --- /dev/null +++ b/components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_style.h @@ -0,0 +1,48 @@ +/* + * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + * + * This file is derived from the LVGL project. + * See https://github.com/lvgl/lvgl for details. + */ + +/** + * @file lv_style.h + * + */ + +#ifndef LV_STYLE_H +#define LV_STYLE_H + +#ifdef __cplusplus +extern "C" { +#endif + +/********************* + * INCLUDES + *********************/ + +/********************* + * DEFINES + *********************/ + +/********************** + * TYPEDEFS + **********************/ + +/** + * Possible options how to blend opaque drawings + */ +typedef enum { + LV_BLEND_MODE_NORMAL, /**< Simply mix according to the opacity value*/ + LV_BLEND_MODE_ADDITIVE, /**< Add the respective color channels*/ + LV_BLEND_MODE_SUBTRACTIVE,/**< Subtract the foreground from the background*/ + LV_BLEND_MODE_MULTIPLY, /**< Multiply the foreground and background*/ +} lv_blend_mode_t; + +#ifdef __cplusplus +} /*extern "C"*/ +#endif + +#endif /*LV_STYLE_H*/ diff --git a/components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_types.h b/components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_types.h new file mode 100644 index 00000000..2e9244fe --- /dev/null +++ b/components/esp_lvgl_port/test_apps/simd/main/lv_blend/include/lv_types.h @@ -0,0 +1,51 @@ +/* + * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + * + * This file is derived from the LVGL project. + * See https://github.com/lvgl/lvgl for details. 
+ */ + +/** + * @file lv_types.h + * + */ + +#ifndef LV_TYPES_H +#define LV_TYPES_H + +#ifdef __cplusplus +extern "C" { +#endif + +/********************** + * TYPEDEFS + **********************/ + +/** + * LVGL error codes. + */ +typedef enum { + LV_RESULT_INVALID = 0, /*Typically indicates that the object is deleted (become invalid) in the action + function or an operation was failed*/ + LV_RESULT_OK, /*The object is valid (no deleted) after the action*/ +} lv_result_t; + +/********************** + * TYPEDEFS + **********************/ + +typedef uintptr_t lv_uintptr_t; + +/********************** + * MACROS + **********************/ + +#define LV_UNUSED(x) ((void)x) + +#ifdef __cplusplus +} /*extern "C"*/ +#endif + +#endif /*LV_TYPES_H*/ diff --git a/components/esp_lvgl_port/test_apps/simd/main/lv_blend/src/lv_color.c b/components/esp_lvgl_port/test_apps/simd/main/lv_blend/src/lv_color.c new file mode 100644 index 00000000..a2865e47 --- /dev/null +++ b/components/esp_lvgl_port/test_apps/simd/main/lv_blend/src/lv_color.c @@ -0,0 +1,66 @@ +/* + * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + * + * This file is derived from the LVGL project. + * See https://github.com/lvgl/lvgl for details. 
+ */ + +/** + * @file lv_color.c + * + */ + +/********************* + * INCLUDES + *********************/ +#include "lv_color.h" + +/********************* + * DEFINES + *********************/ + +/********************** + * TYPEDEFS + **********************/ + +/********************** + * STATIC PROTOTYPES + **********************/ + +/********************** + * GLOBAL VARIABLES + **********************/ + +/********************** + * STATIC VARIABLES + **********************/ + +/********************** + * MACROS + **********************/ + +/********************** + * GLOBAL FUNCTIONS + **********************/ + +lv_color32_t lv_color_to_32(lv_color_t color, lv_opa_t opa) +{ + lv_color32_t c32; + c32.red = color.red; + c32.green = color.green; + c32.blue = color.blue; + c32.alpha = opa; + return c32; +} + +uint16_t lv_color_to_u16(lv_color_t color) +{ + return ((color.red & 0xF8) << 8) + ((color.green & 0xFC) << 3) + ((color.blue & 0xF8) >> 3); +} + +uint32_t lv_color_to_u32(lv_color_t color) +{ + return (uint32_t)((uint32_t)0xff << 24) + (color.red << 16) + (color.green << 8) + (color.blue); +} diff --git a/components/esp_lvgl_port/test_apps/simd/main/lv_blend/src/lv_draw_sw_blend_to_argb8888.c b/components/esp_lvgl_port/test_apps/simd/main/lv_blend/src/lv_draw_sw_blend_to_argb8888.c new file mode 100644 index 00000000..f18e3670 --- /dev/null +++ b/components/esp_lvgl_port/test_apps/simd/main/lv_blend/src/lv_draw_sw_blend_to_argb8888.c @@ -0,0 +1,911 @@ +/* + * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + * + * This file is derived from the LVGL project. + * See https://github.com/lvgl/lvgl for details. 
+ */ + +/** + * @file lv_draw_sw_blend.c + * + */ + +/********************* + * INCLUDES + *********************/ +#include "lv_draw_sw_blend_to_argb8888.h" + +#include "lv_assert.h" +#include "lv_types.h" +#include "lv_log.h" +#include "lv_draw_sw_blend.h" +#include "lv_math.h" +#include "lv_color.h" +#include "string.h" + +#include "esp_lvgl_port_lv_blend.h" + +/********************* + * DEFINES + *********************/ + +#define LV_ATTRIBUTE_FAST_MEM + +/********************** + * TYPEDEFS + **********************/ + +typedef struct { + lv_color32_t fg_saved; + lv_color32_t bg_saved; + lv_color32_t res_saved; + lv_opa_t res_alpha_saved; + lv_opa_t ratio_saved; +} lv_color_mix_alpha_cache_t; + +/********************** + * STATIC PROTOTYPES + **********************/ + +static void /* LV_ATTRIBUTE_FAST_MEM */ al88_image_blend(_lv_draw_sw_blend_image_dsc_t *dsc); + +static void /* LV_ATTRIBUTE_FAST_MEM */ l8_image_blend(_lv_draw_sw_blend_image_dsc_t *dsc); + +static void /* LV_ATTRIBUTE_FAST_MEM */ rgb565_image_blend(_lv_draw_sw_blend_image_dsc_t *dsc); + +static void /* LV_ATTRIBUTE_FAST_MEM */ rgb888_image_blend(_lv_draw_sw_blend_image_dsc_t *dsc, + const uint8_t src_px_size); + +static void /* LV_ATTRIBUTE_FAST_MEM */ argb8888_image_blend(_lv_draw_sw_blend_image_dsc_t *dsc); + +static inline void /* LV_ATTRIBUTE_FAST_MEM */ lv_color_8_32_mix(const uint8_t src, lv_color32_t *dest, uint8_t mix); + +static inline lv_color32_t /* LV_ATTRIBUTE_FAST_MEM */ lv_color_32_32_mix(lv_color32_t fg, lv_color32_t bg, + lv_color_mix_alpha_cache_t *cache); + +static void lv_color_mix_with_alpha_cache_init(lv_color_mix_alpha_cache_t *cache); + +static inline void /* LV_ATTRIBUTE_FAST_MEM */ blend_non_normal_pixel(lv_color32_t *dest, lv_color32_t src, + lv_blend_mode_t mode, lv_color_mix_alpha_cache_t *cache); +static inline void * /* LV_ATTRIBUTE_FAST_MEM */ drawbuf_next_row(const void *buf, uint32_t stride); + +/********************** + * STATIC VARIABLES + 
**********************/ + +/********************** + * MACROS + **********************/ + +#ifndef LV_DRAW_SW_COLOR_BLEND_TO_ARGB8888 +#define LV_DRAW_SW_COLOR_BLEND_TO_ARGB8888(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_COLOR_BLEND_TO_ARGB8888_WITH_OPA +#define LV_DRAW_SW_COLOR_BLEND_TO_ARGB8888_WITH_OPA(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_COLOR_BLEND_TO_ARGB8888_WITH_MASK +#define LV_DRAW_SW_COLOR_BLEND_TO_ARGB8888_WITH_MASK(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_COLOR_BLEND_TO_ARGB8888_MIX_MASK_OPA +#define LV_DRAW_SW_COLOR_BLEND_TO_ARGB8888_MIX_MASK_OPA(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_L8_BLEND_NORMAL_TO_ARGB8888 +#define LV_DRAW_SW_L8_BLEND_NORMAL_TO_ARGB8888(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_L8_BLEND_NORMAL_TO_ARGB8888_WITH_OPA +#define LV_DRAW_SW_L8_BLEND_NORMAL_TO_ARGB8888_WITH_OPA(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_L8_BLEND_NORMAL_TO_ARGB8888_WITH_MASK +#define LV_DRAW_SW_L8_BLEND_NORMAL_TO_ARGB8888_WITH_MASK(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_L8_BLEND_NORMAL_TO_ARGB8888_MIX_MASK_OPA +#define LV_DRAW_SW_L8_BLEND_NORMAL_TO_ARGB8888_MIX_MASK_OPA(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_AL88_BLEND_NORMAL_TO_ARGB8888 +#define LV_DRAW_SW_AL88_BLEND_NORMAL_TO_ARGB8888(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_AL88_BLEND_NORMAL_TO_ARGB8888_WITH_OPA +#define LV_DRAW_SW_AL88_BLEND_NORMAL_TO_ARGB8888_WITH_OPA(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_AL88_BLEND_NORMAL_TO_ARGB8888_WITH_MASK +#define LV_DRAW_SW_AL88_BLEND_NORMAL_TO_ARGB8888_WITH_MASK(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_AL88_BLEND_NORMAL_TO_ARGB8888_MIX_MASK_OPA +#define LV_DRAW_SW_AL88_BLEND_NORMAL_TO_ARGB8888_MIX_MASK_OPA(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_ARGB8888 +#define LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_ARGB8888(...) 
LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_ARGB8888_WITH_OPA +#define LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_ARGB8888_WITH_OPA(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_ARGB8888_WITH_MASK +#define LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_ARGB8888_WITH_MASK(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_ARGB8888_MIX_MASK_OPA +#define LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_ARGB8888_MIX_MASK_OPA(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_ARGB8888 +#define LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_ARGB8888(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_ARGB8888_WITH_OPA +#define LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_ARGB8888_WITH_OPA(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_ARGB8888_WITH_MASK +#define LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_ARGB8888_WITH_MASK(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_ARGB8888_MIX_MASK_OPA +#define LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_ARGB8888_MIX_MASK_OPA(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_ARGB8888 +#define LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_ARGB8888(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_ARGB8888_WITH_OPA +#define LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_ARGB8888_WITH_OPA(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_ARGB8888_WITH_MASK +#define LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_ARGB8888_WITH_MASK(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_ARGB8888_MIX_MASK_OPA +#define LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_ARGB8888_MIX_MASK_OPA(...) 
LV_RESULT_INVALID +#endif + +/********************** + * GLOBAL FUNCTIONS + **********************/ + +void LV_ATTRIBUTE_FAST_MEM lv_draw_sw_blend_color_to_argb8888(_lv_draw_sw_blend_fill_dsc_t *dsc) +{ + int32_t w = dsc->dest_w; + int32_t h = dsc->dest_h; + lv_opa_t opa = dsc->opa; + const lv_opa_t *mask = dsc->mask_buf; + int32_t mask_stride = dsc->mask_stride; + int32_t dest_stride = dsc->dest_stride; + + lv_color_mix_alpha_cache_t cache; + lv_color_mix_with_alpha_cache_init(&cache); + + int32_t x; + int32_t y; + + LV_UNUSED(w); + LV_UNUSED(h); + LV_UNUSED(x); + LV_UNUSED(y); + LV_UNUSED(opa); + LV_UNUSED(mask); + LV_UNUSED(mask_stride); + LV_UNUSED(dest_stride); + + /*Simple fill*/ + if (mask == NULL && opa >= LV_OPA_MAX) { + if (dsc->use_asm) { + LV_DRAW_SW_COLOR_BLEND_TO_ARGB8888(dsc); + } else { + uint32_t color32 = lv_color_to_u32(dsc->color); + uint32_t *dest_buf = dsc->dest_buf; + for (y = 0; y < h; y++) { + for (x = 0; x < w - 16; x += 16) { + dest_buf[x + 0] = color32; + dest_buf[x + 1] = color32; + dest_buf[x + 2] = color32; + dest_buf[x + 3] = color32; + + dest_buf[x + 4] = color32; + dest_buf[x + 5] = color32; + dest_buf[x + 6] = color32; + dest_buf[x + 7] = color32; + + dest_buf[x + 8] = color32; + dest_buf[x + 9] = color32; + dest_buf[x + 10] = color32; + dest_buf[x + 11] = color32; + + dest_buf[x + 12] = color32; + dest_buf[x + 13] = color32; + dest_buf[x + 14] = color32; + dest_buf[x + 15] = color32; + } + for (; x < w; x ++) { + dest_buf[x] = color32; + } + + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + } + } + + } + /*Opacity only*/ + else if (mask == NULL && opa < LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_COLOR_BLEND_TO_ARGB8888_WITH_OPA(dsc)) { + lv_color32_t color_argb = lv_color_to_32(dsc->color, opa); + lv_color32_t *dest_buf = dsc->dest_buf; + + for (y = 0; y < h; y++) { + for (x = 0; x < w; x++) { + dest_buf[x] = lv_color_32_32_mix(color_argb, dest_buf[x], &cache); + } + dest_buf = drawbuf_next_row(dest_buf, 
dest_stride); + } + } + + } + /*Masked with full opacity*/ + else if (mask && opa >= LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_COLOR_BLEND_TO_ARGB8888_WITH_MASK(dsc)) { + lv_color32_t color_argb = lv_color_to_32(dsc->color, 0xff); + lv_color32_t *dest_buf = dsc->dest_buf; + for (y = 0; y < h; y++) { + for (x = 0; x < w; x++) { + color_argb.alpha = mask[x]; + dest_buf[x] = lv_color_32_32_mix(color_argb, dest_buf[x], &cache); + } + + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + mask += mask_stride; + } + } + + } + /*Masked with opacity*/ + else { + if (LV_RESULT_INVALID == LV_DRAW_SW_COLOR_BLEND_TO_ARGB8888_MIX_MASK_OPA(dsc)) { + lv_color32_t color_argb = lv_color_to_32(dsc->color, opa); + lv_color32_t *dest_buf = dsc->dest_buf; + for (y = 0; y < h; y++) { + for (x = 0; x < w; x++) { + color_argb.alpha = LV_OPA_MIX2(mask[x], opa); + dest_buf[x] = lv_color_32_32_mix(color_argb, dest_buf[x], &cache); + } + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + mask += mask_stride; + } + } + } +} + +void LV_ATTRIBUTE_FAST_MEM lv_draw_sw_blend_image_to_argb8888(_lv_draw_sw_blend_image_dsc_t *dsc) +{ + switch (dsc->src_color_format) { + case LV_COLOR_FORMAT_RGB565: + rgb565_image_blend(dsc); + break; + case LV_COLOR_FORMAT_RGB888: + rgb888_image_blend(dsc, 3); + break; + case LV_COLOR_FORMAT_XRGB8888: + rgb888_image_blend(dsc, 4); + break; + case LV_COLOR_FORMAT_ARGB8888: + argb8888_image_blend(dsc); + break; + case LV_COLOR_FORMAT_L8: + l8_image_blend(dsc); + break; + case LV_COLOR_FORMAT_AL88: + al88_image_blend(dsc); + break; + default: + LV_LOG_WARN("Not supported source color format"); + break; + } +} + +/********************** + * STATIC FUNCTIONS + **********************/ + +static void LV_ATTRIBUTE_FAST_MEM al88_image_blend(_lv_draw_sw_blend_image_dsc_t *dsc) +{ + int32_t w = dsc->dest_w; + int32_t h = dsc->dest_h; + lv_opa_t opa = dsc->opa; + lv_color32_t *dest_buf_c32 = dsc->dest_buf; + int32_t dest_stride = dsc->dest_stride; + const 
lv_color16a_t *src_buf_al88 = dsc->src_buf; + int32_t src_stride = dsc->src_stride; + const lv_opa_t *mask_buf = dsc->mask_buf; + int32_t mask_stride = dsc->mask_stride; + + int32_t dest_x; + int32_t src_x; + int32_t y; + + if (dsc->blend_mode == LV_BLEND_MODE_NORMAL) { + if (mask_buf == NULL && opa >= LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_AL88_BLEND_NORMAL_TO_ARGB8888(dsc)) { + for (y = 0; y < h; y++) { + for (dest_x = 0, src_x = 0; src_x < w; dest_x++, src_x++) { + /* + dest_buf_c32[dest_x].alpha = src_buf_al88[src_x].alpha; + dest_buf_c32[dest_x].red = src_buf_al88[src_x].lumi; + dest_buf_c32[dest_x].green = src_buf_al88[src_x].lumi; + dest_buf_c32[dest_x].blue = src_buf_al88[src_x].lumi; + */ + lv_color_8_32_mix(src_buf_al88[src_x].lumi, &dest_buf_c32[dest_x], src_buf_al88[src_x].alpha); + } + dest_buf_c32 = drawbuf_next_row(dest_buf_c32, dest_stride); + src_buf_al88 = drawbuf_next_row(src_buf_al88, src_stride); + } + } + } else if (mask_buf == NULL && opa < LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_AL88_BLEND_NORMAL_TO_ARGB8888_WITH_OPA(dsc)) { + for (y = 0; y < h; y++) { + for (dest_x = 0, src_x = 0; src_x < w; dest_x++, src_x++) { + lv_color_8_32_mix(src_buf_al88[src_x].lumi, &dest_buf_c32[dest_x], LV_OPA_MIX2(src_buf_al88[src_x].alpha, opa)); + } + dest_buf_c32 = drawbuf_next_row(dest_buf_c32, dest_stride); + src_buf_al88 = drawbuf_next_row(src_buf_al88, src_stride); + } + } + } else if (mask_buf && opa >= LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_AL88_BLEND_NORMAL_TO_ARGB8888_WITH_MASK(dsc)) { + for (y = 0; y < h; y++) { + for (dest_x = 0, src_x = 0; src_x < w; dest_x++, src_x++) { + lv_color_8_32_mix(src_buf_al88[src_x].lumi, &dest_buf_c32[dest_x], LV_OPA_MIX2(src_buf_al88[src_x].alpha, + mask_buf[src_x])); + } + dest_buf_c32 = drawbuf_next_row(dest_buf_c32, dest_stride); + src_buf_al88 = drawbuf_next_row(src_buf_al88, src_stride); + mask_buf += mask_stride; + } + } + } else if (mask_buf && opa < LV_OPA_MAX) { + if 
(LV_RESULT_INVALID == LV_DRAW_SW_AL88_BLEND_NORMAL_TO_ARGB8888_MIX_MASK_OPA(dsc)) { + for (y = 0; y < h; y++) { + for (dest_x = 0, src_x = 0; src_x < w; dest_x++, src_x++) { + lv_color_8_32_mix(src_buf_al88[src_x].lumi, &dest_buf_c32[dest_x], LV_OPA_MIX3(src_buf_al88[src_x].alpha, + mask_buf[src_x], opa)); + } + dest_buf_c32 = drawbuf_next_row(dest_buf_c32, dest_stride); + src_buf_al88 = drawbuf_next_row(src_buf_al88, src_stride); + mask_buf += mask_stride; + } + } + } + } else { + lv_color32_t src_argb; + lv_color_mix_alpha_cache_t cache; + lv_color_mix_with_alpha_cache_init(&cache); + for (y = 0; y < h; y++) { + for (dest_x = 0, src_x = 0; src_x < w; dest_x++, src_x++) { + src_argb.red = src_buf_al88[src_x].lumi; + src_argb.green = src_buf_al88[src_x].lumi; + src_argb.blue = src_buf_al88[src_x].lumi; + if (mask_buf == NULL) { + src_argb.alpha = LV_OPA_MIX2(src_buf_al88[src_x].alpha, opa); + } else { + src_argb.alpha = LV_OPA_MIX3(src_buf_al88[src_x].alpha, mask_buf[dest_x], opa); + } + blend_non_normal_pixel(&dest_buf_c32[dest_x], src_argb, dsc->blend_mode, &cache); + } + if (mask_buf) { + mask_buf += mask_stride; + } + dest_buf_c32 = drawbuf_next_row(dest_buf_c32, dest_stride); + src_buf_al88 = drawbuf_next_row(src_buf_al88, src_stride); + } + } +} + +static void LV_ATTRIBUTE_FAST_MEM l8_image_blend(_lv_draw_sw_blend_image_dsc_t *dsc) +{ + int32_t w = dsc->dest_w; + int32_t h = dsc->dest_h; + lv_opa_t opa = dsc->opa; + lv_color32_t *dest_buf_c32 = dsc->dest_buf; + int32_t dest_stride = dsc->dest_stride; + const uint8_t *src_buf_l8 = dsc->src_buf; + int32_t src_stride = dsc->src_stride; + const lv_opa_t *mask_buf = dsc->mask_buf; + int32_t mask_stride = dsc->mask_stride; + + int32_t dest_x; + int32_t src_x; + int32_t y; + + if (dsc->blend_mode == LV_BLEND_MODE_NORMAL) { + if (mask_buf == NULL && opa >= LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_L8_BLEND_NORMAL_TO_ARGB8888(dsc)) { + for (y = 0; y < h; y++) { + for (dest_x = 0, src_x = 0; src_x < w; 
dest_x++, src_x++) { + dest_buf_c32[dest_x].alpha = src_buf_l8[src_x]; + dest_buf_c32[dest_x].red = src_buf_l8[src_x]; + dest_buf_c32[dest_x].green = src_buf_l8[src_x]; + dest_buf_c32[dest_x].blue = src_buf_l8[src_x]; + } + dest_buf_c32 = drawbuf_next_row(dest_buf_c32, dest_stride); + src_buf_l8 = drawbuf_next_row(src_buf_l8, src_stride); + } + } + } else if (mask_buf == NULL && opa < LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_L8_BLEND_NORMAL_TO_ARGB8888_WITH_OPA(dsc)) { + for (y = 0; y < h; y++) { + for (dest_x = 0, src_x = 0; src_x < w; dest_x++, src_x++) { + lv_color_8_32_mix(src_buf_l8[src_x], &dest_buf_c32[dest_x], opa); + } + dest_buf_c32 = drawbuf_next_row(dest_buf_c32, dest_stride); + src_buf_l8 = drawbuf_next_row(src_buf_l8, src_stride); + } + } + } else if (mask_buf && opa >= LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_L8_BLEND_NORMAL_TO_ARGB8888_WITH_MASK(dsc)) { + for (y = 0; y < h; y++) { + for (dest_x = 0, src_x = 0; src_x < w; dest_x++, src_x++) { + lv_color_8_32_mix(src_buf_l8[src_x], &dest_buf_c32[dest_x], mask_buf[src_x]); + } + dest_buf_c32 = drawbuf_next_row(dest_buf_c32, dest_stride); + src_buf_l8 = drawbuf_next_row(src_buf_l8, src_stride); + mask_buf += mask_stride; + } + } + } else if (mask_buf && opa < LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_L8_BLEND_NORMAL_TO_ARGB8888_MIX_MASK_OPA(dsc)) { + for (y = 0; y < h; y++) { + for (dest_x = 0, src_x = 0; src_x < w; dest_x++, src_x++) { + lv_color_8_32_mix(src_buf_l8[src_x], &dest_buf_c32[dest_x], LV_OPA_MIX2(mask_buf[src_x], opa)); + } + dest_buf_c32 = drawbuf_next_row(dest_buf_c32, dest_stride); + src_buf_l8 = drawbuf_next_row(src_buf_l8, src_stride); + mask_buf += mask_stride; + } + } + } + } else { + lv_color32_t src_argb; + lv_color_mix_alpha_cache_t cache; + lv_color_mix_with_alpha_cache_init(&cache); + for (y = 0; y < h; y++) { + for (dest_x = 0, src_x = 0; src_x < w; dest_x++, src_x++) { + src_argb.red = src_buf_l8[src_x]; + src_argb.green = src_buf_l8[src_x]; + 
src_argb.blue = src_buf_l8[src_x]; + if (mask_buf == NULL) { + src_argb.alpha = opa; + } else { + src_argb.alpha = LV_OPA_MIX2(mask_buf[dest_x], opa); + } + blend_non_normal_pixel(&dest_buf_c32[dest_x], src_argb, dsc->blend_mode, &cache); + } + if (mask_buf) { + mask_buf += mask_stride; + } + dest_buf_c32 = drawbuf_next_row(dest_buf_c32, dest_stride); + src_buf_l8 = drawbuf_next_row(src_buf_l8, src_stride); + } + } +} + +static void LV_ATTRIBUTE_FAST_MEM rgb565_image_blend(_lv_draw_sw_blend_image_dsc_t *dsc) +{ + int32_t w = dsc->dest_w; + int32_t h = dsc->dest_h; + lv_opa_t opa = dsc->opa; + lv_color32_t *dest_buf_c32 = dsc->dest_buf; + int32_t dest_stride = dsc->dest_stride; + const lv_color16_t *src_buf_c16 = (const lv_color16_t *) dsc->src_buf; + int32_t src_stride = dsc->src_stride; + const lv_opa_t *mask_buf = dsc->mask_buf; + int32_t mask_stride = dsc->mask_stride; + + lv_color32_t color_argb; + lv_color_mix_alpha_cache_t cache; + lv_color_mix_with_alpha_cache_init(&cache); + + int32_t x; + int32_t y; + + LV_UNUSED(color_argb); + + if (dsc->blend_mode == LV_BLEND_MODE_NORMAL) { + if (mask_buf == NULL) { + lv_result_t accelerated; + if (opa >= LV_OPA_MAX) { + accelerated = LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_ARGB8888(dsc); + } else { + accelerated = LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_ARGB8888_WITH_OPA(dsc); + } + if (LV_RESULT_INVALID == accelerated) { + color_argb.alpha = opa; + for (y = 0; y < h; y++) { + for (x = 0; x < w; x++) { + color_argb.red = (src_buf_c16[x].red * 2106) >> 8; /*To make it rounded*/ + color_argb.green = (src_buf_c16[x].green * 1037) >> 8; + color_argb.blue = (src_buf_c16[x].blue * 2106) >> 8; + dest_buf_c32[x] = lv_color_32_32_mix(color_argb, dest_buf_c32[x], &cache); + } + dest_buf_c32 = drawbuf_next_row(dest_buf_c32, dest_stride); + src_buf_c16 = drawbuf_next_row(src_buf_c16, src_stride); + } + } + } else if (mask_buf && opa >= LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_ARGB8888_WITH_MASK(dsc)) { + for 
(y = 0; y < h; y++) { + for (x = 0; x < w; x++) { + color_argb.alpha = mask_buf[x]; + color_argb.red = (src_buf_c16[x].red * 2106) >> 8; /*To make it rounded*/ + color_argb.green = (src_buf_c16[x].green * 1037) >> 8; + color_argb.blue = (src_buf_c16[x].blue * 2106) >> 8; + dest_buf_c32[x] = lv_color_32_32_mix(color_argb, dest_buf_c32[x], &cache); + } + dest_buf_c32 = drawbuf_next_row(dest_buf_c32, dest_stride); + src_buf_c16 = drawbuf_next_row(src_buf_c16, src_stride); + mask_buf += mask_stride; + } + } + } else { + if (LV_RESULT_INVALID == LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_ARGB8888_MIX_MASK_OPA(dsc)) { + for (y = 0; y < h; y++) { + for (x = 0; x < w; x++) { + color_argb.alpha = LV_OPA_MIX2(mask_buf[x], opa); + color_argb.red = (src_buf_c16[x].red * 2106) >> 8; /*To make it rounded*/ + color_argb.green = (src_buf_c16[x].green * 1037) >> 8; + color_argb.blue = (src_buf_c16[x].blue * 2106) >> 8; + dest_buf_c32[x] = lv_color_32_32_mix(color_argb, dest_buf_c32[x], &cache); + } + dest_buf_c32 = drawbuf_next_row(dest_buf_c32, dest_stride); + src_buf_c16 = drawbuf_next_row(src_buf_c16, src_stride); + mask_buf += mask_stride; + } + } + } + } else { + lv_color32_t src_argb; + for (y = 0; y < h; y++) { + for (x = 0; x < w; x++) { + src_argb.red = (src_buf_c16[x].red * 2106) >> 8; + src_argb.green = (src_buf_c16[x].green * 1037) >> 8; + src_argb.blue = (src_buf_c16[x].blue * 2106) >> 8; + if (mask_buf == NULL) { + src_argb.alpha = opa; + } else { + src_argb.alpha = LV_OPA_MIX2(mask_buf[x], opa); + } + blend_non_normal_pixel(&dest_buf_c32[x], src_argb, dsc->blend_mode, &cache); + } + if (mask_buf) { + mask_buf += mask_stride; + } + dest_buf_c32 = drawbuf_next_row(dest_buf_c32, dest_stride); + src_buf_c16 = drawbuf_next_row(src_buf_c16, src_stride); + } + } +} + +static void LV_ATTRIBUTE_FAST_MEM rgb888_image_blend(_lv_draw_sw_blend_image_dsc_t *dsc, const uint8_t src_px_size) +{ + + int32_t w = dsc->dest_w; + int32_t h = dsc->dest_h; + lv_opa_t opa = dsc->opa; + lv_color32_t 
*dest_buf_c32 = dsc->dest_buf; + int32_t dest_stride = dsc->dest_stride; + const uint8_t *src_buf = dsc->src_buf; + int32_t src_stride = dsc->src_stride; + const lv_opa_t *mask_buf = dsc->mask_buf; + int32_t mask_stride = dsc->mask_stride; + + lv_color32_t color_argb; + lv_color_mix_alpha_cache_t cache; + lv_color_mix_with_alpha_cache_init(&cache); + + int32_t dest_x; + int32_t src_x; + int32_t y; + + LV_UNUSED(color_argb); + + if (dsc->blend_mode == LV_BLEND_MODE_NORMAL) { + /*Special case*/ + if (mask_buf == NULL && opa >= LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_ARGB8888(dsc, src_px_size)) { + if (src_px_size == 4) { + uint32_t line_in_bytes = w * 4; + for (y = 0; y < h; y++) { + memcpy(dest_buf_c32, src_buf, line_in_bytes); // lv_memcpy + dest_buf_c32 = drawbuf_next_row(dest_buf_c32, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + } + } else if (src_px_size == 3) { + for (y = 0; y < h; y++) { + for (dest_x = 0, src_x = 0; dest_x < w; dest_x++, src_x += 3) { + dest_buf_c32[dest_x].red = src_buf[src_x + 2]; + dest_buf_c32[dest_x].green = src_buf[src_x + 1]; + dest_buf_c32[dest_x].blue = src_buf[src_x + 0]; + dest_buf_c32[dest_x].alpha = 0xff; + } + dest_buf_c32 = drawbuf_next_row(dest_buf_c32, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + } + } + } + + } + if (mask_buf == NULL && opa < LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_ARGB8888_WITH_OPA(dsc, src_px_size)) { + color_argb.alpha = opa; + for (y = 0; y < h; y++) { + for (dest_x = 0, src_x = 0; dest_x < w; dest_x++, src_x += src_px_size) { + color_argb.red = src_buf[src_x + 2]; + color_argb.green = src_buf[src_x + 1]; + color_argb.blue = src_buf[src_x + 0]; + dest_buf_c32[dest_x] = lv_color_32_32_mix(color_argb, dest_buf_c32[dest_x], &cache); + } + dest_buf_c32 = drawbuf_next_row(dest_buf_c32, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + } + } + + } + if (mask_buf && opa >= 
LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_ARGB8888_WITH_MASK(dsc, src_px_size)) { + for (y = 0; y < h; y++) { + for (dest_x = 0, src_x = 0; dest_x < w; dest_x++, src_x += src_px_size) { + color_argb.alpha = mask_buf[dest_x]; + color_argb.red = src_buf[src_x + 2]; + color_argb.green = src_buf[src_x + 1]; + color_argb.blue = src_buf[src_x + 0]; + dest_buf_c32[dest_x] = lv_color_32_32_mix(color_argb, dest_buf_c32[dest_x], &cache); + } + dest_buf_c32 = drawbuf_next_row(dest_buf_c32, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + mask_buf += mask_stride; + } + } + } + if (mask_buf && opa < LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_ARGB8888_MIX_MASK_OPA(dsc, src_px_size)) { + for (y = 0; y < h; y++) { + for (dest_x = 0, src_x = 0; dest_x < w; dest_x++, src_x += src_px_size) { + color_argb.alpha = (opa * mask_buf[dest_x]) >> 8; + color_argb.red = src_buf[src_x + 2]; + color_argb.green = src_buf[src_x + 1]; + color_argb.blue = src_buf[src_x + 0]; + dest_buf_c32[dest_x] = lv_color_32_32_mix(color_argb, dest_buf_c32[dest_x], &cache); + } + dest_buf_c32 = drawbuf_next_row(dest_buf_c32, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + mask_buf += mask_stride; + } + } + } + } else { + lv_color32_t src_argb; + for (y = 0; y < h; y++) { + for (dest_x = 0, src_x = 0; dest_x < w; dest_x++, src_x += src_px_size) { + src_argb.red = src_buf[src_x + 2]; + src_argb.green = src_buf[src_x + 1]; + src_argb.blue = src_buf[src_x + 0]; + if (mask_buf == NULL) { + src_argb.alpha = opa; + } else { + src_argb.alpha = LV_OPA_MIX2(mask_buf[dest_x], opa); + } + + blend_non_normal_pixel(&dest_buf_c32[dest_x], src_argb, dsc->blend_mode, &cache); + } + if (mask_buf) { + mask_buf += mask_stride; + } + dest_buf_c32 = drawbuf_next_row(dest_buf_c32, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + } + } +} + +static void LV_ATTRIBUTE_FAST_MEM 
argb8888_image_blend(_lv_draw_sw_blend_image_dsc_t *dsc) +{ + int32_t w = dsc->dest_w; + int32_t h = dsc->dest_h; + lv_opa_t opa = dsc->opa; + lv_color32_t *dest_buf_c32 = dsc->dest_buf; + int32_t dest_stride = dsc->dest_stride; + const lv_color32_t *src_buf_c32 = dsc->src_buf; + int32_t src_stride = dsc->src_stride; + const lv_opa_t *mask_buf = dsc->mask_buf; + int32_t mask_stride = dsc->mask_stride; + + lv_color32_t color_argb; + lv_color_mix_alpha_cache_t cache; + lv_color_mix_with_alpha_cache_init(&cache); + + int32_t x; + int32_t y; + + if (dsc->blend_mode == LV_BLEND_MODE_NORMAL) { + if (mask_buf == NULL && opa >= LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_ARGB8888(dsc)) { + for (y = 0; y < h; y++) { + for (x = 0; x < w; x++) { + dest_buf_c32[x] = lv_color_32_32_mix(src_buf_c32[x], dest_buf_c32[x], &cache); + } + dest_buf_c32 = drawbuf_next_row(dest_buf_c32, dest_stride); + src_buf_c32 = drawbuf_next_row(src_buf_c32, src_stride); + } + } + } else if (mask_buf == NULL && opa < LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_ARGB8888_WITH_OPA(dsc)) { + for (y = 0; y < h; y++) { + for (x = 0; x < w; x++) { + color_argb = src_buf_c32[x]; + color_argb.alpha = LV_OPA_MIX2(color_argb.alpha, opa); + dest_buf_c32[x] = lv_color_32_32_mix(color_argb, dest_buf_c32[x], &cache); + } + dest_buf_c32 = drawbuf_next_row(dest_buf_c32, dest_stride); + src_buf_c32 = drawbuf_next_row(src_buf_c32, src_stride); + } + } + } else if (mask_buf && opa >= LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_ARGB8888_WITH_MASK(dsc)) { + for (y = 0; y < h; y++) { + for (x = 0; x < w; x++) { + color_argb = src_buf_c32[x]; + color_argb.alpha = LV_OPA_MIX2(color_argb.alpha, mask_buf[x]); + dest_buf_c32[x] = lv_color_32_32_mix(color_argb, dest_buf_c32[x], &cache); + } + dest_buf_c32 = drawbuf_next_row(dest_buf_c32, dest_stride); + src_buf_c32 = drawbuf_next_row(src_buf_c32, src_stride); + mask_buf += 
mask_stride; + } + } + } else if (mask_buf && opa < LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_ARGB8888_MIX_MASK_OPA(dsc)) { + for (y = 0; y < h; y++) { + for (x = 0; x < w; x++) { + color_argb = src_buf_c32[x]; + color_argb.alpha = LV_OPA_MIX3(color_argb.alpha, opa, mask_buf[x]); + dest_buf_c32[x] = lv_color_32_32_mix(color_argb, dest_buf_c32[x], &cache); + } + dest_buf_c32 = drawbuf_next_row(dest_buf_c32, dest_stride); + src_buf_c32 = drawbuf_next_row(src_buf_c32, src_stride); + mask_buf += mask_stride; + } + } + } + } else { + for (y = 0; y < h; y++) { + for (x = 0; x < w; x++) { + color_argb = src_buf_c32[x]; + if (mask_buf == NULL) { + color_argb.alpha = LV_OPA_MIX2(color_argb.alpha, opa); + } else { + color_argb.alpha = LV_OPA_MIX3(color_argb.alpha, mask_buf[x], opa); + } + blend_non_normal_pixel(&dest_buf_c32[x], color_argb, dsc->blend_mode, &cache); + } + if (mask_buf) { + mask_buf += mask_stride; + } + dest_buf_c32 = drawbuf_next_row(dest_buf_c32, dest_stride); + src_buf_c32 = drawbuf_next_row(src_buf_c32, src_stride); + } + } +} + +static inline void LV_ATTRIBUTE_FAST_MEM lv_color_8_32_mix(const uint8_t src, lv_color32_t *dest, uint8_t mix) +{ + + if (mix == 0) { + return; + } + + dest->alpha = 255; + if (mix >= LV_OPA_MAX) { + dest->red = src; + dest->green = src; + dest->blue = src; + } else { + lv_opa_t mix_inv = 255 - mix; + dest->red = (uint32_t)((uint32_t)src * mix + dest->red * mix_inv) >> 8; + dest->green = (uint32_t)((uint32_t)src * mix + dest->green * mix_inv) >> 8; + dest->blue = (uint32_t)((uint32_t)src * mix + dest->blue * mix_inv) >> 8; + } +} + +static inline lv_color32_t LV_ATTRIBUTE_FAST_MEM lv_color_32_32_mix(lv_color32_t fg, lv_color32_t bg, + lv_color_mix_alpha_cache_t *cache) +{ + /*Pick the foreground if it's fully opaque or the Background is fully transparent*/ + if (fg.alpha >= LV_OPA_MAX || bg.alpha <= LV_OPA_MIN) { + return fg; + } + /*Transparent foreground: use the Background*/ + else if 
(fg.alpha <= LV_OPA_MIN) { + return bg; + } + /*Opaque background: use simple mix*/ + else if (bg.alpha == 255) { + return lv_color_mix32(fg, bg); + } + /*Both colors have alpha. Expensive calculation need to be applied*/ + else { + /*Save the parameters and the result. If they will be asked again don't compute again*/ + + /*Update the ratio and the result alpha value if the input alpha values change*/ + if (bg.alpha != cache->bg_saved.alpha || fg.alpha != cache->fg_saved.alpha) { + /*Info: + * https://en.wikipedia.org/wiki/Alpha_compositing#Analytical_derivation_of_the_over_operator*/ + cache->res_alpha_saved = 255 - LV_OPA_MIX2(255 - fg.alpha, 255 - bg.alpha); + LV_ASSERT(cache->res_alpha_saved != 0); + cache->ratio_saved = (uint32_t)((uint32_t)fg.alpha * 255) / cache->res_alpha_saved; + } + + if (!lv_color32_eq(bg, cache->bg_saved) || !lv_color32_eq(fg, cache->fg_saved)) { + cache->fg_saved = fg; + cache->bg_saved = bg; + fg.alpha = cache->ratio_saved; + cache->res_saved = lv_color_mix32(fg, bg); + cache->res_saved.alpha = cache->res_alpha_saved; + } + + return cache->res_saved; + } +} + +void lv_color_mix_with_alpha_cache_init(lv_color_mix_alpha_cache_t *cache) +{ + memset(&cache->fg_saved, 0x00, sizeof(lv_color32_t)); //lv_memzero + memset(&cache->bg_saved, 0x00, sizeof(lv_color32_t)); //lv_memzero + memset(&cache->res_saved, 0x00, sizeof(lv_color32_t)); //lv_memzero + cache->res_alpha_saved = 255; + cache->ratio_saved = 255; +} + +static inline void LV_ATTRIBUTE_FAST_MEM blend_non_normal_pixel(lv_color32_t *dest, lv_color32_t src, + lv_blend_mode_t mode, lv_color_mix_alpha_cache_t *cache) +{ + lv_color32_t res; + switch (mode) { + case LV_BLEND_MODE_ADDITIVE: + res.red = LV_MIN(dest->red + src.red, 255); + res.green = LV_MIN(dest->green + src.green, 255); + res.blue = LV_MIN(dest->blue + src.blue, 255); + break; + case LV_BLEND_MODE_SUBTRACTIVE: + res.red = LV_MAX(dest->red - src.red, 0); + res.green = LV_MAX(dest->green - src.green, 0); + res.blue = 
LV_MAX(dest->blue - src.blue, 0); + break; + case LV_BLEND_MODE_MULTIPLY: + res.red = (dest->red * src.red) >> 8; + res.green = (dest->green * src.green) >> 8; + res.blue = (dest->blue * src.blue) >> 8; + break; + default: + LV_LOG_WARN("Not supported blend mode: %d", mode); + return; + } + res.alpha = src.alpha; + *dest = lv_color_32_32_mix(res, *dest, cache); +} + +static inline void *LV_ATTRIBUTE_FAST_MEM drawbuf_next_row(const void *buf, uint32_t stride) +{ + return (void *)((uint8_t *)buf + stride); +} diff --git a/components/esp_lvgl_port/test_apps/simd/main/lv_blend/src/lv_draw_sw_blend_to_rgb565.c b/components/esp_lvgl_port/test_apps/simd/main/lv_blend/src/lv_draw_sw_blend_to_rgb565.c new file mode 100644 index 00000000..361571ff --- /dev/null +++ b/components/esp_lvgl_port/test_apps/simd/main/lv_blend/src/lv_draw_sw_blend_to_rgb565.c @@ -0,0 +1,960 @@ +/* + * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + * + * This file is derived from the LVGL project. + * See https://github.com/lvgl/lvgl for details. 
+ */ + +/** + * @file lv_draw_sw_blend_to_rgb565.c + * + */ + +/********************* + * INCLUDES + *********************/ +#include "lv_draw_sw_blend_to_rgb565.h" + +#include "lv_assert.h" +#include "lv_types.h" +#include "lv_log.h" +#include "lv_draw_sw_blend.h" +#include "lv_math.h" +#include "lv_color.h" +#include "string.h" + +#include "esp_lvgl_port_lv_blend.h" + + +/********************* + * DEFINES + *********************/ + +#define LV_ATTRIBUTE_FAST_MEM + +/********************** + * TYPEDEFS + **********************/ + +/********************** + * STATIC PROTOTYPES + **********************/ + +static void /* LV_ATTRIBUTE_FAST_MEM */ al88_image_blend(_lv_draw_sw_blend_image_dsc_t *dsc); + +static void /* LV_ATTRIBUTE_FAST_MEM */ l8_image_blend(_lv_draw_sw_blend_image_dsc_t *dsc); + +static void /* LV_ATTRIBUTE_FAST_MEM */ rgb565_image_blend(_lv_draw_sw_blend_image_dsc_t *dsc); + +static void /* LV_ATTRIBUTE_FAST_MEM */ rgb888_image_blend(_lv_draw_sw_blend_image_dsc_t *dsc, + const uint8_t src_px_size); + +static void /* LV_ATTRIBUTE_FAST_MEM */ argb8888_image_blend(_lv_draw_sw_blend_image_dsc_t *dsc); + +static inline uint16_t /* LV_ATTRIBUTE_FAST_MEM */ l8_to_rgb565(const uint8_t c1); + +static inline uint16_t /* LV_ATTRIBUTE_FAST_MEM */ lv_color_8_16_mix(const uint8_t c1, uint16_t c2, uint8_t mix); + +static inline uint16_t /* LV_ATTRIBUTE_FAST_MEM */ lv_color_24_16_mix(const uint8_t *c1, uint16_t c2, uint8_t mix); + +static inline void * /* LV_ATTRIBUTE_FAST_MEM */ drawbuf_next_row(const void *buf, uint32_t stride); + +/********************** + * STATIC VARIABLES + **********************/ + +/********************** + * MACROS + **********************/ + +#ifndef LV_DRAW_SW_COLOR_BLEND_TO_RGB565 +#define LV_DRAW_SW_COLOR_BLEND_TO_RGB565(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_COLOR_BLEND_TO_RGB565_WITH_OPA +#define LV_DRAW_SW_COLOR_BLEND_TO_RGB565_WITH_OPA(...) 
LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_COLOR_BLEND_TO_RGB565_WITH_MASK +#define LV_DRAW_SW_COLOR_BLEND_TO_RGB565_WITH_MASK(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_COLOR_BLEND_TO_RGB565_MIX_MASK_OPA +#define LV_DRAW_SW_COLOR_BLEND_TO_RGB565_MIX_MASK_OPA(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_L8_BLEND_NORMAL_TO_RGB565 +#define LV_DRAW_SW_L8_BLEND_NORMAL_TO_RGB565(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_L8_BLEND_NORMAL_TO_RGB565_WITH_OPA +#define LV_DRAW_SW_L8_BLEND_NORMAL_TO_RGB565_WITH_OPA(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_L8_BLEND_NORMAL_TO_RGB565_WITH_MASK +#define LV_DRAW_SW_L8_BLEND_NORMAL_TO_RGB565_WITH_MASK(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_L8_BLEND_NORMAL_TO_RGB565_MIX_MASK_OPA +#define LV_DRAW_SW_L8_BLEND_NORMAL_TO_RGB565_MIX_MASK_OPA(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_AL88_BLEND_NORMAL_TO_RGB565 +#define LV_DRAW_SW_AL88_BLEND_NORMAL_TO_RGB565(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_AL88_BLEND_NORMAL_TO_RGB565_WITH_OPA +#define LV_DRAW_SW_AL88_BLEND_NORMAL_TO_RGB565_WITH_OPA(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_AL88_BLEND_NORMAL_TO_RGB565_WITH_MASK +#define LV_DRAW_SW_AL88_BLEND_NORMAL_TO_RGB565_WITH_MASK(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_AL88_BLEND_NORMAL_TO_RGB565_MIX_MASK_OPA +#define LV_DRAW_SW_AL88_BLEND_NORMAL_TO_RGB565_MIX_MASK_OPA(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB565 +#define LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB565(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB565_WITH_OPA +#define LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB565_WITH_OPA(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB565_WITH_MASK +#define LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB565_WITH_MASK(...) 
LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB565_MIX_MASK_OPA +#define LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB565_MIX_MASK_OPA(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB565 +#define LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB565(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB565_WITH_OPA +#define LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB565_WITH_OPA(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB565_WITH_MASK +#define LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB565_WITH_MASK(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB565_MIX_MASK_OPA +#define LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB565_MIX_MASK_OPA(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB565 +#define LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB565(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB565_WITH_OPA +#define LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB565_WITH_OPA(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB565_WITH_MASK +#define LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB565_WITH_MASK(...) LV_RESULT_INVALID +#endif + +#ifndef LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB565_MIX_MASK_OPA +#define LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB565_MIX_MASK_OPA(...) LV_RESULT_INVALID +#endif + +/********************** + * GLOBAL FUNCTIONS + **********************/ + +/** + * Fill an area with a color. + * Supports normal fill, fill with opacity, fill with mask, and fill with mask and opacity. + * dest_buf and color have native color depth. 
(RGB565, RGB888, XRGB8888) + * The background (dest_buf) cannot have alpha channel + * @param dest_buf + * @param dest_area + * @param dest_stride + * @param color + * @param opa + * @param mask + * @param mask_stride + */ +void LV_ATTRIBUTE_FAST_MEM lv_draw_sw_blend_color_to_rgb565(_lv_draw_sw_blend_fill_dsc_t *dsc) +{ + int32_t w = dsc->dest_w; + int32_t h = dsc->dest_h; + uint16_t color16 = lv_color_to_u16(dsc->color); + lv_opa_t opa = dsc->opa; + const lv_opa_t *mask = dsc->mask_buf; + int32_t mask_stride = dsc->mask_stride; + uint16_t *dest_buf_u16 = dsc->dest_buf; + int32_t dest_stride = dsc->dest_stride; + + int32_t x; + int32_t y; + + LV_UNUSED(w); + LV_UNUSED(h); + LV_UNUSED(x); + LV_UNUSED(y); + LV_UNUSED(opa); + LV_UNUSED(mask); + LV_UNUSED(color16); + LV_UNUSED(mask_stride); + LV_UNUSED(dest_stride); + LV_UNUSED(dest_buf_u16); + + /*Simple fill*/ + if (mask == NULL && opa >= LV_OPA_MAX) { + if (dsc->use_asm) { + LV_DRAW_SW_COLOR_BLEND_TO_RGB565(dsc); + } else { + for (y = 0; y < h; y++) { + uint16_t *dest_end_final = dest_buf_u16 + w; + uint32_t *dest_end_mid = (uint32_t *)((uint16_t *) dest_buf_u16 + ((w - 1) & ~(0xF))); + if ((lv_uintptr_t)&dest_buf_u16[0] & 0x3) { + dest_buf_u16[0] = color16; + dest_buf_u16++; + } + + uint32_t c32 = (uint32_t)color16 + ((uint32_t)color16 << 16); + uint32_t *dest32 = (uint32_t *)dest_buf_u16; + while (dest32 < dest_end_mid) { + dest32[0] = c32; + dest32[1] = c32; + dest32[2] = c32; + dest32[3] = c32; + dest32[4] = c32; + dest32[5] = c32; + dest32[6] = c32; + dest32[7] = c32; + dest32 += 8; + } + + dest_buf_u16 = (uint16_t *)dest32; + + while (dest_buf_u16 < dest_end_final) { + *dest_buf_u16 = color16; + dest_buf_u16++; + } + + dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride); + dest_buf_u16 -= w; + } + } + + } + /*Opacity only*/ + else if (mask == NULL && opa < LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_COLOR_BLEND_TO_RGB565_WITH_OPA(dsc)) { + uint32_t last_dest32_color = dest_buf_u16[0] + 1; /*Set 
to value which is not equal to the first pixel*/ + uint32_t last_res32_color = 0; + + for (y = 0; y < h; y++) { + x = 0; + if ((lv_uintptr_t)&dest_buf_u16[0] & 0x3) { + dest_buf_u16[0] = lv_color_16_16_mix(color16, dest_buf_u16[0], opa); + x = 1; + } + + for (; x < w - 2; x += 2) { + if (dest_buf_u16[x] != dest_buf_u16[x + 1]) { + dest_buf_u16[x + 0] = lv_color_16_16_mix(color16, dest_buf_u16[x + 0], opa); + dest_buf_u16[x + 1] = lv_color_16_16_mix(color16, dest_buf_u16[x + 1], opa); + } else { + volatile uint32_t *dest32 = (uint32_t *)&dest_buf_u16[x]; + if (last_dest32_color == *dest32) { + *dest32 = last_res32_color; + } else { + last_dest32_color = *dest32; + + dest_buf_u16[x] = lv_color_16_16_mix(color16, dest_buf_u16[x + 0], opa); + dest_buf_u16[x + 1] = dest_buf_u16[x]; + + last_res32_color = *dest32; + } + } + } + + for (; x < w ; x++) { + dest_buf_u16[x] = lv_color_16_16_mix(color16, dest_buf_u16[x], opa); + } + dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride); + } + } + + } + + /*Masked with full opacity*/ + else if (mask && opa >= LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_COLOR_BLEND_TO_RGB565_WITH_MASK(dsc)) { + for (y = 0; y < h; y++) { + x = 0; + if ((lv_uintptr_t)(mask) & 0x1) { + dest_buf_u16[x] = lv_color_16_16_mix(color16, dest_buf_u16[x], mask[x]); + x++; + } + + for (; x <= w - 2; x += 2) { + uint16_t mask16 = *((uint16_t *)&mask[x]); + if (mask16 == 0xFFFF) { + dest_buf_u16[x + 0] = color16; + dest_buf_u16[x + 1] = color16; + } else if (mask16 != 0) { + dest_buf_u16[x + 0] = lv_color_16_16_mix(color16, dest_buf_u16[x + 0], mask[x + 0]); + dest_buf_u16[x + 1] = lv_color_16_16_mix(color16, dest_buf_u16[x + 1], mask[x + 1]); + } + } + + for (; x < w ; x++) { + dest_buf_u16[x] = lv_color_16_16_mix(color16, dest_buf_u16[x], mask[x]); + } + dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride); + mask += mask_stride; + } + } + + } + /*Masked with opacity*/ + else if (mask && opa < LV_OPA_MAX) { + if (LV_RESULT_INVALID == 
LV_DRAW_SW_COLOR_BLEND_TO_RGB565_MIX_MASK_OPA(dsc)) { + for (y = 0; y < h; y++) { + for (x = 0; x < w; x++) { + dest_buf_u16[x] = lv_color_16_16_mix(color16, dest_buf_u16[x], LV_OPA_MIX2(mask[x], opa)); + } + dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride); + mask += mask_stride; + } + } + } +} + +void LV_ATTRIBUTE_FAST_MEM lv_draw_sw_blend_image_to_rgb565(_lv_draw_sw_blend_image_dsc_t *dsc) +{ + switch (dsc->src_color_format) { + case LV_COLOR_FORMAT_RGB565: + rgb565_image_blend(dsc); + break; + case LV_COLOR_FORMAT_RGB888: + rgb888_image_blend(dsc, 3); + break; + case LV_COLOR_FORMAT_XRGB8888: + rgb888_image_blend(dsc, 4); + break; + case LV_COLOR_FORMAT_ARGB8888: + argb8888_image_blend(dsc); + break; + case LV_COLOR_FORMAT_L8: + l8_image_blend(dsc); + break; + case LV_COLOR_FORMAT_AL88: + al88_image_blend(dsc); + break; + default: + LV_LOG_WARN("Not supported source color format"); + break; + } +} + +/********************** + * STATIC FUNCTIONS + **********************/ + +static void LV_ATTRIBUTE_FAST_MEM al88_image_blend(_lv_draw_sw_blend_image_dsc_t *dsc) +{ + int32_t w = dsc->dest_w; + int32_t h = dsc->dest_h; + lv_opa_t opa = dsc->opa; + uint16_t *dest_buf_u16 = dsc->dest_buf; + int32_t dest_stride = dsc->dest_stride; + const lv_color16a_t *src_buf_al88 = dsc->src_buf; + int32_t src_stride = dsc->src_stride; + const lv_opa_t *mask_buf = dsc->mask_buf; + int32_t mask_stride = dsc->mask_stride; + + int32_t dest_x; + int32_t src_x; + int32_t y; + + if (dsc->blend_mode == LV_BLEND_MODE_NORMAL) { + if (mask_buf == NULL && opa >= LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_AL88_BLEND_NORMAL_TO_RGB565(dsc)) { + for (y = 0; y < h; y++) { + for (dest_x = 0, src_x = 0; dest_x < w; dest_x++, src_x++) { + dest_buf_u16[dest_x] = lv_color_8_16_mix(src_buf_al88[src_x].lumi, dest_buf_u16[dest_x], src_buf_al88[src_x].alpha); + } + dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride); + src_buf_al88 = drawbuf_next_row(src_buf_al88, src_stride); + } + 
} + } else if (mask_buf == NULL && opa < LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_AL88_BLEND_NORMAL_TO_RGB565_WITH_OPA(dsc)) { + for (y = 0; y < h; y++) { + for (dest_x = 0, src_x = 0; dest_x < w; dest_x++, src_x++) { + dest_buf_u16[dest_x] = lv_color_8_16_mix(src_buf_al88[src_x].lumi, dest_buf_u16[dest_x], + LV_OPA_MIX2(src_buf_al88[src_x].alpha, opa)); + } + dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride); + src_buf_al88 = drawbuf_next_row(src_buf_al88, src_stride); + } + } + } else if (mask_buf && opa >= LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_AL88_BLEND_NORMAL_TO_RGB565_WITH_MASK(dsc)) { + for (y = 0; y < h; y++) { + for (dest_x = 0, src_x = 0; dest_x < w; dest_x++, src_x++) { + dest_buf_u16[dest_x] = lv_color_8_16_mix(src_buf_al88[src_x].lumi, dest_buf_u16[dest_x], + LV_OPA_MIX2(src_buf_al88[src_x].alpha, mask_buf[dest_x])); + } + dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride); + src_buf_al88 = drawbuf_next_row(src_buf_al88, src_stride); + mask_buf += mask_stride; + } + } + } else if (mask_buf && opa < LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_AL88_BLEND_NORMAL_TO_RGB565_MIX_MASK_OPA(dsc)) { + for (y = 0; y < h; y++) { + for (dest_x = 0, src_x = 0; dest_x < w; dest_x++, src_x++) { + dest_buf_u16[dest_x] = lv_color_8_16_mix(src_buf_al88[src_x].lumi, dest_buf_u16[dest_x], + LV_OPA_MIX3(src_buf_al88[src_x].alpha, mask_buf[dest_x], opa)); + } + dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride); + src_buf_al88 = drawbuf_next_row(src_buf_al88, src_stride); + mask_buf += mask_stride; + } + } + } + } else { + uint16_t res = 0; + for (y = 0; y < h; y++) { + lv_color16_t *dest_buf_c16 = (lv_color16_t *)dest_buf_u16; + for (dest_x = 0, src_x = 0; dest_x < w; dest_x++, src_x++) { /*src_buf_al88 is indexed per AL88 pixel: advance one element per destination pixel, as the normal-mode loops do*/ + uint8_t rb = src_buf_al88[src_x].lumi >> 3; + uint8_t g = src_buf_al88[src_x].lumi >> 2; + switch (dsc->blend_mode) { + case LV_BLEND_MODE_ADDITIVE: + res = (LV_MIN(dest_buf_c16[dest_x].red + rb, 31)) << 11; + res +=
(LV_MIN(dest_buf_c16[dest_x].green + g, 63)) << 5; + res += LV_MIN(dest_buf_c16[dest_x].blue + rb, 31); + break; + case LV_BLEND_MODE_SUBTRACTIVE: + res = (LV_MAX(dest_buf_c16[dest_x].red - rb, 0)) << 11; + res += (LV_MAX(dest_buf_c16[dest_x].green - g, 0)) << 5; + res += LV_MAX(dest_buf_c16[dest_x].blue - rb, 0); + break; + case LV_BLEND_MODE_MULTIPLY: + res = ((dest_buf_c16[dest_x].red * rb) >> 5) << 11; + res += ((dest_buf_c16[dest_x].green * g) >> 6) << 5; + res += (dest_buf_c16[dest_x].blue * rb) >> 5; + break; + default: + LV_LOG_WARN("Not supported blend mode: %d", dsc->blend_mode); + return; + } + if (mask_buf == NULL && opa >= LV_OPA_MAX) { + dest_buf_u16[dest_x] = lv_color_16_16_mix(res, dest_buf_u16[dest_x], src_buf_al88[src_x].alpha); + } else if (mask_buf == NULL && opa < LV_OPA_MAX) { + dest_buf_u16[dest_x] = lv_color_16_16_mix(res, dest_buf_u16[dest_x], LV_OPA_MIX2(opa, src_buf_al88[src_x].alpha)); + } else { + if (opa >= LV_OPA_MAX) { + dest_buf_u16[dest_x] = lv_color_16_16_mix(res, dest_buf_u16[dest_x], LV_OPA_MIX2(mask_buf[dest_x], src_buf_al88[src_x].alpha)); /*masked + full opacity: still honour the per-pixel source alpha, like every other branch*/ + } else dest_buf_u16[dest_x] = lv_color_16_16_mix(res, dest_buf_u16[dest_x], LV_OPA_MIX3(mask_buf[dest_x], opa, + src_buf_al88[src_x].alpha)); + } + } + + dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride); + src_buf_al88 = drawbuf_next_row(src_buf_al88, src_stride); + if (mask_buf) { + mask_buf += mask_stride; + } + } + } +} + +static void LV_ATTRIBUTE_FAST_MEM l8_image_blend(_lv_draw_sw_blend_image_dsc_t *dsc) +{ + int32_t w = dsc->dest_w; + int32_t h = dsc->dest_h; + lv_opa_t opa = dsc->opa; + uint16_t *dest_buf_u16 = dsc->dest_buf; + int32_t dest_stride = dsc->dest_stride; + const uint8_t *src_buf_l8 = dsc->src_buf; + int32_t src_stride = dsc->src_stride; + const lv_opa_t *mask_buf = dsc->mask_buf; + int32_t mask_stride = dsc->mask_stride; + + int32_t dest_x; + int32_t src_x; + int32_t y; + + if (dsc->blend_mode == LV_BLEND_MODE_NORMAL) { + if (mask_buf == NULL && opa >= LV_OPA_MAX) { + if (LV_RESULT_INVALID ==
LV_DRAW_SW_L8_BLEND_NORMAL_TO_RGB565(dsc)) { + for (y = 0; y < h; y++) { + for (dest_x = 0, src_x = 0; dest_x < w; dest_x++, src_x++) { + dest_buf_u16[dest_x] = l8_to_rgb565(src_buf_l8[src_x]); + } + dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride); + src_buf_l8 += src_stride; + } + } + } else if (mask_buf == NULL && opa < LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_L8_BLEND_NORMAL_TO_RGB565_WITH_OPA(dsc)) { + for (y = 0; y < h; y++) { + for (dest_x = 0, src_x = 0; dest_x < w; dest_x++, src_x++) { + dest_buf_u16[dest_x] = lv_color_8_16_mix(src_buf_l8[src_x], dest_buf_u16[dest_x], opa); + } + dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride); + src_buf_l8 += src_stride; + } + } + } else if (mask_buf && opa >= LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_L8_BLEND_NORMAL_TO_RGB565_WITH_MASK(dsc)) { + for (y = 0; y < h; y++) { + for (dest_x = 0, src_x = 0; dest_x < w; dest_x++, src_x++) { + dest_buf_u16[dest_x] = lv_color_8_16_mix(src_buf_l8[src_x], dest_buf_u16[dest_x], mask_buf[dest_x]); + } + dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride); + src_buf_l8 += src_stride; + mask_buf += mask_stride; + } + } + } else if (mask_buf && opa < LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_L8_BLEND_NORMAL_TO_RGB565_MIX_MASK_OPA(dsc)) { + for (y = 0; y < h; y++) { + for (dest_x = 0, src_x = 0; dest_x < w; dest_x++, src_x++) { + dest_buf_u16[dest_x] = lv_color_8_16_mix(src_buf_l8[src_x], dest_buf_u16[dest_x], LV_OPA_MIX2(mask_buf[dest_x], opa)); + } + dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride); + src_buf_l8 += src_stride; + mask_buf += mask_stride; + } + } + } + } else { + uint16_t res = 0; + for (y = 0; y < h; y++) { + lv_color16_t *dest_buf_c16 = (lv_color16_t *)dest_buf_u16; + for (dest_x = 0, src_x = 0; dest_x < w; dest_x++, src_x++) { /*L8 is one byte per pixel: advance one source byte per destination pixel, as the normal-mode loops do*/ + uint8_t rb = src_buf_l8[src_x] >> 3; + uint8_t g = src_buf_l8[src_x] >> 2; + switch (dsc->blend_mode) { + case LV_BLEND_MODE_ADDITIVE: + res =
(LV_MIN(dest_buf_c16[dest_x].red + rb, 31)) << 11; + res += (LV_MIN(dest_buf_c16[dest_x].green + g, 63)) << 5; + res += LV_MIN(dest_buf_c16[dest_x].blue + rb, 31); + break; + case LV_BLEND_MODE_SUBTRACTIVE: + res = (LV_MAX(dest_buf_c16[dest_x].red - rb, 0)) << 11; + res += (LV_MAX(dest_buf_c16[dest_x].green - g, 0)) << 5; + res += LV_MAX(dest_buf_c16[dest_x].blue - rb, 0); + break; + case LV_BLEND_MODE_MULTIPLY: + res = ((dest_buf_c16[dest_x].red * rb) >> 5) << 11; + res += ((dest_buf_c16[dest_x].green * g) >> 6) << 5; + res += (dest_buf_c16[dest_x].blue * rb) >> 5; + break; + default: + LV_LOG_WARN("Not supported blend mode: %d", dsc->blend_mode); + return; + } + + if (mask_buf == NULL && opa >= LV_OPA_MAX) { + dest_buf_u16[dest_x] = res; + } else if (mask_buf == NULL && opa < LV_OPA_MAX) { + dest_buf_u16[dest_x] = lv_color_16_16_mix(res, dest_buf_u16[dest_x], opa); + } else { + if (opa >= LV_OPA_MAX) { + dest_buf_u16[dest_x] = lv_color_16_16_mix(res, dest_buf_u16[dest_x], mask_buf[dest_x]); + } else { + dest_buf_u16[dest_x] = lv_color_16_16_mix(res, dest_buf_u16[dest_x], LV_OPA_MIX2(mask_buf[dest_x], opa)); + } + } + } + + dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride); + src_buf_l8 += src_stride; + if (mask_buf) { + mask_buf += mask_stride; + } + } + } +} + +static void LV_ATTRIBUTE_FAST_MEM rgb565_image_blend(_lv_draw_sw_blend_image_dsc_t *dsc) +{ + int32_t w = dsc->dest_w; + int32_t h = dsc->dest_h; + lv_opa_t opa = dsc->opa; + uint16_t *dest_buf_u16 = dsc->dest_buf; + int32_t dest_stride = dsc->dest_stride; + const uint16_t *src_buf_u16 = dsc->src_buf; + int32_t src_stride = dsc->src_stride; + const lv_opa_t *mask_buf = dsc->mask_buf; + int32_t mask_stride = dsc->mask_stride; + + int32_t x; + int32_t y; + + if (dsc->blend_mode == LV_BLEND_MODE_NORMAL) { + if (mask_buf == NULL && opa >= LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB565(dsc)) { + uint32_t line_in_bytes = w * 2; + for (y = 0; y < h; y++) { + 
memcpy(dest_buf_u16, src_buf_u16, line_in_bytes); // lv_memcpy + dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride); + src_buf_u16 = drawbuf_next_row(src_buf_u16, src_stride); + } + } + } else if (mask_buf == NULL && opa < LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB565_WITH_OPA(dsc)) { + for (y = 0; y < h; y++) { + for (x = 0; x < w; x++) { + dest_buf_u16[x] = lv_color_16_16_mix(src_buf_u16[x], dest_buf_u16[x], opa); + } + dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride); + src_buf_u16 = drawbuf_next_row(src_buf_u16, src_stride); + } + } + } else if (mask_buf && opa >= LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB565_WITH_MASK(dsc)) { + for (y = 0; y < h; y++) { + for (x = 0; x < w; x++) { + dest_buf_u16[x] = lv_color_16_16_mix(src_buf_u16[x], dest_buf_u16[x], mask_buf[x]); + } + dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride); + src_buf_u16 = drawbuf_next_row(src_buf_u16, src_stride); + mask_buf += mask_stride; + } + } + } else { + if (LV_RESULT_INVALID == LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB565_MIX_MASK_OPA(dsc)) { + for (y = 0; y < h; y++) { + for (x = 0; x < w; x++) { + dest_buf_u16[x] = lv_color_16_16_mix(src_buf_u16[x], dest_buf_u16[x], LV_OPA_MIX2(mask_buf[x], opa)); + } + dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride); + src_buf_u16 = drawbuf_next_row(src_buf_u16, src_stride); + mask_buf += mask_stride; + } + } + } + } else { + uint16_t res = 0; + for (y = 0; y < h; y++) { + lv_color16_t *dest_buf_c16 = (lv_color16_t *) dest_buf_u16; + lv_color16_t *src_buf_c16 = (lv_color16_t *) src_buf_u16; + for (x = 0; x < w; x++) { + switch (dsc->blend_mode) { + case LV_BLEND_MODE_ADDITIVE: + if (src_buf_u16[x] == 0x0000) { + continue; /*Do not add pure black*/ + } + res = (LV_MIN(dest_buf_c16[x].red + src_buf_c16[x].red, 31)) << 11; + res += (LV_MIN(dest_buf_c16[x].green + src_buf_c16[x].green, 63)) << 5; + res += LV_MIN(dest_buf_c16[x].blue + 
src_buf_c16[x].blue, 31); + break; + case LV_BLEND_MODE_SUBTRACTIVE: + if (src_buf_u16[x] == 0x0000) { + continue; /*Do not subtract pure black*/ + } + res = (LV_MAX(dest_buf_c16[x].red - src_buf_c16[x].red, 0)) << 11; + res += (LV_MAX(dest_buf_c16[x].green - src_buf_c16[x].green, 0)) << 5; + res += LV_MAX(dest_buf_c16[x].blue - src_buf_c16[x].blue, 0); + break; + case LV_BLEND_MODE_MULTIPLY: + if (src_buf_u16[x] == 0xffff) { + continue; /*Do not multiply with pure white (considered as 1)*/ + } + res = ((dest_buf_c16[x].red * src_buf_c16[x].red) >> 5) << 11; + res += ((dest_buf_c16[x].green * src_buf_c16[x].green) >> 6) << 5; + res += (dest_buf_c16[x].blue * src_buf_c16[x].blue) >> 5; + break; + default: + LV_LOG_WARN("Not supported blend mode: %d", dsc->blend_mode); + return; + } + + if (mask_buf == NULL) { + dest_buf_u16[x] = lv_color_16_16_mix(res, dest_buf_u16[x], opa); + } else { + if (opa >= LV_OPA_MAX) { + dest_buf_u16[x] = lv_color_16_16_mix(res, dest_buf_u16[x], mask_buf[x]); + } else { + dest_buf_u16[x] = lv_color_16_16_mix(res, dest_buf_u16[x], LV_OPA_MIX2(mask_buf[x], opa)); + } + } + } + + dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride); + src_buf_u16 = drawbuf_next_row(src_buf_u16, src_stride); + if (mask_buf) { + mask_buf += mask_stride; + } + } + } +} + +static void LV_ATTRIBUTE_FAST_MEM rgb888_image_blend(_lv_draw_sw_blend_image_dsc_t *dsc, const uint8_t src_px_size) +{ + int32_t w = dsc->dest_w; + int32_t h = dsc->dest_h; + lv_opa_t opa = dsc->opa; + uint16_t *dest_buf_u16 = dsc->dest_buf; + int32_t dest_stride = dsc->dest_stride; + const uint8_t *src_buf_u8 = dsc->src_buf; + int32_t src_stride = dsc->src_stride; + const lv_opa_t *mask_buf = dsc->mask_buf; + int32_t mask_stride = dsc->mask_stride; + + int32_t dest_x; + int32_t src_x; + int32_t y; + + if (dsc->blend_mode == LV_BLEND_MODE_NORMAL) { + if (mask_buf == NULL && opa >= LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB565(dsc, src_px_size)) { + for 
(y = 0; y < h; y++) { + for (dest_x = 0, src_x = 0; dest_x < w; dest_x++, src_x += src_px_size) { + dest_buf_u16[dest_x] = ((src_buf_u8[src_x + 2] & 0xF8) << 8) + + ((src_buf_u8[src_x + 1] & 0xFC) << 3) + + ((src_buf_u8[src_x + 0] & 0xF8) >> 3); + } + dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride); + src_buf_u8 += src_stride; + } + } + } else if (mask_buf == NULL && opa < LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB565_WITH_OPA(dsc, src_px_size)) { + for (y = 0; y < h; y++) { + for (dest_x = 0, src_x = 0; dest_x < w; dest_x++, src_x += src_px_size) { + dest_buf_u16[dest_x] = lv_color_24_16_mix(&src_buf_u8[src_x], dest_buf_u16[dest_x], opa); + } + dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride); + src_buf_u8 += src_stride; + } + } + } + if (mask_buf && opa >= LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB565_WITH_MASK(dsc, src_px_size)) { + for (y = 0; y < h; y++) { + for (dest_x = 0, src_x = 0; dest_x < w; dest_x++, src_x += src_px_size) { + dest_buf_u16[dest_x] = lv_color_24_16_mix(&src_buf_u8[src_x], dest_buf_u16[dest_x], mask_buf[dest_x]); + } + dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride); + src_buf_u8 += src_stride; + mask_buf += mask_stride; + } + } + } + if (mask_buf && opa < LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB565_MIX_MASK_OPA(dsc, src_px_size)) { + for (y = 0; y < h; y++) { + for (dest_x = 0, src_x = 0; dest_x < w; dest_x++, src_x += src_px_size) { + dest_buf_u16[dest_x] = lv_color_24_16_mix(&src_buf_u8[src_x], dest_buf_u16[dest_x], LV_OPA_MIX2(mask_buf[dest_x], opa)); + } + dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride); + src_buf_u8 += src_stride; + mask_buf += mask_stride; + } + } + } + } else { + uint16_t res = 0; + for (y = 0; y < h; y++) { + lv_color16_t *dest_buf_c16 = (lv_color16_t *) dest_buf_u16; + for (dest_x = 0, src_x = 0; dest_x < w; dest_x++, src_x += src_px_size) { + switch 
(dsc->blend_mode) { + case LV_BLEND_MODE_ADDITIVE: + res = (LV_MIN(dest_buf_c16[dest_x].red + (src_buf_u8[src_x + 2] >> 3), 31)) << 11; + res += (LV_MIN(dest_buf_c16[dest_x].green + (src_buf_u8[src_x + 1] >> 2), 63)) << 5; + res += LV_MIN(dest_buf_c16[dest_x].blue + (src_buf_u8[src_x + 0] >> 3), 31); + break; + case LV_BLEND_MODE_SUBTRACTIVE: + res = (LV_MAX(dest_buf_c16[dest_x].red - (src_buf_u8[src_x + 2] >> 3), 0)) << 11; + res += (LV_MAX(dest_buf_c16[dest_x].green - (src_buf_u8[src_x + 1] >> 2), 0)) << 5; + res += LV_MAX(dest_buf_c16[dest_x].blue - (src_buf_u8[src_x + 0] >> 3), 0); + break; + case LV_BLEND_MODE_MULTIPLY: + res = ((dest_buf_c16[dest_x].red * (src_buf_u8[src_x + 2] >> 3)) >> 5) << 11; + res += ((dest_buf_c16[dest_x].green * (src_buf_u8[src_x + 1] >> 2)) >> 6) << 5; + res += (dest_buf_c16[dest_x].blue * (src_buf_u8[src_x + 0] >> 3)) >> 5; + break; + default: + LV_LOG_WARN("Not supported blend mode: %d", dsc->blend_mode); + return; + } + + if (mask_buf == NULL) { + dest_buf_u16[dest_x] = lv_color_16_16_mix(res, dest_buf_u16[dest_x], opa); + } else { + if (opa >= LV_OPA_MAX) { + dest_buf_u16[dest_x] = lv_color_16_16_mix(res, dest_buf_u16[dest_x], mask_buf[dest_x]); + } else { + dest_buf_u16[dest_x] = lv_color_16_16_mix(res, dest_buf_u16[dest_x], LV_OPA_MIX2(mask_buf[dest_x], opa)); + } + } + } + dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride); + src_buf_u8 += src_stride; + if (mask_buf) { + mask_buf += mask_stride; + } + } + + } +} + +static void LV_ATTRIBUTE_FAST_MEM argb8888_image_blend(_lv_draw_sw_blend_image_dsc_t *dsc) +{ + int32_t w = dsc->dest_w; + int32_t h = dsc->dest_h; + lv_opa_t opa = dsc->opa; + uint16_t *dest_buf_u16 = dsc->dest_buf; + int32_t dest_stride = dsc->dest_stride; + const uint8_t *src_buf_u8 = dsc->src_buf; + int32_t src_stride = dsc->src_stride; + const lv_opa_t *mask_buf = dsc->mask_buf; + int32_t mask_stride = dsc->mask_stride; + + int32_t dest_x; + int32_t src_x; + int32_t y; + + if (dsc->blend_mode == 
LV_BLEND_MODE_NORMAL) { + if (mask_buf == NULL && opa >= LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB565(dsc)) { + for (y = 0; y < h; y++) { + for (dest_x = 0, src_x = 0; dest_x < w; dest_x++, src_x += 4) { + dest_buf_u16[dest_x] = lv_color_24_16_mix(&src_buf_u8[src_x], dest_buf_u16[dest_x], src_buf_u8[src_x + 3]); + } + dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride); + src_buf_u8 += src_stride; + } + } + } else if (mask_buf == NULL && opa < LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB565_WITH_OPA(dsc)) { + for (y = 0; y < h; y++) { + for (dest_x = 0, src_x = 0; dest_x < w; dest_x++, src_x += 4) { + dest_buf_u16[dest_x] = lv_color_24_16_mix(&src_buf_u8[src_x], dest_buf_u16[dest_x], LV_OPA_MIX2(src_buf_u8[src_x + 3], + opa)); + } + dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride); + src_buf_u8 += src_stride; + } + } + } else if (mask_buf && opa >= LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB565_WITH_MASK(dsc)) { + for (y = 0; y < h; y++) { + for (dest_x = 0, src_x = 0; dest_x < w; dest_x++, src_x += 4) { + dest_buf_u16[dest_x] = lv_color_24_16_mix(&src_buf_u8[src_x], dest_buf_u16[dest_x], + LV_OPA_MIX2(src_buf_u8[src_x + 3], mask_buf[dest_x])); + } + dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride); + src_buf_u8 += src_stride; + mask_buf += mask_stride; + } + } + } else if (mask_buf && opa < LV_OPA_MAX) { + if (LV_RESULT_INVALID == LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB565_MIX_MASK_OPA(dsc)) { + for (y = 0; y < h; y++) { + for (dest_x = 0, src_x = 0; dest_x < w; dest_x++, src_x += 4) { + dest_buf_u16[dest_x] = lv_color_24_16_mix(&src_buf_u8[src_x], dest_buf_u16[dest_x], + LV_OPA_MIX3(src_buf_u8[src_x + 3], mask_buf[dest_x], opa)); + } + dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride); + src_buf_u8 += src_stride; + mask_buf += mask_stride; + } + } + } + } else { + uint16_t res = 0; + for (y = 0; y < h; y++) { + 
lv_color16_t *dest_buf_c16 = (lv_color16_t *) dest_buf_u16; + for (dest_x = 0, src_x = 0; dest_x < w; dest_x++, src_x += 4) { + switch (dsc->blend_mode) { + case LV_BLEND_MODE_ADDITIVE: + res = (LV_MIN(dest_buf_c16[dest_x].red + (src_buf_u8[src_x + 2] >> 3), 31)) << 11; + res += (LV_MIN(dest_buf_c16[dest_x].green + (src_buf_u8[src_x + 1] >> 2), 63)) << 5; + res += LV_MIN(dest_buf_c16[dest_x].blue + (src_buf_u8[src_x + 0] >> 3), 31); + break; + case LV_BLEND_MODE_SUBTRACTIVE: + res = (LV_MAX(dest_buf_c16[dest_x].red - (src_buf_u8[src_x + 2] >> 3), 0)) << 11; + res += (LV_MAX(dest_buf_c16[dest_x].green - (src_buf_u8[src_x + 1] >> 2), 0)) << 5; + res += LV_MAX(dest_buf_c16[dest_x].blue - (src_buf_u8[src_x + 0] >> 3), 0); + break; + case LV_BLEND_MODE_MULTIPLY: + res = ((dest_buf_c16[dest_x].red * (src_buf_u8[src_x + 2] >> 3)) >> 5) << 11; + res += ((dest_buf_c16[dest_x].green * (src_buf_u8[src_x + 1] >> 2)) >> 6) << 5; + res += (dest_buf_c16[dest_x].blue * (src_buf_u8[src_x + 0] >> 3)) >> 5; + break; + default: + LV_LOG_WARN("Not supported blend mode: %d", dsc->blend_mode); + return; + } + + if (mask_buf == NULL && opa >= LV_OPA_MAX) { + dest_buf_u16[dest_x] = lv_color_16_16_mix(res, dest_buf_u16[dest_x], src_buf_u8[src_x + 3]); + } else if (mask_buf == NULL && opa < LV_OPA_MAX) { + dest_buf_u16[dest_x] = lv_color_16_16_mix(res, dest_buf_u16[dest_x], LV_OPA_MIX2(opa, src_buf_u8[src_x + 3])); + } else { + if (opa >= LV_OPA_MAX) { + dest_buf_u16[dest_x] = lv_color_16_16_mix(res, dest_buf_u16[dest_x], mask_buf[dest_x]); + } else dest_buf_u16[dest_x] = lv_color_16_16_mix(res, dest_buf_u16[dest_x], LV_OPA_MIX3(mask_buf[dest_x], opa, + src_buf_u8[src_x + 3])); + } + } + + dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride); + src_buf_u8 += src_stride; + if (mask_buf) { + mask_buf += mask_stride; + } + } + } +} + +static inline uint16_t LV_ATTRIBUTE_FAST_MEM l8_to_rgb565(const uint8_t c1) +{ + return ((c1 & 0xF8) << 8) + ((c1 & 0xFC) << 3) + ((c1 & 0xF8) >> 3); +} + 
+static inline uint16_t LV_ATTRIBUTE_FAST_MEM lv_color_8_16_mix(const uint8_t c1, uint16_t c2, uint8_t mix)
+{
+
+    if (mix == 0) {
+        return c2;
+    } else if (mix == 255) {
+        return ((c1 & 0xF8) << 8) + ((c1 & 0xFC) << 3) + ((c1 & 0xF8) >> 3);
+    } else {
+        lv_opa_t mix_inv = 255 - mix;
+
+        return ((((c1 >> 3) * mix + ((c2 >> 11) & 0x1F) * mix_inv) << 3) & 0xF800) +
+               ((((c1 >> 2) * mix + ((c2 >> 5) & 0x3F) * mix_inv) >> 3) & 0x07E0) +
+               (((c1 >> 3) * mix + (c2 & 0x1F) * mix_inv) >> 8);
+    }
+}
+
+static inline uint16_t LV_ATTRIBUTE_FAST_MEM lv_color_24_16_mix(const uint8_t *c1, uint16_t c2, uint8_t mix)
+{
+    if (mix == 0) {
+        return c2;
+    } else if (mix == 255) {
+        return ((c1[2] & 0xF8) << 8) + ((c1[1] & 0xFC) << 3) + ((c1[0] & 0xF8) >> 3);
+    } else {
+        lv_opa_t mix_inv = 255 - mix;
+
+        return ((((c1[2] >> 3) * mix + ((c2 >> 11) & 0x1F) * mix_inv) << 3) & 0xF800) +
+               ((((c1[1] >> 2) * mix + ((c2 >> 5) & 0x3F) * mix_inv) >> 3) & 0x07E0) +
+               (((c1[0] >> 3) * mix + (c2 & 0x1F) * mix_inv) >> 8);
+    }
+}
+
+static inline void *LV_ATTRIBUTE_FAST_MEM drawbuf_next_row(const void *buf, uint32_t stride)
+{
+    return (void *)((uint8_t *)buf + stride);
+}
diff --git a/components/esp_lvgl_port/test_apps/simd/main/lv_fill_common.h b/components/esp_lvgl_port/test_apps/simd/main/lv_fill_common.h
new file mode 100644
index 00000000..5243857e
--- /dev/null
+++ b/components/esp_lvgl_port/test_apps/simd/main/lv_fill_common.h
@@ -0,0 +1,73 @@
+/*
+ * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#pragma once
+
+#include "esp_err.h"
+#include <stddef.h>
+#include "lv_color.h"
+#include "lv_draw_sw_blend.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// ------------------------------------------------- Macros and Types --------------------------------------------------
+
+/**
+ * @brief Functionality test combinations
+ */
+typedef struct {
+    unsigned int min_w;                     // Minimum width of the test array
+    unsigned int min_h;                     // Minimum height of the test array
+    unsigned int max_w;                     // Maximum width of the test array
+    unsigned int max_h;                     // Maximum height of the test array
+    unsigned int min_unalign_byte;          // Minimum amount of unaligned bytes of the test array
+    unsigned int max_unalign_byte;          // Maximum amount of unaligned bytes of the test array
+    unsigned int unalign_step;              // Increment step in bytes unalignment of the test array
+    unsigned int dest_stride_step;          // Increment step in destination stride of the test array
+    unsigned int test_combinations_count;   // Count of test combinations
+} test_matrix_params_t;
+
+/**
+ * @brief Functionality test case parameters
+ */
+typedef struct {
+    struct {
+        void *p_asm;                        // pointer to the working ASM test buf
+        void *p_ansi;                       // pointer to the working ANSI test buf
+        void *p_asm_alloc;                  // pointer to the beginning of the memory allocated for ASM test buf, used in free()
+        void *p_ansi_alloc;                 // pointer to the beginning of the memory allocated for ANSI test buf, used in free()
+    } buf;
+    void (*blend_api_func)(_lv_draw_sw_blend_fill_dsc_t *); // pointer to LVGL API function
+    lv_color_format_t color_format;         // LV color format
+    size_t data_type_size;                  // Used data type size, eg sizeof()
+    size_t active_buf_len;                  // Number of elements where the actual data are stored (not including Canary elements)
+    size_t total_buf_len;                   // Total number of elements in the buffer (including Canary elements)
+    unsigned int dest_w;                    // Destination buffer width
+    unsigned int dest_h;                    // Destination buffer height
+    unsigned int dest_stride;               // Destination buffer stride
+    unsigned int unalign_byte;              // Destination buffer memory unalignment
+} func_test_case_params_t;
+
+/**
+ * @brief Benchmark test case parameters
+ */
+typedef struct {
+    unsigned int height;                    // Test array height
+    unsigned int width;                     // Test array width
+    unsigned int stride;                    // Test array stride
+    unsigned int cc_height;                 // Corner case test array height
+    unsigned int cc_width;                  // Corner case test array width
+    unsigned int benchmark_cycles;          // Count of benchmark cycles
+    void *array_align16;                    // test array with 16 byte alignment - testing most ideal case
+    void *array_align1;                     // test array with 1 byte alignment - testing worst case
+    void (*blend_api_func)(_lv_draw_sw_blend_fill_dsc_t *); // pointer to LVGL API function
+} bench_test_case_params_t;
+
+#ifdef __cplusplus
+} /*extern "C"*/
+#endif
diff --git a/components/esp_lvgl_port/test_apps/simd/main/test_app_main.c b/components/esp_lvgl_port/test_apps/simd/main/test_app_main.c
new file mode 100644
index 00000000..c02cc997
--- /dev/null
+++ b/components/esp_lvgl_port/test_apps/simd/main/test_app_main.c
@@ -0,0 +1,50 @@
+/*
+ * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <stdio.h>
+#include "unity.h"
+#include "unity_test_utils.h"
+#include "lv_fill_common.h"
+
+#define TEST_MEMORY_LEAK_THRESHOLD (300)
+
+void app_main(void)
+{
+
+    // ______ _____ ______ _ _
+    // | _ \/ ___|| ___ \ | | | |
+    // | | | |\ `--. | |_/ / | |_ ___ ___ | |_
+    // | | | | `--. \| __/ | __| / _ \/ __|| __|
+    // | |/ / /\__/ /| | | |_ | __/\__ \| |_
+    // |___/ \____/ \_| \__| \___||___/ \__|
+
+    printf("______ _____ ______ _ _ \r\n");
+    printf("| _ \\/ ___|| ___ \\ | | | | \r\n");
+    printf("| | | |\\ `--. | |_/ / | |_ ___ ___ | |_ \r\n");
+    printf("| | | | `--. \\| __/ | __| / _ \\/ __|| __|\r\n");
+    printf("| |/ / /\\__/ /| | | |_ | __/\\__ \\| |_ \r\n");
+    printf("|___/ \\____/ \\_| \\__| \\___||___/ \\__|\r\n");
+
+
+    UNITY_BEGIN();
+    unity_run_menu();
+    UNITY_END();
+}
+
+/* setUp runs before every test */
+void setUp(void)
+{
+    // Check for memory leaks
+    unity_utils_set_leak_level(TEST_MEMORY_LEAK_THRESHOLD);
+    unity_utils_record_free_mem();
+}
+
+/* tearDown runs after every test */
+void tearDown(void)
+{
+    // Evaluate memory leaks
+    unity_utils_evaluate_leaks();
+}
diff --git a/components/esp_lvgl_port/test_apps/simd/main/test_lv_fill_benchmark.c b/components/esp_lvgl_port/test_apps/simd/main/test_lv_fill_benchmark.c
new file mode 100644
index 00000000..85935985
--- /dev/null
+++ b/components/esp_lvgl_port/test_apps/simd/main/test_lv_fill_benchmark.c
@@ -0,0 +1,176 @@
+/*
+ * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <stdlib.h>
+#include <malloc.h>
+#include <inttypes.h>
+
+#include "unity.h"
+#include "esp_log.h"
+#include "freertos/FreeRTOS.h" // for xthal_get_ccount()
+#include "lv_fill_common.h"
+#include "lv_draw_sw_blend.h"
+#include "lv_draw_sw_blend_to_argb8888.h"
+#include "lv_draw_sw_blend_to_rgb565.h"
+
+#define WIDTH 128
+#define HEIGHT 128
+#define STRIDE WIDTH
+#define UNALIGN_BYTES 1
+#define BENCHMARK_CYCLES 1000
+
+// ------------------------------------------------- Macros and Types --------------------------------------------------
+
+static const char *TAG_LV_FILL_BENCH = "LV Fill Benchmark";
+static const char *asm_ansi_func[] = {"ASM", "ANSI"};
+static lv_color_t test_color = {
+    .blue = 0x56,
+    .green = 0x34,
+    .red = 0x12,
+};
+
+// ------------------------------------------------ Static function headers --------------------------------------------
+
+/**
+ * @brief Initialize the benchmark test
+ */
+static void lv_fill_benchmark_init(bench_test_case_params_t *test_params);
+
+/**
+ * @brief Run the benchmark test
+ */
+static float lv_fill_benchmark_run(bench_test_case_params_t *test_params, _lv_draw_sw_blend_fill_dsc_t *dsc);
+
+// ------------------------------------------------ Test cases ---------------------------------------------------------
+
+/*
+Benchmark tests
+
+Requires:
+    - To pass functionality tests first
+
+Purpose:
+    - Test that an acceleration is achieved by an assembly implementation of LVGL blending API
+
+Procedure:
+    - Initialize input parameters (test array length, width, allocate array...) of the benchmark test
+    - Run assembly version of LVGL blending API multiple times (1000-times or so)
+    - Firstly use an input test parameters for the most ideal case (16-byte aligned array, array width and height divisible by 4 for ARGB8888 color format)
+    - Then use worst-case input test parameters (1-byte aligned array, array width and height NOT divisible by 4 for ARGB8888 color format)
+    - Count how many CPU cycles does it take to run a function from the LVGL blending API for each case (ideal and worst case)
+    - Run ansi version of LVGL blending API multiple times (1000-times or so) and repeat the 2 above steps for the ansi version
+    - Free test arrays and structures needed for LVGL blending API
+*/
+// ------------------------------------------------ Test cases stages --------------------------------------------------
+
+TEST_CASE("LV Fill benchmark ARGB8888", "[fill][benchmark][ARGB8888]")
+{
+    uint32_t *dest_array_align16 = (uint32_t *)memalign(16, STRIDE * HEIGHT * sizeof(uint32_t) + UNALIGN_BYTES);
+    TEST_ASSERT_NOT_EQUAL(NULL, dest_array_align16);
+
+    // Apply byte unalignment for the worst-case test scenario (offset in bytes, not in elements)
+    uint32_t *dest_array_align1 = (uint32_t *)((uint8_t *)dest_array_align16 + UNALIGN_BYTES);
+
+    bench_test_case_params_t test_params = {
+        .height = HEIGHT,
+        .width = WIDTH,
+        .stride = STRIDE * sizeof(uint32_t),
+        .cc_height = HEIGHT - 1,
+        .cc_width = WIDTH - 1,
+        .benchmark_cycles = BENCHMARK_CYCLES,
+        .array_align16 = (void *)dest_array_align16,
+        .array_align1 = (void *)dest_array_align1,
+        .blend_api_func = &lv_draw_sw_blend_color_to_argb8888,
+    };
+
+    ESP_LOGI(TAG_LV_FILL_BENCH, "running test for ARGB8888 color format");
+    lv_fill_benchmark_init(&test_params);
+    free(dest_array_align16);
+}
+
+TEST_CASE("LV Fill benchmark RGB565", "[fill][benchmark][RGB565]")
+{
+    uint16_t *dest_array_align16 = (uint16_t *)memalign(16, STRIDE * HEIGHT * sizeof(uint16_t) + UNALIGN_BYTES);
+    TEST_ASSERT_NOT_EQUAL(NULL, dest_array_align16);
+
+    // Apply byte unalignment for the worst-case test scenario (offset in bytes, not in elements)
+    uint16_t *dest_array_align1 = (uint16_t *)((uint8_t *)dest_array_align16 + UNALIGN_BYTES);
+
+    bench_test_case_params_t test_params = {
+        .height = HEIGHT,
+        .width = WIDTH,
+        .stride = STRIDE * sizeof(uint16_t),
+        .cc_height = HEIGHT - 1,
+        .cc_width = WIDTH - 1,
+        .benchmark_cycles = BENCHMARK_CYCLES,
+        .array_align16 = (void *)dest_array_align16,
+        .array_align1 = (void *)dest_array_align1,
+        .blend_api_func = &lv_draw_sw_blend_color_to_rgb565,
+    };
+
+    ESP_LOGI(TAG_LV_FILL_BENCH, "running test for RGB565 color format");
+    lv_fill_benchmark_init(&test_params);
+    free(dest_array_align16);
+}
+// ------------------------------------------------ Static test functions ----------------------------------------------
+
+static void lv_fill_benchmark_init(bench_test_case_params_t *test_params)
+{
+    // Init structure for LVGL blend API, ideal case (aligned buffer), starting with the Assembly API
+    _lv_draw_sw_blend_fill_dsc_t dsc = {
+        .dest_buf = test_params->array_align16,
+        .dest_w = test_params->width,
+        .dest_h = test_params->height,
+        .dest_stride = test_params->stride, // stride * sizeof()
+        .mask_buf = NULL,
+        .color = test_color,
+        .opa = LV_OPA_MAX,
+        .use_asm = true,
+    };
+
+    // Init structure for LVGL blend API, corner case (unaligned buffer, reduced width and height)
+    _lv_draw_sw_blend_fill_dsc_t dsc_cc = dsc;
+    dsc_cc.dest_buf = test_params->array_align1;
+    dsc_cc.dest_w = test_params->cc_width;
+    dsc_cc.dest_h = test_params->cc_height;
+
+    // Run benchmark 2 times:
+    // First run using assembly, second run using ANSI
+    for (int i = 0; i < 2; i++) {
+
+        // Run benchmark with the most ideal input parameters
+        // Dest array is 16 byte aligned, dest_w and dest_h are divisible by 4
+        float cycles = lv_fill_benchmark_run(test_params, &dsc); // Call Benchmark cycle
+        float per_sample = cycles / ((float)(dsc.dest_w * dsc.dest_h));
+        ESP_LOGI(TAG_LV_FILL_BENCH, " %s ideal case: %.3f cycles for %"PRIi32"x%"PRIi32" matrix, %.3f cycles per sample", asm_ansi_func[i], cycles, dsc.dest_w, dsc.dest_h, per_sample);
+
+        // Run benchmark with the corner case input parameters
+        // Dest array is 1 byte aligned, dest_w and dest_h are not divisible by 4
+        cycles = lv_fill_benchmark_run(test_params, &dsc_cc); // Call Benchmark cycle
+        per_sample = cycles / ((float)(dsc_cc.dest_w * dsc_cc.dest_h));
+        ESP_LOGI(TAG_LV_FILL_BENCH, " %s corner case: %.3f cycles for %"PRIi32"x%"PRIi32" matrix, %.3f cycles per sample\n", asm_ansi_func[i], cycles, dsc_cc.dest_w, dsc_cc.dest_h, per_sample);
+
+        // change to ANSI
+        dsc.use_asm = false;
+        dsc_cc.use_asm = false;
+    }
+}
+
+static float lv_fill_benchmark_run(bench_test_case_params_t *test_params, _lv_draw_sw_blend_fill_dsc_t *dsc)
+{
+    // Call the DUT function for the first time to init the benchmark test
+    test_params->blend_api_func(dsc);
+
+    const unsigned int start_b = xthal_get_ccount();
+    for (unsigned int i = 0; i < test_params->benchmark_cycles; i++) {
+        test_params->blend_api_func(dsc);
+    }
+    const unsigned int end_b = xthal_get_ccount();
+
+    const float total_b = end_b - start_b;
+    const float cycles = total_b / (test_params->benchmark_cycles);
+    return cycles;
+}
diff --git a/components/esp_lvgl_port/test_apps/simd/main/test_lv_fill_functionality.c b/components/esp_lvgl_port/test_apps/simd/main/test_lv_fill_functionality.c
new file mode 100644
index 00000000..972f8edf
--- /dev/null
+++ b/components/esp_lvgl_port/test_apps/simd/main/test_lv_fill_functionality.c
@@ -0,0 +1,311 @@
+/*
+ * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <inttypes.h>
+#include <malloc.h>
+#include <stdio.h>
+#include <string.h>
+#include "unity.h"
+#include "esp_log.h"
+#include "lv_fill_common.h"
+#include "lv_draw_sw_blend.h"
+#include "lv_draw_sw_blend_to_argb8888.h"
+#include "lv_draw_sw_blend_to_rgb565.h"
+
+// ------------------------------------------------- Defines -----------------------------------------------------------
+
+#define DBG_PRINT_OUTPUT false
+#define CANARY_BYTES 4 // Guard area at each end of a test buffer, counted in elements (scaled by data_type_size)
+
+// ------------------------------------------------- Macros and Types --------------------------------------------------
+
+#define UPDATE_TEST_CASE(test_case_ptr, w, h, stride, unalign) do { \
+    (test_case_ptr)->active_buf_len = (size_t)((h) * (stride)); \
+    (test_case_ptr)->total_buf_len = (size_t)(((h) * (stride)) + (CANARY_BYTES * 2)); \
+    (test_case_ptr)->dest_w = (w); \
+    (test_case_ptr)->dest_h = (h); \
+    (test_case_ptr)->dest_stride = (stride); \
+    (test_case_ptr)->unalign_byte = (unalign); \
+} while (0)
+
+static const char *TAG_LV_FILL_FUNC = "LV Fill Functionality";
+static char test_msg_buf[128];
+
+static lv_color_t test_color = {
+    .blue = 0x56,
+    .green = 0x34,
+    .red = 0x12,
+};
+
+// ------------------------------------------------ Static function headers --------------------------------------------
+
+/**
+ * @brief Generate all the functionality test combinations
+ *
+ * - generate functionality test combinations, based on the provided test_matrix struct
+ *
+ * @param[in] test_matrix Pointer to structure defining test matrix - all the test combinations
+ * @param[in] test_case Pointer to structure defining functionality test case
+ */
+static void functionality_test_matrix(test_matrix_params_t *test_matrix, func_test_case_params_t *test_case);
+
+/**
+ * @brief Fill test buffers for functionality test
+ *
+ * @param[in] test_case Pointer to structure defining functionality test case
+ */
+static void fill_test_bufs(func_test_case_params_t *test_case);
+
+/**
+ * @brief The actual functionality test
+ *
+ * - function prepares structures for functionality testing and runs the LVGL API
+ *
+ * @param[in] test_case Pointer to structure defining functionality test case
+ */
+static void lv_fill_functionality(func_test_case_params_t *test_case);
+
+/**
+ * @brief Evaluate results for 32bit data length
+ *
+ * @param[in] test_case Pointer to structure defining functionality test case
+ */
+static void test_eval_32bit_data(func_test_case_params_t *test_case);
+
+/**
+ * @brief Evaluate results for 16bit data length
+ *
+ * @param[in] test_case Pointer to structure defining functionality test case
+ */
+static void test_eval_16bit_data(func_test_case_params_t *test_case);
+
+// ------------------------------------------------ Test cases ---------------------------------------------------------
+
+/*
+Functionality tests
+
+Purpose:
+    - Test that an assembly version of LVGL blending API achieves the same results as the ANSI version
+
+Procedure:
+    - Prepare testing matrix, to cover all the possible combinations of destination array widths, lengths, memory alignment...
+    - Run assembly version of the LVGL blending API
+    - Run ANSI C version of the LVGL blending API
+    - Compare the results
+    - Repeat above 3 steps for each test matrix setup
+*/
+
+// ------------------------------------------------ Test cases stages --------------------------------------------------
+
+TEST_CASE("Test fill functionality ARGB8888", "[fill][functionality][ARGB8888]")
+{
+    test_matrix_params_t test_matrix = {
+        .min_w = 8, // 8 is the lower limit for the esp32s3 asm implementation, otherwise esp32 is executed
+        .min_h = 1,
+        .max_w = 16,
+        .max_h = 16,
+        .min_unalign_byte = 0,
+        .max_unalign_byte = 16,
+        .unalign_step = 1,
+        .dest_stride_step = 1,
+        .test_combinations_count = 0,
+    };
+
+    func_test_case_params_t test_case = {
+        .blend_api_func = &lv_draw_sw_blend_color_to_argb8888,
+        .color_format = LV_COLOR_FORMAT_ARGB8888,
+        .data_type_size = sizeof(uint32_t),
+    };
+
+    ESP_LOGI(TAG_LV_FILL_FUNC, "running test for ARGB8888 color format");
+    functionality_test_matrix(&test_matrix, &test_case);
+}
+
+TEST_CASE("Test fill functionality RGB565", "[fill][functionality][RGB565]")
+{
+    test_matrix_params_t test_matrix = {
+        .min_w = 8, // 8 is the lower limit for the esp32s3 asm implementation, otherwise esp32 is executed
+        .min_h = 1,
+        .max_w = 16,
+        .max_h = 16,
+        .min_unalign_byte = 0,
+        .max_unalign_byte = 16,
+        .unalign_step = 1,
+        .dest_stride_step = 1,
+        .test_combinations_count = 0,
+    };
+
+    func_test_case_params_t test_case = {
+        .blend_api_func = &lv_draw_sw_blend_color_to_rgb565,
+        .color_format = LV_COLOR_FORMAT_RGB565,
+        .data_type_size = sizeof(uint16_t),
+    };
+
+    ESP_LOGI(TAG_LV_FILL_FUNC, "running test for RGB565 color format");
+    functionality_test_matrix(&test_matrix, &test_case);
+}
+
+// ------------------------------------------------ Static test functions ----------------------------------------------
+
+static void functionality_test_matrix(test_matrix_params_t *test_matrix, func_test_case_params_t *test_case)
+{
+    // Step destination array width
+    for (unsigned int dest_w = test_matrix->min_w; dest_w <= test_matrix->max_w; dest_w++) {
+
+        // Step destination array height
+        for (unsigned int dest_h = test_matrix->min_h; dest_h <= test_matrix->max_h; dest_h++) {
+
+            // Step destination array stride
+            for (unsigned int dest_stride = dest_w; dest_stride <= dest_w * 2; dest_stride += test_matrix->dest_stride_step) {
+
+                // Step destination array unalignment
+                for (unsigned int unalign_byte = test_matrix->min_unalign_byte; unalign_byte <= test_matrix->max_unalign_byte; unalign_byte += test_matrix->unalign_step) {
+
+                    // Call functionality test
+                    UPDATE_TEST_CASE(test_case, dest_w, dest_h, dest_stride, unalign_byte);
+                    lv_fill_functionality(test_case);
+                    test_matrix->test_combinations_count++;
+                }
+            }
+        }
+    }
+    ESP_LOGI(TAG_LV_FILL_FUNC, "test combinations: %u\n", test_matrix->test_combinations_count);
+}
+
+static void lv_fill_functionality(func_test_case_params_t *test_case)
+{
+    fill_test_bufs(test_case);
+
+    // Init structure for LVGL blend API, to call the Assembly API
+    _lv_draw_sw_blend_fill_dsc_t dsc_asm = {
+        .dest_buf = test_case->buf.p_asm,
+        .dest_w = test_case->dest_w,
+        .dest_h = test_case->dest_h,
+        .dest_stride = test_case->dest_stride * test_case->data_type_size, // stride * sizeof()
+        .mask_buf = NULL,
+        .color = test_color,
+        .opa = LV_OPA_MAX,
+        .use_asm = true,
+    };
+
+    // Init structure for LVGL blend API, to call the ANSI API
+    _lv_draw_sw_blend_fill_dsc_t dsc_ansi = dsc_asm;
+    dsc_ansi.dest_buf = test_case->buf.p_ansi;
+    dsc_ansi.use_asm = false;
+
+    test_case->blend_api_func(&dsc_asm); // Call the LVGL API with Assembly code
+    test_case->blend_api_func(&dsc_ansi); // Call the LVGL API with ANSI code
+
+    // Shift array pointers by Canary Bytes amount back (byte-wise, through uint8_t *, not void * arithmetic)
+    test_case->buf.p_asm = (uint8_t *)test_case->buf.p_asm - CANARY_BYTES * test_case->data_type_size;
+    test_case->buf.p_ansi = (uint8_t *)test_case->buf.p_ansi - CANARY_BYTES * test_case->data_type_size;
+
+    // Evaluate the results
+    snprintf(test_msg_buf, sizeof(test_msg_buf), "Test case: dest_w = %u, dest_h = %u, dest_stride = %u, unalign_byte = %u\n", test_case->dest_w, test_case->dest_h, test_case->dest_stride, test_case->unalign_byte);
+
+    switch (test_case->color_format) {
+    case LV_COLOR_FORMAT_ARGB8888: {
+        test_eval_32bit_data(test_case);
+        break;
+    }
+
+    case LV_COLOR_FORMAT_RGB565: {
+        test_eval_16bit_data(test_case);
+        break;
+    }
+
+    default:
+        TEST_ASSERT_MESSAGE(false, "LV Color format not found");
+    }
+
+    free(test_case->buf.p_asm_alloc);
+    free(test_case->buf.p_ansi_alloc);
+}
+
+static void fill_test_bufs(func_test_case_params_t *test_case)
+{
+    const size_t data_type_size = test_case->data_type_size; // sizeof() of used data type
+    const size_t total_buf_len = test_case->total_buf_len; // Total buffer length in elements, data part of the buffer including the Canary elements
+    const size_t active_buf_len = test_case->active_buf_len; // Length of the data part of the buffer in elements
+    const unsigned int unalign_byte = test_case->unalign_byte;
+
+    // Allocate destination arrays for Assembly and ANSI LVGL Blend API
+    void *mem_asm = memalign(16, (total_buf_len * data_type_size) + unalign_byte);
+    void *mem_ansi = memalign(16, (total_buf_len * data_type_size) + unalign_byte);
+    TEST_ASSERT_NOT_NULL_MESSAGE(mem_asm, "Lack of memory");
+    TEST_ASSERT_NOT_NULL_MESSAGE(mem_ansi, "Lack of memory");
+
+    // Save a pointer to the beginning of the allocated memory which will be used to free()
+    test_case->buf.p_asm_alloc = mem_asm;
+    test_case->buf.p_ansi_alloc = mem_ansi;
+
+    // Apply destination array unalignment
+    uint8_t *dest_buf_asm = (uint8_t *)mem_asm + unalign_byte;
+    uint8_t *dest_buf_ansi = (uint8_t *)mem_ansi + unalign_byte;
+
+    // Set the whole buffer to 0, including the Canary bytes part
+    memset(dest_buf_asm, 0, total_buf_len * data_type_size);
+    memset(dest_buf_ansi, 0, total_buf_len * data_type_size);
+
+    // Fill the actual part of the destination buffers with known values,
+    // Values must be same, because of the stride
+    for (size_t i = CANARY_BYTES; i < active_buf_len + CANARY_BYTES; i++) {
+        dest_buf_asm[i * data_type_size] = (uint8_t)(i % 255);
+        dest_buf_ansi[i * data_type_size] = (uint8_t)(i % 255);
+    }
+
+    // Shift array pointers by Canary Bytes amount
+    dest_buf_asm += CANARY_BYTES * data_type_size;
+    dest_buf_ansi += CANARY_BYTES * data_type_size;
+
+    // Save a pointer to the working part of the memory, where the test data are stored
+    test_case->buf.p_asm = (void *)dest_buf_asm;
+    test_case->buf.p_ansi = (void *)dest_buf_ansi;
+}
+
+static void test_eval_32bit_data(func_test_case_params_t *test_case)
+{
+    // Print results 32bit data
+#if DBG_PRINT_OUTPUT
+    for (uint32_t i = 0; i < test_case->total_buf_len; i++) {
+        printf("dest_buf[%"PRIu32"] %s ansi = %8"PRIx32" \t asm = %8"PRIx32" \n", i, ((i < 10) ? (" ") : ("")), ((uint32_t *)test_case->buf.p_ansi)[i], ((uint32_t *)test_case->buf.p_asm)[i]);
+    }
+    printf("\n");
+#endif
+
+    // Canary bytes area must stay 0
+    TEST_ASSERT_EACH_EQUAL_UINT32_MESSAGE(0, (uint32_t *)test_case->buf.p_ansi, CANARY_BYTES, test_msg_buf);
+    TEST_ASSERT_EACH_EQUAL_UINT32_MESSAGE(0, (uint32_t *)test_case->buf.p_asm, CANARY_BYTES, test_msg_buf);
+
+    // dest_buf_asm and dest_buf_ansi must be equal
+    TEST_ASSERT_EQUAL_UINT32_ARRAY_MESSAGE((uint32_t *)test_case->buf.p_asm + CANARY_BYTES, (uint32_t *)test_case->buf.p_ansi + CANARY_BYTES, test_case->active_buf_len, test_msg_buf);
+
+    // Canary bytes area must stay 0
+    TEST_ASSERT_EACH_EQUAL_UINT32_MESSAGE(0, (uint32_t *)test_case->buf.p_ansi + (test_case->total_buf_len - CANARY_BYTES), CANARY_BYTES, test_msg_buf);
+    TEST_ASSERT_EACH_EQUAL_UINT32_MESSAGE(0, (uint32_t *)test_case->buf.p_asm + (test_case->total_buf_len - CANARY_BYTES), CANARY_BYTES, test_msg_buf);
+}
+
+static void test_eval_16bit_data(func_test_case_params_t *test_case)
+{
+    // Print results, 16bit data
+#if DBG_PRINT_OUTPUT
+    for (uint32_t i = 0; i < test_case->total_buf_len; i++) {
+        printf("dest_buf[%"PRIu32"] %s ansi = %8"PRIx16" \t asm = %8"PRIx16" \n", i, ((i < 10) ? (" ") : ("")), ((uint16_t *)test_case->buf.p_ansi)[i], ((uint16_t *)test_case->buf.p_asm)[i]);
+    }
+    printf("\n");
+#endif
+
+    // Canary bytes area must stay 0
+    TEST_ASSERT_EACH_EQUAL_UINT16_MESSAGE(0, (uint16_t *)test_case->buf.p_ansi, CANARY_BYTES, test_msg_buf);
+    TEST_ASSERT_EACH_EQUAL_UINT16_MESSAGE(0, (uint16_t *)test_case->buf.p_asm, CANARY_BYTES, test_msg_buf);
+
+    // dest_buf_asm and dest_buf_ansi must be equal
+    TEST_ASSERT_EQUAL_UINT16_ARRAY_MESSAGE((uint16_t *)test_case->buf.p_asm + CANARY_BYTES, (uint16_t *)test_case->buf.p_ansi + CANARY_BYTES, test_case->active_buf_len, test_msg_buf);
+
+    // Canary bytes area must stay 0
+    TEST_ASSERT_EACH_EQUAL_UINT16_MESSAGE(0, (uint16_t *)test_case->buf.p_ansi + (test_case->total_buf_len - CANARY_BYTES), CANARY_BYTES, test_msg_buf);
+    TEST_ASSERT_EACH_EQUAL_UINT16_MESSAGE(0, (uint16_t *)test_case->buf.p_asm + (test_case->total_buf_len - CANARY_BYTES), CANARY_BYTES, test_msg_buf);
+}
diff --git a/components/esp_lvgl_port/test_apps/simd/sdkconfig.defaults b/components/esp_lvgl_port/test_apps/simd/sdkconfig.defaults
new file mode 100644
index 00000000..384a6053
--- /dev/null
+++ b/components/esp_lvgl_port/test_apps/simd/sdkconfig.defaults
@@ -0,0 +1,3 @@
+CONFIG_ESP_TASK_WDT=n
+CONFIG_OPTIMIZATION_LEVEL_RELEASE=y
+CONFIG_COMPILER_OPTIMIZATION_PERF=y
\ No newline at end of file