From 7528308d65be23375658d179e200d3de50375a77 Mon Sep 17 00:00:00 2001
From: Hiroshi Hatake <hiroshi@chronosphere.io>
Date: Wed, 18 Dec 2024 11:00:02 +0900
Subject: [PATCH] simd: utils: Handle flexibility of RVV vlen

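The RISC-V Vector extension (RVV) does not fix the vector register
length (VLEN); VLEN can be any power of two. Stop hard-coding
VLEN = 128 and derive the per-instruction element counts from
FLB_RVV_VLEN when it is defined, falling back to 128 otherwise.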

Signed-off-by: Hiroshi Hatake <hiroshi@chronosphere.io>
---
 include/fluent-bit/flb_simd.h | 31 +++++++++++++++++++++----------
 src/flb_utils.c               |  2 +-
 2 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/include/fluent-bit/flb_simd.h b/include/fluent-bit/flb_simd.h
index a0fa3f3b32b..d84a3f4b307 100644
--- a/include/fluent-bit/flb_simd.h
+++ b/include/fluent-bit/flb_simd.h
@@ -77,8 +77,13 @@ typedef uint32x4_t flb_vector32;
 typedef vuint8m1_t flb_vector8;
 typedef vuint32m1_t flb_vector32;
 
-/* Currently, VLEN is assumed to 128. */
-#define RVV_VEC_INST_LEN (128 / 8) /* 16 */
+#if defined(FLB_RVV_VLEN) /* FLB_RVV_VLEN: vector length in bits */
+#define RVV_VEC8_INST_LEN  (FLB_RVV_VLEN / 8)     /* uint8_t elements per op  */
+#define RVV_VEC32_INST_LEN (FLB_RVV_VLEN / 8 / 4) /* uint32_t elements per op */
+#else /* VLEN not configured: keep the previous 128-bit assumption */
+#define RVV_VEC8_INST_LEN  (128 / 8)     /* 16 */
+#define RVV_VEC32_INST_LEN (128 / 8 / 4) /*  4 */
+#endif
 
 #else
 /*
@@ -116,7 +121,7 @@ static inline void flb_vector8_load(flb_vector8 *v, const uint8_t *s)
 #elif defined(FLB_SIMD_NEON)
 	*v = vld1q_u8(s);
 #elif defined(FLB_SIMD_RVV)
-	*v = __riscv_vle8_v_u8m1(s, 16);
+	*v = __riscv_vle8_v_u8m1(s, RVV_VEC8_INST_LEN);
 #else
 	memset(v, 0, sizeof(flb_vector8));
 #endif
@@ -153,7 +158,7 @@ static inline flb_vector8 flb_vector8_ssub(const flb_vector8 v1, const flb_vecto
 #elif defined(FLB_SIMD_NEON)
 	return vqsubq_u8(v1, v2);
 #elif defined(FLB_SIMD_RVV)
-	return __riscv_vssubu_vv_u8m1(v1, v2, 16);
+	return __riscv_vssubu_vv_u8m1(v1, v2, RVV_VEC8_INST_LEN);
 #endif
 }
 #endif /* ! FLB_SIMD_NONE */
@@ -170,8 +175,10 @@ static inline flb_vector8 flb_vector8_eq(const flb_vector8 v1, const flb_vector8
 #elif defined(FLB_SIMD_NEON)
 	return vceqq_u8(v1, v2);
 #elif defined(FLB_SIMD_RVV)
-	vbool8_t ret = __riscv_vmseq_vv_u8m1_b8(v1, v2, 16);
-	return __riscv_vmerge_vvm_u8m1(__riscv_vmv_v_x_u8m1(0, 16), __riscv_vmv_v_x_u8m1(UINT8_MAX, 16), ret, 16);
+	vbool8_t ret = __riscv_vmseq_vv_u8m1_b8(v1, v2, RVV_VEC8_INST_LEN);
+	return __riscv_vmerge_vvm_u8m1(__riscv_vmv_v_x_u8m1(0, RVV_VEC8_INST_LEN),
+								   __riscv_vmv_v_x_u8m1(UINT8_MAX, RVV_VEC8_INST_LEN),
+								   ret, RVV_VEC8_INST_LEN);
 #endif
 }
 #endif /* ! FLB_SIMD_NONE */
@@ -184,8 +191,10 @@ static inline flb_vector32 flb_vector32_eq(const flb_vector32 v1, const flb_vect
 #elif defined(FLB_SIMD_NEON)
 	return vceqq_u32(v1, v2);
 #elif defined(FLB_SIMD_RVV)
-	vbool32_t ret = __riscv_vmseq_vv_u32m1_b32(v1, v2, 4);
-	return __riscv_vmerge_vvm_u32m1(__riscv_vmv_v_x_u32m1(0, 4), __riscv_vmv_v_x_u32m1(UINT32_MAX, 4), ret, 4);
+	vbool32_t ret = __riscv_vmseq_vv_u32m1_b32(v1, v2, RVV_VEC32_INST_LEN);
+	return __riscv_vmerge_vvm_u32m1(__riscv_vmv_v_x_u32m1(0, RVV_VEC32_INST_LEN),
+									__riscv_vmv_v_x_u32m1(UINT32_MAX, RVV_VEC32_INST_LEN),
+									ret, RVV_VEC32_INST_LEN);
 #endif
 }
 #endif /* ! FLB_SIMD_NONE */
@@ -200,7 +209,7 @@ static inline flb_vector8 flb_vector8_broadcast(const uint8_t c)
 #elif defined(FLB_SIMD_NEON)
 	return vdupq_n_u8(c);
 #elif defined(FLB_SIMD_RVV)
-	return __riscv_vmv_v_x_u8m1(c, 16);
+	return __riscv_vmv_v_x_u8m1(c, RVV_VEC8_INST_LEN);
 #else
 	return ~UINT64CONST(0) / 0xFF * c;
 #endif
@@ -216,7 +225,9 @@ static inline bool flb_vector8_is_highbit_set(const flb_vector8 v)
 #elif defined(FLB_SIMD_NEON)
 	return vmaxvq_u8(v) > 0x7F;
 #elif defined(FLB_SIMD_RVV)
-	return __riscv_vmv_x_s_u8m1_u8(__riscv_vredmaxu_vs_u8m1_u8m1(v, __riscv_vmv_v_x_u8m1(0, 16), 16));
+	return __riscv_vmv_x_s_u8m1_u8(__riscv_vredmaxu_vs_u8m1_u8m1(v,
+																 __riscv_vmv_v_x_u8m1(0, RVV_VEC8_INST_LEN),
+																 RVV_VEC8_INST_LEN));
 #else
 	return v & flb_vector8_broadcast(0x80);
 #endif
diff --git a/src/flb_utils.c b/src/flb_utils.c
index 7ba7da534c5..20d09b04d5c 100644
--- a/src/flb_utils.c
+++ b/src/flb_utils.c
@@ -802,7 +802,7 @@ int flb_utils_write_str(char *buf, int *off, size_t size, const char *str, size_
     char tmp[16];
     char *p;
 #if defined(FLB_SIMD_RVV)
-    const size_t inst_len = RVV_VEC_INST_LEN;
+    const size_t inst_len = RVV_VEC8_INST_LEN;
 #else
     const size_t inst_len = sizeof(flb_vector8);
 #endif