diff --git a/v-spec.adoc b/v-spec.adoc index d9baec2..21d205d 100644 --- a/v-spec.adoc +++ b/v-spec.adoc @@ -1331,21 +1331,22 @@ throughput on mixed-width operations in a single loop. # a0 holds the total number of elements to process # a1 holds the address of the source array # a2 holds the address of the destination array +# a3 holds the multiplier for the widening multiplication loop: - vsetvli a3, a0, e16, m4, ta, ma # vtype = 16-bit integer vectors; - # also update a3 with vl (# of elements this iteration) + vsetvli t0, a0, e16, m4, ta, ma # vtype = 16-bit integer vectors; + # also update t0 with vl (# of elements this iteration) vle16.v v4, (a1) # Get 16b vector - slli t1, a3, 1 # Multiply # elements this iteration by 2 bytes/source element + slli t1, t0, 1 # Multiply # elements this iteration by 2 bytes/source element add a1, a1, t1 # Bump pointer - vwmul.vx v8, v4, x10 # Widening multiply into 32b in + vwmul.vx v8, v4, a3 # Widening multiply into 32b in vsetvli x0, x0, e32, m8, ta, ma # Operate on 32b values vsrl.vi v8, v8, 3 vse32.v v8, (a2) # Store vector of 32b elements - slli t1, a3, 2 # Multiply # elements this iteration by 4 bytes/destination element + slli t1, t0, 2 # Multiply # elements this iteration by 4 bytes/destination element add a2, a2, t1 # Bump pointer - sub a0, a0, a3 # Decrement count by vl + sub a0, a0, t0 # Decrement count by vl bnez a0, loop # Any more? ----