lavc/audiodsp: rework RISC-V V scalar product

Take vector reduction out of the loop and unroll.

Before:
audiodsp.scalarproduct_int16_c: 12321.0
audiodsp.scalarproduct_int16_rvv_i32: 4175.7

After:
audiodsp.scalarproduct_int16_c: 12320.5
audiodsp.scalarproduct_int16_rvv_i32: 1230.2
This commit is contained in:
Rémi Denis-Courmont 2023-07-17 20:46:06 +03:00
parent 29b9d616c2
commit 44cac1def0

View File

@ -21,21 +21,22 @@
#include "libavutil/riscv/asm.S"
/*
 * Dot product of two arrays of signed 16-bit integers.
 *
 * In:   a0 = first array  (16-bit elements)
 *       a1 = second array (16-bit elements)
 *       a2 = number of elements still to process
 * Out:  a0 = sum of the element-wise products, accumulated in 32 bits
 * Uses: t0, v0, v8-v15 (32-bit partial sums), v16-v19 and v24-v27 (inputs)
 *
 * NOTE(review): presumably called through a C prototype of the form
 * int32_t f(const int16_t *, const int16_t *, int) - confirm with the caller.
 */
func ff_scalarproduct_int16_rvv, zve32x
        // e32/m8 has the same VLMAX as the e16/m4 configuration used in the
        // loop, so this clears every partial-sum lane the loop can touch.
        vsetvli     t0, zero, e32, m8, ta, ma
        vmv.v.x     v8, zero                    // partial sums = 0
        vmv.s.x     v0, zero                    // seed for the final reduction
1:
        // Tail-undisturbed: a short last iteration must not destroy the
        // partial sums held in the lanes beyond vl.
        vsetvli     t0, a2, e16, m4, tu, ma     // t0 = elements this round
        vle16.v     v16, (a0)
        sub         a2, a2, t0                  // a2 = elements left
        vle16.v     v24, (a1)
        sh1add      a0, t0, a0                  // a0 += 2 * t0 bytes
        vwmacc.vv   v8, v16, v24                // v8[i] += v16[i] * v24[i] (widened)
        sh1add      a1, t0, a1                  // a1 += 2 * t0 bytes
        bnez        a2, 1b

        // One horizontal reduction over all lanes, after the loop.
        vsetvli     t0, zero, e32, m8, ta, ma
        vredsum.vs  v0, v8, v0                  // v0[0] += sum of v8[*]
        vmv.x.s     a0, v0
        ret
endfunc