lavc/audiodsp: rework RISC-V V scalar product
Take vector reduction out of the loop and unroll.

Before:
audiodsp.scalarproduct_int16_c:       12321.0
audiodsp.scalarproduct_int16_rvv_i32:  4175.7

After:
audiodsp.scalarproduct_int16_c:       12320.5
audiodsp.scalarproduct_int16_rvv_i32:  1230.2
This commit is contained in:
parent
29b9d616c2
commit
44cac1def0
@@ -21,21 +21,22 @@
|
||||
#include "libavutil/riscv/asm.S"
|
||||
|
||||
func ff_scalarproduct_int16_rvv, zve32x
|
||||
vsetivli zero, 1, e32, m1, ta, ma
|
||||
vmv.s.x v8, zero
|
||||
vsetvli t0, zero, e32, m8, ta, ma
|
||||
vmv.v.x v8, zero
|
||||
vmv.s.x v0, zero
|
||||
1:
|
||||
vsetvli t0, a2, e16, m1, ta, ma
|
||||
vsetvli t0, a2, e16, m4, tu, ma
|
||||
vle16.v v16, (a0)
|
||||
sub a2, a2, t0
|
||||
vle16.v v24, (a1)
|
||||
sh1add a0, t0, a0
|
||||
vwmul.vv v0, v16, v24
|
||||
vwmacc.vv v8, v16, v24
|
||||
sh1add a1, t0, a1
|
||||
vsetvli zero, t0, e32, m2, ta, ma
|
||||
vredsum.vs v8, v0, v8
|
||||
bnez a2, 1b
|
||||
|
||||
vmv.x.s a0, v8
|
||||
vsetvli t0, zero, e32, m8, ta, ma
|
||||
vredsum.vs v0, v8, v0
|
||||
vmv.x.s a0, v0
|
||||
ret
|
||||
endfunc
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user