lavc/g722dsp: optimise R-V V apply_qmf

This stores the constant coefficients deinterleaved, so that each half can be
loaded with a plain unit-stride load (NF=0) instead of a segmented load.
Unfortunately, loading the input cannot be optimised, because its memory
alignment is insufficient (not 32-bit aligned).

Before:
g722_apply_qmf_c:       82.5
g722_apply_qmf_rvv_i32: 78.2

After:
g722_apply_qmf_c:       82.5
g722_apply_qmf_rvv_i32: 65.2
This commit is contained in:
Rémi Denis-Courmont 2023-11-19 14:49:28 +02:00
parent e33ce0d9dd
commit b88d4058f9

View File

@ -24,7 +24,9 @@ func ff_g722_apply_qmf_rvv, zve32x
lla t0, qmf_coeffs lla t0, qmf_coeffs
vsetivli zero, 12, e16, m2, ta, ma vsetivli zero, 12, e16, m2, ta, ma
vlseg2e16.v v28, (a0) vlseg2e16.v v28, (a0)
vlseg2e16.v v24, (t0) addi t1, t0, 12 * 2
vle16.v v24, (t0)
vle16.v v26, (t1)
vwmul.vv v16, v28, v24 vwmul.vv v16, v28, v24
vwmul.vv v20, v30, v26 vwmul.vv v20, v30, v26
vsetivli zero, 12, e32, m4, ta, ma vsetivli zero, 12, e32, m4, ta, ma
@ -41,26 +43,26 @@ endfunc
const qmf_coeffs, align=2 const qmf_coeffs, align=2
.short 3 .short 3
.short -11 .short -11
.short -11
.short 53
.short 12 .short 12
.short -156
.short 32 .short 32
.short 362
.short -210 .short -210
.short -805
.short 951 .short 951
.short 3876 .short 3876
.short 3876
.short 951
.short -805 .short -805
.short -210
.short 362 .short 362
.short 32
.short -156 .short -156
.short 12
.short 53 .short 53
.short -11 .short -11
.short -11 .short -11
.short 53
.short -156
.short 362
.short -805
.short 3876
.short 951
.short -210
.short 32
.short 12
.short -11
.short 3 .short 3
endconst endconst