lavc/g722dsp: optimise R-V V apply_qmf

This stores the constant coefficients deinterleaved, so that they can be loaded directly with NF=0. Unfortunately, we cannot optimise loading the input, due to insufficient memory alignment (not 32-bit).

Before:
g722_apply_qmf_c:       82.5
g722_apply_qmf_rvv_i32: 78.2

After:
g722_apply_qmf_c:       82.5
g722_apply_qmf_rvv_i32: 65.2
2023-11-19 14:49:28 +02:00 · 2023-11-19 14:49:28 +02:00 · b88d4058f9
commit b88d4058f9
parent e33ce0d9dd
1 changed file with 13 additions and 11 deletions
--- a/libavcodec/riscv/g722dsp_rvv.S
+++ b/libavcodec/riscv/g722dsp_rvv.S
@@ -24,7 +24,9 @@ func ff_g722_apply_qmf_rvv, zve32x
        lla         t0, qmf_coeffs
        vsetivli    zero, 12, e16, m2, ta, ma
        vlseg2e16.v v28, (a0)
-        vlseg2e16.v v24, (t0)
+        addi        t1, t0, 12 * 2
        vle16.v     v24, (t0)
        vle16.v     v26, (t1)
        vwmul.vv    v16, v28, v24
        vwmul.vv    v20, v30, v26
        vsetivli    zero, 12, e32, m4, ta, ma
@@ -41,26 +43,26 @@ endfunc
 const qmf_coeffs, align=2
        .short     3
        .short   -11
        .short   -11
        .short    53
        .short    12
        .short  -156
        .short    32
        .short   362
        .short  -210
        .short  -805
        .short   951
        .short  3876
        .short  3876
        .short   951
        .short  -805
        .short  -210
        .short   362
        .short    32
        .short  -156
        .short    12
        .short    53
        .short   -11
        .short   -11
        .short    53
        .short  -156
        .short   362
        .short  -805
        .short  3876
        .short   951
        .short  -210
        .short    32
        .short    12
        .short   -11
        .short     3
 endconst