dcadsp: add int8x8_fmul_int32 to dsp context
It is currently declared as a macro that is set to one of several inlinable functions, among them a NEON implementation and a default C implementation. Add a DSP context parameter to each inline function; it is unused except by the default C implementation, which calls a function from the DSP context. On an Arrandale CPU, the gain for an inlined SSE2 function vs. a call is: - Win32: 29 to 26 cycles - Win64: 25 to 23 cycles. Signed-off-by: Janne Grunau <janne-libav@jannau.net>
This commit is contained in:
		
							parent
							
								
									e3fec3f095
								
							
						
					
					
						commit
						2bd44cb705
					
				@ -83,7 +83,8 @@ static inline int decode_blockcodes(int code1, int code2, int levels,
 | 
			
		||||
#if HAVE_NEON_INLINE && HAVE_ASM_MOD_Y
 | 
			
		||||
 | 
			
		||||
#define int8x8_fmul_int32 int8x8_fmul_int32
 | 
			
		||||
static inline void int8x8_fmul_int32(float *dst, const int8_t *src, int scale)
 | 
			
		||||
static inline void int8x8_fmul_int32(av_unused DCADSPContext *dsp,
 | 
			
		||||
                                     float *dst, const int8_t *src, int scale)
 | 
			
		||||
{
 | 
			
		||||
    __asm__ ("vcvt.f32.s32 %2,  %2,  #4         \n"
 | 
			
		||||
             "vld1.8       {d0},     [%1,:64]   \n"
 | 
			
		||||
 | 
			
		||||
@ -1086,12 +1086,10 @@ static const uint8_t abits_sizes[7]  = { 7, 10, 12, 13, 15, 17, 19 };
 | 
			
		||||
static const uint8_t abits_levels[7] = { 3,  5,  7,  9, 13, 17, 25 };
 | 
			
		||||
 | 
			
		||||
#ifndef int8x8_fmul_int32
 | 
			
		||||
static inline void int8x8_fmul_int32(float *dst, const int8_t *src, int scale)
 | 
			
		||||
static inline void int8x8_fmul_int32(DCADSPContext *dsp, float *dst,
 | 
			
		||||
                                     const int8_t *src, int scale)
 | 
			
		||||
{
 | 
			
		||||
    float fscale = scale / 16.0;
 | 
			
		||||
    int i;
 | 
			
		||||
    for (i = 0; i < 8; i++)
 | 
			
		||||
        dst[i] = src[i] * fscale;
 | 
			
		||||
    dsp->int8x8_fmul_int32(dst, src, scale);
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
@ -1219,7 +1217,7 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
 | 
			
		||||
                s->debug_flag |= 0x01;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            int8x8_fmul_int32(subband_samples[k][l],
 | 
			
		||||
            int8x8_fmul_int32(&s->dcadsp, subband_samples[k][l],
 | 
			
		||||
                              &high_freq_vq[hfvq][subsubframe * 8],
 | 
			
		||||
                              s->scale_factor[k][l][0]);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
@ -24,6 +24,14 @@
 | 
			
		||||
#include "libavutil/intreadwrite.h"
 | 
			
		||||
#include "dcadsp.h"
 | 
			
		||||
 | 
			
		||||
static void int8x8_fmul_int32_c(float *dst, const int8_t *src, int scale)
 | 
			
		||||
{
 | 
			
		||||
    float fscale = scale / 16.0;
 | 
			
		||||
    int i;
 | 
			
		||||
    for (i = 0; i < 8; i++)
 | 
			
		||||
        dst[i] = src[i] * fscale;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void dca_lfe_fir_c(float *out, const float *in, const float *coefs,
 | 
			
		||||
                          int decifactor, float scale)
 | 
			
		||||
{
 | 
			
		||||
@ -78,5 +86,6 @@ av_cold void ff_dcadsp_init(DCADSPContext *s)
 | 
			
		||||
{
 | 
			
		||||
    s->lfe_fir = dca_lfe_fir_c;
 | 
			
		||||
    s->qmf_32_subbands = dca_qmf_32_subbands;
 | 
			
		||||
    s->int8x8_fmul_int32 = int8x8_fmul_int32_c;
 | 
			
		||||
    if (ARCH_ARM) ff_dcadsp_init_arm(s);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -31,6 +31,7 @@ typedef struct DCADSPContext {
 | 
			
		||||
                            int *synth_buf_offset, float synth_buf2[32],
 | 
			
		||||
                            const float window[512], float *samples_out,
 | 
			
		||||
                            float raXin[32], float scale);
 | 
			
		||||
    void (*int8x8_fmul_int32)(float *dst, const int8_t *src, int scale);
 | 
			
		||||
} DCADSPContext;
 | 
			
		||||
 | 
			
		||||
void ff_dcadsp_init(DCADSPContext *s);
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user