x86/ac3dsp: add ff_float_to_fixed24_avx()
Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
		
							parent
							
								
									d8b1a34433
								
							
						
					
					
						commit
						e40ea9f34b
					
				@ -47,9 +47,9 @@ typedef struct AC3DSPContext {
 | 
			
		||||
     * [-(1<<24),(1<<24)]
 | 
			
		||||
     *
 | 
			
		||||
     * @param dst destination array of int32_t.
 | 
			
		||||
     *            constraints: 16-byte aligned
 | 
			
		||||
     *            constraints: 32-byte aligned
 | 
			
		||||
     * @param src source array of float.
 | 
			
		||||
     *            constraints: 16-byte aligned
 | 
			
		||||
     *            constraints: 32-byte aligned
 | 
			
		||||
     * @param len number of elements to convert.
 | 
			
		||||
     *            constraints: multiple of 32 greater than zero
 | 
			
		||||
     */
 | 
			
		||||
 | 
			
		||||
@ -110,9 +110,9 @@ static void apply_mdct(AC3EncodeContext *s)
 | 
			
		||||
 */
 | 
			
		||||
static void apply_channel_coupling(AC3EncodeContext *s)
 | 
			
		||||
{
 | 
			
		||||
    LOCAL_ALIGNED_16(CoefType, cpl_coords,      [AC3_MAX_BLOCKS], [AC3_MAX_CHANNELS][16]);
 | 
			
		||||
    LOCAL_ALIGNED_32(CoefType, cpl_coords,      [AC3_MAX_BLOCKS], [AC3_MAX_CHANNELS][16]);
 | 
			
		||||
#if AC3ENC_FLOAT
 | 
			
		||||
    LOCAL_ALIGNED_16(int32_t, fixed_cpl_coords, [AC3_MAX_BLOCKS], [AC3_MAX_CHANNELS][16]);
 | 
			
		||||
    LOCAL_ALIGNED_32(int32_t, fixed_cpl_coords, [AC3_MAX_BLOCKS], [AC3_MAX_CHANNELS][16]);
 | 
			
		||||
#else
 | 
			
		||||
    int32_t (*fixed_cpl_coords)[AC3_MAX_CHANNELS][16] = cpl_coords;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
@ -128,6 +128,30 @@ cglobal float_to_fixed24, 3, 3, 9, dst, src, len
 | 
			
		||||
    jl .loop
 | 
			
		||||
    RET
 | 
			
		||||
 | 
			
		||||
;------------------------------------------------------------------------------
; void ff_float_to_fixed24_avx(int32_t *dst, const float *src, size_t len)
;
; Scales each float in src by the constant at pf_1_24 and stores the result,
; converted to int32, in dst. Four vector registers are processed per loop
; iteration, so len is expected to be a non-zero multiple of (4*mmsize)/4
; elements; the loop body runs once before the count is tested.
; NOTE(review): pf_1_24 is defined outside this view; presumably the 2^24
; scale factor -- confirm against the constant table.
;------------------------------------------------------------------------------
INIT_YMM avx
 | 
			
		||||
; 3 arguments, 3 general-purpose registers, 5 vector registers (m0-m4 below)
cglobal float_to_fixed24, 3, 3, 5, dst, src, len
 | 
			
		||||
    ; replicate the 128-bit constant at pf_1_24 into both halves of m0
    vbroadcastf128 m0, [pf_1_24]
 | 
			
		||||
    ; element count -> byte count (elements are 4 bytes wide)
    shl      lenq, 2
 | 
			
		||||
    ; advance both pointers to the end of their arrays ...
    add      srcq, lenq
 | 
			
		||||
    add      dstq, lenq
 | 
			
		||||
    ; ... and index them with a negative offset that counts up towards zero
    neg      lenq
 | 
			
		||||
.loop:
 | 
			
		||||
    ; load four consecutive source vectors and multiply each by the scale in m0
    mulps      m1, m0, [srcq+lenq+mmsize*0]
 | 
			
		||||
    mulps      m2, m0, [srcq+lenq+mmsize*1]
 | 
			
		||||
    mulps      m3, m0, [srcq+lenq+mmsize*2]
 | 
			
		||||
    mulps      m4, m0, [srcq+lenq+mmsize*3]
 | 
			
		||||
    ; convert the scaled packed floats to packed signed 32-bit integers
    cvtps2dq   m1, m1
 | 
			
		||||
    cvtps2dq   m2, m2
 | 
			
		||||
    cvtps2dq   m3, m3
 | 
			
		||||
    cvtps2dq   m4, m4
 | 
			
		||||
    ; write the four result vectors to the matching offsets in dst
    ; NOTE(review): mova is presumably the x86inc aligned move, which would
    ; require dst to be mmsize-aligned -- confirm
    mova  [dstq+lenq+mmsize*0], m1
 | 
			
		||||
    mova  [dstq+lenq+mmsize*1], m2
 | 
			
		||||
    mova  [dstq+lenq+mmsize*2], m3
 | 
			
		||||
    mova  [dstq+lenq+mmsize*3], m4
 | 
			
		||||
    ; step the offset by the four vectors just handled
    add      lenq, mmsize*4
 | 
			
		||||
    ; the add sets the flags: keep looping while the offset is still negative
    jl .loop
 | 
			
		||||
    RET
 | 
			
		||||
 | 
			
		||||
;------------------------------------------------------------------------------
 | 
			
		||||
; int ff_ac3_compute_mantissa_size(uint16_t mant_cnt[6][16])
 | 
			
		||||
;------------------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
@ -27,6 +27,7 @@
 | 
			
		||||
void ff_ac3_exponent_min_sse2  (uint8_t *exp, int num_reuse_blocks, int nb_coefs);
 | 
			
		||||
 | 
			
		||||
void ff_float_to_fixed24_sse2 (int32_t *dst, const float *src, size_t len);
 | 
			
		||||
void ff_float_to_fixed24_avx  (int32_t *dst, const float *src, size_t len);
 | 
			
		||||
 | 
			
		||||
int ff_ac3_compute_mantissa_size_sse2(uint16_t mant_cnt[6][16]);
 | 
			
		||||
 | 
			
		||||
@ -48,6 +49,9 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c)
 | 
			
		||||
        if (!(cpu_flags & AV_CPU_FLAG_ATOM))
 | 
			
		||||
            c->extract_exponents = ff_ac3_extract_exponents_ssse3;
 | 
			
		||||
    }
 | 
			
		||||
    if (EXTERNAL_AVX_FAST(cpu_flags)) {
 | 
			
		||||
        c->float_to_fixed24 = ff_float_to_fixed24_avx;
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#define DOWNMIX_FUNC_OPT(ch, opt)                                       \
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user