ac3enc: add int32_t array clipping function to DSPUtil, including x86 versions.
This commit is contained in:
		
							parent
							
								
									8a8d0ce208
								
							
						
					
					
						commit
						6054cd25b4
					
				@ -2676,6 +2676,22 @@ static void apply_window_int16_c(int16_t *output, const int16_t *input,
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 * Portable C fallback for DSPContext.vector_clip_int32.
 *
 * Clamps every element of src into [min, max] via av_clip() and stores the
 * result in dst. Elements are handled in groups of eight, and the first group
 * is processed before len is tested; because len is unsigned, callers must
 * pass a non-zero multiple of 8.
 *
 * @param dst  destination array
 * @param src  source array
 * @param min  lower clipping bound
 * @param max  upper clipping bound
 * @param len  number of elements to process
 */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    do {
        int k;

        /* one group of eight consecutive elements */
        for (k = 0; k < 8; k++)
            dst[k] = av_clip(src[k], min, max);

        dst += 8;
        src += 8;
        len -= 8;
    } while (len != 0);
}
 | 
			
		||||
 | 
			
		||||
#define W0 2048
 | 
			
		||||
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
 | 
			
		||||
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
 | 
			
		||||
@ -3122,6 +3138,7 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
 | 
			
		||||
    c->scalarproduct_int16 = scalarproduct_int16_c;
 | 
			
		||||
    c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
 | 
			
		||||
    c->apply_window_int16 = apply_window_int16_c;
 | 
			
		||||
    c->vector_clip_int32 = vector_clip_int32_c;
 | 
			
		||||
    c->scalarproduct_float = scalarproduct_float_c;
 | 
			
		||||
    c->butterflies_float = butterflies_float_c;
 | 
			
		||||
    c->vector_fmul_scalar = vector_fmul_scalar_c;
 | 
			
		||||
 | 
			
		||||
@ -555,6 +555,22 @@ typedef struct DSPContext {
 | 
			
		||||
    void (*apply_window_int16)(int16_t *output, const int16_t *input,
 | 
			
		||||
                               const int16_t *window, unsigned int len);
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * Clip each element in an array of int32_t to a given minimum and maximum value.
 | 
			
		||||
     * @param dst  destination array
 | 
			
		||||
     *             constraints: 16-byte aligned
 | 
			
		||||
     * @param src  source array
 | 
			
		||||
     *             constraints: 16-byte aligned
 | 
			
		||||
     * @param min  minimum value
 | 
			
		||||
     *             constraints: must be in the range [-(1<<24), 1<<24]
 | 
			
		||||
     * @param max  maximum value
 | 
			
		||||
     *             constraints: must be in the range [-(1<<24), 1<<24]
 | 
			
		||||
     * @param len  number of elements in the array
 | 
			
		||||
     *             constraints: multiple of 32 greater than zero
 | 
			
		||||
     */
 | 
			
		||||
    void (*vector_clip_int32)(int32_t *dst, const int32_t *src, int32_t min,
 | 
			
		||||
                              int32_t max, unsigned int len);
 | 
			
		||||
 | 
			
		||||
    /* rv30 functions */
 | 
			
		||||
    qpel_mc_func put_rv30_tpel_pixels_tab[4][16];
 | 
			
		||||
    qpel_mc_func avg_rv30_tpel_pixels_tab[4][16];
 | 
			
		||||
 | 
			
		||||
@ -2429,6 +2429,15 @@ int  ff_add_hfyu_left_prediction_sse4(uint8_t *dst, const uint8_t *src, int w, i
 | 
			
		||||
 | 
			
		||||
float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
 | 
			
		||||
 | 
			
		||||
void ff_vector_clip_int32_mmx     (int32_t *dst, const int32_t *src, int32_t min,
 | 
			
		||||
                                   int32_t max, unsigned int len);
 | 
			
		||||
void ff_vector_clip_int32_sse2    (int32_t *dst, const int32_t *src, int32_t min,
 | 
			
		||||
                                   int32_t max, unsigned int len);
 | 
			
		||||
void ff_vector_clip_int32_sse2_int(int32_t *dst, const int32_t *src, int32_t min,
 | 
			
		||||
                                   int32_t max, unsigned int len);
 | 
			
		||||
void ff_vector_clip_int32_sse41   (int32_t *dst, const int32_t *src, int32_t min,
 | 
			
		||||
                                   int32_t max, unsigned int len);
 | 
			
		||||
 | 
			
		||||
void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
 | 
			
		||||
{
 | 
			
		||||
    int mm_flags = av_get_cpu_flags();
 | 
			
		||||
@ -2570,6 +2579,8 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
 | 
			
		||||
 | 
			
		||||
        c->put_rv40_chroma_pixels_tab[0]= ff_put_rv40_chroma_mc8_mmx;
 | 
			
		||||
        c->put_rv40_chroma_pixels_tab[1]= ff_put_rv40_chroma_mc4_mmx;
 | 
			
		||||
 | 
			
		||||
        c->vector_clip_int32 = ff_vector_clip_int32_mmx;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
        if (mm_flags & AV_CPU_FLAG_MMX2) {
 | 
			
		||||
@ -2855,6 +2866,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
 | 
			
		||||
#if HAVE_YASM
 | 
			
		||||
            c->scalarproduct_int16 = ff_scalarproduct_int16_sse2;
 | 
			
		||||
            c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2;
 | 
			
		||||
            if (mm_flags & AV_CPU_FLAG_ATOM) {
 | 
			
		||||
                c->vector_clip_int32 = ff_vector_clip_int32_sse2_int;
 | 
			
		||||
            } else {
 | 
			
		||||
                c->vector_clip_int32 = ff_vector_clip_int32_sse2;
 | 
			
		||||
            }
 | 
			
		||||
            if (avctx->flags & CODEC_FLAG_BITEXACT) {
 | 
			
		||||
                c->apply_window_int16 = ff_apply_window_int16_sse2_ba;
 | 
			
		||||
            } else {
 | 
			
		||||
@ -2880,6 +2896,13 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
 | 
			
		||||
            }
 | 
			
		||||
#endif
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) {
 | 
			
		||||
#if HAVE_YASM
 | 
			
		||||
            c->vector_clip_int32 = ff_vector_clip_int32_sse41;
 | 
			
		||||
#endif
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
#if HAVE_AVX && HAVE_YASM
 | 
			
		||||
        if (mm_flags & AV_CPU_FLAG_AVX) {
 | 
			
		||||
            if (bit_depth == 10) {
 | 
			
		||||
 | 
			
		||||
@ -1048,3 +1048,118 @@ emu_edge sse
 | 
			
		||||
%ifdef ARCH_X86_32
 | 
			
		||||
emu_edge mmx
 | 
			
		||||
%endif
 | 
			
		||||
 | 
			
		||||
;-----------------------------------------------------------------------------
 | 
			
		||||
; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min,
 | 
			
		||||
;                           int32_t max, unsigned int len)
 | 
			
		||||
;-----------------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
; Per-dword signed minimum built from compare and logic ops: %1 = min(%1, %2).
; %2 is left unchanged; %3 is scratch and ends up holding the (%2 > %1) mask.
%macro PMINSD_MMX 3 ; dst, src, tmp
 | 
			
		||||
    mova      %3, %2
 | 
			
		||||
    pcmpgtd   %3, %1    ; tmp = all-ones in lanes where src > dst
 | 
			
		||||
    pxor      %1, %2    ; dst = dst ^ src
 | 
			
		||||
    pand      %1, %3    ; keep the xor only in lanes where dst is the smaller
 | 
			
		||||
    pxor      %1, %2    ; those lanes become dst again, the others become src
 | 
			
		||||
%endmacro
 | 
			
		||||
 | 
			
		||||
; Per-dword signed maximum built from compare and logic ops: %1 = max(%1, %2).
; %2 is left unchanged; %3 is scratch and is overwritten.
%macro PMAXSD_MMX 3 ; dst, src, tmp
 | 
			
		||||
    mova      %3, %1
 | 
			
		||||
    pcmpgtd   %3, %2    ; tmp = all-ones in lanes where dst > src
 | 
			
		||||
    pand      %1, %3    ; keep dst only where it is the larger
 | 
			
		||||
    pandn     %3, %2    ; tmp = src in the remaining lanes
 | 
			
		||||
    por       %1, %3    ; merge the two selections
 | 
			
		||||
%endmacro
 | 
			
		||||
 | 
			
		||||
; Clamp each dword of %1 to [%2, %3] using the compare/logic min and max
; helpers. %4 is clobbered as scratch. The upper bound is applied first, then
; the lower bound.
%macro CLIPD_MMX 3-4 ; src/dst, min, max, tmp
 | 
			
		||||
    PMINSD_MMX %1, %3, %4
 | 
			
		||||
    PMAXSD_MMX %1, %2, %4
 | 
			
		||||
%endmacro
 | 
			
		||||
 | 
			
		||||
; Clamp each dword of %1 by going through single-precision float: convert to
; float, apply minps/maxps against the bounds, convert back to int32.
; %2 and %3 must already hold the bounds as floats; the 4th argument is ignored.
; NOTE(review): the float round trip is presumably why min/max are documented
; as limited to [-(1<<24), 1<<24] -- confirm.
%macro CLIPD_SSE2 3-4 ; src/dst, min (float), max (float), unused
 | 
			
		||||
    cvtdq2ps  %1, %1
 | 
			
		||||
    minps     %1, %3
 | 
			
		||||
    maxps     %1, %2
 | 
			
		||||
    cvtps2dq  %1, %1
 | 
			
		||||
%endmacro
 | 
			
		||||
 | 
			
		||||
; Clamp each dword of %1 to [%2, %3] with the native pminsd/pmaxsd
; instructions. The 4th argument is accepted only for call-site compatibility
; with the other CLIPD variants and is ignored.
%macro CLIPD_SSE41 3-4 ;  src/dst, min, max, unused
 | 
			
		||||
    pminsd  %1, %3
 | 
			
		||||
    pmaxsd  %1, %2
 | 
			
		||||
%endmacro
 | 
			
		||||
 | 
			
		||||
; Duplicate the low dword of %1 into its second dword by interleaving the
; register with itself.
%macro SPLATD_MMX 1
 | 
			
		||||
    punpckldq  %1, %1
 | 
			
		||||
%endmacro
 | 
			
		||||
 | 
			
		||||
; Broadcast dword 0 of %1 to every dword of the register (shuffle selector 0).
%macro SPLATD_SSE2 1
 | 
			
		||||
    pshufd  %1, %1, 0
 | 
			
		||||
%endmacro
 | 
			
		||||
 | 
			
		||||
; Emit one vector_clip_int32_<suffix> function.
;   %1 = function name suffix; "sse2" selects the variant that loads min/max
;        as floats (cvtsi2ss) for the float-based CLIPD
;   %2 = number of xmm registers declared to cglobal
;   %3 = number of unrolled passes per loop iteration
;   %4 = 0: each pass handles 4 vector registers (m0-m3)
;        1: each pass handles 8 vector registers (m0-m3 and m7-m10)
; SPLATD and CLIPD must be %define'd to the wanted variants before expansion.
; m4/m5 hold the broadcast min/max; m6 is handed to CLIPD as scratch.
; One loop iteration consumes 4*%3*(1+%4) vector registers worth of data, so
; len must be a positive multiple of mmsize*%3*(1+%4) elements.
%macro VECTOR_CLIP_INT32 4
cglobal vector_clip_int32_%1, 5,5,%2, dst, src, min, max, len
%ifidn %1, sse2
    cvtsi2ss  m4, minm
    cvtsi2ss  m5, maxm
%else
    movd      m4, minm
    movd      m5, maxm
%endif
    SPLATD    m4
    SPLATD    m5
.loop:
; %%i is the register-slot offset of the current unrolled pass. It has to be
; additive: using it as a multiplier makes a second pass revisit slots 0 and 2
; and skip slots 5 and 7.
%assign %%i 0
%rep %3
    mova      m0,  [srcq+mmsize*(0+%%i)]
    mova      m1,  [srcq+mmsize*(1+%%i)]
    mova      m2,  [srcq+mmsize*(2+%%i)]
    mova      m3,  [srcq+mmsize*(3+%%i)]
%if %4
    mova      m7,  [srcq+mmsize*(4+%%i)]
    mova      m8,  [srcq+mmsize*(5+%%i)]
    mova      m9,  [srcq+mmsize*(6+%%i)]
    mova      m10, [srcq+mmsize*(7+%%i)]
%endif
    CLIPD  m0,  m4, m5, m6
    CLIPD  m1,  m4, m5, m6
    CLIPD  m2,  m4, m5, m6
    CLIPD  m3,  m4, m5, m6
%if %4
    CLIPD  m7,  m4, m5, m6
    CLIPD  m8,  m4, m5, m6
    CLIPD  m9,  m4, m5, m6
    CLIPD  m10, m4, m5, m6
%endif
    mova  [dstq+mmsize*(0+%%i)], m0
    mova  [dstq+mmsize*(1+%%i)], m1
    mova  [dstq+mmsize*(2+%%i)], m2
    mova  [dstq+mmsize*(3+%%i)], m3
%if %4
    mova  [dstq+mmsize*(4+%%i)], m7
    mova  [dstq+mmsize*(5+%%i)], m8
    mova  [dstq+mmsize*(6+%%i)], m9
    mova  [dstq+mmsize*(7+%%i)], m10
%endif
%assign %%i %%i+4*(1+%4)
%endrep
    ; advance by the bytes / elements actually processed in this iteration
    add     srcq, mmsize*4*%3*(1+%4)
    add     dstq, mmsize*4*%3*(1+%4)
    sub     lend, mmsize*%3*(1+%4)
    jg .loop
    REP_RET
%endmacro
 | 
			
		||||
 | 
			
		||||
; Instantiate vector_clip_int32 for each supported instruction set.
INIT_MMX
%define SPLATD SPLATD_MMX
%define CLIPD CLIPD_MMX
VECTOR_CLIP_INT32 mmx, 0, 1, 0
INIT_XMM
%define SPLATD SPLATD_SSE2
; Integer-only SSE2 variant: CLIPD is still CLIPD_MMX here, which uses m6 as
; scratch, so 7 xmm registers (m0-m6) must be declared to cglobal to let it
; preserve xmm6 on ABIs where that register is callee-saved.
VECTOR_CLIP_INT32 sse2_int, 7, 1, 0
; Float-based SSE2 variant: m6 is passed to CLIPD but not used by it.
%define CLIPD CLIPD_SSE2
VECTOR_CLIP_INT32 sse2, 6, 2, 0
%define CLIPD CLIPD_SSE41
; Use the 8-registers-per-pass layout only when m8 and above exist.
%ifdef m8
VECTOR_CLIP_INT32 sse41, 11, 1, 1
%else
VECTOR_CLIP_INT32 sse41, 6, 1, 0
%endif
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user