x86/swr: convert resample_{common, linear}_double_sse2 to yasm
Signed-off-by: James Almer <jamrial@gmail.com>
Benchmark: 312531 -> 311528 decicycles
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
		
							parent
							
								
									fb318def5d
								
							
						
					
					
						commit
						dd2c9034b1
					
				@ -25,23 +25,15 @@
 | 
			
		||||
 * @author Michael Niedermayer <michaelni@gmx.at>
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#if    defined(TEMPLATE_RESAMPLE_DBL)     \
 | 
			
		||||
    || defined(TEMPLATE_RESAMPLE_DBL_SSE2)
 | 
			
		||||
#if defined(TEMPLATE_RESAMPLE_DBL)
 | 
			
		||||
 | 
			
		||||
#    define RENAME(N) N ## _double
 | 
			
		||||
#    define FILTER_SHIFT 0
 | 
			
		||||
#    define DELEM  double
 | 
			
		||||
#    define FELEM  double
 | 
			
		||||
#    define FELEM2 double
 | 
			
		||||
#    define OUT(d, v) d = v
 | 
			
		||||
 | 
			
		||||
#    if defined(TEMPLATE_RESAMPLE_DBL)
 | 
			
		||||
#        define RENAME(N) N ## _double
 | 
			
		||||
#    elif defined(TEMPLATE_RESAMPLE_DBL_SSE2)
 | 
			
		||||
#        define COMMON_CORE COMMON_CORE_DBL_SSE2
 | 
			
		||||
#        define LINEAR_CORE LINEAR_CORE_DBL_SSE2
 | 
			
		||||
#        define RENAME(N) N ## _double_sse2
 | 
			
		||||
#    endif
 | 
			
		||||
 | 
			
		||||
#elif    defined(TEMPLATE_RESAMPLE_FLT)
 | 
			
		||||
 | 
			
		||||
#    define RENAME(N) N ## _float
 | 
			
		||||
@ -104,16 +96,12 @@ int RENAME(swri_resample_common)(ResampleContext *c,
 | 
			
		||||
    for (dst_index = 0; dst_index < n; dst_index++) {
 | 
			
		||||
        FELEM *filter = ((FELEM *) c->filter_bank) + c->filter_alloc * index;
 | 
			
		||||
 | 
			
		||||
#ifdef COMMON_CORE
 | 
			
		||||
        COMMON_CORE
 | 
			
		||||
#else
 | 
			
		||||
        FELEM2 val=0;
 | 
			
		||||
        int i;
 | 
			
		||||
        for (i = 0; i < c->filter_length; i++) {
 | 
			
		||||
            val += src[sample_index + i] * (FELEM2)filter[i];
 | 
			
		||||
        }
 | 
			
		||||
        OUT(dst[dst_index], val);
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
        frac  += c->dst_incr_mod;
 | 
			
		||||
        index += c->dst_incr_div;
 | 
			
		||||
@ -150,15 +138,11 @@ int RENAME(swri_resample_linear)(ResampleContext *c,
 | 
			
		||||
        FELEM *filter = ((FELEM *) c->filter_bank) + c->filter_alloc * index;
 | 
			
		||||
        FELEM2 val=0, v2 = 0;
 | 
			
		||||
 | 
			
		||||
#ifdef LINEAR_CORE
 | 
			
		||||
        LINEAR_CORE
 | 
			
		||||
#else
 | 
			
		||||
        int i;
 | 
			
		||||
        for (i = 0; i < c->filter_length; i++) {
 | 
			
		||||
            val += src[sample_index + i] * (FELEM2)filter[i];
 | 
			
		||||
            v2  += src[sample_index + i] * (FELEM2)filter[i + c->filter_alloc];
 | 
			
		||||
        }
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef FELEML
 | 
			
		||||
        val += (v2 - val) * (FELEML) frac / c->src_incr;
 | 
			
		||||
#else
 | 
			
		||||
@ -188,8 +172,6 @@ int RENAME(swri_resample_linear)(ResampleContext *c,
 | 
			
		||||
    return sample_index;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#undef COMMON_CORE
 | 
			
		||||
#undef LINEAR_CORE
 | 
			
		||||
#undef RENAME
 | 
			
		||||
#undef FILTER_SHIFT
 | 
			
		||||
#undef DELEM
 | 
			
		||||
 | 
			
		||||
@ -50,11 +50,12 @@ endstruc
 | 
			
		||||
SECTION_RODATA
 | 
			
		||||
 | 
			
		||||
pf_1:      dd 1.0
 | 
			
		||||
pdbl_1:    dq 1.0
 | 
			
		||||
pd_0x4000: dd 0x4000
 | 
			
		||||
 | 
			
		||||
SECTION .text
 | 
			
		||||
 | 
			
		||||
%macro RESAMPLE_FNS 3 ; format [float or int16], bps, log2_bps
 | 
			
		||||
%macro RESAMPLE_FNS 3-5 ; format [float, double or int16], bps, log2_bps, float op suffix [s or d], 1.0 constant
 | 
			
		||||
; int resample_common_$format(ResampleContext *ctx, $format *dst,
 | 
			
		||||
;                             const $format *src, int size, int update_ctx)
 | 
			
		||||
%if ARCH_X86_64 ; unix64 and win64
 | 
			
		||||
@ -165,21 +166,21 @@ cglobal resample_common_%1, 1, 7, 2, ctx, phase_shift, dst, frac, \
 | 
			
		||||
    lea                      filterq, [min_filter_count_x4q+filterq*%2]
 | 
			
		||||
    mov         min_filter_count_x4q, min_filter_length_x4q
 | 
			
		||||
%endif
 | 
			
		||||
%ifidn %1, float
 | 
			
		||||
    xorps                         m0, m0, m0
 | 
			
		||||
%else ; int16
 | 
			
		||||
%ifidn %1, int16
 | 
			
		||||
    movd                          m0, [pd_0x4000]
 | 
			
		||||
%else ; float/double
 | 
			
		||||
    xorps                         m0, m0, m0
 | 
			
		||||
%endif
 | 
			
		||||
 | 
			
		||||
    align 16
 | 
			
		||||
.inner_loop:
 | 
			
		||||
    movu                          m1, [srcq+min_filter_count_x4q*1]
 | 
			
		||||
%ifidn %1, float
 | 
			
		||||
    mulps                         m1, m1, [filterq+min_filter_count_x4q*1]
 | 
			
		||||
    addps                         m0, m0, m1
 | 
			
		||||
%else ; int16
 | 
			
		||||
%ifidn %1, int16
 | 
			
		||||
    pmaddwd                       m1, [filterq+min_filter_count_x4q*1]
 | 
			
		||||
    paddd                         m0, m1
 | 
			
		||||
%else ; float/double
 | 
			
		||||
    mulp%4                        m1, m1, [filterq+min_filter_count_x4q*1]
 | 
			
		||||
    addp%4                        m0, m0, m1
 | 
			
		||||
%endif
 | 
			
		||||
    add         min_filter_count_x4q, mmsize
 | 
			
		||||
    js .inner_loop
 | 
			
		||||
@ -189,16 +190,7 @@ cglobal resample_common_%1, 1, 7, 2, ctx, phase_shift, dst, frac, \
 | 
			
		||||
    addps                        xm0, xm1
 | 
			
		||||
%endif
 | 
			
		||||
 | 
			
		||||
    ; horizontal sum & store
 | 
			
		||||
%ifidn %1, float
 | 
			
		||||
    movhlps                      xm1, xm0
 | 
			
		||||
    addps                        xm0, xm1
 | 
			
		||||
    shufps                       xm1, xm0, xm0, q0001
 | 
			
		||||
    add                        fracd, dst_incr_modd
 | 
			
		||||
    addps                        xm0, xm1
 | 
			
		||||
    add                       indexd, dst_incr_divd
 | 
			
		||||
    movss                     [dstq], xm0
 | 
			
		||||
%else ; int16
 | 
			
		||||
%ifidn %1, int16
 | 
			
		||||
%if mmsize == 16
 | 
			
		||||
    pshufd                        m1, m0, q0032
 | 
			
		||||
    paddd                         m0, m1
 | 
			
		||||
@ -212,6 +204,17 @@ cglobal resample_common_%1, 1, 7, 2, ctx, phase_shift, dst, frac, \
 | 
			
		||||
    packssdw                      m0, m0
 | 
			
		||||
    add                       indexd, dst_incr_divd
 | 
			
		||||
    movd                      [dstq], m0
 | 
			
		||||
%else ; float/double
 | 
			
		||||
    ; horizontal sum & store
 | 
			
		||||
    movhlps                      xm1, xm0
 | 
			
		||||
%ifidn %1, float
 | 
			
		||||
    addps                        xm0, xm1
 | 
			
		||||
    shufps                       xm1, xm0, xm0, q0001
 | 
			
		||||
%endif
 | 
			
		||||
    add                        fracd, dst_incr_modd
 | 
			
		||||
    addp%4                       xm0, xm1
 | 
			
		||||
    add                       indexd, dst_incr_divd
 | 
			
		||||
    movs%4                    [dstq], xm0
 | 
			
		||||
%endif
 | 
			
		||||
    cmp                        fracd, src_incrd
 | 
			
		||||
    jl .skip
 | 
			
		||||
@ -307,12 +310,12 @@ cglobal resample_linear_%1, 0, 15, 5, ctx, phase_mask, src, phase_shift, index,
 | 
			
		||||
    mov                   ctx_stackq, ctxq
 | 
			
		||||
    mov            phase_mask_stackd, phase_maskd
 | 
			
		||||
    mov           min_filter_len_x4d, [ctxq+ResampleContext.filter_length]
 | 
			
		||||
%ifidn %1, float
 | 
			
		||||
    cvtsi2ss                     xm0, src_incrd
 | 
			
		||||
    movss                        xm4, [pf_1]
 | 
			
		||||
    divss                        xm4, xm0
 | 
			
		||||
%else ; int16
 | 
			
		||||
%ifidn %1, int16
 | 
			
		||||
    movd                          m4, [pd_0x4000]
 | 
			
		||||
%else ; float/double
 | 
			
		||||
    cvtsi2s%4                    xm0, src_incrd
 | 
			
		||||
    movs%4                       xm4, [%5]
 | 
			
		||||
    divs%4                       xm4, xm0
 | 
			
		||||
%endif
 | 
			
		||||
    mov                dst_incr_divd, [ctxq+ResampleContext.dst_incr_div]
 | 
			
		||||
    shl           min_filter_len_x4d, %3
 | 
			
		||||
@ -360,12 +363,12 @@ cglobal resample_linear_%1, 1, 7, 5, ctx, min_filter_length_x4, filter2, \
 | 
			
		||||
    mov                           r3, dword [ctxq+ResampleContext.src_incr]
 | 
			
		||||
    PUSH                              dword [ctxq+ResampleContext.phase_mask]
 | 
			
		||||
    PUSH                              r3d
 | 
			
		||||
%ifidn %1, float
 | 
			
		||||
    cvtsi2ss                     xm0, r3d
 | 
			
		||||
    movss                        xm4, [pf_1]
 | 
			
		||||
    divss                        xm4, xm0
 | 
			
		||||
%else ; int16
 | 
			
		||||
%ifidn %1, int16
 | 
			
		||||
    movd                          m4, [pd_0x4000]
 | 
			
		||||
%else ; float/double
 | 
			
		||||
    cvtsi2s%4                    xm0, r3d
 | 
			
		||||
    movs%4                       xm4, [%5]
 | 
			
		||||
    divs%4                       xm4, xm0
 | 
			
		||||
%endif
 | 
			
		||||
    mov        min_filter_length_x4d, [ctxq+ResampleContext.filter_length]
 | 
			
		||||
    mov                       indexd, [ctxq+ResampleContext.index]
 | 
			
		||||
@ -409,27 +412,27 @@ cglobal resample_linear_%1, 1, 7, 5, ctx, min_filter_length_x4, filter2, \
 | 
			
		||||
    mov                     filter2q, filter1q
 | 
			
		||||
    add                     filter2q, filter_alloc_x4q
 | 
			
		||||
%endif
 | 
			
		||||
%ifidn %1, float
 | 
			
		||||
    xorps                         m0, m0, m0
 | 
			
		||||
    xorps                         m2, m2, m2
 | 
			
		||||
%else ; int16
 | 
			
		||||
%ifidn %1, int16
 | 
			
		||||
    mova                          m0, m4
 | 
			
		||||
    mova                          m2, m4
 | 
			
		||||
%else ; float/double
 | 
			
		||||
    xorps                         m0, m0, m0
 | 
			
		||||
    xorps                         m2, m2, m2
 | 
			
		||||
%endif
 | 
			
		||||
 | 
			
		||||
    align 16
 | 
			
		||||
.inner_loop:
 | 
			
		||||
    movu                          m1, [srcq+min_filter_count_x4q*1]
 | 
			
		||||
%ifidn %1, float
 | 
			
		||||
    mulps                         m3, m1, [filter2q+min_filter_count_x4q*1]
 | 
			
		||||
    mulps                         m1, m1, [filter1q+min_filter_count_x4q*1]
 | 
			
		||||
    addps                         m2, m2, m3
 | 
			
		||||
    addps                         m0, m0, m1
 | 
			
		||||
%else ; int16
 | 
			
		||||
%ifidn %1, int16
 | 
			
		||||
    pmaddwd                       m3, m1, [filter2q+min_filter_count_x4q*1]
 | 
			
		||||
    pmaddwd                       m1, [filter1q+min_filter_count_x4q*1]
 | 
			
		||||
    paddd                         m2, m3
 | 
			
		||||
    paddd                         m0, m1
 | 
			
		||||
%else ; float/double
 | 
			
		||||
    mulp%4                        m3, m1, [filter2q+min_filter_count_x4q*1]
 | 
			
		||||
    mulp%4                        m1, m1, [filter1q+min_filter_count_x4q*1]
 | 
			
		||||
    addp%4                        m2, m2, m3
 | 
			
		||||
    addp%4                        m0, m0, m1
 | 
			
		||||
%endif
 | 
			
		||||
    add         min_filter_count_x4q, mmsize
 | 
			
		||||
    js .inner_loop
 | 
			
		||||
@ -441,24 +444,7 @@ cglobal resample_linear_%1, 1, 7, 5, ctx, min_filter_length_x4, filter2, \
 | 
			
		||||
    addps                        xm2, xm3
 | 
			
		||||
%endif
 | 
			
		||||
 | 
			
		||||
%ifidn %1, float
 | 
			
		||||
    ; val += (v2 - val) * (FELEML) frac / c->src_incr;
 | 
			
		||||
    cvtsi2ss                     xm1, fracd
 | 
			
		||||
    subps                        xm2, xm0
 | 
			
		||||
    mulps                        xm1, xm4
 | 
			
		||||
    shufps                       xm1, xm1, q0000
 | 
			
		||||
    mulps                        xm2, xm1
 | 
			
		||||
    addps                        xm0, xm2
 | 
			
		||||
 | 
			
		||||
    ; horizontal sum & store
 | 
			
		||||
    movhlps                      xm1, xm0
 | 
			
		||||
    addps                        xm0, xm1
 | 
			
		||||
    shufps                       xm1, xm0, xm0, q0001
 | 
			
		||||
    add                        fracd, dst_incr_modd
 | 
			
		||||
    addps                        xm0, xm1
 | 
			
		||||
    add                       indexd, dst_incr_divd
 | 
			
		||||
    movss                     [dstq], xm0
 | 
			
		||||
%else ; int16
 | 
			
		||||
%ifidn %1, int16
 | 
			
		||||
%if mmsize == 16
 | 
			
		||||
    pshufd                        m3, m2, q0032
 | 
			
		||||
    pshufd                        m1, m0, q0032
 | 
			
		||||
@ -491,6 +477,25 @@ cglobal resample_linear_%1, 1, 7, 5, ctx, min_filter_length_x4, filter2, \
 | 
			
		||||
    ; - 32bit: eax=r0[filter1], edx=r2[filter2]
 | 
			
		||||
    ; - win64: eax=r6[filter1], edx=r1[todo]
 | 
			
		||||
    ; - unix64: eax=r6[filter1], edx=r2[todo]
 | 
			
		||||
%else ; float/double
 | 
			
		||||
    ; val += (v2 - val) * (FELEML) frac / c->src_incr;
 | 
			
		||||
    cvtsi2s%4                    xm1, fracd
 | 
			
		||||
    subp%4                       xm2, xm0
 | 
			
		||||
    mulp%4                       xm1, xm4
 | 
			
		||||
    shufp%4                      xm1, xm1, q0000
 | 
			
		||||
    mulp%4                       xm2, xm1
 | 
			
		||||
    addp%4                       xm0, xm2
 | 
			
		||||
 | 
			
		||||
    ; horizontal sum & store
 | 
			
		||||
    movhlps                      xm1, xm0
 | 
			
		||||
%ifidn %1, float
 | 
			
		||||
    addps                        xm0, xm1
 | 
			
		||||
    shufps                       xm1, xm0, xm0, q0001
 | 
			
		||||
%endif
 | 
			
		||||
    add                        fracd, dst_incr_modd
 | 
			
		||||
    addp%4                       xm0, xm1
 | 
			
		||||
    add                       indexd, dst_incr_divd
 | 
			
		||||
    movs%4                    [dstq], xm0
 | 
			
		||||
%endif
 | 
			
		||||
    cmp                        fracd, src_incrd
 | 
			
		||||
    jl .skip
 | 
			
		||||
@ -553,11 +558,11 @@ cglobal resample_linear_%1, 1, 7, 5, ctx, min_filter_length_x4, filter2, \
 | 
			
		||||
%endmacro
 | 
			
		||||
 | 
			
		||||
INIT_XMM sse
 | 
			
		||||
RESAMPLE_FNS float, 4, 2
 | 
			
		||||
RESAMPLE_FNS float, 4, 2, s, pf_1
 | 
			
		||||
 | 
			
		||||
%if HAVE_AVX_EXTERNAL
 | 
			
		||||
INIT_YMM avx
 | 
			
		||||
RESAMPLE_FNS float, 4, 2
 | 
			
		||||
RESAMPLE_FNS float, 4, 2, s, pf_1
 | 
			
		||||
%endif
 | 
			
		||||
 | 
			
		||||
%if ARCH_X86_32
 | 
			
		||||
@ -567,3 +572,4 @@ RESAMPLE_FNS int16, 2, 1
 | 
			
		||||
 | 
			
		||||
INIT_XMM sse2
 | 
			
		||||
RESAMPLE_FNS int16, 2, 1
 | 
			
		||||
RESAMPLE_FNS double, 8, 3, d, pdbl_1
 | 
			
		||||
 | 
			
		||||
@ -1,72 +0,0 @@
 | 
			
		||||
/*
 | 
			
		||||
 * Copyright (c) 2012 Michael Niedermayer <michaelni@gmx.at>
 | 
			
		||||
 *
 | 
			
		||||
 * This file is part of FFmpeg.
 | 
			
		||||
 *
 | 
			
		||||
 * FFmpeg is free software; you can redistribute it and/or
 | 
			
		||||
 * modify it under the terms of the GNU Lesser General Public
 | 
			
		||||
 * License as published by the Free Software Foundation; either
 | 
			
		||||
 * version 2.1 of the License, or (at your option) any later version.
 | 
			
		||||
 *
 | 
			
		||||
 * FFmpeg is distributed in the hope that it will be useful,
 | 
			
		||||
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | 
			
		||||
 * Lesser General Public License for more details.
 | 
			
		||||
 *
 | 
			
		||||
 * You should have received a copy of the GNU Lesser General Public
 | 
			
		||||
 * License along with FFmpeg; if not, write to the Free Software
 | 
			
		||||
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#include "libavutil/x86/asm.h"
 | 
			
		||||
#include "libavutil/cpu.h"
 | 
			
		||||
#include "libswresample/swresample_internal.h"
 | 
			
		||||
 | 
			
		||||
#define COMMON_CORE_DBL_SSE2 \
 | 
			
		||||
    x86_reg len= -8*c->filter_length;\
 | 
			
		||||
__asm__ volatile(\
 | 
			
		||||
    "xorpd     %%xmm0, %%xmm0     \n\t"\
 | 
			
		||||
    "1:                           \n\t"\
 | 
			
		||||
    "movupd  (%1, %0), %%xmm1     \n\t"\
 | 
			
		||||
    "mulpd   (%2, %0), %%xmm1     \n\t"\
 | 
			
		||||
    "addpd     %%xmm1, %%xmm0     \n\t"\
 | 
			
		||||
    "add       $16, %0            \n\t"\
 | 
			
		||||
    " js 1b                       \n\t"\
 | 
			
		||||
    "movhlps   %%xmm0, %%xmm1     \n\t"\
 | 
			
		||||
    "addpd     %%xmm1, %%xmm0     \n\t"\
 | 
			
		||||
    "movsd     %%xmm0, (%3)       \n\t"\
 | 
			
		||||
    : "+r" (len)\
 | 
			
		||||
    : "r" (((uint8_t*)(src+sample_index))-len),\
 | 
			
		||||
      "r" (((uint8_t*)filter)-len),\
 | 
			
		||||
      "r" (dst+dst_index)\
 | 
			
		||||
    XMM_CLOBBERS_ONLY("%xmm0", "%xmm1")\
 | 
			
		||||
);
 | 
			
		||||
 | 
			
		||||
#define LINEAR_CORE_DBL_SSE2 \
 | 
			
		||||
    x86_reg len= -8*c->filter_length;\
 | 
			
		||||
__asm__ volatile(\
 | 
			
		||||
    "xorpd      %%xmm0, %%xmm0    \n\t"\
 | 
			
		||||
    "xorpd      %%xmm2, %%xmm2    \n\t"\
 | 
			
		||||
    "1:                           \n\t"\
 | 
			
		||||
    "movupd   (%3, %0), %%xmm1    \n\t"\
 | 
			
		||||
    "movapd     %%xmm1, %%xmm3    \n\t"\
 | 
			
		||||
    "mulpd    (%4, %0), %%xmm1    \n\t"\
 | 
			
		||||
    "mulpd    (%5, %0), %%xmm3    \n\t"\
 | 
			
		||||
    "addpd      %%xmm1, %%xmm0    \n\t"\
 | 
			
		||||
    "addpd      %%xmm3, %%xmm2    \n\t"\
 | 
			
		||||
    "add           $16, %0        \n\t"\
 | 
			
		||||
    " js 1b                       \n\t"\
 | 
			
		||||
    "movhlps    %%xmm0, %%xmm1    \n\t"\
 | 
			
		||||
    "movhlps    %%xmm2, %%xmm3    \n\t"\
 | 
			
		||||
    "addpd      %%xmm1, %%xmm0    \n\t"\
 | 
			
		||||
    "addpd      %%xmm3, %%xmm2    \n\t"\
 | 
			
		||||
    "movsd      %%xmm0, %1        \n\t"\
 | 
			
		||||
    "movsd      %%xmm2, %2        \n\t"\
 | 
			
		||||
    : "+r" (len),\
 | 
			
		||||
      "=m" (val),\
 | 
			
		||||
      "=m" (v2)\
 | 
			
		||||
    : "r" (((uint8_t*)(src+sample_index))-len),\
 | 
			
		||||
      "r" (((uint8_t*)filter)-len),\
 | 
			
		||||
      "r" (((uint8_t*)(filter+c->filter_alloc))-len)\
 | 
			
		||||
    XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3")\
 | 
			
		||||
);
 | 
			
		||||
@ -27,21 +27,6 @@
 | 
			
		||||
 | 
			
		||||
#include "libswresample/resample.h"
 | 
			
		||||
 | 
			
		||||
int swri_resample_common_double_sse2(ResampleContext *c,  double *dst, const  double *src, int n, int update_ctx);
 | 
			
		||||
int swri_resample_linear_double_sse2(ResampleContext *c,  double *dst, const  double *src, int n, int update_ctx);
 | 
			
		||||
 | 
			
		||||
#if HAVE_SSE2_INLINE
 | 
			
		||||
#define DO_RESAMPLE_ONE 0
 | 
			
		||||
 | 
			
		||||
#include "resample_mmx.h"
 | 
			
		||||
 | 
			
		||||
#define TEMPLATE_RESAMPLE_DBL_SSE2
 | 
			
		||||
#include "libswresample/resample_template.c"
 | 
			
		||||
#undef TEMPLATE_RESAMPLE_DBL_SSE2
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#undef DO_RESAMPLE_ONE
 | 
			
		||||
 | 
			
		||||
int ff_resample_common_int16_mmxext(ResampleContext *c, uint8_t *dst,
 | 
			
		||||
                                    const uint8_t *src, int sz, int upd);
 | 
			
		||||
int ff_resample_linear_int16_mmxext(ResampleContext *c, uint8_t *dst,
 | 
			
		||||
@ -62,6 +47,11 @@ int ff_resample_common_float_avx(ResampleContext *c, uint8_t *dst,
 | 
			
		||||
int ff_resample_linear_float_avx(ResampleContext *c, uint8_t *dst,
 | 
			
		||||
                                 const uint8_t *src, int sz, int upd);
 | 
			
		||||
 | 
			
		||||
int ff_resample_common_double_sse2(ResampleContext *c, uint8_t *dst,
 | 
			
		||||
                                   const uint8_t *src, int sz, int upd);
 | 
			
		||||
int ff_resample_linear_double_sse2(ResampleContext *c, uint8_t *dst,
 | 
			
		||||
                                   const uint8_t *src, int sz, int upd);
 | 
			
		||||
 | 
			
		||||
void swresample_dsp_x86_init(ResampleContext *c)
 | 
			
		||||
{
 | 
			
		||||
    int av_unused mm_flags = av_get_cpu_flags();
 | 
			
		||||
@ -78,10 +68,9 @@ void swresample_dsp_x86_init(ResampleContext *c)
 | 
			
		||||
    if (HAVE_SSE2_EXTERNAL && mm_flags & AV_CPU_FLAG_SSE2) {
 | 
			
		||||
        c->dsp.resample_common[FNIDX(S16P)] = ff_resample_common_int16_sse2;
 | 
			
		||||
        c->dsp.resample_linear[FNIDX(S16P)] = ff_resample_linear_int16_sse2;
 | 
			
		||||
    }
 | 
			
		||||
    if (HAVE_SSE2_INLINE && mm_flags & AV_CPU_FLAG_SSE2) {
 | 
			
		||||
        c->dsp.resample_common[FNIDX(DBLP)] = (resample_fn) swri_resample_common_double_sse2;
 | 
			
		||||
        c->dsp.resample_linear[FNIDX(DBLP)] = (resample_fn) swri_resample_linear_double_sse2;
 | 
			
		||||
 | 
			
		||||
        c->dsp.resample_common[FNIDX(DBLP)] = ff_resample_common_double_sse2;
 | 
			
		||||
        c->dsp.resample_linear[FNIDX(DBLP)] = ff_resample_linear_double_sse2;
 | 
			
		||||
    }
 | 
			
		||||
    if (HAVE_AVX_EXTERNAL && mm_flags & AV_CPU_FLAG_AVX) {
 | 
			
		||||
        c->dsp.resample_common[FNIDX(FLTP)] = ff_resample_common_float_avx;
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user