From 683da86aabb4fbeddc3ead5fce737c63c0ee762c Mon Sep 17 00:00:00 2001
From: Anton Khirnov
Date: Sun, 4 Sep 2016 14:45:48 +0200
Subject: [PATCH 1/2] audiodsp: reorder arguments for vector_clipf
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This will make the x86 asm simpler.

ARM conversion by Martin Storsjö and Janne Grunau
---
 libavcodec/ac3enc_float.c           | 2 +-
 libavcodec/arm/audiodsp_init_neon.c | 3 +--
 libavcodec/arm/audiodsp_neon.S      | 5 ++---
 libavcodec/audiodsp.c               | 4 ++--
 libavcodec/audiodsp.h               | 3 ++-
 libavcodec/cook.c                   | 2 +-
 libavcodec/x86/audiodsp.h           | 2 +-
 libavcodec/x86/audiodsp_mmx.c       | 2 +-
 tests/checkasm/audiodsp.c           | 8 ++++----
 9 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/libavcodec/ac3enc_float.c b/libavcodec/ac3enc_float.c
index 822f431b44..968cb2c533 100644
--- a/libavcodec/ac3enc_float.c
+++ b/libavcodec/ac3enc_float.c
@@ -111,7 +111,7 @@ static void scale_coefficients(AC3EncodeContext *s)
 static void clip_coefficients(AudioDSPContext *adsp, float *coef,
                               unsigned int len)
 {
-    adsp->vector_clipf(coef, coef, COEF_MIN, COEF_MAX, len);
+    adsp->vector_clipf(coef, coef, len, COEF_MIN, COEF_MAX);
 }
 
 
diff --git a/libavcodec/arm/audiodsp_init_neon.c b/libavcodec/arm/audiodsp_init_neon.c
index af532724c8..08405cb829 100644
--- a/libavcodec/arm/audiodsp_init_neon.c
+++ b/libavcodec/arm/audiodsp_init_neon.c
@@ -25,8 +25,7 @@
 #include "libavcodec/audiodsp.h"
 #include "audiodsp_arm.h"
 
-void ff_vector_clipf_neon(float *dst, const float *src, float min, float max,
-                          int len);
+void ff_vector_clipf_neon(float *dst, const float *src, int len, float min, float max);
 void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len);
 
diff --git a/libavcodec/arm/audiodsp_neon.S b/libavcodec/arm/audiodsp_neon.S
index dfb998de32..5871b82c2c 100644
--- a/libavcodec/arm/audiodsp_neon.S
+++ b/libavcodec/arm/audiodsp_neon.S
@@ -24,9 +24,8 @@
 function ff_vector_clipf_neon, export=1
 VFP     vdup.32         q1,  d0[1]
 VFP     vdup.32         q0,  d0[0]
-NOVFP   vdup.32         q0,  r2
-NOVFP   vdup.32         q1,  r3
-NOVFP   ldr             r2,  [sp]
+NOVFP   vdup.32         q0,  r3
+NOVFP   vld1.32         {d2[],d3[]}, [sp]
         vld1.f32        {q2},[r1,:128]!
         vmin.f32        q10, q2,  q1
         vld1.f32        {q3},[r1,:128]!
diff --git a/libavcodec/audiodsp.c b/libavcodec/audiodsp.c
index f7e6167cb0..776cd11ce1 100644
--- a/libavcodec/audiodsp.c
+++ b/libavcodec/audiodsp.c
@@ -55,8 +55,8 @@ static void vector_clipf_c_opposite_sign(float *dst, const float *src,
     }
 }
 
-static void vector_clipf_c(float *dst, const float *src,
-                           float min, float max, int len)
+static void vector_clipf_c(float *dst, const float *src, int len,
+                           float min, float max)
 {
     int i;
 
diff --git a/libavcodec/audiodsp.h b/libavcodec/audiodsp.h
index e48cdb092e..2b4f9d44e2 100644
--- a/libavcodec/audiodsp.h
+++ b/libavcodec/audiodsp.h
@@ -48,7 +48,8 @@ typedef struct AudioDSPContext {
     /* assume len is a multiple of 16, and arrays are 16-byte aligned */
     void (*vector_clipf)(float *dst /* align 16 */,
                          const float *src /* align 16 */,
-                         float min, float max, int len /* align 16 */);
+                         int len /* align 16 */,
+                         float min, float max);
 } AudioDSPContext;
 
 void ff_audiodsp_init(AudioDSPContext *c);
diff --git a/libavcodec/cook.c b/libavcodec/cook.c
index 016b1d01bb..c990333a7c 100644
--- a/libavcodec/cook.c
+++ b/libavcodec/cook.c
@@ -867,7 +867,7 @@ static inline void decode_bytes_and_gain(COOKContext *q, COOKSubpacket *p,
 static void saturate_output_float(COOKContext *q, float *out)
 {
     q->adsp.vector_clipf(out, q->mono_mdct_output + q->samples_per_channel,
-                         -1.0f, 1.0f, FFALIGN(q->samples_per_channel, 8));
+                         FFALIGN(q->samples_per_channel, 8), -1.0f, 1.0f);
 }
 
 
diff --git a/libavcodec/x86/audiodsp.h b/libavcodec/x86/audiodsp.h
index 321056b8b7..c87ee45193 100644
--- a/libavcodec/x86/audiodsp.h
+++ b/libavcodec/x86/audiodsp.h
@@ -20,6 +20,6 @@
 #define AVCODEC_X86_AUDIODSP_H
 
 void ff_vector_clipf_sse(float *dst, const float *src,
-                         float min, float max, int len);
+                         int len, float min, float max);
 
 #endif /* AVCODEC_X86_AUDIODSP_H */
diff --git a/libavcodec/x86/audiodsp_mmx.c b/libavcodec/x86/audiodsp_mmx.c
index cb550598f9..04cbb90706 100644
--- a/libavcodec/x86/audiodsp_mmx.c
+++ b/libavcodec/x86/audiodsp_mmx.c
@@ -23,7 +23,7 @@
 #if HAVE_INLINE_ASM
 
 void ff_vector_clipf_sse(float *dst, const float *src,
-                         float min, float max, int len)
+                         int len, float min, float max)
 {
     x86_reg i = (len - 16) * 4;
     __asm__ volatile (
diff --git a/tests/checkasm/audiodsp.c b/tests/checkasm/audiodsp.c
index 456b90bfec..40fa3844e8 100644
--- a/tests/checkasm/audiodsp.c
+++ b/tests/checkasm/audiodsp.c
@@ -120,7 +120,7 @@ void checkasm_check_audiodsp(void)
         int i, len;
 
         declare_func_emms(AV_CPU_FLAG_MMX, void, float *dst, const float *src,
-                          float min, float max, unsigned int len);
+                          int len, float min, float max);
 
         val1 = (float)rnd() / (UINT_MAX >> 1) - 1.0f;
         val2 = (float)rnd() / (UINT_MAX >> 1) - 1.0f;
@@ -133,13 +133,13 @@ void checkasm_check_audiodsp(void)
         len = rnd() % 128;
         len = 16 * FFMAX(len, 1);
 
-        call_ref(dst0, src, min, max, len);
-        call_new(dst1, src, min, max, len);
+        call_ref(dst0, src, len, min, max);
+        call_new(dst1, src, len, min, max);
         for (i = 0; i < len; i++) {
             if (!float_near_ulp_array(dst0, dst1, 3, len))
                 fail();
         }
-        bench_new(dst1, src, min, max, MAX_SIZE);
+        bench_new(dst1, src, MAX_SIZE, min, max);
     }
 
     report("audiodsp");

From 12004a9a7f20e44f4da2ee6c372d5e1794c8d6c5 Mon Sep 17 00:00:00 2001
From: Anton Khirnov
Date: Tue, 9 Aug 2016 20:20:00 +0200
Subject: [PATCH 2/2] audiodsp/x86: yasmify vector_clipf_sse

Replace the GCC inline-assembly implementation in audiodsp_mmx.c with an
external one in audiodsp.asm, and select it with EXTERNAL_SSE() instead
of INLINE_SSE(). audiodsp_mmx.c is left empty by this and is removed.
---
 libavcodec/x86/Makefile        |  1 -
 libavcodec/x86/audiodsp.asm    | 43 +++++++++++++++++++++++++
 libavcodec/x86/audiodsp_init.c |  2 +-
 libavcodec/x86/audiodsp_mmx.c  | 58 ----------------------------------
 4 files changed, 44 insertions(+), 60 deletions(-)
 delete mode 100644 libavcodec/x86/audiodsp_mmx.c

diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 204c856340..872b7faddb 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -58,7 +58,6 @@ OBJS-$(CONFIG_VP9_DECODER)             += x86/vp9dsp_init.o
 
 # GCC inline assembly optimizations
 # subsystems
-MMX-OBJS-$(CONFIG_AUDIODSP)            += x86/audiodsp_mmx.o
 MMX-OBJS-$(CONFIG_FDCTDSP)             += x86/fdct.o
 MMX-OBJS-$(CONFIG_HPELDSP)             += x86/fpel_mmx.o                \
                                           x86/hpeldsp_mmx.o
diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm
index d7e63eb0cb..1bc7e32a68 100644
--- a/libavcodec/x86/audiodsp.asm
+++ b/libavcodec/x86/audiodsp.asm
@@ -135,3 +135,46 @@ VECTOR_CLIP_INT32 11, 1, 1, 0
 %else
 VECTOR_CLIP_INT32 6, 1, 0, 0
 %endif
+
+; void ff_vector_clipf_sse(float *dst, const float *src,
+;                          int len, float min, float max)
+INIT_XMM sse
+cglobal vector_clipf, 3, 3, 6, dst, src, len, min, max
+%if ARCH_X86_32
+    VBROADCASTSS m0, minm       ; m0 = min in every lane
+    VBROADCASTSS m1, maxm       ; m1 = max in every lane
+%elif WIN64
+    VBROADCASTSS m0, m3         ; min is the 4th argument, taken from m3
+    VBROADCASTSS m1, maxm
+%else ; 64bit sysv
+    VBROADCASTSS m0, m0         ; min/max are already in m0/m1
+    VBROADCASTSS m1, m1
+%endif
+
+    movsxdifnidn lenq, lend
+
+.loop:                          ; walks both buffers from the end to the start
+    mova     m2, [srcq + 4 * lenq - 4 * mmsize]
+    mova     m3, [srcq + 4 * lenq - 3 * mmsize]
+    mova     m4, [srcq + 4 * lenq - 2 * mmsize]
+    mova     m5, [srcq + 4 * lenq - 1 * mmsize]
+
+    maxps    m2, m0
+    maxps    m3, m0
+    maxps    m4, m0
+    maxps    m5, m0
+
+    minps    m2, m1
+    minps    m3, m1
+    minps    m4, m1
+    minps    m5, m1
+
+    mova     [dstq + 4 * lenq - 4 * mmsize], m2
+    mova     [dstq + 4 * lenq - 3 * mmsize], m3
+    mova     [dstq + 4 * lenq - 2 * mmsize], m4
+    mova     [dstq + 4 * lenq - 1 * mmsize], m5
+
+    sub      lenq, mmsize       ; 4 registers of mmsize bytes = mmsize floats
+    jg .loop
+
+    RET
diff --git a/libavcodec/x86/audiodsp_init.c b/libavcodec/x86/audiodsp_init.c
index 8eb2e56bdd..23731158e5 100644
--- a/libavcodec/x86/audiodsp_init.c
+++ b/libavcodec/x86/audiodsp_init.c
@@ -49,7 +49,7 @@ av_cold void ff_audiodsp_init_x86(AudioDSPContext *c)
     if (EXTERNAL_MMXEXT(cpu_flags))
         c->scalarproduct_int16 = ff_scalarproduct_int16_mmxext;
 
-    if (INLINE_SSE(cpu_flags))
+    if (EXTERNAL_SSE(cpu_flags))
         c->vector_clipf = ff_vector_clipf_sse;
 
     if (EXTERNAL_SSE2(cpu_flags)) {
diff --git a/libavcodec/x86/audiodsp_mmx.c b/libavcodec/x86/audiodsp_mmx.c
deleted file mode 100644
index 04cbb90706..0000000000
--- a/libavcodec/x86/audiodsp_mmx.c
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "config.h"
-#include "libavutil/x86/asm.h"
-#include "audiodsp.h"
-
-#if HAVE_INLINE_ASM
-
-void ff_vector_clipf_sse(float *dst, const float *src,
-                         int len, float min, float max)
-{
-    x86_reg i = (len - 16) * 4;
-    __asm__ volatile (
-        "movss %3, %%xmm4 \n\t"
-        "movss %4, %%xmm5 \n\t"
-        "shufps $0, %%xmm4, %%xmm4 \n\t"
-        "shufps $0, %%xmm5, %%xmm5 \n\t"
-        "1: \n\t"
-        "movaps (%2, %0), %%xmm0 \n\t" // 3/1 on intel
-        "movaps 16(%2, %0), %%xmm1 \n\t"
-        "movaps 32(%2, %0), %%xmm2 \n\t"
-        "movaps 48(%2, %0), %%xmm3 \n\t"
-        "maxps %%xmm4, %%xmm0 \n\t"
-        "maxps %%xmm4, %%xmm1 \n\t"
-        "maxps %%xmm4, %%xmm2 \n\t"
-        "maxps %%xmm4, %%xmm3 \n\t"
-        "minps %%xmm5, %%xmm0 \n\t"
-        "minps %%xmm5, %%xmm1 \n\t"
-        "minps %%xmm5, %%xmm2 \n\t"
-        "minps %%xmm5, %%xmm3 \n\t"
-        "movaps %%xmm0, (%1, %0) \n\t"
-        "movaps %%xmm1, 16(%1, %0) \n\t"
-        "movaps %%xmm2, 32(%1, %0) \n\t"
-        "movaps %%xmm3, 48(%1, %0) \n\t"
-        "sub $64, %0 \n\t"
-        "jge 1b \n\t"
-        : "+&r" (i)
-        : "r" (dst), "r" (src), "m" (min), "m" (max)
-        : "memory");
-}
-
-#endif /* HAVE_INLINE_ASM */