avcodec/x86/hevc_mc: move put/put_uni to h26x/h2656_inter.asm
This enables the asm optimizations to be reused by VVC. Signed-off-by: Wu Jianhua <toqsxw@outlook.com>
This commit is contained in:
parent
04c2e246a3
commit
7d9f1f5485
@ -167,6 +167,7 @@ X86ASM-OBJS-$(CONFIG_HEVC_DECODER) += x86/hevc_add_res.o \
|
||||
x86/hevc_deblock.o \
|
||||
x86/hevc_idct.o \
|
||||
x86/hevc_mc.o \
|
||||
x86/h26x/h2656_inter.o \
|
||||
x86/hevc_sao.o \
|
||||
x86/hevc_sao_10bit.o
|
||||
X86ASM-OBJS-$(CONFIG_JPEG2000_DECODER) += x86/jpeg2000dsp.o
|
||||
|
1145
libavcodec/x86/h26x/h2656_inter.asm
Normal file
1145
libavcodec/x86/h26x/h2656_inter.asm
Normal file
File diff suppressed because it is too large
Load Diff
98
libavcodec/x86/h26x/h2656dsp.c
Normal file
98
libavcodec/x86/h26x/h2656dsp.c
Normal file
@ -0,0 +1,98 @@
|
||||
/*
|
||||
* DSP for HEVC/VVC
|
||||
*
|
||||
* Copyright (C) 2022-2024 Nuo Mi
|
||||
* Copyright (c) 2023-2024 Wu Jianhua
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "h2656dsp.h"
|
||||
|
||||
/*
 * mc_rep_func(name, bitd, step, W, opt)
 *
 * Generates ff_h2656_put_<name><W>_<bitd>_<opt>(), a W-wide "put" function
 * built by repeatedly calling the narrower kernel
 * ff_h2656_put_<name><step>_<bitd>_<opt>().
 *
 *   name - kernel family token pasted into the symbol (e.g. pixels, 8tap_h)
 *   bitd - bit depth token; (bitd + 7) / 8 is used as the byte size of one
 *          source sample
 *   step - width handled by one call of the narrower kernel
 *   W    - total width covered by the generated function
 *   opt  - instruction-set suffix pasted into the symbol (e.g. sse4, avx2)
 *
 * For every column offset i = 0, step, 2*step, ... < W the source pointer is
 * advanced by i samples (i * bytes-per-sample) and the int16_t destination
 * pointer by i elements; all other arguments are forwarded unchanged.
 */
#define mc_rep_func(name, bitd, step, W, opt) \
|
||||
void ff_h2656_put_##name##W##_##bitd##_##opt(int16_t *_dst, \
|
||||
const uint8_t *_src, ptrdiff_t _srcstride, int height, const int8_t *hf, const int8_t *vf, int width) \
|
||||
{ \
|
||||
int i; \
|
||||
int16_t *dst; \
|
||||
for (i = 0; i < W; i += step) { \
|
||||
const uint8_t *src = _src + (i * ((bitd + 7) / 8)); \
|
||||
dst = _dst + i; \
|
||||
ff_h2656_put_##name##step##_##bitd##_##opt(dst, src, _srcstride, height, hf, vf, width); \
|
||||
} \
|
||||
}
|
||||
|
||||
/*
 * mc_rep_uni_func(name, bitd, step, W, opt)
 *
 * Generates ff_h2656_put_uni_<name><W>_<bitd>_<opt>(), a W-wide "put_uni"
 * function built by repeatedly calling the narrower kernel
 * ff_h2656_put_uni_<name><step>_<bitd>_<opt>().
 *
 * Parameters have the same meaning as for mc_rep_func. Unlike that macro the
 * destination here is a uint8_t pointer, so both the source and the
 * destination are advanced by i * ((bitd + 7) / 8) bytes for each column
 * offset i = 0, step, 2*step, ... < W. dststride and the remaining arguments
 * are forwarded unchanged.
 */
#define mc_rep_uni_func(name, bitd, step, W, opt) \
|
||||
void ff_h2656_put_uni_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, \
|
||||
const uint8_t *_src, ptrdiff_t _srcstride, int height, const int8_t *hf, const int8_t *vf, int width) \
|
||||
{ \
|
||||
int i; \
|
||||
uint8_t *dst; \
|
||||
for (i = 0; i < W; i += step) { \
|
||||
const uint8_t *src = _src + (i * ((bitd + 7) / 8)); \
|
||||
dst = _dst + (i * ((bitd + 7) / 8)); \
|
||||
ff_h2656_put_uni_##name##step##_##bitd##_##opt(dst, dststride, src, _srcstride, \
|
||||
height, hf, vf, width); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define mc_rep_funcs(name, bitd, step, W, opt) \
|
||||
mc_rep_func(name, bitd, step, W, opt) \
|
||||
mc_rep_uni_func(name, bitd, step, W, opt)
|
||||
|
||||
#define MC_REP_FUNCS_SSE4(fname) \
|
||||
mc_rep_funcs(fname, 8, 16,128, sse4) \
|
||||
mc_rep_funcs(fname, 8, 16, 64, sse4) \
|
||||
mc_rep_funcs(fname, 8, 16, 32, sse4) \
|
||||
mc_rep_funcs(fname, 10, 8,128, sse4) \
|
||||
mc_rep_funcs(fname, 10, 8, 64, sse4) \
|
||||
mc_rep_funcs(fname, 10, 8, 32, sse4) \
|
||||
mc_rep_funcs(fname, 10, 8, 16, sse4) \
|
||||
mc_rep_funcs(fname, 12, 8,128, sse4) \
|
||||
mc_rep_funcs(fname, 12, 8, 64, sse4) \
|
||||
mc_rep_funcs(fname, 12, 8, 32, sse4) \
|
||||
mc_rep_funcs(fname, 12, 8, 16, sse4) \
|
||||
|
||||
MC_REP_FUNCS_SSE4(pixels)
|
||||
MC_REP_FUNCS_SSE4(4tap_h)
|
||||
MC_REP_FUNCS_SSE4(4tap_v)
|
||||
MC_REP_FUNCS_SSE4(4tap_hv)
|
||||
MC_REP_FUNCS_SSE4(8tap_h)
|
||||
MC_REP_FUNCS_SSE4(8tap_v)
|
||||
MC_REP_FUNCS_SSE4(8tap_hv)
|
||||
mc_rep_funcs(8tap_hv, 8, 8, 16, sse4)
|
||||
|
||||
#if HAVE_AVX2_EXTERNAL
|
||||
|
||||
#define MC_REP_FUNCS_AVX2(fname) \
|
||||
mc_rep_funcs(fname, 8, 32, 64, avx2) \
|
||||
mc_rep_funcs(fname, 8, 32,128, avx2) \
|
||||
mc_rep_funcs(fname,10, 16, 32, avx2) \
|
||||
mc_rep_funcs(fname,10, 16, 64, avx2) \
|
||||
mc_rep_funcs(fname,10, 16,128, avx2) \
|
||||
mc_rep_funcs(fname,12, 16, 32, avx2) \
|
||||
mc_rep_funcs(fname,12, 16, 64, avx2) \
|
||||
mc_rep_funcs(fname,12, 16,128, avx2) \
|
||||
|
||||
MC_REP_FUNCS_AVX2(pixels)
|
||||
MC_REP_FUNCS_AVX2(8tap_h)
|
||||
MC_REP_FUNCS_AVX2(8tap_v)
|
||||
MC_REP_FUNCS_AVX2(8tap_hv)
|
||||
MC_REP_FUNCS_AVX2(4tap_h)
|
||||
MC_REP_FUNCS_AVX2(4tap_v)
|
||||
MC_REP_FUNCS_AVX2(4tap_hv)
|
||||
#endif
|
103
libavcodec/x86/h26x/h2656dsp.h
Normal file
103
libavcodec/x86/h26x/h2656dsp.h
Normal file
@ -0,0 +1,103 @@
|
||||
/*
|
||||
* DSP for HEVC/VVC
|
||||
*
|
||||
* Copyright (C) 2022-2024 Nuo Mi
|
||||
* Copyright (c) 2023-2024 Wu Jianhua
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVCODEC_X86_H26X_H2656DSP_H
|
||||
#define AVCODEC_X86_H26X_H2656DSP_H
|
||||
|
||||
#include "config.h"
|
||||
#include "libavutil/x86/asm.h"
|
||||
#include "libavutil/x86/cpu.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
/*
 * H2656_PEL_PROTOTYPE(name, D, opt)
 *
 * Declares the two entry points of one motion-compensation kernel:
 *   ff_h2656_put_<name>_<D>_<opt>()     - takes an int16_t destination
 *   ff_h2656_put_uni_<name>_<D>_<opt>() - takes a uint8_t destination plus
 *                                         its stride
 *
 *   name - kernel family and width token (e.g. pixels8, 8tap_hv16)
 *   D    - bit depth token pasted into the symbol
 *   opt  - instruction-set suffix pasted into the symbol
 *
 * Both take the source pointer and stride, the height, the horizontal (hf)
 * and vertical (vf) filter coefficient pointers and the width.
 */
#define H2656_PEL_PROTOTYPE(name, D, opt) \
|
||||
void ff_h2656_put_ ## name ## _ ## D ## _##opt(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, const int8_t *hf, const int8_t *vf, int width); \
|
||||
void ff_h2656_put_uni_ ## name ## _ ## D ## _##opt(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, const int8_t *hf, const int8_t *vf, int width); \
|
||||
|
||||
#define H2656_MC_8TAP_PROTOTYPES(fname, bitd, opt) \
|
||||
H2656_PEL_PROTOTYPE(fname##4, bitd, opt); \
|
||||
H2656_PEL_PROTOTYPE(fname##6, bitd, opt); \
|
||||
H2656_PEL_PROTOTYPE(fname##8, bitd, opt); \
|
||||
H2656_PEL_PROTOTYPE(fname##12, bitd, opt); \
|
||||
H2656_PEL_PROTOTYPE(fname##16, bitd, opt); \
|
||||
H2656_PEL_PROTOTYPE(fname##32, bitd, opt); \
|
||||
H2656_PEL_PROTOTYPE(fname##64, bitd, opt); \
|
||||
H2656_PEL_PROTOTYPE(fname##128, bitd, opt)
|
||||
|
||||
H2656_MC_8TAP_PROTOTYPES(pixels , 8, sse4);
|
||||
H2656_MC_8TAP_PROTOTYPES(pixels , 10, sse4);
|
||||
H2656_MC_8TAP_PROTOTYPES(pixels , 12, sse4);
|
||||
H2656_MC_8TAP_PROTOTYPES(8tap_h , 8, sse4);
|
||||
H2656_MC_8TAP_PROTOTYPES(8tap_h , 10, sse4);
|
||||
H2656_MC_8TAP_PROTOTYPES(8tap_h , 12, sse4);
|
||||
H2656_MC_8TAP_PROTOTYPES(8tap_v , 8, sse4);
|
||||
H2656_MC_8TAP_PROTOTYPES(8tap_v , 10, sse4);
|
||||
H2656_MC_8TAP_PROTOTYPES(8tap_v , 12, sse4);
|
||||
H2656_MC_8TAP_PROTOTYPES(8tap_hv , 8, sse4);
|
||||
H2656_MC_8TAP_PROTOTYPES(8tap_hv , 10, sse4);
|
||||
H2656_MC_8TAP_PROTOTYPES(8tap_hv , 12, sse4);
|
||||
|
||||
#define H2656_MC_4TAP_PROTOTYPES(fname, bitd, opt) \
|
||||
H2656_PEL_PROTOTYPE(fname##2, bitd, opt); \
|
||||
H2656_PEL_PROTOTYPE(fname##4, bitd, opt); \
|
||||
H2656_PEL_PROTOTYPE(fname##6, bitd, opt); \
|
||||
H2656_PEL_PROTOTYPE(fname##8, bitd, opt); \
|
||||
H2656_PEL_PROTOTYPE(fname##12, bitd, opt); \
|
||||
H2656_PEL_PROTOTYPE(fname##16, bitd, opt); \
|
||||
H2656_PEL_PROTOTYPE(fname##32, bitd, opt); \
|
||||
H2656_PEL_PROTOTYPE(fname##64, bitd, opt); \
|
||||
H2656_PEL_PROTOTYPE(fname##128, bitd, opt)
|
||||
|
||||
#define H2656_MC_4TAP_PROTOTYPES_SSE4(bitd) \
|
||||
H2656_PEL_PROTOTYPE(pixels2, bitd, sse4); \
|
||||
H2656_MC_4TAP_PROTOTYPES(4tap_h, bitd, sse4); \
|
||||
H2656_MC_4TAP_PROTOTYPES(4tap_v, bitd, sse4); \
|
||||
H2656_MC_4TAP_PROTOTYPES(4tap_hv, bitd, sse4); \
|
||||
|
||||
H2656_MC_4TAP_PROTOTYPES_SSE4(8)
|
||||
H2656_MC_4TAP_PROTOTYPES_SSE4(10)
|
||||
H2656_MC_4TAP_PROTOTYPES_SSE4(12)
|
||||
|
||||
#define H2656_MC_8TAP_PROTOTYPES_AVX2(fname) \
|
||||
H2656_PEL_PROTOTYPE(fname##32 , 8, avx2); \
|
||||
H2656_PEL_PROTOTYPE(fname##64 , 8, avx2); \
|
||||
H2656_PEL_PROTOTYPE(fname##128, 8, avx2); \
|
||||
H2656_PEL_PROTOTYPE(fname##16 ,10, avx2); \
|
||||
H2656_PEL_PROTOTYPE(fname##32 ,10, avx2); \
|
||||
H2656_PEL_PROTOTYPE(fname##64 ,10, avx2); \
|
||||
H2656_PEL_PROTOTYPE(fname##128,10, avx2); \
|
||||
H2656_PEL_PROTOTYPE(fname##16 ,12, avx2); \
|
||||
H2656_PEL_PROTOTYPE(fname##32 ,12, avx2); \
|
||||
H2656_PEL_PROTOTYPE(fname##64 ,12, avx2); \
|
||||
H2656_PEL_PROTOTYPE(fname##128,12, avx2) \
|
||||
|
||||
H2656_MC_8TAP_PROTOTYPES_AVX2(pixels);
|
||||
H2656_MC_8TAP_PROTOTYPES_AVX2(8tap_h);
|
||||
H2656_MC_8TAP_PROTOTYPES_AVX2(8tap_v);
|
||||
H2656_MC_8TAP_PROTOTYPES_AVX2(8tap_hv);
|
||||
H2656_PEL_PROTOTYPE(8tap_hv16, 8, avx2);
|
||||
|
||||
H2656_MC_8TAP_PROTOTYPES_AVX2(4tap_h);
|
||||
H2656_MC_8TAP_PROTOTYPES_AVX2(4tap_v);
|
||||
H2656_MC_8TAP_PROTOTYPES_AVX2(4tap_hv);
|
||||
|
||||
#endif
|
@ -715,35 +715,6 @@ SECTION .text
|
||||
; int height, int mx, int my)
|
||||
; ******************************
|
||||
|
||||
%macro HEVC_PUT_HEVC_PEL_PIXELS 2
|
||||
HEVC_PEL_PIXELS %1, %2
|
||||
HEVC_UNI_PEL_PIXELS %1, %2
|
||||
HEVC_BI_PEL_PIXELS %1, %2
|
||||
%endmacro
|
||||
|
||||
%macro HEVC_PEL_PIXELS 2
|
||||
cglobal hevc_put_hevc_pel_pixels%1_%2, 4, 4, 3, dst, src, srcstride,height
|
||||
pxor m2, m2
|
||||
.loop:
|
||||
SIMPLE_LOAD %1, %2, srcq, m0
|
||||
MC_PIXEL_COMPUTE %1, %2, 1
|
||||
PEL_10STORE%1 dstq, m0, m1
|
||||
LOOP_END dst, src, srcstride
|
||||
RET
|
||||
%endmacro
|
||||
|
||||
%macro HEVC_UNI_PEL_PIXELS 2
|
||||
cglobal hevc_put_hevc_uni_pel_pixels%1_%2, 5, 5, 2, dst, dststride, src, srcstride,height
|
||||
.loop:
|
||||
SIMPLE_LOAD %1, %2, srcq, m0
|
||||
PEL_%2STORE%1 dstq, m0, m1
|
||||
add dstq, dststrideq ; dst += dststride
|
||||
add srcq, srcstrideq ; src += srcstride
|
||||
dec heightd ; cmp height
|
||||
jnz .loop ; height loop
|
||||
RET
|
||||
%endmacro
|
||||
|
||||
%macro HEVC_BI_PEL_PIXELS 2
|
||||
cglobal hevc_put_hevc_bi_pel_pixels%1_%2, 6, 6, 6, dst, dststride, src, srcstride, src2, height
|
||||
pxor m2, m2
|
||||
@ -777,32 +748,8 @@ cglobal hevc_put_hevc_bi_pel_pixels%1_%2, 6, 6, 6, dst, dststride, src, srcstrid
|
||||
%define XMM_REGS 8
|
||||
%endif
|
||||
|
||||
cglobal hevc_put_hevc_epel_h%1_%2, 5, 6, XMM_REGS, dst, src, srcstride, height, mx, rfilter
|
||||
%assign %%stride ((%2 + 7)/8)
|
||||
EPEL_FILTER %2, mx, m4, m5, rfilter
|
||||
.loop:
|
||||
EPEL_LOAD %2, srcq-%%stride, %%stride, %1
|
||||
EPEL_COMPUTE %2, %1, m4, m5, 1
|
||||
PEL_10STORE%1 dstq, m0, m1
|
||||
LOOP_END dst, src, srcstride
|
||||
RET
|
||||
|
||||
cglobal hevc_put_hevc_uni_epel_h%1_%2, 6, 7, XMM_REGS, dst, dststride, src, srcstride, height, mx, rfilter
|
||||
%assign %%stride ((%2 + 7)/8)
|
||||
movdqa m6, [pw_%2]
|
||||
EPEL_FILTER %2, mx, m4, m5, rfilter
|
||||
.loop:
|
||||
EPEL_LOAD %2, srcq-%%stride, %%stride, %1
|
||||
EPEL_COMPUTE %2, %1, m4, m5
|
||||
UNI_COMPUTE %1, %2, m0, m1, m6
|
||||
PEL_%2STORE%1 dstq, m0, m1
|
||||
add dstq, dststrideq ; dst += dststride
|
||||
add srcq, srcstrideq ; src += srcstride
|
||||
dec heightd ; cmp height
|
||||
jnz .loop ; height loop
|
||||
RET
|
||||
|
||||
cglobal hevc_put_hevc_bi_epel_h%1_%2, 7, 8, XMM_REGS, dst, dststride, src, srcstride, src2, height, mx, rfilter
|
||||
%assign %%stride ((%2 + 7)/8)
|
||||
movdqa m6, [pw_bi_%2]
|
||||
EPEL_FILTER %2, mx, m4, m5, rfilter
|
||||
.loop:
|
||||
@ -824,36 +771,6 @@ cglobal hevc_put_hevc_bi_epel_h%1_%2, 7, 8, XMM_REGS, dst, dststride, src, srcst
|
||||
; int height, int mx, int my, int width)
|
||||
; ******************************
|
||||
|
||||
cglobal hevc_put_hevc_epel_v%1_%2, 4, 6, XMM_REGS, dst, src, srcstride, height, r3src, my
|
||||
movifnidn myd, mym
|
||||
sub srcq, srcstrideq
|
||||
EPEL_FILTER %2, my, m4, m5, r3src
|
||||
lea r3srcq, [srcstrideq*3]
|
||||
.loop:
|
||||
EPEL_LOAD %2, srcq, srcstride, %1
|
||||
EPEL_COMPUTE %2, %1, m4, m5, 1
|
||||
PEL_10STORE%1 dstq, m0, m1
|
||||
LOOP_END dst, src, srcstride
|
||||
RET
|
||||
|
||||
cglobal hevc_put_hevc_uni_epel_v%1_%2, 5, 7, XMM_REGS, dst, dststride, src, srcstride, height, r3src, my
|
||||
movifnidn myd, mym
|
||||
movdqa m6, [pw_%2]
|
||||
sub srcq, srcstrideq
|
||||
EPEL_FILTER %2, my, m4, m5, r3src
|
||||
lea r3srcq, [srcstrideq*3]
|
||||
.loop:
|
||||
EPEL_LOAD %2, srcq, srcstride, %1
|
||||
EPEL_COMPUTE %2, %1, m4, m5
|
||||
UNI_COMPUTE %1, %2, m0, m1, m6
|
||||
PEL_%2STORE%1 dstq, m0, m1
|
||||
add dstq, dststrideq ; dst += dststride
|
||||
add srcq, srcstrideq ; src += srcstride
|
||||
dec heightd ; cmp height
|
||||
jnz .loop ; height loop
|
||||
RET
|
||||
|
||||
|
||||
cglobal hevc_put_hevc_bi_epel_v%1_%2, 6, 8, XMM_REGS, dst, dststride, src, srcstride, src2, height, r3src, my
|
||||
movifnidn myd, mym
|
||||
movdqa m6, [pw_bi_%2]
|
||||
@ -882,135 +799,6 @@ cglobal hevc_put_hevc_bi_epel_v%1_%2, 6, 8, XMM_REGS, dst, dststride, src, srcst
|
||||
; ******************************
|
||||
|
||||
%macro HEVC_PUT_HEVC_EPEL_HV 2
|
||||
cglobal hevc_put_hevc_epel_hv%1_%2, 6, 7, 16 , dst, src, srcstride, height, mx, my, r3src
|
||||
%assign %%stride ((%2 + 7)/8)
|
||||
sub srcq, srcstrideq
|
||||
EPEL_HV_FILTER %2
|
||||
EPEL_LOAD %2, srcq-%%stride, %%stride, %1
|
||||
EPEL_COMPUTE %2, %1, m14, m15
|
||||
%if (%1 > 8 && (%2 == 8))
|
||||
SWAP m8, m1
|
||||
%endif
|
||||
SWAP m4, m0
|
||||
add srcq, srcstrideq
|
||||
EPEL_LOAD %2, srcq-%%stride, %%stride, %1
|
||||
EPEL_COMPUTE %2, %1, m14, m15
|
||||
%if (%1 > 8 && (%2 == 8))
|
||||
SWAP m9, m1
|
||||
%endif
|
||||
SWAP m5, m0
|
||||
add srcq, srcstrideq
|
||||
EPEL_LOAD %2, srcq-%%stride, %%stride, %1
|
||||
EPEL_COMPUTE %2, %1, m14, m15
|
||||
%if (%1 > 8 && (%2 == 8))
|
||||
SWAP m10, m1
|
||||
%endif
|
||||
SWAP m6, m0
|
||||
add srcq, srcstrideq
|
||||
.loop:
|
||||
EPEL_LOAD %2, srcq-%%stride, %%stride, %1
|
||||
EPEL_COMPUTE %2, %1, m14, m15
|
||||
%if (%1 > 8 && (%2 == 8))
|
||||
SWAP m11, m1
|
||||
%endif
|
||||
SWAP m7, m0
|
||||
punpcklwd m0, m4, m5
|
||||
punpcklwd m2, m6, m7
|
||||
%if %1 > 4
|
||||
punpckhwd m1, m4, m5
|
||||
punpckhwd m3, m6, m7
|
||||
%endif
|
||||
EPEL_COMPUTE 14, %1, m12, m13
|
||||
%if (%1 > 8 && (%2 == 8))
|
||||
punpcklwd m4, m8, m9
|
||||
punpcklwd m2, m10, m11
|
||||
punpckhwd m8, m8, m9
|
||||
punpckhwd m3, m10, m11
|
||||
EPEL_COMPUTE 14, %1, m12, m13, m4, m2, m8, m3
|
||||
%if cpuflag(avx2)
|
||||
vinserti128 m2, m0, xm4, 1
|
||||
vperm2i128 m3, m0, m4, q0301
|
||||
PEL_10STORE%1 dstq, m2, m3
|
||||
%else
|
||||
PEL_10STORE%1 dstq, m0, m4
|
||||
%endif
|
||||
%else
|
||||
PEL_10STORE%1 dstq, m0, m1
|
||||
%endif
|
||||
movdqa m4, m5
|
||||
movdqa m5, m6
|
||||
movdqa m6, m7
|
||||
%if (%1 > 8 && (%2 == 8))
|
||||
mova m8, m9
|
||||
mova m9, m10
|
||||
mova m10, m11
|
||||
%endif
|
||||
LOOP_END dst, src, srcstride
|
||||
RET
|
||||
|
||||
cglobal hevc_put_hevc_uni_epel_hv%1_%2, 7, 8, 16 , dst, dststride, src, srcstride, height, mx, my, r3src
|
||||
%assign %%stride ((%2 + 7)/8)
|
||||
sub srcq, srcstrideq
|
||||
EPEL_HV_FILTER %2
|
||||
EPEL_LOAD %2, srcq-%%stride, %%stride, %1
|
||||
EPEL_COMPUTE %2, %1, m14, m15
|
||||
%if (%1 > 8 && (%2 == 8))
|
||||
SWAP m8, m1
|
||||
%endif
|
||||
SWAP m4, m0
|
||||
add srcq, srcstrideq
|
||||
EPEL_LOAD %2, srcq-%%stride, %%stride, %1
|
||||
EPEL_COMPUTE %2, %1, m14, m15
|
||||
%if (%1 > 8 && (%2 == 8))
|
||||
SWAP m9, m1
|
||||
%endif
|
||||
SWAP m5, m0
|
||||
add srcq, srcstrideq
|
||||
EPEL_LOAD %2, srcq-%%stride, %%stride, %1
|
||||
EPEL_COMPUTE %2, %1, m14, m15
|
||||
%if (%1 > 8 && (%2 == 8))
|
||||
SWAP m10, m1
|
||||
%endif
|
||||
SWAP m6, m0
|
||||
add srcq, srcstrideq
|
||||
.loop:
|
||||
EPEL_LOAD %2, srcq-%%stride, %%stride, %1
|
||||
EPEL_COMPUTE %2, %1, m14, m15
|
||||
%if (%1 > 8 && (%2 == 8))
|
||||
SWAP m11, m1
|
||||
%endif
|
||||
mova m7, m0
|
||||
punpcklwd m0, m4, m5
|
||||
punpcklwd m2, m6, m7
|
||||
%if %1 > 4
|
||||
punpckhwd m1, m4, m5
|
||||
punpckhwd m3, m6, m7
|
||||
%endif
|
||||
EPEL_COMPUTE 14, %1, m12, m13
|
||||
%if (%1 > 8 && (%2 == 8))
|
||||
punpcklwd m4, m8, m9
|
||||
punpcklwd m2, m10, m11
|
||||
punpckhwd m8, m8, m9
|
||||
punpckhwd m3, m10, m11
|
||||
EPEL_COMPUTE 14, %1, m12, m13, m4, m2, m8, m3
|
||||
UNI_COMPUTE %1, %2, m0, m4, [pw_%2]
|
||||
%else
|
||||
UNI_COMPUTE %1, %2, m0, m1, [pw_%2]
|
||||
%endif
|
||||
PEL_%2STORE%1 dstq, m0, m1
|
||||
mova m4, m5
|
||||
mova m5, m6
|
||||
mova m6, m7
|
||||
%if (%1 > 8 && (%2 == 8))
|
||||
mova m8, m9
|
||||
mova m9, m10
|
||||
mova m10, m11
|
||||
%endif
|
||||
add dstq, dststrideq ; dst += dststride
|
||||
add srcq, srcstrideq ; src += srcstride
|
||||
dec heightd ; cmp height
|
||||
jnz .loop ; height loop
|
||||
RET
|
||||
|
||||
cglobal hevc_put_hevc_bi_epel_hv%1_%2, 8, 9, 16, dst, dststride, src, srcstride, src2, height, mx, my, r3src
|
||||
%assign %%stride ((%2 + 7)/8)
|
||||
@ -1093,34 +881,6 @@ cglobal hevc_put_hevc_bi_epel_hv%1_%2, 8, 9, 16, dst, dststride, src, srcstride,
|
||||
; ******************************
|
||||
|
||||
%macro HEVC_PUT_HEVC_QPEL 2
|
||||
cglobal hevc_put_hevc_qpel_h%1_%2, 5, 6, 16, dst, src, srcstride, height, mx, rfilter
|
||||
QPEL_FILTER %2, mx
|
||||
.loop:
|
||||
QPEL_H_LOAD %2, srcq, %1, 10
|
||||
QPEL_COMPUTE %1, %2, 1
|
||||
%if %2 > 8
|
||||
packssdw m0, m1
|
||||
%endif
|
||||
PEL_10STORE%1 dstq, m0, m1
|
||||
LOOP_END dst, src, srcstride
|
||||
RET
|
||||
|
||||
cglobal hevc_put_hevc_uni_qpel_h%1_%2, 6, 7, 16 , dst, dststride, src, srcstride, height, mx, rfilter
|
||||
mova m9, [pw_%2]
|
||||
QPEL_FILTER %2, mx
|
||||
.loop:
|
||||
QPEL_H_LOAD %2, srcq, %1, 10
|
||||
QPEL_COMPUTE %1, %2
|
||||
%if %2 > 8
|
||||
packssdw m0, m1
|
||||
%endif
|
||||
UNI_COMPUTE %1, %2, m0, m1, m9
|
||||
PEL_%2STORE%1 dstq, m0, m1
|
||||
add dstq, dststrideq ; dst += dststride
|
||||
add srcq, srcstrideq ; src += srcstride
|
||||
dec heightd ; cmp height
|
||||
jnz .loop ; height loop
|
||||
RET
|
||||
|
||||
cglobal hevc_put_hevc_bi_qpel_h%1_%2, 7, 8, 16 , dst, dststride, src, srcstride, src2, height, mx, rfilter
|
||||
movdqa m9, [pw_bi_%2]
|
||||
@ -1148,38 +908,6 @@ cglobal hevc_put_hevc_bi_qpel_h%1_%2, 7, 8, 16 , dst, dststride, src, srcstride,
|
||||
; int height, int mx, int my, int width)
|
||||
; ******************************
|
||||
|
||||
cglobal hevc_put_hevc_qpel_v%1_%2, 4, 8, 16, dst, src, srcstride, height, r3src, my, rfilter
|
||||
movifnidn myd, mym
|
||||
lea r3srcq, [srcstrideq*3]
|
||||
QPEL_FILTER %2, my
|
||||
.loop:
|
||||
QPEL_V_LOAD %2, srcq, srcstride, %1, r7
|
||||
QPEL_COMPUTE %1, %2, 1
|
||||
%if %2 > 8
|
||||
packssdw m0, m1
|
||||
%endif
|
||||
PEL_10STORE%1 dstq, m0, m1
|
||||
LOOP_END dst, src, srcstride
|
||||
RET
|
||||
|
||||
cglobal hevc_put_hevc_uni_qpel_v%1_%2, 5, 9, 16, dst, dststride, src, srcstride, height, r3src, my, rfilter
|
||||
movifnidn myd, mym
|
||||
movdqa m9, [pw_%2]
|
||||
lea r3srcq, [srcstrideq*3]
|
||||
QPEL_FILTER %2, my
|
||||
.loop:
|
||||
QPEL_V_LOAD %2, srcq, srcstride, %1, r8
|
||||
QPEL_COMPUTE %1, %2
|
||||
%if %2 > 8
|
||||
packssdw m0, m1
|
||||
%endif
|
||||
UNI_COMPUTE %1, %2, m0, m1, m9
|
||||
PEL_%2STORE%1 dstq, m0, m1
|
||||
add dstq, dststrideq ; dst += dststride
|
||||
add srcq, srcstrideq ; src += srcstride
|
||||
dec heightd ; cmp height
|
||||
jnz .loop ; height loop
|
||||
RET
|
||||
|
||||
cglobal hevc_put_hevc_bi_qpel_v%1_%2, 6, 10, 16, dst, dststride, src, srcstride, src2, height, r3src, my, rfilter
|
||||
movifnidn myd, mym
|
||||
@ -1210,162 +938,6 @@ cglobal hevc_put_hevc_bi_qpel_v%1_%2, 6, 10, 16, dst, dststride, src, srcstride,
|
||||
; int height, int mx, int my)
|
||||
; ******************************
|
||||
%macro HEVC_PUT_HEVC_QPEL_HV 2
|
||||
cglobal hevc_put_hevc_qpel_hv%1_%2, 6, 8, 16, dst, src, srcstride, height, mx, my, r3src, rfilter
|
||||
%if cpuflag(avx2)
|
||||
%assign %%shift 4
|
||||
%else
|
||||
%assign %%shift 3
|
||||
%endif
|
||||
sub mxq, 1
|
||||
sub myq, 1
|
||||
shl mxq, %%shift ; multiply by 32
|
||||
shl myq, %%shift ; multiply by 32
|
||||
lea r3srcq, [srcstrideq*3]
|
||||
sub srcq, r3srcq
|
||||
QPEL_H_LOAD %2, srcq, %1, 15
|
||||
QPEL_HV_COMPUTE %1, %2, mx, ackssdw
|
||||
SWAP m8, m0
|
||||
add srcq, srcstrideq
|
||||
QPEL_H_LOAD %2, srcq, %1, 15
|
||||
QPEL_HV_COMPUTE %1, %2, mx, ackssdw
|
||||
SWAP m9, m0
|
||||
add srcq, srcstrideq
|
||||
QPEL_H_LOAD %2, srcq, %1, 15
|
||||
QPEL_HV_COMPUTE %1, %2, mx, ackssdw
|
||||
SWAP m10, m0
|
||||
add srcq, srcstrideq
|
||||
QPEL_H_LOAD %2, srcq, %1, 15
|
||||
QPEL_HV_COMPUTE %1, %2, mx, ackssdw
|
||||
SWAP m11, m0
|
||||
add srcq, srcstrideq
|
||||
QPEL_H_LOAD %2, srcq, %1, 15
|
||||
QPEL_HV_COMPUTE %1, %2, mx, ackssdw
|
||||
SWAP m12, m0
|
||||
add srcq, srcstrideq
|
||||
QPEL_H_LOAD %2, srcq, %1, 15
|
||||
QPEL_HV_COMPUTE %1, %2, mx, ackssdw
|
||||
SWAP m13, m0
|
||||
add srcq, srcstrideq
|
||||
QPEL_H_LOAD %2, srcq, %1, 15
|
||||
QPEL_HV_COMPUTE %1, %2, mx, ackssdw
|
||||
SWAP m14, m0
|
||||
add srcq, srcstrideq
|
||||
.loop:
|
||||
QPEL_H_LOAD %2, srcq, %1, 15
|
||||
QPEL_HV_COMPUTE %1, %2, mx, ackssdw
|
||||
SWAP m15, m0
|
||||
punpcklwd m0, m8, m9
|
||||
punpcklwd m2, m10, m11
|
||||
punpcklwd m4, m12, m13
|
||||
punpcklwd m6, m14, m15
|
||||
%if %1 > 4
|
||||
punpckhwd m1, m8, m9
|
||||
punpckhwd m3, m10, m11
|
||||
punpckhwd m5, m12, m13
|
||||
punpckhwd m7, m14, m15
|
||||
%endif
|
||||
QPEL_HV_COMPUTE %1, 14, my, ackssdw
|
||||
PEL_10STORE%1 dstq, m0, m1
|
||||
%if %1 <= 4
|
||||
movq m8, m9
|
||||
movq m9, m10
|
||||
movq m10, m11
|
||||
movq m11, m12
|
||||
movq m12, m13
|
||||
movq m13, m14
|
||||
movq m14, m15
|
||||
%else
|
||||
movdqa m8, m9
|
||||
movdqa m9, m10
|
||||
movdqa m10, m11
|
||||
movdqa m11, m12
|
||||
movdqa m12, m13
|
||||
movdqa m13, m14
|
||||
movdqa m14, m15
|
||||
%endif
|
||||
LOOP_END dst, src, srcstride
|
||||
RET
|
||||
|
||||
cglobal hevc_put_hevc_uni_qpel_hv%1_%2, 7, 9, 16 , dst, dststride, src, srcstride, height, mx, my, r3src, rfilter
|
||||
%if cpuflag(avx2)
|
||||
%assign %%shift 4
|
||||
%else
|
||||
%assign %%shift 3
|
||||
%endif
|
||||
sub mxq, 1
|
||||
sub myq, 1
|
||||
shl mxq, %%shift ; multiply by 32
|
||||
shl myq, %%shift ; multiply by 32
|
||||
lea r3srcq, [srcstrideq*3]
|
||||
sub srcq, r3srcq
|
||||
QPEL_H_LOAD %2, srcq, %1, 15
|
||||
QPEL_HV_COMPUTE %1, %2, mx, ackssdw
|
||||
SWAP m8, m0
|
||||
add srcq, srcstrideq
|
||||
QPEL_H_LOAD %2, srcq, %1, 15
|
||||
QPEL_HV_COMPUTE %1, %2, mx, ackssdw
|
||||
SWAP m9, m0
|
||||
add srcq, srcstrideq
|
||||
QPEL_H_LOAD %2, srcq, %1, 15
|
||||
QPEL_HV_COMPUTE %1, %2, mx, ackssdw
|
||||
SWAP m10, m0
|
||||
add srcq, srcstrideq
|
||||
QPEL_H_LOAD %2, srcq, %1, 15
|
||||
QPEL_HV_COMPUTE %1, %2, mx, ackssdw
|
||||
SWAP m11, m0
|
||||
add srcq, srcstrideq
|
||||
QPEL_H_LOAD %2, srcq, %1, 15
|
||||
QPEL_HV_COMPUTE %1, %2, mx, ackssdw
|
||||
SWAP m12, m0
|
||||
add srcq, srcstrideq
|
||||
QPEL_H_LOAD %2, srcq, %1, 15
|
||||
QPEL_HV_COMPUTE %1, %2, mx, ackssdw
|
||||
SWAP m13, m0
|
||||
add srcq, srcstrideq
|
||||
QPEL_H_LOAD %2, srcq, %1, 15
|
||||
QPEL_HV_COMPUTE %1, %2, mx, ackssdw
|
||||
SWAP m14, m0
|
||||
add srcq, srcstrideq
|
||||
.loop:
|
||||
QPEL_H_LOAD %2, srcq, %1, 15
|
||||
QPEL_HV_COMPUTE %1, %2, mx, ackssdw
|
||||
SWAP m15, m0
|
||||
punpcklwd m0, m8, m9
|
||||
punpcklwd m2, m10, m11
|
||||
punpcklwd m4, m12, m13
|
||||
punpcklwd m6, m14, m15
|
||||
%if %1 > 4
|
||||
punpckhwd m1, m8, m9
|
||||
punpckhwd m3, m10, m11
|
||||
punpckhwd m5, m12, m13
|
||||
punpckhwd m7, m14, m15
|
||||
%endif
|
||||
QPEL_HV_COMPUTE %1, 14, my, ackusdw
|
||||
UNI_COMPUTE %1, %2, m0, m1, [pw_%2]
|
||||
PEL_%2STORE%1 dstq, m0, m1
|
||||
|
||||
%if %1 <= 4
|
||||
movq m8, m9
|
||||
movq m9, m10
|
||||
movq m10, m11
|
||||
movq m11, m12
|
||||
movq m12, m13
|
||||
movq m13, m14
|
||||
movq m14, m15
|
||||
%else
|
||||
mova m8, m9
|
||||
mova m9, m10
|
||||
mova m10, m11
|
||||
mova m11, m12
|
||||
mova m12, m13
|
||||
mova m13, m14
|
||||
mova m14, m15
|
||||
%endif
|
||||
add dstq, dststrideq ; dst += dststride
|
||||
add srcq, srcstrideq ; src += srcstride
|
||||
dec heightd ; cmp height
|
||||
jnz .loop ; height loop
|
||||
RET
|
||||
|
||||
cglobal hevc_put_hevc_bi_qpel_hv%1_%2, 8, 10, 16, dst, dststride, src, srcstride, src2, height, mx, my, r3src, rfilter
|
||||
%if cpuflag(avx2)
|
||||
@ -1613,22 +1185,22 @@ WEIGHTING_FUNCS 4, 12
|
||||
WEIGHTING_FUNCS 6, 12
|
||||
WEIGHTING_FUNCS 8, 12
|
||||
|
||||
HEVC_PUT_HEVC_PEL_PIXELS 2, 8
|
||||
HEVC_PUT_HEVC_PEL_PIXELS 4, 8
|
||||
HEVC_PUT_HEVC_PEL_PIXELS 6, 8
|
||||
HEVC_PUT_HEVC_PEL_PIXELS 8, 8
|
||||
HEVC_PUT_HEVC_PEL_PIXELS 12, 8
|
||||
HEVC_PUT_HEVC_PEL_PIXELS 16, 8
|
||||
HEVC_BI_PEL_PIXELS 2, 8
|
||||
HEVC_BI_PEL_PIXELS 4, 8
|
||||
HEVC_BI_PEL_PIXELS 6, 8
|
||||
HEVC_BI_PEL_PIXELS 8, 8
|
||||
HEVC_BI_PEL_PIXELS 12, 8
|
||||
HEVC_BI_PEL_PIXELS 16, 8
|
||||
|
||||
HEVC_PUT_HEVC_PEL_PIXELS 2, 10
|
||||
HEVC_PUT_HEVC_PEL_PIXELS 4, 10
|
||||
HEVC_PUT_HEVC_PEL_PIXELS 6, 10
|
||||
HEVC_PUT_HEVC_PEL_PIXELS 8, 10
|
||||
HEVC_BI_PEL_PIXELS 2, 10
|
||||
HEVC_BI_PEL_PIXELS 4, 10
|
||||
HEVC_BI_PEL_PIXELS 6, 10
|
||||
HEVC_BI_PEL_PIXELS 8, 10
|
||||
|
||||
HEVC_PUT_HEVC_PEL_PIXELS 2, 12
|
||||
HEVC_PUT_HEVC_PEL_PIXELS 4, 12
|
||||
HEVC_PUT_HEVC_PEL_PIXELS 6, 12
|
||||
HEVC_PUT_HEVC_PEL_PIXELS 8, 12
|
||||
HEVC_BI_PEL_PIXELS 2, 12
|
||||
HEVC_BI_PEL_PIXELS 4, 12
|
||||
HEVC_BI_PEL_PIXELS 6, 12
|
||||
HEVC_BI_PEL_PIXELS 8, 12
|
||||
|
||||
HEVC_PUT_HEVC_EPEL 2, 8
|
||||
HEVC_PUT_HEVC_EPEL 4, 8
|
||||
@ -1693,8 +1265,8 @@ HEVC_PUT_HEVC_QPEL_HV 8, 12
|
||||
%if HAVE_AVX2_EXTERNAL
|
||||
INIT_YMM avx2 ; adds ff_ and _avx2 to function name & enables 256b registers : m0 for 256b, xm0 for 128b. cpuflag(avx2) = 1 / notcpuflag(avx) = 0
|
||||
|
||||
HEVC_PUT_HEVC_PEL_PIXELS 32, 8
|
||||
HEVC_PUT_HEVC_PEL_PIXELS 16, 10
|
||||
HEVC_BI_PEL_PIXELS 32, 8
|
||||
HEVC_BI_PEL_PIXELS 16, 10
|
||||
|
||||
HEVC_PUT_HEVC_EPEL 32, 8
|
||||
HEVC_PUT_HEVC_EPEL 16, 10
|
||||
|
@ -1,6 +1,7 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Seppo Tomperi
|
||||
* Copyright (c) 2013 - 2014 Pierre-Edouard Lepere
|
||||
* Copyright (c) 2013-2014 Pierre-Edouard Lepere
|
||||
* Copyright (c) 2023-2024 Wu Jianhua
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
@ -27,6 +28,7 @@
|
||||
#include "libavutil/x86/cpu.h"
|
||||
#include "libavcodec/hevcdsp.h"
|
||||
#include "libavcodec/x86/hevcdsp.h"
|
||||
#include "libavcodec/x86/h26x/h2656dsp.h"
|
||||
|
||||
#define LFC_FUNC(DIR, DEPTH, OPT) \
|
||||
void ff_hevc_ ## DIR ## _loop_filter_chroma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, const int *tc, const uint8_t *no_p, const uint8_t *no_q);
|
||||
@ -83,6 +85,110 @@ void ff_hevc_idct_32x32_10_ ## opt(int16_t *coeffs, int col_limit);
|
||||
IDCT_FUNCS(sse2)
|
||||
IDCT_FUNCS(avx)
|
||||
|
||||
|
||||
/*
 * DECL_HV_FILTER(f) pastes f into ff_hevc_<f>_filters, so the alias below
 * lets DECL_HV_FILTER(pel) resolve to the qpel table.
 */
#define ff_hevc_pel_filters ff_hevc_qpel_filters
|
||||
/*
 * Declares the locals hf and vf, pointing at rows [mx - 1] and [my - 1] of
 * ff_hevc_<f>_filters. It relies on mx and my being in scope at the point of
 * expansion.
 *
 * NOTE(review): if mx or my can be 0 here (presumably the case for the pel
 * wrappers), the index is -1 - confirm that the callee never dereferences
 * hf/vf in that case.
 * NOTE(review): hf/vf are declared const uint8_t * while the ff_h2656_put_*
 * prototypes take const int8_t * - confirm the element type of the filter
 * tables.
 */
#define DECL_HV_FILTER(f) \
|
||||
const uint8_t *hf = ff_hevc_ ## f ## _filters[mx - 1]; \
|
||||
const uint8_t *vf = ff_hevc_ ## f ## _filters[my - 1];
|
||||
|
||||
/*
 * FW_PUT(p, a, b, depth, opt)
 *
 * Generates the forwarding wrapper ff_hevc_put_hevc_<a>_<depth>_<opt>(),
 * which keeps the HEVC signature (integer mx/my) and calls the shared kernel
 * ff_h2656_put_<b>_<depth>_<opt>() with filter pointers instead.
 *
 *   p     - filter table selector handed to DECL_HV_FILTER (pel, epel, qpel)
 *   a     - HEVC function name token (e.g. qpel_h8)
 *   b     - shared kernel name token (e.g. 8tap_h8)
 *   depth - bit depth token pasted into both symbols
 *   opt   - instruction-set suffix pasted into both symbols
 *
 * mx and my are only used by DECL_HV_FILTER to pick hf and vf; every other
 * argument is forwarded unchanged.
 */
#define FW_PUT(p, a, b, depth, opt) \
|
||||
void ff_hevc_put_hevc_ ## a ## _ ## depth ## _##opt(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride, \
|
||||
int height, intptr_t mx, intptr_t my,int width) \
|
||||
{ \
|
||||
DECL_HV_FILTER(p) \
|
||||
ff_h2656_put_ ## b ## _ ## depth ## _##opt(dst, src, srcstride, height, hf, vf, width); \
|
||||
}
|
||||
|
||||
/*
 * FW_PUT_UNI(p, a, b, depth, opt)
 *
 * Generates the forwarding wrapper ff_hevc_put_hevc_uni_<a>_<depth>_<opt>(),
 * which keeps the HEVC signature (integer mx/my) and calls the shared kernel
 * ff_h2656_put_uni_<b>_<depth>_<opt>() with filter pointers instead.
 *
 *   p     - filter table selector handed to DECL_HV_FILTER (pel, epel, qpel)
 *   a     - HEVC function name token (e.g. epel_v4)
 *   b     - shared kernel name token (e.g. 4tap_v4)
 *   depth - bit depth token pasted into both symbols
 *   opt   - instruction-set suffix pasted into both symbols
 *
 * mx and my are only used by DECL_HV_FILTER to pick hf and vf; dst,
 * dststride, src, srcstride, height and width are forwarded unchanged.
 */
#define FW_PUT_UNI(p, a, b, depth, opt) \
|
||||
void ff_hevc_put_hevc_uni_ ## a ## _ ## depth ## _##opt(uint8_t *dst, ptrdiff_t dststride, \
|
||||
const uint8_t *src, ptrdiff_t srcstride, \
|
||||
int height, intptr_t mx, intptr_t my, int width) \
|
||||
{ \
|
||||
DECL_HV_FILTER(p) \
|
||||
ff_h2656_put_uni_ ## b ## _ ## depth ## _##opt(dst, dststride, src, srcstride, height, hf, vf, width); \
|
||||
}
|
||||
|
||||
#if ARCH_X86_64 && HAVE_SSE4_EXTERNAL
|
||||
|
||||
#define FW_PUT_FUNCS(p, a, b, depth, opt) \
|
||||
FW_PUT(p, a, b, depth, opt) \
|
||||
FW_PUT_UNI(p, a, b, depth, opt)
|
||||
|
||||
#define FW_PEL(w, depth, opt) FW_PUT_FUNCS(pel, pel_pixels##w, pixels##w, depth, opt)
|
||||
|
||||
#define FW_DIR(npel, n, w, depth, opt) \
|
||||
FW_PUT_FUNCS(npel, npel ## _h##w, n ## tap_h##w, depth, opt) \
|
||||
FW_PUT_FUNCS(npel, npel ## _v##w, n ## tap_v##w, depth, opt)
|
||||
|
||||
#define FW_DIR_HV(npel, n, w, depth, opt) \
|
||||
FW_PUT_FUNCS(npel, npel ## _hv##w, n ## tap_hv##w, depth, opt)
|
||||
|
||||
FW_PEL(4, 8, sse4);
|
||||
FW_PEL(6, 8, sse4);
|
||||
FW_PEL(8, 8, sse4);
|
||||
FW_PEL(12, 8, sse4);
|
||||
FW_PEL(16, 8, sse4);
|
||||
FW_PEL(4, 10, sse4);
|
||||
FW_PEL(6, 10, sse4);
|
||||
FW_PEL(8, 10, sse4);
|
||||
FW_PEL(4, 12, sse4);
|
||||
FW_PEL(6, 12, sse4);
|
||||
FW_PEL(8, 12, sse4);
|
||||
|
||||
#define FW_EPEL(w, depth, opt) FW_DIR(epel, 4, w, depth, opt)
|
||||
#define FW_EPEL_HV(w, depth, opt) FW_DIR_HV(epel, 4, w, depth, opt)
|
||||
#define FW_EPEL_FUNCS(w, depth, opt) \
|
||||
FW_EPEL(w, depth, opt) \
|
||||
FW_EPEL_HV(w, depth, opt)
|
||||
|
||||
FW_EPEL(12, 8, sse4);
|
||||
|
||||
FW_EPEL_FUNCS(4, 8, sse4);
|
||||
FW_EPEL_FUNCS(6, 8, sse4);
|
||||
FW_EPEL_FUNCS(8, 8, sse4);
|
||||
FW_EPEL_FUNCS(16, 8, sse4);
|
||||
FW_EPEL_FUNCS(4, 10, sse4);
|
||||
FW_EPEL_FUNCS(6, 10, sse4);
|
||||
FW_EPEL_FUNCS(8, 10, sse4);
|
||||
FW_EPEL_FUNCS(4, 12, sse4);
|
||||
FW_EPEL_FUNCS(6, 12, sse4);
|
||||
FW_EPEL_FUNCS(8, 12, sse4);
|
||||
|
||||
#define FW_QPEL(w, depth, opt) FW_DIR(qpel, 8, w, depth, opt)
|
||||
#define FW_QPEL_HV(w, depth, opt) FW_DIR_HV(qpel, 8, w, depth, opt)
|
||||
#define FW_QPEL_FUNCS(w, depth, opt) \
|
||||
FW_QPEL(w, depth, opt) \
|
||||
FW_QPEL_HV(w, depth, opt)
|
||||
|
||||
FW_QPEL(12, 8, sse4);
|
||||
FW_QPEL(16, 8, sse4);
|
||||
|
||||
FW_QPEL_FUNCS(4, 8, sse4);
|
||||
FW_QPEL_FUNCS(8, 8, sse4);
|
||||
FW_QPEL_FUNCS(4, 10, sse4);
|
||||
FW_QPEL_FUNCS(8, 10, sse4);
|
||||
FW_QPEL_FUNCS(4, 12, sse4);
|
||||
FW_QPEL_FUNCS(8, 12, sse4);
|
||||
|
||||
/*
 * AVX2 forwarding wrappers.
 *
 * HAVE_AVX2_EXTERNAL is tested by value with #if, matching the
 * HAVE_SSE4_EXTERNAL check above and the HAVE_AVX2_EXTERNAL check in
 * h26x/h2656dsp.c. With #ifdef the section would also be compiled when the
 * macro is defined as 0, emitting wrappers that call ff_h2656_*_avx2 symbols
 * which are presumably not assembled in such a build.
 */
#if HAVE_AVX2_EXTERNAL
|
||||
|
||||
FW_PEL(32, 8, avx2);
|
||||
FW_PUT(pel, pel_pixels16, pixels16, 10, avx2);
|
||||
|
||||
FW_EPEL(32, 8, avx2);
|
||||
FW_EPEL(16, 10, avx2);
|
||||
|
||||
FW_EPEL_HV(32, 8, avx2);
|
||||
FW_EPEL_HV(16, 10, avx2);
|
||||
|
||||
FW_QPEL(32, 8, avx2);
|
||||
FW_QPEL(16, 10, avx2);
|
||||
|
||||
FW_QPEL_HV(16, 10, avx2);
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define mc_rep_func(name, bitd, step, W, opt) \
|
||||
void ff_hevc_put_hevc_##name##W##_##bitd##_##opt(int16_t *_dst, \
|
||||
const uint8_t *_src, ptrdiff_t _srcstride, int height, \
|
||||
|
Loading…
x
Reference in New Issue
Block a user