vp9: 10/12bpp SIMD (sse2/ssse3/avx) for directional intra prediction.
This commit is contained in:
parent
26ece7a511
commit
061b67fb50
@ -85,3 +85,5 @@ DECLARE_ALIGNED(32, const ymm_reg, ff_pd_16) = { 0x0000001000000010ULL, 0x000
|
|||||||
0x0000001000000010ULL, 0x0000001000000010ULL };
|
0x0000001000000010ULL, 0x0000001000000010ULL };
|
||||||
DECLARE_ALIGNED(32, const ymm_reg, ff_pd_32) = { 0x0000002000000020ULL, 0x0000002000000020ULL,
|
DECLARE_ALIGNED(32, const ymm_reg, ff_pd_32) = { 0x0000002000000020ULL, 0x0000002000000020ULL,
|
||||||
0x0000002000000020ULL, 0x0000002000000020ULL };
|
0x0000002000000020ULL, 0x0000002000000020ULL };
|
||||||
|
DECLARE_ALIGNED(32, const ymm_reg, ff_pd_65535)= { 0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL,
|
||||||
|
0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL };
|
||||||
|
@ -65,5 +65,6 @@ extern const xmm_reg ff_ps_neg;
|
|||||||
extern const ymm_reg ff_pd_1;
|
extern const ymm_reg ff_pd_1;
|
||||||
extern const ymm_reg ff_pd_16;
|
extern const ymm_reg ff_pd_16;
|
||||||
extern const ymm_reg ff_pd_32;
|
extern const ymm_reg ff_pd_32;
|
||||||
|
extern const ymm_reg ff_pd_65535;
|
||||||
|
|
||||||
#endif /* AVCODEC_X86_CONSTANTS_H */
|
#endif /* AVCODEC_X86_CONSTANTS_H */
|
||||||
|
@ -26,6 +26,7 @@
|
|||||||
|
|
||||||
SECTION_RODATA 32
|
SECTION_RODATA 32
|
||||||
|
|
||||||
|
cextern pd_65535
|
||||||
cextern pw_1023
|
cextern pw_1023
|
||||||
%define pw_pixel_max pw_1023
|
%define pw_pixel_max pw_1023
|
||||||
cextern pw_16
|
cextern pw_16
|
||||||
@ -42,7 +43,6 @@ unpad: times 8 dw 16*1022/32 ; needs to be mod 16
|
|||||||
tap1: times 4 dw 1, -5
|
tap1: times 4 dw 1, -5
|
||||||
tap2: times 4 dw 20, 20
|
tap2: times 4 dw 20, 20
|
||||||
tap3: times 4 dw -5, 1
|
tap3: times 4 dw -5, 1
|
||||||
pd_0f: times 4 dd 0xffff
|
|
||||||
|
|
||||||
SECTION .text
|
SECTION .text
|
||||||
|
|
||||||
@ -708,7 +708,7 @@ h%1_loop_op:
|
|||||||
psrad m1, 10
|
psrad m1, 10
|
||||||
psrad m2, 10
|
psrad m2, 10
|
||||||
pslld m2, 16
|
pslld m2, 16
|
||||||
pand m1, [pd_0f]
|
pand m1, [pd_65535]
|
||||||
por m1, m2
|
por m1, m2
|
||||||
%if num_mmregs <= 8
|
%if num_mmregs <= 8
|
||||||
pxor m0, m0
|
pxor m0, m0
|
||||||
|
@ -165,6 +165,10 @@ filters_8tap_2d_fn(op, 4, align, bpp, bytes, opt4, f_opt)
|
|||||||
init_ipred_func(type, enum, 16, bpp, opt); \
|
init_ipred_func(type, enum, 16, bpp, opt); \
|
||||||
init_ipred_func(type, enum, 32, bpp, opt)
|
init_ipred_func(type, enum, 32, bpp, opt)
|
||||||
|
|
||||||
|
#define init_ipred_funcs(type, enum, bpp, opt) \
|
||||||
|
init_ipred_func(type, enum, 4, bpp, opt); \
|
||||||
|
init_8_16_32_ipred_funcs(type, enum, bpp, opt)
|
||||||
|
|
||||||
void ff_vp9dsp_init_10bpp_x86(VP9DSPContext *dsp);
|
void ff_vp9dsp_init_10bpp_x86(VP9DSPContext *dsp);
|
||||||
void ff_vp9dsp_init_12bpp_x86(VP9DSPContext *dsp);
|
void ff_vp9dsp_init_12bpp_x86(VP9DSPContext *dsp);
|
||||||
void ff_vp9dsp_init_16bpp_x86(VP9DSPContext *dsp);
|
void ff_vp9dsp_init_16bpp_x86(VP9DSPContext *dsp);
|
||||||
|
@ -51,6 +51,18 @@ decl_ipred_fns(h, 16, mmxext, sse2);
|
|||||||
decl_ipred_fns(dc, 16, mmxext, sse2);
|
decl_ipred_fns(dc, 16, mmxext, sse2);
|
||||||
decl_ipred_fns(dc_top, 16, mmxext, sse2);
|
decl_ipred_fns(dc_top, 16, mmxext, sse2);
|
||||||
decl_ipred_fns(dc_left, 16, mmxext, sse2);
|
decl_ipred_fns(dc_left, 16, mmxext, sse2);
|
||||||
|
|
||||||
|
#define decl_ipred_dir_funcs(type) \
|
||||||
|
decl_ipred_fns(type, 16, sse2, sse2); \
|
||||||
|
decl_ipred_fns(type, 16, ssse3, ssse3); \
|
||||||
|
decl_ipred_fns(type, 16, avx, avx)
|
||||||
|
|
||||||
|
decl_ipred_dir_funcs(dl);
|
||||||
|
decl_ipred_dir_funcs(dr);
|
||||||
|
decl_ipred_dir_funcs(vl);
|
||||||
|
decl_ipred_dir_funcs(vr);
|
||||||
|
decl_ipred_dir_funcs(hu);
|
||||||
|
decl_ipred_dir_funcs(hd);
|
||||||
#endif /* HAVE_YASM */
|
#endif /* HAVE_YASM */
|
||||||
|
|
||||||
av_cold void ff_vp9dsp_init_16bpp_x86(VP9DSPContext *dsp)
|
av_cold void ff_vp9dsp_init_16bpp_x86(VP9DSPContext *dsp)
|
||||||
@ -88,12 +100,33 @@ av_cold void ff_vp9dsp_init_16bpp_x86(VP9DSPContext *dsp)
|
|||||||
init_8_16_32_ipred_funcs(dc, DC, 16, sse2);
|
init_8_16_32_ipred_funcs(dc, DC, 16, sse2);
|
||||||
init_8_16_32_ipred_funcs(dc_top, TOP_DC, 16, sse2);
|
init_8_16_32_ipred_funcs(dc_top, TOP_DC, 16, sse2);
|
||||||
init_8_16_32_ipred_funcs(dc_left, LEFT_DC, 16, sse2);
|
init_8_16_32_ipred_funcs(dc_left, LEFT_DC, 16, sse2);
|
||||||
|
init_ipred_funcs(dl, DIAG_DOWN_LEFT, 16, sse2);
|
||||||
|
init_ipred_funcs(dr, DIAG_DOWN_RIGHT, 16, sse2);
|
||||||
|
init_ipred_funcs(vl, VERT_LEFT, 16, sse2);
|
||||||
|
init_ipred_funcs(vr, VERT_RIGHT, 16, sse2);
|
||||||
|
init_ipred_funcs(hu, HOR_UP, 16, sse2);
|
||||||
|
init_ipred_funcs(hd, HOR_DOWN, 16, sse2);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (EXTERNAL_SSSE3(cpu_flags)) {
|
||||||
|
init_ipred_funcs(dl, DIAG_DOWN_LEFT, 16, ssse3);
|
||||||
|
init_ipred_funcs(dr, DIAG_DOWN_RIGHT, 16, ssse3);
|
||||||
|
init_ipred_funcs(vl, VERT_LEFT, 16, ssse3);
|
||||||
|
init_ipred_funcs(vr, VERT_RIGHT, 16, ssse3);
|
||||||
|
init_ipred_funcs(hu, HOR_UP, 16, ssse3);
|
||||||
|
init_ipred_funcs(hd, HOR_DOWN, 16, ssse3);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (EXTERNAL_AVX_FAST(cpu_flags)) {
|
if (EXTERNAL_AVX_FAST(cpu_flags)) {
|
||||||
init_fpel_func(2, 0, 32, put, , avx);
|
init_fpel_func(2, 0, 32, put, , avx);
|
||||||
init_fpel_func(1, 0, 64, put, , avx);
|
init_fpel_func(1, 0, 64, put, , avx);
|
||||||
init_fpel_func(0, 0, 128, put, , avx);
|
init_fpel_func(0, 0, 128, put, , avx);
|
||||||
|
init_ipred_funcs(dl, DIAG_DOWN_LEFT, 16, avx);
|
||||||
|
init_ipred_funcs(dr, DIAG_DOWN_RIGHT, 16, avx);
|
||||||
|
init_ipred_funcs(vl, VERT_LEFT, 16, avx);
|
||||||
|
init_ipred_funcs(vr, VERT_RIGHT, 16, avx);
|
||||||
|
init_ipred_funcs(hu, HOR_UP, 16, avx);
|
||||||
|
init_ipred_funcs(hd, HOR_DOWN, 16, avx);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (EXTERNAL_AVX2(cpu_flags)) {
|
if (EXTERNAL_AVX2(cpu_flags)) {
|
||||||
|
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user