diracdsp: add dequantization SIMD
Currently unused, to be used in the following commits. Signed-off-by: Rostislav Pehlivanov <rpehlivanov@obe.tv>
This commit is contained in:
parent
244d22452c
commit
80721cc1ff
@ -189,6 +189,27 @@ static void add_rect_clamped_c(uint8_t *dst, const uint16_t *src, int stride,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * DEQUANT_SUBBAND(PX) defines dequant_subband_<PX>_c(), the C dequantizer
 * for a block of coefficients of type PX.
 *
 *   src    - quantized coefficients; rows are packed back to back, so the
 *            source pointer advances by tot_h * sizeof(PX) bytes per row
 *   dst    - output; consecutive rows are `stride` bytes apart
 *   qf, qs - factor and offset applied to each coefficient magnitude
 *   tot_v  - number of rows
 *   tot_h  - number of coefficients per row
 *
 * Every nonzero coefficient c becomes sign(c) * ((|c| * qf + qs) >> 2);
 * zero stays zero.
 *
 * The magnitude is computed in unsigned arithmetic on purpose: a signed
 * |c| * qf + qs overflows (undefined behaviour) for large coefficients,
 * whereas unsigned arithmetic wraps and shifts logically, which is the
 * same wrap/logical-shift behaviour as the pmulld/paddd/psrld sequence
 * used by the SSE4 version of this routine.
 */
#define DEQUANT_SUBBAND(PX)                                                    \
static void dequant_subband_ ## PX ## _c(uint8_t *src, uint8_t *dst,           \
                                         ptrdiff_t stride, const int qf,       \
                                         const int qs, int tot_v, int tot_h)   \
{                                                                              \
    int i, y;                                                                  \
    for (y = 0; y < tot_v; y++) {                                              \
        PX c, *src_r = (PX *)src, *dst_r = (PX *)dst;                          \
        for (i = 0; i < tot_h; i++) {                                          \
            c = *src_r++;                                                      \
            /* -(unsigned)c is |c| for negative c, INT_MIN included */         \
            if (c < 0)                                                         \
                c = -((-(unsigned)c * qf + qs) >> 2);                          \
            else if (c > 0)                                                    \
                c =  (( (unsigned)c * qf + qs) >> 2);                          \
            *dst_r++ = c;                                                      \
        }                                                                      \
        /* input rows are contiguous; only the output honours stride */        \
        src += tot_h * (ptrdiff_t)sizeof(PX);                                  \
        dst += stride;                                                         \
    }                                                                          \
}

DEQUANT_SUBBAND(int16_t)
DEQUANT_SUBBAND(int32_t)
|
||||||
|
|
||||||
#define PIXFUNC(PFX, WIDTH) \
|
#define PIXFUNC(PFX, WIDTH) \
|
||||||
c->PFX ## _dirac_pixels_tab[WIDTH>>4][0] = ff_ ## PFX ## _dirac_pixels ## WIDTH ## _c; \
|
c->PFX ## _dirac_pixels_tab[WIDTH>>4][0] = ff_ ## PFX ## _dirac_pixels ## WIDTH ## _c; \
|
||||||
c->PFX ## _dirac_pixels_tab[WIDTH>>4][1] = ff_ ## PFX ## _dirac_pixels ## WIDTH ## _l2_c; \
|
c->PFX ## _dirac_pixels_tab[WIDTH>>4][1] = ff_ ## PFX ## _dirac_pixels ## WIDTH ## _l2_c; \
|
||||||
@ -214,6 +235,9 @@ av_cold void ff_diracdsp_init(DiracDSPContext *c)
|
|||||||
c->biweight_dirac_pixels_tab[1] = biweight_dirac_pixels16_c;
|
c->biweight_dirac_pixels_tab[1] = biweight_dirac_pixels16_c;
|
||||||
c->biweight_dirac_pixels_tab[2] = biweight_dirac_pixels32_c;
|
c->biweight_dirac_pixels_tab[2] = biweight_dirac_pixels32_c;
|
||||||
|
|
||||||
|
c->dequant_subband[0] = c->dequant_subband[2] = dequant_subband_int16_t_c;
|
||||||
|
c->dequant_subband[1] = c->dequant_subband[3] = dequant_subband_int32_t_c;
|
||||||
|
|
||||||
PIXFUNC(put, 8);
|
PIXFUNC(put, 8);
|
||||||
PIXFUNC(put, 16);
|
PIXFUNC(put, 16);
|
||||||
PIXFUNC(put, 32);
|
PIXFUNC(put, 32);
|
||||||
|
@ -22,6 +22,7 @@
|
|||||||
#define AVCODEC_DIRACDSP_H
|
#define AVCODEC_DIRACDSP_H
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
|
||||||
typedef void (*dirac_weight_func)(uint8_t *block, int stride, int log2_denom, int weight, int h);
|
typedef void (*dirac_weight_func)(uint8_t *block, int stride, int log2_denom, int weight, int h);
|
||||||
typedef void (*dirac_biweight_func)(uint8_t *dst, const uint8_t *src, int stride, int log2_denom, int weightd, int weights, int h);
|
typedef void (*dirac_biweight_func)(uint8_t *dst, const uint8_t *src, int stride, int log2_denom, int weightd, int weights, int h);
|
||||||
@ -46,6 +47,9 @@ typedef struct {
|
|||||||
void (*add_rect_clamped)(uint8_t *dst/*align 16*/, const uint16_t *src/*align 16*/, int stride, const int16_t *idwt/*align 16*/, int idwt_stride, int width, int height/*mod 2*/);
|
void (*add_rect_clamped)(uint8_t *dst/*align 16*/, const uint16_t *src/*align 16*/, int stride, const int16_t *idwt/*align 16*/, int idwt_stride, int width, int height/*mod 2*/);
|
||||||
void (*add_dirac_obmc[3])(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
|
void (*add_dirac_obmc[3])(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
|
||||||
|
|
||||||
|
/* 0-1: int16_t and int32_t, asm or C; 2-3: int16_t and int32_t, C only */
|
||||||
|
void (*dequant_subband[4])(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h);
|
||||||
|
|
||||||
dirac_weight_func weight_dirac_pixels_tab[3];
|
dirac_weight_func weight_dirac_pixels_tab[3];
|
||||||
dirac_biweight_func biweight_dirac_pixels_tab[3];
|
dirac_biweight_func biweight_dirac_pixels_tab[3];
|
||||||
} DiracDSPContext;
|
} DiracDSPContext;
|
||||||
|
@ -263,3 +263,40 @@ ADD_RECT sse2
|
|||||||
HPEL_FILTER sse2
|
HPEL_FILTER sse2
|
||||||
ADD_OBMC 32, sse2
|
ADD_OBMC 32, sse2
|
||||||
ADD_OBMC 16, sse2
|
ADD_OBMC 16, sse2
|
||||||
|
|
||||||
|
INIT_XMM sse4
|
||||||
|
|
||||||
|
; void dequant_subband_32(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h)
;
; Dequantizes tot_v rows of 32-bit coefficients, four per iteration:
;     dst = sign(src) * ((|src| * qf + qs) >> 2)
; psignd zeroes a lane whose source coefficient is zero, so zero stays zero.
;
; Source rows are packed back to back (tot_h dwords each); destination rows
; are strideq bytes apart. The inner loop always handles whole vectors, so up
; to 3 dwords past tot_h are read and written on each row.
; NOTE(review): assumes tot_h > 0, tot_v > 0 and that both buffers are padded
; for that over-read/over-write - confirm against the callers.
cglobal dequant_subband_32, 7, 7, 4, src, dst, stride, qf, qs, tot_v, tot_h
    movd   m2, qfd
    movd   m3, qsd
    SPLATD m2                       ; presumably broadcasts qf to every dword lane
    SPLATD m3                       ; likewise for qs
    mov    r4d, tot_hd              ; r4 (the qs register, now free) keeps the row width
    mov    r3, dstq                 ; r3 (the qf register, now free) keeps the row start in dst

.loop_v:
    mov    tot_hq, r4               ; reload the per-row element counter
    mov    dstq, r3

.loop_h:
    movu   m0, [srcq]

    pabsd  m1, m0                   ; |c|
    pmulld m1, m2                   ; |c| * qf
    paddd  m1, m3                   ; + qs
    psrld  m1, 2                    ; >> 2 (logical)
    psignd m1, m0                   ; restore the sign of c, zero where c == 0

    movu   [dstq], m1

    add    srcq, mmsize
    add    dstq, mmsize
    sub    tot_hq, 4
    jg     .loop_h

    ; tot_hq is now 0, -1, -2 or -3: the number of dwords the last vector
    ; went past the end of the row. Rewind src by that amount so the next
    ; row starts exactly tot_h elements after the previous one.
    lea    srcq, [srcq + 4*tot_hq]

    add    r3, strideq
    dec    tot_vd
    jg     .loop_v

    RET
|
||||||
|
@ -46,6 +46,8 @@ void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src,
|
|||||||
void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
|
void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
|
||||||
void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
|
void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
|
||||||
|
|
||||||
|
/* SSE4 dequantizer for 32-bit coefficients, implemented in the x86 assembly
 * source (cglobal dequant_subband_32 under INIT_XMM sse4). The signature
 * matches the DiracDSPContext.dequant_subband[] function pointers. */
void ff_dequant_subband_32_sse4(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h);
|
||||||
|
|
||||||
#if HAVE_YASM
|
#if HAVE_YASM
|
||||||
|
|
||||||
#define HPEL_FILTER(MMSIZE, EXT) \
|
#define HPEL_FILTER(MMSIZE, EXT) \
|
||||||
@ -184,4 +186,8 @@ void ff_diracdsp_init_x86(DiracDSPContext* c)
|
|||||||
c->put_dirac_pixels_tab[2][0] = ff_put_dirac_pixels32_sse2;
|
c->put_dirac_pixels_tab[2][0] = ff_put_dirac_pixels32_sse2;
|
||||||
c->avg_dirac_pixels_tab[2][0] = ff_avg_dirac_pixels32_sse2;
|
c->avg_dirac_pixels_tab[2][0] = ff_avg_dirac_pixels32_sse2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (EXTERNAL_SSE4(mm_flags)) {
|
||||||
|
c->dequant_subband[1] = ff_dequant_subband_32_sse4;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user