dsputil: create 16/32-bit dctcoef versions of some functions

High bitdepth H.264 needs 32-bit transform coefficients, whereas dnxhd does not. This creates a conflict with the templated functions operating on DCTELEM data. This patch adds a field allowing the caller to choose the element size in dsputil_init() and adds the required functions. Signed-off-by: Mans Rullgard <mans@mansr.com>
2011-07-21 12:39:41 +01:00 · 2011-07-21 12:39:41 +01:00 · 5cc2600964
commit 5cc2600964
parent 0a72533e98
4 changed files with 80 additions and 56 deletions
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@ -3159,13 +3159,13 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
    c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
-#define BIT_DEPTH_FUNCS(depth)\
+#define BIT_DEPTH_FUNCS(depth, dct)\
    c->draw_edges                    = FUNCC(draw_edges            , depth);\
    c->emulated_edge_mc              = FUNC (ff_emulated_edge_mc   , depth);\
-    c->clear_block                   = FUNCC(clear_block           , depth);\
+    c->clear_block                   = FUNCC(clear_block  ## dct   , depth);\
-    c->clear_blocks                  = FUNCC(clear_blocks          , depth);\
+    c->clear_blocks                  = FUNCC(clear_blocks ## dct   , depth);\
-    c->add_pixels8                   = FUNCC(add_pixels8           , depth);\
+    c->add_pixels8                   = FUNCC(add_pixels8  ## dct   , depth);\
-    c->add_pixels4                   = FUNCC(add_pixels4           , depth);\
+    c->add_pixels4                   = FUNCC(add_pixels4  ## dct   , depth);\
    c->put_no_rnd_pixels_l2[0]       = FUNCC(put_no_rnd_pixels16_l2, depth);\
    c->put_no_rnd_pixels_l2[1]       = FUNCC(put_no_rnd_pixels8_l2 , depth);\
 \
@ -3199,15 +3199,23 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
    switch (avctx->bits_per_raw_sample) {
    case 9:
-        BIT_DEPTH_FUNCS(9);
+        if (c->dct_bits == 32) {
            BIT_DEPTH_FUNCS(9, _32);
        } else {
            BIT_DEPTH_FUNCS(9, _16);
        }
        break;
    case 10:
-        BIT_DEPTH_FUNCS(10);
+        if (c->dct_bits == 32) {
            BIT_DEPTH_FUNCS(10, _32);
        } else {
            BIT_DEPTH_FUNCS(10, _16);
        }
        break;
    default:
        av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", avctx->bits_per_raw_sample);
    case 8:
-        BIT_DEPTH_FUNCS(8);
+        BIT_DEPTH_FUNCS(8, _16);
        break;
    }
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@ -219,6 +219,11 @@ void ff_put_signed_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int lin
 * DSPContext.
 */
 typedef struct DSPContext {
    /**
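     * Size of DCT coefficients in bits (16 or 32). Set by the caller before
     * dsputil_init(); any value other than 32 selects the 16-bit functions.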
     */
    int dct_bits;
    /* pixel ops : interface with DCT */
    void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size);
    void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride);
--- a/libavcodec/dsputil_template.c
+++ b/libavcodec/dsputil_template.c
@ -192,43 +192,66 @@ void FUNC(ff_emulated_edge_mc)(uint8_t *buf, const uint8_t *src, int linesize, i
    }
 }
-static void FUNCC(add_pixels8)(uint8_t *restrict _pixels, DCTELEM *_block, int line_size)
+#define DCTELEM_FUNCS(dctcoef, suffix)                                  \
-{
+static void FUNCC(add_pixels8 ## suffix)(uint8_t *restrict _pixels,     \
-    int i;
+                                         DCTELEM *_block,               \
-    pixel *restrict pixels = (pixel *restrict)_pixels;
+                                         int line_size)                 \
-    dctcoef *block = (dctcoef*)_block;
+{                                                                       \
-    line_size /= sizeof(pixel);
+    int i;                                                              \
-
+    pixel *restrict pixels = (pixel *restrict)_pixels;                  \
-    for(i=0;i<8;i++) {
+    dctcoef *block = (dctcoef*)_block;                                  \
-        pixels[0] += block[0];
+    line_size /= sizeof(pixel);                                         \
-        pixels[1] += block[1];
+                                                                        \
-        pixels[2] += block[2];
+    for(i=0;i<8;i++) {                                                  \
-        pixels[3] += block[3];
+        pixels[0] += block[0];                                          \
-        pixels[4] += block[4];
+        pixels[1] += block[1];                                          \
-        pixels[5] += block[5];
+        pixels[2] += block[2];                                          \
-        pixels[6] += block[6];
+        pixels[3] += block[3];                                          \
-        pixels[7] += block[7];
+        pixels[4] += block[4];                                          \
-        pixels += line_size;
+        pixels[5] += block[5];                                          \
-        block += 8;
+        pixels[6] += block[6];                                          \
-    }
+        pixels[7] += block[7];                                          \
        pixels += line_size;                                            \
        block += 8;                                                     \
    }                                                                   \
 }                                                                       \
                                                                        \
 static void FUNCC(add_pixels4 ## suffix)(uint8_t *restrict _pixels,     \
                                         DCTELEM *_block,               \
                                         int line_size)                 \
 {                                                                       \
    int i;                                                              \
    pixel *restrict pixels = (pixel *restrict)_pixels;                  \
    dctcoef *block = (dctcoef*)_block;                                  \
    line_size /= sizeof(pixel);                                         \
                                                                        \
    for(i=0;i<4;i++) {                                                  \
        pixels[0] += block[0];                                          \
        pixels[1] += block[1];                                          \
        pixels[2] += block[2];                                          \
        pixels[3] += block[3];                                          \
        pixels += line_size;                                            \
        block += 4;                                                     \
    }                                                                   \
 }                                                                       \
                                                                        \
 static void FUNCC(clear_block ## suffix)(DCTELEM *block)                \
 {                                                                       \
    memset(block, 0, sizeof(dctcoef)*64);                               \
 }                                                                       \
                                                                        \
 /**                                                                     \
 * memset(blocks, 0, sizeof(dctcoef)*6*64)                              \
 */                                                                     \
 static void FUNCC(clear_blocks ## suffix)(DCTELEM *blocks)              \
 {                                                                       \
    memset(blocks, 0, sizeof(dctcoef)*6*64);                            \
 }
-static void FUNCC(add_pixels4)(uint8_t *restrict _pixels, DCTELEM *_block, int line_size)
+DCTELEM_FUNCS(DCTELEM, _16)
-{
+#if BIT_DEPTH > 8
-    int i;
+DCTELEM_FUNCS(dctcoef, _32)
-    pixel *restrict pixels = (pixel *restrict)_pixels;
+#endif
    dctcoef *block = (dctcoef*)_block;
    line_size /= sizeof(pixel);
    for(i=0;i<4;i++) {
        pixels[0] += block[0];
        pixels[1] += block[1];
        pixels[2] += block[2];
        pixels[3] += block[3];
        pixels += line_size;
        block += 4;
    }
 }
 #define PIXOP2(OPNAME, OP) \
 static void FUNCC(OPNAME ## _pixels2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
@ -1231,16 +1254,3 @@ void FUNCC(ff_put_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
 /**
 * 16x16 entry point: forwards dst, src and stride unchanged to the
 * bit-depth-specific avg_pixels16, with the final argument fixed to 16
 * (presumably the row count -- confirm against avg_pixels16's signature).
 */
 void FUNCC(ff_avg_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
    FUNCC(avg_pixels16)(dst, src, stride, 16);
 }
 /**
 * Zero a single coefficient block: memset(block, 0, sizeof(dctcoef)*64).
 * The byte count is derived from dctcoef, not from the DCTELEM pointer
 * type, so the buffer must be able to hold 64 dctcoef-sized elements.
 */
 static void FUNCC(clear_block)(DCTELEM *block)
 {
    memset(block, 0, sizeof(dctcoef)*64);
 }
 /**
 * Zero six consecutive 64-coefficient blocks:
 * memset(blocks, 0, sizeof(dctcoef)*6*64)
 * The byte count is derived from dctcoef, not from the DCTELEM pointer
 * type, so the buffer must be able to hold 6*64 dctcoef-sized elements.
 */
 static void FUNCC(clear_blocks)(DCTELEM *blocks)
 {
    memset(blocks, 0, sizeof(dctcoef)*6*64);
 }
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@ -3702,6 +3702,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
                    ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma);
                    ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma);
                    s->dsp.dct_bits = h->sps.bit_depth_luma > 8 ? 32 : 16;
                    dsputil_init(&s->dsp, s->avctx);
                } else {
                    av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", h->sps.bit_depth_luma);