dsputil: create 16/32-bit dctcoef versions of some functions
High-bit-depth H.264 needs 32-bit transform coefficients, whereas dnxhd does not. This creates a conflict in the templated functions that operate on DCTELEM data. This patch adds a field that lets the caller choose the coefficient size used by dsputil_init(), and adds the required 16-bit and 32-bit variants of the affected functions. Signed-off-by: Mans Rullgard <mans@mansr.com>
This commit is contained in:
		
							parent
							
								
									0a72533e98
								
							
						
					
					
						commit
						5cc2600964
					
				| @ -3159,13 +3159,13 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx) | |||||||
|     c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth) |     c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| #define BIT_DEPTH_FUNCS(depth)\ | #define BIT_DEPTH_FUNCS(depth, dct)\ | ||||||
|     c->draw_edges                    = FUNCC(draw_edges            , depth);\ |     c->draw_edges                    = FUNCC(draw_edges            , depth);\ | ||||||
|     c->emulated_edge_mc              = FUNC (ff_emulated_edge_mc   , depth);\ |     c->emulated_edge_mc              = FUNC (ff_emulated_edge_mc   , depth);\ | ||||||
|     c->clear_block                   = FUNCC(clear_block           , depth);\ |     c->clear_block                   = FUNCC(clear_block  ## dct   , depth);\ | ||||||
|     c->clear_blocks                  = FUNCC(clear_blocks          , depth);\ |     c->clear_blocks                  = FUNCC(clear_blocks ## dct   , depth);\ | ||||||
|     c->add_pixels8                   = FUNCC(add_pixels8           , depth);\ |     c->add_pixels8                   = FUNCC(add_pixels8  ## dct   , depth);\ | ||||||
|     c->add_pixels4                   = FUNCC(add_pixels4           , depth);\ |     c->add_pixels4                   = FUNCC(add_pixels4  ## dct   , depth);\ | ||||||
|     c->put_no_rnd_pixels_l2[0]       = FUNCC(put_no_rnd_pixels16_l2, depth);\ |     c->put_no_rnd_pixels_l2[0]       = FUNCC(put_no_rnd_pixels16_l2, depth);\ | ||||||
|     c->put_no_rnd_pixels_l2[1]       = FUNCC(put_no_rnd_pixels8_l2 , depth);\ |     c->put_no_rnd_pixels_l2[1]       = FUNCC(put_no_rnd_pixels8_l2 , depth);\ | ||||||
| \ | \ | ||||||
| @ -3199,15 +3199,23 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx) | |||||||
| 
 | 
 | ||||||
|     switch (avctx->bits_per_raw_sample) { |     switch (avctx->bits_per_raw_sample) { | ||||||
|     case 9: |     case 9: | ||||||
|         BIT_DEPTH_FUNCS(9); |         if (c->dct_bits == 32) { | ||||||
|  |             BIT_DEPTH_FUNCS(9, _32); | ||||||
|  |         } else { | ||||||
|  |             BIT_DEPTH_FUNCS(9, _16); | ||||||
|  |         } | ||||||
|         break; |         break; | ||||||
|     case 10: |     case 10: | ||||||
|         BIT_DEPTH_FUNCS(10); |         if (c->dct_bits == 32) { | ||||||
|  |             BIT_DEPTH_FUNCS(10, _32); | ||||||
|  |         } else { | ||||||
|  |             BIT_DEPTH_FUNCS(10, _16); | ||||||
|  |         } | ||||||
|         break; |         break; | ||||||
|     default: |     default: | ||||||
|         av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", avctx->bits_per_raw_sample); |         av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", avctx->bits_per_raw_sample); | ||||||
|     case 8: |     case 8: | ||||||
|         BIT_DEPTH_FUNCS(8); |         BIT_DEPTH_FUNCS(8, _16); | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -219,6 +219,11 @@ void ff_put_signed_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int lin | |||||||
|  * DSPContext. |  * DSPContext. | ||||||
|  */ |  */ | ||||||
| typedef struct DSPContext { | typedef struct DSPContext { | ||||||
|  |     /**
 | ||||||
|  |      * Size of DCT coefficients in bits (16 or 32). | ||||||
|  |      */ | ||||||
|  |     int dct_bits; | ||||||
|  | 
 | ||||||
|     /* pixel ops : interface with DCT */ |     /* pixel ops : interface with DCT */ | ||||||
|     void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size); |     void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size); | ||||||
|     void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride); |     void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride); | ||||||
|  | |||||||
| @ -192,43 +192,66 @@ void FUNC(ff_emulated_edge_mc)(uint8_t *buf, const uint8_t *src, int linesize, i | |||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void FUNCC(add_pixels8)(uint8_t *restrict _pixels, DCTELEM *_block, int line_size) | #define DCTELEM_FUNCS(dctcoef, suffix)                                  \ | ||||||
| { | static void FUNCC(add_pixels8 ## suffix)(uint8_t *restrict _pixels,     \ | ||||||
|     int i; |                                          DCTELEM *_block,               \ | ||||||
|     pixel *restrict pixels = (pixel *restrict)_pixels; |                                          int line_size)                 \ | ||||||
|     dctcoef *block = (dctcoef*)_block; | {                                                                       \ | ||||||
|     line_size /= sizeof(pixel); |     int i;                                                              \ | ||||||
| 
 |     pixel *restrict pixels = (pixel *restrict)_pixels;                  \ | ||||||
|     for(i=0;i<8;i++) { |     dctcoef *block = (dctcoef*)_block;                                  \ | ||||||
|         pixels[0] += block[0]; |     line_size /= sizeof(pixel);                                         \ | ||||||
|         pixels[1] += block[1]; |                                                                         \ | ||||||
|         pixels[2] += block[2]; |     for(i=0;i<8;i++) {                                                  \ | ||||||
|         pixels[3] += block[3]; |         pixels[0] += block[0];                                          \ | ||||||
|         pixels[4] += block[4]; |         pixels[1] += block[1];                                          \ | ||||||
|         pixels[5] += block[5]; |         pixels[2] += block[2];                                          \ | ||||||
|         pixels[6] += block[6]; |         pixels[3] += block[3];                                          \ | ||||||
|         pixels[7] += block[7]; |         pixels[4] += block[4];                                          \ | ||||||
|         pixels += line_size; |         pixels[5] += block[5];                                          \ | ||||||
|         block += 8; |         pixels[6] += block[6];                                          \ | ||||||
|     } |         pixels[7] += block[7];                                          \ | ||||||
|  |         pixels += line_size;                                            \ | ||||||
|  |         block += 8;                                                     \ | ||||||
|  |     }                                                                   \ | ||||||
|  | }                                                                       \ | ||||||
|  |                                                                         \ | ||||||
|  | static void FUNCC(add_pixels4 ## suffix)(uint8_t *restrict _pixels,     \ | ||||||
|  |                                          DCTELEM *_block,               \ | ||||||
|  |                                          int line_size)                 \ | ||||||
|  | {                                                                       \ | ||||||
|  |     int i;                                                              \ | ||||||
|  |     pixel *restrict pixels = (pixel *restrict)_pixels;                  \ | ||||||
|  |     dctcoef *block = (dctcoef*)_block;                                  \ | ||||||
|  |     line_size /= sizeof(pixel);                                         \ | ||||||
|  |                                                                         \ | ||||||
|  |     for(i=0;i<4;i++) {                                                  \ | ||||||
|  |         pixels[0] += block[0];                                          \ | ||||||
|  |         pixels[1] += block[1];                                          \ | ||||||
|  |         pixels[2] += block[2];                                          \ | ||||||
|  |         pixels[3] += block[3];                                          \ | ||||||
|  |         pixels += line_size;                                            \ | ||||||
|  |         block += 4;                                                     \ | ||||||
|  |     }                                                                   \ | ||||||
|  | }                                                                       \ | ||||||
|  |                                                                         \ | ||||||
|  | static void FUNCC(clear_block ## suffix)(DCTELEM *block)                \ | ||||||
|  | {                                                                       \ | ||||||
|  |     memset(block, 0, sizeof(dctcoef)*64);                               \ | ||||||
|  | }                                                                       \ | ||||||
|  |                                                                         \ | ||||||
|  | /**                                                                     \
 | ||||||
|  |  * memset(blocks, 0, sizeof(dctcoef)*6*64)                              \ | ||||||
|  |  */                                                                     \ | ||||||
|  | static void FUNCC(clear_blocks ## suffix)(DCTELEM *blocks)              \ | ||||||
|  | {                                                                       \ | ||||||
|  |     memset(blocks, 0, sizeof(dctcoef)*6*64);                            \ | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void FUNCC(add_pixels4)(uint8_t *restrict _pixels, DCTELEM *_block, int line_size) | DCTELEM_FUNCS(DCTELEM, _16) | ||||||
| { | #if BIT_DEPTH > 8 | ||||||
|     int i; | DCTELEM_FUNCS(dctcoef, _32) | ||||||
|     pixel *restrict pixels = (pixel *restrict)_pixels; | #endif | ||||||
|     dctcoef *block = (dctcoef*)_block; |  | ||||||
|     line_size /= sizeof(pixel); |  | ||||||
| 
 |  | ||||||
|     for(i=0;i<4;i++) { |  | ||||||
|         pixels[0] += block[0]; |  | ||||||
|         pixels[1] += block[1]; |  | ||||||
|         pixels[2] += block[2]; |  | ||||||
|         pixels[3] += block[3]; |  | ||||||
|         pixels += line_size; |  | ||||||
|         block += 4; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 | 
 | ||||||
| #define PIXOP2(OPNAME, OP) \ | #define PIXOP2(OPNAME, OP) \ | ||||||
| static void FUNCC(OPNAME ## _pixels2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | static void FUNCC(OPNAME ## _pixels2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | ||||||
| @ -1231,16 +1254,3 @@ void FUNCC(ff_put_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) { | |||||||
| void FUNCC(ff_avg_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) { | void FUNCC(ff_avg_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) { | ||||||
|     FUNCC(avg_pixels16)(dst, src, stride, 16); |     FUNCC(avg_pixels16)(dst, src, stride, 16); | ||||||
| } | } | ||||||
| 
 |  | ||||||
| static void FUNCC(clear_block)(DCTELEM *block) |  | ||||||
| { |  | ||||||
|     memset(block, 0, sizeof(dctcoef)*64); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /**
 |  | ||||||
|  * memset(blocks, 0, sizeof(DCTELEM)*6*64) |  | ||||||
|  */ |  | ||||||
| static void FUNCC(clear_blocks)(DCTELEM *blocks) |  | ||||||
| { |  | ||||||
|     memset(blocks, 0, sizeof(dctcoef)*6*64); |  | ||||||
| } |  | ||||||
|  | |||||||
| @ -3702,6 +3702,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){ | |||||||
| 
 | 
 | ||||||
|                     ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma); |                     ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma); | ||||||
|                     ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma); |                     ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma); | ||||||
|  |                     s->dsp.dct_bits = h->sps.bit_depth_luma > 8 ? 32 : 16; | ||||||
|                     dsputil_init(&s->dsp, s->avctx); |                     dsputil_init(&s->dsp, s->avctx); | ||||||
|                 } else { |                 } else { | ||||||
|                     av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", h->sps.bit_depth_luma); |                     av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", h->sps.bit_depth_luma); | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user