XvMC speedup by removing one memcpy and doing MB packing
Originally committed as revision 2442 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
		
							parent
							
								
									6b56c616d9
								
							
						
					
					
						commit
						a579db0c4f
					
				| @ -72,6 +72,8 @@ static int mpeg_decode_motion(MpegEncContext *s, int fcode, int pred); | |||||||
| #ifdef HAVE_XVMC | #ifdef HAVE_XVMC | ||||||
| extern int XVMC_field_start(MpegEncContext *s, AVCodecContext *avctx); | extern int XVMC_field_start(MpegEncContext *s, AVCodecContext *avctx); | ||||||
| extern int XVMC_field_end(MpegEncContext *s); | extern int XVMC_field_end(MpegEncContext *s); | ||||||
|  | extern void XVMC_pack_pblocks(MpegEncContext *s,int cbp); | ||||||
|  | extern void XVMC_init_block(s);//set s->block
 | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
| #ifdef CONFIG_ENCODERS | #ifdef CONFIG_ENCODERS | ||||||
| @ -1083,15 +1085,24 @@ static int mpeg_decode_mb(MpegEncContext *s, | |||||||
|         }else |         }else | ||||||
|             memset(s->last_mv, 0, sizeof(s->last_mv)); /* reset mv prediction */ |             memset(s->last_mv, 0, sizeof(s->last_mv)); /* reset mv prediction */ | ||||||
|         s->mb_intra = 1; |         s->mb_intra = 1; | ||||||
|  | #ifdef HAVE_XVMC | ||||||
|  |         //on 1 we memcpy blocks in xvmcvideo
 | ||||||
|  |         if(s->avctx->xvmc_acceleration > 1){ | ||||||
|  |             XVMC_pack_pblocks(s,-1);//intra are always full blocks
 | ||||||
|  |             if(s->swap_uv){ | ||||||
|  |                 exchange_uv(s); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  | #endif | ||||||
| 
 | 
 | ||||||
|         if (s->codec_id == CODEC_ID_MPEG2VIDEO) { |         if (s->codec_id == CODEC_ID_MPEG2VIDEO) { | ||||||
|             for(i=0;i<6;i++) { |             for(i=0;i<6;i++) { | ||||||
|                 if (mpeg2_decode_block_intra(s, block[i], i) < 0) |                 if (mpeg2_decode_block_intra(s, s->pblocks[i], i) < 0) | ||||||
|                     return -1; |                     return -1; | ||||||
|             } |             } | ||||||
|         } else { |         } else { | ||||||
|             for(i=0;i<6;i++) { |             for(i=0;i<6;i++) { | ||||||
|                 if (mpeg1_decode_block_intra(s, block[i], i) < 0) |                 if (mpeg1_decode_block_intra(s, s->pblocks[i], i) < 0) | ||||||
|                     return -1; |                     return -1; | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
| @ -1262,10 +1273,20 @@ static int mpeg_decode_mb(MpegEncContext *s, | |||||||
|             } |             } | ||||||
|             cbp++; |             cbp++; | ||||||
| 
 | 
 | ||||||
|  | #ifdef HAVE_XVMC | ||||||
|  |             //on 1 we memcpy blocks in xvmcvideo
 | ||||||
|  |             if(s->avctx->xvmc_acceleration > 1){ | ||||||
|  |                 XVMC_pack_pblocks(s,cbp); | ||||||
|  |                 if(s->swap_uv){ | ||||||
|  |                     exchange_uv(s); | ||||||
|  |                 } | ||||||
|  |             }     | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|             if (s->codec_id == CODEC_ID_MPEG2VIDEO) { |             if (s->codec_id == CODEC_ID_MPEG2VIDEO) { | ||||||
|                 for(i=0;i<6;i++) { |                 for(i=0;i<6;i++) { | ||||||
|                     if (cbp & 32) { |                     if (cbp & 32) { | ||||||
|                         if (mpeg2_decode_block_non_intra(s, block[i], i) < 0) |                         if (mpeg2_decode_block_non_intra(s, s->pblocks[i], i) < 0) | ||||||
|                             return -1; |                             return -1; | ||||||
|                     } else { |                     } else { | ||||||
|                         s->block_last_index[i] = -1; |                         s->block_last_index[i] = -1; | ||||||
| @ -1275,7 +1296,7 @@ static int mpeg_decode_mb(MpegEncContext *s, | |||||||
|             } else { |             } else { | ||||||
|                 for(i=0;i<6;i++) { |                 for(i=0;i<6;i++) { | ||||||
|                     if (cbp & 32) { |                     if (cbp & 32) { | ||||||
|                         if (mpeg1_decode_block_inter(s, block[i], i) < 0) |                         if (mpeg1_decode_block_inter(s, s->pblocks[i], i) < 0) | ||||||
|                             return -1; |                             return -1; | ||||||
|                     } else { |                     } else { | ||||||
|                         s->block_last_index[i] = -1; |                         s->block_last_index[i] = -1; | ||||||
| @ -1960,10 +1981,12 @@ static void mpeg_decode_extension(AVCodecContext *avctx, | |||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void exchange_uv(AVFrame *f){ | static void exchange_uv(MpegEncContext *s){ | ||||||
|     uint8_t *t= f->data[1]; | short * tmp; | ||||||
|     f->data[1]= f->data[2]; | 
 | ||||||
|     f->data[2]= t; |     tmp = s->pblocks[4]; | ||||||
|  |     s->pblocks[4] = s->pblocks[5]; | ||||||
|  |     s->pblocks[5] = tmp; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| #define DECODE_SLICE_FATAL_ERROR -2 | #define DECODE_SLICE_FATAL_ERROR -2 | ||||||
| @ -2093,6 +2116,12 @@ static int mpeg_decode_slice(AVCodecContext *avctx, | |||||||
|     ff_init_block_index(s); |     ff_init_block_index(s); | ||||||
| 
 | 
 | ||||||
|     for(;;) { |     for(;;) { | ||||||
|  | #ifdef HAVE_XVMC | ||||||
|  |         //on 1 we memcpy blocks in xvmcvideo
 | ||||||
|  |         if(s->avctx->xvmc_acceleration > 1) | ||||||
|  |             XVMC_init_block(s);//set s->block
 | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
| 	s->dsp.clear_blocks(s->block[0]); | 	s->dsp.clear_blocks(s->block[0]); | ||||||
| 
 | 
 | ||||||
|         ret = mpeg_decode_mb(s, s->block); |         ret = mpeg_decode_mb(s, s->block); | ||||||
| @ -2133,14 +2162,9 @@ static int mpeg_decode_slice(AVCodecContext *avctx, | |||||||
|         MPV_decode_mb(s, s->block); |         MPV_decode_mb(s, s->block); | ||||||
|          |          | ||||||
|         if (++s->mb_x >= s->mb_width) { |         if (++s->mb_x >= s->mb_width) { | ||||||
|             if(s->avctx->codec_tag == ff_get_fourcc("VCR2")) |  | ||||||
|                 exchange_uv((AVFrame*)s->current_picture_ptr); |  | ||||||
| 
 | 
 | ||||||
|             ff_draw_horiz_band(s, 16*s->mb_y, 16); |             ff_draw_horiz_band(s, 16*s->mb_y, 16); | ||||||
| 
 | 
 | ||||||
|             if(s->avctx->codec_tag == ff_get_fourcc("VCR2")) |  | ||||||
|                 exchange_uv((AVFrame*)s->current_picture_ptr); |  | ||||||
| 
 |  | ||||||
|             s->mb_x = 0; |             s->mb_x = 0; | ||||||
|             s->mb_y++; |             s->mb_y++; | ||||||
| 
 | 
 | ||||||
| @ -2233,8 +2257,6 @@ static int slice_end(AVCodecContext *avctx, AVFrame *pict) | |||||||
|                  ff_print_debug_info(s, s->last_picture_ptr); |                  ff_print_debug_info(s, s->last_picture_ptr); | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|         if(s->avctx->codec_tag == ff_get_fourcc("VCR2")) |  | ||||||
|             exchange_uv(pict); |  | ||||||
| 
 | 
 | ||||||
|         return 1; |         return 1; | ||||||
|     } else { |     } else { | ||||||
| @ -2294,11 +2316,13 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx, | |||||||
|         //get_format() or set_video(width,height,aspect,pix_fmt);
 |         //get_format() or set_video(width,height,aspect,pix_fmt);
 | ||||||
|         //until then pix_fmt may be changed right after codec init
 |         //until then pix_fmt may be changed right after codec init
 | ||||||
|         if( avctx->pix_fmt == PIX_FMT_XVMC_MPEG2_IDCT ) |         if( avctx->pix_fmt == PIX_FMT_XVMC_MPEG2_IDCT ) | ||||||
|             avctx->idct_algo = FF_IDCT_SIMPLE; |             if( avctx->idct_algo == FF_IDCT_AUTO ) | ||||||
|  |                 avctx->idct_algo = FF_IDCT_SIMPLE; | ||||||
| 
 | 
 | ||||||
|         if (MPV_common_init(s) < 0) |         if (MPV_common_init(s) < 0) | ||||||
|             return -1; |             return -1; | ||||||
|         s1->mpeg_enc_ctx_allocated = 1; |         s1->mpeg_enc_ctx_allocated = 1; | ||||||
|  |         s->swap_uv = 0;//just in case vcr2 and mpeg2 stream have been concatenated
 | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     skip_bits(&s->gb, 10); /* vbv_buffer_size */ |     skip_bits(&s->gb, 10); /* vbv_buffer_size */ | ||||||
| @ -2378,10 +2402,13 @@ static int vcr2_init_sequence(AVCodecContext *avctx) | |||||||
|     //get_format() or set_video(width,height,aspect,pix_fmt);
 |     //get_format() or set_video(width,height,aspect,pix_fmt);
 | ||||||
|     //until then pix_fmt may be changed right after codec init
 |     //until then pix_fmt may be changed right after codec init
 | ||||||
|     if( avctx->pix_fmt == PIX_FMT_XVMC_MPEG2_IDCT ) |     if( avctx->pix_fmt == PIX_FMT_XVMC_MPEG2_IDCT ) | ||||||
|         avctx->idct_algo = FF_IDCT_SIMPLE; |         if( avctx->idct_algo == FF_IDCT_AUTO ) | ||||||
|  |             avctx->idct_algo = FF_IDCT_SIMPLE; | ||||||
|      |      | ||||||
|     if (MPV_common_init(s) < 0) |     if (MPV_common_init(s) < 0) | ||||||
|         return -1; |         return -1; | ||||||
|  |     exchange_uv(s);//common init reset pblocks, so we swap them here
 | ||||||
|  |     s->swap_uv = 1;// in case of xvmc we need to swap uv for each MB 
 | ||||||
|     s1->mpeg_enc_ctx_allocated = 1; |     s1->mpeg_enc_ctx_allocated = 1; | ||||||
| 
 | 
 | ||||||
|     for(i=0;i<64;i++) { |     for(i=0;i<64;i++) { | ||||||
| @ -2634,14 +2661,14 @@ static int mpeg_mc_decode_init(AVCodecContext *avctx){ | |||||||
| 
 | 
 | ||||||
|     if( !(avctx->slice_flags & SLICE_FLAG_CODED_ORDER) ) |     if( !(avctx->slice_flags & SLICE_FLAG_CODED_ORDER) ) | ||||||
|         return -1; |         return -1; | ||||||
|     if( !(avctx->slice_flags & SLICE_FLAG_ALLOW_FIELD) ) |     if( !(avctx->slice_flags & SLICE_FLAG_ALLOW_FIELD) ){ | ||||||
|         dprintf("mpeg12.c: XvMC decoder will work better if SLICE_FLAG_ALLOW_FIELD is set\n"); |         dprintf("mpeg12.c: XvMC decoder will work better if SLICE_FLAG_ALLOW_FIELD is set\n"); | ||||||
| 
 |     } | ||||||
|     mpeg_decode_init(avctx); |     mpeg_decode_init(avctx); | ||||||
|     s = avctx->priv_data; |     s = avctx->priv_data; | ||||||
| 
 | 
 | ||||||
|     avctx->pix_fmt = PIX_FMT_XVMC_MPEG2_IDCT; |     avctx->pix_fmt = PIX_FMT_XVMC_MPEG2_IDCT; | ||||||
|     avctx->xvmc_acceleration = 1; |     avctx->xvmc_acceleration = 2;//2 - the blocks are packed!
 | ||||||
| 
 | 
 | ||||||
|     return 0; |     return 0; | ||||||
| } | } | ||||||
|  | |||||||
| @ -56,7 +56,7 @@ static int sse_mb(MpegEncContext *s); | |||||||
| #ifdef HAVE_XVMC | #ifdef HAVE_XVMC | ||||||
| extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx); | extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx); | ||||||
| extern void XVMC_field_end(MpegEncContext *s); | extern void XVMC_field_end(MpegEncContext *s); | ||||||
| extern void XVMC_decode_mb(MpegEncContext *s, DCTELEM block[6][64]); | extern void XVMC_decode_mb(MpegEncContext *s); | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
| void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c; | void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c; | ||||||
| @ -519,6 +519,10 @@ int MPV_common_init(MpegEncContext *s) | |||||||
|      |      | ||||||
|     s->block= s->blocks[0]; |     s->block= s->blocks[0]; | ||||||
| 
 | 
 | ||||||
|  |     for(i=0;i<12;i++){ | ||||||
|  |         s->pblocks[i] = (short *)(&s->block[i]); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     s->parse_context.state= -1; |     s->parse_context.state= -1; | ||||||
| 
 | 
 | ||||||
|     s->context_initialized = 1; |     s->context_initialized = 1; | ||||||
| @ -2485,7 +2489,7 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) | |||||||
|     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x; |     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x; | ||||||
| #ifdef HAVE_XVMC | #ifdef HAVE_XVMC | ||||||
|     if(s->avctx->xvmc_acceleration){ |     if(s->avctx->xvmc_acceleration){ | ||||||
|         XVMC_decode_mb(s,block); |         XVMC_decode_mb(s);//xvmc uses pblocks
 | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
| #endif | #endif | ||||||
|  | |||||||
| @ -655,6 +655,8 @@ typedef struct MpegEncContext { | |||||||
|     int rtp_payload_size; |     int rtp_payload_size; | ||||||
|     void (*rtp_callback)(void *data, int size, int packet_number); |     void (*rtp_callback)(void *data, int size, int packet_number); | ||||||
|     uint8_t *ptr_lastgob; |     uint8_t *ptr_lastgob; | ||||||
|  |     int swap_uv;//vcr2 codec is mpeg2 variant with UV swapped
 | ||||||
|  |     short * pblocks[12]; | ||||||
|      |      | ||||||
|     DCTELEM (*block)[64]; ///< points to one of the following blocks 
 |     DCTELEM (*block)[64]; ///< points to one of the following blocks 
 | ||||||
|     DCTELEM (*blocks)[6][64]; // for HQ mode we need to keep the best block
 |     DCTELEM (*blocks)[6][64]; // for HQ mode we need to keep the best block
 | ||||||
|  | |||||||
| @ -41,6 +41,33 @@ | |||||||
| 
 | 
 | ||||||
| //#include "xvmc_debug.h"
 | //#include "xvmc_debug.h"
 | ||||||
| 
 | 
 | ||||||
|  | //set s->block
 | ||||||
|  | inline void XVMC_init_block(MpegEncContext *s){ | ||||||
|  | xvmc_render_state_t * render; | ||||||
|  |     render = (xvmc_render_state_t*)s->current_picture.data[2]; | ||||||
|  |     assert(render != NULL); | ||||||
|  |     if( (render == NULL) || (render->magic != MP_XVMC_RENDER_MAGIC) ){ | ||||||
|  |         assert(0); | ||||||
|  |         return;//make sure that this is render packet
 | ||||||
|  |     } | ||||||
|  |     s->block =(DCTELEM *)(render->data_blocks+(render->next_free_data_block_num)*64); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void XVMC_pack_pblocks(MpegEncContext *s, int cbp){ | ||||||
|  | int i,j; | ||||||
|  | #define numblocks 6 | ||||||
|  | 
 | ||||||
|  |     j=0; | ||||||
|  |     for(i=0;i<numblocks;i++){ | ||||||
|  |         if(cbp & (1<<(numblocks-1-i)) ){ | ||||||
|  |            s->pblocks[i] = (short *)(&s->block[(j++)]); | ||||||
|  |         }else{ | ||||||
|  |            s->pblocks[i] = NULL; | ||||||
|  |         } | ||||||
|  | //        printf("s->pblocks[%d]=%p ,s->block=%p cbp=%d\n",i,s->pblocks[i],s->block,cbp);
 | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
| static int calc_cbp(MpegEncContext *s, int blocknum){ | static int calc_cbp(MpegEncContext *s, int blocknum){ | ||||||
| /* compute cbp */ | /* compute cbp */ | ||||||
| // for I420 bit_offset=5
 | // for I420 bit_offset=5
 | ||||||
| @ -110,7 +137,7 @@ xvmc_render_state_t * render; | |||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void XVMC_decode_mb(MpegEncContext *s, DCTELEM block[6][64]){ | void XVMC_decode_mb(MpegEncContext *s){ | ||||||
| XvMCMacroBlock * mv_block; | XvMCMacroBlock * mv_block; | ||||||
| xvmc_render_state_t * render; | xvmc_render_state_t * render; | ||||||
| int i,cbp,blocks_per_mb; | int i,cbp,blocks_per_mb; | ||||||
| @ -242,14 +269,14 @@ const int mb_xy = s->mb_y * s->mb_stride + s->mb_x; | |||||||
| */ | */ | ||||||
|     if(s->flags & CODEC_FLAG_GRAY){ |     if(s->flags & CODEC_FLAG_GRAY){ | ||||||
|         if(s->mb_intra){//intra frames are always full chroma block
 |         if(s->mb_intra){//intra frames are always full chroma block
 | ||||||
|             memset(block[4],0,sizeof(short)*8*8);//so we need to clear them
 |             for(i=4; i<blocks_per_mb; i++){ | ||||||
|             memset(block[5],0,sizeof(short)*8*8); |                 memset(s->pblocks[i],0,sizeof(short)*8*8);//so we need to clear them
 | ||||||
|             if(!render->unsigned_intra) |                 if(!render->unsigned_intra) | ||||||
|                 block[4][0] = block[5][0] = 1<<10; |                     s->pblocks[i][0] = 1<<10; | ||||||
|         } |             } | ||||||
|         else |         }else | ||||||
|             blocks_per_mb = 4;//Luminance blocks only
 |             blocks_per_mb = 4;//Luminance blocks only
 | ||||||
|     }; |     } | ||||||
|     cbp = calc_cbp(s,blocks_per_mb); |     cbp = calc_cbp(s,blocks_per_mb); | ||||||
|     mv_block->coded_block_pattern = cbp; |     mv_block->coded_block_pattern = cbp; | ||||||
|     if(cbp == 0) |     if(cbp == 0) | ||||||
| @ -259,14 +286,24 @@ const int mb_xy = s->mb_y * s->mb_stride + s->mb_x; | |||||||
|         if(s->block_last_index[i] >= 0){ |         if(s->block_last_index[i] >= 0){ | ||||||
|             // i do not have unsigned_intra MOCO to test, hope it is OK
 |             // i do not have unsigned_intra MOCO to test, hope it is OK
 | ||||||
|             if( (s->mb_intra) && ( render->idct || (!render->idct && !render->unsigned_intra)) ) |             if( (s->mb_intra) && ( render->idct || (!render->idct && !render->unsigned_intra)) ) | ||||||
|                 block[i][0]-=1<<10; |                 s->pblocks[i][0]-=1<<10; | ||||||
|             if(!render->idct){ |             if(!render->idct){ | ||||||
|                 s->dsp.idct(block[i]); |                 s->dsp.idct(s->pblocks[i]); | ||||||
|                 //!!TODO!clip!!!
 |                 //!!TODO!clip!!!
 | ||||||
|             } |             } | ||||||
| //TODO:avoid block copy by modifying s->block pointer
 | //copy blocks only if the codec doesn't support pblocks reordering
 | ||||||
|             memcpy(&render->data_blocks[(render->next_free_data_block_num++)*64], |             if(s->avctx->xvmc_acceleration == 1){ | ||||||
|                     block[i],sizeof(short)*8*8); |                 memcpy(&render->data_blocks[(render->next_free_data_block_num)*64], | ||||||
|  |                         s->pblocks[i],sizeof(short)*8*8); | ||||||
|  |             }else{ | ||||||
|  | /*              if(s->pblocks[i] != &render->data_blocks[
 | ||||||
|  |                         (render->next_free_data_block_num)*64]){ | ||||||
|  |                    printf("ERROR mb(%d,%d) s->pblocks[i]=%p data_block[]=%p\n", | ||||||
|  |                    s->mb_x,s->mb_y, s->pblocks[i],  | ||||||
|  |                    &render->data_blocks[(render->next_free_data_block_num)*64]); | ||||||
|  |                 }*/ | ||||||
|  |             } | ||||||
|  |             render->next_free_data_block_num++; | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|     render->filled_mv_blocks_num++; |     render->filled_mv_blocks_num++; | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user