Bugfix: sizeof(s->block) isn't 64*6*2 anymore.
mpeg12 decoding optimization.
Originally committed as revision 364 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
		
							parent
							
								
									cf8039b2cf
								
							
						
					
					
						commit
						649c00c96d
					
				@ -30,6 +30,7 @@ void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
 | 
				
			|||||||
void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
 | 
					void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
 | 
				
			||||||
void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
 | 
					void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
 | 
				
			||||||
void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
 | 
					void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
 | 
				
			||||||
 | 
					void (*clear_blocks)(DCTELEM *blocks);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
op_pixels_abs_func pix_abs16x16;
 | 
					op_pixels_abs_func pix_abs16x16;
 | 
				
			||||||
op_pixels_abs_func pix_abs16x16_x2;
 | 
					op_pixels_abs_func pix_abs16x16_x2;
 | 
				
			||||||
@ -866,6 +867,11 @@ void block_permute(INT16 *block)
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * Plain-C implementation of the clear_blocks hook (dsputil_init
					 * installs it as the default).  Zero-fills sizeof(DCTELEM)*6*64
					 * bytes starting at `blocks`, i.e. six consecutive 64-element
					 * coefficient blocks; the caller must provide at least that much.
					 */
					void clear_blocks_c(DCTELEM *blocks)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    memset(blocks, 0, sizeof(DCTELEM)*6*64);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void dsputil_init(void)
 | 
					void dsputil_init(void)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    int i, j;
 | 
					    int i, j;
 | 
				
			||||||
@ -890,6 +896,7 @@ void dsputil_init(void)
 | 
				
			|||||||
    put_pixels_clamped = put_pixels_clamped_c;
 | 
					    put_pixels_clamped = put_pixels_clamped_c;
 | 
				
			||||||
    add_pixels_clamped = add_pixels_clamped_c;
 | 
					    add_pixels_clamped = add_pixels_clamped_c;
 | 
				
			||||||
    gmc1= gmc1_c;
 | 
					    gmc1= gmc1_c;
 | 
				
			||||||
 | 
					    clear_blocks= clear_blocks_c;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    pix_abs16x16     = pix_abs16x16_c;
 | 
					    pix_abs16x16     = pix_abs16x16_c;
 | 
				
			||||||
    pix_abs16x16_x2  = pix_abs16x16_x2_c;
 | 
					    pix_abs16x16_x2  = pix_abs16x16_x2_c;
 | 
				
			||||||
 | 
				
			|||||||
@ -40,11 +40,13 @@ extern void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
 | 
				
			|||||||
extern void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
 | 
					extern void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
 | 
				
			||||||
extern void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
 | 
					extern void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
 | 
				
			||||||
extern void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
 | 
					extern void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
 | 
				
			||||||
 | 
					extern void (*clear_blocks)(DCTELEM *blocks);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size);
 | 
					void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size);
 | 
				
			||||||
void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size);
 | 
					void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size);
 | 
				
			||||||
void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size);
 | 
					void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size);
 | 
				
			||||||
 | 
					void clear_blocks_c(DCTELEM *blocks);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* add and put pixel (decoding) */
 | 
					/* add and put pixel (decoding) */
 | 
				
			||||||
typedef void (*op_pixels_func)(UINT8 *block, const UINT8 *pixels, int line_size, int h);
 | 
					typedef void (*op_pixels_func)(UINT8 *block, const UINT8 *pixels, int line_size, int h);
 | 
				
			||||||
 | 
				
			|||||||
@ -156,6 +156,7 @@ static int h263_decode_frame(AVCodecContext *avctx,
 | 
				
			|||||||
        if (s->mb_y && !s->h263_pred) {
 | 
					        if (s->mb_y && !s->h263_pred) {
 | 
				
			||||||
            s->first_gob_line = h263_decode_gob_header(s);
 | 
					            s->first_gob_line = h263_decode_gob_header(s);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        s->block_index[0]= s->block_wrap[0]*(s->mb_y*2 + 1) - 1;
 | 
					        s->block_index[0]= s->block_wrap[0]*(s->mb_y*2 + 1) - 1;
 | 
				
			||||||
        s->block_index[1]= s->block_wrap[0]*(s->mb_y*2 + 1);
 | 
					        s->block_index[1]= s->block_wrap[0]*(s->mb_y*2 + 1);
 | 
				
			||||||
        s->block_index[2]= s->block_wrap[0]*(s->mb_y*2 + 2) - 1;
 | 
					        s->block_index[2]= s->block_wrap[0]*(s->mb_y*2 + 2) - 1;
 | 
				
			||||||
@ -183,28 +184,8 @@ static int h263_decode_frame(AVCodecContext *avctx,
 | 
				
			|||||||
                s->y_dc_scale = 8;
 | 
					                s->y_dc_scale = 8;
 | 
				
			||||||
                s->c_dc_scale = 8;
 | 
					                s->c_dc_scale = 8;
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
 | 
					            clear_blocks(s->block[0]);
 | 
				
			||||||
#ifdef HAVE_MMX
 | 
					            
 | 
				
			||||||
            if (mm_flags & MM_MMX) {
 | 
					 | 
				
			||||||
                asm volatile(
 | 
					 | 
				
			||||||
			"pxor %%mm7, %%mm7		\n\t"
 | 
					 | 
				
			||||||
			"movl $-128*6, %%eax		\n\t"
 | 
					 | 
				
			||||||
			"1:				\n\t"
 | 
					 | 
				
			||||||
			"movq %%mm7, (%0, %%eax)	\n\t"
 | 
					 | 
				
			||||||
			"movq %%mm7, 8(%0, %%eax)	\n\t"
 | 
					 | 
				
			||||||
			"movq %%mm7, 16(%0, %%eax)	\n\t"
 | 
					 | 
				
			||||||
			"movq %%mm7, 24(%0, %%eax)	\n\t"
 | 
					 | 
				
			||||||
			"addl $32, %%eax		\n\t"
 | 
					 | 
				
			||||||
			" js 1b				\n\t"
 | 
					 | 
				
			||||||
			: : "r" (((int)s->block)+128*6)
 | 
					 | 
				
			||||||
			: "%eax"
 | 
					 | 
				
			||||||
                );
 | 
					 | 
				
			||||||
            }else{
 | 
					 | 
				
			||||||
                memset(s->block, 0, sizeof(s->block));
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
#else
 | 
					 | 
				
			||||||
            memset(s->block, 0, sizeof(s->block));
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
            s->mv_dir = MV_DIR_FORWARD;
 | 
					            s->mv_dir = MV_DIR_FORWARD;
 | 
				
			||||||
            s->mv_type = MV_TYPE_16X16; 
 | 
					            s->mv_type = MV_TYPE_16X16; 
 | 
				
			||||||
            if (s->h263_msmpeg4) {
 | 
					            if (s->h263_msmpeg4) {
 | 
				
			||||||
 | 
				
			|||||||
@ -1025,6 +1025,23 @@ static void   sub_pixels_xy2_mmx( DCTELEM  *block, const UINT8 *pixels, int line
 | 
				
			|||||||
  } while(--h);
 | 
					  } while(--h);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * MMX implementation of the clear_blocks hook (dsputil_init_mmx
					 * installs it).  Writes 128*6 = 768 zero bytes starting at `blocks`,
					 * 32 bytes per loop iteration.  768 bytes matches six blocks of 64
					 * coefficients only if DCTELEM is 2 bytes wide -- presumably a 16-bit
					 * type; TODO confirm against the DCTELEM typedef.
					 *
					 * The asm operand is the END of the region (blocks + 768) and %eax
					 * counts up from -768 to 0, so the add that advances the index also
					 * sets the sign flag tested by the loop branch.
					 *
					 * NOTE(review): the pointer is cast to int, which only holds the full
					 * address where int is as wide as a pointer (e.g. 32-bit x86).
					 * NOTE(review): the clobber list names only %eax; %mm7 and the
					 * memory written through %0 are not declared -- confirm this is
					 * acceptable for the compilers in use.
					 */
					static void clear_blocks_mmx(DCTELEM *blocks)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					        asm volatile(
 | 
				
			||||||
 | 
					                "pxor %%mm7, %%mm7		\n\t" /* mm7 = 0 */
 | 
				
			||||||
 | 
					                "movl $-128*6, %%eax		\n\t" /* eax = -768, negative byte offset from region end */
 | 
				
			||||||
 | 
					                "1:				\n\t"
 | 
				
			||||||
 | 
					                "movq %%mm7, (%0, %%eax)	\n\t" /* four 8-byte stores = 32 bytes per pass */
 | 
				
			||||||
 | 
					                "movq %%mm7, 8(%0, %%eax)	\n\t"
 | 
				
			||||||
 | 
					                "movq %%mm7, 16(%0, %%eax)	\n\t"
 | 
				
			||||||
 | 
					                "movq %%mm7, 24(%0, %%eax)	\n\t"
 | 
				
			||||||
 | 
					                "addl $32, %%eax		\n\t" /* advance offset; sets SF while still negative */
 | 
				
			||||||
 | 
					                " js 1b				\n\t" /* repeat until offset reaches 0 (24 passes) */
 | 
				
			||||||
 | 
					                : : "r" (((int)blocks)+128*6) /* %0 = address one past the region to clear */
 | 
				
			||||||
 | 
					                : "%eax"
 | 
				
			||||||
 | 
					        );
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void just_return() { return; }
 | 
					static void just_return() { return; }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void dsputil_init_mmx(void)
 | 
					void dsputil_init_mmx(void)
 | 
				
			||||||
@ -1049,7 +1066,8 @@ void dsputil_init_mmx(void)
 | 
				
			|||||||
        get_pixels = get_pixels_mmx;
 | 
					        get_pixels = get_pixels_mmx;
 | 
				
			||||||
        put_pixels_clamped = put_pixels_clamped_mmx;
 | 
					        put_pixels_clamped = put_pixels_clamped_mmx;
 | 
				
			||||||
        add_pixels_clamped = add_pixels_clamped_mmx;
 | 
					        add_pixels_clamped = add_pixels_clamped_mmx;
 | 
				
			||||||
        
 | 
					        clear_blocks= clear_blocks_mmx;
 | 
				
			||||||
 | 
					       
 | 
				
			||||||
        pix_abs16x16     = pix_abs16x16_mmx;
 | 
					        pix_abs16x16     = pix_abs16x16_mmx;
 | 
				
			||||||
        pix_abs16x16_x2  = pix_abs16x16_x2_mmx;
 | 
					        pix_abs16x16_x2  = pix_abs16x16_x2_mmx;
 | 
				
			||||||
        pix_abs16x16_y2  = pix_abs16x16_y2_mmx;
 | 
					        pix_abs16x16_y2  = pix_abs16x16_y2_mmx;
 | 
				
			||||||
 | 
				
			|||||||
@ -1402,7 +1402,7 @@ static int mpeg_decode_slice(AVCodecContext *avctx,
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for(;;) {
 | 
					    for(;;) {
 | 
				
			||||||
        memset(s->block, 0, sizeof(s->block));
 | 
					        clear_blocks(s->block[0]);
 | 
				
			||||||
        ret = mpeg_decode_mb(s, s->block);
 | 
					        ret = mpeg_decode_mb(s, s->block);
 | 
				
			||||||
        dprintf("ret=%d\n", ret);
 | 
					        dprintf("ret=%d\n", ret);
 | 
				
			||||||
        if (ret < 0)
 | 
					        if (ret < 0)
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user