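Avoid one transpose in the H.264 MMX2 IDCT by using pre-transposed zigzag/field scan tables when the non-C IDCT is selected (730 -> 680 dezicycles on a Duron)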
Originally committed as revision 4332 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
85bbfcd4ee
commit
e4b36d4434
@ -333,6 +333,8 @@ typedef struct H264Context{
|
|||||||
uint8_t *direct_table;
|
uint8_t *direct_table;
|
||||||
uint8_t direct_cache[5*8];
|
uint8_t direct_cache[5*8];
|
||||||
|
|
||||||
|
uint8_t zigzag_scan[16];
|
||||||
|
uint8_t field_scan[16];
|
||||||
}H264Context;
|
}H264Context;
|
||||||
|
|
||||||
static VLC coeff_token_vlc[4];
|
static VLC coeff_token_vlc[4];
|
||||||
@ -2721,6 +2723,18 @@ static int decode_init(AVCodecContext *avctx){
|
|||||||
s->low_delay= 1;
|
s->low_delay= 1;
|
||||||
avctx->pix_fmt= PIX_FMT_YUV420P;
|
avctx->pix_fmt= PIX_FMT_YUV420P;
|
||||||
|
|
||||||
|
if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
|
||||||
|
memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
|
||||||
|
memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
|
||||||
|
}else{
|
||||||
|
int i;
|
||||||
|
for(i=0; i<16; i++){
|
||||||
|
#define T(x) (x>>2) | ((x<<2) & 0xF)
|
||||||
|
h->zigzag_scan[i] = T(zigzag_scan[i]);
|
||||||
|
h-> field_scan[i] = T( field_scan[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
decode_init_vlc(h);
|
decode_init_vlc(h);
|
||||||
|
|
||||||
if(avctx->extradata_size > 0 && avctx->extradata &&
|
if(avctx->extradata_size > 0 && avctx->extradata &&
|
||||||
@ -4591,10 +4605,10 @@ decode_intra_mb:
|
|||||||
// fill_non_zero_count_cache(h);
|
// fill_non_zero_count_cache(h);
|
||||||
|
|
||||||
if(IS_INTERLACED(mb_type)){
|
if(IS_INTERLACED(mb_type)){
|
||||||
scan= field_scan;
|
scan= h->field_scan;
|
||||||
dc_scan= luma_dc_field_scan;
|
dc_scan= luma_dc_field_scan;
|
||||||
}else{
|
}else{
|
||||||
scan= zigzag_scan;
|
scan= h->zigzag_scan;
|
||||||
dc_scan= luma_dc_zigzag_scan;
|
dc_scan= luma_dc_zigzag_scan;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -5575,10 +5589,10 @@ decode_intra_mb:
|
|||||||
int dqp;
|
int dqp;
|
||||||
|
|
||||||
if(IS_INTERLACED(mb_type)){
|
if(IS_INTERLACED(mb_type)){
|
||||||
scan= field_scan;
|
scan= h->field_scan;
|
||||||
dc_scan= luma_dc_field_scan;
|
dc_scan= luma_dc_field_scan;
|
||||||
}else{
|
}else{
|
||||||
scan= zigzag_scan;
|
scan= h->zigzag_scan;
|
||||||
dc_scan= luma_dc_zigzag_scan;
|
dc_scan= luma_dc_zigzag_scan;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -673,14 +673,11 @@ void ff_h264_idct_add_mmx2(uint8_t *dst, int16_t *block, int stride)
|
|||||||
/* mm2=s02+s13 mm3=s02-s13 mm4=d02+d13 mm1=d02-d13 */
|
/* mm2=s02+s13 mm3=s02-s13 mm4=d02+d13 mm1=d02-d13 */
|
||||||
IDCT4_1D( %%mm3, %%mm2, %%mm1, %%mm0, %%mm4, %%mm5 )
|
IDCT4_1D( %%mm3, %%mm2, %%mm1, %%mm0, %%mm4, %%mm5 )
|
||||||
|
|
||||||
/* in: 2,4,1,3 out: 2,3,0,1 */
|
|
||||||
TRANSPOSE4( %%mm2, %%mm4, %%mm1, %%mm3, %%mm0 )
|
|
||||||
|
|
||||||
"pxor %%mm7, %%mm7 \n\t"
|
"pxor %%mm7, %%mm7 \n\t"
|
||||||
:: "m"(ff_pw_32));
|
:: "m"(ff_pw_32));
|
||||||
|
|
||||||
STORE_DIFF_4P( %%mm2, %%mm4, %%mm7, &dst[0*stride] );
|
STORE_DIFF_4P( %%mm2, %%mm0, %%mm7, &dst[0*stride] );
|
||||||
STORE_DIFF_4P( %%mm3, %%mm4, %%mm7, &dst[1*stride] );
|
STORE_DIFF_4P( %%mm4, %%mm0, %%mm7, &dst[1*stride] );
|
||||||
STORE_DIFF_4P( %%mm0, %%mm4, %%mm7, &dst[2*stride] );
|
STORE_DIFF_4P( %%mm1, %%mm0, %%mm7, &dst[2*stride] );
|
||||||
STORE_DIFF_4P( %%mm1, %%mm4, %%mm7, &dst[3*stride] );
|
STORE_DIFF_4P( %%mm3, %%mm0, %%mm7, &dst[3*stride] );
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user