Move the v{Y,C}CoeffsBank vectors into the SwsContext, filling them in just
once when the scaler is initialized, instead of building them and freeing them over and over. This gives massive performance improvements. Patch by Alan Curry, pacman*at*TheWorld*dot*com. Originally committed as revision 17589 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc
This commit is contained in:
		
							parent
							
								
									c9fa86df9d
								
							
						
					
					
						commit
						d33d485e83
					
				@ -2110,6 +2110,25 @@ SwsContext *sws_getContext(int srcW, int srcH, int origSrcFormat, int dstW, int
 | 
				
			|||||||
				c->chrSrcH, c->chrDstH, filterAlign, (1<<12)-4,
 | 
									c->chrSrcH, c->chrDstH, filterAlign, (1<<12)-4,
 | 
				
			||||||
				(flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
 | 
									(flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
 | 
				
			||||||
				srcFilter->chrV, dstFilter->chrV, c->param);
 | 
									srcFilter->chrV, dstFilter->chrV, c->param);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef HAVE_ALTIVEC
 | 
				
			||||||
 | 
							c->vYCoeffsBank = memalign (16, sizeof (vector signed short)*c->vLumFilterSize*c->dstH);
 | 
				
			||||||
 | 
							c->vCCoeffsBank = memalign (16, sizeof (vector signed short)*c->vChrFilterSize*c->dstH);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							for (i=0;i<c->vLumFilterSize*c->dstH;i++) {
 | 
				
			||||||
 | 
					                  int j;
 | 
				
			||||||
 | 
							  short *p = (short *)&c->vYCoeffsBank[i];
 | 
				
			||||||
 | 
							  for (j=0;j<8;j++)
 | 
				
			||||||
 | 
							    p[j] = c->vLumFilter[i];
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							for (i=0;i<c->vChrFilterSize*c->dstH;i++) {
 | 
				
			||||||
 | 
					                  int j;
 | 
				
			||||||
 | 
							  short *p = (short *)&c->vCCoeffsBank[i];
 | 
				
			||||||
 | 
							  for (j=0;j<8;j++)
 | 
				
			||||||
 | 
							    p[j] = c->vChrFilter[i];
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// Calculate Buffer Sizes so that they won't run out while handling these damn slices
 | 
						// Calculate Buffer Sizes so that they won't run out while handling these damn slices
 | 
				
			||||||
@ -2644,6 +2663,12 @@ void sws_freeContext(SwsContext *c){
 | 
				
			|||||||
	c->hLumFilter = NULL;
 | 
						c->hLumFilter = NULL;
 | 
				
			||||||
	if(c->hChrFilter) free(c->hChrFilter);
 | 
						if(c->hChrFilter) free(c->hChrFilter);
 | 
				
			||||||
	c->hChrFilter = NULL;
 | 
						c->hChrFilter = NULL;
 | 
				
			||||||
 | 
					#ifdef HAVE_ALTIVEC
 | 
				
			||||||
 | 
						if(c->vYCoeffsBank) free(c->vYCoeffsBank);
 | 
				
			||||||
 | 
						c->vYCoeffsBank = NULL;
 | 
				
			||||||
 | 
						if(c->vCCoeffsBank) free(c->vCCoeffsBank);
 | 
				
			||||||
 | 
						c->vCCoeffsBank = NULL;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if(c->vLumFilterPos) free(c->vLumFilterPos);
 | 
						if(c->vLumFilterPos) free(c->vLumFilterPos);
 | 
				
			||||||
	c->vLumFilterPos = NULL;
 | 
						c->vLumFilterPos = NULL;
 | 
				
			||||||
 | 
				
			|||||||
@ -154,6 +154,7 @@ typedef struct SwsContext{
 | 
				
			|||||||
  vector signed short   CGV;
 | 
					  vector signed short   CGV;
 | 
				
			||||||
  vector signed short   OY;
 | 
					  vector signed short   OY;
 | 
				
			||||||
  vector unsigned short CSHIFT;
 | 
					  vector unsigned short CSHIFT;
 | 
				
			||||||
 | 
					  vector signed short *vYCoeffsBank, *vCCoeffsBank;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -774,8 +774,6 @@ altivec_yuv2packedX (SwsContext *c,
 | 
				
			|||||||
		       uint8_t *dest, int dstW, int dstY)
 | 
							       uint8_t *dest, int dstW, int dstY)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  int i,j;
 | 
					  int i,j;
 | 
				
			||||||
  short tmp __attribute__((aligned (16)));
 | 
					 | 
				
			||||||
  int16_t *p;
 | 
					 | 
				
			||||||
  short *f;
 | 
					  short *f;
 | 
				
			||||||
  vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V;
 | 
					  vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V;
 | 
				
			||||||
  vector signed short R0,G0,B0,R1,G1,B1;
 | 
					  vector signed short R0,G0,B0,R1,G1,B1;
 | 
				
			||||||
@ -787,29 +785,10 @@ altivec_yuv2packedX (SwsContext *c,
 | 
				
			|||||||
  vector unsigned short SCL = vec_splat((vector unsigned short)AVV(4),0);
 | 
					  vector unsigned short SCL = vec_splat((vector unsigned short)AVV(4),0);
 | 
				
			||||||
  unsigned long scratch[16] __attribute__ ((aligned (16)));
 | 
					  unsigned long scratch[16] __attribute__ ((aligned (16)));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  vector signed short *vYCoeffsBank, *vCCoeffsBank;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  vector signed short *YCoeffs, *CCoeffs;
 | 
					  vector signed short *YCoeffs, *CCoeffs;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  vYCoeffsBank = memalign (16, sizeof (vector signed short)*lumFilterSize*c->dstH);
 | 
					  YCoeffs = c->vYCoeffsBank+dstY*lumFilterSize;
 | 
				
			||||||
  vCCoeffsBank = memalign (16, sizeof (vector signed short)*chrFilterSize*c->dstH);
 | 
					  CCoeffs = c->vCCoeffsBank+dstY*chrFilterSize;
 | 
				
			||||||
 | 
					 | 
				
			||||||
  for (i=0;i<lumFilterSize*c->dstH;i++) {
 | 
					 | 
				
			||||||
    tmp = c->vLumFilter[i];
 | 
					 | 
				
			||||||
    p = &vYCoeffsBank[i];
 | 
					 | 
				
			||||||
    for (j=0;j<8;j++)
 | 
					 | 
				
			||||||
      p[j] = tmp;
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  for (i=0;i<chrFilterSize*c->dstH;i++) {
 | 
					 | 
				
			||||||
    tmp = c->vChrFilter[i];
 | 
					 | 
				
			||||||
    p = &vCCoeffsBank[i];
 | 
					 | 
				
			||||||
    for (j=0;j<8;j++)
 | 
					 | 
				
			||||||
      p[j] = tmp;
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  YCoeffs = vYCoeffsBank+dstY*lumFilterSize;
 | 
					 | 
				
			||||||
  CCoeffs = vCCoeffsBank+dstY*chrFilterSize;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
  out = (vector unsigned char *)dest;
 | 
					  out = (vector unsigned char *)dest;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -962,7 +941,4 @@ altivec_yuv2packedX (SwsContext *c,
 | 
				
			|||||||
    memcpy (&((uint32_t*)dest)[i], scratch, (dstW-i)/4);
 | 
					    memcpy (&((uint32_t*)dest)[i], scratch, (dstW-i)/4);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  if (vYCoeffsBank) free (vYCoeffsBank);
 | 
					 | 
				
			||||||
  if (vCCoeffsBank) free (vCCoeffsBank);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user