/* Commit note: 2% faster horizontal mmx2 scaler. Originally committed as revision 5453 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc */
/*
 | 
						|
    Copyright (C) 2001-2002 Michael Niedermayer <michaelni@gmx.at>
 | 
						|
 | 
						|
    This program is free software; you can redistribute it and/or modify
 | 
						|
    it under the terms of the GNU General Public License as published by
 | 
						|
    the Free Software Foundation; either version 2 of the License, or
 | 
						|
    (at your option) any later version.
 | 
						|
 | 
						|
    This program is distributed in the hope that it will be useful,
 | 
						|
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
						|
    GNU General Public License for more details.
 | 
						|
 | 
						|
    You should have received a copy of the GNU General Public License
 | 
						|
    along with this program; if not, write to the Free Software
 | 
						|
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | 
						|
*/
 | 
						|
 | 
						|
/*
 * Values for the scaler flags. Note that the numbers accepted on the
 * command line are different from these.
 * Each flag occupies its own bit, so they can be tested independently.
 */
#define SWS_FAST_BILINEAR 1
#define SWS_BILINEAR 2
#define SWS_BICUBIC  4
#define SWS_X        8
#define SWS_POINT    0x10
#define SWS_AREA     0x20

// the following 4 flags are not completely implemented
// internal chrominance subsampling info
#define SWS_FULL_CHR_V		0x100
#define SWS_FULL_CHR_H_INT	0x200
// input subsampling info
#define SWS_FULL_CHR_H_INP	0x400
#define SWS_DIRECT_BGR		0x800

#define SWS_PRINT_INFO 0x1000

// NOTE(review): not a flag bit (floating-point constant); its exact use is
// not visible from this header -- confirm against the implementation.
#define SWS_MAX_REDUCE_CUTOFF 0.002
 | 
						|
 | 
						|
/*
 * State of one scaler instance (see getSwsContext() / freeSwsContext()).
 *
 * This struct should be aligned on at least a 32-byte boundary. The
 * aligned(32) members below raise the alignment the compiler uses for the
 * type, but a plain malloc() is not required to honour that, so whoever
 * allocates a context has to guarantee the alignment.
 *
 * NOTE(review): the per-group remarks below are inferred from the field
 * names only; confirm them against the implementation. Field order and sizes
 * are kept exactly as they were because other code may depend on the layout.
 */
typedef struct SwsContext{
	/* presumably luma (src/dst) and chroma (chrSrc/chrDst) plane sizes */
	int srcW, srcH, dstW, dstH;
	int chrSrcW, chrSrcH, chrDstW, chrDstH;
	/* presumably horizontal / vertical scaling increments */
	int lumXInc, chrXInc;
	int lumYInc, chrYInc;
	int dstFormat, srcFormat;

	int16_t **lumPixBuf;
	int16_t **chrPixBuf;
	/* horizontal (h*) and vertical (v*) filters with their position tables */
	int16_t *hLumFilter;
	int16_t *hLumFilterPos;
	int16_t *hChrFilter;
	int16_t *hChrFilterPos;
	int16_t *vLumFilter;
	int16_t *vLumFilterPos;
	int16_t *vChrFilter;
	int16_t *vChrFilterPos;

// Contain simply the values from v(Lum|Chr)Filter just nicely packed for mmx
	int16_t  *lumMmxFilter;
	int16_t  *chrMmxFilter;
	uint8_t formatConvBuffer[4000]; //FIXME dynamic alloc, but we have to change a lot of code for this to be useful

	int hLumFilterSize;
	int hChrFilterSize;
	int vLumFilterSize;
	int vChrFilterSize;
	int vLumBufSize;
	int vChrBufSize;

	/* 32-byte aligned byte buffers; these two members are what give the
	   whole struct its 32-byte alignment requirement */
	uint8_t __attribute__((aligned(32))) funnyYCode[10000];
	uint8_t __attribute__((aligned(32))) funnyUVCode[10000];
	int32_t *lumMmx2FilterPos;
	int32_t *chrMmx2FilterPos;
	int16_t *lumMmx2Filter;
	int16_t *chrMmx2Filter;

	int canMMX2BeUsed;

	int lastInLumBuf;
	int lastInChrBuf;
	int lumBufIndex;
	int chrBufIndex;
	int dstY;
	int flags;

	/* scaling entry point stored per context; it receives the context
	   itself plus source/destination plane pointers and strides */
	void (*swScale)(struct SwsContext *context, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dst[], int dstStride[]);
} SwsContext;
 | 
						|
//FIXME check init (where 0)
 | 
						|
 | 
						|
// A vector of double coefficients.
// When used for filters, a vector must have an odd number of elements.
// Coefficient arrays cannot be shared between vectors (each vector needs its
// own coeff array).
typedef struct {
	double *coeff;	// coefficient array; presumably holds `length` entries
	int length;
} SwsVector;
 | 
						|
 | 
						|
// vectors can be shared
 | 
						|
typedef struct {
 | 
						|
	SwsVector *lumH;
 | 
						|
	SwsVector *lumV;
 | 
						|
	SwsVector *chrH;
 | 
						|
	SwsVector *chrV;
 | 
						|
} SwsFilter;
 | 
						|
 | 
						|
 | 
						|
// *** bilinear scaling and yuv->rgb & yuv->yuv conversion of yv12 slices:
// *** Note: it's called multiple times while decoding a frame, first time y==0
// dstbpp == 12 -> yv12 output
// will use sws_flags
// Note: unlike the swScale callback in SwsContext, dstStride is a single int
// here, not an array.
void SwScale_YV12slice(unsigned char* src[],int srcStride[], int srcSliceY,
			     int srcSliceH, uint8_t* dst[], int dstStride, int dstbpp,
			     int srcW, int srcH, int dstW, int dstH);
 | 
						|
 | 
						|
// Obsolete, will be removed soon
// Declared with (void) so it is a real prototype: an empty parameter list in
// C leaves the arguments unspecified and unchecked.
void SwScale_Init(void);
 | 
						|
 | 
						|
 | 
						|
 | 
						|
void freeSwsContext(SwsContext *swsContext);
 | 
						|
 | 
						|
SwsContext *getSwsContextFromCmdLine(int srcW, int srcH, int srcFormat, int dstW, int dstH, int dstFormat);
 | 
						|
SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, int dstFormat, int flags,
 | 
						|
			 SwsFilter *srcFilter, SwsFilter *dstFilter);
 | 
						|
 | 
						|
/*
 * SwsVector helpers.
 * NOTE(review): behaviour is not visible from this header. Judging by the
 * signatures, the functions returning SwsVector * presumably produce a new
 * vector, and the void functions taking `a` presumably modify `a` in place
 * -- confirm both, and who must call freeVec(), in the implementation.
 */
SwsVector *getGaussianVec(double variance, double quality);
SwsVector *getConstVec(double c, int length);
SwsVector *getIdentityVec(void);
void scaleVec(SwsVector *a, double scalar);
void normalizeVec(SwsVector *a, double height);
void convVec(SwsVector *a, SwsVector *b);
void addVec(SwsVector *a, SwsVector *b);
void subVec(SwsVector *a, SwsVector *b);
void shiftVec(SwsVector *a, int shift);
SwsVector *cloneVec(SwsVector *a);

void printVec(SwsVector *a);
void freeVec(SwsVector *a);
 | 
						|
 |