Use updated motion compensation routines.
Originally committed as revision 713 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
		
							parent
							
								
									3530320dbf
								
							
						
					
					
						commit
						dde3f77dbc
					
				| @ -105,37 +105,23 @@ void add_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels, | |||||||
| } | } | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
| /* Average 8 unsigned bytes in parallel: (b1 + b2) >> 1
 | static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b) | ||||||
|    Since the intermediate result could be greater than 255, we do the |  | ||||||
|    shift first. The result is too low by one if the bytes were both |  | ||||||
|    odd, so we need to add (l1 & l2) & BYTE_VEC(0x01).  */ |  | ||||||
| static inline UINT64 avg2_no_rnd(UINT64 l1, UINT64 l2) |  | ||||||
| { | { | ||||||
|     UINT64 correction = (l1 & l2) & BYTE_VEC(0x01); |     return (a & b) + (((a ^ b) & BYTE_VEC(0xfe)) >> 1); | ||||||
|     l1 = (l1 & ~BYTE_VEC(0x01)) >> 1; |  | ||||||
|     l2 = (l2 & ~BYTE_VEC(0x01)) >> 1; |  | ||||||
|     return l1 + l2 + correction; |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /* Average 8 bytes with rounding: (b1 + b2 + 1) >> 1
 | static inline uint64_t avg2(uint64_t a, uint64_t b) | ||||||
|    The '1' only has an effect when one byte is even and the other odd, |  | ||||||
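|    i.e. we also need to add (l1 ^ l2) & BYTE_VEC(0x01). |  | ||||||
|    Because we shift first, we lose one more when both bytes are odd, |  | ||||||
|    (l1 & l2) & BYTE_VEC(0x01); the two cases are disjoint, so the |  | ||||||
|    total correction is (l1 | l2) & BYTE_VEC(0x01).  */ |  | ||||||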
| static inline UINT64 avg2(UINT64 l1, UINT64 l2) |  | ||||||
| { | { | ||||||
|     UINT64 correction = (l1 | l2) & BYTE_VEC(0x01); |     return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1);     | ||||||
|     l1 = (l1 & ~BYTE_VEC(0x01)) >> 1; |  | ||||||
|     l2 = (l2 & ~BYTE_VEC(0x01)) >> 1; |  | ||||||
|     return l1 + l2 + correction; |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static inline UINT64 avg4(UINT64 l1, UINT64 l2, UINT64 l3, UINT64 l4) | static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4) | ||||||
| { | { | ||||||
|     UINT64 r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2) |     uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2) | ||||||
| 		+ ((l2 & ~BYTE_VEC(0x03)) >> 2) | 		+ ((l2 & ~BYTE_VEC(0x03)) >> 2) | ||||||
| 		+ ((l3 & ~BYTE_VEC(0x03)) >> 2) | 		+ ((l3 & ~BYTE_VEC(0x03)) >> 2) | ||||||
| 		+ ((l4 & ~BYTE_VEC(0x03)) >> 2); | 		+ ((l4 & ~BYTE_VEC(0x03)) >> 2); | ||||||
|     UINT64 r2 = ((  (l1 & BYTE_VEC(0x03)) |     uint64_t r2 = ((  (l1 & BYTE_VEC(0x03)) | ||||||
| 		    + (l2 & BYTE_VEC(0x03)) | 		    + (l2 & BYTE_VEC(0x03)) | ||||||
| 		    + (l3 & BYTE_VEC(0x03)) | 		    + (l3 & BYTE_VEC(0x03)) | ||||||
| 		    + (l4 & BYTE_VEC(0x03)) | 		    + (l4 & BYTE_VEC(0x03)) | ||||||
| @ -143,13 +129,14 @@ static inline UINT64 avg4(UINT64 l1, UINT64 l2, UINT64 l3, UINT64 l4) | |||||||
|     return r1 + r2; |     return r1 + r2; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static inline UINT64 avg4_no_rnd(UINT64 l1, UINT64 l2, UINT64 l3, UINT64 l4) | static inline uint64_t avg4_no_rnd(uint64_t l1, uint64_t l2, | ||||||
|  | 				   uint64_t l3, uint64_t l4) | ||||||
| { | { | ||||||
|     UINT64 r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2) |     uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2) | ||||||
| 		+ ((l2 & ~BYTE_VEC(0x03)) >> 2) | 		+ ((l2 & ~BYTE_VEC(0x03)) >> 2) | ||||||
| 		+ ((l3 & ~BYTE_VEC(0x03)) >> 2) | 		+ ((l3 & ~BYTE_VEC(0x03)) >> 2) | ||||||
| 		+ ((l4 & ~BYTE_VEC(0x03)) >> 2); | 		+ ((l4 & ~BYTE_VEC(0x03)) >> 2); | ||||||
|     UINT64 r2 = (( (l1 & BYTE_VEC(0x03)) |     uint64_t r2 = ((  (l1 & BYTE_VEC(0x03)) | ||||||
| 		    + (l2 & BYTE_VEC(0x03)) | 		    + (l2 & BYTE_VEC(0x03)) | ||||||
| 		    + (l3 & BYTE_VEC(0x03)) | 		    + (l3 & BYTE_VEC(0x03)) | ||||||
| 		    + (l4 & BYTE_VEC(0x03)) | 		    + (l4 & BYTE_VEC(0x03)) | ||||||
| @ -157,80 +144,98 @@ static inline UINT64 avg4_no_rnd(UINT64 l1, UINT64 l2, UINT64 l3, UINT64 l4) | |||||||
|     return r1 + r2; |     return r1 + r2; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| #define PIXOPNAME(suffix) put ## suffix | #define OP(LOAD, STORE, INCR)			\ | ||||||
| #define BTYPE UINT8 |     do {					\ | ||||||
|  | 	STORE(LOAD(pixels), block);		\ | ||||||
|  | 	pixels += line_size;			\ | ||||||
|  | 	block += INCR;				\ | ||||||
|  |     } while (--h) | ||||||
|  | 
 | ||||||
|  | #define OP_X2(LOAD, STORE, INCR)				\ | ||||||
|  |     do {							\ | ||||||
|  | 	uint64_t pix1, pix2;					\ | ||||||
|  | 								\ | ||||||
|  | 	pix1 = LOAD(pixels);					\ | ||||||
|  | 	pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56);	\ | ||||||
|  | 	STORE(AVG2(pix1, pix2), block);				\ | ||||||
|  | 	pixels += line_size;					\ | ||||||
|  | 	block += INCR;						\ | ||||||
|  |     } while (--h) | ||||||
|  | 
 | ||||||
|  | #define OP_Y2(LOAD, STORE, INCR)		\ | ||||||
|  |     do {					\ | ||||||
|  | 	uint64_t pix = LOAD(pixels);		\ | ||||||
|  | 	do {					\ | ||||||
|  | 	    uint64_t next_pix;			\ | ||||||
|  | 						\ | ||||||
|  | 	    pixels += line_size;		\ | ||||||
|  | 	    next_pix = LOAD(pixels);		\ | ||||||
|  | 	    STORE(AVG2(pix, next_pix), block);	\ | ||||||
|  | 	    block += INCR;			\ | ||||||
|  | 	    pix = next_pix;			\ | ||||||
|  | 	} while (--h);				\ | ||||||
|  |     } while (0) | ||||||
|  | 
 | ||||||
|  | #define OP_XY2(LOAD, STORE, INCR)					\ | ||||||
|  |     do {								\ | ||||||
|  | 	uint64_t pix1 = LOAD(pixels);					\ | ||||||
|  | 	uint64_t pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56);	\ | ||||||
|  | 									\ | ||||||
|  | 	do {								\ | ||||||
|  | 	    uint64_t next_pix1, next_pix2;				\ | ||||||
|  | 									\ | ||||||
|  | 	    pixels += line_size;					\ | ||||||
|  | 	    next_pix1 = LOAD(pixels);					\ | ||||||
|  | 	    next_pix2 = next_pix1 >> 8 | ((uint64_t) pixels[8] << 56);	\ | ||||||
|  | 									\ | ||||||
|  | 	    STORE(AVG4(pix1, pix2, next_pix1, next_pix2), block);	\ | ||||||
|  | 									\ | ||||||
|  | 	    block += INCR;						\ | ||||||
|  | 	    pix1 = next_pix1;						\ | ||||||
|  | 	    pix2 = next_pix2;						\ | ||||||
|  | 	} while (--h);							\ | ||||||
|  |     } while (0) | ||||||
|  | 
 | ||||||
|  | #define MAKE_OP(BTYPE, OPNAME, SUFF, OPKIND, STORE, INCR)		\ | ||||||
|  | static void OPNAME ## _pixels ## SUFF ## _axp(BTYPE *block,		\ | ||||||
|  | 					      const uint8_t *pixels,	\ | ||||||
|  | 					      int line_size, int h)	\ | ||||||
|  | {									\ | ||||||
|  |     if ((size_t) pixels & 0x7) {					\ | ||||||
|  | 	OPKIND(uldq, STORE, INCR);					\ | ||||||
|  |     } else {								\ | ||||||
|  | 	OPKIND(ldq, STORE, INCR);					\ | ||||||
|  |     }									\ | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | #define PIXOP(BTYPE, OPNAME, STORE, INCR)		\ | ||||||
|  |     MAKE_OP(BTYPE, OPNAME, ,	 OP,	 STORE, INCR);	\ | ||||||
|  |     MAKE_OP(BTYPE, OPNAME, _x2,	 OP_X2,	 STORE, INCR);	\ | ||||||
|  |     MAKE_OP(BTYPE, OPNAME, _y2,	 OP_Y2,	 STORE, INCR);	\ | ||||||
|  |     MAKE_OP(BTYPE, OPNAME, _xy2, OP_XY2, STORE, INCR); | ||||||
|  | 
 | ||||||
|  | /* Rounding primitives.  */ | ||||||
| #define AVG2 avg2 | #define AVG2 avg2 | ||||||
| #define AVG4 avg4 | #define AVG4 avg4 | ||||||
| #define STORE(l, b) stq(l, b) | #define STORE(l, b) stq(l, b) | ||||||
| #include "pixops.h" | PIXOP(uint8_t, put, STORE, line_size); | ||||||
| #undef PIXOPNAME | 
 | ||||||
| #undef BTYPE | #undef STORE | ||||||
|  | #define STORE(l, b) stq(AVG2(l, ldq(b)), b); | ||||||
|  | PIXOP(uint8_t, avg, STORE, line_size); | ||||||
|  | 
 | ||||||
|  | /* Non-rounding primitives.  */ | ||||||
| #undef AVG2 | #undef AVG2 | ||||||
| #undef AVG4 | #undef AVG4 | ||||||
| #undef STORE | #undef STORE | ||||||
| 
 |  | ||||||
| #define PIXOPNAME(suffix) put_no_rnd ## suffix |  | ||||||
| #define BTYPE UINT8 |  | ||||||
| #define AVG2 avg2_no_rnd | #define AVG2 avg2_no_rnd | ||||||
| #define AVG4 avg4_no_rnd | #define AVG4 avg4_no_rnd | ||||||
| #define STORE(l, b) stq(l, b) | #define STORE(l, b) stq(l, b) | ||||||
| #include "pixops.h" | PIXOP(uint8_t, put_no_rnd, STORE, line_size); | ||||||
| #undef PIXOPNAME | 
 | ||||||
| #undef BTYPE |  | ||||||
| #undef AVG2 |  | ||||||
| #undef AVG4 |  | ||||||
| #undef STORE | #undef STORE | ||||||
| 
 |  | ||||||
| /* The following functions are untested.  */ |  | ||||||
| #if 0 |  | ||||||
| 
 |  | ||||||
| #define PIXOPNAME(suffix) avg ## suffix |  | ||||||
| #define BTYPE UINT8 |  | ||||||
| #define AVG2 avg2 |  | ||||||
| #define AVG4 avg4 |  | ||||||
| #define STORE(l, b) stq(AVG2(l, ldq(b)), b); | #define STORE(l, b) stq(AVG2(l, ldq(b)), b); | ||||||
| #include "pixops.h" | PIXOP(uint8_t, avg_no_rnd, STORE, line_size); | ||||||
| #undef PIXOPNAME |  | ||||||
| #undef BTYPE |  | ||||||
| #undef AVG2 |  | ||||||
| #undef AVG4 |  | ||||||
| #undef STORE |  | ||||||
| 
 |  | ||||||
| #define PIXOPNAME(suffix) avg_no_rnd ## suffix |  | ||||||
| #define BTYPE UINT8 |  | ||||||
| #define AVG2 avg2_no_rnd |  | ||||||
| #define AVG4 avg4_no_rnd |  | ||||||
| #define STORE(l, b) stq(AVG2(l, ldq(b)), b); |  | ||||||
| #include "pixops.h" |  | ||||||
| #undef PIXOPNAME |  | ||||||
| #undef BTYPE |  | ||||||
| #undef AVG2 |  | ||||||
| #undef AVG4 |  | ||||||
| #undef STORE |  | ||||||
| 
 |  | ||||||
| #define PIXOPNAME(suffix) sub ## suffix |  | ||||||
| #define BTYPE DCTELEM |  | ||||||
| #define AVG2 avg2 |  | ||||||
| #define AVG4 avg4 |  | ||||||
| #define STORE(l, block) do {		\ |  | ||||||
|     UINT64 xxx = l;			\ |  | ||||||
|     (block)[0] -= (xxx >>  0) & 0xff;	\ |  | ||||||
|     (block)[1] -= (xxx >>  8) & 0xff;	\ |  | ||||||
|     (block)[2] -= (xxx >> 16) & 0xff;	\ |  | ||||||
|     (block)[3] -= (xxx >> 24) & 0xff;	\ |  | ||||||
|     (block)[4] -= (xxx >> 32) & 0xff;	\ |  | ||||||
|     (block)[5] -= (xxx >> 40) & 0xff;	\ |  | ||||||
|     (block)[6] -= (xxx >> 48) & 0xff;	\ |  | ||||||
|     (block)[7] -= (xxx >> 56) & 0xff;	\ |  | ||||||
| } while (0) |  | ||||||
| #include "pixops.h" |  | ||||||
| #undef PIXOPNAME |  | ||||||
| #undef BTYPE |  | ||||||
| #undef AVG2 |  | ||||||
| #undef AVG4 |  | ||||||
| #undef STORE |  | ||||||
| 
 |  | ||||||
| #endif |  | ||||||
| 
 | 
 | ||||||
| void dsputil_init_alpha(void) | void dsputil_init_alpha(void) | ||||||
| { | { | ||||||
| @ -244,6 +249,16 @@ void dsputil_init_alpha(void) | |||||||
|     put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_axp; |     put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_axp; | ||||||
|     put_no_rnd_pixels_tab[3] = put_no_rnd_pixels_xy2_axp; |     put_no_rnd_pixels_tab[3] = put_no_rnd_pixels_xy2_axp; | ||||||
| 
 | 
 | ||||||
|  |     avg_pixels_tab[0] = avg_pixels_axp; | ||||||
|  |     avg_pixels_tab[1] = avg_pixels_x2_axp; | ||||||
|  |     avg_pixels_tab[2] = avg_pixels_y2_axp; | ||||||
|  |     avg_pixels_tab[3] = avg_pixels_xy2_axp; | ||||||
|  | 
 | ||||||
|  |     avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels_axp; | ||||||
|  |     avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels_x2_axp; | ||||||
|  |     avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels_y2_axp; | ||||||
|  |     avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels_xy2_axp; | ||||||
|  | 
 | ||||||
|     /* amask clears all bits that correspond to present features.  */ |     /* amask clears all bits that correspond to present features.  */ | ||||||
|     if (amask(AMASK_MVI) == 0) { |     if (amask(AMASK_MVI) == 0) { | ||||||
|         put_pixels_clamped = put_pixels_clamped_mvi_asm; |         put_pixels_clamped = put_pixels_clamped_mvi_asm; | ||||||
|  | |||||||
| @ -1,135 +0,0 @@ | |||||||
| /*
 |  | ||||||
|  * Alpha optimized DSP utils |  | ||||||
|  * Copyright (c) 2002 Falk Hueffner <falk@debian.org> |  | ||||||
|  * |  | ||||||
|  * This library is free software; you can redistribute it and/or |  | ||||||
|  * modify it under the terms of the GNU Lesser General Public |  | ||||||
|  * License as published by the Free Software Foundation; either |  | ||||||
|  * version 2 of the License, or (at your option) any later version. |  | ||||||
|  * |  | ||||||
|  * This library is distributed in the hope that it will be useful, |  | ||||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU |  | ||||||
|  * Lesser General Public License for more details. |  | ||||||
|  * |  | ||||||
|  * You should have received a copy of the GNU Lesser General Public |  | ||||||
|  * License along with this library; if not, write to the Free Software |  | ||||||
|  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA |  | ||||||
|  */ |  | ||||||
| 
 |  | ||||||
| /* This file is intended to be #included with proper definitions of
 |  | ||||||
|  * PIXOPNAME, BTYPE, AVG2, AVG4 and STORE.  */ |  | ||||||
| 
 |  | ||||||
| static void PIXOPNAME(_pixels_axp)(BTYPE *block, const UINT8 *pixels, |  | ||||||
| 				   int line_size, int h) |  | ||||||
| { |  | ||||||
|     if ((size_t) pixels & 0x7) { |  | ||||||
| 	do { |  | ||||||
| 	    STORE(uldq(pixels), block); |  | ||||||
| 	    pixels += line_size; |  | ||||||
| 	    block  += line_size; |  | ||||||
| 	} while (--h); |  | ||||||
|     } else { |  | ||||||
| 	do { |  | ||||||
| 	    STORE(ldq(pixels), block); |  | ||||||
| 	    pixels += line_size; |  | ||||||
| 	    block  += line_size; |  | ||||||
| 	} while (--h); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static void PIXOPNAME(_pixels_x2_axp)(BTYPE *block, const UINT8 *pixels, |  | ||||||
| 				      int line_size, int h) |  | ||||||
| { |  | ||||||
|     if ((size_t) pixels & 0x7) { |  | ||||||
| 	do { |  | ||||||
| 	    UINT64 pix1, pix2; |  | ||||||
| 
 |  | ||||||
| 	    pix1 = uldq(pixels); |  | ||||||
| 	    pix2 = pix1 >> 8 | ((UINT64) pixels[8] << 56); |  | ||||||
| 	    STORE(AVG2(pix1, pix2), block); |  | ||||||
| 	    pixels += line_size; |  | ||||||
| 	    block += line_size; |  | ||||||
| 	} while (--h); |  | ||||||
|     } else { |  | ||||||
| 	do { |  | ||||||
| 	    UINT64 pix1, pix2; |  | ||||||
| 
 |  | ||||||
| 	    pix1 = ldq(pixels); |  | ||||||
| 	    pix2 = pix1 >> 8 | ((UINT64) pixels[8] << 56); |  | ||||||
| 	    STORE(AVG2(pix1, pix2), block); |  | ||||||
| 	    pixels += line_size; |  | ||||||
| 	    block += line_size; |  | ||||||
| 	} while (--h); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static void PIXOPNAME(_pixels_y2_axp)(BTYPE *block, const UINT8 *pixels, |  | ||||||
| 				      int line_size, int h) |  | ||||||
| { |  | ||||||
|     if ((size_t) pixels & 0x7) { |  | ||||||
| 	UINT64 pix = uldq(pixels); |  | ||||||
| 	do { |  | ||||||
| 	    UINT64 next_pix; |  | ||||||
| 
 |  | ||||||
| 	    pixels += line_size; |  | ||||||
| 	    next_pix = uldq(pixels); |  | ||||||
| 	    STORE(AVG2(pix, next_pix), block); |  | ||||||
| 	    block += line_size; |  | ||||||
| 	    pix = next_pix; |  | ||||||
| 	} while (--h); |  | ||||||
|     } else { |  | ||||||
| 	UINT64 pix = ldq(pixels); |  | ||||||
| 	do { |  | ||||||
| 	    UINT64 next_pix; |  | ||||||
| 
 |  | ||||||
| 	    pixels += line_size; |  | ||||||
| 	    next_pix = ldq(pixels); |  | ||||||
| 	    STORE(AVG2(pix, next_pix), block); |  | ||||||
| 	    block += line_size; |  | ||||||
| 	    pix = next_pix; |  | ||||||
| 	} while (--h); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /* This could be further sped up by recycling AVG4 intermediate
 |  | ||||||
|   results from the previous loop pass.  */ |  | ||||||
| static void PIXOPNAME(_pixels_xy2_axp)(BTYPE *block, const UINT8 *pixels, |  | ||||||
| 				       int line_size, int h) |  | ||||||
| { |  | ||||||
|     if ((size_t) pixels & 0x7) { |  | ||||||
| 	UINT64 pix1 = uldq(pixels); |  | ||||||
| 	UINT64 pix2 = pix1 >> 8 | ((UINT64) pixels[8] << 56); |  | ||||||
| 
 |  | ||||||
| 	do { |  | ||||||
| 	    UINT64 next_pix1, next_pix2; |  | ||||||
| 
 |  | ||||||
| 	    pixels += line_size; |  | ||||||
| 	    next_pix1 = uldq(pixels); |  | ||||||
| 	    next_pix2 = next_pix1 >> 8 | ((UINT64) pixels[8] << 56); |  | ||||||
| 
 |  | ||||||
| 	    STORE(AVG4(pix1, pix2, next_pix1, next_pix2), block); |  | ||||||
| 
 |  | ||||||
| 	    block += line_size; |  | ||||||
| 	    pix1 = next_pix1; |  | ||||||
| 	    pix2 = next_pix2; |  | ||||||
| 	} while (--h); |  | ||||||
|     } else { |  | ||||||
| 	UINT64 pix1 = ldq(pixels); |  | ||||||
| 	UINT64 pix2 = pix1 >> 8 | ((UINT64) pixels[8] << 56); |  | ||||||
| 
 |  | ||||||
| 	do { |  | ||||||
| 	    UINT64 next_pix1, next_pix2; |  | ||||||
| 
 |  | ||||||
| 	    pixels += line_size; |  | ||||||
| 	    next_pix1 = ldq(pixels); |  | ||||||
| 	    next_pix2 = next_pix1 >> 8 | ((UINT64) pixels[8] << 56); |  | ||||||
| 
 |  | ||||||
| 	    STORE(AVG4(pix1, pix2, next_pix1, next_pix2), block); |  | ||||||
| 
 |  | ||||||
| 	    block += line_size; |  | ||||||
| 	    pix1 = next_pix1; |  | ||||||
| 	    pix2 = next_pix2; |  | ||||||
| 	} while (--h); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user