rgb2rgb: remove duplicate mmx/mmx2/3dnow/sse2 functions.
Many functions carry such a prefix but do not actually use any instructions or features from that instruction set, which gives the false impression that swscale is highly optimized for a particular system when in reality it is not.
This commit is contained in:
		
							parent
							
								
									836b82e3c9
								
							
						
					
					
						commit
						522d65ba25
					
				| @ -111,7 +111,7 @@ DECLARE_ASM_CONST(8, uint64_t, blue_15mask)  = 0x0000001f0000001fULL; | |||||||
| #undef COMPILE_TEMPLATE_SSE2 | #undef COMPILE_TEMPLATE_SSE2 | ||||||
| #undef COMPILE_TEMPLATE_AMD3DNOW | #undef COMPILE_TEMPLATE_AMD3DNOW | ||||||
| #define COMPILE_TEMPLATE_MMX2 0 | #define COMPILE_TEMPLATE_MMX2 0 | ||||||
| #define COMPILE_TEMPLATE_SSE2 1 | #define COMPILE_TEMPLATE_SSE2 0 | ||||||
| #define COMPILE_TEMPLATE_AMD3DNOW 1 | #define COMPILE_TEMPLATE_AMD3DNOW 1 | ||||||
| #define RENAME(a) a ## _3DNOW | #define RENAME(a) a ## _3DNOW | ||||||
| #include "rgb2rgb_template.c" | #include "rgb2rgb_template.c" | ||||||
|  | |||||||
| @ -30,15 +30,8 @@ | |||||||
| #undef MOVNTQ | #undef MOVNTQ | ||||||
| #undef EMMS | #undef EMMS | ||||||
| #undef SFENCE | #undef SFENCE | ||||||
| #undef MMREG_SIZE |  | ||||||
| #undef PAVGB | #undef PAVGB | ||||||
| 
 | 
 | ||||||
| #if COMPILE_TEMPLATE_SSE2 |  | ||||||
| #define MMREG_SIZE 16 |  | ||||||
| #else |  | ||||||
| #define MMREG_SIZE 8 |  | ||||||
| #endif |  | ||||||
| 
 |  | ||||||
| #if COMPILE_TEMPLATE_AMD3DNOW | #if COMPILE_TEMPLATE_AMD3DNOW | ||||||
| #define PREFETCH  "prefetch" | #define PREFETCH  "prefetch" | ||||||
| #define PAVGB     "pavgusb" | #define PAVGB     "pavgusb" | ||||||
| @ -64,6 +57,10 @@ | |||||||
| #define SFENCE " # nop" | #define SFENCE " # nop" | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
|  | #if !COMPILE_TEMPLATE_SSE2 | ||||||
|  | 
 | ||||||
|  | #if !COMPILE_TEMPLATE_AMD3DNOW | ||||||
|  | 
 | ||||||
| static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size) | static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size) | ||||||
| { | { | ||||||
|     uint8_t *dest = dst; |     uint8_t *dest = dst; | ||||||
| @ -1513,7 +1510,9 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t | |||||||
|                      SFENCE"     \n\t" |                      SFENCE"     \n\t" | ||||||
|                      :::"memory"); |                      :::"memory"); | ||||||
| } | } | ||||||
|  | #endif /* !COMPILE_TEMPLATE_AMD3DNOW */ | ||||||
| 
 | 
 | ||||||
|  | #if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW | ||||||
| static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWidth, long srcHeight, long srcStride, long dstStride) | static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWidth, long srcHeight, long srcStride, long dstStride) | ||||||
| { | { | ||||||
|     long x,y; |     long x,y; | ||||||
| @ -1530,7 +1529,6 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi | |||||||
|     dst+= dstStride; |     dst+= dstStride; | ||||||
| 
 | 
 | ||||||
|     for (y=1; y<srcHeight; y++) { |     for (y=1; y<srcHeight; y++) { | ||||||
| #if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW |  | ||||||
|         const x86_reg mmxSize= srcWidth&~15; |         const x86_reg mmxSize= srcWidth&~15; | ||||||
|         __asm__ volatile( |         __asm__ volatile( | ||||||
|             "mov           %4, %%"REG_a"            \n\t" |             "mov           %4, %%"REG_a"            \n\t" | ||||||
| @ -1564,17 +1562,10 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi | |||||||
|             "punpckhbw              %%mm3, %%mm7    \n\t" |             "punpckhbw              %%mm3, %%mm7    \n\t" | ||||||
|             "punpcklbw              %%mm2, %%mm4    \n\t" |             "punpcklbw              %%mm2, %%mm4    \n\t" | ||||||
|             "punpckhbw              %%mm2, %%mm6    \n\t" |             "punpckhbw              %%mm2, %%mm6    \n\t" | ||||||
| #if 1 |  | ||||||
|             MOVNTQ"                 %%mm5,  (%2, %%"REG_a", 2)  \n\t" |             MOVNTQ"                 %%mm5,  (%2, %%"REG_a", 2)  \n\t" | ||||||
|             MOVNTQ"                 %%mm7, 8(%2, %%"REG_a", 2)  \n\t" |             MOVNTQ"                 %%mm7, 8(%2, %%"REG_a", 2)  \n\t" | ||||||
|             MOVNTQ"                 %%mm4,  (%3, %%"REG_a", 2)  \n\t" |             MOVNTQ"                 %%mm4,  (%3, %%"REG_a", 2)  \n\t" | ||||||
|             MOVNTQ"                 %%mm6, 8(%3, %%"REG_a", 2)  \n\t" |             MOVNTQ"                 %%mm6, 8(%3, %%"REG_a", 2)  \n\t" | ||||||
| #else |  | ||||||
|             "movq                   %%mm5,  (%2, %%"REG_a", 2)  \n\t" |  | ||||||
|             "movq                   %%mm7, 8(%2, %%"REG_a", 2)  \n\t" |  | ||||||
|             "movq                   %%mm4,  (%3, %%"REG_a", 2)  \n\t" |  | ||||||
|             "movq                   %%mm6, 8(%3, %%"REG_a", 2)  \n\t" |  | ||||||
| #endif |  | ||||||
|             "add                       $8, %%"REG_a"            \n\t" |             "add                       $8, %%"REG_a"            \n\t" | ||||||
|             "movq       -1(%0, %%"REG_a"), %%mm4    \n\t" |             "movq       -1(%0, %%"REG_a"), %%mm4    \n\t" | ||||||
|             "movq       -1(%1, %%"REG_a"), %%mm5    \n\t" |             "movq       -1(%1, %%"REG_a"), %%mm5    \n\t" | ||||||
| @ -1584,12 +1575,6 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi | |||||||
|                "g" (-mmxSize) |                "g" (-mmxSize) | ||||||
|             : "%"REG_a |             : "%"REG_a | ||||||
|         ); |         ); | ||||||
| #else |  | ||||||
|         const x86_reg mmxSize=1; |  | ||||||
| 
 |  | ||||||
|         dst[0        ]= (3*src[0] +   src[srcStride])>>2; |  | ||||||
|         dst[dstStride]= (  src[0] + 3*src[srcStride])>>2; |  | ||||||
| #endif |  | ||||||
| 
 | 
 | ||||||
|         for (x=mmxSize-1; x<srcWidth-1; x++) { |         for (x=mmxSize-1; x<srcWidth-1; x++) { | ||||||
|             dst[2*x          +1]= (3*src[x+0] +   src[x+srcStride+1])>>2; |             dst[2*x          +1]= (3*src[x+0] +   src[x+srcStride+1])>>2; | ||||||
| @ -1605,7 +1590,6 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi | |||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     // last line
 |     // last line
 | ||||||
| #if 1 |  | ||||||
|     dst[0]= src[0]; |     dst[0]= src[0]; | ||||||
| 
 | 
 | ||||||
|     for (x=0; x<srcWidth-1; x++) { |     for (x=0; x<srcWidth-1; x++) { | ||||||
| @ -1613,18 +1597,14 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi | |||||||
|         dst[2*x+2]= (  src[x] + 3*src[x+1])>>2; |         dst[2*x+2]= (  src[x] + 3*src[x+1])>>2; | ||||||
|     } |     } | ||||||
|     dst[2*srcWidth-1]= src[srcWidth-1]; |     dst[2*srcWidth-1]= src[srcWidth-1]; | ||||||
| #else |  | ||||||
|     for (x=0; x<srcWidth; x++) { |  | ||||||
|         dst[2*x+0]= |  | ||||||
|         dst[2*x+1]= src[x]; |  | ||||||
|     } |  | ||||||
| #endif |  | ||||||
| 
 | 
 | ||||||
|     __asm__ volatile(EMMS"       \n\t" |     __asm__ volatile(EMMS"       \n\t" | ||||||
|                      SFENCE"     \n\t" |                      SFENCE"     \n\t" | ||||||
|                      :::"memory"); |                      :::"memory"); | ||||||
| } | } | ||||||
|  | #endif /* COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW */ | ||||||
| 
 | 
 | ||||||
|  | #if !COMPILE_TEMPLATE_AMD3DNOW | ||||||
| /**
 | /**
 | ||||||
|  * Height should be a multiple of 2 and width should be a multiple of 16. |  * Height should be a multiple of 2 and width should be a multiple of 16. | ||||||
|  * (If this is a problem for anyone then tell me, and I will fix it.) |  * (If this is a problem for anyone then tell me, and I will fix it.) | ||||||
| @ -1728,6 +1708,7 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t | |||||||
|                      SFENCE"     \n\t" |                      SFENCE"     \n\t" | ||||||
|                      :::"memory"); |                      :::"memory"); | ||||||
| } | } | ||||||
|  | #endif /* !COMPILE_TEMPLATE_AMD3DNOW */ | ||||||
| 
 | 
 | ||||||
| /**
 | /**
 | ||||||
|  * Height should be a multiple of 2 and width should be a multiple of 2. |  * Height should be a multiple of 2 and width should be a multiple of 2. | ||||||
| @ -1978,7 +1959,9 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | |||||||
| 
 | 
 | ||||||
|      rgb24toyv12_c(src, ydst, udst, vdst, width, height-y, lumStride, chromStride, srcStride); |      rgb24toyv12_c(src, ydst, udst, vdst, width, height-y, lumStride, chromStride, srcStride); | ||||||
| } | } | ||||||
|  | #endif /* !COMPILE_TEMPLATE_SSE2 */ | ||||||
| 
 | 
 | ||||||
|  | #if !COMPILE_TEMPLATE_AMD3DNOW | ||||||
| static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dest, | static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dest, | ||||||
|                                     long width, long height, long src1Stride, |                                     long width, long height, long src1Stride, | ||||||
|                                     long src2Stride, long dstStride) |                                     long src2Stride, long dstStride) | ||||||
| @ -2048,7 +2031,10 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui | |||||||
|             ::: "memory" |             ::: "memory" | ||||||
|             ); |             ); | ||||||
| } | } | ||||||
|  | #endif /* !COMPILE_TEMPLATE_AMD3DNOW */ | ||||||
| 
 | 
 | ||||||
|  | #if !COMPILE_TEMPLATE_SSE2 | ||||||
|  | #if !COMPILE_TEMPLATE_AMD3DNOW | ||||||
| static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, | static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, | ||||||
|                                        uint8_t *dst1, uint8_t *dst2, |                                        uint8_t *dst1, uint8_t *dst2, | ||||||
|                                        long width, long height, |                                        long width, long height, | ||||||
| @ -2228,6 +2214,7 @@ static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2 | |||||||
|             ::: "memory" |             ::: "memory" | ||||||
|         ); |         ); | ||||||
| } | } | ||||||
|  | #endif /* !COMPILE_TEMPLATE_AMD3DNOW */ | ||||||
| 
 | 
 | ||||||
| static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count) | static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count) | ||||||
| { | { | ||||||
| @ -2266,6 +2253,7 @@ static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count | |||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | #if !COMPILE_TEMPLATE_AMD3DNOW | ||||||
| static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count) | static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count) | ||||||
| { | { | ||||||
|     dst0+=   count; |     dst0+=   count; | ||||||
| @ -2311,6 +2299,7 @@ static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *ds | |||||||
|         count++; |         count++; | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  | #endif /* !COMPILE_TEMPLATE_AMD3DNOW */ | ||||||
| 
 | 
 | ||||||
| static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count) | static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count) | ||||||
| { | { | ||||||
| @ -2365,6 +2354,7 @@ static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, u | |||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | #if !COMPILE_TEMPLATE_AMD3DNOW | ||||||
| static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count) | static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count) | ||||||
| { | { | ||||||
|     dst0+=   count; |     dst0+=   count; | ||||||
| @ -2411,6 +2401,7 @@ static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst | |||||||
|         count++; |         count++; | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  | #endif /* !COMPILE_TEMPLATE_AMD3DNOW */ | ||||||
| 
 | 
 | ||||||
| static void RENAME(extract_odd2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count) | static void RENAME(extract_odd2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count) | ||||||
| { | { | ||||||
| @ -2492,6 +2483,7 @@ static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co | |||||||
|         ); |         ); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | #if !COMPILE_TEMPLATE_AMD3DNOW | ||||||
| static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, | static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, | ||||||
|                                  long width, long height, |                                  long width, long height, | ||||||
|                                  long lumStride, long chromStride, long srcStride) |                                  long lumStride, long chromStride, long srcStride) | ||||||
| @ -2514,6 +2506,7 @@ static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co | |||||||
|             ::: "memory" |             ::: "memory" | ||||||
|         ); |         ); | ||||||
| } | } | ||||||
|  | #endif /* !COMPILE_TEMPLATE_AMD3DNOW */ | ||||||
| 
 | 
 | ||||||
| static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, | static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, | ||||||
|                                  long width, long height, |                                  long width, long height, | ||||||
| @ -2540,6 +2533,7 @@ static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co | |||||||
|         ); |         ); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | #if !COMPILE_TEMPLATE_AMD3DNOW | ||||||
| static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, | static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, | ||||||
|                                  long width, long height, |                                  long width, long height, | ||||||
|                                  long lumStride, long chromStride, long srcStride) |                                  long lumStride, long chromStride, long srcStride) | ||||||
| @ -2562,9 +2556,13 @@ static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co | |||||||
|             ::: "memory" |             ::: "memory" | ||||||
|         ); |         ); | ||||||
| } | } | ||||||
|  | #endif /* !COMPILE_TEMPLATE_AMD3DNOW */ | ||||||
|  | #endif /* !COMPILE_TEMPLATE_SSE2 */ | ||||||
| 
 | 
 | ||||||
| static inline void RENAME(rgb2rgb_init)(void) | static inline void RENAME(rgb2rgb_init)(void) | ||||||
| { | { | ||||||
|  | #if !COMPILE_TEMPLATE_SSE2 | ||||||
|  | #if !COMPILE_TEMPLATE_AMD3DNOW | ||||||
|     rgb15to16          = RENAME(rgb15to16); |     rgb15to16          = RENAME(rgb15to16); | ||||||
|     rgb15tobgr24       = RENAME(rgb15tobgr24); |     rgb15tobgr24       = RENAME(rgb15tobgr24); | ||||||
|     rgb15to32          = RENAME(rgb15to32); |     rgb15to32          = RENAME(rgb15to32); | ||||||
| @ -2588,14 +2586,22 @@ static inline void RENAME(rgb2rgb_init)(void) | |||||||
|     yuv422ptoyuy2      = RENAME(yuv422ptoyuy2); |     yuv422ptoyuy2      = RENAME(yuv422ptoyuy2); | ||||||
|     yuv422ptouyvy      = RENAME(yuv422ptouyvy); |     yuv422ptouyvy      = RENAME(yuv422ptouyvy); | ||||||
|     yuy2toyv12         = RENAME(yuy2toyv12); |     yuy2toyv12         = RENAME(yuy2toyv12); | ||||||
|     planar2x           = RENAME(planar2x); |  | ||||||
|     rgb24toyv12        = RENAME(rgb24toyv12); |  | ||||||
|     interleaveBytes    = RENAME(interleaveBytes); |  | ||||||
|     vu9_to_vu12        = RENAME(vu9_to_vu12); |     vu9_to_vu12        = RENAME(vu9_to_vu12); | ||||||
|     yvu9_to_yuy2       = RENAME(yvu9_to_yuy2); |     yvu9_to_yuy2       = RENAME(yvu9_to_yuy2); | ||||||
| 
 |  | ||||||
|     uyvytoyuv420       = RENAME(uyvytoyuv420); |  | ||||||
|     uyvytoyuv422       = RENAME(uyvytoyuv422); |     uyvytoyuv422       = RENAME(uyvytoyuv422); | ||||||
|     yuyvtoyuv420       = RENAME(yuyvtoyuv420); |  | ||||||
|     yuyvtoyuv422       = RENAME(yuyvtoyuv422); |     yuyvtoyuv422       = RENAME(yuyvtoyuv422); | ||||||
|  | #endif /* !COMPILE_TEMPLATE_SSE2 */ | ||||||
|  | 
 | ||||||
|  | #if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW | ||||||
|  |     planar2x           = RENAME(planar2x); | ||||||
|  | #endif /* COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW */ | ||||||
|  |     rgb24toyv12        = RENAME(rgb24toyv12); | ||||||
|  | 
 | ||||||
|  |     yuyvtoyuv420       = RENAME(yuyvtoyuv420); | ||||||
|  |     uyvytoyuv420       = RENAME(uyvytoyuv420); | ||||||
|  | #endif /* COMPILE_TEMPLATE_SSE2 */ | ||||||
|  | 
 | ||||||
|  | #if !COMPILE_TEMPLATE_AMD3DNOW | ||||||
|  |     interleaveBytes    = RENAME(interleaveBytes); | ||||||
|  | #endif /* !COMPILE_TEMPLATE_AMD3DNOW */ | ||||||
| } | } | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user