postproc: Replaced inline asm for prefetching with prefetch functions
Prefetching functions are defined in postprocess_template using the RENAME macro so that prefetching is used when available. For x86 targets, inline asm is used and the functions are non-empty only for CPUs where prefetching is available. For non-x86 targets, the GCC builtin prefetch is used if it is available; otherwise no prefetching is done. Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
		
							parent
							
								
									748d4816d9
								
							
						
					
					
						commit
						6264b6227c
					
				@ -168,37 +168,6 @@ static const char * const replaceTable[]=
 | 
			
		||||
    NULL //End Marker
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#if ARCH_X86 && HAVE_INLINE_ASM
 | 
			
		||||
/**
 * Issue an x86 "prefetchnta" hint for the memory at p.
 *
 * @param p address to prefetch; handed to the instruction in a register
 */
static inline void prefetchnta(const void *p)
{
    __asm__ volatile("prefetchnta (%0)\n\t" : /* no outputs */ : "r" (p));
}
 | 
			
		||||
 | 
			
		||||
/**
 * Issue an x86 "prefetcht0" hint for the memory at p.
 *
 * @param p address to prefetch; handed to the instruction in a register
 */
static inline void prefetcht0(const void *p)
{
    __asm__ volatile("prefetcht0 (%0)\n\t" : /* no outputs */ : "r" (p));
}
 | 
			
		||||
 | 
			
		||||
/**
 * Issue an x86 "prefetcht1" hint for the memory at p.
 *
 * @param p address to prefetch; handed to the instruction in a register
 */
static inline void prefetcht1(const void *p)
{
    __asm__ volatile("prefetcht1 (%0)\n\t" : /* no outputs */ : "r" (p));
}
 | 
			
		||||
 | 
			
		||||
/**
 * Issue an x86 "prefetcht2" hint for the memory at p.
 *
 * @param p address to prefetch; handed to the instruction in a register
 */
static inline void prefetcht2(const void *p)
{
    __asm__ volatile("prefetcht2 (%0)\n\t" : /* no outputs */ : "r" (p));
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/* The horizontal functions exist only in C because the MMX
 | 
			
		||||
 * code is faster with vertical filters and transposing. */
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -3242,6 +3242,69 @@ static inline void RENAME(duplicate)(uint8_t src[], int stride)
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#if ARCH_X86 && TEMPLATE_PP_MMXEXT
 | 
			
		||||
static inline void RENAME(prefetchnta)(const void *p)
 | 
			
		||||
{
 | 
			
		||||
    __asm__ volatile(   "prefetchnta (%0)\n\t"
 | 
			
		||||
        : : "r" (p)
 | 
			
		||||
    );
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline void RENAME(prefetcht0)(const void *p)
 | 
			
		||||
{
 | 
			
		||||
    __asm__ volatile(   "prefetcht0 (%0)\n\t"
 | 
			
		||||
        : : "r" (p)
 | 
			
		||||
    );
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline void RENAME(prefetcht1)(const void *p)
 | 
			
		||||
{
 | 
			
		||||
    __asm__ volatile(   "prefetcht1 (%0)\n\t"
 | 
			
		||||
        : : "r" (p)
 | 
			
		||||
    );
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline void RENAME(prefetcht2)(const void *p)
 | 
			
		||||
{
 | 
			
		||||
    __asm__ volatile(   "prefetcht2 (%0)\n\t"
 | 
			
		||||
        : : "r" (p)
 | 
			
		||||
    );
 | 
			
		||||
}
 | 
			
		||||
#elif !ARCH_X86 && AV_GCC_VERSION_AT_LEAST(3,2)
 | 
			
		||||
static inline void RENAME(prefetchnta)(const void *p)
 | 
			
		||||
{
 | 
			
		||||
    __builtin_prefetch(p,0,0);
 | 
			
		||||
}
 | 
			
		||||
static inline void RENAME(prefetcht0)(const void *p)
 | 
			
		||||
{
 | 
			
		||||
    __builtin_prefetch(p,0,1);
 | 
			
		||||
}
 | 
			
		||||
static inline void RENAME(prefetcht1)(const void *p)
 | 
			
		||||
{
 | 
			
		||||
    __builtin_prefetch(p,0,2);
 | 
			
		||||
}
 | 
			
		||||
static inline void RENAME(prefetcht2)(const void *p)
 | 
			
		||||
{
 | 
			
		||||
    __builtin_prefetch(p,0,3);
 | 
			
		||||
}
 | 
			
		||||
#else
 | 
			
		||||
static inline void RENAME(prefetchnta)(const void *p)
 | 
			
		||||
{
 | 
			
		||||
    return;
 | 
			
		||||
}
 | 
			
		||||
static inline void RENAME(prefetcht0)(const void *p)
 | 
			
		||||
{
 | 
			
		||||
    return;
 | 
			
		||||
}
 | 
			
		||||
static inline void RENAME(prefetcht1)(const void *p)
 | 
			
		||||
{
 | 
			
		||||
    return;
 | 
			
		||||
}
 | 
			
		||||
static inline void RENAME(prefetcht2)(const void *p)
 | 
			
		||||
{
 | 
			
		||||
    return;
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
/**
 | 
			
		||||
 * Filter array of bytes (Y or U or V values)
 | 
			
		||||
 */
 | 
			
		||||
@ -3368,34 +3431,10 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
 | 
			
		||||
        // finish 1 block before the next otherwise we might have a problem
 | 
			
		||||
        // with the L1 Cache of the P4 ... or only a few blocks at a time or something
 | 
			
		||||
        for(x=0; x<width; x+=BLOCK_SIZE){
 | 
			
		||||
 | 
			
		||||
#if TEMPLATE_PP_MMXEXT && HAVE_6REGS
 | 
			
		||||
/*
 | 
			
		||||
            prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32);
 | 
			
		||||
            prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32);
 | 
			
		||||
            prefetcht0(dstBlock + (((x>>2)&6) + 5)*dstStride + 32);
 | 
			
		||||
            prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32);
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
            __asm__(
 | 
			
		||||
                "mov %4, %%"REG_a"              \n\t"
 | 
			
		||||
                "shr $2, %%"REG_a"              \n\t"
 | 
			
		||||
                "and $6, %%"REG_a"              \n\t"
 | 
			
		||||
                "add %5, %%"REG_a"              \n\t"
 | 
			
		||||
                "mov %%"REG_a", %%"REG_d"       \n\t"
 | 
			
		||||
                "imul %1, %%"REG_a"             \n\t"
 | 
			
		||||
                "imul %3, %%"REG_d"             \n\t"
 | 
			
		||||
                "prefetchnta 32(%%"REG_a", %0)  \n\t"
 | 
			
		||||
                "prefetcht0 32(%%"REG_d", %2)   \n\t"
 | 
			
		||||
                "add %1, %%"REG_a"              \n\t"
 | 
			
		||||
                "add %3, %%"REG_d"              \n\t"
 | 
			
		||||
                "prefetchnta 32(%%"REG_a", %0)  \n\t"
 | 
			
		||||
                "prefetcht0 32(%%"REG_d", %2)   \n\t"
 | 
			
		||||
                :: "r" (srcBlock), "r" ((x86_reg)srcStride), "r" (dstBlock), "r" ((x86_reg)dstStride),
 | 
			
		||||
                "g" ((x86_reg)x), "g" ((x86_reg)copyAhead)
 | 
			
		||||
                : "%"REG_a, "%"REG_d
 | 
			
		||||
            );
 | 
			
		||||
#endif
 | 
			
		||||
            RENAME(prefetchnta)(srcBlock + (((x>>2)&6) + copyAhead)*srcStride + 32);
 | 
			
		||||
            RENAME(prefetchnta)(srcBlock + (((x>>2)&6) + copyAhead+1)*srcStride + 32);
 | 
			
		||||
            RENAME(prefetcht0)(dstBlock + (((x>>2)&6) + copyAhead)*dstStride + 32);
 | 
			
		||||
            RENAME(prefetcht0)(dstBlock + (((x>>2)&6) + copyAhead+1)*dstStride + 32);
 | 
			
		||||
 | 
			
		||||
            RENAME(blockCopy)(dstBlock + dstStride*8, dstStride,
 | 
			
		||||
                              srcBlock + srcStride*8, srcStride, mode & LEVEL_FIX, &c.packedYOffset);
 | 
			
		||||
@ -3474,33 +3513,10 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
 | 
			
		||||
            uint8_t *dstBlockStart = dstBlock;
 | 
			
		||||
            const uint8_t *srcBlockStart = srcBlock;
 | 
			
		||||
          for(; x < endx; x+=BLOCK_SIZE){
 | 
			
		||||
#if TEMPLATE_PP_MMXEXT && HAVE_6REGS
 | 
			
		||||
/*
 | 
			
		||||
            prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32);
 | 
			
		||||
            prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32);
 | 
			
		||||
            prefetcht0(dstBlock + (((x>>2)&6) + 5)*dstStride + 32);
 | 
			
		||||
            prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32);
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
            __asm__(
 | 
			
		||||
                "mov %4, %%"REG_a"              \n\t"
 | 
			
		||||
                "shr $2, %%"REG_a"              \n\t"
 | 
			
		||||
                "and $6, %%"REG_a"              \n\t"
 | 
			
		||||
                "add %5, %%"REG_a"              \n\t"
 | 
			
		||||
                "mov %%"REG_a", %%"REG_d"       \n\t"
 | 
			
		||||
                "imul %1, %%"REG_a"             \n\t"
 | 
			
		||||
                "imul %3, %%"REG_d"             \n\t"
 | 
			
		||||
                "prefetchnta 32(%%"REG_a", %0)  \n\t"
 | 
			
		||||
                "prefetcht0 32(%%"REG_d", %2)   \n\t"
 | 
			
		||||
                "add %1, %%"REG_a"              \n\t"
 | 
			
		||||
                "add %3, %%"REG_d"              \n\t"
 | 
			
		||||
                "prefetchnta 32(%%"REG_a", %0)  \n\t"
 | 
			
		||||
                "prefetcht0 32(%%"REG_d", %2)   \n\t"
 | 
			
		||||
                :: "r" (srcBlock), "r" ((x86_reg)srcStride), "r" (dstBlock), "r" ((x86_reg)dstStride),
 | 
			
		||||
                "g" ((x86_reg)x), "g" ((x86_reg)copyAhead)
 | 
			
		||||
                : "%"REG_a, "%"REG_d
 | 
			
		||||
            );
 | 
			
		||||
#endif
 | 
			
		||||
            RENAME(prefetchnta)(srcBlock + (((x>>2)&6) + copyAhead)*srcStride + 32);
 | 
			
		||||
            RENAME(prefetchnta)(srcBlock + (((x>>2)&6) + copyAhead+1)*srcStride + 32);
 | 
			
		||||
            RENAME(prefetcht0)(dstBlock + (((x>>2)&6) + copyAhead)*dstStride + 32);
 | 
			
		||||
            RENAME(prefetcht0)(dstBlock + (((x>>2)&6) + copyAhead+1)*dstStride + 32);
 | 
			
		||||
 | 
			
		||||
            RENAME(blockCopy)(dstBlock + dstStride*copyAhead, dstStride,
 | 
			
		||||
                              srcBlock + srcStride*copyAhead, srcStride, mode & LEVEL_FIX, &c.packedYOffset);
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user