Remove all Alpha architecture optimizations
Alpha has been end-of-lifed and no more test machines are available.
This commit is contained in:
		
							parent
							
								
									16381923fb
								
							
						
					
					
						commit
						c7f7bfc9e3
					
				| @ -1,9 +0,0 @@ | ||||
| OBJS += alpha/dsputil_alpha.o                                           \
 | ||||
|         alpha/dsputil_alpha_asm.o                                       \
 | ||||
|         alpha/motion_est_alpha.o                                        \
 | ||||
|         alpha/motion_est_mvi_asm.o                                      \
 | ||||
|         alpha/simple_idct_alpha.o                                       \
 | ||||
| 
 | ||||
| OBJS-$(CONFIG_HPELDSP)                  += alpha/hpeldsp_alpha.o        \
 | ||||
|                                            alpha/hpeldsp_alpha_asm.o | ||||
| OBJS-$(CONFIG_MPEGVIDEO)                += alpha/mpegvideo_alpha.o | ||||
| @ -1,186 +0,0 @@ | ||||
| /*
 | ||||
|  * Alpha optimized DSP utils | ||||
|  * Copyright (c) 2002 Falk Hueffner <falk@debian.org> | ||||
|  * | ||||
|  * This file is part of Libav. | ||||
|  * | ||||
|  * Libav is free software; you can redistribute it and/or | ||||
|  * modify it under the terms of the GNU Lesser General Public | ||||
|  * License as published by the Free Software Foundation; either | ||||
|  * version 2.1 of the License, or (at your option) any later version. | ||||
|  * | ||||
|  * Libav is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||
|  * Lesser General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU Lesser General Public | ||||
|  * License along with Libav; if not, write to the Free Software | ||||
|  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
|  */ | ||||
| 
 | ||||
| #ifndef AVCODEC_ALPHA_ASM_H | ||||
| #define AVCODEC_ALPHA_ASM_H | ||||
| 
 | ||||
| #include <inttypes.h> | ||||
| 
 | ||||
| #include "libavutil/common.h" | ||||
| 
 | ||||
| #if AV_GCC_VERSION_AT_LEAST(2,96) | ||||
| # define likely(x)      __builtin_expect((x) != 0, 1) | ||||
| # define unlikely(x)    __builtin_expect((x) != 0, 0) | ||||
| #else | ||||
| # define likely(x)      (x) | ||||
| # define unlikely(x)    (x) | ||||
| #endif | ||||
| 
 | ||||
| #define AMASK_BWX (1 << 0) | ||||
| #define AMASK_FIX (1 << 1) | ||||
| #define AMASK_CIX (1 << 2) | ||||
| #define AMASK_MVI (1 << 8) | ||||
| 
 | ||||
/*
 * Smear x across a 64-bit word by OR-ing it with copies of itself shifted
 * by 8, then 16, then 32 bits.  For x in 0..0xff the result holds x in each
 * of the eight bytes; wider inputs have their shifted copies OR-ed together.
 */
static inline uint64_t BYTE_VEC(uint64_t x)
{
    const uint64_t two_bytes  = x         | (x         <<  8);
    const uint64_t four_bytes = two_bytes | (two_bytes << 16);

    return four_bytes | (four_bytes << 32);
}
/*
 * Smear x across a 64-bit word by OR-ing it with copies of itself shifted
 * by 16 and then 32 bits.  For x in 0..0xffff the result holds x in each of
 * the four 16-bit lanes.
 */
static inline uint64_t WORD_VEC(uint64_t x)
{
    const uint64_t two_words = x | (x << 16);

    return two_words | (two_words << 32);
}
| 
 | ||||
| #define sextw(x) ((int16_t) (x)) | ||||
| 
 | ||||
| #ifdef __GNUC__ | ||||
/*
 * Whole-word load/store helpers (GNU C variants).
 *
 *   ldq(p)    - read  a uint64_t from p
 *   ldl(p)    - read  an int32_t from p
 *   stq(l, p) - write l as a uint64_t to p
 *   stl(l, p) - write l as an int32_t  to p
 *
 * Each macro casts p to a pointer to an anonymous union that overlays the
 * wide integer with an array of p's own element type, and accesses the wide
 * member of that union.  The access itself is a plain (aligned) one; nothing
 * here handles misaligned pointers.
 *
 * The stored value is parenthesized so the expansion matches the DECC
 * variants of stq()/stl().
 */
#define ldq(p)                                                  \
    (((const union {                                            \
        uint64_t __l;                                           \
        __typeof__(*(p)) __s[sizeof (uint64_t) / sizeof *(p)];  \
    } *) (p))->__l)
#define ldl(p)                                                  \
    (((const union {                                            \
        int32_t __l;                                            \
        __typeof__(*(p)) __s[sizeof (int32_t) / sizeof *(p)];   \
    } *) (p))->__l)
#define stq(l, p)                                                       \
    do {                                                                \
        (((union {                                                      \
            uint64_t __l;                                               \
            __typeof__(*(p)) __s[sizeof (uint64_t) / sizeof *(p)];      \
        } *) (p))->__l) = (l);                                          \
    } while (0)
#define stl(l, p)                                                       \
    do {                                                                \
        (((union {                                                      \
            int32_t __l;                                                \
            __typeof__(*(p)) __s[sizeof (int32_t) / sizeof *(p)];       \
        } *) (p))->__l) = (l);                                          \
    } while (0)
| struct unaligned_long { uint64_t l; } __attribute__((packed)); | ||||
| #define ldq_u(p)        (*(const uint64_t *) (((uint64_t) (p)) & ~7ul)) | ||||
| #define uldq(a)         (((const struct unaligned_long *) (a))->l) | ||||
| 
 | ||||
| #if AV_GCC_VERSION_AT_LEAST(3,3) | ||||
| #define prefetch(p)     __builtin_prefetch((p), 0, 1) | ||||
| #define prefetch_en(p)  __builtin_prefetch((p), 0, 0) | ||||
| #define prefetch_m(p)   __builtin_prefetch((p), 1, 1) | ||||
| #define prefetch_men(p) __builtin_prefetch((p), 1, 0) | ||||
| #define cmpbge          __builtin_alpha_cmpbge | ||||
| /* Avoid warnings.  */ | ||||
| #define extql(a, b)     __builtin_alpha_extql(a, (uint64_t) (b)) | ||||
| #define extwl(a, b)     __builtin_alpha_extwl(a, (uint64_t) (b)) | ||||
| #define extqh(a, b)     __builtin_alpha_extqh(a, (uint64_t) (b)) | ||||
| #define zap             __builtin_alpha_zap | ||||
| #define zapnot          __builtin_alpha_zapnot | ||||
| #define amask           __builtin_alpha_amask | ||||
| #define implver         __builtin_alpha_implver | ||||
| #define rpcc            __builtin_alpha_rpcc | ||||
| #else | ||||
| #define prefetch(p)     __asm__ volatile("ldl $31,%0"  : : "m"(*(const char *) (p)) : "memory") | ||||
| #define prefetch_en(p)  __asm__ volatile("ldq $31,%0"  : : "m"(*(const char *) (p)) : "memory") | ||||
| #define prefetch_m(p)   __asm__ volatile("lds $f31,%0" : : "m"(*(const char *) (p)) : "memory") | ||||
| #define prefetch_men(p) __asm__ volatile("ldt $f31,%0" : : "m"(*(const char *) (p)) : "memory") | ||||
| #define cmpbge(a, b) ({ uint64_t __r; __asm__ ("cmpbge  %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; }) | ||||
| #define extql(a, b)  ({ uint64_t __r; __asm__ ("extql   %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; }) | ||||
| #define extwl(a, b)  ({ uint64_t __r; __asm__ ("extwl   %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; }) | ||||
| #define extqh(a, b)  ({ uint64_t __r; __asm__ ("extqh   %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; }) | ||||
| #define zap(a, b)    ({ uint64_t __r; __asm__ ("zap     %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; }) | ||||
| #define zapnot(a, b) ({ uint64_t __r; __asm__ ("zapnot  %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; }) | ||||
| #define amask(a)     ({ uint64_t __r; __asm__ ("amask   %1,%0"      : "=r" (__r) : "rI"  (a));           __r; }) | ||||
| #define implver()    ({ uint64_t __r; __asm__ ("implver %0"         : "=r" (__r));                       __r; }) | ||||
| #define rpcc()       ({ uint64_t __r; __asm__ volatile ("rpcc %0"   : "=r" (__r));                       __r; }) | ||||
| #endif | ||||
| #define wh64(p) __asm__ volatile("wh64 (%0)" : : "r"(p) : "memory") | ||||
| 
 | ||||
| #if AV_GCC_VERSION_AT_LEAST(3,3) && defined(__alpha_max__) | ||||
| #define minub8  __builtin_alpha_minub8 | ||||
| #define minsb8  __builtin_alpha_minsb8 | ||||
| #define minuw4  __builtin_alpha_minuw4 | ||||
| #define minsw4  __builtin_alpha_minsw4 | ||||
| #define maxub8  __builtin_alpha_maxub8 | ||||
| #define maxsb8  __builtin_alpha_maxsb8 | ||||
| #define maxuw4  __builtin_alpha_maxuw4 | ||||
| #define maxsw4  __builtin_alpha_maxsw4 | ||||
| #define perr    __builtin_alpha_perr | ||||
| #define pklb    __builtin_alpha_pklb | ||||
| #define pkwb    __builtin_alpha_pkwb | ||||
| #define unpkbl  __builtin_alpha_unpkbl | ||||
| #define unpkbw  __builtin_alpha_unpkbw | ||||
| #else | ||||
| #define minub8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minub8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | ||||
| #define minsb8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minsb8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | ||||
| #define minuw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minuw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | ||||
| #define minsw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minsw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | ||||
| #define maxub8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxub8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | ||||
| #define maxsb8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxsb8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | ||||
| #define maxuw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxuw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | ||||
| #define maxsw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxsw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | ||||
| #define perr(a, b)   ({ uint64_t __r; __asm__ (".arch ev6; perr    %r1,%r2,%0" : "=r" (__r) : "%rJ" (a), "rJ" (b)); __r; }) | ||||
| #define pklb(a)      ({ uint64_t __r; __asm__ (".arch ev6; pklb    %r1,%0"     : "=r" (__r) : "rJ"  (a));           __r; }) | ||||
| #define pkwb(a)      ({ uint64_t __r; __asm__ (".arch ev6; pkwb    %r1,%0"     : "=r" (__r) : "rJ"  (a));           __r; }) | ||||
| #define unpkbl(a)    ({ uint64_t __r; __asm__ (".arch ev6; unpkbl  %r1,%0"     : "=r" (__r) : "rJ"  (a));           __r; }) | ||||
| #define unpkbw(a)    ({ uint64_t __r; __asm__ (".arch ev6; unpkbw  %r1,%0"     : "=r" (__r) : "rJ"  (a));           __r; }) | ||||
| #endif | ||||
| 
 | ||||
| #elif defined(__DECC)           /* Digital/Compaq/hp "ccc" compiler */ | ||||
| 
 | ||||
| #include <c_asm.h> | ||||
| #define ldq(p) (*(const uint64_t *) (p)) | ||||
| #define ldl(p) (*(const int32_t *)  (p)) | ||||
| #define stq(l, p) do { *(uint64_t *) (p) = (l); } while (0) | ||||
| #define stl(l, p) do { *(int32_t *)  (p) = (l); } while (0) | ||||
| #define ldq_u(a)     asm ("ldq_u   %v0,0(%a0)", a) | ||||
| #define uldq(a)      (*(const __unaligned uint64_t *) (a)) | ||||
| #define cmpbge(a, b) asm ("cmpbge  %a0,%a1,%v0", a, b) | ||||
| #define extql(a, b)  asm ("extql   %a0,%a1,%v0", a, b) | ||||
| #define extwl(a, b)  asm ("extwl   %a0,%a1,%v0", a, b) | ||||
| #define extqh(a, b)  asm ("extqh   %a0,%a1,%v0", a, b) | ||||
| #define zap(a, b)    asm ("zap     %a0,%a1,%v0", a, b) | ||||
| #define zapnot(a, b) asm ("zapnot  %a0,%a1,%v0", a, b) | ||||
| #define amask(a)     asm ("amask   %a0,%v0", a) | ||||
| #define implver()    asm ("implver %v0") | ||||
| #define rpcc()       asm ("rpcc           %v0") | ||||
| #define minub8(a, b) asm ("minub8  %a0,%a1,%v0", a, b) | ||||
| #define minsb8(a, b) asm ("minsb8  %a0,%a1,%v0", a, b) | ||||
| #define minuw4(a, b) asm ("minuw4  %a0,%a1,%v0", a, b) | ||||
| #define minsw4(a, b) asm ("minsw4  %a0,%a1,%v0", a, b) | ||||
| #define maxub8(a, b) asm ("maxub8  %a0,%a1,%v0", a, b) | ||||
| #define maxsb8(a, b) asm ("maxsb8  %a0,%a1,%v0", a, b) | ||||
| #define maxuw4(a, b) asm ("maxuw4  %a0,%a1,%v0", a, b) | ||||
| #define maxsw4(a, b) asm ("maxsw4  %a0,%a1,%v0", a, b) | ||||
| #define perr(a, b)   asm ("perr    %a0,%a1,%v0", a, b) | ||||
| #define pklb(a)      asm ("pklb    %a0,%v0", a) | ||||
| #define pkwb(a)      asm ("pkwb    %a0,%v0", a) | ||||
| #define unpkbl(a)    asm ("unpkbl  %a0,%v0", a) | ||||
| #define unpkbw(a)    asm ("unpkbw  %a0,%v0", a) | ||||
| #define wh64(a)      asm ("wh64    %a0", a) | ||||
| 
 | ||||
| #else | ||||
| #error "Unknown compiler!" | ||||
| #endif | ||||
| 
 | ||||
| #endif /* AVCODEC_ALPHA_ASM_H */ | ||||
| @ -1,157 +0,0 @@ | ||||
| /*
 | ||||
|  * Alpha optimized DSP utils | ||||
|  * Copyright (c) 2002 Falk Hueffner <falk@debian.org> | ||||
|  * | ||||
|  * This file is part of Libav. | ||||
|  * | ||||
|  * Libav is free software; you can redistribute it and/or | ||||
|  * modify it under the terms of the GNU Lesser General Public | ||||
|  * License as published by the Free Software Foundation; either | ||||
|  * version 2.1 of the License, or (at your option) any later version. | ||||
|  * | ||||
|  * Libav is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||
|  * Lesser General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU Lesser General Public | ||||
|  * License along with Libav; if not, write to the Free Software | ||||
|  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
|  */ | ||||
| 
 | ||||
| #include "libavutil/attributes.h" | ||||
| #include "libavcodec/dsputil.h" | ||||
| #include "dsputil_alpha.h" | ||||
| #include "asm.h" | ||||
| 
 | ||||
| void (*put_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels, | ||||
|                                  int line_size); | ||||
| void (*add_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels, | ||||
|                                  int line_size); | ||||
| 
 | ||||
| #if 0 | ||||
| /* These functions were the base for the optimized assembler routines,
 | ||||
|    and remain here for documentation purposes.  */ | ||||
/*
 * Plain-C form of the "put clamped" routine: walks eight rows, and for each
 * row converts eight 16-bit coefficients from block into eight bytes at
 * pixels.  Every 16-bit lane is limited with maxsw4() against 0 and
 * minsw4() against 0x00ff before pkwb() packs four lanes into one 32-bit
 * store.  pixels advances by line_size per row, block by eight coefficients.
 */
static void put_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels,
                                   ptrdiff_t line_size)
{
    const uint64_t lane_max = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */
    int row;

    for (row = 8; row > 0; row--) {
        uint64_t quad;

        /* Left four pixels of the row. */
        quad = ldq(block);
        quad = minsw4(maxsw4(quad, 0), lane_max);
        stl(pkwb(quad), pixels);

        /* Right four pixels of the row. */
        quad = ldq(block + 4);
        quad = minsw4(maxsw4(quad, 0), lane_max);
        stl(pkwb(quad), pixels + 4);

        block  += 8;
        pixels += line_size;
    }
}
| 
 | ||||
/*
 * Plain-C form of the "add clamped" routine: for each of eight rows, adds
 * eight 16-bit coefficients from block to the eight bytes already at pixels,
 * limits every lane with maxsw4() against 0 and minsw4() against 0x00ff,
 * and writes the packed bytes back.  pixels advances by line_size per row,
 * block by eight coefficients.
 *
 * The per-lane add is done on the full 64-bit word: the sign bit of every
 * lane is masked out before the add and XOR-ed back in afterwards, so a
 * carry cannot spill into the neighbouring lane.
 */
void add_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels,
                            ptrdiff_t line_size)
{
    /* Constants are built with zap() rather than loaded, as in the original
       (it kept the function a leaf function that way).  */
    const uint64_t lane_max  = zap(-1, 0xaa);                  /* 0x00ff00ff00ff00ff */
    const uint64_t hi_halves = zap(-1, 0x33);
    const uint64_t lane_sign = hi_halves ^ (hi_halves >> 1);   /* 0x8000800080008000 */
    int rows_left;

    for (rows_left = 8; rows_left > 0; rows_left--) {
        uint64_t left  = ldq(block);
        uint64_t right = ldq(block + 4);
        uint64_t left_sign, right_sign;

        /* Left four pixels: lane-wise signed add, then clamp. */
        left_sign = left & lane_sign;
        left      = ((left & ~lane_sign) + unpkbw(ldl(pixels))) ^ left_sign;
        left      = minsw4(maxsw4(left, 0), lane_max);

        /* Right four pixels. */
        right_sign = right & lane_sign;
        right      = ((right & ~lane_sign) + unpkbw(ldl(pixels + 4))) ^ right_sign;
        right      = minsw4(maxsw4(right, 0), lane_max);

        stl(pkwb(left),  pixels);
        stl(pkwb(right), pixels + 4);

        block  += 8;
        pixels += line_size;
    }
}
| #endif | ||||
| 
 | ||||
/*
 * Zero six consecutive 64-coefficient blocks (6 * 64 int16_t values) using
 * 64-bit stores, eight stores per pass.  blocks is accessed as uint64_t, so
 * it is expected to be suitably aligned for that.
 */
static void clear_blocks_axp(int16_t *blocks) {
    uint64_t *cursor = (uint64_t *) blocks;
    uint64_t *const stop = cursor + sizeof(int16_t) * 6 * 64 / sizeof(*cursor);
    int k;

    while (cursor != stop) {
        for (k = 0; k < 8; k++)
            cursor[k] = 0;
        cursor += 8;
    }
}
| 
 | ||||
| av_cold void ff_dsputil_init_alpha(DSPContext *c, AVCodecContext *avctx) | ||||
| { | ||||
|     const int high_bit_depth = avctx->bits_per_raw_sample > 8; | ||||
| 
 | ||||
|     if (!high_bit_depth) { | ||||
|     c->clear_blocks = clear_blocks_axp; | ||||
|     } | ||||
| 
 | ||||
|     /* amask clears all bits that correspond to present features.  */ | ||||
|     if (amask(AMASK_MVI) == 0) { | ||||
|         c->put_pixels_clamped = put_pixels_clamped_mvi_asm; | ||||
|         c->add_pixels_clamped = add_pixels_clamped_mvi_asm; | ||||
| 
 | ||||
|         if (!high_bit_depth) | ||||
|             c->get_pixels   = get_pixels_mvi; | ||||
|         c->diff_pixels      = diff_pixels_mvi; | ||||
|         c->sad[0]           = pix_abs16x16_mvi_asm; | ||||
|         c->sad[1]           = pix_abs8x8_mvi; | ||||
|         c->pix_abs[0][0]    = pix_abs16x16_mvi_asm; | ||||
|         c->pix_abs[1][0]    = pix_abs8x8_mvi; | ||||
|         c->pix_abs[0][1]    = pix_abs16x16_x2_mvi; | ||||
|         c->pix_abs[0][2]    = pix_abs16x16_y2_mvi; | ||||
|         c->pix_abs[0][3]    = pix_abs16x16_xy2_mvi; | ||||
|     } | ||||
| 
 | ||||
|     put_pixels_clamped_axp_p = c->put_pixels_clamped; | ||||
|     add_pixels_clamped_axp_p = c->add_pixels_clamped; | ||||
| 
 | ||||
|     if (avctx->bits_per_raw_sample <= 8 && | ||||
|         (avctx->idct_algo == FF_IDCT_AUTO || | ||||
|          avctx->idct_algo == FF_IDCT_SIMPLEALPHA)) { | ||||
|         c->idct_put = ff_simple_idct_put_axp; | ||||
|         c->idct_add = ff_simple_idct_add_axp; | ||||
|         c->idct =     ff_simple_idct_axp; | ||||
|     } | ||||
| } | ||||
| @ -1,49 +0,0 @@ | ||||
| /*
 | ||||
|  * This file is part of Libav. | ||||
|  * | ||||
|  * Libav is free software; you can redistribute it and/or | ||||
|  * modify it under the terms of the GNU Lesser General Public | ||||
|  * License as published by the Free Software Foundation; either | ||||
|  * version 2.1 of the License, or (at your option) any later version. | ||||
|  * | ||||
|  * Libav is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||
|  * Lesser General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU Lesser General Public | ||||
|  * License along with Libav; if not, write to the Free Software | ||||
|  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
|  */ | ||||
| 
 | ||||
| #ifndef AVCODEC_ALPHA_DSPUTIL_ALPHA_H | ||||
| #define AVCODEC_ALPHA_DSPUTIL_ALPHA_H | ||||
| 
 | ||||
| #include <stddef.h> | ||||
| #include <stdint.h> | ||||
| 
 | ||||
| void ff_simple_idct_axp(int16_t *block); | ||||
| void ff_simple_idct_put_axp(uint8_t *dest, int line_size, int16_t *block); | ||||
| void ff_simple_idct_add_axp(uint8_t *dest, int line_size, int16_t *block); | ||||
| 
 | ||||
| void put_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels, | ||||
|                                 int line_size); | ||||
| void add_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels, | ||||
|                                 int line_size); | ||||
| extern void (*put_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels, | ||||
|                                         int line_size); | ||||
| extern void (*add_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels, | ||||
|                                         int line_size); | ||||
| 
 | ||||
| void get_pixels_mvi(int16_t *restrict block, | ||||
|                     const uint8_t *restrict pixels, int line_size); | ||||
| void diff_pixels_mvi(int16_t *block, const uint8_t *s1, const uint8_t *s2, | ||||
|                      int stride); | ||||
| int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); | ||||
| int pix_abs16x16_mvi_asm(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); | ||||
| int pix_abs16x16_x2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); | ||||
| int pix_abs16x16_y2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); | ||||
| int pix_abs16x16_xy2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); | ||||
| 
 | ||||
| 
 | ||||
| #endif /* AVCODEC_ALPHA_DSPUTIL_ALPHA_H */ | ||||
| @ -1,167 +0,0 @@ | ||||
| /* | ||||
|  * Alpha optimized DSP utils | ||||
|  * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
 | ||||
|  * | ||||
|  * This file is part of Libav. | ||||
|  * | ||||
|  * Libav is free software; you can redistribute it and/or
 | ||||
|  * modify it under the terms of the GNU Lesser General Public | ||||
|  * License as published by the Free Software Foundation; either
 | ||||
|  * version 2.1 of the License, or (at your option) any later version. | ||||
|  * | ||||
|  * Libav is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||
|  * Lesser General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU Lesser General Public | ||||
|  * License along with Libav; if not, write to the Free Software
 | ||||
|  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
|  */ | ||||
| 
 | ||||
| /* | ||||
|  * These functions are scheduled for pca56. They should work | ||||
|  * reasonably on ev6, though. | ||||
|  */ | ||||
| 
 | ||||
| #include "regdef.h" | ||||
| 
 | ||||
|         .set noat
 | ||||
|         .set noreorder
 | ||||
|         .arch pca56
 | ||||
|         .text | ||||
| 
 | ||||
| /************************************************************************ | ||||
|  * void put_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels, | ||||
|  *                                 int line_size) | ||||
|  */ | ||||
|         .align 6
 | ||||
|         .globl put_pixels_clamped_mvi_asm
 | ||||
|         .ent put_pixels_clamped_mvi_asm
 | ||||
| put_pixels_clamped_mvi_asm: | ||||
|         .frame sp, 0, ra | ||||
|         .prologue 0
 | ||||
| 
 | ||||
|         lda     t8, -1 | ||||
|         lda     t9, 8           # loop counter | ||||
|         zap     t8, 0xaa, t8    # 00ff00ff00ff00ff | ||||
| 
 | ||||
|         .align 4
 | ||||
| 1:      ldq     t0,  0(a0) | ||||
|         ldq     t1,  8(a0) | ||||
|         ldq     t2, 16(a0) | ||||
|         ldq     t3, 24(a0) | ||||
| 
 | ||||
|         maxsw4  t0, zero, t0 | ||||
|         subq    t9, 2, t9 | ||||
|         maxsw4  t1, zero, t1 | ||||
|         lda     a0, 32(a0) | ||||
| 
 | ||||
|         maxsw4  t2, zero, t2 | ||||
|         addq    a1, a2, ta | ||||
|         maxsw4  t3, zero, t3 | ||||
|         minsw4  t0, t8, t0 | ||||
| 
 | ||||
|         minsw4  t1, t8, t1 | ||||
|         minsw4  t2, t8, t2 | ||||
|         minsw4  t3, t8, t3 | ||||
|         pkwb    t0, t0 | ||||
| 
 | ||||
|         pkwb    t1, t1 | ||||
|         pkwb    t2, t2 | ||||
|         pkwb    t3, t3 | ||||
|         stl     t0, 0(a1) | ||||
| 
 | ||||
|         stl     t1, 4(a1) | ||||
|         addq    ta, a2, a1 | ||||
|         stl     t2, 0(ta) | ||||
|         stl     t3, 4(ta) | ||||
| 
 | ||||
|         bne     t9, 1b | ||||
|         ret | ||||
|         .end put_pixels_clamped_mvi_asm
 | ||||
| 
 | ||||
| /************************************************************************ | ||||
|  * void add_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels, | ||||
|  *                                 int line_size) | ||||
|  */ | ||||
|         .align 6
 | ||||
|         .globl add_pixels_clamped_mvi_asm
 | ||||
|         .ent add_pixels_clamped_mvi_asm
 | ||||
| add_pixels_clamped_mvi_asm: | ||||
|         .frame sp, 0, ra | ||||
|         .prologue 0
 | ||||
| 
 | ||||
|         lda     t1, -1 | ||||
|         lda     th, 8 | ||||
|         zap     t1, 0x33, tg | ||||
|         nop | ||||
| 
 | ||||
|         srl     tg, 1, t0 | ||||
|         xor     tg, t0, tg      # 0x8000800080008000 | ||||
|         zap     t1, 0xaa, tf    # 0x00ff00ff00ff00ff | ||||
| 
 | ||||
|         .align 4
 | ||||
| 1:      ldl     t1, 0(a1)       # pix0 (try to hit cache line soon) | ||||
|         ldl     t4, 4(a1)       # pix1 | ||||
|         addq    a1, a2, te      # pixels += line_size | ||||
|         ldq     t0, 0(a0)       # shorts0 | ||||
| 
 | ||||
|         ldl     t7, 0(te)       # pix2 (try to hit cache line soon) | ||||
|         ldl     ta, 4(te)       # pix3 | ||||
|         ldq     t3, 8(a0)       # shorts1 | ||||
|         ldq     t6, 16(a0)      # shorts2 | ||||
| 
 | ||||
|         ldq     t9, 24(a0)      # shorts3 | ||||
|         unpkbw  t1, t1          # 0 0 (quarter/op no.) | ||||
|         and     t0, tg, t2      # 0 1 | ||||
|         unpkbw  t4, t4          # 1 0 | ||||
| 
 | ||||
|         bic     t0, tg, t0      # 0 2 | ||||
|         unpkbw  t7, t7          # 2 0 | ||||
|         and     t3, tg, t5      # 1 1 | ||||
|         addq    t0, t1, t0      # 0 3 | ||||
| 
 | ||||
|         xor     t0, t2, t0      # 0 4 | ||||
|         unpkbw  ta, ta          # 3 0 | ||||
|         and     t6, tg, t8      # 2 1 | ||||
|         maxsw4  t0, zero, t0    # 0 5 | ||||
| 
 | ||||
|         bic     t3, tg, t3      # 1 2 | ||||
|         bic     t6, tg, t6      # 2 2 | ||||
|         minsw4  t0, tf, t0      # 0 6 | ||||
|         addq    t3, t4, t3      # 1 3 | ||||
| 
 | ||||
|         pkwb    t0, t0          # 0 7 | ||||
|         xor     t3, t5, t3      # 1 4 | ||||
|         maxsw4  t3, zero, t3    # 1 5 | ||||
|         addq    t6, t7, t6      # 2 3 | ||||
| 
 | ||||
|         xor     t6, t8, t6      # 2 4 | ||||
|         and     t9, tg, tb      # 3 1 | ||||
|         minsw4  t3, tf, t3      # 1 6 | ||||
|         bic     t9, tg, t9      # 3 2 | ||||
| 
 | ||||
|         maxsw4  t6, zero, t6    # 2 5 | ||||
|         addq    t9, ta, t9      # 3 3 | ||||
|         stl     t0, 0(a1)       # 0 8 | ||||
|         minsw4  t6, tf, t6      # 2 6 | ||||
| 
 | ||||
|         xor     t9, tb, t9      # 3 4 | ||||
|         maxsw4  t9, zero, t9    # 3 5 | ||||
|         lda     a0, 32(a0)      # block += 16;
 | ||||
|         pkwb    t3, t3          # 1 7 | ||||
| 
 | ||||
|         minsw4  t9, tf, t9      # 3 6 | ||||
|         subq    th, 2, th | ||||
|         pkwb    t6, t6          # 2 7 | ||||
|         pkwb    t9, t9          # 3 7 | ||||
| 
 | ||||
|         stl     t3, 4(a1)       # 1 8 | ||||
|         addq    te, a2, a1      # pixels += line_size | ||||
|         stl     t6, 0(te)       # 2 8 | ||||
|         stl     t9, 4(te)       # 3 8 | ||||
| 
 | ||||
|         bne     th, 1b | ||||
|         ret | ||||
|         .end add_pixels_clamped_mvi_asm
 | ||||
| @ -1,213 +0,0 @@ | ||||
| /*
 | ||||
|  * Alpha optimized DSP utils | ||||
|  * Copyright (c) 2002 Falk Hueffner <falk@debian.org> | ||||
|  * | ||||
|  * This file is part of Libav. | ||||
|  * | ||||
|  * Libav is free software; you can redistribute it and/or | ||||
|  * modify it under the terms of the GNU Lesser General Public | ||||
|  * License as published by the Free Software Foundation; either | ||||
|  * version 2.1 of the License, or (at your option) any later version. | ||||
|  * | ||||
|  * Libav is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||
|  * Lesser General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU Lesser General Public | ||||
|  * License along with Libav; if not, write to the Free Software | ||||
|  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
|  */ | ||||
| 
 | ||||
| #include "libavutil/attributes.h" | ||||
| #include "libavcodec/hpeldsp.h" | ||||
| #include "hpeldsp_alpha.h" | ||||
| #include "asm.h" | ||||
| 
 | ||||
/*
 * Byte-wise average of two packed 8x8-bit words, rounding down:
 * each result byte is (a_byte + b_byte) >> 1.
 *
 * Computed as "common bits + half of the differing bits"; the low bit of
 * every byte of the difference is cleared first so the shift cannot leak
 * into the byte below.
 */
static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b)
{
    const uint64_t drop_lsb = 0xfefefefefefefefeULL; /* same value as BYTE_VEC(0xfe) */
    const uint64_t shared   = a & b;
    const uint64_t differ   = (a ^ b) & drop_lsb;

    return shared + (differ >> 1);
}
| 
 | ||||
/*
 * Byte-wise average of two packed 8x8-bit words, rounding up:
 * each result byte is (a_byte + b_byte + 1) >> 1.
 *
 * Computed as "all set bits - half of the differing bits"; the low bit of
 * every byte of the difference is cleared first so the shift cannot leak
 * into the byte below.
 */
static inline uint64_t avg2(uint64_t a, uint64_t b)
{
    const uint64_t drop_lsb = 0xfefefefefefefefeULL; /* same value as BYTE_VEC(0xfe) */
    const uint64_t either   = a | b;
    const uint64_t differ   = (a ^ b) & drop_lsb;

    return either - (differ >> 1);
}
| 
 | ||||
| #if 0 | ||||
| /* The XY2 routines basically utilize this scheme, but reuse parts in
 | ||||
|    each iteration.  */ | ||||
/*
 * Byte-wise rounded average of four packed 8x8-bit words:
 * each result byte is (l1 + l2 + l3 + l4 + 2) >> 2.
 *
 * Every byte is split into its upper six bits (pre-shifted and summed) and
 * its lower two bits (summed with the rounding constant, then shifted), so
 * neither partial sum can overflow a byte.
 */
static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
{
    const uint64_t low2  = 0x0303030303030303ULL; /* same value as BYTE_VEC(0x03) */
    const uint64_t round = 0x0202020202020202ULL; /* same value as BYTE_VEC(0x02) */
    uint64_t upper, lower;

    upper  = (l1 & ~low2) >> 2;
    upper += (l2 & ~low2) >> 2;
    upper += (l3 & ~low2) >> 2;
    upper += (l4 & ~low2) >> 2;

    lower  = l1 & low2;
    lower += l2 & low2;
    lower += l3 & low2;
    lower += l4 & low2;
    lower += round;

    return upper + ((lower >> 2) & low2);
}
| #endif | ||||
| 
 | ||||
| #define OP(LOAD, STORE)                         \ | ||||
|     do {                                        \ | ||||
|         STORE(LOAD(pixels), block);             \ | ||||
|         pixels += line_size;                    \ | ||||
|         block += line_size;                     \ | ||||
|     } while (--h) | ||||
| 
 | ||||
| #define OP_X2(LOAD, STORE)                                      \ | ||||
|     do {                                                        \ | ||||
|         uint64_t pix1, pix2;                                    \ | ||||
|                                                                 \ | ||||
|         pix1 = LOAD(pixels);                                    \ | ||||
|         pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56);        \ | ||||
|         STORE(AVG2(pix1, pix2), block);                         \ | ||||
|         pixels += line_size;                                    \ | ||||
|         block += line_size;                                     \ | ||||
|     } while (--h) | ||||
| 
 | ||||
| #define OP_Y2(LOAD, STORE)                      \ | ||||
|     do {                                        \ | ||||
|         uint64_t pix = LOAD(pixels);            \ | ||||
|         do {                                    \ | ||||
|             uint64_t next_pix;                  \ | ||||
|                                                 \ | ||||
|             pixels += line_size;                \ | ||||
|             next_pix = LOAD(pixels);            \ | ||||
|             STORE(AVG2(pix, next_pix), block);  \ | ||||
|             block += line_size;                 \ | ||||
|             pix = next_pix;                     \ | ||||
|         } while (--h);                          \ | ||||
|     } while (0) | ||||
| 
 | ||||
/*
 * Diagonal half-pel: every output byte combines a pixel, its right
 * neighbour and the two pixels below them.  To keep the four-way sum inside
 * each byte lane, every byte is split into its low two bits (summed in
 * lo/nlo together with AVG4_ROUNDER, then divided by 4) and its upper six
 * bits pre-shifted right by two (summed in hi/nhi).  The per-row partial
 * sums are carried over, so each source row is loaded only once and h + 1
 * rows are read.  Expects pixels, block, line_size and h in scope; always
 * produces at least one row.
 */
#define OP_XY2(LD, ST)                                                      \
    do {                                                                    \
        uint64_t top   = LD(pixels);                                        \
        uint64_t top_r = ((uint64_t) pixels[8] << 56) | (top >> 8);         \
        uint64_t lo    = (top   & BYTE_VEC(0x03))                           \
                       + (top_r & BYTE_VEC(0x03));                          \
        uint64_t hi    = ((top   & ~BYTE_VEC(0x03)) >> 2)                   \
                       + ((top_r & ~BYTE_VEC(0x03)) >> 2);                  \
                                                                            \
        for (;;) {                                                          \
            uint64_t row, row_r;                                            \
            uint64_t nlo, nhi;                                              \
            uint64_t out;                                                   \
                                                                            \
            pixels += line_size;                                            \
            row   = LD(pixels);                                             \
            row_r = ((uint64_t) pixels[8] << 56) | (row >> 8);              \
            nlo   = (row   & BYTE_VEC(0x03))                                \
                  + (row_r & BYTE_VEC(0x03));                               \
            nhi   = ((row   & ~BYTE_VEC(0x03)) >> 2)                        \
                  + ((row_r & ~BYTE_VEC(0x03)) >> 2);                       \
            out   = hi + nhi                                                \
                  + (((lo + nlo + AVG4_ROUNDER) >> 2) & BYTE_VEC(0x03));    \
            ST(out, block);                                                 \
                                                                            \
            lo     = nlo;                                                   \
            hi     = nhi;                                                   \
            block += line_size;                                             \
            if (!--h)                                                       \
                break;                                                      \
        }                                                                   \
    } while (0)
| 
 | ||||
/*
 * Emit two static functions for one (OPNAME, SUFF) pair:
 *
 *   OPNAME_pixels<SUFF>_axp    expands the OPKIND loop once, i.e. one
 *                              quadword per row; ldq is used when pixels is
 *                              8-byte aligned and uldq otherwise.  The check
 *                              is made once, before the loop.
 *   OPNAME_pixels16<SUFF>_axp  covers 16 bytes per row by calling the former
 *                              for the left half and then for the right half
 *                              (byte offset 8).
 */
#define MAKE_OP(OPNAME, SUFF, OPKIND, STORE)                                \
static void OPNAME ## _pixels ## SUFF ## _axp                               \
        (uint8_t *restrict block, const uint8_t *restrict pixels,           \
         ptrdiff_t line_size, int h)                                        \
{                                                                           \
    const int src_aligned = !((size_t) pixels & 0x7);                       \
                                                                            \
    if (src_aligned) {                                                      \
        OPKIND(ldq, STORE);                                                 \
    } else {                                                                \
        OPKIND(uldq, STORE);                                                \
    }                                                                       \
}                                                                           \
                                                                            \
static void OPNAME ## _pixels16 ## SUFF ## _axp                             \
        (uint8_t *restrict block, const uint8_t *restrict pixels,           \
         ptrdiff_t line_size, int h)                                        \
{                                                                           \
    int x;                                                                  \
                                                                            \
    for (x = 0; x < 16; x += 8)                                             \
        OPNAME ## _pixels ## SUFF ## _axp(block + x, pixels + x,            \
                                          line_size, h);                    \
}
| 
 | ||||
/*
 * Instantiate every half-pel variant of one operation: plain copy, and
 * horizontal (_x2), vertical (_y2) and diagonal (_xy2) interpolation.  Each
 * MAKE_OP line yields an 8-pixel and a 16-pixel wide function named
 * <PREFIX>_pixels[16]<suffix>_axp.  ST_OP is the name of the store macro; it
 * is forwarded as a bare name and only expanded inside the OP* loop bodies.
 */
#define PIXOP(PREFIX, ST_OP)                    \
    MAKE_OP(PREFIX, ,     OP,     ST_OP)        \
    MAKE_OP(PREFIX, _x2,  OP_X2,  ST_OP)        \
    MAKE_OP(PREFIX, _y2,  OP_Y2,  ST_OP)        \
    MAKE_OP(PREFIX, _xy2, OP_XY2, ST_OP)
| 
 | ||||
| /* Rounding primitives.  */ | ||||
| #define AVG2 avg2 | ||||
| #define AVG4 avg4 | ||||
| #define AVG4_ROUNDER BYTE_VEC(0x02) | ||||
| #define STORE(l, b) stq(l, b) | ||||
| PIXOP(put, STORE); | ||||
| 
 | ||||
| #undef STORE | ||||
| #define STORE(l, b) stq(AVG2(l, ldq(b)), b); | ||||
| PIXOP(avg, STORE); | ||||
| 
 | ||||
| /* Not rounding primitives.  */ | ||||
| #undef AVG2 | ||||
| #undef AVG4 | ||||
| #undef AVG4_ROUNDER | ||||
| #undef STORE | ||||
| #define AVG2 avg2_no_rnd | ||||
| #define AVG4 avg4_no_rnd | ||||
| #define AVG4_ROUNDER BYTE_VEC(0x01) | ||||
| #define STORE(l, b) stq(l, b) | ||||
| PIXOP(put_no_rnd, STORE); | ||||
| 
 | ||||
| #undef STORE | ||||
| #define STORE(l, b) stq(AVG2(l, ldq(b)), b); | ||||
| PIXOP(avg_no_rnd, STORE); | ||||
| 
 | ||||
/*
 * 16-byte-wide copy built on the 8-byte-wide assembly routine: the left half
 * of every row is copied by the first call, the right half (byte offset 8)
 * by the second.
 */
static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels,
                                 ptrdiff_t line_size, int h)
{
    int half;

    for (half = 0; half < 2; half++) {
        const int x = 8 * half;

        put_pixels_axp_asm(block + x, pixels + x, line_size, h);
    }
}
| 
 | ||||
| av_cold void ff_hpeldsp_init_alpha(HpelDSPContext *c, int flags) | ||||
| { | ||||
|     c->put_pixels_tab[0][0] = put_pixels16_axp_asm; | ||||
|     c->put_pixels_tab[0][1] = put_pixels16_x2_axp; | ||||
|     c->put_pixels_tab[0][2] = put_pixels16_y2_axp; | ||||
|     c->put_pixels_tab[0][3] = put_pixels16_xy2_axp; | ||||
| 
 | ||||
|     c->put_no_rnd_pixels_tab[0][0] = put_pixels16_axp_asm; | ||||
|     c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_axp; | ||||
|     c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_axp; | ||||
|     c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_axp; | ||||
| 
 | ||||
|     c->avg_pixels_tab[0][0] = avg_pixels16_axp; | ||||
|     c->avg_pixels_tab[0][1] = avg_pixels16_x2_axp; | ||||
|     c->avg_pixels_tab[0][2] = avg_pixels16_y2_axp; | ||||
|     c->avg_pixels_tab[0][3] = avg_pixels16_xy2_axp; | ||||
| 
 | ||||
|     c->avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels16_axp; | ||||
|     c->avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels16_x2_axp; | ||||
|     c->avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels16_y2_axp; | ||||
|     c->avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels16_xy2_axp; | ||||
| 
 | ||||
|     c->put_pixels_tab[1][0] = put_pixels_axp_asm; | ||||
|     c->put_pixels_tab[1][1] = put_pixels_x2_axp; | ||||
|     c->put_pixels_tab[1][2] = put_pixels_y2_axp; | ||||
|     c->put_pixels_tab[1][3] = put_pixels_xy2_axp; | ||||
| 
 | ||||
|     c->put_no_rnd_pixels_tab[1][0] = put_pixels_axp_asm; | ||||
|     c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels_x2_axp; | ||||
|     c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels_y2_axp; | ||||
|     c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels_xy2_axp; | ||||
| 
 | ||||
|     c->avg_pixels_tab[1][0] = avg_pixels_axp; | ||||
|     c->avg_pixels_tab[1][1] = avg_pixels_x2_axp; | ||||
|     c->avg_pixels_tab[1][2] = avg_pixels_y2_axp; | ||||
|     c->avg_pixels_tab[1][3] = avg_pixels_xy2_axp; | ||||
| } | ||||
| @ -1,28 +0,0 @@ | ||||
| /*
 | ||||
|  * This file is part of Libav. | ||||
|  * | ||||
|  * Libav is free software; you can redistribute it and/or | ||||
|  * modify it under the terms of the GNU Lesser General Public | ||||
|  * License as published by the Free Software Foundation; either | ||||
|  * version 2.1 of the License, or (at your option) any later version. | ||||
|  * | ||||
|  * Libav is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||
|  * Lesser General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU Lesser General Public | ||||
|  * License along with Libav; if not, write to the Free Software | ||||
|  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
|  */ | ||||
| 
 | ||||
| #ifndef AVCODEC_ALPHA_HPELDSP_ALPHA_H | ||||
| #define AVCODEC_ALPHA_HPELDSP_ALPHA_H | ||||
| 
 | ||||
| #include <stdint.h> | ||||
| #include <stddef.h> | ||||
| /** | ||||
|  * Copy h rows of 8 bytes each from pixels to block; both pointers | ||||
|  * advance by line_size bytes per row.  Implemented in Alpha assembly. | ||||
|  * NOTE(review): the assembly consumes 4 rows per loop iteration, so h | ||||
|  * is presumably required to be a positive multiple of 4 -- confirm. | ||||
|  */ | ||||
| void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels, | ||||
|                         ptrdiff_t line_size, int h); | ||||
| 
 | ||||
| #endif /* AVCODEC_ALPHA_HPELDSP_ALPHA_H */ | ||||
| @ -1,124 +0,0 @@ | ||||
| /* | ||||
|  * Alpha optimized DSP utils | ||||
|  * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
 | ||||
|  * | ||||
|  * This file is part of Libav. | ||||
|  * | ||||
|  * Libav is free software; you can redistribute it and/or
 | ||||
|  * modify it under the terms of the GNU Lesser General Public | ||||
|  * License as published by the Free Software Foundation; either
 | ||||
|  * version 2.1 of the License, or (at your option) any later version. | ||||
|  * | ||||
|  * Libav is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||
|  * Lesser General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU Lesser General Public | ||||
|  * License along with Libav; if not, write to the Free Software
 | ||||
|  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
|  */ | ||||
| 
 | ||||
| /* | ||||
|  * These functions are scheduled for pca56. They should work | ||||
|  * reasonably on ev6, though. | ||||
|  */ | ||||
| 
 | ||||
| #include "regdef.h" | ||||
| 
 | ||||
|         .set noat
 | ||||
|         .set noreorder
 | ||||
|         .arch pca56
 | ||||
|         .text | ||||
| 
 | ||||
| /************************************************************************ | ||||
|  * void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels, | ||||
|  *                         ptrdiff_t line_size, int h) | ||||
|  * | ||||
|  * Copies one quadword (8 bytes) per row from pixels to block, four rows | ||||
|  * per loop iteration.  Register use: a0 = block, a1 = pixels, | ||||
|  * a2 = line_size, a3 = h. | ||||
|  * | ||||
|  * Both loops subtract 4 from h and repeat until it is exactly zero, so | ||||
|  * h must be a positive multiple of 4.  Rows are written with stq; | ||||
|  * pixels may be unaligned (ldq_u/extql/extqh path). | ||||
|  * NOTE(review): the unaligned path takes its extract shift from a1 | ||||
|  * after a1 has been advanced by line_size, which presumably relies on | ||||
|  * line_size being a multiple of 8 -- confirm against callers. | ||||
|  */ | ||||
|         .align 6
 | ||||
|         .globl put_pixels_axp_asm
 | ||||
|         .ent put_pixels_axp_asm
 | ||||
| put_pixels_axp_asm: | ||||
|         .frame sp, 0, ra | ||||
|         .prologue 0
 | ||||
| 
 | ||||
|         and     a1, 7, t0               /* t0 = pixels & 7 */ | ||||
|         beq     t0, $aligned            /* aligned source: plain ldq loop */ | ||||
| 
 | ||||
|         .align 4
 | ||||
| $unaligned: | ||||
|         ldq_u   t0, 0(a1)               /* row 0: both quadwords covering it */ | ||||
|         ldq_u   t1, 8(a1) | ||||
|         addq    a1, a2, a1 | ||||
|         nop | ||||
| 
 | ||||
|         ldq_u   t2, 0(a1)               /* row 1 */ | ||||
|         ldq_u   t3, 8(a1) | ||||
|         addq    a1, a2, a1 | ||||
|         nop | ||||
| 
 | ||||
|         ldq_u   t4, 0(a1)               /* row 2 */ | ||||
|         ldq_u   t5, 8(a1) | ||||
|         addq    a1, a2, a1 | ||||
|         nop | ||||
| 
 | ||||
|         ldq_u   t6, 0(a1)               /* row 3 */ | ||||
|         ldq_u   t7, 8(a1) | ||||
|         extql   t0, a1, t0              /* a1 is already rows ahead here */ | ||||
|         addq    a1, a2, a1 | ||||
| 
 | ||||
|         extqh   t1, a1, t1 | ||||
|         addq    a0, a2, t8              /* t8 = block + line_size */ | ||||
|         extql   t2, a1, t2 | ||||
|         addq    t8, a2, t9              /* t9 = block + 2 * line_size */ | ||||
| 
 | ||||
|         extqh   t3, a1, t3 | ||||
|         addq    t9, a2, ta              /* ta = block + 3 * line_size; NOTE(review): ta is not defined in the visible part of this file -- confirm where it comes from */ | ||||
|         extql   t4, a1, t4 | ||||
|         or      t0, t1, t0              /* row 0 assembled */ | ||||
| 
 | ||||
|         extqh   t5, a1, t5 | ||||
|         or      t2, t3, t2              /* row 1 assembled */ | ||||
|         extql   t6, a1, t6 | ||||
|         or      t4, t5, t4              /* row 2 assembled */ | ||||
| 
 | ||||
|         extqh   t7, a1, t7 | ||||
|         or      t6, t7, t6              /* row 3 assembled */ | ||||
|         stq     t0, 0(a0) | ||||
|         stq     t2, 0(t8) | ||||
| 
 | ||||
|         stq     t4, 0(t9) | ||||
|         subq    a3, 4, a3               /* h -= 4 */ | ||||
|         stq     t6, 0(ta) | ||||
|         addq    ta, a2, a0              /* block += 4 * line_size */ | ||||
| 
 | ||||
|         bne     a3, $unaligned          /* loop until h == 0 */ | ||||
|         ret | ||||
| 
 | ||||
|         .align 4
 | ||||
| $aligned: | ||||
|         ldq     t0, 0(a1)               /* row 0 */ | ||||
|         addq    a1, a2, a1 | ||||
|         ldq     t1, 0(a1)               /* row 1 */ | ||||
|         addq    a1, a2, a1 | ||||
| 
 | ||||
|         ldq     t2, 0(a1)               /* row 2 */ | ||||
|         addq    a1, a2, a1 | ||||
|         ldq     t3, 0(a1)               /* row 3 */ | ||||
| 
 | ||||
|         addq    a0, a2, t4              /* t4 = block + line_size */ | ||||
|         addq    a1, a2, a1 | ||||
|         addq    t4, a2, t5              /* t5 = block + 2 * line_size */ | ||||
|         subq    a3, 4, a3               /* h -= 4 */ | ||||
| 
 | ||||
|         stq     t0, 0(a0) | ||||
|         addq    t5, a2, t6              /* t6 = block + 3 * line_size */ | ||||
|         stq     t1, 0(t4) | ||||
|         addq    t6, a2, a0              /* block += 4 * line_size */ | ||||
| 
 | ||||
|         stq     t2, 0(t5) | ||||
|         stq     t3, 0(t6) | ||||
| 
 | ||||
|         bne     a3, $aligned            /* loop until h == 0 */ | ||||
|         ret | ||||
|         .end put_pixels_axp_asm
 | ||||
| @ -1,345 +0,0 @@ | ||||
| /*
 | ||||
|  * Alpha optimized DSP utils | ||||
|  * Copyright (c) 2002 Falk Hueffner <falk@debian.org> | ||||
|  * | ||||
|  * This file is part of Libav. | ||||
|  * | ||||
|  * Libav is free software; you can redistribute it and/or | ||||
|  * modify it under the terms of the GNU Lesser General Public | ||||
|  * License as published by the Free Software Foundation; either | ||||
|  * version 2.1 of the License, or (at your option) any later version. | ||||
|  * | ||||
|  * Libav is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||
|  * Lesser General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU Lesser General Public | ||||
|  * License along with Libav; if not, write to the Free Software | ||||
|  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
|  */ | ||||
| 
 | ||||
| #include "dsputil_alpha.h" | ||||
| #include "asm.h" | ||||
| 
 | ||||
/*
 * Read an 8x8 block of bytes and store it as 64 consecutive 16-bit values.
 * Each row is loaded as one quadword; unpkbw is applied to the low and to
 * the high 32 bits and the two results are stored as the row's first and
 * last four output values (presumably unpkbw widens four bytes to four
 * 16-bit words).  Source rows are line_size bytes apart.
 */
void get_pixels_mvi(int16_t *restrict block,
                    const uint8_t *restrict pixels, int line_size)
{
    int row;

    for (row = 0; row < 8; row++) {
        const uint64_t packed = ldq(pixels);
        int16_t *out = block + 8 * row;

        stq(unpkbw(packed),       out);
        stq(unpkbw(packed >> 32), out + 4);

        pixels += line_size;
    }
}
| 
 | ||||
/*
 * Store the 8x8 difference s1 - s2 as 64 consecutive 16-bit values.  Both
 * sources are read one quadword per row, stride bytes apart.
 *
 * The subtraction is done on the whole quadword at once.  cmpbge yields a
 * per-byte "x >= y" mask; zap with that mask keeps a constant only in the
 * byte lanes where x < y.  NOTE(review): adding 4 * 0x40 = 0x100 in those
 * lanes presumably returns the borrow taken from the next lane, and the 0xff
 * kept in "neg" supplies the high byte of the negative 16-bit result --
 * confirm against the cmpbge/zap/unpkbw definitions.
 */
void diff_pixels_mvi(int16_t *block, const uint8_t *s1, const uint8_t *s2,
                     int stride) {
    /* 0x40 in every byte rather than 0x100 >> 0, so that the "* 4" below
     * can be folded into a scaled add (s4addq). */
    const uint64_t quarter = 0x4040404040404040ULL;
    int row;

    for (row = 0; row < 8; row++) {
        const uint64_t x   = ldq(s1);
        const uint64_t y   = ldq(s2);
        const uint64_t ge  = cmpbge(x, y);
        const uint64_t fix = zap(quarter, ge);
        const uint64_t neg = zap(-1, ge);
        uint64_t diff      = x - y;

        diff += 4 * fix;

        stq(unpkbw(diff)       | (unpkbw(neg)       << 8), block);
        stq(unpkbw(diff >> 32) | (unpkbw(neg >> 32) << 8), block + 4);

        block += 8;
        s1    += stride;
        s2    += stride;
    }
}
| 
 | ||||
/*
 * Byte-wise average of two quadwords, rounding up: per byte,
 * (a | b) - ((a ^ b) >> 1) equals (a + b + 1) >> 1.  The 0xfe mask removes
 * the low bit of every byte before the shift so that nothing leaks into the
 * neighbouring lane (BYTE_VEC presumably replicates its argument into all
 * eight bytes).
 */
static inline uint64_t avg2(uint64_t a, uint64_t b)
{
    const uint64_t either    = a | b;
    const uint64_t half_diff = ((a ^ b) & BYTE_VEC(0xfe)) >> 1;

    return either - half_diff;
}
| 
 | ||||
/*
 * Byte-wise rounded average of four quadwords, (l1 + l2 + l3 + l4 + 2) >> 2
 * in every byte lane.  To stay within a lane the upper six bits of each byte
 * are divided by four before summing, while the low two bits are summed
 * together with the rounding constant and divided afterwards.
 */
static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
{
    const uint64_t lo_mask = BYTE_VEC(0x03);
    const uint64_t hi_mask = ~BYTE_VEC(0x03);
    const uint64_t in[4]   = { l1, l2, l3, l4 };
    uint64_t hi_sum = 0;
    uint64_t lo_sum = BYTE_VEC(0x02);
    int i;

    for (i = 0; i < 4; i++) {
        hi_sum += (in[i] & hi_mask) >> 2;
        lo_sum += in[i] & lo_mask;
    }

    return hi_sum + ((lo_sum >> 2) & lo_mask);
}
| 
 | ||||
/*
 * Accumulate perr() between pix1 and pix2 over h rows, one quadword
 * (8 bytes) per row; rows are line_size bytes apart.  pix1 is always read
 * with ldq; pix2 is read with uldq when its start address is not 8-byte
 * aligned.  The alignment is tested once, before the loop.  v is unused,
 * and at least one row is always processed.
 */
int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    const int pix2_aligned = !((size_t) pix2 & 0x7);
    int sad = 0;

    if (pix2_aligned) {
        do {
            const uint64_t ref = ldq(pix1);
            const uint64_t cur = ldq(pix2);

            sad  += perr(ref, cur);
            pix1 += line_size;
            pix2 += line_size;
        } while (--h);

        return sad;
    }

    /* This path works only when pix2 really is unaligned. */
    do {                        /* 8 pixels per iteration */
        const uint64_t ref = ldq(pix1);
        const uint64_t cur = uldq(pix2);

        sad  += perr(ref, cur);
        pix1 += line_size;
        pix2 += line_size;
    } while (--h);

    return sad;
}
| 
 | ||||
| #if 0                           /* disabled C version; now done in assembly (presumably pix_abs16x16_mvi_asm) */
 | ||||
| int pix_abs16x16_mvi(uint8_t *pix1, uint8_t *pix2, int line_size) | ||||
| { | ||||
|     int result = 0; | ||||
|     int h = 16; | ||||
| 
 | ||||
|     if ((size_t) pix2 & 0x7) { | ||||
|         /* this path works only when pix2 really is unaligned */ | ||||
|         do {                    /* process 16 pixels per iteration */ | ||||
|             uint64_t p1_l, p1_r, p2_l, p2_r; | ||||
|             uint64_t t; | ||||
| 
 | ||||
|             p1_l  = ldq(pix1); | ||||
|             p1_r  = ldq(pix1 + 8); | ||||
|             t     = ldq_u(pix2 + 8); /* middle quadword, shared by both halves */ | ||||
|             p2_l  = extql(ldq_u(pix2), pix2) | extqh(t, pix2); | ||||
|             p2_r  = extql(t, pix2) | extqh(ldq_u(pix2 + 16), pix2); | ||||
|             pix1 += line_size; | ||||
|             pix2 += line_size; | ||||
| 
 | ||||
|             result += perr(p1_l, p2_l) | ||||
|                     + perr(p1_r, p2_r); | ||||
|         } while (--h); | ||||
|     } else { | ||||
|         do { | ||||
|             uint64_t p1_l, p1_r, p2_l, p2_r; | ||||
| 
 | ||||
|             p1_l = ldq(pix1); | ||||
|             p1_r = ldq(pix1 + 8); | ||||
|             p2_l = ldq(pix2); | ||||
|             p2_r = ldq(pix2 + 8); | ||||
|             pix1 += line_size; | ||||
|             pix2 += line_size; | ||||
| 
 | ||||
|             result += perr(p1_l, p2_l) | ||||
|                     + perr(p1_r, p2_r); | ||||
|         } while (--h); | ||||
|     } | ||||
| 
 | ||||
|     return result; | ||||
| } | ||||
| #endif /* 0 */ | ||||
| 
 | ||||
/*
 * perr() sum between pix1 and the horizontally half-pel interpolated pix2,
 * 16 bytes per row over h rows (rows are line_size bytes apart).  Every
 * compared pix2 byte is avg2 of a pixel and its right neighbour.
 *
 * Three loops are selected by the alignment of pix2, which is sampled once
 * before looping:
 *   shift == 0  aligned loads; the one-pixel-right vectors are built by
 *               shifting and pulling in the first byte of the next quadword.
 *   shift == 7  |.......l|lllllllr|rrrrrrr*|  the one-pixel-right vectors
 *               are exactly the next raw quadwords (an extract shift of 8
 *               would be treated as 0 by extqh), so they are used directly.
 *   otherwise   both vectors are extracted, with shift and shift + 1.
 *
 * v is unused; at least one row is always processed.
 */
int pix_abs16x16_x2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    const uint64_t shift = (size_t) pix2 & 0x7;
    int sad = 0;

    if (shift == 0) {
        do {
            const uint64_t ref_l = ldq(pix1);
            const uint64_t ref_r = ldq(pix1 + 8);
            const uint64_t l     = ldq(pix2);
            const uint64_t r     = ldq(pix2 + 8);
            const uint64_t l_x1  = (l >> 8) | (r << 56);
            const uint64_t r_x1  = (r >> 8) | ((uint64_t) pix2[16] << 56);

            sad  += perr(ref_l, avg2(l, l_x1))
                  + perr(ref_r, avg2(r, r_x1));
            pix1 += line_size;
            pix2 += line_size;
        } while (--h);
    } else if (shift == 7) {
        do {
            const uint64_t ref_l = ldq(pix1);
            const uint64_t ref_r = ldq(pix1 + 8);
            const uint64_t l     = ldq_u(pix2);
            const uint64_t m     = ldq_u(pix2 + 8);
            const uint64_t r     = ldq_u(pix2 + 16);
            const uint64_t l_x0  = extql(l, shift) | extqh(m, shift);
            const uint64_t r_x0  = extql(m, shift) | extqh(r, shift);

            sad  += perr(ref_l, avg2(l_x0, m))
                  + perr(ref_r, avg2(r_x0, r));
            pix1 += line_size;
            pix2 += line_size;
        } while (--h);
    } else {
        const uint64_t shift1 = shift + 1;

        do {
            const uint64_t ref_l = ldq(pix1);
            const uint64_t ref_r = ldq(pix1 + 8);
            const uint64_t l     = ldq_u(pix2);
            const uint64_t m     = ldq_u(pix2 + 8);
            const uint64_t r     = ldq_u(pix2 + 16);
            const uint64_t l_x0  = extql(l, shift)  | extqh(m, shift);
            const uint64_t l_x1  = extql(l, shift1) | extqh(m, shift1);
            const uint64_t r_x0  = extql(m, shift)  | extqh(r, shift);
            const uint64_t r_x1  = extql(m, shift1) | extqh(r, shift1);

            sad  += perr(ref_l, avg2(l_x0, l_x1))
                  + perr(ref_r, avg2(r_x0, r_x1));
            pix1 += line_size;
            pix2 += line_size;
        } while (--h);
    }

    return sad;
}
| 
 | ||||
/*
 * perr() sum between pix1 and the vertically half-pel interpolated pix2,
 * 16 bytes per row over h rows (rows are line_size bytes apart).  Every
 * compared pix2 row is avg2 of a row and the row below it; each pix2 row is
 * loaded once and carried into the next iteration, so h + 1 rows of pix2 are
 * read.  The alignment of pix2 is tested once and selects aligned loads or
 * the ldq_u/extql/extqh sequence.  v is unused; at least one row is always
 * processed.
 */
int pix_abs16x16_y2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    uint64_t cur_l, cur_r;

    if (!((size_t) pix2 & 0x7)) {
        cur_l = ldq(pix2);
        cur_r = ldq(pix2 + 8);

        do {
            const uint64_t ref_l = ldq(pix1);
            const uint64_t ref_r = ldq(pix1 + 8);
            uint64_t nxt_l, nxt_r;

            pix2 += line_size;
            nxt_l = ldq(pix2);
            nxt_r = ldq(pix2 + 8);

            sad += perr(ref_l, avg2(cur_l, nxt_l))
                 + perr(ref_r, avg2(cur_r, nxt_r));

            pix1 += line_size;
            cur_l = nxt_l;
            cur_r = nxt_r;
        } while (--h);

        return sad;
    }

    {
        /* The middle quadword contributes to both halves of the row. */
        const uint64_t mid = ldq_u(pix2 + 8);

        cur_l = extql(ldq_u(pix2), pix2) | extqh(mid, pix2);
        cur_r = extql(mid, pix2) | extqh(ldq_u(pix2 + 16), pix2);
    }

    do {
        const uint64_t ref_l = ldq(pix1);
        const uint64_t ref_r = ldq(pix1 + 8);
        uint64_t mid, nxt_l, nxt_r;

        pix2 += line_size;
        mid   = ldq_u(pix2 + 8);
        nxt_l = extql(ldq_u(pix2), pix2) | extqh(mid, pix2);
        nxt_r = extql(mid, pix2) | extqh(ldq_u(pix2 + 16), pix2);

        sad += perr(ref_l, avg2(cur_l, nxt_l))
             + perr(ref_r, avg2(cur_r, nxt_r));

        pix1 += line_size;
        cur_l = nxt_l;
        cur_r = nxt_r;
    } while (--h);

    return sad;
}
| 
 | ||||
/*
 * perr() sum between pix1 and the diagonally half-pel interpolated pix2,
 * 16 bytes per row over h rows (rows are line_size bytes apart).  Every
 * compared pix2 byte is avg4 of a pixel, its right neighbour and the two
 * pixels below them, so 17 bytes of each of h + 1 pix2 rows are needed.
 *
 * Only the 17th byte of a row is fetched (pix2[16]) instead of a whole
 * quadword at pix2 + 16: the quadword was shifted left by 56, which on the
 * little-endian layout the rest of this function relies on keeps just that
 * byte, so the value is the same without touching the 7 bytes after it.
 * The pix1 row is loaded at the top of each iteration, so no pix1 row beyond
 * the h compared ones is read.
 *
 * v is unused; at least one row is always processed.
 */
int pix_abs16x16_xy2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int result = 0;
    uint64_t p2_l, p2_r, p2_x;

    if ((size_t) pix2 & 0x7) { /* could be optimized a lot */
        p2_l = uldq(pix2);
        p2_r = uldq(pix2 + 8);
    } else {
        p2_l = ldq(pix2);
        p2_r = ldq(pix2 + 8);
    }
    /* 17th pixel of the row, already placed in the top byte lane. */
    p2_x = (uint64_t) pix2[16] << 56;

    do {
        uint64_t p1_l, p1_r;
        uint64_t np2_l, np2_r, np2_x;

        p1_l = ldq(pix1);
        p1_r = ldq(pix1 + 8);

        pix1 += line_size;
        pix2 += line_size;

        if ((size_t) pix2 & 0x7) { /* could be optimized a lot */
            np2_l = uldq(pix2);
            np2_r = uldq(pix2 + 8);
        } else {
            np2_l = ldq(pix2);
            np2_r = ldq(pix2 + 8);
        }
        np2_x = (uint64_t) pix2[16] << 56;

        result += perr(p1_l,
                       avg4( p2_l, ( p2_l >> 8) | ( p2_r << 56),
                            np2_l, (np2_l >> 8) | (np2_r << 56)))
                + perr(p1_r,
                       avg4( p2_r, ( p2_r >> 8) |  p2_x,
                            np2_r, (np2_r >> 8) | np2_x));

        /* The lower row becomes the upper row of the next iteration. */
        p2_l = np2_l;
        p2_r = np2_r;
        p2_x = np2_x;
    } while (--h);

    return result;
}
| @ -1,179 +0,0 @@ | ||||
| /* | ||||
|  * Alpha optimized DSP utils | ||||
|  * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
 | ||||
|  * | ||||
|  * This file is part of Libav. | ||||
|  * | ||||
|  * Libav is free software; you can redistribute it and/or
 | ||||
|  * modify it under the terms of the GNU Lesser General Public | ||||
|  * License as published by the Free Software Foundation; either
 | ||||
|  * version 2.1 of the License, or (at your option) any later version. | ||||
|  * | ||||
|  * Libav is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||
|  * Lesser General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU Lesser General Public | ||||
|  * License along with Libav; if not, write to the Free Software
 | ||||
|  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
|  */ | ||||
| 
 | ||||
| #include "regdef.h" | ||||
| 
 | ||||
| /* Short aliases for the remaining temporaries t10-t12 and for AT. */ | ||||
| #define ta t10 | ||||
| #define tb t11 | ||||
| #define tc t12 | ||||
| #define td AT | ||||
| /* Danger: these alias the argument registers a5, a4, a3 and the return | ||||
|  * value register v0.  In pix_abs16x16_mvi_asm below a3 holds line_size, | ||||
|  * a4 holds h and v0 the running sum, so only te is used as a scratch | ||||
|  * register there. */ | ||||
| #define te a5 | ||||
| #define tf a4 | ||||
| #define tg a3 | ||||
| #define th v0 | ||||
| 
 | ||||
|         .set noat
 | ||||
|         .set noreorder
 | ||||
|         .arch pca56
 | ||||
|         .text | ||||
| 
 | ||||
| /***************************************************************************** | ||||
|  * int pix_abs16x16_mvi_asm(void *v, uint8_t *pix1, uint8_t *pix2, | ||||
|  *                          int line_size, int h) | ||||
|  * | ||||
|  * Register use: a1 = pix1 (reference, read with aligned ldq), a2 = pix2 | ||||
|  * (may be unaligned), a3 = line_size, a4 = h, v0 = accumulated error. | ||||
|  * a0 is never read as an argument; the aligned loop uses it as scratch. | ||||
|  * NOTE(review): the first parameter is presumably the unused context | ||||
|  * pointer taken by the C pix_abs*_mvi functions -- confirm against the | ||||
|  * header. | ||||
|  * | ||||
|  * The unaligned loop handles 2 rows per iteration and the aligned loop 4; | ||||
|  * both repeat until h is exactly zero, so h must be a positive multiple | ||||
|  * of 2 or 4 respectively. | ||||
|  * NOTE(review): the unaligned loop extracts with a2 after a2 has been | ||||
|  * advanced by line_size, which presumably relies on line_size being a | ||||
|  * multiple of 8 -- confirm against callers. | ||||
|  * | ||||
|  * This code is written with a pca56 in mind. For ev6, one should | ||||
|  * really take the increased latency of 3 cycles for MVI instructions | ||||
|  * into account. | ||||
|  * | ||||
|  * It is important to keep the loading and first use of a register as | ||||
|  * far apart as possible, because if a register is accessed before it | ||||
|  * has been fetched from memory, the CPU will stall. | ||||
|  */ | ||||
|         .align 4
 | ||||
|         .globl pix_abs16x16_mvi_asm
 | ||||
|         .ent pix_abs16x16_mvi_asm
 | ||||
| pix_abs16x16_mvi_asm: | ||||
|         .frame sp, 0, ra, 0 | ||||
|         .prologue 0
 | ||||
| 
 | ||||
|         and     a2, 7, t0               /* t0 = pix2 & 7 */ | ||||
|         clr     v0                      /* error sum = 0 */ | ||||
|         beq     t0, $aligned            /* aligned pix2: plain ldq loop */ | ||||
|         .align 4
 | ||||
| $unaligned: | ||||
|         /* Registers: | ||||
|            line 0: | ||||
|            t0:  left_u -> left lo -> left | ||||
|            t1:  mid | ||||
|            t2:  right_u -> right hi -> right | ||||
|            t3:  ref left | ||||
|            t4:  ref right | ||||
|            line 1: | ||||
|            t5:  left_u -> left lo -> left | ||||
|            t6:  mid | ||||
|            t7:  right_u -> right hi -> right | ||||
|            t8:  ref left | ||||
|            t9:  ref right | ||||
|            temp: | ||||
|            ta:  left hi | ||||
|            tb:  right lo | ||||
|            tc:  error left | ||||
|            td:  error right  */ | ||||
| 
 | ||||
|         /* load line 0 */ | ||||
|         ldq_u   t0, 0(a2)       # left_u | ||||
|         ldq_u   t1, 8(a2)       # mid | ||||
|         ldq_u   t2, 16(a2)      # right_u | ||||
|         ldq     t3, 0(a1)       # ref left | ||||
|         ldq     t4, 8(a1)       # ref right | ||||
|         addq    a1, a3, a1      # pix1 | ||||
|         addq    a2, a3, a2      # pix2 | ||||
|         /* load line 1 */ | ||||
|         ldq_u   t5, 0(a2)       # left_u | ||||
|         ldq_u   t6, 8(a2)       # mid | ||||
|         ldq_u   t7, 16(a2)      # right_u | ||||
|         ldq     t8, 0(a1)       # ref left | ||||
|         ldq     t9, 8(a1)       # ref right | ||||
|         addq    a1, a3, a1      # pix1 | ||||
|         addq    a2, a3, a2      # pix2 | ||||
|         /* calc line 0 */ | ||||
|         extql   t0, a2, t0      # left lo | ||||
|         extqh   t1, a2, ta      # left hi | ||||
|         extql   t1, a2, tb      # right lo | ||||
|         or      t0, ta, t0      # left | ||||
|         extqh   t2, a2, t2      # right hi | ||||
|         perr    t3, t0, tc      # error left | ||||
|         or      t2, tb, t2      # right | ||||
|         perr    t4, t2, td      # error right | ||||
|         addq    v0, tc, v0      # add error left | ||||
|         addq    v0, td, v0      # add error right | ||||
|         /* calc line 1 */ | ||||
|         extql   t5, a2, t5      # left lo | ||||
|         extqh   t6, a2, ta      # left hi | ||||
|         extql   t6, a2, tb      # right lo | ||||
|         or      t5, ta, t5      # left | ||||
|         extqh   t7, a2, t7      # right hi | ||||
|         perr    t8, t5, tc      # error left | ||||
|         or      t7, tb, t7      # right | ||||
|         perr    t9, t7, td      # error right | ||||
|         addq    v0, tc, v0      # add error left | ||||
|         addq    v0, td, v0      # add error right | ||||
|         /* loop */ | ||||
|         subq    a4,  2, a4      # h -= 2 | ||||
|         bne     a4, $unaligned | ||||
|         ret | ||||
| 
 | ||||
|         .align 4
 | ||||
| $aligned: | ||||
|         /* load line 0 */ | ||||
|         ldq     t0, 0(a2)       # left | ||||
|         ldq     t1, 8(a2)       # right | ||||
|         addq    a2, a3, a2      # pix2 | ||||
|         ldq     t2, 0(a1)       # ref left | ||||
|         ldq     t3, 8(a1)       # ref right | ||||
|         addq    a1, a3, a1      # pix1 | ||||
|         /* load line 1 */ | ||||
|         ldq     t4, 0(a2)       # left | ||||
|         ldq     t5, 8(a2)       # right | ||||
|         addq    a2, a3, a2      # pix2 | ||||
|         ldq     t6, 0(a1)       # ref left | ||||
|         ldq     t7, 8(a1)       # ref right | ||||
|         addq    a1, a3, a1      # pix1 | ||||
|         /* load line 2 */ | ||||
|         ldq     t8, 0(a2)       # left | ||||
|         ldq     t9, 8(a2)       # right | ||||
|         addq    a2, a3, a2      # pix2 | ||||
|         ldq     ta, 0(a1)       # ref left | ||||
|         ldq     tb, 8(a1)       # ref right | ||||
|         addq    a1, a3, a1      # pix1 | ||||
|         /* load line 3 */ | ||||
|         ldq     tc, 0(a2)       # left | ||||
|         ldq     td, 8(a2)       # right | ||||
|         addq    a2, a3, a2      # pix2 | ||||
|         ldq     te, 0(a1)       # ref left | ||||
|         ldq     a0, 8(a1)       # ref right | ||||
|         /* calc line 0 */ | ||||
|         perr    t0, t2, t0      # error left | ||||
|         addq    a1, a3, a1      # pix1 | ||||
|         perr    t1, t3, t1      # error right | ||||
|         addq    v0, t0, v0      # add error left | ||||
|         /* calc line 1 */ | ||||
|         perr    t4, t6, t0      # error left | ||||
|         addq    v0, t1, v0      # add error right | ||||
|         perr    t5, t7, t1      # error right | ||||
|         addq    v0, t0, v0      # add error left | ||||
|         /* calc line 2 */ | ||||
|         perr    t8, ta, t0      # error left | ||||
|         addq    v0, t1, v0      # add error right | ||||
|         perr    t9, tb, t1      # error right | ||||
|         addq    v0, t0, v0      # add error left | ||||
|         /* calc line 3 */ | ||||
|         perr    tc, te, t0      # error left | ||||
|         addq    v0, t1, v0      # add error right | ||||
|         perr    td, a0, t1      # error right | ||||
|         addq    v0, t0, v0      # add error left | ||||
|         addq    v0, t1, v0      # add error right | ||||
|         /* loop */ | ||||
|         subq    a4,  4, a4      # h -= 4 | ||||
|         bne     a4, $aligned | ||||
|         ret | ||||
|         .end pix_abs16x16_mvi_asm
 | ||||
| @ -1,110 +0,0 @@ | ||||
| /*
 | ||||
|  * Alpha optimized DSP utils | ||||
|  * Copyright (c) 2002 Falk Hueffner <falk@debian.org> | ||||
|  * | ||||
|  * This file is part of Libav. | ||||
|  * | ||||
|  * Libav is free software; you can redistribute it and/or | ||||
|  * modify it under the terms of the GNU Lesser General Public | ||||
|  * License as published by the Free Software Foundation; either | ||||
|  * version 2.1 of the License, or (at your option) any later version. | ||||
|  * | ||||
|  * Libav is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||
|  * Lesser General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU Lesser General Public | ||||
|  * License along with Libav; if not, write to the Free Software | ||||
|  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
|  */ | ||||
| 
 | ||||
| #include "libavutil/attributes.h" | ||||
| #include "libavcodec/mpegvideo.h" | ||||
| #include "asm.h" | ||||
| 
 | ||||
/*
 * H.263 dequantization of a coefficient block, handling four 16-bit
 * coefficients per iteration inside a single 64-bit word.
 *
 * block    - coefficients, rewritten in place
 * n_coeffs - index of the last coefficient to process (inclusive); whole
 *            groups of four are processed, starting at index 0
 * qscale   - quantizer scale; every level is multiplied by 2 * qscale
 * qadd     - rounding term: added to positive levels, subtracted from
 *            negative ones, and suppressed for levels that are zero
 */
static void dct_unquantize_h263_axp(int16_t *block, int n_coeffs,
                                    uint64_t qscale, uint64_t qadd)
{
    uint64_t qmul       = qscale << 1;
    /* NOTE(review): presumably compensates for what the plain 64-bit
       multiply below spills from a negative word into the next-higher
       word -- confirm against asm.h / the instruction semantics.  */
    uint64_t correction = WORD_VEC(qmul * 255 >> 8);
    int pos;

    qadd = WORD_VEC(qadd);

    for (pos = 0; pos <= n_coeffs; block += 4, pos += 4) {
        uint64_t quad, neg, zero_bytes;

        quad = ldq(block);
        if (!quad)
            continue;           /* nothing to scale in this group */

#ifdef __alpha_max__
        /* Chosen at compile time: the speed gain was not judged worth
           runtime detection.  */
        neg = maxsw4(quad, -1); /* negative words become ffff (-1) */
        neg = minsw4(neg, 0);   /* positive words become 0000 (0) */
#else
        neg  = cmpbge(WORD_VEC(0x7fff), quad);
        neg &= (neg >> 1) | (1 << 7);
        neg  = zap(-1, neg);
#endif

        /* Taken from the unscaled levels.  Widening with
           zero_bytes |= zero_bytes << 1 is unnecessary: qadd <= 255, so
           clearing only the low byte of a word is enough.  */
        zero_bytes  = cmpbge(0, quad);
        zero_bytes &= zero_bytes >> 1;

        quad = quad * qmul - (correction & (neg << 16));

        /* +qadd for non-negative words (dropped where the level was 0),
           -qadd for negative words.  */
        quad += zap(qadd & ~neg, zero_bytes);
        quad -= qadd & neg;

        stq(quad, block);
    }
}
| 
 | ||||
| static void dct_unquantize_h263_intra_axp(MpegEncContext *s, int16_t *block, | ||||
|                                     int n, int qscale) | ||||
| { | ||||
|     int n_coeffs; | ||||
|     uint64_t qadd; | ||||
|     int16_t block0 = block[0]; | ||||
| 
 | ||||
|     if (!s->h263_aic) { | ||||
|         if (n < 4) | ||||
|             block0 *= s->y_dc_scale; | ||||
|         else | ||||
|             block0 *= s->c_dc_scale; | ||||
|         qadd = (qscale - 1) | 1; | ||||
|     } else { | ||||
|         qadd = 0; | ||||
|     } | ||||
| 
 | ||||
|     if(s->ac_pred) | ||||
|         n_coeffs = 63; | ||||
|     else | ||||
|         n_coeffs = s->inter_scantable.raster_end[s->block_last_index[n]]; | ||||
| 
 | ||||
|     dct_unquantize_h263_axp(block, n_coeffs, qscale, qadd); | ||||
| 
 | ||||
|     block[0] = block0; | ||||
| } | ||||
| 
 | ||||
| static void dct_unquantize_h263_inter_axp(MpegEncContext *s, int16_t *block, | ||||
|                                     int n, int qscale) | ||||
| { | ||||
|     int n_coeffs = s->inter_scantable.raster_end[s->block_last_index[n]]; | ||||
|     dct_unquantize_h263_axp(block, n_coeffs, qscale, (qscale - 1) | 1); | ||||
| } | ||||
| 
 | ||||
| av_cold void ff_MPV_common_init_axp(MpegEncContext *s) | ||||
| { | ||||
|     s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_axp; | ||||
|     s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_axp; | ||||
| } | ||||
| @ -1,77 +0,0 @@ | ||||
| /*
 | ||||
|  * Alpha optimized DSP utils | ||||
|  * copyright (c) 2002 Falk Hueffner <falk@debian.org> | ||||
|  * | ||||
|  * This file is part of Libav. | ||||
|  * | ||||
|  * Libav is free software; you can redistribute it and/or | ||||
|  * modify it under the terms of the GNU Lesser General Public | ||||
|  * License as published by the Free Software Foundation; either | ||||
|  * version 2.1 of the License, or (at your option) any later version. | ||||
|  * | ||||
|  * Libav is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||
|  * Lesser General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU Lesser General Public | ||||
|  * License along with Libav; if not, write to the Free Software | ||||
|  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
|  */ | ||||
| 
 | ||||
| /* Some BSDs don't seem to have regdef.h... sigh  */ | ||||
| #ifndef AVCODEC_ALPHA_REGDEF_H | ||||
| #define AVCODEC_ALPHA_REGDEF_H | ||||
| 
 | ||||
| #define v0      $0      /* function return value */ | ||||
| 
 | ||||
| #define t0      $1      /* temporary registers (caller-saved) */ | ||||
| #define t1      $2 | ||||
| #define t2      $3 | ||||
| #define t3      $4 | ||||
| #define t4      $5 | ||||
| #define t5      $6 | ||||
| #define t6      $7 | ||||
| #define t7      $8 | ||||
| 
 | ||||
| #define s0      $9      /* saved-registers (callee-saved registers) */ | ||||
| #define s1      $10 | ||||
| #define s2      $11 | ||||
| #define s3      $12 | ||||
| #define s4      $13 | ||||
| #define s5      $14 | ||||
| #define s6      $15 | ||||
| #define fp      s6      /* frame-pointer (s6 in frame-less procedures) */ | ||||
| 
 | ||||
| #define a0      $16     /* argument registers (caller-saved) */ | ||||
| #define a1      $17 | ||||
| #define a2      $18 | ||||
| #define a3      $19 | ||||
| #define a4      $20 | ||||
| #define a5      $21 | ||||
| 
 | ||||
| #define t8      $22     /* more temps (caller-saved) */ | ||||
| #define t9      $23 | ||||
| #define t10     $24 | ||||
| #define t11     $25 | ||||
| #define ra      $26     /* return address register */ | ||||
| #define t12     $27 | ||||
| 
 | ||||
| #define pv      t12     /* procedure-variable register */ | ||||
| #define AT      $at     /* assembler temporary */ | ||||
| #define gp      $29     /* global pointer */ | ||||
| #define sp      $30     /* stack pointer */ | ||||
| #define zero    $31     /* reads as zero, writes are noops */ | ||||
| 
 | ||||
| /* Some nicer register names.  */ | ||||
| #define ta t10 /* = $24 */ | ||||
| #define tb t11 /* = $25 */ | ||||
| #define tc t12 /* = $27, the same register as pv */ | ||||
| #define td AT /* = $at, the assembler temporary */ | ||||
| /* Danger: these overlap with the argument list and the return value */ | ||||
| #define te a5 /* = $21, sixth argument register */ | ||||
| #define tf a4 /* = $20, fifth argument register */ | ||||
| #define tg a3 /* = $19, fourth argument register */ | ||||
| #define th v0 /* = $0, the return value register */ | ||||
| 
 | ||||
| #endif /* AVCODEC_ALPHA_REGDEF_H */ | ||||
| @ -1,303 +0,0 @@ | ||||
| /*
 | ||||
|  * Simple IDCT (Alpha optimized) | ||||
|  * | ||||
|  * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at> | ||||
|  * | ||||
|  * based upon some outcommented C code from mpeg2dec (idct_mmx.c | ||||
|  * written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>) | ||||
|  * | ||||
|  * Alpha optimizations by Måns Rullgård <mans@mansr.com> | ||||
|  *                     and Falk Hueffner <falk@debian.org> | ||||
|  * | ||||
|  * This file is part of Libav. | ||||
|  * | ||||
|  * Libav is free software; you can redistribute it and/or | ||||
|  * modify it under the terms of the GNU Lesser General Public | ||||
|  * License as published by the Free Software Foundation; either | ||||
|  * version 2.1 of the License, or (at your option) any later version. | ||||
|  * | ||||
|  * Libav is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||
|  * Lesser General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU Lesser General Public | ||||
|  * License along with Libav; if not, write to the Free Software | ||||
|  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
|  */ | ||||
| 
 | ||||
| #include "dsputil_alpha.h" | ||||
| #include "asm.h" | ||||
| 
 | ||||
| // cos(i * M_PI / 16) * sqrt(2) * (1 << 14)
 | ||||
| // W4 is actually exactly 16384, but using 16383 works around
 | ||||
| // accumulating rounding errors for some encoders
 | ||||
| #define W1 22725 /* i = 1 */ | ||||
| #define W2 21407 /* i = 2 */ | ||||
| #define W3 19266 /* i = 3 */ | ||||
| #define W4 16383 /* i = 4, deliberately one below the exact 16384 */ | ||||
| #define W5 12873 /* i = 5 */ | ||||
| #define W6  8867 /* i = 6 */ | ||||
| #define W7  4520 /* i = 7 */ | ||||
| #define ROW_SHIFT 11 /* right shift applied to every row-pass output */ | ||||
| #define COL_SHIFT 20 /* right shift applied to every column-pass output */ | ||||
| 
 | ||||
| /* 0: all entries 0, 1: only first entry nonzero, 2: otherwise  */ | ||||
| static inline int idct_row(int16_t *row) | ||||
| { | ||||
|     int a0, a1, a2, a3, b0, b1, b2, b3, t; | ||||
|     uint64_t l, r, t2; | ||||
|     l = ldq(row); | ||||
|     r = ldq(row + 4); | ||||
| 
 | ||||
|     if (l == 0 && r == 0) | ||||
|         return 0; | ||||
| 
 | ||||
|     a0 = W4 * sextw(l) + (1 << (ROW_SHIFT - 1)); | ||||
| 
 | ||||
|     if (((l & ~0xffffUL) | r) == 0) { | ||||
|         a0 >>= ROW_SHIFT; | ||||
|         t2 = (uint16_t) a0; | ||||
|         t2 |= t2 << 16; | ||||
|         t2 |= t2 << 32; | ||||
| 
 | ||||
|         stq(t2, row); | ||||
|         stq(t2, row + 4); | ||||
|         return 1; | ||||
|     } | ||||
| 
 | ||||
|     a1 = a0; | ||||
|     a2 = a0; | ||||
|     a3 = a0; | ||||
| 
 | ||||
|     t = extwl(l, 4);            /* row[2] */ | ||||
|     if (t != 0) { | ||||
|         t = sextw(t); | ||||
|         a0 += W2 * t; | ||||
|         a1 += W6 * t; | ||||
|         a2 -= W6 * t; | ||||
|         a3 -= W2 * t; | ||||
|     } | ||||
| 
 | ||||
|     t = extwl(r, 0);            /* row[4] */ | ||||
|     if (t != 0) { | ||||
|         t = sextw(t); | ||||
|         a0 += W4 * t; | ||||
|         a1 -= W4 * t; | ||||
|         a2 -= W4 * t; | ||||
|         a3 += W4 * t; | ||||
|     } | ||||
| 
 | ||||
|     t = extwl(r, 4);            /* row[6] */ | ||||
|     if (t != 0) { | ||||
|         t = sextw(t); | ||||
|         a0 += W6 * t; | ||||
|         a1 -= W2 * t; | ||||
|         a2 += W2 * t; | ||||
|         a3 -= W6 * t; | ||||
|     } | ||||
| 
 | ||||
|     t = extwl(l, 2);            /* row[1] */ | ||||
|     if (t != 0) { | ||||
|         t = sextw(t); | ||||
|         b0 = W1 * t; | ||||
|         b1 = W3 * t; | ||||
|         b2 = W5 * t; | ||||
|         b3 = W7 * t; | ||||
|     } else { | ||||
|         b0 = 0; | ||||
|         b1 = 0; | ||||
|         b2 = 0; | ||||
|         b3 = 0; | ||||
|     } | ||||
| 
 | ||||
|     t = extwl(l, 6);            /* row[3] */ | ||||
|     if (t) { | ||||
|         t = sextw(t); | ||||
|         b0 += W3 * t; | ||||
|         b1 -= W7 * t; | ||||
|         b2 -= W1 * t; | ||||
|         b3 -= W5 * t; | ||||
|     } | ||||
| 
 | ||||
| 
 | ||||
|     t = extwl(r, 2);            /* row[5] */ | ||||
|     if (t) { | ||||
|         t = sextw(t); | ||||
|         b0 += W5 * t; | ||||
|         b1 -= W1 * t; | ||||
|         b2 += W7 * t; | ||||
|         b3 += W3 * t; | ||||
|     } | ||||
| 
 | ||||
|     t = extwl(r, 6);            /* row[7] */ | ||||
|     if (t) { | ||||
|         t = sextw(t); | ||||
|         b0 += W7 * t; | ||||
|         b1 -= W5 * t; | ||||
|         b2 += W3 * t; | ||||
|         b3 -= W1 * t; | ||||
|     } | ||||
| 
 | ||||
|     row[0] = (a0 + b0) >> ROW_SHIFT; | ||||
|     row[1] = (a1 + b1) >> ROW_SHIFT; | ||||
|     row[2] = (a2 + b2) >> ROW_SHIFT; | ||||
|     row[3] = (a3 + b3) >> ROW_SHIFT; | ||||
|     row[4] = (a3 - b3) >> ROW_SHIFT; | ||||
|     row[5] = (a2 - b2) >> ROW_SHIFT; | ||||
|     row[6] = (a1 - b1) >> ROW_SHIFT; | ||||
|     row[7] = (a0 - b0) >> ROW_SHIFT; | ||||
| 
 | ||||
|     return 2; | ||||
| } | ||||
| 
 | ||||
| static inline void idct_col(int16_t *col) | ||||
| { | ||||
|     int a0, a1, a2, a3, b0, b1, b2, b3; | ||||
| 
 | ||||
|     col[0] += (1 << (COL_SHIFT - 1)) / W4; | ||||
| 
 | ||||
|     a0 = W4 * col[8 * 0]; | ||||
|     a1 = W4 * col[8 * 0]; | ||||
|     a2 = W4 * col[8 * 0]; | ||||
|     a3 = W4 * col[8 * 0]; | ||||
| 
 | ||||
|     if (col[8 * 2]) { | ||||
|         a0 += W2 * col[8 * 2]; | ||||
|         a1 += W6 * col[8 * 2]; | ||||
|         a2 -= W6 * col[8 * 2]; | ||||
|         a3 -= W2 * col[8 * 2]; | ||||
|     } | ||||
| 
 | ||||
|     if (col[8 * 4]) { | ||||
|         a0 += W4 * col[8 * 4]; | ||||
|         a1 -= W4 * col[8 * 4]; | ||||
|         a2 -= W4 * col[8 * 4]; | ||||
|         a3 += W4 * col[8 * 4]; | ||||
|     } | ||||
| 
 | ||||
|     if (col[8 * 6]) { | ||||
|         a0 += W6 * col[8 * 6]; | ||||
|         a1 -= W2 * col[8 * 6]; | ||||
|         a2 += W2 * col[8 * 6]; | ||||
|         a3 -= W6 * col[8 * 6]; | ||||
|     } | ||||
| 
 | ||||
|     if (col[8 * 1]) { | ||||
|         b0 = W1 * col[8 * 1]; | ||||
|         b1 = W3 * col[8 * 1]; | ||||
|         b2 = W5 * col[8 * 1]; | ||||
|         b3 = W7 * col[8 * 1]; | ||||
|     } else { | ||||
|         b0 = 0; | ||||
|         b1 = 0; | ||||
|         b2 = 0; | ||||
|         b3 = 0; | ||||
|     } | ||||
| 
 | ||||
|     if (col[8 * 3]) { | ||||
|         b0 += W3 * col[8 * 3]; | ||||
|         b1 -= W7 * col[8 * 3]; | ||||
|         b2 -= W1 * col[8 * 3]; | ||||
|         b3 -= W5 * col[8 * 3]; | ||||
|     } | ||||
| 
 | ||||
|     if (col[8 * 5]) { | ||||
|         b0 += W5 * col[8 * 5]; | ||||
|         b1 -= W1 * col[8 * 5]; | ||||
|         b2 += W7 * col[8 * 5]; | ||||
|         b3 += W3 * col[8 * 5]; | ||||
|     } | ||||
| 
 | ||||
|     if (col[8 * 7]) { | ||||
|         b0 += W7 * col[8 * 7]; | ||||
|         b1 -= W5 * col[8 * 7]; | ||||
|         b2 += W3 * col[8 * 7]; | ||||
|         b3 -= W1 * col[8 * 7]; | ||||
|     } | ||||
| 
 | ||||
|     col[8 * 0] = (a0 + b0) >> COL_SHIFT; | ||||
|     col[8 * 7] = (a0 - b0) >> COL_SHIFT; | ||||
|     col[8 * 1] = (a1 + b1) >> COL_SHIFT; | ||||
|     col[8 * 6] = (a1 - b1) >> COL_SHIFT; | ||||
|     col[8 * 2] = (a2 + b2) >> COL_SHIFT; | ||||
|     col[8 * 5] = (a2 - b2) >> COL_SHIFT; | ||||
|     col[8 * 3] = (a3 + b3) >> COL_SHIFT; | ||||
|     col[8 * 4] = (a3 - b3) >> COL_SHIFT; | ||||
| } | ||||
| 
 | ||||
| /* If all rows but the first one are zero after row transformation,
 | ||||
|    all rows will be identical after column transformation.  */ | ||||
| static inline void idct_col2(int16_t *col) | ||||
| { | ||||
|     int i; | ||||
|     uint64_t l, r; | ||||
| 
 | ||||
|     for (i = 0; i < 8; ++i) { | ||||
|         int a0 = col[i] + (1 << (COL_SHIFT - 1)) / W4; | ||||
| 
 | ||||
|         a0 *= W4; | ||||
|         col[i] = a0 >> COL_SHIFT; | ||||
|     } | ||||
| 
 | ||||
|     l = ldq(col + 0 * 4); r = ldq(col + 1 * 4); | ||||
|     stq(l, col +  2 * 4); stq(r, col +  3 * 4); | ||||
|     stq(l, col +  4 * 4); stq(r, col +  5 * 4); | ||||
|     stq(l, col +  6 * 4); stq(r, col +  7 * 4); | ||||
|     stq(l, col +  8 * 4); stq(r, col +  9 * 4); | ||||
|     stq(l, col + 10 * 4); stq(r, col + 11 * 4); | ||||
|     stq(l, col + 12 * 4); stq(r, col + 13 * 4); | ||||
|     stq(l, col + 14 * 4); stq(r, col + 15 * 4); | ||||
| } | ||||
| 
 | ||||
/*
 * In-place inverse DCT of an 8x8 coefficient block (64 entries, rows of
 * eight consecutive values).
 *
 * The row pass reports how sparse each row was, which selects one of
 * three column passes:
 *   - rows 1..7 all zero:       scale row 0 and copy it to every row;
 *   - every row a constant:     all columns are equal, so transform only
 *                               column 0 and spread each result across
 *                               its row;
 *   - otherwise:                transform all eight columns.
 */
void ff_simple_idct_axp(int16_t *block)
{
    int i;
    int later_rows_empty = 1;   /* rows 1..7 were all zero */
    int rows_flat        = 1;   /* no row needed the full row transform */

    for (i = 0; i < 8; i++) {
        const int kind = idct_row(block + 8 * i);

        if (kind == 2)
            rows_flat = 0;
        if (kind > 0 && i > 0)
            later_rows_empty = 0;
    }

    if (later_rows_empty) {
        idct_col2(block);
        return;
    }

    if (!rows_flat) {
        for (i = 0; i < 8; i++)
            idct_col(block + i);
        return;
    }

    /* Constant rows: column 0 holds the answer for every column. */
    idct_col(block);
    for (i = 0; i < 8; i++) {
        int16_t *row  = block + 8 * i;
        uint64_t fill = (uint16_t) row[0];

        /* Replicate the 16-bit value into all four lanes. */
        fill |= fill << 16;
        fill |= fill << 32;
        stq(fill, row);
        stq(fill, row + 4);
    }
}
| 
 | ||||
/*
 * Inverse-transform block in place, then pass the result together with
 * dest and line_size to put_pixels_clamped_axp_p.
 * NOTE(review): presumably that call clamps the samples and stores them
 * into dest -- confirm against dsputil_alpha.h.
 */
void ff_simple_idct_put_axp(uint8_t *dest, int line_size, int16_t *block)
{
    /* Transform first: the pixel routine consumes the transformed block. */
    ff_simple_idct_axp(block);
    put_pixels_clamped_axp_p(block, dest, line_size);
}
| 
 | ||||
/*
 * Inverse-transform block in place, then pass the result together with
 * dest and line_size to add_pixels_clamped_axp_p.
 * NOTE(review): presumably that call adds the samples to dest with
 * clamping -- confirm against dsputil_alpha.h.
 */
void ff_simple_idct_add_axp(uint8_t *dest, int line_size, int16_t *block)
{
    /* Transform first: the pixel routine consumes the transformed block. */
    ff_simple_idct_axp(block);
    add_pixels_clamped_axp_p(block, dest, line_size);
}
| @ -2442,7 +2442,9 @@ typedef struct AVCodecContext { | ||||
| #define FF_IDCT_SIMPLEVIS     18 | ||||
| #define FF_IDCT_FAAN          20 | ||||
| #define FF_IDCT_SIMPLENEON    22 | ||||
| #if FF_API_ARCH_ALPHA | ||||
| #define FF_IDCT_SIMPLEALPHA   23 | ||||
| #endif | ||||
| 
 | ||||
|     /**
 | ||||
|      * bits per sample/pixel from the demuxer (needed for huffyuv). | ||||
|  | ||||
| @ -61,8 +61,6 @@ void ff_simple_idct_armv5te(int16_t *data); | ||||
| void ff_simple_idct_armv6(int16_t *data); | ||||
| void ff_simple_idct_neon(int16_t *data); | ||||
| 
 | ||||
| void ff_simple_idct_axp(int16_t *data); | ||||
| 
 | ||||
| struct algo { | ||||
|     const char *name; | ||||
|     void (*func)(int16_t *block); | ||||
| @ -136,10 +134,6 @@ static const struct algo idct_tab[] = { | ||||
|     { "SIMPLE-NEON",    ff_simple_idct_neon, PARTTRANS_PERM, AV_CPU_FLAG_NEON }, | ||||
| #endif | ||||
| 
 | ||||
| #if ARCH_ALPHA | ||||
|     { "SIMPLE-ALPHA",   ff_simple_idct_axp,    NO_PERM }, | ||||
| #endif | ||||
| 
 | ||||
|     { 0 } | ||||
| }; | ||||
| 
 | ||||
|  | ||||
| @ -2666,8 +2666,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx) | ||||
|     } | ||||
| 
 | ||||
| 
 | ||||
|     if (ARCH_ALPHA) | ||||
|         ff_dsputil_init_alpha(c, avctx); | ||||
|     if (ARCH_ARM) | ||||
|         ff_dsputil_init_arm(c, avctx); | ||||
|     if (ARCH_BFIN) | ||||
|  | ||||
| @ -311,7 +311,6 @@ int ff_check_alignment(void); | ||||
| 
 | ||||
| void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type); | ||||
| 
 | ||||
| void ff_dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx); | ||||
| void ff_dsputil_init_arm(DSPContext* c, AVCodecContext *avctx); | ||||
| void ff_dsputil_init_bfin(DSPContext* c, AVCodecContext *avctx); | ||||
| void ff_dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx); | ||||
|  | ||||
| @ -54,8 +54,6 @@ av_cold void ff_hpeldsp_init(HpelDSPContext *c, int flags) | ||||
|     hpel_funcs(avg, [3],  2); | ||||
|     hpel_funcs(avg_no_rnd,, 16); | ||||
| 
 | ||||
|     if (ARCH_ALPHA) | ||||
|         ff_hpeldsp_init_alpha(c, flags); | ||||
|     if (ARCH_ARM) | ||||
|         ff_hpeldsp_init_arm(c, flags); | ||||
|     if (ARCH_BFIN) | ||||
|  | ||||
| @ -94,7 +94,6 @@ typedef struct HpelDSPContext { | ||||
| 
 | ||||
| void ff_hpeldsp_init(HpelDSPContext *c, int flags); | ||||
| 
 | ||||
| void ff_hpeldsp_init_alpha(HpelDSPContext *c, int flags); | ||||
| void ff_hpeldsp_init_arm(HpelDSPContext *c, int flags); | ||||
| void ff_hpeldsp_init_bfin(HpelDSPContext *c, int flags); | ||||
| void ff_hpeldsp_init_ppc(HpelDSPContext *c, int flags); | ||||
|  | ||||
| @ -165,8 +165,6 @@ av_cold int ff_dct_common_init(MpegEncContext *s) | ||||
|         s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_bitexact; | ||||
|     s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c; | ||||
| 
 | ||||
|     if (ARCH_ALPHA) | ||||
|         ff_MPV_common_init_axp(s); | ||||
|     if (ARCH_ARM) | ||||
|         ff_MPV_common_init_arm(s); | ||||
|     if (ARCH_BFIN) | ||||
|  | ||||
| @ -785,7 +785,6 @@ int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt, | ||||
|                           const AVFrame *frame, int *got_packet); | ||||
| void ff_MPV_encode_init_x86(MpegEncContext *s); | ||||
| void ff_MPV_common_init_x86(MpegEncContext *s); | ||||
| void ff_MPV_common_init_axp(MpegEncContext *s); | ||||
| void ff_MPV_common_init_arm(MpegEncContext *s); | ||||
| void ff_MPV_common_init_bfin(MpegEncContext *s); | ||||
| void ff_MPV_common_init_ppc(MpegEncContext *s); | ||||
|  | ||||
| @ -242,10 +242,7 @@ int ff_msmpeg4_pred_dc(MpegEncContext *s, int n, | ||||
|         : "%eax", "%edx" | ||||
|     ); | ||||
| #else | ||||
|     /* #elif ARCH_ALPHA */ | ||||
|     /* Divisions are extremely costly on Alpha; optimize the most
 | ||||
|        common case. But they are costly everywhere... | ||||
|      */ | ||||
|     /* Divisions are costly everywhere; optimize the most common case. */ | ||||
|     if (scale == 8) { | ||||
|         a = (a + (8 >> 1)) / 8; | ||||
|         b = (b + (8 >> 1)) / 8; | ||||
|  | ||||
| @ -193,7 +193,9 @@ static const AVOption avcodec_options[] = { | ||||
| {"simplearmv5te", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEARMV5TE }, INT_MIN, INT_MAX, V|E|D, "idct"}, | ||||
| {"simplearmv6", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEARMV6 }, INT_MIN, INT_MAX, V|E|D, "idct"}, | ||||
| {"simpleneon", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLENEON }, INT_MIN, INT_MAX, V|E|D, "idct"}, | ||||
| #if FF_API_ARCH_ALPHA | ||||
| {"simplealpha", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEALPHA }, INT_MIN, INT_MAX, V|E|D, "idct"}, | ||||
| #endif | ||||
| {"ipp", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_IPP }, INT_MIN, INT_MAX, V|E|D, "idct"}, | ||||
| {"xvidmmx", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_XVIDMMX }, INT_MIN, INT_MAX, V|E|D, "idct"}, | ||||
| {"faani", "floating point AAN IDCT", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_FAAN }, INT_MIN, INT_MAX, V|D|E, "idct"}, | ||||
|  | ||||
| @ -94,5 +94,8 @@ | ||||
| #ifndef FF_API_CODEC_PKT | ||||
| #define FF_API_CODEC_PKT         (LIBAVCODEC_VERSION_MAJOR < 56) | ||||
| #endif | ||||
| #ifndef FF_API_ARCH_ALPHA | ||||
| #define FF_API_ARCH_ALPHA        (LIBAVCODEC_VERSION_MAJOR < 56) | ||||
| #endif | ||||
| 
 | ||||
| #endif /* AVCODEC_VERSION_H */ | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user