Based on the aarch64 asm. CPU cycle counts on Cortex-A9, compared to code generated by gcc 4.8.2:
before: 475 decicycles in get_cabac_noinline, 67106035 runs, 2829 skips
after: 393 decicycles in get_cabac_noinline, 67106474 runs, 2390 skips
Overall speedup is above 2%. Code generated by clang 3.4 is slower on the same hardware and the relative change is a little larger.
		
			
				
	
	
		
			191 lines
		
	
	
		
			4.8 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			191 lines
		
	
	
		
			4.8 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/*
 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
 | |
| 
 | |
/**
 * @file
 * Context Adaptive Binary Arithmetic Coder inline functions
 */
 | |
| 
 | |
| #ifndef AVCODEC_CABAC_FUNCTIONS_H
 | |
| #define AVCODEC_CABAC_FUNCTIONS_H
 | |
| 
 | |
| #include <stdint.h>
 | |
| 
 | |
| #include "cabac.h"
 | |
| #include "config.h"
 | |
| 
 | |
| #if ARCH_AARCH64
 | |
| #   include "aarch64/cabac.h"
 | |
| #endif
 | |
| #if ARCH_ARM
 | |
| #   include "arm/cabac.h"
 | |
| #endif
 | |
| #if ARCH_X86
 | |
| #   include "x86/cabac.h"
 | |
| #endif
 | |
| 
 | |
| static uint8_t * const ff_h264_norm_shift = ff_h264_cabac_tables + H264_NORM_SHIFT_OFFSET;
 | |
| static uint8_t * const ff_h264_lps_range = ff_h264_cabac_tables + H264_LPS_RANGE_OFFSET;
 | |
| static uint8_t * const ff_h264_mlps_state = ff_h264_cabac_tables + H264_MLPS_STATE_OFFSET;
 | |
| static uint8_t * const ff_h264_last_coeff_flag_offset_8x8 = ff_h264_cabac_tables + H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET;
 | |
| 
 | |
| static void refill(CABACContext *c){
 | |
| #if CABAC_BITS == 16
 | |
|         c->low+= (c->bytestream[0]<<9) + (c->bytestream[1]<<1);
 | |
| #else
 | |
|         c->low+= c->bytestream[0]<<1;
 | |
| #endif
 | |
|     c->low -= CABAC_MASK;
 | |
|     if (c->bytestream < c->bytestream_end)
 | |
|         c->bytestream += CABAC_BITS / 8;
 | |
| }
 | |
| 
 | |
| static inline void renorm_cabac_decoder_once(CABACContext *c){
 | |
|     int shift= (uint32_t)(c->range - 0x100)>>31;
 | |
|     c->range<<= shift;
 | |
|     c->low  <<= shift;
 | |
|     if(!(c->low & CABAC_MASK))
 | |
|         refill(c);
 | |
| }
 | |
| 
 | |
| #ifndef get_cabac_inline
 | |
| static void refill2(CABACContext *c){
 | |
|     int i, x;
 | |
| 
 | |
|     x= c->low ^ (c->low-1);
 | |
|     i= 7 - ff_h264_norm_shift[x>>(CABAC_BITS-1)];
 | |
| 
 | |
|     x= -CABAC_MASK;
 | |
| 
 | |
| #if CABAC_BITS == 16
 | |
|         x+= (c->bytestream[0]<<9) + (c->bytestream[1]<<1);
 | |
| #else
 | |
|         x+= c->bytestream[0]<<1;
 | |
| #endif
 | |
| 
 | |
|     c->low += x<<i;
 | |
|     if (c->bytestream < c->bytestream_end)
 | |
|         c->bytestream += CABAC_BITS/8;
 | |
| }
 | |
| 
 | |
| static av_always_inline int get_cabac_inline(CABACContext *c, uint8_t * const state){
 | |
|     int s = *state;
 | |
|     int RangeLPS= ff_h264_lps_range[2*(c->range&0xC0) + s];
 | |
|     int bit, lps_mask;
 | |
| 
 | |
|     c->range -= RangeLPS;
 | |
|     lps_mask= ((c->range<<(CABAC_BITS+1)) - c->low)>>31;
 | |
| 
 | |
|     c->low -= (c->range<<(CABAC_BITS+1)) & lps_mask;
 | |
|     c->range += (RangeLPS - c->range) & lps_mask;
 | |
| 
 | |
|     s^=lps_mask;
 | |
|     *state= (ff_h264_mlps_state+128)[s];
 | |
|     bit= s&1;
 | |
| 
 | |
|     lps_mask= ff_h264_norm_shift[c->range];
 | |
|     c->range<<= lps_mask;
 | |
|     c->low  <<= lps_mask;
 | |
|     if(!(c->low & CABAC_MASK))
 | |
|         refill2(c);
 | |
|     return bit;
 | |
| }
 | |
| #endif
 | |
| 
 | |
| static int av_noinline av_unused get_cabac_noinline(CABACContext *c, uint8_t * const state){
 | |
|     return get_cabac_inline(c,state);
 | |
| }
 | |
| 
 | |
| static int av_unused get_cabac(CABACContext *c, uint8_t * const state){
 | |
|     return get_cabac_inline(c,state);
 | |
| }
 | |
| 
 | |
| #ifndef get_cabac_bypass
 | |
| static int av_unused get_cabac_bypass(CABACContext *c){
 | |
|     int range;
 | |
|     c->low += c->low;
 | |
| 
 | |
|     if(!(c->low & CABAC_MASK))
 | |
|         refill(c);
 | |
| 
 | |
|     range= c->range<<(CABAC_BITS+1);
 | |
|     if(c->low < range){
 | |
|         return 0;
 | |
|     }else{
 | |
|         c->low -= range;
 | |
|         return 1;
 | |
|     }
 | |
| }
 | |
| #endif
 | |
| 
 | |
| #ifndef get_cabac_bypass_sign
 | |
| static av_always_inline int get_cabac_bypass_sign(CABACContext *c, int val){
 | |
|     int range, mask;
 | |
|     c->low += c->low;
 | |
| 
 | |
|     if(!(c->low & CABAC_MASK))
 | |
|         refill(c);
 | |
| 
 | |
|     range= c->range<<(CABAC_BITS+1);
 | |
|     c->low -= range;
 | |
|     mask= c->low >> 31;
 | |
|     range &= mask;
 | |
|     c->low += range;
 | |
|     return (val^mask)-mask;
 | |
| }
 | |
| #endif
 | |
| 
 | |
| /**
 | |
|  *
 | |
|  * @return the number of bytes read or 0 if no end
 | |
|  */
 | |
| static int av_unused get_cabac_terminate(CABACContext *c){
 | |
|     c->range -= 2;
 | |
|     if(c->low < c->range<<(CABAC_BITS+1)){
 | |
|         renorm_cabac_decoder_once(c);
 | |
|         return 0;
 | |
|     }else{
 | |
|         return c->bytestream - c->bytestream_start;
 | |
|     }
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * Skip @p n bytes and reset the decoder.
 | |
|  * @return the address of the first skipped byte or NULL if there's less than @p n bytes left
 | |
|  */
 | |
| static av_unused const uint8_t* skip_bytes(CABACContext *c, int n) {
 | |
|     const uint8_t *ptr = c->bytestream;
 | |
| 
 | |
|     if (c->low & 0x1)
 | |
|         ptr--;
 | |
| #if CABAC_BITS == 16
 | |
|     if (c->low & 0x1FF)
 | |
|         ptr--;
 | |
| #endif
 | |
|     if ((int) (c->bytestream_end - ptr) < n)
 | |
|         return NULL;
 | |
|     ff_init_cabac_decoder(c, ptr + n, c->bytestream_end - ptr - n);
 | |
| 
 | |
|     return ptr;
 | |
| }
 | |
| 
 | |
| #endif /* AVCODEC_CABAC_FUNCTIONS_H */
 |