Timings for Arrandale:
          C    SSE
win32:  2108   334
win64:  1152   322
Factoring the inner loop out into a call/jmp costs more than 15 cycles, even
with the jmp destination aligned.
Unrolling for ARCH_X86_64 gains 20 cycles.
Signed-off-by: Janne Grunau <janne-libav@jannau.net>
		
	
			
		
			
				
	
	
		
			39 lines
		
	
	
		
			1.4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			39 lines
		
	
	
		
			1.4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/*
 * copyright (c) 2008 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef AVCODEC_SYNTH_FILTER_H
#define AVCODEC_SYNTH_FILTER_H
#include "fft.h"
typedef struct SynthFilterContext {
 | 
						|
    void (*synth_filter_float)(FFTContext *imdct,
 | 
						|
                               float *synth_buf_ptr, int *synth_buf_offset,
 | 
						|
                               float synth_buf2[32], const float window[512],
 | 
						|
                               float out[32], const float in[32],
 | 
						|
                               float scale);
 | 
						|
} SynthFilterContext;
/**
 * Initialize a SynthFilterContext.
 *
 * NOTE(review): the definition is not part of this header; presumably it
 * sets c->synth_filter_float -- confirm in the implementation.
 *
 * @param c context to initialize
 */
void ff_synth_filter_init(SynthFilterContext *c);

/*
 * Architecture-specific initializers (ARM and x86, judging by the names).
 * NOTE(review): presumably these replace the callback in c with an
 * optimized version when one is available -- definitions are not visible
 * here, verify against the per-architecture sources.
 */
void ff_synth_filter_init_arm(SynthFilterContext *c);
void ff_synth_filter_init_x86(SynthFilterContext *c);
#endif /* AVCODEC_SYNTH_FILTER_H */