Remove unneeded add bias from 3 functions.
DSPContext.vector_fmul_window()
DCADSPContext.lfe_fir()
SynthFilterContext.synth_filter_float()

Signed-off-by: Mans Rullgard <mans@mansr.com>
This commit is contained in:
		
							parent
							
								
									8cb3c557a9
								
							
						
					
					
						commit
						80ba1ddb58
					
				@ -1721,19 +1721,19 @@ static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
 | 
				
			|||||||
     */
 | 
					     */
 | 
				
			||||||
    if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
 | 
					    if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
 | 
				
			||||||
            (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
 | 
					            (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
 | 
				
			||||||
        ac->dsp.vector_fmul_window(    out,               saved,            buf,         lwindow_prev, 0, 512);
 | 
					        ac->dsp.vector_fmul_window(    out,               saved,            buf,         lwindow_prev, 512);
 | 
				
			||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
        memcpy(                        out,               saved,            448 * sizeof(float));
 | 
					        memcpy(                        out,               saved,            448 * sizeof(float));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
 | 
					        if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
 | 
				
			||||||
            ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448,      buf + 0*128, swindow_prev, 0, 64);
 | 
					            ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448,      buf + 0*128, swindow_prev, 64);
 | 
				
			||||||
            ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow,      0, 64);
 | 
					            ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow,      64);
 | 
				
			||||||
            ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow,      0, 64);
 | 
					            ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow,      64);
 | 
				
			||||||
            ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow,      0, 64);
 | 
					            ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow,      64);
 | 
				
			||||||
            ac->dsp.vector_fmul_window(temp,              buf + 3*128 + 64, buf + 4*128, swindow,      0, 64);
 | 
					            ac->dsp.vector_fmul_window(temp,              buf + 3*128 + 64, buf + 4*128, swindow,      64);
 | 
				
			||||||
            memcpy(                    out + 448 + 4*128, temp, 64 * sizeof(float));
 | 
					            memcpy(                    out + 448 + 4*128, temp, 64 * sizeof(float));
 | 
				
			||||||
        } else {
 | 
					        } else {
 | 
				
			||||||
            ac->dsp.vector_fmul_window(out + 448,         saved + 448,      buf,         swindow_prev, 0, 64);
 | 
					            ac->dsp.vector_fmul_window(out + 448,         saved + 448,      buf,         swindow_prev, 64);
 | 
				
			||||||
            memcpy(                    out + 576,         buf + 64,         448 * sizeof(float));
 | 
					            memcpy(                    out + 576,         buf + 64,         448 * sizeof(float));
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@ -1741,9 +1741,9 @@ static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
 | 
				
			|||||||
    // buffer update
 | 
					    // buffer update
 | 
				
			||||||
    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
 | 
					    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
 | 
				
			||||||
        memcpy(                    saved,       temp + 64,         64 * sizeof(float));
 | 
					        memcpy(                    saved,       temp + 64,         64 * sizeof(float));
 | 
				
			||||||
        ac->dsp.vector_fmul_window(saved + 64,  buf + 4*128 + 64, buf + 5*128, swindow, 0, 64);
 | 
					        ac->dsp.vector_fmul_window(saved + 64,  buf + 4*128 + 64, buf + 5*128, swindow, 64);
 | 
				
			||||||
        ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 0, 64);
 | 
					        ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64);
 | 
				
			||||||
        ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 0, 64);
 | 
					        ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64);
 | 
				
			||||||
        memcpy(                    saved + 448, buf + 7*128 + 64,  64 * sizeof(float));
 | 
					        memcpy(                    saved + 448, buf + 7*128 + 64,  64 * sizeof(float));
 | 
				
			||||||
    } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
 | 
					    } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
 | 
				
			||||||
        memcpy(                    saved,       buf + 512,        448 * sizeof(float));
 | 
					        memcpy(                    saved,       buf + 512,        448 * sizeof(float));
 | 
				
			||||||
 | 
				
			|||||||
@ -628,13 +628,13 @@ static inline void do_imdct(AC3DecodeContext *s, int channels)
 | 
				
			|||||||
            for(i=0; i<128; i++)
 | 
					            for(i=0; i<128; i++)
 | 
				
			||||||
                x[i] = s->transform_coeffs[ch][2*i];
 | 
					                x[i] = s->transform_coeffs[ch][2*i];
 | 
				
			||||||
            ff_imdct_half(&s->imdct_256, s->tmp_output, x);
 | 
					            ff_imdct_half(&s->imdct_256, s->tmp_output, x);
 | 
				
			||||||
            s->dsp.vector_fmul_window(s->output[ch-1], s->delay[ch-1], s->tmp_output, s->window, 0, 128);
 | 
					            s->dsp.vector_fmul_window(s->output[ch-1], s->delay[ch-1], s->tmp_output, s->window, 128);
 | 
				
			||||||
            for(i=0; i<128; i++)
 | 
					            for(i=0; i<128; i++)
 | 
				
			||||||
                x[i] = s->transform_coeffs[ch][2*i+1];
 | 
					                x[i] = s->transform_coeffs[ch][2*i+1];
 | 
				
			||||||
            ff_imdct_half(&s->imdct_256, s->delay[ch-1], x);
 | 
					            ff_imdct_half(&s->imdct_256, s->delay[ch-1], x);
 | 
				
			||||||
        } else {
 | 
					        } else {
 | 
				
			||||||
            ff_imdct_half(&s->imdct_512, s->tmp_output, s->transform_coeffs[ch]);
 | 
					            ff_imdct_half(&s->imdct_512, s->tmp_output, s->transform_coeffs[ch]);
 | 
				
			||||||
            s->dsp.vector_fmul_window(s->output[ch-1], s->delay[ch-1], s->tmp_output, s->window, 0, 128);
 | 
					            s->dsp.vector_fmul_window(s->output[ch-1], s->delay[ch-1], s->tmp_output, s->window, 128);
 | 
				
			||||||
            memcpy(s->delay[ch-1], s->tmp_output+128, 128*sizeof(float));
 | 
					            memcpy(s->delay[ch-1], s->tmp_output+128, 128*sizeof(float));
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
				
			|||||||
@ -23,7 +23,7 @@
 | 
				
			|||||||
#include "libavcodec/dcadsp.h"
 | 
					#include "libavcodec/dcadsp.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void ff_dca_lfe_fir_neon(float *out, const float *in, const float *coefs,
 | 
					void ff_dca_lfe_fir_neon(float *out, const float *in, const float *coefs,
 | 
				
			||||||
                         int decifactor, float scale, float bias);
 | 
					                         int decifactor, float scale);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void av_cold ff_dcadsp_init_arm(DCADSPContext *s)
 | 
					void av_cold ff_dcadsp_init_arm(DCADSPContext *s)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
				
			|||||||
@ -29,7 +29,7 @@ function ff_dca_lfe_fir_neon, export=1
 | 
				
			|||||||
        cmp             r3,  #32
 | 
					        cmp             r3,  #32
 | 
				
			||||||
        moveq           r6,  #256/32
 | 
					        moveq           r6,  #256/32
 | 
				
			||||||
        movne           r6,  #256/64
 | 
					        movne           r6,  #256/64
 | 
				
			||||||
NOVFP   vldr            d0,  [sp, #16]          @ scale, bias
 | 
					NOVFP   vldr            s0,  [sp, #16]          @ scale
 | 
				
			||||||
        mov             lr,  #-16
 | 
					        mov             lr,  #-16
 | 
				
			||||||
1:
 | 
					1:
 | 
				
			||||||
        vmov.f32        q2,  #0.0               @ v0
 | 
					        vmov.f32        q2,  #0.0               @ v0
 | 
				
			||||||
@ -51,8 +51,7 @@ NOVFP   vldr            d0,  [sp, #16]          @ scale, bias
 | 
				
			|||||||
        vadd.f32        d4,  d4,  d5
 | 
					        vadd.f32        d4,  d4,  d5
 | 
				
			||||||
        vadd.f32        d6,  d6,  d7
 | 
					        vadd.f32        d6,  d6,  d7
 | 
				
			||||||
        vpadd.f32       d4,  d4,  d6
 | 
					        vpadd.f32       d4,  d4,  d6
 | 
				
			||||||
        vdup.32         d5,  d0[1]
 | 
					        vmul.f32        d5,  d4,  d0[0]
 | 
				
			||||||
        vmla.f32        d5,  d4,  d0[0]
 | 
					 | 
				
			||||||
        vst1.32         {d5[0]},  [r0,:32]!
 | 
					        vst1.32         {d5[0]},  [r0,:32]!
 | 
				
			||||||
        vst1.32         {d5[1]},  [r4,:32]!
 | 
					        vst1.32         {d5[1]},  [r4,:32]!
 | 
				
			||||||
        bne             1b
 | 
					        bne             1b
 | 
				
			||||||
 | 
				
			|||||||
@ -140,8 +140,7 @@ void ff_vp3_h_loop_filter_neon(uint8_t *, int, int *);
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int len);
 | 
					void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int len);
 | 
				
			||||||
void ff_vector_fmul_window_neon(float *dst, const float *src0,
 | 
					void ff_vector_fmul_window_neon(float *dst, const float *src0,
 | 
				
			||||||
                                const float *src1, const float *win,
 | 
					                                const float *src1, const float *win, int len);
 | 
				
			||||||
                                float add_bias, int len);
 | 
					 | 
				
			||||||
void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
 | 
					void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
 | 
				
			||||||
                                int len);
 | 
					                                int len);
 | 
				
			||||||
void ff_vector_fmul_sv_scalar_2_neon(float *dst, const float *src,
 | 
					void ff_vector_fmul_sv_scalar_2_neon(float *dst, const float *src,
 | 
				
			||||||
 | 
				
			|||||||
@ -777,11 +777,8 @@ function ff_vector_fmul_neon, export=1
 | 
				
			|||||||
endfunc
 | 
					endfunc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
function ff_vector_fmul_window_neon, export=1
 | 
					function ff_vector_fmul_window_neon, export=1
 | 
				
			||||||
VFP     vdup.32         q8,  d0[0]
 | 
					 | 
				
			||||||
NOVFP   vld1.32         {d16[],d17[]}, [sp,:32]
 | 
					 | 
				
			||||||
        push            {r4,r5,lr}
 | 
					        push            {r4,r5,lr}
 | 
				
			||||||
VFP     ldr             lr,  [sp, #12]
 | 
					        ldr             lr,  [sp, #12]
 | 
				
			||||||
NOVFP   ldr             lr,  [sp, #16]
 | 
					 | 
				
			||||||
        sub             r2,  r2,  #8
 | 
					        sub             r2,  r2,  #8
 | 
				
			||||||
        sub             r5,  lr,  #2
 | 
					        sub             r5,  lr,  #2
 | 
				
			||||||
        add             r2,  r2,  r5, lsl #2
 | 
					        add             r2,  r2,  r5, lsl #2
 | 
				
			||||||
@ -793,14 +790,12 @@ NOVFP   ldr             lr,  [sp, #16]
 | 
				
			|||||||
        vld1.64         {d4,d5},  [r3,:128]!
 | 
					        vld1.64         {d4,d5},  [r3,:128]!
 | 
				
			||||||
        vld1.64         {d6,d7},  [r4,:128], r5
 | 
					        vld1.64         {d6,d7},  [r4,:128], r5
 | 
				
			||||||
1:      subs            lr,  lr,  #4
 | 
					1:      subs            lr,  lr,  #4
 | 
				
			||||||
        vmov            q11, q8
 | 
					        vmul.f32        d22, d0,  d4
 | 
				
			||||||
        vmla.f32        d22, d0,  d4
 | 
					 | 
				
			||||||
        vmov            q10, q8
 | 
					 | 
				
			||||||
        vmla.f32        d23, d1,  d5
 | 
					 | 
				
			||||||
        vrev64.32       q3,  q3
 | 
					        vrev64.32       q3,  q3
 | 
				
			||||||
        vmla.f32        d20, d0,  d7
 | 
					        vmul.f32        d23, d1,  d5
 | 
				
			||||||
        vrev64.32       q1,  q1
 | 
					        vrev64.32       q1,  q1
 | 
				
			||||||
        vmla.f32        d21, d1,  d6
 | 
					        vmul.f32        d20, d0,  d7
 | 
				
			||||||
 | 
					        vmul.f32        d21, d1,  d6
 | 
				
			||||||
        beq             2f
 | 
					        beq             2f
 | 
				
			||||||
        vmla.f32        d22, d3,  d7
 | 
					        vmla.f32        d22, d3,  d7
 | 
				
			||||||
        vld1.64         {d0,d1},  [r1,:128]!
 | 
					        vld1.64         {d0,d1},  [r1,:128]!
 | 
				
			||||||
 | 
				
			|||||||
@ -34,7 +34,7 @@ void ff_synth_filter_float_neon(FFTContext *imdct,
 | 
				
			|||||||
                                float *synth_buf_ptr, int *synth_buf_offset,
 | 
					                                float *synth_buf_ptr, int *synth_buf_offset,
 | 
				
			||||||
                                float synth_buf2[32], const float window[512],
 | 
					                                float synth_buf2[32], const float window[512],
 | 
				
			||||||
                                float out[32], const float in[32],
 | 
					                                float out[32], const float in[32],
 | 
				
			||||||
                                float scale, float bias);
 | 
					                                float scale);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
av_cold void ff_fft_init_arm(FFTContext *s)
 | 
					av_cold void ff_fft_init_arm(FFTContext *s)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
				
			|||||||
@ -42,7 +42,7 @@ VFP     vpop            {d0}
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        ldr             r5,  [sp, #9*4]         @ window
 | 
					        ldr             r5,  [sp, #9*4]         @ window
 | 
				
			||||||
        ldr             r2,  [sp, #10*4]        @ out
 | 
					        ldr             r2,  [sp, #10*4]        @ out
 | 
				
			||||||
NOVFP   vldr            d0,  [sp, #12*4]        @ scale, bias
 | 
					NOVFP   vldr            s0,  [sp, #12*4]        @ scale
 | 
				
			||||||
        add             r8,  r9,  #12*4
 | 
					        add             r8,  r9,  #12*4
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        mov             lr,  #64*4
 | 
					        mov             lr,  #64*4
 | 
				
			||||||
@ -90,10 +90,8 @@ NOVFP   vldr            d0,  [sp, #12*4]        @ scale, bias
 | 
				
			|||||||
        sub             r11, r11, #512*4
 | 
					        sub             r11, r11, #512*4
 | 
				
			||||||
        b               2b
 | 
					        b               2b
 | 
				
			||||||
3:
 | 
					3:
 | 
				
			||||||
        vdup.32         q8,  d0[1]
 | 
					        vmul.f32        q8,  q10, d0[0]
 | 
				
			||||||
        vdup.32         q9,  d0[1]
 | 
					        vmul.f32        q9,  q1,  d0[0]
 | 
				
			||||||
        vmla.f32        q8,  q10, d0[0]
 | 
					 | 
				
			||||||
        vmla.f32        q9,  q1,  d0[0]
 | 
					 | 
				
			||||||
        vst1.32         {q3},     [r3,:128]
 | 
					        vst1.32         {q3},     [r3,:128]
 | 
				
			||||||
        sub             r3,  r3,  #16*4
 | 
					        sub             r3,  r3,  #16*4
 | 
				
			||||||
        vst1.32         {q2},     [r3,:128]
 | 
					        vst1.32         {q2},     [r3,:128]
 | 
				
			||||||
 | 
				
			|||||||
@ -141,7 +141,7 @@ static int at1_imdct_block(AT1SUCtx* su, AT1Ctx *q)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
            /* overlap and window */
 | 
					            /* overlap and window */
 | 
				
			||||||
            q->dsp.vector_fmul_window(&q->bands[band_num][start_pos], prev_buf,
 | 
					            q->dsp.vector_fmul_window(&q->bands[band_num][start_pos], prev_buf,
 | 
				
			||||||
                                      &su->spectrum[0][ref_pos + start_pos], ff_sine_32, 0, 16);
 | 
					                                      &su->spectrum[0][ref_pos + start_pos], ff_sine_32, 16);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            prev_buf = &su->spectrum[0][ref_pos+start_pos + 16];
 | 
					            prev_buf = &su->spectrum[0][ref_pos+start_pos + 16];
 | 
				
			||||||
            start_pos += block_size;
 | 
					            start_pos += block_size;
 | 
				
			||||||
 | 
				
			|||||||
@ -896,7 +896,7 @@ static void qmf_32_subbands(DCAContext * s, int chans,
 | 
				
			|||||||
        s->synth.synth_filter_float(&s->imdct,
 | 
					        s->synth.synth_filter_float(&s->imdct,
 | 
				
			||||||
                              s->subband_fir_hist[chans], &s->hist_index[chans],
 | 
					                              s->subband_fir_hist[chans], &s->hist_index[chans],
 | 
				
			||||||
                              s->subband_fir_noidea[chans], prCoeff,
 | 
					                              s->subband_fir_noidea[chans], prCoeff,
 | 
				
			||||||
                              samples_out, s->raXin, scale, 0);
 | 
					                              samples_out, s->raXin, scale);
 | 
				
			||||||
        samples_out+= 32;
 | 
					        samples_out+= 32;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@ -929,7 +929,7 @@ static void lfe_interpolation_fir(DCAContext *s, int decimation_select,
 | 
				
			|||||||
    /* Interpolation */
 | 
					    /* Interpolation */
 | 
				
			||||||
    for (deciindex = 0; deciindex < num_deci_sample; deciindex++) {
 | 
					    for (deciindex = 0; deciindex < num_deci_sample; deciindex++) {
 | 
				
			||||||
        s->dcadsp.lfe_fir(samples_out, samples_in, prCoeff, decifactor,
 | 
					        s->dcadsp.lfe_fir(samples_out, samples_in, prCoeff, decifactor,
 | 
				
			||||||
                          scale, 0);
 | 
					                          scale);
 | 
				
			||||||
        samples_in++;
 | 
					        samples_in++;
 | 
				
			||||||
        samples_out += 2 * decifactor;
 | 
					        samples_out += 2 * decifactor;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
				
			|||||||
@ -23,7 +23,7 @@
 | 
				
			|||||||
#include "dcadsp.h"
 | 
					#include "dcadsp.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void dca_lfe_fir_c(float *out, const float *in, const float *coefs,
 | 
					static void dca_lfe_fir_c(float *out, const float *in, const float *coefs,
 | 
				
			||||||
                          int decifactor, float scale, float bias)
 | 
					                          int decifactor, float scale)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    float *out2 = out + decifactor;
 | 
					    float *out2 = out + decifactor;
 | 
				
			||||||
    const float *cf0 = coefs;
 | 
					    const float *cf0 = coefs;
 | 
				
			||||||
@ -39,8 +39,8 @@ static void dca_lfe_fir_c(float *out, const float *in, const float *coefs,
 | 
				
			|||||||
            v0 += s * *cf0++;
 | 
					            v0 += s * *cf0++;
 | 
				
			||||||
            v1 += s * *--cf1;
 | 
					            v1 += s * *--cf1;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        *out++  = (v0 * scale) + bias;
 | 
					        *out++  = v0 * scale;
 | 
				
			||||||
        *out2++ = (v1 * scale) + bias;
 | 
					        *out2++ = v1 * scale;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -21,7 +21,7 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
typedef struct DCADSPContext {
 | 
					typedef struct DCADSPContext {
 | 
				
			||||||
    void (*lfe_fir)(float *out, const float *in, const float *coefs,
 | 
					    void (*lfe_fir)(float *out, const float *in, const float *coefs,
 | 
				
			||||||
                    int decifactor, float scale, float bias);
 | 
					                    int decifactor, float scale);
 | 
				
			||||||
} DCADSPContext;
 | 
					} DCADSPContext;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void ff_dcadsp_init(DCADSPContext *s);
 | 
					void ff_dcadsp_init(DCADSPContext *s);
 | 
				
			||||||
 | 
				
			|||||||
@ -3776,7 +3776,9 @@ static void vector_fmul_add_c(float *dst, const float *src0, const float *src1,
 | 
				
			|||||||
        dst[i] = src0[i] * src1[i] + src2[i];
 | 
					        dst[i] = src0[i] * src1[i] + src2[i];
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len){
 | 
					static void vector_fmul_window_c(float *dst, const float *src0,
 | 
				
			||||||
 | 
					                                 const float *src1, const float *win, int len)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
    int i,j;
 | 
					    int i,j;
 | 
				
			||||||
    dst += len;
 | 
					    dst += len;
 | 
				
			||||||
    win += len;
 | 
					    win += len;
 | 
				
			||||||
@ -3786,8 +3788,8 @@ void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, c
 | 
				
			|||||||
        float s1 = src1[j];
 | 
					        float s1 = src1[j];
 | 
				
			||||||
        float wi = win[i];
 | 
					        float wi = win[i];
 | 
				
			||||||
        float wj = win[j];
 | 
					        float wj = win[j];
 | 
				
			||||||
        dst[i] = s0*wj - s1*wi + add_bias;
 | 
					        dst[i] = s0*wj - s1*wi;
 | 
				
			||||||
        dst[j] = s0*wi + s1*wj + add_bias;
 | 
					        dst[j] = s0*wi + s1*wj;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -4434,7 +4436,7 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
 | 
				
			|||||||
    c->vector_fmul = vector_fmul_c;
 | 
					    c->vector_fmul = vector_fmul_c;
 | 
				
			||||||
    c->vector_fmul_reverse = vector_fmul_reverse_c;
 | 
					    c->vector_fmul_reverse = vector_fmul_reverse_c;
 | 
				
			||||||
    c->vector_fmul_add = vector_fmul_add_c;
 | 
					    c->vector_fmul_add = vector_fmul_add_c;
 | 
				
			||||||
    c->vector_fmul_window = ff_vector_fmul_window_c;
 | 
					    c->vector_fmul_window = vector_fmul_window_c;
 | 
				
			||||||
    c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c;
 | 
					    c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c;
 | 
				
			||||||
    c->vector_clipf = vector_clipf_c;
 | 
					    c->vector_clipf = vector_clipf_c;
 | 
				
			||||||
    c->float_to_int16 = ff_float_to_int16_c;
 | 
					    c->float_to_int16 = ff_float_to_int16_c;
 | 
				
			||||||
 | 
				
			|||||||
@ -68,9 +68,6 @@ void ff_h264_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qmul);
 | 
				
			|||||||
void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qp);
 | 
					void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qp);
 | 
				
			||||||
void ff_svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
 | 
					void ff_svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1,
 | 
					 | 
				
			||||||
                             const float *win, float add_bias, int len);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* encoding scans */
 | 
					/* encoding scans */
 | 
				
			||||||
extern const uint8_t ff_alternate_horizontal_scan[64];
 | 
					extern const uint8_t ff_alternate_horizontal_scan[64];
 | 
				
			||||||
extern const uint8_t ff_alternate_vertical_scan[64];
 | 
					extern const uint8_t ff_alternate_vertical_scan[64];
 | 
				
			||||||
@ -393,7 +390,7 @@ typedef struct DSPContext {
 | 
				
			|||||||
    /* assume len is a multiple of 8, and src arrays are 16-byte aligned */
 | 
					    /* assume len is a multiple of 8, and src arrays are 16-byte aligned */
 | 
				
			||||||
    void (*vector_fmul_add)(float *dst, const float *src0, const float *src1, const float *src2, int len);
 | 
					    void (*vector_fmul_add)(float *dst, const float *src0, const float *src1, const float *src2, int len);
 | 
				
			||||||
    /* assume len is a multiple of 4, and arrays are 16-byte aligned */
 | 
					    /* assume len is a multiple of 4, and arrays are 16-byte aligned */
 | 
				
			||||||
    void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len);
 | 
					    void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, int len);
 | 
				
			||||||
    /* assume len is a multiple of 8, and arrays are 16-byte aligned */
 | 
					    /* assume len is a multiple of 8, and arrays are 16-byte aligned */
 | 
				
			||||||
    void (*int32_to_float_fmul_scalar)(float *dst, const int *src, float mul, int len);
 | 
					    void (*int32_to_float_fmul_scalar)(float *dst, const int *src, float mul, int len);
 | 
				
			||||||
    void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */);
 | 
					    void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */);
 | 
				
			||||||
 | 
				
			|||||||
@ -90,13 +90,9 @@ static void vector_fmul_add_altivec(float *dst, const float *src0,
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void vector_fmul_window_altivec(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len)
 | 
					static void vector_fmul_window_altivec(float *dst, const float *src0, const float *src1, const float *win, int len)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    union {
 | 
					    vector float zero, t0, t1, s0, s1, wi, wj;
 | 
				
			||||||
        vector float v;
 | 
					 | 
				
			||||||
        float s[4];
 | 
					 | 
				
			||||||
    } vadd;
 | 
					 | 
				
			||||||
    vector float vadd_bias, zero, t0, t1, s0, s1, wi, wj;
 | 
					 | 
				
			||||||
    const vector unsigned char reverse = vcprm(3,2,1,0);
 | 
					    const vector unsigned char reverse = vcprm(3,2,1,0);
 | 
				
			||||||
    int i,j;
 | 
					    int i,j;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -104,8 +100,6 @@ static void vector_fmul_window_altivec(float *dst, const float *src0, const floa
 | 
				
			|||||||
    win += len;
 | 
					    win += len;
 | 
				
			||||||
    src0+= len;
 | 
					    src0+= len;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    vadd.s[0] = add_bias;
 | 
					 | 
				
			||||||
    vadd_bias = vec_splat(vadd.v, 0);
 | 
					 | 
				
			||||||
    zero = (vector float)vec_splat_u32(0);
 | 
					    zero = (vector float)vec_splat_u32(0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for(i=-len*4, j=len*4-16; i<0; i+=16, j-=16) {
 | 
					    for(i=-len*4, j=len*4-16; i<0; i+=16, j-=16) {
 | 
				
			||||||
@ -117,9 +111,9 @@ static void vector_fmul_window_altivec(float *dst, const float *src0, const floa
 | 
				
			|||||||
        s1 = vec_perm(s1, s1, reverse);
 | 
					        s1 = vec_perm(s1, s1, reverse);
 | 
				
			||||||
        wj = vec_perm(wj, wj, reverse);
 | 
					        wj = vec_perm(wj, wj, reverse);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        t0 = vec_madd(s0, wj, vadd_bias);
 | 
					        t0 = vec_madd(s0, wj, zero);
 | 
				
			||||||
        t0 = vec_nmsub(s1, wi, t0);
 | 
					        t0 = vec_nmsub(s1, wi, t0);
 | 
				
			||||||
        t1 = vec_madd(s0, wi, vadd_bias);
 | 
					        t1 = vec_madd(s0, wi, zero);
 | 
				
			||||||
        t1 = vec_madd(s1, wj, t1);
 | 
					        t1 = vec_madd(s1, wj, t1);
 | 
				
			||||||
        t1 = vec_perm(t1, t1, reverse);
 | 
					        t1 = vec_perm(t1, t1, reverse);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -24,7 +24,7 @@
 | 
				
			|||||||
static void synth_filter_float(FFTContext *imdct,
 | 
					static void synth_filter_float(FFTContext *imdct,
 | 
				
			||||||
                           float *synth_buf_ptr, int *synth_buf_offset,
 | 
					                           float *synth_buf_ptr, int *synth_buf_offset,
 | 
				
			||||||
                           float synth_buf2[32], const float window[512],
 | 
					                           float synth_buf2[32], const float window[512],
 | 
				
			||||||
                           float out[32], const float in[32], float scale, float bias)
 | 
					                           float out[32], const float in[32], float scale)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    float *synth_buf= synth_buf_ptr + *synth_buf_offset;
 | 
					    float *synth_buf= synth_buf_ptr + *synth_buf_offset;
 | 
				
			||||||
    int i, j;
 | 
					    int i, j;
 | 
				
			||||||
@ -48,8 +48,8 @@ static void synth_filter_float(FFTContext *imdct,
 | 
				
			|||||||
            c += window[i + j + 32]*( synth_buf[16 + i + j - 512]);
 | 
					            c += window[i + j + 32]*( synth_buf[16 + i + j - 512]);
 | 
				
			||||||
            d += window[i + j + 48]*( synth_buf[31 - i + j - 512]);
 | 
					            d += window[i + j + 48]*( synth_buf[31 - i + j - 512]);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        out[i     ] = a*scale + bias;
 | 
					        out[i     ] = a*scale;
 | 
				
			||||||
        out[i + 16] = b*scale + bias;
 | 
					        out[i + 16] = b*scale;
 | 
				
			||||||
        synth_buf2[i     ] = c;
 | 
					        synth_buf2[i     ] = c;
 | 
				
			||||||
        synth_buf2[i + 16] = d;
 | 
					        synth_buf2[i + 16] = d;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
				
			|||||||
@ -28,7 +28,7 @@ typedef struct SynthFilterContext {
 | 
				
			|||||||
                               float *synth_buf_ptr, int *synth_buf_offset,
 | 
					                               float *synth_buf_ptr, int *synth_buf_offset,
 | 
				
			||||||
                               float synth_buf2[32], const float window[512],
 | 
					                               float synth_buf2[32], const float window[512],
 | 
				
			||||||
                               float out[32], const float in[32],
 | 
					                               float out[32], const float in[32],
 | 
				
			||||||
                               float scale, float bias);
 | 
					                               float scale);
 | 
				
			||||||
} SynthFilterContext;
 | 
					} SynthFilterContext;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void ff_synth_filter_init(SynthFilterContext *c);
 | 
					void ff_synth_filter_init(SynthFilterContext *c);
 | 
				
			||||||
 | 
				
			|||||||
@ -646,7 +646,6 @@ static void imdct_and_window(TwinContext *tctx, enum FrameType ftype, int wtype,
 | 
				
			|||||||
                                     prev_buf + (bsize-wsize)/2,
 | 
					                                     prev_buf + (bsize-wsize)/2,
 | 
				
			||||||
                                     buf1 + bsize*j,
 | 
					                                     buf1 + bsize*j,
 | 
				
			||||||
                                     ff_sine_windows[av_log2(wsize)],
 | 
					                                     ff_sine_windows[av_log2(wsize)],
 | 
				
			||||||
                                     0.0,
 | 
					 | 
				
			||||||
                                     wsize/2);
 | 
					                                     wsize/2);
 | 
				
			||||||
        out2 += wsize;
 | 
					        out2 += wsize;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -1575,13 +1575,13 @@ static int vorbis_parse_audio_packet(vorbis_context *vc)
 | 
				
			|||||||
        const float *win  = vc->win[blockflag & previous_window];
 | 
					        const float *win  = vc->win[blockflag & previous_window];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if (blockflag == previous_window) {
 | 
					        if (blockflag == previous_window) {
 | 
				
			||||||
            vc->dsp.vector_fmul_window(ret, saved, buf, win, 0, blocksize / 4);
 | 
					            vc->dsp.vector_fmul_window(ret, saved, buf, win, blocksize / 4);
 | 
				
			||||||
        } else if (blockflag > previous_window) {
 | 
					        } else if (blockflag > previous_window) {
 | 
				
			||||||
            vc->dsp.vector_fmul_window(ret, saved, buf, win, 0, bs0 / 4);
 | 
					            vc->dsp.vector_fmul_window(ret, saved, buf, win, bs0 / 4);
 | 
				
			||||||
            memcpy(ret+bs0/2, buf+bs0/4, ((bs1-bs0)/4) * sizeof(float));
 | 
					            memcpy(ret+bs0/2, buf+bs0/4, ((bs1-bs0)/4) * sizeof(float));
 | 
				
			||||||
        } else {
 | 
					        } else {
 | 
				
			||||||
            memcpy(ret, saved, ((bs1 - bs0) / 4) * sizeof(float));
 | 
					            memcpy(ret, saved, ((bs1 - bs0) / 4) * sizeof(float));
 | 
				
			||||||
            vc->dsp.vector_fmul_window(ret + (bs1 - bs0) / 4, saved + (bs1 - bs0) / 4, buf, win, 0, bs0 / 4);
 | 
					            vc->dsp.vector_fmul_window(ret + (bs1 - bs0) / 4, saved + (bs1 - bs0) / 4, buf, win, bs0 / 4);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        memcpy(saved, buf + blocksize / 4, blocksize / 4 * sizeof(float));
 | 
					        memcpy(saved, buf + blocksize / 4, blocksize / 4 * sizeof(float));
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
				
			|||||||
@ -1031,7 +1031,7 @@ static void wmapro_window(WMAProDecodeCtx *s)
 | 
				
			|||||||
        winlen >>= 1;
 | 
					        winlen >>= 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        s->dsp.vector_fmul_window(start, start, start + winlen,
 | 
					        s->dsp.vector_fmul_window(start, start, start + winlen,
 | 
				
			||||||
                                  window, 0, winlen);
 | 
					                                  window, winlen);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        s->channel[c].prev_block_len = s->subframe_len;
 | 
					        s->channel[c].prev_block_len = s->subframe_len;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
				
			|||||||
@ -2190,10 +2190,9 @@ static void vector_fmul_add_sse(float *dst, const float *src0, const float *src1
 | 
				
			|||||||
    );
 | 
					    );
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void vector_fmul_window_3dnow2(float *dst, const float *src0, const float *src1,
 | 
					 | 
				
			||||||
                                      const float *win, float add_bias, int len){
 | 
					 | 
				
			||||||
#if HAVE_6REGS
 | 
					#if HAVE_6REGS
 | 
				
			||||||
    if(add_bias == 0){
 | 
					static void vector_fmul_window_3dnow2(float *dst, const float *src0, const float *src1,
 | 
				
			||||||
 | 
					                                      const float *win, int len){
 | 
				
			||||||
        x86_reg i = -len*4;
 | 
					        x86_reg i = -len*4;
 | 
				
			||||||
        x86_reg j = len*4-8;
 | 
					        x86_reg j = len*4-8;
 | 
				
			||||||
        __asm__ volatile(
 | 
					        __asm__ volatile(
 | 
				
			||||||
@ -2220,15 +2219,10 @@ static void vector_fmul_window_3dnow2(float *dst, const float *src0, const float
 | 
				
			|||||||
            :"+r"(i), "+r"(j)
 | 
					            :"+r"(i), "+r"(j)
 | 
				
			||||||
            :"r"(dst+len), "r"(src0+len), "r"(src1), "r"(win+len)
 | 
					            :"r"(dst+len), "r"(src0+len), "r"(src1), "r"(win+len)
 | 
				
			||||||
        );
 | 
					        );
 | 
				
			||||||
    }else
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
        ff_vector_fmul_window_c(dst, src0, src1, win, add_bias, len);
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void vector_fmul_window_sse(float *dst, const float *src0, const float *src1,
 | 
					static void vector_fmul_window_sse(float *dst, const float *src0, const float *src1,
 | 
				
			||||||
                                   const float *win, float add_bias, int len){
 | 
					                                   const float *win, int len){
 | 
				
			||||||
#if HAVE_6REGS
 | 
					 | 
				
			||||||
    if(add_bias == 0){
 | 
					 | 
				
			||||||
        x86_reg i = -len*4;
 | 
					        x86_reg i = -len*4;
 | 
				
			||||||
        x86_reg j = len*4-16;
 | 
					        x86_reg j = len*4-16;
 | 
				
			||||||
        __asm__ volatile(
 | 
					        __asm__ volatile(
 | 
				
			||||||
@ -2256,10 +2250,8 @@ static void vector_fmul_window_sse(float *dst, const float *src0, const float *s
 | 
				
			|||||||
            :"+r"(i), "+r"(j)
 | 
					            :"+r"(i), "+r"(j)
 | 
				
			||||||
            :"r"(dst+len), "r"(src0+len), "r"(src1), "r"(win+len)
 | 
					            :"r"(dst+len), "r"(src0+len), "r"(src1), "r"(win+len)
 | 
				
			||||||
        );
 | 
					        );
 | 
				
			||||||
    }else
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
        ff_vector_fmul_window_c(dst, src0, src1, win, add_bias, len);
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					#endif /* HAVE_6REGS */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void int32_to_float_fmul_scalar_sse(float *dst, const int *src, float mul, int len)
 | 
					static void int32_to_float_fmul_scalar_sse(float *dst, const int *src, float mul, int len)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
@ -2882,7 +2874,9 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
        if(mm_flags & AV_CPU_FLAG_3DNOWEXT){
 | 
					        if(mm_flags & AV_CPU_FLAG_3DNOWEXT){
 | 
				
			||||||
            c->vector_fmul_reverse = vector_fmul_reverse_3dnow2;
 | 
					            c->vector_fmul_reverse = vector_fmul_reverse_3dnow2;
 | 
				
			||||||
 | 
					#if HAVE_6REGS
 | 
				
			||||||
            c->vector_fmul_window = vector_fmul_window_3dnow2;
 | 
					            c->vector_fmul_window = vector_fmul_window_3dnow2;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
            if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
 | 
					            if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
 | 
				
			||||||
                c->float_to_int16_interleave = float_to_int16_interleave_3dn2;
 | 
					                c->float_to_int16_interleave = float_to_int16_interleave_3dn2;
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
@ -2899,7 +2893,9 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
 | 
				
			|||||||
            c->vector_fmul = vector_fmul_sse;
 | 
					            c->vector_fmul = vector_fmul_sse;
 | 
				
			||||||
            c->vector_fmul_reverse = vector_fmul_reverse_sse;
 | 
					            c->vector_fmul_reverse = vector_fmul_reverse_sse;
 | 
				
			||||||
            c->vector_fmul_add = vector_fmul_add_sse;
 | 
					            c->vector_fmul_add = vector_fmul_add_sse;
 | 
				
			||||||
 | 
					#if HAVE_6REGS
 | 
				
			||||||
            c->vector_fmul_window = vector_fmul_window_sse;
 | 
					            c->vector_fmul_window = vector_fmul_window_sse;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
            c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse;
 | 
					            c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse;
 | 
				
			||||||
            c->vector_clipf = vector_clipf_sse;
 | 
					            c->vector_clipf = vector_clipf_sse;
 | 
				
			||||||
            c->float_to_int16 = float_to_int16_sse;
 | 
					            c->float_to_int16 = float_to_int16_sse;
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user