dsputil: remove shift parameter from scalarproduct_int16
There is only one caller, and it does not need the shift. Other potential use cases would need different rounding anyway. The x86 and NEON versions are modified accordingly.
Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
This commit is contained in:
		
							parent
							
								
									dabf8dd34a
								
							
						
					
					
						commit
						7e1ce6a6ac
					
				@ -106,7 +106,7 @@ int16_t ff_acelp_decode_gain_code(
 | 
			
		||||
        mr_energy += quant_energy[i] * ma_prediction_coeff[i];
 | 
			
		||||
 | 
			
		||||
    mr_energy = gain_corr_factor * exp(M_LN10 / (20 << 23) * mr_energy) /
 | 
			
		||||
                sqrt(dsp->scalarproduct_int16(fc_v, fc_v, subframe_size, 0));
 | 
			
		||||
                sqrt(dsp->scalarproduct_int16(fc_v, fc_v, subframe_size));
 | 
			
		||||
    return mr_energy >> 12;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -171,8 +171,7 @@ void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min,
 | 
			
		||||
 | 
			
		||||
void ff_vorbis_inverse_coupling_neon(float *mag, float *ang, int blocksize);
 | 
			
		||||
 | 
			
		||||
int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int len,
 | 
			
		||||
                                    int shift);
 | 
			
		||||
int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int len);
 | 
			
		||||
int32_t ff_scalarproduct_and_madd_int16_neon(int16_t *v1, const int16_t *v2,
 | 
			
		||||
                                             const int16_t *v3, int len, int mul);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -29,31 +29,7 @@ function ff_scalarproduct_int16_neon, export=1
 | 
			
		||||
        vmov.i16        q1,  #0
 | 
			
		||||
        vmov.i16        q2,  #0
 | 
			
		||||
        vmov.i16        q3,  #0
 | 
			
		||||
        negs            r3,  r3
 | 
			
		||||
        beq             2f
 | 
			
		||||
 | 
			
		||||
        vdup.s32        q12, r3
 | 
			
		||||
1:      vld1.16         {d16-d17}, [r0]!
 | 
			
		||||
        vld1.16         {d20-d21}, [r1,:128]!
 | 
			
		||||
        vmull.s16       q12, d16,  d20
 | 
			
		||||
        vld1.16         {d18-d19}, [r0]!
 | 
			
		||||
        vmull.s16       q13, d17,  d21
 | 
			
		||||
        vld1.16         {d22-d23}, [r1,:128]!
 | 
			
		||||
        vmull.s16       q14, d18,  d22
 | 
			
		||||
        vmull.s16       q15, d19,  d23
 | 
			
		||||
        vshl.s32        q8,  q12,  q12
 | 
			
		||||
        vshl.s32        q9,  q13,  q12
 | 
			
		||||
        vadd.s32        q0,  q0,   q8
 | 
			
		||||
        vshl.s32        q10, q14,  q12
 | 
			
		||||
        vadd.s32        q1,  q1,   q9
 | 
			
		||||
        vshl.s32        q11, q15,  q12
 | 
			
		||||
        vadd.s32        q2,  q2,   q10
 | 
			
		||||
        vadd.s32        q3,  q3,   q11
 | 
			
		||||
        subs            r2,  r2,   #16
 | 
			
		||||
        bne             1b
 | 
			
		||||
        b               3f
 | 
			
		||||
 | 
			
		||||
2:      vld1.16         {d16-d17}, [r0]!
 | 
			
		||||
        vld1.16         {d20-d21}, [r1,:128]!
 | 
			
		||||
        vmlal.s16       q0,  d16,  d20
 | 
			
		||||
        vld1.16         {d18-d19}, [r0]!
 | 
			
		||||
@ -62,9 +38,9 @@ function ff_scalarproduct_int16_neon, export=1
 | 
			
		||||
        vmlal.s16       q2,  d18,  d22
 | 
			
		||||
        vmlal.s16       q3,  d19,  d23
 | 
			
		||||
        subs            r2,  r2,   #16
 | 
			
		||||
        bne             2b
 | 
			
		||||
        bne             1b
 | 
			
		||||
 | 
			
		||||
3:      vpadd.s32       d16, d0,   d1
 | 
			
		||||
        vpadd.s32       d16, d0,   d1
 | 
			
		||||
        vpadd.s32       d17, d2,   d3
 | 
			
		||||
        vpadd.s32       d10, d4,   d5
 | 
			
		||||
        vpadd.s32       d11, d6,   d7
 | 
			
		||||
 | 
			
		||||
@ -2559,12 +2559,12 @@ static void vector_clipf_c(float *dst, const float *src, float min, float max, i
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order, int shift)
 | 
			
		||||
static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order)
 | 
			
		||||
{
 | 
			
		||||
    int res = 0;
 | 
			
		||||
 | 
			
		||||
    while (order--)
 | 
			
		||||
        res += (*v1++ * *v2++) >> shift;
 | 
			
		||||
        res += *v1++ * *v2++;
 | 
			
		||||
 | 
			
		||||
    return res;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -536,9 +536,8 @@ typedef struct DSPContext {
 | 
			
		||||
    /**
 | 
			
		||||
     * Calculate scalar product of two vectors.
 | 
			
		||||
     * @param len length of vectors, should be multiple of 16
 | 
			
		||||
     * @param shift number of bits to discard from product
 | 
			
		||||
     */
 | 
			
		||||
    int32_t (*scalarproduct_int16)(const int16_t *v1, const int16_t *v2/*align 16*/, int len, int shift);
 | 
			
		||||
    int32_t (*scalarproduct_int16)(const int16_t *v1, const int16_t *v2/*align 16*/, int len);
 | 
			
		||||
    /* ape functions */
 | 
			
		||||
    /**
 | 
			
		||||
     * Calculate scalar product of v1 and v2,
 | 
			
		||||
 | 
			
		||||
@ -35,13 +35,12 @@ pb_bswap32: db 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
 | 
			
		||||
SECTION_TEXT
 | 
			
		||||
 | 
			
		||||
%macro SCALARPRODUCT 1
 | 
			
		||||
; int scalarproduct_int16(int16_t *v1, int16_t *v2, int order, int shift)
 | 
			
		||||
cglobal scalarproduct_int16_%1, 3,3,4, v1, v2, order, shift
 | 
			
		||||
; int scalarproduct_int16(int16_t *v1, int16_t *v2, int order)
 | 
			
		||||
cglobal scalarproduct_int16_%1, 3,3,3, v1, v2, order
 | 
			
		||||
    shl orderq, 1
 | 
			
		||||
    add v1q, orderq
 | 
			
		||||
    add v2q, orderq
 | 
			
		||||
    neg orderq
 | 
			
		||||
    movd    m3, shiftm
 | 
			
		||||
    pxor    m2, m2
 | 
			
		||||
.loop:
 | 
			
		||||
    movu    m0, [v1q + orderq]
 | 
			
		||||
@ -55,10 +54,8 @@ cglobal scalarproduct_int16_%1, 3,3,4, v1, v2, order, shift
 | 
			
		||||
%if mmsize == 16
 | 
			
		||||
    movhlps m0, m2
 | 
			
		||||
    paddd   m2, m0
 | 
			
		||||
    psrad   m2, m3
 | 
			
		||||
    pshuflw m0, m2, 0x4e
 | 
			
		||||
%else
 | 
			
		||||
    psrad   m2, m3
 | 
			
		||||
    pshufw  m0, m2, 0x4e
 | 
			
		||||
%endif
 | 
			
		||||
    paddd   m2, m0
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user