avcodec/ac3: Implement sum_square_butterfly_int32 for aarch64 NEON
Signed-off-by: Geoff Hill <geoff@geoffhill.org>
Signed-off-by: Martin Storsjö <martin@martin.st>
This commit is contained in:
		
							parent
							
								
									69cb34f885
								
							
						
					
					
						commit
						42e88f18f3
					
				| @ -28,6 +28,10 @@ | |||||||
| void ff_ac3_exponent_min_neon(uint8_t *exp, int num_reuse_blocks, int nb_coefs); | void ff_ac3_exponent_min_neon(uint8_t *exp, int num_reuse_blocks, int nb_coefs); | ||||||
| void ff_ac3_extract_exponents_neon(uint8_t *exp, int32_t *coef, int nb_coefs); | void ff_ac3_extract_exponents_neon(uint8_t *exp, int32_t *coef, int nb_coefs); | ||||||
| void ff_float_to_fixed24_neon(int32_t *dst, const float *src, size_t len); | void ff_float_to_fixed24_neon(int32_t *dst, const float *src, size_t len); | ||||||
|  | void ff_ac3_sum_square_butterfly_int32_neon(int64_t sum[4], | ||||||
|  |                                             const int32_t *coef0, | ||||||
|  |                                             const int32_t *coef1, | ||||||
|  |                                             int len); | ||||||
| 
 | 
 | ||||||
| av_cold void ff_ac3dsp_init_aarch64(AC3DSPContext *c) | av_cold void ff_ac3dsp_init_aarch64(AC3DSPContext *c) | ||||||
| { | { | ||||||
| @ -37,4 +41,5 @@ av_cold void ff_ac3dsp_init_aarch64(AC3DSPContext *c) | |||||||
|     c->ac3_exponent_min = ff_ac3_exponent_min_neon; |     c->ac3_exponent_min = ff_ac3_exponent_min_neon; | ||||||
|     c->extract_exponents = ff_ac3_extract_exponents_neon; |     c->extract_exponents = ff_ac3_extract_exponents_neon; | ||||||
|     c->float_to_fixed24 = ff_float_to_fixed24_neon; |     c->float_to_fixed24 = ff_float_to_fixed24_neon; | ||||||
|  |     c->sum_square_butterfly_int32 = ff_ac3_sum_square_butterfly_int32_neon; | ||||||
| } | } | ||||||
|  | |||||||
| @ -64,3 +64,26 @@ function ff_float_to_fixed24_neon, export=1 | |||||||
|         b.ne            1b |         b.ne            1b | ||||||
|         ret |         ret | ||||||
| endfunc | endfunc | ||||||
|  | 
 | ||||||
// ff_ac3_sum_square_butterfly_int32_neon
//
// Reads 32-bit signed values a[i] from [x1] and b[i] from [x2] and
// accumulates four 64-bit sums, which are stored consecutively at [x0]:
//   [x0 +  0] = sum of a[i] * a[i]
//   [x0 +  8] = sum of b[i] * b[i]
//   [x0 + 16] = sum of (a[i] + b[i])^2
//   [x0 + 24] = sum of (a[i] - b[i])^2
//
// NOTE(review): with the C prototype (sum, coef0, coef1, len) this presumably
// maps to x0 = sum, x1 = coef0, x2 = coef1, w3 = len -- confirm against the
// calling convention in use.
// NOTE(review): two elements are consumed per iteration and the counter is
// only tested after the loop body, so this presumably expects len to be a
// positive multiple of 2 -- confirm with callers.
// NOTE(review): the add/sub below operate on 32-bit lanes before widening,
// so a + b and a - b wrap if they do not fit in 32 bits; presumably inputs
// are limited to a narrower range (e.g. 24 bits) -- confirm.
|  | function ff_ac3_sum_square_butterfly_int32_neon, export=1 | ||||||
// Clear the four accumulators (two 64-bit lanes each).
|  |         movi            v0.2d, #0 | ||||||
|  |         movi            v1.2d, #0 | ||||||
|  |         movi            v2.2d, #0 | ||||||
|  |         movi            v3.2d, #0 | ||||||
// Load two 32-bit values from each input and post-increment the pointers by 8 bytes.
|  | 1:      ld1             {v4.2s}, [x1], #8 | ||||||
|  |         ld1             {v5.2s}, [x2], #8 | ||||||
// v6 = a + b, v7 = a - b (32-bit lanes).
|  |         add             v6.2s, v4.2s, v5.2s | ||||||
|  |         sub             v7.2s, v4.2s, v5.2s | ||||||
// Signed widening multiply-accumulate: square each 32-bit lane into a 64-bit lane.
|  |         smlal           v0.2d, v4.2s, v4.2s | ||||||
|  |         smlal           v1.2d, v5.2s, v5.2s | ||||||
|  |         smlal           v2.2d, v6.2s, v6.2s | ||||||
|  |         smlal           v3.2d, v7.2s, v7.2s | ||||||
// Two elements done; loop while the remaining count is still positive.
|  |         subs            w3, w3, #2 | ||||||
|  |         b.gt            1b | ||||||
// Fold the two 64-bit lanes of each accumulator into a single 64-bit scalar.
|  |         addp            d0, v0.2d | ||||||
|  |         addp            d1, v1.2d | ||||||
|  |         addp            d2, v2.2d | ||||||
|  |         addp            d3, v3.2d | ||||||
// Store the four 64-bit results back-to-back at [x0].
|  |         st1             {v0.1d-v3.1d}, [x0] | ||||||
|  |         ret | ||||||
|  | endfunc | ||||||
|  | |||||||
| @ -139,6 +139,32 @@ static void check_float_to_fixed24(AC3DSPContext *c) { | |||||||
|     report("float_to_fixed24"); |     report("float_to_fixed24"); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | /* Compare c->sum_square_butterfly_int32 against the reference version. | ||||||
|  |  * Two ELEMS-sized int32 buffers are filled with randomize_i24(), both | ||||||
|  |  * implementations are run on them, and the four int64 results must be | ||||||
|  |  * bit-identical; the new implementation is then benchmarked. */ | ||||||
|  | static void check_ac3_sum_square_butterfly_int32(AC3DSPContext *c) { | ||||||
|  | #define ELEMS 240 | ||||||
|  |     LOCAL_ALIGNED_16(int32_t, lt, [ELEMS]); | ||||||
|  |     LOCAL_ALIGNED_16(int32_t, rt, [ELEMS]); | ||||||
|  |     /* int64_t to match the int64_t[4] parameter declared below */ | ||||||
|  |     LOCAL_ALIGNED_16(int64_t, v1, [4]); | ||||||
|  |     LOCAL_ALIGNED_16(int64_t, v2, [4]); | ||||||
|  | 
 | ||||||
|  |     declare_func(void, int64_t[4], const int32_t *, const int32_t *, int); | ||||||
|  | 
 | ||||||
|  |     randomize_i24(lt, ELEMS); | ||||||
|  |     randomize_i24(rt, ELEMS); | ||||||
|  | 
 | ||||||
|  |     /* name kept identical to the one passed to report() below */ | ||||||
|  |     if (check_func(c->sum_square_butterfly_int32, | ||||||
|  |                    "ac3_sum_square_butterfly_int32")) { | ||||||
|  |         call_ref(v1, lt, rt, ELEMS); | ||||||
|  |         call_new(v2, lt, rt, ELEMS); | ||||||
|  | 
 | ||||||
|  |         if (memcmp(v1, v2, sizeof(int64_t[4])) != 0) | ||||||
|  |             fail(); | ||||||
|  | 
 | ||||||
|  |         bench_new(v2, lt, rt, ELEMS); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     report("ac3_sum_square_butterfly_int32"); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| void checkasm_check_ac3dsp(void) | void checkasm_check_ac3dsp(void) | ||||||
| { | { | ||||||
|     AC3DSPContext c; |     AC3DSPContext c; | ||||||
| @ -147,4 +173,5 @@ void checkasm_check_ac3dsp(void) | |||||||
|     check_ac3_exponent_min(&c); |     check_ac3_exponent_min(&c); | ||||||
|     check_ac3_extract_exponents(&c); |     check_ac3_extract_exponents(&c); | ||||||
|     check_float_to_fixed24(&c); |     check_float_to_fixed24(&c); | ||||||
|  |     check_ac3_sum_square_butterfly_int32(&c); | ||||||
| } | } | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user