x86/aacpsdsp: precompute constant factors
Inspired by the optimization done to the C version by Rémi Denis-Courmont.

Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
		
							parent
							
								
									08edacc248
								
							
						
					
					
						commit
						2bcf86d53d
					
				@ -400,29 +400,32 @@ HYBRID_SYNTHESIS_DEINT
 | 
				
			|||||||
;                                 const float (*filter)[8][2],
 | 
					;                                 const float (*filter)[8][2],
 | 
				
			||||||
;                                 ptrdiff_t stride, int n);
 | 
					;                                 ptrdiff_t stride, int n);
 | 
				
			||||||
;*******************************************************************
 | 
					;*******************************************************************
 | 
				
			||||||
%macro PS_HYBRID_ANALYSIS_LOOP 3
 | 
					%macro PS_HYBRID_ANALYSIS_IN 1
 | 
				
			||||||
    movu     %1, [inq+mmsize*%3]
 | 
					    movu     m0, [inq+mmsize*%1]
 | 
				
			||||||
    movu     m1, [inq+mmsize*(5-%3)+8]
 | 
					    movu     m1, [inq+mmsize*(5-%1)+8]
 | 
				
			||||||
%if cpuflag(sse3)
 | 
					    mova     m3, m0
 | 
				
			||||||
    pshufd   %2, %1, q2301
 | 
					 | 
				
			||||||
    pshufd   m4, m1, q0123
 | 
					 | 
				
			||||||
    pshufd   m1, m1, q1032
 | 
					 | 
				
			||||||
    pshufd   m2, [filterq+nq+mmsize*%3], q2301
 | 
					 | 
				
			||||||
    addsubps %2, m4
 | 
					 | 
				
			||||||
    addsubps %1, m1
 | 
					 | 
				
			||||||
%else
 | 
					 | 
				
			||||||
    mova     m2, [filterq+nq+mmsize*%3]
 | 
					 | 
				
			||||||
    mova     %2, %1
 | 
					 | 
				
			||||||
    mova     m4, m1
 | 
					    mova     m4, m1
 | 
				
			||||||
    shufps   %2, %2, q2301
 | 
					    shufps   m3, m3, q2301
 | 
				
			||||||
    shufps   m4, m4, q0123
 | 
					    shufps   m4, m4, q0123
 | 
				
			||||||
    shufps   m1, m1, q1032
 | 
					    shufps   m1, m1, q1032
 | 
				
			||||||
    shufps   m2, m2, q2301
 | 
					%if cpuflag(sse3)
 | 
				
			||||||
 | 
					    addsubps m3, m4
 | 
				
			||||||
 | 
					    addsubps m0, m1
 | 
				
			||||||
 | 
					%else
 | 
				
			||||||
    xorps    m4, m7
 | 
					    xorps    m4, m7
 | 
				
			||||||
    xorps    m1, m7
 | 
					    xorps    m1, m7
 | 
				
			||||||
    subps    %2, m4
 | 
					    subps    m3, m4
 | 
				
			||||||
    subps    %1, m1
 | 
					    subps    m0, m1
 | 
				
			||||||
%endif
 | 
					%endif
 | 
				
			||||||
 | 
					    mova  [rsp+mmsize*%1*2], m3
 | 
				
			||||||
 | 
					    mova  [rsp+mmsize+mmsize*%1*2], m0
 | 
				
			||||||
 | 
					%endmacro
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					%macro PS_HYBRID_ANALYSIS_LOOP 3
 | 
				
			||||||
 | 
					    mova     m2, [filterq+nq+mmsize*%3]
 | 
				
			||||||
 | 
					    shufps   m2, m2, q2301
 | 
				
			||||||
 | 
					    mova     %2, [rsp+mmsize*%3*2]
 | 
				
			||||||
 | 
					    mova     %1, [rsp+mmsize+mmsize*%3*2]
 | 
				
			||||||
    mulps    %2, m2
 | 
					    mulps    %2, m2
 | 
				
			||||||
    mulps    %1, m2
 | 
					    mulps    %1, m2
 | 
				
			||||||
%if %3
 | 
					%if %3
 | 
				
			||||||
@ -432,7 +435,7 @@ HYBRID_SYNTHESIS_DEINT
 | 
				
			|||||||
%endmacro
 | 
					%endmacro
 | 
				
			||||||
 | 
					
 | 
				
			||||||
%macro PS_HYBRID_ANALYSIS 0
 | 
					%macro PS_HYBRID_ANALYSIS 0
 | 
				
			||||||
cglobal ps_hybrid_analysis, 5, 5, 8, out, in, filter, stride, n
 | 
					cglobal ps_hybrid_analysis, 5, 5, 8, 24 * 4, out, in, filter, stride, n
 | 
				
			||||||
%if cpuflag(sse3)
 | 
					%if cpuflag(sse3)
 | 
				
			||||||
%define MOVH movsd
 | 
					%define MOVH movsd
 | 
				
			||||||
%else
 | 
					%else
 | 
				
			||||||
@ -443,6 +446,9 @@ cglobal ps_hybrid_analysis, 5, 5, 8, out, in, filter, stride, n
 | 
				
			|||||||
    add filterq, nq
 | 
					    add filterq, nq
 | 
				
			||||||
    neg nq
 | 
					    neg nq
 | 
				
			||||||
    mova m7, [ps_p1m1p1m1]
 | 
					    mova m7, [ps_p1m1p1m1]
 | 
				
			||||||
 | 
					    PS_HYBRID_ANALYSIS_IN 0
 | 
				
			||||||
 | 
					    PS_HYBRID_ANALYSIS_IN 1
 | 
				
			||||||
 | 
					    PS_HYBRID_ANALYSIS_IN 2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
align 16
 | 
					align 16
 | 
				
			||||||
.loop:
 | 
					.loop:
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user