x86util: import MOVHL macro
Originally committed to x264 in 1637239a by Henrik Gramner who has
agreed to re-license it as LGPL.  Original commit message follows.
    x86: Avoid some bypass delays and false dependencies
    A bypass delay of 1-3 clock cycles may occur on some CPUs when transitioning
    between int and float domains, so try to avoid that if possible.
			
			
This commit is contained in:
		
							parent
							
								
									e18bc2114f
								
							
						
					
					
						commit
						7627df15d4
					
				@ -876,3 +876,15 @@
 | 
			
		||||
    psrlq   %1, 8*(%2)
 | 
			
		||||
%endif
 | 
			
		||||
%endmacro
 | 
			
		||||
 | 
			
		||||
;-----------------------------------------------------------------------
; MOVHL dst, src
;   Places the upper 64 bits of src in the lower 64 bits of dst, choosing
;   the instruction from the operands and the active cpuflags.
;   NOTE(review): the upper 64 bits of dst are not the same across the
;   variants (movhlps leaves them as they were in dst), so callers should
;   presumably not rely on them - confirm against the call sites.
;-----------------------------------------------------------------------
%macro MOVHL 2 ; dst, src
    %ifnidn %1, %2
        ; Distinct registers: pick a form that reads only src where possible.
        %if cpuflag(avx)
            punpckhqdq %1, %2, %2   ; three-operand form, dst is write-only
        %elif cpuflag(sse4)
            ; Reserved for newer CPUs because pshufd is slow on some older ones.
            ; q3232 is defined outside this macro.
            pshufd     %1, %2, q3232
        %else
            ; Fallback: can incur an int/float domain transition, and the result
            ; depends on the previous contents of dst.
            movhlps    %1, %2
        %endif
    %else
        ; dst and src are the same register: unpack it with itself.
        punpckhqdq %1, %2
    %endif
%endmacro
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user