x86/diracdsp: make ff_put_signed_rect_clamped_10_sse4 work on x86_32
Reviewed-by: Rostislav Pehlivanov <atomnuker@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
		
							parent
							
								
									41d7642a7b
								
							
						
					
					
						commit
						7a15cf42ee
					
				@ -303,24 +303,30 @@ cglobal dequant_subband_32, 7, 7, 4, src, dst, stride, qf, qs, tot_v, tot_h
 | 
			
		||||
 | 
			
		||||
    RET
 | 
			
		||||
 | 
			
		||||
%if ARCH_X86_64 == 1
 | 
			
		||||
INIT_XMM sse4
 | 
			
		||||
; void put_signed_rect_clamped_10(uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int width, int height)
 | 
			
		||||
cglobal put_signed_rect_clamped_10, 6, 9, 6, dst, dst_stride, src, src_stride, w, h
 | 
			
		||||
    mov      r6, srcq
 | 
			
		||||
    mov      r7, dstq
 | 
			
		||||
    mov      r8, wq
 | 
			
		||||
%if ARCH_X86_64
 | 
			
		||||
cglobal put_signed_rect_clamped_10, 6, 8, 5, dst, dst_stride, src, src_stride, w, h, t1, t2
 | 
			
		||||
%else
 | 
			
		||||
cglobal put_signed_rect_clamped_10, 5, 7, 5, dst, dst_stride, src, src_stride, w, t1, t2
 | 
			
		||||
    %define  hd  r5mp
 | 
			
		||||
%endif
 | 
			
		||||
    shl      wd, 2
 | 
			
		||||
    add    srcq, wq
 | 
			
		||||
    neg      wq
 | 
			
		||||
    mov     t2q, dstq
 | 
			
		||||
    mov     t1q, wq
 | 
			
		||||
    pxor     m2, m2
 | 
			
		||||
    mova     m3, [clip_10bit]
 | 
			
		||||
    mova     m4, [convert_to_unsigned_10bit]
 | 
			
		||||
 | 
			
		||||
    .loop_h:
 | 
			
		||||
    mov      srcq, r6
 | 
			
		||||
    mov      dstq, r7
 | 
			
		||||
    mov      wq,   r8
 | 
			
		||||
    mov    dstq, t2q
 | 
			
		||||
    mov      wq, t1q
 | 
			
		||||
 | 
			
		||||
    .loop_w:
 | 
			
		||||
    movu     m0, [srcq+0*mmsize]
 | 
			
		||||
    movu     m1, [srcq+1*mmsize]
 | 
			
		||||
    movu     m0, [srcq+wq+0*mmsize]
 | 
			
		||||
    movu     m1, [srcq+wq+1*mmsize]
 | 
			
		||||
 | 
			
		||||
    paddd    m0, m4
 | 
			
		||||
    paddd    m1, m4
 | 
			
		||||
@ -329,16 +335,13 @@ cglobal put_signed_rect_clamped_10, 6, 9, 6, dst, dst_stride, src, src_stride, w
 | 
			
		||||
 | 
			
		||||
    movu     [dstq], m0
 | 
			
		||||
 | 
			
		||||
    add      srcq, 2*mmsize
 | 
			
		||||
    add      dstq, 1*mmsize
 | 
			
		||||
    sub      wd, 8
 | 
			
		||||
    jg       .loop_w
 | 
			
		||||
    add      wq,   2*mmsize
 | 
			
		||||
    jl       .loop_w
 | 
			
		||||
 | 
			
		||||
    add      r6, src_strideq
 | 
			
		||||
    add      r7, dst_strideq
 | 
			
		||||
    add    srcq, src_strideq
 | 
			
		||||
    add     t2q, dst_strideq
 | 
			
		||||
    sub      hd, 1
 | 
			
		||||
    jg       .loop_h
 | 
			
		||||
 | 
			
		||||
    RET
 | 
			
		||||
 | 
			
		||||
%endif
 | 
			
		||||
 | 
			
		||||
@ -45,9 +45,7 @@ void ff_put_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, i
 | 
			
		||||
void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
 | 
			
		||||
void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
 | 
			
		||||
void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
 | 
			
		||||
#if ARCH_X86_64
 | 
			
		||||
void ff_put_signed_rect_clamped_10_sse4(uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int width, int height);
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
void ff_dequant_subband_32_sse4(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h);
 | 
			
		||||
 | 
			
		||||
@ -192,8 +190,6 @@ void ff_diracdsp_init_x86(DiracDSPContext* c)
 | 
			
		||||
 | 
			
		||||
    if (EXTERNAL_SSE4(mm_flags)) {
 | 
			
		||||
        c->dequant_subband[1]         = ff_dequant_subband_32_sse4;
 | 
			
		||||
#if ARCH_X86_64
 | 
			
		||||
        c->put_signed_rect_clamped[1] = ff_put_signed_rect_clamped_10_sse4;
 | 
			
		||||
#endif
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user