x86/diracdsp: make ff_put_signed_rect_clamped_10_sse4 work on x86_32
Reviewed-by: Rostislav Pehlivanov <atomnuker@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
		
							parent
							
								
									41d7642a7b
								
							
						
					
					
						commit
						7a15cf42ee
					
				@ -303,24 +303,30 @@ cglobal dequant_subband_32, 7, 7, 4, src, dst, stride, qf, qs, tot_v, tot_h
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    RET
 | 
					    RET
 | 
				
			||||||
 | 
					
 | 
				
			||||||
%if ARCH_X86_64 == 1
 | 
					INIT_XMM sse4
 | 
				
			||||||
; void put_signed_rect_clamped_10(uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int width, int height)
 | 
					; void put_signed_rect_clamped_10(uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int width, int height)
 | 
				
			||||||
cglobal put_signed_rect_clamped_10, 6, 9, 6, dst, dst_stride, src, src_stride, w, h
 | 
					%if ARCH_X86_64
 | 
				
			||||||
    mov      r6, srcq
 | 
					cglobal put_signed_rect_clamped_10, 6, 8, 5, dst, dst_stride, src, src_stride, w, h, t1, t2
 | 
				
			||||||
    mov      r7, dstq
 | 
					%else
 | 
				
			||||||
    mov      r8, wq
 | 
					cglobal put_signed_rect_clamped_10, 5, 7, 5, dst, dst_stride, src, src_stride, w, t1, t2
 | 
				
			||||||
 | 
					    %define  hd  r5mp
 | 
				
			||||||
 | 
					%endif
 | 
				
			||||||
 | 
					    shl      wd, 2
 | 
				
			||||||
 | 
					    add    srcq, wq
 | 
				
			||||||
 | 
					    neg      wq
 | 
				
			||||||
 | 
					    mov     t2q, dstq
 | 
				
			||||||
 | 
					    mov     t1q, wq
 | 
				
			||||||
    pxor     m2, m2
 | 
					    pxor     m2, m2
 | 
				
			||||||
    mova     m3, [clip_10bit]
 | 
					    mova     m3, [clip_10bit]
 | 
				
			||||||
    mova     m4, [convert_to_unsigned_10bit]
 | 
					    mova     m4, [convert_to_unsigned_10bit]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    .loop_h:
 | 
					    .loop_h:
 | 
				
			||||||
    mov      srcq, r6
 | 
					    mov    dstq, t2q
 | 
				
			||||||
    mov      dstq, r7
 | 
					    mov      wq, t1q
 | 
				
			||||||
    mov      wq,   r8
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    .loop_w:
 | 
					    .loop_w:
 | 
				
			||||||
    movu     m0, [srcq+0*mmsize]
 | 
					    movu     m0, [srcq+wq+0*mmsize]
 | 
				
			||||||
    movu     m1, [srcq+1*mmsize]
 | 
					    movu     m1, [srcq+wq+1*mmsize]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    paddd    m0, m4
 | 
					    paddd    m0, m4
 | 
				
			||||||
    paddd    m1, m4
 | 
					    paddd    m1, m4
 | 
				
			||||||
@ -329,16 +335,13 @@ cglobal put_signed_rect_clamped_10, 6, 9, 6, dst, dst_stride, src, src_stride, w
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    movu     [dstq], m0
 | 
					    movu     [dstq], m0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    add      srcq, 2*mmsize
 | 
					 | 
				
			||||||
    add      dstq, 1*mmsize
 | 
					    add      dstq, 1*mmsize
 | 
				
			||||||
    sub      wd, 8
 | 
					    add      wq,   2*mmsize
 | 
				
			||||||
    jg       .loop_w
 | 
					    jl       .loop_w
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    add      r6, src_strideq
 | 
					    add    srcq, src_strideq
 | 
				
			||||||
    add      r7, dst_strideq
 | 
					    add     t2q, dst_strideq
 | 
				
			||||||
    sub      hd, 1
 | 
					    sub      hd, 1
 | 
				
			||||||
    jg       .loop_h
 | 
					    jg       .loop_h
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    RET
 | 
					    RET
 | 
				
			||||||
 | 
					 | 
				
			||||||
%endif
 | 
					 | 
				
			||||||
 | 
				
			|||||||
@ -45,9 +45,7 @@ void ff_put_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, i
 | 
				
			|||||||
void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
 | 
					void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
 | 
				
			||||||
void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
 | 
					void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
 | 
				
			||||||
void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
 | 
					void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
 | 
				
			||||||
#if ARCH_X86_64
 | 
					 | 
				
			||||||
void ff_put_signed_rect_clamped_10_sse4(uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int width, int height);
 | 
					void ff_put_signed_rect_clamped_10_sse4(uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int width, int height);
 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
void ff_dequant_subband_32_sse4(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h);
 | 
					void ff_dequant_subband_32_sse4(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -192,8 +190,6 @@ void ff_diracdsp_init_x86(DiracDSPContext* c)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    if (EXTERNAL_SSE4(mm_flags)) {
 | 
					    if (EXTERNAL_SSE4(mm_flags)) {
 | 
				
			||||||
        c->dequant_subband[1]         = ff_dequant_subband_32_sse4;
 | 
					        c->dequant_subband[1]         = ff_dequant_subband_32_sse4;
 | 
				
			||||||
#if ARCH_X86_64
 | 
					 | 
				
			||||||
        c->put_signed_rect_clamped[1] = ff_put_signed_rect_clamped_10_sse4;
 | 
					        c->put_signed_rect_clamped[1] = ff_put_signed_rect_clamped_10_sse4;
 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user