x86: bswapdsp: Don't treat 32-bit integers as 64-bit
The upper halves are not guaranteed to be zero in x86-64. Also use `test` instead of `and` when the result is used only as a branch condition; this allows some register moves to be eliminated.
Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
This commit is contained in:
parent
f5ee23004d
commit
a344e5d094
@ -28,8 +28,8 @@ SECTION_TEXT
|
|||||||
|
|
||||||
; %1 = aligned/unaligned
|
; %1 = aligned/unaligned
|
||||||
%macro BSWAP_LOOPS 1
|
%macro BSWAP_LOOPS 1
|
||||||
mov r3, r2
|
mov r3d, r2d
|
||||||
sar r2, 3
|
sar r2d, 3
|
||||||
jz .left4_%1
|
jz .left4_%1
|
||||||
.loop8_%1:
|
.loop8_%1:
|
||||||
mov%1 m0, [r1 + 0]
|
mov%1 m0, [r1 + 0]
|
||||||
@ -57,11 +57,11 @@ SECTION_TEXT
|
|||||||
%endif
|
%endif
|
||||||
add r0, 32
|
add r0, 32
|
||||||
add r1, 32
|
add r1, 32
|
||||||
dec r2
|
dec r2d
|
||||||
jnz .loop8_%1
|
jnz .loop8_%1
|
||||||
.left4_%1:
|
.left4_%1:
|
||||||
mov r2, r3
|
mov r2d, r3d
|
||||||
and r3, 4
|
test r3d, 4
|
||||||
jz .left
|
jz .left
|
||||||
mov%1 m0, [r1]
|
mov%1 m0, [r1]
|
||||||
%if cpuflag(ssse3)
|
%if cpuflag(ssse3)
|
||||||
@ -84,13 +84,11 @@ SECTION_TEXT
|
|||||||
%macro BSWAP32_BUF 0
|
%macro BSWAP32_BUF 0
|
||||||
%if cpuflag(ssse3)
|
%if cpuflag(ssse3)
|
||||||
cglobal bswap32_buf, 3,4,3
|
cglobal bswap32_buf, 3,4,3
|
||||||
mov r3, r1
|
|
||||||
mova m2, [pb_bswap32]
|
mova m2, [pb_bswap32]
|
||||||
%else
|
%else
|
||||||
cglobal bswap32_buf, 3,4,5
|
cglobal bswap32_buf, 3,4,5
|
||||||
mov r3, r1
|
|
||||||
%endif
|
%endif
|
||||||
and r3, 15
|
test r1, 15
|
||||||
jz .start_align
|
jz .start_align
|
||||||
BSWAP_LOOPS u
|
BSWAP_LOOPS u
|
||||||
jmp .left
|
jmp .left
|
||||||
@ -98,8 +96,7 @@ cglobal bswap32_buf, 3,4,5
|
|||||||
BSWAP_LOOPS a
|
BSWAP_LOOPS a
|
||||||
.left:
|
.left:
|
||||||
%if cpuflag(ssse3)
|
%if cpuflag(ssse3)
|
||||||
mov r3, r2
|
test r2d, 2
|
||||||
and r2, 2
|
|
||||||
jz .left1
|
jz .left1
|
||||||
movq m0, [r1]
|
movq m0, [r1]
|
||||||
pshufb m0, m2
|
pshufb m0, m2
|
||||||
@ -107,13 +104,13 @@ cglobal bswap32_buf, 3,4,5
|
|||||||
add r1, 8
|
add r1, 8
|
||||||
add r0, 8
|
add r0, 8
|
||||||
.left1:
|
.left1:
|
||||||
and r3, 1
|
test r2d, 1
|
||||||
jz .end
|
jz .end
|
||||||
mov r2d, [r1]
|
mov r2d, [r1]
|
||||||
bswap r2d
|
bswap r2d
|
||||||
mov [r0], r2d
|
mov [r0], r2d
|
||||||
%else
|
%else
|
||||||
and r2, 3
|
and r2d, 3
|
||||||
jz .end
|
jz .end
|
||||||
.loop2:
|
.loop2:
|
||||||
mov r3d, [r1]
|
mov r3d, [r1]
|
||||||
@ -121,7 +118,7 @@ cglobal bswap32_buf, 3,4,5
|
|||||||
mov [r0], r3d
|
mov [r0], r3d
|
||||||
add r1, 4
|
add r1, 4
|
||||||
add r0, 4
|
add r0, 4
|
||||||
dec r2
|
dec r2d
|
||||||
jnz .loop2
|
jnz .loop2
|
||||||
%endif
|
%endif
|
||||||
.end:
|
.end:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user