summary refs log tree commit diff stats
path: root/sysdeps/i386/i686/multiarch/memcpy-ssse3.S
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/i386/i686/multiarch/memcpy-ssse3.S')
-rw-r--r-- sysdeps/i386/i686/multiarch/memcpy-ssse3.S 71
1 files changed, 35 insertions, 36 deletions
diff --git a/sysdeps/i386/i686/multiarch/memcpy-ssse3.S b/sysdeps/i386/i686/multiarch/memcpy-ssse3.S
index 749c82d379..c512b0e812 100644
--- a/sysdeps/i386/i686/multiarch/memcpy-ssse3.S
+++ b/sysdeps/i386/i686/multiarch/memcpy-ssse3.S
@@ -128,7 +128,7 @@ ENTRY (MEMCPY)
jb L(copy_forward)
je L(fwd_write_0bytes)
cmp $32, %ecx
- jge L(memmove_bwd)
+ jae L(memmove_bwd)
jmp L(bk_write_less32bytes_2)
L(memmove_bwd):
add %ecx, %eax
@@ -139,12 +139,12 @@ L(memmove_bwd):
L(copy_forward):
#endif
cmp $48, %ecx
- jge L(48bytesormore)
+ jae L(48bytesormore)
L(fwd_write_less32bytes):
#ifndef USE_AS_MEMMOVE
cmp %dl, %al
- jl L(bk_write)
+ jb L(bk_write)
#endif
add %ecx, %edx
add %ecx, %eax
@@ -181,7 +181,7 @@ L(48bytesormore):
#endif
mov %eax, %edi
- jge L(large_page)
+ jae L(large_page)
and $0xf, %edi
jz L(shl_0)
@@ -202,7 +202,7 @@ L(shl_0_loop):
movdqa %xmm0, (%edx, %edi)
movdqa %xmm1, 16(%edx, %edi)
lea 32(%edi), %edi
- jl L(shl_0_end)
+ jb L(shl_0_end)
movdqa (%eax, %edi), %xmm0
movdqa 16(%eax, %edi), %xmm1
@@ -210,7 +210,7 @@ L(shl_0_loop):
movdqa %xmm0, (%edx, %edi)
movdqa %xmm1, 16(%edx, %edi)
lea 32(%edi), %edi
- jl L(shl_0_end)
+ jb L(shl_0_end)
movdqa (%eax, %edi), %xmm0
movdqa 16(%eax, %edi), %xmm1
@@ -218,7 +218,7 @@ L(shl_0_loop):
movdqa %xmm0, (%edx, %edi)
movdqa %xmm1, 16(%edx, %edi)
lea 32(%edi), %edi
- jl L(shl_0_end)
+ jb L(shl_0_end)
movdqa (%eax, %edi), %xmm0
movdqa 16(%eax, %edi), %xmm1
@@ -250,7 +250,7 @@ L(shl_0_gobble):
POP (%edi)
lea -128(%ecx), %ecx
- jge L(shl_0_gobble_mem_loop)
+ jae L(shl_0_gobble_mem_loop)
L(shl_0_gobble_cache_loop):
movdqa (%eax), %xmm0
movdqa 0x10(%eax), %xmm1
@@ -272,8 +272,7 @@ L(shl_0_gobble_cache_loop):
movdqa %xmm7, 0x70(%edx)
lea 0x80(%edx), %edx
- jge L(shl_0_gobble_cache_loop)
-L(shl_0_gobble_cache_loop_tail):
+ jae L(shl_0_gobble_cache_loop)
cmp $-0x40, %ecx
lea 0x80(%ecx), %ecx
jl L(shl_0_cache_less_64bytes)
@@ -294,7 +293,7 @@ L(shl_0_gobble_cache_loop_tail):
add $0x40, %edx
L(shl_0_cache_less_64bytes):
cmp $0x20, %ecx
- jl L(shl_0_cache_less_32bytes)
+ jb L(shl_0_cache_less_32bytes)
movdqa (%eax), %xmm0
sub $0x20, %ecx
movdqa 0x10(%eax), %xmm1
@@ -304,7 +303,7 @@ L(shl_0_cache_less_64bytes):
add $0x20, %edx
L(shl_0_cache_less_32bytes):
cmp $0x10, %ecx
- jl L(shl_0_cache_less_16bytes)
+ jb L(shl_0_cache_less_16bytes)
sub $0x10, %ecx
movdqa (%eax), %xmm0
add $0x10, %eax
@@ -342,7 +341,7 @@ L(shl_0_gobble_mem_loop):
movdqa %xmm7, 0x70(%edx)
lea 0x80(%edx), %edx
- jge L(shl_0_gobble_mem_loop)
+ jae L(shl_0_gobble_mem_loop)
cmp $-0x40, %ecx
lea 0x80(%ecx), %ecx
jl L(shl_0_mem_less_64bytes)
@@ -363,7 +362,7 @@ L(shl_0_gobble_mem_loop):
add $0x40, %edx
L(shl_0_mem_less_64bytes):
cmp $0x20, %ecx
- jl L(shl_0_mem_less_32bytes)
+ jb L(shl_0_mem_less_32bytes)
movdqa (%eax), %xmm0
sub $0x20, %ecx
movdqa 0x10(%eax), %xmm1
@@ -373,7 +372,7 @@ L(shl_0_mem_less_64bytes):
add $0x20, %edx
L(shl_0_mem_less_32bytes):
cmp $0x10, %ecx
- jl L(shl_0_mem_less_16bytes)
+ jb L(shl_0_mem_less_16bytes)
sub $0x10, %ecx
movdqa (%eax), %xmm0
add $0x10, %eax
@@ -406,7 +405,7 @@ L(shl_1_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_1_end)
+ jb L(shl_1_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -449,7 +448,7 @@ L(shl_2_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_2_end)
+ jb L(shl_2_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -492,7 +491,7 @@ L(shl_3_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_3_end)
+ jb L(shl_3_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -535,7 +534,7 @@ L(shl_4_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_4_end)
+ jb L(shl_4_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -578,7 +577,7 @@ L(shl_5_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_5_end)
+ jb L(shl_5_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -622,7 +621,7 @@ L(shl_6_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_6_end)
+ jb L(shl_6_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -665,7 +664,7 @@ L(shl_7_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_7_end)
+ jb L(shl_7_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -708,7 +707,7 @@ L(shl_8_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_8_end)
+ jb L(shl_8_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -751,7 +750,7 @@ L(shl_9_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_9_end)
+ jb L(shl_9_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -794,7 +793,7 @@ L(shl_10_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_10_end)
+ jb L(shl_10_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -837,7 +836,7 @@ L(shl_11_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_11_end)
+ jb L(shl_11_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -880,7 +879,7 @@ L(shl_12_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_12_end)
+ jb L(shl_12_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -923,7 +922,7 @@ L(shl_13_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_13_end)
+ jb L(shl_13_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -966,7 +965,7 @@ L(shl_14_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_14_end)
+ jb L(shl_14_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -1010,7 +1009,7 @@ L(shl_15_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_15_end)
+ jb L(shl_15_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -1281,7 +1280,7 @@ L(large_page_loop):
sub $0x40, %ecx
L(large_page_less_64bytes):
cmp $32, %ecx
- jl L(large_page_less_32bytes)
+ jb L(large_page_less_32bytes)
movdqu (%eax), %xmm0
movdqu 0x10(%eax), %xmm1
lea 0x20(%eax), %eax
@@ -1617,11 +1616,11 @@ L(copy_backward):
L(bk_aligned_4):
cmp $64, %ecx
- jge L(bk_write_more64bytes)
+ jae L(bk_write_more64bytes)
L(bk_write_64bytesless):
cmp $32, %ecx
- jl L(bk_write_less32bytes)
+ jb L(bk_write_less32bytes)
L(bk_write_more32bytes):
/* Copy 32 bytes at a time. */
@@ -1656,7 +1655,7 @@ L(bk_write_less32bytes_2):
ALIGN (4)
L(bk_align):
cmp $8, %ecx
- jle L(bk_write_less32bytes)
+ jbe L(bk_write_less32bytes)
testl $1, %edx
/* We get here only if (EDX & 3 ) != 0 so if (EDX & 1) ==0,
then (EDX & 2) must be != 0. */
@@ -1712,7 +1711,7 @@ L(bk_ssse3_align):
L(bk_ssse3_cpy_pre):
cmp $64, %ecx
- jl L(bk_write_more32bytes)
+ jb L(bk_write_more32bytes)
L(bk_ssse3_cpy):
sub $64, %esi
@@ -1727,7 +1726,7 @@ L(bk_ssse3_cpy):
movdqu (%esi), %xmm0
movdqa %xmm0, (%edx)
cmp $64, %ecx
- jge L(bk_ssse3_cpy)
+ jae L(bk_ssse3_cpy)
jmp L(bk_write_64bytesless)
#endif