Diffstat (limited to 'tools/arch/x86/lib/memset_64.S')
 -rw-r--r--  tools/arch/x86/lib/memset_64.S | 66
 1 file changed, 23 insertions(+), 43 deletions(-)
diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S
index fd5d25a474b7..fb5a03cf5ab7 100644
--- a/tools/arch/x86/lib/memset_64.S
+++ b/tools/arch/x86/lib/memset_64.S
@@ -1,11 +1,13 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /* Copyright 2002 Andi Kleen, SuSE Labs */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
+#include <linux/cfi_types.h>
 #include <asm/cpufeatures.h>
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
 
-.weak memset
+.section .noinstr.text, "ax"
 
 /*
  * ISO C memset - set a memory block to a byte value. This function uses fast
@@ -17,55 +19,33 @@
  * rdx   count (bytes)
  *
  * rax   original destination
- */
-SYM_FUNC_START_ALIAS(memset)
-SYM_FUNC_START(__memset)
-        /*
-         * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
-         * to use it when possible. If not available, use fast string instructions.
-         *
-         * Otherwise, use original memset function.
-         */
-        ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
-                      "jmp memset_erms", X86_FEATURE_ERMS
-
-        movq %rdi,%r9
-        movq %rdx,%rcx
-        andl $7,%edx
-        shrq $3,%rcx
-        /* expand byte value */
-        movzbl %sil,%esi
-        movabs $0x0101010101010101,%rax
-        imulq %rsi,%rax
-        rep stosq
-        movl %edx,%ecx
-        rep stosb
-        movq %r9,%rax
-        ret
-SYM_FUNC_END(__memset)
-SYM_FUNC_END_ALIAS(memset)
-
-/*
- * ISO C memset - set a memory block to a byte value. This function uses
- * enhanced rep stosb to override the fast string function.
- * The code is simpler and shorter than the fast string function as well.
  *
- * rdi   destination
- * rsi   value (char)
- * rdx   count (bytes)
+ * The FSRS alternative should be done inline (avoiding the call and
+ * the disgusting return handling), but that would require some help
+ * from the compiler for better calling conventions.
  *
- * rax   original destination
+ * The 'rep stosb' itself is small enough to replace the call, but all
+ * the register moves blow up the code. And two of them are "needed"
+ * only for the return value that is the same as the source input,
+ * which the compiler could/should do much better anyway.
  */
-SYM_FUNC_START(memset_erms)
+SYM_TYPED_FUNC_START(__memset)
+        ALTERNATIVE "jmp memset_orig", "", X86_FEATURE_FSRS
+
         movq %rdi,%r9
         movb %sil,%al
         movq %rdx,%rcx
         rep stosb
         movq %r9,%rax
-        ret
-SYM_FUNC_END(memset_erms)
+        RET
+SYM_FUNC_END(__memset)
+EXPORT_SYMBOL(__memset)
+
+SYM_FUNC_ALIAS_MEMFUNC(memset, __memset)
+SYM_PIC_ALIAS(memset)
+EXPORT_SYMBOL(memset)
 
-SYM_FUNC_START(memset_orig)
+SYM_FUNC_START_LOCAL(memset_orig)
         movq %rdi,%r10
 
         /* expand byte value */
@@ -124,7 +104,7 @@ SYM_FUNC_START(memset_orig)
 
 .Lende:
         movq %r10,%rax
-        ret
+        RET
 
 .Lbad_alignment:
         cmpq $7,%rdx
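
For readers skimming the change itself: the ALTERNATIVE line makes "jmp memset_orig" the default, and on CPUs that advertise X86_FEATURE_FSRS (Fast Short REP STOSB) the jump is patched out at boot, so __memset falls straight through to a bare 'rep stosb'. Below is a minimal userspace sketch of that fast path under the same System V AMD64 convention the kernel code uses (rdi = destination, rsi = fill value, rdx = count, original destination returned in rax). The symbol name my_memset and the standalone layout are illustrative only; the kernel-specific machinery (SYM_TYPED_FUNC_START, ALTERNATIVE, RET, EXPORT_SYMBOL) is deliberately left out.

/* my_memset.S - illustrative userspace sketch, not the kernel source */
        .text
        .globl  my_memset
        .type   my_memset, @function
my_memset:
        movq    %rdi,%r9        /* save destination: it doubles as the return value */
        movb    %sil,%al        /* 'rep stosb' stores AL, so put the fill byte there */
        movq    %rdx,%rcx       /* 'rep' repeats RCX times */
        rep stosb               /* store AL at (%rdi), RCX times, advancing RDI */
        movq    %r9,%rax        /* return the original destination */
        ret
        .size   my_memset, .-my_memset
        .section .note.GNU-stack,"",@progbits

Callable from C as void *my_memset(void *dst, int c, size_t n). For comparison, the removed REP_GOOD path expanded the fill byte with a multiply: after movzbl %sil,%esi, the imulq by 0x0101010101010101 replicates the low byte into all eight byte lanes (0xAB becomes 0xABABABABABABABAB) so that rep stosq can store eight bytes per iteration. The FSRS path needs none of that, since rep stosb consumes only AL.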