aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/lib
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/lib')
-rw-r--r--arch/x86/lib/Makefile3
-rw-r--r--arch/x86/lib/atomic64_386_32.S86
-rw-r--r--arch/x86/lib/atomic64_cx8_32.S16
-rw-r--r--arch/x86/lib/checksum_32.S27
-rw-r--r--arch/x86/lib/clear_page_64.S144
-rw-r--r--arch/x86/lib/cmpxchg16b_emu.S4
-rw-r--r--arch/x86/lib/cmpxchg8b_emu.S4
-rw-r--r--arch/x86/lib/copy_mc_64.S18
-rw-r--r--arch/x86/lib/copy_page_64.S4
-rw-r--r--arch/x86/lib/copy_user_64.S138
-rw-r--r--arch/x86/lib/csum-copy_64.S2
-rw-r--r--arch/x86/lib/csum-partial_64.c165
-rw-r--r--arch/x86/lib/csum-wrappers_64.c2
-rw-r--r--arch/x86/lib/delay.c4
-rw-r--r--arch/x86/lib/error-inject.c5
-rw-r--r--arch/x86/lib/getuser.S22
-rw-r--r--arch/x86/lib/hweight.S6
-rw-r--r--arch/x86/lib/insn-eval.c185
-rw-r--r--arch/x86/lib/iomap_copy_64.S2
-rw-r--r--arch/x86/lib/iomem.c70
-rw-r--r--arch/x86/lib/kaslr.c2
-rw-r--r--arch/x86/lib/memcpy_32.c5
-rw-r--r--arch/x86/lib/memcpy_64.S23
-rw-r--r--arch/x86/lib/memmove_64.S13
-rw-r--r--arch/x86/lib/memset_64.S12
-rw-r--r--arch/x86/lib/mmx_32.c388
-rw-r--r--arch/x86/lib/msr-reg.S4
-rw-r--r--arch/x86/lib/putuser.S10
-rw-r--r--arch/x86/lib/retpoline.S82
-rw-r--r--arch/x86/lib/usercopy.c7
-rw-r--r--arch/x86/lib/usercopy_32.c67
-rw-r--r--arch/x86/lib/usercopy_64.c44
-rw-r--r--arch/x86/lib/x86-opcode-map.txt111
33 files changed, 849 insertions, 826 deletions
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index c6506c6a7092..7ba5f61d7273 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -63,10 +63,11 @@ ifeq ($(CONFIG_X86_32),y)
ifneq ($(CONFIG_X86_CMPXCHG64),y)
lib-y += cmpxchg8b_emu.o atomic64_386_32.o
endif
- lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o
else
obj-y += iomap_copy_64.o
+ifneq ($(CONFIG_GENERIC_CSUM),y)
lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o
+endif
lib-y += clear_page_64.o copy_page_64.o
lib-y += memmove_64.o memset_64.o
lib-y += copy_user_64.o
diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S
index 16bc9130e7a5..e768815e58ae 100644
--- a/arch/x86/lib/atomic64_386_32.S
+++ b/arch/x86/lib/atomic64_386_32.S
@@ -9,81 +9,83 @@
#include <asm/alternative.h>
/* if you want SMP support, implement these with real spinlocks */
-.macro LOCK reg
+.macro IRQ_SAVE reg
pushfl
cli
.endm
-.macro UNLOCK reg
+.macro IRQ_RESTORE reg
popfl
.endm
-#define BEGIN(op) \
+#define BEGIN_IRQ_SAVE(op) \
.macro endp; \
SYM_FUNC_END(atomic64_##op##_386); \
.purgem endp; \
.endm; \
SYM_FUNC_START(atomic64_##op##_386); \
- LOCK v;
+ IRQ_SAVE v;
#define ENDP endp
-#define RET \
- UNLOCK v; \
- ret
-
-#define RET_ENDP \
- RET; \
- ENDP
+#define RET_IRQ_RESTORE \
+ IRQ_RESTORE v; \
+ RET
#define v %ecx
-BEGIN(read)
+BEGIN_IRQ_SAVE(read)
movl (v), %eax
movl 4(v), %edx
-RET_ENDP
+ RET_IRQ_RESTORE
+ENDP
#undef v
#define v %esi
-BEGIN(set)
+BEGIN_IRQ_SAVE(set)
movl %ebx, (v)
movl %ecx, 4(v)
-RET_ENDP
+ RET_IRQ_RESTORE
+ENDP
#undef v
#define v %esi
-BEGIN(xchg)
+BEGIN_IRQ_SAVE(xchg)
movl (v), %eax
movl 4(v), %edx
movl %ebx, (v)
movl %ecx, 4(v)
-RET_ENDP
+ RET_IRQ_RESTORE
+ENDP
#undef v
#define v %ecx
-BEGIN(add)
+BEGIN_IRQ_SAVE(add)
addl %eax, (v)
adcl %edx, 4(v)
-RET_ENDP
+ RET_IRQ_RESTORE
+ENDP
#undef v
#define v %ecx
-BEGIN(add_return)
+BEGIN_IRQ_SAVE(add_return)
addl (v), %eax
adcl 4(v), %edx
movl %eax, (v)
movl %edx, 4(v)
-RET_ENDP
+ RET_IRQ_RESTORE
+ENDP
#undef v
#define v %ecx
-BEGIN(sub)
+BEGIN_IRQ_SAVE(sub)
subl %eax, (v)
sbbl %edx, 4(v)
-RET_ENDP
+ RET_IRQ_RESTORE
+ENDP
#undef v
#define v %ecx
-BEGIN(sub_return)
+BEGIN_IRQ_SAVE(sub_return)
negl %edx
negl %eax
sbbl $0, %edx
@@ -91,47 +93,52 @@ BEGIN(sub_return)
adcl 4(v), %edx
movl %eax, (v)
movl %edx, 4(v)
-RET_ENDP
+ RET_IRQ_RESTORE
+ENDP
#undef v
#define v %esi
-BEGIN(inc)
+BEGIN_IRQ_SAVE(inc)
addl $1, (v)
adcl $0, 4(v)
-RET_ENDP
+ RET_IRQ_RESTORE
+ENDP
#undef v
#define v %esi
-BEGIN(inc_return)
+BEGIN_IRQ_SAVE(inc_return)
movl (v), %eax
movl 4(v), %edx
addl $1, %eax
adcl $0, %edx
movl %eax, (v)
movl %edx, 4(v)
-RET_ENDP
+ RET_IRQ_RESTORE
+ENDP
#undef v
#define v %esi
-BEGIN(dec)
+BEGIN_IRQ_SAVE(dec)
subl $1, (v)
sbbl $0, 4(v)
-RET_ENDP
+ RET_IRQ_RESTORE
+ENDP
#undef v
#define v %esi
-BEGIN(dec_return)
+BEGIN_IRQ_SAVE(dec_return)
movl (v), %eax
movl 4(v), %edx
subl $1, %eax
sbbl $0, %edx
movl %eax, (v)
movl %edx, 4(v)
-RET_ENDP
+ RET_IRQ_RESTORE
+ENDP
#undef v
#define v %esi
-BEGIN(add_unless)
+BEGIN_IRQ_SAVE(add_unless)
addl %eax, %ecx
adcl %edx, %edi
addl (v), %eax
@@ -143,7 +150,7 @@ BEGIN(add_unless)
movl %edx, 4(v)
movl $1, %eax
2:
- RET
+ RET_IRQ_RESTORE
3:
cmpl %edx, %edi
jne 1b
@@ -153,7 +160,7 @@ ENDP
#undef v
#define v %esi
-BEGIN(inc_not_zero)
+BEGIN_IRQ_SAVE(inc_not_zero)
movl (v), %eax
movl 4(v), %edx
testl %eax, %eax
@@ -165,7 +172,7 @@ BEGIN(inc_not_zero)
movl %edx, 4(v)
movl $1, %eax
2:
- RET
+ RET_IRQ_RESTORE
3:
testl %edx, %edx
jne 1b
@@ -174,7 +181,7 @@ ENDP
#undef v
#define v %esi
-BEGIN(dec_if_positive)
+BEGIN_IRQ_SAVE(dec_if_positive)
movl (v), %eax
movl 4(v), %edx
subl $1, %eax
@@ -183,5 +190,6 @@ BEGIN(dec_if_positive)
movl %eax, (v)
movl %edx, 4(v)
1:
-RET_ENDP
+ RET_IRQ_RESTORE
+ENDP
#undef v
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S
index ce6935690766..90afb488b396 100644
--- a/arch/x86/lib/atomic64_cx8_32.S
+++ b/arch/x86/lib/atomic64_cx8_32.S
@@ -18,7 +18,7 @@
SYM_FUNC_START(atomic64_read_cx8)
read64 %ecx
- ret
+ RET
SYM_FUNC_END(atomic64_read_cx8)
SYM_FUNC_START(atomic64_set_cx8)
@@ -28,7 +28,7 @@ SYM_FUNC_START(atomic64_set_cx8)
cmpxchg8b (%esi)
jne 1b
- ret
+ RET
SYM_FUNC_END(atomic64_set_cx8)
SYM_FUNC_START(atomic64_xchg_cx8)
@@ -37,7 +37,7 @@ SYM_FUNC_START(atomic64_xchg_cx8)
cmpxchg8b (%esi)
jne 1b
- ret
+ RET
SYM_FUNC_END(atomic64_xchg_cx8)
.macro addsub_return func ins insc
@@ -68,7 +68,7 @@ SYM_FUNC_START(atomic64_\func\()_return_cx8)
popl %esi
popl %ebx
popl %ebp
- ret
+ RET
SYM_FUNC_END(atomic64_\func\()_return_cx8)
.endm
@@ -93,7 +93,7 @@ SYM_FUNC_START(atomic64_\func\()_return_cx8)
movl %ebx, %eax
movl %ecx, %edx
popl %ebx
- ret
+ RET
SYM_FUNC_END(atomic64_\func\()_return_cx8)
.endm
@@ -118,7 +118,7 @@ SYM_FUNC_START(atomic64_dec_if_positive_cx8)
movl %ebx, %eax
movl %ecx, %edx
popl %ebx
- ret
+ RET
SYM_FUNC_END(atomic64_dec_if_positive_cx8)
SYM_FUNC_START(atomic64_add_unless_cx8)
@@ -149,7 +149,7 @@ SYM_FUNC_START(atomic64_add_unless_cx8)
addl $8, %esp
popl %ebx
popl %ebp
- ret
+ RET
4:
cmpl %edx, 4(%esp)
jne 2b
@@ -176,5 +176,5 @@ SYM_FUNC_START(atomic64_inc_not_zero_cx8)
movl $1, %eax
3:
popl %ebx
- ret
+ RET
SYM_FUNC_END(atomic64_inc_not_zero_cx8)
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index 4304320e51f4..23318c338db0 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -127,7 +127,7 @@ SYM_FUNC_START(csum_partial)
8:
popl %ebx
popl %esi
- ret
+ RET
SYM_FUNC_END(csum_partial)
#else
@@ -245,7 +245,7 @@ SYM_FUNC_START(csum_partial)
90:
popl %ebx
popl %esi
- ret
+ RET
SYM_FUNC_END(csum_partial)
#endif
@@ -260,9 +260,9 @@ unsigned int csum_partial_copy_generic (const char *src, char *dst,
* Copy from ds while checksumming, otherwise like csum_partial
*/
-#define EXC(y...) \
- 9999: y; \
- _ASM_EXTABLE_UA(9999b, 6001f)
+#define EXC(y...) \
+ 9999: y; \
+ _ASM_EXTABLE_TYPE(9999b, 7f, EX_TYPE_UACCESS | EX_FLAG_CLEAR_AX)
#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
@@ -358,20 +358,11 @@ EXC( movb %cl, (%edi) )
adcl $0, %eax
7:
-# Exception handler:
-.section .fixup, "ax"
-
-6001:
- xorl %eax, %eax
- jmp 7b
-
-.previous
-
popl %ebx
popl %esi
popl %edi
popl %ecx # equivalent to addl $4,%esp
- ret
+ RET
SYM_FUNC_END(csum_partial_copy_generic)
#else
@@ -439,15 +430,11 @@ EXC( movb %dl, (%edi) )
6: addl %edx, %eax
adcl $0, %eax
7:
-.section .fixup, "ax"
-6001: xorl %eax, %eax
- jmp 7b
-.previous
popl %esi
popl %edi
popl %ebx
- ret
+ RET
SYM_FUNC_END(csum_partial_copy_generic)
#undef ROUND
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index c4c7dd115953..ecbfb4dd3b01 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
#include <linux/linkage.h>
+#include <asm/asm.h>
#include <asm/export.h>
/*
@@ -17,7 +18,7 @@ SYM_FUNC_START(clear_page_rep)
movl $4096/8,%ecx
xorl %eax,%eax
rep stosq
- ret
+ RET
SYM_FUNC_END(clear_page_rep)
EXPORT_SYMBOL_GPL(clear_page_rep)
@@ -39,7 +40,7 @@ SYM_FUNC_START(clear_page_orig)
leaq 64(%rdi),%rdi
jnz .Lloop
nop
- ret
+ RET
SYM_FUNC_END(clear_page_orig)
EXPORT_SYMBOL_GPL(clear_page_orig)
@@ -47,6 +48,143 @@ SYM_FUNC_START(clear_page_erms)
movl $4096,%ecx
xorl %eax,%eax
rep stosb
- ret
+ RET
SYM_FUNC_END(clear_page_erms)
EXPORT_SYMBOL_GPL(clear_page_erms)
+
+/*
+ * Default clear user-space.
+ * Input:
+ * rdi destination
+ * rcx count
+ *
+ * Output:
+ * rcx: uncleared bytes or 0 if successful.
+ */
+SYM_FUNC_START(clear_user_original)
+ /*
+ * Copy only the lower 32 bits of size as that is enough to handle the rest bytes,
+ * i.e., no need for a 'q' suffix and thus a REX prefix.
+ */
+ mov %ecx,%eax
+ shr $3,%rcx
+ jz .Lrest_bytes
+
+ # do the qwords first
+ .p2align 4
+.Lqwords:
+ movq $0,(%rdi)
+ lea 8(%rdi),%rdi
+ dec %rcx
+ jnz .Lqwords
+
+.Lrest_bytes:
+ and $7, %eax
+ jz .Lexit
+
+ # now do the rest bytes
+.Lbytes:
+ movb $0,(%rdi)
+ inc %rdi
+ dec %eax
+ jnz .Lbytes
+
+.Lexit:
+ /*
+ * %rax still needs to be cleared in the exception case because this function is called
+ * from inline asm and the compiler expects %rax to be zero when exiting the inline asm,
+ * in case it might reuse it somewhere.
+ */
+ xor %eax,%eax
+ RET
+
+.Lqwords_exception:
+ # convert remaining qwords back into bytes to return to caller
+ shl $3, %rcx
+ and $7, %eax
+ add %rax,%rcx
+ jmp .Lexit
+
+.Lbytes_exception:
+ mov %eax,%ecx
+ jmp .Lexit
+
+ _ASM_EXTABLE_UA(.Lqwords, .Lqwords_exception)
+ _ASM_EXTABLE_UA(.Lbytes, .Lbytes_exception)
+SYM_FUNC_END(clear_user_original)
+EXPORT_SYMBOL(clear_user_original)
+
+/*
+ * Alternative clear user-space when CPU feature X86_FEATURE_REP_GOOD is
+ * present.
+ * Input:
+ * rdi destination
+ * rcx count
+ *
+ * Output:
+ * rcx: uncleared bytes or 0 if successful.
+ */
+SYM_FUNC_START(clear_user_rep_good)
+ # call the original thing for less than a cacheline
+ cmp $64, %rcx
+ jb clear_user_original
+
+.Lprep:
+ # copy lower 32-bits for rest bytes
+ mov %ecx, %edx
+ shr $3, %rcx
+ jz .Lrep_good_rest_bytes
+
+.Lrep_good_qwords:
+ rep stosq
+
+.Lrep_good_rest_bytes:
+ and $7, %edx
+ jz .Lrep_good_exit
+
+.Lrep_good_bytes:
+ mov %edx, %ecx
+ rep stosb
+
+.Lrep_good_exit:
+ # see .Lexit comment above
+ xor %eax, %eax
+ RET
+
+.Lrep_good_qwords_exception:
+ # convert remaining qwords back into bytes to return to caller
+ shl $3, %rcx
+ and $7, %edx
+ add %rdx, %rcx
+ jmp .Lrep_good_exit
+
+ _ASM_EXTABLE_UA(.Lrep_good_qwords, .Lrep_good_qwords_exception)
+ _ASM_EXTABLE_UA(.Lrep_good_bytes, .Lrep_good_exit)
+SYM_FUNC_END(clear_user_rep_good)
+EXPORT_SYMBOL(clear_user_rep_good)
+
+/*
+ * Alternative clear user-space when CPU feature X86_FEATURE_ERMS is present.
+ * Input:
+ * rdi destination
+ * rcx count
+ *
+ * Output:
+ * rcx: uncleared bytes or 0 if successful.
+ *
+ */
+SYM_FUNC_START(clear_user_erms)
+ # call the original thing for less than a cacheline
+ cmp $64, %rcx
+ jb clear_user_original
+
+.Lerms_bytes:
+ rep stosb
+
+.Lerms_exit:
+ xorl %eax,%eax
+ RET
+
+ _ASM_EXTABLE_UA(.Lerms_bytes, .Lerms_exit)
+SYM_FUNC_END(clear_user_erms)
+EXPORT_SYMBOL(clear_user_erms)
diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S
index 3542502faa3b..33c70c0160ea 100644
--- a/arch/x86/lib/cmpxchg16b_emu.S
+++ b/arch/x86/lib/cmpxchg16b_emu.S
@@ -37,11 +37,11 @@ SYM_FUNC_START(this_cpu_cmpxchg16b_emu)
popfq
mov $1, %al
- ret
+ RET
.Lnot_same:
popfq
xor %al,%al
- ret
+ RET
SYM_FUNC_END(this_cpu_cmpxchg16b_emu)
diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S
index ca01ed6029f4..6a912d58fecc 100644
--- a/arch/x86/lib/cmpxchg8b_emu.S
+++ b/arch/x86/lib/cmpxchg8b_emu.S
@@ -32,7 +32,7 @@ SYM_FUNC_START(cmpxchg8b_emu)
movl %ecx, 4(%esi)
popfl
- ret
+ RET
.Lnot_same:
movl (%esi), %eax
@@ -40,7 +40,7 @@ SYM_FUNC_START(cmpxchg8b_emu)
movl 4(%esi), %edx
popfl
- ret
+ RET
SYM_FUNC_END(cmpxchg8b_emu)
EXPORT_SYMBOL(cmpxchg8b_emu)
diff --git a/arch/x86/lib/copy_mc_64.S b/arch/x86/lib/copy_mc_64.S
index 7334055157ba..c859a8a09860 100644
--- a/arch/x86/lib/copy_mc_64.S
+++ b/arch/x86/lib/copy_mc_64.S
@@ -77,10 +77,8 @@ SYM_FUNC_START(copy_mc_fragile)
.L_done_memcpy_trap:
xorl %eax, %eax
.L_done:
- ret
-SYM_FUNC_END(copy_mc_fragile)
+ RET
- .section .fixup, "ax"
/*
* Return number of bytes not copied for any failure. Note that
* there is no "tail" handling since the source buffer is 8-byte
@@ -105,14 +103,14 @@ SYM_FUNC_END(copy_mc_fragile)
movl %ecx, %edx
jmp copy_mc_fragile_handle_tail
- .previous
-
_ASM_EXTABLE_TYPE(.L_read_leading_bytes, .E_leading_bytes, EX_TYPE_DEFAULT_MCE_SAFE)
_ASM_EXTABLE_TYPE(.L_read_words, .E_read_words, EX_TYPE_DEFAULT_MCE_SAFE)
_ASM_EXTABLE_TYPE(.L_read_trailing_bytes, .E_trailing_bytes, EX_TYPE_DEFAULT_MCE_SAFE)
_ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
_ASM_EXTABLE(.L_write_words, .E_write_words)
_ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
+
+SYM_FUNC_END(copy_mc_fragile)
#endif /* CONFIG_X86_MCE */
/*
@@ -132,10 +130,8 @@ SYM_FUNC_START(copy_mc_enhanced_fast_string)
rep movsb
/* Copy successful. Return zero */
xorl %eax, %eax
- ret
-SYM_FUNC_END(copy_mc_enhanced_fast_string)
+ RET
- .section .fixup, "ax"
.E_copy:
/*
* On fault %rcx is updated such that the copy instruction could
@@ -145,9 +141,9 @@ SYM_FUNC_END(copy_mc_enhanced_fast_string)
* user-copy routines.
*/
movq %rcx, %rax
- ret
-
- .previous
+ RET
_ASM_EXTABLE_TYPE(.L_copy, .E_copy, EX_TYPE_DEFAULT_MCE_SAFE)
+
+SYM_FUNC_END(copy_mc_enhanced_fast_string)
#endif /* !CONFIG_UML */
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index db4b4f9197c7..30ea644bf446 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -17,7 +17,7 @@ SYM_FUNC_START(copy_page)
ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
movl $4096/8, %ecx
rep movsq
- ret
+ RET
SYM_FUNC_END(copy_page)
EXPORT_SYMBOL(copy_page)
@@ -85,5 +85,5 @@ SYM_FUNC_START_LOCAL(copy_page_regs)
movq (%rsp), %rbx
movq 1*8(%rsp), %r12
addq $2*8, %rsp
- ret
+ RET
SYM_FUNC_END(copy_page_regs)
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 2797e630b9b1..9dec1b38a98f 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -32,14 +32,10 @@
decl %ecx
jnz 100b
102:
- .section .fixup,"ax"
-103: addl %ecx,%edx /* ecx is zerorest also */
- jmp .Lcopy_user_handle_tail
- .previous
- _ASM_EXTABLE_CPY(100b, 103b)
- _ASM_EXTABLE_CPY(101b, 103b)
- .endm
+ _ASM_EXTABLE_CPY(100b, .Lcopy_user_handle_align)
+ _ASM_EXTABLE_CPY(101b, .Lcopy_user_handle_align)
+.endm
/*
* copy_user_generic_unrolled - memory copy with exception handling.
@@ -57,12 +53,12 @@
SYM_FUNC_START(copy_user_generic_unrolled)
ASM_STAC
cmpl $8,%edx
- jb 20f /* less then 8 bytes, go to byte copy loop */
+ jb .Lcopy_user_short_string_bytes
ALIGN_DESTINATION
movl %edx,%ecx
andl $63,%edx
shrl $6,%ecx
- jz .L_copy_short_string
+ jz copy_user_short_string
1: movq (%rsi),%r8
2: movq 1*8(%rsi),%r9
3: movq 2*8(%rsi),%r10
@@ -83,39 +79,11 @@ SYM_FUNC_START(copy_user_generic_unrolled)
leaq 64(%rdi),%rdi
decl %ecx
jnz 1b
-.L_copy_short_string:
- movl %edx,%ecx
- andl $7,%edx
- shrl $3,%ecx
- jz 20f
-18: movq (%rsi),%r8
-19: movq %r8,(%rdi)
- leaq 8(%rsi),%rsi
- leaq 8(%rdi),%rdi
- decl %ecx
- jnz 18b
-20: andl %edx,%edx
- jz 23f
- movl %edx,%ecx
-21: movb (%rsi),%al
-22: movb %al,(%rdi)
- incq %rsi
- incq %rdi
- decl %ecx
- jnz 21b
-23: xor %eax,%eax
- ASM_CLAC
- ret
+ jmp copy_user_short_string
- .section .fixup,"ax"
30: shll $6,%ecx
addl %ecx,%edx
- jmp 60f
-40: leal (%rdx,%rcx,8),%edx
- jmp 60f
-50: movl %ecx,%edx
-60: jmp .Lcopy_user_handle_tail /* ecx is zerorest also */
- .previous
+ jmp .Lcopy_user_handle_tail
_ASM_EXTABLE_CPY(1b, 30b)
_ASM_EXTABLE_CPY(2b, 30b)
@@ -133,10 +101,6 @@ SYM_FUNC_START(copy_user_generic_unrolled)
_ASM_EXTABLE_CPY(14b, 30b)
_ASM_EXTABLE_CPY(15b, 30b)
_ASM_EXTABLE_CPY(16b, 30b)
- _ASM_EXTABLE_CPY(18b, 40b)
- _ASM_EXTABLE_CPY(19b, 40b)
- _ASM_EXTABLE_CPY(21b, 50b)
- _ASM_EXTABLE_CPY(22b, 50b)
SYM_FUNC_END(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)
@@ -166,20 +130,16 @@ SYM_FUNC_START(copy_user_generic_string)
movl %edx,%ecx
shrl $3,%ecx
andl $7,%edx
-1: rep
- movsq
+1: rep movsq
2: movl %edx,%ecx
-3: rep
- movsb
+3: rep movsb
xorl %eax,%eax
ASM_CLAC
- ret
+ RET
- .section .fixup,"ax"
11: leal (%rdx,%rcx,8),%ecx
12: movl %ecx,%edx /* ecx is zerorest also */
jmp .Lcopy_user_handle_tail
- .previous
_ASM_EXTABLE_CPY(1b, 11b)
_ASM_EXTABLE_CPY(3b, 12b)
@@ -200,19 +160,16 @@ EXPORT_SYMBOL(copy_user_generic_string)
*/
SYM_FUNC_START(copy_user_enhanced_fast_string)
ASM_STAC
- cmpl $64,%edx
- jb .L_copy_short_string /* less then 64 bytes, avoid the costly 'rep' */
+ /* CPUs without FSRM should avoid rep movsb for short copies */
+ ALTERNATIVE "cmpl $64, %edx; jb copy_user_short_string", "", X86_FEATURE_FSRM
movl %edx,%ecx
-1: rep
- movsb
+1: rep movsb
xorl %eax,%eax
ASM_CLAC
- ret
+ RET
- .section .fixup,"ax"
12: movl %ecx,%edx /* ecx is zerorest also */
jmp .Lcopy_user_handle_tail
- .previous
_ASM_EXTABLE_CPY(1b, 12b)
SYM_FUNC_END(copy_user_enhanced_fast_string)
@@ -225,6 +182,7 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string)
* Don't try to copy the tail if machine check happened
*
* Input:
+ * eax trap number written by ex_handler_copy()
* rdi destination
* rsi source
* rdx count
@@ -233,16 +191,76 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string)
* eax uncopied bytes or 0 if successful.
*/
SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
+ cmp $X86_TRAP_MC,%eax
+ je 3f
+
movl %edx,%ecx
1: rep movsb
2: mov %ecx,%eax
ASM_CLAC
- ret
+ RET
+
+3:
+ movl %edx,%eax
+ ASM_CLAC
+ RET
_ASM_EXTABLE_CPY(1b, 2b)
+
+.Lcopy_user_handle_align:
+ addl %ecx,%edx /* ecx is zerorest also */
+ jmp .Lcopy_user_handle_tail
+
SYM_CODE_END(.Lcopy_user_handle_tail)
/*
+ * Finish memcpy of less than 64 bytes. #AC should already be set.
+ *
+ * Input:
+ * rdi destination
+ * rsi source
+ * rdx count (< 64)
+ *
+ * Output:
+ * eax uncopied bytes or 0 if successful.
+ */
+SYM_CODE_START_LOCAL(copy_user_short_string)
+ movl %edx,%ecx
+ andl $7,%edx
+ shrl $3,%ecx
+ jz .Lcopy_user_short_string_bytes
+18: movq (%rsi),%r8
+19: movq %r8,(%rdi)
+ leaq 8(%rsi),%rsi
+ leaq 8(%rdi),%rdi
+ decl %ecx
+ jnz 18b
+.Lcopy_user_short_string_bytes:
+ andl %edx,%edx
+ jz 23f
+ movl %edx,%ecx
+21: movb (%rsi),%al
+22: movb %al,(%rdi)
+ incq %rsi
+ incq %rdi
+ decl %ecx
+ jnz 21b
+23: xor %eax,%eax
+ ASM_CLAC
+ RET
+
+40: leal (%rdx,%rcx,8),%edx
+ jmp 60f
+50: movl %ecx,%edx /* ecx is zerorest also */
+60: jmp .Lcopy_user_handle_tail
+
+ _ASM_EXTABLE_CPY(18b, 40b)
+ _ASM_EXTABLE_CPY(19b, 40b)
+ _ASM_EXTABLE_CPY(21b, 50b)
+ _ASM_EXTABLE_CPY(22b, 50b)
+SYM_CODE_END(copy_user_short_string)
+
+/*
* copy_user_nocache - Uncached memory copy with exception handling
* This will force destination out of cache for more performance.
*
@@ -348,9 +366,8 @@ SYM_FUNC_START(__copy_user_nocache)
xorl %eax,%eax
ASM_CLAC
sfence
- ret
+ RET
- .section .fixup,"ax"
.L_fixup_4x8b_copy:
shll $6,%ecx
addl %ecx,%edx
@@ -366,7 +383,6 @@ SYM_FUNC_START(__copy_user_nocache)
.L_fixup_handle_tail:
sfence
jmp .Lcopy_user_handle_tail
- .previous
_ASM_EXTABLE_CPY(1b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(2b, .L_fixup_4x8b_copy)
diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S
index 1fbd8ee9642d..d9e16a2cf285 100644
--- a/arch/x86/lib/csum-copy_64.S
+++ b/arch/x86/lib/csum-copy_64.S
@@ -201,7 +201,7 @@ SYM_FUNC_START(csum_partial_copy_generic)
movq 3*8(%rsp), %r13
movq 4*8(%rsp), %r15
addq $5*8, %rsp
- ret
+ RET
.Lshort:
movl %ecx, %r10d
jmp .L1
diff --git a/arch/x86/lib/csum-partial_64.c b/arch/x86/lib/csum-partial_64.c
index e7925d668b68..50734a23034c 100644
--- a/arch/x86/lib/csum-partial_64.c
+++ b/arch/x86/lib/csum-partial_64.c
@@ -9,6 +9,7 @@
#include <linux/compiler.h>
#include <linux/export.h>
#include <asm/checksum.h>
+#include <asm/word-at-a-time.h>
static inline unsigned short from32to16(unsigned a)
{
@@ -21,120 +22,93 @@ static inline unsigned short from32to16(unsigned a)
}
/*
- * Do a 64-bit checksum on an arbitrary memory area.
+ * Do a checksum on an arbitrary memory area.
* Returns a 32bit checksum.
*
* This isn't as time critical as it used to be because many NICs
* do hardware checksumming these days.
- *
- * Things tried and found to not make it faster:
- * Manual Prefetching
- * Unrolling to an 128 bytes inner loop.
- * Using interleaving with more registers to break the carry chains.
+ *
+ * Still, with CHECKSUM_COMPLETE this is called to compute
+ * checksums on IPv6 headers (40 bytes) and other small parts.
+ * it's best to have buff aligned on a 64-bit boundary
*/
-static unsigned do_csum(const unsigned char *buff, unsigned len)
+__wsum csum_partial(const void *buff, int len, __wsum sum)
{
- unsigned odd, count;
- unsigned long result = 0;
+ u64 temp64 = (__force u64)sum;
+ unsigned odd, result;
- if (unlikely(len == 0))
- return result;
odd = 1 & (unsigned long) buff;
if (unlikely(odd)) {
- result = *buff << 8;
+ if (unlikely(len == 0))
+ return sum;
+ temp64 = ror32((__force u32)sum, 8);
+ temp64 += (*(unsigned char *)buff << 8);
len--;
buff++;
}
- count = len >> 1; /* nr of 16-bit words.. */
- if (count) {
- if (2 & (unsigned long) buff) {
- result += *(unsigned short *)buff;
- count--;
- len -= 2;
- buff += 2;
- }
- count >>= 1; /* nr of 32-bit words.. */
- if (count) {
- unsigned long zero;
- unsigned count64;
- if (4 & (unsigned long) buff) {
- result += *(unsigned int *) buff;
- count--;
- len -= 4;
- buff += 4;
- }
- count >>= 1; /* nr of 64-bit words.. */
- /* main loop using 64byte blocks */
- zero = 0;
- count64 = count >> 3;
- while (count64) {
- asm("addq 0*8(%[src]),%[res]\n\t"
- "adcq 1*8(%[src]),%[res]\n\t"
- "adcq 2*8(%[src]),%[res]\n\t"
- "adcq 3*8(%[src]),%[res]\n\t"
- "adcq 4*8(%[src]),%[res]\n\t"
- "adcq 5*8(%[src]),%[res]\n\t"
- "adcq 6*8(%[src]),%[res]\n\t"
- "adcq 7*8(%[src]),%[res]\n\t"
- "adcq %[zero],%[res]"
- : [res] "=r" (result)
- : [src] "r" (buff), [zero] "r" (zero),
- "[res]" (result));
- buff += 64;
- count64--;
- }
+ while (unlikely(len >= 64)) {
+ asm("addq 0*8(%[src]),%[res]\n\t"
+ "adcq 1*8(%[src]),%[res]\n\t"
+ "adcq 2*8(%[src]),%[res]\n\t"
+ "adcq 3*8(%[src]),%[res]\n\t"
+ "adcq 4*8(%[src]),%[res]\n\t"
+ "adcq 5*8(%[src]),%[res]\n\t"
+ "adcq 6*8(%[src]),%[res]\n\t"
+ "adcq 7*8(%[src]),%[res]\n\t"
+ "adcq $0,%[res]"
+ : [res] "+r" (temp64)
+ : [src] "r" (buff)
+ : "memory");
+ buff += 64;
+ len -= 64;
+ }
+
+ if (len & 32) {
+ asm("addq 0*8(%[src]),%[res]\n\t"
+ "adcq 1*8(%[src]),%[res]\n\t"
+ "adcq 2*8(%[src]),%[res]\n\t"
+ "adcq 3*8(%[src]),%[res]\n\t"
+ "adcq $0,%[res]"
+ : [res] "+r" (temp64)
+ : [src] "r" (buff)
+ : "memory");
+ buff += 32;
+ }
+ if (len & 16) {
+ asm("addq 0*8(%[src]),%[res]\n\t"
+ "adcq 1*8(%[src]),%[res]\n\t"
+ "adcq $0,%[res]"
+ : [res] "+r" (temp64)
+ : [src] "r" (buff)
+ : "memory");
+ buff += 16;
+ }
+ if (len & 8) {
+ asm("addq 0*8(%[src]),%[res]\n\t"
+ "adcq $0,%[res]"
+ : [res] "+r" (temp64)
+ : [src] "r" (buff)
+ : "memory");
+ buff += 8;
+ }
+ if (len & 7) {
+ unsigned int shift = (8 - (len & 7)) * 8;
+ unsigned long trail;
- /* last up to 7 8byte blocks */
- count %= 8;
- while (count) {
- asm("addq %1,%0\n\t"
- "adcq %2,%0\n"
- : "=r" (result)
- : "m" (*(unsigned long *)buff),
- "r" (zero), "0" (result));
- --count;
- buff += 8;
- }
- result = add32_with_carry(result>>32,
- result&0xffffffff);
+ trail = (load_unaligned_zeropad(buff) << shift) >> shift;
- if (len & 4) {
- result += *(unsigned int *) buff;
- buff += 4;
- }
- }
- if (len & 2) {
- result += *(unsigned short *) buff;
- buff += 2;
- }
+ asm("addq %[trail],%[res]\n\t"
+ "adcq $0,%[res]"
+ : [res] "+r" (temp64)
+ : [trail] "r" (trail));
}
- if (len & 1)
- result += *buff;
- result = add32_with_carry(result>>32, result & 0xffffffff);
- if (unlikely(odd)) {
+ result = add32_with_carry(temp64 >> 32, temp64 & 0xffffffff);
+ if (unlikely(odd)) {
result = from32to16(result);
result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
}
- return result;
-}
-
-/*
- * computes the checksum of a memory block at buff, length len,
- * and adds in "sum" (32-bit)
- *
- * returns a 32-bit number suitable for feeding into itself
- * or csum_tcpudp_magic
- *
- * this function must be called with even lengths, except
- * for the last fragment, which may be odd
- *
- * it's best to have buff aligned on a 64-bit boundary
- */
-__wsum csum_partial(const void *buff, int len, __wsum sum)
-{
- return (__force __wsum)add32_with_carry(do_csum(buff, len),
- (__force u32)sum);
+ return (__force __wsum)result;
}
EXPORT_SYMBOL(csum_partial);
@@ -147,4 +121,3 @@ __sum16 ip_compute_csum(const void *buff, int len)
return csum_fold(csum_partial(buff,len,0));
}
EXPORT_SYMBOL(ip_compute_csum);
-
diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c
index 189344924a2b..145f9a0bde29 100644
--- a/arch/x86/lib/csum-wrappers_64.c
+++ b/arch/x86/lib/csum-wrappers_64.c
@@ -32,7 +32,6 @@ csum_and_copy_from_user(const void __user *src, void *dst, int len)
user_access_end();
return sum;
}
-EXPORT_SYMBOL(csum_and_copy_from_user);
/**
* csum_and_copy_to_user - Copy and checksum to user space.
@@ -57,7 +56,6 @@ csum_and_copy_to_user(const void *src, void __user *dst, int len)
user_access_end();
return sum;
}
-EXPORT_SYMBOL(csum_and_copy_to_user);
/**
* csum_partial_copy_nocheck - Copy and checksum.
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index 65d15df6212d..0e65d00e2339 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -54,8 +54,8 @@ static void delay_loop(u64 __loops)
" jnz 2b \n"
"3: dec %0 \n"
- : /* we don't need output */
- :"a" (loops)
+ : "+a" (loops)
+ :
);
}
diff --git a/arch/x86/lib/error-inject.c b/arch/x86/lib/error-inject.c
index be5b5fb1598b..1e3de0769b81 100644
--- a/arch/x86/lib/error-inject.c
+++ b/arch/x86/lib/error-inject.c
@@ -1,7 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
+#include <linux/linkage.h>
#include <linux/error-injection.h>
#include <linux/kprobes.h>
+#include <linux/objtool.h>
asmlinkage void just_return_func(void);
@@ -10,7 +12,8 @@ asm(
".type just_return_func, @function\n"
".globl just_return_func\n"
"just_return_func:\n"
- " ret\n"
+ ANNOTATE_NOENDBR
+ ASM_RET
".size just_return_func, .-just_return_func\n"
);
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index fa1bc2104b32..b70d98d79a9d 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -57,7 +57,7 @@ SYM_FUNC_START(__get_user_1)
1: movzbl (%_ASM_AX),%edx
xor %eax,%eax
ASM_CLAC
- ret
+ RET
SYM_FUNC_END(__get_user_1)
EXPORT_SYMBOL(__get_user_1)
@@ -71,7 +71,7 @@ SYM_FUNC_START(__get_user_2)
2: movzwl (%_ASM_AX),%edx
xor %eax,%eax
ASM_CLAC
- ret
+ RET
SYM_FUNC_END(__get_user_2)
EXPORT_SYMBOL(__get_user_2)
@@ -85,7 +85,7 @@ SYM_FUNC_START(__get_user_4)
3: movl (%_ASM_AX),%edx
xor %eax,%eax
ASM_CLAC
- ret
+ RET
SYM_FUNC_END(__get_user_4)
EXPORT_SYMBOL(__get_user_4)
@@ -100,7 +100,7 @@ SYM_FUNC_START(__get_user_8)
4: movq (%_ASM_AX),%rdx
xor %eax,%eax
ASM_CLAC
- ret
+ RET
#else
LOAD_TASK_SIZE_MINUS_N(7)
cmp %_ASM_DX,%_ASM_AX
@@ -112,7 +112,7 @@ SYM_FUNC_START(__get_user_8)
5: movl 4(%_ASM_AX),%ecx
xor %eax,%eax
ASM_CLAC
- ret
+ RET
#endif
SYM_FUNC_END(__get_user_8)
EXPORT_SYMBOL(__get_user_8)
@@ -124,7 +124,7 @@ SYM_FUNC_START(__get_user_nocheck_1)
6: movzbl (%_ASM_AX),%edx
xor %eax,%eax
ASM_CLAC
- ret
+ RET
SYM_FUNC_END(__get_user_nocheck_1)
EXPORT_SYMBOL(__get_user_nocheck_1)
@@ -134,7 +134,7 @@ SYM_FUNC_START(__get_user_nocheck_2)
7: movzwl (%_ASM_AX),%edx
xor %eax,%eax
ASM_CLAC
- ret
+ RET
SYM_FUNC_END(__get_user_nocheck_2)
EXPORT_SYMBOL(__get_user_nocheck_2)
@@ -144,7 +144,7 @@ SYM_FUNC_START(__get_user_nocheck_4)
8: movl (%_ASM_AX),%edx
xor %eax,%eax
ASM_CLAC
- ret
+ RET
SYM_FUNC_END(__get_user_nocheck_4)
EXPORT_SYMBOL(__get_user_nocheck_4)
@@ -159,7 +159,7 @@ SYM_FUNC_START(__get_user_nocheck_8)
#endif
xor %eax,%eax
ASM_CLAC
- ret
+ RET
SYM_FUNC_END(__get_user_nocheck_8)
EXPORT_SYMBOL(__get_user_nocheck_8)
@@ -169,7 +169,7 @@ SYM_CODE_START_LOCAL(.Lbad_get_user_clac)
bad_get_user:
xor %edx,%edx
mov $(-EFAULT),%_ASM_AX
- ret
+ RET
SYM_CODE_END(.Lbad_get_user_clac)
#ifdef CONFIG_X86_32
@@ -179,7 +179,7 @@ bad_get_user_8:
xor %edx,%edx
xor %ecx,%ecx
mov $(-EFAULT),%_ASM_AX
- ret
+ RET
SYM_CODE_END(.Lbad_get_user_8_clac)
#endif
diff --git a/arch/x86/lib/hweight.S b/arch/x86/lib/hweight.S
index dbf8cc97b7f5..12c16c6aa44a 100644
--- a/arch/x86/lib/hweight.S
+++ b/arch/x86/lib/hweight.S
@@ -32,7 +32,7 @@ SYM_FUNC_START(__sw_hweight32)
imull $0x01010101, %eax, %eax # w_tmp *= 0x01010101
shrl $24, %eax # w = w_tmp >> 24
__ASM_SIZE(pop,) %__ASM_REG(dx)
- ret
+ RET
SYM_FUNC_END(__sw_hweight32)
EXPORT_SYMBOL(__sw_hweight32)
@@ -65,7 +65,7 @@ SYM_FUNC_START(__sw_hweight64)
popq %rdx
popq %rdi
- ret
+ RET
#else /* CONFIG_X86_32 */
/* We're getting an u64 arg in (%eax,%edx): unsigned long hweight64(__u64 w) */
pushl %ecx
@@ -77,7 +77,7 @@ SYM_FUNC_START(__sw_hweight64)
addl %ecx, %eax # result
popl %ecx
- ret
+ RET
#endif
SYM_FUNC_END(__sw_hweight64)
EXPORT_SYMBOL(__sw_hweight64)
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index eb3ccffb9b9d..21104c41cba0 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -37,8 +37,6 @@ enum reg_type {
*/
static bool is_string_insn(struct insn *insn)
{
- insn_get_opcode(insn);
-
/* All string instructions have a 1-byte opcode. */
if (insn->opcode.nbytes != 1)
return false;
@@ -344,9 +342,9 @@ static int resolve_seg_reg(struct insn *insn, struct pt_regs *regs, int regoff)
*/
static short get_segment_selector(struct pt_regs *regs, int seg_reg_idx)
{
-#ifdef CONFIG_X86_64
unsigned short sel;
+#ifdef CONFIG_X86_64
switch (seg_reg_idx) {
case INAT_SEG_REG_IGNORE:
return 0;
@@ -404,7 +402,8 @@ static short get_segment_selector(struct pt_regs *regs, int seg_reg_idx)
case INAT_SEG_REG_FS:
return (unsigned short)(regs->fs & 0xffff);
case INAT_SEG_REG_GS:
- return get_user_gs(regs);
+ savesegment(gs, sel);
+ return sel;
case INAT_SEG_REG_IGNORE:
default:
return -EINVAL;
@@ -412,32 +411,44 @@ static short get_segment_selector(struct pt_regs *regs, int seg_reg_idx)
#endif /* CONFIG_X86_64 */
}
-static int get_reg_offset(struct insn *insn, struct pt_regs *regs,
- enum reg_type type)
+static const int pt_regoff[] = {
+ offsetof(struct pt_regs, ax),
+ offsetof(struct pt_regs, cx),
+ offsetof(struct pt_regs, dx),
+ offsetof(struct pt_regs, bx),
+ offsetof(struct pt_regs, sp),
+ offsetof(struct pt_regs, bp),
+ offsetof(struct pt_regs, si),
+ offsetof(struct pt_regs, di),
+#ifdef CONFIG_X86_64
+ offsetof(struct pt_regs, r8),
+ offsetof(struct pt_regs, r9),
+ offsetof(struct pt_regs, r10),
+ offsetof(struct pt_regs, r11),
+ offsetof(struct pt_regs, r12),
+ offsetof(struct pt_regs, r13),
+ offsetof(struct pt_regs, r14),
+ offsetof(struct pt_regs, r15),
+#else
+ offsetof(struct pt_regs, ds),
+ offsetof(struct pt_regs, es),
+ offsetof(struct pt_regs, fs),
+ offsetof(struct pt_regs, gs),
+#endif
+};
+
+int pt_regs_offset(struct pt_regs *regs, int regno)
+{
+ if ((unsigned)regno < ARRAY_SIZE(pt_regoff))
+ return pt_regoff[regno];
+ return -EDOM;
+}
+
+static int get_regno(struct insn *insn, enum reg_type type)
{
+ int nr_registers = ARRAY_SIZE(pt_regoff);
int regno = 0;
- static const int regoff[] = {
- offsetof(struct pt_regs, ax),
- offsetof(struct pt_regs, cx),
- offsetof(struct pt_regs, dx),
- offsetof(struct pt_regs, bx),
- offsetof(struct pt_regs, sp),
- offsetof(struct pt_regs, bp),
- offsetof(struct pt_regs, si),
- offsetof(struct pt_regs, di),
-#ifdef CONFIG_X86_64
- offsetof(struct pt_regs, r8),
- offsetof(struct pt_regs, r9),
- offsetof(struct pt_regs, r10),
- offsetof(struct pt_regs, r11),
- offsetof(struct pt_regs, r12),
- offsetof(struct pt_regs, r13),
- offsetof(struct pt_regs, r14),
- offsetof(struct pt_regs, r15),
-#endif
- };
- int nr_registers = ARRAY_SIZE(regoff);
/*
* Don't possibly decode a 32-bit instructions as
* reading a 64-bit-only register.
@@ -505,7 +516,18 @@ static int get_reg_offset(struct insn *insn, struct pt_regs *regs,
WARN_ONCE(1, "decoded an instruction with an invalid register");
return -EINVAL;
}
- return regoff[regno];
+ return regno;
+}
+
+static int get_reg_offset(struct insn *insn, struct pt_regs *regs,
+ enum reg_type type)
+{
+ int regno = get_regno(insn, type);
+
+ if (regno < 0)
+ return regno;
+
+ return pt_regs_offset(regs, regno);
}
/**
@@ -851,6 +873,26 @@ int insn_get_modrm_reg_off(struct insn *insn, struct pt_regs *regs)
}
/**
+ * insn_get_modrm_reg_ptr() - Obtain register pointer based on ModRM byte
+ * @insn: Instruction containing the ModRM byte
+ * @regs: Register values as seen when entering kernel mode
+ *
+ * Returns:
+ *
+ * The register indicated by the reg part of the ModRM byte.
+ * The register is obtained as a pointer within pt_regs.
+ */
+unsigned long *insn_get_modrm_reg_ptr(struct insn *insn, struct pt_regs *regs)
+{
+ int offset;
+
+ offset = insn_get_modrm_reg_off(insn, regs);
+ if (offset < 0)
+ return NULL;
+ return (void *)regs + offset;
+}
+
+/**
* get_seg_base_limit() - obtain base address and limit of a segment
* @insn: Instruction. Must be valid.
* @regs: Register values as seen when entering kernel mode
@@ -1405,6 +1447,9 @@ void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs)
if (!insn || !regs)
return (void __user *)-1L;
+ if (insn_get_opcode(insn))
+ return (void __user *)-1L;
+
switch (insn->addr_bytes) {
case 2:
return get_addr_ref_16(insn, regs);
@@ -1539,3 +1584,87 @@ bool insn_decode_from_regs(struct insn *insn, struct pt_regs *regs,
return true;
}
+
+/**
+ * insn_decode_mmio() - Decode a MMIO instruction
+ * @insn: Structure to store decoded instruction
+ * @bytes: Returns size of memory operand
+ *
+ * Decodes instruction that used for Memory-mapped I/O.
+ *
+ * Returns:
+ *
+ * Type of the instruction. Size of the memory operand is stored in
+ * @bytes. If decode failed, MMIO_DECODE_FAILED returned.
+ */
+enum mmio_type insn_decode_mmio(struct insn *insn, int *bytes)
+{
+ enum mmio_type type = MMIO_DECODE_FAILED;
+
+ *bytes = 0;
+
+ if (insn_get_opcode(insn))
+ return MMIO_DECODE_FAILED;
+
+ switch (insn->opcode.bytes[0]) {
+ case 0x88: /* MOV m8,r8 */
+ *bytes = 1;
+ fallthrough;
+ case 0x89: /* MOV m16/m32/m64, r16/m32/m64 */
+ if (!*bytes)
+ *bytes = insn->opnd_bytes;
+ type = MMIO_WRITE;
+ break;
+
+ case 0xc6: /* MOV m8, imm8 */
+ *bytes = 1;
+ fallthrough;
+ case 0xc7: /* MOV m16/m32/m64, imm16/imm32/imm64 */
+ if (!*bytes)
+ *bytes = insn->opnd_bytes;
+ type = MMIO_WRITE_IMM;
+ break;
+
+ case 0x8a: /* MOV r8, m8 */
+ *bytes = 1;
+ fallthrough;
+ case 0x8b: /* MOV r16/r32/r64, m16/m32/m64 */
+ if (!*bytes)
+ *bytes = insn->opnd_bytes;
+ type = MMIO_READ;
+ break;
+
+ case 0xa4: /* MOVS m8, m8 */
+ *bytes = 1;
+ fallthrough;
+ case 0xa5: /* MOVS m16/m32/m64, m16/m32/m64 */
+ if (!*bytes)
+ *bytes = insn->opnd_bytes;
+ type = MMIO_MOVS;
+ break;
+
+ case 0x0f: /* Two-byte instruction */
+ switch (insn->opcode.bytes[1]) {
+ case 0xb6: /* MOVZX r16/r32/r64, m8 */
+ *bytes = 1;
+ fallthrough;
+ case 0xb7: /* MOVZX r32/r64, m16 */
+ if (!*bytes)
+ *bytes = 2;
+ type = MMIO_READ_ZERO_EXTEND;
+ break;
+
+ case 0xbe: /* MOVSX r16/r32/r64, m8 */
+ *bytes = 1;
+ fallthrough;
+ case 0xbf: /* MOVSX r32/r64, m16 */
+ if (!*bytes)
+ *bytes = 2;
+ type = MMIO_READ_SIGN_EXTEND;
+ break;
+ }
+ break;
+ }
+
+ return type;
+}
diff --git a/arch/x86/lib/iomap_copy_64.S b/arch/x86/lib/iomap_copy_64.S
index cb5a1964506b..a1f9416bf67a 100644
--- a/arch/x86/lib/iomap_copy_64.S
+++ b/arch/x86/lib/iomap_copy_64.S
@@ -11,5 +11,5 @@
SYM_FUNC_START(__iowrite32_copy)
movl %edx,%ecx
rep movsd
- ret
+ RET
SYM_FUNC_END(__iowrite32_copy)
diff --git a/arch/x86/lib/iomem.c b/arch/x86/lib/iomem.c
index df50451d94ef..e0411a3774d4 100644
--- a/arch/x86/lib/iomem.c
+++ b/arch/x86/lib/iomem.c
@@ -1,6 +1,7 @@
#include <linux/string.h>
#include <linux/module.h>
#include <linux/io.h>
+#include <linux/kmsan-checks.h>
#define movs(type,to,from) \
asm volatile("movs" type:"=&D" (to), "=&S" (from):"0" (to), "1" (from):"memory")
@@ -22,7 +23,7 @@ static __always_inline void rep_movs(void *to, const void *from, size_t n)
: "memory");
}
-void memcpy_fromio(void *to, const volatile void __iomem *from, size_t n)
+static void string_memcpy_fromio(void *to, const volatile void __iomem *from, size_t n)
{
if (unlikely(!n))
return;
@@ -37,14 +38,17 @@ void memcpy_fromio(void *to, const volatile void __iomem *from, size_t n)
n-=2;
}
rep_movs(to, (const void *)from, n);
+ /* KMSAN must treat values read from devices as initialized. */
+ kmsan_unpoison_memory(to, n);
}
-EXPORT_SYMBOL(memcpy_fromio);
-void memcpy_toio(volatile void __iomem *to, const void *from, size_t n)
+static void string_memcpy_toio(volatile void __iomem *to, const void *from, size_t n)
{
if (unlikely(!n))
return;
+ /* Make sure uninitialized memory isn't copied to devices. */
+ kmsan_check_memory(from, n);
/* Align any unaligned destination IO */
if (unlikely(1 & (unsigned long)to)) {
movs("b", to, from);
@@ -56,14 +60,64 @@ void memcpy_toio(volatile void __iomem *to, const void *from, size_t n)
}
rep_movs((void *)to, (const void *) from, n);
}
+
+static void unrolled_memcpy_fromio(void *to, const volatile void __iomem *from, size_t n)
+{
+ const volatile char __iomem *in = from;
+ char *out = to;
+ int i;
+
+ for (i = 0; i < n; ++i)
+ out[i] = readb(&in[i]);
+}
+
+static void unrolled_memcpy_toio(volatile void __iomem *to, const void *from, size_t n)
+{
+ volatile char __iomem *out = to;
+ const char *in = from;
+ int i;
+
+ for (i = 0; i < n; ++i)
+ writeb(in[i], &out[i]);
+}
+
+static void unrolled_memset_io(volatile void __iomem *a, int b, size_t c)
+{
+ volatile char __iomem *mem = a;
+ int i;
+
+ for (i = 0; i < c; ++i)
+ writeb(b, &mem[i]);
+}
+
+void memcpy_fromio(void *to, const volatile void __iomem *from, size_t n)
+{
+ if (cc_platform_has(CC_ATTR_GUEST_UNROLL_STRING_IO))
+ unrolled_memcpy_fromio(to, from, n);
+ else
+ string_memcpy_fromio(to, from, n);
+}
+EXPORT_SYMBOL(memcpy_fromio);
+
+void memcpy_toio(volatile void __iomem *to, const void *from, size_t n)
+{
+ if (cc_platform_has(CC_ATTR_GUEST_UNROLL_STRING_IO))
+ unrolled_memcpy_toio(to, from, n);
+ else
+ string_memcpy_toio(to, from, n);
+}
EXPORT_SYMBOL(memcpy_toio);
void memset_io(volatile void __iomem *a, int b, size_t c)
{
- /*
- * TODO: memset can mangle the IO patterns quite a bit.
- * perhaps it would be better to use a dumb one:
- */
- memset((void *)a, b, c);
+ if (cc_platform_has(CC_ATTR_GUEST_UNROLL_STRING_IO)) {
+ unrolled_memset_io(a, b, c);
+ } else {
+ /*
+ * TODO: memset can mangle the IO patterns quite a bit.
+ * perhaps it would be better to use a dumb one:
+ */
+ memset((void *)a, b, c);
+ }
}
EXPORT_SYMBOL(memset_io);
diff --git a/arch/x86/lib/kaslr.c b/arch/x86/lib/kaslr.c
index 2b3eb8c948a3..a58f451a7dd3 100644
--- a/arch/x86/lib/kaslr.c
+++ b/arch/x86/lib/kaslr.c
@@ -11,7 +11,7 @@
#include <asm/msr.h>
#include <asm/archrandom.h>
#include <asm/e820/api.h>
-#include <asm/io.h>
+#include <asm/shared/io.h>
/*
* When built for the regular kernel, several functions need to be stubbed out
diff --git a/arch/x86/lib/memcpy_32.c b/arch/x86/lib/memcpy_32.c
index e565d1c9019e..ef3af7ff2c8a 100644
--- a/arch/x86/lib/memcpy_32.c
+++ b/arch/x86/lib/memcpy_32.c
@@ -4,14 +4,11 @@
#undef memcpy
#undef memset
+#undef memmove
__visible void *memcpy(void *to, const void *from, size_t n)
{
-#if defined(CONFIG_X86_USE_3DNOW) && !defined(CONFIG_FORTIFY_SOURCE)
- return __memcpy3d(to, from, n);
-#else
return __memcpy(to, from, n);
-#endif
}
EXPORT_SYMBOL(memcpy);
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 1cc9da6e29c7..dd8cd8831251 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -2,6 +2,7 @@
/* Copyright 2002 Andi Kleen */
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
#include <asm/errno.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
@@ -27,8 +28,7 @@
* Output:
* rax original destination
*/
-SYM_FUNC_START_ALIAS(__memcpy)
-SYM_FUNC_START_WEAK(memcpy)
+SYM_TYPED_FUNC_START(__memcpy)
ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
"jmp memcpy_erms", X86_FEATURE_ERMS
@@ -39,12 +39,13 @@ SYM_FUNC_START_WEAK(memcpy)
rep movsq
movl %edx, %ecx
rep movsb
- ret
-SYM_FUNC_END(memcpy)
-SYM_FUNC_END_ALIAS(__memcpy)
-EXPORT_SYMBOL(memcpy)
+ RET
+SYM_FUNC_END(__memcpy)
EXPORT_SYMBOL(__memcpy)
+SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy)
+EXPORT_SYMBOL(memcpy)
+
/*
* memcpy_erms() - enhanced fast string memcpy. This is faster and
* simpler than memcpy. Use memcpy_erms when possible.
@@ -53,7 +54,7 @@ SYM_FUNC_START_LOCAL(memcpy_erms)
movq %rdi, %rax
movq %rdx, %rcx
rep movsb
- ret
+ RET
SYM_FUNC_END(memcpy_erms)
SYM_FUNC_START_LOCAL(memcpy_orig)
@@ -137,7 +138,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
movq %r9, 1*8(%rdi)
movq %r10, -2*8(%rdi, %rdx)
movq %r11, -1*8(%rdi, %rdx)
- retq
+ RET
.p2align 4
.Lless_16bytes:
cmpl $8, %edx
@@ -149,7 +150,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
movq -1*8(%rsi, %rdx), %r9
movq %r8, 0*8(%rdi)
movq %r9, -1*8(%rdi, %rdx)
- retq
+ RET
.p2align 4
.Lless_8bytes:
cmpl $4, %edx
@@ -162,7 +163,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
movl -4(%rsi, %rdx), %r8d
movl %ecx, (%rdi)
movl %r8d, -4(%rdi, %rdx)
- retq
+ RET
.p2align 4
.Lless_3bytes:
subl $1, %edx
@@ -180,7 +181,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
movb %cl, (%rdi)
.Lend:
- retq
+ RET
SYM_FUNC_END(memcpy_orig)
.popsection
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index 64801010d312..724bbf83eb5b 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -24,7 +24,6 @@
* Output:
* rax: dest
*/
-SYM_FUNC_START_WEAK(memmove)
SYM_FUNC_START(__memmove)
mov %rdi, %rax
@@ -40,7 +39,7 @@ SYM_FUNC_START(__memmove)
/* FSRM implies ERMS => no length checks, do the copy directly */
.Lmemmove_begin_forward:
ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM
- ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; retq", X86_FEATURE_ERMS
+ ALTERNATIVE "", "jmp .Lmemmove_erms", X86_FEATURE_ERMS
/*
* movsq instruction have many startup latency
@@ -205,8 +204,14 @@ SYM_FUNC_START(__memmove)
movb (%rsi), %r11b
movb %r11b, (%rdi)
13:
- retq
+ RET
+
+.Lmemmove_erms:
+ movq %rdx, %rcx
+ rep movsb
+ RET
SYM_FUNC_END(__memmove)
-SYM_FUNC_END_ALIAS(memmove)
EXPORT_SYMBOL(__memmove)
+
+SYM_FUNC_ALIAS_WEAK(memmove, __memmove)
EXPORT_SYMBOL(memmove)
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 9827ae267f96..fc9ffd3ff3b2 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -17,7 +17,6 @@
*
* rax original destination
*/
-SYM_FUNC_START_WEAK(memset)
SYM_FUNC_START(__memset)
/*
* Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
@@ -40,12 +39,13 @@ SYM_FUNC_START(__memset)
movl %edx,%ecx
rep stosb
movq %r9,%rax
- ret
+ RET
SYM_FUNC_END(__memset)
-SYM_FUNC_END_ALIAS(memset)
-EXPORT_SYMBOL(memset)
EXPORT_SYMBOL(__memset)
+SYM_FUNC_ALIAS_WEAK(memset, __memset)
+EXPORT_SYMBOL(memset)
+
/*
* ISO C memset - set a memory block to a byte value. This function uses
* enhanced rep stosb to override the fast string function.
@@ -63,7 +63,7 @@ SYM_FUNC_START_LOCAL(memset_erms)
movq %rdx,%rcx
rep stosb
movq %r9,%rax
- ret
+ RET
SYM_FUNC_END(memset_erms)
SYM_FUNC_START_LOCAL(memset_orig)
@@ -125,7 +125,7 @@ SYM_FUNC_START_LOCAL(memset_orig)
.Lende:
movq %r10,%rax
- ret
+ RET
.Lbad_alignment:
cmpq $7,%rdx
diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c
deleted file mode 100644
index cc5f4ea943d3..000000000000
--- a/arch/x86/lib/mmx_32.c
+++ /dev/null
@@ -1,388 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * MMX 3DNow! library helper functions
- *
- * To do:
- * We can use MMX just for prefetch in IRQ's. This may be a win.
- * (reported so on K6-III)
- * We should use a better code neutral filler for the short jump
- * leal ebx. [ebx] is apparently best for K6-2, but Cyrix ??
- * We also want to clobber the filler register so we don't get any
- * register forwarding stalls on the filler.
- *
- * Add *user handling. Checksums are not a win with MMX on any CPU
- * tested so far for any MMX solution figured.
- *
- * 22/09/2000 - Arjan van de Ven
- * Improved for non-engineering-sample Athlons
- *
- */
-#include <linux/hardirq.h>
-#include <linux/string.h>
-#include <linux/export.h>
-#include <linux/sched.h>
-#include <linux/types.h>
-
-#include <asm/fpu/api.h>
-#include <asm/asm.h>
-
-/*
- * Use KFPU_387. MMX instructions are not affected by MXCSR,
- * but both AMD and Intel documentation states that even integer MMX
- * operations will result in #MF if an exception is pending in FCW.
- *
- * EMMS is not needed afterwards because, after calling kernel_fpu_end(),
- * any subsequent user of the 387 stack will reinitialize it using
- * KFPU_387.
- */
-
-void *_mmx_memcpy(void *to, const void *from, size_t len)
-{
- void *p;
- int i;
-
- if (unlikely(in_interrupt()))
- return __memcpy(to, from, len);
-
- p = to;
- i = len >> 6; /* len/64 */
-
- kernel_fpu_begin_mask(KFPU_387);
-
- __asm__ __volatile__ (
- "1: prefetch (%0)\n" /* This set is 28 bytes */
- " prefetch 64(%0)\n"
- " prefetch 128(%0)\n"
- " prefetch 192(%0)\n"
- " prefetch 256(%0)\n"
- "2: \n"
- ".section .fixup, \"ax\"\n"
- "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
- " jmp 2b\n"
- ".previous\n"
- _ASM_EXTABLE(1b, 3b)
- : : "r" (from));
-
- for ( ; i > 5; i--) {
- __asm__ __volatile__ (
- "1: prefetch 320(%0)\n"
- "2: movq (%0), %%mm0\n"
- " movq 8(%0), %%mm1\n"
- " movq 16(%0), %%mm2\n"
- " movq 24(%0), %%mm3\n"
- " movq %%mm0, (%1)\n"
- " movq %%mm1, 8(%1)\n"
- " movq %%mm2, 16(%1)\n"
- " movq %%mm3, 24(%1)\n"
- " movq 32(%0), %%mm0\n"
- " movq 40(%0), %%mm1\n"
- " movq 48(%0), %%mm2\n"
- " movq 56(%0), %%mm3\n"
- " movq %%mm0, 32(%1)\n"
- " movq %%mm1, 40(%1)\n"
- " movq %%mm2, 48(%1)\n"
- " movq %%mm3, 56(%1)\n"
- ".section .fixup, \"ax\"\n"
- "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
- " jmp 2b\n"
- ".previous\n"
- _ASM_EXTABLE(1b, 3b)
- : : "r" (from), "r" (to) : "memory");
-
- from += 64;
- to += 64;
- }
-
- for ( ; i > 0; i--) {
- __asm__ __volatile__ (
- " movq (%0), %%mm0\n"
- " movq 8(%0), %%mm1\n"
- " movq 16(%0), %%mm2\n"
- " movq 24(%0), %%mm3\n"
- " movq %%mm0, (%1)\n"
- " movq %%mm1, 8(%1)\n"
- " movq %%mm2, 16(%1)\n"
- " movq %%mm3, 24(%1)\n"
- " movq 32(%0), %%mm0\n"
- " movq 40(%0), %%mm1\n"
- " movq 48(%0), %%mm2\n"
- " movq 56(%0), %%mm3\n"
- " movq %%mm0, 32(%1)\n"
- " movq %%mm1, 40(%1)\n"
- " movq %%mm2, 48(%1)\n"
- " movq %%mm3, 56(%1)\n"
- : : "r" (from), "r" (to) : "memory");
-
- from += 64;
- to += 64;
- }
- /*
- * Now do the tail of the block:
- */
- __memcpy(to, from, len & 63);
- kernel_fpu_end();
-
- return p;
-}
-EXPORT_SYMBOL(_mmx_memcpy);
-
-#ifdef CONFIG_MK7
-
-/*
- * The K7 has streaming cache bypass load/store. The Cyrix III, K6 and
- * other MMX using processors do not.
- */
-
-static void fast_clear_page(void *page)
-{
- int i;
-
- kernel_fpu_begin_mask(KFPU_387);
-
- __asm__ __volatile__ (
- " pxor %%mm0, %%mm0\n" : :
- );
-
- for (i = 0; i < 4096/64; i++) {
- __asm__ __volatile__ (
- " movntq %%mm0, (%0)\n"
- " movntq %%mm0, 8(%0)\n"
- " movntq %%mm0, 16(%0)\n"
- " movntq %%mm0, 24(%0)\n"
- " movntq %%mm0, 32(%0)\n"
- " movntq %%mm0, 40(%0)\n"
- " movntq %%mm0, 48(%0)\n"
- " movntq %%mm0, 56(%0)\n"
- : : "r" (page) : "memory");
- page += 64;
- }
-
- /*
- * Since movntq is weakly-ordered, a "sfence" is needed to become
- * ordered again:
- */
- __asm__ __volatile__("sfence\n"::);
-
- kernel_fpu_end();
-}
-
-static void fast_copy_page(void *to, void *from)
-{
- int i;
-
- kernel_fpu_begin_mask(KFPU_387);
-
- /*
- * maybe the prefetch stuff can go before the expensive fnsave...
- * but that is for later. -AV
- */
- __asm__ __volatile__(
- "1: prefetch (%0)\n"
- " prefetch 64(%0)\n"
- " prefetch 128(%0)\n"
- " prefetch 192(%0)\n"
- " prefetch 256(%0)\n"
- "2: \n"
- ".section .fixup, \"ax\"\n"
- "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
- " jmp 2b\n"
- ".previous\n"
- _ASM_EXTABLE(1b, 3b) : : "r" (from));
-
- for (i = 0; i < (4096-320)/64; i++) {
- __asm__ __volatile__ (
- "1: prefetch 320(%0)\n"
- "2: movq (%0), %%mm0\n"
- " movntq %%mm0, (%1)\n"
- " movq 8(%0), %%mm1\n"
- " movntq %%mm1, 8(%1)\n"
- " movq 16(%0), %%mm2\n"
- " movntq %%mm2, 16(%1)\n"
- " movq 24(%0), %%mm3\n"
- " movntq %%mm3, 24(%1)\n"
- " movq 32(%0), %%mm4\n"
- " movntq %%mm4, 32(%1)\n"
- " movq 40(%0), %%mm5\n"
- " movntq %%mm5, 40(%1)\n"
- " movq 48(%0), %%mm6\n"
- " movntq %%mm6, 48(%1)\n"
- " movq 56(%0), %%mm7\n"
- " movntq %%mm7, 56(%1)\n"
- ".section .fixup, \"ax\"\n"
- "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
- " jmp 2b\n"
- ".previous\n"
- _ASM_EXTABLE(1b, 3b) : : "r" (from), "r" (to) : "memory");
-
- from += 64;
- to += 64;
- }
-
- for (i = (4096-320)/64; i < 4096/64; i++) {
- __asm__ __volatile__ (
- "2: movq (%0), %%mm0\n"
- " movntq %%mm0, (%1)\n"
- " movq 8(%0), %%mm1\n"
- " movntq %%mm1, 8(%1)\n"
- " movq 16(%0), %%mm2\n"
- " movntq %%mm2, 16(%1)\n"
- " movq 24(%0), %%mm3\n"
- " movntq %%mm3, 24(%1)\n"
- " movq 32(%0), %%mm4\n"
- " movntq %%mm4, 32(%1)\n"
- " movq 40(%0), %%mm5\n"
- " movntq %%mm5, 40(%1)\n"
- " movq 48(%0), %%mm6\n"
- " movntq %%mm6, 48(%1)\n"
- " movq 56(%0), %%mm7\n"
- " movntq %%mm7, 56(%1)\n"
- : : "r" (from), "r" (to) : "memory");
- from += 64;
- to += 64;
- }
- /*
- * Since movntq is weakly-ordered, a "sfence" is needed to become
- * ordered again:
- */
- __asm__ __volatile__("sfence \n"::);
- kernel_fpu_end();
-}
-
-#else /* CONFIG_MK7 */
-
-/*
- * Generic MMX implementation without K7 specific streaming
- */
-static void fast_clear_page(void *page)
-{
- int i;
-
- kernel_fpu_begin_mask(KFPU_387);
-
- __asm__ __volatile__ (
- " pxor %%mm0, %%mm0\n" : :
- );
-
- for (i = 0; i < 4096/128; i++) {
- __asm__ __volatile__ (
- " movq %%mm0, (%0)\n"
- " movq %%mm0, 8(%0)\n"
- " movq %%mm0, 16(%0)\n"
- " movq %%mm0, 24(%0)\n"
- " movq %%mm0, 32(%0)\n"
- " movq %%mm0, 40(%0)\n"
- " movq %%mm0, 48(%0)\n"
- " movq %%mm0, 56(%0)\n"
- " movq %%mm0, 64(%0)\n"
- " movq %%mm0, 72(%0)\n"
- " movq %%mm0, 80(%0)\n"
- " movq %%mm0, 88(%0)\n"
- " movq %%mm0, 96(%0)\n"
- " movq %%mm0, 104(%0)\n"
- " movq %%mm0, 112(%0)\n"
- " movq %%mm0, 120(%0)\n"
- : : "r" (page) : "memory");
- page += 128;
- }
-
- kernel_fpu_end();
-}
-
-static void fast_copy_page(void *to, void *from)
-{
- int i;
-
- kernel_fpu_begin_mask(KFPU_387);
-
- __asm__ __volatile__ (
- "1: prefetch (%0)\n"
- " prefetch 64(%0)\n"
- " prefetch 128(%0)\n"
- " prefetch 192(%0)\n"
- " prefetch 256(%0)\n"
- "2: \n"
- ".section .fixup, \"ax\"\n"
- "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
- " jmp 2b\n"
- ".previous\n"
- _ASM_EXTABLE(1b, 3b) : : "r" (from));
-
- for (i = 0; i < 4096/64; i++) {
- __asm__ __volatile__ (
- "1: prefetch 320(%0)\n"
- "2: movq (%0), %%mm0\n"
- " movq 8(%0), %%mm1\n"
- " movq 16(%0), %%mm2\n"
- " movq 24(%0), %%mm3\n"
- " movq %%mm0, (%1)\n"
- " movq %%mm1, 8(%1)\n"
- " movq %%mm2, 16(%1)\n"
- " movq %%mm3, 24(%1)\n"
- " movq 32(%0), %%mm0\n"
- " movq 40(%0), %%mm1\n"
- " movq 48(%0), %%mm2\n"
- " movq 56(%0), %%mm3\n"
- " movq %%mm0, 32(%1)\n"
- " movq %%mm1, 40(%1)\n"
- " movq %%mm2, 48(%1)\n"
- " movq %%mm3, 56(%1)\n"
- ".section .fixup, \"ax\"\n"
- "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
- " jmp 2b\n"
- ".previous\n"
- _ASM_EXTABLE(1b, 3b)
- : : "r" (from), "r" (to) : "memory");
-
- from += 64;
- to += 64;
- }
- kernel_fpu_end();
-}
-
-#endif /* !CONFIG_MK7 */
-
-/*
- * Favour MMX for page clear and copy:
- */
-static void slow_zero_page(void *page)
-{
- int d0, d1;
-
- __asm__ __volatile__(
- "cld\n\t"
- "rep ; stosl"
-
- : "=&c" (d0), "=&D" (d1)
- :"a" (0), "1" (page), "0" (1024)
- :"memory");
-}
-
-void mmx_clear_page(void *page)
-{
- if (unlikely(in_interrupt()))
- slow_zero_page(page);
- else
- fast_clear_page(page);
-}
-EXPORT_SYMBOL(mmx_clear_page);
-
-static void slow_copy_page(void *to, void *from)
-{
- int d0, d1, d2;
-
- __asm__ __volatile__(
- "cld\n\t"
- "rep ; movsl"
- : "=&c" (d0), "=&D" (d1), "=&S" (d2)
- : "0" (1024), "1" ((long) to), "2" ((long) from)
- : "memory");
-}
-
-void mmx_copy_page(void *to, void *from)
-{
- if (unlikely(in_interrupt()))
- slow_copy_page(to, from);
- else
- fast_copy_page(to, from);
-}
-EXPORT_SYMBOL(mmx_copy_page);
diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S
index a2b9caa5274c..ebd259f31496 100644
--- a/arch/x86/lib/msr-reg.S
+++ b/arch/x86/lib/msr-reg.S
@@ -35,7 +35,7 @@ SYM_FUNC_START(\op\()_safe_regs)
movl %edi, 28(%r10)
popq %r12
popq %rbx
- ret
+ RET
3:
movl $-EIO, %r11d
jmp 2b
@@ -77,7 +77,7 @@ SYM_FUNC_START(\op\()_safe_regs)
popl %esi
popl %ebp
popl %ebx
- ret
+ RET
3:
movl $-EIO, 4(%esp)
jmp 2b
diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
index 0ea344c5ea43..b7dfd60243b7 100644
--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@@ -48,11 +48,12 @@ SYM_FUNC_START(__put_user_1)
cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user
SYM_INNER_LABEL(__put_user_nocheck_1, SYM_L_GLOBAL)
+ ENDBR
ASM_STAC
1: movb %al,(%_ASM_CX)
xor %ecx,%ecx
ASM_CLAC
- ret
+ RET
SYM_FUNC_END(__put_user_1)
EXPORT_SYMBOL(__put_user_1)
EXPORT_SYMBOL(__put_user_nocheck_1)
@@ -62,11 +63,12 @@ SYM_FUNC_START(__put_user_2)
cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user
SYM_INNER_LABEL(__put_user_nocheck_2, SYM_L_GLOBAL)
+ ENDBR
ASM_STAC
2: movw %ax,(%_ASM_CX)
xor %ecx,%ecx
ASM_CLAC
- ret
+ RET
SYM_FUNC_END(__put_user_2)
EXPORT_SYMBOL(__put_user_2)
EXPORT_SYMBOL(__put_user_nocheck_2)
@@ -76,11 +78,12 @@ SYM_FUNC_START(__put_user_4)
cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user
SYM_INNER_LABEL(__put_user_nocheck_4, SYM_L_GLOBAL)
+ ENDBR
ASM_STAC
3: movl %eax,(%_ASM_CX)
xor %ecx,%ecx
ASM_CLAC
- ret
+ RET
SYM_FUNC_END(__put_user_4)
EXPORT_SYMBOL(__put_user_4)
EXPORT_SYMBOL(__put_user_nocheck_4)
@@ -90,6 +93,7 @@ SYM_FUNC_START(__put_user_8)
cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user
SYM_INNER_LABEL(__put_user_nocheck_8, SYM_L_GLOBAL)
+ ENDBR
ASM_STAC
4: mov %_ASM_AX,(%_ASM_CX)
#ifdef CONFIG_X86_32
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index cf0b39f97adc..073289a55f84 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -23,7 +23,7 @@
.Ldo_rop_\@:
mov %\reg, (%_ASM_SP)
UNWIND_HINT_FUNC
- ret
+ RET
.endm
.macro THUNK reg
@@ -31,10 +31,11 @@
.align RETPOLINE_THUNK_SIZE
SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL)
UNWIND_HINT_EMPTY
+ ANNOTATE_NOENDBR
- ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
- __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \
- __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD
+ ALTERNATIVE_2 __stringify(RETPOLINE \reg), \
+ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE, \
+ __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), ALT_NOT(X86_FEATURE_RETPOLINE)
.endm
@@ -66,3 +67,76 @@ SYM_CODE_END(__x86_indirect_thunk_array)
#define GEN(reg) EXPORT_THUNK(reg)
#include <asm/GEN-for-each-reg.h>
#undef GEN
+
+/*
+ * This function name is magical and is used by -mfunction-return=thunk-extern
+ * for the compiler to generate JMPs to it.
+ */
+#ifdef CONFIG_RETHUNK
+
+ .section .text.__x86.return_thunk
+
+/*
+ * Safety details here pertain to the AMD Zen{1,2} microarchitecture:
+ * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for
+ * alignment within the BTB.
+ * 2) The instruction at zen_untrain_ret must contain, and not
+ * end with, the 0xc3 byte of the RET.
+ * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread
+ * from re-poisioning the BTB prediction.
+ */
+ .align 64
+ .skip 63, 0xcc
+SYM_FUNC_START_NOALIGN(zen_untrain_ret);
+
+ /*
+ * As executed from zen_untrain_ret, this is:
+ *
+ * TEST $0xcc, %bl
+ * LFENCE
+ * JMP __x86_return_thunk
+ *
+ * Executing the TEST instruction has a side effect of evicting any BTB
+ * prediction (potentially attacker controlled) attached to the RET, as
+ * __x86_return_thunk + 1 isn't an instruction boundary at the moment.
+ */
+ .byte 0xf6
+
+ /*
+ * As executed from __x86_return_thunk, this is a plain RET.
+ *
+ * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8.
+ *
+ * We subsequently jump backwards and architecturally execute the RET.
+ * This creates a correct BTB prediction (type=ret), but in the
+ * meantime we suffer Straight Line Speculation (because the type was
+ * no branch) which is halted by the INT3.
+ *
+ * With SMT enabled and STIBP active, a sibling thread cannot poison
+ * RET's prediction to a type of its choice, but can evict the
+ * prediction due to competitive sharing. If the prediction is
+ * evicted, __x86_return_thunk will suffer Straight Line Speculation
+ * which will be contained safely by the INT3.
+ */
+SYM_INNER_LABEL(__x86_return_thunk, SYM_L_GLOBAL)
+ ret
+ int3
+SYM_CODE_END(__x86_return_thunk)
+
+ /*
+ * Ensure the TEST decoding / BTB invalidation is complete.
+ */
+ lfence
+
+ /*
+ * Jump back and execute the RET in the middle of the TEST instruction.
+ * INT3 is for SLS protection.
+ */
+ jmp __x86_return_thunk
+ int3
+SYM_FUNC_END(zen_untrain_ret)
+__EXPORT_THUNK(zen_untrain_ret)
+
+EXPORT_SYMBOL(__x86_return_thunk)
+
+#endif /* CONFIG_RETHUNK */
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index c3e8a62ca561..24b48af27417 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -6,6 +6,7 @@
#include <linux/uaccess.h>
#include <linux/export.h>
+#include <linux/instrumented.h>
#include <asm/tlbflush.h>
@@ -32,7 +33,7 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
{
unsigned long ret;
- if (__range_not_ok(from, n, TASK_SIZE))
+ if (!__access_ok(from, n))
return n;
if (!nmi_uaccess_okay())
@@ -44,7 +45,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
* called from other contexts.
*/
pagefault_disable();
- ret = __copy_from_user_inatomic(to, from, n);
+ instrument_copy_from_user_before(to, from, n);
+ ret = raw_copy_from_user(to, from, n);
+ instrument_copy_from_user_after(to, from, n, ret);
pagefault_enable();
return ret;
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 7d290777246d..422257c350c6 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -8,7 +8,6 @@
*/
#include <linux/export.h>
#include <linux/uaccess.h>
-#include <asm/mmx.h>
#include <asm/asm.h>
#ifdef CONFIG_X86_INTEL_USERCOPY
@@ -43,11 +42,7 @@ do { \
" movl %2,%0\n" \
"1: rep; stosb\n" \
"2: " ASM_CLAC "\n" \
- ".section .fixup,\"ax\"\n" \
- "3: lea 0(%2,%0,4),%0\n" \
- " jmp 2b\n" \
- ".previous\n" \
- _ASM_EXTABLE_UA(0b, 3b) \
+ _ASM_EXTABLE_TYPE_REG(0b, 2b, EX_TYPE_UCOPY_LEN4, %2) \
_ASM_EXTABLE_UA(1b, 2b) \
: "=&c"(size), "=&D" (__d0) \
: "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \
@@ -149,10 +144,6 @@ __copy_user_intel(void __user *to, const void *from, unsigned long size)
"36: movl %%eax, %0\n"
"37: rep; movsb\n"
"100:\n"
- ".section .fixup,\"ax\"\n"
- "101: lea 0(%%eax,%0,4),%0\n"
- " jmp 100b\n"
- ".previous\n"
_ASM_EXTABLE_UA(1b, 100b)
_ASM_EXTABLE_UA(2b, 100b)
_ASM_EXTABLE_UA(3b, 100b)
@@ -190,7 +181,7 @@ __copy_user_intel(void __user *to, const void *from, unsigned long size)
_ASM_EXTABLE_UA(35b, 100b)
_ASM_EXTABLE_UA(36b, 100b)
_ASM_EXTABLE_UA(37b, 100b)
- _ASM_EXTABLE_UA(99b, 101b)
+ _ASM_EXTABLE_TYPE_REG(99b, 100b, EX_TYPE_UCOPY_LEN4, %%eax)
: "=&c"(size), "=&D" (d0), "=&S" (d1)
: "1"(to), "2"(from), "0"(size)
: "eax", "edx", "memory");
@@ -255,30 +246,26 @@ static unsigned long __copy_user_intel_nocache(void *to,
" movl %%eax,%0\n"
"7: rep; movsb\n"
"8:\n"
- ".section .fixup,\"ax\"\n"
- "9: lea 0(%%eax,%0,4),%0\n"
- "16: jmp 8b\n"
- ".previous\n"
- _ASM_EXTABLE_UA(0b, 16b)
- _ASM_EXTABLE_UA(1b, 16b)
- _ASM_EXTABLE_UA(2b, 16b)
- _ASM_EXTABLE_UA(21b, 16b)
- _ASM_EXTABLE_UA(3b, 16b)
- _ASM_EXTABLE_UA(31b, 16b)
- _ASM_EXTABLE_UA(4b, 16b)
- _ASM_EXTABLE_UA(41b, 16b)
- _ASM_EXTABLE_UA(10b, 16b)
- _ASM_EXTABLE_UA(51b, 16b)
- _ASM_EXTABLE_UA(11b, 16b)
- _ASM_EXTABLE_UA(61b, 16b)
- _ASM_EXTABLE_UA(12b, 16b)
- _ASM_EXTABLE_UA(71b, 16b)
- _ASM_EXTABLE_UA(13b, 16b)
- _ASM_EXTABLE_UA(81b, 16b)
- _ASM_EXTABLE_UA(14b, 16b)
- _ASM_EXTABLE_UA(91b, 16b)
- _ASM_EXTABLE_UA(6b, 9b)
- _ASM_EXTABLE_UA(7b, 16b)
+ _ASM_EXTABLE_UA(0b, 8b)
+ _ASM_EXTABLE_UA(1b, 8b)
+ _ASM_EXTABLE_UA(2b, 8b)
+ _ASM_EXTABLE_UA(21b, 8b)
+ _ASM_EXTABLE_UA(3b, 8b)
+ _ASM_EXTABLE_UA(31b, 8b)
+ _ASM_EXTABLE_UA(4b, 8b)
+ _ASM_EXTABLE_UA(41b, 8b)
+ _ASM_EXTABLE_UA(10b, 8b)
+ _ASM_EXTABLE_UA(51b, 8b)
+ _ASM_EXTABLE_UA(11b, 8b)
+ _ASM_EXTABLE_UA(61b, 8b)
+ _ASM_EXTABLE_UA(12b, 8b)
+ _ASM_EXTABLE_UA(71b, 8b)
+ _ASM_EXTABLE_UA(13b, 8b)
+ _ASM_EXTABLE_UA(81b, 8b)
+ _ASM_EXTABLE_UA(14b, 8b)
+ _ASM_EXTABLE_UA(91b, 8b)
+ _ASM_EXTABLE_TYPE_REG(6b, 8b, EX_TYPE_UCOPY_LEN4, %%eax)
+ _ASM_EXTABLE_UA(7b, 8b)
: "=&c"(size), "=&D" (d0), "=&S" (d1)
: "1"(to), "2"(from), "0"(size)
: "eax", "edx", "memory");
@@ -315,14 +302,8 @@ do { \
" movl %3,%0\n" \
"1: rep; movsb\n" \
"2:\n" \
- ".section .fixup,\"ax\"\n" \
- "5: addl %3,%0\n" \
- " jmp 2b\n" \
- "3: lea 0(%3,%0,4),%0\n" \
- " jmp 2b\n" \
- ".previous\n" \
- _ASM_EXTABLE_UA(4b, 5b) \
- _ASM_EXTABLE_UA(0b, 3b) \
+ _ASM_EXTABLE_TYPE_REG(4b, 2b, EX_TYPE_UCOPY_LEN1, %3) \
+ _ASM_EXTABLE_TYPE_REG(0b, 2b, EX_TYPE_UCOPY_LEN4, %3) \
_ASM_EXTABLE_UA(1b, 2b) \
: "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \
: "3"(size), "0"(size), "1"(to), "2"(from) \
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 508c81e97ab1..6c1f8ac5e721 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -14,48 +14,6 @@
* Zero Userspace
*/
-unsigned long __clear_user(void __user *addr, unsigned long size)
-{
- long __d0;
- might_fault();
- /* no memory constraint because it doesn't change any memory gcc knows
- about */
- stac();
- asm volatile(
- " testq %[size8],%[size8]\n"
- " jz 4f\n"
- " .align 16\n"
- "0: movq $0,(%[dst])\n"
- " addq $8,%[dst]\n"
- " decl %%ecx ; jnz 0b\n"
- "4: movq %[size1],%%rcx\n"
- " testl %%ecx,%%ecx\n"
- " jz 2f\n"
- "1: movb $0,(%[dst])\n"
- " incq %[dst]\n"
- " decl %%ecx ; jnz 1b\n"
- "2:\n"
- ".section .fixup,\"ax\"\n"
- "3: lea 0(%[size1],%[size8],8),%[size8]\n"
- " jmp 2b\n"
- ".previous\n"
- _ASM_EXTABLE_UA(0b, 3b)
- _ASM_EXTABLE_UA(1b, 2b)
- : [size8] "=&c"(size), [dst] "=&D" (__d0)
- : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr));
- clac();
- return size;
-}
-EXPORT_SYMBOL(__clear_user);
-
-unsigned long clear_user(void __user *to, unsigned long n)
-{
- if (access_ok(to, n))
- return __clear_user(to, n);
- return n;
-}
-EXPORT_SYMBOL(clear_user);
-
#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
/**
* clean_cache_range - write back a cache range with CLWB
@@ -121,7 +79,7 @@ void __memcpy_flushcache(void *_dst, const void *_src, size_t size)
/* cache copy and flush to align dest */
if (!IS_ALIGNED(dest, 8)) {
- unsigned len = min_t(unsigned, size, ALIGN(dest, 8) - dest);
+ size_t len = min_t(size_t, size, ALIGN(dest, 8) - dest);
memcpy((void *) dest, (void *) source, len);
clean_cache_range((void *) dest, len);
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index ec31f5b60323..d12d1358f96d 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -690,7 +690,10 @@ AVXcode: 2
45: vpsrlvd/q Vx,Hx,Wx (66),(v)
46: vpsravd Vx,Hx,Wx (66),(v) | vpsravd/q Vx,Hx,Wx (66),(evo)
47: vpsllvd/q Vx,Hx,Wx (66),(v)
-# Skip 0x48-0x4b
+# Skip 0x48
+49: TILERELEASE (v1),(000),(11B) | LDTILECFG Mtc (v1)(000) | STTILECFG Mtc (66),(v1),(000) | TILEZERO Vt (F2),(v1),(11B)
+# Skip 0x4a
+4b: TILELOADD Vt,Wsm (F2),(v1) | TILELOADDT1 Vt,Wsm (66),(v1) | TILESTORED Wsm,Vt (F3),(v)
4c: vrcp14ps/d Vpd,Wpd (66),(ev)
4d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev)
4e: vrsqrt14ps/d Vpd,Wpd (66),(ev)
@@ -705,7 +708,10 @@ AVXcode: 2
59: vpbroadcastq Vx,Wx (66),(v) | vbroadcasti32x2 Vx,Wx (66),(evo)
5a: vbroadcasti128 Vqq,Mdq (66),(v) | vbroadcasti32x4/64x2 Vx,Wx (66),(evo)
5b: vbroadcasti32x8/64x4 Vqq,Mdq (66),(ev)
-# Skip 0x5c-0x61
+5c: TDPBF16PS Vt,Wt,Ht (F3),(v1)
+# Skip 0x5d
+5e: TDPBSSD Vt,Wt,Ht (F2),(v1) | TDPBSUD Vt,Wt,Ht (F3),(v1) | TDPBUSD Vt,Wt,Ht (66),(v1) | TDPBUUD Vt,Wt,Ht (v1)
+# Skip 0x5f-0x61
62: vpexpandb/w Vx,Wx (66),(ev)
63: vpcompressb/w Wx,Vx (66),(ev)
64: vpblendmd/q Vx,Hx,Wx (66),(ev)
@@ -822,9 +828,9 @@ AVXcode: 3
05: vpermilpd Vx,Wx,Ib (66),(v)
06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v)
07:
-08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo)
+08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo) | vrndscaleph Vx,Wx,Ib (evo)
09: vroundpd Vx,Wx,Ib (66) | vrndscalepd Vx,Wx,Ib (66),(evo)
-0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo)
+0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo) | vrndscalesh Vx,Hx,Wx,Ib (evo)
0b: vroundsd Vsd,Wsd,Ib (66),(v1) | vrndscalesd Vx,Hx,Wx,Ib (66),(evo)
0c: vblendps Vx,Hx,Wx,Ib (66)
0d: vblendpd Vx,Hx,Wx,Ib (66)
@@ -846,8 +852,8 @@ AVXcode: 3
22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1)
23: vshuff32x4/64x2 Vx,Hx,Wx,Ib (66),(ev)
25: vpternlogd/q Vx,Hx,Wx,Ib (66),(ev)
-26: vgetmantps/d Vx,Wx,Ib (66),(ev)
-27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev)
+26: vgetmantps/d Vx,Wx,Ib (66),(ev) | vgetmantph Vx,Wx,Ib (ev)
+27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev) | vgetmantsh Vx,Hx,Wx,Ib (ev)
30: kshiftrb/w Vk,Uk,Ib (66),(v)
31: kshiftrd/q Vk,Uk,Ib (66),(v)
32: kshiftlb/w Vk,Uk,Ib (66),(v)
@@ -871,23 +877,102 @@ AVXcode: 3
51: vrangess/d Vx,Hx,Wx,Ib (66),(ev)
54: vfixupimmps/d Vx,Hx,Wx,Ib (66),(ev)
55: vfixupimmss/d Vx,Hx,Wx,Ib (66),(ev)
-56: vreduceps/d Vx,Wx,Ib (66),(ev)
-57: vreducess/d Vx,Hx,Wx,Ib (66),(ev)
+56: vreduceps/d Vx,Wx,Ib (66),(ev) | vreduceph Vx,Wx,Ib (ev)
+57: vreducess/d Vx,Hx,Wx,Ib (66),(ev) | vreducesh Vx,Hx,Wx,Ib (ev)
60: vpcmpestrm Vdq,Wdq,Ib (66),(v1)
61: vpcmpestri Vdq,Wdq,Ib (66),(v1)
62: vpcmpistrm Vdq,Wdq,Ib (66),(v1)
63: vpcmpistri Vdq,Wdq,Ib (66),(v1)
-66: vfpclassps/d Vk,Wx,Ib (66),(ev)
-67: vfpclassss/d Vk,Wx,Ib (66),(ev)
+66: vfpclassps/d Vk,Wx,Ib (66),(ev) | vfpclassph Vx,Wx,Ib (ev)
+67: vfpclassss/d Vk,Wx,Ib (66),(ev) | vfpclasssh Vx,Wx,Ib (ev)
70: vpshldw Vx,Hx,Wx,Ib (66),(ev)
71: vpshldd/q Vx,Hx,Wx,Ib (66),(ev)
72: vpshrdw Vx,Hx,Wx,Ib (66),(ev)
73: vpshrdd/q Vx,Hx,Wx,Ib (66),(ev)
+c2: vcmpph Vx,Hx,Wx,Ib (ev) | vcmpsh Vx,Hx,Wx,Ib (F3),(ev)
cc: sha1rnds4 Vdq,Wdq,Ib
ce: vgf2p8affineqb Vx,Wx,Ib (66)
cf: vgf2p8affineinvqb Vx,Wx,Ib (66)
df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1)
-f0: RORX Gy,Ey,Ib (F2),(v)
+f0: RORX Gy,Ey,Ib (F2),(v) | HRESET Gv,Ib (F3),(000),(11B)
+EndTable
+
+Table: EVEX map 5
+Referrer:
+AVXcode: 5
+10: vmovsh Vx,Hx,Wx (F3),(ev) | vmovsh Vx,Wx (F3),(ev)
+11: vmovsh Wx,Hx,Vx (F3),(ev) | vmovsh Wx,Vx (F3),(ev)
+1d: vcvtps2phx Vx,Wx (66),(ev) | vcvtss2sh Vx,Hx,Wx (ev)
+2a: vcvtsi2sh Vx,Hx,Wx (F3),(ev)
+2c: vcvttsh2si Vx,Wx (F3),(ev)
+2d: vcvtsh2si Vx,Wx (F3),(ev)
+2e: vucomish Vx,Wx (ev)
+2f: vcomish Vx,Wx (ev)
+51: vsqrtph Vx,Wx (ev) | vsqrtsh Vx,Hx,Wx (F3),(ev)
+58: vaddph Vx,Hx,Wx (ev) | vaddsh Vx,Hx,Wx (F3),(ev)
+59: vmulph Vx,Hx,Wx (ev) | vmulsh Vx,Hx,Wx (F3),(ev)
+5a: vcvtpd2ph Vx,Wx (66),(ev) | vcvtph2pd Vx,Wx (ev) | vcvtsd2sh Vx,Hx,Wx (F2),(ev) | vcvtsh2sd Vx,Hx,Wx (F3),(ev)
+5b: vcvtdq2ph Vx,Wx (ev) | vcvtph2dq Vx,Wx (66),(ev) | vcvtqq2ph Vx,Wx (ev) | vcvttph2dq Vx,Wx (F3),(ev)
+5c: vsubph Vx,Hx,Wx (ev) | vsubsh Vx,Hx,Wx (F3),(ev)
+5d: vminph Vx,Hx,Wx (ev) | vminsh Vx,Hx,Wx (F3),(ev)
+5e: vdivph Vx,Hx,Wx (ev) | vdivsh Vx,Hx,Wx (F3),(ev)
+5f: vmaxph Vx,Hx,Wx (ev) | vmaxsh Vx,Hx,Wx (F3),(ev)
+6e: vmovw Vx,Wx (66),(ev)
+78: vcvttph2udq Vx,Wx (ev) | vcvttph2uqq Vx,Wx (66),(ev) | vcvttsh2usi Vx,Wx (F3),(ev)
+79: vcvtph2udq Vx,Wx (ev) | vcvtph2uqq Vx,Wx (66),(ev) | vcvtsh2usi Vx,Wx (F3),(ev)
+7a: vcvttph2qq Vx,Wx (66),(ev) | vcvtudq2ph Vx,Wx (F2),(ev) | vcvtuqq2ph Vx,Wx (F2),(ev)
+7b: vcvtph2qq Vx,Wx (66),(ev) | vcvtusi2sh Vx,Hx,Wx (F3),(ev)
+7c: vcvttph2uw Vx,Wx (ev) | vcvttph2w Vx,Wx (66),(ev)
+7d: vcvtph2uw Vx,Wx (ev) | vcvtph2w Vx,Wx (66),(ev) | vcvtuw2ph Vx,Wx (F2),(ev) | vcvtw2ph Vx,Wx (F3),(ev)
+7e: vmovw Wx,Vx (66),(ev)
+EndTable
+
+Table: EVEX map 6
+Referrer:
+AVXcode: 6
+13: vcvtph2psx Vx,Wx (66),(ev) | vcvtsh2ss Vx,Hx,Wx (ev)
+2c: vscalefph Vx,Hx,Wx (66),(ev)
+2d: vscalefsh Vx,Hx,Wx (66),(ev)
+42: vgetexpph Vx,Wx (66),(ev)
+43: vgetexpsh Vx,Hx,Wx (66),(ev)
+4c: vrcpph Vx,Wx (66),(ev)
+4d: vrcpsh Vx,Hx,Wx (66),(ev)
+4e: vrsqrtph Vx,Wx (66),(ev)
+4f: vrsqrtsh Vx,Hx,Wx (66),(ev)
+56: vfcmaddcph Vx,Hx,Wx (F2),(ev) | vfmaddcph Vx,Hx,Wx (F3),(ev)
+57: vfcmaddcsh Vx,Hx,Wx (F2),(ev) | vfmaddcsh Vx,Hx,Wx (F3),(ev)
+96: vfmaddsub132ph Vx,Hx,Wx (66),(ev)
+97: vfmsubadd132ph Vx,Hx,Wx (66),(ev)
+98: vfmadd132ph Vx,Hx,Wx (66),(ev)
+99: vfmadd132sh Vx,Hx,Wx (66),(ev)
+9a: vfmsub132ph Vx,Hx,Wx (66),(ev)
+9b: vfmsub132sh Vx,Hx,Wx (66),(ev)
+9c: vfnmadd132ph Vx,Hx,Wx (66),(ev)
+9d: vfnmadd132sh Vx,Hx,Wx (66),(ev)
+9e: vfnmsub132ph Vx,Hx,Wx (66),(ev)
+9f: vfnmsub132sh Vx,Hx,Wx (66),(ev)
+a6: vfmaddsub213ph Vx,Hx,Wx (66),(ev)
+a7: vfmsubadd213ph Vx,Hx,Wx (66),(ev)
+a8: vfmadd213ph Vx,Hx,Wx (66),(ev)
+a9: vfmadd213sh Vx,Hx,Wx (66),(ev)
+aa: vfmsub213ph Vx,Hx,Wx (66),(ev)
+ab: vfmsub213sh Vx,Hx,Wx (66),(ev)
+ac: vfnmadd213ph Vx,Hx,Wx (66),(ev)
+ad: vfnmadd213sh Vx,Hx,Wx (66),(ev)
+ae: vfnmsub213ph Vx,Hx,Wx (66),(ev)
+af: vfnmsub213sh Vx,Hx,Wx (66),(ev)
+b6: vfmaddsub231ph Vx,Hx,Wx (66),(ev)
+b7: vfmsubadd231ph Vx,Hx,Wx (66),(ev)
+b8: vfmadd231ph Vx,Hx,Wx (66),(ev)
+b9: vfmadd231sh Vx,Hx,Wx (66),(ev)
+ba: vfmsub231ph Vx,Hx,Wx (66),(ev)
+bb: vfmsub231sh Vx,Hx,Wx (66),(ev)
+bc: vfnmadd231ph Vx,Hx,Wx (66),(ev)
+bd: vfnmadd231sh Vx,Hx,Wx (66),(ev)
+be: vfnmsub231ph Vx,Hx,Wx (66),(ev)
+bf: vfnmsub231sh Vx,Hx,Wx (66),(ev)
+d6: vfcmulcph Vx,Hx,Wx (F2),(ev) | vfmulcph Vx,Hx,Wx (F3),(ev)
+d7: vfcmulcsh Vx,Hx,Wx (F2),(ev) | vfmulcsh Vx,Hx,Wx (F3),(ev)
EndTable
GrpTable: Grp1
@@ -970,7 +1055,7 @@ GrpTable: Grp7
2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B)
3: LIDT Ms
4: SMSW Mw/Rv
-5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B)
+5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B) | CLUI (F3),(110),(11B) | SERIALIZE (000),(11B) | STUI (F3),(111),(11B) | TESTUI (F3)(101)(11B) | UIRET (F3),(100),(11B) | XRESLDTRK (F2),(000),(11B) | XSUSLDTRK (F2),(001),(11B)
6: LMSW Ew
7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B)
EndTable
@@ -987,7 +1072,7 @@ GrpTable: Grp9
3: xrstors
4: xsavec
5: xsaves
-6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B)
+6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) | SENDUIPI Gq (F3)
7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B)
EndTable