aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/crypto/blowfish-x86_64-asm_64.S48
-rw-r--r--arch/x86/crypto/camellia-x86_64-asm_64.S26
-rw-r--r--arch/x86/crypto/cast5-avx-x86_64-asm_64.S47
-rw-r--r--arch/x86/crypto/cast6-avx-x86_64-asm_64.S50
-rw-r--r--arch/x86/crypto/des3_ede-asm_64.S15
-rw-r--r--arch/x86/crypto/sha1_avx2_x86_64_asm.S4
-rw-r--r--arch/x86/crypto/sha1_ssse3_asm.S11
-rw-r--r--arch/x86/crypto/sha256-avx-asm.S15
-rw-r--r--arch/x86/crypto/sha256-avx2-asm.S22
-rw-r--r--arch/x86/crypto/sha256-ssse3-asm.S15
-rw-r--r--arch/x86/crypto/sha512-avx2-asm.S75
-rw-r--r--arch/x86/crypto/twofish-avx-x86_64-asm_64.S12
-rw-r--r--arch/x86/events/intel/core.c11
-rw-r--r--arch/x86/events/intel/cstate.c4
-rw-r--r--arch/x86/events/intel/rapl.c3
-rw-r--r--arch/x86/events/intel/uncore_snbep.c4
-rw-r--r--arch/x86/events/msr.c8
-rw-r--r--arch/x86/ia32/ia32_aout.c4
-rw-r--r--arch/x86/ia32/ia32_signal.c2
-rw-r--r--arch/x86/include/asm/alternative.h3
-rw-r--r--arch/x86/include/asm/asm.h15
-rw-r--r--arch/x86/include/asm/desc.h3
-rw-r--r--arch/x86/include/asm/fpu/internal.h90
-rw-r--r--arch/x86/include/asm/fpu/types.h32
-rw-r--r--arch/x86/include/asm/fpu/xstate.h12
-rw-r--r--arch/x86/include/asm/kvm_host.h3
-rw-r--r--arch/x86/include/asm/kvm_para.h4
-rw-r--r--arch/x86/include/asm/mmu_context.h32
-rw-r--r--arch/x86/include/asm/mshyperv.h10
-rw-r--r--arch/x86/include/asm/paravirt.h37
-rw-r--r--arch/x86/include/asm/paravirt_types.h23
-rw-r--r--arch/x86/include/asm/pgtable.h27
-rw-r--r--arch/x86/include/asm/preempt.h15
-rw-r--r--arch/x86/include/asm/processor.h6
-rw-r--r--arch/x86/include/asm/rwsem.h4
-rw-r--r--arch/x86/include/asm/special_insns.h10
-rw-r--r--arch/x86/include/asm/thread_info.h11
-rw-r--r--arch/x86/include/asm/trace/fpu.h11
-rw-r--r--arch/x86/include/asm/uaccess.h6
-rw-r--r--arch/x86/include/asm/xen/hypercall.h9
-rw-r--r--arch/x86/include/uapi/asm/hyperv.h6
-rw-r--r--arch/x86/kernel/acpi/boot.c4
-rw-r--r--arch/x86/kernel/apm_32.c2
-rw-r--r--arch/x86/kernel/cpu/amd.c11
-rw-r--r--arch/x86/kernel/cpu/bugs.c8
-rw-r--r--arch/x86/kernel/cpu/common.c57
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c4
-rw-r--r--arch/x86/kernel/fpu/core.c155
-rw-r--r--arch/x86/kernel/fpu/init.c2
-rw-r--r--arch/x86/kernel/fpu/regset.c48
-rw-r--r--arch/x86/kernel/fpu/signal.c37
-rw-r--r--arch/x86/kernel/fpu/xstate.c264
-rw-r--r--arch/x86/kernel/io_delay.c2
-rw-r--r--arch/x86/kernel/irq_32.c6
-rw-r--r--arch/x86/kernel/ksysfs.c2
-rw-r--r--arch/x86/kernel/kvm.c15
-rw-r--r--arch/x86/kernel/paravirt.c5
-rw-r--r--arch/x86/kernel/reboot.c2
-rw-r--r--arch/x86/kernel/setup.c5
-rw-r--r--arch/x86/kernel/signal.c6
-rw-r--r--arch/x86/kernel/smpboot.c21
-rw-r--r--arch/x86/kernel/traps.c2
-rw-r--r--arch/x86/kvm/Kconfig1
-rw-r--r--arch/x86/kvm/cpuid.h1
-rw-r--r--arch/x86/kvm/emulate.c17
-rw-r--r--arch/x86/kvm/lapic.c4
-rw-r--r--arch/x86/kvm/mmu.c2
-rw-r--r--arch/x86/kvm/svm.c38
-rw-r--r--arch/x86/kvm/vmx.c412
-rw-r--r--arch/x86/kvm/x86.c53
-rw-r--r--arch/x86/math-emu/fpu_entry.c2
-rw-r--r--arch/x86/mm/extable.c24
-rw-r--r--arch/x86/mm/fault.c50
-rw-r--r--arch/x86/mm/init.c34
-rw-r--r--arch/x86/mm/mem_encrypt.c2
-rw-r--r--arch/x86/mm/pgtable.c7
-rw-r--r--arch/x86/mm/pkeys.c3
-rw-r--r--arch/x86/mm/tlb.c31
-rw-r--r--arch/x86/net/bpf_jit_comp.c4
-rw-r--r--arch/x86/pci/irq.c2
-rw-r--r--arch/x86/power/cpu.c2
-rw-r--r--arch/x86/power/hibernate_64.c21
-rw-r--r--arch/x86/um/os-Linux/registers.c18
-rw-r--r--arch/x86/um/os-Linux/tls.c2
-rw-r--r--arch/x86/um/user-offsets.c2
-rw-r--r--arch/x86/xen/enlighten_pv.c2
-rw-r--r--arch/x86/xen/mmu_pv.c17
87 files changed, 1163 insertions, 991 deletions
diff --git a/arch/x86/crypto/blowfish-x86_64-asm_64.S b/arch/x86/crypto/blowfish-x86_64-asm_64.S
index 246c67006ed0..8c1fcb6bad21 100644
--- a/arch/x86/crypto/blowfish-x86_64-asm_64.S
+++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S
@@ -33,7 +33,7 @@
#define s3 ((16 + 2 + (3 * 256)) * 4)
/* register macros */
-#define CTX %rdi
+#define CTX %r12
#define RIO %rsi
#define RX0 %rax
@@ -56,12 +56,12 @@
#define RX2bh %ch
#define RX3bh %dh
-#define RT0 %rbp
+#define RT0 %rdi
#define RT1 %rsi
#define RT2 %r8
#define RT3 %r9
-#define RT0d %ebp
+#define RT0d %edi
#define RT1d %esi
#define RT2d %r8d
#define RT3d %r9d
@@ -120,13 +120,14 @@
ENTRY(__blowfish_enc_blk)
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* %rsi: dst
* %rdx: src
* %rcx: bool, if true: xor output
*/
- movq %rbp, %r11;
+ movq %r12, %r11;
+ movq %rdi, CTX;
movq %rsi, %r10;
movq %rdx, RIO;
@@ -142,7 +143,7 @@ ENTRY(__blowfish_enc_blk)
round_enc(14);
add_roundkey_enc(16);
- movq %r11, %rbp;
+ movq %r11, %r12;
movq %r10, RIO;
test %cl, %cl;
@@ -157,12 +158,13 @@ ENDPROC(__blowfish_enc_blk)
ENTRY(blowfish_dec_blk)
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* %rsi: dst
* %rdx: src
*/
- movq %rbp, %r11;
+ movq %r12, %r11;
+ movq %rdi, CTX;
movq %rsi, %r10;
movq %rdx, RIO;
@@ -181,7 +183,7 @@ ENTRY(blowfish_dec_blk)
movq %r10, RIO;
write_block();
- movq %r11, %rbp;
+ movq %r11, %r12;
ret;
ENDPROC(blowfish_dec_blk)
@@ -298,20 +300,21 @@ ENDPROC(blowfish_dec_blk)
ENTRY(__blowfish_enc_blk_4way)
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* %rsi: dst
* %rdx: src
* %rcx: bool, if true: xor output
*/
- pushq %rbp;
+ pushq %r12;
pushq %rbx;
pushq %rcx;
- preload_roundkey_enc(0);
-
+ movq %rdi, CTX
movq %rsi, %r11;
movq %rdx, RIO;
+ preload_roundkey_enc(0);
+
read_block4();
round_enc4(0);
@@ -324,39 +327,40 @@ ENTRY(__blowfish_enc_blk_4way)
round_enc4(14);
add_preloaded_roundkey4();
- popq %rbp;
+ popq %r12;
movq %r11, RIO;
- test %bpl, %bpl;
+ test %r12b, %r12b;
jnz .L__enc_xor4;
write_block4();
popq %rbx;
- popq %rbp;
+ popq %r12;
ret;
.L__enc_xor4:
xor_block4();
popq %rbx;
- popq %rbp;
+ popq %r12;
ret;
ENDPROC(__blowfish_enc_blk_4way)
ENTRY(blowfish_dec_blk_4way)
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* %rsi: dst
* %rdx: src
*/
- pushq %rbp;
+ pushq %r12;
pushq %rbx;
- preload_roundkey_dec(17);
- movq %rsi, %r11;
+ movq %rdi, CTX;
+ movq %rsi, %r11
movq %rdx, RIO;
+ preload_roundkey_dec(17);
read_block4();
round_dec4(17);
@@ -373,7 +377,7 @@ ENTRY(blowfish_dec_blk_4way)
write_block4();
popq %rbx;
- popq %rbp;
+ popq %r12;
ret;
ENDPROC(blowfish_dec_blk_4way)
diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S
index 310319c601ed..95ba6956a7f6 100644
--- a/arch/x86/crypto/camellia-x86_64-asm_64.S
+++ b/arch/x86/crypto/camellia-x86_64-asm_64.S
@@ -75,17 +75,17 @@
#define RCD1bh %dh
#define RT0 %rsi
-#define RT1 %rbp
+#define RT1 %r12
#define RT2 %r8
#define RT0d %esi
-#define RT1d %ebp
+#define RT1d %r12d
#define RT2d %r8d
#define RT2bl %r8b
#define RXOR %r9
-#define RRBP %r10
+#define RR12 %r10
#define RDST %r11
#define RXORd %r9d
@@ -197,7 +197,7 @@ ENTRY(__camellia_enc_blk)
* %rdx: src
* %rcx: bool xor
*/
- movq %rbp, RRBP;
+ movq %r12, RR12;
movq %rcx, RXOR;
movq %rsi, RDST;
@@ -227,13 +227,13 @@ ENTRY(__camellia_enc_blk)
enc_outunpack(mov, RT1);
- movq RRBP, %rbp;
+ movq RR12, %r12;
ret;
.L__enc_xor:
enc_outunpack(xor, RT1);
- movq RRBP, %rbp;
+ movq RR12, %r12;
ret;
ENDPROC(__camellia_enc_blk)
@@ -248,7 +248,7 @@ ENTRY(camellia_dec_blk)
movl $24, RXORd;
cmovel RXORd, RT2d; /* max */
- movq %rbp, RRBP;
+ movq %r12, RR12;
movq %rsi, RDST;
movq %rdx, RIO;
@@ -271,7 +271,7 @@ ENTRY(camellia_dec_blk)
dec_outunpack();
- movq RRBP, %rbp;
+ movq RR12, %r12;
ret;
ENDPROC(camellia_dec_blk)
@@ -433,7 +433,7 @@ ENTRY(__camellia_enc_blk_2way)
*/
pushq %rbx;
- movq %rbp, RRBP;
+ movq %r12, RR12;
movq %rcx, RXOR;
movq %rsi, RDST;
movq %rdx, RIO;
@@ -461,14 +461,14 @@ ENTRY(__camellia_enc_blk_2way)
enc_outunpack2(mov, RT2);
- movq RRBP, %rbp;
+ movq RR12, %r12;
popq %rbx;
ret;
.L__enc2_xor:
enc_outunpack2(xor, RT2);
- movq RRBP, %rbp;
+ movq RR12, %r12;
popq %rbx;
ret;
ENDPROC(__camellia_enc_blk_2way)
@@ -485,7 +485,7 @@ ENTRY(camellia_dec_blk_2way)
cmovel RXORd, RT2d; /* max */
movq %rbx, RXOR;
- movq %rbp, RRBP;
+ movq %r12, RR12;
movq %rsi, RDST;
movq %rdx, RIO;
@@ -508,7 +508,7 @@ ENTRY(camellia_dec_blk_2way)
dec_outunpack2();
- movq RRBP, %rbp;
+ movq RR12, %r12;
movq RXOR, %rbx;
ret;
ENDPROC(camellia_dec_blk_2way)
diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
index b4a8806234ea..86107c961bb4 100644
--- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
@@ -47,7 +47,7 @@
/**********************************************************************
16-way AVX cast5
**********************************************************************/
-#define CTX %rdi
+#define CTX %r15
#define RL1 %xmm0
#define RR1 %xmm1
@@ -70,8 +70,8 @@
#define RTMP %xmm15
-#define RID1 %rbp
-#define RID1d %ebp
+#define RID1 %rdi
+#define RID1d %edi
#define RID2 %rsi
#define RID2d %esi
@@ -226,7 +226,7 @@
.align 16
__cast5_enc_blk16:
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* RL1: blocks 1 and 2
* RR1: blocks 3 and 4
* RL2: blocks 5 and 6
@@ -246,9 +246,11 @@ __cast5_enc_blk16:
* RR4: encrypted blocks 15 and 16
*/
- pushq %rbp;
+ pushq %r15;
pushq %rbx;
+ movq %rdi, CTX;
+
vmovdqa .Lbswap_mask, RKM;
vmovd .Lfirst_mask, R1ST;
vmovd .L32_mask, R32;
@@ -283,7 +285,7 @@ __cast5_enc_blk16:
.L__skip_enc:
popq %rbx;
- popq %rbp;
+ popq %r15;
vmovdqa .Lbswap_mask, RKM;
@@ -298,7 +300,7 @@ ENDPROC(__cast5_enc_blk16)
.align 16
__cast5_dec_blk16:
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* RL1: encrypted blocks 1 and 2
* RR1: encrypted blocks 3 and 4
* RL2: encrypted blocks 5 and 6
@@ -318,9 +320,11 @@ __cast5_dec_blk16:
* RR4: decrypted blocks 15 and 16
*/
- pushq %rbp;
+ pushq %r15;
pushq %rbx;
+ movq %rdi, CTX;
+
vmovdqa .Lbswap_mask, RKM;
vmovd .Lfirst_mask, R1ST;
vmovd .L32_mask, R32;
@@ -356,7 +360,7 @@ __cast5_dec_blk16:
vmovdqa .Lbswap_mask, RKM;
popq %rbx;
- popq %rbp;
+ popq %r15;
outunpack_blocks(RR1, RL1, RTMP, RX, RKM);
outunpack_blocks(RR2, RL2, RTMP, RX, RKM);
@@ -372,12 +376,14 @@ ENDPROC(__cast5_dec_blk16)
ENTRY(cast5_ecb_enc_16way)
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* %rsi: dst
* %rdx: src
*/
FRAME_BEGIN
+ pushq %r15;
+ movq %rdi, CTX;
movq %rsi, %r11;
vmovdqu (0*4*4)(%rdx), RL1;
@@ -400,18 +406,22 @@ ENTRY(cast5_ecb_enc_16way)
vmovdqu RR4, (6*4*4)(%r11);
vmovdqu RL4, (7*4*4)(%r11);
+ popq %r15;
FRAME_END
ret;
ENDPROC(cast5_ecb_enc_16way)
ENTRY(cast5_ecb_dec_16way)
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* %rsi: dst
* %rdx: src
*/
FRAME_BEGIN
+ pushq %r15;
+
+ movq %rdi, CTX;
movq %rsi, %r11;
vmovdqu (0*4*4)(%rdx), RL1;
@@ -434,20 +444,22 @@ ENTRY(cast5_ecb_dec_16way)
vmovdqu RR4, (6*4*4)(%r11);
vmovdqu RL4, (7*4*4)(%r11);
+ popq %r15;
FRAME_END
ret;
ENDPROC(cast5_ecb_dec_16way)
ENTRY(cast5_cbc_dec_16way)
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* %rsi: dst
* %rdx: src
*/
FRAME_BEGIN
-
pushq %r12;
+ pushq %r15;
+ movq %rdi, CTX;
movq %rsi, %r11;
movq %rdx, %r12;
@@ -483,23 +495,24 @@ ENTRY(cast5_cbc_dec_16way)
vmovdqu RR4, (6*16)(%r11);
vmovdqu RL4, (7*16)(%r11);
+ popq %r15;
popq %r12;
-
FRAME_END
ret;
ENDPROC(cast5_cbc_dec_16way)
ENTRY(cast5_ctr_16way)
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* %rsi: dst
* %rdx: src
* %rcx: iv (big endian, 64bit)
*/
FRAME_BEGIN
-
pushq %r12;
+ pushq %r15;
+ movq %rdi, CTX;
movq %rsi, %r11;
movq %rdx, %r12;
@@ -558,8 +571,8 @@ ENTRY(cast5_ctr_16way)
vmovdqu RR4, (6*16)(%r11);
vmovdqu RL4, (7*16)(%r11);
+ popq %r15;
popq %r12;
-
FRAME_END
ret;
ENDPROC(cast5_ctr_16way)
diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
index 952d3156a933..7f30b6f0d72c 100644
--- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
@@ -47,7 +47,7 @@
/**********************************************************************
8-way AVX cast6
**********************************************************************/
-#define CTX %rdi
+#define CTX %r15
#define RA1 %xmm0
#define RB1 %xmm1
@@ -70,8 +70,8 @@
#define RTMP %xmm15
-#define RID1 %rbp
-#define RID1d %ebp
+#define RID1 %rdi
+#define RID1d %edi
#define RID2 %rsi
#define RID2d %esi
@@ -264,15 +264,17 @@
.align 8
__cast6_enc_blk8:
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks
* output:
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
*/
- pushq %rbp;
+ pushq %r15;
pushq %rbx;
+ movq %rdi, CTX;
+
vmovdqa .Lbswap_mask, RKM;
vmovd .Lfirst_mask, R1ST;
vmovd .L32_mask, R32;
@@ -297,7 +299,7 @@ __cast6_enc_blk8:
QBAR(11);
popq %rbx;
- popq %rbp;
+ popq %r15;
vmovdqa .Lbswap_mask, RKM;
@@ -310,15 +312,17 @@ ENDPROC(__cast6_enc_blk8)
.align 8
__cast6_dec_blk8:
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
* output:
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: decrypted blocks
*/
- pushq %rbp;
+ pushq %r15;
pushq %rbx;
+ movq %rdi, CTX;
+
vmovdqa .Lbswap_mask, RKM;
vmovd .Lfirst_mask, R1ST;
vmovd .L32_mask, R32;
@@ -343,7 +347,7 @@ __cast6_dec_blk8:
QBAR(0);
popq %rbx;
- popq %rbp;
+ popq %r15;
vmovdqa .Lbswap_mask, RKM;
outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
@@ -354,12 +358,14 @@ ENDPROC(__cast6_dec_blk8)
ENTRY(cast6_ecb_enc_8way)
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* %rsi: dst
* %rdx: src
*/
FRAME_BEGIN
+ pushq %r15;
+ movq %rdi, CTX;
movq %rsi, %r11;
load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
@@ -368,18 +374,21 @@ ENTRY(cast6_ecb_enc_8way)
store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+ popq %r15;
FRAME_END
ret;
ENDPROC(cast6_ecb_enc_8way)
ENTRY(cast6_ecb_dec_8way)
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* %rsi: dst
* %rdx: src
*/
FRAME_BEGIN
+ pushq %r15;
+ movq %rdi, CTX;
movq %rsi, %r11;
load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
@@ -388,20 +397,22 @@ ENTRY(cast6_ecb_dec_8way)
store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+ popq %r15;
FRAME_END
ret;
ENDPROC(cast6_ecb_dec_8way)
ENTRY(cast6_cbc_dec_8way)
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* %rsi: dst
* %rdx: src
*/
FRAME_BEGIN
-
pushq %r12;
+ pushq %r15;
+ movq %rdi, CTX;
movq %rsi, %r11;
movq %rdx, %r12;
@@ -411,8 +422,8 @@ ENTRY(cast6_cbc_dec_8way)
store_cbc_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+ popq %r15;
popq %r12;
-
FRAME_END
ret;
ENDPROC(cast6_cbc_dec_8way)
@@ -425,9 +436,10 @@ ENTRY(cast6_ctr_8way)
* %rcx: iv (little endian, 128bit)
*/
FRAME_BEGIN
-
pushq %r12;
+ pushq %r15
+ movq %rdi, CTX;
movq %rsi, %r11;
movq %rdx, %r12;
@@ -438,8 +450,8 @@ ENTRY(cast6_ctr_8way)
store_ctr_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+ popq %r15;
popq %r12;
-
FRAME_END
ret;
ENDPROC(cast6_ctr_8way)
@@ -452,7 +464,9 @@ ENTRY(cast6_xts_enc_8way)
* %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
*/
FRAME_BEGIN
+ pushq %r15;
+ movq %rdi, CTX
movq %rsi, %r11;
/* regs <= src, dst <= IVs, regs <= regs xor IVs */
@@ -464,6 +478,7 @@ ENTRY(cast6_xts_enc_8way)
/* dst <= regs xor IVs(in dst) */
store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+ popq %r15;
FRAME_END
ret;
ENDPROC(cast6_xts_enc_8way)
@@ -476,7 +491,9 @@ ENTRY(cast6_xts_dec_8way)
* %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
*/
FRAME_BEGIN
+ pushq %r15;
+ movq %rdi, CTX
movq %rsi, %r11;
/* regs <= src, dst <= IVs, regs <= regs xor IVs */
@@ -488,6 +505,7 @@ ENTRY(cast6_xts_dec_8way)
/* dst <= regs xor IVs(in dst) */
store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+ popq %r15;
FRAME_END
ret;
ENDPROC(cast6_xts_dec_8way)
diff --git a/arch/x86/crypto/des3_ede-asm_64.S b/arch/x86/crypto/des3_ede-asm_64.S
index f3e91647ca27..8e49ce117494 100644
--- a/arch/x86/crypto/des3_ede-asm_64.S
+++ b/arch/x86/crypto/des3_ede-asm_64.S
@@ -64,12 +64,12 @@
#define RW2bh %ch
#define RT0 %r15
-#define RT1 %rbp
+#define RT1 %rsi
#define RT2 %r14
#define RT3 %rdx
#define RT0d %r15d
-#define RT1d %ebp
+#define RT1d %esi
#define RT2d %r14d
#define RT3d %edx
@@ -177,13 +177,14 @@ ENTRY(des3_ede_x86_64_crypt_blk)
* %rsi: dst
* %rdx: src
*/
- pushq %rbp;
pushq %rbx;
pushq %r12;
pushq %r13;
pushq %r14;
pushq %r15;
+ pushq %rsi; /* dst */
+
read_block(%rdx, RL0, RR0);
initial_permutation(RL0, RR0);
@@ -241,6 +242,8 @@ ENTRY(des3_ede_x86_64_crypt_blk)
round1(32+15, RL0, RR0, dummy2);
final_permutation(RR0, RL0);
+
+ popq %rsi /* dst */
write_block(%rsi, RR0, RL0);
popq %r15;
@@ -248,7 +251,6 @@ ENTRY(des3_ede_x86_64_crypt_blk)
popq %r13;
popq %r12;
popq %rbx;
- popq %rbp;
ret;
ENDPROC(des3_ede_x86_64_crypt_blk)
@@ -432,13 +434,14 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
* %rdx: src (3 blocks)
*/
- pushq %rbp;
pushq %rbx;
pushq %r12;
pushq %r13;
pushq %r14;
pushq %r15;
+ pushq %rsi /* dst */
+
/* load input */
movl 0 * 4(%rdx), RL0d;
movl 1 * 4(%rdx), RR0d;
@@ -520,6 +523,7 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
bswapl RR2d;
bswapl RL2d;
+ popq %rsi /* dst */
movl RR0d, 0 * 4(%rsi);
movl RL0d, 1 * 4(%rsi);
movl RR1d, 2 * 4(%rsi);
@@ -532,7 +536,6 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
popq %r13;
popq %r12;
popq %rbx;
- popq %rbp;
ret;
ENDPROC(des3_ede_x86_64_crypt_blk_3way)
diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
index 1eab79c9ac48..9f712a7dfd79 100644
--- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S
+++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
@@ -89,7 +89,7 @@
#define REG_RE %rdx
#define REG_RTA %r12
#define REG_RTB %rbx
-#define REG_T1 %ebp
+#define REG_T1 %r11d
#define xmm_mov vmovups
#define avx2_zeroupper vzeroupper
#define RND_F1 1
@@ -637,7 +637,6 @@ _loop3:
ENTRY(\name)
push %rbx
- push %rbp
push %r12
push %r13
push %r14
@@ -673,7 +672,6 @@ _loop3:
pop %r14
pop %r13
pop %r12
- pop %rbp
pop %rbx
ret
diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S
index a4109506a5e8..6204bd53528c 100644
--- a/arch/x86/crypto/sha1_ssse3_asm.S
+++ b/arch/x86/crypto/sha1_ssse3_asm.S
@@ -37,7 +37,7 @@
#define REG_A %ecx
#define REG_B %esi
#define REG_C %edi
-#define REG_D %ebp
+#define REG_D %r12d
#define REG_E %edx
#define REG_T1 %eax
@@ -74,10 +74,10 @@
ENTRY(\name)
push %rbx
- push %rbp
push %r12
+ push %rbp
+ mov %rsp, %rbp
- mov %rsp, %r12
sub $64, %rsp # allocate workspace
and $~15, %rsp # align stack
@@ -99,10 +99,9 @@
xor %rax, %rax
rep stosq
- mov %r12, %rsp # deallocate workspace
-
- pop %r12
+ mov %rbp, %rsp # deallocate workspace
pop %rbp
+ pop %r12
pop %rbx
ret
diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S
index e08888a1a5f2..001bbcf93c79 100644
--- a/arch/x86/crypto/sha256-avx-asm.S
+++ b/arch/x86/crypto/sha256-avx-asm.S
@@ -103,7 +103,7 @@ SRND = %rsi # clobbers INP
c = %ecx
d = %r8d
e = %edx
-TBL = %rbp
+TBL = %r12
a = %eax
b = %ebx
@@ -350,13 +350,13 @@ a = TMP_
ENTRY(sha256_transform_avx)
.align 32
pushq %rbx
- pushq %rbp
+ pushq %r12
pushq %r13
pushq %r14
pushq %r15
- pushq %r12
+ pushq %rbp
+ movq %rsp, %rbp
- mov %rsp, %r12
subq $STACK_SIZE, %rsp # allocate stack space
and $~15, %rsp # align stack pointer
@@ -452,13 +452,12 @@ loop2:
done_hash:
- mov %r12, %rsp
-
- popq %r12
+ mov %rbp, %rsp
+ popq %rbp
popq %r15
popq %r14
popq %r13
- popq %rbp
+ popq %r12
popq %rbx
ret
ENDPROC(sha256_transform_avx)
diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S
index 89c8f09787d2..1420db15dcdd 100644
--- a/arch/x86/crypto/sha256-avx2-asm.S
+++ b/arch/x86/crypto/sha256-avx2-asm.S
@@ -98,8 +98,6 @@ d = %r8d
e = %edx # clobbers NUM_BLKS
y3 = %esi # clobbers INP
-
-TBL = %rbp
SRND = CTX # SRND is same register as CTX
a = %eax
@@ -531,7 +529,6 @@ STACK_SIZE = _RSP + _RSP_SIZE
ENTRY(sha256_transform_rorx)
.align 32
pushq %rbx
- pushq %rbp
pushq %r12
pushq %r13
pushq %r14
@@ -568,8 +565,6 @@ ENTRY(sha256_transform_rorx)
mov CTX, _CTX(%rsp)
loop0:
- lea K256(%rip), TBL
-
## Load first 16 dwords from two blocks
VMOVDQ 0*32(INP),XTMP0
VMOVDQ 1*32(INP),XTMP1
@@ -597,19 +592,19 @@ last_block_enter:
.align 16
loop1:
- vpaddd 0*32(TBL, SRND), X0, XFER
+ vpaddd K256+0*32(SRND), X0, XFER
vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
FOUR_ROUNDS_AND_SCHED _XFER + 0*32
- vpaddd 1*32(TBL, SRND), X0, XFER
+ vpaddd K256+1*32(SRND), X0, XFER
vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
FOUR_ROUNDS_AND_SCHED _XFER + 1*32
- vpaddd 2*32(TBL, SRND), X0, XFER
+ vpaddd K256+2*32(SRND), X0, XFER
vmovdqa XFER, 2*32+_XFER(%rsp, SRND)
FOUR_ROUNDS_AND_SCHED _XFER + 2*32
- vpaddd 3*32(TBL, SRND), X0, XFER
+ vpaddd K256+3*32(SRND), X0, XFER
vmovdqa XFER, 3*32+_XFER(%rsp, SRND)
FOUR_ROUNDS_AND_SCHED _XFER + 3*32
@@ -619,10 +614,11 @@ loop1:
loop2:
## Do last 16 rounds with no scheduling
- vpaddd 0*32(TBL, SRND), X0, XFER
+ vpaddd K256+0*32(SRND), X0, XFER
vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
DO_4ROUNDS _XFER + 0*32
- vpaddd 1*32(TBL, SRND), X1, XFER
+
+ vpaddd K256+1*32(SRND), X1, XFER
vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
DO_4ROUNDS _XFER + 1*32
add $2*32, SRND
@@ -676,9 +672,6 @@ loop3:
ja done_hash
do_last_block:
- #### do last block
- lea K256(%rip), TBL
-
VMOVDQ 0*16(INP),XWORD0
VMOVDQ 1*16(INP),XWORD1
VMOVDQ 2*16(INP),XWORD2
@@ -718,7 +711,6 @@ done_hash:
popq %r14
popq %r13
popq %r12
- popq %rbp
popq %rbx
ret
ENDPROC(sha256_transform_rorx)
diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S
index 39b83c93e7fd..c6c05ed2c16a 100644
--- a/arch/x86/crypto/sha256-ssse3-asm.S
+++ b/arch/x86/crypto/sha256-ssse3-asm.S
@@ -95,7 +95,7 @@ SRND = %rsi # clobbers INP
c = %ecx
d = %r8d
e = %edx
-TBL = %rbp
+TBL = %r12
a = %eax
b = %ebx
@@ -356,13 +356,13 @@ a = TMP_
ENTRY(sha256_transform_ssse3)
.align 32
pushq %rbx
- pushq %rbp
+ pushq %r12
pushq %r13
pushq %r14
pushq %r15
- pushq %r12
+ pushq %rbp
+ mov %rsp, %rbp
- mov %rsp, %r12
subq $STACK_SIZE, %rsp
and $~15, %rsp
@@ -462,13 +462,12 @@ loop2:
done_hash:
- mov %r12, %rsp
-
- popq %r12
+ mov %rbp, %rsp
+ popq %rbp
popq %r15
popq %r14
popq %r13
- popq %rbp
+ popq %r12
popq %rbx
ret
diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S
index 7f5f6c6ec72e..b16d56005162 100644
--- a/arch/x86/crypto/sha512-avx2-asm.S
+++ b/arch/x86/crypto/sha512-avx2-asm.S
@@ -69,8 +69,9 @@ XFER = YTMP0
BYTE_FLIP_MASK = %ymm9
-# 1st arg
-CTX = %rdi
+# 1st arg is %rdi, which is saved to the stack and accessed later via %r12
+CTX1 = %rdi
+CTX2 = %r12
# 2nd arg
INP = %rsi
# 3rd arg
@@ -81,7 +82,7 @@ d = %r8
e = %rdx
y3 = %rsi
-TBL = %rbp
+TBL = %rdi # clobbers CTX1
a = %rax
b = %rbx
@@ -91,26 +92,26 @@ g = %r10
h = %r11
old_h = %r11
-T1 = %r12
+T1 = %r12 # clobbers CTX2
y0 = %r13
y1 = %r14
y2 = %r15
-y4 = %r12
-
# Local variables (stack frame)
XFER_SIZE = 4*8
SRND_SIZE = 1*8
INP_SIZE = 1*8
INPEND_SIZE = 1*8
+CTX_SIZE = 1*8
RSPSAVE_SIZE = 1*8
-GPRSAVE_SIZE = 6*8
+GPRSAVE_SIZE = 5*8
frame_XFER = 0
frame_SRND = frame_XFER + XFER_SIZE
frame_INP = frame_SRND + SRND_SIZE
frame_INPEND = frame_INP + INP_SIZE
-frame_RSPSAVE = frame_INPEND + INPEND_SIZE
+frame_CTX = frame_INPEND + INPEND_SIZE
+frame_RSPSAVE = frame_CTX + CTX_SIZE
frame_GPRSAVE = frame_RSPSAVE + RSPSAVE_SIZE
frame_size = frame_GPRSAVE + GPRSAVE_SIZE
@@ -576,12 +577,11 @@ ENTRY(sha512_transform_rorx)
mov %rax, frame_RSPSAVE(%rsp)
# Save GPRs
- mov %rbp, frame_GPRSAVE(%rsp)
- mov %rbx, 8*1+frame_GPRSAVE(%rsp)
- mov %r12, 8*2+frame_GPRSAVE(%rsp)
- mov %r13, 8*3+frame_GPRSAVE(%rsp)
- mov %r14, 8*4+frame_GPRSAVE(%rsp)
- mov %r15, 8*5+frame_GPRSAVE(%rsp)
+ mov %rbx, 8*0+frame_GPRSAVE(%rsp)
+ mov %r12, 8*1+frame_GPRSAVE(%rsp)
+ mov %r13, 8*2+frame_GPRSAVE(%rsp)
+ mov %r14, 8*3+frame_GPRSAVE(%rsp)
+ mov %r15, 8*4+frame_GPRSAVE(%rsp)
shl $7, NUM_BLKS # convert to bytes
jz done_hash
@@ -589,14 +589,17 @@ ENTRY(sha512_transform_rorx)
mov NUM_BLKS, frame_INPEND(%rsp)
## load initial digest
- mov 8*0(CTX),a
- mov 8*1(CTX),b
- mov 8*2(CTX),c
- mov 8*3(CTX),d
- mov 8*4(CTX),e
- mov 8*5(CTX),f
- mov 8*6(CTX),g
- mov 8*7(CTX),h
+ mov 8*0(CTX1), a
+ mov 8*1(CTX1), b
+ mov 8*2(CTX1), c
+ mov 8*3(CTX1), d
+ mov 8*4(CTX1), e
+ mov 8*5(CTX1), f
+ mov 8*6(CTX1), g
+ mov 8*7(CTX1), h
+
+ # save %rdi (CTX) before it gets clobbered
+ mov %rdi, frame_CTX(%rsp)
vmovdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), BYTE_FLIP_MASK
@@ -652,14 +655,15 @@ loop2:
subq $1, frame_SRND(%rsp)
jne loop2
- addm 8*0(CTX),a
- addm 8*1(CTX),b
- addm 8*2(CTX),c
- addm 8*3(CTX),d
- addm 8*4(CTX),e
- addm 8*5(CTX),f
- addm 8*6(CTX),g
- addm 8*7(CTX),h
+ mov frame_CTX(%rsp), CTX2
+ addm 8*0(CTX2), a
+ addm 8*1(CTX2), b
+ addm 8*2(CTX2), c
+ addm 8*3(CTX2), d
+ addm 8*4(CTX2), e
+ addm 8*5(CTX2), f
+ addm 8*6(CTX2), g
+ addm 8*7(CTX2), h
mov frame_INP(%rsp), INP
add $128, INP
@@ -669,12 +673,11 @@ loop2:
done_hash:
# Restore GPRs
- mov frame_GPRSAVE(%rsp) ,%rbp
- mov 8*1+frame_GPRSAVE(%rsp) ,%rbx
- mov 8*2+frame_GPRSAVE(%rsp) ,%r12
- mov 8*3+frame_GPRSAVE(%rsp) ,%r13
- mov 8*4+frame_GPRSAVE(%rsp) ,%r14
- mov 8*5+frame_GPRSAVE(%rsp) ,%r15
+ mov 8*0+frame_GPRSAVE(%rsp), %rbx
+ mov 8*1+frame_GPRSAVE(%rsp), %r12
+ mov 8*2+frame_GPRSAVE(%rsp), %r13
+ mov 8*3+frame_GPRSAVE(%rsp), %r14
+ mov 8*4+frame_GPRSAVE(%rsp), %r15
# Restore Stack Pointer
mov frame_RSPSAVE(%rsp), %rsp
diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
index b3f49d286348..73b471da3622 100644
--- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
@@ -76,8 +76,8 @@
#define RT %xmm14
#define RR %xmm15
-#define RID1 %rbp
-#define RID1d %ebp
+#define RID1 %r13
+#define RID1d %r13d
#define RID2 %rsi
#define RID2d %esi
@@ -259,7 +259,7 @@ __twofish_enc_blk8:
vmovdqu w(CTX), RK1;
- pushq %rbp;
+ pushq %r13;
pushq %rbx;
pushq %rcx;
@@ -282,7 +282,7 @@ __twofish_enc_blk8:
popq %rcx;
popq %rbx;
- popq %rbp;
+ popq %r13;
outunpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
@@ -301,7 +301,7 @@ __twofish_dec_blk8:
vmovdqu (w+4*4)(CTX), RK1;
- pushq %rbp;
+ pushq %r13;
pushq %rbx;
inpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
@@ -322,7 +322,7 @@ __twofish_dec_blk8:
vmovdqu (w)(CTX), RK1;
popq %rbx;
- popq %rbp;
+ popq %r13;
outunpack_blocks(RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2);
outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 829e89cfcee2..9fb9a1f1e47b 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4409,10 +4409,9 @@ static __init int fixup_ht_bug(void)
return 0;
}
- if (lockup_detector_suspend() != 0) {
- pr_debug("failed to disable PMU erratum BJ122, BV98, HSD29 workaround\n");
- return 0;
- }
+ cpus_read_lock();
+
+ hardlockup_detector_perf_stop();
x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED);
@@ -4420,9 +4419,7 @@ static __init int fixup_ht_bug(void)
x86_pmu.commit_scheduling = NULL;
x86_pmu.stop_scheduling = NULL;
- lockup_detector_resume();
-
- cpus_read_lock();
+ hardlockup_detector_perf_restart();
for_each_online_cpu(c)
free_excl_cntrs(c);
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 4cf100ff2a37..72db0664a53d 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -552,6 +552,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE, snb_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_X, snb_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_MOBILE, snb_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_DESKTOP, snb_cstates),
@@ -560,6 +561,9 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT, glm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_ATOM_DENVERTON, glm_cstates),
+
+ X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GEMINI_LAKE, glm_cstates),
{ },
};
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 8e2457cb6b4a..005908ee9333 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -775,6 +775,9 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_rapl_init),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_DENVERTON, hsw_rapl_init),
+
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GEMINI_LAKE, hsw_rapl_init),
{},
};
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index db1fe377e6dd..a7196818416a 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -3462,7 +3462,7 @@ static struct intel_uncore_ops skx_uncore_iio_ops = {
static struct intel_uncore_type skx_uncore_iio = {
.name = "iio",
.num_counters = 4,
- .num_boxes = 5,
+ .num_boxes = 6,
.perf_ctr_bits = 48,
.event_ctl = SKX_IIO0_MSR_PMON_CTL0,
.perf_ctr = SKX_IIO0_MSR_PMON_CTR0,
@@ -3492,7 +3492,7 @@ static const struct attribute_group skx_uncore_format_group = {
static struct intel_uncore_type skx_uncore_irp = {
.name = "irp",
.num_counters = 2,
- .num_boxes = 5,
+ .num_boxes = 6,
.perf_ctr_bits = 48,
.event_ctl = SKX_IRP0_MSR_PMON_CTL0,
.perf_ctr = SKX_IRP0_MSR_PMON_CTR0,
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index 4bb3ec69e8ea..06723671ae4e 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -63,6 +63,14 @@ static bool test_intel(int idx)
case INTEL_FAM6_ATOM_SILVERMONT1:
case INTEL_FAM6_ATOM_SILVERMONT2:
case INTEL_FAM6_ATOM_AIRMONT:
+
+ case INTEL_FAM6_ATOM_GOLDMONT:
+ case INTEL_FAM6_ATOM_DENVERTON:
+
+ case INTEL_FAM6_ATOM_GEMINI_LAKE:
+
+ case INTEL_FAM6_XEON_PHI_KNL:
+ case INTEL_FAM6_XEON_PHI_KNM:
if (idx == PERF_MSR_SMI)
return true;
break;
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 8d0879f1d42c..8e02b30cf08e 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -407,10 +407,10 @@ static int load_aout_library(struct file *file)
unsigned long bss, start_addr, len, error;
int retval;
struct exec ex;
-
+ loff_t pos = 0;
retval = -ENOEXEC;
- error = kernel_read(file, 0, (char *) &ex, sizeof(ex));
+ error = kernel_read(file, &ex, sizeof(ex), &pos);
if (error != sizeof(ex))
goto out;
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index e0bb46c02857..0e2a5edbce00 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -231,7 +231,7 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
ksig->ka.sa.sa_restorer)
sp = (unsigned long) ksig->ka.sa.sa_restorer;
- if (fpu->fpstate_active) {
+ if (fpu->initialized) {
unsigned long fx_aligned, math_size;
sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size);
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 1b020381ab38..c096624137ae 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -218,10 +218,9 @@ static inline int alternatives_text_reserved(void *start, void *end)
#define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2, \
output, input...) \
{ \
- register void *__sp asm(_ASM_SP); \
asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\
"call %P[new2]", feature2) \
- : output, "+r" (__sp) \
+ : output, ASM_CALL_CONSTRAINT \
: [old] "i" (oldfunc), [new1] "i" (newfunc1), \
[new2] "i" (newfunc2), ## input); \
}
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 676ee5807d86..b0dc91f4bedc 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -11,10 +11,12 @@
# define __ASM_FORM_COMMA(x) " " #x ","
#endif
-#ifdef CONFIG_X86_32
+#ifndef __x86_64__
+/* 32 bit */
# define __ASM_SEL(a,b) __ASM_FORM(a)
# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a)
#else
+/* 64 bit */
# define __ASM_SEL(a,b) __ASM_FORM(b)
# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b)
#endif
@@ -132,4 +134,15 @@
/* For C file, we already have NOKPROBE_SYMBOL macro */
#endif
+#ifndef __ASSEMBLY__
+/*
+ * This output constraint should be used for any inline asm which has a "call"
+ * instruction. Otherwise the asm may be inserted before the frame pointer
+ * gets set up by the containing function. If you forget to do this, objtool
+ * may print a "call without frame pointer save/setup" warning.
+ */
+register unsigned long current_stack_pointer asm(_ASM_SP);
+#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
+#endif
+
#endif /* _ASM_X86_ASM_H */
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 1a2ba368da39..9d0e13738ed3 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -121,7 +121,6 @@ static inline int desc_empty(const void *ptr)
#define load_ldt(ldt) asm volatile("lldt %0"::"m" (ldt))
#define store_gdt(dtr) native_store_gdt(dtr)
-#define store_idt(dtr) native_store_idt(dtr)
#define store_tr(tr) (tr = native_store_tr())
#define load_TLS(t, cpu) native_load_tls(t, cpu)
@@ -228,7 +227,7 @@ static inline void native_store_gdt(struct desc_ptr *dtr)
asm volatile("sgdt %0":"=m" (*dtr));
}
-static inline void native_store_idt(struct desc_ptr *dtr)
+static inline void store_idt(struct desc_ptr *dtr)
{
asm volatile("sidt %0":"=m" (*dtr));
}
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 554cdb205d17..e3221ffa304e 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -23,11 +23,9 @@
/*
* High level FPU state handling functions:
*/
-extern void fpu__activate_curr(struct fpu *fpu);
-extern void fpu__activate_fpstate_read(struct fpu *fpu);
-extern void fpu__activate_fpstate_write(struct fpu *fpu);
-extern void fpu__current_fpstate_write_begin(void);
-extern void fpu__current_fpstate_write_end(void);
+extern void fpu__initialize(struct fpu *fpu);
+extern void fpu__prepare_read(struct fpu *fpu);
+extern void fpu__prepare_write(struct fpu *fpu);
extern void fpu__save(struct fpu *fpu);
extern void fpu__restore(struct fpu *fpu);
extern int fpu__restore_sig(void __user *buf, int ia32_frame);
@@ -120,20 +118,11 @@ extern void fpstate_sanitize_xstate(struct fpu *fpu);
err; \
})
-#define check_insn(insn, output, input...) \
-({ \
- int err; \
+#define kernel_insn(insn, output, input...) \
asm volatile("1:" #insn "\n\t" \
"2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: movl $-1,%[err]\n" \
- " jmp 2b\n" \
- ".previous\n" \
- _ASM_EXTABLE(1b, 3b) \
- : [err] "=r" (err), output \
- : "0"(0), input); \
- err; \
-})
+ _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_fprestore) \
+ : output : input)
static inline int copy_fregs_to_user(struct fregs_state __user *fx)
{
@@ -153,20 +142,16 @@ static inline int copy_fxregs_to_user(struct fxregs_state __user *fx)
static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
{
- int err;
-
if (IS_ENABLED(CONFIG_X86_32)) {
- err = check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
+ kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
} else {
if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) {
- err = check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
+ kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
} else {
/* See comment in copy_fxregs_to_kernel() below. */
- err = check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx));
+ kernel_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx));
}
}
- /* Copying from a kernel buffer to FPU registers should never fail: */
- WARN_ON_FPU(err);
}
static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
@@ -183,9 +168,7 @@ static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
static inline void copy_kernel_to_fregs(struct fregs_state *fx)
{
- int err = check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
-
- WARN_ON_FPU(err);
+ kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
}
static inline int copy_user_to_fregs(struct fregs_state __user *fx)
@@ -281,18 +264,13 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
* Use XRSTORS to restore context if it is enabled. XRSTORS supports compact
* XSAVE area format.
*/
-#define XSTATE_XRESTORE(st, lmask, hmask, err) \
+#define XSTATE_XRESTORE(st, lmask, hmask) \
asm volatile(ALTERNATIVE(XRSTOR, \
XRSTORS, X86_FEATURE_XSAVES) \
"\n" \
- "xor %[err], %[err]\n" \
"3:\n" \
- ".pushsection .fixup,\"ax\"\n" \
- "4: movl $-2, %[err]\n" \
- "jmp 3b\n" \
- ".popsection\n" \
- _ASM_EXTABLE(661b, 4b) \
- : [err] "=r" (err) \
+ _ASM_EXTABLE_HANDLE(661b, 3b, ex_handler_fprestore)\
+ : \
: "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
: "memory")
@@ -336,7 +314,10 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
else
XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
- /* We should never fault when copying from a kernel buffer: */
+ /*
+ * We should never fault when copying from a kernel buffer, and the FPU
+ * state we set at boot time should be valid.
+ */
WARN_ON_FPU(err);
}
@@ -350,7 +331,7 @@ static inline void copy_xregs_to_kernel(struct xregs_state *xstate)
u32 hmask = mask >> 32;
int err;
- WARN_ON(!alternatives_patched);
+ WARN_ON_FPU(!alternatives_patched);
XSTATE_XSAVE(xstate, lmask, hmask, err);
@@ -365,12 +346,8 @@ static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask)
{
u32 lmask = mask;
u32 hmask = mask >> 32;
- int err;
-
- XSTATE_XRESTORE(xstate, lmask, hmask, err);
- /* We should never fault when copying from a kernel buffer: */
- WARN_ON_FPU(err);
+ XSTATE_XRESTORE(xstate, lmask, hmask);
}
/*
@@ -526,38 +503,17 @@ static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
*/
static inline void fpregs_deactivate(struct fpu *fpu)
{
- WARN_ON_FPU(!fpu->fpregs_active);
-
- fpu->fpregs_active = 0;
this_cpu_write(fpu_fpregs_owner_ctx, NULL);
trace_x86_fpu_regs_deactivated(fpu);
}
static inline void fpregs_activate(struct fpu *fpu)
{
- WARN_ON_FPU(fpu->fpregs_active);
-
- fpu->fpregs_active = 1;
this_cpu_write(fpu_fpregs_owner_ctx, fpu);
trace_x86_fpu_regs_activated(fpu);
}
/*
- * The question "does this thread have fpu access?"
- * is slightly racy, since preemption could come in
- * and revoke it immediately after the test.
- *
- * However, even in that very unlikely scenario,
- * we can just assume we have FPU access - typically
- * to save the FP state - we'll just take a #NM
- * fault and get the FPU access back.
- */
-static inline int fpregs_active(void)
-{
- return current->thread.fpu.fpregs_active;
-}
-
-/*
* FPU state switching for scheduling.
*
* This is a two-stage process:
@@ -571,14 +527,13 @@ static inline int fpregs_active(void)
static inline void
switch_fpu_prepare(struct fpu *old_fpu, int cpu)
{
- if (old_fpu->fpregs_active) {
+ if (old_fpu->initialized) {
if (!copy_fpregs_to_fpstate(old_fpu))
old_fpu->last_cpu = -1;
else
old_fpu->last_cpu = cpu;
/* But leave fpu_fpregs_owner_ctx! */
- old_fpu->fpregs_active = 0;
trace_x86_fpu_regs_deactivated(old_fpu);
} else
old_fpu->last_cpu = -1;
@@ -595,7 +550,7 @@ switch_fpu_prepare(struct fpu *old_fpu, int cpu)
static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu)
{
bool preload = static_cpu_has(X86_FEATURE_FPU) &&
- new_fpu->fpstate_active;
+ new_fpu->initialized;
if (preload) {
if (!fpregs_state_valid(new_fpu, cpu))
@@ -617,8 +572,7 @@ static inline void user_fpu_begin(void)
struct fpu *fpu = &current->thread.fpu;
preempt_disable();
- if (!fpregs_active())
- fpregs_activate(fpu);
+ fpregs_activate(fpu);
preempt_enable();
}
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 3c80f5b9c09d..a1520575d86b 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -68,6 +68,9 @@ struct fxregs_state {
/* Default value for fxregs_state.mxcsr: */
#define MXCSR_DEFAULT 0x1f80
+/* Copy both mxcsr & mxcsr_flags with a single u64 memcpy: */
+#define MXCSR_AND_FLAGS_SIZE sizeof(u64)
+
/*
* Software based FPU emulation state. This is arbitrary really,
* it matches the x87 format to make it easier to understand:
@@ -290,36 +293,13 @@ struct fpu {
unsigned int last_cpu;
/*
- * @fpstate_active:
+ * @initialized:
*
- * This flag indicates whether this context is active: if the task
+ * This flag indicates whether this context is initialized: if the task
* is not running then we can restore from this context, if the task
* is running then we should save into this context.
*/
- unsigned char fpstate_active;
-
- /*
- * @fpregs_active:
- *
- * This flag determines whether a given context is actively
- * loaded into the FPU's registers and that those registers
- * represent the task's current FPU state.
- *
- * Note the interaction with fpstate_active:
- *
- * # task does not use the FPU:
- * fpstate_active == 0
- *
- * # task uses the FPU and regs are active:
- * fpstate_active == 1 && fpregs_active == 1
- *
- * # the regs are inactive but still match fpstate:
- * fpstate_active == 1 && fpregs_active == 0 && fpregs_owner == fpu
- *
- * The third state is what we use for the lazy restore optimization
- * on lazy-switching CPUs.
- */
- unsigned char fpregs_active;
+ unsigned char initialized;
/*
* @state:
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 1b2799e0699a..83fee2469eb7 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -48,8 +48,12 @@ void fpu__xstate_clear_all_cpu_caps(void);
void *get_xsave_addr(struct xregs_state *xsave, int xstate);
const void *get_xsave_field_ptr(int xstate_field);
int using_compacted_format(void);
-int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
- void __user *ubuf, struct xregs_state *xsave);
-int copyin_to_xsaves(const void *kbuf, const void __user *ubuf,
- struct xregs_state *xsave);
+int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
+int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
+int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf);
+int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf);
+
+/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
+extern int validate_xstate_header(const struct xstate_header *hdr);
+
#endif
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8844eee290b2..c73e493adf07 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -951,7 +951,6 @@ struct kvm_x86_ops {
void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
- u32 (*get_pkru)(struct kvm_vcpu *vcpu);
void (*tlb_flush)(struct kvm_vcpu *vcpu);
@@ -973,7 +972,7 @@ struct kvm_x86_ops {
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
- bool (*get_enable_apicv)(void);
+ bool (*get_enable_apicv)(struct kvm_vcpu *vcpu);
void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr);
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index bc62e7cbf1b1..59ad3d132353 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -88,7 +88,7 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
bool kvm_para_available(void);
unsigned int kvm_arch_para_features(void);
void __init kvm_guest_init(void);
-void kvm_async_pf_task_wait(u32 token);
+void kvm_async_pf_task_wait(u32 token, int interrupt_kernel);
void kvm_async_pf_task_wake(u32 token);
u32 kvm_read_and_reset_pf_reason(void);
extern void kvm_disable_steal_time(void);
@@ -103,7 +103,7 @@ static inline void kvm_spinlock_init(void)
#else /* CONFIG_KVM_GUEST */
#define kvm_guest_init() do {} while (0)
-#define kvm_async_pf_task_wait(T) do {} while(0)
+#define kvm_async_pf_task_wait(T, I) do {} while(0)
#define kvm_async_pf_task_wake(T) do {} while(0)
static inline bool kvm_para_available(void)
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 7ae318c340d9..c120b5db178a 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -286,6 +286,32 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
return __pkru_allows_pkey(vma_pkey(vma), write);
}
+/*
+ * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID
+ * bits. This serves two purposes. It prevents a nasty situation in
+ * which PCID-unaware code saves CR3, loads some other value (with PCID
+ * == 0), and then restores CR3, thus corrupting the TLB for ASID 0 if
+ * the saved ASID was nonzero. It also means that any bugs involving
+ * loading a PCID-enabled CR3 with CR4.PCIDE off will trigger
+ * deterministically.
+ */
+
+static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
+{
+ if (static_cpu_has(X86_FEATURE_PCID)) {
+ VM_WARN_ON_ONCE(asid > 4094);
+ return __sme_pa(mm->pgd) | (asid + 1);
+ } else {
+ VM_WARN_ON_ONCE(asid != 0);
+ return __sme_pa(mm->pgd);
+ }
+}
+
+static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
+{
+ VM_WARN_ON_ONCE(asid > 4094);
+ return __sme_pa(mm->pgd) | (asid + 1) | CR3_NOFLUSH;
+}
/*
* This can be used from process context to figure out what the value of
@@ -296,10 +322,8 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
*/
static inline unsigned long __get_current_cr3_fast(void)
{
- unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd);
-
- if (static_cpu_has(X86_FEATURE_PCID))
- cr3 |= this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+ unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm),
+ this_cpu_read(cpu_tlbstate.loaded_mm_asid));
/* For now, be very restrictive about when this can be called. */
VM_WARN_ON(in_nmi() || preemptible());
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 63cc96f064dc..738503e1f80c 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -179,7 +179,6 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
u64 input_address = input ? virt_to_phys(input) : 0;
u64 output_address = output ? virt_to_phys(output) : 0;
u64 hv_status;
- register void *__sp asm(_ASM_SP);
#ifdef CONFIG_X86_64
if (!hv_hypercall_pg)
@@ -187,7 +186,7 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
__asm__ __volatile__("mov %4, %%r8\n"
"call *%5"
- : "=a" (hv_status), "+r" (__sp),
+ : "=a" (hv_status), ASM_CALL_CONSTRAINT,
"+c" (control), "+d" (input_address)
: "r" (output_address), "m" (hv_hypercall_pg)
: "cc", "memory", "r8", "r9", "r10", "r11");
@@ -202,7 +201,7 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
__asm__ __volatile__("call *%7"
: "=A" (hv_status),
- "+c" (input_address_lo), "+r" (__sp)
+ "+c" (input_address_lo), ASM_CALL_CONSTRAINT
: "A" (control),
"b" (input_address_hi),
"D"(output_address_hi), "S"(output_address_lo),
@@ -224,12 +223,11 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
{
u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
- register void *__sp asm(_ASM_SP);
#ifdef CONFIG_X86_64
{
__asm__ __volatile__("call *%4"
- : "=a" (hv_status), "+r" (__sp),
+ : "=a" (hv_status), ASM_CALL_CONSTRAINT,
"+c" (control), "+d" (input1)
: "m" (hv_hypercall_pg)
: "cc", "r8", "r9", "r10", "r11");
@@ -242,7 +240,7 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
__asm__ __volatile__ ("call *%5"
: "=A"(hv_status),
"+c"(input1_lo),
- "+r"(__sp)
+ ASM_CALL_CONSTRAINT
: "A" (control),
"b" (input1_hi),
"m" (hv_hypercall_pg)
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index c25dd22f7c70..12deec722cf0 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -71,11 +71,6 @@ static inline void write_cr3(unsigned long x)
PVOP_VCALL1(pv_mmu_ops.write_cr3, x);
}
-static inline unsigned long __read_cr4(void)
-{
- return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
-}
-
static inline void __write_cr4(unsigned long x)
{
PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
@@ -228,10 +223,6 @@ static inline void set_ldt(const void *addr, unsigned entries)
{
PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries);
}
-static inline void store_idt(struct desc_ptr *dtr)
-{
- PVOP_VCALL1(pv_cpu_ops.store_idt, dtr);
-}
static inline unsigned long paravirt_store_tr(void)
{
return PVOP_CALL0(unsigned long, pv_cpu_ops.store_tr);
@@ -365,12 +356,6 @@ static inline void paravirt_release_p4d(unsigned long pfn)
PVOP_VCALL1(pv_mmu_ops.release_p4d, pfn);
}
-static inline void pte_update(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep)
-{
- PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep);
-}
-
static inline pte_t __pte(pteval_t val)
{
pteval_t ret;
@@ -472,28 +457,6 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte);
}
-static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp, pmd_t pmd)
-{
- if (sizeof(pmdval_t) > sizeof(long))
- /* 5 arg words */
- pv_mmu_ops.set_pmd_at(mm, addr, pmdp, pmd);
- else
- PVOP_VCALL4(pv_mmu_ops.set_pmd_at, mm, addr, pmdp,
- native_pmd_val(pmd));
-}
-
-static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
- pud_t *pudp, pud_t pud)
-{
- if (sizeof(pudval_t) > sizeof(long))
- /* 5 arg words */
- pv_mmu_ops.set_pud_at(mm, addr, pudp, pud);
- else
- PVOP_VCALL4(pv_mmu_ops.set_pud_at, mm, addr, pudp,
- native_pud_val(pud));
-}
-
static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
pmdval_t val = native_pmd_val(pmd);
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 6b64fc6367f2..280d94c36dad 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -107,7 +107,6 @@ struct pv_cpu_ops {
unsigned long (*read_cr0)(void);
void (*write_cr0)(unsigned long);
- unsigned long (*read_cr4)(void);
void (*write_cr4)(unsigned long);
#ifdef CONFIG_X86_64
@@ -119,8 +118,6 @@ struct pv_cpu_ops {
void (*load_tr_desc)(void);
void (*load_gdt)(const struct desc_ptr *);
void (*load_idt)(const struct desc_ptr *);
- /* store_gdt has been removed. */
- void (*store_idt)(struct desc_ptr *);
void (*set_ldt)(const void *desc, unsigned entries);
unsigned long (*store_tr)(void);
void (*load_tls)(struct thread_struct *t, unsigned int cpu);
@@ -245,12 +242,6 @@ struct pv_mmu_ops {
void (*set_pte_at)(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pteval);
void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval);
- void (*set_pmd_at)(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp, pmd_t pmdval);
- void (*set_pud_at)(struct mm_struct *mm, unsigned long addr,
- pud_t *pudp, pud_t pudval);
- void (*pte_update)(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep);
pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr,
pte_t *ptep);
@@ -468,8 +459,8 @@ int paravirt_disable_iospace(void);
*/
#ifdef CONFIG_X86_32
#define PVOP_VCALL_ARGS \
- unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx; \
- register void *__sp asm("esp")
+ unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx;
+
#define PVOP_CALL_ARGS PVOP_VCALL_ARGS
#define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x))
@@ -489,8 +480,8 @@ int paravirt_disable_iospace(void);
/* [re]ax isn't an arg, but the return val */
#define PVOP_VCALL_ARGS \
unsigned long __edi = __edi, __esi = __esi, \
- __edx = __edx, __ecx = __ecx, __eax = __eax; \
- register void *__sp asm("rsp")
+ __edx = __edx, __ecx = __ecx, __eax = __eax;
+
#define PVOP_CALL_ARGS PVOP_VCALL_ARGS
#define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x))
@@ -541,7 +532,7 @@ int paravirt_disable_iospace(void);
asm volatile(pre \
paravirt_alt(PARAVIRT_CALL) \
post \
- : call_clbr, "+r" (__sp) \
+ : call_clbr, ASM_CALL_CONSTRAINT \
: paravirt_type(op), \
paravirt_clobber(clbr), \
##__VA_ARGS__ \
@@ -551,7 +542,7 @@ int paravirt_disable_iospace(void);
asm volatile(pre \
paravirt_alt(PARAVIRT_CALL) \
post \
- : call_clbr, "+r" (__sp) \
+ : call_clbr, ASM_CALL_CONSTRAINT \
: paravirt_type(op), \
paravirt_clobber(clbr), \
##__VA_ARGS__ \
@@ -578,7 +569,7 @@ int paravirt_disable_iospace(void);
asm volatile(pre \
paravirt_alt(PARAVIRT_CALL) \
post \
- : call_clbr, "+r" (__sp) \
+ : call_clbr, ASM_CALL_CONSTRAINT \
: paravirt_type(op), \
paravirt_clobber(clbr), \
##__VA_ARGS__ \
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 5b4c44d419c5..b714934512b3 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -55,8 +55,6 @@ extern pmdval_t early_pmd_flags;
#else /* !CONFIG_PARAVIRT */
#define set_pte(ptep, pte) native_set_pte(ptep, pte)
#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte)
-#define set_pmd_at(mm, addr, pmdp, pmd) native_set_pmd_at(mm, addr, pmdp, pmd)
-#define set_pud_at(mm, addr, pudp, pud) native_set_pud_at(mm, addr, pudp, pud)
#define set_pte_atomic(ptep, pte) \
native_set_pte_atomic(ptep, pte)
@@ -87,8 +85,6 @@ extern pmdval_t early_pmd_flags;
#define pte_clear(mm, addr, ptep) native_pte_clear(mm, addr, ptep)
#define pmd_clear(pmd) native_pmd_clear(pmd)
-#define pte_update(mm, addr, ptep) do { } while (0)
-
#define pgd_val(x) native_pgd_val(x)
#define __pgd(x) native_make_pgd(x)
@@ -979,31 +975,18 @@ static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
native_set_pte(ptep, pte);
}
-static inline void native_set_pmd_at(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp , pmd_t pmd)
+static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t pmd)
{
native_set_pmd(pmdp, pmd);
}
-static inline void native_set_pud_at(struct mm_struct *mm, unsigned long addr,
- pud_t *pudp, pud_t pud)
+static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
+ pud_t *pudp, pud_t pud)
{
native_set_pud(pudp, pud);
}
-#ifndef CONFIG_PARAVIRT
-/*
- * Rules for using pte_update - it must be called after any PTE update which
- * has not been done using the set_pte / clear_pte interfaces. It is used by
- * shadow mode hypervisors to resynchronize the shadow page tables. Kernel PTE
- * updates should either be sets, clears, or set_pte_atomic for P->P
- * transitions, which means this hook should only be called for user PTEs.
- * This hook implies a P->P protection or access change has taken place, which
- * requires a subsequent TLB flush.
- */
-#define pte_update(mm, addr, ptep) do { } while (0)
-#endif
-
/*
* We only update the dirty/accessed state if we set
* the dirty bit by hand in the kernel, since the hardware
@@ -1031,7 +1014,6 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
pte_t pte = native_ptep_get_and_clear(ptep);
- pte_update(mm, addr, ptep);
return pte;
}
@@ -1058,7 +1040,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte);
- pte_update(mm, addr, ptep);
}
#define flush_tlb_fix_spurious_fault(vma, address) do { } while (0)
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index ec1f3c651150..4f44505dbf87 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -100,19 +100,14 @@ static __always_inline bool should_resched(int preempt_offset)
#ifdef CONFIG_PREEMPT
extern asmlinkage void ___preempt_schedule(void);
-# define __preempt_schedule() \
-({ \
- register void *__sp asm(_ASM_SP); \
- asm volatile ("call ___preempt_schedule" : "+r"(__sp)); \
-})
+# define __preempt_schedule() \
+ asm volatile ("call ___preempt_schedule" : ASM_CALL_CONSTRAINT)
extern asmlinkage void preempt_schedule(void);
extern asmlinkage void ___preempt_schedule_notrace(void);
-# define __preempt_schedule_notrace() \
-({ \
- register void *__sp asm(_ASM_SP); \
- asm volatile ("call ___preempt_schedule_notrace" : "+r"(__sp)); \
-})
+# define __preempt_schedule_notrace() \
+ asm volatile ("call ___preempt_schedule_notrace" : ASM_CALL_CONSTRAINT)
+
extern asmlinkage void preempt_schedule_notrace(void);
#endif
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 3fa26a61eabc..b390ff76e58f 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -677,8 +677,6 @@ static inline void sync_core(void)
* Like all of Linux's memory ordering operations, this is a
* compiler barrier as well.
*/
- register void *__sp asm(_ASM_SP);
-
#ifdef CONFIG_X86_32
asm volatile (
"pushfl\n\t"
@@ -686,7 +684,7 @@ static inline void sync_core(void)
"pushl $1f\n\t"
"iret\n\t"
"1:"
- : "+r" (__sp) : : "memory");
+ : ASM_CALL_CONSTRAINT : : "memory");
#else
unsigned int tmp;
@@ -703,7 +701,7 @@ static inline void sync_core(void)
"iretq\n\t"
UNWIND_HINT_RESTORE
"1:"
- : "=&r" (tmp), "+r" (__sp) : : "cc", "memory");
+ : "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory");
#endif
}
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index a34e0d4b957d..7116b7931c7b 100644
--- a/arch/x86/include/asm/rwsem.h
+++ b/arch/x86/include/asm/rwsem.h
@@ -103,7 +103,6 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem)
({ \
long tmp; \
struct rw_semaphore* ret; \
- register void *__sp asm(_ASM_SP); \
\
asm volatile("# beginning down_write\n\t" \
LOCK_PREFIX " xadd %1,(%4)\n\t" \
@@ -114,7 +113,8 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem)
" call " slow_path "\n" \
"1:\n" \
"# ending down_write" \
- : "+m" (sem->count), "=d" (tmp), "=a" (ret), "+r" (__sp) \
+ : "+m" (sem->count), "=d" (tmp), \
+ "=a" (ret), ASM_CALL_CONSTRAINT \
: "a" (sem), "1" (RWSEM_ACTIVE_WRITE_BIAS) \
: "memory", "cc"); \
ret; \
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 9efaabf5b54b..a24dfcf79f4a 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -135,6 +135,11 @@ static inline void native_wbinvd(void)
extern asmlinkage void native_load_gs_index(unsigned);
+static inline unsigned long __read_cr4(void)
+{
+ return native_read_cr4();
+}
+
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else
@@ -173,11 +178,6 @@ static inline void write_cr3(unsigned long x)
native_write_cr3(x);
}
-static inline unsigned long __read_cr4(void)
-{
- return native_read_cr4();
-}
-
static inline void __write_cr4(unsigned long x)
{
native_write_cr4(x);
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 5161da1a0fa0..89e7eeb5cec1 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -158,17 +158,6 @@ struct thread_info {
*/
#ifndef __ASSEMBLY__
-static inline unsigned long current_stack_pointer(void)
-{
- unsigned long sp;
-#ifdef CONFIG_X86_64
- asm("mov %%rsp,%0" : "=g" (sp));
-#else
- asm("mov %%esp,%0" : "=g" (sp));
-#endif
- return sp;
-}
-
/*
* Walks up the stack frames to make sure that the specified object is
* entirely contained by a single stack frame.
diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
index 342e59789fcd..39f7a27bef13 100644
--- a/arch/x86/include/asm/trace/fpu.h
+++ b/arch/x86/include/asm/trace/fpu.h
@@ -12,25 +12,22 @@ DECLARE_EVENT_CLASS(x86_fpu,
TP_STRUCT__entry(
__field(struct fpu *, fpu)
- __field(bool, fpregs_active)
- __field(bool, fpstate_active)
+ __field(bool, initialized)
__field(u64, xfeatures)
__field(u64, xcomp_bv)
),
TP_fast_assign(
__entry->fpu = fpu;
- __entry->fpregs_active = fpu->fpregs_active;
- __entry->fpstate_active = fpu->fpstate_active;
+ __entry->initialized = fpu->initialized;
if (boot_cpu_has(X86_FEATURE_OSXSAVE)) {
__entry->xfeatures = fpu->state.xsave.header.xfeatures;
__entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv;
}
),
- TP_printk("x86/fpu: %p fpregs_active: %d fpstate_active: %d xfeatures: %llx xcomp_bv: %llx",
+ TP_printk("x86/fpu: %p initialized: %d xfeatures: %llx xcomp_bv: %llx",
__entry->fpu,
- __entry->fpregs_active,
- __entry->fpstate_active,
+ __entry->initialized,
__entry->xfeatures,
__entry->xcomp_bv
)
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 184eb9894dae..4b892917edeb 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -166,11 +166,11 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
({ \
int __ret_gu; \
register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX); \
- register void *__sp asm(_ASM_SP); \
__chk_user_ptr(ptr); \
might_fault(); \
asm volatile("call __get_user_%P4" \
- : "=a" (__ret_gu), "=r" (__val_gu), "+r" (__sp) \
+ : "=a" (__ret_gu), "=r" (__val_gu), \
+ ASM_CALL_CONSTRAINT \
: "0" (ptr), "i" (sizeof(*(ptr)))); \
(x) = (__force __typeof__(*(ptr))) __val_gu; \
__builtin_expect(__ret_gu, 0); \
@@ -337,7 +337,7 @@ do { \
_ASM_EXTABLE(1b, 4b) \
_ASM_EXTABLE(2b, 4b) \
: "=r" (retval), "=&A"(x) \
- : "m" (__m(__ptr)), "m" __m(((u32 *)(__ptr)) + 1), \
+ : "m" (__m(__ptr)), "m" __m(((u32 __user *)(__ptr)) + 1), \
"i" (errret), "0" (retval)); \
})
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 9606688caa4b..7cb282e9e587 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -113,10 +113,9 @@ extern struct { char _entry[32]; } hypercall_page[];
register unsigned long __arg2 asm(__HYPERCALL_ARG2REG) = __arg2; \
register unsigned long __arg3 asm(__HYPERCALL_ARG3REG) = __arg3; \
register unsigned long __arg4 asm(__HYPERCALL_ARG4REG) = __arg4; \
- register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5; \
- register void *__sp asm(_ASM_SP);
+ register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5;
-#define __HYPERCALL_0PARAM "=r" (__res), "+r" (__sp)
+#define __HYPERCALL_0PARAM "=r" (__res), ASM_CALL_CONSTRAINT
#define __HYPERCALL_1PARAM __HYPERCALL_0PARAM, "+r" (__arg1)
#define __HYPERCALL_2PARAM __HYPERCALL_1PARAM, "+r" (__arg2)
#define __HYPERCALL_3PARAM __HYPERCALL_2PARAM, "+r" (__arg3)
@@ -552,13 +551,13 @@ static inline void
MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr,
struct desc_struct desc)
{
- u32 *p = (u32 *) &desc;
-
mcl->op = __HYPERVISOR_update_descriptor;
if (sizeof(maddr) == sizeof(long)) {
mcl->args[0] = maddr;
mcl->args[1] = *(unsigned long *)&desc;
} else {
+ u32 *p = (u32 *)&desc;
+
mcl->args[0] = maddr;
mcl->args[1] = maddr >> 32;
mcl->args[2] = *p++;
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index 7032f4d8dff3..f65d12504e80 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -153,12 +153,6 @@
#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED (1 << 11)
/*
- * HV_VP_SET available
- */
-#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED (1 << 11)
-
-
-/*
* Crash notification flag.
*/
#define HV_CRASH_CTL_CRASH_NOTIFY (1ULL << 63)
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index f8ae286c1502..079535e53e2a 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1373,7 +1373,7 @@ static void __init acpi_reduced_hw_init(void)
* If your system is blacklisted here, but you find that acpi=force
* works for you, please contact linux-acpi@vger.kernel.org
*/
-static struct dmi_system_id __initdata acpi_dmi_table[] = {
+static const struct dmi_system_id acpi_dmi_table[] __initconst = {
/*
* Boxes that need ACPI disabled
*/
@@ -1448,7 +1448,7 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = {
};
/* second table for DMI checks that should run after early-quirks */
-static struct dmi_system_id __initdata acpi_dmi_table_late[] = {
+static const struct dmi_system_id acpi_dmi_table_late[] __initconst = {
/*
* HP laptops which use a DSDT reporting as HP/SB400/10000,
* which includes some code which overrides all temperature
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 446b0d3d4932..e4b0d92b3ae0 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -2043,7 +2043,7 @@ static int __init swab_apm_power_in_minutes(const struct dmi_system_id *d)
return 0;
}
-static struct dmi_system_id __initdata apm_dmi_table[] = {
+static const struct dmi_system_id apm_dmi_table[] __initconst = {
{
print_if_true,
KERN_WARNING "IBM T23 - BIOS 1.03b+ and controller firmware 1.02+ may be needed for Linux APM.",
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 9862e2cd6d93..d58184b7cd44 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -763,6 +763,16 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
}
}
+static void init_amd_zn(struct cpuinfo_x86 *c)
+{
+ /*
+ * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects
+ * all up to and including B1.
+ */
+ if (c->x86_model <= 1 && c->x86_mask <= 1)
+ set_cpu_cap(c, X86_FEATURE_CPB);
+}
+
static void init_amd(struct cpuinfo_x86 *c)
{
early_init_amd(c);
@@ -791,6 +801,7 @@ static void init_amd(struct cpuinfo_x86 *c)
case 0x10: init_amd_gh(c); break;
case 0x12: init_amd_ln(c); break;
case 0x15: init_amd_bd(c); break;
+ case 0x17: init_amd_zn(c); break;
}
/* Enable workaround for FXSAVE leak */
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index db684880d74a..0af86d9242da 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -21,14 +21,6 @@
void __init check_bugs(void)
{
-#ifdef CONFIG_X86_32
- /*
- * Regardless of whether PCID is enumerated, the SDM says
- * that it can't be enabled in 32-bit mode.
- */
- setup_clear_cpu_cap(X86_FEATURE_PCID);
-#endif
-
identify_boot_cpu();
if (!IS_ENABLED(CONFIG_SMP)) {
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index fb1d3358a4af..c9176bae7fd8 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -169,21 +169,21 @@ static int __init x86_mpx_setup(char *s)
__setup("nompx", x86_mpx_setup);
#ifdef CONFIG_X86_64
-static int __init x86_pcid_setup(char *s)
+static int __init x86_nopcid_setup(char *s)
{
- /* require an exact match without trailing characters */
- if (strlen(s))
- return 0;
+ /* nopcid doesn't accept parameters */
+ if (s)
+ return -EINVAL;
/* do not emit a message if the feature is not present */
if (!boot_cpu_has(X86_FEATURE_PCID))
- return 1;
+ return 0;
setup_clear_cpu_cap(X86_FEATURE_PCID);
pr_info("nopcid: PCID feature disabled\n");
- return 1;
+ return 0;
}
-__setup("nopcid", x86_pcid_setup);
+early_param("nopcid", x86_nopcid_setup);
#endif
static int __init x86_noinvpcid_setup(char *s)
@@ -329,38 +329,6 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
}
}
-static void setup_pcid(struct cpuinfo_x86 *c)
-{
- if (cpu_has(c, X86_FEATURE_PCID)) {
- if (cpu_has(c, X86_FEATURE_PGE)) {
- /*
- * We'd like to use cr4_set_bits_and_update_boot(),
- * but we can't. CR4.PCIDE is special and can only
- * be set in long mode, and the early CPU init code
- * doesn't know this and would try to restore CR4.PCIDE
- * prior to entering long mode.
- *
- * Instead, we rely on the fact that hotplug, resume,
- * etc all fully restore CR4 before they write anything
- * that could have nonzero PCID bits to CR3. CR4.PCIDE
- * has no effect on the page tables themselves, so we
- * don't need it to be restored early.
- */
- cr4_set_bits(X86_CR4_PCIDE);
- } else {
- /*
- * flush_tlb_all(), as currently implemented, won't
- * work if PCID is on but PGE is not. Since that
- * combination doesn't exist on real hardware, there's
- * no reason to try to fully support it, but it's
- * polite to avoid corrupting data if we're on
- * an improperly configured VM.
- */
- clear_cpu_cap(c, X86_FEATURE_PCID);
- }
- }
-}
-
/*
* Protection Keys are not available in 32-bit mode.
*/
@@ -936,6 +904,14 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
setup_force_cpu_cap(X86_FEATURE_ALWAYS);
fpu__init_system(c);
+
+#ifdef CONFIG_X86_32
+ /*
+ * Regardless of whether PCID is enumerated, the SDM says
+ * that it can't be enabled in 32-bit mode.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_PCID);
+#endif
}
void __init early_cpu_init(void)
@@ -1175,9 +1151,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
setup_smep(c);
setup_smap(c);
- /* Set up PCID */
- setup_pcid(c);
-
/*
* The vendor-specific functions might have changed features.
* Now we do "generic changes."
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 3b3f713e15e5..236324e83a3a 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -59,8 +59,6 @@ void hyperv_vector_handler(struct pt_regs *regs)
void hv_setup_vmbus_irq(void (*handler)(void))
{
vmbus_handler = handler;
- /* Setup the IDT for hypervisor callback */
- alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector);
}
void hv_remove_vmbus_irq(void)
@@ -251,6 +249,8 @@ static void __init ms_hyperv_init_platform(void)
*/
x86_platform.apic_post_init = hyperv_init;
hyperv_setup_mmu_ops();
+ /* Setup the IDT for hypervisor callback */
+ alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector);
#endif
}
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index e1114f070c2d..f92a6593de1e 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -100,7 +100,7 @@ void __kernel_fpu_begin(void)
kernel_fpu_disable();
- if (fpu->fpregs_active) {
+ if (fpu->initialized) {
/*
* Ignore return value -- we don't care if reg state
* is clobbered.
@@ -116,7 +116,7 @@ void __kernel_fpu_end(void)
{
struct fpu *fpu = &current->thread.fpu;
- if (fpu->fpregs_active)
+ if (fpu->initialized)
copy_kernel_to_fpregs(&fpu->state);
kernel_fpu_enable();
@@ -148,7 +148,7 @@ void fpu__save(struct fpu *fpu)
preempt_disable();
trace_x86_fpu_before_save(fpu);
- if (fpu->fpregs_active) {
+ if (fpu->initialized) {
if (!copy_fpregs_to_fpstate(fpu)) {
copy_kernel_to_fpregs(&fpu->state);
}
@@ -189,10 +189,9 @@ EXPORT_SYMBOL_GPL(fpstate_init);
int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
{
- dst_fpu->fpregs_active = 0;
dst_fpu->last_cpu = -1;
- if (!src_fpu->fpstate_active || !static_cpu_has(X86_FEATURE_FPU))
+ if (!src_fpu->initialized || !static_cpu_has(X86_FEATURE_FPU))
return 0;
WARN_ON_FPU(src_fpu != &current->thread.fpu);
@@ -206,26 +205,14 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
/*
* Save current FPU registers directly into the child
* FPU context, without any memory-to-memory copying.
- * In lazy mode, if the FPU context isn't loaded into
- * fpregs, CR0.TS will be set and do_device_not_available
- * will load the FPU context.
*
- * We have to do all this with preemption disabled,
- * mostly because of the FNSAVE case, because in that
- * case we must not allow preemption in the window
- * between the FNSAVE and us marking the context lazy.
- *
- * It shouldn't be an issue as even FNSAVE is plenty
- * fast in terms of critical section length.
+ * ( The function 'fails' in the FNSAVE case, which destroys
+ * register contents so we have to copy them back. )
*/
- preempt_disable();
if (!copy_fpregs_to_fpstate(dst_fpu)) {
- memcpy(&src_fpu->state, &dst_fpu->state,
- fpu_kernel_xstate_size);
-
+ memcpy(&src_fpu->state, &dst_fpu->state, fpu_kernel_xstate_size);
copy_kernel_to_fpregs(&src_fpu->state);
}
- preempt_enable();
trace_x86_fpu_copy_src(src_fpu);
trace_x86_fpu_copy_dst(dst_fpu);
@@ -237,45 +224,48 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
* Activate the current task's in-memory FPU context,
* if it has not been used before:
*/
-void fpu__activate_curr(struct fpu *fpu)
+void fpu__initialize(struct fpu *fpu)
{
WARN_ON_FPU(fpu != &current->thread.fpu);
- if (!fpu->fpstate_active) {
+ if (!fpu->initialized) {
fpstate_init(&fpu->state);
trace_x86_fpu_init_state(fpu);
trace_x86_fpu_activate_state(fpu);
/* Safe to do for the current task: */
- fpu->fpstate_active = 1;
+ fpu->initialized = 1;
}
}
-EXPORT_SYMBOL_GPL(fpu__activate_curr);
+EXPORT_SYMBOL_GPL(fpu__initialize);
/*
* This function must be called before we read a task's fpstate.
*
- * If the task has not used the FPU before then initialize its
- * fpstate.
+ * There's two cases where this gets called:
+ *
+ * - for the current task (when coredumping), in which case we have
+ * to save the latest FPU registers into the fpstate,
+ *
+ * - or it's called for stopped tasks (ptrace), in which case the
+ * registers were already saved by the context-switch code when
+ * the task scheduled out - we only have to initialize the registers
+ * if they've never been initialized.
*
* If the task has used the FPU before then save it.
*/
-void fpu__activate_fpstate_read(struct fpu *fpu)
+void fpu__prepare_read(struct fpu *fpu)
{
- /*
- * If fpregs are active (in the current CPU), then
- * copy them to the fpstate:
- */
- if (fpu->fpregs_active) {
+ if (fpu == &current->thread.fpu) {
fpu__save(fpu);
} else {
- if (!fpu->fpstate_active) {
+ if (!fpu->initialized) {
fpstate_init(&fpu->state);
trace_x86_fpu_init_state(fpu);
trace_x86_fpu_activate_state(fpu);
/* Safe to do for current and for stopped child tasks: */
- fpu->fpstate_active = 1;
+ fpu->initialized = 1;
}
}
}
@@ -283,17 +273,17 @@ void fpu__activate_fpstate_read(struct fpu *fpu)
/*
* This function must be called before we write a task's fpstate.
*
- * If the task has used the FPU before then unlazy it.
+ * If the task has used the FPU before then invalidate any cached FPU registers.
* If the task has not used the FPU before then initialize its fpstate.
*
* After this function call, after registers in the fpstate are
* modified and the child task has woken up, the child task will
* restore the modified FPU state from the modified context. If we
- * didn't clear its lazy status here then the lazy in-registers
+ * didn't clear its cached status here then the cached in-registers
* state pending on its former CPU could be restored, corrupting
* the modifications.
*/
-void fpu__activate_fpstate_write(struct fpu *fpu)
+void fpu__prepare_write(struct fpu *fpu)
{
/*
* Only stopped child tasks can be used to modify the FPU
@@ -301,8 +291,8 @@ void fpu__activate_fpstate_write(struct fpu *fpu)
*/
WARN_ON_FPU(fpu == &current->thread.fpu);
- if (fpu->fpstate_active) {
- /* Invalidate any lazy state: */
+ if (fpu->initialized) {
+ /* Invalidate any cached state: */
__fpu_invalidate_fpregs_state(fpu);
} else {
fpstate_init(&fpu->state);
@@ -310,74 +300,11 @@ void fpu__activate_fpstate_write(struct fpu *fpu)
trace_x86_fpu_activate_state(fpu);
/* Safe to do for stopped child tasks: */
- fpu->fpstate_active = 1;
+ fpu->initialized = 1;
}
}
/*
- * This function must be called before we write the current
- * task's fpstate.
- *
- * This call gets the current FPU register state and moves
- * it in to the 'fpstate'. Preemption is disabled so that
- * no writes to the 'fpstate' can occur from context
- * swiches.
- *
- * Must be followed by a fpu__current_fpstate_write_end().
- */
-void fpu__current_fpstate_write_begin(void)
-{
- struct fpu *fpu = &current->thread.fpu;
-
- /*
- * Ensure that the context-switching code does not write
- * over the fpstate while we are doing our update.
- */
- preempt_disable();
-
- /*
- * Move the fpregs in to the fpu's 'fpstate'.
- */
- fpu__activate_fpstate_read(fpu);
-
- /*
- * The caller is about to write to 'fpu'. Ensure that no
- * CPU thinks that its fpregs match the fpstate. This
- * ensures we will not be lazy and skip a XRSTOR in the
- * future.
- */
- __fpu_invalidate_fpregs_state(fpu);
-}
-
-/*
- * This function must be paired with fpu__current_fpstate_write_begin()
- *
- * This will ensure that the modified fpstate gets placed back in
- * the fpregs if necessary.
- *
- * Note: This function may be called whether or not an _actual_
- * write to the fpstate occurred.
- */
-void fpu__current_fpstate_write_end(void)
-{
- struct fpu *fpu = &current->thread.fpu;
-
- /*
- * 'fpu' now has an updated copy of the state, but the
- * registers may still be out of date. Update them with
- * an XRSTOR if they are active.
- */
- if (fpregs_active())
- copy_kernel_to_fpregs(&fpu->state);
-
- /*
- * Our update is done and the fpregs/fpstate are in sync
- * if necessary. Context switches can happen again.
- */
- preempt_enable();
-}
-
-/*
* 'fpu__restore()' is called to copy FPU registers from
* the FPU fpstate to the live hw registers and to activate
* access to the hardware registers, so that FPU instructions
@@ -389,7 +316,7 @@ void fpu__current_fpstate_write_end(void)
*/
void fpu__restore(struct fpu *fpu)
{
- fpu__activate_curr(fpu);
+ fpu__initialize(fpu);
/* Avoid __kernel_fpu_begin() right after fpregs_activate() */
kernel_fpu_disable();
@@ -414,15 +341,17 @@ void fpu__drop(struct fpu *fpu)
{
preempt_disable();
- if (fpu->fpregs_active) {
- /* Ignore delayed exceptions from user space */
- asm volatile("1: fwait\n"
- "2:\n"
- _ASM_EXTABLE(1b, 2b));
- fpregs_deactivate(fpu);
+ if (fpu == &current->thread.fpu) {
+ if (fpu->initialized) {
+ /* Ignore delayed exceptions from user space */
+ asm volatile("1: fwait\n"
+ "2:\n"
+ _ASM_EXTABLE(1b, 2b));
+ fpregs_deactivate(fpu);
+ }
}
- fpu->fpstate_active = 0;
+ fpu->initialized = 0;
trace_x86_fpu_dropped(fpu);
@@ -462,9 +391,11 @@ void fpu__clear(struct fpu *fpu)
* Make sure fpstate is cleared and initialized.
*/
if (static_cpu_has(X86_FEATURE_FPU)) {
- fpu__activate_curr(fpu);
+ preempt_disable();
+ fpu__initialize(fpu);
user_fpu_begin();
copy_init_fpstate_to_fpregs();
+ preempt_enable();
}
}
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index d5d44c452624..7affb7e3d9a5 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -240,7 +240,7 @@ static void __init fpu__init_system_ctx_switch(void)
WARN_ON_FPU(!on_boot_cpu);
on_boot_cpu = 0;
- WARN_ON_FPU(current->thread.fpu.fpstate_active);
+ WARN_ON_FPU(current->thread.fpu.initialized);
}
/*
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index b188b16841e3..3ea151372389 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -16,14 +16,14 @@ int regset_fpregs_active(struct task_struct *target, const struct user_regset *r
{
struct fpu *target_fpu = &target->thread.fpu;
- return target_fpu->fpstate_active ? regset->n : 0;
+ return target_fpu->initialized ? regset->n : 0;
}
int regset_xregset_fpregs_active(struct task_struct *target, const struct user_regset *regset)
{
struct fpu *target_fpu = &target->thread.fpu;
- if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->fpstate_active)
+ if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->initialized)
return regset->n;
else
return 0;
@@ -38,7 +38,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
if (!boot_cpu_has(X86_FEATURE_FXSR))
return -ENODEV;
- fpu__activate_fpstate_read(fpu);
+ fpu__prepare_read(fpu);
fpstate_sanitize_xstate(fpu);
return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
@@ -55,7 +55,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
if (!boot_cpu_has(X86_FEATURE_FXSR))
return -ENODEV;
- fpu__activate_fpstate_write(fpu);
+ fpu__prepare_write(fpu);
fpstate_sanitize_xstate(fpu);
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
@@ -89,10 +89,13 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
xsave = &fpu->state.xsave;
- fpu__activate_fpstate_read(fpu);
+ fpu__prepare_read(fpu);
if (using_compacted_format()) {
- ret = copyout_from_xsaves(pos, count, kbuf, ubuf, xsave);
+ if (kbuf)
+ ret = copy_xstate_to_kernel(kbuf, xsave, pos, count);
+ else
+ ret = copy_xstate_to_user(ubuf, xsave, pos, count);
} else {
fpstate_sanitize_xstate(fpu);
/*
@@ -129,28 +132,29 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
xsave = &fpu->state.xsave;
- fpu__activate_fpstate_write(fpu);
+ fpu__prepare_write(fpu);
- if (boot_cpu_has(X86_FEATURE_XSAVES))
- ret = copyin_to_xsaves(kbuf, ubuf, xsave);
- else
+ if (using_compacted_format()) {
+ if (kbuf)
+ ret = copy_kernel_to_xstate(xsave, kbuf);
+ else
+ ret = copy_user_to_xstate(xsave, ubuf);
+ } else {
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
-
- /*
- * In case of failure, mark all states as init:
- */
- if (ret)
- fpstate_init(&fpu->state);
+ if (!ret)
+ ret = validate_xstate_header(&xsave->header);
+ }
/*
* mxcsr reserved bits must be masked to zero for security reasons.
*/
xsave->i387.mxcsr &= mxcsr_feature_mask;
- xsave->header.xfeatures &= xfeatures_mask;
+
/*
- * These bits must be zero.
+ * In case of failure, mark all states as init:
*/
- memset(&xsave->header.reserved, 0, 48);
+ if (ret)
+ fpstate_init(&fpu->state);
return ret;
}
@@ -299,7 +303,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
struct fpu *fpu = &target->thread.fpu;
struct user_i387_ia32_struct env;
- fpu__activate_fpstate_read(fpu);
+ fpu__prepare_read(fpu);
if (!boot_cpu_has(X86_FEATURE_FPU))
return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
@@ -329,7 +333,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
struct user_i387_ia32_struct env;
int ret;
- fpu__activate_fpstate_write(fpu);
+ fpu__prepare_write(fpu);
fpstate_sanitize_xstate(fpu);
if (!boot_cpu_has(X86_FEATURE_FPU))
@@ -369,7 +373,7 @@ int dump_fpu(struct pt_regs *regs, struct user_i387_struct *ufpu)
struct fpu *fpu = &tsk->thread.fpu;
int fpvalid;
- fpvalid = fpu->fpstate_active;
+ fpvalid = fpu->initialized;
if (fpvalid)
fpvalid = !fpregs_get(tsk, NULL,
0, sizeof(struct user_i387_ia32_struct),
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 83c23c230b4c..fb639e70048f 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -155,7 +155,8 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
*/
int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
{
- struct xregs_state *xsave = &current->thread.fpu.state.xsave;
+ struct fpu *fpu = &current->thread.fpu;
+ struct xregs_state *xsave = &fpu->state.xsave;
struct task_struct *tsk = current;
int ia32_fxstate = (buf != buf_fx);
@@ -170,13 +171,13 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
sizeof(struct user_i387_ia32_struct), NULL,
(struct _fpstate_32 __user *) buf) ? -1 : 1;
- if (fpregs_active() || using_compacted_format()) {
+ if (fpu->initialized || using_compacted_format()) {
/* Save the live register state to the user directly. */
if (copy_fpregs_to_sigframe(buf_fx))
return -1;
/* Update the thread's fxstate to save the fsave header. */
if (ia32_fxstate)
- copy_fxregs_to_kernel(&tsk->thread.fpu);
+ copy_fxregs_to_kernel(fpu);
} else {
/*
* It is a *bug* if kernel uses compacted-format for xsave
@@ -189,7 +190,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
return -1;
}
- fpstate_sanitize_xstate(&tsk->thread.fpu);
+ fpstate_sanitize_xstate(fpu);
if (__copy_to_user(buf_fx, xsave, fpu_user_xstate_size))
return -1;
}
@@ -213,8 +214,11 @@ sanitize_restored_xstate(struct task_struct *tsk,
struct xstate_header *header = &xsave->header;
if (use_xsave()) {
- /* These bits must be zero. */
- memset(header->reserved, 0, 48);
+ /*
+ * Note: we don't need to zero the reserved bits in the
+ * xstate_header here because we either didn't copy them at all,
+ * or we checked earlier that they aren't set.
+ */
/*
* Init the state that is not present in the memory
@@ -223,7 +227,7 @@ sanitize_restored_xstate(struct task_struct *tsk,
if (fx_only)
header->xfeatures = XFEATURE_MASK_FPSSE;
else
- header->xfeatures &= (xfeatures_mask & xfeatures);
+ header->xfeatures &= xfeatures;
}
if (use_fxsr()) {
@@ -279,7 +283,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
if (!access_ok(VERIFY_READ, buf, size))
return -EACCES;
- fpu__activate_curr(fpu);
+ fpu__initialize(fpu);
if (!static_cpu_has(X86_FEATURE_FPU))
return fpregs_soft_set(current, NULL,
@@ -307,28 +311,29 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
/*
* For 32-bit frames with fxstate, copy the user state to the
* thread's fpu state, reconstruct fxstate from the fsave
- * header. Sanitize the copied state etc.
+ * header. Validate and sanitize the copied state.
*/
struct fpu *fpu = &tsk->thread.fpu;
struct user_i387_ia32_struct env;
int err = 0;
/*
- * Drop the current fpu which clears fpu->fpstate_active. This ensures
+ * Drop the current fpu which clears fpu->initialized. This ensures
* that any context-switch during the copy of the new state,
* avoids the intermediate state from getting restored/saved.
* Thus avoiding the new restored state from getting corrupted.
* We will be ready to restore/save the state only after
- * fpu->fpstate_active is again set.
+ * fpu->initialized is again set.
*/
fpu__drop(fpu);
if (using_compacted_format()) {
- err = copyin_to_xsaves(NULL, buf_fx,
- &fpu->state.xsave);
+ err = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
} else {
- err = __copy_from_user(&fpu->state.xsave,
- buf_fx, state_size);
+ err = __copy_from_user(&fpu->state.xsave, buf_fx, state_size);
+
+ if (!err && state_size > offsetof(struct xregs_state, header))
+ err = validate_xstate_header(&fpu->state.xsave.header);
}
if (err || __copy_from_user(&env, buf, sizeof(env))) {
@@ -339,7 +344,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
sanitize_restored_xstate(tsk, &env, xfeatures, fx_only);
}
- fpu->fpstate_active = 1;
+ fpu->initialized = 1;
preempt_disable();
fpu__restore(fpu);
preempt_enable();
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index c24ac1efb12d..f1d5476c9022 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -483,6 +483,30 @@ int using_compacted_format(void)
return boot_cpu_has(X86_FEATURE_XSAVES);
}
+/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
+int validate_xstate_header(const struct xstate_header *hdr)
+{
+ /* No unknown or supervisor features may be set */
+ if (hdr->xfeatures & (~xfeatures_mask | XFEATURE_MASK_SUPERVISOR))
+ return -EINVAL;
+
+ /* Userspace must use the uncompacted format */
+ if (hdr->xcomp_bv)
+ return -EINVAL;
+
+ /*
+ * If 'reserved' is shrunken to add a new field, make sure to validate
+ * that new field here!
+ */
+ BUILD_BUG_ON(sizeof(hdr->reserved) != 48);
+
+ /* No reserved bits may be set */
+ if (memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved)))
+ return -EINVAL;
+
+ return 0;
+}
+
static void __xstate_dump_leaves(void)
{
int i;
@@ -867,7 +891,7 @@ const void *get_xsave_field_ptr(int xsave_state)
{
struct fpu *fpu = &current->thread.fpu;
- if (!fpu->fpstate_active)
+ if (!fpu->initialized)
return NULL;
/*
* fpu__save() takes the CPU's xstate registers
@@ -921,38 +945,129 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
#endif /* ! CONFIG_ARCH_HAS_PKEYS */
/*
+ * Weird legacy quirk: SSE and YMM states store information in the
+ * MXCSR and MXCSR_FLAGS fields of the FP area. That means if the FP
+ * area is marked as unused in the xfeatures header, we need to copy
+ * MXCSR and MXCSR_FLAGS if either SSE or YMM are in use.
+ */
+static inline bool xfeatures_mxcsr_quirk(u64 xfeatures)
+{
+ if (!(xfeatures & (XFEATURE_MASK_SSE|XFEATURE_MASK_YMM)))
+ return false;
+
+ if (xfeatures & XFEATURE_MASK_FP)
+ return false;
+
+ return true;
+}
+
+/*
* This is similar to user_regset_copyout(), but will not add offset to
* the source data pointer or increment pos, count, kbuf, and ubuf.
*/
-static inline int xstate_copyout(unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf,
- const void *data, const int start_pos,
- const int end_pos)
+static inline void
+__copy_xstate_to_kernel(void *kbuf, const void *data,
+ unsigned int offset, unsigned int size, unsigned int size_total)
{
- if ((count == 0) || (pos < start_pos))
- return 0;
+ if (offset < size_total) {
+ unsigned int copy = min(size, size_total - offset);
- if (end_pos < 0 || pos < end_pos) {
- unsigned int copy = (end_pos < 0 ? count : min(count, end_pos - pos));
+ memcpy(kbuf + offset, data, copy);
+ }
+}
- if (kbuf) {
- memcpy(kbuf + pos, data, copy);
- } else {
- if (__copy_to_user(ubuf + pos, data, copy))
- return -EFAULT;
+/*
+ * Convert from kernel XSAVES compacted format to standard format and copy
+ * to a kernel-space ptrace buffer.
+ *
+ * It supports partial copy but pos always starts from zero. This is called
+ * from xstateregs_get() and there we check the CPU has XSAVES.
+ */
+int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total)
+{
+ unsigned int offset, size;
+ struct xstate_header header;
+ int i;
+
+ /*
+ * Currently copy_regset_to_user() starts from pos 0:
+ */
+ if (unlikely(offset_start != 0))
+ return -EFAULT;
+
+ /*
+ * The destination is a ptrace buffer; we put in only user xstates:
+ */
+ memset(&header, 0, sizeof(header));
+ header.xfeatures = xsave->header.xfeatures;
+ header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR;
+
+ /*
+ * Copy xregs_state->header:
+ */
+ offset = offsetof(struct xregs_state, header);
+ size = sizeof(header);
+
+ __copy_xstate_to_kernel(kbuf, &header, offset, size, size_total);
+
+ for (i = 0; i < XFEATURE_MAX; i++) {
+ /*
+ * Copy only in-use xstates:
+ */
+ if ((header.xfeatures >> i) & 1) {
+ void *src = __raw_xsave_addr(xsave, 1 << i);
+
+ offset = xstate_offsets[i];
+ size = xstate_sizes[i];
+
+ /* The next component has to fit fully into the output buffer: */
+ if (offset + size > size_total)
+ break;
+
+ __copy_xstate_to_kernel(kbuf, src, offset, size, size_total);
}
+
+ }
+
+ if (xfeatures_mxcsr_quirk(header.xfeatures)) {
+ offset = offsetof(struct fxregs_state, mxcsr);
+ size = MXCSR_AND_FLAGS_SIZE;
+ __copy_xstate_to_kernel(kbuf, &xsave->i387.mxcsr, offset, size, size_total);
+ }
+
+ /*
+ * Fill xsave->i387.sw_reserved value for ptrace frame:
+ */
+ offset = offsetof(struct fxregs_state, sw_reserved);
+ size = sizeof(xstate_fx_sw_bytes);
+
+ __copy_xstate_to_kernel(kbuf, xstate_fx_sw_bytes, offset, size, size_total);
+
+ return 0;
+}
+
+static inline int
+__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int offset, unsigned int size, unsigned int size_total)
+{
+ if (!size)
+ return 0;
+
+ if (offset < size_total) {
+ unsigned int copy = min(size, size_total - offset);
+
+ if (__copy_to_user(ubuf + offset, data, copy))
+ return -EFAULT;
}
return 0;
}
/*
* Convert from kernel XSAVES compacted format to standard format and copy
- * to a ptrace buffer. It supports partial copy but pos always starts from
+ * to a user-space buffer. It supports partial copy but pos always starts from
* zero. This is called from xstateregs_get() and there we check the CPU
* has XSAVES.
*/
-int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
- void __user *ubuf, struct xregs_state *xsave)
+int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total)
{
unsigned int offset, size;
int ret, i;
@@ -961,7 +1076,7 @@ int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
/*
* Currently copy_regset_to_user() starts from pos 0:
*/
- if (unlikely(pos != 0))
+ if (unlikely(offset_start != 0))
return -EFAULT;
/*
@@ -977,8 +1092,7 @@ int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
offset = offsetof(struct xregs_state, header);
size = sizeof(header);
- ret = xstate_copyout(offset, size, kbuf, ubuf, &header, 0, count);
-
+ ret = __copy_xstate_to_user(ubuf, &header, offset, size, size_total);
if (ret)
return ret;
@@ -992,25 +1106,30 @@ int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
offset = xstate_offsets[i];
size = xstate_sizes[i];
- ret = xstate_copyout(offset, size, kbuf, ubuf, src, 0, count);
+ /* The next component has to fit fully into the output buffer: */
+ if (offset + size > size_total)
+ break;
+ ret = __copy_xstate_to_user(ubuf, src, offset, size, size_total);
if (ret)
return ret;
-
- if (offset + size >= count)
- break;
}
}
+ if (xfeatures_mxcsr_quirk(header.xfeatures)) {
+ offset = offsetof(struct fxregs_state, mxcsr);
+ size = MXCSR_AND_FLAGS_SIZE;
+ __copy_xstate_to_user(ubuf, &xsave->i387.mxcsr, offset, size, size_total);
+ }
+
/*
* Fill xsave->i387.sw_reserved value for ptrace frame:
*/
offset = offsetof(struct fxregs_state, sw_reserved);
size = sizeof(xstate_fx_sw_bytes);
- ret = xstate_copyout(offset, size, kbuf, ubuf, xstate_fx_sw_bytes, 0, count);
-
+ ret = __copy_xstate_to_user(ubuf, xstate_fx_sw_bytes, offset, size, size_total);
if (ret)
return ret;
@@ -1018,55 +1137,98 @@ int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
}
/*
- * Convert from a ptrace standard-format buffer to kernel XSAVES format
- * and copy to the target thread. This is called from xstateregs_set() and
- * there we check the CPU has XSAVES and a whole standard-sized buffer
- * exists.
+ * Convert from a ptrace standard-format kernel buffer to kernel XSAVES format
+ * and copy to the target thread. This is called from xstateregs_set().
*/
-int copyin_to_xsaves(const void *kbuf, const void __user *ubuf,
- struct xregs_state *xsave)
+int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
{
unsigned int offset, size;
int i;
- u64 xfeatures;
- u64 allowed_features;
+ struct xstate_header hdr;
offset = offsetof(struct xregs_state, header);
- size = sizeof(xfeatures);
+ size = sizeof(hdr);
- if (kbuf) {
- memcpy(&xfeatures, kbuf + offset, size);
- } else {
- if (__copy_from_user(&xfeatures, ubuf + offset, size))
- return -EFAULT;
+ memcpy(&hdr, kbuf + offset, size);
+
+ if (validate_xstate_header(&hdr))
+ return -EINVAL;
+
+ for (i = 0; i < XFEATURE_MAX; i++) {
+ u64 mask = ((u64)1 << i);
+
+ if (hdr.xfeatures & mask) {
+ void *dst = __raw_xsave_addr(xsave, 1 << i);
+
+ offset = xstate_offsets[i];
+ size = xstate_sizes[i];
+
+ memcpy(dst, kbuf + offset, size);
+ }
+ }
+
+ if (xfeatures_mxcsr_quirk(hdr.xfeatures)) {
+ offset = offsetof(struct fxregs_state, mxcsr);
+ size = MXCSR_AND_FLAGS_SIZE;
+ memcpy(&xsave->i387.mxcsr, kbuf + offset, size);
}
/*
- * Reject if the user sets any disabled or supervisor features:
+ * The state that came in from userspace was user-state only.
+ * Mask all the user states out of 'xfeatures':
+ */
+ xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR;
+
+ /*
+ * Add back in the features that came in from userspace:
*/
- allowed_features = xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR;
+ xsave->header.xfeatures |= hdr.xfeatures;
- if (xfeatures & ~allowed_features)
+ return 0;
+}
+
+/*
+ * Convert from a ptrace or sigreturn standard-format user-space buffer to
+ * kernel XSAVES format and copy to the target thread. This is called from
+ * xstateregs_set(), as well as potentially from the sigreturn() and
+ * rt_sigreturn() system calls.
+ */
+int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
+{
+ unsigned int offset, size;
+ int i;
+ struct xstate_header hdr;
+
+ offset = offsetof(struct xregs_state, header);
+ size = sizeof(hdr);
+
+ if (__copy_from_user(&hdr, ubuf + offset, size))
+ return -EFAULT;
+
+ if (validate_xstate_header(&hdr))
return -EINVAL;
for (i = 0; i < XFEATURE_MAX; i++) {
u64 mask = ((u64)1 << i);
- if (xfeatures & mask) {
+ if (hdr.xfeatures & mask) {
void *dst = __raw_xsave_addr(xsave, 1 << i);
offset = xstate_offsets[i];
size = xstate_sizes[i];
- if (kbuf) {
- memcpy(dst, kbuf + offset, size);
- } else {
- if (__copy_from_user(dst, ubuf + offset, size))
- return -EFAULT;
- }
+ if (__copy_from_user(dst, ubuf + offset, size))
+ return -EFAULT;
}
}
+ if (xfeatures_mxcsr_quirk(hdr.xfeatures)) {
+ offset = offsetof(struct fxregs_state, mxcsr);
+ size = MXCSR_AND_FLAGS_SIZE;
+ if (__copy_from_user(&xsave->i387.mxcsr, ubuf + offset, size))
+ return -EFAULT;
+ }
+
/*
* The state that came in from userspace was user-state only.
* Mask all the user states out of 'xfeatures':
@@ -1076,7 +1238,7 @@ int copyin_to_xsaves(const void *kbuf, const void __user *ubuf,
/*
* Add back in the features that came in from userspace:
*/
- xsave->header.xfeatures |= xfeatures;
+ xsave->header.xfeatures |= hdr.xfeatures;
return 0;
}
diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c
index 50c89e8a95f2..7ebcc4a74438 100644
--- a/arch/x86/kernel/io_delay.c
+++ b/arch/x86/kernel/io_delay.c
@@ -58,7 +58,7 @@ static int __init dmi_io_delay_0xed_port(const struct dmi_system_id *id)
* Quirk table for systems that misbehave (lock up, etc.) if port
* 0x80 is used:
*/
-static struct dmi_system_id __initdata io_delay_0xed_port_dmi_table[] = {
+static const struct dmi_system_id io_delay_0xed_port_dmi_table[] __initconst = {
{
.callback = dmi_io_delay_0xed_port,
.ident = "Compaq Presario V6000",
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 1f38d9a4d9de..d4eb450144fd 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -64,7 +64,7 @@ static void call_on_stack(void *func, void *stack)
static inline void *current_stack(void)
{
- return (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1));
+ return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1));
}
static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
@@ -88,7 +88,7 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
/* Save the next esp at the bottom of the stack */
prev_esp = (u32 *)irqstk;
- *prev_esp = current_stack_pointer();
+ *prev_esp = current_stack_pointer;
if (unlikely(overflow))
call_on_stack(print_stack_overflow, isp);
@@ -139,7 +139,7 @@ void do_softirq_own_stack(void)
/* Push the previous esp onto the stack */
prev_esp = (u32 *)irqstk;
- *prev_esp = current_stack_pointer();
+ *prev_esp = current_stack_pointer;
call_on_stack(__do_softirq, isp);
}
diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c
index 4b0592ca9e47..8c1cc08f514f 100644
--- a/arch/x86/kernel/ksysfs.c
+++ b/arch/x86/kernel/ksysfs.c
@@ -299,7 +299,7 @@ static int __init create_setup_data_nodes(struct kobject *parent)
return 0;
out_clean_nodes:
- for (j = i - 1; j > 0; j--)
+ for (j = i - 1; j >= 0; j--)
cleanup_setup_data_node(*(kobjp + j));
kfree(kobjp);
out_setup_data_kobj:
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 874827b0d7ca..8bb9594d0761 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -117,7 +117,11 @@ static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b,
return NULL;
}
-void kvm_async_pf_task_wait(u32 token)
+/*
+ * @interrupt_kernel: Is this called from a routine which interrupts the kernel
+ * (other than user space)?
+ */
+void kvm_async_pf_task_wait(u32 token, int interrupt_kernel)
{
u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
@@ -140,7 +144,10 @@ void kvm_async_pf_task_wait(u32 token)
n.token = token;
n.cpu = smp_processor_id();
- n.halted = is_idle_task(current) || preempt_count() > 1;
+ n.halted = is_idle_task(current) ||
+ (IS_ENABLED(CONFIG_PREEMPT_COUNT)
+ ? preempt_count() > 1 || rcu_preempt_depth()
+ : interrupt_kernel);
init_swait_queue_head(&n.wq);
hlist_add_head(&n.link, &b->list);
raw_spin_unlock(&b->lock);
@@ -180,7 +187,7 @@ static void apf_task_wake_one(struct kvm_task_sleep_node *n)
hlist_del_init(&n->link);
if (n->halted)
smp_send_reschedule(n->cpu);
- else if (swait_active(&n->wq))
+ else if (swq_has_sleeper(&n->wq))
swake_up(&n->wq);
}
@@ -268,7 +275,7 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
case KVM_PV_REASON_PAGE_NOT_PRESENT:
/* page is swapped out by the host. */
prev_state = exception_enter();
- kvm_async_pf_task_wait((u32)read_cr2());
+ kvm_async_pf_task_wait((u32)read_cr2(), !user_mode(regs));
exception_exit(prev_state);
break;
case KVM_PV_REASON_PAGE_READY:
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index a14df9eecfed..19a3e8f961c7 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -327,7 +327,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
.set_debugreg = native_set_debugreg,
.read_cr0 = native_read_cr0,
.write_cr0 = native_write_cr0,
- .read_cr4 = native_read_cr4,
.write_cr4 = native_write_cr4,
#ifdef CONFIG_X86_64
.read_cr8 = native_read_cr8,
@@ -343,7 +342,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
.set_ldt = native_set_ldt,
.load_gdt = native_load_gdt,
.load_idt = native_load_idt,
- .store_idt = native_store_idt,
.store_tr = native_store_tr,
.load_tls = native_load_tls,
#ifdef CONFIG_X86_64
@@ -411,8 +409,6 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
.set_pte = native_set_pte,
.set_pte_at = native_set_pte_at,
.set_pmd = native_set_pmd,
- .set_pmd_at = native_set_pmd_at,
- .pte_update = paravirt_nop,
.ptep_modify_prot_start = __ptep_modify_prot_start,
.ptep_modify_prot_commit = __ptep_modify_prot_commit,
@@ -424,7 +420,6 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
.pmd_clear = native_pmd_clear,
#endif
.set_pud = native_set_pud,
- .set_pud_at = native_set_pud_at,
.pmd_val = PTE_IDENT,
.make_pmd = PTE_IDENT,
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 54984b142641..54180fa6f66f 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -150,7 +150,7 @@ static int __init set_kbd_reboot(const struct dmi_system_id *d)
/*
* This is a single dmi_table handling all reboot quirks.
*/
-static struct dmi_system_id __initdata reboot_dmi_table[] = {
+static const struct dmi_system_id reboot_dmi_table[] __initconst = {
/* Acer */
{ /* Handle reboot issue on Acer Aspire one */
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d84afb0a322d..0957dd73d127 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1178,8 +1178,11 @@ void __init setup_arch(char **cmdline_p)
* with the current CR4 value. This may not be necessary, but
* auditing all the early-boot CR4 manipulation would be needed to
* rule it out.
+ *
+ * Mask off features that don't work outside long mode (just
+ * PCIDE for now).
*/
- mmu_cr4_features = __read_cr4();
+ mmu_cr4_features = __read_cr4() & ~X86_CR4_PCIDE;
memblock_set_current_limit(get_max_mapped());
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index e04442345fc0..4e188fda5961 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -263,7 +263,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
sp = (unsigned long) ka->sa.sa_restorer;
}
- if (fpu->fpstate_active) {
+ if (fpu->initialized) {
sp = fpu__alloc_mathframe(sp, IS_ENABLED(CONFIG_X86_32),
&buf_fx, &math_size);
*fpstate = (void __user *)sp;
@@ -279,7 +279,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
return (void __user *)-1L;
/* save i387 and extended state */
- if (fpu->fpstate_active &&
+ if (fpu->initialized &&
copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size) < 0)
return (void __user *)-1L;
@@ -755,7 +755,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
/*
* Ensure the signal handler starts with the new fpu state.
*/
- if (fpu->fpstate_active)
+ if (fpu->initialized)
fpu__clear(fpu);
}
signal_setup_done(failed, ksig, stepping);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index cd6622c3204e..ad59edd84de7 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -226,16 +226,12 @@ static int enable_start_cpu0;
static void notrace start_secondary(void *unused)
{
/*
- * Don't put *anything* before cpu_init(), SMP booting is too
- * fragile that we want to limit the things done here to the
- * most necessary things.
+ * Don't put *anything* except direct CPU state initialization
+ * before cpu_init(), SMP booting is too fragile that we want to
+ * limit the things done here to the most necessary things.
*/
- cpu_init();
- x86_cpuinit.early_percpu_clock_init();
- preempt_disable();
- smp_callin();
-
- enable_start_cpu0 = 0;
+ if (boot_cpu_has(X86_FEATURE_PCID))
+ __write_cr4(__read_cr4() | X86_CR4_PCIDE);
#ifdef CONFIG_X86_32
/* switch away from the initial page table */
@@ -243,6 +239,13 @@ static void notrace start_secondary(void *unused)
__flush_tlb_all();
#endif
+ cpu_init();
+ x86_cpuinit.early_percpu_clock_init();
+ preempt_disable();
+ smp_callin();
+
+ enable_start_cpu0 = 0;
+
/* otherwise gcc will move up smp_processor_id before the cpu_init */
barrier();
/*
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 34ea3651362e..67db4f43309e 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -142,7 +142,7 @@ void ist_begin_non_atomic(struct pt_regs *regs)
* from double_fault.
*/
BUG_ON((unsigned long)(current_top_of_stack() -
- current_stack_pointer()) >= THREAD_SIZE);
+ current_stack_pointer) >= THREAD_SIZE);
preempt_enable_no_resched();
}
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 3ea624452f93..3c48bc8bf08c 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -23,6 +23,7 @@ config KVM
depends on HIGH_RES_TIMERS
# for TASKSTATS/TASK_DELAY_ACCT:
depends on NET && MULTIUSER
+ depends on X86_LOCAL_APIC
select PREEMPT_NOTIFIERS
select MMU_NOTIFIER
select ANON_INODES
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 1ea3c0e1e3a9..0bc5c1315708 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -59,7 +59,6 @@ static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned x86_feature)
{
unsigned x86_leaf = x86_feature / 32;
- BUILD_BUG_ON(!__builtin_constant_p(x86_leaf));
BUILD_BUG_ON(x86_leaf >= ARRAY_SIZE(reverse_cpuid));
BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 16bf6655aa85..d90cdc77e077 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -425,8 +425,10 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
#op " %al \n\t" \
FOP_RET
-asm(".global kvm_fastop_exception \n"
- "kvm_fastop_exception: xor %esi, %esi; ret");
+asm(".pushsection .fixup, \"ax\"\n"
+ ".global kvm_fastop_exception \n"
+ "kvm_fastop_exception: xor %esi, %esi; ret\n"
+ ".popsection");
FOP_START(setcc)
FOP_SETCC(seto)
@@ -4102,10 +4104,12 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt)
ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
if (efer & EFER_LMA) {
u64 maxphyaddr;
- u32 eax = 0x80000008;
+ u32 eax, ebx, ecx, edx;
- if (ctxt->ops->get_cpuid(ctxt, &eax, NULL, NULL,
- NULL, false))
+ eax = 0x80000008;
+ ecx = 0;
+ if (ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx,
+ &edx, false))
maxphyaddr = eax & 0xff;
else
maxphyaddr = 36;
@@ -5296,7 +5300,6 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
{
- register void *__sp asm(_ASM_SP);
ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
if (!(ctxt->d & ByteOp))
@@ -5304,7 +5307,7 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
: "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
- [fastop]"+S"(fop), "+r"(__sp)
+ [fastop]"+S"(fop), ASM_CALL_CONSTRAINT
: "c"(ctxt->src2.val));
ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index aaf10b6f5380..69c5612be786 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1324,6 +1324,10 @@ static void apic_timer_expired(struct kvm_lapic *apic)
atomic_inc(&apic->lapic_timer.pending);
kvm_set_pending_timer(vcpu);
+ /*
+ * For x86, the atomic_inc() is serialized, thus
+ * using swait_active() is safe.
+ */
if (swait_active(q))
swake_up(q);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index eca30c1eb1d9..106d4a029a8a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3837,7 +3837,7 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
case KVM_PV_REASON_PAGE_NOT_PRESENT:
vcpu->arch.apf.host_apf_reason = 0;
local_irq_disable();
- kvm_async_pf_task_wait(fault_address);
+ kvm_async_pf_task_wait(fault_address, 0);
local_irq_enable();
break;
case KVM_PV_REASON_PAGE_READY:
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 2c1cfe68a9af..0e68f0b3cbf7 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1200,7 +1200,6 @@ static void avic_init_vmcb(struct vcpu_svm *svm)
vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK;
vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID_COUNT;
vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
- svm->vcpu.arch.apicv_active = true;
}
static void init_vmcb(struct vcpu_svm *svm)
@@ -1316,7 +1315,7 @@ static void init_vmcb(struct vcpu_svm *svm)
set_intercept(svm, INTERCEPT_PAUSE);
}
- if (avic)
+ if (kvm_vcpu_apicv_active(&svm->vcpu))
avic_init_vmcb(svm);
/*
@@ -1600,6 +1599,23 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE);
}
+static int avic_init_vcpu(struct vcpu_svm *svm)
+{
+ int ret;
+
+ if (!kvm_vcpu_apicv_active(&svm->vcpu))
+ return 0;
+
+ ret = avic_init_backing_page(&svm->vcpu);
+ if (ret)
+ return ret;
+
+ INIT_LIST_HEAD(&svm->ir_list);
+ spin_lock_init(&svm->ir_list_lock);
+
+ return ret;
+}
+
static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
{
struct vcpu_svm *svm;
@@ -1636,14 +1652,9 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
if (!hsave_page)
goto free_page3;
- if (avic) {
- err = avic_init_backing_page(&svm->vcpu);
- if (err)
- goto free_page4;
-
- INIT_LIST_HEAD(&svm->ir_list);
- spin_lock_init(&svm->ir_list_lock);
- }
+ err = avic_init_vcpu(svm);
+ if (err)
+ goto free_page4;
/* We initialize this flag to true to make sure that the is_running
* bit would be set the first time the vcpu is loaded.
@@ -4395,9 +4406,9 @@ static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
return;
}
-static bool svm_get_enable_apicv(void)
+static bool svm_get_enable_apicv(struct kvm_vcpu *vcpu)
{
- return avic;
+ return avic && irqchip_split(vcpu->kvm);
}
static void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
@@ -4414,7 +4425,7 @@ static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb *vmcb = svm->vmcb;
- if (!avic)
+ if (!kvm_vcpu_apicv_active(&svm->vcpu))
return;
vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
@@ -5302,6 +5313,7 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
*/
if (info->rep_prefix != REPE_PREFIX)
goto out;
+ break;
case SVM_EXIT_IOIO: {
u64 exit_info;
u32 bytes;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4253adef9044..a2b804e10c95 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -200,6 +200,8 @@ struct loaded_vmcs {
int cpu;
bool launched;
bool nmi_known_unmasked;
+ unsigned long vmcs_host_cr3; /* May not match real cr3 */
+ unsigned long vmcs_host_cr4; /* May not match real cr4 */
struct list_head loaded_vmcss_on_cpu_link;
};
@@ -600,8 +602,6 @@ struct vcpu_vmx {
int gs_ldt_reload_needed;
int fs_reload_needed;
u64 msr_host_bndcfgs;
- unsigned long vmcs_host_cr3; /* May not match real cr3 */
- unsigned long vmcs_host_cr4; /* May not match real cr4 */
} host_state;
struct {
int vm86_active;
@@ -2202,46 +2202,44 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
struct pi_desc old, new;
unsigned int dest;
- if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
- !irq_remapping_cap(IRQ_POSTING_CAP) ||
- !kvm_vcpu_apicv_active(vcpu))
+ /*
+ * In case of hot-plug or hot-unplug, we may have to undo
+ * vmx_vcpu_pi_put even if there is no assigned device. And we
+ * always keep PI.NDST up to date for simplicity: it makes the
+ * code easier, and CPU migration is not a fast path.
+ */
+ if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
return;
+ /*
+ * First handle the simple case where no cmpxchg is necessary; just
+ * allow posting non-urgent interrupts.
+ *
+ * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
+ * PI.NDST: pi_post_block will do it for us and the wakeup_handler
+ * expects the VCPU to be on the blocked_vcpu_list that matches
+ * PI.NDST.
+ */
+ if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR ||
+ vcpu->cpu == cpu) {
+ pi_clear_sn(pi_desc);
+ return;
+ }
+
+ /* The full case. */
do {
old.control = new.control = pi_desc->control;
- /*
- * If 'nv' field is POSTED_INTR_WAKEUP_VECTOR, there
- * are two possible cases:
- * 1. After running 'pre_block', context switch
- * happened. For this case, 'sn' was set in
- * vmx_vcpu_put(), so we need to clear it here.
- * 2. After running 'pre_block', we were blocked,
- * and woken up by some other guy. For this case,
- * we don't need to do anything, 'pi_post_block'
- * will do everything for us. However, we cannot
- * check whether it is case #1 or case #2 here
- * (maybe, not needed), so we also clear sn here,
- * I think it is not a big deal.
- */
- if (pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR) {
- if (vcpu->cpu != cpu) {
- dest = cpu_physical_id(cpu);
-
- if (x2apic_enabled())
- new.ndst = dest;
- else
- new.ndst = (dest << 8) & 0xFF00;
- }
+ dest = cpu_physical_id(cpu);
- /* set 'NV' to 'notification vector' */
- new.nv = POSTED_INTR_VECTOR;
- }
+ if (x2apic_enabled())
+ new.ndst = dest;
+ else
+ new.ndst = (dest << 8) & 0xFF00;
- /* Allow posting non-urgent interrupts */
new.sn = 0;
- } while (cmpxchg(&pi_desc->control, old.control,
- new.control) != old.control);
+ } while (cmpxchg64(&pi_desc->control, old.control,
+ new.control) != old.control);
}
static void decache_tsc_multiplier(struct vcpu_vmx *vmx)
@@ -5012,7 +5010,7 @@ static void vmx_disable_intercept_msr_x2apic(u32 msr, int type, bool apicv_activ
}
}
-static bool vmx_get_enable_apicv(void)
+static bool vmx_get_enable_apicv(struct kvm_vcpu *vcpu)
{
return enable_apicv;
}
@@ -5077,21 +5075,30 @@ static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
int pi_vec = nested ? POSTED_INTR_NESTED_VECTOR : POSTED_INTR_VECTOR;
if (vcpu->mode == IN_GUEST_MODE) {
- struct vcpu_vmx *vmx = to_vmx(vcpu);
-
/*
- * Currently, we don't support urgent interrupt,
- * all interrupts are recognized as non-urgent
- * interrupt, so we cannot post interrupts when
- * 'SN' is set.
+ * The vector of interrupt to be delivered to vcpu had
+ * been set in PIR before this function.
*
- * If the vcpu is in guest mode, it means it is
- * running instead of being scheduled out and
- * waiting in the run queue, and that's the only
- * case when 'SN' is set currently, warning if
- * 'SN' is set.
+ * Following cases will be reached in this block, and
+ * we always send a notification event in all cases as
+ * explained below.
+ *
+ * Case 1: vcpu keeps in non-root mode. Sending a
+ * notification event posts the interrupt to vcpu.
+ *
+ * Case 2: vcpu exits to root mode and is still
+ * runnable. PIR will be synced to vIRR before the
+ * next vcpu entry. Sending a notification event in
+ * this case has no effect, as vcpu is not in root
+ * mode.
+ *
+ * Case 3: vcpu exits to root mode and is blocked.
+ * vcpu_block() has already synced PIR to vIRR and
+ * never blocks vcpu if vIRR is not cleared. Therefore,
+ * a blocked vcpu here does not wait for any requested
+ * interrupts in PIR, and sending a notification event
+ * which has no effect is safe here.
*/
- WARN_ON_ONCE(pi_test_sn(&vmx->pi_desc));
apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec);
return true;
@@ -5169,12 +5176,12 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
*/
cr3 = __read_cr3();
vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */
- vmx->host_state.vmcs_host_cr3 = cr3;
+ vmx->loaded_vmcs->vmcs_host_cr3 = cr3;
/* Save the most likely value for this task's CR4 in the VMCS. */
cr4 = cr4_read_shadow();
vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */
- vmx->host_state.vmcs_host_cr4 = cr4;
+ vmx->loaded_vmcs->vmcs_host_cr4 = cr4;
vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */
#ifdef CONFIG_X86_64
@@ -5192,7 +5199,7 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */
- native_store_idt(&dt);
+ store_idt(&dt);
vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */
vmx->host_idt_base = dt.address;
@@ -8344,12 +8351,14 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
- trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason,
- vmcs_readl(EXIT_QUALIFICATION),
- vmx->idt_vectoring_info,
- intr_info,
- vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
- KVM_ISA_VMX);
+ if (vmx->nested.nested_run_pending)
+ return false;
+
+ if (unlikely(vmx->fail)) {
+ pr_info_ratelimited("%s failed vm entry %x\n", __func__,
+ vmcs_read32(VM_INSTRUCTION_ERROR));
+ return true;
+ }
/*
* The host physical addresses of some pages of guest memory
@@ -8363,14 +8372,12 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
*/
nested_mark_vmcs12_pages_dirty(vcpu);
- if (vmx->nested.nested_run_pending)
- return false;
-
- if (unlikely(vmx->fail)) {
- pr_info_ratelimited("%s failed vm entry %x\n", __func__,
- vmcs_read32(VM_INSTRUCTION_ERROR));
- return true;
- }
+ trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason,
+ vmcs_readl(EXIT_QUALIFICATION),
+ vmx->idt_vectoring_info,
+ intr_info,
+ vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
+ KVM_ISA_VMX);
switch (exit_reason) {
case EXIT_REASON_EXCEPTION_NMI:
@@ -9036,7 +9043,6 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
{
u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
- register void *__sp asm(_ASM_SP);
if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
== (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
@@ -9065,7 +9071,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
#ifdef CONFIG_X86_64
[sp]"=&r"(tmp),
#endif
- "+r"(__sp)
+ ASM_CALL_CONSTRAINT
:
[entry]"r"(entry),
[ss]"i"(__KERNEL_DS),
@@ -9265,15 +9271,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
cr3 = __get_current_cr3_fast();
- if (unlikely(cr3 != vmx->host_state.vmcs_host_cr3)) {
+ if (unlikely(cr3 != vmx->loaded_vmcs->vmcs_host_cr3)) {
vmcs_writel(HOST_CR3, cr3);
- vmx->host_state.vmcs_host_cr3 = cr3;
+ vmx->loaded_vmcs->vmcs_host_cr3 = cr3;
}
cr4 = cr4_read_shadow();
- if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) {
+ if (unlikely(cr4 != vmx->loaded_vmcs->vmcs_host_cr4)) {
vmcs_writel(HOST_CR4, cr4);
- vmx->host_state.vmcs_host_cr4 = cr4;
+ vmx->loaded_vmcs->vmcs_host_cr4 = cr4;
}
/* When single-stepping over STI and MOV SS, we must clear the
@@ -9424,12 +9430,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
| (1 << VCPU_EXREG_CR3));
vcpu->arch.regs_dirty = 0;
- vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
-
- vmx->loaded_vmcs->launched = 1;
-
- vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
-
/*
* eager fpu is enabled if PKEY is supported and CR4 is switched
* back on host, so it is safe to read guest PKRU from current
@@ -9451,6 +9451,14 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
kvm_make_request(KVM_REQ_EVENT, vcpu);
vmx->nested.nested_run_pending = 0;
+ vmx->idt_vectoring_info = 0;
+
+ vmx->exit_reason = vmx->fail ? 0xdead : vmcs_read32(VM_EXIT_REASON);
+ if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
+ return;
+
+ vmx->loaded_vmcs->launched = 1;
+ vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
vmx_complete_atomic_exit(vmx);
vmx_recover_nmi_blocking(vmx);
@@ -9581,6 +9589,13 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
+ /*
+ * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR
+ * or POSTED_INTR_WAKEUP_VECTOR.
+ */
+ vmx->pi_desc.nv = POSTED_INTR_VECTOR;
+ vmx->pi_desc.sn = 1;
+
return &vmx->vcpu;
free_vmcs:
@@ -9829,7 +9844,8 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
WARN_ON(!is_guest_mode(vcpu));
- if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code)) {
+ if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) &&
+ !to_vmx(vcpu)->nested.nested_run_pending) {
vmcs12->vm_exit_intr_error_code = fault->error_code;
nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
@@ -10525,6 +10541,11 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
if (exec_control & CPU_BASED_TPR_SHADOW) {
vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, -1ull);
vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
+ } else {
+#ifdef CONFIG_X86_64
+ exec_control |= CPU_BASED_CR8_LOAD_EXITING |
+ CPU_BASED_CR8_STORE_EXITING;
+#endif
}
/*
@@ -11388,46 +11409,30 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
- u32 vm_inst_error = 0;
/* trying to cancel vmlaunch/vmresume is a bug */
WARN_ON_ONCE(vmx->nested.nested_run_pending);
+ /*
+ * The only expected VM-instruction error is "VM entry with
+ * invalid control field(s)." Anything else indicates a
+ * problem with L0.
+ */
+ WARN_ON_ONCE(vmx->fail && (vmcs_read32(VM_INSTRUCTION_ERROR) !=
+ VMXERR_ENTRY_INVALID_CONTROL_FIELD));
+
leave_guest_mode(vcpu);
- prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
- exit_qualification);
- if (nested_vmx_store_msr(vcpu, vmcs12->vm_exit_msr_store_addr,
- vmcs12->vm_exit_msr_store_count))
- nested_vmx_abort(vcpu, VMX_ABORT_SAVE_GUEST_MSR_FAIL);
+ if (likely(!vmx->fail)) {
+ prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
+ exit_qualification);
- if (unlikely(vmx->fail))
- vm_inst_error = vmcs_read32(VM_INSTRUCTION_ERROR);
+ if (nested_vmx_store_msr(vcpu, vmcs12->vm_exit_msr_store_addr,
+ vmcs12->vm_exit_msr_store_count))
+ nested_vmx_abort(vcpu, VMX_ABORT_SAVE_GUEST_MSR_FAIL);
+ }
vmx_switch_vmcs(vcpu, &vmx->vmcs01);
-
- /*
- * TODO: SDM says that with acknowledge interrupt on exit, bit 31 of
- * the VM-exit interrupt information (valid interrupt) is always set to
- * 1 on EXIT_REASON_EXTERNAL_INTERRUPT, so we shouldn't need
- * kvm_cpu_has_interrupt(). See the commit message for details.
- */
- if (nested_exit_intr_ack_set(vcpu) &&
- exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
- kvm_cpu_has_interrupt(vcpu)) {
- int irq = kvm_cpu_get_interrupt(vcpu);
- WARN_ON(irq < 0);
- vmcs12->vm_exit_intr_info = irq |
- INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR;
- }
-
- trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
- vmcs12->exit_qualification,
- vmcs12->idt_vectoring_info_field,
- vmcs12->vm_exit_intr_info,
- vmcs12->vm_exit_intr_error_code,
- KVM_ISA_VMX);
-
vm_entry_controls_reset_shadow(vmx);
vm_exit_controls_reset_shadow(vmx);
vmx_segment_cache_clear(vmx);
@@ -11436,8 +11441,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
if (VMCS02_POOL_SIZE == 0)
nested_free_vmcs02(vmx, vmx->nested.current_vmptr);
- load_vmcs12_host_state(vcpu, vmcs12);
-
/* Update any VMCS fields that might have changed while L2 ran */
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
@@ -11486,21 +11489,57 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
*/
kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
- /*
- * Exiting from L2 to L1, we're now back to L1 which thinks it just
- * finished a VMLAUNCH or VMRESUME instruction, so we need to set the
- * success or failure flag accordingly.
- */
- if (unlikely(vmx->fail)) {
- vmx->fail = 0;
- nested_vmx_failValid(vcpu, vm_inst_error);
- } else
- nested_vmx_succeed(vcpu);
if (enable_shadow_vmcs)
vmx->nested.sync_shadow_vmcs = true;
/* in case we halted in L2 */
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+
+ if (likely(!vmx->fail)) {
+ /*
+ * TODO: SDM says that with acknowledge interrupt on
+ * exit, bit 31 of the VM-exit interrupt information
+ * (valid interrupt) is always set to 1 on
+ * EXIT_REASON_EXTERNAL_INTERRUPT, so we shouldn't
+ * need kvm_cpu_has_interrupt(). See the commit
+ * message for details.
+ */
+ if (nested_exit_intr_ack_set(vcpu) &&
+ exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
+ kvm_cpu_has_interrupt(vcpu)) {
+ int irq = kvm_cpu_get_interrupt(vcpu);
+ WARN_ON(irq < 0);
+ vmcs12->vm_exit_intr_info = irq |
+ INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR;
+ }
+
+ trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
+ vmcs12->exit_qualification,
+ vmcs12->idt_vectoring_info_field,
+ vmcs12->vm_exit_intr_info,
+ vmcs12->vm_exit_intr_error_code,
+ KVM_ISA_VMX);
+
+ load_vmcs12_host_state(vcpu, vmcs12);
+
+ return;
+ }
+
+ /*
+ * After an early L2 VM-entry failure, we're now back
+ * in L1 which thinks it just finished a VMLAUNCH or
+ * VMRESUME instruction, so we need to set the failure
+ * flag and the VM-instruction error field of the VMCS
+ * accordingly.
+ */
+ nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+ /*
+ * The emulated instruction was already skipped in
+ * nested_vmx_run, but the updated RIP was never
+ * written back to the vmcs01.
+ */
+ skip_emulated_instruction(vcpu);
+ vmx->fail = 0;
}
/*
@@ -11671,6 +11710,37 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
}
+static void __pi_post_block(struct kvm_vcpu *vcpu)
+{
+ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+ struct pi_desc old, new;
+ unsigned int dest;
+
+ do {
+ old.control = new.control = pi_desc->control;
+ WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR,
+ "Wakeup handler not enabled while the VCPU is blocked\n");
+
+ dest = cpu_physical_id(vcpu->cpu);
+
+ if (x2apic_enabled())
+ new.ndst = dest;
+ else
+ new.ndst = (dest << 8) & 0xFF00;
+
+ /* set 'NV' to 'notification vector' */
+ new.nv = POSTED_INTR_VECTOR;
+ } while (cmpxchg64(&pi_desc->control, old.control,
+ new.control) != old.control);
+
+ if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) {
+ spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+ list_del(&vcpu->blocked_vcpu_list);
+ spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+ vcpu->pre_pcpu = -1;
+ }
+}
+
/*
* This routine does the following things for vCPU which is going
* to be blocked if VT-d PI is enabled.
@@ -11686,7 +11756,6 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
*/
static int pi_pre_block(struct kvm_vcpu *vcpu)
{
- unsigned long flags;
unsigned int dest;
struct pi_desc old, new;
struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
@@ -11696,34 +11765,20 @@ static int pi_pre_block(struct kvm_vcpu *vcpu)
!kvm_vcpu_apicv_active(vcpu))
return 0;
- vcpu->pre_pcpu = vcpu->cpu;
- spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
- vcpu->pre_pcpu), flags);
- list_add_tail(&vcpu->blocked_vcpu_list,
- &per_cpu(blocked_vcpu_on_cpu,
- vcpu->pre_pcpu));
- spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock,
- vcpu->pre_pcpu), flags);
+ WARN_ON(irqs_disabled());
+ local_irq_disable();
+ if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) {
+ vcpu->pre_pcpu = vcpu->cpu;
+ spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+ list_add_tail(&vcpu->blocked_vcpu_list,
+ &per_cpu(blocked_vcpu_on_cpu,
+ vcpu->pre_pcpu));
+ spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+ }
do {
old.control = new.control = pi_desc->control;
- /*
- * We should not block the vCPU if
- * an interrupt is posted for it.
- */
- if (pi_test_on(pi_desc) == 1) {
- spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
- vcpu->pre_pcpu), flags);
- list_del(&vcpu->blocked_vcpu_list);
- spin_unlock_irqrestore(
- &per_cpu(blocked_vcpu_on_cpu_lock,
- vcpu->pre_pcpu), flags);
- vcpu->pre_pcpu = -1;
-
- return 1;
- }
-
WARN((pi_desc->sn == 1),
"Warning: SN field of posted-interrupts "
"is set before blocking\n");
@@ -11745,10 +11800,15 @@ static int pi_pre_block(struct kvm_vcpu *vcpu)
/* set 'NV' to 'wakeup vector' */
new.nv = POSTED_INTR_WAKEUP_VECTOR;
- } while (cmpxchg(&pi_desc->control, old.control,
- new.control) != old.control);
+ } while (cmpxchg64(&pi_desc->control, old.control,
+ new.control) != old.control);
- return 0;
+ /* We should not block the vCPU if an interrupt is posted for it. */
+ if (pi_test_on(pi_desc) == 1)
+ __pi_post_block(vcpu);
+
+ local_irq_enable();
+ return (vcpu->pre_pcpu == -1);
}
static int vmx_pre_block(struct kvm_vcpu *vcpu)
@@ -11764,44 +11824,13 @@ static int vmx_pre_block(struct kvm_vcpu *vcpu)
static void pi_post_block(struct kvm_vcpu *vcpu)
{
- struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
- struct pi_desc old, new;
- unsigned int dest;
- unsigned long flags;
-
- if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
- !irq_remapping_cap(IRQ_POSTING_CAP) ||
- !kvm_vcpu_apicv_active(vcpu))
+ if (vcpu->pre_pcpu == -1)
return;
- do {
- old.control = new.control = pi_desc->control;
-
- dest = cpu_physical_id(vcpu->cpu);
-
- if (x2apic_enabled())
- new.ndst = dest;
- else
- new.ndst = (dest << 8) & 0xFF00;
-
- /* Allow posting non-urgent interrupts */
- new.sn = 0;
-
- /* set 'NV' to 'notification vector' */
- new.nv = POSTED_INTR_VECTOR;
- } while (cmpxchg(&pi_desc->control, old.control,
- new.control) != old.control);
-
- if(vcpu->pre_pcpu != -1) {
- spin_lock_irqsave(
- &per_cpu(blocked_vcpu_on_cpu_lock,
- vcpu->pre_pcpu), flags);
- list_del(&vcpu->blocked_vcpu_list);
- spin_unlock_irqrestore(
- &per_cpu(blocked_vcpu_on_cpu_lock,
- vcpu->pre_pcpu), flags);
- vcpu->pre_pcpu = -1;
- }
+ WARN_ON(irqs_disabled());
+ local_irq_disable();
+ __pi_post_block(vcpu);
+ local_irq_enable();
}
static void vmx_post_block(struct kvm_vcpu *vcpu)
@@ -11829,7 +11858,7 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
struct kvm_lapic_irq irq;
struct kvm_vcpu *vcpu;
struct vcpu_data vcpu_info;
- int idx, ret = -EINVAL;
+ int idx, ret = 0;
if (!kvm_arch_has_assigned_device(kvm) ||
!irq_remapping_cap(IRQ_POSTING_CAP) ||
@@ -11838,7 +11867,12 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
idx = srcu_read_lock(&kvm->irq_srcu);
irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
- BUG_ON(guest_irq >= irq_rt->nr_rt_entries);
+ if (guest_irq >= irq_rt->nr_rt_entries ||
+ hlist_empty(&irq_rt->map[guest_irq])) {
+ pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n",
+ guest_irq, irq_rt->nr_rt_entries);
+ goto out;
+ }
hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
if (e->type != KVM_IRQ_ROUTING_MSI)
@@ -11881,12 +11915,8 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
if (set)
ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
- else {
- /* suppress notification event before unposting */
- pi_set_sn(vcpu_to_pi_desc(vcpu));
+ else
ret = irq_set_vcpu_affinity(host_irq, NULL);
- pi_clear_sn(vcpu_to_pi_desc(vcpu));
- }
if (ret < 0) {
printk(KERN_INFO "%s: failed to update PI IRTE\n",
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6069af86da3b..03869eb7fcd6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7225,16 +7225,25 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
int r;
sigset_t sigsaved;
- fpu__activate_curr(fpu);
+ fpu__initialize(fpu);
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
+ if (kvm_run->immediate_exit) {
+ r = -EINTR;
+ goto out;
+ }
kvm_vcpu_block(vcpu);
kvm_apic_accept_events(vcpu);
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
r = -EAGAIN;
+ if (signal_pending(current)) {
+ r = -EINTR;
+ vcpu->run->exit_reason = KVM_EXIT_INTR;
+ ++vcpu->stat.signal_exits;
+ }
goto out;
}
@@ -7971,7 +7980,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
BUG_ON(vcpu->kvm == NULL);
kvm = vcpu->kvm;
- vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv();
+ vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu);
vcpu->arch.pv.pv_unhalted = false;
vcpu->arch.emulate_ctxt.ops = &emulate_ops;
if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu))
@@ -8452,6 +8461,9 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
if (vcpu->arch.pv.pv_unhalted)
return true;
+ if (vcpu->arch.exception.pending)
+ return true;
+
if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
(vcpu->arch.nmi_pending &&
kvm_x86_ops->nmi_allowed(vcpu)))
@@ -8619,6 +8631,13 @@ static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
sizeof(val));
}
+static int apf_get_user(struct kvm_vcpu *vcpu, u32 *val)
+{
+
+ return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, val,
+ sizeof(u32));
+}
+
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
{
@@ -8646,6 +8665,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
{
struct x86_exception fault;
+ u32 val;
if (work->wakeup_all)
work->arch.token = ~0; /* broadcast wakeup */
@@ -8653,15 +8673,26 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
trace_kvm_async_pf_ready(work->arch.token, work->gva);
- if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) &&
- !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
- fault.vector = PF_VECTOR;
- fault.error_code_valid = true;
- fault.error_code = 0;
- fault.nested_page_fault = false;
- fault.address = work->arch.token;
- fault.async_page_fault = true;
- kvm_inject_page_fault(vcpu, &fault);
+ if (vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED &&
+ !apf_get_user(vcpu, &val)) {
+ if (val == KVM_PV_REASON_PAGE_NOT_PRESENT &&
+ vcpu->arch.exception.pending &&
+ vcpu->arch.exception.nr == PF_VECTOR &&
+ !apf_put_user(vcpu, 0)) {
+ vcpu->arch.exception.injected = false;
+ vcpu->arch.exception.pending = false;
+ vcpu->arch.exception.nr = 0;
+ vcpu->arch.exception.has_error_code = false;
+ vcpu->arch.exception.error_code = 0;
+ } else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
+ fault.vector = PF_VECTOR;
+ fault.error_code_valid = true;
+ fault.error_code = 0;
+ fault.nested_page_fault = false;
+ fault.address = work->arch.token;
+ fault.async_page_fault = true;
+ kvm_inject_page_fault(vcpu, &fault);
+ }
}
vcpu->arch.apf.halted = false;
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c
index d4a7df2205b8..220638a4cb94 100644
--- a/arch/x86/math-emu/fpu_entry.c
+++ b/arch/x86/math-emu/fpu_entry.c
@@ -114,7 +114,7 @@ void math_emulate(struct math_emu_info *info)
struct desc_struct code_descriptor;
struct fpu *fpu = &current->thread.fpu;
- fpu__activate_curr(fpu);
+ fpu__initialize(fpu);
#ifdef RE_ENTRANT_CHECKING
if (emulating) {
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index c076f710de4c..c3521e2be396 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -2,6 +2,7 @@
#include <linux/uaccess.h>
#include <linux/sched/debug.h>
+#include <asm/fpu/internal.h>
#include <asm/traps.h>
#include <asm/kdebug.h>
@@ -78,6 +79,29 @@ bool ex_handler_refcount(const struct exception_table_entry *fixup,
}
EXPORT_SYMBOL_GPL(ex_handler_refcount);
+/*
+ * Handler for when we fail to restore a task's FPU state. We should never get
+ * here because the FPU state of a task using the FPU (task->thread.fpu.state)
+ * should always be valid. However, past bugs have allowed userspace to set
+ * reserved bits in the XSAVE area using PTRACE_SETREGSET or sys_rt_sigreturn().
+ * These caused XRSTOR to fail when switching to the task, leaking the FPU
+ * registers of the task previously executing on the CPU. Mitigate this class
+ * of vulnerability by restoring from the initial state (essentially, zeroing
+ * out all the FPU registers) if we can't restore from the task's FPU state.
+ */
+bool ex_handler_fprestore(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
+{
+ regs->ip = ex_fixup_addr(fixup);
+
+ WARN_ONCE(1, "Bad FPU state detected at %pB, reinitializing FPU registers.",
+ (void *)instruction_pointer(regs));
+
+ __copy_kernel_to_fpregs(&init_fpstate, -1);
+ return true;
+}
+EXPORT_SYMBOL_GPL(ex_handler_fprestore);
+
bool ex_handler_ext(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr)
{
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index b836a7274e12..e2baeaa053a5 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -192,8 +192,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
* 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really
* faulted on a pte with its pkey=4.
*/
-static void fill_sig_info_pkey(int si_code, siginfo_t *info,
- struct vm_area_struct *vma)
+static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey)
{
/* This is effectively an #ifdef */
if (!boot_cpu_has(X86_FEATURE_OSPKE))
@@ -209,7 +208,7 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info,
* valid VMA, so we should never reach this without a
* valid VMA.
*/
- if (!vma) {
+ if (!pkey) {
WARN_ONCE(1, "PKU fault with no VMA passed in");
info->si_pkey = 0;
return;
@@ -219,13 +218,12 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info,
* absolutely guranteed to be 100% accurate because of
* the race explained above.
*/
- info->si_pkey = vma_pkey(vma);
+ info->si_pkey = *pkey;
}
static void
force_sig_info_fault(int si_signo, int si_code, unsigned long address,
- struct task_struct *tsk, struct vm_area_struct *vma,
- int fault)
+ struct task_struct *tsk, u32 *pkey, int fault)
{
unsigned lsb = 0;
siginfo_t info;
@@ -240,7 +238,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
lsb = PAGE_SHIFT;
info.si_addr_lsb = lsb;
- fill_sig_info_pkey(si_code, &info, vma);
+ fill_sig_info_pkey(si_code, &info, pkey);
force_sig_info(si_signo, &info, tsk);
}
@@ -762,8 +760,6 @@ no_context(struct pt_regs *regs, unsigned long error_code,
struct task_struct *tsk = current;
unsigned long flags;
int sig;
- /* No context means no VMA to pass down */
- struct vm_area_struct *vma = NULL;
/* Are we prepared to handle this kernel fault? */
if (fixup_exception(regs, X86_TRAP_PF)) {
@@ -788,7 +784,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
/* XXX: hwpoison faults will set the wrong code. */
force_sig_info_fault(signal, si_code, address,
- tsk, vma, 0);
+ tsk, NULL, 0);
}
/*
@@ -806,7 +802,6 @@ no_context(struct pt_regs *regs, unsigned long error_code,
if (is_vmalloc_addr((void *)address) &&
(((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) ||
address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) {
- register void *__sp asm("rsp");
unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *);
/*
* We're likely to be running with very little stack space
@@ -821,7 +816,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
asm volatile ("movq %[stack], %%rsp\n\t"
"call handle_stack_overflow\n\t"
"1: jmp 1b"
- : "+r" (__sp)
+ : ASM_CALL_CONSTRAINT
: "D" ("kernel stack overflow (page fault)"),
"S" (regs), "d" (address),
[stack] "rm" (stack));
@@ -897,8 +892,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
static void
__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
- unsigned long address, struct vm_area_struct *vma,
- int si_code)
+ unsigned long address, u32 *pkey, int si_code)
{
struct task_struct *tsk = current;
@@ -946,7 +940,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = X86_TRAP_PF;
- force_sig_info_fault(SIGSEGV, si_code, address, tsk, vma, 0);
+ force_sig_info_fault(SIGSEGV, si_code, address, tsk, pkey, 0);
return;
}
@@ -959,9 +953,9 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
static noinline void
bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
- unsigned long address, struct vm_area_struct *vma)
+ unsigned long address, u32 *pkey)
{
- __bad_area_nosemaphore(regs, error_code, address, vma, SEGV_MAPERR);
+ __bad_area_nosemaphore(regs, error_code, address, pkey, SEGV_MAPERR);
}
static void
@@ -969,6 +963,10 @@ __bad_area(struct pt_regs *regs, unsigned long error_code,
unsigned long address, struct vm_area_struct *vma, int si_code)
{
struct mm_struct *mm = current->mm;
+ u32 pkey;
+
+ if (vma)
+ pkey = vma_pkey(vma);
/*
* Something tried to access memory that isn't in our memory map..
@@ -976,7 +974,8 @@ __bad_area(struct pt_regs *regs, unsigned long error_code,
*/
up_read(&mm->mmap_sem);
- __bad_area_nosemaphore(regs, error_code, address, vma, si_code);
+ __bad_area_nosemaphore(regs, error_code, address,
+ (vma) ? &pkey : NULL, si_code);
}
static noinline void
@@ -1019,7 +1018,7 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
static void
do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
- struct vm_area_struct *vma, unsigned int fault)
+ u32 *pkey, unsigned int fault)
{
struct task_struct *tsk = current;
int code = BUS_ADRERR;
@@ -1046,13 +1045,12 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
code = BUS_MCEERR_AR;
}
#endif
- force_sig_info_fault(SIGBUS, code, address, tsk, vma, fault);
+ force_sig_info_fault(SIGBUS, code, address, tsk, pkey, fault);
}
static noinline void
mm_fault_error(struct pt_regs *regs, unsigned long error_code,
- unsigned long address, struct vm_area_struct *vma,
- unsigned int fault)
+ unsigned long address, u32 *pkey, unsigned int fault)
{
if (fatal_signal_pending(current) && !(error_code & PF_USER)) {
no_context(regs, error_code, address, 0, 0);
@@ -1076,9 +1074,9 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
} else {
if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
VM_FAULT_HWPOISON_LARGE))
- do_sigbus(regs, error_code, address, vma, fault);
+ do_sigbus(regs, error_code, address, pkey, fault);
else if (fault & VM_FAULT_SIGSEGV)
- bad_area_nosemaphore(regs, error_code, address, vma);
+ bad_area_nosemaphore(regs, error_code, address, pkey);
else
BUG();
}
@@ -1268,6 +1266,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
struct mm_struct *mm;
int fault, major = 0;
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+ u32 pkey;
tsk = current;
mm = tsk->mm;
@@ -1468,9 +1467,10 @@ good_area:
return;
}
+ pkey = vma_pkey(vma);
up_read(&mm->mmap_sem);
if (unlikely(fault & VM_FAULT_ERROR)) {
- mm_fault_error(regs, error_code, address, vma, fault);
+ mm_fault_error(regs, error_code, address, &pkey, fault);
return;
}
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 7777ccc0e9f9..af5c1ed21d43 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -19,6 +19,7 @@
#include <asm/microcode.h>
#include <asm/kaslr.h>
#include <asm/hypervisor.h>
+#include <asm/cpufeature.h>
/*
* We need to define the tracepoints somewhere, and tlb.c
@@ -193,6 +194,38 @@ static void __init probe_page_size_mask(void)
}
}
+static void setup_pcid(void)
+{
+#ifdef CONFIG_X86_64
+ if (boot_cpu_has(X86_FEATURE_PCID)) {
+ if (boot_cpu_has(X86_FEATURE_PGE)) {
+ /*
+ * This can't be cr4_set_bits_and_update_boot() --
+ * the trampoline code can't handle CR4.PCIDE and
+ * it wouldn't do any good anyway. Despite the name,
+ * cr4_set_bits_and_update_boot() doesn't actually
+ * cause the bits in question to remain set all the
+ * way through the secondary boot asm.
+ *
+ * Instead, we brute-force it and set CR4.PCIDE
+ * manually in start_secondary().
+ */
+ cr4_set_bits(X86_CR4_PCIDE);
+ } else {
+ /*
+ * flush_tlb_all(), as currently implemented, won't
+ * work if PCID is on but PGE is not. Since that
+ * combination doesn't exist on real hardware, there's
+ * no reason to try to fully support it, but it's
+ * polite to avoid corrupting data if we're on
+ * an improperly configured VM.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_PCID);
+ }
+ }
+#endif
+}
+
#ifdef CONFIG_X86_32
#define NR_RANGE_MR 3
#else /* CONFIG_X86_64 */
@@ -592,6 +625,7 @@ void __init init_mem_mapping(void)
unsigned long end;
probe_page_size_mask();
+ setup_pcid();
#ifdef CONFIG_X86_64
end = max_pfn << PAGE_SHIFT;
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 3fcc8e01683b..16c5f37933a2 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -10,6 +10,8 @@
* published by the Free Software Foundation.
*/
+#define DISABLE_BRANCH_PROFILING
+
#include <linux/linkage.h>
#include <linux/init.h>
#include <linux/mm.h>
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 218834a3e9ad..b372f3442bbf 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -426,10 +426,8 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
{
int changed = !pte_same(*ptep, entry);
- if (changed && dirty) {
+ if (changed && dirty)
*ptep = entry;
- pte_update(vma->vm_mm, address, ptep);
- }
return changed;
}
@@ -486,9 +484,6 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma,
ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
(unsigned long *) &ptep->pte);
- if (ret)
- pte_update(vma->vm_mm, addr, ptep);
-
return ret;
}
diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c
index 2dab69a706ec..d7bc0eea20a5 100644
--- a/arch/x86/mm/pkeys.c
+++ b/arch/x86/mm/pkeys.c
@@ -18,7 +18,6 @@
#include <asm/cpufeature.h> /* boot_cpu_has, ... */
#include <asm/mmu_context.h> /* vma_pkey() */
-#include <asm/fpu/internal.h> /* fpregs_active() */
int __execute_only_pkey(struct mm_struct *mm)
{
@@ -45,7 +44,7 @@ int __execute_only_pkey(struct mm_struct *mm)
*/
preempt_disable();
if (!need_to_set_mm_pkey &&
- fpregs_active() &&
+ current->thread.fpu.initialized &&
!__pkru_allows_read(read_pkru(), execute_only_pkey)) {
preempt_enable();
return execute_only_pkey;
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 37689a7cc03b..49d9778376d7 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -121,8 +121,27 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
* hypothetical buggy code that directly switches to swapper_pg_dir
* without going through leave_mm() / switch_mm_irqs_off() or that
* does something like write_cr3(read_cr3_pa()).
+ *
+ * Only do this check if CONFIG_DEBUG_VM=y because __read_cr3()
+ * isn't free.
*/
- VM_BUG_ON(__read_cr3() != (__sme_pa(real_prev->pgd) | prev_asid));
+#ifdef CONFIG_DEBUG_VM
+ if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) {
+ /*
+ * If we were to BUG here, we'd be very likely to kill
+ * the system so hard that we don't see the call trace.
+ * Try to recover instead by ignoring the error and doing
+ * a global flush to minimize the chance of corruption.
+ *
+ * (This is far from being a fully correct recovery.
+ * Architecturally, the CPU could prefetch something
+ * back into an incorrect ASID slot and leave it there
+ * to cause trouble down the road. It's better than
+ * nothing, though.)
+ */
+ __flush_tlb_all();
+ }
+#endif
if (real_prev == next) {
VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
@@ -152,7 +171,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
*/
this_cpu_write(cpu_tlbstate.ctxs[prev_asid].tlb_gen,
next_tlb_gen);
- write_cr3(__sme_pa(next->pgd) | prev_asid);
+ write_cr3(build_cr3(next, prev_asid));
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
TLB_FLUSH_ALL);
}
@@ -172,7 +191,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
* mapped in the new pgd, we'll double-fault. Forcibly
* map it.
*/
- unsigned int index = pgd_index(current_stack_pointer());
+ unsigned int index = pgd_index(current_stack_pointer);
pgd_t *pgd = next->pgd + index;
if (unlikely(pgd_none(*pgd)))
@@ -196,12 +215,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
if (need_flush) {
this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
- write_cr3(__sme_pa(next->pgd) | new_asid);
+ write_cr3(build_cr3(next, new_asid));
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
TLB_FLUSH_ALL);
} else {
/* The new ASID is already up to date. */
- write_cr3(__sme_pa(next->pgd) | new_asid | CR3_NOFLUSH);
+ write_cr3(build_cr3_noflush(next, new_asid));
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
}
@@ -245,7 +264,7 @@ void initialize_tlbstate_and_flush(void)
!(cr4_read_shadow() & X86_CR4_PCIDE));
/* Force ASID 0 and force a TLB flush. */
- write_cr3(cr3 & ~CR3_PCID_MASK);
+ write_cr3(build_cr3(mm, 0));
/* Reinitialize tlbstate. */
this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 8c9573660d51..0554e8aef4d5 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -284,9 +284,9 @@ static void emit_bpf_tail_call(u8 **pprog)
/* if (index >= array->map.max_entries)
* goto out;
*/
- EMIT4(0x48, 0x8B, 0x46, /* mov rax, qword ptr [rsi + 16] */
+ EMIT2(0x89, 0xD2); /* mov edx, edx */
+ EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */
offsetof(struct bpf_array, map.max_entries));
- EMIT3(0x48, 0x39, 0xD0); /* cmp rax, rdx */
#define OFFSET1 43 /* number of bytes to jump */
EMIT2(X86_JBE, OFFSET1); /* jbe out */
label1 = cnt;
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index 9bd115484745..0f5f60b14f48 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -1092,7 +1092,7 @@ static int __init fix_acer_tm360_irqrouting(const struct dmi_system_id *d)
return 0;
}
-static struct dmi_system_id __initdata pciirq_dmi_table[] = {
+static const struct dmi_system_id pciirq_dmi_table[] __initconst = {
{
.callback = fix_broken_hp_bios_irq9,
.ident = "HP Pavilion N5400 Series Laptop",
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 4d68d59f457d..84fcfde53f8f 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -428,7 +428,7 @@ static int msr_initialize_bdw(const struct dmi_system_id *d)
return msr_init_context(bdw_msr_id, ARRAY_SIZE(bdw_msr_id));
}
-static struct dmi_system_id msr_save_dmi_table[] = {
+static const struct dmi_system_id msr_save_dmi_table[] = {
{
.callback = msr_initialize_bdw,
.ident = "BROADWELL BDX_EP",
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index f2598d81cd55..f910c514438f 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -295,7 +295,26 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size)
return -EOVERFLOW;
rdr->jump_address = (unsigned long)restore_registers;
rdr->jump_address_phys = __pa_symbol(restore_registers);
- rdr->cr3 = restore_cr3;
+
+ /*
+ * The restore code fixes up CR3 and CR4 in the following sequence:
+ *
+ * [in hibernation asm]
+ * 1. CR3 <= temporary page tables
+ * 2. CR4 <= mmu_cr4_features (from the kernel that restores us)
+ * 3. CR3 <= rdr->cr3
+ * 4. CR4 <= mmu_cr4_features (from us, i.e. the image kernel)
+ * [in restore_processor_state()]
+ * 5. CR4 <= saved CR4
+ * 6. CR3 <= saved CR3
+ *
+ * Our mmu_cr4_features has CR4.PCIDE=0, and toggling
+ * CR4.PCIDE while CR3's PCID bits are nonzero is illegal, so
+ * rdr->cr3 needs to point to valid page tables but must not
+ * have any of the PCID bits set.
+ */
+ rdr->cr3 = restore_cr3 & ~CR3_PCID_MASK;
+
rdr->magic = RESTORE_MAGIC;
hibernation_e820_save(rdr->e820_digest);
diff --git a/arch/x86/um/os-Linux/registers.c b/arch/x86/um/os-Linux/registers.c
index 28775f55bde2..3c423dfcd78b 100644
--- a/arch/x86/um/os-Linux/registers.c
+++ b/arch/x86/um/os-Linux/registers.c
@@ -5,6 +5,7 @@
*/
#include <errno.h>
+#include <stdlib.h>
#include <sys/ptrace.h>
#ifdef __i386__
#include <sys/user.h>
@@ -31,7 +32,7 @@ int save_fp_registers(int pid, unsigned long *fp_regs)
if (have_xstate_support) {
iov.iov_base = fp_regs;
- iov.iov_len = sizeof(struct _xstate);
+ iov.iov_len = FP_SIZE * sizeof(unsigned long);
if (ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov) < 0)
return -errno;
return 0;
@@ -51,10 +52,9 @@ int restore_fp_registers(int pid, unsigned long *fp_regs)
{
#ifdef PTRACE_SETREGSET
struct iovec iov;
-
if (have_xstate_support) {
iov.iov_base = fp_regs;
- iov.iov_len = sizeof(struct _xstate);
+ iov.iov_len = FP_SIZE * sizeof(unsigned long);
if (ptrace(PTRACE_SETREGSET, pid, NT_X86_XSTATE, &iov) < 0)
return -errno;
return 0;
@@ -125,13 +125,19 @@ int put_fp_registers(int pid, unsigned long *regs)
void arch_init_registers(int pid)
{
#ifdef PTRACE_GETREGSET
- struct _xstate fp_regs;
+ void * fp_regs;
struct iovec iov;
- iov.iov_base = &fp_regs;
- iov.iov_len = sizeof(struct _xstate);
+ fp_regs = malloc(FP_SIZE * sizeof(unsigned long));
+ if(fp_regs == NULL)
+ return;
+
+ iov.iov_base = fp_regs;
+ iov.iov_len = FP_SIZE * sizeof(unsigned long);
if (ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov) == 0)
have_xstate_support = 1;
+
+ free(fp_regs);
#endif
}
#endif
diff --git a/arch/x86/um/os-Linux/tls.c b/arch/x86/um/os-Linux/tls.c
index 9d94b3b76c74..ed8ea90967dc 100644
--- a/arch/x86/um/os-Linux/tls.c
+++ b/arch/x86/um/os-Linux/tls.c
@@ -37,7 +37,7 @@ void check_host_supports_tls(int *supports_tls, int *tls_min)
continue;
else if (errno == ENOSYS)
*supports_tls = 0;
- return;
+ return;
}
}
diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c
index 02250b2633b8..3099c209546f 100644
--- a/arch/x86/um/user-offsets.c
+++ b/arch/x86/um/user-offsets.c
@@ -51,7 +51,7 @@ void foo(void)
DEFINE(HOST_ORIG_AX, ORIG_EAX);
#else
#ifdef FP_XSTATE_MAGIC1
- DEFINE(HOST_FP_SIZE, sizeof(struct _xstate) / sizeof(unsigned long));
+ DEFINE_LONGS(HOST_FP_SIZE, 2696);
#else
DEFINE(HOST_FP_SIZE, sizeof(struct _fpstate) / sizeof(unsigned long));
#endif
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index ae2a2e2d6362..69b9deff7e5c 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1038,7 +1038,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.read_cr0 = xen_read_cr0,
.write_cr0 = xen_write_cr0,
- .read_cr4 = native_read_cr4,
.write_cr4 = xen_write_cr4,
#ifdef CONFIG_X86_64
@@ -1073,7 +1072,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.alloc_ldt = xen_alloc_ldt,
.free_ldt = xen_free_ldt,
- .store_idt = native_store_idt,
.store_tr = xen_store_tr,
.write_ldt_entry = xen_write_ldt_entry,
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 6b983b300666..71495f1a86d7 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1238,21 +1238,16 @@ static void __init xen_pagetable_cleanhighmap(void)
* from _brk_limit way up to the max_pfn_mapped (which is the end of
* the ramdisk). We continue on, erasing PMD entries that point to page
* tables - do note that they are accessible at this stage via __va.
- * For good measure we also round up to the PMD - which means that if
+ * As Xen is aligning the memory end to a 4MB boundary, for good
+ * measure we also round up to PMD_SIZE * 2 - which means that if
* anybody is using __ka address to the initial boot-stack - and try
* to use it - they are going to crash. The xen_start_info has been
* taken care of already in xen_setup_kernel_pagetable. */
addr = xen_start_info->pt_base;
- size = roundup(xen_start_info->nr_pt_frames * PAGE_SIZE, PMD_SIZE);
+ size = xen_start_info->nr_pt_frames * PAGE_SIZE;
- xen_cleanhighmap(addr, addr + size);
+ xen_cleanhighmap(addr, roundup(addr + size, PMD_SIZE * 2));
xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base));
-#ifdef DEBUG
- /* This is superfluous and is not necessary, but you know what
- * lets do it. The MODULES_VADDR -> MODULES_END should be clear of
- * anything at this stage. */
- xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1);
-#endif
}
#endif
@@ -2220,7 +2215,7 @@ static void __init xen_write_cr3_init(unsigned long cr3)
* not the first page table in the page table pool.
* Iterate through the initial page tables to find the real page table base.
*/
-static phys_addr_t xen_find_pt_base(pmd_t *pmd)
+static phys_addr_t __init xen_find_pt_base(pmd_t *pmd)
{
phys_addr_t pt_base, paddr;
unsigned pmdidx;
@@ -2409,8 +2404,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
.flush_tlb_single = xen_flush_tlb_single,
.flush_tlb_others = xen_flush_tlb_others,
- .pte_update = paravirt_nop,
-
.pgd_alloc = xen_pgd_alloc,
.pgd_free = xen_pgd_free,