aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorAndy Lutomirski <luto@kernel.org>2016-02-29 15:50:19 -0800
committerIngo Molnar <mingo@kernel.org>2016-03-08 14:16:44 +0100
commit58a5aac5331388a175a42b6ed2154f0559cefb21 (patch)
treefd9562b81d85fc5e33d4a3414716aedb48106741 /arch/x86
parentMerge tag 'v4.5-rc7' into x86/asm, to pick up SMAP fix (diff)
downloadlinux-dev-58a5aac5331388a175a42b6ed2154f0559cefb21.tar.xz
linux-dev-58a5aac5331388a175a42b6ed2154f0559cefb21.zip
x86/entry/32: Introduce and use X86_BUG_ESPFIX instead of paravirt_enabled
x86_64 has very clean espfix handling on paravirt: espfix64 is set up in native_iret, so paravirt systems that override iret bypass espfix64 automatically. This is robust and straightforward. x86_32 is messier. espfix is set up before the IRET paravirt patch point, so it can't be directly conditionalized on whether we use native_iret. We also can't easily move it into native_iret without regressing performance due to a bizarre consideration. Specifically, on 64-bit kernels, the logic is: if (regs->ss & 0x4) setup_espfix; On 32-bit kernels, the logic is: if ((regs->ss & 0x4) && (regs->cs & 0x3) == 3 && (regs->flags & X86_EFLAGS_VM) == 0) setup_espfix; The performance of setup_espfix itself is essentially irrelevant, but the comparison happens on every IRET so its performance matters. On x86_64, there's no need for any registers except flags to implement the comparison, so we fold the whole thing into native_iret. On x86_32, we don't do that because we need a free register to implement the comparison efficiently. We therefore do espfix setup before restoring registers on x86_32. This patch gets rid of the explicit paravirt_enabled check by introducing X86_BUG_ESPFIX on 32-bit systems and using an ALTERNATIVE to skip espfix on paravirt systems where iret != native_iret. This is also messy, but it's at least in line with other things we do. This improves espfix performance by removing a branch, but no one cares. More importantly, it removes a paravirt_enabled user, which is good because paravirt_enabled is ill-defined and is going away. Signed-off-by: Andy Lutomirski <luto@kernel.org> Reviewed-by: Borislav Petkov <bp@suse.de> Cc: Andrew Cooper <andrew.cooper3@citrix.com> Cc: Andy Lutomirski <luto@amacapital.net> Cc: Borislav Petkov <bp@alien8.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Luis R. Rodriguez <mcgrof@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: boris.ostrovsky@oracle.com Cc: david.vrabel@citrix.com Cc: konrad.wilk@oracle.com Cc: lguest@lists.ozlabs.org Cc: xen-devel@lists.xensource.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/entry/entry_32.S15
-rw-r--r--arch/x86/include/asm/cpufeatures.h8
-rw-r--r--arch/x86/kernel/cpu/common.c25
3 files changed, 35 insertions, 13 deletions
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 66350e6a6ca5..e13027247c90 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -361,6 +361,8 @@ restore_all:
TRACE_IRQS_IRET
restore_all_notrace:
#ifdef CONFIG_X86_ESPFIX32
+ ALTERNATIVE "jmp restore_nocheck", "", X86_BUG_ESPFIX
+
movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
/*
* Warning: PT_OLDSS(%esp) contains the wrong/random values if we
@@ -387,19 +389,6 @@ ENTRY(iret_exc )
#ifdef CONFIG_X86_ESPFIX32
ldt_ss:
-#ifdef CONFIG_PARAVIRT
- /*
- * The kernel can't run on a non-flat stack if paravirt mode
- * is active. Rather than try to fixup the high bits of
- * ESP, bypass this code entirely. This may break DOSemu
- * and/or Wine support in a paravirt VM, although the option
- * is still available to implement the setting of the high
- * 16-bits in the INTERRUPT_RETURN paravirt-op.
- */
- cmpl $0, pv_info+PARAVIRT_enabled
- jne restore_nocheck
-#endif
-
/*
* Setup and switch to ESPFIX stack
*
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 6663fae71b12..d11a3aaafd96 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -286,4 +286,12 @@
#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
+#ifdef CONFIG_X86_32
+/*
+ * 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional
+ * to avoid confusion.
+ */
+#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
+#endif
+
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 079d83fc6488..d8337f34b5f4 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -803,6 +803,31 @@ static void detect_nopl(struct cpuinfo_x86 *c)
#else
set_cpu_cap(c, X86_FEATURE_NOPL);
#endif
+
+ /*
+ * ESPFIX is a strange bug. All real CPUs have it. Paravirt
+ * systems that run Linux at CPL > 0 may or may not have the
+ * issue, but, even if they have the issue, there's absolutely
+ * nothing we can do about it because we can't use the real IRET
+ * instruction.
+ *
+ * NB: For the time being, only 32-bit kernels support
+ * X86_BUG_ESPFIX as such. 64-bit kernels directly choose
+ * whether to apply espfix using paravirt hooks. If any
+ * non-paravirt system ever shows up that does *not* have the
+ * ESPFIX issue, we can change this.
+ */
+#ifdef CONFIG_X86_32
+#ifdef CONFIG_PARAVIRT
+ do {
+ extern void native_iret(void);
+ if (pv_cpu_ops.iret == native_iret)
+ set_cpu_bug(c, X86_BUG_ESPFIX);
+ } while (0);
+#else
+ set_cpu_bug(c, X86_BUG_ESPFIX);
+#endif
+#endif
}
static void generic_identify(struct cpuinfo_x86 *c)