From 1aa7a5735a41418d8e01fa7c9565eb2657e2ea3f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 1 May 2018 15:55:51 +0200 Subject: x86/nospec: Simplify alternative_msr_write() The macro is not type safe and I did look for why that "g" constraint for the asm doesn't work: it's because the asm is more fundamentally wrong. It does movl %[val], %%eax but "val" isn't a 32-bit value, so then gcc will pass it in a register, and generate code like movl %rsi, %eax and gas will complain about a nonsensical 'mov' instruction (it's moving a 64-bit register to a 32-bit one). Passing it through memory will just hide the real bug - gcc still thinks the memory location is 64-bit, but the "movl" will only load the first 32 bits and it all happens to work because x86 is little-endian. Convert it to a type safe inline function with a little trick which hands the feature into the ALTERNATIVE macro. Signed-off-by: Linus Torvalds Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar --- arch/x86/include/asm/nospec-branch.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index f928ad9b143f..870acfc2fd34 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -241,15 +241,16 @@ static inline void vmexit_fill_RSB(void) #endif } -#define alternative_msr_write(_msr, _val, _feature) \ - asm volatile(ALTERNATIVE("", \ - "movl %[msr], %%ecx\n\t" \ - "movl %[val], %%eax\n\t" \ - "movl $0, %%edx\n\t" \ - "wrmsr", \ - _feature) \ - : : [msr] "i" (_msr), [val] "i" (_val) \ - : "eax", "ecx", "edx", "memory") +static __always_inline +void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) +{ + asm volatile(ALTERNATIVE("", "wrmsr", %c[feature]) + : : "c" (msr), + "a" (val), + "d" (val >> 32), + [feature] "i" (feature) + : "memory"); +} static inline void indirect_branch_prediction_barrier(void) { -- cgit v1.2.3-59-g8ed1b From 4a28bfe3267b68e22c663ac26185aa16c9b879ef Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 25 Apr 2018 22:04:16 -0400 Subject: x86/bugs: Concentrate bug detection into a separate function Combine the various logic which goes through all those x86_cpu_id matching structures in one function. Suggested-by: Borislav Petkov Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ce243f7d2d4e..9ed0a18ff6f6 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -927,21 +927,27 @@ static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { {} }; -static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c) +static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 ia32_cap = 0; + if (x86_match_cpu(cpu_no_speculation)) + return; + + setup_force_cpu_bug(X86_BUG_SPECTRE_V1); + setup_force_cpu_bug(X86_BUG_SPECTRE_V2); + if (x86_match_cpu(cpu_no_meltdown)) - return false; + return; if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); /* Rogue Data Cache Load? No! */ if (ia32_cap & ARCH_CAP_RDCL_NO) - return false; + return; - return true; + setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); } /* @@ -992,12 +998,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) setup_force_cpu_cap(X86_FEATURE_ALWAYS); - if (!x86_match_cpu(cpu_no_speculation)) { - if (cpu_vulnerable_to_meltdown(c)) - setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); - setup_force_cpu_bug(X86_BUG_SPECTRE_V1); - setup_force_cpu_bug(X86_BUG_SPECTRE_V2); - } + cpu_set_bug_bits(c); fpu__init_system(c); -- cgit v1.2.3-59-g8ed1b From d1059518b4789cabe34bb4b714d07e6089c82ca1 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 25 Apr 2018 22:04:17 -0400 Subject: x86/bugs: Concentrate bug reporting into a separate function Those SysFS functions have a similar preamble, as such make common code to handle them. Suggested-by: Borislav Petkov Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Ingo Molnar --- arch/x86/kernel/cpu/bugs.c | 46 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index bfca937bdcc3..ad613f73d071 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -314,30 +314,48 @@ retpoline_auto: #undef pr_fmt #ifdef CONFIG_SYSFS -ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) + +ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, + char *buf, unsigned int bug) { - if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) + if (!boot_cpu_has_bug(bug)) return sprintf(buf, "Not affected\n"); - if (boot_cpu_has(X86_FEATURE_PTI)) - return sprintf(buf, "Mitigation: PTI\n"); + + switch (bug) { + case X86_BUG_CPU_MELTDOWN: + if (boot_cpu_has(X86_FEATURE_PTI)) + return sprintf(buf, "Mitigation: PTI\n"); + + break; + + case X86_BUG_SPECTRE_V1: + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); + + case X86_BUG_SPECTRE_V2: + return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], + boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", + spectre_v2_module_string()); + + default: + break; + } + return sprintf(buf, "Vulnerable\n"); } +ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_CPU_MELTDOWN); +} + ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf) { - if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) - return sprintf(buf, "Not affected\n"); - return sprintf(buf, "Mitigation: __user pointer sanitization\n"); + return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V1); } ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) { - if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - return sprintf(buf, "Not affected\n"); - - return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], - boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", - boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", - spectre_v2_module_string()); + return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V2); } #endif -- cgit v1.2.3-59-g8ed1b From 1b86883ccb8d5d9506529d42dbe1a5257cb30b18 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 25 Apr 2018 22:04:18 -0400 Subject: x86/bugs: Read SPEC_CTRL MSR during boot and re-use reserved bits The 336996-Speculative-Execution-Side-Channel-Mitigations.pdf refers to all the other bits as reserved. The Intel SDM glossary defines reserved as implementation specific - aka unknown. As such at bootup this must be taken it into account and proper masking for the bits in use applied. A copy of this document is available at https://bugzilla.kernel.org/show_bug.cgi?id=199511 [ tglx: Made x86_spec_ctrl_base __ro_after_init ] Suggested-by: Jon Masters Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Ingo Molnar --- arch/x86/include/asm/nospec-branch.h | 24 ++++++++++++++++++++---- arch/x86/kernel/cpu/bugs.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 870acfc2fd34..9ec3d4d448cd 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -217,6 +217,17 @@ enum spectre_v2_mitigation { SPECTRE_V2_IBRS, }; +/* + * The Intel specification for the SPEC_CTRL MSR requires that we + * preserve any already set reserved bits at boot time (e.g. for + * future additions that this kernel is not currently aware of). + * We then set any additional mitigation bits that we want + * ourselves and always use this as the base for SPEC_CTRL. + * We also use this when handling guest entry/exit as below. + */ +extern void x86_spec_ctrl_set(u64); +extern u64 x86_spec_ctrl_get_default(void); + extern char __indirect_thunk_start[]; extern char __indirect_thunk_end[]; @@ -254,8 +265,9 @@ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) static inline void indirect_branch_prediction_barrier(void) { - alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, - X86_FEATURE_USE_IBPB); + u64 val = PRED_CMD_IBPB; + + alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB); } /* @@ -266,14 +278,18 @@ static inline void indirect_branch_prediction_barrier(void) */ #define firmware_restrict_branch_speculation_start() \ do { \ + u64 val = x86_spec_ctrl_get_default() | SPEC_CTRL_IBRS; \ + \ preempt_disable(); \ - alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ X86_FEATURE_USE_IBRS_FW); \ } while (0) #define firmware_restrict_branch_speculation_end() \ do { \ - alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, \ + u64 val = x86_spec_ctrl_get_default(); \ + \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ X86_FEATURE_USE_IBRS_FW); \ preempt_enable(); \ } while (0) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index ad613f73d071..6ed84f50d74a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -28,6 +28,12 @@ static void __init spectre_v2_select_mitigation(void); +/* + * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any + * writes to SPEC_CTRL contain whatever reserved bits have been set. + */ +static u64 __ro_after_init x86_spec_ctrl_base; + void __init check_bugs(void) { identify_boot_cpu(); @@ -37,6 +43,13 @@ void __init check_bugs(void) print_cpu_info(&boot_cpu_data); } + /* + * Read the SPEC_CTRL MSR to account for reserved bits which may + * have unknown values. + */ + if (boot_cpu_has(X86_FEATURE_IBRS)) + rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + /* Select the proper spectre mitigation before patching alternatives */ spectre_v2_select_mitigation(); @@ -95,6 +108,21 @@ static const char *spectre_v2_strings[] = { static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; +void x86_spec_ctrl_set(u64 val) +{ + if (val & ~SPEC_CTRL_IBRS) + WARN_ONCE(1, "SPEC_CTRL MSR value 0x%16llx is unknown.\n", val); + else + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base | val); +} +EXPORT_SYMBOL_GPL(x86_spec_ctrl_set); + +u64 x86_spec_ctrl_get_default(void) +{ + return x86_spec_ctrl_base; +} +EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default); + #ifdef RETPOLINE static bool spectre_v2_bad_module; -- cgit v1.2.3-59-g8ed1b From 5cf687548705412da47c9cec342fd952d71ed3d5 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 25 Apr 2018 22:04:19 -0400 Subject: x86/bugs, KVM: Support the combination of guest and host IBRS A guest may modify the SPEC_CTRL MSR from the value used by the kernel. Since the kernel doesn't use IBRS, this means a value of zero is what is needed in the host. But the 336996-Speculative-Execution-Side-Channel-Mitigations.pdf refers to the other bits as reserved so the kernel should respect the boot time SPEC_CTRL value and use that. This allows to deal with future extensions to the SPEC_CTRL interface if any at all. Note: This uses wrmsrl() instead of native_wrmsl(). I does not make any difference as paravirt will over-write the callq *0xfff.. with the wrmsrl assembler code. Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Ingo Molnar --- arch/x86/include/asm/nospec-branch.h | 10 ++++++++++ arch/x86/kernel/cpu/bugs.c | 18 ++++++++++++++++++ arch/x86/kvm/svm.c | 6 ++---- arch/x86/kvm/vmx.c | 6 ++---- 4 files changed, 32 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 9ec3d4d448cd..d1c2630922da 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -228,6 +228,16 @@ enum spectre_v2_mitigation { extern void x86_spec_ctrl_set(u64); extern u64 x86_spec_ctrl_get_default(void); +/* + * On VMENTER we must preserve whatever view of the SPEC_CTRL MSR + * the guest has, while on VMEXIT we restore the host view. This + * would be easier if SPEC_CTRL were architecturally maskable or + * shadowable for guests but this is not (currently) the case. + * Takes the guest view of SPEC_CTRL MSR as a parameter. + */ +extern void x86_spec_ctrl_set_guest(u64); +extern void x86_spec_ctrl_restore_host(u64); + extern char __indirect_thunk_start[]; extern char __indirect_thunk_end[]; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 6ed84f50d74a..38a8626c894c 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -123,6 +123,24 @@ u64 x86_spec_ctrl_get_default(void) } EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default); +void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl) +{ + if (!boot_cpu_has(X86_FEATURE_IBRS)) + return; + if (x86_spec_ctrl_base != guest_spec_ctrl) + wrmsrl(MSR_IA32_SPEC_CTRL, guest_spec_ctrl); +} +EXPORT_SYMBOL_GPL(x86_spec_ctrl_set_guest); + +void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl) +{ + if (!boot_cpu_has(X86_FEATURE_IBRS)) + return; + if (x86_spec_ctrl_base != guest_spec_ctrl) + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); +} +EXPORT_SYMBOL_GPL(x86_spec_ctrl_restore_host); + #ifdef RETPOLINE static bool spectre_v2_bad_module; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1fc05e428aba..8bfc8fe26851 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -5557,8 +5557,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) * is no need to worry about the conditional branch over the wrmsr * being speculatively taken. */ - if (svm->spec_ctrl) - native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); + x86_spec_ctrl_set_guest(svm->spec_ctrl); asm volatile ( "push %%" _ASM_BP "; \n\t" @@ -5670,8 +5669,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - if (svm->spec_ctrl) - native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); + x86_spec_ctrl_restore_host(svm->spec_ctrl); /* Eliminate branch target predictions from guest mode */ vmexit_fill_RSB(); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c7668806163f..3091dc98a451 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9720,8 +9720,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) * is no need to worry about the conditional branch over the wrmsr * being speculatively taken. */ - if (vmx->spec_ctrl) - native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); + x86_spec_ctrl_set_guest(vmx->spec_ctrl); vmx->__launched = vmx->loaded_vmcs->launched; @@ -9869,8 +9868,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - if (vmx->spec_ctrl) - native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); + x86_spec_ctrl_restore_host(vmx->spec_ctrl); /* Eliminate branch target predictions from guest mode */ vmexit_fill_RSB(); -- cgit v1.2.3-59-g8ed1b From c456442cd3a59eeb1d60293c26cbe2ff2c4e42cf Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 25 Apr 2018 22:04:20 -0400 Subject: x86/bugs: Expose /sys/../spec_store_bypass Add the sysfs file for the new vulerability. It does not do much except show the words 'Vulnerable' for recent x86 cores. Intel cores prior to family 6 are known not to be vulnerable, and so are some Atoms and some Xeon Phi. It assumes that older Cyrix, Centaur, etc. cores are immune. Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Ingo Molnar --- Documentation/ABI/testing/sysfs-devices-system-cpu | 1 + arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/bugs.c | 5 +++++ arch/x86/kernel/cpu/common.c | 23 ++++++++++++++++++++++ drivers/base/cpu.c | 8 ++++++++ include/linux/cpu.h | 2 ++ 6 files changed, 40 insertions(+) (limited to 'arch') diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 025b7cf3768d..bd4975e132d3 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -478,6 +478,7 @@ What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/meltdown /sys/devices/system/cpu/vulnerabilities/spectre_v1 /sys/devices/system/cpu/vulnerabilities/spectre_v2 + /sys/devices/system/cpu/vulnerabilities/spec_store_bypass Date: January 2018 Contact: Linux kernel mailing list Description: Information about CPU vulnerabilities diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 578793e97431..3493b58ef2cc 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -363,5 +363,6 @@ #define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ #define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */ #define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ +#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 38a8626c894c..59a9f3aa9f7a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -404,4 +404,9 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c { return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V2); } + +ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_SPEC_STORE_BYPASS); +} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9ed0a18ff6f6..6ff0cc441a59 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -927,10 +927,33 @@ static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { {} }; +static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = { + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_CORE_YONAH }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM }, + { X86_VENDOR_CENTAUR, 5, }, + { X86_VENDOR_INTEL, 5, }, + { X86_VENDOR_NSC, 5, }, + { X86_VENDOR_ANY, 4, }, + {} +}; + static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 ia32_cap = 0; + if (!x86_match_cpu(cpu_no_spec_store_bypass)) + setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); + if (x86_match_cpu(cpu_no_speculation)) return; diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 2da998baa75c..30cc9c877ebb 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -534,14 +534,22 @@ ssize_t __weak cpu_show_spectre_v2(struct device *dev, return sprintf(buf, "Not affected\n"); } +ssize_t __weak cpu_show_spec_store_bypass(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "Not affected\n"); +} + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); +static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, &dev_attr_spectre_v1.attr, &dev_attr_spectre_v2.attr, + &dev_attr_spec_store_bypass.attr, NULL }; diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 7b01bc11c692..a97a63eef59f 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -53,6 +53,8 @@ extern ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_spec_store_bypass(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, -- cgit v1.2.3-59-g8ed1b From 0cc5fa00b0a88dad140b4e5c2cead9951ad36822 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Sat, 28 Apr 2018 22:34:17 +0200 Subject: x86/cpufeatures: Add X86_FEATURE_RDS Add the CPU feature bit CPUID.7.0.EDX[31] which indicates whether the CPU supports Reduced Data Speculation. [ tglx: Split it out from a later patch ] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar --- arch/x86/include/asm/cpufeatures.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 3493b58ef2cc..48305a1ddfd6 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -334,6 +334,7 @@ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ +#define X86_FEATURE_RDS (18*32+31) /* Reduced Data Speculation */ /* * BUG word(s) -- cgit v1.2.3-59-g8ed1b From 24f7fc83b9204d20f878c57cb77d261ae825e033 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 25 Apr 2018 22:04:21 -0400 Subject: x86/bugs: Provide boot parameters for the spec_store_bypass_disable mitigation Contemporary high performance processors use a common industry-wide optimization known as "Speculative Store Bypass" in which loads from addresses to which a recent store has occurred may (speculatively) see an older value. Intel refers to this feature as "Memory Disambiguation" which is part of their "Smart Memory Access" capability. Memory Disambiguation can expose a cache side-channel attack against such speculatively read values. An attacker can create exploit code that allows them to read memory outside of a sandbox environment (for example, malicious JavaScript in a web page), or to perform more complex attacks against code running within the same privilege level, e.g. via the stack. As a first step to mitigate against such attacks, provide two boot command line control knobs: nospec_store_bypass_disable spec_store_bypass_disable=[off,auto,on] By default affected x86 processors will power on with Speculative Store Bypass enabled. Hence the provided kernel parameters are written from the point of view of whether to enable a mitigation or not. The parameters are as follows: - auto - Kernel detects whether your CPU model contains an implementation of Speculative Store Bypass and picks the most appropriate mitigation. - on - disable Speculative Store Bypass - off - enable Speculative Store Bypass [ tglx: Reordered the checks so that the whole evaluation is not done when the CPU does not support RDS ] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Ingo Molnar --- Documentation/admin-guide/kernel-parameters.txt | 33 ++++++++ arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/nospec-branch.h | 6 ++ arch/x86/kernel/cpu/bugs.c | 103 ++++++++++++++++++++++++ 4 files changed, 143 insertions(+) (limited to 'arch') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 11fc28ecdb6d..f32112c55f7e 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2680,6 +2680,9 @@ allow data leaks with this option, which is equivalent to spectre_v2=off. + nospec_store_bypass_disable + [HW] Disable all mitigations for the Speculative Store Bypass vulnerability + noxsave [BUGS=X86] Disables x86 extended register state save and restore using xsave. The kernel will fallback to enabling legacy floating-point and sse state. @@ -4025,6 +4028,36 @@ Not specifying this option is equivalent to spectre_v2=auto. + spec_store_bypass_disable= + [HW] Control Speculative Store Bypass (SSB) Disable mitigation + (Speculative Store Bypass vulnerability) + + Certain CPUs are vulnerable to an exploit against a + a common industry wide performance optimization known + as "Speculative Store Bypass" in which recent stores + to the same memory location may not be observed by + later loads during speculative execution. The idea + is that such stores are unlikely and that they can + be detected prior to instruction retirement at the + end of a particular speculation execution window. + + In vulnerable processors, the speculatively forwarded + store can be used in a cache side channel attack, for + example to read memory to which the attacker does not + directly have access (e.g. inside sandboxed code). + + This parameter controls whether the Speculative Store + Bypass optimization is used. + + on - Unconditionally disable Speculative Store Bypass + off - Unconditionally enable Speculative Store Bypass + auto - Kernel detects whether the CPU model contains an + implementation of Speculative Store Bypass and + picks the most appropriate mitigation + + Not specifying this option is equivalent to + spec_store_bypass_disable=auto. + spia_io_base= [HW,MTD] spia_fio_base= spia_pedr= diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 48305a1ddfd6..58689ed969bb 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -214,6 +214,7 @@ #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ +#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index d1c2630922da..7b9eacfc03f3 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -238,6 +238,12 @@ extern u64 x86_spec_ctrl_get_default(void); extern void x86_spec_ctrl_set_guest(u64); extern void x86_spec_ctrl_restore_host(u64); +/* The Speculative Store Bypass disable variants */ +enum ssb_mitigation { + SPEC_STORE_BYPASS_NONE, + SPEC_STORE_BYPASS_DISABLE, +}; + extern char __indirect_thunk_start[]; extern char __indirect_thunk_end[]; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 59a9f3aa9f7a..2d1f1cb604aa 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -27,6 +27,7 @@ #include static void __init spectre_v2_select_mitigation(void); +static void __init ssb_select_mitigation(void); /* * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any @@ -53,6 +54,12 @@ void __init check_bugs(void) /* Select the proper spectre mitigation before patching alternatives */ spectre_v2_select_mitigation(); + /* + * Select proper mitigation for any exposure to the Speculative Store + * Bypass vulnerability. + */ + ssb_select_mitigation(); + #ifdef CONFIG_X86_32 /* * Check whether we are able to run this kernel safely on SMP. @@ -357,6 +364,99 @@ retpoline_auto: } } +#undef pr_fmt +#define pr_fmt(fmt) "Speculative Store Bypass: " fmt + +static enum ssb_mitigation ssb_mode = SPEC_STORE_BYPASS_NONE; + +/* The kernel command line selection */ +enum ssb_mitigation_cmd { + SPEC_STORE_BYPASS_CMD_NONE, + SPEC_STORE_BYPASS_CMD_AUTO, + SPEC_STORE_BYPASS_CMD_ON, +}; + +static const char *ssb_strings[] = { + [SPEC_STORE_BYPASS_NONE] = "Vulnerable", + [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled" +}; + +static const struct { + const char *option; + enum ssb_mitigation_cmd cmd; +} ssb_mitigation_options[] = { + { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ + { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ + { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ +}; + +static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void) +{ + enum ssb_mitigation_cmd cmd = SPEC_STORE_BYPASS_CMD_AUTO; + char arg[20]; + int ret, i; + + if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable")) { + return SPEC_STORE_BYPASS_CMD_NONE; + } else { + ret = cmdline_find_option(boot_command_line, "spec_store_bypass_disable", + arg, sizeof(arg)); + if (ret < 0) + return SPEC_STORE_BYPASS_CMD_AUTO; + + for (i = 0; i < ARRAY_SIZE(ssb_mitigation_options); i++) { + if (!match_option(arg, ret, ssb_mitigation_options[i].option)) + continue; + + cmd = ssb_mitigation_options[i].cmd; + break; + } + + if (i >= ARRAY_SIZE(ssb_mitigation_options)) { + pr_err("unknown option (%s). Switching to AUTO select\n", arg); + return SPEC_STORE_BYPASS_CMD_AUTO; + } + } + + return cmd; +} + +static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) +{ + enum ssb_mitigation mode = SPEC_STORE_BYPASS_NONE; + enum ssb_mitigation_cmd cmd; + + if (!boot_cpu_has(X86_FEATURE_RDS)) + return mode; + + cmd = ssb_parse_cmdline(); + if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS) && + (cmd == SPEC_STORE_BYPASS_CMD_NONE || + cmd == SPEC_STORE_BYPASS_CMD_AUTO)) + return mode; + + switch (cmd) { + case SPEC_STORE_BYPASS_CMD_AUTO: + case SPEC_STORE_BYPASS_CMD_ON: + mode = SPEC_STORE_BYPASS_DISABLE; + break; + case SPEC_STORE_BYPASS_CMD_NONE: + break; + } + + if (mode != SPEC_STORE_BYPASS_NONE) + setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE); + return mode; +} + +static void ssb_select_mitigation() +{ + ssb_mode = __ssb_select_mitigation(); + + if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS)) + pr_info("%s\n", ssb_strings[ssb_mode]); +} + #undef pr_fmt #ifdef CONFIG_SYSFS @@ -383,6 +483,9 @@ ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", spectre_v2_module_string()); + case X86_BUG_SPEC_STORE_BYPASS: + return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); + default: break; } -- cgit v1.2.3-59-g8ed1b From 772439717dbf703b39990be58d8d4e3e4ad0598a Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 25 Apr 2018 22:04:22 -0400 Subject: x86/bugs/intel: Set proper CPU features and setup RDS Intel CPUs expose methods to: - Detect whether RDS capability is available via CPUID.7.0.EDX[31], - The SPEC_CTRL MSR(0x48), bit 2 set to enable RDS. - MSR_IA32_ARCH_CAPABILITIES, Bit(4) no need to enable RRS. With that in mind if spec_store_bypass_disable=[auto,on] is selected set at boot-time the SPEC_CTRL MSR to enable RDS if the platform requires it. Note that this does not fix the KVM case where the SPEC_CTRL is exposed to guests which can muck with it, see patch titled : KVM/SVM/VMX/x86/spectre_v2: Support the combination of guest and host IBRS. And for the firmware (IBRS to be set), see patch titled: x86/spectre_v2: Read SPEC_CTRL MSR during boot and re-use reserved bits [ tglx: Distangled it from the intel implementation and kept the call order ] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Ingo Molnar --- arch/x86/include/asm/msr-index.h | 6 ++++++ arch/x86/kernel/cpu/bugs.c | 30 ++++++++++++++++++++++++++++-- arch/x86/kernel/cpu/common.c | 10 ++++++---- arch/x86/kernel/cpu/cpu.h | 2 ++ arch/x86/kernel/cpu/intel.c | 1 + 5 files changed, 43 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 53d5b1b9255e..21e1a6df9907 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -42,6 +42,7 @@ #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ #define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ #define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ +#define SPEC_CTRL_RDS (1 << 2) /* Reduced Data Speculation */ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ @@ -68,6 +69,11 @@ #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a #define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ #define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ +#define ARCH_CAP_RDS_NO (1 << 4) /* + * Not susceptible to Speculative Store Bypass + * attack, so no Reduced Data Speculation control + * required. + */ #define MSR_IA32_BBL_CR_CTL 0x00000119 #define MSR_IA32_BBL_CR_CTL3 0x0000011e diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 2d1f1cb604aa..17edf5bdc361 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -117,7 +117,7 @@ static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; void x86_spec_ctrl_set(u64 val) { - if (val & ~SPEC_CTRL_IBRS) + if (val & ~(SPEC_CTRL_IBRS | SPEC_CTRL_RDS)) WARN_ONCE(1, "SPEC_CTRL MSR value 0x%16llx is unknown.\n", val); else wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base | val); @@ -444,8 +444,28 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) break; } - if (mode != SPEC_STORE_BYPASS_NONE) + /* + * We have three CPU feature flags that are in play here: + * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible. + * - X86_FEATURE_RDS - CPU is able to turn off speculative store bypass + * - X86_FEATURE_SPEC_STORE_BYPASS_DISABLE - engage the mitigation + */ + if (mode != SPEC_STORE_BYPASS_NONE) { setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE); + /* + * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses + * a completely different MSR and bit dependent on family. + */ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + x86_spec_ctrl_base |= SPEC_CTRL_RDS; + x86_spec_ctrl_set(SPEC_CTRL_RDS); + break; + case X86_VENDOR_AMD: + break; + } + } + return mode; } @@ -459,6 +479,12 @@ static void ssb_select_mitigation() #undef pr_fmt +void x86_spec_ctrl_setup_ap(void) +{ + if (boot_cpu_has(X86_FEATURE_IBRS)) + x86_spec_ctrl_set(x86_spec_ctrl_base & (SPEC_CTRL_IBRS | SPEC_CTRL_RDS)); +} + #ifdef CONFIG_SYSFS ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 6ff0cc441a59..d6dc71d616ea 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -951,7 +951,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 ia32_cap = 0; - if (!x86_match_cpu(cpu_no_spec_store_bypass)) + if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); + + if (!x86_match_cpu(cpu_no_spec_store_bypass) && + !(ia32_cap & ARCH_CAP_RDS_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); if (x86_match_cpu(cpu_no_speculation)) @@ -963,9 +967,6 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) if (x86_match_cpu(cpu_no_meltdown)) return; - if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) - rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); - /* Rogue Data Cache Load? No! */ if (ia32_cap & ARCH_CAP_RDCL_NO) return; @@ -1383,6 +1384,7 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c) #endif mtrr_ap_init(); validate_apic_and_package_id(c); + x86_spec_ctrl_setup_ap(); } static __init int setup_noclflush(char *arg) diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index e806b11a99af..37672d299e35 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -50,4 +50,6 @@ extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); unsigned int aperfmperf_get_khz(int cpu); +extern void x86_spec_ctrl_setup_ap(void); + #endif /* ARCH_X86_CPU_H */ diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 60d1897041da..ef3f9c01c274 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -189,6 +189,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) setup_clear_cpu_cap(X86_FEATURE_STIBP); setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL); setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); + setup_clear_cpu_cap(X86_FEATURE_RDS); } /* -- cgit v1.2.3-59-g8ed1b From 1115a859f33276fe8afb31c60cf9d8e657872558 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 25 Apr 2018 22:04:23 -0400 Subject: x86/bugs: Whitelist allowed SPEC_CTRL MSR values Intel and AMD SPEC_CTRL (0x48) MSR semantics may differ in the future (or in fact use different MSRs for the same functionality). As such a run-time mechanism is required to whitelist the appropriate MSR values. [ tglx: Made the variable __ro_after_init ] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar --- arch/x86/kernel/cpu/bugs.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 17edf5bdc361..d809d398675a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -35,6 +35,12 @@ static void __init ssb_select_mitigation(void); */ static u64 __ro_after_init x86_spec_ctrl_base; +/* + * The vendor and possibly platform specific bits which can be modified in + * x86_spec_ctrl_base. + */ +static u64 __ro_after_init x86_spec_ctrl_mask = ~SPEC_CTRL_IBRS; + void __init check_bugs(void) { identify_boot_cpu(); @@ -117,7 +123,7 @@ static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; void x86_spec_ctrl_set(u64 val) { - if (val & ~(SPEC_CTRL_IBRS | SPEC_CTRL_RDS)) + if (val & x86_spec_ctrl_mask) WARN_ONCE(1, "SPEC_CTRL MSR value 0x%16llx is unknown.\n", val); else wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base | val); @@ -459,6 +465,7 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_INTEL: x86_spec_ctrl_base |= SPEC_CTRL_RDS; + x86_spec_ctrl_mask &= ~SPEC_CTRL_RDS; x86_spec_ctrl_set(SPEC_CTRL_RDS); break; case X86_VENDOR_AMD: @@ -482,7 +489,7 @@ static void ssb_select_mitigation() void x86_spec_ctrl_setup_ap(void) { if (boot_cpu_has(X86_FEATURE_IBRS)) - x86_spec_ctrl_set(x86_spec_ctrl_base & (SPEC_CTRL_IBRS | SPEC_CTRL_RDS)); + x86_spec_ctrl_set(x86_spec_ctrl_base & ~x86_spec_ctrl_mask); } #ifdef CONFIG_SYSFS -- cgit v1.2.3-59-g8ed1b From 764f3c21588a059cd783c6ba0734d4db2d72822d Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 25 Apr 2018 22:04:24 -0400 Subject: x86/bugs/AMD: Add support to disable RDS on Fam[15,16,17]h if requested AMD does not need the Speculative Store Bypass mitigation to be enabled. The parameters for this are already available and can be done via MSR C001_1020. Each family uses a different bit in that MSR for this. [ tglx: Expose the bit mask via a variable and move the actual MSR fiddling into the bugs code as that's the right thing to do and also required to prepare for dynamic enable/disable ] Suggested-by: Borislav Petkov Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/nospec-branch.h | 4 ++++ arch/x86/kernel/cpu/amd.c | 26 ++++++++++++++++++++++++++ arch/x86/kernel/cpu/bugs.c | 27 ++++++++++++++++++++++++++- arch/x86/kernel/cpu/common.c | 4 ++++ 5 files changed, 61 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 58689ed969bb..b2464c1787df 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -215,6 +215,7 @@ #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ +#define X86_FEATURE_AMD_RDS (7*32+24) /* "" AMD RDS implementation */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 7b9eacfc03f3..3a1541cf32de 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -244,6 +244,10 @@ enum ssb_mitigation { SPEC_STORE_BYPASS_DISABLE, }; +/* AMD specific Speculative Store Bypass MSR data */ +extern u64 x86_amd_ls_cfg_base; +extern u64 x86_amd_ls_cfg_rds_mask; + extern char __indirect_thunk_start[]; extern char __indirect_thunk_end[]; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 12bc0a1139da..3d553fa5075d 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -554,6 +555,26 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) rdmsrl(MSR_FAM10H_NODE_ID, value); nodes_per_socket = ((value >> 3) & 7) + 1; } + + if (c->x86 >= 0x15 && c->x86 <= 0x17) { + unsigned int bit; + + switch (c->x86) { + case 0x15: bit = 54; break; + case 0x16: bit = 33; break; + case 0x17: bit = 10; break; + default: return; + } + /* + * Try to cache the base value so further operations can + * avoid RMW. If that faults, do not enable RDS. + */ + if (!rdmsrl_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) { + setup_force_cpu_cap(X86_FEATURE_RDS); + setup_force_cpu_cap(X86_FEATURE_AMD_RDS); + x86_amd_ls_cfg_rds_mask = 1ULL << bit; + } + } } static void early_detect_mem_encrypt(struct cpuinfo_x86 *c) @@ -898,6 +919,11 @@ static void init_amd(struct cpuinfo_x86 *c) /* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */ if (!cpu_has(c, X86_FEATURE_XENPV)) set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); + + if (boot_cpu_has(X86_FEATURE_AMD_RDS)) { + set_cpu_cap(c, X86_FEATURE_RDS); + set_cpu_cap(c, X86_FEATURE_AMD_RDS); + } } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d809d398675a..86cdf635636d 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -41,6 +41,13 @@ static u64 __ro_after_init x86_spec_ctrl_base; */ static u64 __ro_after_init x86_spec_ctrl_mask = ~SPEC_CTRL_IBRS; +/* + * AMD specific MSR info for Speculative Store Bypass control. + * x86_amd_ls_cfg_rds_mask is initialized in identify_boot_cpu(). + */ +u64 __ro_after_init x86_amd_ls_cfg_base; +u64 __ro_after_init x86_amd_ls_cfg_rds_mask; + void __init check_bugs(void) { identify_boot_cpu(); @@ -52,7 +59,8 @@ void __init check_bugs(void) /* * Read the SPEC_CTRL MSR to account for reserved bits which may - * have unknown values. + * have unknown values. AMD64_LS_CFG MSR is cached in the early AMD + * init code as it is not enumerated and depends on the family. */ if (boot_cpu_has(X86_FEATURE_IBRS)) rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); @@ -154,6 +162,14 @@ void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl) } EXPORT_SYMBOL_GPL(x86_spec_ctrl_restore_host); +static void x86_amd_rds_enable(void) +{ + u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_rds_mask; + + if (boot_cpu_has(X86_FEATURE_AMD_RDS)) + wrmsrl(MSR_AMD64_LS_CFG, msrval); +} + #ifdef RETPOLINE static bool spectre_v2_bad_module; @@ -443,6 +459,11 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) switch (cmd) { case SPEC_STORE_BYPASS_CMD_AUTO: + /* + * AMD platforms by default don't need SSB mitigation. + */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + break; case SPEC_STORE_BYPASS_CMD_ON: mode = SPEC_STORE_BYPASS_DISABLE; break; @@ -469,6 +490,7 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) x86_spec_ctrl_set(SPEC_CTRL_RDS); break; case X86_VENDOR_AMD: + x86_amd_rds_enable(); break; } } @@ -490,6 +512,9 @@ void x86_spec_ctrl_setup_ap(void) { if (boot_cpu_has(X86_FEATURE_IBRS)) x86_spec_ctrl_set(x86_spec_ctrl_base & ~x86_spec_ctrl_mask); + + if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) + x86_amd_rds_enable(); } #ifdef CONFIG_SYSFS diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d6dc71d616ea..e0517bcee446 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -943,6 +943,10 @@ static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = { { X86_VENDOR_CENTAUR, 5, }, { X86_VENDOR_INTEL, 5, }, { X86_VENDOR_NSC, 5, }, + { X86_VENDOR_AMD, 0x12, }, + { X86_VENDOR_AMD, 0x11, }, + { X86_VENDOR_AMD, 0x10, }, + { X86_VENDOR_AMD, 0xf, }, { X86_VENDOR_ANY, 4, }, {} }; -- cgit v1.2.3-59-g8ed1b From da39556f66f5cfe8f9c989206974f1cb16ca5d7c Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 25 Apr 2018 22:04:25 -0400 Subject: x86/KVM/VMX: Expose SPEC_CTRL Bit(2) to the guest Expose the CPUID.7.EDX[31] bit to the guest, and also guard against various combinations of SPEC_CTRL MSR values. The handling of the MSR (to take into account the host value of SPEC_CTRL Bit(2)) is taken care of in patch: KVM/SVM/VMX/x86/spectre_v2: Support the combination of guest and host IBRS Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar --- arch/x86/kvm/cpuid.c | 2 +- arch/x86/kvm/vmx.c | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 82055b90a8b3..376ac9a2a2b9 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -407,7 +407,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = - F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | + F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | F(RDS) | F(ARCH_CAPABILITIES); /* all calls to cpuid_count() should be made on the same cpu */ diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 3091dc98a451..4197942b6d9b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3524,7 +3524,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) && - !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) && + !guest_cpuid_has(vcpu, X86_FEATURE_RDS)) return 1; msr_info->data = to_vmx(vcpu)->spec_ctrl; @@ -3643,11 +3644,12 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) && - !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) && + !guest_cpuid_has(vcpu, X86_FEATURE_RDS)) return 1; /* The STIBP bit doesn't fault even if it's not advertised */ - if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_RDS)) return 1; vmx->spec_ctrl = data; -- cgit v1.2.3-59-g8ed1b From 28a2775217b17208811fa43a9e96bd1fdf417b86 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 29 Apr 2018 15:01:37 +0200 Subject: x86/speculation: Create spec-ctrl.h to avoid include hell Having everything in nospec-branch.h creates a hell of dependencies when adding the prctl based switching mechanism. Move everything which is not required in nospec-branch.h to spec-ctrl.h and fix up the includes in the relevant files. Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Reviewed-by: Ingo Molnar --- arch/x86/include/asm/nospec-branch.h | 14 -------------- arch/x86/include/asm/spec-ctrl.h | 21 +++++++++++++++++++++ arch/x86/kernel/cpu/amd.c | 2 +- arch/x86/kernel/cpu/bugs.c | 2 +- arch/x86/kvm/svm.c | 2 +- arch/x86/kvm/vmx.c | 2 +- 6 files changed, 25 insertions(+), 18 deletions(-) create mode 100644 arch/x86/include/asm/spec-ctrl.h (limited to 'arch') diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 3a1541cf32de..1119f14bc883 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -228,26 +228,12 @@ enum spectre_v2_mitigation { extern void x86_spec_ctrl_set(u64); extern u64 x86_spec_ctrl_get_default(void); -/* - * On VMENTER we must preserve whatever view of the SPEC_CTRL MSR - * the guest has, while on VMEXIT we restore the host view. This - * would be easier if SPEC_CTRL were architecturally maskable or - * shadowable for guests but this is not (currently) the case. - * Takes the guest view of SPEC_CTRL MSR as a parameter. - */ -extern void x86_spec_ctrl_set_guest(u64); -extern void x86_spec_ctrl_restore_host(u64); - /* The Speculative Store Bypass disable variants */ enum ssb_mitigation { SPEC_STORE_BYPASS_NONE, SPEC_STORE_BYPASS_DISABLE, }; -/* AMD specific Speculative Store Bypass MSR data */ -extern u64 x86_amd_ls_cfg_base; -extern u64 x86_amd_ls_cfg_rds_mask; - extern char __indirect_thunk_start[]; extern char __indirect_thunk_end[]; diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h new file mode 100644 index 000000000000..3ad64420a06e --- /dev/null +++ b/arch/x86/include/asm/spec-ctrl.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SPECCTRL_H_ +#define _ASM_X86_SPECCTRL_H_ + +#include + +/* + * On VMENTER we must preserve whatever view of the SPEC_CTRL MSR + * the guest has, while on VMEXIT we restore the host view. This + * would be easier if SPEC_CTRL were architecturally maskable or + * shadowable for guests but this is not (currently) the case. + * Takes the guest view of SPEC_CTRL MSR as a parameter. + */ +extern void x86_spec_ctrl_set_guest(u64); +extern void x86_spec_ctrl_restore_host(u64); + +/* AMD specific Speculative Store Bypass MSR data */ +extern u64 x86_amd_ls_cfg_base; +extern u64 x86_amd_ls_cfg_rds_mask; + +#endif diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 3d553fa5075d..18efc33a8d2e 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 86cdf635636d..ec171873167a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -13,7 +13,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 8bfc8fe26851..437c1b371129 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -49,7 +49,7 @@ #include #include #include -#include +#include #include #include "trace.h" diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4197942b6d9b..16a111e44691 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -51,7 +51,7 @@ #include #include #include -#include +#include #include #include "trace.h" -- cgit v1.2.3-59-g8ed1b From 885f82bfbc6fefb6664ea27965c3ab9ac4194b8c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 29 Apr 2018 15:21:42 +0200 Subject: x86/process: Allow runtime control of Speculative Store Bypass The Speculative Store Bypass vulnerability can be mitigated with the Reduced Data Speculation (RDS) feature. To allow finer grained control of this eventually expensive mitigation a per task mitigation control is required. Add a new TIF_RDS flag and put it into the group of TIF flags which are evaluated for mismatch in switch_to(). If these bits differ in the previous and the next task, then the slow path function __switch_to_xtra() is invoked. Implement the TIF_RDS dependent mitigation control in the slow path. If the prctl for controlling Speculative Store Bypass is disabled or no task uses the prctl then there is no overhead in the switch_to() fast path. Update the KVM related speculation control functions to take TID_RDS into account as well. Based on a patch from Tim Chen. Completely rewritten. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Reviewed-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/msr-index.h | 3 ++- arch/x86/include/asm/spec-ctrl.h | 17 +++++++++++++++++ arch/x86/include/asm/thread_info.h | 4 +++- arch/x86/kernel/cpu/bugs.c | 26 +++++++++++++++++++++----- arch/x86/kernel/process.c | 22 ++++++++++++++++++++++ 5 files changed, 65 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 21e1a6df9907..810f50bb338d 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -42,7 +42,8 @@ #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ #define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ #define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ -#define SPEC_CTRL_RDS (1 << 2) /* Reduced Data Speculation */ +#define SPEC_CTRL_RDS_SHIFT 2 /* Reduced Data Speculation bit */ +#define SPEC_CTRL_RDS (1 << SPEC_CTRL_RDS_SHIFT) /* Reduced Data Speculation */ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index 3ad64420a06e..45ef00ad5105 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -2,6 +2,7 @@ #ifndef _ASM_X86_SPECCTRL_H_ #define _ASM_X86_SPECCTRL_H_ +#include #include /* @@ -18,4 +19,20 @@ extern void x86_spec_ctrl_restore_host(u64); extern u64 x86_amd_ls_cfg_base; extern u64 x86_amd_ls_cfg_rds_mask; +/* The Intel SPEC CTRL MSR base value cache */ +extern u64 x86_spec_ctrl_base; + +static inline u64 rds_tif_to_spec_ctrl(u64 tifn) +{ + BUILD_BUG_ON(TIF_RDS < SPEC_CTRL_RDS_SHIFT); + return (tifn & _TIF_RDS) >> (TIF_RDS - SPEC_CTRL_RDS_SHIFT); +} + +static inline u64 rds_tif_to_amd_ls_cfg(u64 tifn) +{ + return (tifn & _TIF_RDS) ? x86_amd_ls_cfg_rds_mask : 0ULL; +} + +extern void speculative_store_bypass_update(void); + #endif diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index a5d9521bb2cb..e5c26cc59619 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -79,6 +79,7 @@ struct thread_info { #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ +#define TIF_RDS 5 /* Reduced data speculation */ #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ @@ -105,6 +106,7 @@ struct thread_info { #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) +#define _TIF_RDS (1 << TIF_RDS) #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) @@ -144,7 +146,7 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP) + (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_RDS) #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index ec171873167a..2bc109d0f8ae 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -33,7 +33,7 @@ static void __init ssb_select_mitigation(void); * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any * writes to SPEC_CTRL contain whatever reserved bits have been set. */ -static u64 __ro_after_init x86_spec_ctrl_base; +u64 __ro_after_init x86_spec_ctrl_base; /* * The vendor and possibly platform specific bits which can be modified in @@ -140,25 +140,41 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_set); u64 x86_spec_ctrl_get_default(void) { - return x86_spec_ctrl_base; + u64 msrval = x86_spec_ctrl_base; + + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + msrval |= rds_tif_to_spec_ctrl(current_thread_info()->flags); + return msrval; } EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default); void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl) { + u64 host = x86_spec_ctrl_base; + if (!boot_cpu_has(X86_FEATURE_IBRS)) return; - if (x86_spec_ctrl_base != guest_spec_ctrl) + + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + host |= rds_tif_to_spec_ctrl(current_thread_info()->flags); + + if (host != guest_spec_ctrl) wrmsrl(MSR_IA32_SPEC_CTRL, guest_spec_ctrl); } EXPORT_SYMBOL_GPL(x86_spec_ctrl_set_guest); void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl) { + u64 host = x86_spec_ctrl_base; + if (!boot_cpu_has(X86_FEATURE_IBRS)) return; - if (x86_spec_ctrl_base != guest_spec_ctrl) - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + host |= rds_tif_to_spec_ctrl(current_thread_info()->flags); + + if (host != guest_spec_ctrl) + wrmsrl(MSR_IA32_SPEC_CTRL, host); } EXPORT_SYMBOL_GPL(x86_spec_ctrl_restore_host); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 03408b942adb..397342725046 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -38,6 +38,7 @@ #include #include #include +#include /* * per-CPU TSS segments. Threads are completely 'soft' on Linux, @@ -278,6 +279,24 @@ static inline void switch_to_bitmap(struct tss_struct *tss, } } +static __always_inline void __speculative_store_bypass_update(unsigned long tifn) +{ + u64 msr; + + if (static_cpu_has(X86_FEATURE_AMD_RDS)) { + msr = x86_amd_ls_cfg_base | rds_tif_to_amd_ls_cfg(tifn); + wrmsrl(MSR_AMD64_LS_CFG, msr); + } else { + msr = x86_spec_ctrl_base | rds_tif_to_spec_ctrl(tifn); + wrmsrl(MSR_IA32_SPEC_CTRL, msr); + } +} + +void speculative_store_bypass_update(void) +{ + __speculative_store_bypass_update(current_thread_info()->flags); +} + void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, struct tss_struct *tss) { @@ -309,6 +328,9 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, if ((tifp ^ tifn) & _TIF_NOCPUID) set_cpuid_faulting(!!(tifn & _TIF_NOCPUID)); + + if ((tifp ^ tifn) & _TIF_RDS) + __speculative_store_bypass_update(tifn); } /* -- cgit v1.2.3-59-g8ed1b From a73ec77ee17ec556fe7f165d00314cb7c047b1ac Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 29 Apr 2018 15:26:40 +0200 Subject: x86/speculation: Add prctl for Speculative Store Bypass mitigation Add prctl based control for Speculative Store Bypass mitigation and make it the default mitigation for Intel and AMD. Andi Kleen provided the following rationale (slightly redacted): There are multiple levels of impact of Speculative Store Bypass: 1) JITed sandbox. It cannot invoke system calls, but can do PRIME+PROBE and may have call interfaces to other code 2) Native code process. No protection inside the process at this level. 3) Kernel. 4) Between processes. The prctl tries to protect against case (1) doing attacks. If the untrusted code can do random system calls then control is already lost in a much worse way. So there needs to be system call protection in some way (using a JIT not allowing them or seccomp). Or rather if the process can subvert its environment somehow to do the prctl it can already execute arbitrary code, which is much worse than SSB. To put it differently, the point of the prctl is to not allow JITed code to read data it shouldn't read from its JITed sandbox. If it already has escaped its sandbox then it can already read everything it wants in its address space, and do much worse. The ability to control Speculative Store Bypass allows to enable the protection selectively without affecting overall system performance. Based on an initial patch from Tim Chen. Completely rewritten. Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk --- Documentation/admin-guide/kernel-parameters.txt | 6 +- arch/x86/include/asm/nospec-branch.h | 1 + arch/x86/kernel/cpu/bugs.c | 83 ++++++++++++++++++++++--- 3 files changed, 79 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index f32112c55f7e..a8d2ae1e335b 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4053,7 +4053,11 @@ off - Unconditionally enable Speculative Store Bypass auto - Kernel detects whether the CPU model contains an implementation of Speculative Store Bypass and - picks the most appropriate mitigation + picks the most appropriate mitigation. + prctl - Control Speculative Store Bypass per thread + via prctl. Speculative Store Bypass is enabled + for a process by default. The state of the control + is inherited on fork. Not specifying this option is equivalent to spec_store_bypass_disable=auto. diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 1119f14bc883..71ad01422655 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -232,6 +232,7 @@ extern u64 x86_spec_ctrl_get_default(void); enum ssb_mitigation { SPEC_STORE_BYPASS_NONE, SPEC_STORE_BYPASS_DISABLE, + SPEC_STORE_BYPASS_PRCTL, }; extern char __indirect_thunk_start[]; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 2bc109d0f8ae..fc9187b6fae7 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include @@ -412,20 +414,23 @@ enum ssb_mitigation_cmd { SPEC_STORE_BYPASS_CMD_NONE, SPEC_STORE_BYPASS_CMD_AUTO, SPEC_STORE_BYPASS_CMD_ON, + SPEC_STORE_BYPASS_CMD_PRCTL, }; static const char *ssb_strings[] = { [SPEC_STORE_BYPASS_NONE] = "Vulnerable", - [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled" + [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled", + [SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl" }; static const struct { const char *option; enum ssb_mitigation_cmd cmd; } ssb_mitigation_options[] = { - { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ - { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ - { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ + { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ + { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ + { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ + { "prctl", SPEC_STORE_BYPASS_CMD_PRCTL }, /* Disable Speculative Store Bypass via prctl */ }; static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void) @@ -475,14 +480,15 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) switch (cmd) { case SPEC_STORE_BYPASS_CMD_AUTO: - /* - * AMD platforms by default don't need SSB mitigation. - */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) - break; + /* Choose prctl as the default mode */ + mode = SPEC_STORE_BYPASS_PRCTL; + break; case SPEC_STORE_BYPASS_CMD_ON: mode = SPEC_STORE_BYPASS_DISABLE; break; + case SPEC_STORE_BYPASS_CMD_PRCTL: + mode = SPEC_STORE_BYPASS_PRCTL; + break; case SPEC_STORE_BYPASS_CMD_NONE: break; } @@ -493,7 +499,7 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) * - X86_FEATURE_RDS - CPU is able to turn off speculative store bypass * - X86_FEATURE_SPEC_STORE_BYPASS_DISABLE - engage the mitigation */ - if (mode != SPEC_STORE_BYPASS_NONE) { + if (mode == SPEC_STORE_BYPASS_DISABLE) { setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE); /* * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses @@ -524,6 +530,63 @@ static void ssb_select_mitigation() #undef pr_fmt +static int ssb_prctl_set(unsigned long ctrl) +{ + bool rds = !!test_tsk_thread_flag(current, TIF_RDS); + + if (ssb_mode != SPEC_STORE_BYPASS_PRCTL) + return -ENXIO; + + if (ctrl == PR_SPEC_ENABLE) + clear_tsk_thread_flag(current, TIF_RDS); + else + set_tsk_thread_flag(current, TIF_RDS); + + if (rds != !!test_tsk_thread_flag(current, TIF_RDS)) + speculative_store_bypass_update(); + + return 0; +} + +static int ssb_prctl_get(void) +{ + switch (ssb_mode) { + case SPEC_STORE_BYPASS_DISABLE: + return PR_SPEC_DISABLE; + case SPEC_STORE_BYPASS_PRCTL: + if (test_tsk_thread_flag(current, TIF_RDS)) + return PR_SPEC_PRCTL | PR_SPEC_DISABLE; + return PR_SPEC_PRCTL | PR_SPEC_ENABLE; + default: + if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS)) + return PR_SPEC_ENABLE; + return PR_SPEC_NOT_AFFECTED; + } +} + +int arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl) +{ + if (ctrl != PR_SPEC_ENABLE && ctrl != PR_SPEC_DISABLE) + return -ERANGE; + + switch (which) { + case PR_SPEC_STORE_BYPASS: + return ssb_prctl_set(ctrl); + default: + return -ENODEV; + } +} + +int arch_prctl_spec_ctrl_get(unsigned long which) +{ + switch (which) { + case PR_SPEC_STORE_BYPASS: + return ssb_prctl_get(); + default: + return -ENODEV; + } +} + void x86_spec_ctrl_setup_ap(void) { if (boot_cpu_has(X86_FEATURE_IBRS)) -- cgit v1.2.3-59-g8ed1b From 7bbf1373e228840bb0295a2ca26d548ef37f448e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 1 May 2018 15:19:04 -0700 Subject: nospec: Allow getting/setting on non-current task Adjust arch_prctl_get/set_spec_ctrl() to operate on tasks other than current. This is needed both for /proc/$pid/status queries and for seccomp (since thread-syncing can trigger seccomp in non-current threads). Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/bugs.c | 27 ++++++++++++++++----------- include/linux/nospec.h | 7 +++++-- kernel/sys.c | 9 +++++---- 3 files changed, 26 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index fc9187b6fae7..e3afb610f2ad 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -530,31 +530,35 @@ static void ssb_select_mitigation() #undef pr_fmt -static int ssb_prctl_set(unsigned long ctrl) +static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) { - bool rds = !!test_tsk_thread_flag(current, TIF_RDS); + bool rds = !!test_tsk_thread_flag(task, TIF_RDS); if (ssb_mode != SPEC_STORE_BYPASS_PRCTL) return -ENXIO; if (ctrl == PR_SPEC_ENABLE) - clear_tsk_thread_flag(current, TIF_RDS); + clear_tsk_thread_flag(task, TIF_RDS); else - set_tsk_thread_flag(current, TIF_RDS); + set_tsk_thread_flag(task, TIF_RDS); - if (rds != !!test_tsk_thread_flag(current, TIF_RDS)) + /* + * If being set on non-current task, delay setting the CPU + * mitigation until it is next scheduled. + */ + if (task == current && rds != !!test_tsk_thread_flag(task, TIF_RDS)) speculative_store_bypass_update(); return 0; } -static int ssb_prctl_get(void) +static int ssb_prctl_get(struct task_struct *task) { switch (ssb_mode) { case SPEC_STORE_BYPASS_DISABLE: return PR_SPEC_DISABLE; case SPEC_STORE_BYPASS_PRCTL: - if (test_tsk_thread_flag(current, TIF_RDS)) + if (test_tsk_thread_flag(task, TIF_RDS)) return PR_SPEC_PRCTL | PR_SPEC_DISABLE; return PR_SPEC_PRCTL | PR_SPEC_ENABLE; default: @@ -564,24 +568,25 @@ static int ssb_prctl_get(void) } } -int arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl) +int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, + unsigned long ctrl) { if (ctrl != PR_SPEC_ENABLE && ctrl != PR_SPEC_DISABLE) return -ERANGE; switch (which) { case PR_SPEC_STORE_BYPASS: - return ssb_prctl_set(ctrl); + return ssb_prctl_set(task, ctrl); default: return -ENODEV; } } -int arch_prctl_spec_ctrl_get(unsigned long which) +int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) { switch (which) { case PR_SPEC_STORE_BYPASS: - return ssb_prctl_get(); + return ssb_prctl_get(task); default: return -ENODEV; } diff --git a/include/linux/nospec.h b/include/linux/nospec.h index 700bb8a4e4ea..a908c954484d 100644 --- a/include/linux/nospec.h +++ b/include/linux/nospec.h @@ -7,6 +7,8 @@ #define _LINUX_NOSPEC_H #include +struct task_struct; + /** * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise * @index: array element index @@ -57,7 +59,8 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, }) /* Speculation control prctl */ -int arch_prctl_spec_ctrl_get(unsigned long which); -int arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl); +int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which); +int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, + unsigned long ctrl); #endif /* _LINUX_NOSPEC_H */ diff --git a/kernel/sys.c b/kernel/sys.c index b76dee23bdc9..b0eee418ee0d 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2244,12 +2244,13 @@ static int propagate_has_child_subreaper(struct task_struct *p, void *data) return 1; } -int __weak arch_prctl_spec_ctrl_get(unsigned long which) +int __weak arch_prctl_spec_ctrl_get(struct task_struct *t, unsigned long which) { return -EINVAL; } -int __weak arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl) +int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which, + unsigned long ctrl) { return -EINVAL; } @@ -2465,12 +2466,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_GET_SPECULATION_CTRL: if (arg3 || arg4 || arg5) return -EINVAL; - error = arch_prctl_spec_ctrl_get(arg2); + error = arch_prctl_spec_ctrl_get(me, arg2); break; case PR_SET_SPECULATION_CTRL: if (arg4 || arg5) return -EINVAL; - error = arch_prctl_spec_ctrl_set(arg2, arg3); + error = arch_prctl_spec_ctrl_set(me, arg2, arg3); break; default: error = -EINVAL; -- cgit v1.2.3-59-g8ed1b From f9544b2b076ca90d887c5ae5d74fab4c21bb7c13 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 3 May 2018 15:03:30 -0700 Subject: x86/bugs: Make boot modes __ro_after_init There's no reason for these to be changed after boot. Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/bugs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e3afb610f2ad..f8d9be0e86b1 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -129,7 +129,8 @@ static const char *spectre_v2_strings[] = { #undef pr_fmt #define pr_fmt(fmt) "Spectre V2 : " fmt -static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; +static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = + SPECTRE_V2_NONE; void x86_spec_ctrl_set(u64 val) { @@ -407,7 +408,7 @@ retpoline_auto: #undef pr_fmt #define pr_fmt(fmt) "Speculative Store Bypass: " fmt -static enum ssb_mitigation ssb_mode = SPEC_STORE_BYPASS_NONE; +static enum ssb_mitigation ssb_mode __ro_after_init = SPEC_STORE_BYPASS_NONE; /* The kernel command line selection */ enum ssb_mitigation_cmd { -- cgit v1.2.3-59-g8ed1b From 356e4bfff2c5489e016fdb925adbf12a1e3950ee Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 3 May 2018 22:09:15 +0200 Subject: prctl: Add force disable speculation For certain use cases it is desired to enforce mitigations so they cannot be undone afterwards. That's important for loader stubs which want to prevent a child from disabling the mitigation again. Will also be used for seccomp(). The extra state preserving of the prctl state for SSB is a preparatory step for EBPF dymanic speculation control. Signed-off-by: Thomas Gleixner --- Documentation/userspace-api/spec_ctrl.rst | 34 ++++++++++++++++++------------ arch/x86/kernel/cpu/bugs.c | 35 ++++++++++++++++++++++--------- fs/proc/array.c | 3 +++ include/linux/sched.h | 10 ++++++++- include/uapi/linux/prctl.h | 1 + 5 files changed, 59 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/Documentation/userspace-api/spec_ctrl.rst b/Documentation/userspace-api/spec_ctrl.rst index ddbebcd01208..1b3690d30943 100644 --- a/Documentation/userspace-api/spec_ctrl.rst +++ b/Documentation/userspace-api/spec_ctrl.rst @@ -25,19 +25,21 @@ PR_GET_SPECULATION_CTRL ----------------------- PR_GET_SPECULATION_CTRL returns the state of the speculation misfeature -which is selected with arg2 of prctl(2). The return value uses bits 0-2 with +which is selected with arg2 of prctl(2). The return value uses bits 0-3 with the following meaning: -==== ================ =================================================== -Bit Define Description -==== ================ =================================================== -0 PR_SPEC_PRCTL Mitigation can be controlled per task by - PR_SET_SPECULATION_CTRL -1 PR_SPEC_ENABLE The speculation feature is enabled, mitigation is - disabled -2 PR_SPEC_DISABLE The speculation feature is disabled, mitigation is - enabled -==== ================ =================================================== +==== ===================== =================================================== +Bit Define Description +==== ===================== =================================================== +0 PR_SPEC_PRCTL Mitigation can be controlled per task by + PR_SET_SPECULATION_CTRL +1 PR_SPEC_ENABLE The speculation feature is enabled, mitigation is + disabled +2 PR_SPEC_DISABLE The speculation feature is disabled, mitigation is + enabled +3 PR_SPEC_FORCE_DISABLE Same as PR_SPEC_DISABLE, but cannot be undone. A + subsequent prctl(..., PR_SPEC_ENABLE) will fail. +==== ===================== =================================================== If all bits are 0 the CPU is not affected by the speculation misfeature. @@ -47,9 +49,11 @@ misfeature will fail. PR_SET_SPECULATION_CTRL ----------------------- + PR_SET_SPECULATION_CTRL allows to control the speculation misfeature, which is selected by arg2 of :manpage:`prctl(2)` per task. arg3 is used to hand -in the control value, i.e. either PR_SPEC_ENABLE or PR_SPEC_DISABLE. +in the control value, i.e. either PR_SPEC_ENABLE or PR_SPEC_DISABLE or +PR_SPEC_FORCE_DISABLE. Common error codes ------------------ @@ -70,10 +74,13 @@ Value Meaning 0 Success ERANGE arg3 is incorrect, i.e. it's neither PR_SPEC_ENABLE nor - PR_SPEC_DISABLE + PR_SPEC_DISABLE nor PR_SPEC_FORCE_DISABLE ENXIO Control of the selected speculation misfeature is not possible. See PR_GET_SPECULATION_CTRL. + +EPERM Speculation was disabled with PR_SPEC_FORCE_DISABLE and caller + tried to enable it again. ======= ================================================================= Speculation misfeature controls @@ -84,3 +91,4 @@ Speculation misfeature controls * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0); * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0); * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0); + * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_FORCE_DISABLE, 0, 0); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index f8d9be0e86b1..7e0f28160e5e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -533,21 +533,37 @@ static void ssb_select_mitigation() static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) { - bool rds = !!test_tsk_thread_flag(task, TIF_RDS); + bool update; if (ssb_mode != SPEC_STORE_BYPASS_PRCTL) return -ENXIO; - if (ctrl == PR_SPEC_ENABLE) - clear_tsk_thread_flag(task, TIF_RDS); - else - set_tsk_thread_flag(task, TIF_RDS); + switch (ctrl) { + case PR_SPEC_ENABLE: + /* If speculation is force disabled, enable is not allowed */ + if (task_spec_ssb_force_disable(task)) + return -EPERM; + task_clear_spec_ssb_disable(task); + update = test_and_clear_tsk_thread_flag(task, TIF_RDS); + break; + case PR_SPEC_DISABLE: + task_set_spec_ssb_disable(task); + update = !test_and_set_tsk_thread_flag(task, TIF_RDS); + break; + case PR_SPEC_FORCE_DISABLE: + task_set_spec_ssb_disable(task); + task_set_spec_ssb_force_disable(task); + update = !test_and_set_tsk_thread_flag(task, TIF_RDS); + break; + default: + return -ERANGE; + } /* * If being set on non-current task, delay setting the CPU * mitigation until it is next scheduled. */ - if (task == current && rds != !!test_tsk_thread_flag(task, TIF_RDS)) + if (task == current && update) speculative_store_bypass_update(); return 0; @@ -559,7 +575,9 @@ static int ssb_prctl_get(struct task_struct *task) case SPEC_STORE_BYPASS_DISABLE: return PR_SPEC_DISABLE; case SPEC_STORE_BYPASS_PRCTL: - if (test_tsk_thread_flag(task, TIF_RDS)) + if (task_spec_ssb_force_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; + if (task_spec_ssb_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_DISABLE; return PR_SPEC_PRCTL | PR_SPEC_ENABLE; default: @@ -572,9 +590,6 @@ static int ssb_prctl_get(struct task_struct *task) int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, unsigned long ctrl) { - if (ctrl != PR_SPEC_ENABLE && ctrl != PR_SPEC_DISABLE) - return -ERANGE; - switch (which) { case PR_SPEC_STORE_BYPASS: return ssb_prctl_set(task, ctrl); diff --git a/fs/proc/array.c b/fs/proc/array.c index 303c155f9b04..d178a0236514 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -344,6 +344,9 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p) case PR_SPEC_NOT_AFFECTED: seq_printf(m, "not vulnerable"); break; + case PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE: + seq_printf(m, "thread force mitigated"); + break; case PR_SPEC_PRCTL | PR_SPEC_DISABLE: seq_printf(m, "thread mitigated"); break; diff --git a/include/linux/sched.h b/include/linux/sched.h index b3d697f3b573..e4218d4deba0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1393,7 +1393,8 @@ static inline bool is_percpu_thread(void) #define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */ #define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ #define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ - +#define PFA_SPEC_SSB_DISABLE 3 /* Speculative Store Bypass disabled */ +#define PFA_SPEC_SSB_FORCE_DISABLE 4 /* Speculative Store Bypass force disabled*/ #define TASK_PFA_TEST(name, func) \ static inline bool task_##func(struct task_struct *p) \ @@ -1418,6 +1419,13 @@ TASK_PFA_TEST(SPREAD_SLAB, spread_slab) TASK_PFA_SET(SPREAD_SLAB, spread_slab) TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab) +TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable) +TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable) +TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable) + +TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) +TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) + static inline void current_restore_flags(unsigned long orig_flags, unsigned long flags) { diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index ebf057ac1346..db9f15f5db04 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -217,5 +217,6 @@ struct prctl_mm_map { # define PR_SPEC_PRCTL (1UL << 0) # define PR_SPEC_ENABLE (1UL << 1) # define PR_SPEC_DISABLE (1UL << 2) +# define PR_SPEC_FORCE_DISABLE (1UL << 3) #endif /* _LINUX_PRCTL_H */ -- cgit v1.2.3-59-g8ed1b From 8bf37d8c067bb7eb8e7c381bdadf9bd89182b6bc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 4 May 2018 15:12:06 +0200 Subject: seccomp: Move speculation migitation control to arch code The migitation control is simpler to implement in architecture code as it avoids the extra function call to check the mode. Aside of that having an explicit seccomp enabled mode in the architecture mitigations would require even more workarounds. Move it into architecture code and provide a weak function in the seccomp code. Remove the 'which' argument as this allows the architecture to decide which mitigations are relevant for seccomp. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/bugs.c | 29 ++++++++++++++++++----------- include/linux/nospec.h | 2 ++ kernel/seccomp.c | 15 ++------------- 3 files changed, 22 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 7e0f28160e5e..5dab4c3d26e7 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -569,6 +569,24 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) return 0; } +int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, + unsigned long ctrl) +{ + switch (which) { + case PR_SPEC_STORE_BYPASS: + return ssb_prctl_set(task, ctrl); + default: + return -ENODEV; + } +} + +#ifdef CONFIG_SECCOMP +void arch_seccomp_spec_mitigate(struct task_struct *task) +{ + ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE); +} +#endif + static int ssb_prctl_get(struct task_struct *task) { switch (ssb_mode) { @@ -587,17 +605,6 @@ static int ssb_prctl_get(struct task_struct *task) } } -int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, - unsigned long ctrl) -{ - switch (which) { - case PR_SPEC_STORE_BYPASS: - return ssb_prctl_set(task, ctrl); - default: - return -ENODEV; - } -} - int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) { switch (which) { diff --git a/include/linux/nospec.h b/include/linux/nospec.h index a908c954484d..0c5ef54fd416 100644 --- a/include/linux/nospec.h +++ b/include/linux/nospec.h @@ -62,5 +62,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which); int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, unsigned long ctrl); +/* Speculation control for seccomp enforced mitigation */ +void arch_seccomp_spec_mitigate(struct task_struct *task); #endif /* _LINUX_NOSPEC_H */ diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 53eb946120c1..e691d9a6c58d 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -229,18 +229,7 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode) return true; } -/* - * If a given speculation mitigation is opt-in (prctl()-controlled), - * select it, by disabling speculation (enabling mitigation). - */ -static inline void spec_mitigate(struct task_struct *task, - unsigned long which) -{ - int state = arch_prctl_spec_ctrl_get(task, which); - - if (state > 0 && (state & PR_SPEC_PRCTL)) - arch_prctl_spec_ctrl_set(task, which, PR_SPEC_FORCE_DISABLE); -} +void __weak arch_seccomp_spec_mitigate(struct task_struct *task) { } static inline void seccomp_assign_mode(struct task_struct *task, unsigned long seccomp_mode, @@ -256,7 +245,7 @@ static inline void seccomp_assign_mode(struct task_struct *task, smp_mb__before_atomic(); /* Assume default seccomp processes want spec flaw mitigation. */ if ((flags & SECCOMP_FILTER_FLAG_SPEC_ALLOW) == 0) - spec_mitigate(task, PR_SPEC_STORE_BYPASS); + arch_seccomp_spec_mitigate(task); set_tsk_thread_flag(task, TIF_SECCOMP); } -- cgit v1.2.3-59-g8ed1b From f21b53b20c754021935ea43364dbf53778eeba32 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 3 May 2018 14:37:54 -0700 Subject: x86/speculation: Make "seccomp" the default mode for Speculative Store Bypass Unless explicitly opted out of, anything running under seccomp will have SSB mitigations enabled. Choosing the "prctl" mode will disable this. [ tglx: Adjusted it to the new arch_seccomp_spec_mitigate() mechanism ] Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner --- Documentation/admin-guide/kernel-parameters.txt | 26 +++++++++++++------- arch/x86/include/asm/nospec-branch.h | 1 + arch/x86/kernel/cpu/bugs.c | 32 ++++++++++++++++++------- 3 files changed, 41 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index a8d2ae1e335b..f2040d46f095 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4049,19 +4049,27 @@ This parameter controls whether the Speculative Store Bypass optimization is used. - on - Unconditionally disable Speculative Store Bypass - off - Unconditionally enable Speculative Store Bypass - auto - Kernel detects whether the CPU model contains an - implementation of Speculative Store Bypass and - picks the most appropriate mitigation. - prctl - Control Speculative Store Bypass per thread - via prctl. Speculative Store Bypass is enabled - for a process by default. The state of the control - is inherited on fork. + on - Unconditionally disable Speculative Store Bypass + off - Unconditionally enable Speculative Store Bypass + auto - Kernel detects whether the CPU model contains an + implementation of Speculative Store Bypass and + picks the most appropriate mitigation. If the + CPU is not vulnerable, "off" is selected. If the + CPU is vulnerable the default mitigation is + architecture and Kconfig dependent. See below. + prctl - Control Speculative Store Bypass per thread + via prctl. Speculative Store Bypass is enabled + for a process by default. The state of the control + is inherited on fork. + seccomp - Same as "prctl" above, but all seccomp threads + will disable SSB unless they explicitly opt out. Not specifying this option is equivalent to spec_store_bypass_disable=auto. + Default mitigations: + X86: If CONFIG_SECCOMP=y "seccomp", otherwise "prctl" + spia_io_base= [HW,MTD] spia_fio_base= spia_pedr= diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 71ad01422655..328ea3cb769f 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -233,6 +233,7 @@ enum ssb_mitigation { SPEC_STORE_BYPASS_NONE, SPEC_STORE_BYPASS_DISABLE, SPEC_STORE_BYPASS_PRCTL, + SPEC_STORE_BYPASS_SECCOMP, }; extern char __indirect_thunk_start[]; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 5dab4c3d26e7..563d8e54c863 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -416,22 +416,25 @@ enum ssb_mitigation_cmd { SPEC_STORE_BYPASS_CMD_AUTO, SPEC_STORE_BYPASS_CMD_ON, SPEC_STORE_BYPASS_CMD_PRCTL, + SPEC_STORE_BYPASS_CMD_SECCOMP, }; static const char *ssb_strings[] = { [SPEC_STORE_BYPASS_NONE] = "Vulnerable", [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled", - [SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl" + [SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl", + [SPEC_STORE_BYPASS_SECCOMP] = "Mitigation: Speculative Store Bypass disabled via prctl and seccomp", }; static const struct { const char *option; enum ssb_mitigation_cmd cmd; } ssb_mitigation_options[] = { - { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ - { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ - { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ - { "prctl", SPEC_STORE_BYPASS_CMD_PRCTL }, /* Disable Speculative Store Bypass via prctl */ + { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ + { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ + { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ + { "prctl", SPEC_STORE_BYPASS_CMD_PRCTL }, /* Disable Speculative Store Bypass via prctl */ + { "seccomp", SPEC_STORE_BYPASS_CMD_SECCOMP }, /* Disable Speculative Store Bypass via prctl and seccomp */ }; static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void) @@ -481,8 +484,15 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) switch (cmd) { case SPEC_STORE_BYPASS_CMD_AUTO: - /* Choose prctl as the default mode */ - mode = SPEC_STORE_BYPASS_PRCTL; + case SPEC_STORE_BYPASS_CMD_SECCOMP: + /* + * Choose prctl+seccomp as the default mode if seccomp is + * enabled. + */ + if (IS_ENABLED(CONFIG_SECCOMP)) + mode = SPEC_STORE_BYPASS_SECCOMP; + else + mode = SPEC_STORE_BYPASS_PRCTL; break; case SPEC_STORE_BYPASS_CMD_ON: mode = SPEC_STORE_BYPASS_DISABLE; @@ -530,12 +540,14 @@ static void ssb_select_mitigation() } #undef pr_fmt +#define pr_fmt(fmt) "Speculation prctl: " fmt static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) { bool update; - if (ssb_mode != SPEC_STORE_BYPASS_PRCTL) + if (ssb_mode != SPEC_STORE_BYPASS_PRCTL && + ssb_mode != SPEC_STORE_BYPASS_SECCOMP) return -ENXIO; switch (ctrl) { @@ -583,7 +595,8 @@ int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, #ifdef CONFIG_SECCOMP void arch_seccomp_spec_mitigate(struct task_struct *task) { - ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE); + if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP) + ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE); } #endif @@ -592,6 +605,7 @@ static int ssb_prctl_get(struct task_struct *task) switch (ssb_mode) { case SPEC_STORE_BYPASS_DISABLE: return PR_SPEC_DISABLE; + case SPEC_STORE_BYPASS_SECCOMP: case SPEC_STORE_BYPASS_PRCTL: if (task_spec_ssb_force_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; -- cgit v1.2.3-59-g8ed1b From 9f65fb29374ee37856dbad847b4e121aab72b510 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 9 May 2018 21:41:38 +0200 Subject: x86/bugs: Rename _RDS to _SSBD Intel collateral will reference the SSB mitigation bit in IA32_SPEC_CTL[2] as SSBD (Speculative Store Bypass Disable). Hence changing it. It is unclear yet what the MSR_IA32_ARCH_CAPABILITIES (0x10a) Bit(4) name is going to be. Following the rename it would be SSBD_NO but that rolls out to Speculative Store Bypass Disable No. Also fixed the missing space in X86_FEATURE_AMD_SSBD. [ tglx: Fixup x86_amd_rds_enable() and rds_tif_to_amd_ls_cfg() as well ] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/cpufeatures.h | 4 ++-- arch/x86/include/asm/msr-index.h | 10 +++++----- arch/x86/include/asm/spec-ctrl.h | 12 ++++++------ arch/x86/include/asm/thread_info.h | 6 +++--- arch/x86/kernel/cpu/amd.c | 14 +++++++------- arch/x86/kernel/cpu/bugs.c | 36 ++++++++++++++++++------------------ arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/cpu/intel.c | 2 +- arch/x86/kernel/process.c | 8 ++++---- arch/x86/kvm/cpuid.c | 2 +- arch/x86/kvm/vmx.c | 6 +++--- 11 files changed, 51 insertions(+), 51 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index b2464c1787df..4e1c747acbf8 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -215,7 +215,7 @@ #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ -#define X86_FEATURE_AMD_RDS (7*32+24) /* "" AMD RDS implementation */ +#define X86_FEATURE_AMD_SSBD ( 7*32+24) /* "" AMD SSBD implementation */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ @@ -336,7 +336,7 @@ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ -#define X86_FEATURE_RDS (18*32+31) /* Reduced Data Speculation */ +#define X86_FEATURE_SSBD (18*32+31) /* Speculative Store Bypass Disable */ /* * BUG word(s) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 810f50bb338d..0da3ca260b06 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -42,8 +42,8 @@ #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ #define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ #define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ -#define SPEC_CTRL_RDS_SHIFT 2 /* Reduced Data Speculation bit */ -#define SPEC_CTRL_RDS (1 << SPEC_CTRL_RDS_SHIFT) /* Reduced Data Speculation */ +#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ +#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ @@ -70,10 +70,10 @@ #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a #define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ #define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ -#define ARCH_CAP_RDS_NO (1 << 4) /* +#define ARCH_CAP_SSBD_NO (1 << 4) /* * Not susceptible to Speculative Store Bypass - * attack, so no Reduced Data Speculation control - * required. + * attack, so no Speculative Store Bypass + * control required. */ #define MSR_IA32_BBL_CR_CTL 0x00000119 diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index 45ef00ad5105..dc21209790bf 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -17,20 +17,20 @@ extern void x86_spec_ctrl_restore_host(u64); /* AMD specific Speculative Store Bypass MSR data */ extern u64 x86_amd_ls_cfg_base; -extern u64 x86_amd_ls_cfg_rds_mask; +extern u64 x86_amd_ls_cfg_ssbd_mask; /* The Intel SPEC CTRL MSR base value cache */ extern u64 x86_spec_ctrl_base; -static inline u64 rds_tif_to_spec_ctrl(u64 tifn) +static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn) { - BUILD_BUG_ON(TIF_RDS < SPEC_CTRL_RDS_SHIFT); - return (tifn & _TIF_RDS) >> (TIF_RDS - SPEC_CTRL_RDS_SHIFT); + BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT); + return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); } -static inline u64 rds_tif_to_amd_ls_cfg(u64 tifn) +static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn) { - return (tifn & _TIF_RDS) ? x86_amd_ls_cfg_rds_mask : 0ULL; + return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL; } extern void speculative_store_bypass_update(void); diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index e5c26cc59619..2ff2a30a264f 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -79,7 +79,7 @@ struct thread_info { #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ -#define TIF_RDS 5 /* Reduced data speculation */ +#define TIF_SSBD 5 /* Reduced data speculation */ #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ @@ -106,7 +106,7 @@ struct thread_info { #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) -#define _TIF_RDS (1 << TIF_RDS) +#define _TIF_SSBD (1 << TIF_SSBD) #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) @@ -146,7 +146,7 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_RDS) + (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD) #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 18efc33a8d2e..7bde990b0385 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -567,12 +567,12 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) } /* * Try to cache the base value so further operations can - * avoid RMW. If that faults, do not enable RDS. + * avoid RMW. If that faults, do not enable SSBD. */ if (!rdmsrl_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) { - setup_force_cpu_cap(X86_FEATURE_RDS); - setup_force_cpu_cap(X86_FEATURE_AMD_RDS); - x86_amd_ls_cfg_rds_mask = 1ULL << bit; + setup_force_cpu_cap(X86_FEATURE_SSBD); + setup_force_cpu_cap(X86_FEATURE_AMD_SSBD); + x86_amd_ls_cfg_ssbd_mask = 1ULL << bit; } } } @@ -920,9 +920,9 @@ static void init_amd(struct cpuinfo_x86 *c) if (!cpu_has(c, X86_FEATURE_XENPV)) set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); - if (boot_cpu_has(X86_FEATURE_AMD_RDS)) { - set_cpu_cap(c, X86_FEATURE_RDS); - set_cpu_cap(c, X86_FEATURE_AMD_RDS); + if (boot_cpu_has(X86_FEATURE_AMD_SSBD)) { + set_cpu_cap(c, X86_FEATURE_SSBD); + set_cpu_cap(c, X86_FEATURE_AMD_SSBD); } } diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 563d8e54c863..09b116b7f3bf 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -45,10 +45,10 @@ static u64 __ro_after_init x86_spec_ctrl_mask = ~SPEC_CTRL_IBRS; /* * AMD specific MSR info for Speculative Store Bypass control. - * x86_amd_ls_cfg_rds_mask is initialized in identify_boot_cpu(). + * x86_amd_ls_cfg_ssbd_mask is initialized in identify_boot_cpu(). */ u64 __ro_after_init x86_amd_ls_cfg_base; -u64 __ro_after_init x86_amd_ls_cfg_rds_mask; +u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask; void __init check_bugs(void) { @@ -146,7 +146,7 @@ u64 x86_spec_ctrl_get_default(void) u64 msrval = x86_spec_ctrl_base; if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - msrval |= rds_tif_to_spec_ctrl(current_thread_info()->flags); + msrval |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); return msrval; } EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default); @@ -159,7 +159,7 @@ void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl) return; if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - host |= rds_tif_to_spec_ctrl(current_thread_info()->flags); + host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); if (host != guest_spec_ctrl) wrmsrl(MSR_IA32_SPEC_CTRL, guest_spec_ctrl); @@ -174,18 +174,18 @@ void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl) return; if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - host |= rds_tif_to_spec_ctrl(current_thread_info()->flags); + host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); if (host != guest_spec_ctrl) wrmsrl(MSR_IA32_SPEC_CTRL, host); } EXPORT_SYMBOL_GPL(x86_spec_ctrl_restore_host); -static void x86_amd_rds_enable(void) +static void x86_amd_ssb_disable(void) { - u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_rds_mask; + u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_ssbd_mask; - if (boot_cpu_has(X86_FEATURE_AMD_RDS)) + if (boot_cpu_has(X86_FEATURE_AMD_SSBD)) wrmsrl(MSR_AMD64_LS_CFG, msrval); } @@ -473,7 +473,7 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) enum ssb_mitigation mode = SPEC_STORE_BYPASS_NONE; enum ssb_mitigation_cmd cmd; - if (!boot_cpu_has(X86_FEATURE_RDS)) + if (!boot_cpu_has(X86_FEATURE_SSBD)) return mode; cmd = ssb_parse_cmdline(); @@ -507,7 +507,7 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) /* * We have three CPU feature flags that are in play here: * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible. - * - X86_FEATURE_RDS - CPU is able to turn off speculative store bypass + * - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass * - X86_FEATURE_SPEC_STORE_BYPASS_DISABLE - engage the mitigation */ if (mode == SPEC_STORE_BYPASS_DISABLE) { @@ -518,12 +518,12 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) */ switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_INTEL: - x86_spec_ctrl_base |= SPEC_CTRL_RDS; - x86_spec_ctrl_mask &= ~SPEC_CTRL_RDS; - x86_spec_ctrl_set(SPEC_CTRL_RDS); + x86_spec_ctrl_base |= SPEC_CTRL_SSBD; + x86_spec_ctrl_mask &= ~SPEC_CTRL_SSBD; + x86_spec_ctrl_set(SPEC_CTRL_SSBD); break; case X86_VENDOR_AMD: - x86_amd_rds_enable(); + x86_amd_ssb_disable(); break; } } @@ -556,16 +556,16 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) if (task_spec_ssb_force_disable(task)) return -EPERM; task_clear_spec_ssb_disable(task); - update = test_and_clear_tsk_thread_flag(task, TIF_RDS); + update = test_and_clear_tsk_thread_flag(task, TIF_SSBD); break; case PR_SPEC_DISABLE: task_set_spec_ssb_disable(task); - update = !test_and_set_tsk_thread_flag(task, TIF_RDS); + update = !test_and_set_tsk_thread_flag(task, TIF_SSBD); break; case PR_SPEC_FORCE_DISABLE: task_set_spec_ssb_disable(task); task_set_spec_ssb_force_disable(task); - update = !test_and_set_tsk_thread_flag(task, TIF_RDS); + update = !test_and_set_tsk_thread_flag(task, TIF_SSBD); break; default: return -ERANGE; @@ -635,7 +635,7 @@ void x86_spec_ctrl_setup_ap(void) x86_spec_ctrl_set(x86_spec_ctrl_base & ~x86_spec_ctrl_mask); if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) - x86_amd_rds_enable(); + x86_amd_ssb_disable(); } #ifdef CONFIG_SYSFS diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e0517bcee446..9fbb388fadac 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -959,7 +959,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); if (!x86_match_cpu(cpu_no_spec_store_bypass) && - !(ia32_cap & ARCH_CAP_RDS_NO)) + !(ia32_cap & ARCH_CAP_SSBD_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); if (x86_match_cpu(cpu_no_speculation)) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index ef3f9c01c274..0eab6c89c8d9 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -189,7 +189,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) setup_clear_cpu_cap(X86_FEATURE_STIBP); setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL); setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); - setup_clear_cpu_cap(X86_FEATURE_RDS); + setup_clear_cpu_cap(X86_FEATURE_SSBD); } /* diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 397342725046..b77a091bf3b8 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -283,11 +283,11 @@ static __always_inline void __speculative_store_bypass_update(unsigned long tifn { u64 msr; - if (static_cpu_has(X86_FEATURE_AMD_RDS)) { - msr = x86_amd_ls_cfg_base | rds_tif_to_amd_ls_cfg(tifn); + if (static_cpu_has(X86_FEATURE_AMD_SSBD)) { + msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn); wrmsrl(MSR_AMD64_LS_CFG, msr); } else { - msr = x86_spec_ctrl_base | rds_tif_to_spec_ctrl(tifn); + msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); wrmsrl(MSR_IA32_SPEC_CTRL, msr); } } @@ -329,7 +329,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, if ((tifp ^ tifn) & _TIF_NOCPUID) set_cpuid_faulting(!!(tifn & _TIF_NOCPUID)); - if ((tifp ^ tifn) & _TIF_RDS) + if ((tifp ^ tifn) & _TIF_SSBD) __speculative_store_bypass_update(tifn); } diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 376ac9a2a2b9..865c9a769864 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -407,7 +407,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = - F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | F(RDS) | + F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | F(SSBD) | F(ARCH_CAPABILITIES); /* all calls to cpuid_count() should be made on the same cpu */ diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 16a111e44691..9b8d80bf3889 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3525,7 +3525,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) && !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) && - !guest_cpuid_has(vcpu, X86_FEATURE_RDS)) + !guest_cpuid_has(vcpu, X86_FEATURE_SSBD)) return 1; msr_info->data = to_vmx(vcpu)->spec_ctrl; @@ -3645,11 +3645,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) && !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) && - !guest_cpuid_has(vcpu, X86_FEATURE_RDS)) + !guest_cpuid_has(vcpu, X86_FEATURE_SSBD)) return 1; /* The STIBP bit doesn't fault even if it's not advertised */ - if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_RDS)) + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD)) return 1; vmx->spec_ctrl = data; -- cgit v1.2.3-59-g8ed1b From d66d8ff3d21667b41eddbe86b35ab411e40d8c5f Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Thu, 10 May 2018 22:47:18 +0200 Subject: x86/bugs: Fix __ssb_select_mitigation() return type __ssb_select_mitigation() returns one of the members of enum ssb_mitigation, not ssb_mitigation_cmd; fix the prototype to reflect that. Fixes: 24f7fc83b9204 ("x86/bugs: Provide boot parameters for the spec_store_bypass_disable mitigation") Signed-off-by: Jiri Kosina Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/bugs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 09b116b7f3bf..3287a42d4df4 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -468,7 +468,7 @@ static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void) return cmd; } -static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) +static enum ssb_mitigation __init __ssb_select_mitigation(void) { enum ssb_mitigation mode = SPEC_STORE_BYPASS_NONE; enum ssb_mitigation_cmd cmd; -- cgit v1.2.3-59-g8ed1b From 7bb4d366cba992904bffa4820d24e70a3de93e76 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Thu, 10 May 2018 22:47:32 +0200 Subject: x86/bugs: Make cpu_show_common() static cpu_show_common() is not used outside of arch/x86/kernel/cpu/bugs.c, so make it static. Signed-off-by: Jiri Kosina Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/bugs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 3287a42d4df4..784753f0b41e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -640,7 +640,7 @@ void x86_spec_ctrl_setup_ap(void) #ifdef CONFIG_SYSFS -ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, +static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { if (!boot_cpu_has_bug(bug)) -- cgit v1.2.3-59-g8ed1b From ffed645e3be0e32f8e9ab068d257aee8d0fe8eec Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 11 May 2018 16:50:35 -0400 Subject: x86/bugs: Fix the parameters alignment and missing void Fixes: 7bb4d366c ("x86/bugs: Make cpu_show_common() static") Fixes: 24f7fc83b ("x86/bugs: Provide boot parameters for the spec_store_bypass_disable mitigation") Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/bugs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 784753f0b41e..bab05913f864 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -531,7 +531,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) return mode; } -static void ssb_select_mitigation() +static void ssb_select_mitigation(void) { ssb_mode = __ssb_select_mitigation(); @@ -641,7 +641,7 @@ void x86_spec_ctrl_setup_ap(void) #ifdef CONFIG_SYSFS static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, - char *buf, unsigned int bug) + char *buf, unsigned int bug) { if (!boot_cpu_has_bug(bug)) return sprintf(buf, "Not affected\n"); -- cgit v1.2.3-59-g8ed1b From 5f2b745f5e1304f438f9b2cd03ebc8120b6e0d3b Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Sun, 13 May 2018 17:33:57 -0400 Subject: x86/cpu: Make alternative_msr_write work for 32-bit code Cast val and (val >> 32) to (u32), so that they fit in a general-purpose register in both 32-bit and 64-bit code. [ tglx: Made it u32 instead of uintptr_t ] Fixes: c65732e4f721 ("x86/cpu: Restore CPUID_8000_0008_EBX reload") Signed-off-by: Jim Mattson Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Acked-by: Linus Torvalds --- arch/x86/include/asm/nospec-branch.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 328ea3cb769f..bc258e644e5e 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -265,8 +265,8 @@ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) { asm volatile(ALTERNATIVE("", "wrmsr", %c[feature]) : : "c" (msr), - "a" (val), - "d" (val >> 32), + "a" ((u32)val), + "d" ((u32)(val >> 32)), [feature] "i" (feature) : "memory"); } -- cgit v1.2.3-59-g8ed1b From 15e6c22fd8e5a42c5ed6d487b7c9fe44c2517765 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 11 May 2018 15:21:01 +0200 Subject: KVM: SVM: Move spec control call after restore of GS svm_vcpu_run() invokes x86_spec_ctrl_restore_host() after VMEXIT, but before the host GS is restored. x86_spec_ctrl_restore_host() uses 'current' to determine the host SSBD state of the thread. 'current' is GS based, but host GS is not yet restored and the access causes a triple fault. Move the call after the host GS restore. Fixes: 885f82bfbc6f x86/process: Allow runtime control of Speculative Store Bypass Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Konrad Rzeszutek Wilk Acked-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 437c1b371129..fa891c8af842 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -5651,6 +5651,18 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) #endif ); + /* Eliminate branch target predictions from guest mode */ + vmexit_fill_RSB(); + +#ifdef CONFIG_X86_64 + wrmsrl(MSR_GS_BASE, svm->host.gs_base); +#else + loadsegment(fs, svm->host.fs); +#ifndef CONFIG_X86_32_LAZY_GS + loadsegment(gs, svm->host.gs); +#endif +#endif + /* * We do not use IBRS in the kernel. If this vCPU has used the * SPEC_CTRL MSR it may have left it on; save the value and @@ -5671,18 +5683,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) x86_spec_ctrl_restore_host(svm->spec_ctrl); - /* Eliminate branch target predictions from guest mode */ - vmexit_fill_RSB(); - -#ifdef CONFIG_X86_64 - wrmsrl(MSR_GS_BASE, svm->host.gs_base); -#else - loadsegment(fs, svm->host.fs); -#ifndef CONFIG_X86_32_LAZY_GS - loadsegment(gs, svm->host.gs); -#endif -#endif - reload_tss(vcpu); local_irq_disable(); -- cgit v1.2.3-59-g8ed1b From e7c587da125291db39ddf1f49b18e5970adbac17 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 2 May 2018 18:15:14 +0200 Subject: x86/speculation: Use synthetic bits for IBRS/IBPB/STIBP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Intel and AMD have different CPUID bits hence for those use synthetic bits which get set on the respective vendor's in init_speculation_control(). So that debacles like what the commit message of c65732e4f721 ("x86/cpu: Restore CPUID_8000_0008_EBX reload") talks about don't happen anymore. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Tested-by: Jörg Otte Cc: Linus Torvalds Cc: "Kirill A. Shutemov" Link: https://lkml.kernel.org/r/20180504161815.GG9257@pd.tnic --- arch/x86/include/asm/cpufeatures.h | 10 ++++++---- arch/x86/kernel/cpu/common.c | 14 ++++++++++---- arch/x86/kvm/cpuid.c | 10 +++++----- arch/x86/kvm/svm.c | 6 +++--- arch/x86/kvm/vmx.c | 9 ++------- 5 files changed, 26 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 4e1c747acbf8..cb9bca8cecdc 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -198,7 +198,6 @@ #define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ #define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ #define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */ - #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ @@ -216,6 +215,9 @@ #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ #define X86_FEATURE_AMD_SSBD ( 7*32+24) /* "" AMD SSBD implementation */ +#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ +#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ +#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ @@ -276,9 +278,9 @@ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ #define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */ -#define X86_FEATURE_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ -#define X86_FEATURE_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ -#define X86_FEATURE_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */ +#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */ +#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9fbb388fadac..e6cd38b20375 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -757,17 +757,23 @@ static void init_speculation_control(struct cpuinfo_x86 *c) * and they also have a different bit for STIBP support. Also, * a hypervisor might have set the individual AMD bits even on * Intel CPUs, for finer-grained selection of what's available. - * - * We use the AMD bits in 0x8000_0008 EBX as the generic hardware - * features, which are visible in /proc/cpuinfo and used by the - * kernel. So set those accordingly from the Intel bits. */ if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { set_cpu_cap(c, X86_FEATURE_IBRS); set_cpu_cap(c, X86_FEATURE_IBPB); } + if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) set_cpu_cap(c, X86_FEATURE_STIBP); + + if (cpu_has(c, X86_FEATURE_AMD_IBRS)) + set_cpu_cap(c, X86_FEATURE_IBRS); + + if (cpu_has(c, X86_FEATURE_AMD_IBPB)) + set_cpu_cap(c, X86_FEATURE_IBPB); + + if (cpu_has(c, X86_FEATURE_AMD_STIBP)) + set_cpu_cap(c, X86_FEATURE_STIBP); } void get_cpu_cap(struct cpuinfo_x86 *c) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 865c9a769864..e5ba48599428 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -379,7 +379,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 0x80000008.ebx */ const u32 kvm_cpuid_8000_0008_ebx_x86_features = - F(IBPB) | F(IBRS); + F(AMD_IBPB) | F(AMD_IBRS); /* cpuid 0xC0000001.edx */ const u32 kvm_cpuid_C000_0001_edx_x86_features = @@ -648,10 +648,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->eax = g_phys_as | (virt_as << 8); entry->edx = 0; /* IBRS and IBPB aren't necessarily present in hardware cpuid */ - if (boot_cpu_has(X86_FEATURE_IBPB)) - entry->ebx |= F(IBPB); - if (boot_cpu_has(X86_FEATURE_IBRS)) - entry->ebx |= F(IBRS); + if (boot_cpu_has(X86_FEATURE_AMD_IBPB)) + entry->ebx |= F(AMD_IBPB); + if (boot_cpu_has(X86_FEATURE_AMD_IBRS)) + entry->ebx |= F(AMD_IBRS); entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); break; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index fa891c8af842..1a7a811118a8 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4108,7 +4108,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_IBRS)) + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS)) return 1; msr_info->data = svm->spec_ctrl; @@ -4203,7 +4203,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; case MSR_IA32_SPEC_CTRL: if (!msr->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_IBRS)) + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS)) return 1; /* The STIBP bit doesn't fault even if it's not advertised */ @@ -4230,7 +4230,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; case MSR_IA32_PRED_CMD: if (!msr->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_IBPB)) + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB)) return 1; if (data & ~PRED_CMD_IBPB) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 9b8d80bf3889..210e1b63d4b5 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3523,9 +3523,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return kvm_get_msr_common(vcpu, msr_info); case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) && - !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) && - !guest_cpuid_has(vcpu, X86_FEATURE_SSBD)) + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) return 1; msr_info->data = to_vmx(vcpu)->spec_ctrl; @@ -3643,9 +3641,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) && - !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) && - !guest_cpuid_has(vcpu, X86_FEATURE_SSBD)) + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) return 1; /* The STIBP bit doesn't fault even if it's not advertised */ @@ -3675,7 +3671,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_PRED_CMD: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_IBPB) && !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) return 1; -- cgit v1.2.3-59-g8ed1b From 7eb8956a7fec3c1f0abc2a5517dada99ccc8a961 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 May 2018 19:13:18 +0200 Subject: x86/cpufeatures: Disentangle MSR_SPEC_CTRL enumeration from IBRS The availability of the SPEC_CTRL MSR is enumerated by a CPUID bit on Intel and implied by IBRS or STIBP support on AMD. That's just confusing and in case an AMD CPU has IBRS not supported because the underlying problem has been fixed but has another bit valid in the SPEC_CTRL MSR, the thing falls apart. Add a synthetic feature bit X86_FEATURE_MSR_SPEC_CTRL to denote the availability on both Intel and AMD. While at it replace the boot_cpu_has() checks with static_cpu_has() where possible. This prevents late microcode loading from exposing SPEC_CTRL, but late loading is already very limited as it does not reevaluate the mitigation options and other bits and pieces. Having static_cpu_has() is the simplest and least fragile solution. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/bugs.c | 18 +++++++++++------- arch/x86/kernel/cpu/common.c | 9 +++++++-- arch/x86/kernel/cpu/intel.c | 1 + 4 files changed, 20 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index cb9bca8cecdc..7d34eb0d3715 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -206,6 +206,7 @@ #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ +#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index bab05913f864..316cb24092a3 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -64,7 +64,7 @@ void __init check_bugs(void) * have unknown values. AMD64_LS_CFG MSR is cached in the early AMD * init code as it is not enumerated and depends on the family. */ - if (boot_cpu_has(X86_FEATURE_IBRS)) + if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); /* Select the proper spectre mitigation before patching alternatives */ @@ -145,7 +145,7 @@ u64 x86_spec_ctrl_get_default(void) { u64 msrval = x86_spec_ctrl_base; - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + if (static_cpu_has(X86_FEATURE_SPEC_CTRL)) msrval |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); return msrval; } @@ -155,10 +155,12 @@ void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl) { u64 host = x86_spec_ctrl_base; - if (!boot_cpu_has(X86_FEATURE_IBRS)) + /* Is MSR_SPEC_CTRL implemented ? */ + if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) return; - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + /* Intel controls SSB in MSR_SPEC_CTRL */ + if (static_cpu_has(X86_FEATURE_SPEC_CTRL)) host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); if (host != guest_spec_ctrl) @@ -170,10 +172,12 @@ void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl) { u64 host = x86_spec_ctrl_base; - if (!boot_cpu_has(X86_FEATURE_IBRS)) + /* Is MSR_SPEC_CTRL implemented ? */ + if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) return; - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + /* Intel controls SSB in MSR_SPEC_CTRL */ + if (static_cpu_has(X86_FEATURE_SPEC_CTRL)) host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); if (host != guest_spec_ctrl) @@ -631,7 +635,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) void x86_spec_ctrl_setup_ap(void) { - if (boot_cpu_has(X86_FEATURE_IBRS)) + if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) x86_spec_ctrl_set(x86_spec_ctrl_base & ~x86_spec_ctrl_mask); if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e6cd38b20375..af54dbe2df9a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -761,19 +761,24 @@ static void init_speculation_control(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { set_cpu_cap(c, X86_FEATURE_IBRS); set_cpu_cap(c, X86_FEATURE_IBPB); + set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); } if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) set_cpu_cap(c, X86_FEATURE_STIBP); - if (cpu_has(c, X86_FEATURE_AMD_IBRS)) + if (cpu_has(c, X86_FEATURE_AMD_IBRS)) { set_cpu_cap(c, X86_FEATURE_IBRS); + set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); + } if (cpu_has(c, X86_FEATURE_AMD_IBPB)) set_cpu_cap(c, X86_FEATURE_IBPB); - if (cpu_has(c, X86_FEATURE_AMD_STIBP)) + if (cpu_has(c, X86_FEATURE_AMD_STIBP)) { set_cpu_cap(c, X86_FEATURE_STIBP); + set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); + } } void get_cpu_cap(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 0eab6c89c8d9..dd37244c587a 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -188,6 +188,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) setup_clear_cpu_cap(X86_FEATURE_IBPB); setup_clear_cpu_cap(X86_FEATURE_STIBP); setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL); + setup_clear_cpu_cap(X86_FEATURE_MSR_SPEC_CTRL); setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); setup_clear_cpu_cap(X86_FEATURE_SSBD); } -- cgit v1.2.3-59-g8ed1b From 52817587e706686fcdb27f14c1b000c92f266c96 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 May 2018 20:21:36 +0200 Subject: x86/cpufeatures: Disentangle SSBD enumeration The SSBD enumeration is similarly to the other bits magically shared between Intel and AMD though the mechanisms are different. Make X86_FEATURE_SSBD synthetic and set it depending on the vendor specific features or family dependent setup. Change the Intel bit to X86_FEATURE_SPEC_CTRL_SSBD to denote that SSBD is controlled via MSR_SPEC_CTRL and fix up the usage sites. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/cpufeatures.h | 7 +++---- arch/x86/kernel/cpu/amd.c | 7 +------ arch/x86/kernel/cpu/bugs.c | 10 +++++----- arch/x86/kernel/cpu/common.c | 3 +++ arch/x86/kernel/cpu/intel.c | 1 + arch/x86/kernel/process.c | 2 +- 6 files changed, 14 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 7d34eb0d3715..61c34c1a525c 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -207,15 +207,14 @@ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ - +#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */ #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ #define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */ - #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ -#define X86_FEATURE_AMD_SSBD ( 7*32+24) /* "" AMD SSBD implementation */ +#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* "" AMD SSBD implementation via LS_CFG MSR */ #define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ #define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ #define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ @@ -339,7 +338,7 @@ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ -#define X86_FEATURE_SSBD (18*32+31) /* Speculative Store Bypass Disable */ +#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */ /* * BUG word(s) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 7bde990b0385..2d2d8985654b 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -570,8 +570,8 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) * avoid RMW. If that faults, do not enable SSBD. */ if (!rdmsrl_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) { + setup_force_cpu_cap(X86_FEATURE_LS_CFG_SSBD); setup_force_cpu_cap(X86_FEATURE_SSBD); - setup_force_cpu_cap(X86_FEATURE_AMD_SSBD); x86_amd_ls_cfg_ssbd_mask = 1ULL << bit; } } @@ -919,11 +919,6 @@ static void init_amd(struct cpuinfo_x86 *c) /* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */ if (!cpu_has(c, X86_FEATURE_XENPV)) set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); - - if (boot_cpu_has(X86_FEATURE_AMD_SSBD)) { - set_cpu_cap(c, X86_FEATURE_SSBD); - set_cpu_cap(c, X86_FEATURE_AMD_SSBD); - } } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 316cb24092a3..7ebd6373fc31 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -159,8 +159,8 @@ void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl) if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) return; - /* Intel controls SSB in MSR_SPEC_CTRL */ - if (static_cpu_has(X86_FEATURE_SPEC_CTRL)) + /* SSBD controlled in MSR_SPEC_CTRL */ + if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); if (host != guest_spec_ctrl) @@ -176,8 +176,8 @@ void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl) if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) return; - /* Intel controls SSB in MSR_SPEC_CTRL */ - if (static_cpu_has(X86_FEATURE_SPEC_CTRL)) + /* SSBD controlled in MSR_SPEC_CTRL */ + if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); if (host != guest_spec_ctrl) @@ -189,7 +189,7 @@ static void x86_amd_ssb_disable(void) { u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_ssbd_mask; - if (boot_cpu_has(X86_FEATURE_AMD_SSBD)) + if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD)) wrmsrl(MSR_AMD64_LS_CFG, msrval); } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index af54dbe2df9a..68282514c025 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -767,6 +767,9 @@ static void init_speculation_control(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) set_cpu_cap(c, X86_FEATURE_STIBP); + if (cpu_has(c, X86_FEATURE_SPEC_CTRL_SSBD)) + set_cpu_cap(c, X86_FEATURE_SSBD); + if (cpu_has(c, X86_FEATURE_AMD_IBRS)) { set_cpu_cap(c, X86_FEATURE_IBRS); set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index dd37244c587a..577e7f7ae273 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -191,6 +191,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) setup_clear_cpu_cap(X86_FEATURE_MSR_SPEC_CTRL); setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); setup_clear_cpu_cap(X86_FEATURE_SSBD); + setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL_SSBD); } /* diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index b77a091bf3b8..d71ef7eaa7ef 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -283,7 +283,7 @@ static __always_inline void __speculative_store_bypass_update(unsigned long tifn { u64 msr; - if (static_cpu_has(X86_FEATURE_AMD_SSBD)) { + if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn); wrmsrl(MSR_AMD64_LS_CFG, msr); } else { -- cgit v1.2.3-59-g8ed1b From d1035d971829dcf80e8686ccde26f94b0a069472 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 May 2018 16:26:00 +0200 Subject: x86/cpufeatures: Add FEATURE_ZEN Add a ZEN feature bit so family-dependent static_cpu_has() optimizations can be built for ZEN. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/amd.c | 1 + 2 files changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 61c34c1a525c..8099be4fc3e1 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -218,6 +218,7 @@ #define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ #define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ #define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 2d2d8985654b..1b18be3f35a8 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -812,6 +812,7 @@ static void init_amd_bd(struct cpuinfo_x86 *c) static void init_amd_zn(struct cpuinfo_x86 *c) { + set_cpu_cap(c, X86_FEATURE_ZEN); /* * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects * all up to and including B1. -- cgit v1.2.3-59-g8ed1b From 1f50ddb4f4189243c05926b842dc1a0332195f31 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 9 May 2018 21:53:09 +0200 Subject: x86/speculation: Handle HT correctly on AMD The AMD64_LS_CFG MSR is a per core MSR on Family 17H CPUs. That means when hyperthreading is enabled the SSBD bit toggle needs to take both cores into account. Otherwise the following situation can happen: CPU0 CPU1 disable SSB disable SSB enable SSB <- Enables it for the Core, i.e. for CPU0 as well So after the SSB enable on CPU1 the task on CPU0 runs with SSB enabled again. On Intel the SSBD control is per core as well, but the synchronization logic is implemented behind the per thread SPEC_CTRL MSR. It works like this: CORE_SPEC_CTRL = THREAD0_SPEC_CTRL | THREAD1_SPEC_CTRL i.e. if one of the threads enables a mitigation then this affects both and the mitigation is only disabled in the core when both threads disabled it. Add the necessary synchronization logic for AMD family 17H. Unfortunately that requires a spinlock to serialize the access to the MSR, but the locks are only shared between siblings. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/spec-ctrl.h | 6 ++ arch/x86/kernel/process.c | 125 +++++++++++++++++++++++++++++++++++++-- arch/x86/kernel/smpboot.c | 5 ++ 3 files changed, 130 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index dc21209790bf..0cb49c4564b0 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -33,6 +33,12 @@ static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn) return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL; } +#ifdef CONFIG_SMP +extern void speculative_store_bypass_ht_init(void); +#else +static inline void speculative_store_bypass_ht_init(void) { } +#endif + extern void speculative_store_bypass_update(void); #endif diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index d71ef7eaa7ef..d6fe8648d3f6 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -279,22 +279,135 @@ static inline void switch_to_bitmap(struct tss_struct *tss, } } -static __always_inline void __speculative_store_bypass_update(unsigned long tifn) +#ifdef CONFIG_SMP + +struct ssb_state { + struct ssb_state *shared_state; + raw_spinlock_t lock; + unsigned int disable_state; + unsigned long local_state; +}; + +#define LSTATE_SSB 0 + +static DEFINE_PER_CPU(struct ssb_state, ssb_state); + +void speculative_store_bypass_ht_init(void) { - u64 msr; + struct ssb_state *st = this_cpu_ptr(&ssb_state); + unsigned int this_cpu = smp_processor_id(); + unsigned int cpu; + + st->local_state = 0; + + /* + * Shared state setup happens once on the first bringup + * of the CPU. It's not destroyed on CPU hotunplug. + */ + if (st->shared_state) + return; + + raw_spin_lock_init(&st->lock); - if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { - msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn); + /* + * Go over HT siblings and check whether one of them has set up the + * shared state pointer already. + */ + for_each_cpu(cpu, topology_sibling_cpumask(this_cpu)) { + if (cpu == this_cpu) + continue; + + if (!per_cpu(ssb_state, cpu).shared_state) + continue; + + /* Link it to the state of the sibling: */ + st->shared_state = per_cpu(ssb_state, cpu).shared_state; + return; + } + + /* + * First HT sibling to come up on the core. Link shared state of + * the first HT sibling to itself. The siblings on the same core + * which come up later will see the shared state pointer and link + * themself to the state of this CPU. + */ + st->shared_state = st; +} + +/* + * Logic is: First HT sibling enables SSBD for both siblings in the core + * and last sibling to disable it, disables it for the whole core. This how + * MSR_SPEC_CTRL works in "hardware": + * + * CORE_SPEC_CTRL = THREAD0_SPEC_CTRL | THREAD1_SPEC_CTRL + */ +static __always_inline void amd_set_core_ssb_state(unsigned long tifn) +{ + struct ssb_state *st = this_cpu_ptr(&ssb_state); + u64 msr = x86_amd_ls_cfg_base; + + if (!static_cpu_has(X86_FEATURE_ZEN)) { + msr |= ssbd_tif_to_amd_ls_cfg(tifn); wrmsrl(MSR_AMD64_LS_CFG, msr); + return; + } + + if (tifn & _TIF_SSBD) { + /* + * Since this can race with prctl(), block reentry on the + * same CPU. + */ + if (__test_and_set_bit(LSTATE_SSB, &st->local_state)) + return; + + msr |= x86_amd_ls_cfg_ssbd_mask; + + raw_spin_lock(&st->shared_state->lock); + /* First sibling enables SSBD: */ + if (!st->shared_state->disable_state) + wrmsrl(MSR_AMD64_LS_CFG, msr); + st->shared_state->disable_state++; + raw_spin_unlock(&st->shared_state->lock); } else { - msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); - wrmsrl(MSR_IA32_SPEC_CTRL, msr); + if (!__test_and_clear_bit(LSTATE_SSB, &st->local_state)) + return; + + raw_spin_lock(&st->shared_state->lock); + st->shared_state->disable_state--; + if (!st->shared_state->disable_state) + wrmsrl(MSR_AMD64_LS_CFG, msr); + raw_spin_unlock(&st->shared_state->lock); } } +#else +static __always_inline void amd_set_core_ssb_state(unsigned long tifn) +{ + u64 msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn); + + wrmsrl(MSR_AMD64_LS_CFG, msr); +} +#endif + +static __always_inline void intel_set_ssb_state(unsigned long tifn) +{ + u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); + + wrmsrl(MSR_IA32_SPEC_CTRL, msr); +} + +static __always_inline void __speculative_store_bypass_update(unsigned long tifn) +{ + if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) + amd_set_core_ssb_state(tifn); + else + intel_set_ssb_state(tifn); +} void speculative_store_bypass_update(void) { + preempt_disable(); __speculative_store_bypass_update(current_thread_info()->flags); + preempt_enable(); } void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 0f1cbb042f49..9dd324ae4832 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -79,6 +79,7 @@ #include #include #include +#include /* Number of siblings per CPU package */ int smp_num_siblings = 1; @@ -244,6 +245,8 @@ static void notrace start_secondary(void *unused) */ check_tsc_sync_target(); + speculative_store_bypass_ht_init(); + /* * Lock vector_lock, set CPU online and bring the vector * allocator online. Online must be set with vector_lock held @@ -1292,6 +1295,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) set_mtrr_aps_delayed_init(); smp_quirk_init_udelay(); + + speculative_store_bypass_ht_init(); } void arch_enable_nonboot_cpus_begin(void) -- cgit v1.2.3-59-g8ed1b From ccbcd2674472a978b48c91c1fbfb66c0ff959f24 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 9 May 2018 23:01:01 +0200 Subject: x86/bugs, KVM: Extend speculation control for VIRT_SPEC_CTRL AMD is proposing a VIRT_SPEC_CTRL MSR to handle the Speculative Store Bypass Disable via MSR_AMD64_LS_CFG so that guests do not have to care about the bit position of the SSBD bit and thus facilitate migration. Also, the sibling coordination on Family 17H CPUs can only be done on the host. Extend x86_spec_ctrl_set_guest() and x86_spec_ctrl_restore_host() with an extra argument for the VIRT_SPEC_CTRL MSR. Hand in 0 from VMX and in SVM add a new virt_spec_ctrl member to the CPU data structure which is going to be used in later patches for the actual implementation. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/spec-ctrl.h | 9 ++++++--- arch/x86/kernel/cpu/bugs.c | 20 ++++++++++++++++++-- arch/x86/kvm/svm.c | 11 +++++++++-- arch/x86/kvm/vmx.c | 4 ++-- 4 files changed, 35 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index 0cb49c4564b0..6e2874049afd 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -10,10 +10,13 @@ * the guest has, while on VMEXIT we restore the host view. This * would be easier if SPEC_CTRL were architecturally maskable or * shadowable for guests but this is not (currently) the case. - * Takes the guest view of SPEC_CTRL MSR as a parameter. + * Takes the guest view of SPEC_CTRL MSR as a parameter and also + * the guest's version of VIRT_SPEC_CTRL, if emulated. */ -extern void x86_spec_ctrl_set_guest(u64); -extern void x86_spec_ctrl_restore_host(u64); +extern void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, + u64 guest_virt_spec_ctrl); +extern void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, + u64 guest_virt_spec_ctrl); /* AMD specific Speculative Store Bypass MSR data */ extern u64 x86_amd_ls_cfg_base; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 7ebd6373fc31..d3afd38f30d1 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -151,7 +151,15 @@ u64 x86_spec_ctrl_get_default(void) } EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default); -void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl) +/** + * x86_spec_ctrl_set_guest - Set speculation control registers for the guest + * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL + * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL + * (may get translated to MSR_AMD64_LS_CFG bits) + * + * Avoids writing to the MSR if the content/bits are the same + */ +void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) { u64 host = x86_spec_ctrl_base; @@ -168,7 +176,15 @@ void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl) } EXPORT_SYMBOL_GPL(x86_spec_ctrl_set_guest); -void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl) +/** + * x86_spec_ctrl_restore_host - Restore host speculation control registers + * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL + * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL + * (may get translated to MSR_AMD64_LS_CFG bits) + * + * Avoids writing to the MSR if the content/bits are the same + */ +void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) { u64 host = x86_spec_ctrl_base; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1a7a811118a8..c07dbcc6d449 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -213,6 +213,12 @@ struct vcpu_svm { } host; u64 spec_ctrl; + /* + * Contains guest-controlled bits of VIRT_SPEC_CTRL, which will be + * translated into the appropriate L2_CFG bits on the host to + * perform speculative control. + */ + u64 virt_spec_ctrl; u32 *msrpm; @@ -2060,6 +2066,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vcpu->arch.microcode_version = 0x01000065; svm->spec_ctrl = 0; + svm->virt_spec_ctrl = 0; if (!init_event) { svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | @@ -5557,7 +5564,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) * is no need to worry about the conditional branch over the wrmsr * being speculatively taken. */ - x86_spec_ctrl_set_guest(svm->spec_ctrl); + x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl); asm volatile ( "push %%" _ASM_BP "; \n\t" @@ -5681,7 +5688,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - x86_spec_ctrl_restore_host(svm->spec_ctrl); + x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl); reload_tss(vcpu); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 210e1b63d4b5..5d733a03a6fa 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9717,7 +9717,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) * is no need to worry about the conditional branch over the wrmsr * being speculatively taken. */ - x86_spec_ctrl_set_guest(vmx->spec_ctrl); + x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); vmx->__launched = vmx->loaded_vmcs->launched; @@ -9865,7 +9865,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - x86_spec_ctrl_restore_host(vmx->spec_ctrl); + x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0); /* Eliminate branch target predictions from guest mode */ vmexit_fill_RSB(); -- cgit v1.2.3-59-g8ed1b From 11fb0683493b2da112cd64c9dada221b52463bf7 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 17 May 2018 17:09:18 +0200 Subject: x86/speculation: Add virtualized speculative store bypass disable support Some AMD processors only support a non-architectural means of enabling speculative store bypass disable (SSBD). To allow a simplified view of this to a guest, an architectural definition has been created through a new CPUID bit, 0x80000008_EBX[25], and a new MSR, 0xc001011f. With this, a hypervisor can virtualize the existence of this definition and provide an architectural method for using SSBD to a guest. Add the new CPUID feature, the new MSR and update the existing SSBD support to use this MSR when present. Signed-off-by: Tom Lendacky Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 2 ++ arch/x86/kernel/cpu/bugs.c | 4 +++- arch/x86/kernel/process.c | 13 ++++++++++++- 4 files changed, 18 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 8099be4fc3e1..fb00a2fca990 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -282,6 +282,7 @@ #define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */ #define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */ #define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 0da3ca260b06..562414d5b834 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -347,6 +347,8 @@ #define MSR_AMD64_SEV_ENABLED_BIT 0 #define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT) +#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f + /* Fam 17h MSRs */ #define MSR_F17H_IRPERF 0xc00000e9 diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d3afd38f30d1..82422a04b506 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -205,7 +205,9 @@ static void x86_amd_ssb_disable(void) { u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_ssbd_mask; - if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD)) + if (boot_cpu_has(X86_FEATURE_VIRT_SSBD)) + wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, SPEC_CTRL_SSBD); + else if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD)) wrmsrl(MSR_AMD64_LS_CFG, msrval); } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index d6fe8648d3f6..91c3398286d8 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -388,6 +388,15 @@ static __always_inline void amd_set_core_ssb_state(unsigned long tifn) } #endif +static __always_inline void amd_set_ssb_virt_state(unsigned long tifn) +{ + /* + * SSBD has the same definition in SPEC_CTRL and VIRT_SPEC_CTRL, + * so ssbd_tif_to_spec_ctrl() just works. + */ + wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn)); +} + static __always_inline void intel_set_ssb_state(unsigned long tifn) { u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); @@ -397,7 +406,9 @@ static __always_inline void intel_set_ssb_state(unsigned long tifn) static __always_inline void __speculative_store_bypass_update(unsigned long tifn) { - if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) + if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) + amd_set_ssb_virt_state(tifn); + else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) amd_set_core_ssb_state(tifn); else intel_set_ssb_state(tifn); -- cgit v1.2.3-59-g8ed1b From 0270be3e34efb05a88bc4c422572ece038ef3608 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 May 2018 20:31:44 +0200 Subject: x86/speculation: Rework speculative_store_bypass_update() The upcoming support for the virtual SPEC_CTRL MSR on AMD needs to reuse speculative_store_bypass_update() to avoid code duplication. Add an argument for supplying a thread info (TIF) value and create a wrapper speculative_store_bypass_update_current() which is used at the existing call site. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/spec-ctrl.h | 7 ++++++- arch/x86/kernel/cpu/bugs.c | 2 +- arch/x86/kernel/process.c | 4 ++-- 3 files changed, 9 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index 6e2874049afd..82b6c5a0d61e 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -42,6 +42,11 @@ extern void speculative_store_bypass_ht_init(void); static inline void speculative_store_bypass_ht_init(void) { } #endif -extern void speculative_store_bypass_update(void); +extern void speculative_store_bypass_update(unsigned long tif); + +static inline void speculative_store_bypass_update_current(void) +{ + speculative_store_bypass_update(current_thread_info()->flags); +} #endif diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 82422a04b506..f2f0c1b3bf50 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -598,7 +598,7 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) * mitigation until it is next scheduled. */ if (task == current && update) - speculative_store_bypass_update(); + speculative_store_bypass_update_current(); return 0; } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 91c3398286d8..30ca2d1a9231 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -414,10 +414,10 @@ static __always_inline void __speculative_store_bypass_update(unsigned long tifn intel_set_ssb_state(tifn); } -void speculative_store_bypass_update(void) +void speculative_store_bypass_update(unsigned long tif) { preempt_disable(); - __speculative_store_bypass_update(current_thread_info()->flags); + __speculative_store_bypass_update(tif); preempt_enable(); } -- cgit v1.2.3-59-g8ed1b From cc69b34989210f067b2c51d5539b5f96ebcc3a01 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 12 May 2018 00:14:51 +0200 Subject: x86/bugs: Unify x86_spec_ctrl_{set_guest,restore_host} Function bodies are very similar and are going to grow more almost identical code. Add a bool arg to determine whether SPEC_CTRL is being set for the guest or restored to the host. No functional changes. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/spec-ctrl.h | 33 +++++++++++++++++++--- arch/x86/kernel/cpu/bugs.c | 60 ++++++++++------------------------------ 2 files changed, 44 insertions(+), 49 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index 82b6c5a0d61e..9cecbe5e57ee 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -13,10 +13,35 @@ * Takes the guest view of SPEC_CTRL MSR as a parameter and also * the guest's version of VIRT_SPEC_CTRL, if emulated. */ -extern void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, - u64 guest_virt_spec_ctrl); -extern void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, - u64 guest_virt_spec_ctrl); +extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool guest); + +/** + * x86_spec_ctrl_set_guest - Set speculation control registers for the guest + * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL + * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL + * (may get translated to MSR_AMD64_LS_CFG bits) + * + * Avoids writing to the MSR if the content/bits are the same + */ +static inline +void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) +{ + x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, true); +} + +/** + * x86_spec_ctrl_restore_host - Restore host speculation control registers + * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL + * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL + * (may get translated to MSR_AMD64_LS_CFG bits) + * + * Avoids writing to the MSR if the content/bits are the same + */ +static inline +void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) +{ + x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, false); +} /* AMD specific Speculative Store Bypass MSR data */ extern u64 x86_amd_ls_cfg_base; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index f2f0c1b3bf50..feb7d597c265 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -151,55 +151,25 @@ u64 x86_spec_ctrl_get_default(void) } EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default); -/** - * x86_spec_ctrl_set_guest - Set speculation control registers for the guest - * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL - * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL - * (may get translated to MSR_AMD64_LS_CFG bits) - * - * Avoids writing to the MSR if the content/bits are the same - */ -void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) +void +x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) { - u64 host = x86_spec_ctrl_base; + struct thread_info *ti = current_thread_info(); + u64 msr, host = x86_spec_ctrl_base; /* Is MSR_SPEC_CTRL implemented ? */ - if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) - return; - - /* SSBD controlled in MSR_SPEC_CTRL */ - if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) - host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); - - if (host != guest_spec_ctrl) - wrmsrl(MSR_IA32_SPEC_CTRL, guest_spec_ctrl); -} -EXPORT_SYMBOL_GPL(x86_spec_ctrl_set_guest); - -/** - * x86_spec_ctrl_restore_host - Restore host speculation control registers - * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL - * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL - * (may get translated to MSR_AMD64_LS_CFG bits) - * - * Avoids writing to the MSR if the content/bits are the same - */ -void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) -{ - u64 host = x86_spec_ctrl_base; - - /* Is MSR_SPEC_CTRL implemented ? */ - if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) - return; - - /* SSBD controlled in MSR_SPEC_CTRL */ - if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) - host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); - - if (host != guest_spec_ctrl) - wrmsrl(MSR_IA32_SPEC_CTRL, host); + if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) { + /* SSBD controlled in MSR_SPEC_CTRL */ + if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) + host |= ssbd_tif_to_spec_ctrl(ti->flags); + + if (host != guest_spec_ctrl) { + msr = setguest ? guest_spec_ctrl : host; + wrmsrl(MSR_IA32_SPEC_CTRL, msr); + } + } } -EXPORT_SYMBOL_GPL(x86_spec_ctrl_restore_host); +EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl); static void x86_amd_ssb_disable(void) { -- cgit v1.2.3-59-g8ed1b From fa8ac4988249c38476f6ad678a4848a736373403 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 12 May 2018 20:49:16 +0200 Subject: x86/bugs: Expose x86_spec_ctrl_base directly x86_spec_ctrl_base is the system wide default value for the SPEC_CTRL MSR. x86_spec_ctrl_get_default() returns x86_spec_ctrl_base and was intended to prevent modification to that variable. Though the variable is read only after init and globaly visible already. Remove the function and export the variable instead. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/nospec-branch.h | 16 +++++----------- arch/x86/include/asm/spec-ctrl.h | 3 --- arch/x86/kernel/cpu/bugs.c | 11 +---------- 3 files changed, 6 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index bc258e644e5e..8d9deec00de9 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -217,16 +217,7 @@ enum spectre_v2_mitigation { SPECTRE_V2_IBRS, }; -/* - * The Intel specification for the SPEC_CTRL MSR requires that we - * preserve any already set reserved bits at boot time (e.g. for - * future additions that this kernel is not currently aware of). - * We then set any additional mitigation bits that we want - * ourselves and always use this as the base for SPEC_CTRL. - * We also use this when handling guest entry/exit as below. - */ extern void x86_spec_ctrl_set(u64); -extern u64 x86_spec_ctrl_get_default(void); /* The Speculative Store Bypass disable variants */ enum ssb_mitigation { @@ -278,6 +269,9 @@ static inline void indirect_branch_prediction_barrier(void) alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB); } +/* The Intel SPEC CTRL MSR base value cache */ +extern u64 x86_spec_ctrl_base; + /* * With retpoline, we must use IBRS to restrict branch prediction * before calling into firmware. @@ -286,7 +280,7 @@ static inline void indirect_branch_prediction_barrier(void) */ #define firmware_restrict_branch_speculation_start() \ do { \ - u64 val = x86_spec_ctrl_get_default() | SPEC_CTRL_IBRS; \ + u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \ \ preempt_disable(); \ alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ @@ -295,7 +289,7 @@ do { \ #define firmware_restrict_branch_speculation_end() \ do { \ - u64 val = x86_spec_ctrl_get_default(); \ + u64 val = x86_spec_ctrl_base; \ \ alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ X86_FEATURE_USE_IBRS_FW); \ diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index 9cecbe5e57ee..763d49710329 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -47,9 +47,6 @@ void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) extern u64 x86_amd_ls_cfg_base; extern u64 x86_amd_ls_cfg_ssbd_mask; -/* The Intel SPEC CTRL MSR base value cache */ -extern u64 x86_spec_ctrl_base; - static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn) { BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index feb7d597c265..00f51deba493 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -36,6 +36,7 @@ static void __init ssb_select_mitigation(void); * writes to SPEC_CTRL contain whatever reserved bits have been set. */ u64 __ro_after_init x86_spec_ctrl_base; +EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); /* * The vendor and possibly platform specific bits which can be modified in @@ -141,16 +142,6 @@ void x86_spec_ctrl_set(u64 val) } EXPORT_SYMBOL_GPL(x86_spec_ctrl_set); -u64 x86_spec_ctrl_get_default(void) -{ - u64 msrval = x86_spec_ctrl_base; - - if (static_cpu_has(X86_FEATURE_SPEC_CTRL)) - msrval |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); - return msrval; -} -EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default); - void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) { -- cgit v1.2.3-59-g8ed1b From 4b59bdb569453a60b752b274ca61f009e37f4dae Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 12 May 2018 20:53:14 +0200 Subject: x86/bugs: Remove x86_spec_ctrl_set() x86_spec_ctrl_set() is only used in bugs.c and the extra mask checks there provide no real value as both call sites can just write x86_spec_ctrl_base to MSR_SPEC_CTRL. x86_spec_ctrl_base is valid and does not need any extra masking or checking. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/nospec-branch.h | 2 -- arch/x86/kernel/cpu/bugs.c | 13 ++----------- 2 files changed, 2 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 8d9deec00de9..8b38df98548e 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -217,8 +217,6 @@ enum spectre_v2_mitigation { SPECTRE_V2_IBRS, }; -extern void x86_spec_ctrl_set(u64); - /* The Speculative Store Bypass disable variants */ enum ssb_mitigation { SPEC_STORE_BYPASS_NONE, diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 00f51deba493..e0b2e3b3301e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -133,15 +133,6 @@ static const char *spectre_v2_strings[] = { static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = SPECTRE_V2_NONE; -void x86_spec_ctrl_set(u64 val) -{ - if (val & x86_spec_ctrl_mask) - WARN_ONCE(1, "SPEC_CTRL MSR value 0x%16llx is unknown.\n", val); - else - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base | val); -} -EXPORT_SYMBOL_GPL(x86_spec_ctrl_set); - void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) { @@ -503,7 +494,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) case X86_VENDOR_INTEL: x86_spec_ctrl_base |= SPEC_CTRL_SSBD; x86_spec_ctrl_mask &= ~SPEC_CTRL_SSBD; - x86_spec_ctrl_set(SPEC_CTRL_SSBD); + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); break; case X86_VENDOR_AMD: x86_amd_ssb_disable(); @@ -615,7 +606,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) void x86_spec_ctrl_setup_ap(void) { if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) - x86_spec_ctrl_set(x86_spec_ctrl_base & ~x86_spec_ctrl_mask); + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) x86_amd_ssb_disable(); -- cgit v1.2.3-59-g8ed1b From be6fcb5478e95bb1c91f489121238deb3abca46a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 12 May 2018 20:10:00 +0200 Subject: x86/bugs: Rework spec_ctrl base and mask logic x86_spec_ctrL_mask is intended to mask out bits from a MSR_SPEC_CTRL value which are not to be modified. However the implementation is not really used and the bitmask was inverted to make a check easier, which was removed in "x86/bugs: Remove x86_spec_ctrl_set()" Aside of that it is missing the STIBP bit if it is supported by the platform, so if the mask would be used in x86_virt_spec_ctrl() then it would prevent a guest from setting STIBP. Add the STIBP bit if supported and use the mask in x86_virt_spec_ctrl() to sanitize the value which is supplied by the guest. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov --- arch/x86/kernel/cpu/bugs.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e0b2e3b3301e..8e327bfec513 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -42,7 +42,7 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); * The vendor and possibly platform specific bits which can be modified in * x86_spec_ctrl_base. */ -static u64 __ro_after_init x86_spec_ctrl_mask = ~SPEC_CTRL_IBRS; +static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS; /* * AMD specific MSR info for Speculative Store Bypass control. @@ -68,6 +68,10 @@ void __init check_bugs(void) if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + /* Allow STIBP in MSR_SPEC_CTRL if supported */ + if (boot_cpu_has(X86_FEATURE_STIBP)) + x86_spec_ctrl_mask |= SPEC_CTRL_STIBP; + /* Select the proper spectre mitigation before patching alternatives */ spectre_v2_select_mitigation(); @@ -136,18 +140,26 @@ static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) { + u64 msrval, guestval, hostval = x86_spec_ctrl_base; struct thread_info *ti = current_thread_info(); - u64 msr, host = x86_spec_ctrl_base; /* Is MSR_SPEC_CTRL implemented ? */ if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) { + /* + * Restrict guest_spec_ctrl to supported values. Clear the + * modifiable bits in the host base value and or the + * modifiable bits from the guest value. + */ + guestval = hostval & ~x86_spec_ctrl_mask; + guestval |= guest_spec_ctrl & x86_spec_ctrl_mask; + /* SSBD controlled in MSR_SPEC_CTRL */ if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) - host |= ssbd_tif_to_spec_ctrl(ti->flags); + hostval |= ssbd_tif_to_spec_ctrl(ti->flags); - if (host != guest_spec_ctrl) { - msr = setguest ? guest_spec_ctrl : host; - wrmsrl(MSR_IA32_SPEC_CTRL, msr); + if (hostval != guestval) { + msrval = setguest ? guestval : hostval; + wrmsrl(MSR_IA32_SPEC_CTRL, msrval); } } } @@ -493,7 +505,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_INTEL: x86_spec_ctrl_base |= SPEC_CTRL_SSBD; - x86_spec_ctrl_mask &= ~SPEC_CTRL_SSBD; + x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); break; case X86_VENDOR_AMD: -- cgit v1.2.3-59-g8ed1b From 47c61b3955cf712cadfc25635bf9bc174af030ea Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 May 2018 20:42:48 +0200 Subject: x86/speculation, KVM: Implement support for VIRT_SPEC_CTRL/LS_CFG Add the necessary logic for supporting the emulated VIRT_SPEC_CTRL MSR to x86_virt_spec_ctrl(). If either X86_FEATURE_LS_CFG_SSBD or X86_FEATURE_VIRT_SPEC_CTRL is set then use the new guest_virt_spec_ctrl argument to check whether the state must be modified on the host. The update reuses speculative_store_bypass_update() so the ZEN-specific sibling coordination can be reused. Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/spec-ctrl.h | 6 ++++++ arch/x86/kernel/cpu/bugs.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index 763d49710329..ae7c2c5cd7f0 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -53,6 +53,12 @@ static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn) return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); } +static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl) +{ + BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT); + return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); +} + static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn) { return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 8e327bfec513..7416fc206b4a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -162,6 +162,36 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) wrmsrl(MSR_IA32_SPEC_CTRL, msrval); } } + + /* + * If SSBD is not handled in MSR_SPEC_CTRL on AMD, update + * MSR_AMD64_L2_CFG or MSR_VIRT_SPEC_CTRL if supported. + */ + if (!static_cpu_has(X86_FEATURE_LS_CFG_SSBD) && + !static_cpu_has(X86_FEATURE_VIRT_SSBD)) + return; + + /* + * If the host has SSBD mitigation enabled, force it in the host's + * virtual MSR value. If its not permanently enabled, evaluate + * current's TIF_SSBD thread flag. + */ + if (static_cpu_has(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE)) + hostval = SPEC_CTRL_SSBD; + else + hostval = ssbd_tif_to_spec_ctrl(ti->flags); + + /* Sanitize the guest value */ + guestval = guest_virt_spec_ctrl & SPEC_CTRL_SSBD; + + if (hostval != guestval) { + unsigned long tif; + + tif = setguest ? ssbd_spec_ctrl_to_tif(guestval) : + ssbd_spec_ctrl_to_tif(hostval); + + speculative_store_bypass_update(tif); + } } EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl); -- cgit v1.2.3-59-g8ed1b From bc226f07dcd3c9ef0b7f6236fe356ea4a9cb4769 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 May 2018 22:06:39 +0200 Subject: KVM: SVM: Implement VIRT_SPEC_CTRL support for SSBD Expose the new virtualized architectural mechanism, VIRT_SSBD, for using speculative store bypass disable (SSBD) under SVM. This will allow guests to use SSBD on hardware that uses non-architectural mechanisms for enabling SSBD. [ tglx: Folded the migration fixup from Paolo Bonzini ] Signed-off-by: Tom Lendacky Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kernel/cpu/common.c | 3 ++- arch/x86/kvm/cpuid.c | 11 +++++++++-- arch/x86/kvm/svm.c | 21 +++++++++++++++++++-- arch/x86/kvm/vmx.c | 18 +++++++++++++++--- arch/x86/kvm/x86.c | 13 ++++--------- 6 files changed, 50 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c25775fad4ed..f4b2588865e9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -924,7 +924,7 @@ struct kvm_x86_ops { int (*hardware_setup)(void); /* __init */ void (*hardware_unsetup)(void); /* __exit */ bool (*cpu_has_accelerated_tpr)(void); - bool (*cpu_has_high_real_mode_segbase)(void); + bool (*has_emulated_msr)(int index); void (*cpuid_update)(struct kvm_vcpu *vcpu); struct kvm *(*vm_alloc)(void); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 68282514c025..b4247ed0c81e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -767,7 +767,8 @@ static void init_speculation_control(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) set_cpu_cap(c, X86_FEATURE_STIBP); - if (cpu_has(c, X86_FEATURE_SPEC_CTRL_SSBD)) + if (cpu_has(c, X86_FEATURE_SPEC_CTRL_SSBD) || + cpu_has(c, X86_FEATURE_VIRT_SSBD)) set_cpu_cap(c, X86_FEATURE_SSBD); if (cpu_has(c, X86_FEATURE_AMD_IBRS)) { diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index e5ba48599428..ced851169730 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -379,7 +379,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 0x80000008.ebx */ const u32 kvm_cpuid_8000_0008_ebx_x86_features = - F(AMD_IBPB) | F(AMD_IBRS); + F(AMD_IBPB) | F(AMD_IBRS) | F(VIRT_SSBD); /* cpuid 0xC0000001.edx */ const u32 kvm_cpuid_C000_0001_edx_x86_features = @@ -647,13 +647,20 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, g_phys_as = phys_as; entry->eax = g_phys_as | (virt_as << 8); entry->edx = 0; - /* IBRS and IBPB aren't necessarily present in hardware cpuid */ + /* + * IBRS, IBPB and VIRT_SSBD aren't necessarily present in + * hardware cpuid + */ if (boot_cpu_has(X86_FEATURE_AMD_IBPB)) entry->ebx |= F(AMD_IBPB); if (boot_cpu_has(X86_FEATURE_AMD_IBRS)) entry->ebx |= F(AMD_IBRS); + if (boot_cpu_has(X86_FEATURE_VIRT_SSBD)) + entry->ebx |= F(VIRT_SSBD); entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); + if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD)) + entry->ebx |= F(VIRT_SSBD); break; } case 0x80000019: diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index c07dbcc6d449..26110c202b19 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4120,6 +4120,13 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = svm->spec_ctrl; break; + case MSR_AMD64_VIRT_SPEC_CTRL: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD)) + return 1; + + msr_info->data = svm->virt_spec_ctrl; + break; case MSR_F15H_IC_CFG: { int family, model; @@ -4251,6 +4258,16 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1); break; + case MSR_AMD64_VIRT_SPEC_CTRL: + if (!msr->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD)) + return 1; + + if (data & ~SPEC_CTRL_SSBD) + return 1; + + svm->virt_spec_ctrl = data; + break; case MSR_STAR: svm->vmcb->save.star = data; break; @@ -5791,7 +5808,7 @@ static bool svm_cpu_has_accelerated_tpr(void) return false; } -static bool svm_has_high_real_mode_segbase(void) +static bool svm_has_emulated_msr(int index) { return true; } @@ -7017,7 +7034,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .hardware_enable = svm_hardware_enable, .hardware_disable = svm_hardware_disable, .cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr, - .cpu_has_high_real_mode_segbase = svm_has_high_real_mode_segbase, + .has_emulated_msr = svm_has_emulated_msr, .vcpu_create = svm_create_vcpu, .vcpu_free = svm_free_vcpu, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5d733a03a6fa..0c57fb4df2d1 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9477,9 +9477,21 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) } STACK_FRAME_NON_STANDARD(vmx_handle_external_intr); -static bool vmx_has_high_real_mode_segbase(void) +static bool vmx_has_emulated_msr(int index) { - return enable_unrestricted_guest || emulate_invalid_guest_state; + switch (index) { + case MSR_IA32_SMBASE: + /* + * We cannot do SMM unless we can run the guest in big + * real mode. + */ + return enable_unrestricted_guest || emulate_invalid_guest_state; + case MSR_AMD64_VIRT_SPEC_CTRL: + /* This is AMD only. */ + return false; + default: + return true; + } } static bool vmx_mpx_supported(void) @@ -12625,7 +12637,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .hardware_enable = hardware_enable, .hardware_disable = hardware_disable, .cpu_has_accelerated_tpr = report_flexpriority, - .cpu_has_high_real_mode_segbase = vmx_has_high_real_mode_segbase, + .has_emulated_msr = vmx_has_emulated_msr, .vm_init = vmx_vm_init, .vm_alloc = vmx_vm_alloc, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 51ecd381793b..421a39e40d5e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1058,6 +1058,7 @@ static u32 emulated_msrs[] = { MSR_SMI_COUNT, MSR_PLATFORM_INFO, MSR_MISC_FEATURES_ENABLES, + MSR_AMD64_VIRT_SPEC_CTRL, }; static unsigned num_emulated_msrs; @@ -2903,7 +2904,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) * fringe case that is not enabled except via specific settings * of the module parameters. */ - r = kvm_x86_ops->cpu_has_high_real_mode_segbase(); + r = kvm_x86_ops->has_emulated_msr(MSR_IA32_SMBASE); break; case KVM_CAP_VAPIC: r = !kvm_x86_ops->cpu_has_accelerated_tpr(); @@ -4603,14 +4604,8 @@ static void kvm_init_msr_list(void) num_msrs_to_save = j; for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) { - switch (emulated_msrs[i]) { - case MSR_IA32_SMBASE: - if (!kvm_x86_ops->cpu_has_high_real_mode_segbase()) - continue; - break; - default: - break; - } + if (!kvm_x86_ops->has_emulated_msr(emulated_msrs[i])) + continue; if (j < i) emulated_msrs[j] = emulated_msrs[i]; -- cgit v1.2.3-59-g8ed1b From faf37c44a105f3608115785f17cbbf3500f8bc71 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 18 May 2018 11:37:42 +1000 Subject: powerpc/64s: Clear PCR on boot Clear the PCR (Processor Compatibility Register) on boot to ensure we are not running in a compatibility mode. We've seen this cause problems when a crash (and kdump) occurs while running compat mode guests. The kdump kernel then runs with the PCR set and causes problems. The symptom in the kdump kernel (also seen in petitboot after fast-reboot) is early userspace programs taking sigills on newer instructions (seen in libc). Signed-off-by: Michael Neuling Cc: stable@vger.kernel.org Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/cpu_setup_power.S | 6 ++++++ arch/powerpc/kernel/dt_cpu_ftrs.c | 1 + 2 files changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 3f30c994e931..458b928dbd84 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -28,6 +28,7 @@ _GLOBAL(__setup_cpu_power7) beqlr li r0,0 mtspr SPRN_LPID,r0 + mtspr SPRN_PCR,r0 mfspr r3,SPRN_LPCR li r4,(LPCR_LPES1 >> LPCR_LPES_SH) bl __init_LPCR_ISA206 @@ -41,6 +42,7 @@ _GLOBAL(__restore_cpu_power7) beqlr li r0,0 mtspr SPRN_LPID,r0 + mtspr SPRN_PCR,r0 mfspr r3,SPRN_LPCR li r4,(LPCR_LPES1 >> LPCR_LPES_SH) bl __init_LPCR_ISA206 @@ -57,6 +59,7 @@ _GLOBAL(__setup_cpu_power8) beqlr li r0,0 mtspr SPRN_LPID,r0 + mtspr SPRN_PCR,r0 mfspr r3,SPRN_LPCR ori r3, r3, LPCR_PECEDH li r4,0 /* LPES = 0 */ @@ -78,6 +81,7 @@ _GLOBAL(__restore_cpu_power8) beqlr li r0,0 mtspr SPRN_LPID,r0 + mtspr SPRN_PCR,r0 mfspr r3,SPRN_LPCR ori r3, r3, LPCR_PECEDH li r4,0 /* LPES = 0 */ @@ -99,6 +103,7 @@ _GLOBAL(__setup_cpu_power9) mtspr SPRN_PSSCR,r0 mtspr SPRN_LPID,r0 mtspr SPRN_PID,r0 + mtspr SPRN_PCR,r0 mfspr r3,SPRN_LPCR LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC) or r3, r3, r4 @@ -123,6 +128,7 @@ _GLOBAL(__restore_cpu_power9) mtspr SPRN_PSSCR,r0 mtspr SPRN_LPID,r0 mtspr SPRN_PID,r0 + mtspr SPRN_PCR,r0 mfspr r3,SPRN_LPCR LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC) or r3, r3, r4 diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 8ab51f6ca03a..c904477abaf3 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -101,6 +101,7 @@ static void __restore_cpu_cpufeatures(void) if (hv_mode) { mtspr(SPRN_LPID, 0); mtspr(SPRN_HFSCR, system_registers.hfscr); + mtspr(SPRN_PCR, 0); } mtspr(SPRN_FSCR, system_registers.fscr); -- cgit v1.2.3-59-g8ed1b From 240da953fcc6a9008c92fae5b1f727ee5ed167ab Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 16 May 2018 23:18:09 -0400 Subject: x86/bugs: Rename SSBD_NO to SSB_NO The "336996 Speculative Execution Side Channel Mitigations" from May defines this as SSB_NO, hence lets sync-up. Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/msr-index.h | 2 +- arch/x86/kernel/cpu/common.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 562414d5b834..fda2114197b3 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -70,7 +70,7 @@ #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a #define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ #define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ -#define ARCH_CAP_SSBD_NO (1 << 4) /* +#define ARCH_CAP_SSB_NO (1 << 4) /* * Not susceptible to Speculative Store Bypass * attack, so no Speculative Store Bypass * control required. diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b4247ed0c81e..78decc3e3067 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -974,7 +974,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); if (!x86_match_cpu(cpu_no_spec_store_bypass) && - !(ia32_cap & ARCH_CAP_SSBD_NO)) + !(ia32_cap & ARCH_CAP_SSB_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); if (x86_match_cpu(cpu_no_speculation)) -- cgit v1.2.3-59-g8ed1b From 32c3fa7cdf0c4a3eb8405fc3e13398de019e828b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 21 May 2018 17:44:57 +0100 Subject: arm64: lse: Add early clobbers to some input/output asm operands For LSE atomics that read and write a register operand, we need to ensure that these operands are annotated as "early clobber" if the register is written before all of the input operands have been consumed. Failure to do so can result in the compiler allocating the same register to both operands, leading to splats such as: Unable to handle kernel paging request at virtual address 11111122222221 [...] x1 : 1111111122222222 x0 : 1111111122222221 Process swapper/0 (pid: 1, stack limit = 0x000000008209f908) Call trace: test_atomic64+0x1360/0x155c where x0 has been allocated as both the value to be stored and also the atomic_t pointer. This patch adds the missing clobbers. Cc: Cc: Dave Martin Cc: Robin Murphy Reported-by: Mark Salter Signed-off-by: Will Deacon --- arch/arm64/include/asm/atomic_lse.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 9ef0797380cb..f9b0b09153e0 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -117,7 +117,7 @@ static inline void atomic_and(int i, atomic_t *v) /* LSE atomics */ " mvn %w[i], %w[i]\n" " stclr %w[i], %[v]") - : [i] "+r" (w0), [v] "+Q" (v->counter) + : [i] "+&r" (w0), [v] "+Q" (v->counter) : "r" (x1) : __LL_SC_CLOBBERS); } @@ -135,7 +135,7 @@ static inline int atomic_fetch_and##name(int i, atomic_t *v) \ /* LSE atomics */ \ " mvn %w[i], %w[i]\n" \ " ldclr" #mb " %w[i], %w[i], %[v]") \ - : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : [i] "+&r" (w0), [v] "+Q" (v->counter) \ : "r" (x1) \ : __LL_SC_CLOBBERS, ##cl); \ \ @@ -161,7 +161,7 @@ static inline void atomic_sub(int i, atomic_t *v) /* LSE atomics */ " neg %w[i], %w[i]\n" " stadd %w[i], %[v]") - : [i] "+r" (w0), [v] "+Q" (v->counter) + : [i] "+&r" (w0), [v] "+Q" (v->counter) : "r" (x1) : __LL_SC_CLOBBERS); } @@ -180,7 +180,7 @@ static inline int atomic_sub_return##name(int i, atomic_t *v) \ " neg %w[i], %w[i]\n" \ " ldadd" #mb " %w[i], w30, %[v]\n" \ " add %w[i], %w[i], w30") \ - : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : [i] "+&r" (w0), [v] "+Q" (v->counter) \ : "r" (x1) \ : __LL_SC_CLOBBERS , ##cl); \ \ @@ -207,7 +207,7 @@ static inline int atomic_fetch_sub##name(int i, atomic_t *v) \ /* LSE atomics */ \ " neg %w[i], %w[i]\n" \ " ldadd" #mb " %w[i], %w[i], %[v]") \ - : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : [i] "+&r" (w0), [v] "+Q" (v->counter) \ : "r" (x1) \ : __LL_SC_CLOBBERS, ##cl); \ \ @@ -314,7 +314,7 @@ static inline void atomic64_and(long i, atomic64_t *v) /* LSE atomics */ " mvn %[i], %[i]\n" " stclr %[i], %[v]") - : [i] "+r" (x0), [v] "+Q" (v->counter) + : [i] "+&r" (x0), [v] "+Q" (v->counter) : "r" (x1) : __LL_SC_CLOBBERS); } @@ -332,7 +332,7 @@ static inline long atomic64_fetch_and##name(long i, atomic64_t *v) \ /* LSE atomics */ \ " mvn %[i], %[i]\n" \ " ldclr" #mb " %[i], %[i], %[v]") \ - : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : [i] "+&r" (x0), [v] "+Q" (v->counter) \ : "r" (x1) \ : __LL_SC_CLOBBERS, ##cl); \ \ @@ -358,7 +358,7 @@ static inline void atomic64_sub(long i, atomic64_t *v) /* LSE atomics */ " neg %[i], %[i]\n" " stadd %[i], %[v]") - : [i] "+r" (x0), [v] "+Q" (v->counter) + : [i] "+&r" (x0), [v] "+Q" (v->counter) : "r" (x1) : __LL_SC_CLOBBERS); } @@ -377,7 +377,7 @@ static inline long atomic64_sub_return##name(long i, atomic64_t *v) \ " neg %[i], %[i]\n" \ " ldadd" #mb " %[i], x30, %[v]\n" \ " add %[i], %[i], x30") \ - : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : [i] "+&r" (x0), [v] "+Q" (v->counter) \ : "r" (x1) \ : __LL_SC_CLOBBERS, ##cl); \ \ @@ -404,7 +404,7 @@ static inline long atomic64_fetch_sub##name(long i, atomic64_t *v) \ /* LSE atomics */ \ " neg %[i], %[i]\n" \ " ldadd" #mb " %[i], %[i], %[v]") \ - : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : [i] "+&r" (x0), [v] "+Q" (v->counter) \ : "r" (x1) \ : __LL_SC_CLOBBERS, ##cl); \ \ @@ -435,7 +435,7 @@ static inline long atomic64_dec_if_positive(atomic64_t *v) " sub x30, x30, %[ret]\n" " cbnz x30, 1b\n" "2:") - : [ret] "+r" (x0), [v] "+Q" (v->counter) + : [ret] "+&r" (x0), [v] "+Q" (v->counter) : : __LL_SC_CLOBBERS, "cc", "memory"); @@ -516,7 +516,7 @@ static inline long __cmpxchg_double##name(unsigned long old1, \ " eor %[old1], %[old1], %[oldval1]\n" \ " eor %[old2], %[old2], %[oldval2]\n" \ " orr %[old1], %[old1], %[old2]") \ - : [old1] "+r" (x0), [old2] "+r" (x1), \ + : [old1] "+&r" (x0), [old2] "+&r" (x1), \ [v] "+Q" (*(unsigned long *)ptr) \ : [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4), \ [oldval1] "r" (oldval1), [oldval2] "r" (oldval2) \ -- cgit v1.2.3-59-g8ed1b From 255845fc43a3aaf806852a1d3bc89bff1411ebe3 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 28 Apr 2018 00:42:52 +0200 Subject: arm64: export tishift functions to modules Otherwise modules that use these arithmetic operations will fail to link. We accomplish this with the usual EXPORT_SYMBOL, which on most architectures goes in the .S file but the ARM64 maintainers prefer that insead it goes into arm64ksyms. While we're at it, we also fix this up to use SPDX, and I personally choose to relicense this as GPL2||BSD so that these symbols don't need to be export_symbol_gpl, so all modules can use the routines, since these are important general purpose compiler-generated function calls. Signed-off-by: Jason A. Donenfeld Reported-by: PaX Team Cc: stable@vger.kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/arm64ksyms.c | 8 ++++++++ arch/arm64/lib/tishift.S | 15 ++------------- 2 files changed, 10 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index 66be504edb6c..d894a20b70b2 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -75,3 +75,11 @@ NOKPROBE_SYMBOL(_mcount); /* arm-smccc */ EXPORT_SYMBOL(__arm_smccc_smc); EXPORT_SYMBOL(__arm_smccc_hvc); + + /* tishift.S */ +extern long long __ashlti3(long long a, int b); +EXPORT_SYMBOL(__ashlti3); +extern long long __ashrti3(long long a, int b); +EXPORT_SYMBOL(__ashrti3); +extern long long __lshrti3(long long a, int b); +EXPORT_SYMBOL(__lshrti3); diff --git a/arch/arm64/lib/tishift.S b/arch/arm64/lib/tishift.S index d3db9b2cd479..0fdff97794de 100644 --- a/arch/arm64/lib/tishift.S +++ b/arch/arm64/lib/tishift.S @@ -1,17 +1,6 @@ -/* - * Copyright (C) 2017 Jason A. Donenfeld . All Rights Reserved. +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . + * Copyright (C) 2017-2018 Jason A. Donenfeld . All Rights Reserved. */ #include -- cgit v1.2.3-59-g8ed1b From a048a07d7f4535baa4cbad6bc024f175317ab938 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 22 May 2018 09:00:00 +1000 Subject: powerpc/64s: Add support for a store forwarding barrier at kernel entry/exit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On some CPUs we can prevent a vulnerability related to store-to-load forwarding by preventing store forwarding between privilege domains, by inserting a barrier in kernel entry and exit paths. This is known to be the case on at least Power7, Power8 and Power9 powerpc CPUs. Barriers must be inserted generally before the first load after moving to a higher privilege, and after the last store before moving to a lower privilege, HV and PR privilege transitions must be protected. Barriers are added as patch sections, with all kernel/hypervisor entry points patched, and the exit points to lower privilge levels patched similarly to the RFI flush patching. Firmware advertisement is not implemented yet, so CPU flush types are hard coded. Thanks to Michal Suchánek for bug fixes and review. Signed-off-by: Nicholas Piggin Signed-off-by: Mauricio Faria de Oliveira Signed-off-by: Michael Neuling Signed-off-by: Michal Suchánek Signed-off-by: Michael Ellerman Signed-off-by: Linus Torvalds --- arch/powerpc/include/asm/exception-64s.h | 29 ++++++ arch/powerpc/include/asm/feature-fixups.h | 19 ++++ arch/powerpc/include/asm/security_features.h | 11 ++ arch/powerpc/kernel/exceptions-64s.S | 19 +++- arch/powerpc/kernel/security.c | 149 +++++++++++++++++++++++++++ arch/powerpc/kernel/vmlinux.lds.S | 14 +++ arch/powerpc/lib/feature-fixups.c | 115 +++++++++++++++++++++ arch/powerpc/platforms/powernv/setup.c | 1 + arch/powerpc/platforms/pseries/setup.c | 1 + 9 files changed, 356 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 471b2274fbeb..c40b4380951c 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -74,6 +74,27 @@ */ #define EX_R3 EX_DAR +#define STF_ENTRY_BARRIER_SLOT \ + STF_ENTRY_BARRIER_FIXUP_SECTION; \ + nop; \ + nop; \ + nop + +#define STF_EXIT_BARRIER_SLOT \ + STF_EXIT_BARRIER_FIXUP_SECTION; \ + nop; \ + nop; \ + nop; \ + nop; \ + nop; \ + nop + +/* + * r10 must be free to use, r13 must be paca + */ +#define INTERRUPT_TO_KERNEL \ + STF_ENTRY_BARRIER_SLOT + /* * Macros for annotating the expected destination of (h)rfid * @@ -90,16 +111,19 @@ rfid #define RFI_TO_USER \ + STF_EXIT_BARRIER_SLOT; \ RFI_FLUSH_SLOT; \ rfid; \ b rfi_flush_fallback #define RFI_TO_USER_OR_KERNEL \ + STF_EXIT_BARRIER_SLOT; \ RFI_FLUSH_SLOT; \ rfid; \ b rfi_flush_fallback #define RFI_TO_GUEST \ + STF_EXIT_BARRIER_SLOT; \ RFI_FLUSH_SLOT; \ rfid; \ b rfi_flush_fallback @@ -108,21 +132,25 @@ hrfid #define HRFI_TO_USER \ + STF_EXIT_BARRIER_SLOT; \ RFI_FLUSH_SLOT; \ hrfid; \ b hrfi_flush_fallback #define HRFI_TO_USER_OR_KERNEL \ + STF_EXIT_BARRIER_SLOT; \ RFI_FLUSH_SLOT; \ hrfid; \ b hrfi_flush_fallback #define HRFI_TO_GUEST \ + STF_EXIT_BARRIER_SLOT; \ RFI_FLUSH_SLOT; \ hrfid; \ b hrfi_flush_fallback #define HRFI_TO_UNKNOWN \ + STF_EXIT_BARRIER_SLOT; \ RFI_FLUSH_SLOT; \ hrfid; \ b hrfi_flush_fallback @@ -254,6 +282,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define __EXCEPTION_PROLOG_1_PRE(area) \ OPT_SAVE_REG_TO_PACA(area+EX_PPR, r9, CPU_FTR_HAS_PPR); \ OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR); \ + INTERRUPT_TO_KERNEL; \ SAVE_CTR(r10, area); \ mfcr r9; diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index 1e82eb3caabd..a9b64df34e2a 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -187,6 +187,22 @@ label##3: \ FTR_ENTRY_OFFSET label##1b-label##3b; \ .popsection; +#define STF_ENTRY_BARRIER_FIXUP_SECTION \ +953: \ + .pushsection __stf_entry_barrier_fixup,"a"; \ + .align 2; \ +954: \ + FTR_ENTRY_OFFSET 953b-954b; \ + .popsection; + +#define STF_EXIT_BARRIER_FIXUP_SECTION \ +955: \ + .pushsection __stf_exit_barrier_fixup,"a"; \ + .align 2; \ +956: \ + FTR_ENTRY_OFFSET 955b-956b; \ + .popsection; + #define RFI_FLUSH_FIXUP_SECTION \ 951: \ .pushsection __rfi_flush_fixup,"a"; \ @@ -199,6 +215,9 @@ label##3: \ #ifndef __ASSEMBLY__ #include +extern long stf_barrier_fallback; +extern long __start___stf_entry_barrier_fixup, __stop___stf_entry_barrier_fixup; +extern long __start___stf_exit_barrier_fixup, __stop___stf_exit_barrier_fixup; extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup; void apply_feature_fixups(void); diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index fa4d2e1cf772..44989b22383c 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -12,6 +12,17 @@ extern unsigned long powerpc_security_features; extern bool rfi_flush; +/* These are bit flags */ +enum stf_barrier_type { + STF_BARRIER_NONE = 0x1, + STF_BARRIER_FALLBACK = 0x2, + STF_BARRIER_EIEIO = 0x4, + STF_BARRIER_SYNC_ORI = 0x8, +}; + +void setup_stf_barrier(void); +void do_stf_barrier_fixups(enum stf_barrier_type types); + static inline void security_ftr_set(unsigned long feature) { powerpc_security_features |= feature; diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index ae6a849db60b..f283958129f2 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -885,7 +885,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) #endif -EXC_REAL_MASKABLE(decrementer, 0x900, 0x80, IRQS_DISABLED) +EXC_REAL_OOL_MASKABLE(decrementer, 0x900, 0x80, IRQS_DISABLED) EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900, IRQS_DISABLED) TRAMP_KVM(PACA_EXGEN, 0x900) EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt) @@ -961,6 +961,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception) mtctr r13; \ GET_PACA(r13); \ std r10,PACA_EXGEN+EX_R10(r13); \ + INTERRUPT_TO_KERNEL; \ KVMTEST_PR(0xc00); /* uses r10, branch to do_kvm_0xc00_system_call */ \ HMT_MEDIUM; \ mfctr r9; @@ -969,7 +970,8 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception) #define SYSCALL_KVMTEST \ HMT_MEDIUM; \ mr r9,r13; \ - GET_PACA(r13); + GET_PACA(r13); \ + INTERRUPT_TO_KERNEL; #endif #define LOAD_SYSCALL_HANDLER(reg) \ @@ -1507,6 +1509,19 @@ masked_##_H##interrupt: \ b .; \ MASKED_DEC_HANDLER(_H) +TRAMP_REAL_BEGIN(stf_barrier_fallback) + std r9,PACA_EXRFI+EX_R9(r13) + std r10,PACA_EXRFI+EX_R10(r13) + sync + ld r9,PACA_EXRFI+EX_R9(r13) + ld r10,PACA_EXRFI+EX_R10(r13) + ori 31,31,0 + .rept 14 + b 1f +1: + .endr + blr + TRAMP_REAL_BEGIN(rfi_flush_fallback) SET_SCRATCH0(r13); GET_PACA(r13); diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index bab5a27ea805..b98a722da915 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -8,6 +8,7 @@ #include #include +#include #include @@ -86,3 +87,151 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c return s.len; } + +/* + * Store-forwarding barrier support. + */ + +static enum stf_barrier_type stf_enabled_flush_types; +static bool no_stf_barrier; +bool stf_barrier; + +static int __init handle_no_stf_barrier(char *p) +{ + pr_info("stf-barrier: disabled on command line."); + no_stf_barrier = true; + return 0; +} + +early_param("no_stf_barrier", handle_no_stf_barrier); + +/* This is the generic flag used by other architectures */ +static int __init handle_ssbd(char *p) +{ + if (!p || strncmp(p, "auto", 5) == 0 || strncmp(p, "on", 2) == 0 ) { + /* Until firmware tells us, we have the barrier with auto */ + return 0; + } else if (strncmp(p, "off", 3) == 0) { + handle_no_stf_barrier(NULL); + return 0; + } else + return 1; + + return 0; +} +early_param("spec_store_bypass_disable", handle_ssbd); + +/* This is the generic flag used by other architectures */ +static int __init handle_no_ssbd(char *p) +{ + handle_no_stf_barrier(NULL); + return 0; +} +early_param("nospec_store_bypass_disable", handle_no_ssbd); + +static void stf_barrier_enable(bool enable) +{ + if (enable) + do_stf_barrier_fixups(stf_enabled_flush_types); + else + do_stf_barrier_fixups(STF_BARRIER_NONE); + + stf_barrier = enable; +} + +void setup_stf_barrier(void) +{ + enum stf_barrier_type type; + bool enable, hv; + + hv = cpu_has_feature(CPU_FTR_HVMODE); + + /* Default to fallback in case fw-features are not available */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) + type = STF_BARRIER_EIEIO; + else if (cpu_has_feature(CPU_FTR_ARCH_207S)) + type = STF_BARRIER_SYNC_ORI; + else if (cpu_has_feature(CPU_FTR_ARCH_206)) + type = STF_BARRIER_FALLBACK; + else + type = STF_BARRIER_NONE; + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) || + (security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) && hv)); + + if (type == STF_BARRIER_FALLBACK) { + pr_info("stf-barrier: fallback barrier available\n"); + } else if (type == STF_BARRIER_SYNC_ORI) { + pr_info("stf-barrier: hwsync barrier available\n"); + } else if (type == STF_BARRIER_EIEIO) { + pr_info("stf-barrier: eieio barrier available\n"); + } + + stf_enabled_flush_types = type; + + if (!no_stf_barrier) + stf_barrier_enable(enable); +} + +ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf) +{ + if (stf_barrier && stf_enabled_flush_types != STF_BARRIER_NONE) { + const char *type; + switch (stf_enabled_flush_types) { + case STF_BARRIER_EIEIO: + type = "eieio"; + break; + case STF_BARRIER_SYNC_ORI: + type = "hwsync"; + break; + case STF_BARRIER_FALLBACK: + type = "fallback"; + break; + default: + type = "unknown"; + } + return sprintf(buf, "Mitigation: Kernel entry/exit barrier (%s)\n", type); + } + + if (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) && + !security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR)) + return sprintf(buf, "Not affected\n"); + + return sprintf(buf, "Vulnerable\n"); +} + +#ifdef CONFIG_DEBUG_FS +static int stf_barrier_set(void *data, u64 val) +{ + bool enable; + + if (val == 1) + enable = true; + else if (val == 0) + enable = false; + else + return -EINVAL; + + /* Only do anything if we're changing state */ + if (enable != stf_barrier) + stf_barrier_enable(enable); + + return 0; +} + +static int stf_barrier_get(void *data, u64 *val) +{ + *val = stf_barrier ? 1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_stf_barrier, stf_barrier_get, stf_barrier_set, "%llu\n"); + +static __init int stf_barrier_debugfs_init(void) +{ + debugfs_create_file("stf_barrier", 0600, powerpc_debugfs_root, NULL, &fops_stf_barrier); + return 0; +} +device_initcall(stf_barrier_debugfs_init); +#endif /* CONFIG_DEBUG_FS */ diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index c8af90ff49f0..b8d82678f8b4 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -133,6 +133,20 @@ SECTIONS RO_DATA(PAGE_SIZE) #ifdef CONFIG_PPC64 + . = ALIGN(8); + __stf_entry_barrier_fixup : AT(ADDR(__stf_entry_barrier_fixup) - LOAD_OFFSET) { + __start___stf_entry_barrier_fixup = .; + *(__stf_entry_barrier_fixup) + __stop___stf_entry_barrier_fixup = .; + } + + . = ALIGN(8); + __stf_exit_barrier_fixup : AT(ADDR(__stf_exit_barrier_fixup) - LOAD_OFFSET) { + __start___stf_exit_barrier_fixup = .; + *(__stf_exit_barrier_fixup) + __stop___stf_exit_barrier_fixup = .; + } + . = ALIGN(8); __rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) { __start___rfi_flush_fixup = .; diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 288fe4f0db4e..e1bcdc32a851 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -23,6 +23,7 @@ #include #include #include +#include #include struct fixup_entry { @@ -117,6 +118,120 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) } #ifdef CONFIG_PPC_BOOK3S_64 +void do_stf_entry_barrier_fixups(enum stf_barrier_type types) +{ + unsigned int instrs[3], *dest; + long *start, *end; + int i; + + start = PTRRELOC(&__start___stf_entry_barrier_fixup), + end = PTRRELOC(&__stop___stf_entry_barrier_fixup); + + instrs[0] = 0x60000000; /* nop */ + instrs[1] = 0x60000000; /* nop */ + instrs[2] = 0x60000000; /* nop */ + + i = 0; + if (types & STF_BARRIER_FALLBACK) { + instrs[i++] = 0x7d4802a6; /* mflr r10 */ + instrs[i++] = 0x60000000; /* branch patched below */ + instrs[i++] = 0x7d4803a6; /* mtlr r10 */ + } else if (types & STF_BARRIER_EIEIO) { + instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */ + } else if (types & STF_BARRIER_SYNC_ORI) { + instrs[i++] = 0x7c0004ac; /* hwsync */ + instrs[i++] = 0xe94d0000; /* ld r10,0(r13) */ + instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + } + + for (i = 0; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + patch_instruction(dest, instrs[0]); + + if (types & STF_BARRIER_FALLBACK) + patch_branch(dest + 1, (unsigned long)&stf_barrier_fallback, + BRANCH_SET_LINK); + else + patch_instruction(dest + 1, instrs[1]); + + patch_instruction(dest + 2, instrs[2]); + } + + printk(KERN_DEBUG "stf-barrier: patched %d entry locations (%s barrier)\n", i, + (types == STF_BARRIER_NONE) ? "no" : + (types == STF_BARRIER_FALLBACK) ? "fallback" : + (types == STF_BARRIER_EIEIO) ? "eieio" : + (types == (STF_BARRIER_SYNC_ORI)) ? "hwsync" + : "unknown"); +} + +void do_stf_exit_barrier_fixups(enum stf_barrier_type types) +{ + unsigned int instrs[6], *dest; + long *start, *end; + int i; + + start = PTRRELOC(&__start___stf_exit_barrier_fixup), + end = PTRRELOC(&__stop___stf_exit_barrier_fixup); + + instrs[0] = 0x60000000; /* nop */ + instrs[1] = 0x60000000; /* nop */ + instrs[2] = 0x60000000; /* nop */ + instrs[3] = 0x60000000; /* nop */ + instrs[4] = 0x60000000; /* nop */ + instrs[5] = 0x60000000; /* nop */ + + i = 0; + if (types & STF_BARRIER_FALLBACK || types & STF_BARRIER_SYNC_ORI) { + if (cpu_has_feature(CPU_FTR_HVMODE)) { + instrs[i++] = 0x7db14ba6; /* mtspr 0x131, r13 (HSPRG1) */ + instrs[i++] = 0x7db04aa6; /* mfspr r13, 0x130 (HSPRG0) */ + } else { + instrs[i++] = 0x7db243a6; /* mtsprg 2,r13 */ + instrs[i++] = 0x7db142a6; /* mfsprg r13,1 */ + } + instrs[i++] = 0x7c0004ac; /* hwsync */ + instrs[i++] = 0xe9ad0000; /* ld r13,0(r13) */ + instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + if (cpu_has_feature(CPU_FTR_HVMODE)) { + instrs[i++] = 0x7db14aa6; /* mfspr r13, 0x131 (HSPRG1) */ + } else { + instrs[i++] = 0x7db242a6; /* mfsprg r13,2 */ + } + } else if (types & STF_BARRIER_EIEIO) { + instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */ + } + + for (i = 0; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + patch_instruction(dest, instrs[0]); + patch_instruction(dest + 1, instrs[1]); + patch_instruction(dest + 2, instrs[2]); + patch_instruction(dest + 3, instrs[3]); + patch_instruction(dest + 4, instrs[4]); + patch_instruction(dest + 5, instrs[5]); + } + printk(KERN_DEBUG "stf-barrier: patched %d exit locations (%s barrier)\n", i, + (types == STF_BARRIER_NONE) ? "no" : + (types == STF_BARRIER_FALLBACK) ? "fallback" : + (types == STF_BARRIER_EIEIO) ? "eieio" : + (types == (STF_BARRIER_SYNC_ORI)) ? "hwsync" + : "unknown"); +} + + +void do_stf_barrier_fixups(enum stf_barrier_type types) +{ + do_stf_entry_barrier_fixups(types); + do_stf_exit_barrier_fixups(types); +} + void do_rfi_flush_fixups(enum l1d_flush_type types) { unsigned int instrs[3], *dest; diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index ef8c9ce53a61..a6648ec99ca7 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -131,6 +131,7 @@ static void __init pnv_setup_arch(void) set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); pnv_setup_rfi_flush(); + setup_stf_barrier(); /* Initialize SMP */ pnv_smp_init(); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index b55ad4286dc7..fdb32e056ef4 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -710,6 +710,7 @@ static void __init pSeries_setup_arch(void) fwnmi_init(); pseries_setup_rfi_flush(); + setup_stf_barrier(); /* By default, only probe PCI (can be overridden by rtas_pci) */ pci_add_flags(PCI_PROBE_ONLY); -- cgit v1.2.3-59-g8ed1b From cc19846079a70abcfd91b5a0791a5f17d69458a5 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 22 May 2018 17:11:20 +0100 Subject: arm64: fault: Don't leak data in ESR context for user fault on kernel VA If userspace faults on a kernel address, handing them the raw ESR value on the sigframe as part of the delivered signal can leak data useful to attackers who are using information about the underlying hardware fault type (e.g. translation vs permission) as a mechanism to defeat KASLR. However there are also legitimate uses for the information provided in the ESR -- notably the GCC and LLVM sanitizers use this to report whether wild pointer accesses by the application are reads or writes (since a wild write is a more serious bug than a wild read), so we don't want to drop the ESR information entirely. For faulting addresses in the kernel, sanitize the ESR. We choose to present userspace with the illusion that there is nothing mapped in the kernel's part of the address space at all, by reporting all faults as level 0 translation faults taken to EL1. These fields are safe to pass through to userspace as they depend only on the instruction that userspace used to provoke the fault: EC IL (always) ISV CM WNR (for all data aborts) All the other fields in ESR except DFSC are architecturally RES0 for an L0 translation fault taken to EL1, so can be zeroed out without confusing userspace. The illusion is not entirely perfect, as there is a tiny wrinkle where we will report an alignment fault that was not due to the memory type (for instance a LDREX to an unaligned address) as a translation fault, whereas if you do this on real unmapped memory the alignment fault takes precedence. This is not likely to trip anybody up in practice, as the only users we know of for the ESR information who care about the behaviour for kernel addresses only really want to know about the WnR bit. Signed-off-by: Peter Maydell Signed-off-by: Will Deacon --- arch/arm64/mm/fault.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) (limited to 'arch') diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 4165485e8b6e..2af3dd89bcdb 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -293,6 +293,57 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr, static void __do_user_fault(struct siginfo *info, unsigned int esr) { current->thread.fault_address = (unsigned long)info->si_addr; + + /* + * If the faulting address is in the kernel, we must sanitize the ESR. + * From userspace's point of view, kernel-only mappings don't exist + * at all, so we report them as level 0 translation faults. + * (This is not quite the way that "no mapping there at all" behaves: + * an alignment fault not caused by the memory type would take + * precedence over translation fault for a real access to empty + * space. Unfortunately we can't easily distinguish "alignment fault + * not caused by memory type" from "alignment fault caused by memory + * type", so we ignore this wrinkle and just return the translation + * fault.) + */ + if (current->thread.fault_address >= TASK_SIZE) { + switch (ESR_ELx_EC(esr)) { + case ESR_ELx_EC_DABT_LOW: + /* + * These bits provide only information about the + * faulting instruction, which userspace knows already. + * We explicitly clear bits which are architecturally + * RES0 in case they are given meanings in future. + * We always report the ESR as if the fault was taken + * to EL1 and so ISV and the bits in ISS[23:14] are + * clear. (In fact it always will be a fault to EL1.) + */ + esr &= ESR_ELx_EC_MASK | ESR_ELx_IL | + ESR_ELx_CM | ESR_ELx_WNR; + esr |= ESR_ELx_FSC_FAULT; + break; + case ESR_ELx_EC_IABT_LOW: + /* + * Claim a level 0 translation fault. + * All other bits are architecturally RES0 for faults + * reported with that DFSC value, so we clear them. + */ + esr &= ESR_ELx_EC_MASK | ESR_ELx_IL; + esr |= ESR_ELx_FSC_FAULT; + break; + default: + /* + * This should never happen (entry.S only brings us + * into this code for insn and data aborts from a lower + * exception level). Fail safe by not providing an ESR + * context record at all. + */ + WARN(1, "ESR 0x%x is not DABT or IABT from EL0\n", esr); + esr = 0; + break; + } + } + current->thread.fault_code = esr; arm64_force_sig_info(info, esr_to_fault_info(esr)->name, current); } -- cgit v1.2.3-59-g8ed1b From 6db615431a21b6057f68ed87583a663ee69f7601 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 9 May 2018 16:04:51 +0200 Subject: alpha: use dma_direct_ops for jensen The generic dma_direct implementation does the same thing as the alpha pci-noop implementation, just with more bells and whistles. And unlike the current code it at least has a theoretical chance to actually compile. Signed-off-by: Christoph Hellwig Signed-off-by: Matt Turner --- arch/alpha/Kconfig | 1 + arch/alpha/include/asm/dma-mapping.h | 4 ++++ arch/alpha/kernel/pci-noop.c | 33 --------------------------------- 3 files changed, 5 insertions(+), 33 deletions(-) (limited to 'arch') diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index b2022885ced8..f19dc31288c8 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -211,6 +211,7 @@ config ALPHA_EIGER config ALPHA_JENSEN bool "Jensen" depends on BROKEN + select DMA_DIRECT_OPS help DEC PC 150 AXP (aka Jensen): This is a very old Digital system - one of the first-generation Alpha systems. A number of these systems diff --git a/arch/alpha/include/asm/dma-mapping.h b/arch/alpha/include/asm/dma-mapping.h index b78f61f20796..76ce923ecca1 100644 --- a/arch/alpha/include/asm/dma-mapping.h +++ b/arch/alpha/include/asm/dma-mapping.h @@ -6,7 +6,11 @@ extern const struct dma_map_ops *dma_ops; static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { +#ifdef CONFIG_ALPHA_JENSEN + return &dma_direct_ops; +#else return dma_ops; +#endif } #endif /* _ALPHA_DMA_MAPPING_H */ diff --git a/arch/alpha/kernel/pci-noop.c b/arch/alpha/kernel/pci-noop.c index b6ebb65127a8..c7c5879869d3 100644 --- a/arch/alpha/kernel/pci-noop.c +++ b/arch/alpha/kernel/pci-noop.c @@ -102,36 +102,3 @@ SYSCALL_DEFINE5(pciconfig_write, unsigned long, bus, unsigned long, dfn, else return -ENODEV; } - -static void *alpha_noop_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, - unsigned long attrs) -{ - void *ret; - - if (!dev || *dev->dma_mask >= 0xffffffffUL) - gfp &= ~GFP_DMA; - ret = (void *)__get_free_pages(gfp, get_order(size)); - if (ret) { - memset(ret, 0, size); - *dma_handle = virt_to_phys(ret); - } - return ret; -} - -static int alpha_noop_supported(struct device *dev, u64 mask) -{ - return mask < 0x00ffffffUL ? 0 : 1; -} - -const struct dma_map_ops alpha_noop_ops = { - .alloc = alpha_noop_alloc_coherent, - .free = dma_noop_free_coherent, - .map_page = dma_noop_map_page, - .map_sg = dma_noop_map_sg, - .mapping_error = dma_noop_mapping_error, - .dma_supported = alpha_noop_supported, -}; - -const struct dma_map_ops *dma_ops = &alpha_noop_ops; -EXPORT_SYMBOL(dma_ops); -- cgit v1.2.3-59-g8ed1b From f5e82fa26063e6fad10624ff600457d878fa6e41 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 9 May 2018 16:04:52 +0200 Subject: alpha: simplify get_arch_dma_ops Remove the dma_ops indirection. Signed-off-by: Christoph Hellwig Signed-off-by: Matt Turner --- arch/alpha/include/asm/dma-mapping.h | 4 ++-- arch/alpha/kernel/pci_iommu.c | 4 +--- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/alpha/include/asm/dma-mapping.h b/arch/alpha/include/asm/dma-mapping.h index 76ce923ecca1..8beeafd4f68e 100644 --- a/arch/alpha/include/asm/dma-mapping.h +++ b/arch/alpha/include/asm/dma-mapping.h @@ -2,14 +2,14 @@ #ifndef _ALPHA_DMA_MAPPING_H #define _ALPHA_DMA_MAPPING_H -extern const struct dma_map_ops *dma_ops; +extern const struct dma_map_ops alpha_pci_ops; static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { #ifdef CONFIG_ALPHA_JENSEN return &dma_direct_ops; #else - return dma_ops; + return &alpha_pci_ops; #endif } diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index 83b34b9188ea..6923b0d9c1e1 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -950,6 +950,4 @@ const struct dma_map_ops alpha_pci_ops = { .mapping_error = alpha_pci_mapping_error, .dma_supported = alpha_pci_supported, }; - -const struct dma_map_ops *dma_ops = &alpha_pci_ops; -EXPORT_SYMBOL(dma_ops); +EXPORT_SYMBOL(alpha_pci_ops); -- cgit v1.2.3-59-g8ed1b From 92d7223a74235054f2aa7227d207d9c57f84dca0 Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Mon, 16 Apr 2018 18:16:56 -0400 Subject: alpha: io: reorder barriers to guarantee writeX() and iowriteX() ordering #2 memory-barriers.txt has been updated with the following requirement. "When using writel(), a prior wmb() is not needed to guarantee that the cache coherent memory writes have completed before writing to the MMIO region." Current writeX() and iowriteX() implementations on alpha are not satisfying this requirement as the barrier is after the register write. Move mb() in writeX() and iowriteX() functions to guarantee that HW observes memory changes before performing register operations. Signed-off-by: Sinan Kaya Reported-by: Arnd Bergmann Signed-off-by: Matt Turner --- arch/alpha/kernel/io.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/alpha/kernel/io.c b/arch/alpha/kernel/io.c index 3e3d49c254c5..c025a3e5e357 100644 --- a/arch/alpha/kernel/io.c +++ b/arch/alpha/kernel/io.c @@ -37,20 +37,20 @@ unsigned int ioread32(void __iomem *addr) void iowrite8(u8 b, void __iomem *addr) { - IO_CONCAT(__IO_PREFIX,iowrite8)(b, addr); mb(); + IO_CONCAT(__IO_PREFIX,iowrite8)(b, addr); } void iowrite16(u16 b, void __iomem *addr) { - IO_CONCAT(__IO_PREFIX,iowrite16)(b, addr); mb(); + IO_CONCAT(__IO_PREFIX,iowrite16)(b, addr); } void iowrite32(u32 b, void __iomem *addr) { - IO_CONCAT(__IO_PREFIX,iowrite32)(b, addr); mb(); + IO_CONCAT(__IO_PREFIX,iowrite32)(b, addr); } EXPORT_SYMBOL(ioread8); @@ -176,26 +176,26 @@ u64 readq(const volatile void __iomem *addr) void writeb(u8 b, volatile void __iomem *addr) { - __raw_writeb(b, addr); mb(); + __raw_writeb(b, addr); } void writew(u16 b, volatile void __iomem *addr) { - __raw_writew(b, addr); mb(); + __raw_writew(b, addr); } void writel(u32 b, volatile void __iomem *addr) { - __raw_writel(b, addr); mb(); + __raw_writel(b, addr); } void writeq(u64 b, volatile void __iomem *addr) { - __raw_writeq(b, addr); mb(); + __raw_writeq(b, addr); } EXPORT_SYMBOL(readb); -- cgit v1.2.3-59-g8ed1b From 82034c23fcbc2389c73d97737f61fa2dd6526413 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Wed, 23 May 2018 11:43:46 -0700 Subject: arm64: Make sure permission updates happen for pmd/pud Commit 15122ee2c515 ("arm64: Enforce BBM for huge IO/VMAP mappings") disallowed block mappings for ioremap since that code does not honor break-before-make. The same APIs are also used for permission updating though and the extra checks prevent the permission updates from happening, even though this should be permitted. This results in read-only permissions not being fully applied. Visibly, this can occasionaly be seen as a failure on the built in rodata test when the test data ends up in a section or as an odd RW gap on the page table dump. Fix this by using pgattr_change_is_safe instead of p*d_present for determining if the change is permitted. Reviewed-by: Kees Cook Tested-by: Peter Robinson Reported-by: Peter Robinson Fixes: 15122ee2c515 ("arm64: Enforce BBM for huge IO/VMAP mappings") Signed-off-by: Laura Abbott Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 2dbb2c9f1ec1..493ff75670ff 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -933,13 +933,15 @@ int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) { pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))); + pud_t new_pud = pfn_pud(__phys_to_pfn(phys), sect_prot); - /* ioremap_page_range doesn't honour BBM */ - if (pud_present(READ_ONCE(*pudp))) + /* Only allow permission changes for now */ + if (!pgattr_change_is_safe(READ_ONCE(pud_val(*pudp)), + pud_val(new_pud))) return 0; BUG_ON(phys & ~PUD_MASK); - set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot)); + set_pud(pudp, new_pud); return 1; } @@ -947,13 +949,15 @@ int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot) { pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))); + pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), sect_prot); - /* ioremap_page_range doesn't honour BBM */ - if (pmd_present(READ_ONCE(*pmdp))) + /* Only allow permission changes for now */ + if (!pgattr_change_is_safe(READ_ONCE(pmd_val(*pmdp)), + pmd_val(new_pmd))) return 0; BUG_ON(phys & ~PMD_MASK); - set_pmd(pmdp, pfn_pmd(__phys_to_pfn(phys), sect_prot)); + set_pmd(pmdp, new_pmd); return 1; } -- cgit v1.2.3-59-g8ed1b From d883c6cf3b39f1f42506e82ad2779fb88004acf3 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Wed, 23 May 2018 10:18:21 +0900 Subject: Revert "mm/cma: manage the memory of the CMA area by using the ZONE_MOVABLE" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts the following commits that change CMA design in MM. 3d2054ad8c2d ("ARM: CMA: avoid double mapping to the CMA area if CONFIG_HIGHMEM=y") 1d47a3ec09b5 ("mm/cma: remove ALLOC_CMA") bad8c6c0b114 ("mm/cma: manage the memory of the CMA area by using the ZONE_MOVABLE") Ville reported a following error on i386. Inode-cache hash table entries: 65536 (order: 6, 262144 bytes) microcode: microcode updated early to revision 0x4, date = 2013-06-28 Initializing CPU#0 Initializing HighMem for node 0 (000377fe:00118000) Initializing Movable for node 0 (00000001:00118000) BUG: Bad page state in process swapper pfn:377fe page:f53effc0 count:0 mapcount:-127 mapping:00000000 index:0x0 flags: 0x80000000() raw: 80000000 00000000 00000000 ffffff80 00000000 00000100 00000200 00000001 page dumped because: nonzero mapcount Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 4.17.0-rc5-elk+ #145 Hardware name: Dell Inc. Latitude E5410/03VXMC, BIOS A15 07/11/2013 Call Trace: dump_stack+0x60/0x96 bad_page+0x9a/0x100 free_pages_check_bad+0x3f/0x60 free_pcppages_bulk+0x29d/0x5b0 free_unref_page_commit+0x84/0xb0 free_unref_page+0x3e/0x70 __free_pages+0x1d/0x20 free_highmem_page+0x19/0x40 add_highpages_with_active_regions+0xab/0xeb set_highmem_pages_init+0x66/0x73 mem_init+0x1b/0x1d7 start_kernel+0x17a/0x363 i386_start_kernel+0x95/0x99 startup_32_smp+0x164/0x168 The reason for this error is that the span of MOVABLE_ZONE is extended to whole node span for future CMA initialization, and, normal memory is wrongly freed here. I submitted the fix and it seems to work, but, another problem happened. It's so late time to fix the later problem so I decide to reverting the series. Reported-by: Ville Syrjälä Acked-by: Laura Abbott Acked-by: Michal Hocko Cc: Andrew Morton Signed-off-by: Joonsoo Kim Signed-off-by: Linus Torvalds --- arch/arm/mm/dma-mapping.c | 16 +------- include/linux/memory_hotplug.h | 3 ++ include/linux/mm.h | 1 - mm/cma.c | 83 ++++++------------------------------------ mm/compaction.c | 4 +- mm/internal.h | 4 +- mm/page_alloc.c | 83 +++++++++++++++--------------------------- 7 files changed, 49 insertions(+), 145 deletions(-) (limited to 'arch') diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 8c398fedbbb6..ada8eb206a90 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -466,12 +466,6 @@ void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) void __init dma_contiguous_remap(void) { int i; - - if (!dma_mmu_remap_num) - return; - - /* call flush_cache_all() since CMA area would be large enough */ - flush_cache_all(); for (i = 0; i < dma_mmu_remap_num; i++) { phys_addr_t start = dma_mmu_remap[i].base; phys_addr_t end = start + dma_mmu_remap[i].size; @@ -504,15 +498,7 @@ void __init dma_contiguous_remap(void) flush_tlb_kernel_range(__phys_to_virt(start), __phys_to_virt(end)); - /* - * All the memory in CMA region will be on ZONE_MOVABLE. - * If that zone is considered as highmem, the memory in CMA - * region is also considered as highmem even if it's - * physical address belong to lowmem. In this case, - * re-mapping isn't required. - */ - if (!is_highmem_idx(ZONE_MOVABLE)) - iotable_init(&map, 1); + iotable_init(&map, 1); } } diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index e0e49b5b1ee1..2b0265265c28 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -216,6 +216,9 @@ void put_online_mems(void); void mem_hotplug_begin(void); void mem_hotplug_done(void); +extern void set_zone_contiguous(struct zone *zone); +extern void clear_zone_contiguous(struct zone *zone); + #else /* ! CONFIG_MEMORY_HOTPLUG */ #define pfn_to_online_page(pfn) \ ({ \ diff --git a/include/linux/mm.h b/include/linux/mm.h index c6fa9a255dbf..02a616e2f17d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2109,7 +2109,6 @@ extern void setup_per_cpu_pageset(void); extern void zone_pcp_update(struct zone *zone); extern void zone_pcp_reset(struct zone *zone); -extern void setup_zone_pageset(struct zone *zone); /* page_alloc.c */ extern int min_free_kbytes; diff --git a/mm/cma.c b/mm/cma.c index aa40e6c7b042..5809bbe360d7 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -39,7 +39,6 @@ #include #include "cma.h" -#include "internal.h" struct cma cma_areas[MAX_CMA_AREAS]; unsigned cma_area_count; @@ -110,25 +109,23 @@ static int __init cma_activate_area(struct cma *cma) if (!cma->bitmap) return -ENOMEM; + WARN_ON_ONCE(!pfn_valid(pfn)); + zone = page_zone(pfn_to_page(pfn)); + do { unsigned j; base_pfn = pfn; - if (!pfn_valid(base_pfn)) - goto err; - - zone = page_zone(pfn_to_page(base_pfn)); for (j = pageblock_nr_pages; j; --j, pfn++) { - if (!pfn_valid(pfn)) - goto err; - + WARN_ON_ONCE(!pfn_valid(pfn)); /* - * In init_cma_reserved_pageblock(), present_pages - * is adjusted with assumption that all pages in - * the pageblock come from a single zone. + * alloc_contig_range requires the pfn range + * specified to be in the same zone. Make this + * simple by forcing the entire CMA resv range + * to be in the same zone. */ if (page_zone(pfn_to_page(pfn)) != zone) - goto err; + goto not_in_zone; } init_cma_reserved_pageblock(pfn_to_page(base_pfn)); } while (--i); @@ -142,7 +139,7 @@ static int __init cma_activate_area(struct cma *cma) return 0; -err: +not_in_zone: pr_err("CMA area %s could not be activated\n", cma->name); kfree(cma->bitmap); cma->count = 0; @@ -152,41 +149,6 @@ err: static int __init cma_init_reserved_areas(void) { int i; - struct zone *zone; - pg_data_t *pgdat; - - if (!cma_area_count) - return 0; - - for_each_online_pgdat(pgdat) { - unsigned long start_pfn = UINT_MAX, end_pfn = 0; - - zone = &pgdat->node_zones[ZONE_MOVABLE]; - - /* - * In this case, we cannot adjust the zone range - * since it is now maximum node span and we don't - * know original zone range. - */ - if (populated_zone(zone)) - continue; - - for (i = 0; i < cma_area_count; i++) { - if (pfn_to_nid(cma_areas[i].base_pfn) != - pgdat->node_id) - continue; - - start_pfn = min(start_pfn, cma_areas[i].base_pfn); - end_pfn = max(end_pfn, cma_areas[i].base_pfn + - cma_areas[i].count); - } - - if (!end_pfn) - continue; - - zone->zone_start_pfn = start_pfn; - zone->spanned_pages = end_pfn - start_pfn; - } for (i = 0; i < cma_area_count; i++) { int ret = cma_activate_area(&cma_areas[i]); @@ -195,32 +157,9 @@ static int __init cma_init_reserved_areas(void) return ret; } - /* - * Reserved pages for ZONE_MOVABLE are now activated and - * this would change ZONE_MOVABLE's managed page counter and - * the other zones' present counter. We need to re-calculate - * various zone information that depends on this initialization. - */ - build_all_zonelists(NULL); - for_each_populated_zone(zone) { - if (zone_idx(zone) == ZONE_MOVABLE) { - zone_pcp_reset(zone); - setup_zone_pageset(zone); - } else - zone_pcp_update(zone); - - set_zone_contiguous(zone); - } - - /* - * We need to re-init per zone wmark by calling - * init_per_zone_wmark_min() but doesn't call here because it is - * registered on core_initcall and it will be called later than us. - */ - return 0; } -pure_initcall(cma_init_reserved_areas); +core_initcall(cma_init_reserved_areas); /** * cma_init_reserved_mem() - create custom contiguous area from reserved memory diff --git a/mm/compaction.c b/mm/compaction.c index 028b7210a669..29bd1df18b98 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1450,12 +1450,14 @@ static enum compact_result __compaction_suitable(struct zone *zone, int order, * if compaction succeeds. * For costly orders, we require low watermark instead of min for * compaction to proceed to increase its chances. + * ALLOC_CMA is used, as pages in CMA pageblocks are considered + * suitable migration targets */ watermark = (order > PAGE_ALLOC_COSTLY_ORDER) ? low_wmark_pages(zone) : min_wmark_pages(zone); watermark += compact_gap(order); if (!__zone_watermark_ok(zone, 0, watermark, classzone_idx, - 0, wmark_target)) + ALLOC_CMA, wmark_target)) return COMPACT_SKIPPED; return COMPACT_CONTINUE; diff --git a/mm/internal.h b/mm/internal.h index 62d8c34e63d5..502d14189794 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -168,9 +168,6 @@ extern void post_alloc_hook(struct page *page, unsigned int order, gfp_t gfp_flags); extern int user_min_free_kbytes; -extern void set_zone_contiguous(struct zone *zone); -extern void clear_zone_contiguous(struct zone *zone); - #if defined CONFIG_COMPACTION || defined CONFIG_CMA /* @@ -498,6 +495,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone, #define ALLOC_HARDER 0x10 /* try to alloc harder */ #define ALLOC_HIGH 0x20 /* __GFP_HIGH set */ #define ALLOC_CPUSET 0x40 /* check for correct cpuset */ +#define ALLOC_CMA 0x80 /* allow allocations from CMA areas */ enum ttu_flags; struct tlbflush_unmap_batch; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 905db9d7962f..511a7124d7f9 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1743,38 +1743,16 @@ void __init page_alloc_init_late(void) } #ifdef CONFIG_CMA -static void __init adjust_present_page_count(struct page *page, long count) -{ - struct zone *zone = page_zone(page); - - /* We don't need to hold a lock since it is boot-up process */ - zone->present_pages += count; -} - /* Free whole pageblock and set its migration type to MIGRATE_CMA. */ void __init init_cma_reserved_pageblock(struct page *page) { unsigned i = pageblock_nr_pages; - unsigned long pfn = page_to_pfn(page); struct page *p = page; - int nid = page_to_nid(page); - - /* - * ZONE_MOVABLE will steal present pages from other zones by - * changing page links so page_zone() is changed. Before that, - * we need to adjust previous zone's page count first. - */ - adjust_present_page_count(page, -pageblock_nr_pages); do { __ClearPageReserved(p); set_page_count(p, 0); - - /* Steal pages from other zones */ - set_page_links(p, ZONE_MOVABLE, nid, pfn); - } while (++p, ++pfn, --i); - - adjust_present_page_count(page, pageblock_nr_pages); + } while (++p, --i); set_pageblock_migratetype(page, MIGRATE_CMA); @@ -2889,7 +2867,7 @@ int __isolate_free_page(struct page *page, unsigned int order) * exists. */ watermark = min_wmark_pages(zone) + (1UL << order); - if (!zone_watermark_ok(zone, 0, watermark, 0, 0)) + if (!zone_watermark_ok(zone, 0, watermark, 0, ALLOC_CMA)) return 0; __mod_zone_freepage_state(zone, -(1UL << order), mt); @@ -3165,6 +3143,12 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, } +#ifdef CONFIG_CMA + /* If allocation can't use CMA areas don't use free CMA pages */ + if (!(alloc_flags & ALLOC_CMA)) + free_pages -= zone_page_state(z, NR_FREE_CMA_PAGES); +#endif + /* * Check watermarks for an order-0 allocation request. If these * are not met, then a high-order request also cannot go ahead @@ -3191,8 +3175,10 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, } #ifdef CONFIG_CMA - if (!list_empty(&area->free_list[MIGRATE_CMA])) + if ((alloc_flags & ALLOC_CMA) && + !list_empty(&area->free_list[MIGRATE_CMA])) { return true; + } #endif if (alloc_harder && !list_empty(&area->free_list[MIGRATE_HIGHATOMIC])) @@ -3212,6 +3198,13 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order, unsigned long mark, int classzone_idx, unsigned int alloc_flags) { long free_pages = zone_page_state(z, NR_FREE_PAGES); + long cma_pages = 0; + +#ifdef CONFIG_CMA + /* If allocation can't use CMA areas don't use free CMA pages */ + if (!(alloc_flags & ALLOC_CMA)) + cma_pages = zone_page_state(z, NR_FREE_CMA_PAGES); +#endif /* * Fast check for order-0 only. If this fails then the reserves @@ -3220,7 +3213,7 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order, * the caller is !atomic then it'll uselessly search the free * list. That corner case is then slower but it is harmless. */ - if (!order && free_pages > mark + z->lowmem_reserve[classzone_idx]) + if (!order && (free_pages - cma_pages) > mark + z->lowmem_reserve[classzone_idx]) return true; return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags, @@ -3856,6 +3849,10 @@ gfp_to_alloc_flags(gfp_t gfp_mask) } else if (unlikely(rt_task(current)) && !in_interrupt()) alloc_flags |= ALLOC_HARDER; +#ifdef CONFIG_CMA + if (gfpflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE) + alloc_flags |= ALLOC_CMA; +#endif return alloc_flags; } @@ -4322,6 +4319,9 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order, if (should_fail_alloc_page(gfp_mask, order)) return false; + if (IS_ENABLED(CONFIG_CMA) && ac->migratetype == MIGRATE_MOVABLE) + *alloc_flags |= ALLOC_CMA; + return true; } @@ -6204,7 +6204,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat) { enum zone_type j; int nid = pgdat->node_id; - unsigned long node_end_pfn = 0; pgdat_resize_init(pgdat); #ifdef CONFIG_NUMA_BALANCING @@ -6232,13 +6231,9 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat) struct zone *zone = pgdat->node_zones + j; unsigned long size, realsize, freesize, memmap_pages; unsigned long zone_start_pfn = zone->zone_start_pfn; - unsigned long movable_size = 0; size = zone->spanned_pages; realsize = freesize = zone->present_pages; - if (zone_end_pfn(zone) > node_end_pfn) - node_end_pfn = zone_end_pfn(zone); - /* * Adjust freesize so that it accounts for how much memory @@ -6287,30 +6282,12 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat) zone_seqlock_init(zone); zone_pcp_init(zone); - /* - * The size of the CMA area is unknown now so we need to - * prepare the memory for the usemap at maximum. - */ - if (IS_ENABLED(CONFIG_CMA) && j == ZONE_MOVABLE && - pgdat->node_spanned_pages) { - movable_size = node_end_pfn - pgdat->node_start_pfn; - } - - if (!size && !movable_size) + if (!size) continue; set_pageblock_order(); - if (movable_size) { - zone->zone_start_pfn = pgdat->node_start_pfn; - zone->spanned_pages = movable_size; - setup_usemap(pgdat, zone, - pgdat->node_start_pfn, movable_size); - init_currently_empty_zone(zone, - pgdat->node_start_pfn, movable_size); - } else { - setup_usemap(pgdat, zone, zone_start_pfn, size); - init_currently_empty_zone(zone, zone_start_pfn, size); - } + setup_usemap(pgdat, zone, zone_start_pfn, size); + init_currently_empty_zone(zone, zone_start_pfn, size); memmap_init(size, nid, j, zone_start_pfn); } } @@ -7951,7 +7928,7 @@ void free_contig_range(unsigned long pfn, unsigned nr_pages) } #endif -#if defined CONFIG_MEMORY_HOTPLUG || defined CONFIG_CMA +#ifdef CONFIG_MEMORY_HOTPLUG /* * The zone indicated has a new number of managed_pages; batch sizes and percpu * page high values need to be recalulated. -- cgit v1.2.3-59-g8ed1b