From 8a4b06d391b0a42a373808979b5028f5c84d9c6a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 18 Feb 2019 22:51:43 +0100 Subject: x86/speculation/mds: Add sysfs reporting for MDS Add the sysfs reporting file for MDS. It exposes the vulnerability and mitigation state similar to the existing files for the other speculative hardware vulnerabilities. Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Borislav Petkov Reviewed-by: Jon Masters Tested-by: Jon Masters --- include/linux/cpu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/cpu.h') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 5041357d0297..3c87ad888ed3 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -57,6 +57,8 @@ extern ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_mds(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, -- cgit v1.3-8-gc7d7 From de7b77e5bb9451417ca57f1b6501da654587c048 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 27 Mar 2019 07:00:29 -0500 Subject: cpu/hotplug: Create SMT sysfs interface for all arches Make the /sys/devices/system/cpu/smt/* files available on all arches, so user space has a consistent way to detect whether SMT is enabled. The 'control' file now shows 'notimplemented' for architectures which don't yet have CONFIG_HOTPLUG_SMT. [ tglx: Make notimplemented a real state ] Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Cc: Andrea Arcangeli Cc: Waiman Long Cc: Peter Zijlstra Cc: Jiri Kosina Link: https://lkml.kernel.org/r/469c2b98055f2c41e75748e06447d592a64080c9.1553635520.git.jpoimboe@redhat.com --- Documentation/ABI/testing/sysfs-devices-system-cpu | 10 ++-- include/linux/cpu.h | 3 +- kernel/cpu.c | 64 +++++++++++++--------- 3 files changed, 47 insertions(+), 30 deletions(-) (limited to 'include/linux/cpu.h') diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 9605dbd4b5b5..5eea46fefcb2 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -511,10 +511,12 @@ Description: Control Symetric Multi Threading (SMT) control: Read/write interface to control SMT. Possible values: - "on" SMT is enabled - "off" SMT is disabled - "forceoff" SMT is force disabled. Cannot be changed. - "notsupported" SMT is not supported by the CPU + "on" SMT is enabled + "off" SMT is disabled + "forceoff" SMT is force disabled. Cannot be changed. + "notsupported" SMT is not supported by the CPU + "notimplemented" SMT runtime toggling is not + implemented for the architecture If control status is "forceoff" or "notsupported" writes are rejected. diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 5041357d0297..ae99dde02320 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -175,6 +175,7 @@ enum cpuhp_smt_control { CPU_SMT_DISABLED, CPU_SMT_FORCE_DISABLED, CPU_SMT_NOT_SUPPORTED, + CPU_SMT_NOT_IMPLEMENTED, }; #if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT) @@ -182,7 +183,7 @@ extern enum cpuhp_smt_control cpu_smt_control; extern void cpu_smt_disable(bool force); extern void cpu_smt_check_topology(void); #else -# define cpu_smt_control (CPU_SMT_ENABLED) +# define cpu_smt_control (CPU_SMT_NOT_IMPLEMENTED) static inline void cpu_smt_disable(bool force) { } static inline void cpu_smt_check_topology(void) { } #endif diff --git a/kernel/cpu.c b/kernel/cpu.c index 6754f3ecfd94..b8bf3f93e39b 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2033,19 +2033,6 @@ static const struct attribute_group cpuhp_cpu_root_attr_group = { #ifdef CONFIG_HOTPLUG_SMT -static const char *smt_states[] = { - [CPU_SMT_ENABLED] = "on", - [CPU_SMT_DISABLED] = "off", - [CPU_SMT_FORCE_DISABLED] = "forceoff", - [CPU_SMT_NOT_SUPPORTED] = "notsupported", -}; - -static ssize_t -show_smt_control(struct device *dev, struct device_attribute *attr, char *buf) -{ - return snprintf(buf, PAGE_SIZE - 2, "%s\n", smt_states[cpu_smt_control]); -} - static void cpuhp_offline_cpu_device(unsigned int cpu) { struct device *dev = get_cpu_device(cpu); @@ -2116,9 +2103,10 @@ static int cpuhp_smt_enable(void) return ret; } + static ssize_t -store_smt_control(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) +__store_smt_control(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { int ctrlval, ret; @@ -2156,14 +2144,44 @@ store_smt_control(struct device *dev, struct device_attribute *attr, unlock_device_hotplug(); return ret ? ret : count; } + +#else /* !CONFIG_HOTPLUG_SMT */ +static ssize_t +__store_smt_control(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + return -ENODEV; +} +#endif /* CONFIG_HOTPLUG_SMT */ + +static const char *smt_states[] = { + [CPU_SMT_ENABLED] = "on", + [CPU_SMT_DISABLED] = "off", + [CPU_SMT_FORCE_DISABLED] = "forceoff", + [CPU_SMT_NOT_SUPPORTED] = "notsupported", + [CPU_SMT_NOT_IMPLEMENTED] = "notimplemented", +}; + +static ssize_t +show_smt_control(struct device *dev, struct device_attribute *attr, char *buf) +{ + const char *state = smt_states[cpu_smt_control]; + + return snprintf(buf, PAGE_SIZE - 2, "%s\n", state); +} + +static ssize_t +store_smt_control(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + return __store_smt_control(dev, attr, buf, count); +} static DEVICE_ATTR(control, 0644, show_smt_control, store_smt_control); static ssize_t show_smt_active(struct device *dev, struct device_attribute *attr, char *buf) { - bool active = topology_max_smt_threads() > 1; - - return snprintf(buf, PAGE_SIZE - 2, "%d\n", active); + return snprintf(buf, PAGE_SIZE - 2, "%d\n", sched_smt_active()); } static DEVICE_ATTR(active, 0444, show_smt_active, NULL); @@ -2179,21 +2197,17 @@ static const struct attribute_group cpuhp_smt_attr_group = { NULL }; -static int __init cpu_smt_state_init(void) +static int __init cpu_smt_sysfs_init(void) { return sysfs_create_group(&cpu_subsys.dev_root->kobj, &cpuhp_smt_attr_group); } -#else -static inline int cpu_smt_state_init(void) { return 0; } -#endif - static int __init cpuhp_sysfs_init(void) { int cpu, ret; - ret = cpu_smt_state_init(); + ret = cpu_smt_sysfs_init(); if (ret) return ret; @@ -2214,7 +2228,7 @@ static int __init cpuhp_sysfs_init(void) return 0; } device_initcall(cpuhp_sysfs_init); -#endif +#endif /* CONFIG_SYSFS && CONFIG_HOTPLUG_CPU */ /* * cpu_bit_bitmap[] is a special, "compressed" data structure that -- cgit v1.3-8-gc7d7 From 98af8452945c55652de68536afdde3b520fec429 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 12 Apr 2019 15:39:28 -0500 Subject: cpu/speculation: Add 'mitigations=' cmdline option Keeping track of the number of mitigations for all the CPU speculation bugs has become overwhelming for many users. It's getting more and more complicated to decide which mitigations are needed for a given architecture. Complicating matters is the fact that each arch tends to have its own custom way to mitigate the same vulnerability. Most users fall into a few basic categories: a) they want all mitigations off; b) they want all reasonable mitigations on, with SMT enabled even if it's vulnerable; or c) they want all reasonable mitigations on, with SMT disabled if vulnerable. Define a set of curated, arch-independent options, each of which is an aggregation of existing options: - mitigations=off: Disable all mitigations. - mitigations=auto: [default] Enable all the default mitigations, but leave SMT enabled, even if it's vulnerable. - mitigations=auto,nosmt: Enable all the default mitigations, disabling SMT if needed by a mitigation. Currently, these options are placeholders which don't actually do anything. They will be fleshed out in upcoming patches. Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Tested-by: Jiri Kosina (on x86) Reviewed-by: Jiri Kosina Cc: Borislav Petkov Cc: "H . Peter Anvin" Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Jiri Kosina Cc: Waiman Long Cc: Andrea Arcangeli Cc: Jon Masters Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: linuxppc-dev@lists.ozlabs.org Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: linux-s390@vger.kernel.org Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-arch@vger.kernel.org Cc: Greg Kroah-Hartman Cc: Tyler Hicks Cc: Linus Torvalds Cc: Randy Dunlap Cc: Steven Price Cc: Phil Auld Link: https://lkml.kernel.org/r/b07a8ef9b7c5055c3a4637c87d07c296d5016fe0.1555085500.git.jpoimboe@redhat.com --- Documentation/admin-guide/kernel-parameters.txt | 24 ++++++++++++++++++++++++ include/linux/cpu.h | 24 ++++++++++++++++++++++++ kernel/cpu.c | 15 +++++++++++++++ 3 files changed, 63 insertions(+) (limited to 'include/linux/cpu.h') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 858b6c0b9a15..720ffa9c4e04 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2513,6 +2513,30 @@ in the "bleeding edge" mini2440 support kernel at http://repo.or.cz/w/linux-2.6/mini2440.git + mitigations= + Control optional mitigations for CPU vulnerabilities. + This is a set of curated, arch-independent options, each + of which is an aggregation of existing arch-specific + options. + + off + Disable all optional CPU mitigations. This + improves system performance, but it may also + expose users to several CPU vulnerabilities. + + auto (default) + Mitigate all CPU vulnerabilities, but leave SMT + enabled, even if it's vulnerable. This is for + users who don't want to be surprised by SMT + getting disabled across kernel upgrades, or who + have other ways of avoiding SMT-based attacks. + This is the default behavior. + + auto,nosmt + Mitigate all CPU vulnerabilities, disabling SMT + if needed. This is for users who always want to + be fully mitigated, even if it means losing SMT. + mminit_loglevel= [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this parameter allows control of the logging verbosity for diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 5041357d0297..2d9c6f4b78f5 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -187,4 +187,28 @@ static inline void cpu_smt_disable(bool force) { } static inline void cpu_smt_check_topology(void) { } #endif +/* + * These are used for a global "mitigations=" cmdline option for toggling + * optional CPU mitigations. + */ +enum cpu_mitigations { + CPU_MITIGATIONS_OFF, + CPU_MITIGATIONS_AUTO, + CPU_MITIGATIONS_AUTO_NOSMT, +}; + +extern enum cpu_mitigations cpu_mitigations; + +/* mitigations=off */ +static inline bool cpu_mitigations_off(void) +{ + return cpu_mitigations == CPU_MITIGATIONS_OFF; +} + +/* mitigations=auto,nosmt */ +static inline bool cpu_mitigations_auto_nosmt(void) +{ + return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT; +} + #endif /* _LINUX_CPU_H_ */ diff --git a/kernel/cpu.c b/kernel/cpu.c index d1c6d152da89..e70a90634b41 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2279,3 +2279,18 @@ void __init boot_cpu_hotplug_init(void) #endif this_cpu_write(cpuhp_state.state, CPUHP_ONLINE); } + +enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO; + +static int __init mitigations_parse_cmdline(char *arg) +{ + if (!strcmp(arg, "off")) + cpu_mitigations = CPU_MITIGATIONS_OFF; + else if (!strcmp(arg, "auto")) + cpu_mitigations = CPU_MITIGATIONS_AUTO; + else if (!strcmp(arg, "auto,nosmt")) + cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT; + + return 0; +} +early_param("mitigations", mitigations_parse_cmdline); -- cgit v1.3-8-gc7d7 From 2f1a6fbbef7781382850c3104ecb658f21b5d460 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 11 Apr 2019 13:34:45 +1000 Subject: power/suspend: Add function to disable secondaries for suspend This adds a function to disable secondary CPUs for suspend that are not necessarily non-zero / non-boot CPUs. Platforms will be able to use this to suspend using non-zero CPUs. Signed-off-by: Nicholas Piggin Signed-off-by: Peter Zijlstra (Intel) Cc: Frederic Weisbecker Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rafael J . Wysocki Cc: Thomas Gleixner Cc: linuxppc-dev@lists.ozlabs.org Link: https://lkml.kernel.org/r/20190411033448.20842-3-npiggin@gmail.com Signed-off-by: Ingo Molnar --- include/linux/cpu.h | 12 ++++++++++++ kernel/kexec_core.c | 4 ++-- kernel/power/hibernate.c | 12 ++++++------ kernel/power/suspend.c | 4 ++-- 4 files changed, 22 insertions(+), 10 deletions(-) (limited to 'include/linux/cpu.h') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 5041357d0297..7ab2f09c0a14 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -137,9 +137,21 @@ static inline int disable_nonboot_cpus(void) return freeze_secondary_cpus(0); } extern void enable_nonboot_cpus(void); + +static inline int suspend_disable_secondary_cpus(void) +{ + return freeze_secondary_cpus(0); +} +static inline void suspend_enable_secondary_cpus(void) +{ + return enable_nonboot_cpus(); +} + #else /* !CONFIG_PM_SLEEP_SMP */ static inline int disable_nonboot_cpus(void) { return 0; } static inline void enable_nonboot_cpus(void) {} +static inline int suspend_disable_secondary_cpus(void) { return 0; } +static inline void suspend_enable_secondary_cpus(void) { } #endif /* !CONFIG_PM_SLEEP_SMP */ void cpu_startup_entry(enum cpuhp_state state); diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index d7140447be75..fd5c95ff9251 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -1150,7 +1150,7 @@ int kernel_kexec(void) error = dpm_suspend_end(PMSG_FREEZE); if (error) goto Resume_devices; - error = disable_nonboot_cpus(); + error = suspend_disable_secondary_cpus(); if (error) goto Enable_cpus; local_irq_disable(); @@ -1183,7 +1183,7 @@ int kernel_kexec(void) Enable_irqs: local_irq_enable(); Enable_cpus: - enable_nonboot_cpus(); + suspend_enable_secondary_cpus(); dpm_resume_start(PMSG_RESTORE); Resume_devices: dpm_resume_end(PMSG_RESTORE); diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index abef759de7c8..cfc7a57049e4 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -281,7 +281,7 @@ static int create_image(int platform_mode) if (error || hibernation_test(TEST_PLATFORM)) goto Platform_finish; - error = disable_nonboot_cpus(); + error = suspend_disable_secondary_cpus(); if (error || hibernation_test(TEST_CPUS)) goto Enable_cpus; @@ -323,7 +323,7 @@ static int create_image(int platform_mode) local_irq_enable(); Enable_cpus: - enable_nonboot_cpus(); + suspend_enable_secondary_cpus(); Platform_finish: platform_finish(platform_mode); @@ -417,7 +417,7 @@ int hibernation_snapshot(int platform_mode) int __weak hibernate_resume_nonboot_cpu_disable(void) { - return disable_nonboot_cpus(); + return suspend_disable_secondary_cpus(); } /** @@ -486,7 +486,7 @@ static int resume_target_kernel(bool platform_mode) local_irq_enable(); Enable_cpus: - enable_nonboot_cpus(); + suspend_enable_secondary_cpus(); Cleanup: platform_restore_cleanup(platform_mode); @@ -564,7 +564,7 @@ int hibernation_platform_enter(void) if (error) goto Platform_finish; - error = disable_nonboot_cpus(); + error = suspend_disable_secondary_cpus(); if (error) goto Enable_cpus; @@ -586,7 +586,7 @@ int hibernation_platform_enter(void) local_irq_enable(); Enable_cpus: - enable_nonboot_cpus(); + suspend_enable_secondary_cpus(); Platform_finish: hibernation_ops->finish(); diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 0bd595a0b610..59b6def23046 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -428,7 +428,7 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) if (suspend_test(TEST_PLATFORM)) goto Platform_wake; - error = disable_nonboot_cpus(); + error = suspend_disable_secondary_cpus(); if (error || suspend_test(TEST_CPUS)) goto Enable_cpus; @@ -458,7 +458,7 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) BUG_ON(irqs_disabled()); Enable_cpus: - enable_nonboot_cpus(); + suspend_enable_secondary_cpus(); Platform_wake: platform_resume_noirq(state); -- cgit v1.3-8-gc7d7 From 9ca12ac04bb7d7cfb28aa549dcd3d15761f15543 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 11 Apr 2019 13:34:46 +1000 Subject: kernel/cpu: Allow non-zero CPU to be primary for suspend / kexec freeze This patch provides an arch option, ARCH_SUSPEND_NONZERO_CPU, to opt-in to allowing suspend to occur on one of the housekeeping CPUs rather than hardcoded CPU0. This will allow CPU0 to be a nohz_full CPU with a later change. It may be possible for platforms with hardware/firmware restrictions on suspend/wake effectively support this by handing off the final stage to CPU0 when kernel housekeeping is no longer required. Another option is to make housekeeping / nohz_full mask dynamic at runtime, but the complexity could not be justified at this time. Signed-off-by: Nicholas Piggin Signed-off-by: Peter Zijlstra (Intel) Cc: Frederic Weisbecker Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rafael J . Wysocki Cc: Thomas Gleixner Cc: linuxppc-dev@lists.ozlabs.org Link: https://lkml.kernel.org/r/20190411033448.20842-4-npiggin@gmail.com Signed-off-by: Ingo Molnar --- arch/powerpc/Kconfig | 4 ++++ include/linux/cpu.h | 7 ++++++- kernel/cpu.c | 10 +++++++++- kernel/power/Kconfig | 9 +++++++++ 4 files changed, 28 insertions(+), 2 deletions(-) (limited to 'include/linux/cpu.h') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 2d0be82c3061..bc98b0e37a10 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -318,6 +318,10 @@ config ARCH_SUSPEND_POSSIBLE (PPC_85xx && !PPC_E500MC) || PPC_86xx || PPC_PSERIES \ || 44x || 40x +config ARCH_SUSPEND_NONZERO_CPU + def_bool y + depends on PPC_POWERNV || PPC_PSERIES + config PPC_DCR_NATIVE bool diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 7ab2f09c0a14..73baab8535c1 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -140,7 +140,12 @@ extern void enable_nonboot_cpus(void); static inline int suspend_disable_secondary_cpus(void) { - return freeze_secondary_cpus(0); + int cpu = 0; + + if (IS_ENABLED(CONFIG_PM_SLEEP_SMP_NONZERO_CPU)) + cpu = -1; + + return freeze_secondary_cpus(cpu); } static inline void suspend_enable_secondary_cpus(void) { diff --git a/kernel/cpu.c b/kernel/cpu.c index 6754f3ecfd94..d1bf6e2b4752 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -1199,8 +1200,15 @@ int freeze_secondary_cpus(int primary) int cpu, error = 0; cpu_maps_update_begin(); - if (!cpu_online(primary)) + if (primary == -1) { primary = cpumask_first(cpu_online_mask); + if (!housekeeping_cpu(primary, HK_FLAG_TIMER)) + primary = housekeeping_any_cpu(HK_FLAG_TIMER); + } else { + if (!cpu_online(primary)) + primary = cpumask_first(cpu_online_mask); + } + /* * We take down all of the non-boot CPUs in one shot to avoid races * with the userspace trying to use the CPU hotplug at the same time diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index f8fe57d1022e..9bbaaab14b36 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -114,6 +114,15 @@ config PM_SLEEP_SMP depends on PM_SLEEP select HOTPLUG_CPU +config PM_SLEEP_SMP_NONZERO_CPU + def_bool y + depends on PM_SLEEP_SMP + depends on ARCH_SUSPEND_NONZERO_CPU + ---help--- + If an arch can suspend (for suspend, hibernate, kexec, etc) on a + non-zero numbered CPU, it may define ARCH_SUSPEND_NONZERO_CPU. This + will allow nohz_full mask to include CPU0. + config PM_AUTOSLEEP bool "Opportunistic sleep" depends on PM_SLEEP -- cgit v1.3-8-gc7d7 From ec527c318036a65a083ef68d8ba95789d2212246 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Thu, 30 May 2019 00:09:39 +0200 Subject: x86/power: Fix 'nosmt' vs hibernation triple fault during resume As explained in 0cc3cd21657b ("cpu/hotplug: Boot HT siblings at least once") we always, no matter what, have to bring up x86 HT siblings during boot at least once in order to avoid first MCE bringing the system to its knees. That means that whenever 'nosmt' is supplied on the kernel command-line, all the HT siblings are as a result sitting in mwait or cpudile after going through the online-offline cycle at least once. This causes a serious issue though when a kernel, which saw 'nosmt' on its commandline, is going to perform resume from hibernation: if the resume from the hibernated image is successful, cr3 is flipped in order to point to the address space of the kernel that is being resumed, which in turn means that all the HT siblings are all of a sudden mwaiting on address which is no longer valid. That results in triple fault shortly after cr3 is switched, and machine reboots. Fix this by always waking up all the SMT siblings before initiating the 'restore from hibernation' process; this guarantees that all the HT siblings will be properly carried over to the resumed kernel waiting in resume_play_dead(), and acted upon accordingly afterwards, based on the target kernel configuration. Symmetricaly, the resumed kernel has to push the SMT siblings to mwait again in case it has SMT disabled; this means it has to online all the siblings when resuming (so that they come out of hlt) and offline them again to let them reach mwait. Cc: 4.19+ # v4.19+ Debugged-by: Thomas Gleixner Fixes: 0cc3cd21657b ("cpu/hotplug: Boot HT siblings at least once") Signed-off-by: Jiri Kosina Acked-by: Pavel Machek Reviewed-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Signed-off-by: Rafael J. Wysocki --- arch/x86/power/cpu.c | 10 ++++++++++ arch/x86/power/hibernate.c | 33 +++++++++++++++++++++++++++++++++ include/linux/cpu.h | 4 ++++ kernel/cpu.c | 4 ++-- kernel/power/hibernate.c | 9 +++++++++ 5 files changed, 58 insertions(+), 2 deletions(-) (limited to 'include/linux/cpu.h') diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index a7d966964c6f..513ce09e9950 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -299,7 +299,17 @@ int hibernate_resume_nonboot_cpu_disable(void) * address in its instruction pointer may not be possible to resolve * any more at that point (the page tables used by it previously may * have been overwritten by hibernate image data). + * + * First, make sure that we wake up all the potentially disabled SMT + * threads which have been initially brought up and then put into + * mwait/cpuidle sleep. + * Those will be put to proper (not interfering with hibernation + * resume) sleep afterwards, and the resumed kernel will decide itself + * what to do with them. */ + ret = cpuhp_smt_enable(); + if (ret) + return ret; smp_ops.play_dead = resume_play_dead; ret = disable_nonboot_cpus(); smp_ops.play_dead = play_dead; diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c index 4845b8c7be7f..fc413717a45f 100644 --- a/arch/x86/power/hibernate.c +++ b/arch/x86/power/hibernate.c @@ -11,6 +11,7 @@ #include #include #include +#include #include @@ -245,3 +246,35 @@ out: __flush_tlb_all(); return 0; } + +int arch_resume_nosmt(void) +{ + int ret = 0; + /* + * We reached this while coming out of hibernation. This means + * that SMT siblings are sleeping in hlt, as mwait is not safe + * against control transition during resume (see comment in + * hibernate_resume_nonboot_cpu_disable()). + * + * If the resumed kernel has SMT disabled, we have to take all the + * SMT siblings out of hlt, and offline them again so that they + * end up in mwait proper. + * + * Called with hotplug disabled. + */ + cpu_hotplug_enable(); + if (cpu_smt_control == CPU_SMT_DISABLED || + cpu_smt_control == CPU_SMT_FORCE_DISABLED) { + enum cpuhp_smt_control old = cpu_smt_control; + + ret = cpuhp_smt_enable(); + if (ret) + goto out; + ret = cpuhp_smt_disable(old); + if (ret) + goto out; + } +out: + cpu_hotplug_disable(); + return ret; +} diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 3813fe45effd..fcb1386bb0d4 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -201,10 +201,14 @@ enum cpuhp_smt_control { extern enum cpuhp_smt_control cpu_smt_control; extern void cpu_smt_disable(bool force); extern void cpu_smt_check_topology(void); +extern int cpuhp_smt_enable(void); +extern int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval); #else # define cpu_smt_control (CPU_SMT_NOT_IMPLEMENTED) static inline void cpu_smt_disable(bool force) { } static inline void cpu_smt_check_topology(void) { } +static inline int cpuhp_smt_enable(void) { return 0; } +static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0; } #endif /* diff --git a/kernel/cpu.c b/kernel/cpu.c index f2ef10460698..077fde6fb953 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2061,7 +2061,7 @@ static void cpuhp_online_cpu_device(unsigned int cpu) kobject_uevent(&dev->kobj, KOBJ_ONLINE); } -static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) +int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { int cpu, ret = 0; @@ -2093,7 +2093,7 @@ static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) return ret; } -static int cpuhp_smt_enable(void) +int cpuhp_smt_enable(void) { int cpu, ret = 0; diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index c8c272df7154..b65635753e8e 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -257,6 +257,11 @@ void swsusp_show_speed(ktime_t start, ktime_t stop, (kps % 1000) / 10); } +__weak int arch_resume_nosmt(void) +{ + return 0; +} + /** * create_image - Create a hibernation image. * @platform_mode: Whether or not to use the platform driver. @@ -324,6 +329,10 @@ static int create_image(int platform_mode) Enable_cpus: suspend_enable_secondary_cpus(); + /* Allow architectures to do nosmt-specific post-resume dances */ + if (!in_suspend) + error = arch_resume_nosmt(); + Platform_finish: platform_finish(platform_mode); -- cgit v1.3-8-gc7d7