Diffstat (limited to 'arch/arm64/kernel/smp.c')
-rw-r--r--  arch/arm64/kernel/smp.c | 432
1 file changed, 297 insertions(+), 135 deletions(-)
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 62ed361a4376..3b3f6b56e733 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -32,7 +32,9 @@
 #include <linux/irq_work.h>
 #include <linux/kernel_stat.h>
 #include <linux/kexec.h>
+#include <linux/kgdb.h>
 #include <linux/kvm_host.h>
+#include <linux/nmi.h>
 
 #include <asm/alternative.h>
 #include <asm/atomic.h>
@@ -51,12 +53,8 @@
 #include <asm/ptrace.h>
 #include <asm/virt.h>
 
-#define CREATE_TRACE_POINTS
 #include <trace/events/ipi.h>
 
-DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number);
-EXPORT_PER_CPU_SYMBOL(cpu_number);
-
 /*
  * as from 2.5, kernels no longer have an init_tasks structure
  * so we need some other way of telling a new secondary core
@@ -70,16 +68,24 @@ enum ipi_msg_type {
         IPI_RESCHEDULE,
         IPI_CALL_FUNC,
         IPI_CPU_STOP,
-        IPI_CPU_CRASH_STOP,
+        IPI_CPU_STOP_NMI,
         IPI_TIMER,
         IPI_IRQ_WORK,
-        IPI_WAKEUP,
-        NR_IPI
+        NR_IPI,
+        /*
+         * Any enum >= NR_IPI and < MAX_IPI is special and not traceable
+         * with trace_ipi_*
+         */
+        IPI_CPU_BACKTRACE = NR_IPI,
+        IPI_KGDB_ROUNDUP,
+        MAX_IPI
 };
 
-static int ipi_irq_base __read_mostly;
-static int nr_ipi __read_mostly = NR_IPI;
-static struct irq_desc *ipi_desc[NR_IPI] __read_mostly;
+static int ipi_irq_base __ro_after_init;
+static int nr_ipi __ro_after_init = NR_IPI;
+static struct irq_desc *ipi_desc[MAX_IPI] __ro_after_init;
+
+static bool crash_stop;
 
 static void ipi_setup(int cpu);
 
@@ -125,7 +131,8 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
 
         /* Now bring the CPU into our world */
         ret = boot_secondary(cpu, idle);
         if (ret) {
-                pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
+                if (ret != -EPERM)
+                        pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
                 return ret;
         }
@@ -216,7 +223,7 @@ asmlinkage notrace void secondary_start_kernel(void)
         if (system_uses_irq_prio_masking())
                 init_gic_priority_masking();
 
-        rcu_cpu_starting(cpu);
+        rcutree_report_cpu_starting(cpu);
         trace_hardirqs_off();
 
         /*
@@ -257,6 +264,13 @@ asmlinkage notrace void secondary_start_kernel(void)
         set_cpu_online(cpu, true);
         complete(&cpu_running);
 
+        /*
+         * Secondary CPUs enter the kernel with all DAIF exceptions masked.
+         *
+         * As with setup_arch() we must unmask Debug and SError exceptions, and
+         * as the root irqchip has already been detected and initialized we can
+         * unmask IRQ and FIQ at the same time.
+         */
         local_daif_restore(DAIF_PROCCTX);
 
         /*
@@ -333,17 +347,13 @@ static int op_cpu_kill(unsigned int cpu)
 }
 
 /*
- * called on the thread which is asking for a CPU to be shutdown -
- * waits until shutdown has completed, or it is timed out.
+ * Called on the thread which is asking for a CPU to be shutdown after the
+ * shutdown completed.
  */
-void __cpu_die(unsigned int cpu)
+void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
 {
         int err;
 
-        if (!cpu_wait_death(cpu, 5)) {
-                pr_crit("CPU%u: cpu didn't die\n", cpu);
-                return;
-        }
         pr_debug("CPU%u: shutdown\n", cpu);
 
         /*
@@ -361,7 +371,7 @@
  * Called from the idle thread for the CPU which has been shutdown.
  *
  */
-void cpu_die(void)
+void __noreturn cpu_die(void)
 {
         unsigned int cpu = smp_processor_id();
         const struct cpu_operations *ops = get_cpu_ops(cpu);
@@ -370,8 +380,8 @@ void cpu_die(void)
 
         local_daif_mask();
 
-        /* Tell __cpu_die() that this CPU is now safe to dispose of */
-        (void)cpu_report_death();
+        /* Tell cpuhp_bp_sync_dead() that this CPU is now safe to dispose of */
+        cpuhp_ap_report_dead();
 
         /*
          * Actually shutdown the CPU. This must never fail. The specific hotplug
@@ -398,7 +408,7 @@ static void __cpu_try_die(int cpu)
  * Kill the calling secondary CPU, early in bringup before it is turned
  * online.
  */
-void cpu_die_early(void)
+void __noreturn cpu_die_early(void)
 {
         int cpu = smp_processor_id();
 
@@ -406,7 +416,7 @@ void cpu_die_early(void)
         /* Mark this CPU absent */
         set_cpu_present(cpu, 0);
-        rcu_report_dead(cpu);
+        rcutree_report_cpu_dead();
 
         if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) {
                 update_cpu_boot_status(CPU_KILL_ME);
@@ -436,9 +446,9 @@ static void __init hyp_mode_check(void)
 void __init smp_cpus_done(unsigned int max_cpus)
 {
         pr_info("SMP: Total of %d processors activated.\n", num_online_cpus());
-        setup_cpu_features();
         hyp_mode_check();
-        apply_alternatives_all();
+        setup_system_features();
+        setup_user_features();
         mark_linear_text_alias_ro();
 }
 
@@ -450,20 +460,17 @@ void __init smp_prepare_boot_cpu(void)
          * freed shortly, so we must move over to the runtime per-cpu area.
          */
         set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
-        cpuinfo_store_boot_cpu();
 
-        /*
-         * We now know enough about the boot CPU to apply the
-         * alternatives that cannot wait until interrupt handling
-         * and/or scheduling is enabled.
-         */
-        apply_boot_alternatives();
+        cpuinfo_store_boot_cpu();
+        setup_boot_cpu_features();
 
         /* Conditionally switch to GIC PMR for interrupt masking */
         if (system_uses_irq_prio_masking())
                 init_gic_priority_masking();
 
         kasan_init_hw_tags();
+        /* Init percpu seeds for random tags after cpus are set up. */
+        kasan_init_sw_tags();
 }
 
 /*
@@ -505,6 +512,59 @@ static int __init smp_cpu_setup(int cpu)
 static bool bootcpu_valid __initdata;
 static unsigned int cpu_count = 1;
 
+int arch_register_cpu(int cpu)
+{
+        acpi_handle acpi_handle = acpi_get_processor_handle(cpu);
+        struct cpu *c = &per_cpu(cpu_devices, cpu);
+
+        if (!acpi_disabled && !acpi_handle &&
+            IS_ENABLED(CONFIG_ACPI_HOTPLUG_CPU))
+                return -EPROBE_DEFER;
+
+#ifdef CONFIG_ACPI_HOTPLUG_CPU
+        /* For now block anything that looks like physical CPU Hotplug */
+        if (invalid_logical_cpuid(cpu) || !cpu_present(cpu)) {
+                pr_err_once("Changing CPU present bit is not supported\n");
+                return -ENODEV;
+        }
+#endif
+
+        /*
+         * Availability of the acpi handle is sufficient to establish
+         * that _STA has already been checked. No need to recheck here.
+         */
+        c->hotpluggable = arch_cpu_is_hotpluggable(cpu);
+
+        return register_cpu(c, cpu);
+}
+
+#ifdef CONFIG_ACPI_HOTPLUG_CPU
+void arch_unregister_cpu(int cpu)
+{
+        acpi_handle acpi_handle = acpi_get_processor_handle(cpu);
+        struct cpu *c = &per_cpu(cpu_devices, cpu);
+        acpi_status status;
+        unsigned long long sta;
+
+        if (!acpi_handle) {
+                pr_err_once("Removing a CPU without associated ACPI handle\n");
+                return;
+        }
+
+        status = acpi_evaluate_integer(acpi_handle, "_STA", NULL, &sta);
+        if (ACPI_FAILURE(status))
+                return;
+
+        /* For now do not allow anything that looks like physical CPU HP */
+        if (cpu_present(cpu) && !(sta & ACPI_STA_DEVICE_PRESENT)) {
+                pr_err_once("Changing CPU present bit is not supported\n");
+                return;
+        }
+
+        unregister_cpu(c);
+}
+#endif /* CONFIG_ACPI_HOTPLUG_CPU */
+
 #ifdef CONFIG_ACPI
 static struct acpi_madt_generic_interrupt cpu_madt_gicc[NR_CPUS];
 
@@ -525,7 +585,8 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
 {
         u64 hwid = processor->arm_mpidr;
 
-        if (!(processor->flags & ACPI_MADT_ENABLED)) {
+        if (!(processor->flags &
+              (ACPI_MADT_ENABLED | ACPI_MADT_GICC_ONLINE_CAPABLE))) {
                 pr_debug("skipping disabled CPU entry with 0x%llx MPIDR\n", hwid);
                 return;
         }
@@ -744,8 +805,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
          */
         for_each_possible_cpu(cpu) {
 
-                per_cpu(cpu_number, cpu) = cpu;
-
                 if (cpu == smp_processor_id())
                         continue;
 
@@ -762,14 +821,15 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
         }
 }
 
-static const char *ipi_types[NR_IPI] __tracepoint_string = {
+static const char *ipi_types[MAX_IPI] __tracepoint_string = {
         [IPI_RESCHEDULE]        = "Rescheduling interrupts",
         [IPI_CALL_FUNC]         = "Function call interrupts",
         [IPI_CPU_STOP]          = "CPU stop interrupts",
-        [IPI_CPU_CRASH_STOP]    = "CPU stop (for crash dump) interrupts",
+        [IPI_CPU_STOP_NMI]      = "CPU stop NMIs",
         [IPI_TIMER]             = "Timer broadcast interrupts",
         [IPI_IRQ_WORK]          = "IRQ work interrupts",
-        [IPI_WAKEUP]            = "CPU wake-up interrupts",
+        [IPI_CPU_BACKTRACE]     = "CPU backtrace interrupts",
+        [IPI_KGDB_ROUNDUP]      = "KGDB roundup interrupts",
 };
 
 static void smp_cross_call(const struct cpumask *target, unsigned int ipinr);
@@ -780,7 +840,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 {
         unsigned int cpu, i;
 
-        for (i = 0; i < NR_IPI; i++) {
+        for (i = 0; i < MAX_IPI; i++) {
                 seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i,
                            prec >= 4 ? " " : "");
                 for_each_online_cpu(cpu)
@@ -802,13 +862,6 @@ void arch_send_call_function_single_ipi(int cpu)
         smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC);
 }
 
-#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
-void arch_send_wakeup_ipi_mask(const struct cpumask *mask)
-{
-        smp_cross_call(mask, IPI_WAKEUP);
-}
-#endif
-
 #ifdef CONFIG_IRQ_WORK
 void arch_irq_work_raise(void)
 {
@@ -816,9 +869,9 @@ void arch_irq_work_raise(void)
 }
 #endif
 
-static void local_cpu_stop(void)
+static void __noreturn local_cpu_stop(unsigned int cpu)
 {
-        set_cpu_online(smp_processor_id(), false);
+        set_cpu_online(cpu, false);
 
         local_daif_mask();
         sdei_mask_local_cpu();
@@ -830,23 +883,28 @@ static void local_cpu_stop(void)
  * that cpu_online_mask gets correctly updated and smp_send_stop() can skip
  * CPUs that have already stopped themselves.
  */
-void panic_smp_self_stop(void)
+void __noreturn panic_smp_self_stop(void)
 {
-        local_cpu_stop();
+        local_cpu_stop(smp_processor_id());
 }
 
-#ifdef CONFIG_KEXEC_CORE
-static atomic_t waiting_for_crash_ipi = ATOMIC_INIT(0);
-#endif
-
-static void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
+static void __noreturn ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
 {
 #ifdef CONFIG_KEXEC_CORE
+        /*
+         * Use local_daif_mask() instead of local_irq_disable() to make sure
+         * that pseudo-NMIs are disabled. The "crash stop" code starts with
+         * an IRQ and falls back to NMI (which might be pseudo). If the IRQ
+         * finally goes through right as we're timing out then the NMI could
+         * interrupt us. It's better to prevent the NMI and let the IRQ
+         * finish since the pt_regs will be better.
+         */
+        local_daif_mask();
+
         crash_save_cpu(regs, cpu);
 
-        atomic_dec(&waiting_for_crash_ipi);
+        set_cpu_online(cpu, false);
 
-        local_irq_disable();
         sdei_mask_local_cpu();
 
         if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
@@ -854,9 +912,43 @@ static void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
 
         /* just in case */
         cpu_park_loop();
+#else
+        BUG();
 #endif
 }
 
+static void arm64_backtrace_ipi(cpumask_t *mask)
+{
+        __ipi_send_mask(ipi_desc[IPI_CPU_BACKTRACE], mask);
+}
+
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
+{
+        /*
+         * NOTE: though nmi_trigger_cpumask_backtrace() has "nmi_" in the name,
+         * nothing about it truly needs to be implemented using an NMI, it's
+         * just that it's _allowed_ to work with NMIs. If ipi_should_be_nmi()
+         * returned false our backtrace attempt will just use a regular IPI.
+         */
+        nmi_trigger_cpumask_backtrace(mask, exclude_cpu, arm64_backtrace_ipi);
+}
+
+#ifdef CONFIG_KGDB
+void kgdb_roundup_cpus(void)
+{
+        int this_cpu = raw_smp_processor_id();
+        int cpu;
+
+        for_each_online_cpu(cpu) {
+                /* No need to roundup ourselves */
+                if (cpu == this_cpu)
+                        continue;
+
+                __ipi_send_single(ipi_desc[IPI_KGDB_ROUNDUP], cpu);
+        }
+}
+#endif
+
 /*
  * Main handler for inter-processor interrupts
  */
@@ -865,7 +957,7 @@ static void do_handle_IPI(int ipinr)
         unsigned int cpu = smp_processor_id();
 
         if ((unsigned)ipinr < NR_IPI)
-                trace_ipi_entry_rcuidle(ipi_types[ipinr]);
+                trace_ipi_entry(ipi_types[ipinr]);
 
         switch (ipinr) {
         case IPI_RESCHEDULE:
@@ -877,14 +969,12 @@ static void do_handle_IPI(int ipinr)
                 break;
 
         case IPI_CPU_STOP:
-                local_cpu_stop();
-                break;
-
-        case IPI_CPU_CRASH_STOP:
-                if (IS_ENABLED(CONFIG_KEXEC_CORE)) {
+        case IPI_CPU_STOP_NMI:
+                if (IS_ENABLED(CONFIG_KEXEC_CORE) && crash_stop) {
                         ipi_cpu_crash_stop(cpu, get_irq_regs());
-                        unreachable();
+                } else {
+                        local_cpu_stop(cpu);
                 }
                 break;
 
@@ -900,13 +990,17 @@ static void do_handle_IPI(int ipinr)
                 break;
 #endif
 
-#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
-        case IPI_WAKEUP:
-                WARN_ONCE(!acpi_parking_protocol_valid(cpu),
-                          "CPU%u: Wake-up IPI outside the ACPI parking protocol\n",
-                          cpu);
+        case IPI_CPU_BACKTRACE:
+                /*
+                 * NOTE: in some cases this _won't_ be NMI context. See the
+                 * comment in arch_trigger_cpumask_backtrace().
+                 */
+                nmi_cpu_backtrace(get_irq_regs());
+                break;
+
+        case IPI_KGDB_ROUNDUP:
+                kgdb_nmicallback(cpu, get_irq_regs());
                 break;
-#endif
 
         default:
                 pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr);
@@ -914,7 +1008,7 @@ static void do_handle_IPI(int ipinr)
         }
 
         if ((unsigned)ipinr < NR_IPI)
-                trace_ipi_exit_rcuidle(ipi_types[ipinr]);
+                trace_ipi_exit(ipi_types[ipinr]);
 }
 
 static irqreturn_t ipi_handler(int irq, void *data)
@@ -929,6 +1023,21 @@ static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
         __ipi_send_mask(ipi_desc[ipinr], target);
 }
 
+static bool ipi_should_be_nmi(enum ipi_msg_type ipi)
+{
+        if (!system_uses_irq_prio_masking())
+                return false;
+
+        switch (ipi) {
+        case IPI_CPU_STOP_NMI:
+        case IPI_CPU_BACKTRACE:
+        case IPI_KGDB_ROUNDUP:
+                return true;
+        default:
+                return false;
+        }
+}
+
 static void ipi_setup(int cpu)
 {
         int i;
@@ -936,8 +1045,14 @@ static void ipi_setup(int cpu)
         if (WARN_ON_ONCE(!ipi_irq_base))
                 return;
 
-        for (i = 0; i < nr_ipi; i++)
-                enable_percpu_irq(ipi_irq_base + i, 0);
+        for (i = 0; i < nr_ipi; i++) {
+                if (ipi_should_be_nmi(i)) {
+                        prepare_percpu_nmi(ipi_irq_base + i);
+                        enable_percpu_nmi(ipi_irq_base + i, 0);
+                } else {
+                        enable_percpu_irq(ipi_irq_base + i, 0);
+                }
+        }
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -948,8 +1063,14 @@ static void ipi_teardown(int cpu)
         if (WARN_ON_ONCE(!ipi_irq_base))
                 return;
 
-        for (i = 0; i < nr_ipi; i++)
-                disable_percpu_irq(ipi_irq_base + i);
+        for (i = 0; i < nr_ipi; i++) {
+                if (ipi_should_be_nmi(i)) {
+                        disable_percpu_nmi(ipi_irq_base + i);
+                        teardown_percpu_nmi(ipi_irq_base + i);
+                } else {
+                        disable_percpu_irq(ipi_irq_base + i);
+                }
+        }
 }
 #endif
 
@@ -957,15 +1078,23 @@ void __init set_smp_ipi_range(int ipi_base, int n)
 {
         int i;
 
-        WARN_ON(n < NR_IPI);
-        nr_ipi = min(n, NR_IPI);
+        WARN_ON(n < MAX_IPI);
+        nr_ipi = min(n, MAX_IPI);
 
         for (i = 0; i < nr_ipi; i++) {
                 int err;
 
-                err = request_percpu_irq(ipi_base + i, ipi_handler,
-                                         "IPI", &cpu_number);
-                WARN_ON(err);
+                if (ipi_should_be_nmi(i)) {
+                        err = request_percpu_nmi(ipi_base + i, ipi_handler,
+                                                 "IPI", &irq_stat);
+                        WARN(err, "Could not request IPI %d as NMI, err=%d\n",
+                             i, err);
+                } else {
+                        err = request_percpu_irq(ipi_base + i, ipi_handler,
+                                                 "IPI", &irq_stat);
+                        WARN(err, "Could not request IPI %d as IRQ, err=%d\n",
+                             i, err);
+                }
 
                 ipi_desc[i] = irq_to_desc(ipi_base + i);
                 irq_set_status_flags(ipi_base + i, IRQ_HIDDEN);
@@ -977,11 +1106,22 @@ void __init set_smp_ipi_range(int ipi_base, int n)
         ipi_setup(smp_processor_id());
 }
 
-void smp_send_reschedule(int cpu)
+void arch_smp_send_reschedule(int cpu)
 {
         smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE);
 }
 
+#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
+void arch_send_wakeup_ipi(unsigned int cpu)
+{
+        /*
+         * We use a scheduler IPI to wake the CPU as this avoids the need for a
+         * dedicated IPI and we can safely handle spurious scheduler IPIs.
+         */
+        smp_send_reschedule(cpu);
+}
+#endif
+
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
 void tick_broadcast(const struct cpumask *mask)
 {
@@ -1002,90 +1142,112 @@ static inline unsigned int num_other_online_cpus(void)
 
 void smp_send_stop(void)
 {
+        static unsigned long stop_in_progress;
+        cpumask_t mask;
         unsigned long timeout;
 
-        if (num_other_online_cpus()) {
-                cpumask_t mask;
+        /*
+         * If this cpu is the only one alive at this point in time, online or
+         * not, there are no stop messages to be sent around, so just back out.
+         */
+        if (num_other_online_cpus() == 0)
+                goto skip_ipi;
 
-                cpumask_copy(&mask, cpu_online_mask);
-                cpumask_clear_cpu(smp_processor_id(), &mask);
+        /* Only proceed if this is the first CPU to reach this code */
+        if (test_and_set_bit(0, &stop_in_progress))
+                return;
 
-                if (system_state <= SYSTEM_RUNNING)
-                        pr_crit("SMP: stopping secondary CPUs\n");
-                smp_cross_call(&mask, IPI_CPU_STOP);
-        }
+        /*
+         * Send an IPI to all currently online CPUs except the CPU running
+         * this code.
+         *
+         * NOTE: we don't do anything here to prevent other CPUs from coming
+         * online after we snapshot `cpu_online_mask`. Ideally, the calling code
+         * should do something to prevent other CPUs from coming up. This code
+         * can be called in the panic path and thus it doesn't seem wise to
+         * grab the CPU hotplug mutex ourselves. Worst case:
+         * - If a CPU comes online as we're running, we'll likely notice it
+         *   during the 1 second wait below and then we'll catch it when we try
+         *   with an NMI (assuming NMIs are enabled) since we re-snapshot the
+         *   mask before sending an NMI.
+         * - If we leave the function and see that CPUs are still online we'll
+         *   at least print a warning. Especially without NMIs this function
+         *   isn't foolproof anyway so calling code will just have to accept
+         *   the fact that there could be cases where a CPU can't be stopped.
+         */
+        cpumask_copy(&mask, cpu_online_mask);
+        cpumask_clear_cpu(smp_processor_id(), &mask);
+
+        if (system_state <= SYSTEM_RUNNING)
+                pr_crit("SMP: stopping secondary CPUs\n");
 
-        /* Wait up to one second for other CPUs to stop */
+        /*
+         * Start with a normal IPI and wait up to one second for other CPUs to
+         * stop. We do this first because it gives other processors a chance
+         * to exit critical sections / drop locks and makes the rest of the
+         * stop process (especially console flush) more robust.
+         */
+        smp_cross_call(&mask, IPI_CPU_STOP);
         timeout = USEC_PER_SEC;
         while (num_other_online_cpus() && timeout--)
                 udelay(1);
 
-        if (num_other_online_cpus())
+        /*
+         * If CPUs are still online, try an NMI. There's no excuse for this to
+         * be slow, so we only give them an extra 10 ms to respond.
+         */
+        if (num_other_online_cpus() && ipi_should_be_nmi(IPI_CPU_STOP_NMI)) {
+                smp_rmb();
+                cpumask_copy(&mask, cpu_online_mask);
+                cpumask_clear_cpu(smp_processor_id(), &mask);
+
+                pr_info("SMP: retry stop with NMI for CPUs %*pbl\n",
+                        cpumask_pr_args(&mask));
+
+                smp_cross_call(&mask, IPI_CPU_STOP_NMI);
+                timeout = USEC_PER_MSEC * 10;
+                while (num_other_online_cpus() && timeout--)
+                        udelay(1);
+        }
+
+        if (num_other_online_cpus()) {
+                smp_rmb();
+                cpumask_copy(&mask, cpu_online_mask);
+                cpumask_clear_cpu(smp_processor_id(), &mask);
+
                 pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
-                        cpumask_pr_args(cpu_online_mask));
+                        cpumask_pr_args(&mask));
+        }
 
+skip_ipi:
         sdei_mask_local_cpu();
 }
 
 #ifdef CONFIG_KEXEC_CORE
 void crash_smp_send_stop(void)
 {
-        static int cpus_stopped;
-        cpumask_t mask;
-        unsigned long timeout;
-
         /*
          * This function can be called twice in panic path, but obviously
          * we execute this only once.
+         *
+         * We use this same boolean to tell whether the IPI we send was a
+         * stop or a "crash stop".
          */
-        if (cpus_stopped)
-                return;
-
-        cpus_stopped = 1;
-
-        /*
-         * If this cpu is the only one alive at this point in time, online or
-         * not, there are no stop messages to be sent around, so just back out.
-         */
-        if (num_other_online_cpus() == 0) {
-                sdei_mask_local_cpu();
+        if (crash_stop)
                 return;
-        }
-
-        cpumask_copy(&mask, cpu_online_mask);
-        cpumask_clear_cpu(smp_processor_id(), &mask);
-
-        atomic_set(&waiting_for_crash_ipi, num_other_online_cpus());
-
-        pr_crit("SMP: stopping secondary CPUs\n");
-        smp_cross_call(&mask, IPI_CPU_CRASH_STOP);
-
-        /* Wait up to one second for other CPUs to stop */
-        timeout = USEC_PER_SEC;
-        while ((atomic_read(&waiting_for_crash_ipi) > 0) && timeout--)
-                udelay(1);
+        crash_stop = 1;
 
-        if (atomic_read(&waiting_for_crash_ipi) > 0)
-                pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
-                        cpumask_pr_args(&mask));
+        smp_send_stop();
 
-        sdei_mask_local_cpu();
+        sdei_handler_abort();
 }
 
 bool smp_crash_stop_failed(void)
 {
-        return (atomic_read(&waiting_for_crash_ipi) > 0);
+        return num_other_online_cpus() != 0;
 }
 #endif
 
-/*
- * not supported here
- */
-int setup_profiling_timer(unsigned int multiplier)
-{
-        return -EINVAL;
-}
-
 static bool have_cpu_die(void)
 {
 #ifdef CONFIG_HOTPLUG_CPU
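The core of the change above is the two-phase stop in smp_send_stop(): ask the other CPUs to stop with an ordinary, maskable IPI, poll cpu_online_mask for up to one second, then re-snapshot the mask and retry with an NMI (or pseudo-NMI) that a wedged CPU cannot mask. The userspace sketch below mirrors that escalation only as an analogy: a cooperative flag stands in for IPI_CPU_STOP and a POSIX signal stands in for IPI_CPU_STOP_NMI. None of the names in it (wedged_worker, forced_stop_handler, and so on) are kernel API; they are invented for illustration.

/*
 * Minimal userspace analogy of the IRQ-then-NMI stop escalation.
 * Build with: cc -pthread stop_demo.c
 */
#include <pthread.h>
#include <signal.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static atomic_bool stop_requested;   /* ~ IPI_CPU_STOP (cooperative, "maskable") */
static atomic_bool worker_stopped;   /* ~ the CPU clearing itself from cpu_online_mask */

/* Runs in the target thread no matter what its main loop is doing,
 * much as an NMI handler runs on a CPU that has IRQs masked. */
static void forced_stop_handler(int sig)
{
        (void)sig;
        atomic_store(&worker_stopped, true);
}

/* A worker that never checks stop_requested, simulating a CPU that is
 * wedged with interrupts masked. */
static void *wedged_worker(void *unused)
{
        (void)unused;
        while (!atomic_load(&worker_stopped))
                usleep(1000);
        return NULL;
}

int main(void)
{
        pthread_t t;
        int timeout;

        signal(SIGUSR1, forced_stop_handler);
        pthread_create(&t, NULL, wedged_worker, NULL);

        /* Phase 1: ordinary request; poll up to ~1 s, like the IRQ pass. */
        atomic_store(&stop_requested, true);
        for (timeout = 1000; !atomic_load(&worker_stopped) && timeout--; )
                usleep(1000);

        /* Phase 2: escalate; poll only ~10 ms, like the NMI retry. */
        if (!atomic_load(&worker_stopped)) {
                printf("retry stop with signal (NMI analogue)\n");
                pthread_kill(t, SIGUSR1);
                for (timeout = 10; !atomic_load(&worker_stopped) && timeout--; )
                        usleep(1000);
        }

        printf(atomic_load(&worker_stopped) ?
               "worker stopped\n" : "failed to stop worker\n");
        pthread_join(t, NULL);
        return 0;
}

In the kernel itself the second phase only exists when ipi_should_be_nmi() returns true, i.e. when system_uses_irq_prio_masking() indicates pseudo-NMI support; without it the maskable attempt is all there is, which is why the final pr_warn path in smp_send_stop() remains.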