Diffstat (limited to 'arch/powerpc/kernel/interrupt.c')
 arch/powerpc/kernel/interrupt.c | 484 ++++++++++++++++++++++++++++------------------
 1 file changed, 311 insertions(+), 173 deletions(-)
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index e0938ba298f2..21bbd615ca41 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -3,6 +3,7 @@
#include <linux/context_tracking.h>
#include <linux/err.h>
#include <linux/compat.h>
+#include <linux/sched/debug.h> /* for show_regs */
#include <asm/asm-prototypes.h>
#include <asm/kup.h>
@@ -26,6 +27,53 @@ unsigned long global_dbcr0[NR_CPUS];
typedef long (*syscall_fn)(long, long, long, long, long, long);
+#ifdef CONFIG_PPC_BOOK3S_64
+DEFINE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant);
+static inline bool exit_must_hard_disable(void)
+{
+ return static_branch_unlikely(&interrupt_exit_not_reentrant);
+}
+#else
+static inline bool exit_must_hard_disable(void)
+{
+ return true;
+}
+#endif
+
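
For context, a sketch of how a key like this is typically flipped at boot; the enable site and the condition below are hypothetical, not part of this diff:

    /* Hypothetical boot-time enabler for the static key above. */
    static int __init init_exit_reentrancy_key(void)
    {
    	/* Condition is illustrative only. */
    	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !radix_enabled())
    		static_branch_enable(&interrupt_exit_not_reentrant);
    	return 0;
    }
    early_initcall(init_exit_reentrancy_key);
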
+/*
+ * This must be called with local irqs disabled. It returns false if the
+ * caller must let pending interrupts run (either by re-enabling and then
+ * re-disabling local irqs, or by replaying soft-masked interrupts), check
+ * for new exit work, and try again.
+ *
+ * If restartable is true then EE/RI can be left on, because interrupts
+ * are handled with a restart sequence.
+ */
+static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable)
+{
+ /* This must be done with RI=1 because tracing may touch vmaps */
+ trace_hardirqs_on();
+
+ if (exit_must_hard_disable() || !restartable)
+ __hard_EE_RI_disable();
+
+#ifdef CONFIG_PPC64
+ /* This pattern matches prep_irq_for_idle */
+ if (unlikely(lazy_irq_pending_nocheck())) {
+ if (exit_must_hard_disable() || !restartable) {
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+ __hard_RI_enable();
+ }
+ trace_hardirqs_off();
+
+ return false;
+ }
+#endif
+ return true;
+}
+
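
The contract above implies a retry loop at each call site. Condensed, the pattern used by the exit paths later in this diff looks like this (a sketch only; the real loops are in interrupt_exit_user_prepare_main() and interrupt_exit_kernel_prepare(), which replays soft-masked interrupts instead of re-enabling):

    again:
    	/* ... handle _TIF exit work with irqs enabled, then disable ... */
    	if (!prep_irq_for_enabled_exit(true)) {
    		/* An interrupt became pending: let it run, then retry. */
    		local_irq_enable();
    		local_irq_disable();
    		goto again;
    	}
    	/* EE/RI are now set up for the low-level exit sequence. */
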
/* Has to run notrace because it is entered not completely "reconciled" */
notrace long system_call_exception(long r3, long r4, long r5,
long r6, long r7, long r8,
@@ -144,71 +192,6 @@ notrace long system_call_exception(long r3, long r4, long r5,
return f(r3, r4, r5, r6, r7, r8);
}
-/*
- * local irqs must be disabled. Returns false if the caller must re-enable
- * them, check for new work, and try again.
- *
- * This should be called with local irqs disabled, but if they were previously
- * enabled when the interrupt handler returns (indicating a process-context /
- * synchronous interrupt) then irqs_enabled should be true.
- */
-static notrace __always_inline bool __prep_irq_for_enabled_exit(bool clear_ri)
-{
- /* This must be done with RI=1 because tracing may touch vmaps */
- trace_hardirqs_on();
-
- /* This pattern matches prep_irq_for_idle */
- if (clear_ri)
- __hard_EE_RI_disable();
- else
- __hard_irq_disable();
-#ifdef CONFIG_PPC64
- if (unlikely(lazy_irq_pending_nocheck())) {
- /* Took an interrupt, may have more exit work to do. */
- if (clear_ri)
- __hard_RI_enable();
- trace_hardirqs_off();
- local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
-
- return false;
- }
- local_paca->irq_happened = 0;
- irq_soft_mask_set(IRQS_ENABLED);
-#endif
- return true;
-}
-
-static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri, bool irqs_enabled)
-{
- if (__prep_irq_for_enabled_exit(clear_ri))
- return true;
-
- /*
- * Must replay pending soft-masked interrupts now. Don't just
- * local_irq_enable(); local_irq_disable(); because if we are
- * returning from an asynchronous interrupt here, another one
- * might hit after irqs are enabled, and it would exit via this
- * same path allowing another to fire, and so on unbounded.
- *
- * If interrupts were enabled when this interrupt exited,
- * indicating a process context (synchronous) interrupt,
- * local_irq_enable/disable can be used, which will enable
- * interrupts rather than keeping them masked (unclear how
- * much benefit this is over just replaying for all cases,
- * because we immediately disable again, so all we're really
- * doing is allowing hard interrupts to execute directly for
- * a very small time, rather than being masked and replayed).
- */
- if (irqs_enabled) {
- local_irq_enable();
- local_irq_disable();
- } else {
- replay_soft_interrupts();
- }
-
- return false;
-}
-
static notrace void booke_load_dbcr0(void)
{
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
@@ -231,57 +214,92 @@ static notrace void booke_load_dbcr0(void)
#endif
}
-/*
- * This should be called after a syscall returns, with r3 the return value
- * from the syscall. If this function returns non-zero, the system call
- * exit assembly should additionally load all GPR registers and CTR and XER
- * from the interrupt frame.
- *
- * The function graph tracer can not trace the return side of this function,
- * because RI=0 and soft mask state is "unreconciled", so it is marked notrace.
- */
-notrace unsigned long syscall_exit_prepare(unsigned long r3,
- struct pt_regs *regs,
- long scv)
+static void check_return_regs_valid(struct pt_regs *regs)
{
- unsigned long ti_flags;
- unsigned long ret = 0;
- bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv;
+#ifdef CONFIG_PPC_BOOK3S_64
+ unsigned long trap, srr0, srr1;
+ static bool warned;
+ u8 *validp;
+ char *h;
- CT_WARN_ON(ct_state() == CONTEXT_USER);
+ if (trap_is_scv(regs))
+ return;
- kuap_assert_locked();
+ trap = regs->trap;
+ // An external interrupt taken in HV mode uses the HSRRs, like 0xea0 does
+ if (cpu_has_feature(CPU_FTR_HVMODE) && trap == INTERRUPT_EXTERNAL)
+ trap = 0xea0;
+
+ switch (trap) {
+ case 0x980:
+ case INTERRUPT_H_DATA_STORAGE:
+ case 0xe20:
+ case 0xe40:
+ case INTERRUPT_HMI:
+ case 0xe80:
+ case 0xea0:
+ case INTERRUPT_H_FAC_UNAVAIL:
+ case 0x1200:
+ case 0x1500:
+ case 0x1600:
+ case 0x1800:
+ validp = &local_paca->hsrr_valid;
+ if (!*validp)
+ return;
+
+ srr0 = mfspr(SPRN_HSRR0);
+ srr1 = mfspr(SPRN_HSRR1);
+ h = "H";
+
+ break;
+ default:
+ validp = &local_paca->srr_valid;
+ if (!*validp)
+ return;
+
+ srr0 = mfspr(SPRN_SRR0);
+ srr1 = mfspr(SPRN_SRR1);
+ h = "";
+ break;
+ }
- regs->result = r3;
+ if (srr0 == regs->nip && srr1 == regs->msr)
+ return;
- /* Check whether the syscall is issued inside a restartable sequence */
- rseq_syscall(regs);
+ /*
+ * An NMI / soft-NMI interrupt may have come in after we found
+ * srr_valid and before the SRRs were loaded. It clobbers the
+ * SRRs and clears srr_valid; we then load the SRRs here, test
+ * them above, and find they don't match.
+ *
+ * Test validity again after the load, to catch such false
+ * positives.
+ *
+ * This test still has a window for false negatives: an NMI that
+ * comes in later and clobbers the SRRs without clearing srr_valid
+ * will not be caught or fixed up here. Statistically, though,
+ * such cases should be caught often enough to get a warning out.
+ */
+ barrier();
- ti_flags = current_thread_info()->flags;
+ if (!*validp)
+ return;
- if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) {
- if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
- r3 = -r3;
- regs->ccr |= 0x10000000; /* Set SO bit in CR */
- }
+ if (!warned) {
+ warned = true;
+ printk("%sSRR0 was: %lx should be: %lx\n", h, srr0, regs->nip);
+ printk("%sSRR1 was: %lx should be: %lx\n", h, srr1, regs->msr);
+ show_regs(regs);
}
- if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) {
- if (ti_flags & _TIF_RESTOREALL)
- ret = _TIF_RESTOREALL;
- else
- regs->gpr[3] = r3;
- clear_bits(_TIF_PERSYSCALL_MASK, &current_thread_info()->flags);
- } else {
- regs->gpr[3] = r3;
- }
-
- if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
- do_syscall_trace_leave(regs);
- ret |= _TIF_RESTOREALL;
- }
+ *validp = 0; /* fixup */
+#endif
+}
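
Abstracting away the SPRs, the barrier()-and-recheck above is a double-check pattern for state that an NMI can clobber together with its valid flag. A generic sketch of the shape (illustrative, not patch code; barrier() is from <linux/compiler.h>):

    static bool srr_mismatch_is_real(u8 *validp, unsigned long val,
    				     unsigned long expected)
    {
    	if (val == expected)
    		return false;	/* registers match: nothing to report */
    	barrier();		/* reload *validp after the compare */
    	if (!*validp)
    		return false;	/* an NMI raced us: false positive */
    	return true;		/* flag still set: genuine mismatch */
    }
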
- local_irq_disable();
+static notrace unsigned long
+interrupt_exit_user_prepare_main(unsigned long ret, struct pt_regs *regs)
+{
+ unsigned long ti_flags;
again:
ti_flags = READ_ONCE(current_thread_info()->flags);
@@ -303,7 +321,7 @@ again:
ti_flags = READ_ONCE(current_thread_info()->flags);
}
- if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) {
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) {
if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
unlikely((ti_flags & _TIF_RESTORE_TM))) {
restore_tm_state(regs);
@@ -327,10 +345,10 @@ again:
}
}
- user_enter_irqoff();
+ check_return_regs_valid(regs);
- /* scv need not set RI=0 because SRRs are not used */
- if (unlikely(!__prep_irq_for_enabled_exit(is_not_scv))) {
+ user_enter_irqoff();
+ if (!prep_irq_for_enabled_exit(true)) {
user_exit_irqoff();
local_irq_enable();
local_irq_disable();
@@ -352,90 +370,131 @@ again:
return ret;
}
-notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr)
+/*
+ * This should be called after a syscall returns, with r3 the return value
+ * from the syscall. If this function returns non-zero, the system call
+ * exit assembly should additionally load all GPR registers and CTR and XER
+ * from the interrupt frame.
+ *
+ * The function graph tracer can not trace the return side of this function,
+ * because RI=0 and soft mask state is "unreconciled", so it is marked notrace.
+ */
+notrace unsigned long syscall_exit_prepare(unsigned long r3,
+ struct pt_regs *regs,
+ long scv)
{
unsigned long ti_flags;
- unsigned long flags;
unsigned long ret = 0;
+ bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv;
- if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
- BUG_ON(!(regs->msr & MSR_RI));
- BUG_ON(!(regs->msr & MSR_PR));
- BUG_ON(arch_irq_disabled_regs(regs));
CT_WARN_ON(ct_state() == CONTEXT_USER);
- /*
- * We don't need to restore AMR on the way back to userspace for KUAP.
- * AMR can only have been unlocked if we interrupted the kernel.
- */
kuap_assert_locked();
- local_irq_save(flags);
-
-again:
- ti_flags = READ_ONCE(current_thread_info()->flags);
- while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
- local_irq_enable(); /* returning to user: may enable */
- if (ti_flags & _TIF_NEED_RESCHED) {
- schedule();
- } else {
- if (ti_flags & _TIF_SIGPENDING)
- ret |= _TIF_RESTOREALL;
- do_notify_resume(regs, ti_flags);
- }
- local_irq_disable();
- ti_flags = READ_ONCE(current_thread_info()->flags);
- }
+ regs->result = r3;
- if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) {
- if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
- unlikely((ti_flags & _TIF_RESTORE_TM))) {
- restore_tm_state(regs);
- } else {
- unsigned long mathflags = MSR_FP;
+ /* Check whether the syscall is issued inside a restartable sequence */
+ rseq_syscall(regs);
- if (cpu_has_feature(CPU_FTR_VSX))
- mathflags |= MSR_VEC | MSR_VSX;
- else if (cpu_has_feature(CPU_FTR_ALTIVEC))
- mathflags |= MSR_VEC;
+ ti_flags = current_thread_info()->flags;
- /* See above restore_math comment */
- if ((regs->msr & mathflags) != mathflags)
- restore_math(regs);
+ if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) {
+ if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
+ r3 = -r3;
+ regs->ccr |= 0x10000000; /* Set SO bit in CR */
}
}
- user_enter_irqoff();
+ if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) {
+ if (ti_flags & _TIF_RESTOREALL)
+ ret = _TIF_RESTOREALL;
+ else
+ regs->gpr[3] = r3;
+ clear_bits(_TIF_PERSYSCALL_MASK, &current_thread_info()->flags);
+ } else {
+ regs->gpr[3] = r3;
+ }
- if (unlikely(!__prep_irq_for_enabled_exit(true))) {
- user_exit_irqoff();
- local_irq_enable();
- local_irq_disable();
- goto again;
+ if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
+ do_syscall_trace_leave(regs);
+ ret |= _TIF_RESTOREALL;
}
- booke_load_dbcr0();
+ local_irq_disable();
+ ret = interrupt_exit_user_prepare_main(ret, regs);
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- local_paca->tm_scratch = regs->msr;
+#ifdef CONFIG_PPC64
+ regs->exit_result = ret;
#endif
- account_cpu_user_exit();
+ return ret;
+}
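
For reference, the error convention encoded above: with plain sc, errors return the positive errno in r3 with CR0[SO] set, while scv returns a negative errno directly, which is why is_not_scv guards the sign flip and the SO bit. A userspace-side decode might look like this (illustrative sketch, not kernel code):

    /* Illustrative decode of the two powerpc syscall return conventions. */
    static long decode_syscall_return(unsigned long r3, bool cr0_so, bool used_scv)
    {
    	if (used_scv)
    		return (long)r3;	/* scv: negative errno directly */
    	if (cr0_so)
    		return -(long)r3;	/* sc: positive errno, flagged via CR0.SO */
    	return (long)r3;		/* sc success: result in r3 */
    }
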
- /* Restore user access locks last */
- kuap_user_restore(regs);
- kuep_unlock();
+#ifdef CONFIG_PPC64
+notrace unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *regs)
+{
+ /*
+ * This is called when detecting a soft-pending interrupt as well as
+ * an alternate-return interrupt. So we can't just have the alternate
+ * return path clear SRR1[MSR] and set PACA_IRQ_HARD_DIS (unless
+ * the soft-pending case were to fix things up as well). RI might be
+ * disabled, in which case it gets re-enabled by __hard_irq_disable().
+ */
+ __hard_irq_disable();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ set_kuap(AMR_KUAP_BLOCKED);
+#endif
+
+ trace_hardirqs_off();
+ user_exit_irqoff();
+ account_cpu_user_entry();
+
+ BUG_ON(!user_mode(regs));
+
+ regs->exit_result = interrupt_exit_user_prepare_main(regs->exit_result, regs);
+
+ return regs->exit_result;
+}
+#endif
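
regs->exit_result is the glue between the two passes: the first pass stashes the work-flags result it computed, and if an interrupt hits the restartable exit window, the restart entry resumes from that value rather than redoing the syscall bookkeeping. Schematically (a sketch of the flow, not literal code):

    /*
     * syscall_exit_prepare():   regs->exit_result = ret;  EE/RI left on
     *   <interrupt lands in the restartable exit window>
     * asm redirects NIP to a fixup, which calls
     * syscall_exit_restart():   ret = interrupt_exit_user_prepare_main(
     *                                     regs->exit_result, regs);
     * so flags such as _TIF_RESTOREALL already accumulated are not lost.
     */
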
+
+notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs)
+{
+ unsigned long ret;
+
+ if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
+ BUG_ON(!(regs->msr & MSR_RI));
+ BUG_ON(!(regs->msr & MSR_PR));
+ BUG_ON(arch_irq_disabled_regs(regs));
+ CT_WARN_ON(ct_state() == CONTEXT_USER);
+
+ /*
+ * We don't need to restore AMR on the way back to userspace for KUAP.
+ * AMR can only have been unlocked if we interrupted the kernel.
+ */
+ kuap_assert_locked();
+
+ local_irq_disable();
+
+ ret = interrupt_exit_user_prepare_main(0, regs);
+
+#ifdef CONFIG_PPC64
+ regs->exit_result = ret;
+#endif
return ret;
}
void preempt_schedule_irq(void);
-notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr)
+notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)
{
unsigned long flags;
unsigned long ret = 0;
unsigned long kuap;
+ bool stack_store = current_thread_info()->flags &
+ _TIF_EMULATE_STACK_STORE;
if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) &&
unlikely(!(regs->msr & MSR_RI)))
@@ -450,11 +509,6 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr)
kuap = kuap_get_and_assert_locked();
- if (unlikely(current_thread_info()->flags & _TIF_EMULATE_STACK_STORE)) {
- clear_bits(_TIF_EMULATE_STACK_STORE, &current_thread_info()->flags);
- ret = 1;
- }
-
local_irq_save(flags);
if (!arch_irq_disabled_regs(regs)) {
@@ -469,17 +523,58 @@ again:
}
}
- if (unlikely(!prep_irq_for_enabled_exit(true, !irqs_disabled_flags(flags))))
+ check_return_regs_valid(regs);
+
+ /*
+ * Stack store exit can't be restarted because the interrupt
+ * stack frame might have been clobbered.
+ */
+ if (!prep_irq_for_enabled_exit(!unlikely(stack_store))) {
+ /*
+ * Replay pending soft-masked interrupts now. Don't
+ * just local_irq_enable(); local_irq_disable(); because
+ * if we are returning from an asynchronous interrupt
+ * here, another one might hit after irqs are enabled,
+ * and it would exit via this same path allowing
+ * another to fire, and so on unbounded.
+ */
+ hard_irq_disable();
+ replay_soft_interrupts();
+ /* Took an interrupt, may have more exit work to do. */
goto again;
- } else {
- /* Returning to a kernel context with local irqs disabled. */
- __hard_EE_RI_disable();
+ }
#ifdef CONFIG_PPC64
+ /*
+ * An interrupt may clear MSR[EE] and set this concurrently,
+ * but it will be marked pending and the exit will be retried.
+ * This leaves a racy window where MSR[EE]=0 and HARD_DIS is
+ * clear, until interrupt_exit_kernel_restart() calls
+ * hard_irq_disable(), which will set HARD_DIS again.
+ */
+ local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+
+ } else {
+ check_return_regs_valid(regs);
+
+ if (unlikely(stack_store))
+ __hard_EE_RI_disable();
+ /*
+ * Returning to a kernel context with local irqs disabled.
+ * Here, if EE was enabled in the interrupted context, enable
+ * it on return as well. One caveat: a soft-masked interrupt
+ * may have cleared MSR[EE] and set HARD_DIS in the meantime,
+ * and that state will still exist on return to the caller;
+ * it is resolved by the masked interrupt firing again.
+ */
if (regs->msr & MSR_EE)
local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
-#endif
+#endif /* CONFIG_PPC64 */
}
+ if (unlikely(stack_store)) {
+ clear_bits(_TIF_EMULATE_STACK_STORE, &current_thread_info()->flags);
+ ret = 1;
+ }
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
local_paca->tm_scratch = regs->msr;
@@ -494,3 +589,46 @@ again:
return ret;
}
+
+#ifdef CONFIG_PPC64
+notrace unsigned long interrupt_exit_user_restart(struct pt_regs *regs)
+{
+ __hard_irq_disable();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ set_kuap(AMR_KUAP_BLOCKED);
+#endif
+
+ trace_hardirqs_off();
+ user_exit_irqoff();
+ account_cpu_user_entry();
+
+ BUG_ON(!user_mode(regs));
+
+ regs->exit_result |= interrupt_exit_user_prepare(regs);
+
+ return regs->exit_result;
+}
+
+/*
+ * No real need to return a value here because the stack store case does not
+ * get restarted.
+ */
+notrace unsigned long interrupt_exit_kernel_restart(struct pt_regs *regs)
+{
+ __hard_irq_disable();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ set_kuap(AMR_KUAP_BLOCKED);
+#endif
+
+ if (regs->softe == IRQS_ENABLED)
+ trace_hardirqs_off();
+
+ BUG_ON(user_mode(regs));
+
+ return interrupt_exit_kernel_prepare(regs);
+}
+#endif
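
All three *_restart() entry points serve the restart table built on the assembly side: the final exit sequence runs with MSR[EE] (and RI) still on inside a marked window, and an interrupt landing there has its return NIP redirected to a fixup that re-runs the C preparation. A sketch of the idea; the struct and function names here are illustrative, the real table is emitted by assembler macros:

    struct restart_table_entry {
    	unsigned long start;	/* first instruction of the restartable window */
    	unsigned long end;	/* one past the last instruction */
    	unsigned long fixup;	/* e.g. a thunk calling interrupt_exit_user_restart() */
    };

    /* Called when an interrupt is taken from kernel text (illustrative). */
    static void maybe_redirect_to_fixup(struct pt_regs *regs,
    				        const struct restart_table_entry *e)
    {
    	if (regs->nip >= e->start && regs->nip < e->end)
    		regs->nip = e->fixup;	/* exit will be re-prepared from C */
    }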