aboutsummaryrefslogtreecommitdiffstats
path: root/arch/arm64/mm/fault.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/mm/fault.c')
-rw-r--r--arch/arm64/mm/fault.c186
1 files changed, 118 insertions, 68 deletions
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 2d115016feb4..115d7a0e4b08 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -8,6 +8,7 @@
*/
#include <linux/acpi.h>
+#include <linux/bitfield.h>
#include <linux/extable.h>
#include <linux/signal.h>
#include <linux/mm.h>
@@ -59,28 +60,6 @@ static inline const struct fault_info *esr_to_debug_fault_info(unsigned int esr)
return debug_fault_info + DBG_ESR_EVT(esr);
}
-#ifdef CONFIG_KPROBES
-static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
-{
- int ret = 0;
-
- /* kprobe_running() needs smp_processor_id() */
- if (!user_mode(regs)) {
- preempt_disable();
- if (kprobe_running() && kprobe_fault_handler(regs, esr))
- ret = 1;
- preempt_enable();
- }
-
- return ret;
-}
-#else
-static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
-{
- return 0;
-}
-#endif
-
static void data_abort_decode(unsigned int esr)
{
pr_alert("Data abort info:\n");
@@ -108,8 +87,8 @@ static void mem_abort_decode(unsigned int esr)
pr_alert("Mem abort info:\n");
pr_alert(" ESR = 0x%08x\n", esr);
- pr_alert(" Exception class = %s, IL = %u bits\n",
- esr_get_class_string(esr),
+ pr_alert(" EC = 0x%02lx: %s, IL = %u bits\n",
+ ESR_ELx_EC(esr), esr_get_class_string(esr),
(esr & ESR_ELx_IL) ? 32 : 16);
pr_alert(" SET = %lu, FnV = %lu\n",
(esr & ESR_ELx_SET_MASK) >> ESR_ELx_SET_SHIFT,
@@ -131,7 +110,7 @@ static inline bool is_ttbr0_addr(unsigned long addr)
static inline bool is_ttbr1_addr(unsigned long addr)
{
/* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */
- return arch_kasan_reset_tag(addr) >= VA_START;
+ return arch_kasan_reset_tag(addr) >= PAGE_OFFSET;
}
/*
@@ -160,10 +139,9 @@ static void show_pte(unsigned long addr)
return;
}
- pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgdp=%016lx\n",
+ pr_alert("%s pgtable: %luk pages, %llu-bit VAs, pgdp=%016lx\n",
mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K,
- mm == &init_mm ? VA_BITS : (int)vabits_user,
- (unsigned long)virt_to_phys(mm->pgd));
+ vabits_actual, (unsigned long)virt_to_phys(mm->pgd));
pgdp = pgd_offset(mm, addr);
pgd = READ_ONCE(*pgdp);
pr_alert("[%016lx] pgd=%016llx", addr, pgd_val(pgd));
@@ -264,6 +242,34 @@ static inline bool is_el1_permission_fault(unsigned long addr, unsigned int esr,
return false;
}
+static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr,
+ unsigned int esr,
+ struct pt_regs *regs)
+{
+ unsigned long flags;
+ u64 par, dfsc;
+
+ if (ESR_ELx_EC(esr) != ESR_ELx_EC_DABT_CUR ||
+ (esr & ESR_ELx_FSC_TYPE) != ESR_ELx_FSC_FAULT)
+ return false;
+
+ local_irq_save(flags);
+ asm volatile("at s1e1r, %0" :: "r" (addr));
+ isb();
+ par = read_sysreg(par_el1);
+ local_irq_restore(flags);
+
+ if (!(par & SYS_PAR_EL1_F))
+ return false;
+
+ /*
+ * If we got a different type of fault from the AT instruction,
+ * treat the translation fault as spurious.
+ */
+ dfsc = FIELD_PREP(SYS_PAR_EL1_FST, par);
+ return (dfsc & ESR_ELx_FSC_TYPE) != ESR_ELx_FSC_FAULT;
+}
+
static void die_kernel_fault(const char *msg, unsigned long addr,
unsigned int esr, struct pt_regs *regs)
{
@@ -292,6 +298,10 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr,
if (!is_el1_instruction_abort(esr) && fixup_exception(regs))
return;
+ if (WARN_RATELIMIT(is_spurious_el1_translation_fault(addr, esr, regs),
+ "Ignoring spurious kernel translation fault at virtual address %016lx\n", addr))
+ return;
+
if (is_el1_permission_fault(addr, esr, regs)) {
if (esr & ESR_ELx_WNR)
msg = "write to read-only memory";
@@ -384,40 +394,31 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re
#define VM_FAULT_BADACCESS 0x020000
static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr,
- unsigned int mm_flags, unsigned long vm_flags,
- struct task_struct *tsk)
+ unsigned int mm_flags, unsigned long vm_flags)
{
- struct vm_area_struct *vma;
- vm_fault_t fault;
+ struct vm_area_struct *vma = find_vma(mm, addr);
- vma = find_vma(mm, addr);
- fault = VM_FAULT_BADMAP;
if (unlikely(!vma))
- goto out;
- if (unlikely(vma->vm_start > addr))
- goto check_stack;
+ return VM_FAULT_BADMAP;
/*
* Ok, we have a good vm_area for this memory access, so we can handle
* it.
*/
-good_area:
+ if (unlikely(vma->vm_start > addr)) {
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ return VM_FAULT_BADMAP;
+ if (expand_stack(vma, addr))
+ return VM_FAULT_BADMAP;
+ }
+
/*
* Check that the permissions on the VMA allow for the fault which
* occurred.
*/
- if (!(vma->vm_flags & vm_flags)) {
- fault = VM_FAULT_BADACCESS;
- goto out;
- }
-
+ if (!(vma->vm_flags & vm_flags))
+ return VM_FAULT_BADACCESS;
return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags);
-
-check_stack:
- if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr))
- goto good_area;
-out:
- return fault;
}
static bool is_el0_instruction_abort(unsigned int esr)
@@ -425,22 +426,27 @@ static bool is_el0_instruction_abort(unsigned int esr)
return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
}
+/*
+ * Note: not valid for EL1 DC IVAC, but we never use that such that it
+ * should fault. EL0 cannot issue DC IVAC (undef).
+ */
+static bool is_write_abort(unsigned int esr)
+{
+ return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM);
+}
+
static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
{
const struct fault_info *inf;
- struct task_struct *tsk;
- struct mm_struct *mm;
+ struct mm_struct *mm = current->mm;
vm_fault_t fault, major = 0;
unsigned long vm_flags = VM_READ | VM_WRITE;
unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
- if (notify_page_fault(regs, esr))
+ if (kprobe_page_fault(regs, esr))
return 0;
- tsk = current;
- mm = tsk->mm;
-
/*
* If we're in an interrupt or have no user context, we must not take
* the fault.
@@ -453,7 +459,8 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
if (is_el0_instruction_abort(esr)) {
vm_flags = VM_EXEC;
- } else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) {
+ mm_flags |= FAULT_FLAG_INSTRUCTION;
+ } else if (is_write_abort(esr)) {
vm_flags = VM_WRITE;
mm_flags |= FAULT_FLAG_WRITE;
}
@@ -492,12 +499,14 @@ retry:
*/
might_sleep();
#ifdef CONFIG_DEBUG_VM
- if (!user_mode(regs) && !search_exception_tables(regs->pc))
+ if (!user_mode(regs) && !search_exception_tables(regs->pc)) {
+ up_read(&mm->mmap_sem);
goto no_context;
+ }
#endif
}
- fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk);
+ fault = __do_page_fault(mm, addr, mm_flags, vm_flags);
major |= fault & VM_FAULT_MAJOR;
if (fault & VM_FAULT_RETRY) {
@@ -537,11 +546,11 @@ retry:
* that point.
*/
if (major) {
- tsk->maj_flt++;
+ current->maj_flt++;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs,
addr);
} else {
- tsk->min_flt++;
+ current->min_flt++;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs,
addr);
}
@@ -800,6 +809,53 @@ void __init hook_debug_fault_code(int nr,
debug_fault_info[nr].name = name;
}
+/*
+ * In debug exception context, we explicitly disable preemption despite
+ * having interrupts disabled.
+ * This serves two purposes: it makes it much less likely that we would
+ * accidentally schedule in exception context and it will force a warning
+ * if we somehow manage to schedule by accident.
+ */
+static void debug_exception_enter(struct pt_regs *regs)
+{
+ /*
+ * Tell lockdep we disabled irqs in entry.S. Do nothing if they were
+ * already disabled to preserve the last enabled/disabled addresses.
+ */
+ if (interrupts_enabled(regs))
+ trace_hardirqs_off();
+
+ if (user_mode(regs)) {
+ RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
+ } else {
+ /*
+ * We might have interrupted pretty much anything. In
+ * fact, if we're a debug exception, we can even interrupt
+ * NMI processing. We don't want this code makes in_nmi()
+ * to return true, but we need to notify RCU.
+ */
+ rcu_nmi_enter();
+ }
+
+ preempt_disable();
+
+ /* This code is a bit fragile. Test it. */
+ RCU_LOCKDEP_WARN(!rcu_is_watching(), "exception_enter didn't work");
+}
+NOKPROBE_SYMBOL(debug_exception_enter);
+
+static void debug_exception_exit(struct pt_regs *regs)
+{
+ preempt_enable_no_resched();
+
+ if (!user_mode(regs))
+ rcu_nmi_exit();
+
+ if (interrupts_enabled(regs))
+ trace_hardirqs_on();
+}
+NOKPROBE_SYMBOL(debug_exception_exit);
+
#ifdef CONFIG_ARM64_ERRATUM_1463225
DECLARE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
@@ -840,12 +896,7 @@ asmlinkage void __exception do_debug_exception(unsigned long addr_if_watchpoint,
if (cortex_a76_erratum_1463225_debug_handler(regs))
return;
- /*
- * Tell lockdep we disabled irqs in entry.S. Do nothing if they were
- * already disabled to preserve the last enabled/disabled addresses.
- */
- if (interrupts_enabled(regs))
- trace_hardirqs_off();
+ debug_exception_enter(regs);
if (user_mode(regs) && !is_ttbr0_addr(pc))
arm64_apply_bp_hardening();
@@ -855,7 +906,6 @@ asmlinkage void __exception do_debug_exception(unsigned long addr_if_watchpoint,
inf->sig, inf->code, (void __user *)pc, esr);
}
- if (interrupts_enabled(regs))
- trace_hardirqs_on();
+ debug_exception_exit(regs);
}
NOKPROBE_SYMBOL(do_debug_exception);