Diffstat (limited to 'arch/arm64/mm/fault.c')
-rw-r--r-- | arch/arm64/mm/fault.c | 432 |
1 file changed, 242 insertions, 190 deletions
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 85566d32958f..3e9cf9826417 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -10,10 +10,12 @@
 #include <linux/acpi.h>
 #include <linux/bitfield.h>
 #include <linux/extable.h>
+#include <linux/kfence.h>
 #include <linux/signal.h>
 #include <linux/mm.h>
 #include <linux/hardirq.h>
 #include <linux/init.h>
+#include <linux/kasan.h>
 #include <linux/kprobes.h>
 #include <linux/uaccess.h>
 #include <linux/page-flags.h>
@@ -28,20 +30,21 @@
 #include <asm/bug.h>
 #include <asm/cmpxchg.h>
 #include <asm/cpufeature.h>
+#include <asm/efi.h>
 #include <asm/exception.h>
 #include <asm/daifflags.h>
 #include <asm/debug-monitors.h>
 #include <asm/esr.h>
 #include <asm/kprobes.h>
+#include <asm/mte.h>
 #include <asm/processor.h>
 #include <asm/sysreg.h>
 #include <asm/system_misc.h>
-#include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/traps.h>
 
 struct fault_info {
-	int	(*fn)(unsigned long addr, unsigned int esr,
+	int	(*fn)(unsigned long far, unsigned long esr,
 		      struct pt_regs *regs);
 	int	sig;
 	int	code;
@@ -51,17 +54,17 @@ struct fault_info {
 static const struct fault_info fault_info[];
 static struct fault_info debug_fault_info[];
 
-static inline const struct fault_info *esr_to_fault_info(unsigned int esr)
+static inline const struct fault_info *esr_to_fault_info(unsigned long esr)
 {
 	return fault_info + (esr & ESR_ELx_FSC);
 }
 
-static inline const struct fault_info *esr_to_debug_fault_info(unsigned int esr)
+static inline const struct fault_info *esr_to_debug_fault_info(unsigned long esr)
 {
 	return debug_fault_info + DBG_ESR_EVT(esr);
 }
 
-static void data_abort_decode(unsigned int esr)
+static void data_abort_decode(unsigned long esr)
 {
 	pr_alert("Data abort info:\n");
 
@@ -83,11 +86,11 @@ static void data_abort_decode(unsigned int esr)
 		 (esr & ESR_ELx_WNR) >> ESR_ELx_WNR_SHIFT);
 }
 
-static void mem_abort_decode(unsigned int esr)
+static void mem_abort_decode(unsigned long esr)
 {
 	pr_alert("Mem abort info:\n");
 
-	pr_alert("  ESR = 0x%08x\n", esr);
+	pr_alert("  ESR = 0x%016lx\n", esr);
 	pr_alert("  EC = 0x%02lx: %s, IL = %u bits\n",
 		 ESR_ELx_EC(esr), esr_get_class_string(esr),
 		 (esr & ESR_ELx_IL) ? 32 : 16);
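The first theme of the diff is already visible above: every esr is widened from unsigned int to unsigned long (with the matching "ESR = 0x%016lx" format string), since ESR_ELx is architecturally a 64-bit register and later revisions of the architecture define ISS2 bits above bit 31. A self-contained sketch of the field extraction mem_abort_decode() performs, using the architectural bit positions (EC at [31:26], IL at [25], WnR at ISS bit [6], FSC at ISS [5:0]); the macro names here are illustrative, not the kernel's:

#include <stdint.h>
#include <stdio.h>

/* Architectural ESR_ELx field positions (illustrative macros). */
#define ESR_EC(esr)	(((esr) >> 26) & 0x3f)	/* exception class */
#define ESR_IL(esr)	(((esr) >> 25) & 0x1)	/* 32-bit instruction? */
#define ESR_WNR(esr)	(((esr) >> 6) & 0x1)	/* write, not read */
#define ESR_FSC(esr)	((esr) & 0x3f)		/* fault status code */

int main(void)
{
	/* EC 0x25 (data abort from EL0), level 3 translation fault, write. */
	uint64_t esr = (0x25ULL << 26) | (1ULL << 25) | (1ULL << 6) | 0x07;

	printf("ESR = 0x%016llx\n", (unsigned long long)esr);
	printf("  EC  = 0x%02llx\n", (unsigned long long)ESR_EC(esr));
	printf("  IL  = %d bits\n", ESR_IL(esr) ? 32 : 16);
	printf("  WnR = %llu, FSC = 0x%02llx\n",
	       (unsigned long long)ESR_WNR(esr),
	       (unsigned long long)ESR_FSC(esr));
	return 0;
}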
@@ -97,6 +100,8 @@ static void mem_abort_decode(unsigned int esr)
 	pr_alert("  EA = %lu, S1PTW = %lu\n",
 		 (esr & ESR_ELx_EA) >> ESR_ELx_EA_SHIFT,
 		 (esr & ESR_ELx_S1PTW) >> ESR_ELx_S1PTW_SHIFT);
+	pr_alert("  FSC = 0x%02lx: %s\n", (esr & ESR_ELx_FSC),
+		 esr_to_fault_info(esr)->name);
 
 	if (esr_is_data_abort(esr))
 		data_abort_decode(esr);
@@ -145,6 +150,7 @@ static void show_pte(unsigned long addr)
 	pr_alert("[%016lx] pgd=%016llx", addr, pgd_val(pgd));
 
 	do {
+		p4d_t *p4dp, p4d;
 		pud_t *pudp, pud;
 		pmd_t *pmdp, pmd;
 		pte_t *ptep, pte;
@@ -152,7 +158,13 @@ static void show_pte(unsigned long addr)
 		if (pgd_none(pgd) || pgd_bad(pgd))
 			break;
 
-		pudp = pud_offset(pgdp, addr);
+		p4dp = p4d_offset(pgdp, addr);
+		p4d = READ_ONCE(*p4dp);
+		pr_cont(", p4d=%016llx", p4d_val(p4d));
+		if (p4d_none(p4d) || p4d_bad(p4d))
+			break;
+
+		pudp = pud_offset(p4dp, addr);
 		pud = READ_ONCE(*pudp);
 		pr_cont(", pud=%016llx", pud_val(pud));
 		if (pud_none(pud) || pud_bad(pud))
@@ -212,22 +224,28 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
 		pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval);
 	} while (pteval != old_pteval);
 
-	flush_tlb_fix_spurious_fault(vma, address);
+	/* Invalidate a stale read-only entry */
+	if (dirty)
+		flush_tlb_page(vma, address);
 	return 1;
 }
 
-static bool is_el1_instruction_abort(unsigned int esr)
+static bool is_el1_instruction_abort(unsigned long esr)
 {
 	return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR;
 }
 
-static inline bool is_el1_permission_fault(unsigned long addr, unsigned int esr,
+static bool is_el1_data_abort(unsigned long esr)
+{
+	return ESR_ELx_EC(esr) == ESR_ELx_EC_DABT_CUR;
+}
+
+static inline bool is_el1_permission_fault(unsigned long addr, unsigned long esr,
 					   struct pt_regs *regs)
 {
-	unsigned int ec       = ESR_ELx_EC(esr);
-	unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;
+	unsigned long fsc_type = esr & ESR_ELx_FSC_TYPE;
 
-	if (ec != ESR_ELx_EC_DABT_CUR && ec != ESR_ELx_EC_IABT_CUR)
+	if (!is_el1_data_abort(esr) && !is_el1_instruction_abort(esr))
 		return false;
 
 	if (fsc_type == ESR_ELx_FSC_PERM)
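The ptep_set_access_flags() hunk above keeps the function's lock-free shape: the PTE is updated in a compare-and-exchange loop so a racing hardware or software update is never lost, and only the tail changes (an explicit flush_tlb_page() when a dirty bit was set, instead of flush_tlb_fix_spurious_fault()). The same retry pattern in portable C11 atomics, as a rough stand-in for the kernel's cmpxchg_relaxed() on a live PTE:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/*
 * OR flag bits into a PTE-like word, retrying if another agent
 * (e.g. hardware setting the access flag) changed it meanwhile.
 * atomic_compare_exchange_weak_explicit() reloads "old" on failure,
 * so each retry recomputes "new" from the freshest value.
 */
static void set_flags(_Atomic uint64_t *pte, uint64_t flags)
{
	uint64_t old = atomic_load_explicit(pte, memory_order_relaxed);
	uint64_t new;

	do {
		new = old | flags;
	} while (!atomic_compare_exchange_weak_explicit(pte, &old, new,
							memory_order_relaxed,
							memory_order_relaxed));
}

int main(void)
{
	_Atomic uint64_t pte = 0x0040000012345003ULL;

	set_flags(&pte, 1ULL << 10);	/* e.g. an access-flag style bit */
	printf("pte = %016llx\n", (unsigned long long)atomic_load(&pte));
	return 0;
}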
@@ -241,20 +259,20 @@ static inline bool is_el1_permission_fault(unsigned long addr, unsigned int esr,
 }
 
 static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr,
-							unsigned int esr,
+							unsigned long esr,
 							struct pt_regs *regs)
 {
 	unsigned long flags;
 	u64 par, dfsc;
 
-	if (ESR_ELx_EC(esr) != ESR_ELx_EC_DABT_CUR ||
+	if (!is_el1_data_abort(esr) ||
 	    (esr & ESR_ELx_FSC_TYPE) != ESR_ELx_FSC_FAULT)
 		return false;
 
 	local_irq_save(flags);
 	asm volatile("at s1e1r, %0" :: "r" (addr));
 	isb();
-	par = read_sysreg(par_el1);
+	par = read_sysreg_par();
 	local_irq_restore(flags);
 
 	/*
@@ -273,22 +291,70 @@ static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr,
 }
 
 static void die_kernel_fault(const char *msg, unsigned long addr,
-			     unsigned int esr, struct pt_regs *regs)
+			     unsigned long esr, struct pt_regs *regs)
 {
 	bust_spinlocks(1);
 
 	pr_alert("Unable to handle kernel %s at virtual address %016lx\n", msg,
 		 addr);
 
+	kasan_non_canonical_hook(addr);
+
 	mem_abort_decode(esr);
 
 	show_pte(addr);
 	die("Oops", regs, esr);
 	bust_spinlocks(0);
-	do_exit(SIGKILL);
+	make_task_dead(SIGKILL);
 }
 
-static void __do_kernel_fault(unsigned long addr, unsigned int esr,
+#ifdef CONFIG_KASAN_HW_TAGS
+static void report_tag_fault(unsigned long addr, unsigned long esr,
+			     struct pt_regs *regs)
+{
+	/*
+	 * SAS bits aren't set for all faults reported in EL1, so we can't
+	 * find out access size.
+	 */
+	bool is_write = !!(esr & ESR_ELx_WNR);
+	kasan_report(addr, 0, is_write, regs->pc);
+}
+#else
+/* Tag faults aren't enabled without CONFIG_KASAN_HW_TAGS. */
+static inline void report_tag_fault(unsigned long addr, unsigned long esr,
+				    struct pt_regs *regs) { }
+#endif
+
+static void do_tag_recovery(unsigned long addr, unsigned long esr,
+			   struct pt_regs *regs)
+{
+
+	report_tag_fault(addr, esr, regs);
+
+	/*
+	 * Disable MTE Tag Checking on the local CPU for the current EL.
+	 * It will be done lazily on the other CPUs when they will hit a
+	 * tag fault.
+	 */
+	sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF_MASK,
+			 SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF, NONE));
+	isb();
+}
+
+static bool is_el1_mte_sync_tag_check_fault(unsigned long esr)
+{
+	unsigned long fsc = esr & ESR_ELx_FSC;
+
+	if (!is_el1_data_abort(esr))
+		return false;
+
+	if (fsc == ESR_ELx_FSC_MTE)
+		return true;
+
+	return false;
+}
+
+static void __do_kernel_fault(unsigned long addr, unsigned long esr,
 			      struct pt_regs *regs)
 {
 	const char *msg;
@@ -304,6 +370,12 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr,
 	    "Ignoring spurious kernel translation fault at virtual address %016lx\n",
 	    addr))
 		return;
 
+	if (is_el1_mte_sync_tag_check_fault(esr)) {
+		do_tag_recovery(addr, esr, regs);
+
+		return;
+	}
+
 	if (is_el1_permission_fault(addr, esr, regs)) {
 		if (esr & ESR_ELx_WNR)
 			msg = "write to read-only memory";
@@ -314,13 +386,19 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr,
 	} else if (addr < PAGE_SIZE) {
 		msg = "NULL pointer dereference";
 	} else {
+		if (kfence_handle_page_fault(addr, esr & ESR_ELx_WNR, regs))
+			return;
+
 		msg = "paging request";
 	}
 
+	if (efi_runtime_fixup_exception(regs, msg))
+		return;
+
 	die_kernel_fault(msg, addr, esr, regs);
 }
 
-static void set_thread_esr(unsigned long address, unsigned int esr)
+static void set_thread_esr(unsigned long address, unsigned long esr)
 {
 	current->thread.fault_address = address;
 
@@ -368,7 +446,7 @@ static void set_thread_esr(unsigned long address, unsigned int esr)
 		 * exception level). Fail safe by not providing an ESR
 		 * context record at all.
 		 */
-		WARN(1, "ESR 0x%x is not DABT or IABT from EL0\n", esr);
+		WARN(1, "ESR 0x%lx is not DABT or IABT from EL0\n", esr);
 		esr = 0;
 		break;
 	}
@@ -377,8 +455,11 @@ static void set_thread_esr(unsigned long address, unsigned int esr)
 	current->thread.fault_code = esr;
 }
 
-static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+static void do_bad_area(unsigned long far, unsigned long esr,
+			struct pt_regs *regs)
 {
+	unsigned long addr = untagged_addr(far);
+
 	/*
 	 * If we are in kernel mode at this point, we have no context to
 	 * handle this fault with.
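From the do_bad_area() hunk above onwards, handlers receive the raw fault address register as "far" and derive addr = untagged_addr(far) themselves, because with TBI/MTE the top byte of a faulting user pointer carries a tag that must not leak into VMA lookups. A standalone sketch of that split, assuming the TBI layout (tag in bits [63:56], canonical address sign-extended from bit 55); untag() mimics, but is not, the kernel helper:

#include <stdint.h>
#include <stdio.h>

#define TAG_SHIFT	56
#define TAG_MASK	(0xffULL << TAG_SHIFT)

/*
 * Sign-extend from bit 55, clearing the tag byte: kernel addresses
 * (bit 55 set) stay canonical, user addresses lose their tag.
 */
static uint64_t untag(uint64_t far)
{
	return (uint64_t)((int64_t)(far << 8) >> 8);
}

int main(void)
{
	uint64_t far = (0x3aULL << TAG_SHIFT) | 0x0000aabbccdd1000ULL;

	printf("far      = %016llx\n", (unsigned long long)far);
	printf("untagged = %016llx\n", (unsigned long long)untag(far));
	printf("tag      = %02llx\n",
	       (unsigned long long)((far & TAG_MASK) >> TAG_SHIFT));
	return 0;
}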
@@ -387,8 +468,7 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re
 		const struct fault_info *inf = esr_to_fault_info(esr);
 
 		set_thread_esr(addr, esr);
-		arm64_force_sig_fault(inf->sig, inf->code, (void __user *)addr,
-				      inf->name);
+		arm64_force_sig_fault(inf->sig, inf->code, far, inf->name);
 	} else {
 		__do_kernel_fault(addr, esr, regs);
 	}
@@ -398,7 +478,8 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re
 #define VM_FAULT_BADACCESS	0x020000
 
 static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr,
-				  unsigned int mm_flags, unsigned long vm_flags)
+				  unsigned int mm_flags, unsigned long vm_flags,
+				  struct pt_regs *regs)
 {
 	struct vm_area_struct *vma = find_vma(mm, addr);
 
@@ -422,10 +503,10 @@ static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr,
 	 */
 	if (!(vma->vm_flags & vm_flags))
 		return VM_FAULT_BADACCESS;
-	return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags);
+	return handle_mm_fault(vma, addr, mm_flags, regs);
 }
 
-static bool is_el0_instruction_abort(unsigned int esr)
+static bool is_el0_instruction_abort(unsigned long esr)
 {
 	return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
 }
@@ -434,19 +515,20 @@ static bool is_el0_instruction_abort(unsigned int esr)
  * Note: not valid for EL1 DC IVAC, but we never use that such that it
  * should fault. EL0 cannot issue DC IVAC (undef).
  */
-static bool is_write_abort(unsigned int esr)
+static bool is_write_abort(unsigned long esr)
 {
 	return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM);
 }
 
-static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
+static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
 				   struct pt_regs *regs)
 {
 	const struct fault_info *inf;
 	struct mm_struct *mm = current->mm;
-	vm_fault_t fault, major = 0;
-	unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC;
-	unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+	vm_fault_t fault;
+	unsigned long vm_flags;
+	unsigned int mm_flags = FAULT_FLAG_DEFAULT;
+	unsigned long addr = untagged_addr(far);
 
 	if (kprobe_page_fault(regs, esr))
 		return 0;
@@ -461,20 +543,31 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 	if (user_mode(regs))
 		mm_flags |= FAULT_FLAG_USER;
 
+	/*
+	 * vm_flags tells us what bits we must have in vma->vm_flags
+	 * for the fault to be benign, __do_page_fault() would check
+	 * vma->vm_flags & vm_flags and returns an error if the
+	 * intersection is empty
+	 */
 	if (is_el0_instruction_abort(esr)) {
+		/* It was exec fault */
 		vm_flags = VM_EXEC;
 		mm_flags |= FAULT_FLAG_INSTRUCTION;
 	} else if (is_write_abort(esr)) {
+		/* It was write fault */
 		vm_flags = VM_WRITE;
 		mm_flags |= FAULT_FLAG_WRITE;
+	} else {
+		/* It was read fault */
+		vm_flags = VM_READ;
+		/* Write implies read */
+		vm_flags |= VM_WRITE;
+		/* If EPAN is absent then exec implies read */
+		if (!cpus_have_const_cap(ARM64_HAS_EPAN))
+			vm_flags |= VM_EXEC;
 	}
 
 	if (is_ttbr0_addr(addr) && is_el1_permission_fault(addr, esr, regs)) {
-		/* regs->orig_addr_limit may be 0 if we entered from EL0 */
-		if (regs->orig_addr_limit == KERNEL_DS)
-			die_kernel_fault("access to user memory with fs=KERNEL_DS",
-					 addr, esr, regs);
-
 		if (is_el1_instruction_abort(esr))
 			die_kernel_fault("execution of user memory",
 					 addr, esr, regs);
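The rewritten vm_flags selection above is effectively a small truth table: an exec fault requires VM_EXEC, a write fault VM_WRITE, and a plain read is satisfied by VM_READ or VM_WRITE (write implies read), plus VM_EXEC on CPUs without EPAN. A compact restatement with stand-in flag values (the kernel's VM_* constants differ):

#include <stdio.h>

/* Stand-in permission bits; the kernel's VM_* values differ. */
#define VM_READ		0x1UL
#define VM_WRITE	0x2UL
#define VM_EXEC		0x4UL

enum abort_kind { ABORT_EXEC, ABORT_WRITE, ABORT_READ };

/* Mirrors the patched do_page_fault() vm_flags selection. */
static unsigned long required_vm_flags(enum abort_kind kind, int have_epan)
{
	switch (kind) {
	case ABORT_EXEC:
		return VM_EXEC;
	case ABORT_WRITE:
		return VM_WRITE;
	default:
		/* read: write implies read; without EPAN, exec does too */
		return VM_READ | VM_WRITE | (have_epan ? 0 : VM_EXEC);
	}
}

int main(void)
{
	printf("read fault, no EPAN:   any of 0x%lx\n",
	       required_vm_flags(ABORT_READ, 0));
	printf("read fault, with EPAN: any of 0x%lx\n",
	       required_vm_flags(ABORT_READ, 1));
	return 0;
}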
@@ -491,76 +584,50 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 	 * validly references user space from well defined areas of the code,
 	 * we can bug out early if this is from code which shouldn't.
 	 */
-	if (!down_read_trylock(&mm->mmap_sem)) {
+	if (!mmap_read_trylock(mm)) {
 		if (!user_mode(regs) && !search_exception_tables(regs->pc))
 			goto no_context;
 retry:
-		down_read(&mm->mmap_sem);
+		mmap_read_lock(mm);
 	} else {
 		/*
-		 * The above down_read_trylock() might have succeeded in which
+		 * The above mmap_read_trylock() might have succeeded in which
		 * case, we'll have missed the might_sleep() from down_read().
 		 */
 		might_sleep();
 #ifdef CONFIG_DEBUG_VM
 		if (!user_mode(regs) && !search_exception_tables(regs->pc)) {
-			up_read(&mm->mmap_sem);
+			mmap_read_unlock(mm);
 			goto no_context;
 		}
 #endif
 	}
 
-	fault = __do_page_fault(mm, addr, mm_flags, vm_flags);
-	major |= fault & VM_FAULT_MAJOR;
+	fault = __do_page_fault(mm, addr, mm_flags, vm_flags, regs);
 
-	if (fault & VM_FAULT_RETRY) {
-		/*
-		 * If we need to retry but a fatal signal is pending,
-		 * handle the signal first. We do not need to release
-		 * the mmap_sem because it would already be released
-		 * in __lock_page_or_retry in mm/filemap.c.
-		 */
-		if (fatal_signal_pending(current)) {
-			if (!user_mode(regs))
-				goto no_context;
-			return 0;
-		}
+	/* Quick path to respond to signals */
+	if (fault_signal_pending(fault, regs)) {
+		if (!user_mode(regs))
+			goto no_context;
+		return 0;
+	}
 
-		/*
-		 * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of
-		 * starvation.
-		 */
-		if (mm_flags & FAULT_FLAG_ALLOW_RETRY) {
-			mm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
-			mm_flags |= FAULT_FLAG_TRIED;
-			goto retry;
-		}
+	/* The fault is fully completed (including releasing mmap lock) */
+	if (fault & VM_FAULT_COMPLETED)
+		return 0;
+
+	if (fault & VM_FAULT_RETRY) {
+		mm_flags |= FAULT_FLAG_TRIED;
+		goto retry;
 	}
-	up_read(&mm->mmap_sem);
+	mmap_read_unlock(mm);
 
 	/*
 	 * Handle the "normal" (no error) case first.
 	 */
 	if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
-			      VM_FAULT_BADACCESS)))) {
-		/*
-		 * Major/minor page fault accounting is only done
-		 * once. If we go through a retry, it is extremely
-		 * likely that the page will be found in page cache at
-		 * that point.
-		 */
-		if (major) {
-			current->maj_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs,
-				      addr);
-		} else {
-			current->min_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs,
-				      addr);
-		}
-
+			      VM_FAULT_BADACCESS))))
 		return 0;
-	}
 
 	/*
 	 * If we are in kernel mode at this point, we have no context to
@@ -586,8 +653,7 @@ retry:
 		 * We had some memory, but were unable to successfully fix up
 		 * this page fault.
 		 */
-		arm64_force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)addr,
-				      inf->name);
+		arm64_force_sig_fault(SIGBUS, BUS_ADRERR, far, inf->name);
 	} else if (fault & (VM_FAULT_HWPOISON_LARGE | VM_FAULT_HWPOISON)) {
 		unsigned int lsb;
 
@@ -595,8 +661,7 @@ retry:
 		if (fault & VM_FAULT_HWPOISON_LARGE)
 			lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
 
-		arm64_force_sig_mceerr(BUS_MCEERR_AR, (void __user *)addr, lsb,
-				       inf->name);
+		arm64_force_sig_mceerr(BUS_MCEERR_AR, far, lsb, inf->name);
 	} else {
 		/*
 		 * Something tried to access memory that isn't in our memory
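The replacement loop above encodes the current core-mm contract in three ordered checks: fault_signal_pending() first (the mmap lock is already dropped on that path), then VM_FAULT_COMPLETED (the handler finished and released the lock itself), then VM_FAULT_RETRY (go back to retry: with FAULT_FLAG_TRIED set). A minimal model of that control flow, with stand-in result bits and a hypothetical handler that retries exactly once:

#include <stdio.h>

/* Stand-in result bits and flags mirroring the control flow above. */
#define FAULT_RETRY	0x1u
#define FAULT_COMPLETED	0x2u
#define FLAG_TRIED	0x1u

/* Hypothetical handler: asks for one retry, then succeeds. */
static unsigned int handle_fault(unsigned int flags)
{
	return (flags & FLAG_TRIED) ? 0 : FAULT_RETRY;
}

int main(void)
{
	unsigned int flags = 0, fault;

	/* mmap_read_lock(mm); */
retry:
	fault = handle_fault(flags);

	if (fault & FAULT_COMPLETED)	/* handler already unlocked */
		return 0;

	if (fault & FAULT_RETRY) {	/* handler dropped the lock */
		flags |= FLAG_TRIED;
		printf("retrying with TRIED set\n");
		goto retry;		/* re-take the lock, walk again */
	}

	/* mmap_read_unlock(mm); */
	printf("done, fault = %#x\n", fault);
	return 0;
}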
@@ -604,8 +669,7 @@ retry:
 		 */
 		arm64_force_sig_fault(SIGSEGV,
 				      fault == VM_FAULT_BADACCESS ?
 				      SEGV_ACCERR : SEGV_MAPERR,
-				      (void __user *)addr,
-				      inf->name);
+				      far, inf->name);
 	}
 
 	return 0;
@@ -615,51 +679,77 @@ no_context:
 	return 0;
 }
 
-static int __kprobes do_translation_fault(unsigned long addr,
-					  unsigned int esr,
+static int __kprobes do_translation_fault(unsigned long far,
+					  unsigned long esr,
 					  struct pt_regs *regs)
 {
+	unsigned long addr = untagged_addr(far);
+
 	if (is_ttbr0_addr(addr))
-		return do_page_fault(addr, esr, regs);
+		return do_page_fault(far, esr, regs);
 
-	do_bad_area(addr, esr, regs);
+	do_bad_area(far, esr, regs);
 	return 0;
 }
 
-static int do_alignment_fault(unsigned long addr, unsigned int esr,
+static int do_alignment_fault(unsigned long far, unsigned long esr,
 			      struct pt_regs *regs)
 {
-	do_bad_area(addr, esr, regs);
+	if (IS_ENABLED(CONFIG_COMPAT_ALIGNMENT_FIXUPS) &&
+	    compat_user_mode(regs))
+		return do_compat_alignment_fixup(far, regs);
+	do_bad_area(far, esr, regs);
 	return 0;
 }
 
-static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+static int do_bad(unsigned long far, unsigned long esr, struct pt_regs *regs)
 {
 	return 1; /* "fault" */
 }
 
-static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+static int do_sea(unsigned long far, unsigned long esr, struct pt_regs *regs)
 {
 	const struct fault_info *inf;
-	void __user *siaddr;
+	unsigned long siaddr;
 
 	inf = esr_to_fault_info(esr);
 
-	/*
-	 * Return value ignored as we rely on signal merging.
-	 * Future patches will make this more robust.
-	 */
-	apei_claim_sea(regs);
+	if (user_mode(regs) && apei_claim_sea(regs) == 0) {
+		/*
+		 * APEI claimed this as a firmware-first notification.
+		 * Some processing deferred to task_work before ret_to_user().
+		 */
+		return 0;
+	}
 
-	if (esr & ESR_ELx_FnV)
-		siaddr = NULL;
-	else
-		siaddr = (void __user *)addr;
+	if (esr & ESR_ELx_FnV) {
+		siaddr = 0;
+	} else {
+		/*
+		 * The architecture specifies that the tag bits of FAR_EL1 are
+		 * UNKNOWN for synchronous external aborts. Mask them out now
+		 * so that userspace doesn't see them.
+		 */
+		siaddr = untagged_addr(far);
+	}
 	arm64_notify_die(inf->name, regs, inf->sig, inf->code, siaddr, esr);
 
 	return 0;
 }
 
+static int do_tag_check_fault(unsigned long far, unsigned long esr,
+			      struct pt_regs *regs)
+{
+	/*
+	 * The architecture specifies that bits 63:60 of FAR_EL1 are UNKNOWN
	 * for tag check faults. Set them to corresponding bits in the untagged
+	 * address.
+	 */
+	far = (__untagged_addr(far) & ~MTE_TAG_MASK) | (far & MTE_TAG_MASK);
+	do_bad_area(far, esr, regs);
+	return 0;
+}
+
 static const struct fault_info fault_info[] = {
 	{ do_bad,		SIGKILL, SI_KERNEL,	"ttbr address size fault"	},
 	{ do_bad,		SIGKILL, SI_KERNEL,	"level 1 address size fault"	},
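fault_info[], whose first entries appear above, is indexed directly by the six FSC bits of the ESR, which is why registering the new tag-check handler is the single-slot edit at index 0x11 (ESR_ELx_FSC_MTE) shown below. The shape of that dispatch, using GNU C range initialisers (a GCC/Clang extension the kernel itself relies on); the handlers are stand-ins:

#include <stdio.h>

struct handler {
	int (*fn)(unsigned long far, unsigned long esr);
	const char *name;
};

static int do_unknown(unsigned long far, unsigned long esr)
{
	(void)far; (void)esr;
	return 1;	/* unhandled, like do_bad() */
}

static int do_tag(unsigned long far, unsigned long esr)
{
	(void)far; (void)esr;
	return 0;	/* handled */
}

/* 64 slots, one per FSC value; range initialisers fill the default. */
static const struct handler handlers[64] = {
	[0 ... 63] = { do_unknown, "unknown" },
	[0x11]	   = { do_tag, "synchronous tag check fault" },
};

int main(void)
{
	unsigned long esr = 0x11;	/* FSC lives in the low six bits */
	const struct handler *h = &handlers[esr & 0x3f];

	printf("%s -> %d\n", h->name, h->fn(0, esr));
	return 0;
}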
@@ -678,7 +768,7 @@ static const struct fault_info fault_info[] = {
 	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 2 permission fault"	},
 	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 3 permission fault"	},
 	{ do_sea,		SIGBUS,  BUS_OBJERR,	"synchronous external abort"	},
-	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 17"			},
+	{ do_tag_check_fault,	SIGSEGV, SEGV_MTESERR,	"synchronous tag check fault"	},
 	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 18"			},
 	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 19"			},
 	{ do_sea,		SIGKILL, SI_KERNEL,	"level 0 (translation table walk)"	},
@@ -727,39 +817,34 @@ static const struct fault_info fault_info[] = {
 	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 63"			},
 };
 
-void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+void do_mem_abort(unsigned long far, unsigned long esr, struct pt_regs *regs)
 {
 	const struct fault_info *inf = esr_to_fault_info(esr);
+	unsigned long addr = untagged_addr(far);
 
-	if (!inf->fn(addr, esr, regs))
+	if (!inf->fn(far, esr, regs))
 		return;
 
-	if (!user_mode(regs)) {
-		pr_alert("Unhandled fault at 0x%016lx\n", addr);
-		mem_abort_decode(esr);
-		show_pte(addr);
-	}
+	if (!user_mode(regs))
+		die_kernel_fault(inf->name, addr, esr, regs);
 
-	arm64_notify_die(inf->name, regs,
-			 inf->sig, inf->code, (void __user *)addr, esr);
+	/*
+	 * At this point we have an unrecognized fault type whose tag bits may
+	 * have been defined as UNKNOWN. Therefore we only expose the untagged
+	 * address to the signal handler.
+	 */
+	arm64_notify_die(inf->name, regs, inf->sig, inf->code, addr, esr);
 }
 NOKPROBE_SYMBOL(do_mem_abort);
 
-void do_el0_irq_bp_hardening(void)
-{
-	/* PC has already been checked in entry.S */
-	arm64_apply_bp_hardening();
-}
-NOKPROBE_SYMBOL(do_el0_irq_bp_hardening);
-
-void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+void do_sp_pc_abort(unsigned long addr, unsigned long esr, struct pt_regs *regs)
 {
-	arm64_notify_die("SP/PC alignment exception", regs,
-			 SIGBUS, BUS_ADRALN, (void __user *)addr, esr);
+	arm64_notify_die("SP/PC alignment exception", regs, SIGBUS, BUS_ADRALN,
+			 addr, esr);
 }
 NOKPROBE_SYMBOL(do_sp_pc_abort);
 
-int __init early_brk64(unsigned long addr, unsigned int esr,
+int __init early_brk64(unsigned long addr, unsigned long esr,
 		       struct pt_regs *regs);
 
 /*
@@ -779,7 +864,7 @@ static struct fault_info __refdata debug_fault_info[] = {
 };
 
 void __init hook_debug_fault_code(int nr,
-				  int (*fn)(unsigned long, unsigned int, struct pt_regs *),
+				  int (*fn)(unsigned long, unsigned long, struct pt_regs *),
 				  int sig, int code, const char *name)
 {
 	BUG_ON(nr < 0 || nr >= ARRAY_SIZE(debug_fault_info));
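Every arm64_force_sig_* and arm64_notify_die() call site in this diff now states explicitly which address userspace may see: the full far for recognised fault types, the untagged or zeroed address otherwise. What arrives in the process is siginfo's si_addr; a generic POSIX demonstration of observing it (printf() is not async-signal-safe, acceptable only in a throwaway demo):

#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static void on_segv(int sig, siginfo_t *info, void *ctx)
{
	(void)ctx;
	printf("signal %d, si_addr = %p\n", sig, info->si_addr);
	_exit(0);	/* returning would re-execute the faulting store */
}

int main(void)
{
	struct sigaction sa = { 0 };

	sa.sa_sigaction = on_segv;
	sa.sa_flags = SA_SIGINFO;
	sigaction(SIGSEGV, &sa, NULL);

	*(volatile int *)8 = 1;	/* fault: si_addr should report 0x8 */
	return 0;
}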
@@ -799,25 +884,6 @@ void __init hook_debug_fault_code(int nr,
  */
 static void debug_exception_enter(struct pt_regs *regs)
 {
-	/*
-	 * Tell lockdep we disabled irqs in entry.S. Do nothing if they were
-	 * already disabled to preserve the last enabled/disabled addresses.
-	 */
-	if (interrupts_enabled(regs))
-		trace_hardirqs_off();
-
-	if (user_mode(regs)) {
-		RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
-	} else {
-		/*
-		 * We might have interrupted pretty much anything. In
-		 * fact, if we're a debug exception, we can even interrupt
-		 * NMI processing. We don't want this code makes in_nmi()
-		 * to return true, but we need to notify RCU.
-		 */
-		rcu_nmi_enter();
-	}
-
 	preempt_disable();
 
 	/* This code is a bit fragile. Test it. */
@@ -828,63 +894,49 @@ NOKPROBE_SYMBOL(debug_exception_enter);
 static void debug_exception_exit(struct pt_regs *regs)
 {
 	preempt_enable_no_resched();
-
-	if (!user_mode(regs))
-		rcu_nmi_exit();
-
-	if (interrupts_enabled(regs))
-		trace_hardirqs_on();
 }
 NOKPROBE_SYMBOL(debug_exception_exit);
 
-#ifdef CONFIG_ARM64_ERRATUM_1463225
-DECLARE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
-
-static int cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
-{
-	if (user_mode(regs))
-		return 0;
-
-	if (!__this_cpu_read(__in_cortex_a76_erratum_1463225_wa))
-		return 0;
-
-	/*
-	 * We've taken a dummy step exception from the kernel to ensure
-	 * that interrupts are re-enabled on the syscall path. Return back
-	 * to cortex_a76_erratum_1463225_svc_handler() with debug exceptions
-	 * masked so that we can safely restore the mdscr and get on with
-	 * handling the syscall.
-	 */
-	regs->pstate |= PSR_D_BIT;
-	return 1;
-}
-#else
-static int cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
-{
-	return 0;
-}
-#endif /* CONFIG_ARM64_ERRATUM_1463225 */
-NOKPROBE_SYMBOL(cortex_a76_erratum_1463225_debug_handler);
-
-void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
+void do_debug_exception(unsigned long addr_if_watchpoint, unsigned long esr,
 			struct pt_regs *regs)
 {
 	const struct fault_info *inf = esr_to_debug_fault_info(esr);
 	unsigned long pc = instruction_pointer(regs);
 
-	if (cortex_a76_erratum_1463225_debug_handler(regs))
-		return;
-
 	debug_exception_enter(regs);
 
 	if (user_mode(regs) && !is_ttbr0_addr(pc))
 		arm64_apply_bp_hardening();
 
 	if (inf->fn(addr_if_watchpoint, esr, regs)) {
-		arm64_notify_die(inf->name, regs,
-				 inf->sig, inf->code, (void __user *)pc, esr);
+		arm64_notify_die(inf->name, regs, inf->sig, inf->code, pc, esr);
 	}
 
 	debug_exception_exit(regs);
 }
 NOKPROBE_SYMBOL(do_debug_exception);
 
+/*
+ * Used during anonymous page fault handling.
+ */
+struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma,
+						unsigned long vaddr)
+{
+	gfp_t flags = GFP_HIGHUSER_MOVABLE | __GFP_ZERO;
+
+	/*
+	 * If the page is mapped with PROT_MTE, initialise the tags at the
+	 * point of allocation and page zeroing as this is usually faster than
+	 * separate DC ZVA and STGM.
+	 */
+	if (vma->vm_flags & VM_MTE)
+		flags |= __GFP_ZEROTAGS;
+
+	return alloc_page_vma(flags, vma, vaddr);
+}
+
+void tag_clear_highpage(struct page *page)
+{
+	mte_zero_clear_page_tags(page_address(page));
+	set_bit(PG_mte_tagged, &page->flags);
+}
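The closing hunk's alloc_zeroed_user_highpage_movable() folds MTE tag initialisation into page zeroing via __GFP_ZEROTAGS: for PROT_MTE mappings, one pass over the page replaces a zeroing pass (DC ZVA) plus a tag-store pass (STGM). A toy model of that combined pass, with a side array standing in for the tag store and MTE's 16-byte tag granule:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define PAGE_SIZE	4096
#define TAG_GRANULE	16	/* MTE tags memory in 16-byte granules */

static uint8_t page[PAGE_SIZE];
static uint8_t tags[PAGE_SIZE / TAG_GRANULE];	/* stand-in tag store */

/* One combined pass, the effect __GFP_ZEROTAGS arranges on arm64. */
static void zero_and_clear_tags(void)
{
	for (size_t i = 0; i < PAGE_SIZE; i += TAG_GRANULE) {
		memset(&page[i], 0, TAG_GRANULE);
		tags[i / TAG_GRANULE] = 0;
	}
}

int main(void)
{
	zero_and_clear_tags();
	printf("page[0] = %u, tag[0] = %u\n", page[0], tags[0]);
	return 0;
}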