about summary refs log tree commit diff stats
path: root/arch/arm64/mm/fault.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/mm/fault.c')
-rw-r--r-- arch/arm64/mm/fault.c 432
1 files changed, 242 insertions, 190 deletions
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 85566d32958f..3e9cf9826417 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -10,10 +10,12 @@
#include <linux/acpi.h>
#include <linux/bitfield.h>
#include <linux/extable.h>
+#include <linux/kfence.h>
#include <linux/signal.h>
#include <linux/mm.h>
#include <linux/hardirq.h>
#include <linux/init.h>
+#include <linux/kasan.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/page-flags.h>
@@ -28,20 +30,21 @@
#include <asm/bug.h>
#include <asm/cmpxchg.h>
#include <asm/cpufeature.h>
+#include <asm/efi.h>
#include <asm/exception.h>
#include <asm/daifflags.h>
#include <asm/debug-monitors.h>
#include <asm/esr.h>
#include <asm/kprobes.h>
+#include <asm/mte.h>
#include <asm/processor.h>
#include <asm/sysreg.h>
#include <asm/system_misc.h>
-#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/traps.h>
struct fault_info {
- int (*fn)(unsigned long addr, unsigned int esr,
+ int (*fn)(unsigned long far, unsigned long esr,
struct pt_regs *regs);
int sig;
int code;
@@ -51,17 +54,17 @@ struct fault_info {
static const struct fault_info fault_info[];
static struct fault_info debug_fault_info[];
-static inline const struct fault_info *esr_to_fault_info(unsigned int esr)
+static inline const struct fault_info *esr_to_fault_info(unsigned long esr)
{
return fault_info + (esr & ESR_ELx_FSC);
}
-static inline const struct fault_info *esr_to_debug_fault_info(unsigned int esr)
+static inline const struct fault_info *esr_to_debug_fault_info(unsigned long esr)
{
return debug_fault_info + DBG_ESR_EVT(esr);
}
-static void data_abort_decode(unsigned int esr)
+static void data_abort_decode(unsigned long esr)
{
pr_alert("Data abort info:\n");
@@ -83,11 +86,11 @@ static void data_abort_decode(unsigned int esr)
(esr & ESR_ELx_WNR) >> ESR_ELx_WNR_SHIFT);
}
-static void mem_abort_decode(unsigned int esr)
+static void mem_abort_decode(unsigned long esr)
{
pr_alert("Mem abort info:\n");
- pr_alert(" ESR = 0x%08x\n", esr);
+ pr_alert(" ESR = 0x%016lx\n", esr);
pr_alert(" EC = 0x%02lx: %s, IL = %u bits\n",
ESR_ELx_EC(esr), esr_get_class_string(esr),
(esr & ESR_ELx_IL) ? 32 : 16);
@@ -97,6 +100,8 @@ static void mem_abort_decode(unsigned int esr)
pr_alert(" EA = %lu, S1PTW = %lu\n",
(esr & ESR_ELx_EA) >> ESR_ELx_EA_SHIFT,
(esr & ESR_ELx_S1PTW) >> ESR_ELx_S1PTW_SHIFT);
+ pr_alert(" FSC = 0x%02lx: %s\n", (esr & ESR_ELx_FSC),
+ esr_to_fault_info(esr)->name);
if (esr_is_data_abort(esr))
data_abort_decode(esr);
@@ -145,6 +150,7 @@ static void show_pte(unsigned long addr)
pr_alert("[%016lx] pgd=%016llx", addr, pgd_val(pgd));
do {
+ p4d_t *p4dp, p4d;
pud_t *pudp, pud;
pmd_t *pmdp, pmd;
pte_t *ptep, pte;
@@ -152,7 +158,13 @@ static void show_pte(unsigned long addr)
if (pgd_none(pgd) || pgd_bad(pgd))
break;
- pudp = pud_offset(pgdp, addr);
+ p4dp = p4d_offset(pgdp, addr);
+ p4d = READ_ONCE(*p4dp);
+ pr_cont(", p4d=%016llx", p4d_val(p4d));
+ if (p4d_none(p4d) || p4d_bad(p4d))
+ break;
+
+ pudp = pud_offset(p4dp, addr);
pud = READ_ONCE(*pudp);
pr_cont(", pud=%016llx", pud_val(pud));
if (pud_none(pud) || pud_bad(pud))
@@ -212,22 +224,28 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval);
} while (pteval != old_pteval);
- flush_tlb_fix_spurious_fault(vma, address);
+ /* Invalidate a stale read-only entry */
+ if (dirty)
+ flush_tlb_page(vma, address);
return 1;
}
-static bool is_el1_instruction_abort(unsigned int esr)
+static bool is_el1_instruction_abort(unsigned long esr)
{
return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR;
}
-static inline bool is_el1_permission_fault(unsigned long addr, unsigned int esr,
+static bool is_el1_data_abort(unsigned long esr)
+{
+ return ESR_ELx_EC(esr) == ESR_ELx_EC_DABT_CUR;
+}
+
+static inline bool is_el1_permission_fault(unsigned long addr, unsigned long esr,
struct pt_regs *regs)
{
- unsigned int ec = ESR_ELx_EC(esr);
- unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;
+ unsigned long fsc_type = esr & ESR_ELx_FSC_TYPE;
- if (ec != ESR_ELx_EC_DABT_CUR && ec != ESR_ELx_EC_IABT_CUR)
+ if (!is_el1_data_abort(esr) && !is_el1_instruction_abort(esr))
return false;
if (fsc_type == ESR_ELx_FSC_PERM)
@@ -241,20 +259,20 @@ static inline bool is_el1_permission_fault(unsigned long addr, unsigned int esr,
}
static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr,
- unsigned int esr,
+ unsigned long esr,
struct pt_regs *regs)
{
unsigned long flags;
u64 par, dfsc;
- if (ESR_ELx_EC(esr) != ESR_ELx_EC_DABT_CUR ||
+ if (!is_el1_data_abort(esr) ||
(esr & ESR_ELx_FSC_TYPE) != ESR_ELx_FSC_FAULT)
return false;
local_irq_save(flags);
asm volatile("at s1e1r, %0" :: "r" (addr));
isb();
- par = read_sysreg(par_el1);
+ par = read_sysreg_par();
local_irq_restore(flags);
/*
@@ -273,22 +291,70 @@ static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr,
}
static void die_kernel_fault(const char *msg, unsigned long addr,
- unsigned int esr, struct pt_regs *regs)
+ unsigned long esr, struct pt_regs *regs)
{
bust_spinlocks(1);
pr_alert("Unable to handle kernel %s at virtual address %016lx\n", msg,
addr);
+ kasan_non_canonical_hook(addr);
+
mem_abort_decode(esr);
show_pte(addr);
die("Oops", regs, esr);
bust_spinlocks(0);
- do_exit(SIGKILL);
+ make_task_dead(SIGKILL);
}
-static void __do_kernel_fault(unsigned long addr, unsigned int esr,
+#ifdef CONFIG_KASAN_HW_TAGS
+static void report_tag_fault(unsigned long addr, unsigned long esr,
+ struct pt_regs *regs)
+{
+ /*
+ * SAS bits aren't set for all faults reported in EL1, so we can't
+ * find out access size.
+ */
+ bool is_write = !!(esr & ESR_ELx_WNR);
+ kasan_report(addr, 0, is_write, regs->pc);
+}
+#else
+/* Tag faults aren't enabled without CONFIG_KASAN_HW_TAGS. */
+static inline void report_tag_fault(unsigned long addr, unsigned long esr,
+ struct pt_regs *regs) { }
+#endif
+
+static void do_tag_recovery(unsigned long addr, unsigned long esr,
+ struct pt_regs *regs)
+{
+
+ report_tag_fault(addr, esr, regs);
+
+ /*
+ * Disable MTE Tag Checking on the local CPU for the current EL.
+ * It will be done lazily on the other CPUs when they hit a
+ * tag fault.
+ */
+ sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF_MASK,
+ SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF, NONE));
+ isb();
+}
+
+static bool is_el1_mte_sync_tag_check_fault(unsigned long esr)
+{
+ unsigned long fsc = esr & ESR_ELx_FSC;
+
+ if (!is_el1_data_abort(esr))
+ return false;
+
+ if (fsc == ESR_ELx_FSC_MTE)
+ return true;
+
+ return false;
+}
+
+static void __do_kernel_fault(unsigned long addr, unsigned long esr,
struct pt_regs *regs)
{
const char *msg;
@@ -304,6 +370,12 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr,
"Ignoring spurious kernel translation fault at virtual address %016lx\n", addr))
return;
+ if (is_el1_mte_sync_tag_check_fault(esr)) {
+ do_tag_recovery(addr, esr, regs);
+
+ return;
+ }
+
if (is_el1_permission_fault(addr, esr, regs)) {
if (esr & ESR_ELx_WNR)
msg = "write to read-only memory";
@@ -314,13 +386,19 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr,
} else if (addr < PAGE_SIZE) {
msg = "NULL pointer dereference";
} else {
+ if (kfence_handle_page_fault(addr, esr & ESR_ELx_WNR, regs))
+ return;
+
msg = "paging request";
}
+ if (efi_runtime_fixup_exception(regs, msg))
+ return;
+
die_kernel_fault(msg, addr, esr, regs);
}
-static void set_thread_esr(unsigned long address, unsigned int esr)
+static void set_thread_esr(unsigned long address, unsigned long esr)
{
current->thread.fault_address = address;
@@ -368,7 +446,7 @@ static void set_thread_esr(unsigned long address, unsigned int esr)
* exception level). Fail safe by not providing an ESR
* context record at all.
*/
- WARN(1, "ESR 0x%x is not DABT or IABT from EL0\n", esr);
+ WARN(1, "ESR 0x%lx is not DABT or IABT from EL0\n", esr);
esr = 0;
break;
}
@@ -377,8 +455,11 @@ static void set_thread_esr(unsigned long address, unsigned int esr)
current->thread.fault_code = esr;
}
-static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+static void do_bad_area(unsigned long far, unsigned long esr,
+ struct pt_regs *regs)
{
+ unsigned long addr = untagged_addr(far);
+
/*
* If we are in kernel mode at this point, we have no context to
* handle this fault with.
@@ -387,8 +468,7 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re
const struct fault_info *inf = esr_to_fault_info(esr);
set_thread_esr(addr, esr);
- arm64_force_sig_fault(inf->sig, inf->code, (void __user *)addr,
- inf->name);
+ arm64_force_sig_fault(inf->sig, inf->code, far, inf->name);
} else {
__do_kernel_fault(addr, esr, regs);
}
@@ -398,7 +478,8 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re
#define VM_FAULT_BADACCESS 0x020000
static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr,
- unsigned int mm_flags, unsigned long vm_flags)
+ unsigned int mm_flags, unsigned long vm_flags,
+ struct pt_regs *regs)
{
struct vm_area_struct *vma = find_vma(mm, addr);
@@ -422,10 +503,10 @@ static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr,
*/
if (!(vma->vm_flags & vm_flags))
return VM_FAULT_BADACCESS;
- return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags);
+ return handle_mm_fault(vma, addr, mm_flags, regs);
}
-static bool is_el0_instruction_abort(unsigned int esr)
+static bool is_el0_instruction_abort(unsigned long esr)
{
return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
}
@@ -434,19 +515,20 @@ static bool is_el0_instruction_abort(unsigned int esr)
* Note: not valid for EL1 DC IVAC, but we never use that such that it
* should fault. EL0 cannot issue DC IVAC (undef).
*/
-static bool is_write_abort(unsigned int esr)
+static bool is_write_abort(unsigned long esr)
{
return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM);
}
-static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
+static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
struct pt_regs *regs)
{
const struct fault_info *inf;
struct mm_struct *mm = current->mm;
- vm_fault_t fault, major = 0;
- unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC;
- unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+ vm_fault_t fault;
+ unsigned long vm_flags;
+ unsigned int mm_flags = FAULT_FLAG_DEFAULT;
+ unsigned long addr = untagged_addr(far);
if (kprobe_page_fault(regs, esr))
return 0;
@@ -461,20 +543,31 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
if (user_mode(regs))
mm_flags |= FAULT_FLAG_USER;
+ /*
+ * vm_flags tells us what bits we must have in vma->vm_flags
+ * for the fault to be benign. __do_page_fault() checks
+ * vma->vm_flags & vm_flags and returns an error if the
+ * intersection is empty.
+ */
if (is_el0_instruction_abort(esr)) {
+ /* It was an exec fault */
vm_flags = VM_EXEC;
mm_flags |= FAULT_FLAG_INSTRUCTION;
} else if (is_write_abort(esr)) {
+ /* It was a write fault */
vm_flags = VM_WRITE;
mm_flags |= FAULT_FLAG_WRITE;
+ } else {
+ /* It was a read fault */
+ vm_flags = VM_READ;
+ /* Write implies read */
+ vm_flags |= VM_WRITE;
+ /* If EPAN is absent then exec implies read */
+ if (!cpus_have_const_cap(ARM64_HAS_EPAN))
+ vm_flags |= VM_EXEC;
}
if (is_ttbr0_addr(addr) && is_el1_permission_fault(addr, esr, regs)) {
- /* regs->orig_addr_limit may be 0 if we entered from EL0 */
- if (regs->orig_addr_limit == KERNEL_DS)
- die_kernel_fault("access to user memory with fs=KERNEL_DS",
- addr, esr, regs);
-
if (is_el1_instruction_abort(esr))
die_kernel_fault("execution of user memory",
addr, esr, regs);
@@ -491,76 +584,50 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
* validly references user space from well defined areas of the code,
* we can bug out early if this is from code which shouldn't.
*/
- if (!down_read_trylock(&mm->mmap_sem)) {
+ if (!mmap_read_trylock(mm)) {
if (!user_mode(regs) && !search_exception_tables(regs->pc))
goto no_context;
retry:
- down_read(&mm->mmap_sem);
+ mmap_read_lock(mm);
} else {
/*
- * The above down_read_trylock() might have succeeded in which
+ * The above mmap_read_trylock() might have succeeded in which
* case, we'll have missed the might_sleep() from down_read().
*/
might_sleep();
#ifdef CONFIG_DEBUG_VM
if (!user_mode(regs) && !search_exception_tables(regs->pc)) {
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
goto no_context;
}
#endif
}
- fault = __do_page_fault(mm, addr, mm_flags, vm_flags);
- major |= fault & VM_FAULT_MAJOR;
+ fault = __do_page_fault(mm, addr, mm_flags, vm_flags, regs);
- if (fault & VM_FAULT_RETRY) {
- /*
- * If we need to retry but a fatal signal is pending,
- * handle the signal first. We do not need to release
- * the mmap_sem because it would already be released
- * in __lock_page_or_retry in mm/filemap.c.
- */
- if (fatal_signal_pending(current)) {
- if (!user_mode(regs))
- goto no_context;
- return 0;
- }
+ /* Quick path to respond to signals */
+ if (fault_signal_pending(fault, regs)) {
+ if (!user_mode(regs))
+ goto no_context;
+ return 0;
+ }
- /*
- * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of
- * starvation.
- */
- if (mm_flags & FAULT_FLAG_ALLOW_RETRY) {
- mm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
- mm_flags |= FAULT_FLAG_TRIED;
- goto retry;
- }
+ /* The fault is fully completed (including releasing mmap lock) */
+ if (fault & VM_FAULT_COMPLETED)
+ return 0;
+
+ if (fault & VM_FAULT_RETRY) {
+ mm_flags |= FAULT_FLAG_TRIED;
+ goto retry;
}
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
/*
* Handle the "normal" (no error) case first.
*/
if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
- VM_FAULT_BADACCESS)))) {
- /*
- * Major/minor page fault accounting is only done
- * once. If we go through a retry, it is extremely
- * likely that the page will be found in page cache at
- * that point.
- */
- if (major) {
- current->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs,
- addr);
- } else {
- current->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs,
- addr);
- }
-
+ VM_FAULT_BADACCESS))))
return 0;
- }
/*
* If we are in kernel mode at this point, we have no context to
@@ -586,8 +653,7 @@ retry:
* We had some memory, but were unable to successfully fix up
* this page fault.
*/
- arm64_force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)addr,
- inf->name);
+ arm64_force_sig_fault(SIGBUS, BUS_ADRERR, far, inf->name);
} else if (fault & (VM_FAULT_HWPOISON_LARGE | VM_FAULT_HWPOISON)) {
unsigned int lsb;
@@ -595,8 +661,7 @@ retry:
if (fault & VM_FAULT_HWPOISON_LARGE)
lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
- arm64_force_sig_mceerr(BUS_MCEERR_AR, (void __user *)addr, lsb,
- inf->name);
+ arm64_force_sig_mceerr(BUS_MCEERR_AR, far, lsb, inf->name);
} else {
/*
* Something tried to access memory that isn't in our memory
@@ -604,8 +669,7 @@ retry:
*/
arm64_force_sig_fault(SIGSEGV,
fault == VM_FAULT_BADACCESS ? SEGV_ACCERR : SEGV_MAPERR,
- (void __user *)addr,
- inf->name);
+ far, inf->name);
}
return 0;
@@ -615,51 +679,77 @@ no_context:
return 0;
}
-static int __kprobes do_translation_fault(unsigned long addr,
- unsigned int esr,
+static int __kprobes do_translation_fault(unsigned long far,
+ unsigned long esr,
struct pt_regs *regs)
{
+ unsigned long addr = untagged_addr(far);
+
if (is_ttbr0_addr(addr))
- return do_page_fault(addr, esr, regs);
+ return do_page_fault(far, esr, regs);
- do_bad_area(addr, esr, regs);
+ do_bad_area(far, esr, regs);
return 0;
}
-static int do_alignment_fault(unsigned long addr, unsigned int esr,
+static int do_alignment_fault(unsigned long far, unsigned long esr,
struct pt_regs *regs)
{
- do_bad_area(addr, esr, regs);
+ if (IS_ENABLED(CONFIG_COMPAT_ALIGNMENT_FIXUPS) &&
+ compat_user_mode(regs))
+ return do_compat_alignment_fixup(far, regs);
+ do_bad_area(far, esr, regs);
return 0;
}
-static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+static int do_bad(unsigned long far, unsigned long esr, struct pt_regs *regs)
{
return 1; /* "fault" */
}
-static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+static int do_sea(unsigned long far, unsigned long esr, struct pt_regs *regs)
{
const struct fault_info *inf;
- void __user *siaddr;
+ unsigned long siaddr;
inf = esr_to_fault_info(esr);
- /*
- * Return value ignored as we rely on signal merging.
- * Future patches will make this more robust.
- */
- apei_claim_sea(regs);
+ if (user_mode(regs) && apei_claim_sea(regs) == 0) {
+ /*
+ * APEI claimed this as a firmware-first notification.
+ * Some processing is deferred to task_work before ret_to_user().
+ */
+ return 0;
+ }
- if (esr & ESR_ELx_FnV)
- siaddr = NULL;
- else
- siaddr = (void __user *)addr;
+ if (esr & ESR_ELx_FnV) {
+ siaddr = 0;
+ } else {
+ /*
+ * The architecture specifies that the tag bits of FAR_EL1 are
+ * UNKNOWN for synchronous external aborts. Mask them out now
+ * so that userspace doesn't see them.
+ */
+ siaddr = untagged_addr(far);
+ }
arm64_notify_die(inf->name, regs, inf->sig, inf->code, siaddr, esr);
return 0;
}
+static int do_tag_check_fault(unsigned long far, unsigned long esr,
+ struct pt_regs *regs)
+{
+ /*
+ * The architecture specifies that bits 63:60 of FAR_EL1 are UNKNOWN
+ * for tag check faults. Set them to the corresponding bits in the
+ * untagged address.
+ */
+ far = (__untagged_addr(far) & ~MTE_TAG_MASK) | (far & MTE_TAG_MASK);
+ do_bad_area(far, esr, regs);
+ return 0;
+}
+
static const struct fault_info fault_info[] = {
{ do_bad, SIGKILL, SI_KERNEL, "ttbr address size fault" },
{ do_bad, SIGKILL, SI_KERNEL, "level 1 address size fault" },
@@ -678,7 +768,7 @@ static const struct fault_info fault_info[] = {
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" },
{ do_sea, SIGBUS, BUS_OBJERR, "synchronous external abort" },
- { do_bad, SIGKILL, SI_KERNEL, "unknown 17" },
+ { do_tag_check_fault, SIGSEGV, SEGV_MTESERR, "synchronous tag check fault" },
{ do_bad, SIGKILL, SI_KERNEL, "unknown 18" },
{ do_bad, SIGKILL, SI_KERNEL, "unknown 19" },
{ do_sea, SIGKILL, SI_KERNEL, "level 0 (translation table walk)" },
@@ -727,39 +817,34 @@ static const struct fault_info fault_info[] = {
{ do_bad, SIGKILL, SI_KERNEL, "unknown 63" },
};
-void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+void do_mem_abort(unsigned long far, unsigned long esr, struct pt_regs *regs)
{
const struct fault_info *inf = esr_to_fault_info(esr);
+ unsigned long addr = untagged_addr(far);
- if (!inf->fn(addr, esr, regs))
+ if (!inf->fn(far, esr, regs))
return;
- if (!user_mode(regs)) {
- pr_alert("Unhandled fault at 0x%016lx\n", addr);
- mem_abort_decode(esr);
- show_pte(addr);
- }
+ if (!user_mode(regs))
+ die_kernel_fault(inf->name, addr, esr, regs);
- arm64_notify_die(inf->name, regs,
- inf->sig, inf->code, (void __user *)addr, esr);
+ /*
+ * At this point we have an unrecognized fault type whose tag bits may
+ * have been defined as UNKNOWN. Therefore we only expose the untagged
+ * address to the signal handler.
+ */
+ arm64_notify_die(inf->name, regs, inf->sig, inf->code, addr, esr);
}
NOKPROBE_SYMBOL(do_mem_abort);
-void do_el0_irq_bp_hardening(void)
-{
- /* PC has already been checked in entry.S */
- arm64_apply_bp_hardening();
-}
-NOKPROBE_SYMBOL(do_el0_irq_bp_hardening);
-
-void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+void do_sp_pc_abort(unsigned long addr, unsigned long esr, struct pt_regs *regs)
{
- arm64_notify_die("SP/PC alignment exception", regs,
- SIGBUS, BUS_ADRALN, (void __user *)addr, esr);
+ arm64_notify_die("SP/PC alignment exception", regs, SIGBUS, BUS_ADRALN,
+ addr, esr);
}
NOKPROBE_SYMBOL(do_sp_pc_abort);
-int __init early_brk64(unsigned long addr, unsigned int esr,
+int __init early_brk64(unsigned long addr, unsigned long esr,
struct pt_regs *regs);
/*
@@ -779,7 +864,7 @@ static struct fault_info __refdata debug_fault_info[] = {
};
void __init hook_debug_fault_code(int nr,
- int (*fn)(unsigned long, unsigned int, struct pt_regs *),
+ int (*fn)(unsigned long, unsigned long, struct pt_regs *),
int sig, int code, const char *name)
{
BUG_ON(nr < 0 || nr >= ARRAY_SIZE(debug_fault_info));
@@ -799,25 +884,6 @@ void __init hook_debug_fault_code(int nr,
*/
static void debug_exception_enter(struct pt_regs *regs)
{
- /*
- * Tell lockdep we disabled irqs in entry.S. Do nothing if they were
- * already disabled to preserve the last enabled/disabled addresses.
- */
- if (interrupts_enabled(regs))
- trace_hardirqs_off();
-
- if (user_mode(regs)) {
- RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
- } else {
- /*
- * We might have interrupted pretty much anything. In
- * fact, if we're a debug exception, we can even interrupt
- * NMI processing. We don't want this code makes in_nmi()
- * to return true, but we need to notify RCU.
- */
- rcu_nmi_enter();
- }
-
preempt_disable();
/* This code is a bit fragile. Test it. */
@@ -828,63 +894,49 @@ NOKPROBE_SYMBOL(debug_exception_enter);
static void debug_exception_exit(struct pt_regs *regs)
{
preempt_enable_no_resched();
-
- if (!user_mode(regs))
- rcu_nmi_exit();
-
- if (interrupts_enabled(regs))
- trace_hardirqs_on();
}
NOKPROBE_SYMBOL(debug_exception_exit);
-#ifdef CONFIG_ARM64_ERRATUM_1463225
-DECLARE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
-
-static int cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
-{
- if (user_mode(regs))
- return 0;
-
- if (!__this_cpu_read(__in_cortex_a76_erratum_1463225_wa))
- return 0;
-
- /*
- * We've taken a dummy step exception from the kernel to ensure
- * that interrupts are re-enabled on the syscall path. Return back
- * to cortex_a76_erratum_1463225_svc_handler() with debug exceptions
- * masked so that we can safely restore the mdscr and get on with
- * handling the syscall.
- */
- regs->pstate |= PSR_D_BIT;
- return 1;
-}
-#else
-static int cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
-{
- return 0;
-}
-#endif /* CONFIG_ARM64_ERRATUM_1463225 */
-NOKPROBE_SYMBOL(cortex_a76_erratum_1463225_debug_handler);
-
-void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
+void do_debug_exception(unsigned long addr_if_watchpoint, unsigned long esr,
struct pt_regs *regs)
{
const struct fault_info *inf = esr_to_debug_fault_info(esr);
unsigned long pc = instruction_pointer(regs);
- if (cortex_a76_erratum_1463225_debug_handler(regs))
- return;
-
debug_exception_enter(regs);
if (user_mode(regs) && !is_ttbr0_addr(pc))
arm64_apply_bp_hardening();
if (inf->fn(addr_if_watchpoint, esr, regs)) {
- arm64_notify_die(inf->name, regs,
- inf->sig, inf->code, (void __user *)pc, esr);
+ arm64_notify_die(inf->name, regs, inf->sig, inf->code, pc, esr);
}
debug_exception_exit(regs);
}
NOKPROBE_SYMBOL(do_debug_exception);
+
+/*
+ * Used during anonymous page fault handling.
+ */
+struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma,
+ unsigned long vaddr)
+{
+ gfp_t flags = GFP_HIGHUSER_MOVABLE | __GFP_ZERO;
+
+ /*
+ * If the page is mapped with PROT_MTE, initialise the tags at the
+ * point of allocation and page zeroing as this is usually faster than
+ * separate DC ZVA and STGM.
+ */
+ if (vma->vm_flags & VM_MTE)
+ flags |= __GFP_ZEROTAGS;
+
+ return alloc_page_vma(flags, vma, vaddr);
+}
+
+void tag_clear_highpage(struct page *page)
+{
+ mte_zero_clear_page_tags(page_address(page));
+ set_bit(PG_mte_tagged, &page->flags);
+}