aboutsummaryrefslogtreecommitdiffstats
path: root/arch/arm64/mm/fault.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/mm/fault.c')
-rw-r--r--arch/arm64/mm/fault.c215
1 files changed, 156 insertions, 59 deletions
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 37b95dff0b07..c7861c9864e6 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -31,6 +31,7 @@
#include <linux/highmem.h>
#include <linux/perf_event.h>
#include <linux/preempt.h>
+#include <linux/hugetlb.h>
#include <asm/bug.h>
#include <asm/cpufeature.h>
@@ -42,6 +43,8 @@
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
+#include <acpi/ghes.h>
+
struct fault_info {
int (*fn)(unsigned long addr, unsigned int esr,
struct pt_regs *regs);
@@ -80,18 +83,35 @@ static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
#endif
/*
- * Dump out the page tables associated with 'addr' in mm 'mm'.
+ * Dump out the page tables associated with 'addr' in the currently active mm.
*/
-void show_pte(struct mm_struct *mm, unsigned long addr)
+void show_pte(unsigned long addr)
{
+ struct mm_struct *mm;
pgd_t *pgd;
- if (!mm)
+ if (addr < TASK_SIZE) {
+ /* TTBR0 */
+ mm = current->active_mm;
+ if (mm == &init_mm) {
+ pr_alert("[%016lx] user address but active_mm is swapper\n",
+ addr);
+ return;
+ }
+ } else if (addr >= VA_START) {
+ /* TTBR1 */
mm = &init_mm;
+ } else {
+ pr_alert("[%016lx] address between user and kernel address ranges\n",
+ addr);
+ return;
+ }
- pr_alert("pgd = %p\n", mm->pgd);
+ pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgd = %p\n",
+ mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K,
+ VA_BITS, mm->pgd);
pgd = pgd_offset(mm, addr);
- pr_alert("[%08lx] *pgd=%016llx", addr, pgd_val(*pgd));
+ pr_alert("[%016lx] *pgd=%016llx", addr, pgd_val(*pgd));
do {
pud_t *pud;
@@ -196,8 +216,8 @@ static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs,
/*
* The kernel tried to access some page that wasn't present.
*/
-static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr,
- unsigned int esr, struct pt_regs *regs)
+static void __do_kernel_fault(unsigned long addr, unsigned int esr,
+ struct pt_regs *regs)
{
const char *msg;
@@ -227,7 +247,7 @@ static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr,
pr_alert("Unable to handle kernel %s at virtual address %08lx\n", msg,
addr);
- show_pte(mm, addr);
+ show_pte(addr);
die("Oops", regs, esr);
bust_spinlocks(0);
do_exit(SIGKILL);
@@ -239,18 +259,20 @@ static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr,
*/
static void __do_user_fault(struct task_struct *tsk, unsigned long addr,
unsigned int esr, unsigned int sig, int code,
- struct pt_regs *regs)
+ struct pt_regs *regs, int fault)
{
struct siginfo si;
const struct fault_info *inf;
+ unsigned int lsb = 0;
if (unhandled_signal(tsk, sig) && show_unhandled_signals_ratelimited()) {
inf = esr_to_fault_info(esr);
- pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x\n",
+ pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x",
tsk->comm, task_pid_nr(tsk), inf->name, sig,
addr, esr);
- show_pte(tsk->mm, addr);
- show_regs(regs);
+ print_vma_addr(KERN_CONT ", in ", regs->pc);
+ pr_cont("\n");
+ __show_regs(regs);
}
tsk->thread.fault_address = addr;
@@ -259,13 +281,23 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr,
si.si_errno = 0;
si.si_code = code;
si.si_addr = (void __user *)addr;
+ /*
+ * Either small page or large page may be poisoned.
+ * In other words, VM_FAULT_HWPOISON_LARGE and
+ * VM_FAULT_HWPOISON are mutually exclusive.
+ */
+ if (fault & VM_FAULT_HWPOISON_LARGE)
+ lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
+ else if (fault & VM_FAULT_HWPOISON)
+ lsb = PAGE_SHIFT;
+ si.si_addr_lsb = lsb;
+
force_sig_info(sig, &si, tsk);
}
static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
struct task_struct *tsk = current;
- struct mm_struct *mm = tsk->active_mm;
const struct fault_info *inf;
/*
@@ -274,9 +306,9 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re
*/
if (user_mode(regs)) {
inf = esr_to_fault_info(esr);
- __do_user_fault(tsk, addr, esr, inf->sig, inf->code, regs);
+ __do_user_fault(tsk, addr, esr, inf->sig, inf->code, regs, 0);
} else
- __do_kernel_fault(mm, addr, esr, regs);
+ __do_kernel_fault(addr, esr, regs);
}
#define VM_FAULT_BADMAP 0x010000
@@ -329,7 +361,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
{
struct task_struct *tsk;
struct mm_struct *mm;
- int fault, sig, code;
+ int fault, sig, code, major = 0;
unsigned long vm_flags = VM_READ | VM_WRITE;
unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
@@ -368,6 +400,8 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
die("Accessing user space memory outside uaccess.h routines", regs, esr);
}
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
+
/*
* As per x86, we may deadlock here. However, since the kernel only
* validly references user space from well defined areas of the code,
@@ -391,24 +425,42 @@ retry:
}
fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk);
+ major |= fault & VM_FAULT_MAJOR;
- /*
- * If we need to retry but a fatal signal is pending, handle the
- * signal first. We do not need to release the mmap_sem because it
- * would already be released in __lock_page_or_retry in mm/filemap.c.
- */
- if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
- return 0;
+ if (fault & VM_FAULT_RETRY) {
+ /*
+ * If we need to retry but a fatal signal is pending,
+ * handle the signal first. We do not need to release
+ * the mmap_sem because it would already be released
+ * in __lock_page_or_retry in mm/filemap.c.
+ */
+ if (fatal_signal_pending(current))
+ return 0;
+
+ /*
+ * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of
+ * starvation.
+ */
+ if (mm_flags & FAULT_FLAG_ALLOW_RETRY) {
+ mm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ mm_flags |= FAULT_FLAG_TRIED;
+ goto retry;
+ }
+ }
+ up_read(&mm->mmap_sem);
/*
- * Major/minor page fault accounting is only done on the initial
- * attempt. If we go through a retry, it is extremely likely that the
- * page will be found in page cache at that point.
+ * Handle the "normal" (no error) case first.
*/
-
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
- if (mm_flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR) {
+ if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
+ VM_FAULT_BADACCESS)))) {
+ /*
+ * Major/minor page fault accounting is only done
+ * once. If we go through a retry, it is extremely
+ * likely that the page will be found in page cache at
+ * that point.
+ */
+ if (major) {
tsk->maj_flt++;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs,
addr);
@@ -417,25 +469,9 @@ retry:
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs,
addr);
}
- if (fault & VM_FAULT_RETRY) {
- /*
- * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of
- * starvation.
- */
- mm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
- mm_flags |= FAULT_FLAG_TRIED;
- goto retry;
- }
- }
-
- up_read(&mm->mmap_sem);
- /*
- * Handle the "normal" case first - VM_FAULT_MAJOR
- */
- if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
- VM_FAULT_BADACCESS))))
return 0;
+ }
/*
* If we are in kernel mode at this point, we have no context to
@@ -461,6 +497,9 @@ retry:
*/
sig = SIGBUS;
code = BUS_ADRERR;
+ } else if (fault & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) {
+ sig = SIGBUS;
+ code = BUS_MCEERR_AR;
} else {
/*
* Something tried to access memory that isn't in our memory
@@ -471,11 +510,11 @@ retry:
SEGV_ACCERR : SEGV_MAPERR;
}
- __do_user_fault(tsk, addr, esr, sig, code, regs);
+ __do_user_fault(tsk, addr, esr, sig, code, regs, fault);
return 0;
no_context:
- __do_kernel_fault(mm, addr, esr, regs);
+ __do_kernel_fault(addr, esr, regs);
return 0;
}
@@ -522,6 +561,47 @@ static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs)
return 1;
}
+/*
+ * This abort handler deals with Synchronous External Abort.
+ * It calls notifiers, and then returns "fault".
+ */
+static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+{
+ struct siginfo info;
+ const struct fault_info *inf;
+ int ret = 0;
+
+ inf = esr_to_fault_info(esr);
+ pr_err("Synchronous External Abort: %s (0x%08x) at 0x%016lx\n",
+ inf->name, esr, addr);
+
+ /*
+ * Synchronous aborts may interrupt code which had interrupts masked.
+ * Before calling out into the wider kernel tell the interested
+ * subsystems.
+ */
+ if (IS_ENABLED(CONFIG_ACPI_APEI_SEA)) {
+ if (interrupts_enabled(regs))
+ nmi_enter();
+
+ ret = ghes_notify_sea();
+
+ if (interrupts_enabled(regs))
+ nmi_exit();
+ }
+
+ info.si_signo = SIGBUS;
+ info.si_errno = 0;
+ info.si_code = 0;
+ if (esr & ESR_ELx_FnV)
+ info.si_addr = NULL;
+ else
+ info.si_addr = (void __user *)addr;
+ arm64_notify_die("", regs, &info, esr);
+
+ return ret;
+}
+
static const struct fault_info fault_info[] = {
{ do_bad, SIGBUS, 0, "ttbr address size fault" },
{ do_bad, SIGBUS, 0, "level 1 address size fault" },
@@ -539,22 +619,22 @@ static const struct fault_info fault_info[] = {
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" },
- { do_bad, SIGBUS, 0, "synchronous external abort" },
+ { do_sea, SIGBUS, 0, "synchronous external abort" },
{ do_bad, SIGBUS, 0, "unknown 17" },
{ do_bad, SIGBUS, 0, "unknown 18" },
{ do_bad, SIGBUS, 0, "unknown 19" },
- { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" },
- { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" },
- { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" },
- { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" },
- { do_bad, SIGBUS, 0, "synchronous parity error" },
+ { do_sea, SIGBUS, 0, "level 0 (translation table walk)" },
+ { do_sea, SIGBUS, 0, "level 1 (translation table walk)" },
+ { do_sea, SIGBUS, 0, "level 2 (translation table walk)" },
+ { do_sea, SIGBUS, 0, "level 3 (translation table walk)" },
+ { do_sea, SIGBUS, 0, "synchronous parity or ECC error" },
{ do_bad, SIGBUS, 0, "unknown 25" },
{ do_bad, SIGBUS, 0, "unknown 26" },
{ do_bad, SIGBUS, 0, "unknown 27" },
- { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
- { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
- { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
- { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
+ { do_sea, SIGBUS, 0, "level 0 synchronous parity error (translation table walk)" },
+ { do_sea, SIGBUS, 0, "level 1 synchronous parity error (translation table walk)" },
+ { do_sea, SIGBUS, 0, "level 2 synchronous parity error (translation table walk)" },
+ { do_sea, SIGBUS, 0, "level 3 synchronous parity error (translation table walk)" },
{ do_bad, SIGBUS, 0, "unknown 32" },
{ do_alignment_fault, SIGBUS, BUS_ADRALN, "alignment fault" },
{ do_bad, SIGBUS, 0, "unknown 34" },
@@ -590,6 +670,23 @@ static const struct fault_info fault_info[] = {
};
/*
+ * Handle Synchronous External Aborts that occur in a guest kernel.
+ *
+ * The return value will be zero if the SEA was successfully handled
+ * and non-zero if there was an error processing the error or there was
+ * no error to process.
+ */
+int handle_guest_sea(phys_addr_t addr, unsigned int esr)
+{
+ int ret = -ENOENT;
+
+ if (IS_ENABLED(CONFIG_ACPI_APEI_SEA))
+ ret = ghes_notify_sea();
+
+ return ret;
+}
+
+/*
* Dispatch a data abort to the relevant handler.
*/
asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,