diff options
Diffstat (limited to 'arch/x86/mm/fault.c')
-rw-r--r-- | arch/x86/mm/fault.c | 75 |
1 files changed, 22 insertions, 53 deletions
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 622d12ec7f08..998bd807fc7b 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -7,19 +7,18 @@ #include <linux/sched.h> /* test_thread_flag(), ... */ #include <linux/sched/task_stack.h> /* task_stack_*(), ... */ #include <linux/kdebug.h> /* oops_begin/end, ... */ -#include <linux/extable.h> /* search_exception_tables */ #include <linux/memblock.h> /* max_low_pfn */ #include <linux/kfence.h> /* kfence_handle_page_fault */ #include <linux/kprobes.h> /* NOKPROBE_SYMBOL, ... */ #include <linux/mmiotrace.h> /* kmmio_handler, ... */ #include <linux/perf_event.h> /* perf_sw_event */ #include <linux/hugetlb.h> /* hstate_index_to_shift */ -#include <linux/prefetch.h> /* prefetchw */ #include <linux/context_tracking.h> /* exception_enter(), ... */ #include <linux/uaccess.h> /* faulthandler_disabled() */ #include <linux/efi.h> /* efi_crash_gracefully_on_page_fault()*/ #include <linux/mm_types.h> #include <linux/mm.h> /* find_and_lock_vma() */ +#include <linux/vmalloc.h> #include <asm/cpufeature.h> /* boot_cpu_has, ... */ #include <asm/traps.h> /* dotraplinkage, ... */ @@ -38,7 +37,7 @@ #include <asm/sev.h> /* snp_dump_hva_rmpentry() */ #define CREATE_TRACE_POINTS -#include <asm/trace/exceptions.h> +#include <trace/events/exceptions.h> /* * Returns 0 if mmiotrace is disabled, or if the fault is not @@ -514,18 +513,19 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long ad if (error_code & X86_PF_INSTR) { unsigned int level; + bool nx, rw; pgd_t *pgd; pte_t *pte; pgd = __va(read_cr3_pa()); pgd += pgd_index(address); - pte = lookup_address_in_pgd(pgd, address, &level); + pte = lookup_address_in_pgd_attr(pgd, address, &level, &nx, &rw); - if (pte && pte_present(*pte) && !pte_exec(*pte)) + if (pte && pte_present(*pte) && (!pte_exec(*pte) || nx)) pr_crit("kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n", from_kuid(&init_user_ns, current_uid())); - if (pte && pte_present(*pte) && pte_exec(*pte) && + if (pte && pte_present(*pte) && pte_exec(*pte) && !nx && (pgd_flags(*pgd) & _PAGE_USER) && (__read_cr4() & X86_CR4_SMEP)) pr_crit("unable to execute userspace code (SMEP?) (uid: %d)\n", @@ -676,7 +676,7 @@ page_fault_oops(struct pt_regs *regs, unsigned long error_code, ASM_CALL_ARG3, , [arg1] "r" (regs), [arg2] "r" (address), [arg3] "r" (&info)); - unreachable(); + BUG(); } #endif @@ -723,39 +723,8 @@ kernelmode_fixup_or_oops(struct pt_regs *regs, unsigned long error_code, WARN_ON_ONCE(user_mode(regs)); /* Are we prepared to handle this kernel fault? */ - if (fixup_exception(regs, X86_TRAP_PF, error_code, address)) { - /* - * Any interrupt that takes a fault gets the fixup. This makes - * the below recursive fault logic only apply to a faults from - * task context. - */ - if (in_interrupt()) - return; - - /* - * Per the above we're !in_interrupt(), aka. task context. - * - * In this case we need to make sure we're not recursively - * faulting through the emulate_vsyscall() logic. - */ - if (current->thread.sig_on_uaccess_err && signal) { - sanitize_error_code(address, &error_code); - - set_signal_archinfo(address, error_code); - - if (si_code == SEGV_PKUERR) { - force_sig_pkuerr((void __user *)address, pkey); - } else { - /* XXX: hwpoison faults will set the wrong code. */ - force_sig_fault(signal, si_code, (void __user *)address); - } - } - - /* - * Barring that, we can do the fixup and be happy. - */ + if (fixup_exception(regs, X86_TRAP_PF, error_code, address)) return; - } /* * AMD erratum #91 manifests as a spurious page fault on a PREFETCH @@ -865,14 +834,17 @@ bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, static void __bad_area(struct pt_regs *regs, unsigned long error_code, - unsigned long address, u32 pkey, int si_code) + unsigned long address, struct mm_struct *mm, + struct vm_area_struct *vma, u32 pkey, int si_code) { - struct mm_struct *mm = current->mm; /* * Something tried to access memory that isn't in our memory map.. * Fix it, but check if it's kernel or user first.. */ - mmap_read_unlock(mm); + if (mm) + mmap_read_unlock(mm); + else + vma_end_read(vma); __bad_area_nosemaphore(regs, error_code, address, pkey, si_code); } @@ -896,7 +868,8 @@ static inline bool bad_area_access_from_pkeys(unsigned long error_code, static noinline void bad_area_access_error(struct pt_regs *regs, unsigned long error_code, - unsigned long address, struct vm_area_struct *vma) + unsigned long address, struct mm_struct *mm, + struct vm_area_struct *vma) { /* * This OSPKE check is not strictly necessary at runtime. @@ -926,9 +899,9 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code, */ u32 pkey = vma_pkey(vma); - __bad_area(regs, error_code, address, pkey, SEGV_PKUERR); + __bad_area(regs, error_code, address, mm, vma, pkey, SEGV_PKUERR); } else { - __bad_area(regs, error_code, address, 0, SEGV_ACCERR); + __bad_area(regs, error_code, address, mm, vma, 0, SEGV_ACCERR); } } @@ -1356,8 +1329,9 @@ void do_user_addr_fault(struct pt_regs *regs, goto lock_mmap; if (unlikely(access_error(error_code, vma))) { - vma_end_read(vma); - goto lock_mmap; + bad_area_access_error(regs, error_code, address, NULL, vma); + count_vm_vma_lock_event(VMA_LOCK_SUCCESS); + return; } fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs); if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED))) @@ -1393,7 +1367,7 @@ retry: * we can handle it.. */ if (unlikely(access_error(error_code, vma))) { - bad_area_access_error(regs, error_code, address, vma); + bad_area_access_error(regs, error_code, address, mm, vma); return; } @@ -1480,9 +1454,6 @@ static __always_inline void trace_page_fault_entries(struct pt_regs *regs, unsigned long error_code, unsigned long address) { - if (!trace_pagefault_enabled()) - return; - if (user_mode(regs)) trace_page_fault_user(address, regs, error_code); else @@ -1521,8 +1492,6 @@ DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault) address = cpu_feature_enabled(X86_FEATURE_FRED) ? fred_event_data(regs) : read_cr2(); - prefetchw(¤t->mm->mmap_lock); - /* * KVM uses #PF vector to deliver 'page not present' events to guests * (asynchronous page fault mechanism). The event happens when a |