From 7af5b901e84743c608aae90cb0e429702812c324 Mon Sep 17 00:00:00 2001
From: Linus Walleij
Date: Mon, 25 Mar 2024 08:31:13 +0100
Subject: ARM: 9358/2: Implement PAN for LPAE by TTBR0 page table walks disablement

With LPAE enabled, privileged no-access cannot be enforced using CPU
domains, as that feature is not available. This patch implements PAN by
disabling TTBR0 page table walks while in kernel mode.

The ARM architecture allows page table walks to be split between TTBR0
and TTBR1. With LPAE enabled, the split is defined by a combination of
the TTBCR T0SZ and T1SZ bits. Currently, an LPAE-enabled kernel uses
TTBR0 for user addresses and TTBR1 for kernel addresses with the
VMSPLIT_2G and VMSPLIT_3G configurations. The main advantage of the 3:1
split is that TTBR1 is reduced to 2 levels, so the TLB refill is
potentially faster (though usually the first level entries are already
cached in the TLB).

The PAN support on LPAE-enabled kernels uses TTBR0 when running in user
space or in kernel space during user access routines (TTBCR T0SZ and
T1SZ are both 0). When user accesses are disabled in kernel mode, TTBR0
page table walks are disabled by setting TTBCR.EPD0. TTBR1 is used for
kernel accesses (including loadable modules; anything covered by
swapper_pg_dir) by reducing the TTBCR.T0SZ to the minimum
(2^(32-7) = 32MB). To avoid user accesses potentially hitting stale TLB
entries, the ASID is switched to 0 (reserved) by setting TTBCR.A1 and
using the ASID value in TTBR1. The difference from a non-PAN kernel is
that with the 3:1 memory split, TTBR1 always uses 3 levels of page
tables.

As part of the change we use preprocessor #elif defined() clauses, so
balance these clauses by converting the relevant preceding #ifdef
clauses to #if defined() clauses.

Signed-off-by: Catalin Marinas
Reviewed-by: Kees Cook
Tested-by: Florian Fainelli
Signed-off-by: Linus Walleij
Signed-off-by: Russell King (Oracle)
---
 arch/arm/mm/fault.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

(limited to 'arch/arm/mm/fault.c')

diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 439dc6a26bb9..dfa9554ef331 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -242,6 +242,27 @@ static inline bool is_permission_fault(unsigned int fsr)
 	return false;
 }
 
+#ifdef CONFIG_CPU_TTBR0_PAN
+static inline bool ttbr0_usermode_access_allowed(struct pt_regs *regs)
+{
+	struct svc_pt_regs *svcregs;
+
+	/* If we are in user mode: permission granted */
+	if (user_mode(regs))
+		return true;
+
+	/* uaccess state saved above pt_regs on SVC exception entry */
+	svcregs = to_svc_pt_regs(regs);
+
+	return !(svcregs->ttbcr & TTBCR_EPD0);
+}
+#else
+static inline bool ttbr0_usermode_access_allowed(struct pt_regs *regs)
+{
+	return true;
+}
+#endif
+
 static int __kprobes
 do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 {
@@ -285,6 +306,14 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 
 	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
 
+	/*
+	 * Privileged access aborts with CONFIG_CPU_TTBR0_PAN enabled are
+	 * routed via the translation fault mechanism. Check whether uaccess
+	 * is disabled while in kernel mode.
+	 */
+	if (!ttbr0_usermode_access_allowed(regs))
+		goto no_context;
+
 	if (!(flags & FAULT_FLAG_USER))
 		goto lock_mmap;
 
--
cgit v1.2.3-59-g8ed1b
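[Editor's note: as a reading aid, here is a minimal stand-alone user-space
sketch of the decision implemented by ttbr0_usermode_access_allowed() in the
patch above. It is not kernel code and not part of the patch; the struct, its
fields and the TTBCR_EPD0 value are simplified stand-ins for the real
svc_pt_regs state saved on SVC exception entry.]

/*
 * Illustrative sketch only: a privileged fault taken while TTBR0 walks
 * were disabled (EPD0 set) is a PAN violation; one taken while a uaccess
 * routine had walks enabled (EPD0 clear) is handled normally.
 */
#include <stdbool.h>
#include <stdio.h>

#define TTBCR_EPD0	(1u << 7)	/* stand-in: "TTBR0 walks disabled" bit */

struct fake_svc_regs {
	bool user_mode;		/* was the fault taken from user space? */
	unsigned int ttbcr;	/* TTBCR value saved at exception entry */
};

static bool ttbr0_usermode_access_allowed(const struct fake_svc_regs *regs)
{
	/* User-mode faults are always allowed to proceed. */
	if (regs->user_mode)
		return true;
	/* Kernel-mode faults proceed only if TTBR0 walks were enabled. */
	return !(regs->ttbcr & TTBCR_EPD0);
}

int main(void)
{
	struct fake_svc_regs uaccess_on  = { .user_mode = false, .ttbcr = 0 };
	struct fake_svc_regs uaccess_off = { .user_mode = false, .ttbcr = TTBCR_EPD0 };

	printf("uaccess enabled:  %s\n", ttbr0_usermode_access_allowed(&uaccess_on)
	       ? "handle fault" : "goto no_context");
	printf("uaccess disabled: %s\n", ttbr0_usermode_access_allowed(&uaccess_off)
	       ? "handle fault" : "goto no_context");
	return 0;
}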
From 15e4a5f5d8c9438530574d10241b8ece9f5edb61 Mon Sep 17 00:00:00 2001
From: Kefeng Wang
Date: Wed, 3 Apr 2024 16:38:01 +0800
Subject: arm: mm: accelerate pagefault when VM_FAULT_BADACCESS

The vm_flags of the vma are already checked under the per-VMA lock; if it
is a bad access, directly set fault to VM_FAULT_BADACCESS and handle the
error, with no need to retry with the mmap_lock again. Since the page
fault is handled under the per-VMA lock, count it as a vma lock event
with VMA_LOCK_SUCCESS.

Link: https://lkml.kernel.org/r/20240403083805.1818160-4-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang
Reviewed-by: Suren Baghdasaryan
Cc: Albert Ou
Cc: Alexander Gordeev
Cc: Andy Lutomirski
Cc: Catalin Marinas
Cc: Christophe Leroy
Cc: Dave Hansen
Cc: Gerald Schaefer
Cc: Michael Ellerman
Cc: Nicholas Piggin
Cc: Palmer Dabbelt
Cc: Paul Walmsley
Cc: Peter Zijlstra
Cc: Russell King
Cc: Will Deacon
Signed-off-by: Andrew Morton
---
 arch/arm/mm/fault.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/arm/mm/fault.c')

diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 439dc6a26bb9..5c4b417e24f9 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -294,7 +294,9 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 
 	if (!(vma->vm_flags & vm_flags)) {
 		vma_end_read(vma);
-		goto lock_mmap;
+		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+		fault = VM_FAULT_BADACCESS;
+		goto bad_area;
 	}
 	fault = handle_mm_fault(vma, addr, flags | FAULT_FLAG_VMA_LOCK, regs);
 	if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
--
cgit v1.2.3-59-g8ed1b
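[Editor's note: a small stand-alone C sketch, not part of the patch above,
contrasting the old and new behaviour when the vm_flags check fails under the
per-VMA lock. The flag values, function name and result strings are
illustrative stand-ins only.]

#include <stdio.h>

#define VM_READ	 0x1u
#define VM_WRITE 0x2u

/* Models the per-VMA-lock path: on a permission mismatch the old code
 * dropped back to the mmap_lock slow path and redid the lookup; the new
 * code reports the bad access immediately. */
static const char *vma_lock_fault(unsigned int vma_flags,
				  unsigned int required_flags,
				  int accelerate_badaccess)
{
	if (!(vma_flags & required_flags)) {
		if (!accelerate_badaccess)
			return "fall back to mmap_lock and retry";	/* old */
		return "VM_FAULT_BADACCESS handled right away";		/* new */
	}
	return "handle_mm_fault() under the per-VMA lock";
}

int main(void)
{
	/* A write fault against a read-only VMA. */
	printf("before the patch: %s\n", vma_lock_fault(VM_READ, VM_WRITE, 0));
	printf("after the patch:  %s\n", vma_lock_fault(VM_READ, VM_WRITE, 1));
	return 0;
}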
From e9016174621112624f957c8138bd3c34692e2e93 Mon Sep 17 00:00:00 2001
From: Kefeng Wang
Date: Thu, 11 Apr 2024 21:09:25 +0800
Subject: arm: mm: drop VM_FAULT_BADMAP/VM_FAULT_BADACCESS

If the map or the access is bad, directly set code to SEGV_MAPERR or
SEGV_ACCERR, set fault to 0 and go to the error handling, which lets us
drop the arch's special vm fault reasons.

[akpm@linux-foundation.org: coding-style cleanups]
Link: https://lkml.kernel.org/r/20240411130925.73281-3-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang
Cc: Aishwarya TCV
Cc: Catalin Marinas
Cc: Cristian Marussi
Cc: Mark Brown
Cc: Russell King
Cc: Will Deacon
Signed-off-by: Andrew Morton
---
 arch/arm/mm/fault.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

(limited to 'arch/arm/mm/fault.c')

diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 5c4b417e24f9..bf07afdb2dd9 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -226,9 +226,6 @@ void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 }
 
 #ifdef CONFIG_MMU
-#define VM_FAULT_BADMAP		((__force vm_fault_t)0x010000)
-#define VM_FAULT_BADACCESS	((__force vm_fault_t)0x020000)
-
 static inline bool is_permission_fault(unsigned int fsr)
 {
 	int fs = fsr_fs(fsr);
@@ -295,7 +292,8 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 	if (!(vma->vm_flags & vm_flags)) {
 		vma_end_read(vma);
 		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
-		fault = VM_FAULT_BADACCESS;
+		fault = 0;
+		code = SEGV_ACCERR;
 		goto bad_area;
 	}
 	fault = handle_mm_fault(vma, addr, flags | FAULT_FLAG_VMA_LOCK, regs);
@@ -321,7 +319,8 @@ lock_mmap:
 retry:
 	vma = lock_mm_and_find_vma(mm, addr, regs);
 	if (unlikely(!vma)) {
-		fault = VM_FAULT_BADMAP;
+		fault = 0;
+		code = SEGV_MAPERR;
 		goto bad_area;
 	}
 
@@ -329,10 +328,14 @@ retry:
 	 * ok, we have a good vm_area for this memory access, check the
 	 * permissions on the VMA allow for the fault which occurred.
 	 */
-	if (!(vma->vm_flags & vm_flags))
-		fault = VM_FAULT_BADACCESS;
-	else
-		fault = handle_mm_fault(vma, addr & PAGE_MASK, flags, regs);
+	if (!(vma->vm_flags & vm_flags)) {
+		mmap_read_unlock(mm);
+		fault = 0;
+		code = SEGV_ACCERR;
+		goto bad_area;
+	}
+
+	fault = handle_mm_fault(vma, addr & PAGE_MASK, flags, regs);
 
 	/* If we need to retry but a fatal signal is pending, handle the
 	 * signal first. We do not need to release the mmap_lock because
@@ -358,12 +361,11 @@ retry:
 	mmap_read_unlock(mm);
 
 done:
-	/*
-	 * Handle the "normal" case first - VM_FAULT_MAJOR
-	 */
-	if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS))))
+	/* Handle the "normal" case first */
+	if (likely(!(fault & VM_FAULT_ERROR)))
 		return 0;
 
+	code = SEGV_MAPERR;
 bad_area:
 	/*
 	 * If we are in kernel mode at this point, we
@@ -395,8 +397,6 @@ bad_area:
 		 * isn't in our memory map..
 		 */
 		sig = SIGSEGV;
-		code = fault == VM_FAULT_BADACCESS ?
-		       SEGV_ACCERR : SEGV_MAPERR;
 	}
 
 	__do_user_fault(addr, fsr, sig, code, regs);
--
cgit v1.2.3-59-g8ed1b
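[Editor's note: an illustrative stand-alone C sketch, not taken from the
kernel, of the idea behind this patch: record the si_code (SEGV_MAPERR or
SEGV_ACCERR) at the point where the failure is detected instead of encoding
it in special vm_fault_t bits and translating it later. The constants and
the helper below are local stand-ins.]

#include <stdio.h>

#define SEGV_MAPERR 1	/* address not mapped to object */
#define SEGV_ACCERR 2	/* invalid permissions for mapped object */

/* Each failure site records the si_code directly; no special fault bits. */
static int classify_fault(int vma_found, int perms_ok, int *code)
{
	if (!vma_found) {
		*code = SEGV_MAPERR;	/* no VMA covers the address */
		return -1;
	}
	if (!perms_ok) {
		*code = SEGV_ACCERR;	/* VMA exists, access not permitted */
		return -1;
	}
	return 0;			/* fault handled normally */
}

int main(void)
{
	int code;

	if (classify_fault(0, 0, &code))
		printf("unmapped address -> si_code %d (SEGV_MAPERR)\n", code);
	if (classify_fault(1, 0, &code))
		printf("bad permissions  -> si_code %d (SEGV_ACCERR)\n", code);
	if (!classify_fault(1, 1, &code))
		printf("valid access     -> handled, no signal\n");
	return 0;
}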