Diffstat (limited to 'arch/x86/mm')
-rw-r--r--  arch/x86/mm/Makefile          |  8
-rw-r--r--  arch/x86/mm/fault.c           | 33
-rw-r--r--  arch/x86/mm/hugetlbpage.c     |  4
-rw-r--r--  arch/x86/mm/init.c            |  2
-rw-r--r--  arch/x86/mm/init_64.c         | 21
-rw-r--r--  arch/x86/mm/ioremap.c         |  3
-rw-r--r--  arch/x86/mm/kmsan_shadow.c    | 20
-rw-r--r--  arch/x86/mm/pat/cpa-test.c    |  4
-rw-r--r--  arch/x86/mm/pat/set_memory.c  | 53
-rw-r--r--  arch/x86/mm/pgtable.c         |  5
10 files changed, 136 insertions(+), 17 deletions(-)
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index f8220fd2c169..c80febc44cd2 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -4,19 +4,24 @@ KCOV_INSTRUMENT_tlb.o := n
 KCOV_INSTRUMENT_mem_encrypt.o := n
 KCOV_INSTRUMENT_mem_encrypt_amd.o := n
 KCOV_INSTRUMENT_mem_encrypt_identity.o := n
+KCOV_INSTRUMENT_pgprot.o := n
 
 KASAN_SANITIZE_mem_encrypt.o := n
 KASAN_SANITIZE_mem_encrypt_amd.o := n
 KASAN_SANITIZE_mem_encrypt_identity.o := n
+KASAN_SANITIZE_pgprot.o := n
 
 # Disable KCSAN entirely, because otherwise we get warnings that some functions
 # reference __initdata sections.
 KCSAN_SANITIZE := n
+# Avoid recursion by not calling KMSAN hooks for CEA code.
+KMSAN_SANITIZE_cpu_entry_area.o := n
 
 ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_mem_encrypt.o = -pg
 CFLAGS_REMOVE_mem_encrypt_amd.o = -pg
 CFLAGS_REMOVE_mem_encrypt_identity.o = -pg
+CFLAGS_REMOVE_pgprot.o = -pg
 endif
 
 obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o mmap.o \
@@ -41,6 +46,9 @@ obj-$(CONFIG_HIGHMEM) += highmem_32.o
 KASAN_SANITIZE_kasan_init_$(BITS).o := n
 obj-$(CONFIG_KASAN) += kasan_init_$(BITS).o
 
+KMSAN_SANITIZE_kmsan_shadow.o := n
+obj-$(CONFIG_KMSAN) += kmsan_shadow.o
+
 obj-$(CONFIG_MMIOTRACE) += mmiotrace.o
 mmiotrace-y := kmmio.o pf_in.o mmio-mod.o
 obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index fa71a5d12e87..7b0d4ab894c8 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -260,7 +260,7 @@ static noinline int vmalloc_fault(unsigned long address)
 }
 NOKPROBE_SYMBOL(vmalloc_fault);
 
-void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
+static void __arch_sync_kernel_mappings(unsigned long start, unsigned long end)
 {
 	unsigned long addr;
 
@@ -284,6 +284,27 @@ void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
 	}
 }
 
+void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
+{
+	__arch_sync_kernel_mappings(start, end);
+#ifdef CONFIG_KMSAN
+	/*
+	 * KMSAN maintains two additional metadata page mappings for the
+	 * [VMALLOC_START, VMALLOC_END) range. These mappings start at
+	 * KMSAN_VMALLOC_SHADOW_START and KMSAN_VMALLOC_ORIGIN_START and
+	 * have to be synced together with the vmalloc memory mapping.
+	 */
+	if (start >= VMALLOC_START && end < VMALLOC_END) {
+		__arch_sync_kernel_mappings(
+			start - VMALLOC_START + KMSAN_VMALLOC_SHADOW_START,
+			end - VMALLOC_START + KMSAN_VMALLOC_SHADOW_START);
+		__arch_sync_kernel_mappings(
+			start - VMALLOC_START + KMSAN_VMALLOC_ORIGIN_START,
+			end - VMALLOC_START + KMSAN_VMALLOC_ORIGIN_START);
+	}
+#endif
+}
+
 static bool low_pfn(unsigned long pfn)
 {
 	return pfn < max_low_pfn;
@@ -769,6 +790,8 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
 		unsigned long address, struct task_struct *tsk)
 {
 	const char *loglvl = task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG;
+	/* This is a racy snapshot, but it's better than nothing. */
+	int cpu = raw_smp_processor_id();
 
 	if (!unhandled_signal(tsk, SIGSEGV))
 		return;
@@ -782,6 +805,14 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
 
 	print_vma_addr(KERN_CONT " in ", regs->ip);
 
+	/*
+	 * Dump the likely CPU where the fatal segfault happened.
+	 * This can help identify faulty hardware.
+	 */
+	printk(KERN_CONT " likely on CPU %d (core %d, socket %d)", cpu,
+	       topology_core_id(cpu), topology_physical_package_id(cpu));
+
 	printk(KERN_CONT "\n");
 
 	show_opcodes(regs, loglvl);
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 6b3033845c6d..5804bbae4f01 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -37,8 +37,12 @@ int pmd_huge(pmd_t pmd)
  */
 int pud_huge(pud_t pud)
 {
+#if CONFIG_PGTABLE_LEVELS > 2
 	return !pud_none(pud) &&
 		(pud_val(pud) & (_PAGE_PRESENT|_PAGE_PSE)) != _PAGE_PRESENT;
+#else
+	return 0;
+#endif
 }
 
 #ifdef CONFIG_HUGETLB_PAGE
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 82a042c03824..9121bc1b9453 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -1054,7 +1054,7 @@ void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache)
 }
 
 #ifdef CONFIG_SWAP
-unsigned long max_swapfile_size(void)
+unsigned long arch_max_swapfile_size(void)
 {
 	unsigned long pages;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 0fe690ebc269..3f040c6e5d13 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -90,6 +90,12 @@ DEFINE_ENTRY(pud, pud, init)
 DEFINE_ENTRY(pmd, pmd, init)
 DEFINE_ENTRY(pte, pte, init)
 
+static inline pgprot_t prot_sethuge(pgprot_t prot)
+{
+	WARN_ON_ONCE(pgprot_val(prot) & _PAGE_PAT);
+
+	return __pgprot(pgprot_val(prot) | _PAGE_PSE);
+}
 
 /*
  * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
@@ -557,9 +563,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
 		if (page_size_mask & (1<<PG_LEVEL_2M)) {
 			pages++;
 			spin_lock(&init_mm.page_table_lock);
-			set_pte_init((pte_t *)pmd,
-				     pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT,
-					     __pgprot(pgprot_val(prot) | _PAGE_PSE)),
+			set_pmd_init(pmd,
+				     pfn_pmd(paddr >> PAGE_SHIFT, prot_sethuge(prot)),
 				     init);
 			spin_unlock(&init_mm.page_table_lock);
 			paddr_last = paddr_next;
@@ -644,12 +649,8 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
 		if (page_size_mask & (1<<PG_LEVEL_1G)) {
 			pages++;
 			spin_lock(&init_mm.page_table_lock);
-
-			prot = __pgprot(pgprot_val(prot) | _PAGE_PSE);
-
-			set_pte_init((pte_t *)pud,
-				     pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT,
-					     prot),
+			set_pud_init(pud,
+				     pfn_pud(paddr >> PAGE_SHIFT, prot_sethuge(prot)),
 				     init);
 			spin_unlock(&init_mm.page_table_lock);
 			paddr_last = paddr_next;
@@ -1287,7 +1288,7 @@ static void __init preallocate_vmalloc_pages(void)
 	unsigned long addr;
 	const char *lvl;
 
-	for (addr = VMALLOC_START; addr <= VMALLOC_END; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
+	for (addr = VMALLOC_START; addr <= VMEMORY_END; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
 		pgd_t *pgd = pgd_offset_k(addr);
 		p4d_t *p4d;
 		pud_t *pud;
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 1ad0228f8ceb..78c5bc654cff 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -17,6 +17,7 @@
 #include <linux/cc_platform.h>
 #include <linux/efi.h>
 #include <linux/pgtable.h>
+#include <linux/kmsan.h>
 
 #include <asm/set_memory.h>
 #include <asm/e820/api.h>
@@ -479,6 +480,8 @@ void iounmap(volatile void __iomem *addr)
 		return;
 	}
 
+	kmsan_iounmap_page_range((unsigned long)addr,
+				 (unsigned long)addr + get_vm_area_size(p));
 	memtype_free(p->phys_addr, p->phys_addr + get_vm_area_size(p));
 
 	/* Finally remove it */
diff --git a/arch/x86/mm/kmsan_shadow.c b/arch/x86/mm/kmsan_shadow.c
new file mode 100644
index 000000000000..bee2ec4a3bfa
--- /dev/null
+++ b/arch/x86/mm/kmsan_shadow.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * x86-specific bits of KMSAN shadow implementation.
+ *
+ * Copyright (C) 2022 Google LLC
+ * Author: Alexander Potapenko <glider@google.com>
+ */
+
+#include <asm/cpu_entry_area.h>
+#include <linux/percpu-defs.h>
+
+/*
+ * Addresses within the CPU entry area (including e.g. exception stacks) do not
+ * have struct page entries corresponding to them, so they need separate
+ * handling.
+ * arch_kmsan_get_meta_or_null() (declared in the header) maps the addresses in
+ * CPU entry area to addresses in cpu_entry_area_shadow/cpu_entry_area_origin.
+ */
+DEFINE_PER_CPU(char[CPU_ENTRY_AREA_SIZE], cpu_entry_area_shadow);
+DEFINE_PER_CPU(char[CPU_ENTRY_AREA_SIZE], cpu_entry_area_origin);
diff --git a/arch/x86/mm/pat/cpa-test.c b/arch/x86/mm/pat/cpa-test.c
index 0612a73638a8..423b21e80929 100644
--- a/arch/x86/mm/pat/cpa-test.c
+++ b/arch/x86/mm/pat/cpa-test.c
@@ -136,10 +136,10 @@ static int pageattr_test(void)
 	failed += print_split(&sa);
 
 	for (i = 0; i < NTEST; i++) {
-		unsigned long pfn = prandom_u32() % max_pfn_mapped;
+		unsigned long pfn = prandom_u32_max(max_pfn_mapped);
 
 		addr[i] = (unsigned long)__va(pfn << PAGE_SHIFT);
-		len[i] = prandom_u32() % NPAGES;
+		len[i] = prandom_u32_max(NPAGES);
 		len[i] = min_t(unsigned long, len[i], max_pfn_mapped - pfn - 1);
 
 		if (len[i] == 0)
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 1abd5438f126..2e5a045731de 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -580,6 +580,50 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long start,
 }
 
 /*
+ * Validate strict W^X semantics.
+ */
+static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long start,
+				  unsigned long pfn, unsigned long npg)
+{
+	unsigned long end;
+
+	/* Kernel text is rw at boot up */
+	if (system_state == SYSTEM_BOOTING)
+		return new;
+
+	/*
+	 * 32-bit has some unfixable W+X issues, like EFI code
+	 * and writeable data being in the same page.  Disable
+	 * detection and enforcement there.
+	 */
+	if (IS_ENABLED(CONFIG_X86_32))
+		return new;
+
+	/* Only verify when NX is supported: */
+	if (!(__supported_pte_mask & _PAGE_NX))
+		return new;
+
+	if (!((pgprot_val(old) ^ pgprot_val(new)) & (_PAGE_RW | _PAGE_NX)))
+		return new;
+
+	if ((pgprot_val(new) & (_PAGE_RW | _PAGE_NX)) != _PAGE_RW)
+		return new;
+
+	end = start + npg * PAGE_SIZE - 1;
+	WARN_ONCE(1, "CPA detected W^X violation: %016llx -> %016llx range: 0x%016lx - 0x%016lx PFN %lx\n",
+		  (unsigned long long)pgprot_val(old),
+		  (unsigned long long)pgprot_val(new),
+		  start, end, pfn);
+
+	/*
+	 * For now, allow all permission change attempts by returning the
+	 * attempted permissions. This can 'return old' to actively
+	 * refuse the permission change at a later time.
+	 */
+	return new;
+}
+
+/*
  * Lookup the page table entry for a virtual address in a specific pgd.
  * Return a pointer to the entry and the level of the mapping.
  */
@@ -885,6 +929,8 @@ static int __should_split_large_page(pte_t *kpte, unsigned long address,
 	new_prot = static_protections(req_prot, lpaddr, old_pfn, numpages,
 				      psize, CPA_DETECT);
 
+	new_prot = verify_rwx(old_prot, new_prot, lpaddr, old_pfn, numpages);
+
 	/*
 	 * If there is a conflict, split the large page.
 	 *
@@ -1525,6 +1571,7 @@ repeat:
 
 	if (level == PG_LEVEL_4K) {
 		pte_t new_pte;
+		pgprot_t old_prot = pte_pgprot(old_pte);
 		pgprot_t new_prot = pte_pgprot(old_pte);
 		unsigned long pfn = pte_pfn(old_pte);
 
@@ -1536,6 +1583,8 @@ repeat:
 		new_prot = static_protections(new_prot, address, pfn, 1, 0,
 					      CPA_PROTECT);
 
+		new_prot = verify_rwx(old_prot, new_prot, address, pfn, 1);
+
 		new_prot = pgprot_clear_protnone_bits(new_prot);
 
 		/*
@@ -1944,7 +1993,7 @@ int set_mce_nospec(unsigned long pfn)
 	return rc;
 }
 
-static int set_memory_present(unsigned long *addr, int numpages)
+static int set_memory_p(unsigned long *addr, int numpages)
 {
 	return change_page_attr_set(addr, numpages, __pgprot(_PAGE_PRESENT), 0);
 }
@@ -1954,7 +2003,7 @@ int clear_mce_nospec(unsigned long pfn)
 {
 	unsigned long addr = (unsigned long) pfn_to_kaddr(pfn);
 
-	return set_memory_present(&addr, 1);
+	return set_memory_p(&addr, 1);
 }
 EXPORT_SYMBOL_GPL(clear_mce_nospec);
 #endif /* CONFIG_X86_64 */
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index a932d7712d85..8525f2876fb4 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -550,7 +550,7 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma,
 	return ret;
 }
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
 int pmdp_test_and_clear_young(struct vm_area_struct *vma,
 			      unsigned long addr, pmd_t *pmdp)
 {
@@ -562,6 +562,9 @@ int pmdp_test_and_clear_young(struct vm_area_struct *vma,
 	return ret;
 }
 
+#endif
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 int pudp_test_and_clear_young(struct vm_area_struct *vma,
 			      unsigned long addr, pud_t *pudp)
 {
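
A note on the arch_sync_kernel_mappings() change in fault.c above: KMSAN mirrors every vmalloc page with a shadow page (tracking initialization state) and an origin page (tracking where a value was created), both living at fixed linear offsets from the vmalloc area. Because the translation is purely linear, syncing the metadata only needs the same sync routine called twice more with shifted bounds, exactly as the patch does. A minimal user-space sketch of that offset arithmetic; the _START constants below are illustrative placeholders, not the kernel's actual layout:

#include <stdint.h>
#include <stdio.h>

/* Placeholder layout constants, for illustration only. */
#define VMALLOC_START              0xffffc90000000000ULL
#define KMSAN_VMALLOC_SHADOW_START 0xffffe90000000000ULL
#define KMSAN_VMALLOC_ORIGIN_START 0xfffff10000000000ULL

/* Each vmalloc address maps to its metadata at a fixed linear offset. */
static uint64_t vmalloc_shadow(uint64_t addr)
{
	return addr - VMALLOC_START + KMSAN_VMALLOC_SHADOW_START;
}

static uint64_t vmalloc_origin(uint64_t addr)
{
	return addr - VMALLOC_START + KMSAN_VMALLOC_ORIGIN_START;
}

int main(void)
{
	uint64_t addr = VMALLOC_START + 0x1000;

	printf("vmalloc %#llx -> shadow %#llx, origin %#llx\n",
	       (unsigned long long)addr,
	       (unsigned long long)vmalloc_shadow(addr),
	       (unsigned long long)vmalloc_origin(addr));
	return 0;
}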
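
The verify_rwx() helper added to set_memory.c reduces to two bit tests once the early-exit guards are out of the way: the RW/NX bits must actually change, and the resulting protections must be simultaneously writable (_PAGE_RW set) and executable (_PAGE_NX clear) to count as a violation. A standalone sketch of that predicate, with the bit values copied from the x86 page-table headers and a made-up helper name:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* x86 PTE bits used by the check (same values as the kernel headers). */
#define _PAGE_RW (1ULL << 1)
#define _PAGE_NX (1ULL << 63)

/* True when a transition old -> new would create a W+X mapping. */
static bool creates_wx(uint64_t old, uint64_t new)
{
	/* RW/NX unchanged: nothing new to complain about. */
	if (!((old ^ new) & (_PAGE_RW | _PAGE_NX)))
		return false;

	/* W+X means: writable (RW set) and executable (NX clear). */
	return (new & (_PAGE_RW | _PAGE_NX)) == _PAGE_RW;
}

int main(void)
{
	uint64_t ro_x  = 0;                    /* read-only, executable */
	uint64_t rw_nx = _PAGE_RW | _PAGE_NX;  /* writable, non-executable */
	uint64_t rw_x  = _PAGE_RW;             /* writable AND executable */

	printf("ro+x  -> rw+nx: %d\n", creates_wx(ro_x, rw_nx));  /* 0 */
	printf("rw+nx -> rw+x : %d\n", creates_wx(rw_nx, rw_x));  /* 1 */
	return 0;
}

Note that, per the comment in the patch, the kernel currently only warns and still returns the attempted permissions; switching the function to return the old permissions would turn detection into enforcement.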
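
Finally, the cpa-test.c hunk swaps prandom_u32() % n for prandom_u32_max(n), which maps a 32-bit random value into [0, n) with a multiply and a shift instead of a division, which is cheaper on most CPUs. A sketch of the underlying trick, assuming the same reciprocal-scaling approach as the kernel's implementation (the function name here is made up):

#include <stdint.h>
#include <stdio.h>

/*
 * Multiply the 32-bit random value by the range and keep the high 32
 * bits: the result lands in [0, range) without a division. For ranges
 * that do not divide 2^32 the residual bias is comparable to the
 * modulo form this replaces.
 */
static uint32_t u32_below(uint32_t rnd, uint32_t range)
{
	return (uint32_t)(((uint64_t)rnd * range) >> 32);
}

int main(void)
{
	/* A sample value scaled into [0, 10). */
	printf("%u\n", u32_below(0xdeadbeefu, 10));
	return 0;
}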