aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--arch/x86/mm/Makefile8
-rw-r--r--arch/x86/mm/fault.c33
-rw-r--r--arch/x86/mm/hugetlbpage.c4
-rw-r--r--arch/x86/mm/init.c2
-rw-r--r--arch/x86/mm/init_64.c21
-rw-r--r--arch/x86/mm/ioremap.c3
-rw-r--r--arch/x86/mm/kmsan_shadow.c20
-rw-r--r--arch/x86/mm/pat/cpa-test.c4
-rw-r--r--arch/x86/mm/pat/set_memory.c53
-rw-r--r--arch/x86/mm/pgtable.c5
10 files changed, 136 insertions, 17 deletions
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index f8220fd2c169..c80febc44cd2 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -4,19 +4,24 @@ KCOV_INSTRUMENT_tlb.o := n
KCOV_INSTRUMENT_mem_encrypt.o := n
KCOV_INSTRUMENT_mem_encrypt_amd.o := n
KCOV_INSTRUMENT_mem_encrypt_identity.o := n
+KCOV_INSTRUMENT_pgprot.o := n
KASAN_SANITIZE_mem_encrypt.o := n
KASAN_SANITIZE_mem_encrypt_amd.o := n
KASAN_SANITIZE_mem_encrypt_identity.o := n
+KASAN_SANITIZE_pgprot.o := n
# Disable KCSAN entirely, because otherwise we get warnings that some functions
# reference __initdata sections.
KCSAN_SANITIZE := n
+# Avoid recursion by not calling KMSAN hooks for CEA code.
+KMSAN_SANITIZE_cpu_entry_area.o := n
ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_mem_encrypt.o = -pg
CFLAGS_REMOVE_mem_encrypt_amd.o = -pg
CFLAGS_REMOVE_mem_encrypt_identity.o = -pg
+CFLAGS_REMOVE_pgprot.o = -pg
endif
obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o mmap.o \
@@ -41,6 +46,9 @@ obj-$(CONFIG_HIGHMEM) += highmem_32.o
KASAN_SANITIZE_kasan_init_$(BITS).o := n
obj-$(CONFIG_KASAN) += kasan_init_$(BITS).o
+KMSAN_SANITIZE_kmsan_shadow.o := n
+obj-$(CONFIG_KMSAN) += kmsan_shadow.o
+
obj-$(CONFIG_MMIOTRACE) += mmiotrace.o
mmiotrace-y := kmmio.o pf_in.o mmio-mod.o
obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index fa71a5d12e87..7b0d4ab894c8 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -260,7 +260,7 @@ static noinline int vmalloc_fault(unsigned long address)
}
NOKPROBE_SYMBOL(vmalloc_fault);
-void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
+static void __arch_sync_kernel_mappings(unsigned long start, unsigned long end)
{
unsigned long addr;
@@ -284,6 +284,27 @@ void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
}
}
+void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
+{
+ __arch_sync_kernel_mappings(start, end);
+#ifdef CONFIG_KMSAN
+ /*
+ * KMSAN maintains two additional metadata page mappings for the
+ * [VMALLOC_START, VMALLOC_END) range. These mappings start at
+ * KMSAN_VMALLOC_SHADOW_START and KMSAN_VMALLOC_ORIGIN_START and
+ * have to be synced together with the vmalloc memory mapping.
+ */
+ if (start >= VMALLOC_START && end < VMALLOC_END) {
+ __arch_sync_kernel_mappings(
+ start - VMALLOC_START + KMSAN_VMALLOC_SHADOW_START,
+ end - VMALLOC_START + KMSAN_VMALLOC_SHADOW_START);
+ __arch_sync_kernel_mappings(
+ start - VMALLOC_START + KMSAN_VMALLOC_ORIGIN_START,
+ end - VMALLOC_START + KMSAN_VMALLOC_ORIGIN_START);
+ }
+#endif
+}
+
static bool low_pfn(unsigned long pfn)
{
return pfn < max_low_pfn;
@@ -769,6 +790,8 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
unsigned long address, struct task_struct *tsk)
{
const char *loglvl = task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG;
+ /* This is a racy snapshot, but it's better than nothing. */
+ int cpu = raw_smp_processor_id();
if (!unhandled_signal(tsk, SIGSEGV))
return;
@@ -782,6 +805,14 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
print_vma_addr(KERN_CONT " in ", regs->ip);
+ /*
+ * Dump the likely CPU where the fatal segfault happened.
+ * This can help identify faulty hardware.
+ */
+ printk(KERN_CONT " likely on CPU %d (core %d, socket %d)", cpu,
+ topology_core_id(cpu), topology_physical_package_id(cpu));
+
+
printk(KERN_CONT "\n");
show_opcodes(regs, loglvl);
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 6b3033845c6d..5804bbae4f01 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -37,8 +37,12 @@ int pmd_huge(pmd_t pmd)
*/
int pud_huge(pud_t pud)
{
+#if CONFIG_PGTABLE_LEVELS > 2
return !pud_none(pud) &&
(pud_val(pud) & (_PAGE_PRESENT|_PAGE_PSE)) != _PAGE_PRESENT;
+#else
+ return 0;
+#endif
}
#ifdef CONFIG_HUGETLB_PAGE
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 82a042c03824..9121bc1b9453 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -1054,7 +1054,7 @@ void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache)
}
#ifdef CONFIG_SWAP
-unsigned long max_swapfile_size(void)
+unsigned long arch_max_swapfile_size(void)
{
unsigned long pages;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 0fe690ebc269..3f040c6e5d13 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -90,6 +90,12 @@ DEFINE_ENTRY(pud, pud, init)
DEFINE_ENTRY(pmd, pmd, init)
DEFINE_ENTRY(pte, pte, init)
+static inline pgprot_t prot_sethuge(pgprot_t prot)
+{
+ WARN_ON_ONCE(pgprot_val(prot) & _PAGE_PAT);
+
+ return __pgprot(pgprot_val(prot) | _PAGE_PSE);
+}
/*
* NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
@@ -557,9 +563,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
if (page_size_mask & (1<<PG_LEVEL_2M)) {
pages++;
spin_lock(&init_mm.page_table_lock);
- set_pte_init((pte_t *)pmd,
- pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT,
- __pgprot(pgprot_val(prot) | _PAGE_PSE)),
+ set_pmd_init(pmd,
+ pfn_pmd(paddr >> PAGE_SHIFT, prot_sethuge(prot)),
init);
spin_unlock(&init_mm.page_table_lock);
paddr_last = paddr_next;
@@ -644,12 +649,8 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
if (page_size_mask & (1<<PG_LEVEL_1G)) {
pages++;
spin_lock(&init_mm.page_table_lock);
-
- prot = __pgprot(pgprot_val(prot) | _PAGE_PSE);
-
- set_pte_init((pte_t *)pud,
- pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT,
- prot),
+ set_pud_init(pud,
+ pfn_pud(paddr >> PAGE_SHIFT, prot_sethuge(prot)),
init);
spin_unlock(&init_mm.page_table_lock);
paddr_last = paddr_next;
@@ -1287,7 +1288,7 @@ static void __init preallocate_vmalloc_pages(void)
unsigned long addr;
const char *lvl;
- for (addr = VMALLOC_START; addr <= VMALLOC_END; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
+ for (addr = VMALLOC_START; addr <= VMEMORY_END; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
pgd_t *pgd = pgd_offset_k(addr);
p4d_t *p4d;
pud_t *pud;
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 1ad0228f8ceb..78c5bc654cff 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -17,6 +17,7 @@
#include <linux/cc_platform.h>
#include <linux/efi.h>
#include <linux/pgtable.h>
+#include <linux/kmsan.h>
#include <asm/set_memory.h>
#include <asm/e820/api.h>
@@ -479,6 +480,8 @@ void iounmap(volatile void __iomem *addr)
return;
}
+ kmsan_iounmap_page_range((unsigned long)addr,
+ (unsigned long)addr + get_vm_area_size(p));
memtype_free(p->phys_addr, p->phys_addr + get_vm_area_size(p));
/* Finally remove it */
diff --git a/arch/x86/mm/kmsan_shadow.c b/arch/x86/mm/kmsan_shadow.c
new file mode 100644
index 000000000000..bee2ec4a3bfa
--- /dev/null
+++ b/arch/x86/mm/kmsan_shadow.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * x86-specific bits of KMSAN shadow implementation.
+ *
+ * Copyright (C) 2022 Google LLC
+ * Author: Alexander Potapenko <glider@google.com>
+ */
+
+#include <asm/cpu_entry_area.h>
+#include <linux/percpu-defs.h>
+
+/*
+ * Addresses within the CPU entry area (including e.g. exception stacks) do not
+ * have struct page entries corresponding to them, so they need separate
+ * handling.
+ * arch_kmsan_get_meta_or_null() (declared in the header) maps the addresses in
+ * CPU entry area to addresses in cpu_entry_area_shadow/cpu_entry_area_origin.
+ */
+DEFINE_PER_CPU(char[CPU_ENTRY_AREA_SIZE], cpu_entry_area_shadow);
+DEFINE_PER_CPU(char[CPU_ENTRY_AREA_SIZE], cpu_entry_area_origin);
diff --git a/arch/x86/mm/pat/cpa-test.c b/arch/x86/mm/pat/cpa-test.c
index 0612a73638a8..423b21e80929 100644
--- a/arch/x86/mm/pat/cpa-test.c
+++ b/arch/x86/mm/pat/cpa-test.c
@@ -136,10 +136,10 @@ static int pageattr_test(void)
failed += print_split(&sa);
for (i = 0; i < NTEST; i++) {
- unsigned long pfn = prandom_u32() % max_pfn_mapped;
+ unsigned long pfn = prandom_u32_max(max_pfn_mapped);
addr[i] = (unsigned long)__va(pfn << PAGE_SHIFT);
- len[i] = prandom_u32() % NPAGES;
+ len[i] = prandom_u32_max(NPAGES);
len[i] = min_t(unsigned long, len[i], max_pfn_mapped - pfn - 1);
if (len[i] == 0)
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 1abd5438f126..2e5a045731de 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -580,6 +580,50 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long start,
}
/*
+ * Validate strict W^X semantics.
+ */
+static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long start,
+ unsigned long pfn, unsigned long npg)
+{
+ unsigned long end;
+
+ /* Kernel text is rw at boot up */
+ if (system_state == SYSTEM_BOOTING)
+ return new;
+
+ /*
+ * 32-bit has some unfixable W+X issues, like EFI code
+ * and writeable data being in the same page. Disable
+ * detection and enforcement there.
+ */
+ if (IS_ENABLED(CONFIG_X86_32))
+ return new;
+
+ /* Only verify when NX is supported: */
+ if (!(__supported_pte_mask & _PAGE_NX))
+ return new;
+
+ if (!((pgprot_val(old) ^ pgprot_val(new)) & (_PAGE_RW | _PAGE_NX)))
+ return new;
+
+ if ((pgprot_val(new) & (_PAGE_RW | _PAGE_NX)) != _PAGE_RW)
+ return new;
+
+ end = start + npg * PAGE_SIZE - 1;
+ WARN_ONCE(1, "CPA detected W^X violation: %016llx -> %016llx range: 0x%016lx - 0x%016lx PFN %lx\n",
+ (unsigned long long)pgprot_val(old),
+ (unsigned long long)pgprot_val(new),
+ start, end, pfn);
+
+ /*
+ * For now, allow all permission change attempts by returning the
+ * attempted permissions. This can 'return old' to actively
+ * refuse the permission change at a later time.
+ */
+ return new;
+}
+
+/*
* Lookup the page table entry for a virtual address in a specific pgd.
* Return a pointer to the entry and the level of the mapping.
*/
@@ -885,6 +929,8 @@ static int __should_split_large_page(pte_t *kpte, unsigned long address,
new_prot = static_protections(req_prot, lpaddr, old_pfn, numpages,
psize, CPA_DETECT);
+ new_prot = verify_rwx(old_prot, new_prot, lpaddr, old_pfn, numpages);
+
/*
* If there is a conflict, split the large page.
*
@@ -1525,6 +1571,7 @@ repeat:
if (level == PG_LEVEL_4K) {
pte_t new_pte;
+ pgprot_t old_prot = pte_pgprot(old_pte);
pgprot_t new_prot = pte_pgprot(old_pte);
unsigned long pfn = pte_pfn(old_pte);
@@ -1536,6 +1583,8 @@ repeat:
new_prot = static_protections(new_prot, address, pfn, 1, 0,
CPA_PROTECT);
+ new_prot = verify_rwx(old_prot, new_prot, address, pfn, 1);
+
new_prot = pgprot_clear_protnone_bits(new_prot);
/*
@@ -1944,7 +1993,7 @@ int set_mce_nospec(unsigned long pfn)
return rc;
}
-static int set_memory_present(unsigned long *addr, int numpages)
+static int set_memory_p(unsigned long *addr, int numpages)
{
return change_page_attr_set(addr, numpages, __pgprot(_PAGE_PRESENT), 0);
}
@@ -1954,7 +2003,7 @@ int clear_mce_nospec(unsigned long pfn)
{
unsigned long addr = (unsigned long) pfn_to_kaddr(pfn);
- return set_memory_present(&addr, 1);
+ return set_memory_p(&addr, 1);
}
EXPORT_SYMBOL_GPL(clear_mce_nospec);
#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index a932d7712d85..8525f2876fb4 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -550,7 +550,7 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma,
return ret;
}
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
int pmdp_test_and_clear_young(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmdp)
{
@@ -562,6 +562,9 @@ int pmdp_test_and_clear_young(struct vm_area_struct *vma,
return ret;
}
+#endif
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
int pudp_test_and_clear_young(struct vm_area_struct *vma,
unsigned long addr, pud_t *pudp)
{