aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2020-01-10 18:53:14 +0100
committerIngo Molnar <mingo@kernel.org>2020-01-10 18:53:14 +0100
commit57ad87ddce79b6d54f8e442d0ecf4b5bbe8c5a9e (patch)
treeeaa43171e82737e8b33239e2e9bc3b4de67b7ddd
parentMerge branch 'linus' into efi/core, to pick up fixes (diff)
parentmm, x86/mm: Untangle address space layout definitions from basic pgtable type definitions (diff)
downloadlinux-dev-57ad87ddce79b6d54f8e442d0ecf4b5bbe8c5a9e.tar.xz
linux-dev-57ad87ddce79b6d54f8e442d0ecf4b5bbe8c5a9e.zip
Merge branch 'x86/mm' into efi/core, to pick up dependencies
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--arch/alpha/include/asm/vmalloc.h4
-rw-r--r--arch/arc/include/asm/vmalloc.h4
-rw-r--r--arch/arm/include/asm/vmalloc.h4
-rw-r--r--arch/arm64/include/asm/vmalloc.h4
-rw-r--r--arch/c6x/include/asm/vmalloc.h4
-rw-r--r--arch/csky/include/asm/vmalloc.h4
-rw-r--r--arch/h8300/include/asm/vmalloc.h4
-rw-r--r--arch/hexagon/include/asm/vmalloc.h4
-rw-r--r--arch/ia64/include/asm/vmalloc.h4
-rw-r--r--arch/m68k/include/asm/vmalloc.h4
-rw-r--r--arch/microblaze/include/asm/vmalloc.h4
-rw-r--r--arch/mips/include/asm/vmalloc.h4
-rw-r--r--arch/nds32/include/asm/vmalloc.h4
-rw-r--r--arch/nios2/include/asm/vmalloc.h4
-rw-r--r--arch/openrisc/include/asm/vmalloc.h4
-rw-r--r--arch/parisc/include/asm/vmalloc.h4
-rw-r--r--arch/powerpc/include/asm/vmalloc.h4
-rw-r--r--arch/riscv/include/asm/vmalloc.h4
-rw-r--r--arch/s390/include/asm/vmalloc.h4
-rw-r--r--arch/sh/include/asm/vmalloc.h4
-rw-r--r--arch/sparc/include/asm/vmalloc.h4
-rw-r--r--arch/um/include/asm/vmalloc.h4
-rw-r--r--arch/unicore32/include/asm/vmalloc.h4
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/include/asm/cpu_entry_area.h10
-rw-r--r--arch/x86/include/asm/memtype.h27
-rw-r--r--arch/x86/include/asm/mmu_context.h86
-rw-r--r--arch/x86/include/asm/mtrr.h4
-rw-r--r--arch/x86/include/asm/pat.h27
-rw-r--r--arch/x86/include/asm/pci.h2
-rw-r--r--arch/x86/include/asm/pgtable_32_areas.h53
-rw-r--r--arch/x86/include/asm/pgtable_32_types.h57
-rw-r--r--arch/x86/include/asm/pgtable_areas.h16
-rw-r--r--arch/x86/include/asm/pgtable_types.h143
-rw-r--r--arch/x86/include/asm/vmalloc.h6
-rw-r--r--arch/x86/kernel/cpu/common.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/mtrr.c2
-rw-r--r--arch/x86/kernel/cpu/scattered.c2
-rw-r--r--arch/x86/kernel/cpu/topology.c2
-rw-r--r--arch/x86/kernel/ldt.c83
-rw-r--r--arch/x86/kernel/setup.c164
-rw-r--r--arch/x86/kernel/x86_init.c2
-rw-r--r--arch/x86/kvm/mmu/mmu.c2
-rw-r--r--arch/x86/mm/Makefile8
-rw-r--r--arch/x86/mm/fault.c1
-rw-r--r--arch/x86/mm/init_32.c1
-rw-r--r--arch/x86/mm/iomap_32.c6
-rw-r--r--arch/x86/mm/ioremap.c12
-rw-r--r--arch/x86/mm/pat/Makefile5
-rw-r--r--arch/x86/mm/pat/cpa-test.c (renamed from arch/x86/mm/pageattr-test.c)0
-rw-r--r--arch/x86/mm/pat/memtype.c (renamed from arch/x86/mm/pat.c)203
-rw-r--r--arch/x86/mm/pat/memtype.h (renamed from arch/x86/mm/pat_internal.h)12
-rw-r--r--arch/x86/mm/pat/memtype_interval.c194
-rw-r--r--arch/x86/mm/pat/set_memory.c (renamed from arch/x86/mm/pageattr.c)24
-rw-r--r--arch/x86/mm/pat_interval.c185
-rw-r--r--arch/x86/mm/pgtable_32.c1
-rw-r--r--arch/x86/mm/physaddr.c1
-rw-r--r--arch/x86/pci/i386.c2
-rw-r--r--arch/x86/xen/mmu_pv.c2
-rw-r--r--arch/xtensa/include/asm/vmalloc.h4
-rw-r--r--drivers/infiniband/hw/mlx5/main.c2
-rw-r--r--drivers/media/pci/ivtv/ivtvfb.c2
-rw-r--r--include/linux/mm.h15
-rw-r--r--include/linux/vmalloc.h2
-rw-r--r--mm/highmem.c2
-rw-r--r--mm/vmalloc.c8
67 files changed, 797 insertions, 681 deletions
diff --git a/arch/alpha/include/asm/vmalloc.h b/arch/alpha/include/asm/vmalloc.h
new file mode 100644
index 000000000000..0a9a366a4d34
--- /dev/null
+++ b/arch/alpha/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_ALPHA_VMALLOC_H
+#define _ASM_ALPHA_VMALLOC_H
+
+#endif /* _ASM_ALPHA_VMALLOC_H */
diff --git a/arch/arc/include/asm/vmalloc.h b/arch/arc/include/asm/vmalloc.h
new file mode 100644
index 000000000000..973095aad665
--- /dev/null
+++ b/arch/arc/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_ARC_VMALLOC_H
+#define _ASM_ARC_VMALLOC_H
+
+#endif /* _ASM_ARC_VMALLOC_H */
diff --git a/arch/arm/include/asm/vmalloc.h b/arch/arm/include/asm/vmalloc.h
new file mode 100644
index 000000000000..a9b3718b8600
--- /dev/null
+++ b/arch/arm/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_ARM_VMALLOC_H
+#define _ASM_ARM_VMALLOC_H
+
+#endif /* _ASM_ARM_VMALLOC_H */
diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h
new file mode 100644
index 000000000000..2ca708ab9b20
--- /dev/null
+++ b/arch/arm64/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_ARM64_VMALLOC_H
+#define _ASM_ARM64_VMALLOC_H
+
+#endif /* _ASM_ARM64_VMALLOC_H */
diff --git a/arch/c6x/include/asm/vmalloc.h b/arch/c6x/include/asm/vmalloc.h
new file mode 100644
index 000000000000..26c6c6696bbd
--- /dev/null
+++ b/arch/c6x/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_C6X_VMALLOC_H
+#define _ASM_C6X_VMALLOC_H
+
+#endif /* _ASM_C6X_VMALLOC_H */
diff --git a/arch/csky/include/asm/vmalloc.h b/arch/csky/include/asm/vmalloc.h
new file mode 100644
index 000000000000..43dca6336b4c
--- /dev/null
+++ b/arch/csky/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_CSKY_VMALLOC_H
+#define _ASM_CSKY_VMALLOC_H
+
+#endif /* _ASM_CSKY_VMALLOC_H */
diff --git a/arch/h8300/include/asm/vmalloc.h b/arch/h8300/include/asm/vmalloc.h
new file mode 100644
index 000000000000..08a55c1dfa23
--- /dev/null
+++ b/arch/h8300/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_H8300_VMALLOC_H
+#define _ASM_H8300_VMALLOC_H
+
+#endif /* _ASM_H8300_VMALLOC_H */
diff --git a/arch/hexagon/include/asm/vmalloc.h b/arch/hexagon/include/asm/vmalloc.h
new file mode 100644
index 000000000000..7b04609e525c
--- /dev/null
+++ b/arch/hexagon/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_HEXAGON_VMALLOC_H
+#define _ASM_HEXAGON_VMALLOC_H
+
+#endif /* _ASM_HEXAGON_VMALLOC_H */
diff --git a/arch/ia64/include/asm/vmalloc.h b/arch/ia64/include/asm/vmalloc.h
new file mode 100644
index 000000000000..a2b51141ad28
--- /dev/null
+++ b/arch/ia64/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_IA64_VMALLOC_H
+#define _ASM_IA64_VMALLOC_H
+
+#endif /* _ASM_IA64_VMALLOC_H */
diff --git a/arch/m68k/include/asm/vmalloc.h b/arch/m68k/include/asm/vmalloc.h
new file mode 100644
index 000000000000..bc1dca6cf134
--- /dev/null
+++ b/arch/m68k/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_M68K_VMALLOC_H
+#define _ASM_M68K_VMALLOC_H
+
+#endif /* _ASM_M68K_VMALLOC_H */
diff --git a/arch/microblaze/include/asm/vmalloc.h b/arch/microblaze/include/asm/vmalloc.h
new file mode 100644
index 000000000000..04013a42b0fe
--- /dev/null
+++ b/arch/microblaze/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_MICROBLAZE_VMALLOC_H
+#define _ASM_MICROBLAZE_VMALLOC_H
+
+#endif /* _ASM_MICROBLAZE_VMALLOC_H */
diff --git a/arch/mips/include/asm/vmalloc.h b/arch/mips/include/asm/vmalloc.h
new file mode 100644
index 000000000000..25dc09b25eaf
--- /dev/null
+++ b/arch/mips/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_MIPS_VMALLOC_H
+#define _ASM_MIPS_VMALLOC_H
+
+#endif /* _ASM_MIPS_VMALLOC_H */
diff --git a/arch/nds32/include/asm/vmalloc.h b/arch/nds32/include/asm/vmalloc.h
new file mode 100644
index 000000000000..caeed3898419
--- /dev/null
+++ b/arch/nds32/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_NDS32_VMALLOC_H
+#define _ASM_NDS32_VMALLOC_H
+
+#endif /* _ASM_NDS32_VMALLOC_H */
diff --git a/arch/nios2/include/asm/vmalloc.h b/arch/nios2/include/asm/vmalloc.h
new file mode 100644
index 000000000000..ec7a9260090b
--- /dev/null
+++ b/arch/nios2/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_NIOS2_VMALLOC_H
+#define _ASM_NIOS2_VMALLOC_H
+
+#endif /* _ASM_NIOS2_VMALLOC_H */
diff --git a/arch/openrisc/include/asm/vmalloc.h b/arch/openrisc/include/asm/vmalloc.h
new file mode 100644
index 000000000000..75435eceec32
--- /dev/null
+++ b/arch/openrisc/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_OPENRISC_VMALLOC_H
+#define _ASM_OPENRISC_VMALLOC_H
+
+#endif /* _ASM_OPENRISC_VMALLOC_H */
diff --git a/arch/parisc/include/asm/vmalloc.h b/arch/parisc/include/asm/vmalloc.h
new file mode 100644
index 000000000000..1088ae4e7af9
--- /dev/null
+++ b/arch/parisc/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_PARISC_VMALLOC_H
+#define _ASM_PARISC_VMALLOC_H
+
+#endif /* _ASM_PARISC_VMALLOC_H */
diff --git a/arch/powerpc/include/asm/vmalloc.h b/arch/powerpc/include/asm/vmalloc.h
new file mode 100644
index 000000000000..b992dfaaa161
--- /dev/null
+++ b/arch/powerpc/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_POWERPC_VMALLOC_H
+#define _ASM_POWERPC_VMALLOC_H
+
+#endif /* _ASM_POWERPC_VMALLOC_H */
diff --git a/arch/riscv/include/asm/vmalloc.h b/arch/riscv/include/asm/vmalloc.h
new file mode 100644
index 000000000000..ff9abc00d139
--- /dev/null
+++ b/arch/riscv/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_RISCV_VMALLOC_H
+#define _ASM_RISCV_VMALLOC_H
+
+#endif /* _ASM_RISCV_VMALLOC_H */
diff --git a/arch/s390/include/asm/vmalloc.h b/arch/s390/include/asm/vmalloc.h
new file mode 100644
index 000000000000..3ba3a6bdca25
--- /dev/null
+++ b/arch/s390/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_S390_VMALLOC_H
+#define _ASM_S390_VMALLOC_H
+
+#endif /* _ASM_S390_VMALLOC_H */
diff --git a/arch/sh/include/asm/vmalloc.h b/arch/sh/include/asm/vmalloc.h
new file mode 100644
index 000000000000..716b77472646
--- /dev/null
+++ b/arch/sh/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_SH_VMALLOC_H
+#define _ASM_SH_VMALLOC_H
+
+#endif /* _ASM_SH_VMALLOC_H */
diff --git a/arch/sparc/include/asm/vmalloc.h b/arch/sparc/include/asm/vmalloc.h
new file mode 100644
index 000000000000..04b8ab9518b8
--- /dev/null
+++ b/arch/sparc/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_SPARC_VMALLOC_H
+#define _ASM_SPARC_VMALLOC_H
+
+#endif /* _ASM_SPARC_VMALLOC_H */
diff --git a/arch/um/include/asm/vmalloc.h b/arch/um/include/asm/vmalloc.h
new file mode 100644
index 000000000000..9a7b9ed93733
--- /dev/null
+++ b/arch/um/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_UM_VMALLOC_H
+#define _ASM_UM_VMALLOC_H
+
+#endif /* _ASM_UM_VMALLOC_H */
diff --git a/arch/unicore32/include/asm/vmalloc.h b/arch/unicore32/include/asm/vmalloc.h
new file mode 100644
index 000000000000..054435818a14
--- /dev/null
+++ b/arch/unicore32/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_UNICORE32_VMALLOC_H
+#define _ASM_UNICORE32_VMALLOC_H
+
+#endif /* _ASM_UNICORE32_VMALLOC_H */
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ded73bb5c0dd..c1cbfc7b3ae8 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1512,7 +1512,7 @@ config X86_CPA_STATISTICS
bool "Enable statistic for Change Page Attribute"
depends on DEBUG_FS
---help---
- Expose statistics about the Change Page Attribute mechanims, which
+ Expose statistics about the Change Page Attribute mechanism, which
helps to determine the effectiveness of preserving large and huge
page mappings when mapping protections are changed.
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
index 804734058c77..02c0078d3787 100644
--- a/arch/x86/include/asm/cpu_entry_area.h
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -6,6 +6,7 @@
#include <linux/percpu-defs.h>
#include <asm/processor.h>
#include <asm/intel_ds.h>
+#include <asm/pgtable_areas.h>
#ifdef CONFIG_X86_64
@@ -134,15 +135,6 @@ DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);
extern void setup_cpu_entry_areas(void);
extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
-/* Single page reserved for the readonly IDT mapping: */
-#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE
-#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
-
-#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT)
-
-#define CPU_ENTRY_AREA_MAP_SIZE \
- (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE)
-
extern struct cpu_entry_area *get_cpu_entry_area(int cpu);
static inline struct entry_stack *cpu_entry_stack(int cpu)
diff --git a/arch/x86/include/asm/memtype.h b/arch/x86/include/asm/memtype.h
new file mode 100644
index 000000000000..9c2447b3555d
--- /dev/null
+++ b/arch/x86/include/asm/memtype.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_MEMTYPE_H
+#define _ASM_X86_MEMTYPE_H
+
+#include <linux/types.h>
+#include <asm/pgtable_types.h>
+
+extern bool pat_enabled(void);
+extern void pat_disable(const char *reason);
+extern void pat_init(void);
+extern void init_cache_modes(void);
+
+extern int memtype_reserve(u64 start, u64 end,
+ enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm);
+extern int memtype_free(u64 start, u64 end);
+
+extern int memtype_kernel_map_sync(u64 base, unsigned long size,
+ enum page_cache_mode pcm);
+
+extern int memtype_reserve_io(resource_size_t start, resource_size_t end,
+ enum page_cache_mode *pcm);
+
+extern void memtype_free_io(resource_size_t start, resource_size_t end);
+
+extern bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn);
+
+#endif /* _ASM_X86_MEMTYPE_H */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 5f33924e200f..b243234e90cb 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -69,14 +69,6 @@ struct ldt_struct {
int slot;
};
-/* This is a multiple of PAGE_SIZE. */
-#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
-
-static inline void *ldt_slot_va(int slot)
-{
- return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
-}
-
/*
* Used for LDT copy/destruction.
*/
@@ -99,87 +91,21 @@ static inline void destroy_context_ldt(struct mm_struct *mm) { }
static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { }
#endif
-static inline void load_mm_ldt(struct mm_struct *mm)
-{
#ifdef CONFIG_MODIFY_LDT_SYSCALL
- struct ldt_struct *ldt;
-
- /* READ_ONCE synchronizes with smp_store_release */
- ldt = READ_ONCE(mm->context.ldt);
-
- /*
- * Any change to mm->context.ldt is followed by an IPI to all
- * CPUs with the mm active. The LDT will not be freed until
- * after the IPI is handled by all such CPUs. This means that,
- * if the ldt_struct changes before we return, the values we see
- * will be safe, and the new values will be loaded before we run
- * any user code.
- *
- * NB: don't try to convert this to use RCU without extreme care.
- * We would still need IRQs off, because we don't want to change
- * the local LDT after an IPI loaded a newer value than the one
- * that we can see.
- */
-
- if (unlikely(ldt)) {
- if (static_cpu_has(X86_FEATURE_PTI)) {
- if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
- /*
- * Whoops -- either the new LDT isn't mapped
- * (if slot == -1) or is mapped into a bogus
- * slot (if slot > 1).
- */
- clear_LDT();
- return;
- }
-
- /*
- * If page table isolation is enabled, ldt->entries
- * will not be mapped in the userspace pagetables.
- * Tell the CPU to access the LDT through the alias
- * at ldt_slot_va(ldt->slot).
- */
- set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
- } else {
- set_ldt(ldt->entries, ldt->nr_entries);
- }
- } else {
- clear_LDT();
- }
+extern void load_mm_ldt(struct mm_struct *mm);
+extern void switch_ldt(struct mm_struct *prev, struct mm_struct *next);
#else
+static inline void load_mm_ldt(struct mm_struct *mm)
+{
clear_LDT();
-#endif
}
-
static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
{
-#ifdef CONFIG_MODIFY_LDT_SYSCALL
- /*
- * Load the LDT if either the old or new mm had an LDT.
- *
- * An mm will never go from having an LDT to not having an LDT. Two
- * mms never share an LDT, so we don't gain anything by checking to
- * see whether the LDT changed. There's also no guarantee that
- * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL,
- * then prev->context.ldt will also be non-NULL.
- *
- * If we really cared, we could optimize the case where prev == next
- * and we're exiting lazy mode. Most of the time, if this happens,
- * we don't actually need to reload LDTR, but modify_ldt() is mostly
- * used by legacy code and emulators where we don't need this level of
- * performance.
- *
- * This uses | instead of || because it generates better code.
- */
- if (unlikely((unsigned long)prev->context.ldt |
- (unsigned long)next->context.ldt))
- load_mm_ldt(next);
-#endif
-
DEBUG_LOCKS_WARN_ON(preemptible());
}
+#endif
-void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
+extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
/*
* Init a new mm. Used on mm copies, like at fork()
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index dbff1456d215..829df26fd7a3 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -24,7 +24,7 @@
#define _ASM_X86_MTRR_H
#include <uapi/asm/mtrr.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
/*
@@ -86,7 +86,7 @@ static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
}
static inline void mtrr_bp_init(void)
{
- pat_disable("MTRRs disabled, skipping PAT initialization too.");
+ pat_disable("PAT support disabled because CONFIG_MTRR is disabled in the kernel.");
}
#define mtrr_ap_init() do {} while (0)
diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h
deleted file mode 100644
index 92015c65fa2a..000000000000
--- a/arch/x86/include/asm/pat.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_PAT_H
-#define _ASM_X86_PAT_H
-
-#include <linux/types.h>
-#include <asm/pgtable_types.h>
-
-bool pat_enabled(void);
-void pat_disable(const char *reason);
-extern void pat_init(void);
-extern void init_cache_modes(void);
-
-extern int reserve_memtype(u64 start, u64 end,
- enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm);
-extern int free_memtype(u64 start, u64 end);
-
-extern int kernel_map_sync_memtype(u64 base, unsigned long size,
- enum page_cache_mode pcm);
-
-int io_reserve_memtype(resource_size_t start, resource_size_t end,
- enum page_cache_mode *pcm);
-
-void io_free_memtype(resource_size_t start, resource_size_t end);
-
-bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn);
-
-#endif /* _ASM_X86_PAT_H */
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 90d0731fdcb6..c1fdd43fe187 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -9,7 +9,7 @@
#include <linux/scatterlist.h>
#include <linux/numa.h>
#include <asm/io.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
#include <asm/x86_init.h>
struct pci_sysdata {
diff --git a/arch/x86/include/asm/pgtable_32_areas.h b/arch/x86/include/asm/pgtable_32_areas.h
new file mode 100644
index 000000000000..b6355416a15a
--- /dev/null
+++ b/arch/x86/include/asm/pgtable_32_areas.h
@@ -0,0 +1,53 @@
+#ifndef _ASM_X86_PGTABLE_32_AREAS_H
+#define _ASM_X86_PGTABLE_32_AREAS_H
+
+#include <asm/cpu_entry_area.h>
+
+/*
+ * Just any arbitrary offset to the start of the vmalloc VM area: the
+ * current 8MB value just means that there will be a 8MB "hole" after the
+ * physical memory until the kernel virtual memory starts. That means that
+ * any out-of-bounds memory accesses will hopefully be caught.
+ * The vmalloc() routines leaves a hole of 4kB between each vmalloced
+ * area for the same reason. ;)
+ */
+#define VMALLOC_OFFSET (8 * 1024 * 1024)
+
+#ifndef __ASSEMBLY__
+extern bool __vmalloc_start_set; /* set once high_memory is set */
+#endif
+
+#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET)
+#ifdef CONFIG_X86_PAE
+#define LAST_PKMAP 512
+#else
+#define LAST_PKMAP 1024
+#endif
+
+#define CPU_ENTRY_AREA_PAGES (NR_CPUS * DIV_ROUND_UP(sizeof(struct cpu_entry_area), PAGE_SIZE))
+
+/* The +1 is for the readonly IDT page: */
+#define CPU_ENTRY_AREA_BASE \
+ ((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK)
+
+#define LDT_BASE_ADDR \
+ ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
+
+#define LDT_END_ADDR (LDT_BASE_ADDR + PMD_SIZE)
+
+#define PKMAP_BASE \
+ ((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK)
+
+#ifdef CONFIG_HIGHMEM
+# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE)
+#else
+# define VMALLOC_END (LDT_BASE_ADDR - 2 * PAGE_SIZE)
+#endif
+
+#define MODULES_VADDR VMALLOC_START
+#define MODULES_END VMALLOC_END
+#define MODULES_LEN (MODULES_VADDR - MODULES_END)
+
+#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE)
+
+#endif /* _ASM_X86_PGTABLE_32_AREAS_H */
diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h
index 0416d42e5bdd..5356a46b0373 100644
--- a/arch/x86/include/asm/pgtable_32_types.h
+++ b/arch/x86/include/asm/pgtable_32_types.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_PGTABLE_32_DEFS_H
-#define _ASM_X86_PGTABLE_32_DEFS_H
+#ifndef _ASM_X86_PGTABLE_32_TYPES_H
+#define _ASM_X86_PGTABLE_32_TYPES_H
/*
* The Linux x86 paging architecture is 'compile-time dual-mode', it
@@ -20,55 +20,4 @@
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
-/* Just any arbitrary offset to the start of the vmalloc VM area: the
- * current 8MB value just means that there will be a 8MB "hole" after the
- * physical memory until the kernel virtual memory starts. That means that
- * any out-of-bounds memory accesses will hopefully be caught.
- * The vmalloc() routines leaves a hole of 4kB between each vmalloced
- * area for the same reason. ;)
- */
-#define VMALLOC_OFFSET (8 * 1024 * 1024)
-
-#ifndef __ASSEMBLY__
-extern bool __vmalloc_start_set; /* set once high_memory is set */
-#endif
-
-#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET)
-#ifdef CONFIG_X86_PAE
-#define LAST_PKMAP 512
-#else
-#define LAST_PKMAP 1024
-#endif
-
-/*
- * This is an upper bound on sizeof(struct cpu_entry_area) / PAGE_SIZE.
- * Define this here and validate with BUILD_BUG_ON() in cpu_entry_area.c
- * to avoid include recursion hell.
- */
-#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 43)
-
-/* The +1 is for the readonly IDT page: */
-#define CPU_ENTRY_AREA_BASE \
- ((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK)
-
-#define LDT_BASE_ADDR \
- ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
-
-#define LDT_END_ADDR (LDT_BASE_ADDR + PMD_SIZE)
-
-#define PKMAP_BASE \
- ((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK)
-
-#ifdef CONFIG_HIGHMEM
-# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE)
-#else
-# define VMALLOC_END (LDT_BASE_ADDR - 2 * PAGE_SIZE)
-#endif
-
-#define MODULES_VADDR VMALLOC_START
-#define MODULES_END VMALLOC_END
-#define MODULES_LEN (MODULES_VADDR - MODULES_END)
-
-#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE)
-
-#endif /* _ASM_X86_PGTABLE_32_DEFS_H */
+#endif /* _ASM_X86_PGTABLE_32_TYPES_H */
diff --git a/arch/x86/include/asm/pgtable_areas.h b/arch/x86/include/asm/pgtable_areas.h
new file mode 100644
index 000000000000..d34cce1b995c
--- /dev/null
+++ b/arch/x86/include/asm/pgtable_areas.h
@@ -0,0 +1,16 @@
+#ifndef _ASM_X86_PGTABLE_AREAS_H
+#define _ASM_X86_PGTABLE_AREAS_H
+
+#ifdef CONFIG_X86_32
+# include <asm/pgtable_32_areas.h>
+#endif
+
+/* Single page reserved for the readonly IDT mapping: */
+#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE
+#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
+
+#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT)
+
+#define CPU_ENTRY_AREA_MAP_SIZE (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE)
+
+#endif /* _ASM_X86_PGTABLE_AREAS_H */
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index b5e49e6bac63..ea7400726d7a 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -110,11 +110,6 @@
#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
-#define _PAGE_TABLE_NOENC (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |\
- _PAGE_ACCESSED | _PAGE_DIRTY)
-#define _KERNPG_TABLE_NOENC (_PAGE_PRESENT | _PAGE_RW | \
- _PAGE_ACCESSED | _PAGE_DIRTY)
-
/*
* Set of bits not changed in pte_modify. The pte's
* protection key is treated like _PAGE_RW, for
@@ -136,80 +131,93 @@
*/
#ifndef __ASSEMBLY__
enum page_cache_mode {
- _PAGE_CACHE_MODE_WB = 0,
- _PAGE_CACHE_MODE_WC = 1,
+ _PAGE_CACHE_MODE_WB = 0,
+ _PAGE_CACHE_MODE_WC = 1,
_PAGE_CACHE_MODE_UC_MINUS = 2,
- _PAGE_CACHE_MODE_UC = 3,
- _PAGE_CACHE_MODE_WT = 4,
- _PAGE_CACHE_MODE_WP = 5,
- _PAGE_CACHE_MODE_NUM = 8
+ _PAGE_CACHE_MODE_UC = 3,
+ _PAGE_CACHE_MODE_WT = 4,
+ _PAGE_CACHE_MODE_WP = 5,
+
+ _PAGE_CACHE_MODE_NUM = 8
};
#endif
-#define _PAGE_CACHE_MASK (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT)
-#define _PAGE_NOCACHE (cachemode2protval(_PAGE_CACHE_MODE_UC))
-#define _PAGE_CACHE_WP (cachemode2protval(_PAGE_CACHE_MODE_WP))
+#define _PAGE_ENC (_AT(pteval_t, sme_me_mask))
-#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
-#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
- _PAGE_ACCESSED | _PAGE_NX)
-
-#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | \
- _PAGE_USER | _PAGE_ACCESSED)
-#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
- _PAGE_ACCESSED | _PAGE_NX)
-#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
- _PAGE_ACCESSED)
-#define PAGE_COPY PAGE_COPY_NOEXEC
-#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | \
- _PAGE_ACCESSED | _PAGE_NX)
-#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
- _PAGE_ACCESSED)
-
-#define __PAGE_KERNEL_EXEC \
- (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_GLOBAL)
-#define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX)
-
-#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW)
-#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW)
-#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_NOCACHE)
-#define __PAGE_KERNEL_VVAR (__PAGE_KERNEL_RO | _PAGE_USER)
-#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE)
-#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
-#define __PAGE_KERNEL_WP (__PAGE_KERNEL | _PAGE_CACHE_WP)
-
-#define __PAGE_KERNEL_IO (__PAGE_KERNEL)
-#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE)
+#define _PAGE_CACHE_MASK (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)
-#ifndef __ASSEMBLY__
+#define _PAGE_NOCACHE (cachemode2protval(_PAGE_CACHE_MODE_UC))
+#define _PAGE_CACHE_WP (cachemode2protval(_PAGE_CACHE_MODE_WP))
-#define _PAGE_ENC (_AT(pteval_t, sme_me_mask))
+#define __PP _PAGE_PRESENT
+#define __RW _PAGE_RW
+#define _USR _PAGE_USER
+#define ___A _PAGE_ACCESSED
+#define ___D _PAGE_DIRTY
+#define ___G _PAGE_GLOBAL
+#define __NX _PAGE_NX
+
+#define _ENC _PAGE_ENC
+#define __WP _PAGE_CACHE_WP
+#define __NC _PAGE_NOCACHE
+#define _PSE _PAGE_PSE
+
+#define pgprot_val(x) ((x).pgprot)
+#define __pgprot(x) ((pgprot_t) { (x) } )
+#define __pg(x) __pgprot(x)
+
+#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
+
+#define PAGE_NONE __pg( 0| 0| 0|___A| 0| 0| 0|___G)
+#define PAGE_SHARED __pg(__PP|__RW|_USR|___A|__NX| 0| 0| 0)
+#define PAGE_SHARED_EXEC __pg(__PP|__RW|_USR|___A| 0| 0| 0| 0)
+#define PAGE_COPY_NOEXEC __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0)
+#define PAGE_COPY_EXEC __pg(__PP| 0|_USR|___A| 0| 0| 0| 0)
+#define PAGE_COPY __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0)
+#define PAGE_READONLY __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0)
+#define PAGE_READONLY_EXEC __pg(__PP| 0|_USR|___A| 0| 0| 0| 0)
+
+#define __PAGE_KERNEL (__PP|__RW| 0|___A|__NX|___D| 0|___G)
+#define __PAGE_KERNEL_EXEC (__PP|__RW| 0|___A| 0|___D| 0|___G)
+#define _KERNPG_TABLE_NOENC (__PP|__RW| 0|___A| 0|___D| 0| 0)
+#define _KERNPG_TABLE (__PP|__RW| 0|___A| 0|___D| 0| 0| _ENC)
+#define _PAGE_TABLE_NOENC (__PP|__RW|_USR|___A| 0|___D| 0| 0)
+#define _PAGE_TABLE (__PP|__RW|_USR|___A| 0|___D| 0| 0| _ENC)
+#define __PAGE_KERNEL_RO (__PP| 0| 0|___A|__NX|___D| 0|___G)
+#define __PAGE_KERNEL_RX (__PP| 0| 0|___A| 0|___D| 0|___G)
+#define __PAGE_KERNEL_NOCACHE (__PP|__RW| 0|___A|__NX|___D| 0|___G| __NC)
+#define __PAGE_KERNEL_VVAR (__PP| 0|_USR|___A|__NX|___D| 0|___G)
+#define __PAGE_KERNEL_LARGE (__PP|__RW| 0|___A|__NX|___D|_PSE|___G)
+#define __PAGE_KERNEL_LARGE_EXEC (__PP|__RW| 0|___A| 0|___D|_PSE|___G)
+#define __PAGE_KERNEL_WP (__PP|__RW| 0|___A|__NX|___D| 0|___G| __WP)
+
+
+#define __PAGE_KERNEL_IO __PAGE_KERNEL
+#define __PAGE_KERNEL_IO_NOCACHE __PAGE_KERNEL_NOCACHE
-#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \
- _PAGE_DIRTY | _PAGE_ENC)
-#define _PAGE_TABLE (_KERNPG_TABLE | _PAGE_USER)
-#define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _PAGE_ENC)
-#define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _PAGE_ENC)
+#ifndef __ASSEMBLY__
-#define __PAGE_KERNEL_NOENC (__PAGE_KERNEL)
-#define __PAGE_KERNEL_NOENC_WP (__PAGE_KERNEL_WP)
+#define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _ENC)
+#define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _ENC)
+#define __PAGE_KERNEL_NOENC (__PAGE_KERNEL | 0)
+#define __PAGE_KERNEL_NOENC_WP (__PAGE_KERNEL_WP | 0)
-#define default_pgprot(x) __pgprot((x) & __default_kernel_pte_mask)
+#define __pgprot_mask(x) __pgprot((x) & __default_kernel_pte_mask)
-#define PAGE_KERNEL default_pgprot(__PAGE_KERNEL | _PAGE_ENC)
-#define PAGE_KERNEL_NOENC default_pgprot(__PAGE_KERNEL)
-#define PAGE_KERNEL_RO default_pgprot(__PAGE_KERNEL_RO | _PAGE_ENC)
-#define PAGE_KERNEL_EXEC default_pgprot(__PAGE_KERNEL_EXEC | _PAGE_ENC)
-#define PAGE_KERNEL_EXEC_NOENC default_pgprot(__PAGE_KERNEL_EXEC)
-#define PAGE_KERNEL_RX default_pgprot(__PAGE_KERNEL_RX | _PAGE_ENC)
-#define PAGE_KERNEL_NOCACHE default_pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC)
-#define PAGE_KERNEL_LARGE default_pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC)
-#define PAGE_KERNEL_LARGE_EXEC default_pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC)
-#define PAGE_KERNEL_VVAR default_pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC)
+#define PAGE_KERNEL __pgprot_mask(__PAGE_KERNEL | _ENC)
+#define PAGE_KERNEL_NOENC __pgprot_mask(__PAGE_KERNEL | 0)
+#define PAGE_KERNEL_RO __pgprot_mask(__PAGE_KERNEL_RO | _ENC)
+#define PAGE_KERNEL_EXEC __pgprot_mask(__PAGE_KERNEL_EXEC | _ENC)
+#define PAGE_KERNEL_EXEC_NOENC __pgprot_mask(__PAGE_KERNEL_EXEC | 0)
+#define PAGE_KERNEL_RX __pgprot_mask(__PAGE_KERNEL_RX | _ENC)
+#define PAGE_KERNEL_NOCACHE __pgprot_mask(__PAGE_KERNEL_NOCACHE | _ENC)
+#define PAGE_KERNEL_LARGE __pgprot_mask(__PAGE_KERNEL_LARGE | _ENC)
+#define PAGE_KERNEL_LARGE_EXEC __pgprot_mask(__PAGE_KERNEL_LARGE_EXEC | _ENC)
+#define PAGE_KERNEL_VVAR __pgprot_mask(__PAGE_KERNEL_VVAR | _ENC)
-#define PAGE_KERNEL_IO default_pgprot(__PAGE_KERNEL_IO)
-#define PAGE_KERNEL_IO_NOCACHE default_pgprot(__PAGE_KERNEL_IO_NOCACHE)
+#define PAGE_KERNEL_IO __pgprot_mask(__PAGE_KERNEL_IO)
+#define PAGE_KERNEL_IO_NOCACHE __pgprot_mask(__PAGE_KERNEL_IO_NOCACHE)
#endif /* __ASSEMBLY__ */
@@ -449,9 +457,6 @@ static inline pteval_t pte_flags(pte_t pte)
return native_pte_val(pte) & PTE_FLAGS_MASK;
}
-#define pgprot_val(x) ((x).pgprot)
-#define __pgprot(x) ((pgprot_t) { (x) } )
-
extern uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM];
extern uint8_t __pte2cachemode_tbl[8];
diff --git a/arch/x86/include/asm/vmalloc.h b/arch/x86/include/asm/vmalloc.h
new file mode 100644
index 000000000000..29837740b520
--- /dev/null
+++ b/arch/x86/include/asm/vmalloc.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_X86_VMALLOC_H
+#define _ASM_X86_VMALLOC_H
+
+#include <asm/pgtable_areas.h>
+
+#endif /* _ASM_X86_VMALLOC_H */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 2e4d90294fe6..9d6a35a4586e 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -49,7 +49,7 @@
#include <asm/cpu.h>
#include <asm/mce.h>
#include <asm/msr.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
#include <asm/microcode.h>
#include <asm/microcode_intel.h>
#include <asm/intel-family.h>
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index aa5c064a6a22..51b9190c628b 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -15,7 +15,7 @@
#include <asm/tlbflush.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
#include "mtrr.h"
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c
index 507039c20128..6a80f36b5d59 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.c
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.c
@@ -52,7 +52,7 @@
#include <asm/e820/api.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
#include "mtrr.h"
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index adf9b71386ef..62b137c3c97a 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -4,7 +4,7 @@
*/
#include <linux/cpu.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
#include <asm/apic.h>
#include <asm/processor.h>
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index ee48c3fc8a65..d3a0791bc052 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -7,7 +7,7 @@
#include <linux/cpu.h>
#include <asm/apic.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
#include <asm/processor.h>
#include "cpu.h"
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index b2463fcb20a8..c57e1ca70fd1 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -28,6 +28,89 @@
#include <asm/desc.h>
#include <asm/mmu_context.h>
#include <asm/syscalls.h>
+#include <asm/pgtable_areas.h>
+
+/* This is a multiple of PAGE_SIZE. */
+#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
+
+static inline void *ldt_slot_va(int slot)
+{
+ return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
+}
+
+void load_mm_ldt(struct mm_struct *mm)
+{
+ struct ldt_struct *ldt;
+
+ /* READ_ONCE synchronizes with smp_store_release */
+ ldt = READ_ONCE(mm->context.ldt);
+
+ /*
+ * Any change to mm->context.ldt is followed by an IPI to all
+ * CPUs with the mm active. The LDT will not be freed until
+ * after the IPI is handled by all such CPUs. This means that,
+ * if the ldt_struct changes before we return, the values we see
+ * will be safe, and the new values will be loaded before we run
+ * any user code.
+ *
+ * NB: don't try to convert this to use RCU without extreme care.
+ * We would still need IRQs off, because we don't want to change
+ * the local LDT after an IPI loaded a newer value than the one
+ * that we can see.
+ */
+
+ if (unlikely(ldt)) {
+ if (static_cpu_has(X86_FEATURE_PTI)) {
+ if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
+ /*
+ * Whoops -- either the new LDT isn't mapped
+ * (if slot == -1) or is mapped into a bogus
+ * slot (if slot > 1).
+ */
+ clear_LDT();
+ return;
+ }
+
+ /*
+ * If page table isolation is enabled, ldt->entries
+ * will not be mapped in the userspace pagetables.
+ * Tell the CPU to access the LDT through the alias
+ * at ldt_slot_va(ldt->slot).
+ */
+ set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
+ } else {
+ set_ldt(ldt->entries, ldt->nr_entries);
+ }
+ } else {
+ clear_LDT();
+ }
+}
+
+void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
+{
+ /*
+ * Load the LDT if either the old or new mm had an LDT.
+ *
+ * An mm will never go from having an LDT to not having an LDT. Two
+ * mms never share an LDT, so we don't gain anything by checking to
+ * see whether the LDT changed. There's also no guarantee that
+ * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL,
+ * then prev->context.ldt will also be non-NULL.
+ *
+ * If we really cared, we could optimize the case where prev == next
+ * and we're exiting lazy mode. Most of the time, if this happens,
+ * we don't actually need to reload LDTR, but modify_ldt() is mostly
+ * used by legacy code and emulators where we don't need this level of
+ * performance.
+ *
+ * This uses | instead of || because it generates better code.
+ */
+ if (unlikely((unsigned long)prev->context.ldt |
+ (unsigned long)next->context.ldt))
+ load_mm_ldt(next);
+
+ DEBUG_LOCKS_WARN_ON(preemptible());
+}
static void refresh_ldt_segments(void)
{
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index cedfe2077a69..90296a04e5ad 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -2,130 +2,53 @@
/*
* Copyright (C) 1995 Linus Torvalds
*
- * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
- *
- * Memory region support
- * David Parsons <orc@pell.chi.il.us>, July-August 1999
- *
- * Added E820 sanitization routine (removes overlapping memory regions);
- * Brian Moyle <bmoyle@mvista.com>, February 2001
- *
- * Moved CPU detection code to cpu/${cpu}.c
- * Patrick Mochel <mochel@osdl.org>, March 2002
- *
- * Provisions for empty E820 memory regions (reported by certain BIOSes).
- * Alex Achenbach <xela@slit.de>, December 2002.
- *
- */
-
-/*
- * This file handles the architecture-dependent parts of initialization
+ * This file contains the setup_arch() code, which handles the architecture-dependent
+ * parts of early kernel initialization.
*/
-
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/mmzone.h>
-#include <linux/screen_info.h>
-#include <linux/ioport.h>
-#include <linux/acpi.h>
-#include <linux/sfi.h>
-#include <linux/apm_bios.h>
-#include <linux/initrd.h>
-#include <linux/memblock.h>
-#include <linux/seq_file.h>
#include <linux/console.h>
-#include <linux/root_dev.h>
-#include <linux/highmem.h>
-#include <linux/export.h>
+#include <linux/crash_dump.h>
+#include <linux/dmi.h>
#include <linux/efi.h>
-#include <linux/init.h>
-#include <linux/edd.h>
+#include <linux/init_ohci1394_dma.h>
+#include <linux/initrd.h>
#include <linux/iscsi_ibft.h>
-#include <linux/nodemask.h>
-#include <linux/kexec.h>
-#include <linux/dmi.h>
-#include <linux/pfn.h>
+#include <linux/memblock.h>
#include <linux/pci.h>
-#include <asm/pci-direct.h>
-#include <linux/init_ohci1394_dma.h>
-#include <linux/kvm_para.h>
-#include <linux/dma-contiguous.h>
-#include <xen/xen.h>
-#include <uapi/linux/mount.h>
-
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/stddef.h>
-#include <linux/unistd.h>
-#include <linux/ptrace.h>
-#include <linux/user.h>
-#include <linux/delay.h>
-
-#include <linux/kallsyms.h>
-#include <linux/cpufreq.h>
-#include <linux/dma-mapping.h>
-#include <linux/ctype.h>
-#include <linux/uaccess.h>
-
-#include <linux/percpu.h>
-#include <linux/crash_dump.h>
+#include <linux/root_dev.h>
+#include <linux/sfi.h>
#include <linux/tboot.h>
-#include <linux/jiffies.h>
-#include <linux/mem_encrypt.h>
-#include <linux/sizes.h>
-
#include <linux/usb/xhci-dbgp.h>
-#include <video/edid.h>
-#include <asm/mtrr.h>
+#include <uapi/linux/mount.h>
+
+#include <xen/xen.h>
+
#include <asm/apic.h>
-#include <asm/realmode.h>
-#include <asm/e820/api.h>
-#include <asm/mpspec.h>
-#include <asm/setup.h>
-#include <asm/efi.h>
-#include <asm/timer.h>
-#include <asm/i8259.h>
-#include <asm/sections.h>
-#include <asm/io_apic.h>
-#include <asm/ist.h>
-#include <asm/setup_arch.h>
#include <asm/bios_ebda.h>
-#include <asm/cacheflush.h>
-#include <asm/processor.h>
#include <asm/bugs.h>
-#include <asm/kasan.h>
-
-#include <asm/vsyscall.h>
#include <asm/cpu.h>
-#include <asm/desc.h>
-#include <asm/dma.h>
-#include <asm/iommu.h>
+#include <asm/efi.h>
#include <asm/gart.h>
-#include <asm/mmu_context.h>
-#include <asm/proto.h>
-
-#include <asm/paravirt.h>
#include <asm/hypervisor.h>
-#include <asm/olpc_ofw.h>
-
-#include <asm/percpu.h>
-#include <asm/topology.h>
-#include <asm/apicdef.h>
-#include <asm/amd_nb.h>
+#include <asm/io_apic.h>
+#include <asm/kasan.h>
+#include <asm/kaslr.h>
#include <asm/mce.h>
-#include <asm/alternative.h>
+#include <asm/mtrr.h>
+#include <asm/olpc_ofw.h>
+#include <asm/pci-direct.h>
#include <asm/prom.h>
-#include <asm/microcode.h>
-#include <asm/kaslr.h>
+#include <asm/proto.h>
#include <asm/unwind.h>
+#include <asm/vsyscall.h>
+#include <linux/vmalloc.h>
/*
- * max_low_pfn_mapped: highest direct mapped pfn under 4GB
- * max_pfn_mapped: highest direct mapped pfn over 4GB
+ * max_low_pfn_mapped: highest directly mapped pfn < 4 GB
+ * max_pfn_mapped: highest directly mapped pfn > 4 GB
*
* The direct mapping only covers E820_TYPE_RAM regions, so the ranges and gaps are
- * represented by pfn_mapped
+ * represented by pfn_mapped[].
*/
unsigned long max_low_pfn_mapped;
unsigned long max_pfn_mapped;
@@ -135,14 +58,23 @@ RESERVE_BRK(dmi_alloc, 65536);
#endif
-static __initdata unsigned long _brk_start = (unsigned long)__brk_base;
-unsigned long _brk_end = (unsigned long)__brk_base;
+/*
+ * Range of the BSS area. The size of the BSS area is determined
+ * at link time, with RESERVE_BRK*() facility reserving additional
+ * chunks.
+ */
+static __initdata
+unsigned long _brk_start = (unsigned long)__brk_base;
+unsigned long _brk_end = (unsigned long)__brk_base;
struct boot_params boot_params;
/*
- * Machine setup..
+ * These are the four main kernel memory regions, we put them into
+ * the resource tree so that kdump tools and other debugging tools
+ * recover it:
*/
+
static struct resource rodata_resource = {
.name = "Kernel rodata",
.start = 0,
@@ -173,16 +105,16 @@ static struct resource bss_resource = {
#ifdef CONFIG_X86_32
-/* cpu data as detected by the assembly code in head_32.S */
+/* CPU data as detected by the assembly code in head_32.S */
struct cpuinfo_x86 new_cpu_data;
-/* common cpu data for all cpus */
+/* Common CPU data for all CPUs */
struct cpuinfo_x86 boot_cpu_data __read_mostly;
EXPORT_SYMBOL(boot_cpu_data);
unsigned int def_to_bigsmp;
-/* for MCA, but anyone else can use it if they want */
+/* For MCA, but anyone else can use it if they want */
unsigned int machine_id;
unsigned int machine_submodel_id;
unsigned int BIOS_revision;
@@ -468,15 +400,15 @@ static void __init memblock_x86_reserve_range_setup_data(void)
/*
* Keep the crash kernel below this limit.
*
- * On 32 bits earlier kernels would limit the kernel to the low 512 MiB
+ * Earlier 32-bits kernels would limit the kernel to the low 512 MB range
* due to mapping restrictions.
*
- * On 64bit, kdump kernel need be restricted to be under 64TB, which is
+ * 64-bit kdump kernels need to be restricted to be under 64 TB, which is
* the upper limit of system RAM in 4-level paging mode. Since the kdump
- * jumping could be from 5-level to 4-level, the jumping will fail if
- * kernel is put above 64TB, and there's no way to detect the paging mode
- * of the kernel which will be loaded for dumping during the 1st kernel
- * bootup.
+ * jump could be from 5-level paging to 4-level paging, the jump will fail if
+ * the kernel is put above 64 TB, and during the 1st kernel bootup there's
+ * no good way to detect the paging mode of the target kernel which will be
+ * loaded for dumping.
*/
#ifdef CONFIG_X86_32
# define CRASH_ADDR_LOW_MAX SZ_512M
@@ -887,7 +819,7 @@ void __init setup_arch(char **cmdline_p)
/*
* Note: Quark X1000 CPUs advertise PGE incorrectly and require
* a cr3 based tlb flush, so the following __flush_tlb_all()
- * will not flush anything because the cpu quirk which clears
+ * will not flush anything because the CPU quirk which clears
* X86_FEATURE_PGE has not been invoked yet. Though due to the
* load_cr3() above the TLB has been flushed already. The
* quirk is invoked before subsequent calls to __flush_tlb_all()
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index ce89430a7f80..23e25f3034c2 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -20,7 +20,7 @@
#include <asm/irq.h>
#include <asm/io_apic.h>
#include <asm/hpet.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
#include <asm/tsc.h>
#include <asm/iommu.h>
#include <asm/mach_traps.h>
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 6f92b40d798c..a32b847a8089 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -40,7 +40,7 @@
#include <linux/kthread.h>
#include <asm/page.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
#include <asm/cmpxchg.h>
#include <asm/e820/api.h>
#include <asm/io.h>
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 3b89c201ac26..345848f270e3 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -12,8 +12,10 @@ CFLAGS_REMOVE_mem_encrypt.o = -pg
CFLAGS_REMOVE_mem_encrypt_identity.o = -pg
endif
-obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
- pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o
+obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o mmap.o \
+ pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o
+
+obj-y += pat/
# Make sure __phys_addr has no stackprotector
nostackp := $(call cc-option, -fno-stack-protector)
@@ -23,8 +25,6 @@ CFLAGS_mem_encrypt_identity.o := $(nostackp)
CFLAGS_fault.o := -I $(srctree)/$(src)/../include/asm/trace
-obj-$(CONFIG_X86_PAT) += pat_interval.o
-
obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 304d31d8cbbc..c9c8523a3a48 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -29,6 +29,7 @@
#include <asm/efi.h> /* efi_recover_from_page_fault()*/
#include <asm/desc.h> /* store_idt(), ... */
#include <asm/cpu_entry_area.h> /* exception stack */
+#include <asm/pgtable_areas.h> /* VMALLOC_START, ... */
#define CREATE_TRACE_POINTS
#include <asm/trace/exceptions.h>
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 0a74407ef92e..8d29ae8d3eb7 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -52,6 +52,7 @@
#include <asm/page_types.h>
#include <asm/cpu_entry_area.h>
#include <asm/init.h>
+#include <asm/pgtable_areas.h>
#include "mm_internal.h"
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index 6748b4c2baff..f60398aeb644 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -4,7 +4,7 @@
*/
#include <asm/iomap.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
#include <linux/export.h>
#include <linux/highmem.h>
@@ -26,7 +26,7 @@ int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot)
if (!is_io_mapping_possible(base, size))
return -EINVAL;
- ret = io_reserve_memtype(base, base + size, &pcm);
+ ret = memtype_reserve_io(base, base + size, &pcm);
if (ret)
return ret;
@@ -40,7 +40,7 @@ EXPORT_SYMBOL_GPL(iomap_create_wc);
void iomap_free(resource_size_t base, unsigned long size)
{
- io_free_memtype(base, base + size);
+ memtype_free_io(base, base + size);
}
EXPORT_SYMBOL_GPL(iomap_free);
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index b3a2936377b5..44e4beb4239f 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -24,7 +24,7 @@
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
#include <asm/setup.h>
#include "physaddr.h"
@@ -196,10 +196,10 @@ __ioremap_caller(resource_size_t phys_addr, unsigned long size,
phys_addr &= PHYSICAL_PAGE_MASK;
size = PAGE_ALIGN(last_addr+1) - phys_addr;
- retval = reserve_memtype(phys_addr, (u64)phys_addr + size,
+ retval = memtype_reserve(phys_addr, (u64)phys_addr + size,
pcm, &new_pcm);
if (retval) {
- printk(KERN_ERR "ioremap reserve_memtype failed %d\n", retval);
+ printk(KERN_ERR "ioremap memtype_reserve failed %d\n", retval);
return NULL;
}
@@ -255,7 +255,7 @@ __ioremap_caller(resource_size_t phys_addr, unsigned long size,
area->phys_addr = phys_addr;
vaddr = (unsigned long) area->addr;
- if (kernel_map_sync_memtype(phys_addr, size, pcm))
+ if (memtype_kernel_map_sync(phys_addr, size, pcm))
goto err_free_area;
if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot))
@@ -275,7 +275,7 @@ __ioremap_caller(resource_size_t phys_addr, unsigned long size,
err_free_area:
free_vm_area(area);
err_free_memtype:
- free_memtype(phys_addr, phys_addr + size);
+ memtype_free(phys_addr, phys_addr + size);
return NULL;
}
@@ -451,7 +451,7 @@ void iounmap(volatile void __iomem *addr)
return;
}
- free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p));
+ memtype_free(p->phys_addr, p->phys_addr + get_vm_area_size(p));
/* Finally remove it */
o = remove_vm_area((void __force *)addr);
diff --git a/arch/x86/mm/pat/Makefile b/arch/x86/mm/pat/Makefile
new file mode 100644
index 000000000000..ea464c995161
--- /dev/null
+++ b/arch/x86/mm/pat/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-y := set_memory.o memtype.o
+
+obj-$(CONFIG_X86_PAT) += memtype_interval.o
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pat/cpa-test.c
index facce271e8b9..facce271e8b9 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pat/cpa-test.c
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat/memtype.c
index 2d758e19ef22..394be8611748 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -1,11 +1,34 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Handle caching attributes in page tables (PAT)
+ * Page Attribute Table (PAT) support: handle memory caching attributes in page tables.
*
* Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
* Suresh B Siddha <suresh.b.siddha@intel.com>
*
* Loosely based on earlier PAT patchset from Eric Biederman and Andi Kleen.
+ *
+ * Basic principles:
+ *
+ * PAT is a CPU feature supported by all modern x86 CPUs, to allow the firmware and
+ * the kernel to set one of a handful of 'caching type' attributes for physical
+ * memory ranges: uncached, write-combining, write-through, write-protected,
+ * and the most commonly used and default attribute: write-back caching.
+ *
+ * PAT support supercedes and augments MTRR support in a compatible fashion: MTRR is
+ * a hardware interface to enumerate a limited number of physical memory ranges
+ * and set their caching attributes explicitly, programmed into the CPU via MSRs.
+ * Even modern CPUs have MTRRs enabled - but these are typically not touched
+ * by the kernel or by user-space (such as the X server), we rely on PAT for any
+ * additional cache attribute logic.
+ *
+ * PAT doesn't work via explicit memory ranges, but uses page table entries to add
+ * cache attribute information to the mapped memory range: there's 3 bits used,
+ * (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT), with the 8 possible values mapped by the
+ * CPU to actual cache attributes via an MSR loaded into the CPU (MSR_IA32_CR_PAT).
+ *
+ * ( There's a metric ton of finer details, such as compatibility with CPU quirks
+ * that only support 4 types of PAT entries, and interaction with MTRRs, see
+ * below for details. )
*/
#include <linux/seq_file.h>
@@ -29,44 +52,48 @@
#include <asm/mtrr.h>
#include <asm/page.h>
#include <asm/msr.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
#include <asm/io.h>
-#include "pat_internal.h"
-#include "mm_internal.h"
+#include "memtype.h"
+#include "../mm_internal.h"
#undef pr_fmt
#define pr_fmt(fmt) "" fmt
-static bool __read_mostly boot_cpu_done;
+static bool __read_mostly pat_bp_initialized;
static bool __read_mostly pat_disabled = !IS_ENABLED(CONFIG_X86_PAT);
-static bool __read_mostly pat_initialized;
-static bool __read_mostly init_cm_done;
+static bool __read_mostly pat_bp_enabled;
+static bool __read_mostly pat_cm_initialized;
-void pat_disable(const char *reason)
+/*
+ * PAT support is enabled by default, but can be disabled for
+ * various user-requested or hardware-forced reasons:
+ */
+void pat_disable(const char *msg_reason)
{
if (pat_disabled)
return;
- if (boot_cpu_done) {
+ if (pat_bp_initialized) {
WARN_ONCE(1, "x86/PAT: PAT cannot be disabled after initialization\n");
return;
}
pat_disabled = true;
- pr_info("x86/PAT: %s\n", reason);
+ pr_info("x86/PAT: %s\n", msg_reason);
}
static int __init nopat(char *str)
{
- pat_disable("PAT support disabled.");
+ pat_disable("PAT support disabled via boot option.");
return 0;
}
early_param("nopat", nopat);
bool pat_enabled(void)
{
- return pat_initialized;
+ return pat_bp_enabled;
}
EXPORT_SYMBOL_GPL(pat_enabled);
@@ -197,6 +224,8 @@ static void __init_cache_modes(u64 pat)
char pat_msg[33];
int i;
+ WARN_ON_ONCE(pat_cm_initialized);
+
pat_msg[32] = 0;
for (i = 7; i >= 0; i--) {
cache = pat_get_cache_mode((pat >> (i * 8)) & 7,
@@ -205,28 +234,28 @@ static void __init_cache_modes(u64 pat)
}
pr_info("x86/PAT: Configuration [0-7]: %s\n", pat_msg);
- init_cm_done = true;
+ pat_cm_initialized = true;
}
#define PAT(x, y) ((u64)PAT_ ## y << ((x)*8))
-static void pat_bsp_init(u64 pat)
+static void pat_bp_init(u64 pat)
{
u64 tmp_pat;
if (!boot_cpu_has(X86_FEATURE_PAT)) {
- pat_disable("PAT not supported by CPU.");
+ pat_disable("PAT not supported by the CPU.");
return;
}
rdmsrl(MSR_IA32_CR_PAT, tmp_pat);
if (!tmp_pat) {
- pat_disable("PAT MSR is 0, disabled.");
+ pat_disable("PAT support disabled by the firmware.");
return;
}
wrmsrl(MSR_IA32_CR_PAT, pat);
- pat_initialized = true;
+ pat_bp_enabled = true;
__init_cache_modes(pat);
}
@@ -248,7 +277,7 @@ void init_cache_modes(void)
{
u64 pat = 0;
- if (init_cm_done)
+ if (pat_cm_initialized)
return;
if (boot_cpu_has(X86_FEATURE_PAT)) {
@@ -291,7 +320,7 @@ void init_cache_modes(void)
}
/**
- * pat_init - Initialize PAT MSR and PAT table
+ * pat_init - Initialize the PAT MSR and PAT table on the current CPU
*
* This function initializes PAT MSR and PAT table with an OS-defined value
* to enable additional cache attributes, WC, WT and WP.
@@ -305,6 +334,10 @@ void pat_init(void)
u64 pat;
struct cpuinfo_x86 *c = &boot_cpu_data;
+#ifndef CONFIG_X86_PAT
+ pr_info_once("x86/PAT: PAT support disabled because CONFIG_X86_PAT is disabled in the kernel.\n");
+#endif
+
if (pat_disabled)
return;
@@ -364,9 +397,9 @@ void pat_init(void)
PAT(4, WB) | PAT(5, WP) | PAT(6, UC_MINUS) | PAT(7, WT);
}
- if (!boot_cpu_done) {
- pat_bsp_init(pat);
- boot_cpu_done = true;
+ if (!pat_bp_initialized) {
+ pat_bp_init(pat);
+ pat_bp_initialized = true;
} else {
pat_ap_init(pat);
}
@@ -542,10 +575,10 @@ static u64 sanitize_phys(u64 address)
* available type in new_type in case of no error. In case of any error
* it will return a negative return value.
*/
-int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type,
+int memtype_reserve(u64 start, u64 end, enum page_cache_mode req_type,
enum page_cache_mode *new_type)
{
- struct memtype *new;
+ struct memtype *entry_new;
enum page_cache_mode actual_type;
int is_range_ram;
int err = 0;
@@ -593,22 +626,22 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type,
return -EINVAL;
}
- new = kzalloc(sizeof(struct memtype), GFP_KERNEL);
- if (!new)
+ entry_new = kzalloc(sizeof(struct memtype), GFP_KERNEL);
+ if (!entry_new)
return -ENOMEM;
- new->start = start;
- new->end = end;
- new->type = actual_type;
+ entry_new->start = start;
+ entry_new->end = end;
+ entry_new->type = actual_type;
spin_lock(&memtype_lock);
- err = memtype_check_insert(new, new_type);
+ err = memtype_check_insert(entry_new, new_type);
if (err) {
- pr_info("x86/PAT: reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n",
+ pr_info("x86/PAT: memtype_reserve failed [mem %#010Lx-%#010Lx], track %s, req %s\n",
start, end - 1,
- cattr_name(new->type), cattr_name(req_type));
- kfree(new);
+ cattr_name(entry_new->type), cattr_name(req_type));
+ kfree(entry_new);
spin_unlock(&memtype_lock);
return err;
@@ -616,18 +649,17 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type,
spin_unlock(&memtype_lock);
- dprintk("reserve_memtype added [mem %#010Lx-%#010Lx], track %s, req %s, ret %s\n",
- start, end - 1, cattr_name(new->type), cattr_name(req_type),
+ dprintk("memtype_reserve added [mem %#010Lx-%#010Lx], track %s, req %s, ret %s\n",
+ start, end - 1, cattr_name(entry_new->type), cattr_name(req_type),
new_type ? cattr_name(*new_type) : "-");
return err;
}
-int free_memtype(u64 start, u64 end)
+int memtype_free(u64 start, u64 end)
{
- int err = -EINVAL;
int is_range_ram;
- struct memtype *entry;
+ struct memtype *entry_old;
if (!pat_enabled())
return 0;
@@ -640,28 +672,24 @@ int free_memtype(u64 start, u64 end)
return 0;
is_range_ram = pat_pagerange_is_ram(start, end);
- if (is_range_ram == 1) {
-
- err = free_ram_pages_type(start, end);
-
- return err;
- } else if (is_range_ram < 0) {
+ if (is_range_ram == 1)
+ return free_ram_pages_type(start, end);
+ if (is_range_ram < 0)
return -EINVAL;
- }
spin_lock(&memtype_lock);
- entry = memtype_erase(start, end);
+ entry_old = memtype_erase(start, end);
spin_unlock(&memtype_lock);
- if (IS_ERR(entry)) {
+ if (IS_ERR(entry_old)) {
pr_info("x86/PAT: %s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n",
current->comm, current->pid, start, end - 1);
return -EINVAL;
}
- kfree(entry);
+ kfree(entry_old);
- dprintk("free_memtype request [mem %#010Lx-%#010Lx]\n", start, end - 1);
+ dprintk("memtype_free request [mem %#010Lx-%#010Lx]\n", start, end - 1);
return 0;
}
@@ -700,6 +728,7 @@ static enum page_cache_mode lookup_memtype(u64 paddr)
rettype = _PAGE_CACHE_MODE_UC_MINUS;
spin_unlock(&memtype_lock);
+
return rettype;
}
@@ -723,7 +752,7 @@ bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn)
EXPORT_SYMBOL_GPL(pat_pfn_immune_to_uc_mtrr);
/**
- * io_reserve_memtype - Request a memory type mapping for a region of memory
+ * memtype_reserve_io - Request a memory type mapping for a region of memory
* @start: start (physical address) of the region
* @end: end (physical address) of the region
* @type: A pointer to memtype, with requested type. On success, requested
@@ -732,7 +761,7 @@ EXPORT_SYMBOL_GPL(pat_pfn_immune_to_uc_mtrr);
* On success, returns 0
* On failure, returns non-zero
*/
-int io_reserve_memtype(resource_size_t start, resource_size_t end,
+int memtype_reserve_io(resource_size_t start, resource_size_t end,
enum page_cache_mode *type)
{
resource_size_t size = end - start;
@@ -742,47 +771,47 @@ int io_reserve_memtype(resource_size_t start, resource_size_t end,
WARN_ON_ONCE(iomem_map_sanity_check(start, size));
- ret = reserve_memtype(start, end, req_type, &new_type);
+ ret = memtype_reserve(start, end, req_type, &new_type);
if (ret)
goto out_err;
if (!is_new_memtype_allowed(start, size, req_type, new_type))
goto out_free;
- if (kernel_map_sync_memtype(start, size, new_type) < 0)
+ if (memtype_kernel_map_sync(start, size, new_type) < 0)
goto out_free;
*type = new_type;
return 0;
out_free:
- free_memtype(start, end);
+ memtype_free(start, end);
ret = -EBUSY;
out_err:
return ret;
}
/**
- * io_free_memtype - Release a memory type mapping for a region of memory
+ * memtype_free_io - Release a memory type mapping for a region of memory
* @start: start (physical address) of the region
* @end: end (physical address) of the region
*/
-void io_free_memtype(resource_size_t start, resource_size_t end)
+void memtype_free_io(resource_size_t start, resource_size_t end)
{
- free_memtype(start, end);
+ memtype_free(start, end);
}
int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size)
{
enum page_cache_mode type = _PAGE_CACHE_MODE_WC;
- return io_reserve_memtype(start, start + size, &type);
+ return memtype_reserve_io(start, start + size, &type);
}
EXPORT_SYMBOL(arch_io_reserve_memtype_wc);
void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size)
{
- io_free_memtype(start, start + size);
+ memtype_free_io(start, start + size);
}
EXPORT_SYMBOL(arch_io_free_memtype_wc);
@@ -839,10 +868,10 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
}
/*
- * Change the memory type for the physial address range in kernel identity
+ * Change the memory type for the physical address range in kernel identity
* mapping space if that range is a part of identity map.
*/
-int kernel_map_sync_memtype(u64 base, unsigned long size,
+int memtype_kernel_map_sync(u64 base, unsigned long size,
enum page_cache_mode pcm)
{
unsigned long id_sz;
@@ -851,15 +880,14 @@ int kernel_map_sync_memtype(u64 base, unsigned long size,
return 0;
/*
- * some areas in the middle of the kernel identity range
- * are not mapped, like the PCI space.
+ * Some areas in the middle of the kernel identity range
+ * are not mapped, for example the PCI space.
*/
if (!page_is_ram(base >> PAGE_SHIFT))
return 0;
id_sz = (__pa(high_memory-1) <= base + size) ?
- __pa(high_memory) - base :
- size;
+ __pa(high_memory) - base : size;
if (ioremap_change_attr((unsigned long)__va(base), id_sz, pcm) < 0) {
pr_info("x86/PAT: %s:%d ioremap_change_attr failed %s for [mem %#010Lx-%#010Lx]\n",
@@ -873,7 +901,7 @@ int kernel_map_sync_memtype(u64 base, unsigned long size,
/*
* Internal interface to reserve a range of physical memory with prot.
- * Reserved non RAM regions only and after successful reserve_memtype,
+ * Reserved non RAM regions only and after successful memtype_reserve,
* this func also keeps identity mapping (if any) in sync with this new prot.
*/
static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
@@ -910,14 +938,14 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
return 0;
}
- ret = reserve_memtype(paddr, paddr + size, want_pcm, &pcm);
+ ret = memtype_reserve(paddr, paddr + size, want_pcm, &pcm);
if (ret)
return ret;
if (pcm != want_pcm) {
if (strict_prot ||
!is_new_memtype_allowed(paddr, size, want_pcm, pcm)) {
- free_memtype(paddr, paddr + size);
+ memtype_free(paddr, paddr + size);
pr_err("x86/PAT: %s:%d map pfn expected mapping type %s for [mem %#010Lx-%#010Lx], got %s\n",
current->comm, current->pid,
cattr_name(want_pcm),
@@ -935,8 +963,8 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
cachemode2protval(pcm));
}
- if (kernel_map_sync_memtype(paddr, size, pcm) < 0) {
- free_memtype(paddr, paddr + size);
+ if (memtype_kernel_map_sync(paddr, size, pcm) < 0) {
+ memtype_free(paddr, paddr + size);
return -EINVAL;
}
return 0;
@@ -952,7 +980,7 @@ static void free_pfn_range(u64 paddr, unsigned long size)
is_ram = pat_pagerange_is_ram(paddr, paddr + size);
if (is_ram == 0)
- free_memtype(paddr, paddr + size);
+ memtype_free(paddr, paddr + size);
}
/*
@@ -1099,25 +1127,30 @@ EXPORT_SYMBOL_GPL(pgprot_writethrough);
#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT)
+/*
+ * We are allocating a temporary printout-entry to be passed
+ * between seq_start()/next() and seq_show():
+ */
static struct memtype *memtype_get_idx(loff_t pos)
{
- struct memtype *print_entry;
+ struct memtype *entry_print;
int ret;
- print_entry = kzalloc(sizeof(struct memtype), GFP_KERNEL);
- if (!print_entry)
+ entry_print = kzalloc(sizeof(struct memtype), GFP_KERNEL);
+ if (!entry_print)
return NULL;
spin_lock(&memtype_lock);
- ret = memtype_copy_nth_element(print_entry, pos);
+ ret = memtype_copy_nth_element(entry_print, pos);
spin_unlock(&memtype_lock);
- if (!ret) {
- return print_entry;
- } else {
- kfree(print_entry);
+ /* Free it on error: */
+ if (ret) {
+ kfree(entry_print);
return NULL;
}
+
+ return entry_print;
}
static void *memtype_seq_start(struct seq_file *seq, loff_t *pos)
@@ -1142,11 +1175,14 @@ static void memtype_seq_stop(struct seq_file *seq, void *v)
static int memtype_seq_show(struct seq_file *seq, void *v)
{
- struct memtype *print_entry = (struct memtype *)v;
+ struct memtype *entry_print = (struct memtype *)v;
+
+ seq_printf(seq, "PAT: [mem 0x%016Lx-0x%016Lx] %s\n",
+ entry_print->start,
+ entry_print->end,
+ cattr_name(entry_print->type));
- seq_printf(seq, "%s @ 0x%Lx-0x%Lx\n", cattr_name(print_entry->type),
- print_entry->start, print_entry->end);
- kfree(print_entry);
+ kfree(entry_print);
return 0;
}
@@ -1178,7 +1214,6 @@ static int __init pat_memtype_list_init(void)
}
return 0;
}
-
late_initcall(pat_memtype_list_init);
#endif /* CONFIG_DEBUG_FS && CONFIG_X86_PAT */
diff --git a/arch/x86/mm/pat_internal.h b/arch/x86/mm/pat/memtype.h
index 79a06684349e..cacecdbceb55 100644
--- a/arch/x86/mm/pat_internal.h
+++ b/arch/x86/mm/pat/memtype.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __PAT_INTERNAL_H_
-#define __PAT_INTERNAL_H_
+#ifndef __MEMTYPE_H_
+#define __MEMTYPE_H_
extern int pat_debug_enable;
@@ -29,13 +29,13 @@ static inline char *cattr_name(enum page_cache_mode pcm)
}
#ifdef CONFIG_X86_PAT
-extern int memtype_check_insert(struct memtype *new,
+extern int memtype_check_insert(struct memtype *entry_new,
enum page_cache_mode *new_type);
extern struct memtype *memtype_erase(u64 start, u64 end);
extern struct memtype *memtype_lookup(u64 addr);
-extern int memtype_copy_nth_element(struct memtype *out, loff_t pos);
+extern int memtype_copy_nth_element(struct memtype *entry_out, loff_t pos);
#else
-static inline int memtype_check_insert(struct memtype *new,
+static inline int memtype_check_insert(struct memtype *entry_new,
enum page_cache_mode *new_type)
{ return 0; }
static inline struct memtype *memtype_erase(u64 start, u64 end)
@@ -46,4 +46,4 @@ static inline int memtype_copy_nth_element(struct memtype *out, loff_t pos)
{ return 0; }
#endif
-#endif /* __PAT_INTERNAL_H_ */
+#endif /* __MEMTYPE_H_ */
diff --git a/arch/x86/mm/pat/memtype_interval.c b/arch/x86/mm/pat/memtype_interval.c
new file mode 100644
index 000000000000..a07e4882bf36
--- /dev/null
+++ b/arch/x86/mm/pat/memtype_interval.c
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Handle caching attributes in page tables (PAT)
+ *
+ * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ * Suresh B Siddha <suresh.b.siddha@intel.com>
+ *
+ * Interval tree used to store the PAT memory type reservations.
+ */
+
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/kernel.h>
+#include <linux/interval_tree_generic.h>
+#include <linux/sched.h>
+#include <linux/gfp.h>
+
+#include <asm/pgtable.h>
+#include <asm/memtype.h>
+
+#include "memtype.h"
+
+/*
+ * The memtype tree keeps track of memory type for specific
+ * physical memory areas. Without proper tracking, conflicting memory
+ * types in different mappings can cause CPU cache corruption.
+ *
+ * The tree is an interval tree (augmented rbtree) which tree is ordered
+ * by the starting address. The tree can contain multiple entries for
+ * different regions which overlap. All the aliases have the same
+ * cache attributes of course, as enforced by the PAT logic.
+ *
+ * memtype_lock protects the rbtree.
+ */
+
+static inline u64 interval_start(struct memtype *entry)
+{
+ return entry->start;
+}
+
+static inline u64 interval_end(struct memtype *entry)
+{
+ return entry->end - 1;
+}
+
+INTERVAL_TREE_DEFINE(struct memtype, rb, u64, subtree_max_end,
+ interval_start, interval_end,
+ static, interval)
+
+static struct rb_root_cached memtype_rbroot = RB_ROOT_CACHED;
+
+enum {
+ MEMTYPE_EXACT_MATCH = 0,
+ MEMTYPE_END_MATCH = 1
+};
+
+static struct memtype *memtype_match(u64 start, u64 end, int match_type)
+{
+ struct memtype *entry_match;
+
+ entry_match = interval_iter_first(&memtype_rbroot, start, end-1);
+
+ while (entry_match != NULL && entry_match->start < end) {
+ if ((match_type == MEMTYPE_EXACT_MATCH) &&
+ (entry_match->start == start) && (entry_match->end == end))
+ return entry_match;
+
+ if ((match_type == MEMTYPE_END_MATCH) &&
+ (entry_match->start < start) && (entry_match->end == end))
+ return entry_match;
+
+ entry_match = interval_iter_next(entry_match, start, end-1);
+ }
+
+ return NULL; /* Returns NULL if there is no match */
+}
+
+static int memtype_check_conflict(u64 start, u64 end,
+ enum page_cache_mode reqtype,
+ enum page_cache_mode *newtype)
+{
+ struct memtype *entry_match;
+ enum page_cache_mode found_type = reqtype;
+
+ entry_match = interval_iter_first(&memtype_rbroot, start, end-1);
+ if (entry_match == NULL)
+ goto success;
+
+ if (entry_match->type != found_type && newtype == NULL)
+ goto failure;
+
+ dprintk("Overlap at 0x%Lx-0x%Lx\n", entry_match->start, entry_match->end);
+ found_type = entry_match->type;
+
+ entry_match = interval_iter_next(entry_match, start, end-1);
+ while (entry_match) {
+ if (entry_match->type != found_type)
+ goto failure;
+
+ entry_match = interval_iter_next(entry_match, start, end-1);
+ }
+success:
+ if (newtype)
+ *newtype = found_type;
+
+ return 0;
+
+failure:
+ pr_info("x86/PAT: %s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
+ current->comm, current->pid, start, end,
+ cattr_name(found_type), cattr_name(entry_match->type));
+
+ return -EBUSY;
+}
+
+int memtype_check_insert(struct memtype *entry_new, enum page_cache_mode *ret_type)
+{
+ int err = 0;
+
+ err = memtype_check_conflict(entry_new->start, entry_new->end, entry_new->type, ret_type);
+ if (err)
+ return err;
+
+ if (ret_type)
+ entry_new->type = *ret_type;
+
+ interval_insert(entry_new, &memtype_rbroot);
+ return 0;
+}
+
+struct memtype *memtype_erase(u64 start, u64 end)
+{
+ struct memtype *entry_old;
+
+ /*
+ * Since the memtype_rbroot tree allows overlapping ranges,
+ * memtype_erase() checks with EXACT_MATCH first, i.e. free
+ * a whole node for the munmap case. If no such entry is found,
+ * it then checks with END_MATCH, i.e. shrink the size of a node
+ * from the end for the mremap case.
+ */
+ entry_old = memtype_match(start, end, MEMTYPE_EXACT_MATCH);
+ if (!entry_old) {
+ entry_old = memtype_match(start, end, MEMTYPE_END_MATCH);
+ if (!entry_old)
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (entry_old->start == start) {
+ /* munmap: erase this node */
+ interval_remove(entry_old, &memtype_rbroot);
+ } else {
+ /* mremap: update the end value of this node */
+ interval_remove(entry_old, &memtype_rbroot);
+ entry_old->end = start;
+ interval_insert(entry_old, &memtype_rbroot);
+
+ return NULL;
+ }
+
+ return entry_old;
+}
+
+struct memtype *memtype_lookup(u64 addr)
+{
+ return interval_iter_first(&memtype_rbroot, addr, addr + PAGE_SIZE-1);
+}
+
+/*
+ * Debugging helper, copy the Nth entry of the tree into a
+ * a copy for printout. This allows us to print out the tree
+ * via debugfs, without holding the memtype_lock too long:
+ */
+#ifdef CONFIG_DEBUG_FS
+int memtype_copy_nth_element(struct memtype *entry_out, loff_t pos)
+{
+ struct memtype *entry_match;
+ int i = 1;
+
+ entry_match = interval_iter_first(&memtype_rbroot, 0, ULONG_MAX);
+
+ while (entry_match && pos != i) {
+ entry_match = interval_iter_next(entry_match, 0, ULONG_MAX);
+ i++;
+ }
+
+ if (entry_match) { /* pos == i */
+ *entry_out = *entry_match;
+ return 0;
+ } else {
+ return 1;
+ }
+}
+#endif
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pat/set_memory.c
index 1b99ad05b117..20823392f4f2 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -24,10 +24,10 @@
#include <linux/uaccess.h>
#include <asm/pgalloc.h>
#include <asm/proto.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
#include <asm/set_memory.h>
-#include "mm_internal.h"
+#include "../mm_internal.h"
/*
* The current flushing context - we pass it instead of 5 arguments:
@@ -331,7 +331,7 @@ static void cpa_flush_all(unsigned long cache)
on_each_cpu(__cpa_flush_all, (void *) cache, 1);
}
-void __cpa_flush_tlb(void *data)
+static void __cpa_flush_tlb(void *data)
{
struct cpa_data *cpa = data;
unsigned int i;
@@ -1801,7 +1801,7 @@ int set_memory_uc(unsigned long addr, int numpages)
/*
* for now UC MINUS. see comments in ioremap()
*/
- ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
+ ret = memtype_reserve(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
_PAGE_CACHE_MODE_UC_MINUS, NULL);
if (ret)
goto out_err;
@@ -1813,7 +1813,7 @@ int set_memory_uc(unsigned long addr, int numpages)
return 0;
out_free:
- free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
+ memtype_free(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
out_err:
return ret;
}
@@ -1839,14 +1839,14 @@ int set_memory_wc(unsigned long addr, int numpages)
{
int ret;
- ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
+ ret = memtype_reserve(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
_PAGE_CACHE_MODE_WC, NULL);
if (ret)
return ret;
ret = _set_memory_wc(addr, numpages);
if (ret)
- free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
+ memtype_free(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
return ret;
}
@@ -1873,7 +1873,7 @@ int set_memory_wb(unsigned long addr, int numpages)
if (ret)
return ret;
- free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
+ memtype_free(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
return 0;
}
EXPORT_SYMBOL(set_memory_wb);
@@ -2014,7 +2014,7 @@ static int _set_pages_array(struct page **pages, int numpages,
continue;
start = page_to_pfn(pages[i]) << PAGE_SHIFT;
end = start + PAGE_SIZE;
- if (reserve_memtype(start, end, new_type, NULL))
+ if (memtype_reserve(start, end, new_type, NULL))
goto err_out;
}
@@ -2040,7 +2040,7 @@ err_out:
continue;
start = page_to_pfn(pages[i]) << PAGE_SHIFT;
end = start + PAGE_SIZE;
- free_memtype(start, end);
+ memtype_free(start, end);
}
return -EINVAL;
}
@@ -2089,7 +2089,7 @@ int set_pages_array_wb(struct page **pages, int numpages)
continue;
start = page_to_pfn(pages[i]) << PAGE_SHIFT;
end = start + PAGE_SIZE;
- free_memtype(start, end);
+ memtype_free(start, end);
}
return 0;
@@ -2281,5 +2281,5 @@ int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address,
* be exposed to the rest of the kernel. Include these directly here.
*/
#ifdef CONFIG_CPA_DEBUG
-#include "pageattr-test.c"
+#include "cpa-test.c"
#endif
diff --git a/arch/x86/mm/pat_interval.c b/arch/x86/mm/pat_interval.c
deleted file mode 100644
index 6855362eaf21..000000000000
--- a/arch/x86/mm/pat_interval.c
+++ /dev/null
@@ -1,185 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Handle caching attributes in page tables (PAT)
- *
- * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
- * Suresh B Siddha <suresh.b.siddha@intel.com>
- *
- * Interval tree used to store the PAT memory type reservations.
- */
-
-#include <linux/seq_file.h>
-#include <linux/debugfs.h>
-#include <linux/kernel.h>
-#include <linux/interval_tree_generic.h>
-#include <linux/sched.h>
-#include <linux/gfp.h>
-
-#include <asm/pgtable.h>
-#include <asm/pat.h>
-
-#include "pat_internal.h"
-
-/*
- * The memtype tree keeps track of memory type for specific
- * physical memory areas. Without proper tracking, conflicting memory
- * types in different mappings can cause CPU cache corruption.
- *
- * The tree is an interval tree (augmented rbtree) with tree ordered
- * on starting address. Tree can contain multiple entries for
- * different regions which overlap. All the aliases have the same
- * cache attributes of course.
- *
- * memtype_lock protects the rbtree.
- */
-static inline u64 memtype_interval_start(struct memtype *memtype)
-{
- return memtype->start;
-}
-
-static inline u64 memtype_interval_end(struct memtype *memtype)
-{
- return memtype->end - 1;
-}
-INTERVAL_TREE_DEFINE(struct memtype, rb, u64, subtree_max_end,
- memtype_interval_start, memtype_interval_end,
- static, memtype_interval)
-
-static struct rb_root_cached memtype_rbroot = RB_ROOT_CACHED;
-
-enum {
- MEMTYPE_EXACT_MATCH = 0,
- MEMTYPE_END_MATCH = 1
-};
-
-static struct memtype *memtype_match(u64 start, u64 end, int match_type)
-{
- struct memtype *match;
-
- match = memtype_interval_iter_first(&memtype_rbroot, start, end-1);
- while (match != NULL && match->start < end) {
- if ((match_type == MEMTYPE_EXACT_MATCH) &&
- (match->start == start) && (match->end == end))
- return match;
-
- if ((match_type == MEMTYPE_END_MATCH) &&
- (match->start < start) && (match->end == end))
- return match;
-
- match = memtype_interval_iter_next(match, start, end-1);
- }
-
- return NULL; /* Returns NULL if there is no match */
-}
-
-static int memtype_check_conflict(u64 start, u64 end,
- enum page_cache_mode reqtype,
- enum page_cache_mode *newtype)
-{
- struct memtype *match;
- enum page_cache_mode found_type = reqtype;
-
- match = memtype_interval_iter_first(&memtype_rbroot, start, end-1);
- if (match == NULL)
- goto success;
-
- if (match->type != found_type && newtype == NULL)
- goto failure;
-
- dprintk("Overlap at 0x%Lx-0x%Lx\n", match->start, match->end);
- found_type = match->type;
-
- match = memtype_interval_iter_next(match, start, end-1);
- while (match) {
- if (match->type != found_type)
- goto failure;
-
- match = memtype_interval_iter_next(match, start, end-1);
- }
-success:
- if (newtype)
- *newtype = found_type;
-
- return 0;
-
-failure:
- pr_info("x86/PAT: %s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
- current->comm, current->pid, start, end,
- cattr_name(found_type), cattr_name(match->type));
- return -EBUSY;
-}
-
-int memtype_check_insert(struct memtype *new,
- enum page_cache_mode *ret_type)
-{
- int err = 0;
-
- err = memtype_check_conflict(new->start, new->end, new->type, ret_type);
- if (err)
- return err;
-
- if (ret_type)
- new->type = *ret_type;
-
- memtype_interval_insert(new, &memtype_rbroot);
- return 0;
-}
-
-struct memtype *memtype_erase(u64 start, u64 end)
-{
- struct memtype *data;
-
- /*
- * Since the memtype_rbroot tree allows overlapping ranges,
- * memtype_erase() checks with EXACT_MATCH first, i.e. free
- * a whole node for the munmap case. If no such entry is found,
- * it then checks with END_MATCH, i.e. shrink the size of a node
- * from the end for the mremap case.
- */
- data = memtype_match(start, end, MEMTYPE_EXACT_MATCH);
- if (!data) {
- data = memtype_match(start, end, MEMTYPE_END_MATCH);
- if (!data)
- return ERR_PTR(-EINVAL);
- }
-
- if (data->start == start) {
- /* munmap: erase this node */
- memtype_interval_remove(data, &memtype_rbroot);
- } else {
- /* mremap: update the end value of this node */
- memtype_interval_remove(data, &memtype_rbroot);
- data->end = start;
- memtype_interval_insert(data, &memtype_rbroot);
- return NULL;
- }
-
- return data;
-}
-
-struct memtype *memtype_lookup(u64 addr)
-{
- return memtype_interval_iter_first(&memtype_rbroot, addr,
- addr + PAGE_SIZE-1);
-}
-
-#if defined(CONFIG_DEBUG_FS)
-int memtype_copy_nth_element(struct memtype *out, loff_t pos)
-{
- struct memtype *match;
- int i = 1;
-
- match = memtype_interval_iter_first(&memtype_rbroot, 0, ULONG_MAX);
- while (match && pos != i) {
- match = memtype_interval_iter_next(match, 0, ULONG_MAX);
- i++;
- }
-
- if (match) { /* pos == i */
- *out = *match;
- return 0;
- } else {
- return 1;
- }
-}
-#endif
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 9bb7f0ab9fe6..0e6700eaa4f9 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -18,6 +18,7 @@
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/io.h>
+#include <linux/vmalloc.h>
unsigned int __VMALLOC_RESERVE = 128 << 20;
diff --git a/arch/x86/mm/physaddr.c b/arch/x86/mm/physaddr.c
index bdc98150d4db..fc3f3d3e2ef2 100644
--- a/arch/x86/mm/physaddr.c
+++ b/arch/x86/mm/physaddr.c
@@ -5,6 +5,7 @@
#include <linux/mm.h>
#include <asm/page.h>
+#include <linux/vmalloc.h>
#include "physaddr.h"
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 9df652d3d927..fa855bbaebaf 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -34,7 +34,7 @@
#include <linux/errno.h>
#include <linux/memblock.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
#include <asm/e820/api.h>
#include <asm/pci_x86.h>
#include <asm/io_apic.h>
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index c8dbee62ec2a..bbba8b17829a 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -67,7 +67,7 @@
#include <asm/linkage.h>
#include <asm/page.h>
#include <asm/init.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
#include <asm/smp.h>
#include <asm/tlb.h>
diff --git a/arch/xtensa/include/asm/vmalloc.h b/arch/xtensa/include/asm/vmalloc.h
new file mode 100644
index 000000000000..0eb94b70be55
--- /dev/null
+++ b/arch/xtensa/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_XTENSA_VMALLOC_H
+#define _ASM_XTENSA_VMALLOC_H
+
+#endif /* _ASM_XTENSA_VMALLOC_H */
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 997cbfe4b90c..df88bfbd7fed 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -40,7 +40,7 @@
#include <linux/slab.h>
#include <linux/bitmap.h>
#if defined(CONFIG_X86)
-#include <asm/pat.h>
+#include <asm/memtype.h>
#endif
#include <linux/sched.h>
#include <linux/sched/mm.h>
diff --git a/drivers/media/pci/ivtv/ivtvfb.c b/drivers/media/pci/ivtv/ivtvfb.c
index 95a56cce9b65..1daf9e07cad7 100644
--- a/drivers/media/pci/ivtv/ivtvfb.c
+++ b/drivers/media/pci/ivtv/ivtvfb.c
@@ -37,7 +37,7 @@
#include <linux/ivtvfb.h>
#ifdef CONFIG_X86_64
-#include <asm/pat.h>
+#include <asm/memtype.h>
#endif
/* card parameters */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 80a9162b406c..5dfbc0e56e67 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -625,24 +625,19 @@ unsigned long vmalloc_to_pfn(const void *addr);
* On nommu, vmalloc/vfree wrap through kmalloc/kfree directly, so there
* is no special casing required.
*/
-static inline bool is_vmalloc_addr(const void *x)
-{
-#ifdef CONFIG_MMU
- unsigned long addr = (unsigned long)x;
-
- return addr >= VMALLOC_START && addr < VMALLOC_END;
-#else
- return false;
-#endif
-}
#ifndef is_ioremap_addr
#define is_ioremap_addr(x) is_vmalloc_addr(x)
#endif
#ifdef CONFIG_MMU
+extern bool is_vmalloc_addr(const void *x);
extern int is_vmalloc_or_module_addr(const void *x);
#else
+static inline bool is_vmalloc_addr(const void *x)
+{
+ return false;
+}
static inline int is_vmalloc_or_module_addr(const void *x)
{
return 0;
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index a4b241102771..ec3813236699 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -10,6 +10,8 @@
#include <linux/rbtree.h>
#include <linux/overflow.h>
+#include <asm/vmalloc.h>
+
struct vm_area_struct; /* vma defining user mapping in mm_types.h */
struct notifier_block; /* in notifier.h */
diff --git a/mm/highmem.c b/mm/highmem.c
index 107b10f9878e..64d8dea47dd1 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -29,7 +29,7 @@
#include <linux/highmem.h>
#include <linux/kgdb.h>
#include <asm/tlbflush.h>
-
+#include <linux/vmalloc.h>
#if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32)
DEFINE_PER_CPU(int, __kmap_atomic_idx);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index e9681dc4aa75..8e8302568699 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -41,6 +41,14 @@
#include "internal.h"
+bool is_vmalloc_addr(const void *x)
+{
+ unsigned long addr = (unsigned long)x;
+
+ return addr >= VMALLOC_START && addr < VMALLOC_END;
+}
+EXPORT_SYMBOL(is_vmalloc_addr);
+
struct vfree_deferred {
struct llist_head list;
struct work_struct wq;