aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-01-28 09:03:40 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2020-01-28 09:03:40 -0800
commit634cd4b6afe15dca8df02bcba242b9b0c5e9b5a5 (patch)
tree4865e4ec64a0575614a81d88f523269c04e17f09 /arch/x86/include
parentMerge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip (diff)
parentefi/x86: Disable instrumentation in the EFI runtime handling code (diff)
downloadlinux-dev-634cd4b6afe15dca8df02bcba242b9b0c5e9b5a5.tar.xz
linux-dev-634cd4b6afe15dca8df02bcba242b9b0c5e9b5a5.zip
Merge branch 'efi-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull EFI updates from Ingo Molnar: "The main changes in this cycle were: - Cleanup of the GOP [graphics output] handling code in the EFI stub - Complete refactoring of the mixed mode handling in the x86 EFI stub - Overhaul of the x86 EFI boot/runtime code - Increase robustness for mixed mode code - Add the ability to disable DMA at the root port level in the EFI stub - Get rid of RWX mappings in the EFI memory map and page tables, where possible - Move the support code for the old EFI memory mapping style into its only user, the SGI UV1+ support code. - plus misc fixes, updates, smaller cleanups. ... and due to interactions with the RWX changes, another round of PAT cleanups make a guest appearance via the EFI tree - with no side effects intended" * 'efi-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (75 commits) efi/x86: Disable instrumentation in the EFI runtime handling code efi/libstub/x86: Fix EFI server boot failure efi/x86: Disallow efi=old_map in mixed mode x86/boot/compressed: Relax sed symbol type regex for LLVM ld.lld efi/x86: avoid KASAN false positives when accessing the 1: 1 mapping efi: Fix handling of multiple efi_fake_mem= entries efi: Fix efi_memmap_alloc() leaks efi: Add tracking for dynamically allocated memmaps efi: Add a flags parameter to efi_memory_map efi: Fix comment for efi_mem_type() wrt absent physical addresses efi/arm: Defer probe of PCIe backed efifb on DT systems efi/x86: Limit EFI old memory map to SGI UV machines efi/x86: Avoid RWX mappings for all of DRAM efi/x86: Don't map the entire kernel text RW for mixed mode x86/mm: Fix NX bit clearing issue in kernel_map_pages_in_pgd efi/libstub/x86: Fix unused-variable warning efi/libstub/x86: Use mandatory 16-byte stack alignment in mixed mode efi/libstub/x86: Use const attribute for efi_is_64bit() efi: Allow disabling PCI busmastering on bridges during boot efi/x86: Allow translating 64-bit arguments for mixed mode calls ...
Diffstat (limited to 'arch/x86/include')
-rw-r--r--arch/x86/include/asm/cpu_entry_area.h10
-rw-r--r--arch/x86/include/asm/efi.h244
-rw-r--r--arch/x86/include/asm/memtype.h27
-rw-r--r--arch/x86/include/asm/mmu_context.h86
-rw-r--r--arch/x86/include/asm/mtrr.h4
-rw-r--r--arch/x86/include/asm/pat.h27
-rw-r--r--arch/x86/include/asm/pci.h2
-rw-r--r--arch/x86/include/asm/pgtable_32_areas.h53
-rw-r--r--arch/x86/include/asm/pgtable_32_types.h57
-rw-r--r--arch/x86/include/asm/pgtable_areas.h16
-rw-r--r--arch/x86/include/asm/pgtable_types.h143
-rw-r--r--arch/x86/include/asm/vmalloc.h6
12 files changed, 355 insertions, 320 deletions
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
index 804734058c77..02c0078d3787 100644
--- a/arch/x86/include/asm/cpu_entry_area.h
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -6,6 +6,7 @@
#include <linux/percpu-defs.h>
#include <asm/processor.h>
#include <asm/intel_ds.h>
+#include <asm/pgtable_areas.h>
#ifdef CONFIG_X86_64
@@ -134,15 +135,6 @@ DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);
extern void setup_cpu_entry_areas(void);
extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
-/* Single page reserved for the readonly IDT mapping: */
-#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE
-#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
-
-#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT)
-
-#define CPU_ENTRY_AREA_MAP_SIZE \
- (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE)
-
extern struct cpu_entry_area *get_cpu_entry_area(int cpu);
static inline struct entry_stack *cpu_entry_stack(int cpu)
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index d028e9acdf1c..86169a24b0d8 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -8,6 +8,7 @@
#include <asm/tlb.h>
#include <asm/nospec-branch.h>
#include <asm/mmu_context.h>
+#include <linux/build_bug.h>
/*
* We map the EFI regions needed for runtime services non-contiguously,
@@ -19,13 +20,16 @@
* This is the main reason why we're doing stable VA mappings for RT
* services.
*
- * This flag is used in conjunction with a chicken bit called
- * "efi=old_map" which can be used as a fallback to the old runtime
- * services mapping method in case there's some b0rkage with a
- * particular EFI implementation (haha, it is hard to hold up the
- * sarcasm here...).
+ * SGI UV1 machines are known to be incompatible with this scheme, so we
+ * provide an opt-out for these machines via a DMI quirk that sets the
+ * attribute below.
*/
-#define EFI_OLD_MEMMAP EFI_ARCH_1
+#define EFI_UV1_MEMMAP EFI_ARCH_1
+
+static inline bool efi_have_uv1_memmap(void)
+{
+ return IS_ENABLED(CONFIG_X86_UV) && efi_enabled(EFI_UV1_MEMMAP);
+}
#define EFI32_LOADER_SIGNATURE "EL32"
#define EFI64_LOADER_SIGNATURE "EL64"
@@ -34,10 +38,46 @@
#define ARCH_EFI_IRQ_FLAGS_MASK X86_EFLAGS_IF
-#ifdef CONFIG_X86_32
+/*
+ * The EFI services are called through variadic functions in many cases. These
+ * functions are implemented in assembler and support only a fixed number of
+ * arguments. The macros below allows us to check at build time that we don't
+ * try to call them with too many arguments.
+ *
+ * __efi_nargs() will return the number of arguments if it is 7 or less, and
+ * cause a BUILD_BUG otherwise. The limitations of the C preprocessor make it
+ * impossible to calculate the exact number of arguments beyond some
+ * pre-defined limit. The maximum number of arguments currently supported by
+ * any of the thunks is 7, so this is good enough for now and can be extended
+ * in the obvious way if we ever need more.
+ */
-extern asmlinkage unsigned long efi_call_phys(void *, ...);
+#define __efi_nargs(...) __efi_nargs_(__VA_ARGS__)
+#define __efi_nargs_(...) __efi_nargs__(0, ##__VA_ARGS__, \
+ __efi_arg_sentinel(7), __efi_arg_sentinel(6), \
+ __efi_arg_sentinel(5), __efi_arg_sentinel(4), \
+ __efi_arg_sentinel(3), __efi_arg_sentinel(2), \
+ __efi_arg_sentinel(1), __efi_arg_sentinel(0))
+#define __efi_nargs__(_0, _1, _2, _3, _4, _5, _6, _7, n, ...) \
+ __take_second_arg(n, \
+ ({ BUILD_BUG_ON_MSG(1, "__efi_nargs limit exceeded"); 8; }))
+#define __efi_arg_sentinel(n) , n
+/*
+ * __efi_nargs_check(f, n, ...) will cause a BUILD_BUG if the ellipsis
+ * represents more than n arguments.
+ */
+
+#define __efi_nargs_check(f, n, ...) \
+ __efi_nargs_check_(f, __efi_nargs(__VA_ARGS__), n)
+#define __efi_nargs_check_(f, p, n) __efi_nargs_check__(f, p, n)
+#define __efi_nargs_check__(f, p, n) ({ \
+ BUILD_BUG_ON_MSG( \
+ (p) > (n), \
+ #f " called with too many arguments (" #p ">" #n ")"); \
+})
+
+#ifdef CONFIG_X86_32
#define arch_efi_call_virt_setup() \
({ \
kernel_fpu_begin(); \
@@ -51,13 +91,7 @@ extern asmlinkage unsigned long efi_call_phys(void *, ...);
})
-/*
- * Wrap all the virtual calls in a way that forces the parameters on the stack.
- */
-#define arch_efi_call_virt(p, f, args...) \
-({ \
- ((efi_##f##_t __attribute__((regparm(0)))*) p->f)(args); \
-})
+#define arch_efi_call_virt(p, f, args...) p->f(args)
#define efi_ioremap(addr, size, type, attr) ioremap_cache(addr, size)
@@ -65,9 +99,12 @@ extern asmlinkage unsigned long efi_call_phys(void *, ...);
#define EFI_LOADER_SIGNATURE "EL64"
-extern asmlinkage u64 efi_call(void *fp, ...);
+extern asmlinkage u64 __efi_call(void *fp, ...);
-#define efi_call_phys(f, args...) efi_call((f), args)
+#define efi_call(...) ({ \
+ __efi_nargs_check(efi_call, 7, __VA_ARGS__); \
+ __efi_call(__VA_ARGS__); \
+})
/*
* struct efi_scratch - Scratch space used while switching to/from efi_mm
@@ -85,7 +122,7 @@ struct efi_scratch {
kernel_fpu_begin(); \
firmware_restrict_branch_speculation_start(); \
\
- if (!efi_enabled(EFI_OLD_MEMMAP)) \
+ if (!efi_have_uv1_memmap()) \
efi_switch_mm(&efi_mm); \
})
@@ -94,7 +131,7 @@ struct efi_scratch {
#define arch_efi_call_virt_teardown() \
({ \
- if (!efi_enabled(EFI_OLD_MEMMAP)) \
+ if (!efi_have_uv1_memmap()) \
efi_switch_mm(efi_scratch.prev_mm); \
\
firmware_restrict_branch_speculation_end(); \
@@ -121,8 +158,6 @@ extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
extern struct efi_scratch efi_scratch;
extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable);
extern int __init efi_memblock_x86_reserve_range(void);
-extern pgd_t * __init efi_call_phys_prolog(void);
-extern void __init efi_call_phys_epilog(pgd_t *save_pgd);
extern void __init efi_print_memmap(void);
extern void __init efi_memory_uc(u64 addr, unsigned long size);
extern void __init efi_map_region(efi_memory_desc_t *md);
@@ -140,6 +175,8 @@ extern void efi_delete_dummy_variable(void);
extern void efi_switch_mm(struct mm_struct *mm);
extern void efi_recover_from_page_fault(unsigned long phys_addr);
extern void efi_free_boot_services(void);
+extern pgd_t * __init efi_uv1_memmap_phys_prolog(void);
+extern void __init efi_uv1_memmap_phys_epilog(pgd_t *save_pgd);
struct efi_setup_data {
u64 fw_vendor;
@@ -152,93 +189,144 @@ struct efi_setup_data {
extern u64 efi_setup;
#ifdef CONFIG_EFI
+extern efi_status_t __efi64_thunk(u32, ...);
-static inline bool efi_is_native(void)
+#define efi64_thunk(...) ({ \
+ __efi_nargs_check(efi64_thunk, 6, __VA_ARGS__); \
+ __efi64_thunk(__VA_ARGS__); \
+})
+
+static inline bool efi_is_mixed(void)
{
- return IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT);
+ if (!IS_ENABLED(CONFIG_EFI_MIXED))
+ return false;
+ return IS_ENABLED(CONFIG_X86_64) && !efi_enabled(EFI_64BIT);
}
static inline bool efi_runtime_supported(void)
{
- if (efi_is_native())
- return true;
-
- if (IS_ENABLED(CONFIG_EFI_MIXED) && !efi_enabled(EFI_OLD_MEMMAP))
+ if (IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT))
return true;
- return false;
+ return IS_ENABLED(CONFIG_EFI_MIXED);
}
extern void parse_efi_setup(u64 phys_addr, u32 data_len);
extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt);
-#ifdef CONFIG_EFI_MIXED
extern void efi_thunk_runtime_setup(void);
-extern efi_status_t efi_thunk_set_virtual_address_map(
- void *phys_set_virtual_address_map,
- unsigned long memory_map_size,
- unsigned long descriptor_size,
- u32 descriptor_version,
- efi_memory_desc_t *virtual_map);
-#else
-static inline void efi_thunk_runtime_setup(void) {}
-static inline efi_status_t efi_thunk_set_virtual_address_map(
- void *phys_set_virtual_address_map,
- unsigned long memory_map_size,
- unsigned long descriptor_size,
- u32 descriptor_version,
- efi_memory_desc_t *virtual_map)
-{
- return EFI_SUCCESS;
-}
-#endif /* CONFIG_EFI_MIXED */
-
+efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size,
+ unsigned long descriptor_size,
+ u32 descriptor_version,
+ efi_memory_desc_t *virtual_map);
/* arch specific definitions used by the stub code */
-struct efi_config {
- u64 image_handle;
- u64 table;
- u64 runtime_services;
- u64 boot_services;
- u64 text_output;
- efi_status_t (*call)(unsigned long, ...);
- bool is64;
-} __packed;
-
-__pure const struct efi_config *__efi_early(void);
+__attribute_const__ bool efi_is_64bit(void);
-static inline bool efi_is_64bit(void)
+static inline bool efi_is_native(void)
{
if (!IS_ENABLED(CONFIG_X86_64))
- return false;
-
+ return true;
if (!IS_ENABLED(CONFIG_EFI_MIXED))
return true;
+ return efi_is_64bit();
+}
+
+#define efi_mixed_mode_cast(attr) \
+ __builtin_choose_expr( \
+ __builtin_types_compatible_p(u32, __typeof__(attr)), \
+ (unsigned long)(attr), (attr))
- return __efi_early()->is64;
+#define efi_table_attr(inst, attr) \
+ (efi_is_native() \
+ ? inst->attr \
+ : (__typeof__(inst->attr)) \
+ efi_mixed_mode_cast(inst->mixed_mode.attr))
+
+/*
+ * The following macros allow translating arguments if necessary from native to
+ * mixed mode. The use case for this is to initialize the upper 32 bits of
+ * output parameters, and where the 32-bit method requires a 64-bit argument,
+ * which must be split up into two arguments to be thunked properly.
+ *
+ * As examples, the AllocatePool boot service returns the address of the
+ * allocation, but it will not set the high 32 bits of the address. To ensure
+ * that the full 64-bit address is initialized, we zero-init the address before
+ * calling the thunk.
+ *
+ * The FreePages boot service takes a 64-bit physical address even in 32-bit
+ * mode. For the thunk to work correctly, a native 64-bit call of
+ * free_pages(addr, size)
+ * must be translated to
+ * efi64_thunk(free_pages, addr & U32_MAX, addr >> 32, size)
+ * so that the two 32-bit halves of addr get pushed onto the stack separately.
+ */
+
+static inline void *efi64_zero_upper(void *p)
+{
+ ((u32 *)p)[1] = 0;
+ return p;
}
-#define efi_table_attr(table, attr, instance) \
- (efi_is_64bit() ? \
- ((table##_64_t *)(unsigned long)instance)->attr : \
- ((table##_32_t *)(unsigned long)instance)->attr)
+#define __efi64_argmap_free_pages(addr, size) \
+ ((addr), 0, (size))
+
+#define __efi64_argmap_get_memory_map(mm_size, mm, key, size, ver) \
+ ((mm_size), (mm), efi64_zero_upper(key), efi64_zero_upper(size), (ver))
+
+#define __efi64_argmap_allocate_pool(type, size, buffer) \
+ ((type), (size), efi64_zero_upper(buffer))
-#define efi_call_proto(protocol, f, instance, ...) \
- __efi_early()->call(efi_table_attr(protocol, f, instance), \
- instance, ##__VA_ARGS__)
+#define __efi64_argmap_handle_protocol(handle, protocol, interface) \
+ ((handle), (protocol), efi64_zero_upper(interface))
-#define efi_call_early(f, ...) \
- __efi_early()->call(efi_table_attr(efi_boot_services, f, \
- __efi_early()->boot_services), __VA_ARGS__)
+#define __efi64_argmap_locate_protocol(protocol, reg, interface) \
+ ((protocol), (reg), efi64_zero_upper(interface))
-#define __efi_call_early(f, ...) \
- __efi_early()->call((unsigned long)f, __VA_ARGS__);
+/* PCI I/O */
+#define __efi64_argmap_get_location(protocol, seg, bus, dev, func) \
+ ((protocol), efi64_zero_upper(seg), efi64_zero_upper(bus), \
+ efi64_zero_upper(dev), efi64_zero_upper(func))
+
+/*
+ * The macros below handle the plumbing for the argument mapping. To add a
+ * mapping for a specific EFI method, simply define a macro
+ * __efi64_argmap_<method name>, following the examples above.
+ */
-#define efi_call_runtime(f, ...) \
- __efi_early()->call(efi_table_attr(efi_runtime_services, f, \
- __efi_early()->runtime_services), __VA_ARGS__)
+#define __efi64_thunk_map(inst, func, ...) \
+ efi64_thunk(inst->mixed_mode.func, \
+ __efi64_argmap(__efi64_argmap_ ## func(__VA_ARGS__), \
+ (__VA_ARGS__)))
+
+#define __efi64_argmap(mapped, args) \
+ __PASTE(__efi64_argmap__, __efi_nargs(__efi_eat mapped))(mapped, args)
+#define __efi64_argmap__0(mapped, args) __efi_eval mapped
+#define __efi64_argmap__1(mapped, args) __efi_eval args
+
+#define __efi_eat(...)
+#define __efi_eval(...) __VA_ARGS__
+
+/* The three macros below handle dispatching via the thunk if needed */
+
+#define efi_call_proto(inst, func, ...) \
+ (efi_is_native() \
+ ? inst->func(inst, ##__VA_ARGS__) \
+ : __efi64_thunk_map(inst, func, inst, ##__VA_ARGS__))
+
+#define efi_bs_call(func, ...) \
+ (efi_is_native() \
+ ? efi_system_table()->boottime->func(__VA_ARGS__) \
+ : __efi64_thunk_map(efi_table_attr(efi_system_table(), \
+ boottime), func, __VA_ARGS__))
+
+#define efi_rt_call(func, ...) \
+ (efi_is_native() \
+ ? efi_system_table()->runtime->func(__VA_ARGS__) \
+ : __efi64_thunk_map(efi_table_attr(efi_system_table(), \
+ runtime), func, __VA_ARGS__))
extern bool efi_reboot_required(void);
extern bool efi_is_table_address(unsigned long phys_addr);
diff --git a/arch/x86/include/asm/memtype.h b/arch/x86/include/asm/memtype.h
new file mode 100644
index 000000000000..9c2447b3555d
--- /dev/null
+++ b/arch/x86/include/asm/memtype.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_MEMTYPE_H
+#define _ASM_X86_MEMTYPE_H
+
+#include <linux/types.h>
+#include <asm/pgtable_types.h>
+
+extern bool pat_enabled(void);
+extern void pat_disable(const char *reason);
+extern void pat_init(void);
+extern void init_cache_modes(void);
+
+extern int memtype_reserve(u64 start, u64 end,
+ enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm);
+extern int memtype_free(u64 start, u64 end);
+
+extern int memtype_kernel_map_sync(u64 base, unsigned long size,
+ enum page_cache_mode pcm);
+
+extern int memtype_reserve_io(resource_size_t start, resource_size_t end,
+ enum page_cache_mode *pcm);
+
+extern void memtype_free_io(resource_size_t start, resource_size_t end);
+
+extern bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn);
+
+#endif /* _ASM_X86_MEMTYPE_H */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 5f33924e200f..b243234e90cb 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -69,14 +69,6 @@ struct ldt_struct {
int slot;
};
-/* This is a multiple of PAGE_SIZE. */
-#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
-
-static inline void *ldt_slot_va(int slot)
-{
- return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
-}
-
/*
* Used for LDT copy/destruction.
*/
@@ -99,87 +91,21 @@ static inline void destroy_context_ldt(struct mm_struct *mm) { }
static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { }
#endif
-static inline void load_mm_ldt(struct mm_struct *mm)
-{
#ifdef CONFIG_MODIFY_LDT_SYSCALL
- struct ldt_struct *ldt;
-
- /* READ_ONCE synchronizes with smp_store_release */
- ldt = READ_ONCE(mm->context.ldt);
-
- /*
- * Any change to mm->context.ldt is followed by an IPI to all
- * CPUs with the mm active. The LDT will not be freed until
- * after the IPI is handled by all such CPUs. This means that,
- * if the ldt_struct changes before we return, the values we see
- * will be safe, and the new values will be loaded before we run
- * any user code.
- *
- * NB: don't try to convert this to use RCU without extreme care.
- * We would still need IRQs off, because we don't want to change
- * the local LDT after an IPI loaded a newer value than the one
- * that we can see.
- */
-
- if (unlikely(ldt)) {
- if (static_cpu_has(X86_FEATURE_PTI)) {
- if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
- /*
- * Whoops -- either the new LDT isn't mapped
- * (if slot == -1) or is mapped into a bogus
- * slot (if slot > 1).
- */
- clear_LDT();
- return;
- }
-
- /*
- * If page table isolation is enabled, ldt->entries
- * will not be mapped in the userspace pagetables.
- * Tell the CPU to access the LDT through the alias
- * at ldt_slot_va(ldt->slot).
- */
- set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
- } else {
- set_ldt(ldt->entries, ldt->nr_entries);
- }
- } else {
- clear_LDT();
- }
+extern void load_mm_ldt(struct mm_struct *mm);
+extern void switch_ldt(struct mm_struct *prev, struct mm_struct *next);
#else
+static inline void load_mm_ldt(struct mm_struct *mm)
+{
clear_LDT();
-#endif
}
-
static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
{
-#ifdef CONFIG_MODIFY_LDT_SYSCALL
- /*
- * Load the LDT if either the old or new mm had an LDT.
- *
- * An mm will never go from having an LDT to not having an LDT. Two
- * mms never share an LDT, so we don't gain anything by checking to
- * see whether the LDT changed. There's also no guarantee that
- * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL,
- * then prev->context.ldt will also be non-NULL.
- *
- * If we really cared, we could optimize the case where prev == next
- * and we're exiting lazy mode. Most of the time, if this happens,
- * we don't actually need to reload LDTR, but modify_ldt() is mostly
- * used by legacy code and emulators where we don't need this level of
- * performance.
- *
- * This uses | instead of || because it generates better code.
- */
- if (unlikely((unsigned long)prev->context.ldt |
- (unsigned long)next->context.ldt))
- load_mm_ldt(next);
-#endif
-
DEBUG_LOCKS_WARN_ON(preemptible());
}
+#endif
-void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
+extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
/*
* Init a new mm. Used on mm copies, like at fork()
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index dbff1456d215..829df26fd7a3 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -24,7 +24,7 @@
#define _ASM_X86_MTRR_H
#include <uapi/asm/mtrr.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
/*
@@ -86,7 +86,7 @@ static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
}
static inline void mtrr_bp_init(void)
{
- pat_disable("MTRRs disabled, skipping PAT initialization too.");
+ pat_disable("PAT support disabled because CONFIG_MTRR is disabled in the kernel.");
}
#define mtrr_ap_init() do {} while (0)
diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h
deleted file mode 100644
index 92015c65fa2a..000000000000
--- a/arch/x86/include/asm/pat.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_PAT_H
-#define _ASM_X86_PAT_H
-
-#include <linux/types.h>
-#include <asm/pgtable_types.h>
-
-bool pat_enabled(void);
-void pat_disable(const char *reason);
-extern void pat_init(void);
-extern void init_cache_modes(void);
-
-extern int reserve_memtype(u64 start, u64 end,
- enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm);
-extern int free_memtype(u64 start, u64 end);
-
-extern int kernel_map_sync_memtype(u64 base, unsigned long size,
- enum page_cache_mode pcm);
-
-int io_reserve_memtype(resource_size_t start, resource_size_t end,
- enum page_cache_mode *pcm);
-
-void io_free_memtype(resource_size_t start, resource_size_t end);
-
-bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn);
-
-#endif /* _ASM_X86_PAT_H */
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 90d0731fdcb6..c1fdd43fe187 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -9,7 +9,7 @@
#include <linux/scatterlist.h>
#include <linux/numa.h>
#include <asm/io.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
#include <asm/x86_init.h>
struct pci_sysdata {
diff --git a/arch/x86/include/asm/pgtable_32_areas.h b/arch/x86/include/asm/pgtable_32_areas.h
new file mode 100644
index 000000000000..b6355416a15a
--- /dev/null
+++ b/arch/x86/include/asm/pgtable_32_areas.h
@@ -0,0 +1,53 @@
+#ifndef _ASM_X86_PGTABLE_32_AREAS_H
+#define _ASM_X86_PGTABLE_32_AREAS_H
+
+#include <asm/cpu_entry_area.h>
+
+/*
+ * Just any arbitrary offset to the start of the vmalloc VM area: the
+ * current 8MB value just means that there will be a 8MB "hole" after the
+ * physical memory until the kernel virtual memory starts. That means that
+ * any out-of-bounds memory accesses will hopefully be caught.
+ * The vmalloc() routines leaves a hole of 4kB between each vmalloced
+ * area for the same reason. ;)
+ */
+#define VMALLOC_OFFSET (8 * 1024 * 1024)
+
+#ifndef __ASSEMBLY__
+extern bool __vmalloc_start_set; /* set once high_memory is set */
+#endif
+
+#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET)
+#ifdef CONFIG_X86_PAE
+#define LAST_PKMAP 512
+#else
+#define LAST_PKMAP 1024
+#endif
+
+#define CPU_ENTRY_AREA_PAGES (NR_CPUS * DIV_ROUND_UP(sizeof(struct cpu_entry_area), PAGE_SIZE))
+
+/* The +1 is for the readonly IDT page: */
+#define CPU_ENTRY_AREA_BASE \
+ ((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK)
+
+#define LDT_BASE_ADDR \
+ ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
+
+#define LDT_END_ADDR (LDT_BASE_ADDR + PMD_SIZE)
+
+#define PKMAP_BASE \
+ ((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK)
+
+#ifdef CONFIG_HIGHMEM
+# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE)
+#else
+# define VMALLOC_END (LDT_BASE_ADDR - 2 * PAGE_SIZE)
+#endif
+
+#define MODULES_VADDR VMALLOC_START
+#define MODULES_END VMALLOC_END
+#define MODULES_LEN (MODULES_VADDR - MODULES_END)
+
+#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE)
+
+#endif /* _ASM_X86_PGTABLE_32_AREAS_H */
diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h
index 0416d42e5bdd..5356a46b0373 100644
--- a/arch/x86/include/asm/pgtable_32_types.h
+++ b/arch/x86/include/asm/pgtable_32_types.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_PGTABLE_32_DEFS_H
-#define _ASM_X86_PGTABLE_32_DEFS_H
+#ifndef _ASM_X86_PGTABLE_32_TYPES_H
+#define _ASM_X86_PGTABLE_32_TYPES_H
/*
* The Linux x86 paging architecture is 'compile-time dual-mode', it
@@ -20,55 +20,4 @@
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
-/* Just any arbitrary offset to the start of the vmalloc VM area: the
- * current 8MB value just means that there will be a 8MB "hole" after the
- * physical memory until the kernel virtual memory starts. That means that
- * any out-of-bounds memory accesses will hopefully be caught.
- * The vmalloc() routines leaves a hole of 4kB between each vmalloced
- * area for the same reason. ;)
- */
-#define VMALLOC_OFFSET (8 * 1024 * 1024)
-
-#ifndef __ASSEMBLY__
-extern bool __vmalloc_start_set; /* set once high_memory is set */
-#endif
-
-#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET)
-#ifdef CONFIG_X86_PAE
-#define LAST_PKMAP 512
-#else
-#define LAST_PKMAP 1024
-#endif
-
-/*
- * This is an upper bound on sizeof(struct cpu_entry_area) / PAGE_SIZE.
- * Define this here and validate with BUILD_BUG_ON() in cpu_entry_area.c
- * to avoid include recursion hell.
- */
-#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 43)
-
-/* The +1 is for the readonly IDT page: */
-#define CPU_ENTRY_AREA_BASE \
- ((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK)
-
-#define LDT_BASE_ADDR \
- ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
-
-#define LDT_END_ADDR (LDT_BASE_ADDR + PMD_SIZE)
-
-#define PKMAP_BASE \
- ((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK)
-
-#ifdef CONFIG_HIGHMEM
-# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE)
-#else
-# define VMALLOC_END (LDT_BASE_ADDR - 2 * PAGE_SIZE)
-#endif
-
-#define MODULES_VADDR VMALLOC_START
-#define MODULES_END VMALLOC_END
-#define MODULES_LEN (MODULES_VADDR - MODULES_END)
-
-#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE)
-
-#endif /* _ASM_X86_PGTABLE_32_DEFS_H */
+#endif /* _ASM_X86_PGTABLE_32_TYPES_H */
diff --git a/arch/x86/include/asm/pgtable_areas.h b/arch/x86/include/asm/pgtable_areas.h
new file mode 100644
index 000000000000..d34cce1b995c
--- /dev/null
+++ b/arch/x86/include/asm/pgtable_areas.h
@@ -0,0 +1,16 @@
+#ifndef _ASM_X86_PGTABLE_AREAS_H
+#define _ASM_X86_PGTABLE_AREAS_H
+
+#ifdef CONFIG_X86_32
+# include <asm/pgtable_32_areas.h>
+#endif
+
+/* Single page reserved for the readonly IDT mapping: */
+#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE
+#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
+
+#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT)
+
+#define CPU_ENTRY_AREA_MAP_SIZE (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE)
+
+#endif /* _ASM_X86_PGTABLE_AREAS_H */
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index b5e49e6bac63..ea7400726d7a 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -110,11 +110,6 @@
#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
-#define _PAGE_TABLE_NOENC (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |\
- _PAGE_ACCESSED | _PAGE_DIRTY)
-#define _KERNPG_TABLE_NOENC (_PAGE_PRESENT | _PAGE_RW | \
- _PAGE_ACCESSED | _PAGE_DIRTY)
-
/*
* Set of bits not changed in pte_modify. The pte's
* protection key is treated like _PAGE_RW, for
@@ -136,80 +131,93 @@
*/
#ifndef __ASSEMBLY__
enum page_cache_mode {
- _PAGE_CACHE_MODE_WB = 0,
- _PAGE_CACHE_MODE_WC = 1,
+ _PAGE_CACHE_MODE_WB = 0,
+ _PAGE_CACHE_MODE_WC = 1,
_PAGE_CACHE_MODE_UC_MINUS = 2,
- _PAGE_CACHE_MODE_UC = 3,
- _PAGE_CACHE_MODE_WT = 4,
- _PAGE_CACHE_MODE_WP = 5,
- _PAGE_CACHE_MODE_NUM = 8
+ _PAGE_CACHE_MODE_UC = 3,
+ _PAGE_CACHE_MODE_WT = 4,
+ _PAGE_CACHE_MODE_WP = 5,
+
+ _PAGE_CACHE_MODE_NUM = 8
};
#endif
-#define _PAGE_CACHE_MASK (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT)
-#define _PAGE_NOCACHE (cachemode2protval(_PAGE_CACHE_MODE_UC))
-#define _PAGE_CACHE_WP (cachemode2protval(_PAGE_CACHE_MODE_WP))
+#define _PAGE_ENC (_AT(pteval_t, sme_me_mask))
-#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
-#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
- _PAGE_ACCESSED | _PAGE_NX)
-
-#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | \
- _PAGE_USER | _PAGE_ACCESSED)
-#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
- _PAGE_ACCESSED | _PAGE_NX)
-#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
- _PAGE_ACCESSED)
-#define PAGE_COPY PAGE_COPY_NOEXEC
-#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | \
- _PAGE_ACCESSED | _PAGE_NX)
-#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
- _PAGE_ACCESSED)
-
-#define __PAGE_KERNEL_EXEC \
- (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_GLOBAL)
-#define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX)
-
-#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW)
-#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW)
-#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_NOCACHE)
-#define __PAGE_KERNEL_VVAR (__PAGE_KERNEL_RO | _PAGE_USER)
-#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE)
-#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
-#define __PAGE_KERNEL_WP (__PAGE_KERNEL | _PAGE_CACHE_WP)
-
-#define __PAGE_KERNEL_IO (__PAGE_KERNEL)
-#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE)
+#define _PAGE_CACHE_MASK (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)
-#ifndef __ASSEMBLY__
+#define _PAGE_NOCACHE (cachemode2protval(_PAGE_CACHE_MODE_UC))
+#define _PAGE_CACHE_WP (cachemode2protval(_PAGE_CACHE_MODE_WP))
-#define _PAGE_ENC (_AT(pteval_t, sme_me_mask))
+#define __PP _PAGE_PRESENT
+#define __RW _PAGE_RW
+#define _USR _PAGE_USER
+#define ___A _PAGE_ACCESSED
+#define ___D _PAGE_DIRTY
+#define ___G _PAGE_GLOBAL
+#define __NX _PAGE_NX
+
+#define _ENC _PAGE_ENC
+#define __WP _PAGE_CACHE_WP
+#define __NC _PAGE_NOCACHE
+#define _PSE _PAGE_PSE
+
+#define pgprot_val(x) ((x).pgprot)
+#define __pgprot(x) ((pgprot_t) { (x) } )
+#define __pg(x) __pgprot(x)
+
+#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
+
+#define PAGE_NONE __pg( 0| 0| 0|___A| 0| 0| 0|___G)
+#define PAGE_SHARED __pg(__PP|__RW|_USR|___A|__NX| 0| 0| 0)
+#define PAGE_SHARED_EXEC __pg(__PP|__RW|_USR|___A| 0| 0| 0| 0)
+#define PAGE_COPY_NOEXEC __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0)
+#define PAGE_COPY_EXEC __pg(__PP| 0|_USR|___A| 0| 0| 0| 0)
+#define PAGE_COPY __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0)
+#define PAGE_READONLY __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0)
+#define PAGE_READONLY_EXEC __pg(__PP| 0|_USR|___A| 0| 0| 0| 0)
+
+#define __PAGE_KERNEL (__PP|__RW| 0|___A|__NX|___D| 0|___G)
+#define __PAGE_KERNEL_EXEC (__PP|__RW| 0|___A| 0|___D| 0|___G)
+#define _KERNPG_TABLE_NOENC (__PP|__RW| 0|___A| 0|___D| 0| 0)
+#define _KERNPG_TABLE (__PP|__RW| 0|___A| 0|___D| 0| 0| _ENC)
+#define _PAGE_TABLE_NOENC (__PP|__RW|_USR|___A| 0|___D| 0| 0)
+#define _PAGE_TABLE (__PP|__RW|_USR|___A| 0|___D| 0| 0| _ENC)
+#define __PAGE_KERNEL_RO (__PP| 0| 0|___A|__NX|___D| 0|___G)
+#define __PAGE_KERNEL_RX (__PP| 0| 0|___A| 0|___D| 0|___G)
+#define __PAGE_KERNEL_NOCACHE (__PP|__RW| 0|___A|__NX|___D| 0|___G| __NC)
+#define __PAGE_KERNEL_VVAR (__PP| 0|_USR|___A|__NX|___D| 0|___G)
+#define __PAGE_KERNEL_LARGE (__PP|__RW| 0|___A|__NX|___D|_PSE|___G)
+#define __PAGE_KERNEL_LARGE_EXEC (__PP|__RW| 0|___A| 0|___D|_PSE|___G)
+#define __PAGE_KERNEL_WP (__PP|__RW| 0|___A|__NX|___D| 0|___G| __WP)
+
+
+#define __PAGE_KERNEL_IO __PAGE_KERNEL
+#define __PAGE_KERNEL_IO_NOCACHE __PAGE_KERNEL_NOCACHE
-#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \
- _PAGE_DIRTY | _PAGE_ENC)
-#define _PAGE_TABLE (_KERNPG_TABLE | _PAGE_USER)
-#define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _PAGE_ENC)
-#define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _PAGE_ENC)
+#ifndef __ASSEMBLY__
-#define __PAGE_KERNEL_NOENC (__PAGE_KERNEL)
-#define __PAGE_KERNEL_NOENC_WP (__PAGE_KERNEL_WP)
+#define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _ENC)
+#define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _ENC)
+#define __PAGE_KERNEL_NOENC (__PAGE_KERNEL | 0)
+#define __PAGE_KERNEL_NOENC_WP (__PAGE_KERNEL_WP | 0)
-#define default_pgprot(x) __pgprot((x) & __default_kernel_pte_mask)
+#define __pgprot_mask(x) __pgprot((x) & __default_kernel_pte_mask)
-#define PAGE_KERNEL default_pgprot(__PAGE_KERNEL | _PAGE_ENC)
-#define PAGE_KERNEL_NOENC default_pgprot(__PAGE_KERNEL)
-#define PAGE_KERNEL_RO default_pgprot(__PAGE_KERNEL_RO | _PAGE_ENC)
-#define PAGE_KERNEL_EXEC default_pgprot(__PAGE_KERNEL_EXEC | _PAGE_ENC)
-#define PAGE_KERNEL_EXEC_NOENC default_pgprot(__PAGE_KERNEL_EXEC)
-#define PAGE_KERNEL_RX default_pgprot(__PAGE_KERNEL_RX | _PAGE_ENC)
-#define PAGE_KERNEL_NOCACHE default_pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC)
-#define PAGE_KERNEL_LARGE default_pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC)
-#define PAGE_KERNEL_LARGE_EXEC default_pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC)
-#define PAGE_KERNEL_VVAR default_pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC)
+#define PAGE_KERNEL __pgprot_mask(__PAGE_KERNEL | _ENC)
+#define PAGE_KERNEL_NOENC __pgprot_mask(__PAGE_KERNEL | 0)
+#define PAGE_KERNEL_RO __pgprot_mask(__PAGE_KERNEL_RO | _ENC)
+#define PAGE_KERNEL_EXEC __pgprot_mask(__PAGE_KERNEL_EXEC | _ENC)
+#define PAGE_KERNEL_EXEC_NOENC __pgprot_mask(__PAGE_KERNEL_EXEC | 0)
+#define PAGE_KERNEL_RX __pgprot_mask(__PAGE_KERNEL_RX | _ENC)
+#define PAGE_KERNEL_NOCACHE __pgprot_mask(__PAGE_KERNEL_NOCACHE | _ENC)
+#define PAGE_KERNEL_LARGE __pgprot_mask(__PAGE_KERNEL_LARGE | _ENC)
+#define PAGE_KERNEL_LARGE_EXEC __pgprot_mask(__PAGE_KERNEL_LARGE_EXEC | _ENC)
+#define PAGE_KERNEL_VVAR __pgprot_mask(__PAGE_KERNEL_VVAR | _ENC)
-#define PAGE_KERNEL_IO default_pgprot(__PAGE_KERNEL_IO)
-#define PAGE_KERNEL_IO_NOCACHE default_pgprot(__PAGE_KERNEL_IO_NOCACHE)
+#define PAGE_KERNEL_IO __pgprot_mask(__PAGE_KERNEL_IO)
+#define PAGE_KERNEL_IO_NOCACHE __pgprot_mask(__PAGE_KERNEL_IO_NOCACHE)
#endif /* __ASSEMBLY__ */
@@ -449,9 +457,6 @@ static inline pteval_t pte_flags(pte_t pte)
return native_pte_val(pte) & PTE_FLAGS_MASK;
}
-#define pgprot_val(x) ((x).pgprot)
-#define __pgprot(x) ((pgprot_t) { (x) } )
-
extern uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM];
extern uint8_t __pte2cachemode_tbl[8];
diff --git a/arch/x86/include/asm/vmalloc.h b/arch/x86/include/asm/vmalloc.h
new file mode 100644
index 000000000000..29837740b520
--- /dev/null
+++ b/arch/x86/include/asm/vmalloc.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_X86_VMALLOC_H
+#define _ASM_X86_VMALLOC_H
+
+#include <asm/pgtable_areas.h>
+
+#endif /* _ASM_X86_VMALLOC_H */