From aee7f91369a80d2cb9bba198331479cc9bfc0ade Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 10 Dec 2019 10:05:45 +0100 Subject: x86/mm/pat: Update the comments in pat.c and pat_interval.c and refresh the code a bit Tidy up the code: - add comments explaining the PAT code, the role of the functions and the logic - fix various typos and grammar while at it - simplify the file-scope memtype_interval_*() namespace to interval_*() - simplify stylistic complications such as unnecessary linebreaks or convoluted control flow - use the simpler '#ifdef CONFIG_*' pattern instead of '#if defined(CONFIG_*)' pattern - remove the non-idiomatic newline between late_initcall() and its function definition Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 53 ++++++++++++++++++++++++++++++++++----------- arch/x86/mm/pat_interval.c | 54 ++++++++++++++++++++++++++-------------------- 2 files changed, 72 insertions(+), 35 deletions(-) diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 2d758e19ef22..560ac5137a2f 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -1,11 +1,34 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Handle caching attributes in page tables (PAT) + * Page Attribute Table (PAT) support: handle memory caching attributes in page tables. * * Authors: Venkatesh Pallipadi * Suresh B Siddha * * Loosely based on earlier PAT patchset from Eric Biederman and Andi Kleen. + * + * Basic principles: + * + * PAT is a CPU feature supported by all modern x86 CPUs, to allow the firmware and + * the kernel to set one of a handful of 'caching type' attributes for physical + * memory ranges: uncached, write-combining, write-through, write-protected, + * and the most commonly used and default attribute: write-back caching. + * + * PAT support supercedes and augments MTRR support in a compatible fashion: MTRR is + * a hardware interface to enumerate a limited number of physical memory ranges + * and set their caching attributes explicitly, programmed into the CPU via MSRs. + * Even modern CPUs have MTRRs enabled - but these are typically not touched + * by the kernel or by user-space (such as the X server), we rely on PAT for any + * additional cache attribute logic. + * + * PAT doesn't work via explicit memory ranges, but uses page table entries to add + * cache attribute information to the mapped memory range: there's 3 bits used, + * (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT), with the 8 possible values mapped by the + * CPU to actual cache attributes via an MSR loaded into the CPU (MSR_IA32_CR_PAT). + * + * ( There's a metric ton of finer details, such as compatibility with CPU quirks + * that only support 4 types of PAT entries, and interaction with MTRRs, see + * below for details. ) */ #include @@ -839,7 +862,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, } /* - * Change the memory type for the physial address range in kernel identity + * Change the memory type for the physical address range in kernel identity * mapping space if that range is a part of identity map. */ int kernel_map_sync_memtype(u64 base, unsigned long size, @@ -851,15 +874,14 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, return 0; /* - * some areas in the middle of the kernel identity range - * are not mapped, like the PCI space. + * Some areas in the middle of the kernel identity range + * are not mapped, for example the PCI space. */ if (!page_is_ram(base >> PAGE_SHIFT)) return 0; id_sz = (__pa(high_memory-1) <= base + size) ? - __pa(high_memory) - base : - size; + __pa(high_memory) - base : size; if (ioremap_change_attr((unsigned long)__va(base), id_sz, pcm) < 0) { pr_info("x86/PAT: %s:%d ioremap_change_attr failed %s for [mem %#010Lx-%#010Lx]\n", @@ -1099,6 +1121,10 @@ EXPORT_SYMBOL_GPL(pgprot_writethrough); #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) +/* + * We are allocating a temporary printout-entry to be passed + * between seq_start()/next() and seq_show(): + */ static struct memtype *memtype_get_idx(loff_t pos) { struct memtype *print_entry; @@ -1112,12 +1138,13 @@ static struct memtype *memtype_get_idx(loff_t pos) ret = memtype_copy_nth_element(print_entry, pos); spin_unlock(&memtype_lock); - if (!ret) { - return print_entry; - } else { + /* Free it on error: */ + if (ret) { kfree(print_entry); return NULL; } + + return print_entry; } static void *memtype_seq_start(struct seq_file *seq, loff_t *pos) @@ -1144,8 +1171,11 @@ static int memtype_seq_show(struct seq_file *seq, void *v) { struct memtype *print_entry = (struct memtype *)v; - seq_printf(seq, "%s @ 0x%Lx-0x%Lx\n", cattr_name(print_entry->type), - print_entry->start, print_entry->end); + seq_printf(seq, "%s @ 0x%Lx-0x%Lx\n", + cattr_name(print_entry->type), + print_entry->start, + print_entry->end); + kfree(print_entry); return 0; @@ -1178,7 +1208,6 @@ static int __init pat_memtype_list_init(void) } return 0; } - late_initcall(pat_memtype_list_init); #endif /* CONFIG_DEBUG_FS && CONFIG_X86_PAT */ diff --git a/arch/x86/mm/pat_interval.c b/arch/x86/mm/pat_interval.c index 6855362eaf21..2abc47558818 100644 --- a/arch/x86/mm/pat_interval.c +++ b/arch/x86/mm/pat_interval.c @@ -25,25 +25,27 @@ * physical memory areas. Without proper tracking, conflicting memory * types in different mappings can cause CPU cache corruption. * - * The tree is an interval tree (augmented rbtree) with tree ordered - * on starting address. Tree can contain multiple entries for + * The tree is an interval tree (augmented rbtree) which tree is ordered + * by the starting address. The tree can contain multiple entries for * different regions which overlap. All the aliases have the same - * cache attributes of course. + * cache attributes of course, as enforced by the PAT logic. * * memtype_lock protects the rbtree. */ -static inline u64 memtype_interval_start(struct memtype *memtype) + +static inline u64 interval_start(struct memtype *memtype) { return memtype->start; } -static inline u64 memtype_interval_end(struct memtype *memtype) +static inline u64 interval_end(struct memtype *memtype) { return memtype->end - 1; } + INTERVAL_TREE_DEFINE(struct memtype, rb, u64, subtree_max_end, - memtype_interval_start, memtype_interval_end, - static, memtype_interval) + interval_start, interval_end, + static, interval) static struct rb_root_cached memtype_rbroot = RB_ROOT_CACHED; @@ -56,7 +58,7 @@ static struct memtype *memtype_match(u64 start, u64 end, int match_type) { struct memtype *match; - match = memtype_interval_iter_first(&memtype_rbroot, start, end-1); + match = interval_iter_first(&memtype_rbroot, start, end-1); while (match != NULL && match->start < end) { if ((match_type == MEMTYPE_EXACT_MATCH) && (match->start == start) && (match->end == end)) @@ -66,7 +68,7 @@ static struct memtype *memtype_match(u64 start, u64 end, int match_type) (match->start < start) && (match->end == end)) return match; - match = memtype_interval_iter_next(match, start, end-1); + match = interval_iter_next(match, start, end-1); } return NULL; /* Returns NULL if there is no match */ @@ -79,7 +81,7 @@ static int memtype_check_conflict(u64 start, u64 end, struct memtype *match; enum page_cache_mode found_type = reqtype; - match = memtype_interval_iter_first(&memtype_rbroot, start, end-1); + match = interval_iter_first(&memtype_rbroot, start, end-1); if (match == NULL) goto success; @@ -89,12 +91,12 @@ static int memtype_check_conflict(u64 start, u64 end, dprintk("Overlap at 0x%Lx-0x%Lx\n", match->start, match->end); found_type = match->type; - match = memtype_interval_iter_next(match, start, end-1); + match = interval_iter_next(match, start, end-1); while (match) { if (match->type != found_type) goto failure; - match = memtype_interval_iter_next(match, start, end-1); + match = interval_iter_next(match, start, end-1); } success: if (newtype) @@ -106,11 +108,11 @@ failure: pr_info("x86/PAT: %s:%d conflicting memory types %Lx-%Lx %s<->%s\n", current->comm, current->pid, start, end, cattr_name(found_type), cattr_name(match->type)); + return -EBUSY; } -int memtype_check_insert(struct memtype *new, - enum page_cache_mode *ret_type) +int memtype_check_insert(struct memtype *new, enum page_cache_mode *ret_type) { int err = 0; @@ -121,7 +123,7 @@ int memtype_check_insert(struct memtype *new, if (ret_type) new->type = *ret_type; - memtype_interval_insert(new, &memtype_rbroot); + interval_insert(new, &memtype_rbroot); return 0; } @@ -145,12 +147,13 @@ struct memtype *memtype_erase(u64 start, u64 end) if (data->start == start) { /* munmap: erase this node */ - memtype_interval_remove(data, &memtype_rbroot); + interval_remove(data, &memtype_rbroot); } else { /* mremap: update the end value of this node */ - memtype_interval_remove(data, &memtype_rbroot); + interval_remove(data, &memtype_rbroot); data->end = start; - memtype_interval_insert(data, &memtype_rbroot); + interval_insert(data, &memtype_rbroot); + return NULL; } @@ -159,19 +162,24 @@ struct memtype *memtype_erase(u64 start, u64 end) struct memtype *memtype_lookup(u64 addr) { - return memtype_interval_iter_first(&memtype_rbroot, addr, - addr + PAGE_SIZE-1); + return interval_iter_first(&memtype_rbroot, addr, addr + PAGE_SIZE-1); } -#if defined(CONFIG_DEBUG_FS) +/* + * Debugging helper, copy the Nth entry of the tree into a + * a copy for printout. This allows us to print out the tree + * via debugfs, without holding the memtype_lock too long: + */ +#ifdef CONFIG_DEBUG_FS int memtype_copy_nth_element(struct memtype *out, loff_t pos) { struct memtype *match; int i = 1; - match = memtype_interval_iter_first(&memtype_rbroot, 0, ULONG_MAX); + match = interval_iter_first(&memtype_rbroot, 0, ULONG_MAX); + while (match && pos != i) { - match = memtype_interval_iter_next(match, 0, ULONG_MAX); + match = interval_iter_next(match, 0, ULONG_MAX); i++; } -- cgit v1.2.3-59-g8ed1b From 5557e831f68878ef202c0e5296235442cff9b41e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 19 Nov 2019 09:50:19 +0100 Subject: x86/mm/pat: Disambiguate PAT-disabled boot messages Right now we have these four types of PAT-disabled boot messages: x86/PAT: PAT support disabled. x86/PAT: PAT MSR is 0, disabled. x86/PAT: MTRRs disabled, skipping PAT initialization too. x86/PAT: PAT not supported by CPU. The first message is ambiguous in that it doesn't signal that PAT is off due to a boot option. The second message doesn't really make it clear that this is the MSR value during early bootup and it's the firmware environment that disabled PAT support. The fourth message doesn't really make it clear that we disable PAT support because CONFIG_MTRR is off in the kernel. Clarify, harmonize and fix the spelling in these user-visible messages: x86/PAT: PAT support disabled via boot option. x86/PAT: PAT support disabled by the firmware. x86/PAT: PAT support disabled because CONFIG_MTRR is disabled in the kernel. x86/PAT: PAT not supported by the CPU. Also add a fifth message, in case PAT support is disabled at build time: x86/PAT: PAT support disabled because CONFIG_X86_PAT is disabled in the kernel. Previously we'd just silently return from pat_init() without giving any indication that PAT support is off. Finally, clarify/extend some of the comments related to PAT initialization. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mtrr.h | 2 +- arch/x86/mm/pat.c | 20 ++++++++++++++------ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index dbff1456d215..3337d2233aef 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -86,7 +86,7 @@ static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) } static inline void mtrr_bp_init(void) { - pat_disable("MTRRs disabled, skipping PAT initialization too."); + pat_disable("PAT support disabled because CONFIG_MTRR is disabled in the kernel."); } #define mtrr_ap_init() do {} while (0) diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 560ac5137a2f..e26b81ca5b95 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -66,7 +66,11 @@ static bool __read_mostly pat_disabled = !IS_ENABLED(CONFIG_X86_PAT); static bool __read_mostly pat_initialized; static bool __read_mostly init_cm_done; -void pat_disable(const char *reason) +/* + * PAT support is enabled by default, but can be disabled for + * various user-requested or hardware-forced reasons: + */ +void pat_disable(const char *msg_reason) { if (pat_disabled) return; @@ -77,12 +81,12 @@ void pat_disable(const char *reason) } pat_disabled = true; - pr_info("x86/PAT: %s\n", reason); + pr_info("x86/PAT: %s\n", msg_reason); } static int __init nopat(char *str) { - pat_disable("PAT support disabled."); + pat_disable("PAT support disabled via boot option."); return 0; } early_param("nopat", nopat); @@ -238,13 +242,13 @@ static void pat_bsp_init(u64 pat) u64 tmp_pat; if (!boot_cpu_has(X86_FEATURE_PAT)) { - pat_disable("PAT not supported by CPU."); + pat_disable("PAT not supported by the CPU."); return; } rdmsrl(MSR_IA32_CR_PAT, tmp_pat); if (!tmp_pat) { - pat_disable("PAT MSR is 0, disabled."); + pat_disable("PAT support disabled by the firmware."); return; } @@ -314,7 +318,7 @@ void init_cache_modes(void) } /** - * pat_init - Initialize PAT MSR and PAT table + * pat_init - Initialize the PAT MSR and PAT table on the current CPU * * This function initializes PAT MSR and PAT table with an OS-defined value * to enable additional cache attributes, WC, WT and WP. @@ -328,6 +332,10 @@ void pat_init(void) u64 pat; struct cpuinfo_x86 *c = &boot_cpu_data; +#ifndef CONFIG_X86_PAT + pr_info_once("x86/PAT: PAT support disabled because CONFIG_X86_PAT is disabled in the kernel.\n"); +#endif + if (pat_disabled) return; -- cgit v1.2.3-59-g8ed1b From ef35b0fcee23d7636daa4bed62bceeaba4eed918 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 19 Nov 2019 09:38:26 +0100 Subject: x86/mm/pat: Create fixed width output in /sys/kernel/debug/x86/pat_memtype_list, similar to the E820 debug printouts Before: write-back @ 0xbdfa9000-0xbdfaa000 write-back @ 0xbdfaa000-0xbdfab000 write-back @ 0xbdfab000-0xbdfac000 uncached-minus @ 0xc0000000-0xd0000000 uncached-minus @ 0xd0900000-0xd0920000 uncached-minus @ 0xd0920000-0xd0940000 uncached-minus @ 0xd0940000-0xd0960000 uncached-minus @ 0xd0960000-0xd0980000 uncached-minus @ 0xd0980000-0xd0981000 uncached-minus @ 0xd0990000-0xd0991000 uncached-minus @ 0xd09a0000-0xd09a1000 uncached-minus @ 0xd09b0000-0xd09b1000 uncached-minus @ 0xd0d00000-0xd0d01000 uncached-minus @ 0xd0d10000-0xd0d11000 uncached-minus @ 0xd0d20000-0xd0d21000 uncached-minus @ 0xd0d40000-0xd0d41000 uncached-minus @ 0xfed00000-0xfed01000 uncached-minus @ 0xfed1f000-0xfed20000 uncached-minus @ 0xfed40000-0xfed41000 After: PAT: [mem 0x00000000bdf8e000-0x00000000bdfa8000] write-back PAT: [mem 0x00000000bdfa9000-0x00000000bdfaa000] write-back PAT: [mem 0x00000000bdfaa000-0x00000000bdfab000] write-back PAT: [mem 0x00000000bdfab000-0x00000000bdfac000] write-back PAT: [mem 0x00000000c0000000-0x00000000d0000000] uncached-minus PAT: [mem 0x00000000d0900000-0x00000000d0920000] uncached-minus PAT: [mem 0x00000000d0920000-0x00000000d0940000] uncached-minus PAT: [mem 0x00000000d0940000-0x00000000d0960000] uncached-minus PAT: [mem 0x00000000d0960000-0x00000000d0980000] uncached-minus PAT: [mem 0x00000000d0980000-0x00000000d0981000] uncached-minus PAT: [mem 0x00000000d0990000-0x00000000d0991000] uncached-minus PAT: [mem 0x00000000d09a0000-0x00000000d09a1000] uncached-minus PAT: [mem 0x00000000d09b0000-0x00000000d09b1000] uncached-minus PAT: [mem 0x00000000fed1f000-0x00000000fed20000] uncached-minus PAT: [mem 0x00000000fed40000-0x00000000fed41000] uncached-minus The advantage is that it's easier to parse at a glance - and the tree is ordered by start address, which is now reflected in putting the start address in the first column. This is also now similar to how we print e820 entries: BIOS-e820: [mem 0x00000000bafda000-0x00000000bb3d3fff] usable BIOS-e820: [mem 0x00000000bb3d4000-0x00000000bdd2efff] reserved BIOS-e820: [mem 0x00000000bdd2f000-0x00000000bddccfff] ACPI NVS BIOS-e820: [mem 0x00000000bddcd000-0x00000000bdea0fff] ACPI data BIOS-e820: [mem 0x00000000bdea1000-0x00000000bdf2efff] ACPI NVS Since this is a debugfs file not used by tools there's no known ABI dependencies. Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index e26b81ca5b95..ea7da7e62e17 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -1179,10 +1179,10 @@ static int memtype_seq_show(struct seq_file *seq, void *v) { struct memtype *print_entry = (struct memtype *)v; - seq_printf(seq, "%s @ 0x%Lx-0x%Lx\n", - cattr_name(print_entry->type), + seq_printf(seq, "PAT: [mem 0x%016Lx-0x%016Lx] %s\n", print_entry->start, - print_entry->end); + print_entry->end, + cattr_name(print_entry->type)); kfree(print_entry); -- cgit v1.2.3-59-g8ed1b From 47553d42c55f7b85e72ce6f3a18030102f8f93a3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 19 Nov 2019 10:18:56 +0100 Subject: x86/mm/pat: Simplify the free_memtype() control flow Simplify/streamline the quirky handling of the pat_pagerange_is_ram() logic, and get rid of the 'err' local variable. Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index ea7da7e62e17..af049920e59a 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -656,7 +656,6 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type, int free_memtype(u64 start, u64 end) { - int err = -EINVAL; int is_range_ram; struct memtype *entry; @@ -671,14 +670,10 @@ int free_memtype(u64 start, u64 end) return 0; is_range_ram = pat_pagerange_is_ram(start, end); - if (is_range_ram == 1) { - - err = free_ram_pages_type(start, end); - - return err; - } else if (is_range_ram < 0) { + if (is_range_ram == 1) + return free_ram_pages_type(start, end); + if (is_range_ram < 0) return -EINVAL; - } spin_lock(&memtype_lock); entry = memtype_erase(start, end); -- cgit v1.2.3-59-g8ed1b From baf65855baac302bbacf0ce17ed99b9c0940b930 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 10 Dec 2019 10:07:23 +0100 Subject: x86/mm/pat: Harmonize 'struct memtype *' local variable and function parameter use We have quite a zoo of 'struct memtype' variable nomenclature: new entry print_entry data match out memtype Beyond the randomness, some of these are outright confusing, especially when used in larger functions. Standardize them: entry entry_new entry_old entry_print entry_match entry_out Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 51 +++++++++++++------------- arch/x86/mm/pat_internal.h | 6 +-- arch/x86/mm/pat_interval.c | 91 +++++++++++++++++++++++----------------------- 3 files changed, 75 insertions(+), 73 deletions(-) diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index af049920e59a..4a1804903d97 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -576,7 +576,7 @@ static u64 sanitize_phys(u64 address) int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type, enum page_cache_mode *new_type) { - struct memtype *new; + struct memtype *entry_new; enum page_cache_mode actual_type; int is_range_ram; int err = 0; @@ -624,22 +624,22 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type, return -EINVAL; } - new = kzalloc(sizeof(struct memtype), GFP_KERNEL); - if (!new) + entry_new = kzalloc(sizeof(struct memtype), GFP_KERNEL); + if (!entry_new) return -ENOMEM; - new->start = start; - new->end = end; - new->type = actual_type; + entry_new->start = start; + entry_new->end = end; + entry_new->type = actual_type; spin_lock(&memtype_lock); - err = memtype_check_insert(new, new_type); + err = memtype_check_insert(entry_new, new_type); if (err) { pr_info("x86/PAT: reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n", start, end - 1, - cattr_name(new->type), cattr_name(req_type)); - kfree(new); + cattr_name(entry_new->type), cattr_name(req_type)); + kfree(entry_new); spin_unlock(&memtype_lock); return err; @@ -648,7 +648,7 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type, spin_unlock(&memtype_lock); dprintk("reserve_memtype added [mem %#010Lx-%#010Lx], track %s, req %s, ret %s\n", - start, end - 1, cattr_name(new->type), cattr_name(req_type), + start, end - 1, cattr_name(entry_new->type), cattr_name(req_type), new_type ? cattr_name(*new_type) : "-"); return err; @@ -657,7 +657,7 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type, int free_memtype(u64 start, u64 end) { int is_range_ram; - struct memtype *entry; + struct memtype *entry_old; if (!pat_enabled()) return 0; @@ -676,16 +676,16 @@ int free_memtype(u64 start, u64 end) return -EINVAL; spin_lock(&memtype_lock); - entry = memtype_erase(start, end); + entry_old = memtype_erase(start, end); spin_unlock(&memtype_lock); - if (IS_ERR(entry)) { + if (IS_ERR(entry_old)) { pr_info("x86/PAT: %s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n", current->comm, current->pid, start, end - 1); return -EINVAL; } - kfree(entry); + kfree(entry_old); dprintk("free_memtype request [mem %#010Lx-%#010Lx]\n", start, end - 1); @@ -726,6 +726,7 @@ static enum page_cache_mode lookup_memtype(u64 paddr) rettype = _PAGE_CACHE_MODE_UC_MINUS; spin_unlock(&memtype_lock); + return rettype; } @@ -1130,24 +1131,24 @@ EXPORT_SYMBOL_GPL(pgprot_writethrough); */ static struct memtype *memtype_get_idx(loff_t pos) { - struct memtype *print_entry; + struct memtype *entry_print; int ret; - print_entry = kzalloc(sizeof(struct memtype), GFP_KERNEL); - if (!print_entry) + entry_print = kzalloc(sizeof(struct memtype), GFP_KERNEL); + if (!entry_print) return NULL; spin_lock(&memtype_lock); - ret = memtype_copy_nth_element(print_entry, pos); + ret = memtype_copy_nth_element(entry_print, pos); spin_unlock(&memtype_lock); /* Free it on error: */ if (ret) { - kfree(print_entry); + kfree(entry_print); return NULL; } - return print_entry; + return entry_print; } static void *memtype_seq_start(struct seq_file *seq, loff_t *pos) @@ -1172,14 +1173,14 @@ static void memtype_seq_stop(struct seq_file *seq, void *v) static int memtype_seq_show(struct seq_file *seq, void *v) { - struct memtype *print_entry = (struct memtype *)v; + struct memtype *entry_print = (struct memtype *)v; seq_printf(seq, "PAT: [mem 0x%016Lx-0x%016Lx] %s\n", - print_entry->start, - print_entry->end, - cattr_name(print_entry->type)); + entry_print->start, + entry_print->end, + cattr_name(entry_print->type)); - kfree(print_entry); + kfree(entry_print); return 0; } diff --git a/arch/x86/mm/pat_internal.h b/arch/x86/mm/pat_internal.h index 79a06684349e..23ce8cdac159 100644 --- a/arch/x86/mm/pat_internal.h +++ b/arch/x86/mm/pat_internal.h @@ -29,13 +29,13 @@ static inline char *cattr_name(enum page_cache_mode pcm) } #ifdef CONFIG_X86_PAT -extern int memtype_check_insert(struct memtype *new, +extern int memtype_check_insert(struct memtype *entry_new, enum page_cache_mode *new_type); extern struct memtype *memtype_erase(u64 start, u64 end); extern struct memtype *memtype_lookup(u64 addr); -extern int memtype_copy_nth_element(struct memtype *out, loff_t pos); +extern int memtype_copy_nth_element(struct memtype *entry_out, loff_t pos); #else -static inline int memtype_check_insert(struct memtype *new, +static inline int memtype_check_insert(struct memtype *entry_new, enum page_cache_mode *new_type) { return 0; } static inline struct memtype *memtype_erase(u64 start, u64 end) diff --git a/arch/x86/mm/pat_interval.c b/arch/x86/mm/pat_interval.c index 2abc47558818..3c983de5f492 100644 --- a/arch/x86/mm/pat_interval.c +++ b/arch/x86/mm/pat_interval.c @@ -33,14 +33,14 @@ * memtype_lock protects the rbtree. */ -static inline u64 interval_start(struct memtype *memtype) +static inline u64 interval_start(struct memtype *entry) { - return memtype->start; + return entry->start; } -static inline u64 interval_end(struct memtype *memtype) +static inline u64 interval_end(struct memtype *entry) { - return memtype->end - 1; + return entry->end - 1; } INTERVAL_TREE_DEFINE(struct memtype, rb, u64, subtree_max_end, @@ -56,19 +56,20 @@ enum { static struct memtype *memtype_match(u64 start, u64 end, int match_type) { - struct memtype *match; + struct memtype *entry_match; - match = interval_iter_first(&memtype_rbroot, start, end-1); - while (match != NULL && match->start < end) { + entry_match = interval_iter_first(&memtype_rbroot, start, end-1); + + while (entry_match != NULL && entry_match->start < end) { if ((match_type == MEMTYPE_EXACT_MATCH) && - (match->start == start) && (match->end == end)) - return match; + (entry_match->start == start) && (entry_match->end == end)) + return entry_match; if ((match_type == MEMTYPE_END_MATCH) && - (match->start < start) && (match->end == end)) - return match; + (entry_match->start < start) && (entry_match->end == end)) + return entry_match; - match = interval_iter_next(match, start, end-1); + entry_match = interval_iter_next(entry_match, start, end-1); } return NULL; /* Returns NULL if there is no match */ @@ -78,25 +79,25 @@ static int memtype_check_conflict(u64 start, u64 end, enum page_cache_mode reqtype, enum page_cache_mode *newtype) { - struct memtype *match; + struct memtype *entry_match; enum page_cache_mode found_type = reqtype; - match = interval_iter_first(&memtype_rbroot, start, end-1); - if (match == NULL) + entry_match = interval_iter_first(&memtype_rbroot, start, end-1); + if (entry_match == NULL) goto success; - if (match->type != found_type && newtype == NULL) + if (entry_match->type != found_type && newtype == NULL) goto failure; - dprintk("Overlap at 0x%Lx-0x%Lx\n", match->start, match->end); - found_type = match->type; + dprintk("Overlap at 0x%Lx-0x%Lx\n", entry_match->start, entry_match->end); + found_type = entry_match->type; - match = interval_iter_next(match, start, end-1); - while (match) { - if (match->type != found_type) + entry_match = interval_iter_next(entry_match, start, end-1); + while (entry_match) { + if (entry_match->type != found_type) goto failure; - match = interval_iter_next(match, start, end-1); + entry_match = interval_iter_next(entry_match, start, end-1); } success: if (newtype) @@ -107,29 +108,29 @@ success: failure: pr_info("x86/PAT: %s:%d conflicting memory types %Lx-%Lx %s<->%s\n", current->comm, current->pid, start, end, - cattr_name(found_type), cattr_name(match->type)); + cattr_name(found_type), cattr_name(entry_match->type)); return -EBUSY; } -int memtype_check_insert(struct memtype *new, enum page_cache_mode *ret_type) +int memtype_check_insert(struct memtype *entry_new, enum page_cache_mode *ret_type) { int err = 0; - err = memtype_check_conflict(new->start, new->end, new->type, ret_type); + err = memtype_check_conflict(entry_new->start, entry_new->end, entry_new->type, ret_type); if (err) return err; if (ret_type) - new->type = *ret_type; + entry_new->type = *ret_type; - interval_insert(new, &memtype_rbroot); + interval_insert(entry_new, &memtype_rbroot); return 0; } struct memtype *memtype_erase(u64 start, u64 end) { - struct memtype *data; + struct memtype *entry_old; /* * Since the memtype_rbroot tree allows overlapping ranges, @@ -138,26 +139,26 @@ struct memtype *memtype_erase(u64 start, u64 end) * it then checks with END_MATCH, i.e. shrink the size of a node * from the end for the mremap case. */ - data = memtype_match(start, end, MEMTYPE_EXACT_MATCH); - if (!data) { - data = memtype_match(start, end, MEMTYPE_END_MATCH); - if (!data) + entry_old = memtype_match(start, end, MEMTYPE_EXACT_MATCH); + if (!entry_old) { + entry_old = memtype_match(start, end, MEMTYPE_END_MATCH); + if (!entry_old) return ERR_PTR(-EINVAL); } - if (data->start == start) { + if (entry_old->start == start) { /* munmap: erase this node */ - interval_remove(data, &memtype_rbroot); + interval_remove(entry_old, &memtype_rbroot); } else { /* mremap: update the end value of this node */ - interval_remove(data, &memtype_rbroot); - data->end = start; - interval_insert(data, &memtype_rbroot); + interval_remove(entry_old, &memtype_rbroot); + entry_old->end = start; + interval_insert(entry_old, &memtype_rbroot); return NULL; } - return data; + return entry_old; } struct memtype *memtype_lookup(u64 addr) @@ -171,20 +172,20 @@ struct memtype *memtype_lookup(u64 addr) * via debugfs, without holding the memtype_lock too long: */ #ifdef CONFIG_DEBUG_FS -int memtype_copy_nth_element(struct memtype *out, loff_t pos) +int memtype_copy_nth_element(struct memtype *entry_out, loff_t pos) { - struct memtype *match; + struct memtype *entry_match; int i = 1; - match = interval_iter_first(&memtype_rbroot, 0, ULONG_MAX); + entry_match = interval_iter_first(&memtype_rbroot, 0, ULONG_MAX); - while (match && pos != i) { - match = interval_iter_next(match, 0, ULONG_MAX); + while (entry_match && pos != i) { + entry_match = interval_iter_next(entry_match, 0, ULONG_MAX); i++; } - if (match) { /* pos == i */ - *out = *match; + if (entry_match) { /* pos == i */ + *entry_out = *entry_match; return 0; } else { return 1; -- cgit v1.2.3-59-g8ed1b From d891b9219d2a73640d2f2a216ecb6fb29d832013 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 19 Nov 2019 11:46:36 +0100 Subject: x86/mm/pat: Clean up PAT initialization flags Right now we have these variables that impact the PAT initialization sequence: pat_disabled boot_cpu_done pat_initialized init_cm_done Some have a pat_ prefix, some not, and the naming is random, which makes their purpose rather opaque. Name them consistently and according to their role: pat_disabled pat_bp_initialized pat_bp_enabled pat_cm_initialized Also rename pat_bsp_init() => pat_bp_init(), to use the canonical abbreviation. Also add a warning for double calls of init_cache_modes(), the call chains leading to this are complex and I couldn't convince myself that we never call this function twice - so utilize the flag for a debug check. No change in functionality intended. Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 4a1804903d97..f1677fad1735 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -61,10 +61,10 @@ #undef pr_fmt #define pr_fmt(fmt) "" fmt -static bool __read_mostly boot_cpu_done; +static bool __read_mostly pat_bp_initialized; static bool __read_mostly pat_disabled = !IS_ENABLED(CONFIG_X86_PAT); -static bool __read_mostly pat_initialized; -static bool __read_mostly init_cm_done; +static bool __read_mostly pat_bp_enabled; +static bool __read_mostly pat_cm_initialized; /* * PAT support is enabled by default, but can be disabled for @@ -75,7 +75,7 @@ void pat_disable(const char *msg_reason) if (pat_disabled) return; - if (boot_cpu_done) { + if (pat_bp_initialized) { WARN_ONCE(1, "x86/PAT: PAT cannot be disabled after initialization\n"); return; } @@ -93,7 +93,7 @@ early_param("nopat", nopat); bool pat_enabled(void) { - return pat_initialized; + return pat_bp_enabled; } EXPORT_SYMBOL_GPL(pat_enabled); @@ -224,6 +224,8 @@ static void __init_cache_modes(u64 pat) char pat_msg[33]; int i; + WARN_ON_ONCE(pat_cm_initialized); + pat_msg[32] = 0; for (i = 7; i >= 0; i--) { cache = pat_get_cache_mode((pat >> (i * 8)) & 7, @@ -232,12 +234,12 @@ static void __init_cache_modes(u64 pat) } pr_info("x86/PAT: Configuration [0-7]: %s\n", pat_msg); - init_cm_done = true; + pat_cm_initialized = true; } #define PAT(x, y) ((u64)PAT_ ## y << ((x)*8)) -static void pat_bsp_init(u64 pat) +static void pat_bp_init(u64 pat) { u64 tmp_pat; @@ -253,7 +255,7 @@ static void pat_bsp_init(u64 pat) } wrmsrl(MSR_IA32_CR_PAT, pat); - pat_initialized = true; + pat_bp_enabled = true; __init_cache_modes(pat); } @@ -275,7 +277,7 @@ void init_cache_modes(void) { u64 pat = 0; - if (init_cm_done) + if (pat_cm_initialized) return; if (boot_cpu_has(X86_FEATURE_PAT)) { @@ -395,9 +397,9 @@ void pat_init(void) PAT(4, WB) | PAT(5, WP) | PAT(6, UC_MINUS) | PAT(7, WT); } - if (!boot_cpu_done) { - pat_bsp_init(pat); - boot_cpu_done = true; + if (!pat_bp_initialized) { + pat_bp_init(pat); + pat_bp_initialized = true; } else { pat_ap_init(pat); } -- cgit v1.2.3-59-g8ed1b From f9b57cf80c8b585614ba223732be0d8f19d558d8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 10 Dec 2019 10:08:09 +0100 Subject: x86/mm/pat: Move the memtype related files to arch/x86/mm/pat/ - pat.c offers, dominantly, the memtype APIs - so rename it to memtype.c. - pageattr.c is offering, primarily, the set_memory*() page attribute APIs, which is offered via the header: name the .c file along the same pattern. I.e. perform these renames, and move them all next to each other in arch/x86/mm/pat/: pat.c => memtype.c pat_internal.h => memtype.h pat_interval.c => memtype_interval.c pageattr.c => set_memory.c pageattr-test.c => cpa-test.c Signed-off-by: Ingo Molnar --- arch/x86/mm/Makefile | 8 +- arch/x86/mm/pageattr-test.c | 278 ----- arch/x86/mm/pageattr.c | 2285 ------------------------------------ arch/x86/mm/pat.c | 1219 ------------------- arch/x86/mm/pat/Makefile | 5 + arch/x86/mm/pat/cpa-test.c | 278 +++++ arch/x86/mm/pat/memtype.c | 1219 +++++++++++++++++++ arch/x86/mm/pat/memtype.h | 49 + arch/x86/mm/pat/memtype_interval.c | 194 +++ arch/x86/mm/pat/set_memory.c | 2285 ++++++++++++++++++++++++++++++++++++ arch/x86/mm/pat_internal.h | 49 - arch/x86/mm/pat_interval.c | 194 --- 12 files changed, 4034 insertions(+), 4029 deletions(-) delete mode 100644 arch/x86/mm/pageattr-test.c delete mode 100644 arch/x86/mm/pageattr.c delete mode 100644 arch/x86/mm/pat.c create mode 100644 arch/x86/mm/pat/Makefile create mode 100644 arch/x86/mm/pat/cpa-test.c create mode 100644 arch/x86/mm/pat/memtype.c create mode 100644 arch/x86/mm/pat/memtype.h create mode 100644 arch/x86/mm/pat/memtype_interval.c create mode 100644 arch/x86/mm/pat/set_memory.c delete mode 100644 arch/x86/mm/pat_internal.h delete mode 100644 arch/x86/mm/pat_interval.c diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 3b89c201ac26..345848f270e3 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -12,8 +12,10 @@ CFLAGS_REMOVE_mem_encrypt.o = -pg CFLAGS_REMOVE_mem_encrypt_identity.o = -pg endif -obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ - pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o +obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o mmap.o \ + pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o + +obj-y += pat/ # Make sure __phys_addr has no stackprotector nostackp := $(call cc-option, -fno-stack-protector) @@ -23,8 +25,6 @@ CFLAGS_mem_encrypt_identity.o := $(nostackp) CFLAGS_fault.o := -I $(srctree)/$(src)/../include/asm/trace -obj-$(CONFIG_X86_PAT) += pat_interval.o - obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c deleted file mode 100644 index facce271e8b9..000000000000 --- a/arch/x86/mm/pageattr-test.c +++ /dev/null @@ -1,278 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * self test for change_page_attr. - * - * Clears the a test pte bit on random pages in the direct mapping, - * then reverts and compares page tables forwards and afterwards. - */ -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -/* - * Only print the results of the first pass: - */ -static __read_mostly int print = 1; - -enum { - NTEST = 3 * 100, - NPAGES = 100, -#ifdef CONFIG_X86_64 - LPS = (1 << PMD_SHIFT), -#elif defined(CONFIG_X86_PAE) - LPS = (1 << PMD_SHIFT), -#else - LPS = (1 << 22), -#endif - GPS = (1<<30) -}; - -#define PAGE_CPA_TEST __pgprot(_PAGE_CPA_TEST) - -static int pte_testbit(pte_t pte) -{ - return pte_flags(pte) & _PAGE_SOFTW1; -} - -struct split_state { - long lpg, gpg, spg, exec; - long min_exec, max_exec; -}; - -static int print_split(struct split_state *s) -{ - long i, expected, missed = 0; - int err = 0; - - s->lpg = s->gpg = s->spg = s->exec = 0; - s->min_exec = ~0UL; - s->max_exec = 0; - for (i = 0; i < max_pfn_mapped; ) { - unsigned long addr = (unsigned long)__va(i << PAGE_SHIFT); - unsigned int level; - pte_t *pte; - - pte = lookup_address(addr, &level); - if (!pte) { - missed++; - i++; - continue; - } - - if (level == PG_LEVEL_1G && sizeof(long) == 8) { - s->gpg++; - i += GPS/PAGE_SIZE; - } else if (level == PG_LEVEL_2M) { - if ((pte_val(*pte) & _PAGE_PRESENT) && !(pte_val(*pte) & _PAGE_PSE)) { - printk(KERN_ERR - "%lx level %d but not PSE %Lx\n", - addr, level, (u64)pte_val(*pte)); - err = 1; - } - s->lpg++; - i += LPS/PAGE_SIZE; - } else { - s->spg++; - i++; - } - if (!(pte_val(*pte) & _PAGE_NX)) { - s->exec++; - if (addr < s->min_exec) - s->min_exec = addr; - if (addr > s->max_exec) - s->max_exec = addr; - } - } - if (print) { - printk(KERN_INFO - " 4k %lu large %lu gb %lu x %lu[%lx-%lx] miss %lu\n", - s->spg, s->lpg, s->gpg, s->exec, - s->min_exec != ~0UL ? s->min_exec : 0, - s->max_exec, missed); - } - - expected = (s->gpg*GPS + s->lpg*LPS)/PAGE_SIZE + s->spg + missed; - if (expected != i) { - printk(KERN_ERR "CPA max_pfn_mapped %lu but expected %lu\n", - max_pfn_mapped, expected); - return 1; - } - return err; -} - -static unsigned long addr[NTEST]; -static unsigned int len[NTEST]; - -static struct page *pages[NPAGES]; -static unsigned long addrs[NPAGES]; - -/* Change the global bit on random pages in the direct mapping */ -static int pageattr_test(void) -{ - struct split_state sa, sb, sc; - unsigned long *bm; - pte_t *pte, pte0; - int failed = 0; - unsigned int level; - int i, k; - int err; - - if (print) - printk(KERN_INFO "CPA self-test:\n"); - - bm = vzalloc((max_pfn_mapped + 7) / 8); - if (!bm) { - printk(KERN_ERR "CPA Cannot vmalloc bitmap\n"); - return -ENOMEM; - } - - failed += print_split(&sa); - - for (i = 0; i < NTEST; i++) { - unsigned long pfn = prandom_u32() % max_pfn_mapped; - - addr[i] = (unsigned long)__va(pfn << PAGE_SHIFT); - len[i] = prandom_u32() % NPAGES; - len[i] = min_t(unsigned long, len[i], max_pfn_mapped - pfn - 1); - - if (len[i] == 0) - len[i] = 1; - - pte = NULL; - pte0 = pfn_pte(0, __pgprot(0)); /* shut gcc up */ - - for (k = 0; k < len[i]; k++) { - pte = lookup_address(addr[i] + k*PAGE_SIZE, &level); - if (!pte || pgprot_val(pte_pgprot(*pte)) == 0 || - !(pte_val(*pte) & _PAGE_PRESENT)) { - addr[i] = 0; - break; - } - if (k == 0) { - pte0 = *pte; - } else { - if (pgprot_val(pte_pgprot(*pte)) != - pgprot_val(pte_pgprot(pte0))) { - len[i] = k; - break; - } - } - if (test_bit(pfn + k, bm)) { - len[i] = k; - break; - } - __set_bit(pfn + k, bm); - addrs[k] = addr[i] + k*PAGE_SIZE; - pages[k] = pfn_to_page(pfn + k); - } - if (!addr[i] || !pte || !k) { - addr[i] = 0; - continue; - } - - switch (i % 3) { - case 0: - err = change_page_attr_set(&addr[i], len[i], PAGE_CPA_TEST, 0); - break; - - case 1: - err = change_page_attr_set(addrs, len[1], PAGE_CPA_TEST, 1); - break; - - case 2: - err = cpa_set_pages_array(pages, len[i], PAGE_CPA_TEST); - break; - } - - - if (err < 0) { - printk(KERN_ERR "CPA %d failed %d\n", i, err); - failed++; - } - - pte = lookup_address(addr[i], &level); - if (!pte || !pte_testbit(*pte) || pte_huge(*pte)) { - printk(KERN_ERR "CPA %lx: bad pte %Lx\n", addr[i], - pte ? (u64)pte_val(*pte) : 0ULL); - failed++; - } - if (level != PG_LEVEL_4K) { - printk(KERN_ERR "CPA %lx: unexpected level %d\n", - addr[i], level); - failed++; - } - - } - vfree(bm); - - failed += print_split(&sb); - - for (i = 0; i < NTEST; i++) { - if (!addr[i]) - continue; - pte = lookup_address(addr[i], &level); - if (!pte) { - printk(KERN_ERR "CPA lookup of %lx failed\n", addr[i]); - failed++; - continue; - } - err = change_page_attr_clear(&addr[i], len[i], PAGE_CPA_TEST, 0); - if (err < 0) { - printk(KERN_ERR "CPA reverting failed: %d\n", err); - failed++; - } - pte = lookup_address(addr[i], &level); - if (!pte || pte_testbit(*pte)) { - printk(KERN_ERR "CPA %lx: bad pte after revert %Lx\n", - addr[i], pte ? (u64)pte_val(*pte) : 0ULL); - failed++; - } - - } - - failed += print_split(&sc); - - if (failed) { - WARN(1, KERN_ERR "NOT PASSED. Please report.\n"); - return -EINVAL; - } else { - if (print) - printk(KERN_INFO "ok.\n"); - } - - return 0; -} - -static int do_pageattr_test(void *__unused) -{ - while (!kthread_should_stop()) { - schedule_timeout_interruptible(HZ*30); - if (pageattr_test() < 0) - break; - if (print) - print--; - } - return 0; -} - -static int start_pageattr_test(void) -{ - struct task_struct *p; - - p = kthread_create(do_pageattr_test, NULL, "pageattr-test"); - if (!IS_ERR(p)) - wake_up_process(p); - else - WARN_ON(1); - - return 0; -} -device_initcall(start_pageattr_test); diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c deleted file mode 100644 index 1b99ad05b117..000000000000 --- a/arch/x86/mm/pageattr.c +++ /dev/null @@ -1,2285 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright 2002 Andi Kleen, SuSE Labs. - * Thanks to Ben LaHaise for precious feedback. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "mm_internal.h" - -/* - * The current flushing context - we pass it instead of 5 arguments: - */ -struct cpa_data { - unsigned long *vaddr; - pgd_t *pgd; - pgprot_t mask_set; - pgprot_t mask_clr; - unsigned long numpages; - unsigned long curpage; - unsigned long pfn; - unsigned int flags; - unsigned int force_split : 1, - force_static_prot : 1; - struct page **pages; -}; - -enum cpa_warn { - CPA_CONFLICT, - CPA_PROTECT, - CPA_DETECT, -}; - -static const int cpa_warn_level = CPA_PROTECT; - -/* - * Serialize cpa() (for !DEBUG_PAGEALLOC which uses large identity mappings) - * using cpa_lock. So that we don't allow any other cpu, with stale large tlb - * entries change the page attribute in parallel to some other cpu - * splitting a large page entry along with changing the attribute. - */ -static DEFINE_SPINLOCK(cpa_lock); - -#define CPA_FLUSHTLB 1 -#define CPA_ARRAY 2 -#define CPA_PAGES_ARRAY 4 -#define CPA_NO_CHECK_ALIAS 8 /* Do not search for aliases */ - -#ifdef CONFIG_PROC_FS -static unsigned long direct_pages_count[PG_LEVEL_NUM]; - -void update_page_count(int level, unsigned long pages) -{ - /* Protect against CPA */ - spin_lock(&pgd_lock); - direct_pages_count[level] += pages; - spin_unlock(&pgd_lock); -} - -static void split_page_count(int level) -{ - if (direct_pages_count[level] == 0) - return; - - direct_pages_count[level]--; - direct_pages_count[level - 1] += PTRS_PER_PTE; -} - -void arch_report_meminfo(struct seq_file *m) -{ - seq_printf(m, "DirectMap4k: %8lu kB\n", - direct_pages_count[PG_LEVEL_4K] << 2); -#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) - seq_printf(m, "DirectMap2M: %8lu kB\n", - direct_pages_count[PG_LEVEL_2M] << 11); -#else - seq_printf(m, "DirectMap4M: %8lu kB\n", - direct_pages_count[PG_LEVEL_2M] << 12); -#endif - if (direct_gbpages) - seq_printf(m, "DirectMap1G: %8lu kB\n", - direct_pages_count[PG_LEVEL_1G] << 20); -} -#else -static inline void split_page_count(int level) { } -#endif - -#ifdef CONFIG_X86_CPA_STATISTICS - -static unsigned long cpa_1g_checked; -static unsigned long cpa_1g_sameprot; -static unsigned long cpa_1g_preserved; -static unsigned long cpa_2m_checked; -static unsigned long cpa_2m_sameprot; -static unsigned long cpa_2m_preserved; -static unsigned long cpa_4k_install; - -static inline void cpa_inc_1g_checked(void) -{ - cpa_1g_checked++; -} - -static inline void cpa_inc_2m_checked(void) -{ - cpa_2m_checked++; -} - -static inline void cpa_inc_4k_install(void) -{ - cpa_4k_install++; -} - -static inline void cpa_inc_lp_sameprot(int level) -{ - if (level == PG_LEVEL_1G) - cpa_1g_sameprot++; - else - cpa_2m_sameprot++; -} - -static inline void cpa_inc_lp_preserved(int level) -{ - if (level == PG_LEVEL_1G) - cpa_1g_preserved++; - else - cpa_2m_preserved++; -} - -static int cpastats_show(struct seq_file *m, void *p) -{ - seq_printf(m, "1G pages checked: %16lu\n", cpa_1g_checked); - seq_printf(m, "1G pages sameprot: %16lu\n", cpa_1g_sameprot); - seq_printf(m, "1G pages preserved: %16lu\n", cpa_1g_preserved); - seq_printf(m, "2M pages checked: %16lu\n", cpa_2m_checked); - seq_printf(m, "2M pages sameprot: %16lu\n", cpa_2m_sameprot); - seq_printf(m, "2M pages preserved: %16lu\n", cpa_2m_preserved); - seq_printf(m, "4K pages set-checked: %16lu\n", cpa_4k_install); - return 0; -} - -static int cpastats_open(struct inode *inode, struct file *file) -{ - return single_open(file, cpastats_show, NULL); -} - -static const struct file_operations cpastats_fops = { - .open = cpastats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int __init cpa_stats_init(void) -{ - debugfs_create_file("cpa_stats", S_IRUSR, arch_debugfs_dir, NULL, - &cpastats_fops); - return 0; -} -late_initcall(cpa_stats_init); -#else -static inline void cpa_inc_1g_checked(void) { } -static inline void cpa_inc_2m_checked(void) { } -static inline void cpa_inc_4k_install(void) { } -static inline void cpa_inc_lp_sameprot(int level) { } -static inline void cpa_inc_lp_preserved(int level) { } -#endif - - -static inline int -within(unsigned long addr, unsigned long start, unsigned long end) -{ - return addr >= start && addr < end; -} - -static inline int -within_inclusive(unsigned long addr, unsigned long start, unsigned long end) -{ - return addr >= start && addr <= end; -} - -#ifdef CONFIG_X86_64 - -static inline unsigned long highmap_start_pfn(void) -{ - return __pa_symbol(_text) >> PAGE_SHIFT; -} - -static inline unsigned long highmap_end_pfn(void) -{ - /* Do not reference physical address outside the kernel. */ - return __pa_symbol(roundup(_brk_end, PMD_SIZE) - 1) >> PAGE_SHIFT; -} - -static bool __cpa_pfn_in_highmap(unsigned long pfn) -{ - /* - * Kernel text has an alias mapping at a high address, known - * here as "highmap". - */ - return within_inclusive(pfn, highmap_start_pfn(), highmap_end_pfn()); -} - -#else - -static bool __cpa_pfn_in_highmap(unsigned long pfn) -{ - /* There is no highmap on 32-bit */ - return false; -} - -#endif - -/* - * See set_mce_nospec(). - * - * Machine check recovery code needs to change cache mode of poisoned pages to - * UC to avoid speculative access logging another error. But passing the - * address of the 1:1 mapping to set_memory_uc() is a fine way to encourage a - * speculative access. So we cheat and flip the top bit of the address. This - * works fine for the code that updates the page tables. But at the end of the - * process we need to flush the TLB and cache and the non-canonical address - * causes a #GP fault when used by the INVLPG and CLFLUSH instructions. - * - * But in the common case we already have a canonical address. This code - * will fix the top bit if needed and is a no-op otherwise. - */ -static inline unsigned long fix_addr(unsigned long addr) -{ -#ifdef CONFIG_X86_64 - return (long)(addr << 1) >> 1; -#else - return addr; -#endif -} - -static unsigned long __cpa_addr(struct cpa_data *cpa, unsigned long idx) -{ - if (cpa->flags & CPA_PAGES_ARRAY) { - struct page *page = cpa->pages[idx]; - - if (unlikely(PageHighMem(page))) - return 0; - - return (unsigned long)page_address(page); - } - - if (cpa->flags & CPA_ARRAY) - return cpa->vaddr[idx]; - - return *cpa->vaddr + idx * PAGE_SIZE; -} - -/* - * Flushing functions - */ - -static void clflush_cache_range_opt(void *vaddr, unsigned int size) -{ - const unsigned long clflush_size = boot_cpu_data.x86_clflush_size; - void *p = (void *)((unsigned long)vaddr & ~(clflush_size - 1)); - void *vend = vaddr + size; - - if (p >= vend) - return; - - for (; p < vend; p += clflush_size) - clflushopt(p); -} - -/** - * clflush_cache_range - flush a cache range with clflush - * @vaddr: virtual start address - * @size: number of bytes to flush - * - * CLFLUSHOPT is an unordered instruction which needs fencing with MFENCE or - * SFENCE to avoid ordering issues. - */ -void clflush_cache_range(void *vaddr, unsigned int size) -{ - mb(); - clflush_cache_range_opt(vaddr, size); - mb(); -} -EXPORT_SYMBOL_GPL(clflush_cache_range); - -void arch_invalidate_pmem(void *addr, size_t size) -{ - clflush_cache_range(addr, size); -} -EXPORT_SYMBOL_GPL(arch_invalidate_pmem); - -static void __cpa_flush_all(void *arg) -{ - unsigned long cache = (unsigned long)arg; - - /* - * Flush all to work around Errata in early athlons regarding - * large page flushing. - */ - __flush_tlb_all(); - - if (cache && boot_cpu_data.x86 >= 4) - wbinvd(); -} - -static void cpa_flush_all(unsigned long cache) -{ - BUG_ON(irqs_disabled() && !early_boot_irqs_disabled); - - on_each_cpu(__cpa_flush_all, (void *) cache, 1); -} - -void __cpa_flush_tlb(void *data) -{ - struct cpa_data *cpa = data; - unsigned int i; - - for (i = 0; i < cpa->numpages; i++) - __flush_tlb_one_kernel(fix_addr(__cpa_addr(cpa, i))); -} - -static void cpa_flush(struct cpa_data *data, int cache) -{ - struct cpa_data *cpa = data; - unsigned int i; - - BUG_ON(irqs_disabled() && !early_boot_irqs_disabled); - - if (cache && !static_cpu_has(X86_FEATURE_CLFLUSH)) { - cpa_flush_all(cache); - return; - } - - if (cpa->numpages <= tlb_single_page_flush_ceiling) - on_each_cpu(__cpa_flush_tlb, cpa, 1); - else - flush_tlb_all(); - - if (!cache) - return; - - mb(); - for (i = 0; i < cpa->numpages; i++) { - unsigned long addr = __cpa_addr(cpa, i); - unsigned int level; - - pte_t *pte = lookup_address(addr, &level); - - /* - * Only flush present addresses: - */ - if (pte && (pte_val(*pte) & _PAGE_PRESENT)) - clflush_cache_range_opt((void *)fix_addr(addr), PAGE_SIZE); - } - mb(); -} - -static bool overlaps(unsigned long r1_start, unsigned long r1_end, - unsigned long r2_start, unsigned long r2_end) -{ - return (r1_start <= r2_end && r1_end >= r2_start) || - (r2_start <= r1_end && r2_end >= r1_start); -} - -#ifdef CONFIG_PCI_BIOS -/* - * The BIOS area between 640k and 1Mb needs to be executable for PCI BIOS - * based config access (CONFIG_PCI_GOBIOS) support. - */ -#define BIOS_PFN PFN_DOWN(BIOS_BEGIN) -#define BIOS_PFN_END PFN_DOWN(BIOS_END - 1) - -static pgprotval_t protect_pci_bios(unsigned long spfn, unsigned long epfn) -{ - if (pcibios_enabled && overlaps(spfn, epfn, BIOS_PFN, BIOS_PFN_END)) - return _PAGE_NX; - return 0; -} -#else -static pgprotval_t protect_pci_bios(unsigned long spfn, unsigned long epfn) -{ - return 0; -} -#endif - -/* - * The .rodata section needs to be read-only. Using the pfn catches all - * aliases. This also includes __ro_after_init, so do not enforce until - * kernel_set_to_readonly is true. - */ -static pgprotval_t protect_rodata(unsigned long spfn, unsigned long epfn) -{ - unsigned long epfn_ro, spfn_ro = PFN_DOWN(__pa_symbol(__start_rodata)); - - /* - * Note: __end_rodata is at page aligned and not inclusive, so - * subtract 1 to get the last enforced PFN in the rodata area. - */ - epfn_ro = PFN_DOWN(__pa_symbol(__end_rodata)) - 1; - - if (kernel_set_to_readonly && overlaps(spfn, epfn, spfn_ro, epfn_ro)) - return _PAGE_RW; - return 0; -} - -/* - * Protect kernel text against becoming non executable by forbidding - * _PAGE_NX. This protects only the high kernel mapping (_text -> _etext) - * out of which the kernel actually executes. Do not protect the low - * mapping. - * - * This does not cover __inittext since that is gone after boot. - */ -static pgprotval_t protect_kernel_text(unsigned long start, unsigned long end) -{ - unsigned long t_end = (unsigned long)_etext - 1; - unsigned long t_start = (unsigned long)_text; - - if (overlaps(start, end, t_start, t_end)) - return _PAGE_NX; - return 0; -} - -#if defined(CONFIG_X86_64) -/* - * Once the kernel maps the text as RO (kernel_set_to_readonly is set), - * kernel text mappings for the large page aligned text, rodata sections - * will be always read-only. For the kernel identity mappings covering the - * holes caused by this alignment can be anything that user asks. - * - * This will preserve the large page mappings for kernel text/data at no - * extra cost. - */ -static pgprotval_t protect_kernel_text_ro(unsigned long start, - unsigned long end) -{ - unsigned long t_end = (unsigned long)__end_rodata_hpage_align - 1; - unsigned long t_start = (unsigned long)_text; - unsigned int level; - - if (!kernel_set_to_readonly || !overlaps(start, end, t_start, t_end)) - return 0; - /* - * Don't enforce the !RW mapping for the kernel text mapping, if - * the current mapping is already using small page mapping. No - * need to work hard to preserve large page mappings in this case. - * - * This also fixes the Linux Xen paravirt guest boot failure caused - * by unexpected read-only mappings for kernel identity - * mappings. In this paravirt guest case, the kernel text mapping - * and the kernel identity mapping share the same page-table pages, - * so the protections for kernel text and identity mappings have to - * be the same. - */ - if (lookup_address(start, &level) && (level != PG_LEVEL_4K)) - return _PAGE_RW; - return 0; -} -#else -static pgprotval_t protect_kernel_text_ro(unsigned long start, - unsigned long end) -{ - return 0; -} -#endif - -static inline bool conflicts(pgprot_t prot, pgprotval_t val) -{ - return (pgprot_val(prot) & ~val) != pgprot_val(prot); -} - -static inline void check_conflict(int warnlvl, pgprot_t prot, pgprotval_t val, - unsigned long start, unsigned long end, - unsigned long pfn, const char *txt) -{ - static const char *lvltxt[] = { - [CPA_CONFLICT] = "conflict", - [CPA_PROTECT] = "protect", - [CPA_DETECT] = "detect", - }; - - if (warnlvl > cpa_warn_level || !conflicts(prot, val)) - return; - - pr_warn("CPA %8s %10s: 0x%016lx - 0x%016lx PFN %lx req %016llx prevent %016llx\n", - lvltxt[warnlvl], txt, start, end, pfn, (unsigned long long)pgprot_val(prot), - (unsigned long long)val); -} - -/* - * Certain areas of memory on x86 require very specific protection flags, - * for example the BIOS area or kernel text. Callers don't always get this - * right (again, ioremap() on BIOS memory is not uncommon) so this function - * checks and fixes these known static required protection bits. - */ -static inline pgprot_t static_protections(pgprot_t prot, unsigned long start, - unsigned long pfn, unsigned long npg, - unsigned long lpsize, int warnlvl) -{ - pgprotval_t forbidden, res; - unsigned long end; - - /* - * There is no point in checking RW/NX conflicts when the requested - * mapping is setting the page !PRESENT. - */ - if (!(pgprot_val(prot) & _PAGE_PRESENT)) - return prot; - - /* Operate on the virtual address */ - end = start + npg * PAGE_SIZE - 1; - - res = protect_kernel_text(start, end); - check_conflict(warnlvl, prot, res, start, end, pfn, "Text NX"); - forbidden = res; - - /* - * Special case to preserve a large page. If the change spawns the - * full large page mapping then there is no point to split it - * up. Happens with ftrace and is going to be removed once ftrace - * switched to text_poke(). - */ - if (lpsize != (npg * PAGE_SIZE) || (start & (lpsize - 1))) { - res = protect_kernel_text_ro(start, end); - check_conflict(warnlvl, prot, res, start, end, pfn, "Text RO"); - forbidden |= res; - } - - /* Check the PFN directly */ - res = protect_pci_bios(pfn, pfn + npg - 1); - check_conflict(warnlvl, prot, res, start, end, pfn, "PCIBIOS NX"); - forbidden |= res; - - res = protect_rodata(pfn, pfn + npg - 1); - check_conflict(warnlvl, prot, res, start, end, pfn, "Rodata RO"); - forbidden |= res; - - return __pgprot(pgprot_val(prot) & ~forbidden); -} - -/* - * Lookup the page table entry for a virtual address in a specific pgd. - * Return a pointer to the entry and the level of the mapping. - */ -pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address, - unsigned int *level) -{ - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; - - *level = PG_LEVEL_NONE; - - if (pgd_none(*pgd)) - return NULL; - - p4d = p4d_offset(pgd, address); - if (p4d_none(*p4d)) - return NULL; - - *level = PG_LEVEL_512G; - if (p4d_large(*p4d) || !p4d_present(*p4d)) - return (pte_t *)p4d; - - pud = pud_offset(p4d, address); - if (pud_none(*pud)) - return NULL; - - *level = PG_LEVEL_1G; - if (pud_large(*pud) || !pud_present(*pud)) - return (pte_t *)pud; - - pmd = pmd_offset(pud, address); - if (pmd_none(*pmd)) - return NULL; - - *level = PG_LEVEL_2M; - if (pmd_large(*pmd) || !pmd_present(*pmd)) - return (pte_t *)pmd; - - *level = PG_LEVEL_4K; - - return pte_offset_kernel(pmd, address); -} - -/* - * Lookup the page table entry for a virtual address. Return a pointer - * to the entry and the level of the mapping. - * - * Note: We return pud and pmd either when the entry is marked large - * or when the present bit is not set. Otherwise we would return a - * pointer to a nonexisting mapping. - */ -pte_t *lookup_address(unsigned long address, unsigned int *level) -{ - return lookup_address_in_pgd(pgd_offset_k(address), address, level); -} -EXPORT_SYMBOL_GPL(lookup_address); - -static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address, - unsigned int *level) -{ - if (cpa->pgd) - return lookup_address_in_pgd(cpa->pgd + pgd_index(address), - address, level); - - return lookup_address(address, level); -} - -/* - * Lookup the PMD entry for a virtual address. Return a pointer to the entry - * or NULL if not present. - */ -pmd_t *lookup_pmd_address(unsigned long address) -{ - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - - pgd = pgd_offset_k(address); - if (pgd_none(*pgd)) - return NULL; - - p4d = p4d_offset(pgd, address); - if (p4d_none(*p4d) || p4d_large(*p4d) || !p4d_present(*p4d)) - return NULL; - - pud = pud_offset(p4d, address); - if (pud_none(*pud) || pud_large(*pud) || !pud_present(*pud)) - return NULL; - - return pmd_offset(pud, address); -} - -/* - * This is necessary because __pa() does not work on some - * kinds of memory, like vmalloc() or the alloc_remap() - * areas on 32-bit NUMA systems. The percpu areas can - * end up in this kind of memory, for instance. - * - * This could be optimized, but it is only intended to be - * used at inititalization time, and keeping it - * unoptimized should increase the testing coverage for - * the more obscure platforms. - */ -phys_addr_t slow_virt_to_phys(void *__virt_addr) -{ - unsigned long virt_addr = (unsigned long)__virt_addr; - phys_addr_t phys_addr; - unsigned long offset; - enum pg_level level; - pte_t *pte; - - pte = lookup_address(virt_addr, &level); - BUG_ON(!pte); - - /* - * pXX_pfn() returns unsigned long, which must be cast to phys_addr_t - * before being left-shifted PAGE_SHIFT bits -- this trick is to - * make 32-PAE kernel work correctly. - */ - switch (level) { - case PG_LEVEL_1G: - phys_addr = (phys_addr_t)pud_pfn(*(pud_t *)pte) << PAGE_SHIFT; - offset = virt_addr & ~PUD_PAGE_MASK; - break; - case PG_LEVEL_2M: - phys_addr = (phys_addr_t)pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT; - offset = virt_addr & ~PMD_PAGE_MASK; - break; - default: - phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT; - offset = virt_addr & ~PAGE_MASK; - } - - return (phys_addr_t)(phys_addr | offset); -} -EXPORT_SYMBOL_GPL(slow_virt_to_phys); - -/* - * Set the new pmd in all the pgds we know about: - */ -static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) -{ - /* change init_mm */ - set_pte_atomic(kpte, pte); -#ifdef CONFIG_X86_32 - if (!SHARED_KERNEL_PMD) { - struct page *page; - - list_for_each_entry(page, &pgd_list, lru) { - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; - - pgd = (pgd_t *)page_address(page) + pgd_index(address); - p4d = p4d_offset(pgd, address); - pud = pud_offset(p4d, address); - pmd = pmd_offset(pud, address); - set_pte_atomic((pte_t *)pmd, pte); - } - } -#endif -} - -static pgprot_t pgprot_clear_protnone_bits(pgprot_t prot) -{ - /* - * _PAGE_GLOBAL means "global page" for present PTEs. - * But, it is also used to indicate _PAGE_PROTNONE - * for non-present PTEs. - * - * This ensures that a _PAGE_GLOBAL PTE going from - * present to non-present is not confused as - * _PAGE_PROTNONE. - */ - if (!(pgprot_val(prot) & _PAGE_PRESENT)) - pgprot_val(prot) &= ~_PAGE_GLOBAL; - - return prot; -} - -static int __should_split_large_page(pte_t *kpte, unsigned long address, - struct cpa_data *cpa) -{ - unsigned long numpages, pmask, psize, lpaddr, pfn, old_pfn; - pgprot_t old_prot, new_prot, req_prot, chk_prot; - pte_t new_pte, *tmp; - enum pg_level level; - - /* - * Check for races, another CPU might have split this page - * up already: - */ - tmp = _lookup_address_cpa(cpa, address, &level); - if (tmp != kpte) - return 1; - - switch (level) { - case PG_LEVEL_2M: - old_prot = pmd_pgprot(*(pmd_t *)kpte); - old_pfn = pmd_pfn(*(pmd_t *)kpte); - cpa_inc_2m_checked(); - break; - case PG_LEVEL_1G: - old_prot = pud_pgprot(*(pud_t *)kpte); - old_pfn = pud_pfn(*(pud_t *)kpte); - cpa_inc_1g_checked(); - break; - default: - return -EINVAL; - } - - psize = page_level_size(level); - pmask = page_level_mask(level); - - /* - * Calculate the number of pages, which fit into this large - * page starting at address: - */ - lpaddr = (address + psize) & pmask; - numpages = (lpaddr - address) >> PAGE_SHIFT; - if (numpages < cpa->numpages) - cpa->numpages = numpages; - - /* - * We are safe now. Check whether the new pgprot is the same: - * Convert protection attributes to 4k-format, as cpa->mask* are set - * up accordingly. - */ - - /* Clear PSE (aka _PAGE_PAT) and move PAT bit to correct position */ - req_prot = pgprot_large_2_4k(old_prot); - - pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr); - pgprot_val(req_prot) |= pgprot_val(cpa->mask_set); - - /* - * req_prot is in format of 4k pages. It must be converted to large - * page format: the caching mode includes the PAT bit located at - * different bit positions in the two formats. - */ - req_prot = pgprot_4k_2_large(req_prot); - req_prot = pgprot_clear_protnone_bits(req_prot); - if (pgprot_val(req_prot) & _PAGE_PRESENT) - pgprot_val(req_prot) |= _PAGE_PSE; - - /* - * old_pfn points to the large page base pfn. So we need to add the - * offset of the virtual address: - */ - pfn = old_pfn + ((address & (psize - 1)) >> PAGE_SHIFT); - cpa->pfn = pfn; - - /* - * Calculate the large page base address and the number of 4K pages - * in the large page - */ - lpaddr = address & pmask; - numpages = psize >> PAGE_SHIFT; - - /* - * Sanity check that the existing mapping is correct versus the static - * protections. static_protections() guards against !PRESENT, so no - * extra conditional required here. - */ - chk_prot = static_protections(old_prot, lpaddr, old_pfn, numpages, - psize, CPA_CONFLICT); - - if (WARN_ON_ONCE(pgprot_val(chk_prot) != pgprot_val(old_prot))) { - /* - * Split the large page and tell the split code to - * enforce static protections. - */ - cpa->force_static_prot = 1; - return 1; - } - - /* - * Optimization: If the requested pgprot is the same as the current - * pgprot, then the large page can be preserved and no updates are - * required independent of alignment and length of the requested - * range. The above already established that the current pgprot is - * correct, which in consequence makes the requested pgprot correct - * as well if it is the same. The static protection scan below will - * not come to a different conclusion. - */ - if (pgprot_val(req_prot) == pgprot_val(old_prot)) { - cpa_inc_lp_sameprot(level); - return 0; - } - - /* - * If the requested range does not cover the full page, split it up - */ - if (address != lpaddr || cpa->numpages != numpages) - return 1; - - /* - * Check whether the requested pgprot is conflicting with a static - * protection requirement in the large page. - */ - new_prot = static_protections(req_prot, lpaddr, old_pfn, numpages, - psize, CPA_DETECT); - - /* - * If there is a conflict, split the large page. - * - * There used to be a 4k wise evaluation trying really hard to - * preserve the large pages, but experimentation has shown, that this - * does not help at all. There might be corner cases which would - * preserve one large page occasionally, but it's really not worth the - * extra code and cycles for the common case. - */ - if (pgprot_val(req_prot) != pgprot_val(new_prot)) - return 1; - - /* All checks passed. Update the large page mapping. */ - new_pte = pfn_pte(old_pfn, new_prot); - __set_pmd_pte(kpte, address, new_pte); - cpa->flags |= CPA_FLUSHTLB; - cpa_inc_lp_preserved(level); - return 0; -} - -static int should_split_large_page(pte_t *kpte, unsigned long address, - struct cpa_data *cpa) -{ - int do_split; - - if (cpa->force_split) - return 1; - - spin_lock(&pgd_lock); - do_split = __should_split_large_page(kpte, address, cpa); - spin_unlock(&pgd_lock); - - return do_split; -} - -static void split_set_pte(struct cpa_data *cpa, pte_t *pte, unsigned long pfn, - pgprot_t ref_prot, unsigned long address, - unsigned long size) -{ - unsigned int npg = PFN_DOWN(size); - pgprot_t prot; - - /* - * If should_split_large_page() discovered an inconsistent mapping, - * remove the invalid protection in the split mapping. - */ - if (!cpa->force_static_prot) - goto set; - - /* Hand in lpsize = 0 to enforce the protection mechanism */ - prot = static_protections(ref_prot, address, pfn, npg, 0, CPA_PROTECT); - - if (pgprot_val(prot) == pgprot_val(ref_prot)) - goto set; - - /* - * If this is splitting a PMD, fix it up. PUD splits cannot be - * fixed trivially as that would require to rescan the newly - * installed PMD mappings after returning from split_large_page() - * so an eventual further split can allocate the necessary PTE - * pages. Warn for now and revisit it in case this actually - * happens. - */ - if (size == PAGE_SIZE) - ref_prot = prot; - else - pr_warn_once("CPA: Cannot fixup static protections for PUD split\n"); -set: - set_pte(pte, pfn_pte(pfn, ref_prot)); -} - -static int -__split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address, - struct page *base) -{ - unsigned long lpaddr, lpinc, ref_pfn, pfn, pfninc = 1; - pte_t *pbase = (pte_t *)page_address(base); - unsigned int i, level; - pgprot_t ref_prot; - pte_t *tmp; - - spin_lock(&pgd_lock); - /* - * Check for races, another CPU might have split this page - * up for us already: - */ - tmp = _lookup_address_cpa(cpa, address, &level); - if (tmp != kpte) { - spin_unlock(&pgd_lock); - return 1; - } - - paravirt_alloc_pte(&init_mm, page_to_pfn(base)); - - switch (level) { - case PG_LEVEL_2M: - ref_prot = pmd_pgprot(*(pmd_t *)kpte); - /* - * Clear PSE (aka _PAGE_PAT) and move - * PAT bit to correct position. - */ - ref_prot = pgprot_large_2_4k(ref_prot); - ref_pfn = pmd_pfn(*(pmd_t *)kpte); - lpaddr = address & PMD_MASK; - lpinc = PAGE_SIZE; - break; - - case PG_LEVEL_1G: - ref_prot = pud_pgprot(*(pud_t *)kpte); - ref_pfn = pud_pfn(*(pud_t *)kpte); - pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT; - lpaddr = address & PUD_MASK; - lpinc = PMD_SIZE; - /* - * Clear the PSE flags if the PRESENT flag is not set - * otherwise pmd_present/pmd_huge will return true - * even on a non present pmd. - */ - if (!(pgprot_val(ref_prot) & _PAGE_PRESENT)) - pgprot_val(ref_prot) &= ~_PAGE_PSE; - break; - - default: - spin_unlock(&pgd_lock); - return 1; - } - - ref_prot = pgprot_clear_protnone_bits(ref_prot); - - /* - * Get the target pfn from the original entry: - */ - pfn = ref_pfn; - for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc, lpaddr += lpinc) - split_set_pte(cpa, pbase + i, pfn, ref_prot, lpaddr, lpinc); - - if (virt_addr_valid(address)) { - unsigned long pfn = PFN_DOWN(__pa(address)); - - if (pfn_range_is_mapped(pfn, pfn + 1)) - split_page_count(level); - } - - /* - * Install the new, split up pagetable. - * - * We use the standard kernel pagetable protections for the new - * pagetable protections, the actual ptes set above control the - * primary protection behavior: - */ - __set_pmd_pte(kpte, address, mk_pte(base, __pgprot(_KERNPG_TABLE))); - - /* - * Do a global flush tlb after splitting the large page - * and before we do the actual change page attribute in the PTE. - * - * Without this, we violate the TLB application note, that says: - * "The TLBs may contain both ordinary and large-page - * translations for a 4-KByte range of linear addresses. This - * may occur if software modifies the paging structures so that - * the page size used for the address range changes. If the two - * translations differ with respect to page frame or attributes - * (e.g., permissions), processor behavior is undefined and may - * be implementation-specific." - * - * We do this global tlb flush inside the cpa_lock, so that we - * don't allow any other cpu, with stale tlb entries change the - * page attribute in parallel, that also falls into the - * just split large page entry. - */ - flush_tlb_all(); - spin_unlock(&pgd_lock); - - return 0; -} - -static int split_large_page(struct cpa_data *cpa, pte_t *kpte, - unsigned long address) -{ - struct page *base; - - if (!debug_pagealloc_enabled()) - spin_unlock(&cpa_lock); - base = alloc_pages(GFP_KERNEL, 0); - if (!debug_pagealloc_enabled()) - spin_lock(&cpa_lock); - if (!base) - return -ENOMEM; - - if (__split_large_page(cpa, kpte, address, base)) - __free_page(base); - - return 0; -} - -static bool try_to_free_pte_page(pte_t *pte) -{ - int i; - - for (i = 0; i < PTRS_PER_PTE; i++) - if (!pte_none(pte[i])) - return false; - - free_page((unsigned long)pte); - return true; -} - -static bool try_to_free_pmd_page(pmd_t *pmd) -{ - int i; - - for (i = 0; i < PTRS_PER_PMD; i++) - if (!pmd_none(pmd[i])) - return false; - - free_page((unsigned long)pmd); - return true; -} - -static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end) -{ - pte_t *pte = pte_offset_kernel(pmd, start); - - while (start < end) { - set_pte(pte, __pte(0)); - - start += PAGE_SIZE; - pte++; - } - - if (try_to_free_pte_page((pte_t *)pmd_page_vaddr(*pmd))) { - pmd_clear(pmd); - return true; - } - return false; -} - -static void __unmap_pmd_range(pud_t *pud, pmd_t *pmd, - unsigned long start, unsigned long end) -{ - if (unmap_pte_range(pmd, start, end)) - if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud))) - pud_clear(pud); -} - -static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end) -{ - pmd_t *pmd = pmd_offset(pud, start); - - /* - * Not on a 2MB page boundary? - */ - if (start & (PMD_SIZE - 1)) { - unsigned long next_page = (start + PMD_SIZE) & PMD_MASK; - unsigned long pre_end = min_t(unsigned long, end, next_page); - - __unmap_pmd_range(pud, pmd, start, pre_end); - - start = pre_end; - pmd++; - } - - /* - * Try to unmap in 2M chunks. - */ - while (end - start >= PMD_SIZE) { - if (pmd_large(*pmd)) - pmd_clear(pmd); - else - __unmap_pmd_range(pud, pmd, start, start + PMD_SIZE); - - start += PMD_SIZE; - pmd++; - } - - /* - * 4K leftovers? - */ - if (start < end) - return __unmap_pmd_range(pud, pmd, start, end); - - /* - * Try again to free the PMD page if haven't succeeded above. - */ - if (!pud_none(*pud)) - if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud))) - pud_clear(pud); -} - -static void unmap_pud_range(p4d_t *p4d, unsigned long start, unsigned long end) -{ - pud_t *pud = pud_offset(p4d, start); - - /* - * Not on a GB page boundary? - */ - if (start & (PUD_SIZE - 1)) { - unsigned long next_page = (start + PUD_SIZE) & PUD_MASK; - unsigned long pre_end = min_t(unsigned long, end, next_page); - - unmap_pmd_range(pud, start, pre_end); - - start = pre_end; - pud++; - } - - /* - * Try to unmap in 1G chunks? - */ - while (end - start >= PUD_SIZE) { - - if (pud_large(*pud)) - pud_clear(pud); - else - unmap_pmd_range(pud, start, start + PUD_SIZE); - - start += PUD_SIZE; - pud++; - } - - /* - * 2M leftovers? - */ - if (start < end) - unmap_pmd_range(pud, start, end); - - /* - * No need to try to free the PUD page because we'll free it in - * populate_pgd's error path - */ -} - -static int alloc_pte_page(pmd_t *pmd) -{ - pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL); - if (!pte) - return -1; - - set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE)); - return 0; -} - -static int alloc_pmd_page(pud_t *pud) -{ - pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL); - if (!pmd) - return -1; - - set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); - return 0; -} - -static void populate_pte(struct cpa_data *cpa, - unsigned long start, unsigned long end, - unsigned num_pages, pmd_t *pmd, pgprot_t pgprot) -{ - pte_t *pte; - - pte = pte_offset_kernel(pmd, start); - - pgprot = pgprot_clear_protnone_bits(pgprot); - - while (num_pages-- && start < end) { - set_pte(pte, pfn_pte(cpa->pfn, pgprot)); - - start += PAGE_SIZE; - cpa->pfn++; - pte++; - } -} - -static long populate_pmd(struct cpa_data *cpa, - unsigned long start, unsigned long end, - unsigned num_pages, pud_t *pud, pgprot_t pgprot) -{ - long cur_pages = 0; - pmd_t *pmd; - pgprot_t pmd_pgprot; - - /* - * Not on a 2M boundary? - */ - if (start & (PMD_SIZE - 1)) { - unsigned long pre_end = start + (num_pages << PAGE_SHIFT); - unsigned long next_page = (start + PMD_SIZE) & PMD_MASK; - - pre_end = min_t(unsigned long, pre_end, next_page); - cur_pages = (pre_end - start) >> PAGE_SHIFT; - cur_pages = min_t(unsigned int, num_pages, cur_pages); - - /* - * Need a PTE page? - */ - pmd = pmd_offset(pud, start); - if (pmd_none(*pmd)) - if (alloc_pte_page(pmd)) - return -1; - - populate_pte(cpa, start, pre_end, cur_pages, pmd, pgprot); - - start = pre_end; - } - - /* - * We mapped them all? - */ - if (num_pages == cur_pages) - return cur_pages; - - pmd_pgprot = pgprot_4k_2_large(pgprot); - - while (end - start >= PMD_SIZE) { - - /* - * We cannot use a 1G page so allocate a PMD page if needed. - */ - if (pud_none(*pud)) - if (alloc_pmd_page(pud)) - return -1; - - pmd = pmd_offset(pud, start); - - set_pmd(pmd, pmd_mkhuge(pfn_pmd(cpa->pfn, - canon_pgprot(pmd_pgprot)))); - - start += PMD_SIZE; - cpa->pfn += PMD_SIZE >> PAGE_SHIFT; - cur_pages += PMD_SIZE >> PAGE_SHIFT; - } - - /* - * Map trailing 4K pages. - */ - if (start < end) { - pmd = pmd_offset(pud, start); - if (pmd_none(*pmd)) - if (alloc_pte_page(pmd)) - return -1; - - populate_pte(cpa, start, end, num_pages - cur_pages, - pmd, pgprot); - } - return num_pages; -} - -static int populate_pud(struct cpa_data *cpa, unsigned long start, p4d_t *p4d, - pgprot_t pgprot) -{ - pud_t *pud; - unsigned long end; - long cur_pages = 0; - pgprot_t pud_pgprot; - - end = start + (cpa->numpages << PAGE_SHIFT); - - /* - * Not on a Gb page boundary? => map everything up to it with - * smaller pages. - */ - if (start & (PUD_SIZE - 1)) { - unsigned long pre_end; - unsigned long next_page = (start + PUD_SIZE) & PUD_MASK; - - pre_end = min_t(unsigned long, end, next_page); - cur_pages = (pre_end - start) >> PAGE_SHIFT; - cur_pages = min_t(int, (int)cpa->numpages, cur_pages); - - pud = pud_offset(p4d, start); - - /* - * Need a PMD page? - */ - if (pud_none(*pud)) - if (alloc_pmd_page(pud)) - return -1; - - cur_pages = populate_pmd(cpa, start, pre_end, cur_pages, - pud, pgprot); - if (cur_pages < 0) - return cur_pages; - - start = pre_end; - } - - /* We mapped them all? */ - if (cpa->numpages == cur_pages) - return cur_pages; - - pud = pud_offset(p4d, start); - pud_pgprot = pgprot_4k_2_large(pgprot); - - /* - * Map everything starting from the Gb boundary, possibly with 1G pages - */ - while (boot_cpu_has(X86_FEATURE_GBPAGES) && end - start >= PUD_SIZE) { - set_pud(pud, pud_mkhuge(pfn_pud(cpa->pfn, - canon_pgprot(pud_pgprot)))); - - start += PUD_SIZE; - cpa->pfn += PUD_SIZE >> PAGE_SHIFT; - cur_pages += PUD_SIZE >> PAGE_SHIFT; - pud++; - } - - /* Map trailing leftover */ - if (start < end) { - long tmp; - - pud = pud_offset(p4d, start); - if (pud_none(*pud)) - if (alloc_pmd_page(pud)) - return -1; - - tmp = populate_pmd(cpa, start, end, cpa->numpages - cur_pages, - pud, pgprot); - if (tmp < 0) - return cur_pages; - - cur_pages += tmp; - } - return cur_pages; -} - -/* - * Restrictions for kernel page table do not necessarily apply when mapping in - * an alternate PGD. - */ -static int populate_pgd(struct cpa_data *cpa, unsigned long addr) -{ - pgprot_t pgprot = __pgprot(_KERNPG_TABLE); - pud_t *pud = NULL; /* shut up gcc */ - p4d_t *p4d; - pgd_t *pgd_entry; - long ret; - - pgd_entry = cpa->pgd + pgd_index(addr); - - if (pgd_none(*pgd_entry)) { - p4d = (p4d_t *)get_zeroed_page(GFP_KERNEL); - if (!p4d) - return -1; - - set_pgd(pgd_entry, __pgd(__pa(p4d) | _KERNPG_TABLE)); - } - - /* - * Allocate a PUD page and hand it down for mapping. - */ - p4d = p4d_offset(pgd_entry, addr); - if (p4d_none(*p4d)) { - pud = (pud_t *)get_zeroed_page(GFP_KERNEL); - if (!pud) - return -1; - - set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE)); - } - - pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr); - pgprot_val(pgprot) |= pgprot_val(cpa->mask_set); - - ret = populate_pud(cpa, addr, p4d, pgprot); - if (ret < 0) { - /* - * Leave the PUD page in place in case some other CPU or thread - * already found it, but remove any useless entries we just - * added to it. - */ - unmap_pud_range(p4d, addr, - addr + (cpa->numpages << PAGE_SHIFT)); - return ret; - } - - cpa->numpages = ret; - return 0; -} - -static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr, - int primary) -{ - if (cpa->pgd) { - /* - * Right now, we only execute this code path when mapping - * the EFI virtual memory map regions, no other users - * provide a ->pgd value. This may change in the future. - */ - return populate_pgd(cpa, vaddr); - } - - /* - * Ignore all non primary paths. - */ - if (!primary) { - cpa->numpages = 1; - return 0; - } - - /* - * Ignore the NULL PTE for kernel identity mapping, as it is expected - * to have holes. - * Also set numpages to '1' indicating that we processed cpa req for - * one virtual address page and its pfn. TBD: numpages can be set based - * on the initial value and the level returned by lookup_address(). - */ - if (within(vaddr, PAGE_OFFSET, - PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) { - cpa->numpages = 1; - cpa->pfn = __pa(vaddr) >> PAGE_SHIFT; - return 0; - - } else if (__cpa_pfn_in_highmap(cpa->pfn)) { - /* Faults in the highmap are OK, so do not warn: */ - return -EFAULT; - } else { - WARN(1, KERN_WARNING "CPA: called for zero pte. " - "vaddr = %lx cpa->vaddr = %lx\n", vaddr, - *cpa->vaddr); - - return -EFAULT; - } -} - -static int __change_page_attr(struct cpa_data *cpa, int primary) -{ - unsigned long address; - int do_split, err; - unsigned int level; - pte_t *kpte, old_pte; - - address = __cpa_addr(cpa, cpa->curpage); -repeat: - kpte = _lookup_address_cpa(cpa, address, &level); - if (!kpte) - return __cpa_process_fault(cpa, address, primary); - - old_pte = *kpte; - if (pte_none(old_pte)) - return __cpa_process_fault(cpa, address, primary); - - if (level == PG_LEVEL_4K) { - pte_t new_pte; - pgprot_t new_prot = pte_pgprot(old_pte); - unsigned long pfn = pte_pfn(old_pte); - - pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr); - pgprot_val(new_prot) |= pgprot_val(cpa->mask_set); - - cpa_inc_4k_install(); - /* Hand in lpsize = 0 to enforce the protection mechanism */ - new_prot = static_protections(new_prot, address, pfn, 1, 0, - CPA_PROTECT); - - new_prot = pgprot_clear_protnone_bits(new_prot); - - /* - * We need to keep the pfn from the existing PTE, - * after all we're only going to change it's attributes - * not the memory it points to - */ - new_pte = pfn_pte(pfn, new_prot); - cpa->pfn = pfn; - /* - * Do we really change anything ? - */ - if (pte_val(old_pte) != pte_val(new_pte)) { - set_pte_atomic(kpte, new_pte); - cpa->flags |= CPA_FLUSHTLB; - } - cpa->numpages = 1; - return 0; - } - - /* - * Check, whether we can keep the large page intact - * and just change the pte: - */ - do_split = should_split_large_page(kpte, address, cpa); - /* - * When the range fits into the existing large page, - * return. cp->numpages and cpa->tlbflush have been updated in - * try_large_page: - */ - if (do_split <= 0) - return do_split; - - /* - * We have to split the large page: - */ - err = split_large_page(cpa, kpte, address); - if (!err) - goto repeat; - - return err; -} - -static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias); - -static int cpa_process_alias(struct cpa_data *cpa) -{ - struct cpa_data alias_cpa; - unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT); - unsigned long vaddr; - int ret; - - if (!pfn_range_is_mapped(cpa->pfn, cpa->pfn + 1)) - return 0; - - /* - * No need to redo, when the primary call touched the direct - * mapping already: - */ - vaddr = __cpa_addr(cpa, cpa->curpage); - if (!(within(vaddr, PAGE_OFFSET, - PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)))) { - - alias_cpa = *cpa; - alias_cpa.vaddr = &laddr; - alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); - alias_cpa.curpage = 0; - - ret = __change_page_attr_set_clr(&alias_cpa, 0); - if (ret) - return ret; - } - -#ifdef CONFIG_X86_64 - /* - * If the primary call didn't touch the high mapping already - * and the physical address is inside the kernel map, we need - * to touch the high mapped kernel as well: - */ - if (!within(vaddr, (unsigned long)_text, _brk_end) && - __cpa_pfn_in_highmap(cpa->pfn)) { - unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + - __START_KERNEL_map - phys_base; - alias_cpa = *cpa; - alias_cpa.vaddr = &temp_cpa_vaddr; - alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); - alias_cpa.curpage = 0; - - /* - * The high mapping range is imprecise, so ignore the - * return value. - */ - __change_page_attr_set_clr(&alias_cpa, 0); - } -#endif - - return 0; -} - -static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) -{ - unsigned long numpages = cpa->numpages; - unsigned long rempages = numpages; - int ret = 0; - - while (rempages) { - /* - * Store the remaining nr of pages for the large page - * preservation check. - */ - cpa->numpages = rempages; - /* for array changes, we can't use large page */ - if (cpa->flags & (CPA_ARRAY | CPA_PAGES_ARRAY)) - cpa->numpages = 1; - - if (!debug_pagealloc_enabled()) - spin_lock(&cpa_lock); - ret = __change_page_attr(cpa, checkalias); - if (!debug_pagealloc_enabled()) - spin_unlock(&cpa_lock); - if (ret) - goto out; - - if (checkalias) { - ret = cpa_process_alias(cpa); - if (ret) - goto out; - } - - /* - * Adjust the number of pages with the result of the - * CPA operation. Either a large page has been - * preserved or a single page update happened. - */ - BUG_ON(cpa->numpages > rempages || !cpa->numpages); - rempages -= cpa->numpages; - cpa->curpage += cpa->numpages; - } - -out: - /* Restore the original numpages */ - cpa->numpages = numpages; - return ret; -} - -static int change_page_attr_set_clr(unsigned long *addr, int numpages, - pgprot_t mask_set, pgprot_t mask_clr, - int force_split, int in_flag, - struct page **pages) -{ - struct cpa_data cpa; - int ret, cache, checkalias; - - memset(&cpa, 0, sizeof(cpa)); - - /* - * Check, if we are requested to set a not supported - * feature. Clearing non-supported features is OK. - */ - mask_set = canon_pgprot(mask_set); - - if (!pgprot_val(mask_set) && !pgprot_val(mask_clr) && !force_split) - return 0; - - /* Ensure we are PAGE_SIZE aligned */ - if (in_flag & CPA_ARRAY) { - int i; - for (i = 0; i < numpages; i++) { - if (addr[i] & ~PAGE_MASK) { - addr[i] &= PAGE_MASK; - WARN_ON_ONCE(1); - } - } - } else if (!(in_flag & CPA_PAGES_ARRAY)) { - /* - * in_flag of CPA_PAGES_ARRAY implies it is aligned. - * No need to check in that case - */ - if (*addr & ~PAGE_MASK) { - *addr &= PAGE_MASK; - /* - * People should not be passing in unaligned addresses: - */ - WARN_ON_ONCE(1); - } - } - - /* Must avoid aliasing mappings in the highmem code */ - kmap_flush_unused(); - - vm_unmap_aliases(); - - cpa.vaddr = addr; - cpa.pages = pages; - cpa.numpages = numpages; - cpa.mask_set = mask_set; - cpa.mask_clr = mask_clr; - cpa.flags = 0; - cpa.curpage = 0; - cpa.force_split = force_split; - - if (in_flag & (CPA_ARRAY | CPA_PAGES_ARRAY)) - cpa.flags |= in_flag; - - /* No alias checking for _NX bit modifications */ - checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX; - /* Has caller explicitly disabled alias checking? */ - if (in_flag & CPA_NO_CHECK_ALIAS) - checkalias = 0; - - ret = __change_page_attr_set_clr(&cpa, checkalias); - - /* - * Check whether we really changed something: - */ - if (!(cpa.flags & CPA_FLUSHTLB)) - goto out; - - /* - * No need to flush, when we did not set any of the caching - * attributes: - */ - cache = !!pgprot2cachemode(mask_set); - - /* - * On error; flush everything to be sure. - */ - if (ret) { - cpa_flush_all(cache); - goto out; - } - - cpa_flush(&cpa, cache); -out: - return ret; -} - -static inline int change_page_attr_set(unsigned long *addr, int numpages, - pgprot_t mask, int array) -{ - return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0, - (array ? CPA_ARRAY : 0), NULL); -} - -static inline int change_page_attr_clear(unsigned long *addr, int numpages, - pgprot_t mask, int array) -{ - return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0, - (array ? CPA_ARRAY : 0), NULL); -} - -static inline int cpa_set_pages_array(struct page **pages, int numpages, - pgprot_t mask) -{ - return change_page_attr_set_clr(NULL, numpages, mask, __pgprot(0), 0, - CPA_PAGES_ARRAY, pages); -} - -static inline int cpa_clear_pages_array(struct page **pages, int numpages, - pgprot_t mask) -{ - return change_page_attr_set_clr(NULL, numpages, __pgprot(0), mask, 0, - CPA_PAGES_ARRAY, pages); -} - -int _set_memory_uc(unsigned long addr, int numpages) -{ - /* - * for now UC MINUS. see comments in ioremap() - * If you really need strong UC use ioremap_uc(), but note - * that you cannot override IO areas with set_memory_*() as - * these helpers cannot work with IO memory. - */ - return change_page_attr_set(&addr, numpages, - cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS), - 0); -} - -int set_memory_uc(unsigned long addr, int numpages) -{ - int ret; - - /* - * for now UC MINUS. see comments in ioremap() - */ - ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, - _PAGE_CACHE_MODE_UC_MINUS, NULL); - if (ret) - goto out_err; - - ret = _set_memory_uc(addr, numpages); - if (ret) - goto out_free; - - return 0; - -out_free: - free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); -out_err: - return ret; -} -EXPORT_SYMBOL(set_memory_uc); - -int _set_memory_wc(unsigned long addr, int numpages) -{ - int ret; - - ret = change_page_attr_set(&addr, numpages, - cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS), - 0); - if (!ret) { - ret = change_page_attr_set_clr(&addr, numpages, - cachemode2pgprot(_PAGE_CACHE_MODE_WC), - __pgprot(_PAGE_CACHE_MASK), - 0, 0, NULL); - } - return ret; -} - -int set_memory_wc(unsigned long addr, int numpages) -{ - int ret; - - ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, - _PAGE_CACHE_MODE_WC, NULL); - if (ret) - return ret; - - ret = _set_memory_wc(addr, numpages); - if (ret) - free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); - - return ret; -} -EXPORT_SYMBOL(set_memory_wc); - -int _set_memory_wt(unsigned long addr, int numpages) -{ - return change_page_attr_set(&addr, numpages, - cachemode2pgprot(_PAGE_CACHE_MODE_WT), 0); -} - -int _set_memory_wb(unsigned long addr, int numpages) -{ - /* WB cache mode is hard wired to all cache attribute bits being 0 */ - return change_page_attr_clear(&addr, numpages, - __pgprot(_PAGE_CACHE_MASK), 0); -} - -int set_memory_wb(unsigned long addr, int numpages) -{ - int ret; - - ret = _set_memory_wb(addr, numpages); - if (ret) - return ret; - - free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); - return 0; -} -EXPORT_SYMBOL(set_memory_wb); - -int set_memory_x(unsigned long addr, int numpages) -{ - if (!(__supported_pte_mask & _PAGE_NX)) - return 0; - - return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0); -} - -int set_memory_nx(unsigned long addr, int numpages) -{ - if (!(__supported_pte_mask & _PAGE_NX)) - return 0; - - return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0); -} - -int set_memory_ro(unsigned long addr, int numpages) -{ - return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0); -} - -int set_memory_rw(unsigned long addr, int numpages) -{ - return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0); -} - -int set_memory_np(unsigned long addr, int numpages) -{ - return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_PRESENT), 0); -} - -int set_memory_np_noalias(unsigned long addr, int numpages) -{ - int cpa_flags = CPA_NO_CHECK_ALIAS; - - return change_page_attr_set_clr(&addr, numpages, __pgprot(0), - __pgprot(_PAGE_PRESENT), 0, - cpa_flags, NULL); -} - -int set_memory_4k(unsigned long addr, int numpages) -{ - return change_page_attr_set_clr(&addr, numpages, __pgprot(0), - __pgprot(0), 1, 0, NULL); -} - -int set_memory_nonglobal(unsigned long addr, int numpages) -{ - return change_page_attr_clear(&addr, numpages, - __pgprot(_PAGE_GLOBAL), 0); -} - -int set_memory_global(unsigned long addr, int numpages) -{ - return change_page_attr_set(&addr, numpages, - __pgprot(_PAGE_GLOBAL), 0); -} - -static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc) -{ - struct cpa_data cpa; - int ret; - - /* Nothing to do if memory encryption is not active */ - if (!mem_encrypt_active()) - return 0; - - /* Should not be working on unaligned addresses */ - if (WARN_ONCE(addr & ~PAGE_MASK, "misaligned address: %#lx\n", addr)) - addr &= PAGE_MASK; - - memset(&cpa, 0, sizeof(cpa)); - cpa.vaddr = &addr; - cpa.numpages = numpages; - cpa.mask_set = enc ? __pgprot(_PAGE_ENC) : __pgprot(0); - cpa.mask_clr = enc ? __pgprot(0) : __pgprot(_PAGE_ENC); - cpa.pgd = init_mm.pgd; - - /* Must avoid aliasing mappings in the highmem code */ - kmap_flush_unused(); - vm_unmap_aliases(); - - /* - * Before changing the encryption attribute, we need to flush caches. - */ - cpa_flush(&cpa, 1); - - ret = __change_page_attr_set_clr(&cpa, 1); - - /* - * After changing the encryption attribute, we need to flush TLBs again - * in case any speculative TLB caching occurred (but no need to flush - * caches again). We could just use cpa_flush_all(), but in case TLB - * flushing gets optimized in the cpa_flush() path use the same logic - * as above. - */ - cpa_flush(&cpa, 0); - - return ret; -} - -int set_memory_encrypted(unsigned long addr, int numpages) -{ - return __set_memory_enc_dec(addr, numpages, true); -} -EXPORT_SYMBOL_GPL(set_memory_encrypted); - -int set_memory_decrypted(unsigned long addr, int numpages) -{ - return __set_memory_enc_dec(addr, numpages, false); -} -EXPORT_SYMBOL_GPL(set_memory_decrypted); - -int set_pages_uc(struct page *page, int numpages) -{ - unsigned long addr = (unsigned long)page_address(page); - - return set_memory_uc(addr, numpages); -} -EXPORT_SYMBOL(set_pages_uc); - -static int _set_pages_array(struct page **pages, int numpages, - enum page_cache_mode new_type) -{ - unsigned long start; - unsigned long end; - enum page_cache_mode set_type; - int i; - int free_idx; - int ret; - - for (i = 0; i < numpages; i++) { - if (PageHighMem(pages[i])) - continue; - start = page_to_pfn(pages[i]) << PAGE_SHIFT; - end = start + PAGE_SIZE; - if (reserve_memtype(start, end, new_type, NULL)) - goto err_out; - } - - /* If WC, set to UC- first and then WC */ - set_type = (new_type == _PAGE_CACHE_MODE_WC) ? - _PAGE_CACHE_MODE_UC_MINUS : new_type; - - ret = cpa_set_pages_array(pages, numpages, - cachemode2pgprot(set_type)); - if (!ret && new_type == _PAGE_CACHE_MODE_WC) - ret = change_page_attr_set_clr(NULL, numpages, - cachemode2pgprot( - _PAGE_CACHE_MODE_WC), - __pgprot(_PAGE_CACHE_MASK), - 0, CPA_PAGES_ARRAY, pages); - if (ret) - goto err_out; - return 0; /* Success */ -err_out: - free_idx = i; - for (i = 0; i < free_idx; i++) { - if (PageHighMem(pages[i])) - continue; - start = page_to_pfn(pages[i]) << PAGE_SHIFT; - end = start + PAGE_SIZE; - free_memtype(start, end); - } - return -EINVAL; -} - -int set_pages_array_uc(struct page **pages, int numpages) -{ - return _set_pages_array(pages, numpages, _PAGE_CACHE_MODE_UC_MINUS); -} -EXPORT_SYMBOL(set_pages_array_uc); - -int set_pages_array_wc(struct page **pages, int numpages) -{ - return _set_pages_array(pages, numpages, _PAGE_CACHE_MODE_WC); -} -EXPORT_SYMBOL(set_pages_array_wc); - -int set_pages_array_wt(struct page **pages, int numpages) -{ - return _set_pages_array(pages, numpages, _PAGE_CACHE_MODE_WT); -} -EXPORT_SYMBOL_GPL(set_pages_array_wt); - -int set_pages_wb(struct page *page, int numpages) -{ - unsigned long addr = (unsigned long)page_address(page); - - return set_memory_wb(addr, numpages); -} -EXPORT_SYMBOL(set_pages_wb); - -int set_pages_array_wb(struct page **pages, int numpages) -{ - int retval; - unsigned long start; - unsigned long end; - int i; - - /* WB cache mode is hard wired to all cache attribute bits being 0 */ - retval = cpa_clear_pages_array(pages, numpages, - __pgprot(_PAGE_CACHE_MASK)); - if (retval) - return retval; - - for (i = 0; i < numpages; i++) { - if (PageHighMem(pages[i])) - continue; - start = page_to_pfn(pages[i]) << PAGE_SHIFT; - end = start + PAGE_SIZE; - free_memtype(start, end); - } - - return 0; -} -EXPORT_SYMBOL(set_pages_array_wb); - -int set_pages_ro(struct page *page, int numpages) -{ - unsigned long addr = (unsigned long)page_address(page); - - return set_memory_ro(addr, numpages); -} - -int set_pages_rw(struct page *page, int numpages) -{ - unsigned long addr = (unsigned long)page_address(page); - - return set_memory_rw(addr, numpages); -} - -static int __set_pages_p(struct page *page, int numpages) -{ - unsigned long tempaddr = (unsigned long) page_address(page); - struct cpa_data cpa = { .vaddr = &tempaddr, - .pgd = NULL, - .numpages = numpages, - .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW), - .mask_clr = __pgprot(0), - .flags = 0}; - - /* - * No alias checking needed for setting present flag. otherwise, - * we may need to break large pages for 64-bit kernel text - * mappings (this adds to complexity if we want to do this from - * atomic context especially). Let's keep it simple! - */ - return __change_page_attr_set_clr(&cpa, 0); -} - -static int __set_pages_np(struct page *page, int numpages) -{ - unsigned long tempaddr = (unsigned long) page_address(page); - struct cpa_data cpa = { .vaddr = &tempaddr, - .pgd = NULL, - .numpages = numpages, - .mask_set = __pgprot(0), - .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW), - .flags = 0}; - - /* - * No alias checking needed for setting not present flag. otherwise, - * we may need to break large pages for 64-bit kernel text - * mappings (this adds to complexity if we want to do this from - * atomic context especially). Let's keep it simple! - */ - return __change_page_attr_set_clr(&cpa, 0); -} - -int set_direct_map_invalid_noflush(struct page *page) -{ - return __set_pages_np(page, 1); -} - -int set_direct_map_default_noflush(struct page *page) -{ - return __set_pages_p(page, 1); -} - -void __kernel_map_pages(struct page *page, int numpages, int enable) -{ - if (PageHighMem(page)) - return; - if (!enable) { - debug_check_no_locks_freed(page_address(page), - numpages * PAGE_SIZE); - } - - /* - * The return value is ignored as the calls cannot fail. - * Large pages for identity mappings are not used at boot time - * and hence no memory allocations during large page split. - */ - if (enable) - __set_pages_p(page, numpages); - else - __set_pages_np(page, numpages); - - /* - * We should perform an IPI and flush all tlbs, - * but that can deadlock->flush only current cpu. - * Preemption needs to be disabled around __flush_tlb_all() due to - * CR3 reload in __native_flush_tlb(). - */ - preempt_disable(); - __flush_tlb_all(); - preempt_enable(); - - arch_flush_lazy_mmu_mode(); -} - -#ifdef CONFIG_HIBERNATION -bool kernel_page_present(struct page *page) -{ - unsigned int level; - pte_t *pte; - - if (PageHighMem(page)) - return false; - - pte = lookup_address((unsigned long)page_address(page), &level); - return (pte_val(*pte) & _PAGE_PRESENT); -} -#endif /* CONFIG_HIBERNATION */ - -int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, - unsigned numpages, unsigned long page_flags) -{ - int retval = -EINVAL; - - struct cpa_data cpa = { - .vaddr = &address, - .pfn = pfn, - .pgd = pgd, - .numpages = numpages, - .mask_set = __pgprot(0), - .mask_clr = __pgprot(0), - .flags = 0, - }; - - WARN_ONCE(num_online_cpus() > 1, "Don't call after initializing SMP"); - - if (!(__supported_pte_mask & _PAGE_NX)) - goto out; - - if (!(page_flags & _PAGE_NX)) - cpa.mask_clr = __pgprot(_PAGE_NX); - - if (!(page_flags & _PAGE_RW)) - cpa.mask_clr = __pgprot(_PAGE_RW); - - if (!(page_flags & _PAGE_ENC)) - cpa.mask_clr = pgprot_encrypted(cpa.mask_clr); - - cpa.mask_set = __pgprot(_PAGE_PRESENT | page_flags); - - retval = __change_page_attr_set_clr(&cpa, 0); - __flush_tlb_all(); - -out: - return retval; -} - -/* - * __flush_tlb_all() flushes mappings only on current CPU and hence this - * function shouldn't be used in an SMP environment. Presently, it's used only - * during boot (way before smp_init()) by EFI subsystem and hence is ok. - */ -int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address, - unsigned long numpages) -{ - int retval; - - /* - * The typical sequence for unmapping is to find a pte through - * lookup_address_in_pgd() (ideally, it should never return NULL because - * the address is already mapped) and change it's protections. As pfn is - * the *target* of a mapping, it's not useful while unmapping. - */ - struct cpa_data cpa = { - .vaddr = &address, - .pfn = 0, - .pgd = pgd, - .numpages = numpages, - .mask_set = __pgprot(0), - .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW), - .flags = 0, - }; - - WARN_ONCE(num_online_cpus() > 1, "Don't call after initializing SMP"); - - retval = __change_page_attr_set_clr(&cpa, 0); - __flush_tlb_all(); - - return retval; -} - -/* - * The testcases use internal knowledge of the implementation that shouldn't - * be exposed to the rest of the kernel. Include these directly here. - */ -#ifdef CONFIG_CPA_DEBUG -#include "pageattr-test.c" -#endif diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c deleted file mode 100644 index f1677fad1735..000000000000 --- a/arch/x86/mm/pat.c +++ /dev/null @@ -1,1219 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Page Attribute Table (PAT) support: handle memory caching attributes in page tables. - * - * Authors: Venkatesh Pallipadi - * Suresh B Siddha - * - * Loosely based on earlier PAT patchset from Eric Biederman and Andi Kleen. - * - * Basic principles: - * - * PAT is a CPU feature supported by all modern x86 CPUs, to allow the firmware and - * the kernel to set one of a handful of 'caching type' attributes for physical - * memory ranges: uncached, write-combining, write-through, write-protected, - * and the most commonly used and default attribute: write-back caching. - * - * PAT support supercedes and augments MTRR support in a compatible fashion: MTRR is - * a hardware interface to enumerate a limited number of physical memory ranges - * and set their caching attributes explicitly, programmed into the CPU via MSRs. - * Even modern CPUs have MTRRs enabled - but these are typically not touched - * by the kernel or by user-space (such as the X server), we rely on PAT for any - * additional cache attribute logic. - * - * PAT doesn't work via explicit memory ranges, but uses page table entries to add - * cache attribute information to the mapped memory range: there's 3 bits used, - * (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT), with the 8 possible values mapped by the - * CPU to actual cache attributes via an MSR loaded into the CPU (MSR_IA32_CR_PAT). - * - * ( There's a metric ton of finer details, such as compatibility with CPU quirks - * that only support 4 types of PAT entries, and interaction with MTRRs, see - * below for details. ) - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "pat_internal.h" -#include "mm_internal.h" - -#undef pr_fmt -#define pr_fmt(fmt) "" fmt - -static bool __read_mostly pat_bp_initialized; -static bool __read_mostly pat_disabled = !IS_ENABLED(CONFIG_X86_PAT); -static bool __read_mostly pat_bp_enabled; -static bool __read_mostly pat_cm_initialized; - -/* - * PAT support is enabled by default, but can be disabled for - * various user-requested or hardware-forced reasons: - */ -void pat_disable(const char *msg_reason) -{ - if (pat_disabled) - return; - - if (pat_bp_initialized) { - WARN_ONCE(1, "x86/PAT: PAT cannot be disabled after initialization\n"); - return; - } - - pat_disabled = true; - pr_info("x86/PAT: %s\n", msg_reason); -} - -static int __init nopat(char *str) -{ - pat_disable("PAT support disabled via boot option."); - return 0; -} -early_param("nopat", nopat); - -bool pat_enabled(void) -{ - return pat_bp_enabled; -} -EXPORT_SYMBOL_GPL(pat_enabled); - -int pat_debug_enable; - -static int __init pat_debug_setup(char *str) -{ - pat_debug_enable = 1; - return 0; -} -__setup("debugpat", pat_debug_setup); - -#ifdef CONFIG_X86_PAT -/* - * X86 PAT uses page flags arch_1 and uncached together to keep track of - * memory type of pages that have backing page struct. - * - * X86 PAT supports 4 different memory types: - * - _PAGE_CACHE_MODE_WB - * - _PAGE_CACHE_MODE_WC - * - _PAGE_CACHE_MODE_UC_MINUS - * - _PAGE_CACHE_MODE_WT - * - * _PAGE_CACHE_MODE_WB is the default type. - */ - -#define _PGMT_WB 0 -#define _PGMT_WC (1UL << PG_arch_1) -#define _PGMT_UC_MINUS (1UL << PG_uncached) -#define _PGMT_WT (1UL << PG_uncached | 1UL << PG_arch_1) -#define _PGMT_MASK (1UL << PG_uncached | 1UL << PG_arch_1) -#define _PGMT_CLEAR_MASK (~_PGMT_MASK) - -static inline enum page_cache_mode get_page_memtype(struct page *pg) -{ - unsigned long pg_flags = pg->flags & _PGMT_MASK; - - if (pg_flags == _PGMT_WB) - return _PAGE_CACHE_MODE_WB; - else if (pg_flags == _PGMT_WC) - return _PAGE_CACHE_MODE_WC; - else if (pg_flags == _PGMT_UC_MINUS) - return _PAGE_CACHE_MODE_UC_MINUS; - else - return _PAGE_CACHE_MODE_WT; -} - -static inline void set_page_memtype(struct page *pg, - enum page_cache_mode memtype) -{ - unsigned long memtype_flags; - unsigned long old_flags; - unsigned long new_flags; - - switch (memtype) { - case _PAGE_CACHE_MODE_WC: - memtype_flags = _PGMT_WC; - break; - case _PAGE_CACHE_MODE_UC_MINUS: - memtype_flags = _PGMT_UC_MINUS; - break; - case _PAGE_CACHE_MODE_WT: - memtype_flags = _PGMT_WT; - break; - case _PAGE_CACHE_MODE_WB: - default: - memtype_flags = _PGMT_WB; - break; - } - - do { - old_flags = pg->flags; - new_flags = (old_flags & _PGMT_CLEAR_MASK) | memtype_flags; - } while (cmpxchg(&pg->flags, old_flags, new_flags) != old_flags); -} -#else -static inline enum page_cache_mode get_page_memtype(struct page *pg) -{ - return -1; -} -static inline void set_page_memtype(struct page *pg, - enum page_cache_mode memtype) -{ -} -#endif - -enum { - PAT_UC = 0, /* uncached */ - PAT_WC = 1, /* Write combining */ - PAT_WT = 4, /* Write Through */ - PAT_WP = 5, /* Write Protected */ - PAT_WB = 6, /* Write Back (default) */ - PAT_UC_MINUS = 7, /* UC, but can be overridden by MTRR */ -}; - -#define CM(c) (_PAGE_CACHE_MODE_ ## c) - -static enum page_cache_mode pat_get_cache_mode(unsigned pat_val, char *msg) -{ - enum page_cache_mode cache; - char *cache_mode; - - switch (pat_val) { - case PAT_UC: cache = CM(UC); cache_mode = "UC "; break; - case PAT_WC: cache = CM(WC); cache_mode = "WC "; break; - case PAT_WT: cache = CM(WT); cache_mode = "WT "; break; - case PAT_WP: cache = CM(WP); cache_mode = "WP "; break; - case PAT_WB: cache = CM(WB); cache_mode = "WB "; break; - case PAT_UC_MINUS: cache = CM(UC_MINUS); cache_mode = "UC- "; break; - default: cache = CM(WB); cache_mode = "WB "; break; - } - - memcpy(msg, cache_mode, 4); - - return cache; -} - -#undef CM - -/* - * Update the cache mode to pgprot translation tables according to PAT - * configuration. - * Using lower indices is preferred, so we start with highest index. - */ -static void __init_cache_modes(u64 pat) -{ - enum page_cache_mode cache; - char pat_msg[33]; - int i; - - WARN_ON_ONCE(pat_cm_initialized); - - pat_msg[32] = 0; - for (i = 7; i >= 0; i--) { - cache = pat_get_cache_mode((pat >> (i * 8)) & 7, - pat_msg + 4 * i); - update_cache_mode_entry(i, cache); - } - pr_info("x86/PAT: Configuration [0-7]: %s\n", pat_msg); - - pat_cm_initialized = true; -} - -#define PAT(x, y) ((u64)PAT_ ## y << ((x)*8)) - -static void pat_bp_init(u64 pat) -{ - u64 tmp_pat; - - if (!boot_cpu_has(X86_FEATURE_PAT)) { - pat_disable("PAT not supported by the CPU."); - return; - } - - rdmsrl(MSR_IA32_CR_PAT, tmp_pat); - if (!tmp_pat) { - pat_disable("PAT support disabled by the firmware."); - return; - } - - wrmsrl(MSR_IA32_CR_PAT, pat); - pat_bp_enabled = true; - - __init_cache_modes(pat); -} - -static void pat_ap_init(u64 pat) -{ - if (!boot_cpu_has(X86_FEATURE_PAT)) { - /* - * If this happens we are on a secondary CPU, but switched to - * PAT on the boot CPU. We have no way to undo PAT. - */ - panic("x86/PAT: PAT enabled, but not supported by secondary CPU\n"); - } - - wrmsrl(MSR_IA32_CR_PAT, pat); -} - -void init_cache_modes(void) -{ - u64 pat = 0; - - if (pat_cm_initialized) - return; - - if (boot_cpu_has(X86_FEATURE_PAT)) { - /* - * CPU supports PAT. Set PAT table to be consistent with - * PAT MSR. This case supports "nopat" boot option, and - * virtual machine environments which support PAT without - * MTRRs. In specific, Xen has unique setup to PAT MSR. - * - * If PAT MSR returns 0, it is considered invalid and emulates - * as No PAT. - */ - rdmsrl(MSR_IA32_CR_PAT, pat); - } - - if (!pat) { - /* - * No PAT. Emulate the PAT table that corresponds to the two - * cache bits, PWT (Write Through) and PCD (Cache Disable). - * This setup is also the same as the BIOS default setup. - * - * PTE encoding: - * - * PCD - * |PWT PAT - * || slot - * 00 0 WB : _PAGE_CACHE_MODE_WB - * 01 1 WT : _PAGE_CACHE_MODE_WT - * 10 2 UC-: _PAGE_CACHE_MODE_UC_MINUS - * 11 3 UC : _PAGE_CACHE_MODE_UC - * - * NOTE: When WC or WP is used, it is redirected to UC- per - * the default setup in __cachemode2pte_tbl[]. - */ - pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) | - PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC); - } - - __init_cache_modes(pat); -} - -/** - * pat_init - Initialize the PAT MSR and PAT table on the current CPU - * - * This function initializes PAT MSR and PAT table with an OS-defined value - * to enable additional cache attributes, WC, WT and WP. - * - * This function must be called on all CPUs using the specific sequence of - * operations defined in Intel SDM. mtrr_rendezvous_handler() provides this - * procedure for PAT. - */ -void pat_init(void) -{ - u64 pat; - struct cpuinfo_x86 *c = &boot_cpu_data; - -#ifndef CONFIG_X86_PAT - pr_info_once("x86/PAT: PAT support disabled because CONFIG_X86_PAT is disabled in the kernel.\n"); -#endif - - if (pat_disabled) - return; - - if ((c->x86_vendor == X86_VENDOR_INTEL) && - (((c->x86 == 0x6) && (c->x86_model <= 0xd)) || - ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) { - /* - * PAT support with the lower four entries. Intel Pentium 2, - * 3, M, and 4 are affected by PAT errata, which makes the - * upper four entries unusable. To be on the safe side, we don't - * use those. - * - * PTE encoding: - * PAT - * |PCD - * ||PWT PAT - * ||| slot - * 000 0 WB : _PAGE_CACHE_MODE_WB - * 001 1 WC : _PAGE_CACHE_MODE_WC - * 010 2 UC-: _PAGE_CACHE_MODE_UC_MINUS - * 011 3 UC : _PAGE_CACHE_MODE_UC - * PAT bit unused - * - * NOTE: When WT or WP is used, it is redirected to UC- per - * the default setup in __cachemode2pte_tbl[]. - */ - pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) | - PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC); - } else { - /* - * Full PAT support. We put WT in slot 7 to improve - * robustness in the presence of errata that might cause - * the high PAT bit to be ignored. This way, a buggy slot 7 - * access will hit slot 3, and slot 3 is UC, so at worst - * we lose performance without causing a correctness issue. - * Pentium 4 erratum N46 is an example for such an erratum, - * although we try not to use PAT at all on affected CPUs. - * - * PTE encoding: - * PAT - * |PCD - * ||PWT PAT - * ||| slot - * 000 0 WB : _PAGE_CACHE_MODE_WB - * 001 1 WC : _PAGE_CACHE_MODE_WC - * 010 2 UC-: _PAGE_CACHE_MODE_UC_MINUS - * 011 3 UC : _PAGE_CACHE_MODE_UC - * 100 4 WB : Reserved - * 101 5 WP : _PAGE_CACHE_MODE_WP - * 110 6 UC-: Reserved - * 111 7 WT : _PAGE_CACHE_MODE_WT - * - * The reserved slots are unused, but mapped to their - * corresponding types in the presence of PAT errata. - */ - pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) | - PAT(4, WB) | PAT(5, WP) | PAT(6, UC_MINUS) | PAT(7, WT); - } - - if (!pat_bp_initialized) { - pat_bp_init(pat); - pat_bp_initialized = true; - } else { - pat_ap_init(pat); - } -} - -#undef PAT - -static DEFINE_SPINLOCK(memtype_lock); /* protects memtype accesses */ - -/* - * Does intersection of PAT memory type and MTRR memory type and returns - * the resulting memory type as PAT understands it. - * (Type in pat and mtrr will not have same value) - * The intersection is based on "Effective Memory Type" tables in IA-32 - * SDM vol 3a - */ -static unsigned long pat_x_mtrr_type(u64 start, u64 end, - enum page_cache_mode req_type) -{ - /* - * Look for MTRR hint to get the effective type in case where PAT - * request is for WB. - */ - if (req_type == _PAGE_CACHE_MODE_WB) { - u8 mtrr_type, uniform; - - mtrr_type = mtrr_type_lookup(start, end, &uniform); - if (mtrr_type != MTRR_TYPE_WRBACK) - return _PAGE_CACHE_MODE_UC_MINUS; - - return _PAGE_CACHE_MODE_WB; - } - - return req_type; -} - -struct pagerange_state { - unsigned long cur_pfn; - int ram; - int not_ram; -}; - -static int -pagerange_is_ram_callback(unsigned long initial_pfn, unsigned long total_nr_pages, void *arg) -{ - struct pagerange_state *state = arg; - - state->not_ram |= initial_pfn > state->cur_pfn; - state->ram |= total_nr_pages > 0; - state->cur_pfn = initial_pfn + total_nr_pages; - - return state->ram && state->not_ram; -} - -static int pat_pagerange_is_ram(resource_size_t start, resource_size_t end) -{ - int ret = 0; - unsigned long start_pfn = start >> PAGE_SHIFT; - unsigned long end_pfn = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; - struct pagerange_state state = {start_pfn, 0, 0}; - - /* - * For legacy reasons, physical address range in the legacy ISA - * region is tracked as non-RAM. This will allow users of - * /dev/mem to map portions of legacy ISA region, even when - * some of those portions are listed(or not even listed) with - * different e820 types(RAM/reserved/..) - */ - if (start_pfn < ISA_END_ADDRESS >> PAGE_SHIFT) - start_pfn = ISA_END_ADDRESS >> PAGE_SHIFT; - - if (start_pfn < end_pfn) { - ret = walk_system_ram_range(start_pfn, end_pfn - start_pfn, - &state, pagerange_is_ram_callback); - } - - return (ret > 0) ? -1 : (state.ram ? 1 : 0); -} - -/* - * For RAM pages, we use page flags to mark the pages with appropriate type. - * The page flags are limited to four types, WB (default), WC, WT and UC-. - * WP request fails with -EINVAL, and UC gets redirected to UC-. Setting - * a new memory type is only allowed for a page mapped with the default WB - * type. - * - * Here we do two passes: - * - Find the memtype of all the pages in the range, look for any conflicts. - * - In case of no conflicts, set the new memtype for pages in the range. - */ -static int reserve_ram_pages_type(u64 start, u64 end, - enum page_cache_mode req_type, - enum page_cache_mode *new_type) -{ - struct page *page; - u64 pfn; - - if (req_type == _PAGE_CACHE_MODE_WP) { - if (new_type) - *new_type = _PAGE_CACHE_MODE_UC_MINUS; - return -EINVAL; - } - - if (req_type == _PAGE_CACHE_MODE_UC) { - /* We do not support strong UC */ - WARN_ON_ONCE(1); - req_type = _PAGE_CACHE_MODE_UC_MINUS; - } - - for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { - enum page_cache_mode type; - - page = pfn_to_page(pfn); - type = get_page_memtype(page); - if (type != _PAGE_CACHE_MODE_WB) { - pr_info("x86/PAT: reserve_ram_pages_type failed [mem %#010Lx-%#010Lx], track 0x%x, req 0x%x\n", - start, end - 1, type, req_type); - if (new_type) - *new_type = type; - - return -EBUSY; - } - } - - if (new_type) - *new_type = req_type; - - for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { - page = pfn_to_page(pfn); - set_page_memtype(page, req_type); - } - return 0; -} - -static int free_ram_pages_type(u64 start, u64 end) -{ - struct page *page; - u64 pfn; - - for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { - page = pfn_to_page(pfn); - set_page_memtype(page, _PAGE_CACHE_MODE_WB); - } - return 0; -} - -static u64 sanitize_phys(u64 address) -{ - /* - * When changing the memtype for pages containing poison allow - * for a "decoy" virtual address (bit 63 clear) passed to - * set_memory_X(). __pa() on a "decoy" address results in a - * physical address with bit 63 set. - * - * Decoy addresses are not present for 32-bit builds, see - * set_mce_nospec(). - */ - if (IS_ENABLED(CONFIG_X86_64)) - return address & __PHYSICAL_MASK; - return address; -} - -/* - * req_type typically has one of the: - * - _PAGE_CACHE_MODE_WB - * - _PAGE_CACHE_MODE_WC - * - _PAGE_CACHE_MODE_UC_MINUS - * - _PAGE_CACHE_MODE_UC - * - _PAGE_CACHE_MODE_WT - * - * If new_type is NULL, function will return an error if it cannot reserve the - * region with req_type. If new_type is non-NULL, function will return - * available type in new_type in case of no error. In case of any error - * it will return a negative return value. - */ -int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type, - enum page_cache_mode *new_type) -{ - struct memtype *entry_new; - enum page_cache_mode actual_type; - int is_range_ram; - int err = 0; - - start = sanitize_phys(start); - end = sanitize_phys(end); - if (start >= end) { - WARN(1, "%s failed: [mem %#010Lx-%#010Lx], req %s\n", __func__, - start, end - 1, cattr_name(req_type)); - return -EINVAL; - } - - if (!pat_enabled()) { - /* This is identical to page table setting without PAT */ - if (new_type) - *new_type = req_type; - return 0; - } - - /* Low ISA region is always mapped WB in page table. No need to track */ - if (x86_platform.is_untracked_pat_range(start, end)) { - if (new_type) - *new_type = _PAGE_CACHE_MODE_WB; - return 0; - } - - /* - * Call mtrr_lookup to get the type hint. This is an - * optimization for /dev/mem mmap'ers into WB memory (BIOS - * tools and ACPI tools). Use WB request for WB memory and use - * UC_MINUS otherwise. - */ - actual_type = pat_x_mtrr_type(start, end, req_type); - - if (new_type) - *new_type = actual_type; - - is_range_ram = pat_pagerange_is_ram(start, end); - if (is_range_ram == 1) { - - err = reserve_ram_pages_type(start, end, req_type, new_type); - - return err; - } else if (is_range_ram < 0) { - return -EINVAL; - } - - entry_new = kzalloc(sizeof(struct memtype), GFP_KERNEL); - if (!entry_new) - return -ENOMEM; - - entry_new->start = start; - entry_new->end = end; - entry_new->type = actual_type; - - spin_lock(&memtype_lock); - - err = memtype_check_insert(entry_new, new_type); - if (err) { - pr_info("x86/PAT: reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n", - start, end - 1, - cattr_name(entry_new->type), cattr_name(req_type)); - kfree(entry_new); - spin_unlock(&memtype_lock); - - return err; - } - - spin_unlock(&memtype_lock); - - dprintk("reserve_memtype added [mem %#010Lx-%#010Lx], track %s, req %s, ret %s\n", - start, end - 1, cattr_name(entry_new->type), cattr_name(req_type), - new_type ? cattr_name(*new_type) : "-"); - - return err; -} - -int free_memtype(u64 start, u64 end) -{ - int is_range_ram; - struct memtype *entry_old; - - if (!pat_enabled()) - return 0; - - start = sanitize_phys(start); - end = sanitize_phys(end); - - /* Low ISA region is always mapped WB. No need to track */ - if (x86_platform.is_untracked_pat_range(start, end)) - return 0; - - is_range_ram = pat_pagerange_is_ram(start, end); - if (is_range_ram == 1) - return free_ram_pages_type(start, end); - if (is_range_ram < 0) - return -EINVAL; - - spin_lock(&memtype_lock); - entry_old = memtype_erase(start, end); - spin_unlock(&memtype_lock); - - if (IS_ERR(entry_old)) { - pr_info("x86/PAT: %s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n", - current->comm, current->pid, start, end - 1); - return -EINVAL; - } - - kfree(entry_old); - - dprintk("free_memtype request [mem %#010Lx-%#010Lx]\n", start, end - 1); - - return 0; -} - - -/** - * lookup_memtype - Looksup the memory type for a physical address - * @paddr: physical address of which memory type needs to be looked up - * - * Only to be called when PAT is enabled - * - * Returns _PAGE_CACHE_MODE_WB, _PAGE_CACHE_MODE_WC, _PAGE_CACHE_MODE_UC_MINUS - * or _PAGE_CACHE_MODE_WT. - */ -static enum page_cache_mode lookup_memtype(u64 paddr) -{ - enum page_cache_mode rettype = _PAGE_CACHE_MODE_WB; - struct memtype *entry; - - if (x86_platform.is_untracked_pat_range(paddr, paddr + PAGE_SIZE)) - return rettype; - - if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) { - struct page *page; - - page = pfn_to_page(paddr >> PAGE_SHIFT); - return get_page_memtype(page); - } - - spin_lock(&memtype_lock); - - entry = memtype_lookup(paddr); - if (entry != NULL) - rettype = entry->type; - else - rettype = _PAGE_CACHE_MODE_UC_MINUS; - - spin_unlock(&memtype_lock); - - return rettype; -} - -/** - * pat_pfn_immune_to_uc_mtrr - Check whether the PAT memory type - * of @pfn cannot be overridden by UC MTRR memory type. - * - * Only to be called when PAT is enabled. - * - * Returns true, if the PAT memory type of @pfn is UC, UC-, or WC. - * Returns false in other cases. - */ -bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn) -{ - enum page_cache_mode cm = lookup_memtype(PFN_PHYS(pfn)); - - return cm == _PAGE_CACHE_MODE_UC || - cm == _PAGE_CACHE_MODE_UC_MINUS || - cm == _PAGE_CACHE_MODE_WC; -} -EXPORT_SYMBOL_GPL(pat_pfn_immune_to_uc_mtrr); - -/** - * io_reserve_memtype - Request a memory type mapping for a region of memory - * @start: start (physical address) of the region - * @end: end (physical address) of the region - * @type: A pointer to memtype, with requested type. On success, requested - * or any other compatible type that was available for the region is returned - * - * On success, returns 0 - * On failure, returns non-zero - */ -int io_reserve_memtype(resource_size_t start, resource_size_t end, - enum page_cache_mode *type) -{ - resource_size_t size = end - start; - enum page_cache_mode req_type = *type; - enum page_cache_mode new_type; - int ret; - - WARN_ON_ONCE(iomem_map_sanity_check(start, size)); - - ret = reserve_memtype(start, end, req_type, &new_type); - if (ret) - goto out_err; - - if (!is_new_memtype_allowed(start, size, req_type, new_type)) - goto out_free; - - if (kernel_map_sync_memtype(start, size, new_type) < 0) - goto out_free; - - *type = new_type; - return 0; - -out_free: - free_memtype(start, end); - ret = -EBUSY; -out_err: - return ret; -} - -/** - * io_free_memtype - Release a memory type mapping for a region of memory - * @start: start (physical address) of the region - * @end: end (physical address) of the region - */ -void io_free_memtype(resource_size_t start, resource_size_t end) -{ - free_memtype(start, end); -} - -int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size) -{ - enum page_cache_mode type = _PAGE_CACHE_MODE_WC; - - return io_reserve_memtype(start, start + size, &type); -} -EXPORT_SYMBOL(arch_io_reserve_memtype_wc); - -void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size) -{ - io_free_memtype(start, start + size); -} -EXPORT_SYMBOL(arch_io_free_memtype_wc); - -pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, - unsigned long size, pgprot_t vma_prot) -{ - if (!phys_mem_access_encrypted(pfn << PAGE_SHIFT, size)) - vma_prot = pgprot_decrypted(vma_prot); - - return vma_prot; -} - -#ifdef CONFIG_STRICT_DEVMEM -/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM */ -static inline int range_is_allowed(unsigned long pfn, unsigned long size) -{ - return 1; -} -#else -/* This check is needed to avoid cache aliasing when PAT is enabled */ -static inline int range_is_allowed(unsigned long pfn, unsigned long size) -{ - u64 from = ((u64)pfn) << PAGE_SHIFT; - u64 to = from + size; - u64 cursor = from; - - if (!pat_enabled()) - return 1; - - while (cursor < to) { - if (!devmem_is_allowed(pfn)) - return 0; - cursor += PAGE_SIZE; - pfn++; - } - return 1; -} -#endif /* CONFIG_STRICT_DEVMEM */ - -int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, - unsigned long size, pgprot_t *vma_prot) -{ - enum page_cache_mode pcm = _PAGE_CACHE_MODE_WB; - - if (!range_is_allowed(pfn, size)) - return 0; - - if (file->f_flags & O_DSYNC) - pcm = _PAGE_CACHE_MODE_UC_MINUS; - - *vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) | - cachemode2protval(pcm)); - return 1; -} - -/* - * Change the memory type for the physical address range in kernel identity - * mapping space if that range is a part of identity map. - */ -int kernel_map_sync_memtype(u64 base, unsigned long size, - enum page_cache_mode pcm) -{ - unsigned long id_sz; - - if (base > __pa(high_memory-1)) - return 0; - - /* - * Some areas in the middle of the kernel identity range - * are not mapped, for example the PCI space. - */ - if (!page_is_ram(base >> PAGE_SHIFT)) - return 0; - - id_sz = (__pa(high_memory-1) <= base + size) ? - __pa(high_memory) - base : size; - - if (ioremap_change_attr((unsigned long)__va(base), id_sz, pcm) < 0) { - pr_info("x86/PAT: %s:%d ioremap_change_attr failed %s for [mem %#010Lx-%#010Lx]\n", - current->comm, current->pid, - cattr_name(pcm), - base, (unsigned long long)(base + size-1)); - return -EINVAL; - } - return 0; -} - -/* - * Internal interface to reserve a range of physical memory with prot. - * Reserved non RAM regions only and after successful reserve_memtype, - * this func also keeps identity mapping (if any) in sync with this new prot. - */ -static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, - int strict_prot) -{ - int is_ram = 0; - int ret; - enum page_cache_mode want_pcm = pgprot2cachemode(*vma_prot); - enum page_cache_mode pcm = want_pcm; - - is_ram = pat_pagerange_is_ram(paddr, paddr + size); - - /* - * reserve_pfn_range() for RAM pages. We do not refcount to keep - * track of number of mappings of RAM pages. We can assert that - * the type requested matches the type of first page in the range. - */ - if (is_ram) { - if (!pat_enabled()) - return 0; - - pcm = lookup_memtype(paddr); - if (want_pcm != pcm) { - pr_warn("x86/PAT: %s:%d map pfn RAM range req %s for [mem %#010Lx-%#010Lx], got %s\n", - current->comm, current->pid, - cattr_name(want_pcm), - (unsigned long long)paddr, - (unsigned long long)(paddr + size - 1), - cattr_name(pcm)); - *vma_prot = __pgprot((pgprot_val(*vma_prot) & - (~_PAGE_CACHE_MASK)) | - cachemode2protval(pcm)); - } - return 0; - } - - ret = reserve_memtype(paddr, paddr + size, want_pcm, &pcm); - if (ret) - return ret; - - if (pcm != want_pcm) { - if (strict_prot || - !is_new_memtype_allowed(paddr, size, want_pcm, pcm)) { - free_memtype(paddr, paddr + size); - pr_err("x86/PAT: %s:%d map pfn expected mapping type %s for [mem %#010Lx-%#010Lx], got %s\n", - current->comm, current->pid, - cattr_name(want_pcm), - (unsigned long long)paddr, - (unsigned long long)(paddr + size - 1), - cattr_name(pcm)); - return -EINVAL; - } - /* - * We allow returning different type than the one requested in - * non strict case. - */ - *vma_prot = __pgprot((pgprot_val(*vma_prot) & - (~_PAGE_CACHE_MASK)) | - cachemode2protval(pcm)); - } - - if (kernel_map_sync_memtype(paddr, size, pcm) < 0) { - free_memtype(paddr, paddr + size); - return -EINVAL; - } - return 0; -} - -/* - * Internal interface to free a range of physical memory. - * Frees non RAM regions only. - */ -static void free_pfn_range(u64 paddr, unsigned long size) -{ - int is_ram; - - is_ram = pat_pagerange_is_ram(paddr, paddr + size); - if (is_ram == 0) - free_memtype(paddr, paddr + size); -} - -/* - * track_pfn_copy is called when vma that is covering the pfnmap gets - * copied through copy_page_range(). - * - * If the vma has a linear pfn mapping for the entire range, we get the prot - * from pte and reserve the entire vma range with single reserve_pfn_range call. - */ -int track_pfn_copy(struct vm_area_struct *vma) -{ - resource_size_t paddr; - unsigned long prot; - unsigned long vma_size = vma->vm_end - vma->vm_start; - pgprot_t pgprot; - - if (vma->vm_flags & VM_PAT) { - /* - * reserve the whole chunk covered by vma. We need the - * starting address and protection from pte. - */ - if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) { - WARN_ON_ONCE(1); - return -EINVAL; - } - pgprot = __pgprot(prot); - return reserve_pfn_range(paddr, vma_size, &pgprot, 1); - } - - return 0; -} - -/* - * prot is passed in as a parameter for the new mapping. If the vma has - * a linear pfn mapping for the entire range, or no vma is provided, - * reserve the entire pfn + size range with single reserve_pfn_range - * call. - */ -int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, - unsigned long pfn, unsigned long addr, unsigned long size) -{ - resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT; - enum page_cache_mode pcm; - - /* reserve the whole chunk starting from paddr */ - if (!vma || (addr == vma->vm_start - && size == (vma->vm_end - vma->vm_start))) { - int ret; - - ret = reserve_pfn_range(paddr, size, prot, 0); - if (ret == 0 && vma) - vma->vm_flags |= VM_PAT; - return ret; - } - - if (!pat_enabled()) - return 0; - - /* - * For anything smaller than the vma size we set prot based on the - * lookup. - */ - pcm = lookup_memtype(paddr); - - /* Check memtype for the remaining pages */ - while (size > PAGE_SIZE) { - size -= PAGE_SIZE; - paddr += PAGE_SIZE; - if (pcm != lookup_memtype(paddr)) - return -EINVAL; - } - - *prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) | - cachemode2protval(pcm)); - - return 0; -} - -void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, pfn_t pfn) -{ - enum page_cache_mode pcm; - - if (!pat_enabled()) - return; - - /* Set prot based on lookup */ - pcm = lookup_memtype(pfn_t_to_phys(pfn)); - *prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) | - cachemode2protval(pcm)); -} - -/* - * untrack_pfn is called while unmapping a pfnmap for a region. - * untrack can be called for a specific region indicated by pfn and size or - * can be for the entire vma (in which case pfn, size are zero). - */ -void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, - unsigned long size) -{ - resource_size_t paddr; - unsigned long prot; - - if (vma && !(vma->vm_flags & VM_PAT)) - return; - - /* free the chunk starting from pfn or the whole chunk */ - paddr = (resource_size_t)pfn << PAGE_SHIFT; - if (!paddr && !size) { - if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) { - WARN_ON_ONCE(1); - return; - } - - size = vma->vm_end - vma->vm_start; - } - free_pfn_range(paddr, size); - if (vma) - vma->vm_flags &= ~VM_PAT; -} - -/* - * untrack_pfn_moved is called, while mremapping a pfnmap for a new region, - * with the old vma after its pfnmap page table has been removed. The new - * vma has a new pfnmap to the same pfn & cache type with VM_PAT set. - */ -void untrack_pfn_moved(struct vm_area_struct *vma) -{ - vma->vm_flags &= ~VM_PAT; -} - -pgprot_t pgprot_writecombine(pgprot_t prot) -{ - return __pgprot(pgprot_val(prot) | - cachemode2protval(_PAGE_CACHE_MODE_WC)); -} -EXPORT_SYMBOL_GPL(pgprot_writecombine); - -pgprot_t pgprot_writethrough(pgprot_t prot) -{ - return __pgprot(pgprot_val(prot) | - cachemode2protval(_PAGE_CACHE_MODE_WT)); -} -EXPORT_SYMBOL_GPL(pgprot_writethrough); - -#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) - -/* - * We are allocating a temporary printout-entry to be passed - * between seq_start()/next() and seq_show(): - */ -static struct memtype *memtype_get_idx(loff_t pos) -{ - struct memtype *entry_print; - int ret; - - entry_print = kzalloc(sizeof(struct memtype), GFP_KERNEL); - if (!entry_print) - return NULL; - - spin_lock(&memtype_lock); - ret = memtype_copy_nth_element(entry_print, pos); - spin_unlock(&memtype_lock); - - /* Free it on error: */ - if (ret) { - kfree(entry_print); - return NULL; - } - - return entry_print; -} - -static void *memtype_seq_start(struct seq_file *seq, loff_t *pos) -{ - if (*pos == 0) { - ++*pos; - seq_puts(seq, "PAT memtype list:\n"); - } - - return memtype_get_idx(*pos); -} - -static void *memtype_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - ++*pos; - return memtype_get_idx(*pos); -} - -static void memtype_seq_stop(struct seq_file *seq, void *v) -{ -} - -static int memtype_seq_show(struct seq_file *seq, void *v) -{ - struct memtype *entry_print = (struct memtype *)v; - - seq_printf(seq, "PAT: [mem 0x%016Lx-0x%016Lx] %s\n", - entry_print->start, - entry_print->end, - cattr_name(entry_print->type)); - - kfree(entry_print); - - return 0; -} - -static const struct seq_operations memtype_seq_ops = { - .start = memtype_seq_start, - .next = memtype_seq_next, - .stop = memtype_seq_stop, - .show = memtype_seq_show, -}; - -static int memtype_seq_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &memtype_seq_ops); -} - -static const struct file_operations memtype_fops = { - .open = memtype_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static int __init pat_memtype_list_init(void) -{ - if (pat_enabled()) { - debugfs_create_file("pat_memtype_list", S_IRUSR, - arch_debugfs_dir, NULL, &memtype_fops); - } - return 0; -} -late_initcall(pat_memtype_list_init); - -#endif /* CONFIG_DEBUG_FS && CONFIG_X86_PAT */ diff --git a/arch/x86/mm/pat/Makefile b/arch/x86/mm/pat/Makefile new file mode 100644 index 000000000000..ea464c995161 --- /dev/null +++ b/arch/x86/mm/pat/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-y := set_memory.o memtype.o + +obj-$(CONFIG_X86_PAT) += memtype_interval.o diff --git a/arch/x86/mm/pat/cpa-test.c b/arch/x86/mm/pat/cpa-test.c new file mode 100644 index 000000000000..facce271e8b9 --- /dev/null +++ b/arch/x86/mm/pat/cpa-test.c @@ -0,0 +1,278 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * self test for change_page_attr. + * + * Clears the a test pte bit on random pages in the direct mapping, + * then reverts and compares page tables forwards and afterwards. + */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* + * Only print the results of the first pass: + */ +static __read_mostly int print = 1; + +enum { + NTEST = 3 * 100, + NPAGES = 100, +#ifdef CONFIG_X86_64 + LPS = (1 << PMD_SHIFT), +#elif defined(CONFIG_X86_PAE) + LPS = (1 << PMD_SHIFT), +#else + LPS = (1 << 22), +#endif + GPS = (1<<30) +}; + +#define PAGE_CPA_TEST __pgprot(_PAGE_CPA_TEST) + +static int pte_testbit(pte_t pte) +{ + return pte_flags(pte) & _PAGE_SOFTW1; +} + +struct split_state { + long lpg, gpg, spg, exec; + long min_exec, max_exec; +}; + +static int print_split(struct split_state *s) +{ + long i, expected, missed = 0; + int err = 0; + + s->lpg = s->gpg = s->spg = s->exec = 0; + s->min_exec = ~0UL; + s->max_exec = 0; + for (i = 0; i < max_pfn_mapped; ) { + unsigned long addr = (unsigned long)__va(i << PAGE_SHIFT); + unsigned int level; + pte_t *pte; + + pte = lookup_address(addr, &level); + if (!pte) { + missed++; + i++; + continue; + } + + if (level == PG_LEVEL_1G && sizeof(long) == 8) { + s->gpg++; + i += GPS/PAGE_SIZE; + } else if (level == PG_LEVEL_2M) { + if ((pte_val(*pte) & _PAGE_PRESENT) && !(pte_val(*pte) & _PAGE_PSE)) { + printk(KERN_ERR + "%lx level %d but not PSE %Lx\n", + addr, level, (u64)pte_val(*pte)); + err = 1; + } + s->lpg++; + i += LPS/PAGE_SIZE; + } else { + s->spg++; + i++; + } + if (!(pte_val(*pte) & _PAGE_NX)) { + s->exec++; + if (addr < s->min_exec) + s->min_exec = addr; + if (addr > s->max_exec) + s->max_exec = addr; + } + } + if (print) { + printk(KERN_INFO + " 4k %lu large %lu gb %lu x %lu[%lx-%lx] miss %lu\n", + s->spg, s->lpg, s->gpg, s->exec, + s->min_exec != ~0UL ? s->min_exec : 0, + s->max_exec, missed); + } + + expected = (s->gpg*GPS + s->lpg*LPS)/PAGE_SIZE + s->spg + missed; + if (expected != i) { + printk(KERN_ERR "CPA max_pfn_mapped %lu but expected %lu\n", + max_pfn_mapped, expected); + return 1; + } + return err; +} + +static unsigned long addr[NTEST]; +static unsigned int len[NTEST]; + +static struct page *pages[NPAGES]; +static unsigned long addrs[NPAGES]; + +/* Change the global bit on random pages in the direct mapping */ +static int pageattr_test(void) +{ + struct split_state sa, sb, sc; + unsigned long *bm; + pte_t *pte, pte0; + int failed = 0; + unsigned int level; + int i, k; + int err; + + if (print) + printk(KERN_INFO "CPA self-test:\n"); + + bm = vzalloc((max_pfn_mapped + 7) / 8); + if (!bm) { + printk(KERN_ERR "CPA Cannot vmalloc bitmap\n"); + return -ENOMEM; + } + + failed += print_split(&sa); + + for (i = 0; i < NTEST; i++) { + unsigned long pfn = prandom_u32() % max_pfn_mapped; + + addr[i] = (unsigned long)__va(pfn << PAGE_SHIFT); + len[i] = prandom_u32() % NPAGES; + len[i] = min_t(unsigned long, len[i], max_pfn_mapped - pfn - 1); + + if (len[i] == 0) + len[i] = 1; + + pte = NULL; + pte0 = pfn_pte(0, __pgprot(0)); /* shut gcc up */ + + for (k = 0; k < len[i]; k++) { + pte = lookup_address(addr[i] + k*PAGE_SIZE, &level); + if (!pte || pgprot_val(pte_pgprot(*pte)) == 0 || + !(pte_val(*pte) & _PAGE_PRESENT)) { + addr[i] = 0; + break; + } + if (k == 0) { + pte0 = *pte; + } else { + if (pgprot_val(pte_pgprot(*pte)) != + pgprot_val(pte_pgprot(pte0))) { + len[i] = k; + break; + } + } + if (test_bit(pfn + k, bm)) { + len[i] = k; + break; + } + __set_bit(pfn + k, bm); + addrs[k] = addr[i] + k*PAGE_SIZE; + pages[k] = pfn_to_page(pfn + k); + } + if (!addr[i] || !pte || !k) { + addr[i] = 0; + continue; + } + + switch (i % 3) { + case 0: + err = change_page_attr_set(&addr[i], len[i], PAGE_CPA_TEST, 0); + break; + + case 1: + err = change_page_attr_set(addrs, len[1], PAGE_CPA_TEST, 1); + break; + + case 2: + err = cpa_set_pages_array(pages, len[i], PAGE_CPA_TEST); + break; + } + + + if (err < 0) { + printk(KERN_ERR "CPA %d failed %d\n", i, err); + failed++; + } + + pte = lookup_address(addr[i], &level); + if (!pte || !pte_testbit(*pte) || pte_huge(*pte)) { + printk(KERN_ERR "CPA %lx: bad pte %Lx\n", addr[i], + pte ? (u64)pte_val(*pte) : 0ULL); + failed++; + } + if (level != PG_LEVEL_4K) { + printk(KERN_ERR "CPA %lx: unexpected level %d\n", + addr[i], level); + failed++; + } + + } + vfree(bm); + + failed += print_split(&sb); + + for (i = 0; i < NTEST; i++) { + if (!addr[i]) + continue; + pte = lookup_address(addr[i], &level); + if (!pte) { + printk(KERN_ERR "CPA lookup of %lx failed\n", addr[i]); + failed++; + continue; + } + err = change_page_attr_clear(&addr[i], len[i], PAGE_CPA_TEST, 0); + if (err < 0) { + printk(KERN_ERR "CPA reverting failed: %d\n", err); + failed++; + } + pte = lookup_address(addr[i], &level); + if (!pte || pte_testbit(*pte)) { + printk(KERN_ERR "CPA %lx: bad pte after revert %Lx\n", + addr[i], pte ? (u64)pte_val(*pte) : 0ULL); + failed++; + } + + } + + failed += print_split(&sc); + + if (failed) { + WARN(1, KERN_ERR "NOT PASSED. Please report.\n"); + return -EINVAL; + } else { + if (print) + printk(KERN_INFO "ok.\n"); + } + + return 0; +} + +static int do_pageattr_test(void *__unused) +{ + while (!kthread_should_stop()) { + schedule_timeout_interruptible(HZ*30); + if (pageattr_test() < 0) + break; + if (print) + print--; + } + return 0; +} + +static int start_pageattr_test(void) +{ + struct task_struct *p; + + p = kthread_create(do_pageattr_test, NULL, "pageattr-test"); + if (!IS_ERR(p)) + wake_up_process(p); + else + WARN_ON(1); + + return 0; +} +device_initcall(start_pageattr_test); diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c new file mode 100644 index 000000000000..76532f080795 --- /dev/null +++ b/arch/x86/mm/pat/memtype.c @@ -0,0 +1,1219 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Page Attribute Table (PAT) support: handle memory caching attributes in page tables. + * + * Authors: Venkatesh Pallipadi + * Suresh B Siddha + * + * Loosely based on earlier PAT patchset from Eric Biederman and Andi Kleen. + * + * Basic principles: + * + * PAT is a CPU feature supported by all modern x86 CPUs, to allow the firmware and + * the kernel to set one of a handful of 'caching type' attributes for physical + * memory ranges: uncached, write-combining, write-through, write-protected, + * and the most commonly used and default attribute: write-back caching. + * + * PAT support supercedes and augments MTRR support in a compatible fashion: MTRR is + * a hardware interface to enumerate a limited number of physical memory ranges + * and set their caching attributes explicitly, programmed into the CPU via MSRs. + * Even modern CPUs have MTRRs enabled - but these are typically not touched + * by the kernel or by user-space (such as the X server), we rely on PAT for any + * additional cache attribute logic. + * + * PAT doesn't work via explicit memory ranges, but uses page table entries to add + * cache attribute information to the mapped memory range: there's 3 bits used, + * (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT), with the 8 possible values mapped by the + * CPU to actual cache attributes via an MSR loaded into the CPU (MSR_IA32_CR_PAT). + * + * ( There's a metric ton of finer details, such as compatibility with CPU quirks + * that only support 4 types of PAT entries, and interaction with MTRRs, see + * below for details. ) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "memtype.h" +#include "../mm_internal.h" + +#undef pr_fmt +#define pr_fmt(fmt) "" fmt + +static bool __read_mostly pat_bp_initialized; +static bool __read_mostly pat_disabled = !IS_ENABLED(CONFIG_X86_PAT); +static bool __read_mostly pat_bp_enabled; +static bool __read_mostly pat_cm_initialized; + +/* + * PAT support is enabled by default, but can be disabled for + * various user-requested or hardware-forced reasons: + */ +void pat_disable(const char *msg_reason) +{ + if (pat_disabled) + return; + + if (pat_bp_initialized) { + WARN_ONCE(1, "x86/PAT: PAT cannot be disabled after initialization\n"); + return; + } + + pat_disabled = true; + pr_info("x86/PAT: %s\n", msg_reason); +} + +static int __init nopat(char *str) +{ + pat_disable("PAT support disabled via boot option."); + return 0; +} +early_param("nopat", nopat); + +bool pat_enabled(void) +{ + return pat_bp_enabled; +} +EXPORT_SYMBOL_GPL(pat_enabled); + +int pat_debug_enable; + +static int __init pat_debug_setup(char *str) +{ + pat_debug_enable = 1; + return 0; +} +__setup("debugpat", pat_debug_setup); + +#ifdef CONFIG_X86_PAT +/* + * X86 PAT uses page flags arch_1 and uncached together to keep track of + * memory type of pages that have backing page struct. + * + * X86 PAT supports 4 different memory types: + * - _PAGE_CACHE_MODE_WB + * - _PAGE_CACHE_MODE_WC + * - _PAGE_CACHE_MODE_UC_MINUS + * - _PAGE_CACHE_MODE_WT + * + * _PAGE_CACHE_MODE_WB is the default type. + */ + +#define _PGMT_WB 0 +#define _PGMT_WC (1UL << PG_arch_1) +#define _PGMT_UC_MINUS (1UL << PG_uncached) +#define _PGMT_WT (1UL << PG_uncached | 1UL << PG_arch_1) +#define _PGMT_MASK (1UL << PG_uncached | 1UL << PG_arch_1) +#define _PGMT_CLEAR_MASK (~_PGMT_MASK) + +static inline enum page_cache_mode get_page_memtype(struct page *pg) +{ + unsigned long pg_flags = pg->flags & _PGMT_MASK; + + if (pg_flags == _PGMT_WB) + return _PAGE_CACHE_MODE_WB; + else if (pg_flags == _PGMT_WC) + return _PAGE_CACHE_MODE_WC; + else if (pg_flags == _PGMT_UC_MINUS) + return _PAGE_CACHE_MODE_UC_MINUS; + else + return _PAGE_CACHE_MODE_WT; +} + +static inline void set_page_memtype(struct page *pg, + enum page_cache_mode memtype) +{ + unsigned long memtype_flags; + unsigned long old_flags; + unsigned long new_flags; + + switch (memtype) { + case _PAGE_CACHE_MODE_WC: + memtype_flags = _PGMT_WC; + break; + case _PAGE_CACHE_MODE_UC_MINUS: + memtype_flags = _PGMT_UC_MINUS; + break; + case _PAGE_CACHE_MODE_WT: + memtype_flags = _PGMT_WT; + break; + case _PAGE_CACHE_MODE_WB: + default: + memtype_flags = _PGMT_WB; + break; + } + + do { + old_flags = pg->flags; + new_flags = (old_flags & _PGMT_CLEAR_MASK) | memtype_flags; + } while (cmpxchg(&pg->flags, old_flags, new_flags) != old_flags); +} +#else +static inline enum page_cache_mode get_page_memtype(struct page *pg) +{ + return -1; +} +static inline void set_page_memtype(struct page *pg, + enum page_cache_mode memtype) +{ +} +#endif + +enum { + PAT_UC = 0, /* uncached */ + PAT_WC = 1, /* Write combining */ + PAT_WT = 4, /* Write Through */ + PAT_WP = 5, /* Write Protected */ + PAT_WB = 6, /* Write Back (default) */ + PAT_UC_MINUS = 7, /* UC, but can be overridden by MTRR */ +}; + +#define CM(c) (_PAGE_CACHE_MODE_ ## c) + +static enum page_cache_mode pat_get_cache_mode(unsigned pat_val, char *msg) +{ + enum page_cache_mode cache; + char *cache_mode; + + switch (pat_val) { + case PAT_UC: cache = CM(UC); cache_mode = "UC "; break; + case PAT_WC: cache = CM(WC); cache_mode = "WC "; break; + case PAT_WT: cache = CM(WT); cache_mode = "WT "; break; + case PAT_WP: cache = CM(WP); cache_mode = "WP "; break; + case PAT_WB: cache = CM(WB); cache_mode = "WB "; break; + case PAT_UC_MINUS: cache = CM(UC_MINUS); cache_mode = "UC- "; break; + default: cache = CM(WB); cache_mode = "WB "; break; + } + + memcpy(msg, cache_mode, 4); + + return cache; +} + +#undef CM + +/* + * Update the cache mode to pgprot translation tables according to PAT + * configuration. + * Using lower indices is preferred, so we start with highest index. + */ +static void __init_cache_modes(u64 pat) +{ + enum page_cache_mode cache; + char pat_msg[33]; + int i; + + WARN_ON_ONCE(pat_cm_initialized); + + pat_msg[32] = 0; + for (i = 7; i >= 0; i--) { + cache = pat_get_cache_mode((pat >> (i * 8)) & 7, + pat_msg + 4 * i); + update_cache_mode_entry(i, cache); + } + pr_info("x86/PAT: Configuration [0-7]: %s\n", pat_msg); + + pat_cm_initialized = true; +} + +#define PAT(x, y) ((u64)PAT_ ## y << ((x)*8)) + +static void pat_bp_init(u64 pat) +{ + u64 tmp_pat; + + if (!boot_cpu_has(X86_FEATURE_PAT)) { + pat_disable("PAT not supported by the CPU."); + return; + } + + rdmsrl(MSR_IA32_CR_PAT, tmp_pat); + if (!tmp_pat) { + pat_disable("PAT support disabled by the firmware."); + return; + } + + wrmsrl(MSR_IA32_CR_PAT, pat); + pat_bp_enabled = true; + + __init_cache_modes(pat); +} + +static void pat_ap_init(u64 pat) +{ + if (!boot_cpu_has(X86_FEATURE_PAT)) { + /* + * If this happens we are on a secondary CPU, but switched to + * PAT on the boot CPU. We have no way to undo PAT. + */ + panic("x86/PAT: PAT enabled, but not supported by secondary CPU\n"); + } + + wrmsrl(MSR_IA32_CR_PAT, pat); +} + +void init_cache_modes(void) +{ + u64 pat = 0; + + if (pat_cm_initialized) + return; + + if (boot_cpu_has(X86_FEATURE_PAT)) { + /* + * CPU supports PAT. Set PAT table to be consistent with + * PAT MSR. This case supports "nopat" boot option, and + * virtual machine environments which support PAT without + * MTRRs. In specific, Xen has unique setup to PAT MSR. + * + * If PAT MSR returns 0, it is considered invalid and emulates + * as No PAT. + */ + rdmsrl(MSR_IA32_CR_PAT, pat); + } + + if (!pat) { + /* + * No PAT. Emulate the PAT table that corresponds to the two + * cache bits, PWT (Write Through) and PCD (Cache Disable). + * This setup is also the same as the BIOS default setup. + * + * PTE encoding: + * + * PCD + * |PWT PAT + * || slot + * 00 0 WB : _PAGE_CACHE_MODE_WB + * 01 1 WT : _PAGE_CACHE_MODE_WT + * 10 2 UC-: _PAGE_CACHE_MODE_UC_MINUS + * 11 3 UC : _PAGE_CACHE_MODE_UC + * + * NOTE: When WC or WP is used, it is redirected to UC- per + * the default setup in __cachemode2pte_tbl[]. + */ + pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) | + PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC); + } + + __init_cache_modes(pat); +} + +/** + * pat_init - Initialize the PAT MSR and PAT table on the current CPU + * + * This function initializes PAT MSR and PAT table with an OS-defined value + * to enable additional cache attributes, WC, WT and WP. + * + * This function must be called on all CPUs using the specific sequence of + * operations defined in Intel SDM. mtrr_rendezvous_handler() provides this + * procedure for PAT. + */ +void pat_init(void) +{ + u64 pat; + struct cpuinfo_x86 *c = &boot_cpu_data; + +#ifndef CONFIG_X86_PAT + pr_info_once("x86/PAT: PAT support disabled because CONFIG_X86_PAT is disabled in the kernel.\n"); +#endif + + if (pat_disabled) + return; + + if ((c->x86_vendor == X86_VENDOR_INTEL) && + (((c->x86 == 0x6) && (c->x86_model <= 0xd)) || + ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) { + /* + * PAT support with the lower four entries. Intel Pentium 2, + * 3, M, and 4 are affected by PAT errata, which makes the + * upper four entries unusable. To be on the safe side, we don't + * use those. + * + * PTE encoding: + * PAT + * |PCD + * ||PWT PAT + * ||| slot + * 000 0 WB : _PAGE_CACHE_MODE_WB + * 001 1 WC : _PAGE_CACHE_MODE_WC + * 010 2 UC-: _PAGE_CACHE_MODE_UC_MINUS + * 011 3 UC : _PAGE_CACHE_MODE_UC + * PAT bit unused + * + * NOTE: When WT or WP is used, it is redirected to UC- per + * the default setup in __cachemode2pte_tbl[]. + */ + pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) | + PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC); + } else { + /* + * Full PAT support. We put WT in slot 7 to improve + * robustness in the presence of errata that might cause + * the high PAT bit to be ignored. This way, a buggy slot 7 + * access will hit slot 3, and slot 3 is UC, so at worst + * we lose performance without causing a correctness issue. + * Pentium 4 erratum N46 is an example for such an erratum, + * although we try not to use PAT at all on affected CPUs. + * + * PTE encoding: + * PAT + * |PCD + * ||PWT PAT + * ||| slot + * 000 0 WB : _PAGE_CACHE_MODE_WB + * 001 1 WC : _PAGE_CACHE_MODE_WC + * 010 2 UC-: _PAGE_CACHE_MODE_UC_MINUS + * 011 3 UC : _PAGE_CACHE_MODE_UC + * 100 4 WB : Reserved + * 101 5 WP : _PAGE_CACHE_MODE_WP + * 110 6 UC-: Reserved + * 111 7 WT : _PAGE_CACHE_MODE_WT + * + * The reserved slots are unused, but mapped to their + * corresponding types in the presence of PAT errata. + */ + pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) | + PAT(4, WB) | PAT(5, WP) | PAT(6, UC_MINUS) | PAT(7, WT); + } + + if (!pat_bp_initialized) { + pat_bp_init(pat); + pat_bp_initialized = true; + } else { + pat_ap_init(pat); + } +} + +#undef PAT + +static DEFINE_SPINLOCK(memtype_lock); /* protects memtype accesses */ + +/* + * Does intersection of PAT memory type and MTRR memory type and returns + * the resulting memory type as PAT understands it. + * (Type in pat and mtrr will not have same value) + * The intersection is based on "Effective Memory Type" tables in IA-32 + * SDM vol 3a + */ +static unsigned long pat_x_mtrr_type(u64 start, u64 end, + enum page_cache_mode req_type) +{ + /* + * Look for MTRR hint to get the effective type in case where PAT + * request is for WB. + */ + if (req_type == _PAGE_CACHE_MODE_WB) { + u8 mtrr_type, uniform; + + mtrr_type = mtrr_type_lookup(start, end, &uniform); + if (mtrr_type != MTRR_TYPE_WRBACK) + return _PAGE_CACHE_MODE_UC_MINUS; + + return _PAGE_CACHE_MODE_WB; + } + + return req_type; +} + +struct pagerange_state { + unsigned long cur_pfn; + int ram; + int not_ram; +}; + +static int +pagerange_is_ram_callback(unsigned long initial_pfn, unsigned long total_nr_pages, void *arg) +{ + struct pagerange_state *state = arg; + + state->not_ram |= initial_pfn > state->cur_pfn; + state->ram |= total_nr_pages > 0; + state->cur_pfn = initial_pfn + total_nr_pages; + + return state->ram && state->not_ram; +} + +static int pat_pagerange_is_ram(resource_size_t start, resource_size_t end) +{ + int ret = 0; + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long end_pfn = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; + struct pagerange_state state = {start_pfn, 0, 0}; + + /* + * For legacy reasons, physical address range in the legacy ISA + * region is tracked as non-RAM. This will allow users of + * /dev/mem to map portions of legacy ISA region, even when + * some of those portions are listed(or not even listed) with + * different e820 types(RAM/reserved/..) + */ + if (start_pfn < ISA_END_ADDRESS >> PAGE_SHIFT) + start_pfn = ISA_END_ADDRESS >> PAGE_SHIFT; + + if (start_pfn < end_pfn) { + ret = walk_system_ram_range(start_pfn, end_pfn - start_pfn, + &state, pagerange_is_ram_callback); + } + + return (ret > 0) ? -1 : (state.ram ? 1 : 0); +} + +/* + * For RAM pages, we use page flags to mark the pages with appropriate type. + * The page flags are limited to four types, WB (default), WC, WT and UC-. + * WP request fails with -EINVAL, and UC gets redirected to UC-. Setting + * a new memory type is only allowed for a page mapped with the default WB + * type. + * + * Here we do two passes: + * - Find the memtype of all the pages in the range, look for any conflicts. + * - In case of no conflicts, set the new memtype for pages in the range. + */ +static int reserve_ram_pages_type(u64 start, u64 end, + enum page_cache_mode req_type, + enum page_cache_mode *new_type) +{ + struct page *page; + u64 pfn; + + if (req_type == _PAGE_CACHE_MODE_WP) { + if (new_type) + *new_type = _PAGE_CACHE_MODE_UC_MINUS; + return -EINVAL; + } + + if (req_type == _PAGE_CACHE_MODE_UC) { + /* We do not support strong UC */ + WARN_ON_ONCE(1); + req_type = _PAGE_CACHE_MODE_UC_MINUS; + } + + for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { + enum page_cache_mode type; + + page = pfn_to_page(pfn); + type = get_page_memtype(page); + if (type != _PAGE_CACHE_MODE_WB) { + pr_info("x86/PAT: reserve_ram_pages_type failed [mem %#010Lx-%#010Lx], track 0x%x, req 0x%x\n", + start, end - 1, type, req_type); + if (new_type) + *new_type = type; + + return -EBUSY; + } + } + + if (new_type) + *new_type = req_type; + + for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { + page = pfn_to_page(pfn); + set_page_memtype(page, req_type); + } + return 0; +} + +static int free_ram_pages_type(u64 start, u64 end) +{ + struct page *page; + u64 pfn; + + for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { + page = pfn_to_page(pfn); + set_page_memtype(page, _PAGE_CACHE_MODE_WB); + } + return 0; +} + +static u64 sanitize_phys(u64 address) +{ + /* + * When changing the memtype for pages containing poison allow + * for a "decoy" virtual address (bit 63 clear) passed to + * set_memory_X(). __pa() on a "decoy" address results in a + * physical address with bit 63 set. + * + * Decoy addresses are not present for 32-bit builds, see + * set_mce_nospec(). + */ + if (IS_ENABLED(CONFIG_X86_64)) + return address & __PHYSICAL_MASK; + return address; +} + +/* + * req_type typically has one of the: + * - _PAGE_CACHE_MODE_WB + * - _PAGE_CACHE_MODE_WC + * - _PAGE_CACHE_MODE_UC_MINUS + * - _PAGE_CACHE_MODE_UC + * - _PAGE_CACHE_MODE_WT + * + * If new_type is NULL, function will return an error if it cannot reserve the + * region with req_type. If new_type is non-NULL, function will return + * available type in new_type in case of no error. In case of any error + * it will return a negative return value. + */ +int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type, + enum page_cache_mode *new_type) +{ + struct memtype *entry_new; + enum page_cache_mode actual_type; + int is_range_ram; + int err = 0; + + start = sanitize_phys(start); + end = sanitize_phys(end); + if (start >= end) { + WARN(1, "%s failed: [mem %#010Lx-%#010Lx], req %s\n", __func__, + start, end - 1, cattr_name(req_type)); + return -EINVAL; + } + + if (!pat_enabled()) { + /* This is identical to page table setting without PAT */ + if (new_type) + *new_type = req_type; + return 0; + } + + /* Low ISA region is always mapped WB in page table. No need to track */ + if (x86_platform.is_untracked_pat_range(start, end)) { + if (new_type) + *new_type = _PAGE_CACHE_MODE_WB; + return 0; + } + + /* + * Call mtrr_lookup to get the type hint. This is an + * optimization for /dev/mem mmap'ers into WB memory (BIOS + * tools and ACPI tools). Use WB request for WB memory and use + * UC_MINUS otherwise. + */ + actual_type = pat_x_mtrr_type(start, end, req_type); + + if (new_type) + *new_type = actual_type; + + is_range_ram = pat_pagerange_is_ram(start, end); + if (is_range_ram == 1) { + + err = reserve_ram_pages_type(start, end, req_type, new_type); + + return err; + } else if (is_range_ram < 0) { + return -EINVAL; + } + + entry_new = kzalloc(sizeof(struct memtype), GFP_KERNEL); + if (!entry_new) + return -ENOMEM; + + entry_new->start = start; + entry_new->end = end; + entry_new->type = actual_type; + + spin_lock(&memtype_lock); + + err = memtype_check_insert(entry_new, new_type); + if (err) { + pr_info("x86/PAT: reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n", + start, end - 1, + cattr_name(entry_new->type), cattr_name(req_type)); + kfree(entry_new); + spin_unlock(&memtype_lock); + + return err; + } + + spin_unlock(&memtype_lock); + + dprintk("reserve_memtype added [mem %#010Lx-%#010Lx], track %s, req %s, ret %s\n", + start, end - 1, cattr_name(entry_new->type), cattr_name(req_type), + new_type ? cattr_name(*new_type) : "-"); + + return err; +} + +int free_memtype(u64 start, u64 end) +{ + int is_range_ram; + struct memtype *entry_old; + + if (!pat_enabled()) + return 0; + + start = sanitize_phys(start); + end = sanitize_phys(end); + + /* Low ISA region is always mapped WB. No need to track */ + if (x86_platform.is_untracked_pat_range(start, end)) + return 0; + + is_range_ram = pat_pagerange_is_ram(start, end); + if (is_range_ram == 1) + return free_ram_pages_type(start, end); + if (is_range_ram < 0) + return -EINVAL; + + spin_lock(&memtype_lock); + entry_old = memtype_erase(start, end); + spin_unlock(&memtype_lock); + + if (IS_ERR(entry_old)) { + pr_info("x86/PAT: %s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n", + current->comm, current->pid, start, end - 1); + return -EINVAL; + } + + kfree(entry_old); + + dprintk("free_memtype request [mem %#010Lx-%#010Lx]\n", start, end - 1); + + return 0; +} + + +/** + * lookup_memtype - Looksup the memory type for a physical address + * @paddr: physical address of which memory type needs to be looked up + * + * Only to be called when PAT is enabled + * + * Returns _PAGE_CACHE_MODE_WB, _PAGE_CACHE_MODE_WC, _PAGE_CACHE_MODE_UC_MINUS + * or _PAGE_CACHE_MODE_WT. + */ +static enum page_cache_mode lookup_memtype(u64 paddr) +{ + enum page_cache_mode rettype = _PAGE_CACHE_MODE_WB; + struct memtype *entry; + + if (x86_platform.is_untracked_pat_range(paddr, paddr + PAGE_SIZE)) + return rettype; + + if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) { + struct page *page; + + page = pfn_to_page(paddr >> PAGE_SHIFT); + return get_page_memtype(page); + } + + spin_lock(&memtype_lock); + + entry = memtype_lookup(paddr); + if (entry != NULL) + rettype = entry->type; + else + rettype = _PAGE_CACHE_MODE_UC_MINUS; + + spin_unlock(&memtype_lock); + + return rettype; +} + +/** + * pat_pfn_immune_to_uc_mtrr - Check whether the PAT memory type + * of @pfn cannot be overridden by UC MTRR memory type. + * + * Only to be called when PAT is enabled. + * + * Returns true, if the PAT memory type of @pfn is UC, UC-, or WC. + * Returns false in other cases. + */ +bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn) +{ + enum page_cache_mode cm = lookup_memtype(PFN_PHYS(pfn)); + + return cm == _PAGE_CACHE_MODE_UC || + cm == _PAGE_CACHE_MODE_UC_MINUS || + cm == _PAGE_CACHE_MODE_WC; +} +EXPORT_SYMBOL_GPL(pat_pfn_immune_to_uc_mtrr); + +/** + * io_reserve_memtype - Request a memory type mapping for a region of memory + * @start: start (physical address) of the region + * @end: end (physical address) of the region + * @type: A pointer to memtype, with requested type. On success, requested + * or any other compatible type that was available for the region is returned + * + * On success, returns 0 + * On failure, returns non-zero + */ +int io_reserve_memtype(resource_size_t start, resource_size_t end, + enum page_cache_mode *type) +{ + resource_size_t size = end - start; + enum page_cache_mode req_type = *type; + enum page_cache_mode new_type; + int ret; + + WARN_ON_ONCE(iomem_map_sanity_check(start, size)); + + ret = reserve_memtype(start, end, req_type, &new_type); + if (ret) + goto out_err; + + if (!is_new_memtype_allowed(start, size, req_type, new_type)) + goto out_free; + + if (kernel_map_sync_memtype(start, size, new_type) < 0) + goto out_free; + + *type = new_type; + return 0; + +out_free: + free_memtype(start, end); + ret = -EBUSY; +out_err: + return ret; +} + +/** + * io_free_memtype - Release a memory type mapping for a region of memory + * @start: start (physical address) of the region + * @end: end (physical address) of the region + */ +void io_free_memtype(resource_size_t start, resource_size_t end) +{ + free_memtype(start, end); +} + +int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size) +{ + enum page_cache_mode type = _PAGE_CACHE_MODE_WC; + + return io_reserve_memtype(start, start + size, &type); +} +EXPORT_SYMBOL(arch_io_reserve_memtype_wc); + +void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size) +{ + io_free_memtype(start, start + size); +} +EXPORT_SYMBOL(arch_io_free_memtype_wc); + +pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot) +{ + if (!phys_mem_access_encrypted(pfn << PAGE_SHIFT, size)) + vma_prot = pgprot_decrypted(vma_prot); + + return vma_prot; +} + +#ifdef CONFIG_STRICT_DEVMEM +/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM */ +static inline int range_is_allowed(unsigned long pfn, unsigned long size) +{ + return 1; +} +#else +/* This check is needed to avoid cache aliasing when PAT is enabled */ +static inline int range_is_allowed(unsigned long pfn, unsigned long size) +{ + u64 from = ((u64)pfn) << PAGE_SHIFT; + u64 to = from + size; + u64 cursor = from; + + if (!pat_enabled()) + return 1; + + while (cursor < to) { + if (!devmem_is_allowed(pfn)) + return 0; + cursor += PAGE_SIZE; + pfn++; + } + return 1; +} +#endif /* CONFIG_STRICT_DEVMEM */ + +int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t *vma_prot) +{ + enum page_cache_mode pcm = _PAGE_CACHE_MODE_WB; + + if (!range_is_allowed(pfn, size)) + return 0; + + if (file->f_flags & O_DSYNC) + pcm = _PAGE_CACHE_MODE_UC_MINUS; + + *vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) | + cachemode2protval(pcm)); + return 1; +} + +/* + * Change the memory type for the physical address range in kernel identity + * mapping space if that range is a part of identity map. + */ +int kernel_map_sync_memtype(u64 base, unsigned long size, + enum page_cache_mode pcm) +{ + unsigned long id_sz; + + if (base > __pa(high_memory-1)) + return 0; + + /* + * Some areas in the middle of the kernel identity range + * are not mapped, for example the PCI space. + */ + if (!page_is_ram(base >> PAGE_SHIFT)) + return 0; + + id_sz = (__pa(high_memory-1) <= base + size) ? + __pa(high_memory) - base : size; + + if (ioremap_change_attr((unsigned long)__va(base), id_sz, pcm) < 0) { + pr_info("x86/PAT: %s:%d ioremap_change_attr failed %s for [mem %#010Lx-%#010Lx]\n", + current->comm, current->pid, + cattr_name(pcm), + base, (unsigned long long)(base + size-1)); + return -EINVAL; + } + return 0; +} + +/* + * Internal interface to reserve a range of physical memory with prot. + * Reserved non RAM regions only and after successful reserve_memtype, + * this func also keeps identity mapping (if any) in sync with this new prot. + */ +static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, + int strict_prot) +{ + int is_ram = 0; + int ret; + enum page_cache_mode want_pcm = pgprot2cachemode(*vma_prot); + enum page_cache_mode pcm = want_pcm; + + is_ram = pat_pagerange_is_ram(paddr, paddr + size); + + /* + * reserve_pfn_range() for RAM pages. We do not refcount to keep + * track of number of mappings of RAM pages. We can assert that + * the type requested matches the type of first page in the range. + */ + if (is_ram) { + if (!pat_enabled()) + return 0; + + pcm = lookup_memtype(paddr); + if (want_pcm != pcm) { + pr_warn("x86/PAT: %s:%d map pfn RAM range req %s for [mem %#010Lx-%#010Lx], got %s\n", + current->comm, current->pid, + cattr_name(want_pcm), + (unsigned long long)paddr, + (unsigned long long)(paddr + size - 1), + cattr_name(pcm)); + *vma_prot = __pgprot((pgprot_val(*vma_prot) & + (~_PAGE_CACHE_MASK)) | + cachemode2protval(pcm)); + } + return 0; + } + + ret = reserve_memtype(paddr, paddr + size, want_pcm, &pcm); + if (ret) + return ret; + + if (pcm != want_pcm) { + if (strict_prot || + !is_new_memtype_allowed(paddr, size, want_pcm, pcm)) { + free_memtype(paddr, paddr + size); + pr_err("x86/PAT: %s:%d map pfn expected mapping type %s for [mem %#010Lx-%#010Lx], got %s\n", + current->comm, current->pid, + cattr_name(want_pcm), + (unsigned long long)paddr, + (unsigned long long)(paddr + size - 1), + cattr_name(pcm)); + return -EINVAL; + } + /* + * We allow returning different type than the one requested in + * non strict case. + */ + *vma_prot = __pgprot((pgprot_val(*vma_prot) & + (~_PAGE_CACHE_MASK)) | + cachemode2protval(pcm)); + } + + if (kernel_map_sync_memtype(paddr, size, pcm) < 0) { + free_memtype(paddr, paddr + size); + return -EINVAL; + } + return 0; +} + +/* + * Internal interface to free a range of physical memory. + * Frees non RAM regions only. + */ +static void free_pfn_range(u64 paddr, unsigned long size) +{ + int is_ram; + + is_ram = pat_pagerange_is_ram(paddr, paddr + size); + if (is_ram == 0) + free_memtype(paddr, paddr + size); +} + +/* + * track_pfn_copy is called when vma that is covering the pfnmap gets + * copied through copy_page_range(). + * + * If the vma has a linear pfn mapping for the entire range, we get the prot + * from pte and reserve the entire vma range with single reserve_pfn_range call. + */ +int track_pfn_copy(struct vm_area_struct *vma) +{ + resource_size_t paddr; + unsigned long prot; + unsigned long vma_size = vma->vm_end - vma->vm_start; + pgprot_t pgprot; + + if (vma->vm_flags & VM_PAT) { + /* + * reserve the whole chunk covered by vma. We need the + * starting address and protection from pte. + */ + if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) { + WARN_ON_ONCE(1); + return -EINVAL; + } + pgprot = __pgprot(prot); + return reserve_pfn_range(paddr, vma_size, &pgprot, 1); + } + + return 0; +} + +/* + * prot is passed in as a parameter for the new mapping. If the vma has + * a linear pfn mapping for the entire range, or no vma is provided, + * reserve the entire pfn + size range with single reserve_pfn_range + * call. + */ +int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, + unsigned long pfn, unsigned long addr, unsigned long size) +{ + resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT; + enum page_cache_mode pcm; + + /* reserve the whole chunk starting from paddr */ + if (!vma || (addr == vma->vm_start + && size == (vma->vm_end - vma->vm_start))) { + int ret; + + ret = reserve_pfn_range(paddr, size, prot, 0); + if (ret == 0 && vma) + vma->vm_flags |= VM_PAT; + return ret; + } + + if (!pat_enabled()) + return 0; + + /* + * For anything smaller than the vma size we set prot based on the + * lookup. + */ + pcm = lookup_memtype(paddr); + + /* Check memtype for the remaining pages */ + while (size > PAGE_SIZE) { + size -= PAGE_SIZE; + paddr += PAGE_SIZE; + if (pcm != lookup_memtype(paddr)) + return -EINVAL; + } + + *prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) | + cachemode2protval(pcm)); + + return 0; +} + +void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, pfn_t pfn) +{ + enum page_cache_mode pcm; + + if (!pat_enabled()) + return; + + /* Set prot based on lookup */ + pcm = lookup_memtype(pfn_t_to_phys(pfn)); + *prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) | + cachemode2protval(pcm)); +} + +/* + * untrack_pfn is called while unmapping a pfnmap for a region. + * untrack can be called for a specific region indicated by pfn and size or + * can be for the entire vma (in which case pfn, size are zero). + */ +void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, + unsigned long size) +{ + resource_size_t paddr; + unsigned long prot; + + if (vma && !(vma->vm_flags & VM_PAT)) + return; + + /* free the chunk starting from pfn or the whole chunk */ + paddr = (resource_size_t)pfn << PAGE_SHIFT; + if (!paddr && !size) { + if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) { + WARN_ON_ONCE(1); + return; + } + + size = vma->vm_end - vma->vm_start; + } + free_pfn_range(paddr, size); + if (vma) + vma->vm_flags &= ~VM_PAT; +} + +/* + * untrack_pfn_moved is called, while mremapping a pfnmap for a new region, + * with the old vma after its pfnmap page table has been removed. The new + * vma has a new pfnmap to the same pfn & cache type with VM_PAT set. + */ +void untrack_pfn_moved(struct vm_area_struct *vma) +{ + vma->vm_flags &= ~VM_PAT; +} + +pgprot_t pgprot_writecombine(pgprot_t prot) +{ + return __pgprot(pgprot_val(prot) | + cachemode2protval(_PAGE_CACHE_MODE_WC)); +} +EXPORT_SYMBOL_GPL(pgprot_writecombine); + +pgprot_t pgprot_writethrough(pgprot_t prot) +{ + return __pgprot(pgprot_val(prot) | + cachemode2protval(_PAGE_CACHE_MODE_WT)); +} +EXPORT_SYMBOL_GPL(pgprot_writethrough); + +#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) + +/* + * We are allocating a temporary printout-entry to be passed + * between seq_start()/next() and seq_show(): + */ +static struct memtype *memtype_get_idx(loff_t pos) +{ + struct memtype *entry_print; + int ret; + + entry_print = kzalloc(sizeof(struct memtype), GFP_KERNEL); + if (!entry_print) + return NULL; + + spin_lock(&memtype_lock); + ret = memtype_copy_nth_element(entry_print, pos); + spin_unlock(&memtype_lock); + + /* Free it on error: */ + if (ret) { + kfree(entry_print); + return NULL; + } + + return entry_print; +} + +static void *memtype_seq_start(struct seq_file *seq, loff_t *pos) +{ + if (*pos == 0) { + ++*pos; + seq_puts(seq, "PAT memtype list:\n"); + } + + return memtype_get_idx(*pos); +} + +static void *memtype_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + ++*pos; + return memtype_get_idx(*pos); +} + +static void memtype_seq_stop(struct seq_file *seq, void *v) +{ +} + +static int memtype_seq_show(struct seq_file *seq, void *v) +{ + struct memtype *entry_print = (struct memtype *)v; + + seq_printf(seq, "PAT: [mem 0x%016Lx-0x%016Lx] %s\n", + entry_print->start, + entry_print->end, + cattr_name(entry_print->type)); + + kfree(entry_print); + + return 0; +} + +static const struct seq_operations memtype_seq_ops = { + .start = memtype_seq_start, + .next = memtype_seq_next, + .stop = memtype_seq_stop, + .show = memtype_seq_show, +}; + +static int memtype_seq_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &memtype_seq_ops); +} + +static const struct file_operations memtype_fops = { + .open = memtype_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int __init pat_memtype_list_init(void) +{ + if (pat_enabled()) { + debugfs_create_file("pat_memtype_list", S_IRUSR, + arch_debugfs_dir, NULL, &memtype_fops); + } + return 0; +} +late_initcall(pat_memtype_list_init); + +#endif /* CONFIG_DEBUG_FS && CONFIG_X86_PAT */ diff --git a/arch/x86/mm/pat/memtype.h b/arch/x86/mm/pat/memtype.h new file mode 100644 index 000000000000..cacecdbceb55 --- /dev/null +++ b/arch/x86/mm/pat/memtype.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __MEMTYPE_H_ +#define __MEMTYPE_H_ + +extern int pat_debug_enable; + +#define dprintk(fmt, arg...) \ + do { if (pat_debug_enable) pr_info("x86/PAT: " fmt, ##arg); } while (0) + +struct memtype { + u64 start; + u64 end; + u64 subtree_max_end; + enum page_cache_mode type; + struct rb_node rb; +}; + +static inline char *cattr_name(enum page_cache_mode pcm) +{ + switch (pcm) { + case _PAGE_CACHE_MODE_UC: return "uncached"; + case _PAGE_CACHE_MODE_UC_MINUS: return "uncached-minus"; + case _PAGE_CACHE_MODE_WB: return "write-back"; + case _PAGE_CACHE_MODE_WC: return "write-combining"; + case _PAGE_CACHE_MODE_WT: return "write-through"; + case _PAGE_CACHE_MODE_WP: return "write-protected"; + default: return "broken"; + } +} + +#ifdef CONFIG_X86_PAT +extern int memtype_check_insert(struct memtype *entry_new, + enum page_cache_mode *new_type); +extern struct memtype *memtype_erase(u64 start, u64 end); +extern struct memtype *memtype_lookup(u64 addr); +extern int memtype_copy_nth_element(struct memtype *entry_out, loff_t pos); +#else +static inline int memtype_check_insert(struct memtype *entry_new, + enum page_cache_mode *new_type) +{ return 0; } +static inline struct memtype *memtype_erase(u64 start, u64 end) +{ return NULL; } +static inline struct memtype *memtype_lookup(u64 addr) +{ return NULL; } +static inline int memtype_copy_nth_element(struct memtype *out, loff_t pos) +{ return 0; } +#endif + +#endif /* __MEMTYPE_H_ */ diff --git a/arch/x86/mm/pat/memtype_interval.c b/arch/x86/mm/pat/memtype_interval.c new file mode 100644 index 000000000000..a7fbbdd88eeb --- /dev/null +++ b/arch/x86/mm/pat/memtype_interval.c @@ -0,0 +1,194 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Handle caching attributes in page tables (PAT) + * + * Authors: Venkatesh Pallipadi + * Suresh B Siddha + * + * Interval tree used to store the PAT memory type reservations. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "memtype.h" + +/* + * The memtype tree keeps track of memory type for specific + * physical memory areas. Without proper tracking, conflicting memory + * types in different mappings can cause CPU cache corruption. + * + * The tree is an interval tree (augmented rbtree) which tree is ordered + * by the starting address. The tree can contain multiple entries for + * different regions which overlap. All the aliases have the same + * cache attributes of course, as enforced by the PAT logic. + * + * memtype_lock protects the rbtree. + */ + +static inline u64 interval_start(struct memtype *entry) +{ + return entry->start; +} + +static inline u64 interval_end(struct memtype *entry) +{ + return entry->end - 1; +} + +INTERVAL_TREE_DEFINE(struct memtype, rb, u64, subtree_max_end, + interval_start, interval_end, + static, interval) + +static struct rb_root_cached memtype_rbroot = RB_ROOT_CACHED; + +enum { + MEMTYPE_EXACT_MATCH = 0, + MEMTYPE_END_MATCH = 1 +}; + +static struct memtype *memtype_match(u64 start, u64 end, int match_type) +{ + struct memtype *entry_match; + + entry_match = interval_iter_first(&memtype_rbroot, start, end-1); + + while (entry_match != NULL && entry_match->start < end) { + if ((match_type == MEMTYPE_EXACT_MATCH) && + (entry_match->start == start) && (entry_match->end == end)) + return entry_match; + + if ((match_type == MEMTYPE_END_MATCH) && + (entry_match->start < start) && (entry_match->end == end)) + return entry_match; + + entry_match = interval_iter_next(entry_match, start, end-1); + } + + return NULL; /* Returns NULL if there is no match */ +} + +static int memtype_check_conflict(u64 start, u64 end, + enum page_cache_mode reqtype, + enum page_cache_mode *newtype) +{ + struct memtype *entry_match; + enum page_cache_mode found_type = reqtype; + + entry_match = interval_iter_first(&memtype_rbroot, start, end-1); + if (entry_match == NULL) + goto success; + + if (entry_match->type != found_type && newtype == NULL) + goto failure; + + dprintk("Overlap at 0x%Lx-0x%Lx\n", entry_match->start, entry_match->end); + found_type = entry_match->type; + + entry_match = interval_iter_next(entry_match, start, end-1); + while (entry_match) { + if (entry_match->type != found_type) + goto failure; + + entry_match = interval_iter_next(entry_match, start, end-1); + } +success: + if (newtype) + *newtype = found_type; + + return 0; + +failure: + pr_info("x86/PAT: %s:%d conflicting memory types %Lx-%Lx %s<->%s\n", + current->comm, current->pid, start, end, + cattr_name(found_type), cattr_name(entry_match->type)); + + return -EBUSY; +} + +int memtype_check_insert(struct memtype *entry_new, enum page_cache_mode *ret_type) +{ + int err = 0; + + err = memtype_check_conflict(entry_new->start, entry_new->end, entry_new->type, ret_type); + if (err) + return err; + + if (ret_type) + entry_new->type = *ret_type; + + interval_insert(entry_new, &memtype_rbroot); + return 0; +} + +struct memtype *memtype_erase(u64 start, u64 end) +{ + struct memtype *entry_old; + + /* + * Since the memtype_rbroot tree allows overlapping ranges, + * memtype_erase() checks with EXACT_MATCH first, i.e. free + * a whole node for the munmap case. If no such entry is found, + * it then checks with END_MATCH, i.e. shrink the size of a node + * from the end for the mremap case. + */ + entry_old = memtype_match(start, end, MEMTYPE_EXACT_MATCH); + if (!entry_old) { + entry_old = memtype_match(start, end, MEMTYPE_END_MATCH); + if (!entry_old) + return ERR_PTR(-EINVAL); + } + + if (entry_old->start == start) { + /* munmap: erase this node */ + interval_remove(entry_old, &memtype_rbroot); + } else { + /* mremap: update the end value of this node */ + interval_remove(entry_old, &memtype_rbroot); + entry_old->end = start; + interval_insert(entry_old, &memtype_rbroot); + + return NULL; + } + + return entry_old; +} + +struct memtype *memtype_lookup(u64 addr) +{ + return interval_iter_first(&memtype_rbroot, addr, addr + PAGE_SIZE-1); +} + +/* + * Debugging helper, copy the Nth entry of the tree into a + * a copy for printout. This allows us to print out the tree + * via debugfs, without holding the memtype_lock too long: + */ +#ifdef CONFIG_DEBUG_FS +int memtype_copy_nth_element(struct memtype *entry_out, loff_t pos) +{ + struct memtype *entry_match; + int i = 1; + + entry_match = interval_iter_first(&memtype_rbroot, 0, ULONG_MAX); + + while (entry_match && pos != i) { + entry_match = interval_iter_next(entry_match, 0, ULONG_MAX); + i++; + } + + if (entry_match) { /* pos == i */ + *entry_out = *entry_match; + return 0; + } else { + return 1; + } +} +#endif diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c new file mode 100644 index 000000000000..8fbefee6fc6d --- /dev/null +++ b/arch/x86/mm/pat/set_memory.c @@ -0,0 +1,2285 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2002 Andi Kleen, SuSE Labs. + * Thanks to Ben LaHaise for precious feedback. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../mm_internal.h" + +/* + * The current flushing context - we pass it instead of 5 arguments: + */ +struct cpa_data { + unsigned long *vaddr; + pgd_t *pgd; + pgprot_t mask_set; + pgprot_t mask_clr; + unsigned long numpages; + unsigned long curpage; + unsigned long pfn; + unsigned int flags; + unsigned int force_split : 1, + force_static_prot : 1; + struct page **pages; +}; + +enum cpa_warn { + CPA_CONFLICT, + CPA_PROTECT, + CPA_DETECT, +}; + +static const int cpa_warn_level = CPA_PROTECT; + +/* + * Serialize cpa() (for !DEBUG_PAGEALLOC which uses large identity mappings) + * using cpa_lock. So that we don't allow any other cpu, with stale large tlb + * entries change the page attribute in parallel to some other cpu + * splitting a large page entry along with changing the attribute. + */ +static DEFINE_SPINLOCK(cpa_lock); + +#define CPA_FLUSHTLB 1 +#define CPA_ARRAY 2 +#define CPA_PAGES_ARRAY 4 +#define CPA_NO_CHECK_ALIAS 8 /* Do not search for aliases */ + +#ifdef CONFIG_PROC_FS +static unsigned long direct_pages_count[PG_LEVEL_NUM]; + +void update_page_count(int level, unsigned long pages) +{ + /* Protect against CPA */ + spin_lock(&pgd_lock); + direct_pages_count[level] += pages; + spin_unlock(&pgd_lock); +} + +static void split_page_count(int level) +{ + if (direct_pages_count[level] == 0) + return; + + direct_pages_count[level]--; + direct_pages_count[level - 1] += PTRS_PER_PTE; +} + +void arch_report_meminfo(struct seq_file *m) +{ + seq_printf(m, "DirectMap4k: %8lu kB\n", + direct_pages_count[PG_LEVEL_4K] << 2); +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) + seq_printf(m, "DirectMap2M: %8lu kB\n", + direct_pages_count[PG_LEVEL_2M] << 11); +#else + seq_printf(m, "DirectMap4M: %8lu kB\n", + direct_pages_count[PG_LEVEL_2M] << 12); +#endif + if (direct_gbpages) + seq_printf(m, "DirectMap1G: %8lu kB\n", + direct_pages_count[PG_LEVEL_1G] << 20); +} +#else +static inline void split_page_count(int level) { } +#endif + +#ifdef CONFIG_X86_CPA_STATISTICS + +static unsigned long cpa_1g_checked; +static unsigned long cpa_1g_sameprot; +static unsigned long cpa_1g_preserved; +static unsigned long cpa_2m_checked; +static unsigned long cpa_2m_sameprot; +static unsigned long cpa_2m_preserved; +static unsigned long cpa_4k_install; + +static inline void cpa_inc_1g_checked(void) +{ + cpa_1g_checked++; +} + +static inline void cpa_inc_2m_checked(void) +{ + cpa_2m_checked++; +} + +static inline void cpa_inc_4k_install(void) +{ + cpa_4k_install++; +} + +static inline void cpa_inc_lp_sameprot(int level) +{ + if (level == PG_LEVEL_1G) + cpa_1g_sameprot++; + else + cpa_2m_sameprot++; +} + +static inline void cpa_inc_lp_preserved(int level) +{ + if (level == PG_LEVEL_1G) + cpa_1g_preserved++; + else + cpa_2m_preserved++; +} + +static int cpastats_show(struct seq_file *m, void *p) +{ + seq_printf(m, "1G pages checked: %16lu\n", cpa_1g_checked); + seq_printf(m, "1G pages sameprot: %16lu\n", cpa_1g_sameprot); + seq_printf(m, "1G pages preserved: %16lu\n", cpa_1g_preserved); + seq_printf(m, "2M pages checked: %16lu\n", cpa_2m_checked); + seq_printf(m, "2M pages sameprot: %16lu\n", cpa_2m_sameprot); + seq_printf(m, "2M pages preserved: %16lu\n", cpa_2m_preserved); + seq_printf(m, "4K pages set-checked: %16lu\n", cpa_4k_install); + return 0; +} + +static int cpastats_open(struct inode *inode, struct file *file) +{ + return single_open(file, cpastats_show, NULL); +} + +static const struct file_operations cpastats_fops = { + .open = cpastats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init cpa_stats_init(void) +{ + debugfs_create_file("cpa_stats", S_IRUSR, arch_debugfs_dir, NULL, + &cpastats_fops); + return 0; +} +late_initcall(cpa_stats_init); +#else +static inline void cpa_inc_1g_checked(void) { } +static inline void cpa_inc_2m_checked(void) { } +static inline void cpa_inc_4k_install(void) { } +static inline void cpa_inc_lp_sameprot(int level) { } +static inline void cpa_inc_lp_preserved(int level) { } +#endif + + +static inline int +within(unsigned long addr, unsigned long start, unsigned long end) +{ + return addr >= start && addr < end; +} + +static inline int +within_inclusive(unsigned long addr, unsigned long start, unsigned long end) +{ + return addr >= start && addr <= end; +} + +#ifdef CONFIG_X86_64 + +static inline unsigned long highmap_start_pfn(void) +{ + return __pa_symbol(_text) >> PAGE_SHIFT; +} + +static inline unsigned long highmap_end_pfn(void) +{ + /* Do not reference physical address outside the kernel. */ + return __pa_symbol(roundup(_brk_end, PMD_SIZE) - 1) >> PAGE_SHIFT; +} + +static bool __cpa_pfn_in_highmap(unsigned long pfn) +{ + /* + * Kernel text has an alias mapping at a high address, known + * here as "highmap". + */ + return within_inclusive(pfn, highmap_start_pfn(), highmap_end_pfn()); +} + +#else + +static bool __cpa_pfn_in_highmap(unsigned long pfn) +{ + /* There is no highmap on 32-bit */ + return false; +} + +#endif + +/* + * See set_mce_nospec(). + * + * Machine check recovery code needs to change cache mode of poisoned pages to + * UC to avoid speculative access logging another error. But passing the + * address of the 1:1 mapping to set_memory_uc() is a fine way to encourage a + * speculative access. So we cheat and flip the top bit of the address. This + * works fine for the code that updates the page tables. But at the end of the + * process we need to flush the TLB and cache and the non-canonical address + * causes a #GP fault when used by the INVLPG and CLFLUSH instructions. + * + * But in the common case we already have a canonical address. This code + * will fix the top bit if needed and is a no-op otherwise. + */ +static inline unsigned long fix_addr(unsigned long addr) +{ +#ifdef CONFIG_X86_64 + return (long)(addr << 1) >> 1; +#else + return addr; +#endif +} + +static unsigned long __cpa_addr(struct cpa_data *cpa, unsigned long idx) +{ + if (cpa->flags & CPA_PAGES_ARRAY) { + struct page *page = cpa->pages[idx]; + + if (unlikely(PageHighMem(page))) + return 0; + + return (unsigned long)page_address(page); + } + + if (cpa->flags & CPA_ARRAY) + return cpa->vaddr[idx]; + + return *cpa->vaddr + idx * PAGE_SIZE; +} + +/* + * Flushing functions + */ + +static void clflush_cache_range_opt(void *vaddr, unsigned int size) +{ + const unsigned long clflush_size = boot_cpu_data.x86_clflush_size; + void *p = (void *)((unsigned long)vaddr & ~(clflush_size - 1)); + void *vend = vaddr + size; + + if (p >= vend) + return; + + for (; p < vend; p += clflush_size) + clflushopt(p); +} + +/** + * clflush_cache_range - flush a cache range with clflush + * @vaddr: virtual start address + * @size: number of bytes to flush + * + * CLFLUSHOPT is an unordered instruction which needs fencing with MFENCE or + * SFENCE to avoid ordering issues. + */ +void clflush_cache_range(void *vaddr, unsigned int size) +{ + mb(); + clflush_cache_range_opt(vaddr, size); + mb(); +} +EXPORT_SYMBOL_GPL(clflush_cache_range); + +void arch_invalidate_pmem(void *addr, size_t size) +{ + clflush_cache_range(addr, size); +} +EXPORT_SYMBOL_GPL(arch_invalidate_pmem); + +static void __cpa_flush_all(void *arg) +{ + unsigned long cache = (unsigned long)arg; + + /* + * Flush all to work around Errata in early athlons regarding + * large page flushing. + */ + __flush_tlb_all(); + + if (cache && boot_cpu_data.x86 >= 4) + wbinvd(); +} + +static void cpa_flush_all(unsigned long cache) +{ + BUG_ON(irqs_disabled() && !early_boot_irqs_disabled); + + on_each_cpu(__cpa_flush_all, (void *) cache, 1); +} + +void __cpa_flush_tlb(void *data) +{ + struct cpa_data *cpa = data; + unsigned int i; + + for (i = 0; i < cpa->numpages; i++) + __flush_tlb_one_kernel(fix_addr(__cpa_addr(cpa, i))); +} + +static void cpa_flush(struct cpa_data *data, int cache) +{ + struct cpa_data *cpa = data; + unsigned int i; + + BUG_ON(irqs_disabled() && !early_boot_irqs_disabled); + + if (cache && !static_cpu_has(X86_FEATURE_CLFLUSH)) { + cpa_flush_all(cache); + return; + } + + if (cpa->numpages <= tlb_single_page_flush_ceiling) + on_each_cpu(__cpa_flush_tlb, cpa, 1); + else + flush_tlb_all(); + + if (!cache) + return; + + mb(); + for (i = 0; i < cpa->numpages; i++) { + unsigned long addr = __cpa_addr(cpa, i); + unsigned int level; + + pte_t *pte = lookup_address(addr, &level); + + /* + * Only flush present addresses: + */ + if (pte && (pte_val(*pte) & _PAGE_PRESENT)) + clflush_cache_range_opt((void *)fix_addr(addr), PAGE_SIZE); + } + mb(); +} + +static bool overlaps(unsigned long r1_start, unsigned long r1_end, + unsigned long r2_start, unsigned long r2_end) +{ + return (r1_start <= r2_end && r1_end >= r2_start) || + (r2_start <= r1_end && r2_end >= r1_start); +} + +#ifdef CONFIG_PCI_BIOS +/* + * The BIOS area between 640k and 1Mb needs to be executable for PCI BIOS + * based config access (CONFIG_PCI_GOBIOS) support. + */ +#define BIOS_PFN PFN_DOWN(BIOS_BEGIN) +#define BIOS_PFN_END PFN_DOWN(BIOS_END - 1) + +static pgprotval_t protect_pci_bios(unsigned long spfn, unsigned long epfn) +{ + if (pcibios_enabled && overlaps(spfn, epfn, BIOS_PFN, BIOS_PFN_END)) + return _PAGE_NX; + return 0; +} +#else +static pgprotval_t protect_pci_bios(unsigned long spfn, unsigned long epfn) +{ + return 0; +} +#endif + +/* + * The .rodata section needs to be read-only. Using the pfn catches all + * aliases. This also includes __ro_after_init, so do not enforce until + * kernel_set_to_readonly is true. + */ +static pgprotval_t protect_rodata(unsigned long spfn, unsigned long epfn) +{ + unsigned long epfn_ro, spfn_ro = PFN_DOWN(__pa_symbol(__start_rodata)); + + /* + * Note: __end_rodata is at page aligned and not inclusive, so + * subtract 1 to get the last enforced PFN in the rodata area. + */ + epfn_ro = PFN_DOWN(__pa_symbol(__end_rodata)) - 1; + + if (kernel_set_to_readonly && overlaps(spfn, epfn, spfn_ro, epfn_ro)) + return _PAGE_RW; + return 0; +} + +/* + * Protect kernel text against becoming non executable by forbidding + * _PAGE_NX. This protects only the high kernel mapping (_text -> _etext) + * out of which the kernel actually executes. Do not protect the low + * mapping. + * + * This does not cover __inittext since that is gone after boot. + */ +static pgprotval_t protect_kernel_text(unsigned long start, unsigned long end) +{ + unsigned long t_end = (unsigned long)_etext - 1; + unsigned long t_start = (unsigned long)_text; + + if (overlaps(start, end, t_start, t_end)) + return _PAGE_NX; + return 0; +} + +#if defined(CONFIG_X86_64) +/* + * Once the kernel maps the text as RO (kernel_set_to_readonly is set), + * kernel text mappings for the large page aligned text, rodata sections + * will be always read-only. For the kernel identity mappings covering the + * holes caused by this alignment can be anything that user asks. + * + * This will preserve the large page mappings for kernel text/data at no + * extra cost. + */ +static pgprotval_t protect_kernel_text_ro(unsigned long start, + unsigned long end) +{ + unsigned long t_end = (unsigned long)__end_rodata_hpage_align - 1; + unsigned long t_start = (unsigned long)_text; + unsigned int level; + + if (!kernel_set_to_readonly || !overlaps(start, end, t_start, t_end)) + return 0; + /* + * Don't enforce the !RW mapping for the kernel text mapping, if + * the current mapping is already using small page mapping. No + * need to work hard to preserve large page mappings in this case. + * + * This also fixes the Linux Xen paravirt guest boot failure caused + * by unexpected read-only mappings for kernel identity + * mappings. In this paravirt guest case, the kernel text mapping + * and the kernel identity mapping share the same page-table pages, + * so the protections for kernel text and identity mappings have to + * be the same. + */ + if (lookup_address(start, &level) && (level != PG_LEVEL_4K)) + return _PAGE_RW; + return 0; +} +#else +static pgprotval_t protect_kernel_text_ro(unsigned long start, + unsigned long end) +{ + return 0; +} +#endif + +static inline bool conflicts(pgprot_t prot, pgprotval_t val) +{ + return (pgprot_val(prot) & ~val) != pgprot_val(prot); +} + +static inline void check_conflict(int warnlvl, pgprot_t prot, pgprotval_t val, + unsigned long start, unsigned long end, + unsigned long pfn, const char *txt) +{ + static const char *lvltxt[] = { + [CPA_CONFLICT] = "conflict", + [CPA_PROTECT] = "protect", + [CPA_DETECT] = "detect", + }; + + if (warnlvl > cpa_warn_level || !conflicts(prot, val)) + return; + + pr_warn("CPA %8s %10s: 0x%016lx - 0x%016lx PFN %lx req %016llx prevent %016llx\n", + lvltxt[warnlvl], txt, start, end, pfn, (unsigned long long)pgprot_val(prot), + (unsigned long long)val); +} + +/* + * Certain areas of memory on x86 require very specific protection flags, + * for example the BIOS area or kernel text. Callers don't always get this + * right (again, ioremap() on BIOS memory is not uncommon) so this function + * checks and fixes these known static required protection bits. + */ +static inline pgprot_t static_protections(pgprot_t prot, unsigned long start, + unsigned long pfn, unsigned long npg, + unsigned long lpsize, int warnlvl) +{ + pgprotval_t forbidden, res; + unsigned long end; + + /* + * There is no point in checking RW/NX conflicts when the requested + * mapping is setting the page !PRESENT. + */ + if (!(pgprot_val(prot) & _PAGE_PRESENT)) + return prot; + + /* Operate on the virtual address */ + end = start + npg * PAGE_SIZE - 1; + + res = protect_kernel_text(start, end); + check_conflict(warnlvl, prot, res, start, end, pfn, "Text NX"); + forbidden = res; + + /* + * Special case to preserve a large page. If the change spawns the + * full large page mapping then there is no point to split it + * up. Happens with ftrace and is going to be removed once ftrace + * switched to text_poke(). + */ + if (lpsize != (npg * PAGE_SIZE) || (start & (lpsize - 1))) { + res = protect_kernel_text_ro(start, end); + check_conflict(warnlvl, prot, res, start, end, pfn, "Text RO"); + forbidden |= res; + } + + /* Check the PFN directly */ + res = protect_pci_bios(pfn, pfn + npg - 1); + check_conflict(warnlvl, prot, res, start, end, pfn, "PCIBIOS NX"); + forbidden |= res; + + res = protect_rodata(pfn, pfn + npg - 1); + check_conflict(warnlvl, prot, res, start, end, pfn, "Rodata RO"); + forbidden |= res; + + return __pgprot(pgprot_val(prot) & ~forbidden); +} + +/* + * Lookup the page table entry for a virtual address in a specific pgd. + * Return a pointer to the entry and the level of the mapping. + */ +pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address, + unsigned int *level) +{ + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + *level = PG_LEVEL_NONE; + + if (pgd_none(*pgd)) + return NULL; + + p4d = p4d_offset(pgd, address); + if (p4d_none(*p4d)) + return NULL; + + *level = PG_LEVEL_512G; + if (p4d_large(*p4d) || !p4d_present(*p4d)) + return (pte_t *)p4d; + + pud = pud_offset(p4d, address); + if (pud_none(*pud)) + return NULL; + + *level = PG_LEVEL_1G; + if (pud_large(*pud) || !pud_present(*pud)) + return (pte_t *)pud; + + pmd = pmd_offset(pud, address); + if (pmd_none(*pmd)) + return NULL; + + *level = PG_LEVEL_2M; + if (pmd_large(*pmd) || !pmd_present(*pmd)) + return (pte_t *)pmd; + + *level = PG_LEVEL_4K; + + return pte_offset_kernel(pmd, address); +} + +/* + * Lookup the page table entry for a virtual address. Return a pointer + * to the entry and the level of the mapping. + * + * Note: We return pud and pmd either when the entry is marked large + * or when the present bit is not set. Otherwise we would return a + * pointer to a nonexisting mapping. + */ +pte_t *lookup_address(unsigned long address, unsigned int *level) +{ + return lookup_address_in_pgd(pgd_offset_k(address), address, level); +} +EXPORT_SYMBOL_GPL(lookup_address); + +static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address, + unsigned int *level) +{ + if (cpa->pgd) + return lookup_address_in_pgd(cpa->pgd + pgd_index(address), + address, level); + + return lookup_address(address, level); +} + +/* + * Lookup the PMD entry for a virtual address. Return a pointer to the entry + * or NULL if not present. + */ +pmd_t *lookup_pmd_address(unsigned long address) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + + pgd = pgd_offset_k(address); + if (pgd_none(*pgd)) + return NULL; + + p4d = p4d_offset(pgd, address); + if (p4d_none(*p4d) || p4d_large(*p4d) || !p4d_present(*p4d)) + return NULL; + + pud = pud_offset(p4d, address); + if (pud_none(*pud) || pud_large(*pud) || !pud_present(*pud)) + return NULL; + + return pmd_offset(pud, address); +} + +/* + * This is necessary because __pa() does not work on some + * kinds of memory, like vmalloc() or the alloc_remap() + * areas on 32-bit NUMA systems. The percpu areas can + * end up in this kind of memory, for instance. + * + * This could be optimized, but it is only intended to be + * used at inititalization time, and keeping it + * unoptimized should increase the testing coverage for + * the more obscure platforms. + */ +phys_addr_t slow_virt_to_phys(void *__virt_addr) +{ + unsigned long virt_addr = (unsigned long)__virt_addr; + phys_addr_t phys_addr; + unsigned long offset; + enum pg_level level; + pte_t *pte; + + pte = lookup_address(virt_addr, &level); + BUG_ON(!pte); + + /* + * pXX_pfn() returns unsigned long, which must be cast to phys_addr_t + * before being left-shifted PAGE_SHIFT bits -- this trick is to + * make 32-PAE kernel work correctly. + */ + switch (level) { + case PG_LEVEL_1G: + phys_addr = (phys_addr_t)pud_pfn(*(pud_t *)pte) << PAGE_SHIFT; + offset = virt_addr & ~PUD_PAGE_MASK; + break; + case PG_LEVEL_2M: + phys_addr = (phys_addr_t)pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT; + offset = virt_addr & ~PMD_PAGE_MASK; + break; + default: + phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT; + offset = virt_addr & ~PAGE_MASK; + } + + return (phys_addr_t)(phys_addr | offset); +} +EXPORT_SYMBOL_GPL(slow_virt_to_phys); + +/* + * Set the new pmd in all the pgds we know about: + */ +static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) +{ + /* change init_mm */ + set_pte_atomic(kpte, pte); +#ifdef CONFIG_X86_32 + if (!SHARED_KERNEL_PMD) { + struct page *page; + + list_for_each_entry(page, &pgd_list, lru) { + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + pgd = (pgd_t *)page_address(page) + pgd_index(address); + p4d = p4d_offset(pgd, address); + pud = pud_offset(p4d, address); + pmd = pmd_offset(pud, address); + set_pte_atomic((pte_t *)pmd, pte); + } + } +#endif +} + +static pgprot_t pgprot_clear_protnone_bits(pgprot_t prot) +{ + /* + * _PAGE_GLOBAL means "global page" for present PTEs. + * But, it is also used to indicate _PAGE_PROTNONE + * for non-present PTEs. + * + * This ensures that a _PAGE_GLOBAL PTE going from + * present to non-present is not confused as + * _PAGE_PROTNONE. + */ + if (!(pgprot_val(prot) & _PAGE_PRESENT)) + pgprot_val(prot) &= ~_PAGE_GLOBAL; + + return prot; +} + +static int __should_split_large_page(pte_t *kpte, unsigned long address, + struct cpa_data *cpa) +{ + unsigned long numpages, pmask, psize, lpaddr, pfn, old_pfn; + pgprot_t old_prot, new_prot, req_prot, chk_prot; + pte_t new_pte, *tmp; + enum pg_level level; + + /* + * Check for races, another CPU might have split this page + * up already: + */ + tmp = _lookup_address_cpa(cpa, address, &level); + if (tmp != kpte) + return 1; + + switch (level) { + case PG_LEVEL_2M: + old_prot = pmd_pgprot(*(pmd_t *)kpte); + old_pfn = pmd_pfn(*(pmd_t *)kpte); + cpa_inc_2m_checked(); + break; + case PG_LEVEL_1G: + old_prot = pud_pgprot(*(pud_t *)kpte); + old_pfn = pud_pfn(*(pud_t *)kpte); + cpa_inc_1g_checked(); + break; + default: + return -EINVAL; + } + + psize = page_level_size(level); + pmask = page_level_mask(level); + + /* + * Calculate the number of pages, which fit into this large + * page starting at address: + */ + lpaddr = (address + psize) & pmask; + numpages = (lpaddr - address) >> PAGE_SHIFT; + if (numpages < cpa->numpages) + cpa->numpages = numpages; + + /* + * We are safe now. Check whether the new pgprot is the same: + * Convert protection attributes to 4k-format, as cpa->mask* are set + * up accordingly. + */ + + /* Clear PSE (aka _PAGE_PAT) and move PAT bit to correct position */ + req_prot = pgprot_large_2_4k(old_prot); + + pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr); + pgprot_val(req_prot) |= pgprot_val(cpa->mask_set); + + /* + * req_prot is in format of 4k pages. It must be converted to large + * page format: the caching mode includes the PAT bit located at + * different bit positions in the two formats. + */ + req_prot = pgprot_4k_2_large(req_prot); + req_prot = pgprot_clear_protnone_bits(req_prot); + if (pgprot_val(req_prot) & _PAGE_PRESENT) + pgprot_val(req_prot) |= _PAGE_PSE; + + /* + * old_pfn points to the large page base pfn. So we need to add the + * offset of the virtual address: + */ + pfn = old_pfn + ((address & (psize - 1)) >> PAGE_SHIFT); + cpa->pfn = pfn; + + /* + * Calculate the large page base address and the number of 4K pages + * in the large page + */ + lpaddr = address & pmask; + numpages = psize >> PAGE_SHIFT; + + /* + * Sanity check that the existing mapping is correct versus the static + * protections. static_protections() guards against !PRESENT, so no + * extra conditional required here. + */ + chk_prot = static_protections(old_prot, lpaddr, old_pfn, numpages, + psize, CPA_CONFLICT); + + if (WARN_ON_ONCE(pgprot_val(chk_prot) != pgprot_val(old_prot))) { + /* + * Split the large page and tell the split code to + * enforce static protections. + */ + cpa->force_static_prot = 1; + return 1; + } + + /* + * Optimization: If the requested pgprot is the same as the current + * pgprot, then the large page can be preserved and no updates are + * required independent of alignment and length of the requested + * range. The above already established that the current pgprot is + * correct, which in consequence makes the requested pgprot correct + * as well if it is the same. The static protection scan below will + * not come to a different conclusion. + */ + if (pgprot_val(req_prot) == pgprot_val(old_prot)) { + cpa_inc_lp_sameprot(level); + return 0; + } + + /* + * If the requested range does not cover the full page, split it up + */ + if (address != lpaddr || cpa->numpages != numpages) + return 1; + + /* + * Check whether the requested pgprot is conflicting with a static + * protection requirement in the large page. + */ + new_prot = static_protections(req_prot, lpaddr, old_pfn, numpages, + psize, CPA_DETECT); + + /* + * If there is a conflict, split the large page. + * + * There used to be a 4k wise evaluation trying really hard to + * preserve the large pages, but experimentation has shown, that this + * does not help at all. There might be corner cases which would + * preserve one large page occasionally, but it's really not worth the + * extra code and cycles for the common case. + */ + if (pgprot_val(req_prot) != pgprot_val(new_prot)) + return 1; + + /* All checks passed. Update the large page mapping. */ + new_pte = pfn_pte(old_pfn, new_prot); + __set_pmd_pte(kpte, address, new_pte); + cpa->flags |= CPA_FLUSHTLB; + cpa_inc_lp_preserved(level); + return 0; +} + +static int should_split_large_page(pte_t *kpte, unsigned long address, + struct cpa_data *cpa) +{ + int do_split; + + if (cpa->force_split) + return 1; + + spin_lock(&pgd_lock); + do_split = __should_split_large_page(kpte, address, cpa); + spin_unlock(&pgd_lock); + + return do_split; +} + +static void split_set_pte(struct cpa_data *cpa, pte_t *pte, unsigned long pfn, + pgprot_t ref_prot, unsigned long address, + unsigned long size) +{ + unsigned int npg = PFN_DOWN(size); + pgprot_t prot; + + /* + * If should_split_large_page() discovered an inconsistent mapping, + * remove the invalid protection in the split mapping. + */ + if (!cpa->force_static_prot) + goto set; + + /* Hand in lpsize = 0 to enforce the protection mechanism */ + prot = static_protections(ref_prot, address, pfn, npg, 0, CPA_PROTECT); + + if (pgprot_val(prot) == pgprot_val(ref_prot)) + goto set; + + /* + * If this is splitting a PMD, fix it up. PUD splits cannot be + * fixed trivially as that would require to rescan the newly + * installed PMD mappings after returning from split_large_page() + * so an eventual further split can allocate the necessary PTE + * pages. Warn for now and revisit it in case this actually + * happens. + */ + if (size == PAGE_SIZE) + ref_prot = prot; + else + pr_warn_once("CPA: Cannot fixup static protections for PUD split\n"); +set: + set_pte(pte, pfn_pte(pfn, ref_prot)); +} + +static int +__split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address, + struct page *base) +{ + unsigned long lpaddr, lpinc, ref_pfn, pfn, pfninc = 1; + pte_t *pbase = (pte_t *)page_address(base); + unsigned int i, level; + pgprot_t ref_prot; + pte_t *tmp; + + spin_lock(&pgd_lock); + /* + * Check for races, another CPU might have split this page + * up for us already: + */ + tmp = _lookup_address_cpa(cpa, address, &level); + if (tmp != kpte) { + spin_unlock(&pgd_lock); + return 1; + } + + paravirt_alloc_pte(&init_mm, page_to_pfn(base)); + + switch (level) { + case PG_LEVEL_2M: + ref_prot = pmd_pgprot(*(pmd_t *)kpte); + /* + * Clear PSE (aka _PAGE_PAT) and move + * PAT bit to correct position. + */ + ref_prot = pgprot_large_2_4k(ref_prot); + ref_pfn = pmd_pfn(*(pmd_t *)kpte); + lpaddr = address & PMD_MASK; + lpinc = PAGE_SIZE; + break; + + case PG_LEVEL_1G: + ref_prot = pud_pgprot(*(pud_t *)kpte); + ref_pfn = pud_pfn(*(pud_t *)kpte); + pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT; + lpaddr = address & PUD_MASK; + lpinc = PMD_SIZE; + /* + * Clear the PSE flags if the PRESENT flag is not set + * otherwise pmd_present/pmd_huge will return true + * even on a non present pmd. + */ + if (!(pgprot_val(ref_prot) & _PAGE_PRESENT)) + pgprot_val(ref_prot) &= ~_PAGE_PSE; + break; + + default: + spin_unlock(&pgd_lock); + return 1; + } + + ref_prot = pgprot_clear_protnone_bits(ref_prot); + + /* + * Get the target pfn from the original entry: + */ + pfn = ref_pfn; + for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc, lpaddr += lpinc) + split_set_pte(cpa, pbase + i, pfn, ref_prot, lpaddr, lpinc); + + if (virt_addr_valid(address)) { + unsigned long pfn = PFN_DOWN(__pa(address)); + + if (pfn_range_is_mapped(pfn, pfn + 1)) + split_page_count(level); + } + + /* + * Install the new, split up pagetable. + * + * We use the standard kernel pagetable protections for the new + * pagetable protections, the actual ptes set above control the + * primary protection behavior: + */ + __set_pmd_pte(kpte, address, mk_pte(base, __pgprot(_KERNPG_TABLE))); + + /* + * Do a global flush tlb after splitting the large page + * and before we do the actual change page attribute in the PTE. + * + * Without this, we violate the TLB application note, that says: + * "The TLBs may contain both ordinary and large-page + * translations for a 4-KByte range of linear addresses. This + * may occur if software modifies the paging structures so that + * the page size used for the address range changes. If the two + * translations differ with respect to page frame or attributes + * (e.g., permissions), processor behavior is undefined and may + * be implementation-specific." + * + * We do this global tlb flush inside the cpa_lock, so that we + * don't allow any other cpu, with stale tlb entries change the + * page attribute in parallel, that also falls into the + * just split large page entry. + */ + flush_tlb_all(); + spin_unlock(&pgd_lock); + + return 0; +} + +static int split_large_page(struct cpa_data *cpa, pte_t *kpte, + unsigned long address) +{ + struct page *base; + + if (!debug_pagealloc_enabled()) + spin_unlock(&cpa_lock); + base = alloc_pages(GFP_KERNEL, 0); + if (!debug_pagealloc_enabled()) + spin_lock(&cpa_lock); + if (!base) + return -ENOMEM; + + if (__split_large_page(cpa, kpte, address, base)) + __free_page(base); + + return 0; +} + +static bool try_to_free_pte_page(pte_t *pte) +{ + int i; + + for (i = 0; i < PTRS_PER_PTE; i++) + if (!pte_none(pte[i])) + return false; + + free_page((unsigned long)pte); + return true; +} + +static bool try_to_free_pmd_page(pmd_t *pmd) +{ + int i; + + for (i = 0; i < PTRS_PER_PMD; i++) + if (!pmd_none(pmd[i])) + return false; + + free_page((unsigned long)pmd); + return true; +} + +static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end) +{ + pte_t *pte = pte_offset_kernel(pmd, start); + + while (start < end) { + set_pte(pte, __pte(0)); + + start += PAGE_SIZE; + pte++; + } + + if (try_to_free_pte_page((pte_t *)pmd_page_vaddr(*pmd))) { + pmd_clear(pmd); + return true; + } + return false; +} + +static void __unmap_pmd_range(pud_t *pud, pmd_t *pmd, + unsigned long start, unsigned long end) +{ + if (unmap_pte_range(pmd, start, end)) + if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud))) + pud_clear(pud); +} + +static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end) +{ + pmd_t *pmd = pmd_offset(pud, start); + + /* + * Not on a 2MB page boundary? + */ + if (start & (PMD_SIZE - 1)) { + unsigned long next_page = (start + PMD_SIZE) & PMD_MASK; + unsigned long pre_end = min_t(unsigned long, end, next_page); + + __unmap_pmd_range(pud, pmd, start, pre_end); + + start = pre_end; + pmd++; + } + + /* + * Try to unmap in 2M chunks. + */ + while (end - start >= PMD_SIZE) { + if (pmd_large(*pmd)) + pmd_clear(pmd); + else + __unmap_pmd_range(pud, pmd, start, start + PMD_SIZE); + + start += PMD_SIZE; + pmd++; + } + + /* + * 4K leftovers? + */ + if (start < end) + return __unmap_pmd_range(pud, pmd, start, end); + + /* + * Try again to free the PMD page if haven't succeeded above. + */ + if (!pud_none(*pud)) + if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud))) + pud_clear(pud); +} + +static void unmap_pud_range(p4d_t *p4d, unsigned long start, unsigned long end) +{ + pud_t *pud = pud_offset(p4d, start); + + /* + * Not on a GB page boundary? + */ + if (start & (PUD_SIZE - 1)) { + unsigned long next_page = (start + PUD_SIZE) & PUD_MASK; + unsigned long pre_end = min_t(unsigned long, end, next_page); + + unmap_pmd_range(pud, start, pre_end); + + start = pre_end; + pud++; + } + + /* + * Try to unmap in 1G chunks? + */ + while (end - start >= PUD_SIZE) { + + if (pud_large(*pud)) + pud_clear(pud); + else + unmap_pmd_range(pud, start, start + PUD_SIZE); + + start += PUD_SIZE; + pud++; + } + + /* + * 2M leftovers? + */ + if (start < end) + unmap_pmd_range(pud, start, end); + + /* + * No need to try to free the PUD page because we'll free it in + * populate_pgd's error path + */ +} + +static int alloc_pte_page(pmd_t *pmd) +{ + pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL); + if (!pte) + return -1; + + set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE)); + return 0; +} + +static int alloc_pmd_page(pud_t *pud) +{ + pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL); + if (!pmd) + return -1; + + set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); + return 0; +} + +static void populate_pte(struct cpa_data *cpa, + unsigned long start, unsigned long end, + unsigned num_pages, pmd_t *pmd, pgprot_t pgprot) +{ + pte_t *pte; + + pte = pte_offset_kernel(pmd, start); + + pgprot = pgprot_clear_protnone_bits(pgprot); + + while (num_pages-- && start < end) { + set_pte(pte, pfn_pte(cpa->pfn, pgprot)); + + start += PAGE_SIZE; + cpa->pfn++; + pte++; + } +} + +static long populate_pmd(struct cpa_data *cpa, + unsigned long start, unsigned long end, + unsigned num_pages, pud_t *pud, pgprot_t pgprot) +{ + long cur_pages = 0; + pmd_t *pmd; + pgprot_t pmd_pgprot; + + /* + * Not on a 2M boundary? + */ + if (start & (PMD_SIZE - 1)) { + unsigned long pre_end = start + (num_pages << PAGE_SHIFT); + unsigned long next_page = (start + PMD_SIZE) & PMD_MASK; + + pre_end = min_t(unsigned long, pre_end, next_page); + cur_pages = (pre_end - start) >> PAGE_SHIFT; + cur_pages = min_t(unsigned int, num_pages, cur_pages); + + /* + * Need a PTE page? + */ + pmd = pmd_offset(pud, start); + if (pmd_none(*pmd)) + if (alloc_pte_page(pmd)) + return -1; + + populate_pte(cpa, start, pre_end, cur_pages, pmd, pgprot); + + start = pre_end; + } + + /* + * We mapped them all? + */ + if (num_pages == cur_pages) + return cur_pages; + + pmd_pgprot = pgprot_4k_2_large(pgprot); + + while (end - start >= PMD_SIZE) { + + /* + * We cannot use a 1G page so allocate a PMD page if needed. + */ + if (pud_none(*pud)) + if (alloc_pmd_page(pud)) + return -1; + + pmd = pmd_offset(pud, start); + + set_pmd(pmd, pmd_mkhuge(pfn_pmd(cpa->pfn, + canon_pgprot(pmd_pgprot)))); + + start += PMD_SIZE; + cpa->pfn += PMD_SIZE >> PAGE_SHIFT; + cur_pages += PMD_SIZE >> PAGE_SHIFT; + } + + /* + * Map trailing 4K pages. + */ + if (start < end) { + pmd = pmd_offset(pud, start); + if (pmd_none(*pmd)) + if (alloc_pte_page(pmd)) + return -1; + + populate_pte(cpa, start, end, num_pages - cur_pages, + pmd, pgprot); + } + return num_pages; +} + +static int populate_pud(struct cpa_data *cpa, unsigned long start, p4d_t *p4d, + pgprot_t pgprot) +{ + pud_t *pud; + unsigned long end; + long cur_pages = 0; + pgprot_t pud_pgprot; + + end = start + (cpa->numpages << PAGE_SHIFT); + + /* + * Not on a Gb page boundary? => map everything up to it with + * smaller pages. + */ + if (start & (PUD_SIZE - 1)) { + unsigned long pre_end; + unsigned long next_page = (start + PUD_SIZE) & PUD_MASK; + + pre_end = min_t(unsigned long, end, next_page); + cur_pages = (pre_end - start) >> PAGE_SHIFT; + cur_pages = min_t(int, (int)cpa->numpages, cur_pages); + + pud = pud_offset(p4d, start); + + /* + * Need a PMD page? + */ + if (pud_none(*pud)) + if (alloc_pmd_page(pud)) + return -1; + + cur_pages = populate_pmd(cpa, start, pre_end, cur_pages, + pud, pgprot); + if (cur_pages < 0) + return cur_pages; + + start = pre_end; + } + + /* We mapped them all? */ + if (cpa->numpages == cur_pages) + return cur_pages; + + pud = pud_offset(p4d, start); + pud_pgprot = pgprot_4k_2_large(pgprot); + + /* + * Map everything starting from the Gb boundary, possibly with 1G pages + */ + while (boot_cpu_has(X86_FEATURE_GBPAGES) && end - start >= PUD_SIZE) { + set_pud(pud, pud_mkhuge(pfn_pud(cpa->pfn, + canon_pgprot(pud_pgprot)))); + + start += PUD_SIZE; + cpa->pfn += PUD_SIZE >> PAGE_SHIFT; + cur_pages += PUD_SIZE >> PAGE_SHIFT; + pud++; + } + + /* Map trailing leftover */ + if (start < end) { + long tmp; + + pud = pud_offset(p4d, start); + if (pud_none(*pud)) + if (alloc_pmd_page(pud)) + return -1; + + tmp = populate_pmd(cpa, start, end, cpa->numpages - cur_pages, + pud, pgprot); + if (tmp < 0) + return cur_pages; + + cur_pages += tmp; + } + return cur_pages; +} + +/* + * Restrictions for kernel page table do not necessarily apply when mapping in + * an alternate PGD. + */ +static int populate_pgd(struct cpa_data *cpa, unsigned long addr) +{ + pgprot_t pgprot = __pgprot(_KERNPG_TABLE); + pud_t *pud = NULL; /* shut up gcc */ + p4d_t *p4d; + pgd_t *pgd_entry; + long ret; + + pgd_entry = cpa->pgd + pgd_index(addr); + + if (pgd_none(*pgd_entry)) { + p4d = (p4d_t *)get_zeroed_page(GFP_KERNEL); + if (!p4d) + return -1; + + set_pgd(pgd_entry, __pgd(__pa(p4d) | _KERNPG_TABLE)); + } + + /* + * Allocate a PUD page and hand it down for mapping. + */ + p4d = p4d_offset(pgd_entry, addr); + if (p4d_none(*p4d)) { + pud = (pud_t *)get_zeroed_page(GFP_KERNEL); + if (!pud) + return -1; + + set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE)); + } + + pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr); + pgprot_val(pgprot) |= pgprot_val(cpa->mask_set); + + ret = populate_pud(cpa, addr, p4d, pgprot); + if (ret < 0) { + /* + * Leave the PUD page in place in case some other CPU or thread + * already found it, but remove any useless entries we just + * added to it. + */ + unmap_pud_range(p4d, addr, + addr + (cpa->numpages << PAGE_SHIFT)); + return ret; + } + + cpa->numpages = ret; + return 0; +} + +static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr, + int primary) +{ + if (cpa->pgd) { + /* + * Right now, we only execute this code path when mapping + * the EFI virtual memory map regions, no other users + * provide a ->pgd value. This may change in the future. + */ + return populate_pgd(cpa, vaddr); + } + + /* + * Ignore all non primary paths. + */ + if (!primary) { + cpa->numpages = 1; + return 0; + } + + /* + * Ignore the NULL PTE for kernel identity mapping, as it is expected + * to have holes. + * Also set numpages to '1' indicating that we processed cpa req for + * one virtual address page and its pfn. TBD: numpages can be set based + * on the initial value and the level returned by lookup_address(). + */ + if (within(vaddr, PAGE_OFFSET, + PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) { + cpa->numpages = 1; + cpa->pfn = __pa(vaddr) >> PAGE_SHIFT; + return 0; + + } else if (__cpa_pfn_in_highmap(cpa->pfn)) { + /* Faults in the highmap are OK, so do not warn: */ + return -EFAULT; + } else { + WARN(1, KERN_WARNING "CPA: called for zero pte. " + "vaddr = %lx cpa->vaddr = %lx\n", vaddr, + *cpa->vaddr); + + return -EFAULT; + } +} + +static int __change_page_attr(struct cpa_data *cpa, int primary) +{ + unsigned long address; + int do_split, err; + unsigned int level; + pte_t *kpte, old_pte; + + address = __cpa_addr(cpa, cpa->curpage); +repeat: + kpte = _lookup_address_cpa(cpa, address, &level); + if (!kpte) + return __cpa_process_fault(cpa, address, primary); + + old_pte = *kpte; + if (pte_none(old_pte)) + return __cpa_process_fault(cpa, address, primary); + + if (level == PG_LEVEL_4K) { + pte_t new_pte; + pgprot_t new_prot = pte_pgprot(old_pte); + unsigned long pfn = pte_pfn(old_pte); + + pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr); + pgprot_val(new_prot) |= pgprot_val(cpa->mask_set); + + cpa_inc_4k_install(); + /* Hand in lpsize = 0 to enforce the protection mechanism */ + new_prot = static_protections(new_prot, address, pfn, 1, 0, + CPA_PROTECT); + + new_prot = pgprot_clear_protnone_bits(new_prot); + + /* + * We need to keep the pfn from the existing PTE, + * after all we're only going to change it's attributes + * not the memory it points to + */ + new_pte = pfn_pte(pfn, new_prot); + cpa->pfn = pfn; + /* + * Do we really change anything ? + */ + if (pte_val(old_pte) != pte_val(new_pte)) { + set_pte_atomic(kpte, new_pte); + cpa->flags |= CPA_FLUSHTLB; + } + cpa->numpages = 1; + return 0; + } + + /* + * Check, whether we can keep the large page intact + * and just change the pte: + */ + do_split = should_split_large_page(kpte, address, cpa); + /* + * When the range fits into the existing large page, + * return. cp->numpages and cpa->tlbflush have been updated in + * try_large_page: + */ + if (do_split <= 0) + return do_split; + + /* + * We have to split the large page: + */ + err = split_large_page(cpa, kpte, address); + if (!err) + goto repeat; + + return err; +} + +static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias); + +static int cpa_process_alias(struct cpa_data *cpa) +{ + struct cpa_data alias_cpa; + unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT); + unsigned long vaddr; + int ret; + + if (!pfn_range_is_mapped(cpa->pfn, cpa->pfn + 1)) + return 0; + + /* + * No need to redo, when the primary call touched the direct + * mapping already: + */ + vaddr = __cpa_addr(cpa, cpa->curpage); + if (!(within(vaddr, PAGE_OFFSET, + PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)))) { + + alias_cpa = *cpa; + alias_cpa.vaddr = &laddr; + alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); + alias_cpa.curpage = 0; + + ret = __change_page_attr_set_clr(&alias_cpa, 0); + if (ret) + return ret; + } + +#ifdef CONFIG_X86_64 + /* + * If the primary call didn't touch the high mapping already + * and the physical address is inside the kernel map, we need + * to touch the high mapped kernel as well: + */ + if (!within(vaddr, (unsigned long)_text, _brk_end) && + __cpa_pfn_in_highmap(cpa->pfn)) { + unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + + __START_KERNEL_map - phys_base; + alias_cpa = *cpa; + alias_cpa.vaddr = &temp_cpa_vaddr; + alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); + alias_cpa.curpage = 0; + + /* + * The high mapping range is imprecise, so ignore the + * return value. + */ + __change_page_attr_set_clr(&alias_cpa, 0); + } +#endif + + return 0; +} + +static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) +{ + unsigned long numpages = cpa->numpages; + unsigned long rempages = numpages; + int ret = 0; + + while (rempages) { + /* + * Store the remaining nr of pages for the large page + * preservation check. + */ + cpa->numpages = rempages; + /* for array changes, we can't use large page */ + if (cpa->flags & (CPA_ARRAY | CPA_PAGES_ARRAY)) + cpa->numpages = 1; + + if (!debug_pagealloc_enabled()) + spin_lock(&cpa_lock); + ret = __change_page_attr(cpa, checkalias); + if (!debug_pagealloc_enabled()) + spin_unlock(&cpa_lock); + if (ret) + goto out; + + if (checkalias) { + ret = cpa_process_alias(cpa); + if (ret) + goto out; + } + + /* + * Adjust the number of pages with the result of the + * CPA operation. Either a large page has been + * preserved or a single page update happened. + */ + BUG_ON(cpa->numpages > rempages || !cpa->numpages); + rempages -= cpa->numpages; + cpa->curpage += cpa->numpages; + } + +out: + /* Restore the original numpages */ + cpa->numpages = numpages; + return ret; +} + +static int change_page_attr_set_clr(unsigned long *addr, int numpages, + pgprot_t mask_set, pgprot_t mask_clr, + int force_split, int in_flag, + struct page **pages) +{ + struct cpa_data cpa; + int ret, cache, checkalias; + + memset(&cpa, 0, sizeof(cpa)); + + /* + * Check, if we are requested to set a not supported + * feature. Clearing non-supported features is OK. + */ + mask_set = canon_pgprot(mask_set); + + if (!pgprot_val(mask_set) && !pgprot_val(mask_clr) && !force_split) + return 0; + + /* Ensure we are PAGE_SIZE aligned */ + if (in_flag & CPA_ARRAY) { + int i; + for (i = 0; i < numpages; i++) { + if (addr[i] & ~PAGE_MASK) { + addr[i] &= PAGE_MASK; + WARN_ON_ONCE(1); + } + } + } else if (!(in_flag & CPA_PAGES_ARRAY)) { + /* + * in_flag of CPA_PAGES_ARRAY implies it is aligned. + * No need to check in that case + */ + if (*addr & ~PAGE_MASK) { + *addr &= PAGE_MASK; + /* + * People should not be passing in unaligned addresses: + */ + WARN_ON_ONCE(1); + } + } + + /* Must avoid aliasing mappings in the highmem code */ + kmap_flush_unused(); + + vm_unmap_aliases(); + + cpa.vaddr = addr; + cpa.pages = pages; + cpa.numpages = numpages; + cpa.mask_set = mask_set; + cpa.mask_clr = mask_clr; + cpa.flags = 0; + cpa.curpage = 0; + cpa.force_split = force_split; + + if (in_flag & (CPA_ARRAY | CPA_PAGES_ARRAY)) + cpa.flags |= in_flag; + + /* No alias checking for _NX bit modifications */ + checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX; + /* Has caller explicitly disabled alias checking? */ + if (in_flag & CPA_NO_CHECK_ALIAS) + checkalias = 0; + + ret = __change_page_attr_set_clr(&cpa, checkalias); + + /* + * Check whether we really changed something: + */ + if (!(cpa.flags & CPA_FLUSHTLB)) + goto out; + + /* + * No need to flush, when we did not set any of the caching + * attributes: + */ + cache = !!pgprot2cachemode(mask_set); + + /* + * On error; flush everything to be sure. + */ + if (ret) { + cpa_flush_all(cache); + goto out; + } + + cpa_flush(&cpa, cache); +out: + return ret; +} + +static inline int change_page_attr_set(unsigned long *addr, int numpages, + pgprot_t mask, int array) +{ + return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0, + (array ? CPA_ARRAY : 0), NULL); +} + +static inline int change_page_attr_clear(unsigned long *addr, int numpages, + pgprot_t mask, int array) +{ + return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0, + (array ? CPA_ARRAY : 0), NULL); +} + +static inline int cpa_set_pages_array(struct page **pages, int numpages, + pgprot_t mask) +{ + return change_page_attr_set_clr(NULL, numpages, mask, __pgprot(0), 0, + CPA_PAGES_ARRAY, pages); +} + +static inline int cpa_clear_pages_array(struct page **pages, int numpages, + pgprot_t mask) +{ + return change_page_attr_set_clr(NULL, numpages, __pgprot(0), mask, 0, + CPA_PAGES_ARRAY, pages); +} + +int _set_memory_uc(unsigned long addr, int numpages) +{ + /* + * for now UC MINUS. see comments in ioremap() + * If you really need strong UC use ioremap_uc(), but note + * that you cannot override IO areas with set_memory_*() as + * these helpers cannot work with IO memory. + */ + return change_page_attr_set(&addr, numpages, + cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS), + 0); +} + +int set_memory_uc(unsigned long addr, int numpages) +{ + int ret; + + /* + * for now UC MINUS. see comments in ioremap() + */ + ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, + _PAGE_CACHE_MODE_UC_MINUS, NULL); + if (ret) + goto out_err; + + ret = _set_memory_uc(addr, numpages); + if (ret) + goto out_free; + + return 0; + +out_free: + free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); +out_err: + return ret; +} +EXPORT_SYMBOL(set_memory_uc); + +int _set_memory_wc(unsigned long addr, int numpages) +{ + int ret; + + ret = change_page_attr_set(&addr, numpages, + cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS), + 0); + if (!ret) { + ret = change_page_attr_set_clr(&addr, numpages, + cachemode2pgprot(_PAGE_CACHE_MODE_WC), + __pgprot(_PAGE_CACHE_MASK), + 0, 0, NULL); + } + return ret; +} + +int set_memory_wc(unsigned long addr, int numpages) +{ + int ret; + + ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, + _PAGE_CACHE_MODE_WC, NULL); + if (ret) + return ret; + + ret = _set_memory_wc(addr, numpages); + if (ret) + free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); + + return ret; +} +EXPORT_SYMBOL(set_memory_wc); + +int _set_memory_wt(unsigned long addr, int numpages) +{ + return change_page_attr_set(&addr, numpages, + cachemode2pgprot(_PAGE_CACHE_MODE_WT), 0); +} + +int _set_memory_wb(unsigned long addr, int numpages) +{ + /* WB cache mode is hard wired to all cache attribute bits being 0 */ + return change_page_attr_clear(&addr, numpages, + __pgprot(_PAGE_CACHE_MASK), 0); +} + +int set_memory_wb(unsigned long addr, int numpages) +{ + int ret; + + ret = _set_memory_wb(addr, numpages); + if (ret) + return ret; + + free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); + return 0; +} +EXPORT_SYMBOL(set_memory_wb); + +int set_memory_x(unsigned long addr, int numpages) +{ + if (!(__supported_pte_mask & _PAGE_NX)) + return 0; + + return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0); +} + +int set_memory_nx(unsigned long addr, int numpages) +{ + if (!(__supported_pte_mask & _PAGE_NX)) + return 0; + + return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0); +} + +int set_memory_ro(unsigned long addr, int numpages) +{ + return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0); +} + +int set_memory_rw(unsigned long addr, int numpages) +{ + return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0); +} + +int set_memory_np(unsigned long addr, int numpages) +{ + return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_PRESENT), 0); +} + +int set_memory_np_noalias(unsigned long addr, int numpages) +{ + int cpa_flags = CPA_NO_CHECK_ALIAS; + + return change_page_attr_set_clr(&addr, numpages, __pgprot(0), + __pgprot(_PAGE_PRESENT), 0, + cpa_flags, NULL); +} + +int set_memory_4k(unsigned long addr, int numpages) +{ + return change_page_attr_set_clr(&addr, numpages, __pgprot(0), + __pgprot(0), 1, 0, NULL); +} + +int set_memory_nonglobal(unsigned long addr, int numpages) +{ + return change_page_attr_clear(&addr, numpages, + __pgprot(_PAGE_GLOBAL), 0); +} + +int set_memory_global(unsigned long addr, int numpages) +{ + return change_page_attr_set(&addr, numpages, + __pgprot(_PAGE_GLOBAL), 0); +} + +static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc) +{ + struct cpa_data cpa; + int ret; + + /* Nothing to do if memory encryption is not active */ + if (!mem_encrypt_active()) + return 0; + + /* Should not be working on unaligned addresses */ + if (WARN_ONCE(addr & ~PAGE_MASK, "misaligned address: %#lx\n", addr)) + addr &= PAGE_MASK; + + memset(&cpa, 0, sizeof(cpa)); + cpa.vaddr = &addr; + cpa.numpages = numpages; + cpa.mask_set = enc ? __pgprot(_PAGE_ENC) : __pgprot(0); + cpa.mask_clr = enc ? __pgprot(0) : __pgprot(_PAGE_ENC); + cpa.pgd = init_mm.pgd; + + /* Must avoid aliasing mappings in the highmem code */ + kmap_flush_unused(); + vm_unmap_aliases(); + + /* + * Before changing the encryption attribute, we need to flush caches. + */ + cpa_flush(&cpa, 1); + + ret = __change_page_attr_set_clr(&cpa, 1); + + /* + * After changing the encryption attribute, we need to flush TLBs again + * in case any speculative TLB caching occurred (but no need to flush + * caches again). We could just use cpa_flush_all(), but in case TLB + * flushing gets optimized in the cpa_flush() path use the same logic + * as above. + */ + cpa_flush(&cpa, 0); + + return ret; +} + +int set_memory_encrypted(unsigned long addr, int numpages) +{ + return __set_memory_enc_dec(addr, numpages, true); +} +EXPORT_SYMBOL_GPL(set_memory_encrypted); + +int set_memory_decrypted(unsigned long addr, int numpages) +{ + return __set_memory_enc_dec(addr, numpages, false); +} +EXPORT_SYMBOL_GPL(set_memory_decrypted); + +int set_pages_uc(struct page *page, int numpages) +{ + unsigned long addr = (unsigned long)page_address(page); + + return set_memory_uc(addr, numpages); +} +EXPORT_SYMBOL(set_pages_uc); + +static int _set_pages_array(struct page **pages, int numpages, + enum page_cache_mode new_type) +{ + unsigned long start; + unsigned long end; + enum page_cache_mode set_type; + int i; + int free_idx; + int ret; + + for (i = 0; i < numpages; i++) { + if (PageHighMem(pages[i])) + continue; + start = page_to_pfn(pages[i]) << PAGE_SHIFT; + end = start + PAGE_SIZE; + if (reserve_memtype(start, end, new_type, NULL)) + goto err_out; + } + + /* If WC, set to UC- first and then WC */ + set_type = (new_type == _PAGE_CACHE_MODE_WC) ? + _PAGE_CACHE_MODE_UC_MINUS : new_type; + + ret = cpa_set_pages_array(pages, numpages, + cachemode2pgprot(set_type)); + if (!ret && new_type == _PAGE_CACHE_MODE_WC) + ret = change_page_attr_set_clr(NULL, numpages, + cachemode2pgprot( + _PAGE_CACHE_MODE_WC), + __pgprot(_PAGE_CACHE_MASK), + 0, CPA_PAGES_ARRAY, pages); + if (ret) + goto err_out; + return 0; /* Success */ +err_out: + free_idx = i; + for (i = 0; i < free_idx; i++) { + if (PageHighMem(pages[i])) + continue; + start = page_to_pfn(pages[i]) << PAGE_SHIFT; + end = start + PAGE_SIZE; + free_memtype(start, end); + } + return -EINVAL; +} + +int set_pages_array_uc(struct page **pages, int numpages) +{ + return _set_pages_array(pages, numpages, _PAGE_CACHE_MODE_UC_MINUS); +} +EXPORT_SYMBOL(set_pages_array_uc); + +int set_pages_array_wc(struct page **pages, int numpages) +{ + return _set_pages_array(pages, numpages, _PAGE_CACHE_MODE_WC); +} +EXPORT_SYMBOL(set_pages_array_wc); + +int set_pages_array_wt(struct page **pages, int numpages) +{ + return _set_pages_array(pages, numpages, _PAGE_CACHE_MODE_WT); +} +EXPORT_SYMBOL_GPL(set_pages_array_wt); + +int set_pages_wb(struct page *page, int numpages) +{ + unsigned long addr = (unsigned long)page_address(page); + + return set_memory_wb(addr, numpages); +} +EXPORT_SYMBOL(set_pages_wb); + +int set_pages_array_wb(struct page **pages, int numpages) +{ + int retval; + unsigned long start; + unsigned long end; + int i; + + /* WB cache mode is hard wired to all cache attribute bits being 0 */ + retval = cpa_clear_pages_array(pages, numpages, + __pgprot(_PAGE_CACHE_MASK)); + if (retval) + return retval; + + for (i = 0; i < numpages; i++) { + if (PageHighMem(pages[i])) + continue; + start = page_to_pfn(pages[i]) << PAGE_SHIFT; + end = start + PAGE_SIZE; + free_memtype(start, end); + } + + return 0; +} +EXPORT_SYMBOL(set_pages_array_wb); + +int set_pages_ro(struct page *page, int numpages) +{ + unsigned long addr = (unsigned long)page_address(page); + + return set_memory_ro(addr, numpages); +} + +int set_pages_rw(struct page *page, int numpages) +{ + unsigned long addr = (unsigned long)page_address(page); + + return set_memory_rw(addr, numpages); +} + +static int __set_pages_p(struct page *page, int numpages) +{ + unsigned long tempaddr = (unsigned long) page_address(page); + struct cpa_data cpa = { .vaddr = &tempaddr, + .pgd = NULL, + .numpages = numpages, + .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW), + .mask_clr = __pgprot(0), + .flags = 0}; + + /* + * No alias checking needed for setting present flag. otherwise, + * we may need to break large pages for 64-bit kernel text + * mappings (this adds to complexity if we want to do this from + * atomic context especially). Let's keep it simple! + */ + return __change_page_attr_set_clr(&cpa, 0); +} + +static int __set_pages_np(struct page *page, int numpages) +{ + unsigned long tempaddr = (unsigned long) page_address(page); + struct cpa_data cpa = { .vaddr = &tempaddr, + .pgd = NULL, + .numpages = numpages, + .mask_set = __pgprot(0), + .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW), + .flags = 0}; + + /* + * No alias checking needed for setting not present flag. otherwise, + * we may need to break large pages for 64-bit kernel text + * mappings (this adds to complexity if we want to do this from + * atomic context especially). Let's keep it simple! + */ + return __change_page_attr_set_clr(&cpa, 0); +} + +int set_direct_map_invalid_noflush(struct page *page) +{ + return __set_pages_np(page, 1); +} + +int set_direct_map_default_noflush(struct page *page) +{ + return __set_pages_p(page, 1); +} + +void __kernel_map_pages(struct page *page, int numpages, int enable) +{ + if (PageHighMem(page)) + return; + if (!enable) { + debug_check_no_locks_freed(page_address(page), + numpages * PAGE_SIZE); + } + + /* + * The return value is ignored as the calls cannot fail. + * Large pages for identity mappings are not used at boot time + * and hence no memory allocations during large page split. + */ + if (enable) + __set_pages_p(page, numpages); + else + __set_pages_np(page, numpages); + + /* + * We should perform an IPI and flush all tlbs, + * but that can deadlock->flush only current cpu. + * Preemption needs to be disabled around __flush_tlb_all() due to + * CR3 reload in __native_flush_tlb(). + */ + preempt_disable(); + __flush_tlb_all(); + preempt_enable(); + + arch_flush_lazy_mmu_mode(); +} + +#ifdef CONFIG_HIBERNATION +bool kernel_page_present(struct page *page) +{ + unsigned int level; + pte_t *pte; + + if (PageHighMem(page)) + return false; + + pte = lookup_address((unsigned long)page_address(page), &level); + return (pte_val(*pte) & _PAGE_PRESENT); +} +#endif /* CONFIG_HIBERNATION */ + +int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, + unsigned numpages, unsigned long page_flags) +{ + int retval = -EINVAL; + + struct cpa_data cpa = { + .vaddr = &address, + .pfn = pfn, + .pgd = pgd, + .numpages = numpages, + .mask_set = __pgprot(0), + .mask_clr = __pgprot(0), + .flags = 0, + }; + + WARN_ONCE(num_online_cpus() > 1, "Don't call after initializing SMP"); + + if (!(__supported_pte_mask & _PAGE_NX)) + goto out; + + if (!(page_flags & _PAGE_NX)) + cpa.mask_clr = __pgprot(_PAGE_NX); + + if (!(page_flags & _PAGE_RW)) + cpa.mask_clr = __pgprot(_PAGE_RW); + + if (!(page_flags & _PAGE_ENC)) + cpa.mask_clr = pgprot_encrypted(cpa.mask_clr); + + cpa.mask_set = __pgprot(_PAGE_PRESENT | page_flags); + + retval = __change_page_attr_set_clr(&cpa, 0); + __flush_tlb_all(); + +out: + return retval; +} + +/* + * __flush_tlb_all() flushes mappings only on current CPU and hence this + * function shouldn't be used in an SMP environment. Presently, it's used only + * during boot (way before smp_init()) by EFI subsystem and hence is ok. + */ +int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address, + unsigned long numpages) +{ + int retval; + + /* + * The typical sequence for unmapping is to find a pte through + * lookup_address_in_pgd() (ideally, it should never return NULL because + * the address is already mapped) and change it's protections. As pfn is + * the *target* of a mapping, it's not useful while unmapping. + */ + struct cpa_data cpa = { + .vaddr = &address, + .pfn = 0, + .pgd = pgd, + .numpages = numpages, + .mask_set = __pgprot(0), + .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW), + .flags = 0, + }; + + WARN_ONCE(num_online_cpus() > 1, "Don't call after initializing SMP"); + + retval = __change_page_attr_set_clr(&cpa, 0); + __flush_tlb_all(); + + return retval; +} + +/* + * The testcases use internal knowledge of the implementation that shouldn't + * be exposed to the rest of the kernel. Include these directly here. + */ +#ifdef CONFIG_CPA_DEBUG +#include "cpa-test.c" +#endif diff --git a/arch/x86/mm/pat_internal.h b/arch/x86/mm/pat_internal.h deleted file mode 100644 index 23ce8cdac159..000000000000 --- a/arch/x86/mm/pat_internal.h +++ /dev/null @@ -1,49 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __PAT_INTERNAL_H_ -#define __PAT_INTERNAL_H_ - -extern int pat_debug_enable; - -#define dprintk(fmt, arg...) \ - do { if (pat_debug_enable) pr_info("x86/PAT: " fmt, ##arg); } while (0) - -struct memtype { - u64 start; - u64 end; - u64 subtree_max_end; - enum page_cache_mode type; - struct rb_node rb; -}; - -static inline char *cattr_name(enum page_cache_mode pcm) -{ - switch (pcm) { - case _PAGE_CACHE_MODE_UC: return "uncached"; - case _PAGE_CACHE_MODE_UC_MINUS: return "uncached-minus"; - case _PAGE_CACHE_MODE_WB: return "write-back"; - case _PAGE_CACHE_MODE_WC: return "write-combining"; - case _PAGE_CACHE_MODE_WT: return "write-through"; - case _PAGE_CACHE_MODE_WP: return "write-protected"; - default: return "broken"; - } -} - -#ifdef CONFIG_X86_PAT -extern int memtype_check_insert(struct memtype *entry_new, - enum page_cache_mode *new_type); -extern struct memtype *memtype_erase(u64 start, u64 end); -extern struct memtype *memtype_lookup(u64 addr); -extern int memtype_copy_nth_element(struct memtype *entry_out, loff_t pos); -#else -static inline int memtype_check_insert(struct memtype *entry_new, - enum page_cache_mode *new_type) -{ return 0; } -static inline struct memtype *memtype_erase(u64 start, u64 end) -{ return NULL; } -static inline struct memtype *memtype_lookup(u64 addr) -{ return NULL; } -static inline int memtype_copy_nth_element(struct memtype *out, loff_t pos) -{ return 0; } -#endif - -#endif /* __PAT_INTERNAL_H_ */ diff --git a/arch/x86/mm/pat_interval.c b/arch/x86/mm/pat_interval.c deleted file mode 100644 index 3c983de5f492..000000000000 --- a/arch/x86/mm/pat_interval.c +++ /dev/null @@ -1,194 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Handle caching attributes in page tables (PAT) - * - * Authors: Venkatesh Pallipadi - * Suresh B Siddha - * - * Interval tree used to store the PAT memory type reservations. - */ - -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "pat_internal.h" - -/* - * The memtype tree keeps track of memory type for specific - * physical memory areas. Without proper tracking, conflicting memory - * types in different mappings can cause CPU cache corruption. - * - * The tree is an interval tree (augmented rbtree) which tree is ordered - * by the starting address. The tree can contain multiple entries for - * different regions which overlap. All the aliases have the same - * cache attributes of course, as enforced by the PAT logic. - * - * memtype_lock protects the rbtree. - */ - -static inline u64 interval_start(struct memtype *entry) -{ - return entry->start; -} - -static inline u64 interval_end(struct memtype *entry) -{ - return entry->end - 1; -} - -INTERVAL_TREE_DEFINE(struct memtype, rb, u64, subtree_max_end, - interval_start, interval_end, - static, interval) - -static struct rb_root_cached memtype_rbroot = RB_ROOT_CACHED; - -enum { - MEMTYPE_EXACT_MATCH = 0, - MEMTYPE_END_MATCH = 1 -}; - -static struct memtype *memtype_match(u64 start, u64 end, int match_type) -{ - struct memtype *entry_match; - - entry_match = interval_iter_first(&memtype_rbroot, start, end-1); - - while (entry_match != NULL && entry_match->start < end) { - if ((match_type == MEMTYPE_EXACT_MATCH) && - (entry_match->start == start) && (entry_match->end == end)) - return entry_match; - - if ((match_type == MEMTYPE_END_MATCH) && - (entry_match->start < start) && (entry_match->end == end)) - return entry_match; - - entry_match = interval_iter_next(entry_match, start, end-1); - } - - return NULL; /* Returns NULL if there is no match */ -} - -static int memtype_check_conflict(u64 start, u64 end, - enum page_cache_mode reqtype, - enum page_cache_mode *newtype) -{ - struct memtype *entry_match; - enum page_cache_mode found_type = reqtype; - - entry_match = interval_iter_first(&memtype_rbroot, start, end-1); - if (entry_match == NULL) - goto success; - - if (entry_match->type != found_type && newtype == NULL) - goto failure; - - dprintk("Overlap at 0x%Lx-0x%Lx\n", entry_match->start, entry_match->end); - found_type = entry_match->type; - - entry_match = interval_iter_next(entry_match, start, end-1); - while (entry_match) { - if (entry_match->type != found_type) - goto failure; - - entry_match = interval_iter_next(entry_match, start, end-1); - } -success: - if (newtype) - *newtype = found_type; - - return 0; - -failure: - pr_info("x86/PAT: %s:%d conflicting memory types %Lx-%Lx %s<->%s\n", - current->comm, current->pid, start, end, - cattr_name(found_type), cattr_name(entry_match->type)); - - return -EBUSY; -} - -int memtype_check_insert(struct memtype *entry_new, enum page_cache_mode *ret_type) -{ - int err = 0; - - err = memtype_check_conflict(entry_new->start, entry_new->end, entry_new->type, ret_type); - if (err) - return err; - - if (ret_type) - entry_new->type = *ret_type; - - interval_insert(entry_new, &memtype_rbroot); - return 0; -} - -struct memtype *memtype_erase(u64 start, u64 end) -{ - struct memtype *entry_old; - - /* - * Since the memtype_rbroot tree allows overlapping ranges, - * memtype_erase() checks with EXACT_MATCH first, i.e. free - * a whole node for the munmap case. If no such entry is found, - * it then checks with END_MATCH, i.e. shrink the size of a node - * from the end for the mremap case. - */ - entry_old = memtype_match(start, end, MEMTYPE_EXACT_MATCH); - if (!entry_old) { - entry_old = memtype_match(start, end, MEMTYPE_END_MATCH); - if (!entry_old) - return ERR_PTR(-EINVAL); - } - - if (entry_old->start == start) { - /* munmap: erase this node */ - interval_remove(entry_old, &memtype_rbroot); - } else { - /* mremap: update the end value of this node */ - interval_remove(entry_old, &memtype_rbroot); - entry_old->end = start; - interval_insert(entry_old, &memtype_rbroot); - - return NULL; - } - - return entry_old; -} - -struct memtype *memtype_lookup(u64 addr) -{ - return interval_iter_first(&memtype_rbroot, addr, addr + PAGE_SIZE-1); -} - -/* - * Debugging helper, copy the Nth entry of the tree into a - * a copy for printout. This allows us to print out the tree - * via debugfs, without holding the memtype_lock too long: - */ -#ifdef CONFIG_DEBUG_FS -int memtype_copy_nth_element(struct memtype *entry_out, loff_t pos) -{ - struct memtype *entry_match; - int i = 1; - - entry_match = interval_iter_first(&memtype_rbroot, 0, ULONG_MAX); - - while (entry_match && pos != i) { - entry_match = interval_iter_next(entry_match, 0, ULONG_MAX); - i++; - } - - if (entry_match) { /* pos == i */ - *entry_out = *entry_match; - return 0; - } else { - return 1; - } -} -#endif -- cgit v1.2.3-59-g8ed1b From ecdd6ee77b73d11fcf2ca6739e4d1fe590446599 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 20 Nov 2019 15:30:44 +0100 Subject: x86/mm/pat: Standardize on memtype_*() prefix for APIs Half of our memtype APIs are memtype_ prefixed, the other half are _memtype suffixed: reserve_memtype() free_memtype() kernel_map_sync_memtype() io_reserve_memtype() io_free_memtype() memtype_check_insert() memtype_erase() memtype_lookup() memtype_copy_nth_element() Use prefixes consistently, like most other modern kernel APIs: reserve_memtype() => memtype_reserve() free_memtype() => memtype_free() kernel_map_sync_memtype() => memtype_kernel_map_sync() io_reserve_memtype() => memtype_reserve_io() io_free_memtype() => memtype_free_io() memtype_check_insert() => memtype_check_insert() memtype_erase() => memtype_erase() memtype_lookup() => memtype_lookup() memtype_copy_nth_element() => memtype_copy_nth_element() Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pat.h | 10 +++++----- arch/x86/mm/iomap_32.c | 4 ++-- arch/x86/mm/ioremap.c | 10 +++++----- arch/x86/mm/pat/memtype.c | 44 ++++++++++++++++++++++---------------------- arch/x86/mm/pat/set_memory.c | 16 ++++++++-------- 5 files changed, 42 insertions(+), 42 deletions(-) diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h index 92015c65fa2a..4a9a97d930e7 100644 --- a/arch/x86/include/asm/pat.h +++ b/arch/x86/include/asm/pat.h @@ -10,17 +10,17 @@ void pat_disable(const char *reason); extern void pat_init(void); extern void init_cache_modes(void); -extern int reserve_memtype(u64 start, u64 end, +extern int memtype_reserve(u64 start, u64 end, enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm); -extern int free_memtype(u64 start, u64 end); +extern int memtype_free(u64 start, u64 end); -extern int kernel_map_sync_memtype(u64 base, unsigned long size, +extern int memtype_kernel_map_sync(u64 base, unsigned long size, enum page_cache_mode pcm); -int io_reserve_memtype(resource_size_t start, resource_size_t end, +int memtype_reserve_io(resource_size_t start, resource_size_t end, enum page_cache_mode *pcm); -void io_free_memtype(resource_size_t start, resource_size_t end); +void memtype_free_io(resource_size_t start, resource_size_t end); bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn); diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 6748b4c2baff..4a0762ebe051 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -26,7 +26,7 @@ int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot) if (!is_io_mapping_possible(base, size)) return -EINVAL; - ret = io_reserve_memtype(base, base + size, &pcm); + ret = memtype_reserve_io(base, base + size, &pcm); if (ret) return ret; @@ -40,7 +40,7 @@ EXPORT_SYMBOL_GPL(iomap_create_wc); void iomap_free(resource_size_t base, unsigned long size) { - io_free_memtype(base, base + size); + memtype_free_io(base, base + size); } EXPORT_SYMBOL_GPL(iomap_free); diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index b3a2936377b5..e49de6cbc64e 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -196,10 +196,10 @@ __ioremap_caller(resource_size_t phys_addr, unsigned long size, phys_addr &= PHYSICAL_PAGE_MASK; size = PAGE_ALIGN(last_addr+1) - phys_addr; - retval = reserve_memtype(phys_addr, (u64)phys_addr + size, + retval = memtype_reserve(phys_addr, (u64)phys_addr + size, pcm, &new_pcm); if (retval) { - printk(KERN_ERR "ioremap reserve_memtype failed %d\n", retval); + printk(KERN_ERR "ioremap memtype_reserve failed %d\n", retval); return NULL; } @@ -255,7 +255,7 @@ __ioremap_caller(resource_size_t phys_addr, unsigned long size, area->phys_addr = phys_addr; vaddr = (unsigned long) area->addr; - if (kernel_map_sync_memtype(phys_addr, size, pcm)) + if (memtype_kernel_map_sync(phys_addr, size, pcm)) goto err_free_area; if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) @@ -275,7 +275,7 @@ __ioremap_caller(resource_size_t phys_addr, unsigned long size, err_free_area: free_vm_area(area); err_free_memtype: - free_memtype(phys_addr, phys_addr + size); + memtype_free(phys_addr, phys_addr + size); return NULL; } @@ -451,7 +451,7 @@ void iounmap(volatile void __iomem *addr) return; } - free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p)); + memtype_free(p->phys_addr, p->phys_addr + get_vm_area_size(p)); /* Finally remove it */ o = remove_vm_area((void __force *)addr); diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index 76532f080795..7ed37354e521 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -575,7 +575,7 @@ static u64 sanitize_phys(u64 address) * available type in new_type in case of no error. In case of any error * it will return a negative return value. */ -int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type, +int memtype_reserve(u64 start, u64 end, enum page_cache_mode req_type, enum page_cache_mode *new_type) { struct memtype *entry_new; @@ -638,7 +638,7 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type, err = memtype_check_insert(entry_new, new_type); if (err) { - pr_info("x86/PAT: reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n", + pr_info("x86/PAT: memtype_reserve failed [mem %#010Lx-%#010Lx], track %s, req %s\n", start, end - 1, cattr_name(entry_new->type), cattr_name(req_type)); kfree(entry_new); @@ -649,14 +649,14 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type, spin_unlock(&memtype_lock); - dprintk("reserve_memtype added [mem %#010Lx-%#010Lx], track %s, req %s, ret %s\n", + dprintk("memtype_reserve added [mem %#010Lx-%#010Lx], track %s, req %s, ret %s\n", start, end - 1, cattr_name(entry_new->type), cattr_name(req_type), new_type ? cattr_name(*new_type) : "-"); return err; } -int free_memtype(u64 start, u64 end) +int memtype_free(u64 start, u64 end) { int is_range_ram; struct memtype *entry_old; @@ -689,7 +689,7 @@ int free_memtype(u64 start, u64 end) kfree(entry_old); - dprintk("free_memtype request [mem %#010Lx-%#010Lx]\n", start, end - 1); + dprintk("memtype_free request [mem %#010Lx-%#010Lx]\n", start, end - 1); return 0; } @@ -752,7 +752,7 @@ bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn) EXPORT_SYMBOL_GPL(pat_pfn_immune_to_uc_mtrr); /** - * io_reserve_memtype - Request a memory type mapping for a region of memory + * memtype_reserve_io - Request a memory type mapping for a region of memory * @start: start (physical address) of the region * @end: end (physical address) of the region * @type: A pointer to memtype, with requested type. On success, requested @@ -761,7 +761,7 @@ EXPORT_SYMBOL_GPL(pat_pfn_immune_to_uc_mtrr); * On success, returns 0 * On failure, returns non-zero */ -int io_reserve_memtype(resource_size_t start, resource_size_t end, +int memtype_reserve_io(resource_size_t start, resource_size_t end, enum page_cache_mode *type) { resource_size_t size = end - start; @@ -771,47 +771,47 @@ int io_reserve_memtype(resource_size_t start, resource_size_t end, WARN_ON_ONCE(iomem_map_sanity_check(start, size)); - ret = reserve_memtype(start, end, req_type, &new_type); + ret = memtype_reserve(start, end, req_type, &new_type); if (ret) goto out_err; if (!is_new_memtype_allowed(start, size, req_type, new_type)) goto out_free; - if (kernel_map_sync_memtype(start, size, new_type) < 0) + if (memtype_kernel_map_sync(start, size, new_type) < 0) goto out_free; *type = new_type; return 0; out_free: - free_memtype(start, end); + memtype_free(start, end); ret = -EBUSY; out_err: return ret; } /** - * io_free_memtype - Release a memory type mapping for a region of memory + * memtype_free_io - Release a memory type mapping for a region of memory * @start: start (physical address) of the region * @end: end (physical address) of the region */ -void io_free_memtype(resource_size_t start, resource_size_t end) +void memtype_free_io(resource_size_t start, resource_size_t end) { - free_memtype(start, end); + memtype_free(start, end); } int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size) { enum page_cache_mode type = _PAGE_CACHE_MODE_WC; - return io_reserve_memtype(start, start + size, &type); + return memtype_reserve_io(start, start + size, &type); } EXPORT_SYMBOL(arch_io_reserve_memtype_wc); void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size) { - io_free_memtype(start, start + size); + memtype_free_io(start, start + size); } EXPORT_SYMBOL(arch_io_free_memtype_wc); @@ -871,7 +871,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, * Change the memory type for the physical address range in kernel identity * mapping space if that range is a part of identity map. */ -int kernel_map_sync_memtype(u64 base, unsigned long size, +int memtype_kernel_map_sync(u64 base, unsigned long size, enum page_cache_mode pcm) { unsigned long id_sz; @@ -901,7 +901,7 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, /* * Internal interface to reserve a range of physical memory with prot. - * Reserved non RAM regions only and after successful reserve_memtype, + * Reserved non RAM regions only and after successful memtype_reserve, * this func also keeps identity mapping (if any) in sync with this new prot. */ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, @@ -938,14 +938,14 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, return 0; } - ret = reserve_memtype(paddr, paddr + size, want_pcm, &pcm); + ret = memtype_reserve(paddr, paddr + size, want_pcm, &pcm); if (ret) return ret; if (pcm != want_pcm) { if (strict_prot || !is_new_memtype_allowed(paddr, size, want_pcm, pcm)) { - free_memtype(paddr, paddr + size); + memtype_free(paddr, paddr + size); pr_err("x86/PAT: %s:%d map pfn expected mapping type %s for [mem %#010Lx-%#010Lx], got %s\n", current->comm, current->pid, cattr_name(want_pcm), @@ -963,8 +963,8 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, cachemode2protval(pcm)); } - if (kernel_map_sync_memtype(paddr, size, pcm) < 0) { - free_memtype(paddr, paddr + size); + if (memtype_kernel_map_sync(paddr, size, pcm) < 0) { + memtype_free(paddr, paddr + size); return -EINVAL; } return 0; @@ -980,7 +980,7 @@ static void free_pfn_range(u64 paddr, unsigned long size) is_ram = pat_pagerange_is_ram(paddr, paddr + size); if (is_ram == 0) - free_memtype(paddr, paddr + size); + memtype_free(paddr, paddr + size); } /* diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 8fbefee6fc6d..3e5e98b78bf3 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -1801,7 +1801,7 @@ int set_memory_uc(unsigned long addr, int numpages) /* * for now UC MINUS. see comments in ioremap() */ - ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, + ret = memtype_reserve(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, _PAGE_CACHE_MODE_UC_MINUS, NULL); if (ret) goto out_err; @@ -1813,7 +1813,7 @@ int set_memory_uc(unsigned long addr, int numpages) return 0; out_free: - free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); + memtype_free(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); out_err: return ret; } @@ -1839,14 +1839,14 @@ int set_memory_wc(unsigned long addr, int numpages) { int ret; - ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, + ret = memtype_reserve(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, _PAGE_CACHE_MODE_WC, NULL); if (ret) return ret; ret = _set_memory_wc(addr, numpages); if (ret) - free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); + memtype_free(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); return ret; } @@ -1873,7 +1873,7 @@ int set_memory_wb(unsigned long addr, int numpages) if (ret) return ret; - free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); + memtype_free(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); return 0; } EXPORT_SYMBOL(set_memory_wb); @@ -2014,7 +2014,7 @@ static int _set_pages_array(struct page **pages, int numpages, continue; start = page_to_pfn(pages[i]) << PAGE_SHIFT; end = start + PAGE_SIZE; - if (reserve_memtype(start, end, new_type, NULL)) + if (memtype_reserve(start, end, new_type, NULL)) goto err_out; } @@ -2040,7 +2040,7 @@ err_out: continue; start = page_to_pfn(pages[i]) << PAGE_SHIFT; end = start + PAGE_SIZE; - free_memtype(start, end); + memtype_free(start, end); } return -EINVAL; } @@ -2089,7 +2089,7 @@ int set_pages_array_wb(struct page **pages, int numpages) continue; start = page_to_pfn(pages[i]) << PAGE_SHIFT; end = start + PAGE_SIZE; - free_memtype(start, end); + memtype_free(start, end); } return 0; -- cgit v1.2.3-59-g8ed1b From eb243d1d28663c9b92010973a6a3ffa947f682ba Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 20 Nov 2019 15:33:57 +0100 Subject: x86/mm/pat: Rename => pat.h is a file whose main purpose is to provide the memtype_*() APIs. PAT is the low level hardware mechanism - but the high level abstraction is memtype. So name the header as well - this goes hand in hand with memtype.c and memtype_interval.c. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/memtype.h | 27 +++++++++++++++++++++++++++ arch/x86/include/asm/mtrr.h | 2 +- arch/x86/include/asm/pat.h | 27 --------------------------- arch/x86/include/asm/pci.h | 2 +- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/cpu/mtrr/generic.c | 2 +- arch/x86/kernel/cpu/mtrr/mtrr.c | 2 +- arch/x86/kernel/cpu/scattered.c | 2 +- arch/x86/kernel/cpu/topology.c | 2 +- arch/x86/kernel/x86_init.c | 2 +- arch/x86/kvm/mmu/mmu.c | 2 +- arch/x86/mm/iomap_32.c | 2 +- arch/x86/mm/ioremap.c | 2 +- arch/x86/mm/pat/memtype.c | 2 +- arch/x86/mm/pat/memtype_interval.c | 2 +- arch/x86/mm/pat/set_memory.c | 2 +- arch/x86/pci/i386.c | 2 +- arch/x86/xen/mmu_pv.c | 2 +- drivers/infiniband/hw/mlx5/main.c | 2 +- drivers/media/pci/ivtv/ivtvfb.c | 2 +- 20 files changed, 45 insertions(+), 45 deletions(-) create mode 100644 arch/x86/include/asm/memtype.h delete mode 100644 arch/x86/include/asm/pat.h diff --git a/arch/x86/include/asm/memtype.h b/arch/x86/include/asm/memtype.h new file mode 100644 index 000000000000..ec18e38ae391 --- /dev/null +++ b/arch/x86/include/asm/memtype.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_MEMTYPE_H +#define _ASM_X86_MEMTYPE_H + +#include +#include + +bool pat_enabled(void); +void pat_disable(const char *reason); +extern void pat_init(void); +extern void init_cache_modes(void); + +extern int memtype_reserve(u64 start, u64 end, + enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm); +extern int memtype_free(u64 start, u64 end); + +extern int memtype_kernel_map_sync(u64 base, unsigned long size, + enum page_cache_mode pcm); + +int memtype_reserve_io(resource_size_t start, resource_size_t end, + enum page_cache_mode *pcm); + +void memtype_free_io(resource_size_t start, resource_size_t end); + +bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn); + +#endif /* _ASM_X86_MEMTYPE_H */ diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index 3337d2233aef..829df26fd7a3 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -24,7 +24,7 @@ #define _ASM_X86_MTRR_H #include -#include +#include /* diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h deleted file mode 100644 index 4a9a97d930e7..000000000000 --- a/arch/x86/include/asm/pat.h +++ /dev/null @@ -1,27 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_PAT_H -#define _ASM_X86_PAT_H - -#include -#include - -bool pat_enabled(void); -void pat_disable(const char *reason); -extern void pat_init(void); -extern void init_cache_modes(void); - -extern int memtype_reserve(u64 start, u64 end, - enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm); -extern int memtype_free(u64 start, u64 end); - -extern int memtype_kernel_map_sync(u64 base, unsigned long size, - enum page_cache_mode pcm); - -int memtype_reserve_io(resource_size_t start, resource_size_t end, - enum page_cache_mode *pcm); - -void memtype_free_io(resource_size_t start, resource_size_t end); - -bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn); - -#endif /* _ASM_X86_PAT_H */ diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 90d0731fdcb6..c1fdd43fe187 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include struct pci_sysdata { diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 2e4d90294fe6..9d6a35a4586e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -49,7 +49,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index aa5c064a6a22..51b9190c628b 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include "mtrr.h" diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c index 507039c20128..6a80f36b5d59 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.c +++ b/arch/x86/kernel/cpu/mtrr/mtrr.c @@ -52,7 +52,7 @@ #include #include #include -#include +#include #include "mtrr.h" diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index adf9b71386ef..62b137c3c97a 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -4,7 +4,7 @@ */ #include -#include +#include #include #include diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index ee48c3fc8a65..d3a0791bc052 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include "cpu.h" diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index ce89430a7f80..23e25f3034c2 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 6f92b40d798c..a32b847a8089 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -40,7 +40,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 4a0762ebe051..f60398aeb644 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -4,7 +4,7 @@ */ #include -#include +#include #include #include diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index e49de6cbc64e..44e4beb4239f 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include "physaddr.h" diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index 7ed37354e521..394be8611748 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -52,7 +52,7 @@ #include #include #include -#include +#include #include #include "memtype.h" diff --git a/arch/x86/mm/pat/memtype_interval.c b/arch/x86/mm/pat/memtype_interval.c index a7fbbdd88eeb..a07e4882bf36 100644 --- a/arch/x86/mm/pat/memtype_interval.c +++ b/arch/x86/mm/pat/memtype_interval.c @@ -16,7 +16,7 @@ #include #include -#include +#include #include "memtype.h" diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 3e5e98b78bf3..d4ab493b1647 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include "../mm_internal.h" diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 9df652d3d927..fa855bbaebaf 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -34,7 +34,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index c8dbee62ec2a..bbba8b17829a 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -67,7 +67,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 51100350b688..c0c2c56b7a80 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -40,7 +40,7 @@ #include #include #if defined(CONFIG_X86) -#include +#include #endif #include #include diff --git a/drivers/media/pci/ivtv/ivtvfb.c b/drivers/media/pci/ivtv/ivtvfb.c index 95a56cce9b65..1daf9e07cad7 100644 --- a/drivers/media/pci/ivtv/ivtvfb.c +++ b/drivers/media/pci/ivtv/ivtvfb.c @@ -37,7 +37,7 @@ #include #ifdef CONFIG_X86_64 -#include +#include #endif /* card parameters */ -- cgit v1.2.3-59-g8ed1b From 533d49b37a2b532354d3841a142173b8321818df Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 20 Nov 2019 15:45:32 +0100 Subject: x86/mm/pat: Clean up externs Half of the declarations have an 'extern', half of them not, use 'extern' consistently. This makes grepping for APIs easier, such as: dagon:~/tip> git grep -E '\ --- arch/x86/include/asm/memtype.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/memtype.h b/arch/x86/include/asm/memtype.h index ec18e38ae391..9c2447b3555d 100644 --- a/arch/x86/include/asm/memtype.h +++ b/arch/x86/include/asm/memtype.h @@ -5,8 +5,8 @@ #include #include -bool pat_enabled(void); -void pat_disable(const char *reason); +extern bool pat_enabled(void); +extern void pat_disable(const char *reason); extern void pat_init(void); extern void init_cache_modes(void); @@ -17,11 +17,11 @@ extern int memtype_free(u64 start, u64 end); extern int memtype_kernel_map_sync(u64 base, unsigned long size, enum page_cache_mode pcm); -int memtype_reserve_io(resource_size_t start, resource_size_t end, +extern int memtype_reserve_io(resource_size_t start, resource_size_t end, enum page_cache_mode *pcm); -void memtype_free_io(resource_size_t start, resource_size_t end); +extern void memtype_free_io(resource_size_t start, resource_size_t end); -bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn); +extern bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn); #endif /* _ASM_X86_MEMTYPE_H */ -- cgit v1.2.3-59-g8ed1b From b75baaf3a81e71680f3d50965c9330b36993fbad Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 20 Nov 2019 15:57:04 +0100 Subject: x86/mm/pat: Fix typo in the Kconfig help text Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5e8949953660..b6778575cf6f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1512,7 +1512,7 @@ config X86_CPA_STATISTICS bool "Enable statistic for Change Page Attribute" depends on DEBUG_FS ---help--- - Expose statistics about the Change Page Attribute mechanims, which + Expose statistics about the Change Page Attribute mechanism, which helps to determine the effectiveness of preserving large and huge page mappings when mapping protections are changed. -- cgit v1.2.3-59-g8ed1b From 4efb56649132687b5093d90aa62887595e65a09d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 20 Nov 2019 17:38:39 +0100 Subject: x86/mm: Tabulate the page table encoding definitions I got lost in trying to figure out which bits were enabled in one of the PTE masks, so let's make it pretty obvious at the definition site already: #define PAGE_NONE __pg( 0| 0| 0|___A| 0| 0| 0|___G) #define PAGE_SHARED __pg(__PP|__RW|_USR|___A|__NX| 0| 0| 0) #define PAGE_SHARED_EXEC __pg(__PP|__RW|_USR|___A| 0| 0| 0| 0) #define PAGE_COPY_NOEXEC __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0) #define PAGE_COPY_EXEC __pg(__PP| 0|_USR|___A| 0| 0| 0| 0) #define PAGE_COPY __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0) #define PAGE_READONLY __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0) #define PAGE_READONLY_EXEC __pg(__PP| 0|_USR|___A| 0| 0| 0| 0) #define __PAGE_KERNEL (__PP|__RW| 0|___A|__NX|___D| 0|___G) #define __PAGE_KERNEL_EXEC (__PP|__RW| 0|___A| 0|___D| 0|___G) #define _KERNPG_TABLE_NOENC (__PP|__RW| 0|___A| 0|___D| 0| 0) #define _KERNPG_TABLE (__PP|__RW| 0|___A| 0|___D| 0| 0| _ENC) #define _PAGE_TABLE_NOENC (__PP|__RW|_USR|___A| 0|___D| 0| 0) #define _PAGE_TABLE (__PP|__RW|_USR|___A| 0|___D| 0| 0| _ENC) #define __PAGE_KERNEL_RO (__PP| 0| 0|___A|__NX|___D| 0|___G) #define __PAGE_KERNEL_RX (__PP| 0| 0|___A| 0|___D| 0|___G) #define __PAGE_KERNEL_NOCACHE (__PP|__RW| 0|___A|__NX|___D| 0|___G| __NC) #define __PAGE_KERNEL_VVAR (__PP| 0|_USR|___A|__NX|___D| 0|___G) #define __PAGE_KERNEL_LARGE (__PP|__RW| 0|___A|__NX|___D|_PSE|___G) #define __PAGE_KERNEL_LARGE_EXEC (__PP|__RW| 0|___A| 0|___D|_PSE|___G) #define __PAGE_KERNEL_WP (__PP|__RW| 0|___A|__NX|___D| 0|___G| __WP) Especially security relevant bits like 'NX' or coherence related bits like 'G' are now super easy to read based on a single grep. We do the underscore gymnastics to not pollute the kernel's symbol namespace, and the longest line still fits into 80 columns, so this should be readable for everyone. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable_types.h | 143 ++++++++++++++++++----------------- 1 file changed, 74 insertions(+), 69 deletions(-) diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index b5e49e6bac63..ea7400726d7a 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -110,11 +110,6 @@ #define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) -#define _PAGE_TABLE_NOENC (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |\ - _PAGE_ACCESSED | _PAGE_DIRTY) -#define _KERNPG_TABLE_NOENC (_PAGE_PRESENT | _PAGE_RW | \ - _PAGE_ACCESSED | _PAGE_DIRTY) - /* * Set of bits not changed in pte_modify. The pte's * protection key is treated like _PAGE_RW, for @@ -136,80 +131,93 @@ */ #ifndef __ASSEMBLY__ enum page_cache_mode { - _PAGE_CACHE_MODE_WB = 0, - _PAGE_CACHE_MODE_WC = 1, + _PAGE_CACHE_MODE_WB = 0, + _PAGE_CACHE_MODE_WC = 1, _PAGE_CACHE_MODE_UC_MINUS = 2, - _PAGE_CACHE_MODE_UC = 3, - _PAGE_CACHE_MODE_WT = 4, - _PAGE_CACHE_MODE_WP = 5, - _PAGE_CACHE_MODE_NUM = 8 + _PAGE_CACHE_MODE_UC = 3, + _PAGE_CACHE_MODE_WT = 4, + _PAGE_CACHE_MODE_WP = 5, + + _PAGE_CACHE_MODE_NUM = 8 }; #endif -#define _PAGE_CACHE_MASK (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT) -#define _PAGE_NOCACHE (cachemode2protval(_PAGE_CACHE_MODE_UC)) -#define _PAGE_CACHE_WP (cachemode2protval(_PAGE_CACHE_MODE_WP)) +#define _PAGE_ENC (_AT(pteval_t, sme_me_mask)) -#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) -#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ - _PAGE_ACCESSED | _PAGE_NX) - -#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | \ - _PAGE_USER | _PAGE_ACCESSED) -#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ - _PAGE_ACCESSED | _PAGE_NX) -#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ - _PAGE_ACCESSED) -#define PAGE_COPY PAGE_COPY_NOEXEC -#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | \ - _PAGE_ACCESSED | _PAGE_NX) -#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ - _PAGE_ACCESSED) - -#define __PAGE_KERNEL_EXEC \ - (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_GLOBAL) -#define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX) - -#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) -#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) -#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_NOCACHE) -#define __PAGE_KERNEL_VVAR (__PAGE_KERNEL_RO | _PAGE_USER) -#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) -#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) -#define __PAGE_KERNEL_WP (__PAGE_KERNEL | _PAGE_CACHE_WP) - -#define __PAGE_KERNEL_IO (__PAGE_KERNEL) -#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE) +#define _PAGE_CACHE_MASK (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT) -#ifndef __ASSEMBLY__ +#define _PAGE_NOCACHE (cachemode2protval(_PAGE_CACHE_MODE_UC)) +#define _PAGE_CACHE_WP (cachemode2protval(_PAGE_CACHE_MODE_WP)) -#define _PAGE_ENC (_AT(pteval_t, sme_me_mask)) +#define __PP _PAGE_PRESENT +#define __RW _PAGE_RW +#define _USR _PAGE_USER +#define ___A _PAGE_ACCESSED +#define ___D _PAGE_DIRTY +#define ___G _PAGE_GLOBAL +#define __NX _PAGE_NX + +#define _ENC _PAGE_ENC +#define __WP _PAGE_CACHE_WP +#define __NC _PAGE_NOCACHE +#define _PSE _PAGE_PSE + +#define pgprot_val(x) ((x).pgprot) +#define __pgprot(x) ((pgprot_t) { (x) } ) +#define __pg(x) __pgprot(x) + +#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) + +#define PAGE_NONE __pg( 0| 0| 0|___A| 0| 0| 0|___G) +#define PAGE_SHARED __pg(__PP|__RW|_USR|___A|__NX| 0| 0| 0) +#define PAGE_SHARED_EXEC __pg(__PP|__RW|_USR|___A| 0| 0| 0| 0) +#define PAGE_COPY_NOEXEC __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0) +#define PAGE_COPY_EXEC __pg(__PP| 0|_USR|___A| 0| 0| 0| 0) +#define PAGE_COPY __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0) +#define PAGE_READONLY __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0) +#define PAGE_READONLY_EXEC __pg(__PP| 0|_USR|___A| 0| 0| 0| 0) + +#define __PAGE_KERNEL (__PP|__RW| 0|___A|__NX|___D| 0|___G) +#define __PAGE_KERNEL_EXEC (__PP|__RW| 0|___A| 0|___D| 0|___G) +#define _KERNPG_TABLE_NOENC (__PP|__RW| 0|___A| 0|___D| 0| 0) +#define _KERNPG_TABLE (__PP|__RW| 0|___A| 0|___D| 0| 0| _ENC) +#define _PAGE_TABLE_NOENC (__PP|__RW|_USR|___A| 0|___D| 0| 0) +#define _PAGE_TABLE (__PP|__RW|_USR|___A| 0|___D| 0| 0| _ENC) +#define __PAGE_KERNEL_RO (__PP| 0| 0|___A|__NX|___D| 0|___G) +#define __PAGE_KERNEL_RX (__PP| 0| 0|___A| 0|___D| 0|___G) +#define __PAGE_KERNEL_NOCACHE (__PP|__RW| 0|___A|__NX|___D| 0|___G| __NC) +#define __PAGE_KERNEL_VVAR (__PP| 0|_USR|___A|__NX|___D| 0|___G) +#define __PAGE_KERNEL_LARGE (__PP|__RW| 0|___A|__NX|___D|_PSE|___G) +#define __PAGE_KERNEL_LARGE_EXEC (__PP|__RW| 0|___A| 0|___D|_PSE|___G) +#define __PAGE_KERNEL_WP (__PP|__RW| 0|___A|__NX|___D| 0|___G| __WP) + + +#define __PAGE_KERNEL_IO __PAGE_KERNEL +#define __PAGE_KERNEL_IO_NOCACHE __PAGE_KERNEL_NOCACHE -#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \ - _PAGE_DIRTY | _PAGE_ENC) -#define _PAGE_TABLE (_KERNPG_TABLE | _PAGE_USER) -#define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _PAGE_ENC) -#define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _PAGE_ENC) +#ifndef __ASSEMBLY__ -#define __PAGE_KERNEL_NOENC (__PAGE_KERNEL) -#define __PAGE_KERNEL_NOENC_WP (__PAGE_KERNEL_WP) +#define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _ENC) +#define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _ENC) +#define __PAGE_KERNEL_NOENC (__PAGE_KERNEL | 0) +#define __PAGE_KERNEL_NOENC_WP (__PAGE_KERNEL_WP | 0) -#define default_pgprot(x) __pgprot((x) & __default_kernel_pte_mask) +#define __pgprot_mask(x) __pgprot((x) & __default_kernel_pte_mask) -#define PAGE_KERNEL default_pgprot(__PAGE_KERNEL | _PAGE_ENC) -#define PAGE_KERNEL_NOENC default_pgprot(__PAGE_KERNEL) -#define PAGE_KERNEL_RO default_pgprot(__PAGE_KERNEL_RO | _PAGE_ENC) -#define PAGE_KERNEL_EXEC default_pgprot(__PAGE_KERNEL_EXEC | _PAGE_ENC) -#define PAGE_KERNEL_EXEC_NOENC default_pgprot(__PAGE_KERNEL_EXEC) -#define PAGE_KERNEL_RX default_pgprot(__PAGE_KERNEL_RX | _PAGE_ENC) -#define PAGE_KERNEL_NOCACHE default_pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC) -#define PAGE_KERNEL_LARGE default_pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC) -#define PAGE_KERNEL_LARGE_EXEC default_pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC) -#define PAGE_KERNEL_VVAR default_pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC) +#define PAGE_KERNEL __pgprot_mask(__PAGE_KERNEL | _ENC) +#define PAGE_KERNEL_NOENC __pgprot_mask(__PAGE_KERNEL | 0) +#define PAGE_KERNEL_RO __pgprot_mask(__PAGE_KERNEL_RO | _ENC) +#define PAGE_KERNEL_EXEC __pgprot_mask(__PAGE_KERNEL_EXEC | _ENC) +#define PAGE_KERNEL_EXEC_NOENC __pgprot_mask(__PAGE_KERNEL_EXEC | 0) +#define PAGE_KERNEL_RX __pgprot_mask(__PAGE_KERNEL_RX | _ENC) +#define PAGE_KERNEL_NOCACHE __pgprot_mask(__PAGE_KERNEL_NOCACHE | _ENC) +#define PAGE_KERNEL_LARGE __pgprot_mask(__PAGE_KERNEL_LARGE | _ENC) +#define PAGE_KERNEL_LARGE_EXEC __pgprot_mask(__PAGE_KERNEL_LARGE_EXEC | _ENC) +#define PAGE_KERNEL_VVAR __pgprot_mask(__PAGE_KERNEL_VVAR | _ENC) -#define PAGE_KERNEL_IO default_pgprot(__PAGE_KERNEL_IO) -#define PAGE_KERNEL_IO_NOCACHE default_pgprot(__PAGE_KERNEL_IO_NOCACHE) +#define PAGE_KERNEL_IO __pgprot_mask(__PAGE_KERNEL_IO) +#define PAGE_KERNEL_IO_NOCACHE __pgprot_mask(__PAGE_KERNEL_IO_NOCACHE) #endif /* __ASSEMBLY__ */ @@ -449,9 +457,6 @@ static inline pteval_t pte_flags(pte_t pte) return native_pte_val(pte) & PTE_FLAGS_MASK; } -#define pgprot_val(x) ((x).pgprot) -#define __pgprot(x) ((pgprot_t) { (x) } ) - extern uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM]; extern uint8_t __pte2cachemode_tbl[8]; -- cgit v1.2.3-59-g8ed1b From da9144c5ad8943f9003ec4a6a0200637b4ba9ebd Mon Sep 17 00:00:00 2001 From: kbuild test robot Date: Sat, 23 Nov 2019 23:30:23 +0800 Subject: x86/mm/pat: Mark __cpa_flush_tlb() as static Signed-off-by: kbuild test robot Link: https://lkml.kernel.org/r/20191123153023.bj6m66scjeubhbjg@4978f4969bb8 Signed-off-by: Ingo Molnar --- arch/x86/mm/pat/set_memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index d4ab493b1647..20823392f4f2 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -331,7 +331,7 @@ static void cpa_flush_all(unsigned long cache) on_each_cpu(__cpa_flush_all, (void *) cache, 1); } -void __cpa_flush_tlb(void *data) +static void __cpa_flush_tlb(void *data) { struct cpa_data *cpa = data; unsigned int i; -- cgit v1.2.3-59-g8ed1b From 1f059dfdf5d170dccbac92193be2fee3c1763384 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 28 Nov 2019 08:19:36 +0100 Subject: mm/vmalloc: Add empty headers and use them from In the x86 MM code we'd like to untangle various types of historic header dependency spaghetti, but for this we'd need to pass to the generic vmalloc code various vmalloc related defines that customarily come via the low level arch header. Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/vmalloc.h | 4 ++++ arch/arc/include/asm/vmalloc.h | 4 ++++ arch/arm/include/asm/vmalloc.h | 4 ++++ arch/arm64/include/asm/vmalloc.h | 4 ++++ arch/c6x/include/asm/vmalloc.h | 4 ++++ arch/csky/include/asm/vmalloc.h | 4 ++++ arch/h8300/include/asm/vmalloc.h | 4 ++++ arch/hexagon/include/asm/vmalloc.h | 4 ++++ arch/ia64/include/asm/vmalloc.h | 4 ++++ arch/m68k/include/asm/vmalloc.h | 4 ++++ arch/microblaze/include/asm/vmalloc.h | 4 ++++ arch/mips/include/asm/vmalloc.h | 4 ++++ arch/nds32/include/asm/vmalloc.h | 4 ++++ arch/nios2/include/asm/vmalloc.h | 4 ++++ arch/openrisc/include/asm/vmalloc.h | 4 ++++ arch/parisc/include/asm/vmalloc.h | 4 ++++ arch/powerpc/include/asm/vmalloc.h | 4 ++++ arch/riscv/include/asm/vmalloc.h | 4 ++++ arch/s390/include/asm/vmalloc.h | 4 ++++ arch/sh/include/asm/vmalloc.h | 4 ++++ arch/sparc/include/asm/vmalloc.h | 4 ++++ arch/um/include/asm/vmalloc.h | 4 ++++ arch/unicore32/include/asm/vmalloc.h | 4 ++++ arch/x86/include/asm/vmalloc.h | 4 ++++ arch/xtensa/include/asm/vmalloc.h | 4 ++++ include/linux/vmalloc.h | 2 ++ 26 files changed, 102 insertions(+) create mode 100644 arch/alpha/include/asm/vmalloc.h create mode 100644 arch/arc/include/asm/vmalloc.h create mode 100644 arch/arm/include/asm/vmalloc.h create mode 100644 arch/arm64/include/asm/vmalloc.h create mode 100644 arch/c6x/include/asm/vmalloc.h create mode 100644 arch/csky/include/asm/vmalloc.h create mode 100644 arch/h8300/include/asm/vmalloc.h create mode 100644 arch/hexagon/include/asm/vmalloc.h create mode 100644 arch/ia64/include/asm/vmalloc.h create mode 100644 arch/m68k/include/asm/vmalloc.h create mode 100644 arch/microblaze/include/asm/vmalloc.h create mode 100644 arch/mips/include/asm/vmalloc.h create mode 100644 arch/nds32/include/asm/vmalloc.h create mode 100644 arch/nios2/include/asm/vmalloc.h create mode 100644 arch/openrisc/include/asm/vmalloc.h create mode 100644 arch/parisc/include/asm/vmalloc.h create mode 100644 arch/powerpc/include/asm/vmalloc.h create mode 100644 arch/riscv/include/asm/vmalloc.h create mode 100644 arch/s390/include/asm/vmalloc.h create mode 100644 arch/sh/include/asm/vmalloc.h create mode 100644 arch/sparc/include/asm/vmalloc.h create mode 100644 arch/um/include/asm/vmalloc.h create mode 100644 arch/unicore32/include/asm/vmalloc.h create mode 100644 arch/x86/include/asm/vmalloc.h create mode 100644 arch/xtensa/include/asm/vmalloc.h diff --git a/arch/alpha/include/asm/vmalloc.h b/arch/alpha/include/asm/vmalloc.h new file mode 100644 index 000000000000..0a9a366a4d34 --- /dev/null +++ b/arch/alpha/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_ALPHA_VMALLOC_H +#define _ASM_ALPHA_VMALLOC_H + +#endif /* _ASM_ALPHA_VMALLOC_H */ diff --git a/arch/arc/include/asm/vmalloc.h b/arch/arc/include/asm/vmalloc.h new file mode 100644 index 000000000000..973095aad665 --- /dev/null +++ b/arch/arc/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_ARC_VMALLOC_H +#define _ASM_ARC_VMALLOC_H + +#endif /* _ASM_ARC_VMALLOC_H */ diff --git a/arch/arm/include/asm/vmalloc.h b/arch/arm/include/asm/vmalloc.h new file mode 100644 index 000000000000..a9b3718b8600 --- /dev/null +++ b/arch/arm/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_ARM_VMALLOC_H +#define _ASM_ARM_VMALLOC_H + +#endif /* _ASM_ARM_VMALLOC_H */ diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h new file mode 100644 index 000000000000..2ca708ab9b20 --- /dev/null +++ b/arch/arm64/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_ARM64_VMALLOC_H +#define _ASM_ARM64_VMALLOC_H + +#endif /* _ASM_ARM64_VMALLOC_H */ diff --git a/arch/c6x/include/asm/vmalloc.h b/arch/c6x/include/asm/vmalloc.h new file mode 100644 index 000000000000..26c6c6696bbd --- /dev/null +++ b/arch/c6x/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_C6X_VMALLOC_H +#define _ASM_C6X_VMALLOC_H + +#endif /* _ASM_C6X_VMALLOC_H */ diff --git a/arch/csky/include/asm/vmalloc.h b/arch/csky/include/asm/vmalloc.h new file mode 100644 index 000000000000..43dca6336b4c --- /dev/null +++ b/arch/csky/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_CSKY_VMALLOC_H +#define _ASM_CSKY_VMALLOC_H + +#endif /* _ASM_CSKY_VMALLOC_H */ diff --git a/arch/h8300/include/asm/vmalloc.h b/arch/h8300/include/asm/vmalloc.h new file mode 100644 index 000000000000..08a55c1dfa23 --- /dev/null +++ b/arch/h8300/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_H8300_VMALLOC_H +#define _ASM_H8300_VMALLOC_H + +#endif /* _ASM_H8300_VMALLOC_H */ diff --git a/arch/hexagon/include/asm/vmalloc.h b/arch/hexagon/include/asm/vmalloc.h new file mode 100644 index 000000000000..7b04609e525c --- /dev/null +++ b/arch/hexagon/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_HEXAGON_VMALLOC_H +#define _ASM_HEXAGON_VMALLOC_H + +#endif /* _ASM_HEXAGON_VMALLOC_H */ diff --git a/arch/ia64/include/asm/vmalloc.h b/arch/ia64/include/asm/vmalloc.h new file mode 100644 index 000000000000..a2b51141ad28 --- /dev/null +++ b/arch/ia64/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_IA64_VMALLOC_H +#define _ASM_IA64_VMALLOC_H + +#endif /* _ASM_IA64_VMALLOC_H */ diff --git a/arch/m68k/include/asm/vmalloc.h b/arch/m68k/include/asm/vmalloc.h new file mode 100644 index 000000000000..bc1dca6cf134 --- /dev/null +++ b/arch/m68k/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_M68K_VMALLOC_H +#define _ASM_M68K_VMALLOC_H + +#endif /* _ASM_M68K_VMALLOC_H */ diff --git a/arch/microblaze/include/asm/vmalloc.h b/arch/microblaze/include/asm/vmalloc.h new file mode 100644 index 000000000000..04013a42b0fe --- /dev/null +++ b/arch/microblaze/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_MICROBLAZE_VMALLOC_H +#define _ASM_MICROBLAZE_VMALLOC_H + +#endif /* _ASM_MICROBLAZE_VMALLOC_H */ diff --git a/arch/mips/include/asm/vmalloc.h b/arch/mips/include/asm/vmalloc.h new file mode 100644 index 000000000000..25dc09b25eaf --- /dev/null +++ b/arch/mips/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_MIPS_VMALLOC_H +#define _ASM_MIPS_VMALLOC_H + +#endif /* _ASM_MIPS_VMALLOC_H */ diff --git a/arch/nds32/include/asm/vmalloc.h b/arch/nds32/include/asm/vmalloc.h new file mode 100644 index 000000000000..caeed3898419 --- /dev/null +++ b/arch/nds32/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_NDS32_VMALLOC_H +#define _ASM_NDS32_VMALLOC_H + +#endif /* _ASM_NDS32_VMALLOC_H */ diff --git a/arch/nios2/include/asm/vmalloc.h b/arch/nios2/include/asm/vmalloc.h new file mode 100644 index 000000000000..ec7a9260090b --- /dev/null +++ b/arch/nios2/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_NIOS2_VMALLOC_H +#define _ASM_NIOS2_VMALLOC_H + +#endif /* _ASM_NIOS2_VMALLOC_H */ diff --git a/arch/openrisc/include/asm/vmalloc.h b/arch/openrisc/include/asm/vmalloc.h new file mode 100644 index 000000000000..75435eceec32 --- /dev/null +++ b/arch/openrisc/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_OPENRISC_VMALLOC_H +#define _ASM_OPENRISC_VMALLOC_H + +#endif /* _ASM_OPENRISC_VMALLOC_H */ diff --git a/arch/parisc/include/asm/vmalloc.h b/arch/parisc/include/asm/vmalloc.h new file mode 100644 index 000000000000..1088ae4e7af9 --- /dev/null +++ b/arch/parisc/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_PARISC_VMALLOC_H +#define _ASM_PARISC_VMALLOC_H + +#endif /* _ASM_PARISC_VMALLOC_H */ diff --git a/arch/powerpc/include/asm/vmalloc.h b/arch/powerpc/include/asm/vmalloc.h new file mode 100644 index 000000000000..b992dfaaa161 --- /dev/null +++ b/arch/powerpc/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_POWERPC_VMALLOC_H +#define _ASM_POWERPC_VMALLOC_H + +#endif /* _ASM_POWERPC_VMALLOC_H */ diff --git a/arch/riscv/include/asm/vmalloc.h b/arch/riscv/include/asm/vmalloc.h new file mode 100644 index 000000000000..ff9abc00d139 --- /dev/null +++ b/arch/riscv/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_RISCV_VMALLOC_H +#define _ASM_RISCV_VMALLOC_H + +#endif /* _ASM_RISCV_VMALLOC_H */ diff --git a/arch/s390/include/asm/vmalloc.h b/arch/s390/include/asm/vmalloc.h new file mode 100644 index 000000000000..3ba3a6bdca25 --- /dev/null +++ b/arch/s390/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_S390_VMALLOC_H +#define _ASM_S390_VMALLOC_H + +#endif /* _ASM_S390_VMALLOC_H */ diff --git a/arch/sh/include/asm/vmalloc.h b/arch/sh/include/asm/vmalloc.h new file mode 100644 index 000000000000..716b77472646 --- /dev/null +++ b/arch/sh/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_SH_VMALLOC_H +#define _ASM_SH_VMALLOC_H + +#endif /* _ASM_SH_VMALLOC_H */ diff --git a/arch/sparc/include/asm/vmalloc.h b/arch/sparc/include/asm/vmalloc.h new file mode 100644 index 000000000000..04b8ab9518b8 --- /dev/null +++ b/arch/sparc/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_SPARC_VMALLOC_H +#define _ASM_SPARC_VMALLOC_H + +#endif /* _ASM_SPARC_VMALLOC_H */ diff --git a/arch/um/include/asm/vmalloc.h b/arch/um/include/asm/vmalloc.h new file mode 100644 index 000000000000..9a7b9ed93733 --- /dev/null +++ b/arch/um/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_UM_VMALLOC_H +#define _ASM_UM_VMALLOC_H + +#endif /* _ASM_UM_VMALLOC_H */ diff --git a/arch/unicore32/include/asm/vmalloc.h b/arch/unicore32/include/asm/vmalloc.h new file mode 100644 index 000000000000..054435818a14 --- /dev/null +++ b/arch/unicore32/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_UNICORE32_VMALLOC_H +#define _ASM_UNICORE32_VMALLOC_H + +#endif /* _ASM_UNICORE32_VMALLOC_H */ diff --git a/arch/x86/include/asm/vmalloc.h b/arch/x86/include/asm/vmalloc.h new file mode 100644 index 000000000000..ba6295fa5876 --- /dev/null +++ b/arch/x86/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_X86_VMALLOC_H +#define _ASM_X86_VMALLOC_H + +#endif /* _ASM_X86_VMALLOC_H */ diff --git a/arch/xtensa/include/asm/vmalloc.h b/arch/xtensa/include/asm/vmalloc.h new file mode 100644 index 000000000000..0eb94b70be55 --- /dev/null +++ b/arch/xtensa/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_XTENSA_VMALLOC_H +#define _ASM_XTENSA_VMALLOC_H + +#endif /* _ASM_XTENSA_VMALLOC_H */ diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index a4b241102771..ec3813236699 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -10,6 +10,8 @@ #include #include +#include + struct vm_area_struct; /* vma defining user mapping in mm_types.h */ struct notifier_block; /* in notifier.h */ -- cgit v1.2.3-59-g8ed1b From 186525bd6b83efc592672e2d6185e4d7c810d2b4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 29 Nov 2019 08:17:25 +0100 Subject: mm, x86/mm: Untangle address space layout definitions from basic pgtable type definitions - Untangle the somewhat incestous way of how VMALLOC_START is used all across the kernel, but is, on x86, defined deep inside one of the lowest level page table headers. It doesn't help that vmalloc.h only includes a single asm header: #include /* pgprot_t */ So there was no existing cross-arch way to decouple address layout definitions from page.h details. I used this: #ifndef VMALLOC_START # include #endif This way every architecture that wants to simplify page.h can do so. - Also on x86 we had a couple of LDT related inline functions that used the late-stage address space layout positions - but these could be uninlined without real trouble - the end result is cleaner this way as well. Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Borislav Petkov Cc: Linus Torvalds Cc: Andrew Morton Cc: Rik van Riel Cc: linux-kernel@vger.kernel.org Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpu_entry_area.h | 10 +--- arch/x86/include/asm/mmu_context.h | 86 +++------------------------------ arch/x86/include/asm/pgtable_32_areas.h | 53 ++++++++++++++++++++ arch/x86/include/asm/pgtable_32_types.h | 57 ++-------------------- arch/x86/include/asm/pgtable_areas.h | 16 ++++++ arch/x86/include/asm/vmalloc.h | 2 + arch/x86/kernel/ldt.c | 83 +++++++++++++++++++++++++++++++ arch/x86/kernel/setup.c | 1 + arch/x86/mm/fault.c | 1 + arch/x86/mm/init_32.c | 1 + arch/x86/mm/pgtable_32.c | 1 + arch/x86/mm/physaddr.c | 1 + include/linux/mm.h | 15 ++---- mm/highmem.c | 2 +- mm/vmalloc.c | 8 +++ 15 files changed, 183 insertions(+), 154 deletions(-) create mode 100644 arch/x86/include/asm/pgtable_32_areas.h create mode 100644 arch/x86/include/asm/pgtable_areas.h diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index 804734058c77..02c0078d3787 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -6,6 +6,7 @@ #include #include #include +#include #ifdef CONFIG_X86_64 @@ -134,15 +135,6 @@ DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks); extern void setup_cpu_entry_areas(void); extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags); -/* Single page reserved for the readonly IDT mapping: */ -#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE -#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE) - -#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT) - -#define CPU_ENTRY_AREA_MAP_SIZE \ - (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE) - extern struct cpu_entry_area *get_cpu_entry_area(int cpu); static inline struct entry_stack *cpu_entry_stack(int cpu) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 5f33924e200f..b243234e90cb 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -69,14 +69,6 @@ struct ldt_struct { int slot; }; -/* This is a multiple of PAGE_SIZE. */ -#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE) - -static inline void *ldt_slot_va(int slot) -{ - return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot); -} - /* * Used for LDT copy/destruction. */ @@ -99,87 +91,21 @@ static inline void destroy_context_ldt(struct mm_struct *mm) { } static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { } #endif -static inline void load_mm_ldt(struct mm_struct *mm) -{ #ifdef CONFIG_MODIFY_LDT_SYSCALL - struct ldt_struct *ldt; - - /* READ_ONCE synchronizes with smp_store_release */ - ldt = READ_ONCE(mm->context.ldt); - - /* - * Any change to mm->context.ldt is followed by an IPI to all - * CPUs with the mm active. The LDT will not be freed until - * after the IPI is handled by all such CPUs. This means that, - * if the ldt_struct changes before we return, the values we see - * will be safe, and the new values will be loaded before we run - * any user code. - * - * NB: don't try to convert this to use RCU without extreme care. - * We would still need IRQs off, because we don't want to change - * the local LDT after an IPI loaded a newer value than the one - * that we can see. - */ - - if (unlikely(ldt)) { - if (static_cpu_has(X86_FEATURE_PTI)) { - if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) { - /* - * Whoops -- either the new LDT isn't mapped - * (if slot == -1) or is mapped into a bogus - * slot (if slot > 1). - */ - clear_LDT(); - return; - } - - /* - * If page table isolation is enabled, ldt->entries - * will not be mapped in the userspace pagetables. - * Tell the CPU to access the LDT through the alias - * at ldt_slot_va(ldt->slot). - */ - set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries); - } else { - set_ldt(ldt->entries, ldt->nr_entries); - } - } else { - clear_LDT(); - } +extern void load_mm_ldt(struct mm_struct *mm); +extern void switch_ldt(struct mm_struct *prev, struct mm_struct *next); #else +static inline void load_mm_ldt(struct mm_struct *mm) +{ clear_LDT(); -#endif } - static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next) { -#ifdef CONFIG_MODIFY_LDT_SYSCALL - /* - * Load the LDT if either the old or new mm had an LDT. - * - * An mm will never go from having an LDT to not having an LDT. Two - * mms never share an LDT, so we don't gain anything by checking to - * see whether the LDT changed. There's also no guarantee that - * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL, - * then prev->context.ldt will also be non-NULL. - * - * If we really cared, we could optimize the case where prev == next - * and we're exiting lazy mode. Most of the time, if this happens, - * we don't actually need to reload LDTR, but modify_ldt() is mostly - * used by legacy code and emulators where we don't need this level of - * performance. - * - * This uses | instead of || because it generates better code. - */ - if (unlikely((unsigned long)prev->context.ldt | - (unsigned long)next->context.ldt)) - load_mm_ldt(next); -#endif - DEBUG_LOCKS_WARN_ON(preemptible()); } +#endif -void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); +extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); /* * Init a new mm. Used on mm copies, like at fork() diff --git a/arch/x86/include/asm/pgtable_32_areas.h b/arch/x86/include/asm/pgtable_32_areas.h new file mode 100644 index 000000000000..b6355416a15a --- /dev/null +++ b/arch/x86/include/asm/pgtable_32_areas.h @@ -0,0 +1,53 @@ +#ifndef _ASM_X86_PGTABLE_32_AREAS_H +#define _ASM_X86_PGTABLE_32_AREAS_H + +#include + +/* + * Just any arbitrary offset to the start of the vmalloc VM area: the + * current 8MB value just means that there will be a 8MB "hole" after the + * physical memory until the kernel virtual memory starts. That means that + * any out-of-bounds memory accesses will hopefully be caught. + * The vmalloc() routines leaves a hole of 4kB between each vmalloced + * area for the same reason. ;) + */ +#define VMALLOC_OFFSET (8 * 1024 * 1024) + +#ifndef __ASSEMBLY__ +extern bool __vmalloc_start_set; /* set once high_memory is set */ +#endif + +#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET) +#ifdef CONFIG_X86_PAE +#define LAST_PKMAP 512 +#else +#define LAST_PKMAP 1024 +#endif + +#define CPU_ENTRY_AREA_PAGES (NR_CPUS * DIV_ROUND_UP(sizeof(struct cpu_entry_area), PAGE_SIZE)) + +/* The +1 is for the readonly IDT page: */ +#define CPU_ENTRY_AREA_BASE \ + ((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK) + +#define LDT_BASE_ADDR \ + ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK) + +#define LDT_END_ADDR (LDT_BASE_ADDR + PMD_SIZE) + +#define PKMAP_BASE \ + ((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK) + +#ifdef CONFIG_HIGHMEM +# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE) +#else +# define VMALLOC_END (LDT_BASE_ADDR - 2 * PAGE_SIZE) +#endif + +#define MODULES_VADDR VMALLOC_START +#define MODULES_END VMALLOC_END +#define MODULES_LEN (MODULES_VADDR - MODULES_END) + +#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE) + +#endif /* _ASM_X86_PGTABLE_32_AREAS_H */ diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index 0416d42e5bdd..5356a46b0373 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ b/arch/x86/include/asm/pgtable_32_types.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_PGTABLE_32_DEFS_H -#define _ASM_X86_PGTABLE_32_DEFS_H +#ifndef _ASM_X86_PGTABLE_32_TYPES_H +#define _ASM_X86_PGTABLE_32_TYPES_H /* * The Linux x86 paging architecture is 'compile-time dual-mode', it @@ -20,55 +20,4 @@ #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE - 1)) -/* Just any arbitrary offset to the start of the vmalloc VM area: the - * current 8MB value just means that there will be a 8MB "hole" after the - * physical memory until the kernel virtual memory starts. That means that - * any out-of-bounds memory accesses will hopefully be caught. - * The vmalloc() routines leaves a hole of 4kB between each vmalloced - * area for the same reason. ;) - */ -#define VMALLOC_OFFSET (8 * 1024 * 1024) - -#ifndef __ASSEMBLY__ -extern bool __vmalloc_start_set; /* set once high_memory is set */ -#endif - -#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET) -#ifdef CONFIG_X86_PAE -#define LAST_PKMAP 512 -#else -#define LAST_PKMAP 1024 -#endif - -/* - * This is an upper bound on sizeof(struct cpu_entry_area) / PAGE_SIZE. - * Define this here and validate with BUILD_BUG_ON() in cpu_entry_area.c - * to avoid include recursion hell. - */ -#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 43) - -/* The +1 is for the readonly IDT page: */ -#define CPU_ENTRY_AREA_BASE \ - ((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK) - -#define LDT_BASE_ADDR \ - ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK) - -#define LDT_END_ADDR (LDT_BASE_ADDR + PMD_SIZE) - -#define PKMAP_BASE \ - ((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK) - -#ifdef CONFIG_HIGHMEM -# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE) -#else -# define VMALLOC_END (LDT_BASE_ADDR - 2 * PAGE_SIZE) -#endif - -#define MODULES_VADDR VMALLOC_START -#define MODULES_END VMALLOC_END -#define MODULES_LEN (MODULES_VADDR - MODULES_END) - -#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE) - -#endif /* _ASM_X86_PGTABLE_32_DEFS_H */ +#endif /* _ASM_X86_PGTABLE_32_TYPES_H */ diff --git a/arch/x86/include/asm/pgtable_areas.h b/arch/x86/include/asm/pgtable_areas.h new file mode 100644 index 000000000000..d34cce1b995c --- /dev/null +++ b/arch/x86/include/asm/pgtable_areas.h @@ -0,0 +1,16 @@ +#ifndef _ASM_X86_PGTABLE_AREAS_H +#define _ASM_X86_PGTABLE_AREAS_H + +#ifdef CONFIG_X86_32 +# include +#endif + +/* Single page reserved for the readonly IDT mapping: */ +#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE +#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE) + +#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT) + +#define CPU_ENTRY_AREA_MAP_SIZE (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE) + +#endif /* _ASM_X86_PGTABLE_AREAS_H */ diff --git a/arch/x86/include/asm/vmalloc.h b/arch/x86/include/asm/vmalloc.h index ba6295fa5876..29837740b520 100644 --- a/arch/x86/include/asm/vmalloc.h +++ b/arch/x86/include/asm/vmalloc.h @@ -1,4 +1,6 @@ #ifndef _ASM_X86_VMALLOC_H #define _ASM_X86_VMALLOC_H +#include + #endif /* _ASM_X86_VMALLOC_H */ diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index b2463fcb20a8..c57e1ca70fd1 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -28,6 +28,89 @@ #include #include #include +#include + +/* This is a multiple of PAGE_SIZE. */ +#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE) + +static inline void *ldt_slot_va(int slot) +{ + return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot); +} + +void load_mm_ldt(struct mm_struct *mm) +{ + struct ldt_struct *ldt; + + /* READ_ONCE synchronizes with smp_store_release */ + ldt = READ_ONCE(mm->context.ldt); + + /* + * Any change to mm->context.ldt is followed by an IPI to all + * CPUs with the mm active. The LDT will not be freed until + * after the IPI is handled by all such CPUs. This means that, + * if the ldt_struct changes before we return, the values we see + * will be safe, and the new values will be loaded before we run + * any user code. + * + * NB: don't try to convert this to use RCU without extreme care. + * We would still need IRQs off, because we don't want to change + * the local LDT after an IPI loaded a newer value than the one + * that we can see. + */ + + if (unlikely(ldt)) { + if (static_cpu_has(X86_FEATURE_PTI)) { + if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) { + /* + * Whoops -- either the new LDT isn't mapped + * (if slot == -1) or is mapped into a bogus + * slot (if slot > 1). + */ + clear_LDT(); + return; + } + + /* + * If page table isolation is enabled, ldt->entries + * will not be mapped in the userspace pagetables. + * Tell the CPU to access the LDT through the alias + * at ldt_slot_va(ldt->slot). + */ + set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries); + } else { + set_ldt(ldt->entries, ldt->nr_entries); + } + } else { + clear_LDT(); + } +} + +void switch_ldt(struct mm_struct *prev, struct mm_struct *next) +{ + /* + * Load the LDT if either the old or new mm had an LDT. + * + * An mm will never go from having an LDT to not having an LDT. Two + * mms never share an LDT, so we don't gain anything by checking to + * see whether the LDT changed. There's also no guarantee that + * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL, + * then prev->context.ldt will also be non-NULL. + * + * If we really cared, we could optimize the case where prev == next + * and we're exiting lazy mode. Most of the time, if this happens, + * we don't actually need to reload LDTR, but modify_ldt() is mostly + * used by legacy code and emulators where we don't need this level of + * performance. + * + * This uses | instead of || because it generates better code. + */ + if (unlikely((unsigned long)prev->context.ldt | + (unsigned long)next->context.ldt)) + load_mm_ldt(next); + + DEBUG_LOCKS_WARN_ON(preemptible()); +} static void refresh_ldt_segments(void) { diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b5ac9932bcf6..90296a04e5ad 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -41,6 +41,7 @@ #include #include #include +#include /* * max_low_pfn_mapped: highest directly mapped pfn < 4 GB diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 304d31d8cbbc..c9c8523a3a48 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -29,6 +29,7 @@ #include /* efi_recover_from_page_fault()*/ #include /* store_idt(), ... */ #include /* exception stack */ +#include /* VMALLOC_START, ... */ #define CREATE_TRACE_POINTS #include diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 930edeb41ec3..16274a3f751c 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -52,6 +52,7 @@ #include #include #include +#include #include "mm_internal.h" diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 9bb7f0ab9fe6..0e6700eaa4f9 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -18,6 +18,7 @@ #include #include #include +#include unsigned int __VMALLOC_RESERVE = 128 << 20; diff --git a/arch/x86/mm/physaddr.c b/arch/x86/mm/physaddr.c index bdc98150d4db..fc3f3d3e2ef2 100644 --- a/arch/x86/mm/physaddr.c +++ b/arch/x86/mm/physaddr.c @@ -5,6 +5,7 @@ #include #include +#include #include "physaddr.h" diff --git a/include/linux/mm.h b/include/linux/mm.h index c97ea3b694e6..fb8f9412e2cf 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -625,24 +625,19 @@ unsigned long vmalloc_to_pfn(const void *addr); * On nommu, vmalloc/vfree wrap through kmalloc/kfree directly, so there * is no special casing required. */ -static inline bool is_vmalloc_addr(const void *x) -{ -#ifdef CONFIG_MMU - unsigned long addr = (unsigned long)x; - - return addr >= VMALLOC_START && addr < VMALLOC_END; -#else - return false; -#endif -} #ifndef is_ioremap_addr #define is_ioremap_addr(x) is_vmalloc_addr(x) #endif #ifdef CONFIG_MMU +extern bool is_vmalloc_addr(const void *x); extern int is_vmalloc_or_module_addr(const void *x); #else +static inline bool is_vmalloc_addr(const void *x) +{ + return false; +} static inline int is_vmalloc_or_module_addr(const void *x) { return 0; diff --git a/mm/highmem.c b/mm/highmem.c index 107b10f9878e..64d8dea47dd1 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -29,7 +29,7 @@ #include #include #include - +#include #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32) DEFINE_PER_CPU(int, __kmap_atomic_idx); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 4d3b3d60d893..19cdbb11fe07 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -41,6 +41,14 @@ #include "internal.h" +bool is_vmalloc_addr(const void *x) +{ + unsigned long addr = (unsigned long)x; + + return addr >= VMALLOC_START && addr < VMALLOC_END; +} +EXPORT_SYMBOL(is_vmalloc_addr); + struct vfree_deferred { struct llist_head list; struct work_struct wq; -- cgit v1.2.3-59-g8ed1b From 8d62af1778125bd674cc66e8432305cc6aac5d89 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Tue, 24 Dec 2019 16:10:01 +0100 Subject: efi/gop: Remove bogus packed attribute from GOP structures EFI structures are not packed, they follow natural alignment. The packed attribute doesn't have any effect on the structure layout due to the types and order of the members, and we only ever get these structures as output from the EFI firmware so alignment issues have not come up. Signed-off-by: Arvind Sankar Signed-off-by: Ard Biesheuvel Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-2-ardb@kernel.org Signed-off-by: Ingo Molnar --- include/linux/efi.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/efi.h b/include/linux/efi.h index aa54586db7a5..83a62f5c3fd7 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1429,7 +1429,7 @@ struct efi_graphics_output_mode_info { int pixel_format; struct efi_pixel_bitmask pixel_information; u32 pixels_per_scan_line; -} __packed; +}; struct efi_graphics_output_protocol_mode_32 { u32 max_mode; @@ -1438,7 +1438,7 @@ struct efi_graphics_output_protocol_mode_32 { u32 size_of_info; u64 frame_buffer_base; u32 frame_buffer_size; -} __packed; +}; struct efi_graphics_output_protocol_mode_64 { u32 max_mode; @@ -1447,7 +1447,7 @@ struct efi_graphics_output_protocol_mode_64 { u64 size_of_info; u64 frame_buffer_base; u64 frame_buffer_size; -} __packed; +}; struct efi_graphics_output_protocol_mode { u32 max_mode; @@ -1456,7 +1456,7 @@ struct efi_graphics_output_protocol_mode { unsigned long size_of_info; u64 frame_buffer_base; unsigned long frame_buffer_size; -} __packed; +}; struct efi_graphics_output_protocol_32 { u32 query_mode; -- cgit v1.2.3-59-g8ed1b From 6c895c2fca8a8d4e740b5498b48f81111569502a Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Tue, 24 Dec 2019 16:10:02 +0100 Subject: efi/gop: Remove unused typedef We have stopped using gop->query_mode(), so remove the unused typedef for the function prototype. Signed-off-by: Arvind Sankar Signed-off-by: Ard Biesheuvel Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-3-ardb@kernel.org Signed-off-by: Ingo Molnar --- include/linux/efi.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/linux/efi.h b/include/linux/efi.h index 83a62f5c3fd7..9ea81cfe1576 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1479,10 +1479,6 @@ struct efi_graphics_output_protocol { struct efi_graphics_output_protocol_mode *mode; }; -typedef efi_status_t (*efi_graphics_output_protocol_query_mode)( - struct efi_graphics_output_protocol *, u32, unsigned long *, - struct efi_graphics_output_mode_info **); - extern struct list_head efivar_sysfs_list; static inline void -- cgit v1.2.3-59-g8ed1b From 44c84b4ada73b8ff156181fcf6e320459b8daefd Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Tue, 24 Dec 2019 16:10:03 +0100 Subject: efi/gop: Convert GOP structures to typedef and clean up some types Use typedef for the GOP structures, in anticipation of unifying 32/64-bit code. Also use more appropriate types in the non-bitness specific structures for the framebuffer address and pointers. Signed-off-by: Arvind Sankar Signed-off-by: Ard Biesheuvel Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-4-ardb@kernel.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/libstub/gop.c | 26 ++++++++++----------- include/linux/efi.h | 46 +++++++++++++++++++------------------- 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/drivers/firmware/efi/libstub/gop.c b/drivers/firmware/efi/libstub/gop.c index b7bf1e993b8b..a0c1ef64d445 100644 --- a/drivers/firmware/efi/libstub/gop.c +++ b/drivers/firmware/efi/libstub/gop.c @@ -35,7 +35,7 @@ static void find_bits(unsigned long mask, u8 *pos, u8 *size) static void setup_pixel_info(struct screen_info *si, u32 pixels_per_scan_line, - struct efi_pixel_bitmask pixel_info, int pixel_format) + efi_pixel_bitmask_t pixel_info, int pixel_format) { if (pixel_format == PIXEL_RGB_RESERVED_8BIT_PER_COLOR) { si->lfb_depth = 32; @@ -87,13 +87,13 @@ static efi_status_t setup_gop32(efi_system_table_t *sys_table_arg, struct screen_info *si, efi_guid_t *proto, unsigned long size, void **gop_handle) { - struct efi_graphics_output_protocol_32 *gop32, *first_gop; + efi_graphics_output_protocol_32_t *gop32, *first_gop; unsigned long nr_gops; u16 width, height; u32 pixels_per_scan_line; u32 ext_lfb_base; - u64 fb_base; - struct efi_pixel_bitmask pixel_info; + efi_physical_addr_t fb_base; + efi_pixel_bitmask_t pixel_info; int pixel_format; efi_status_t status; u32 *handles = (u32 *)(unsigned long)gop_handle; @@ -104,13 +104,13 @@ setup_gop32(efi_system_table_t *sys_table_arg, struct screen_info *si, nr_gops = size / sizeof(u32); for (i = 0; i < nr_gops; i++) { - struct efi_graphics_output_protocol_mode_32 *mode; - struct efi_graphics_output_mode_info *info = NULL; + efi_graphics_output_protocol_mode_32_t *mode; + efi_graphics_output_mode_info_t *info = NULL; efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID; bool conout_found = false; void *dummy = NULL; efi_handle_t h = (efi_handle_t)(unsigned long)handles[i]; - u64 current_fb_base; + efi_physical_addr_t current_fb_base; status = efi_call_early(handle_protocol, h, proto, (void **)&gop32); @@ -184,13 +184,13 @@ static efi_status_t setup_gop64(efi_system_table_t *sys_table_arg, struct screen_info *si, efi_guid_t *proto, unsigned long size, void **gop_handle) { - struct efi_graphics_output_protocol_64 *gop64, *first_gop; + efi_graphics_output_protocol_64_t *gop64, *first_gop; unsigned long nr_gops; u16 width, height; u32 pixels_per_scan_line; u32 ext_lfb_base; - u64 fb_base; - struct efi_pixel_bitmask pixel_info; + efi_physical_addr_t fb_base; + efi_pixel_bitmask_t pixel_info; int pixel_format; efi_status_t status; u64 *handles = (u64 *)(unsigned long)gop_handle; @@ -201,13 +201,13 @@ setup_gop64(efi_system_table_t *sys_table_arg, struct screen_info *si, nr_gops = size / sizeof(u64); for (i = 0; i < nr_gops; i++) { - struct efi_graphics_output_protocol_mode_64 *mode; - struct efi_graphics_output_mode_info *info = NULL; + efi_graphics_output_protocol_mode_64_t *mode; + efi_graphics_output_mode_info_t *info = NULL; efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID; bool conout_found = false; void *dummy = NULL; efi_handle_t h = (efi_handle_t)(unsigned long)handles[i]; - u64 current_fb_base; + efi_physical_addr_t current_fb_base; status = efi_call_early(handle_protocol, h, proto, (void **)&gop64); diff --git a/include/linux/efi.h b/include/linux/efi.h index 9ea81cfe1576..561db9deedae 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1415,69 +1415,69 @@ struct efi_simple_text_output_protocol { #define PIXEL_BLT_ONLY 3 #define PIXEL_FORMAT_MAX 4 -struct efi_pixel_bitmask { +typedef struct { u32 red_mask; u32 green_mask; u32 blue_mask; u32 reserved_mask; -}; +} efi_pixel_bitmask_t; -struct efi_graphics_output_mode_info { +typedef struct { u32 version; u32 horizontal_resolution; u32 vertical_resolution; int pixel_format; - struct efi_pixel_bitmask pixel_information; + efi_pixel_bitmask_t pixel_information; u32 pixels_per_scan_line; -}; +} efi_graphics_output_mode_info_t; -struct efi_graphics_output_protocol_mode_32 { +typedef struct { u32 max_mode; u32 mode; u32 info; u32 size_of_info; u64 frame_buffer_base; u32 frame_buffer_size; -}; +} efi_graphics_output_protocol_mode_32_t; -struct efi_graphics_output_protocol_mode_64 { +typedef struct { u32 max_mode; u32 mode; u64 info; u64 size_of_info; u64 frame_buffer_base; u64 frame_buffer_size; -}; +} efi_graphics_output_protocol_mode_64_t; -struct efi_graphics_output_protocol_mode { +typedef struct { u32 max_mode; u32 mode; - unsigned long info; + efi_graphics_output_mode_info_t *info; unsigned long size_of_info; - u64 frame_buffer_base; + efi_physical_addr_t frame_buffer_base; unsigned long frame_buffer_size; -}; +} efi_graphics_output_protocol_mode_t; -struct efi_graphics_output_protocol_32 { +typedef struct { u32 query_mode; u32 set_mode; u32 blt; u32 mode; -}; +} efi_graphics_output_protocol_32_t; -struct efi_graphics_output_protocol_64 { +typedef struct { u64 query_mode; u64 set_mode; u64 blt; u64 mode; -}; +} efi_graphics_output_protocol_64_t; -struct efi_graphics_output_protocol { - unsigned long query_mode; - unsigned long set_mode; - unsigned long blt; - struct efi_graphics_output_protocol_mode *mode; -}; +typedef struct { + void *query_mode; + void *set_mode; + void *blt; + efi_graphics_output_protocol_mode_t *mode; +} efi_graphics_output_protocol_t; extern struct list_head efivar_sysfs_list; -- cgit v1.2.3-59-g8ed1b From 8de8788d21826457ac3bfd1629d0e280f67c7b5f Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Tue, 24 Dec 2019 16:10:04 +0100 Subject: efi/gop: Unify 32/64-bit functions Use efi_table_attr macro to deal with 32/64-bit firmware using the same source code. Signed-off-by: Arvind Sankar Signed-off-by: Ard Biesheuvel Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-5-ardb@kernel.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/libstub/gop.c | 134 +++++-------------------------------- 1 file changed, 18 insertions(+), 116 deletions(-) diff --git a/drivers/firmware/efi/libstub/gop.c b/drivers/firmware/efi/libstub/gop.c index a0c1ef64d445..94045ab7dd3d 100644 --- a/drivers/firmware/efi/libstub/gop.c +++ b/drivers/firmware/efi/libstub/gop.c @@ -83,108 +83,14 @@ setup_pixel_info(struct screen_info *si, u32 pixels_per_scan_line, } } -static efi_status_t -setup_gop32(efi_system_table_t *sys_table_arg, struct screen_info *si, - efi_guid_t *proto, unsigned long size, void **gop_handle) -{ - efi_graphics_output_protocol_32_t *gop32, *first_gop; - unsigned long nr_gops; - u16 width, height; - u32 pixels_per_scan_line; - u32 ext_lfb_base; - efi_physical_addr_t fb_base; - efi_pixel_bitmask_t pixel_info; - int pixel_format; - efi_status_t status; - u32 *handles = (u32 *)(unsigned long)gop_handle; - int i; - - first_gop = NULL; - gop32 = NULL; - - nr_gops = size / sizeof(u32); - for (i = 0; i < nr_gops; i++) { - efi_graphics_output_protocol_mode_32_t *mode; - efi_graphics_output_mode_info_t *info = NULL; - efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID; - bool conout_found = false; - void *dummy = NULL; - efi_handle_t h = (efi_handle_t)(unsigned long)handles[i]; - efi_physical_addr_t current_fb_base; - - status = efi_call_early(handle_protocol, h, - proto, (void **)&gop32); - if (status != EFI_SUCCESS) - continue; - - status = efi_call_early(handle_protocol, h, - &conout_proto, &dummy); - if (status == EFI_SUCCESS) - conout_found = true; - - mode = (void *)(unsigned long)gop32->mode; - info = (void *)(unsigned long)mode->info; - current_fb_base = mode->frame_buffer_base; - - if ((!first_gop || conout_found) && - info->pixel_format != PIXEL_BLT_ONLY) { - /* - * Systems that use the UEFI Console Splitter may - * provide multiple GOP devices, not all of which are - * backed by real hardware. The workaround is to search - * for a GOP implementing the ConOut protocol, and if - * one isn't found, to just fall back to the first GOP. - */ - width = info->horizontal_resolution; - height = info->vertical_resolution; - pixel_format = info->pixel_format; - pixel_info = info->pixel_information; - pixels_per_scan_line = info->pixels_per_scan_line; - fb_base = current_fb_base; - - /* - * Once we've found a GOP supporting ConOut, - * don't bother looking any further. - */ - first_gop = gop32; - if (conout_found) - break; - } - } - - /* Did we find any GOPs? */ - if (!first_gop) - return EFI_NOT_FOUND; - - /* EFI framebuffer */ - si->orig_video_isVGA = VIDEO_TYPE_EFI; - - si->lfb_width = width; - si->lfb_height = height; - si->lfb_base = fb_base; - - ext_lfb_base = (u64)(unsigned long)fb_base >> 32; - if (ext_lfb_base) { - si->capabilities |= VIDEO_CAPABILITY_64BIT_BASE; - si->ext_lfb_base = ext_lfb_base; - } - - si->pages = 1; - - setup_pixel_info(si, pixels_per_scan_line, pixel_info, pixel_format); - - si->lfb_size = si->lfb_linelength * si->lfb_height; - - si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS; - - return EFI_SUCCESS; -} +#define efi_gop_attr(table, attr, instance) \ + (efi_table_attr(efi_graphics_output_protocol##table, attr, instance)) static efi_status_t -setup_gop64(efi_system_table_t *sys_table_arg, struct screen_info *si, - efi_guid_t *proto, unsigned long size, void **gop_handle) +setup_gop(efi_system_table_t *sys_table_arg, struct screen_info *si, + efi_guid_t *proto, unsigned long size, void **handles) { - efi_graphics_output_protocol_64_t *gop64, *first_gop; + efi_graphics_output_protocol_t *gop, *first_gop; unsigned long nr_gops; u16 width, height; u32 pixels_per_scan_line; @@ -193,24 +99,26 @@ setup_gop64(efi_system_table_t *sys_table_arg, struct screen_info *si, efi_pixel_bitmask_t pixel_info; int pixel_format; efi_status_t status; - u64 *handles = (u64 *)(unsigned long)gop_handle; int i; + bool is64 = efi_is_64bit(); first_gop = NULL; - gop64 = NULL; + gop = NULL; - nr_gops = size / sizeof(u64); + nr_gops = size / (is64 ? sizeof(u64) : sizeof(u32)); for (i = 0; i < nr_gops; i++) { - efi_graphics_output_protocol_mode_64_t *mode; + efi_graphics_output_protocol_mode_t *mode; efi_graphics_output_mode_info_t *info = NULL; efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID; bool conout_found = false; void *dummy = NULL; - efi_handle_t h = (efi_handle_t)(unsigned long)handles[i]; + efi_handle_t h = (efi_handle_t)(unsigned long) + (is64 ? ((u64 *)handles)[i] + : ((u32 *)handles)[i]); efi_physical_addr_t current_fb_base; status = efi_call_early(handle_protocol, h, - proto, (void **)&gop64); + proto, (void **)&gop); if (status != EFI_SUCCESS) continue; @@ -219,9 +127,9 @@ setup_gop64(efi_system_table_t *sys_table_arg, struct screen_info *si, if (status == EFI_SUCCESS) conout_found = true; - mode = (void *)(unsigned long)gop64->mode; - info = (void *)(unsigned long)mode->info; - current_fb_base = mode->frame_buffer_base; + mode = (void *)(unsigned long)efi_gop_attr(, mode, gop); + info = (void *)(unsigned long)efi_gop_attr(_mode, info, mode); + current_fb_base = efi_gop_attr(_mode, frame_buffer_base, mode); if ((!first_gop || conout_found) && info->pixel_format != PIXEL_BLT_ONLY) { @@ -243,7 +151,7 @@ setup_gop64(efi_system_table_t *sys_table_arg, struct screen_info *si, * Once we've found a GOP supporting ConOut, * don't bother looking any further. */ - first_gop = gop64; + first_gop = gop; if (conout_found) break; } @@ -298,13 +206,7 @@ efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg, if (status != EFI_SUCCESS) goto free_handle; - if (efi_is_64bit()) { - status = setup_gop64(sys_table_arg, si, proto, size, - gop_handle); - } else { - status = setup_gop32(sys_table_arg, si, proto, size, - gop_handle); - } + status = setup_gop(sys_table_arg, si, proto, size, gop_handle); free_handle: efi_call_early(free_pool, gop_handle); -- cgit v1.2.3-59-g8ed1b From 58ec655a75731c83f403e9c04ffd66aa6b3cd4d5 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:05 +0100 Subject: efi/libstub: Remove unused __efi_call_early() macro The macro __efi_call_early() is defined by various architectures but never used. Let's get rid of it. Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-6-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/arm/include/asm/efi.h | 1 - arch/arm64/include/asm/efi.h | 1 - arch/x86/include/asm/efi.h | 3 --- 3 files changed, 5 deletions(-) diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h index 7667826b93f1..2306ed783ceb 100644 --- a/arch/arm/include/asm/efi.h +++ b/arch/arm/include/asm/efi.h @@ -51,7 +51,6 @@ void efi_virtmap_unload(void); /* arch specific definitions used by the stub code */ #define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__) -#define __efi_call_early(f, ...) f(__VA_ARGS__) #define efi_call_runtime(f, ...) sys_table_arg->runtime->f(__VA_ARGS__) #define efi_is_64bit() (false) diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index b54d3a86c444..7cfac5e0e310 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -94,7 +94,6 @@ static inline unsigned long efi_get_max_initrd_addr(unsigned long dram_base, } #define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__) -#define __efi_call_early(f, ...) f(__VA_ARGS__) #define efi_call_runtime(f, ...) sys_table_arg->runtime->f(__VA_ARGS__) #define efi_is_64bit() (true) diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index d028e9acdf1c..59c19e0b6027 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -233,9 +233,6 @@ static inline bool efi_is_64bit(void) __efi_early()->call(efi_table_attr(efi_boot_services, f, \ __efi_early()->boot_services), __VA_ARGS__) -#define __efi_call_early(f, ...) \ - __efi_early()->call((unsigned long)f, __VA_ARGS__); - #define efi_call_runtime(f, ...) \ __efi_early()->call(efi_table_attr(efi_runtime_services, f, \ __efi_early()->runtime_services), __VA_ARGS__) -- cgit v1.2.3-59-g8ed1b From a8147dba75b188bff87d4ad072db84a0b70d716d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:06 +0100 Subject: efi/x86: Rename efi_is_native() to efi_is_mixed() The ARM architecture does not permit combining 32-bit and 64-bit code at the same privilege level, and so EFI mixed mode is strictly a x86 concept. In preparation of turning the 32/64 bit distinction in shared stub code to a native vs mixed one, refactor x86's current use of the helper function efi_is_native() into efi_is_mixed(). Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-7-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/efi.h | 10 ++++++---- arch/x86/platform/efi/efi.c | 8 ++++---- arch/x86/platform/efi/efi_64.c | 4 ++-- arch/x86/platform/efi/quirks.c | 2 +- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 59c19e0b6027..6094e7f49a99 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -153,17 +153,19 @@ extern u64 efi_setup; #ifdef CONFIG_EFI -static inline bool efi_is_native(void) +static inline bool efi_is_mixed(void) { - return IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT); + if (!IS_ENABLED(CONFIG_EFI_MIXED)) + return false; + return IS_ENABLED(CONFIG_X86_64) && !efi_enabled(EFI_64BIT); } static inline bool efi_runtime_supported(void) { - if (efi_is_native()) + if (!efi_is_mixed()) return true; - if (IS_ENABLED(CONFIG_EFI_MIXED) && !efi_enabled(EFI_OLD_MEMMAP)) + if (!efi_enabled(EFI_OLD_MEMMAP)) return true; return false; diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 38d44f36d5ed..e188b7ce0796 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -828,7 +828,7 @@ static bool should_map_region(efi_memory_desc_t *md) * Map all of RAM so that we can access arguments in the 1:1 * mapping when making EFI runtime calls. */ - if (IS_ENABLED(CONFIG_EFI_MIXED) && !efi_is_native()) { + if (efi_is_mixed()) { if (md->type == EFI_CONVENTIONAL_MEMORY || md->type == EFI_LOADER_DATA || md->type == EFI_LOADER_CODE) @@ -903,7 +903,7 @@ static void __init kexec_enter_virtual_mode(void) * kexec kernel because in the initial boot something else might * have been mapped at these virtual addresses. */ - if (!efi_is_native() || efi_enabled(EFI_OLD_MEMMAP)) { + if (efi_is_mixed() || efi_enabled(EFI_OLD_MEMMAP)) { efi_memmap_unmap(); clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); return; @@ -1040,7 +1040,7 @@ static void __init __efi_enter_virtual_mode(void) efi_sync_low_kernel_mappings(); - if (efi_is_native()) { + if (!efi_is_mixed()) { status = phys_efi_set_virtual_address_map( efi.memmap.desc_size * count, efi.memmap.desc_size, @@ -1071,7 +1071,7 @@ static void __init __efi_enter_virtual_mode(void) */ efi.runtime_version = efi_systab.hdr.revision; - if (efi_is_native()) + if (!efi_is_mixed()) efi_native_runtime_setup(); else efi_thunk_runtime_setup(); diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 08ce8177c3af..885e50a707a6 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -388,7 +388,7 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) * text and allocate a new stack because we can't rely on the * stack pointer being < 4GB. */ - if (!IS_ENABLED(CONFIG_EFI_MIXED) || efi_is_native()) + if (!efi_is_mixed()) return 0; page = alloc_page(GFP_KERNEL|__GFP_DMA32); @@ -449,7 +449,7 @@ void __init efi_map_region(efi_memory_desc_t *md) * booting in EFI mixed mode, because even though we may be * running a 64-bit kernel, the firmware may only be 32-bit. */ - if (!efi_is_native () && IS_ENABLED(CONFIG_EFI_MIXED)) { + if (efi_is_mixed()) { md->virt_addr = md->phys_addr; return; } diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index f8f0220b6a66..eb421cb35108 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -395,7 +395,7 @@ static void __init efi_unmap_pages(efi_memory_desc_t *md) * EFI runtime calls, hence don't unmap EFI boot services code/data * regions. */ - if (!efi_is_native()) + if (efi_is_mixed()) return; if (kernel_unmap_pages_in_pgd(pgd, pa, md->num_pages)) -- cgit v1.2.3-59-g8ed1b From 2732ea0d5c0a67ec86bfbde2bd68b6152e23ec4e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:07 +0100 Subject: efi/libstub: Use a helper to iterate over a EFI handle array Iterating over a EFI handle array is a bit finicky, since we have to take mixed mode into account, where handles are only 32-bit while the native efi_handle_t type is 64-bit. So introduce a helper, and replace the various occurrences of this pattern. Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-8-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/eboot.c | 14 +++++--------- drivers/firmware/efi/libstub/gop.c | 9 ++------- include/linux/efi.h | 13 +++++++++++++ 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 72b08fde6de6..959bcdd8c1fe 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -135,6 +135,7 @@ static void setup_efi_pci(struct boot_params *params) unsigned long size = 0; unsigned long nr_pci; struct setup_data *data; + efi_handle_t h; int i; status = efi_call_early(locate_handle, @@ -164,14 +165,11 @@ static void setup_efi_pci(struct boot_params *params) while (data && data->next) data = (struct setup_data *)(unsigned long)data->next; - nr_pci = size / (efi_is_64bit() ? sizeof(u64) : sizeof(u32)); - for (i = 0; i < nr_pci; i++) { + for_each_efi_handle(h, pci_handle, size, i) { efi_pci_io_protocol_t *pci = NULL; struct pci_setup_rom *rom; - status = efi_call_early(handle_protocol, - efi_is_64bit() ? ((u64 *)pci_handle)[i] - : ((u32 *)pci_handle)[i], + status = efi_call_early(handle_protocol, h, &pci_proto, (void **)&pci); if (status != EFI_SUCCESS || !pci) continue; @@ -266,6 +264,7 @@ setup_uga(struct screen_info *si, efi_guid_t *uga_proto, unsigned long size) void **uga_handle = NULL; efi_uga_draw_protocol_t *uga = NULL, *first_uga; unsigned long nr_ugas; + efi_handle_t handle; int i; status = efi_call_early(allocate_pool, EFI_LOADER_DATA, @@ -283,13 +282,10 @@ setup_uga(struct screen_info *si, efi_guid_t *uga_proto, unsigned long size) width = 0; first_uga = NULL; - nr_ugas = size / (efi_is_64bit() ? sizeof(u64) : sizeof(u32)); - for (i = 0; i < nr_ugas; i++) { + for_each_efi_handle(handle, uga_handle, size, i) { efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID; u32 w, h, depth, refresh; void *pciio; - unsigned long handle = efi_is_64bit() ? ((u64 *)uga_handle)[i] - : ((u32 *)uga_handle)[i]; status = efi_call_early(handle_protocol, handle, uga_proto, (void **)&uga); diff --git a/drivers/firmware/efi/libstub/gop.c b/drivers/firmware/efi/libstub/gop.c index 94045ab7dd3d..5f4fbc2ac687 100644 --- a/drivers/firmware/efi/libstub/gop.c +++ b/drivers/firmware/efi/libstub/gop.c @@ -91,7 +91,6 @@ setup_gop(efi_system_table_t *sys_table_arg, struct screen_info *si, efi_guid_t *proto, unsigned long size, void **handles) { efi_graphics_output_protocol_t *gop, *first_gop; - unsigned long nr_gops; u16 width, height; u32 pixels_per_scan_line; u32 ext_lfb_base; @@ -99,22 +98,18 @@ setup_gop(efi_system_table_t *sys_table_arg, struct screen_info *si, efi_pixel_bitmask_t pixel_info; int pixel_format; efi_status_t status; + efi_handle_t h; int i; - bool is64 = efi_is_64bit(); first_gop = NULL; gop = NULL; - nr_gops = size / (is64 ? sizeof(u64) : sizeof(u32)); - for (i = 0; i < nr_gops; i++) { + for_each_efi_handle(h, handles, size, i) { efi_graphics_output_protocol_mode_t *mode; efi_graphics_output_mode_info_t *info = NULL; efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID; bool conout_found = false; void *dummy = NULL; - efi_handle_t h = (efi_handle_t)(unsigned long) - (is64 ? ((u64 *)handles)[i] - : ((u32 *)handles)[i]); efi_physical_addr_t current_fb_base; status = efi_call_early(handle_protocol, h, diff --git a/include/linux/efi.h b/include/linux/efi.h index 561db9deedae..8d267715ce22 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -48,6 +48,19 @@ typedef u16 efi_char16_t; /* UNICODE character */ typedef u64 efi_physical_addr_t; typedef void *efi_handle_t; +#define efi_get_handle_at(array, idx) \ + (efi_is_64bit() ? (efi_handle_t)(unsigned long)((u64 *)(array))[idx] \ + : (efi_handle_t)(unsigned long)((u32 *)(array))[idx]) + +#define efi_get_handle_num(size) \ + ((size) / (efi_is_64bit() ? sizeof(u64) : sizeof(u32))) + +#define for_each_efi_handle(handle, array, size, i) \ + for (i = 0; \ + i < efi_get_handle_num(size) && \ + ((handle = efi_get_handle_at((array), i)) || true); \ + i++) + /* * The UEFI spec and EDK2 reference implementation both define EFI_GUID as * struct { u32 a; u16; b; u16 c; u8 d[8]; }; and so the implied alignment -- cgit v1.2.3-59-g8ed1b From 1786e83011644e18732ed006413339d5323766e9 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:08 +0100 Subject: efi/libstub: Extend native protocol definitions with mixed_mode aliases In preparation of moving to a native vs. mixed mode split rather than a 32 vs. 64 bit split when it comes to invoking EFI firmware services, update all the native protocol definitions and redefine them as unions containing an anonymous struct for the native view and a struct called 'mixed_mode' describing the 32-bit view of the protocol when called from 64-bit code. While at it, flesh out some PCI I/O member definitions that we will be needing shortly. Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-9-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/eboot.h | 15 +- drivers/firmware/efi/libstub/arm-stub.c | 4 +- drivers/firmware/efi/libstub/random.c | 22 +- include/linux/efi.h | 496 +++++++++++++++++++++----------- 4 files changed, 357 insertions(+), 180 deletions(-) diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h index 8297387c4676..26f1f2635f64 100644 --- a/arch/x86/boot/compressed/eboot.h +++ b/arch/x86/boot/compressed/eboot.h @@ -24,10 +24,17 @@ typedef struct { u64 blt; } efi_uga_draw_protocol_64_t; -typedef struct { - void *get_mode; - void *set_mode; - void *blt; +typedef union { + struct { + void *get_mode; + void *set_mode; + void *blt; + }; + struct { + u32 get_mode; + u32 set_mode; + u32 blt; + } mixed_mode; } efi_uga_draw_protocol_t; #endif /* BOOT_COMPRESSED_EBOOT_H */ diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c index 817237ce2420..60a301e1c072 100644 --- a/drivers/firmware/efi/libstub/arm-stub.c +++ b/drivers/firmware/efi/libstub/arm-stub.c @@ -40,9 +40,9 @@ static u64 virtmap_base = EFI_RT_VIRTUAL_BASE; void efi_char16_printk(efi_system_table_t *sys_table_arg, efi_char16_t *str) { - struct efi_simple_text_output_protocol *out; + efi_simple_text_output_protocol_t *out; - out = (struct efi_simple_text_output_protocol *)sys_table_arg->con_out; + out = (efi_simple_text_output_protocol_t *)sys_table_arg->con_out; out->output_string(out, str); } diff --git a/drivers/firmware/efi/libstub/random.c b/drivers/firmware/efi/libstub/random.c index 97378cf96a2e..d92cd640c73d 100644 --- a/drivers/firmware/efi/libstub/random.c +++ b/drivers/firmware/efi/libstub/random.c @@ -9,7 +9,7 @@ #include "efistub.h" -typedef struct efi_rng_protocol efi_rng_protocol_t; +typedef union efi_rng_protocol efi_rng_protocol_t; typedef struct { u32 get_info; @@ -21,11 +21,17 @@ typedef struct { u64 get_rng; } efi_rng_protocol_64_t; -struct efi_rng_protocol { - efi_status_t (*get_info)(struct efi_rng_protocol *, - unsigned long *, efi_guid_t *); - efi_status_t (*get_rng)(struct efi_rng_protocol *, - efi_guid_t *, unsigned long, u8 *out); +union efi_rng_protocol { + struct { + efi_status_t (*get_info)(efi_rng_protocol_t *, + unsigned long *, efi_guid_t *); + efi_status_t (*get_rng)(efi_rng_protocol_t *, + efi_guid_t *, unsigned long, u8 *out); + }; + struct { + u32 get_info; + u32 get_rng; + } mixed_mode; }; efi_status_t efi_get_random_bytes(efi_system_table_t *sys_table_arg, @@ -33,7 +39,7 @@ efi_status_t efi_get_random_bytes(efi_system_table_t *sys_table_arg, { efi_guid_t rng_proto = EFI_RNG_PROTOCOL_GUID; efi_status_t status; - struct efi_rng_protocol *rng = NULL; + efi_rng_protocol_t *rng = NULL; status = efi_call_early(locate_protocol, &rng_proto, NULL, (void **)&rng); @@ -162,7 +168,7 @@ efi_status_t efi_random_get_seed(efi_system_table_t *sys_table_arg) efi_guid_t rng_proto = EFI_RNG_PROTOCOL_GUID; efi_guid_t rng_algo_raw = EFI_RNG_ALGORITHM_RAW; efi_guid_t rng_table_guid = LINUX_EFI_RANDOM_SEED_TABLE_GUID; - struct efi_rng_protocol *rng = NULL; + efi_rng_protocol_t *rng = NULL; struct linux_efi_random_seed *seed = NULL; efi_status_t status; diff --git a/include/linux/efi.h b/include/linux/efi.h index 8d267715ce22..5a220af263b1 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -315,55 +315,58 @@ typedef struct { /* * EFI Boot Services table */ -typedef struct { - efi_table_hdr_t hdr; - void *raise_tpl; - void *restore_tpl; - efi_status_t (*allocate_pages)(int, int, unsigned long, - efi_physical_addr_t *); - efi_status_t (*free_pages)(efi_physical_addr_t, unsigned long); - efi_status_t (*get_memory_map)(unsigned long *, void *, unsigned long *, - unsigned long *, u32 *); - efi_status_t (*allocate_pool)(int, unsigned long, void **); - efi_status_t (*free_pool)(void *); - void *create_event; - void *set_timer; - void *wait_for_event; - void *signal_event; - void *close_event; - void *check_event; - void *install_protocol_interface; - void *reinstall_protocol_interface; - void *uninstall_protocol_interface; - efi_status_t (*handle_protocol)(efi_handle_t, efi_guid_t *, void **); - void *__reserved; - void *register_protocol_notify; - efi_status_t (*locate_handle)(int, efi_guid_t *, void *, - unsigned long *, efi_handle_t *); - void *locate_device_path; - efi_status_t (*install_configuration_table)(efi_guid_t *, void *); - void *load_image; - void *start_image; - void *exit; - void *unload_image; - efi_status_t (*exit_boot_services)(efi_handle_t, unsigned long); - void *get_next_monotonic_count; - void *stall; - void *set_watchdog_timer; - void *connect_controller; - void *disconnect_controller; - void *open_protocol; - void *close_protocol; - void *open_protocol_information; - void *protocols_per_handle; - void *locate_handle_buffer; - efi_status_t (*locate_protocol)(efi_guid_t *, void *, void **); - void *install_multiple_protocol_interfaces; - void *uninstall_multiple_protocol_interfaces; - void *calculate_crc32; - void *copy_mem; - void *set_mem; - void *create_event_ex; +typedef union { + struct { + efi_table_hdr_t hdr; + void *raise_tpl; + void *restore_tpl; + efi_status_t (*allocate_pages)(int, int, unsigned long, + efi_physical_addr_t *); + efi_status_t (*free_pages)(efi_physical_addr_t, unsigned long); + efi_status_t (*get_memory_map)(unsigned long *, void *, unsigned long *, + unsigned long *, u32 *); + efi_status_t (*allocate_pool)(int, unsigned long, void **); + efi_status_t (*free_pool)(void *); + void *create_event; + void *set_timer; + void *wait_for_event; + void *signal_event; + void *close_event; + void *check_event; + void *install_protocol_interface; + void *reinstall_protocol_interface; + void *uninstall_protocol_interface; + efi_status_t (*handle_protocol)(efi_handle_t, efi_guid_t *, void **); + void *__reserved; + void *register_protocol_notify; + efi_status_t (*locate_handle)(int, efi_guid_t *, void *, + unsigned long *, efi_handle_t *); + void *locate_device_path; + efi_status_t (*install_configuration_table)(efi_guid_t *, void *); + void *load_image; + void *start_image; + void *exit; + void *unload_image; + efi_status_t (*exit_boot_services)(efi_handle_t, unsigned long); + void *get_next_monotonic_count; + void *stall; + void *set_watchdog_timer; + void *connect_controller; + void *disconnect_controller; + void *open_protocol; + void *close_protocol; + void *open_protocol_information; + void *protocols_per_handle; + void *locate_handle_buffer; + efi_status_t (*locate_protocol)(efi_guid_t *, void *, void **); + void *install_multiple_protocol_interfaces; + void *uninstall_multiple_protocol_interfaces; + void *calculate_crc32; + void *copy_mem; + void *set_mem; + void *create_event_ex; + }; + efi_boot_services_32_t mixed_mode; } efi_boot_services_t; typedef enum { @@ -401,11 +404,24 @@ typedef struct { u64 write; } efi_pci_io_protocol_access_64_t; +typedef union efi_pci_io_protocol efi_pci_io_protocol_t; + +typedef +efi_status_t (*efi_pci_io_protocol_cfg_t)(efi_pci_io_protocol_t *, + EFI_PCI_IO_PROTOCOL_WIDTH, + u32 offset, unsigned long count, + void *buffer); + typedef struct { void *read; void *write; } efi_pci_io_protocol_access_t; +typedef struct { + efi_pci_io_protocol_cfg_t read; + efi_pci_io_protocol_cfg_t write; +} efi_pci_io_protocol_config_access_t; + typedef struct { u32 poll_mem; u32 poll_io; @@ -446,25 +462,46 @@ typedef struct { u64 romimage; } efi_pci_io_protocol_64_t; -typedef struct { - void *poll_mem; - void *poll_io; - efi_pci_io_protocol_access_t mem; - efi_pci_io_protocol_access_t io; - efi_pci_io_protocol_access_t pci; - void *copy_mem; - void *map; - void *unmap; - void *allocate_buffer; - void *free_buffer; - void *flush; - void *get_location; - void *attributes; - void *get_bar_attributes; - void *set_bar_attributes; - uint64_t romsize; - void *romimage; -} efi_pci_io_protocol_t; +union efi_pci_io_protocol { + struct { + void *poll_mem; + void *poll_io; + efi_pci_io_protocol_access_t mem; + efi_pci_io_protocol_access_t io; + efi_pci_io_protocol_config_access_t pci; + void *copy_mem; + void *map; + void *unmap; + void *allocate_buffer; + void *free_buffer; + void *flush; + void *get_location; + void *attributes; + void *get_bar_attributes; + void *set_bar_attributes; + uint64_t romsize; + void *romimage; + }; + struct { + u32 poll_mem; + u32 poll_io; + efi_pci_io_protocol_access_32_t mem; + efi_pci_io_protocol_access_32_t io; + efi_pci_io_protocol_access_32_t pci; + u32 copy_mem; + u32 map; + u32 unmap; + u32 allocate_buffer; + u32 free_buffer; + u32 flush; + u32 get_location; + u32 attributes; + u32 get_bar_attributes; + u32 set_bar_attributes; + u64 romsize; + u32 romimage; + } mixed_mode; +}; #define EFI_PCI_IO_ATTRIBUTE_ISA_MOTHERBOARD_IO 0x0001 #define EFI_PCI_IO_ATTRIBUTE_ISA_IO 0x0002 @@ -502,6 +539,33 @@ typedef struct { u64 get_all; } apple_properties_protocol_64_t; +struct efi_dev_path; + +typedef union apple_properties_protocol apple_properties_protocol_t; + +union apple_properties_protocol { + struct { + unsigned long version; + efi_status_t (*get)(apple_properties_protocol_t *, + struct efi_dev_path *, efi_char16_t *, + void *, u32 *); + efi_status_t (*set)(apple_properties_protocol_t *, + struct efi_dev_path *, efi_char16_t *, + void *, u32); + efi_status_t (*del)(apple_properties_protocol_t *, + struct efi_dev_path *, efi_char16_t *); + efi_status_t (*get_all)(apple_properties_protocol_t *, + void *buffer, u32 *); + }; + struct { + u32 version; + u32 get; + u32 set; + u32 del; + u32 get_all; + } mixed_mode; +}; + typedef struct { u32 get_capability; u32 get_event_log; @@ -524,16 +588,32 @@ typedef struct { typedef u32 efi_tcg2_event_log_format; -typedef struct { - void *get_capability; - efi_status_t (*get_event_log)(efi_handle_t, efi_tcg2_event_log_format, - efi_physical_addr_t *, efi_physical_addr_t *, efi_bool_t *); - void *hash_log_extend_event; - void *submit_command; - void *get_active_pcr_banks; - void *set_active_pcr_banks; - void *get_result_of_set_active_pcr_banks; -} efi_tcg2_protocol_t; +typedef union efi_tcg2_protocol efi_tcg2_protocol_t; + +union efi_tcg2_protocol { + struct { + void *get_capability; + efi_status_t (*get_event_log)(efi_handle_t, + efi_tcg2_event_log_format, + efi_physical_addr_t *, + efi_physical_addr_t *, + efi_bool_t *); + void *hash_log_extend_event; + void *submit_command; + void *get_active_pcr_banks; + void *set_active_pcr_banks; + void *get_result_of_set_active_pcr_banks; + }; + struct { + u32 get_capability; + u32 get_event_log; + u32 hash_log_extend_event; + u32 submit_command; + u32 get_active_pcr_banks; + u32 set_active_pcr_banks; + u32 get_result_of_set_active_pcr_banks; + } mixed_mode; +}; /* * Types and defines for EFI ResetSystem @@ -618,22 +698,25 @@ typedef efi_status_t efi_query_variable_store_t(u32 attributes, unsigned long size, bool nonblocking); -typedef struct { - efi_table_hdr_t hdr; - efi_get_time_t *get_time; - efi_set_time_t *set_time; - efi_get_wakeup_time_t *get_wakeup_time; - efi_set_wakeup_time_t *set_wakeup_time; - efi_set_virtual_address_map_t *set_virtual_address_map; - void *convert_pointer; - efi_get_variable_t *get_variable; - efi_get_next_variable_t *get_next_variable; - efi_set_variable_t *set_variable; - efi_get_next_high_mono_count_t *get_next_high_mono_count; - efi_reset_system_t *reset_system; - efi_update_capsule_t *update_capsule; - efi_query_capsule_caps_t *query_capsule_caps; - efi_query_variable_info_t *query_variable_info; +typedef union { + struct { + efi_table_hdr_t hdr; + efi_get_time_t *get_time; + efi_set_time_t *set_time; + efi_get_wakeup_time_t *get_wakeup_time; + efi_set_wakeup_time_t *set_wakeup_time; + efi_set_virtual_address_map_t *set_virtual_address_map; + void *convert_pointer; + efi_get_variable_t *get_variable; + efi_get_next_variable_t *get_next_variable; + efi_set_variable_t *set_variable; + efi_get_next_high_mono_count_t *get_next_high_mono_count; + efi_reset_system_t *reset_system; + efi_update_capsule_t *update_capsule; + efi_query_capsule_caps_t *query_capsule_caps; + efi_query_variable_info_t *query_variable_info; + }; + efi_runtime_services_32_t mixed_mode; } efi_runtime_services_t; void efi_native_runtime_setup(void); @@ -719,9 +802,12 @@ typedef struct { u32 table; } efi_config_table_32_t; -typedef struct { - efi_guid_t guid; - unsigned long table; +typedef union { + struct { + efi_guid_t guid; + unsigned long table; + }; + efi_config_table_32_t mixed_mode; } efi_config_table_t; typedef struct { @@ -773,20 +859,23 @@ typedef struct { u32 tables; } efi_system_table_32_t; -typedef struct { - efi_table_hdr_t hdr; - unsigned long fw_vendor; /* physical addr of CHAR16 vendor string */ - u32 fw_revision; - unsigned long con_in_handle; - unsigned long con_in; - unsigned long con_out_handle; - unsigned long con_out; - unsigned long stderr_handle; - unsigned long stderr; - efi_runtime_services_t *runtime; - efi_boot_services_t *boottime; - unsigned long nr_tables; - unsigned long tables; +typedef union { + struct { + efi_table_hdr_t hdr; + unsigned long fw_vendor; /* physical addr of CHAR16 vendor string */ + u32 fw_revision; + unsigned long con_in_handle; + unsigned long con_in; + unsigned long con_out_handle; + unsigned long con_out; + unsigned long stderr_handle; + unsigned long stderr; + efi_runtime_services_t *runtime; + efi_boot_services_t *boottime; + unsigned long nr_tables; + unsigned long tables; + }; + efi_system_table_32_t mixed_mode; } efi_system_table_t; /* @@ -856,22 +945,40 @@ typedef struct { u64 unload; } efi_loaded_image_64_t; -typedef struct { - u32 revision; - efi_handle_t parent_handle; - efi_system_table_t *system_table; - efi_handle_t device_handle; - void *file_path; - void *reserved; - u32 load_options_size; - void *load_options; - void *image_base; - __aligned_u64 image_size; - unsigned int image_code_type; - unsigned int image_data_type; - efi_status_t (*unload)(efi_handle_t image_handle); -} efi_loaded_image_t; +typedef union efi_loaded_image efi_loaded_image_t; +union efi_loaded_image { + struct { + u32 revision; + efi_handle_t parent_handle; + efi_system_table_t *system_table; + efi_handle_t device_handle; + void *file_path; + void *reserved; + u32 load_options_size; + void *load_options; + void *image_base; + __aligned_u64 image_size; + unsigned int image_code_type; + unsigned int image_data_type; + efi_status_t (*unload)(efi_handle_t image_handle); + }; + struct { + u32 revision; + u32 parent_handle; + u32 system_table; + u32 device_handle; + u32 file_path; + u32 reserved; + u32 load_options_size; + u32 load_options; + u32 image_base; + __aligned_u64 image_size; + unsigned int image_code_type; + unsigned int image_data_type; + u32 unload; + } mixed_mode; +}; typedef struct { u64 size; @@ -912,23 +1019,40 @@ typedef struct { u64 flush; } efi_file_handle_64_t; -typedef struct _efi_file_handle { - u64 revision; - efi_status_t (*open)(struct _efi_file_handle *, - struct _efi_file_handle **, - efi_char16_t *, u64, u64); - efi_status_t (*close)(struct _efi_file_handle *); - void *delete; - efi_status_t (*read)(struct _efi_file_handle *, unsigned long *, - void *); - void *write; - void *get_position; - void *set_position; - efi_status_t (*get_info)(struct _efi_file_handle *, efi_guid_t *, - unsigned long *, void *); - void *set_info; - void *flush; -} efi_file_handle_t; +typedef union efi_file_handle efi_file_handle_t; + +union efi_file_handle { + struct { + u64 revision; + efi_status_t (*open)(efi_file_handle_t *, + efi_file_handle_t **, + efi_char16_t *, u64, u64); + efi_status_t (*close)(efi_file_handle_t *); + void *delete; + efi_status_t (*read)(efi_file_handle_t *, unsigned long *, + void *); + void *write; + void *get_position; + void *set_position; + efi_status_t (*get_info)(efi_file_handle_t *, efi_guid_t *, + unsigned long *, void *); + void *set_info; + void *flush; + }; + struct { + u64 revision; + u32 open; + u32 close; + u32 delete; + u32 read; + u32 write; + u32 get_position; + u32 set_position; + u32 get_info; + u32 set_info; + u32 flush; + } mixed_mode; +}; typedef struct { u64 revision; @@ -940,11 +1064,19 @@ typedef struct { u64 open_volume; } efi_file_io_interface_64_t; -typedef struct _efi_file_io_interface { - u64 revision; - int (*open_volume)(struct _efi_file_io_interface *, - efi_file_handle_t **); -} efi_file_io_interface_t; +typedef union efi_file_io_interface efi_file_io_interface_t; + +union efi_file_io_interface { + struct { + u64 revision; + int (*open_volume)(efi_file_io_interface_t *, + efi_file_handle_t **); + }; + struct { + u64 revision; + u32 open_volume; + } mixed_mode; +} ; #define EFI_FILE_MODE_READ 0x0000000000000001 #define EFI_FILE_MODE_WRITE 0x0000000000000002 @@ -1416,10 +1548,20 @@ typedef struct { u64 test_string; } efi_simple_text_output_protocol_64_t; -struct efi_simple_text_output_protocol { - void *reset; - efi_status_t (*output_string)(void *, void *); - void *test_string; +typedef union efi_simple_text_output_protocol efi_simple_text_output_protocol_t; + +union efi_simple_text_output_protocol { + struct { + void *reset; + efi_status_t (*output_string)(efi_simple_text_output_protocol_t *, + efi_char16_t *); + void *test_string; + }; + struct { + u32 reset; + u32 output_string; + u32 test_string; + } mixed_mode; }; #define PIXEL_RGB_RESERVED_8BIT_PER_COLOR 0 @@ -1462,14 +1604,26 @@ typedef struct { u64 frame_buffer_size; } efi_graphics_output_protocol_mode_64_t; -typedef struct { - u32 max_mode; - u32 mode; - efi_graphics_output_mode_info_t *info; - unsigned long size_of_info; - efi_physical_addr_t frame_buffer_base; - unsigned long frame_buffer_size; -} efi_graphics_output_protocol_mode_t; +typedef union efi_graphics_output_protocol_mode efi_graphics_output_protocol_mode_t; + +union efi_graphics_output_protocol_mode { + struct { + u32 max_mode; + u32 mode; + efi_graphics_output_mode_info_t *info; + unsigned long size_of_info; + efi_physical_addr_t frame_buffer_base; + unsigned long frame_buffer_size; + }; + struct { + u32 max_mode; + u32 mode; + u32 info; + u32 size_of_info; + u64 frame_buffer_base; + u32 frame_buffer_size; + } mixed_mode; +}; typedef struct { u32 query_mode; @@ -1485,12 +1639,22 @@ typedef struct { u64 mode; } efi_graphics_output_protocol_64_t; -typedef struct { - void *query_mode; - void *set_mode; - void *blt; - efi_graphics_output_protocol_mode_t *mode; -} efi_graphics_output_protocol_t; +typedef union efi_graphics_output_protocol efi_graphics_output_protocol_t; + +union efi_graphics_output_protocol { + struct { + void *query_mode; + void *set_mode; + void *blt; + efi_graphics_output_protocol_mode_t *mode; + }; + struct { + u32 query_mode; + u32 set_mode; + u32 blt; + u32 mode; + } mixed_mode; +}; extern struct list_head efivar_sysfs_list; -- cgit v1.2.3-59-g8ed1b From f958efe97596837f9504fc38d75ef8e284bc0ebd Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:09 +0100 Subject: efi/libstub: Distinguish between native/mixed not 32/64 bit Currently, we support mixed mode by casting all boot time firmware calls to 64-bit explicitly on native 64-bit systems, and to 32-bit on 32-bit systems or 64-bit systems running with 32-bit firmware. Due to this explicit awareness of the bitness in the code, we do a lot of casting even on generic code that is shared with other architectures, where mixed mode does not even exist. This casting leads to loss of coverage of type checking by the compiler, which we should try to avoid. So instead of distinguishing between 32-bit vs 64-bit, distinguish between native vs mixed, and limit all the nasty casting and pointer mangling to the code that actually deals with mixed mode. Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-10-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/arm/include/asm/efi.h | 2 +- arch/arm64/include/asm/efi.h | 2 +- arch/x86/boot/compressed/eboot.c | 3 +- arch/x86/include/asm/efi.h | 35 ++++++++++++++++---- drivers/firmware/efi/libstub/efi-stub-helper.c | 45 +++++++++----------------- include/linux/efi.h | 6 ++-- 6 files changed, 49 insertions(+), 44 deletions(-) diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h index 2306ed783ceb..9b0c64c28bff 100644 --- a/arch/arm/include/asm/efi.h +++ b/arch/arm/include/asm/efi.h @@ -52,7 +52,7 @@ void efi_virtmap_unload(void); #define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__) #define efi_call_runtime(f, ...) sys_table_arg->runtime->f(__VA_ARGS__) -#define efi_is_64bit() (false) +#define efi_is_native() (true) #define efi_table_attr(table, attr, instance) \ ((table##_t *)instance)->attr diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 7cfac5e0e310..189082c44c28 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -95,7 +95,7 @@ static inline unsigned long efi_get_max_initrd_addr(unsigned long dram_base, #define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__) #define efi_call_runtime(f, ...) sys_table_arg->runtime->f(__VA_ARGS__) -#define efi_is_64bit() (true) +#define efi_is_native() (true) #define efi_table_attr(table, attr, instance) \ ((table##_t *)instance)->attr diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 959bcdd8c1fe..990b93379965 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -63,8 +63,7 @@ preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom) * large romsize. The UEFI spec limits the size of option ROMs to 16 * MiB so we reject any ROMs over 16 MiB in size to catch this. */ - romimage = (void *)(unsigned long)efi_table_attr(efi_pci_io_protocol, - romimage, pci); + romimage = efi_table_attr(efi_pci_io_protocol, romimage, pci); romsize = efi_table_attr(efi_pci_io_protocol, romsize, pci); if (!romimage || !romsize || romsize > SZ_16M) return EFI_INVALID_PARAMETER; diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 6094e7f49a99..c27323cb49e5 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -222,21 +222,42 @@ static inline bool efi_is_64bit(void) return __efi_early()->is64; } -#define efi_table_attr(table, attr, instance) \ - (efi_is_64bit() ? \ - ((table##_64_t *)(unsigned long)instance)->attr : \ - ((table##_32_t *)(unsigned long)instance)->attr) +static inline bool efi_is_native(void) +{ + if (!IS_ENABLED(CONFIG_X86_64)) + return true; + return efi_is_64bit(); +} + +#define efi_mixed_mode_cast(attr) \ + __builtin_choose_expr( \ + __builtin_types_compatible_p(u32, __typeof__(attr)), \ + (unsigned long)(attr), (attr)) + +#define efi_table_attr(table, attr, instance) ({ \ + __typeof__(((table##_t *)0)->attr) __ret; \ + if (efi_is_native()) { \ + __ret = ((table##_t *)(unsigned long)instance)->attr; \ + } else { \ + __ret = (__typeof__(__ret))efi_mixed_mode_cast( \ + ((table##_t *)(unsigned long)instance)->mixed_mode.attr);\ + } \ + __ret; \ +}) #define efi_call_proto(protocol, f, instance, ...) \ - __efi_early()->call(efi_table_attr(protocol, f, instance), \ + __efi_early()->call((unsigned long) \ + efi_table_attr(protocol, f, instance), \ instance, ##__VA_ARGS__) #define efi_call_early(f, ...) \ - __efi_early()->call(efi_table_attr(efi_boot_services, f, \ + __efi_early()->call((unsigned long) \ + efi_table_attr(efi_boot_services, f, \ __efi_early()->boot_services), __VA_ARGS__) #define efi_call_runtime(f, ...) \ - __efi_early()->call(efi_table_attr(efi_runtime_services, f, \ + __efi_early()->call((unsigned long) \ + efi_table_attr(efi_runtime_services, f, \ __efi_early()->runtime_services), __VA_ARGS__) extern bool efi_reboot_required(void); diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index e02579907f2e..1a814dc235ba 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -431,9 +431,7 @@ static efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg, efi_file_handle_t *fh; efi_guid_t fs_proto = EFI_FILE_SYSTEM_GUID; efi_status_t status; - void *handle = (void *)(unsigned long)efi_table_attr(efi_loaded_image, - device_handle, - image); + void *handle = efi_table_attr(efi_loaded_image, device_handle, image); status = efi_call_early(handle_protocol, handle, &fs_proto, (void **)&io); @@ -942,33 +940,20 @@ fail: return status; } -#define GET_EFI_CONFIG_TABLE(bits) \ -static void *get_efi_config_table##bits(efi_system_table_t *_sys_table, \ - efi_guid_t guid) \ -{ \ - efi_system_table_##bits##_t *sys_table; \ - efi_config_table_##bits##_t *tables; \ - int i; \ - \ - sys_table = (typeof(sys_table))_sys_table; \ - tables = (typeof(tables))(unsigned long)sys_table->tables; \ - \ - for (i = 0; i < sys_table->nr_tables; i++) { \ - if (efi_guidcmp(tables[i].guid, guid) != 0) \ - continue; \ - \ - return (void *)(unsigned long)tables[i].table; \ - } \ - \ - return NULL; \ -} -GET_EFI_CONFIG_TABLE(32) -GET_EFI_CONFIG_TABLE(64) - void *get_efi_config_table(efi_system_table_t *sys_table, efi_guid_t guid) { - if (efi_is_64bit()) - return get_efi_config_table64(sys_table, guid); - else - return get_efi_config_table32(sys_table, guid); + unsigned long tables = efi_table_attr(efi_system_table, tables, sys_table); + int nr_tables = efi_table_attr(efi_system_table, nr_tables, sys_table); + int i; + + for (i = 0; i < nr_tables; i++) { + efi_config_table_t *t = (void *)tables; + + if (efi_guidcmp(t->guid, guid) == 0) + return efi_table_attr(efi_config_table, table, t); + + tables += efi_is_native() ? sizeof(efi_config_table_t) + : sizeof(efi_config_table_32_t); + } + return NULL; } diff --git a/include/linux/efi.h b/include/linux/efi.h index 5a220af263b1..e9d74e9667c0 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -49,11 +49,11 @@ typedef u64 efi_physical_addr_t; typedef void *efi_handle_t; #define efi_get_handle_at(array, idx) \ - (efi_is_64bit() ? (efi_handle_t)(unsigned long)((u64 *)(array))[idx] \ + (efi_is_native() ? (array)[idx] \ : (efi_handle_t)(unsigned long)((u32 *)(array))[idx]) #define efi_get_handle_num(size) \ - ((size) / (efi_is_64bit() ? sizeof(u64) : sizeof(u32))) + ((size) / (efi_is_native() ? sizeof(efi_handle_t) : sizeof(u32))) #define for_each_efi_handle(handle, array, size, i) \ for (i = 0; \ @@ -805,7 +805,7 @@ typedef struct { typedef union { struct { efi_guid_t guid; - unsigned long table; + void *table; }; efi_config_table_32_t mixed_mode; } efi_config_table_t; -- cgit v1.2.3-59-g8ed1b From e8bd5ddf60eedd6d584fa1e98d0cfe45abe95043 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:10 +0100 Subject: efi/libstub: Drop explicit 32/64-bit protocol definitions Now that we have incorporated the mixed mode protocol definitions into the native ones using unions, we no longer need the separate 32/64 bit struct definitions, with the exception of the EFI system table definition and the boot services, runtime services and configuration table definitions. So drop the unused ones. Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-11-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/eboot.h | 12 -- drivers/firmware/efi/libstub/random.c | 10 -- include/linux/efi.h | 245 +--------------------------------- 3 files changed, 1 insertion(+), 266 deletions(-) diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h index 26f1f2635f64..de13865dc7d2 100644 --- a/arch/x86/boot/compressed/eboot.h +++ b/arch/x86/boot/compressed/eboot.h @@ -12,18 +12,6 @@ #define DESC_TYPE_CODE_DATA (1 << 0) -typedef struct { - u32 get_mode; - u32 set_mode; - u32 blt; -} efi_uga_draw_protocol_32_t; - -typedef struct { - u64 get_mode; - u64 set_mode; - u64 blt; -} efi_uga_draw_protocol_64_t; - typedef union { struct { void *get_mode; diff --git a/drivers/firmware/efi/libstub/random.c b/drivers/firmware/efi/libstub/random.c index d92cd640c73d..1a5a4a9db2a7 100644 --- a/drivers/firmware/efi/libstub/random.c +++ b/drivers/firmware/efi/libstub/random.c @@ -11,16 +11,6 @@ typedef union efi_rng_protocol efi_rng_protocol_t; -typedef struct { - u32 get_info; - u32 get_rng; -} efi_rng_protocol_32_t; - -typedef struct { - u64 get_info; - u64 get_rng; -} efi_rng_protocol_64_t; - union efi_rng_protocol { struct { efi_status_t (*get_info)(efi_rng_protocol_t *, diff --git a/include/linux/efi.h b/include/linux/efi.h index e9d74e9667c0..d8e987910853 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -264,54 +264,6 @@ typedef struct { u32 create_event_ex; } __packed efi_boot_services_32_t; -typedef struct { - efi_table_hdr_t hdr; - u64 raise_tpl; - u64 restore_tpl; - u64 allocate_pages; - u64 free_pages; - u64 get_memory_map; - u64 allocate_pool; - u64 free_pool; - u64 create_event; - u64 set_timer; - u64 wait_for_event; - u64 signal_event; - u64 close_event; - u64 check_event; - u64 install_protocol_interface; - u64 reinstall_protocol_interface; - u64 uninstall_protocol_interface; - u64 handle_protocol; - u64 __reserved; - u64 register_protocol_notify; - u64 locate_handle; - u64 locate_device_path; - u64 install_configuration_table; - u64 load_image; - u64 start_image; - u64 exit; - u64 unload_image; - u64 exit_boot_services; - u64 get_next_monotonic_count; - u64 stall; - u64 set_watchdog_timer; - u64 connect_controller; - u64 disconnect_controller; - u64 open_protocol; - u64 close_protocol; - u64 open_protocol_information; - u64 protocols_per_handle; - u64 locate_handle_buffer; - u64 locate_protocol; - u64 install_multiple_protocol_interfaces; - u64 uninstall_multiple_protocol_interfaces; - u64 calculate_crc32; - u64 copy_mem; - u64 set_mem; - u64 create_event_ex; -} __packed efi_boot_services_64_t; - /* * EFI Boot Services table */ @@ -399,11 +351,6 @@ typedef struct { u32 write; } efi_pci_io_protocol_access_32_t; -typedef struct { - u64 read; - u64 write; -} efi_pci_io_protocol_access_64_t; - typedef union efi_pci_io_protocol efi_pci_io_protocol_t; typedef @@ -422,46 +369,6 @@ typedef struct { efi_pci_io_protocol_cfg_t write; } efi_pci_io_protocol_config_access_t; -typedef struct { - u32 poll_mem; - u32 poll_io; - efi_pci_io_protocol_access_32_t mem; - efi_pci_io_protocol_access_32_t io; - efi_pci_io_protocol_access_32_t pci; - u32 copy_mem; - u32 map; - u32 unmap; - u32 allocate_buffer; - u32 free_buffer; - u32 flush; - u32 get_location; - u32 attributes; - u32 get_bar_attributes; - u32 set_bar_attributes; - u64 romsize; - u32 romimage; -} efi_pci_io_protocol_32_t; - -typedef struct { - u64 poll_mem; - u64 poll_io; - efi_pci_io_protocol_access_64_t mem; - efi_pci_io_protocol_access_64_t io; - efi_pci_io_protocol_access_64_t pci; - u64 copy_mem; - u64 map; - u64 unmap; - u64 allocate_buffer; - u64 free_buffer; - u64 flush; - u64 get_location; - u64 attributes; - u64 get_bar_attributes; - u64 set_bar_attributes; - u64 romsize; - u64 romimage; -} efi_pci_io_protocol_64_t; - union efi_pci_io_protocol { struct { void *poll_mem; @@ -523,22 +430,6 @@ union efi_pci_io_protocol { #define EFI_PCI_IO_ATTRIBUTE_VGA_PALETTE_IO_16 0x20000 #define EFI_PCI_IO_ATTRIBUTE_VGA_IO_16 0x40000 -typedef struct { - u32 version; - u32 get; - u32 set; - u32 del; - u32 get_all; -} apple_properties_protocol_32_t; - -typedef struct { - u64 version; - u64 get; - u64 set; - u64 del; - u64 get_all; -} apple_properties_protocol_64_t; - struct efi_dev_path; typedef union apple_properties_protocol apple_properties_protocol_t; @@ -566,26 +457,6 @@ union apple_properties_protocol { } mixed_mode; }; -typedef struct { - u32 get_capability; - u32 get_event_log; - u32 hash_log_extend_event; - u32 submit_command; - u32 get_active_pcr_banks; - u32 set_active_pcr_banks; - u32 get_result_of_set_active_pcr_banks; -} efi_tcg2_protocol_32_t; - -typedef struct { - u64 get_capability; - u64 get_event_log; - u64 hash_log_extend_event; - u64 submit_command; - u64 get_active_pcr_banks; - u64 set_active_pcr_banks; - u64 get_result_of_set_active_pcr_banks; -} efi_tcg2_protocol_64_t; - typedef u32 efi_tcg2_event_log_format; typedef union efi_tcg2_protocol efi_tcg2_protocol_t; @@ -913,38 +784,6 @@ struct efi_fdt_params { u32 desc_ver; }; -typedef struct { - u32 revision; - u32 parent_handle; - u32 system_table; - u32 device_handle; - u32 file_path; - u32 reserved; - u32 load_options_size; - u32 load_options; - u32 image_base; - __aligned_u64 image_size; - unsigned int image_code_type; - unsigned int image_data_type; - u32 unload; -} efi_loaded_image_32_t; - -typedef struct { - u32 revision; - u64 parent_handle; - u64 system_table; - u64 device_handle; - u64 file_path; - u64 reserved; - u32 load_options_size; - u64 load_options; - u64 image_base; - __aligned_u64 image_size; - unsigned int image_code_type; - unsigned int image_data_type; - u64 unload; -} efi_loaded_image_64_t; - typedef union efi_loaded_image efi_loaded_image_t; union efi_loaded_image { @@ -991,34 +830,6 @@ typedef struct { efi_char16_t filename[1]; } efi_file_info_t; -typedef struct { - u64 revision; - u32 open; - u32 close; - u32 delete; - u32 read; - u32 write; - u32 get_position; - u32 set_position; - u32 get_info; - u32 set_info; - u32 flush; -} efi_file_handle_32_t; - -typedef struct { - u64 revision; - u64 open; - u64 close; - u64 delete; - u64 read; - u64 write; - u64 get_position; - u64 set_position; - u64 get_info; - u64 set_info; - u64 flush; -} efi_file_handle_64_t; - typedef union efi_file_handle efi_file_handle_t; union efi_file_handle { @@ -1054,16 +865,6 @@ union efi_file_handle { } mixed_mode; }; -typedef struct { - u64 revision; - u32 open_volume; -} efi_file_io_interface_32_t; - -typedef struct { - u64 revision; - u64 open_volume; -} efi_file_io_interface_64_t; - typedef union efi_file_io_interface efi_file_io_interface_t; union efi_file_io_interface { @@ -1076,7 +877,7 @@ union efi_file_io_interface { u64 revision; u32 open_volume; } mixed_mode; -} ; +}; #define EFI_FILE_MODE_READ 0x0000000000000001 #define EFI_FILE_MODE_WRITE 0x0000000000000002 @@ -1536,18 +1337,6 @@ struct efivar_entry { bool deleting; }; -typedef struct { - u32 reset; - u32 output_string; - u32 test_string; -} efi_simple_text_output_protocol_32_t; - -typedef struct { - u64 reset; - u64 output_string; - u64 test_string; -} efi_simple_text_output_protocol_64_t; - typedef union efi_simple_text_output_protocol efi_simple_text_output_protocol_t; union efi_simple_text_output_protocol { @@ -1586,24 +1375,6 @@ typedef struct { u32 pixels_per_scan_line; } efi_graphics_output_mode_info_t; -typedef struct { - u32 max_mode; - u32 mode; - u32 info; - u32 size_of_info; - u64 frame_buffer_base; - u32 frame_buffer_size; -} efi_graphics_output_protocol_mode_32_t; - -typedef struct { - u32 max_mode; - u32 mode; - u64 info; - u64 size_of_info; - u64 frame_buffer_base; - u64 frame_buffer_size; -} efi_graphics_output_protocol_mode_64_t; - typedef union efi_graphics_output_protocol_mode efi_graphics_output_protocol_mode_t; union efi_graphics_output_protocol_mode { @@ -1625,20 +1396,6 @@ union efi_graphics_output_protocol_mode { } mixed_mode; }; -typedef struct { - u32 query_mode; - u32 set_mode; - u32 blt; - u32 mode; -} efi_graphics_output_protocol_32_t; - -typedef struct { - u64 query_mode; - u64 set_mode; - u64 blt; - u64 mode; -} efi_graphics_output_protocol_64_t; - typedef union efi_graphics_output_protocol efi_graphics_output_protocol_t; union efi_graphics_output_protocol { -- cgit v1.2.3-59-g8ed1b From 960a8d01834eabc4549928c60f8ce0300ad08519 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:11 +0100 Subject: efi/libstub: Use stricter typing for firmware function pointers We will soon remove another level of pointer casting, so let's make sure all type handling involving firmware calls at boot time is correct. Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-12-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/eboot.c | 10 ++++++---- arch/x86/boot/compressed/eboot.h | 9 ++++++--- arch/x86/platform/efi/efi.c | 4 ++-- arch/x86/xen/efi.c | 2 +- drivers/firmware/efi/libstub/efi-stub-helper.c | 5 +++-- drivers/firmware/efi/libstub/tpm.c | 4 ++-- include/linux/efi.h | 12 ++++++++---- 7 files changed, 28 insertions(+), 18 deletions(-) diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 990b93379965..2733bc263c04 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -70,7 +70,8 @@ preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom) size = romsize + sizeof(*rom); - status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom); + status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, + (void **)&rom); if (status != EFI_SUCCESS) { efi_printk(sys_table, "Failed to allocate memory for 'rom'\n"); return status; @@ -195,9 +196,9 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params) struct setup_data *data, *new; efi_status_t status; u32 size = 0; - void *p; + apple_properties_protocol_t *p; - status = efi_call_early(locate_protocol, &guid, NULL, &p); + status = efi_call_early(locate_protocol, &guid, NULL, (void **)&p); if (status != EFI_SUCCESS) return; @@ -212,7 +213,8 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params) do { status = efi_call_early(allocate_pool, EFI_LOADER_DATA, - size + sizeof(struct setup_data), &new); + size + sizeof(struct setup_data), + (void **)&new); if (status != EFI_SUCCESS) { efi_printk(sys_table, "Failed to allocate memory for 'properties'\n"); return; diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h index de13865dc7d2..b8d11928f528 100644 --- a/arch/x86/boot/compressed/eboot.h +++ b/arch/x86/boot/compressed/eboot.h @@ -12,9 +12,12 @@ #define DESC_TYPE_CODE_DATA (1 << 0) -typedef union { +typedef union efi_uga_draw_protocol efi_uga_draw_protocol_t; + +union efi_uga_draw_protocol { struct { - void *get_mode; + efi_status_t (*get_mode)(efi_uga_draw_protocol_t *, + u32*, u32*, u32*, u32*); void *set_mode; void *blt; }; @@ -23,6 +26,6 @@ typedef union { u32 set_mode; u32 blt; } mixed_mode; -} efi_uga_draw_protocol_t; +}; #endif /* BOOT_COMPRESSED_EBOOT_H */ diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index e188b7ce0796..d96953d9d4e7 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -385,7 +385,7 @@ static int __init efi_systab_init(void *phys) tmp |= systab64->con_in; efi_systab.con_out_handle = systab64->con_out_handle; tmp |= systab64->con_out_handle; - efi_systab.con_out = systab64->con_out; + efi_systab.con_out = (void *)(unsigned long)systab64->con_out; tmp |= systab64->con_out; efi_systab.stderr_handle = systab64->stderr_handle; tmp |= systab64->stderr_handle; @@ -427,7 +427,7 @@ static int __init efi_systab_init(void *phys) efi_systab.con_in_handle = systab32->con_in_handle; efi_systab.con_in = systab32->con_in; efi_systab.con_out_handle = systab32->con_out_handle; - efi_systab.con_out = systab32->con_out; + efi_systab.con_out = (void *)(unsigned long)systab32->con_out; efi_systab.stderr_handle = systab32->stderr_handle; efi_systab.stderr = systab32->stderr; efi_systab.runtime = (void *)(unsigned long)systab32->runtime; diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c index a04551ee5568..1abe455d926a 100644 --- a/arch/x86/xen/efi.c +++ b/arch/x86/xen/efi.c @@ -31,7 +31,7 @@ static efi_system_table_t efi_systab_xen __initdata = { .con_in_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ .con_in = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ .con_out_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ - .con_out = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ + .con_out = NULL, /* Not used under Xen. */ .stderr_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ .stderr = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ .runtime = (efi_runtime_services_t *)EFI_INVALID_TABLE_ADDR, diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index 1a814dc235ba..f91f4fdbe553 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -413,12 +413,13 @@ grow: return status; } -static efi_status_t efi_file_read(void *handle, unsigned long *size, void *addr) +static efi_status_t efi_file_read(efi_file_handle_t *handle, + unsigned long *size, void *addr) { return efi_call_proto(efi_file_handle, read, handle, size, addr); } -static efi_status_t efi_file_close(void *handle) +static efi_status_t efi_file_close(efi_file_handle_t *handle) { return efi_call_proto(efi_file_handle, close, handle); } diff --git a/drivers/firmware/efi/libstub/tpm.c b/drivers/firmware/efi/libstub/tpm.c index eb9af83e4d59..d270acd43de8 100644 --- a/drivers/firmware/efi/libstub/tpm.c +++ b/drivers/firmware/efi/libstub/tpm.c @@ -69,11 +69,11 @@ void efi_retrieve_tpm2_eventlog(efi_system_table_t *sys_table_arg) size_t log_size, last_entry_size; efi_bool_t truncated; int version = EFI_TCG2_EVENT_LOG_FORMAT_TCG_2; - void *tcg2_protocol = NULL; + efi_tcg2_protocol_t *tcg2_protocol = NULL; int final_events_size = 0; status = efi_call_early(locate_protocol, &tcg2_guid, NULL, - &tcg2_protocol); + (void **)&tcg2_protocol); if (status != EFI_SUCCESS) return; diff --git a/include/linux/efi.h b/include/linux/efi.h index d8e987910853..880077639113 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -382,7 +382,11 @@ union efi_pci_io_protocol { void *allocate_buffer; void *free_buffer; void *flush; - void *get_location; + efi_status_t (*get_location)(efi_pci_io_protocol_t *, + unsigned long *segment_nr, + unsigned long *bus_nr, + unsigned long *device_nr, + unsigned long *function_nr); void *attributes; void *get_bar_attributes; void *set_bar_attributes; @@ -730,6 +734,8 @@ typedef struct { u32 tables; } efi_system_table_32_t; +typedef union efi_simple_text_output_protocol efi_simple_text_output_protocol_t; + typedef union { struct { efi_table_hdr_t hdr; @@ -738,7 +744,7 @@ typedef union { unsigned long con_in_handle; unsigned long con_in; unsigned long con_out_handle; - unsigned long con_out; + efi_simple_text_output_protocol_t *con_out; unsigned long stderr_handle; unsigned long stderr; efi_runtime_services_t *runtime; @@ -1337,8 +1343,6 @@ struct efivar_entry { bool deleting; }; -typedef union efi_simple_text_output_protocol efi_simple_text_output_protocol_t; - union efi_simple_text_output_protocol { struct { void *reset; -- cgit v1.2.3-59-g8ed1b From 8f24f8c2fc82f701866419dcb594e2cc1d3f46ba Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:12 +0100 Subject: efi/libstub: Annotate firmware routines as __efiapi Annotate all the firmware routines (boot services, runtime services and protocol methods) called in the boot context as __efiapi, and make it expand to __attribute__((ms_abi)) on 64-bit x86. This allows us to use the compiler to generate the calls into firmware that use the MS calling convention instead of the SysV one. Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-13-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 11 +-- arch/x86/boot/compressed/eboot.h | 4 +- drivers/firmware/efi/libstub/random.c | 10 ++- include/linux/efi.h | 147 +++++++++++++++++++--------------- 4 files changed, 96 insertions(+), 76 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5e8949953660..ded73bb5c0dd 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1991,11 +1991,12 @@ config EFI platforms. config EFI_STUB - bool "EFI stub support" - depends on EFI && !X86_USE_3DNOW - select RELOCATABLE - ---help--- - This kernel feature allows a bzImage to be loaded directly + bool "EFI stub support" + depends on EFI && !X86_USE_3DNOW + depends on $(cc-option,-mabi=ms) || X86_32 + select RELOCATABLE + ---help--- + This kernel feature allows a bzImage to be loaded directly by EFI firmware without the use of a bootloader. See Documentation/admin-guide/efi-stub.rst for more information. diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h index b8d11928f528..99f35343d443 100644 --- a/arch/x86/boot/compressed/eboot.h +++ b/arch/x86/boot/compressed/eboot.h @@ -16,8 +16,8 @@ typedef union efi_uga_draw_protocol efi_uga_draw_protocol_t; union efi_uga_draw_protocol { struct { - efi_status_t (*get_mode)(efi_uga_draw_protocol_t *, - u32*, u32*, u32*, u32*); + efi_status_t (__efiapi *get_mode)(efi_uga_draw_protocol_t *, + u32*, u32*, u32*, u32*); void *set_mode; void *blt; }; diff --git a/drivers/firmware/efi/libstub/random.c b/drivers/firmware/efi/libstub/random.c index 1a5a4a9db2a7..4f5c249c62dc 100644 --- a/drivers/firmware/efi/libstub/random.c +++ b/drivers/firmware/efi/libstub/random.c @@ -13,10 +13,12 @@ typedef union efi_rng_protocol efi_rng_protocol_t; union efi_rng_protocol { struct { - efi_status_t (*get_info)(efi_rng_protocol_t *, - unsigned long *, efi_guid_t *); - efi_status_t (*get_rng)(efi_rng_protocol_t *, - efi_guid_t *, unsigned long, u8 *out); + efi_status_t (__efiapi *get_info)(efi_rng_protocol_t *, + unsigned long *, + efi_guid_t *); + efi_status_t (__efiapi *get_rng)(efi_rng_protocol_t *, + efi_guid_t *, unsigned long, + u8 *out); }; struct { u32 get_info; diff --git a/include/linux/efi.h b/include/linux/efi.h index 880077639113..2074b737aa17 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -48,6 +48,12 @@ typedef u16 efi_char16_t; /* UNICODE character */ typedef u64 efi_physical_addr_t; typedef void *efi_handle_t; +#ifdef CONFIG_X86_64 +#define __efiapi __attribute__((ms_abi)) +#else +#define __efiapi +#endif + #define efi_get_handle_at(array, idx) \ (efi_is_native() ? (array)[idx] \ : (efi_handle_t)(unsigned long)((u32 *)(array))[idx]) @@ -272,13 +278,16 @@ typedef union { efi_table_hdr_t hdr; void *raise_tpl; void *restore_tpl; - efi_status_t (*allocate_pages)(int, int, unsigned long, - efi_physical_addr_t *); - efi_status_t (*free_pages)(efi_physical_addr_t, unsigned long); - efi_status_t (*get_memory_map)(unsigned long *, void *, unsigned long *, - unsigned long *, u32 *); - efi_status_t (*allocate_pool)(int, unsigned long, void **); - efi_status_t (*free_pool)(void *); + efi_status_t (__efiapi *allocate_pages)(int, int, unsigned long, + efi_physical_addr_t *); + efi_status_t (__efiapi *free_pages)(efi_physical_addr_t, + unsigned long); + efi_status_t (__efiapi *get_memory_map)(unsigned long *, void *, + unsigned long *, + unsigned long *, u32 *); + efi_status_t (__efiapi *allocate_pool)(int, unsigned long, + void **); + efi_status_t (__efiapi *free_pool)(void *); void *create_event; void *set_timer; void *wait_for_event; @@ -288,18 +297,22 @@ typedef union { void *install_protocol_interface; void *reinstall_protocol_interface; void *uninstall_protocol_interface; - efi_status_t (*handle_protocol)(efi_handle_t, efi_guid_t *, void **); + efi_status_t (__efiapi *handle_protocol)(efi_handle_t, + efi_guid_t *, void **); void *__reserved; void *register_protocol_notify; - efi_status_t (*locate_handle)(int, efi_guid_t *, void *, - unsigned long *, efi_handle_t *); + efi_status_t (__efiapi *locate_handle)(int, efi_guid_t *, + void *, unsigned long *, + efi_handle_t *); void *locate_device_path; - efi_status_t (*install_configuration_table)(efi_guid_t *, void *); + efi_status_t (__efiapi *install_configuration_table)(efi_guid_t *, + void *); void *load_image; void *start_image; void *exit; void *unload_image; - efi_status_t (*exit_boot_services)(efi_handle_t, unsigned long); + efi_status_t (__efiapi *exit_boot_services)(efi_handle_t, + unsigned long); void *get_next_monotonic_count; void *stall; void *set_watchdog_timer; @@ -310,7 +323,8 @@ typedef union { void *open_protocol_information; void *protocols_per_handle; void *locate_handle_buffer; - efi_status_t (*locate_protocol)(efi_guid_t *, void *, void **); + efi_status_t (__efiapi *locate_protocol)(efi_guid_t *, void *, + void **); void *install_multiple_protocol_interfaces; void *uninstall_multiple_protocol_interfaces; void *calculate_crc32; @@ -354,10 +368,11 @@ typedef struct { typedef union efi_pci_io_protocol efi_pci_io_protocol_t; typedef -efi_status_t (*efi_pci_io_protocol_cfg_t)(efi_pci_io_protocol_t *, - EFI_PCI_IO_PROTOCOL_WIDTH, - u32 offset, unsigned long count, - void *buffer); +efi_status_t (__efiapi *efi_pci_io_protocol_cfg_t)(efi_pci_io_protocol_t *, + EFI_PCI_IO_PROTOCOL_WIDTH, + u32 offset, + unsigned long count, + void *buffer); typedef struct { void *read; @@ -382,11 +397,11 @@ union efi_pci_io_protocol { void *allocate_buffer; void *free_buffer; void *flush; - efi_status_t (*get_location)(efi_pci_io_protocol_t *, - unsigned long *segment_nr, - unsigned long *bus_nr, - unsigned long *device_nr, - unsigned long *function_nr); + efi_status_t (__efiapi *get_location)(efi_pci_io_protocol_t *, + unsigned long *segment_nr, + unsigned long *bus_nr, + unsigned long *device_nr, + unsigned long *func_nr); void *attributes; void *get_bar_attributes; void *set_bar_attributes; @@ -441,16 +456,17 @@ typedef union apple_properties_protocol apple_properties_protocol_t; union apple_properties_protocol { struct { unsigned long version; - efi_status_t (*get)(apple_properties_protocol_t *, - struct efi_dev_path *, efi_char16_t *, - void *, u32 *); - efi_status_t (*set)(apple_properties_protocol_t *, - struct efi_dev_path *, efi_char16_t *, - void *, u32); - efi_status_t (*del)(apple_properties_protocol_t *, - struct efi_dev_path *, efi_char16_t *); - efi_status_t (*get_all)(apple_properties_protocol_t *, - void *buffer, u32 *); + efi_status_t (__efiapi *get)(apple_properties_protocol_t *, + struct efi_dev_path *, + efi_char16_t *, void *, u32 *); + efi_status_t (__efiapi *set)(apple_properties_protocol_t *, + struct efi_dev_path *, + efi_char16_t *, void *, u32); + efi_status_t (__efiapi *del)(apple_properties_protocol_t *, + struct efi_dev_path *, + efi_char16_t *); + efi_status_t (__efiapi *get_all)(apple_properties_protocol_t *, + void *buffer, u32 *); }; struct { u32 version; @@ -468,11 +484,11 @@ typedef union efi_tcg2_protocol efi_tcg2_protocol_t; union efi_tcg2_protocol { struct { void *get_capability; - efi_status_t (*get_event_log)(efi_handle_t, - efi_tcg2_event_log_format, - efi_physical_addr_t *, - efi_physical_addr_t *, - efi_bool_t *); + efi_status_t (__efiapi *get_event_log)(efi_handle_t, + efi_tcg2_event_log_format, + efi_physical_addr_t *, + efi_physical_addr_t *, + efi_bool_t *); void *hash_log_extend_event; void *submit_command; void *get_active_pcr_banks; @@ -575,21 +591,21 @@ typedef efi_status_t efi_query_variable_store_t(u32 attributes, typedef union { struct { - efi_table_hdr_t hdr; - efi_get_time_t *get_time; - efi_set_time_t *set_time; - efi_get_wakeup_time_t *get_wakeup_time; - efi_set_wakeup_time_t *set_wakeup_time; - efi_set_virtual_address_map_t *set_virtual_address_map; - void *convert_pointer; - efi_get_variable_t *get_variable; - efi_get_next_variable_t *get_next_variable; - efi_set_variable_t *set_variable; - efi_get_next_high_mono_count_t *get_next_high_mono_count; - efi_reset_system_t *reset_system; - efi_update_capsule_t *update_capsule; - efi_query_capsule_caps_t *query_capsule_caps; - efi_query_variable_info_t *query_variable_info; + efi_table_hdr_t hdr; + efi_get_time_t __efiapi *get_time; + efi_set_time_t __efiapi *set_time; + efi_get_wakeup_time_t __efiapi *get_wakeup_time; + efi_set_wakeup_time_t __efiapi *set_wakeup_time; + efi_set_virtual_address_map_t __efiapi *set_virtual_address_map; + void *convert_pointer; + efi_get_variable_t __efiapi *get_variable; + efi_get_next_variable_t __efiapi *get_next_variable; + efi_set_variable_t __efiapi *set_variable; + efi_get_next_high_mono_count_t __efiapi *get_next_high_mono_count; + efi_reset_system_t __efiapi *reset_system; + efi_update_capsule_t __efiapi *update_capsule; + efi_query_capsule_caps_t __efiapi *query_capsule_caps; + efi_query_variable_info_t __efiapi *query_variable_info; }; efi_runtime_services_32_t mixed_mode; } efi_runtime_services_t; @@ -806,7 +822,7 @@ union efi_loaded_image { __aligned_u64 image_size; unsigned int image_code_type; unsigned int image_data_type; - efi_status_t (*unload)(efi_handle_t image_handle); + efi_status_t ( __efiapi *unload)(efi_handle_t image_handle); }; struct { u32 revision; @@ -841,18 +857,19 @@ typedef union efi_file_handle efi_file_handle_t; union efi_file_handle { struct { u64 revision; - efi_status_t (*open)(efi_file_handle_t *, - efi_file_handle_t **, - efi_char16_t *, u64, u64); - efi_status_t (*close)(efi_file_handle_t *); + efi_status_t (__efiapi *open)(efi_file_handle_t *, + efi_file_handle_t **, + efi_char16_t *, u64, u64); + efi_status_t (__efiapi *close)(efi_file_handle_t *); void *delete; - efi_status_t (*read)(efi_file_handle_t *, unsigned long *, - void *); + efi_status_t (__efiapi *read)(efi_file_handle_t *, + unsigned long *, void *); void *write; void *get_position; void *set_position; - efi_status_t (*get_info)(efi_file_handle_t *, efi_guid_t *, - unsigned long *, void *); + efi_status_t (__efiapi *get_info)(efi_file_handle_t *, + efi_guid_t *, unsigned long *, + void *); void *set_info; void *flush; }; @@ -876,8 +893,8 @@ typedef union efi_file_io_interface efi_file_io_interface_t; union efi_file_io_interface { struct { u64 revision; - int (*open_volume)(efi_file_io_interface_t *, - efi_file_handle_t **); + int (__efiapi *open_volume)(efi_file_io_interface_t *, + efi_file_handle_t **); }; struct { u64 revision; @@ -1346,8 +1363,8 @@ struct efivar_entry { union efi_simple_text_output_protocol { struct { void *reset; - efi_status_t (*output_string)(efi_simple_text_output_protocol_t *, - efi_char16_t *); + efi_status_t (__efiapi *output_string)(efi_simple_text_output_protocol_t *, + efi_char16_t *); void *test_string; }; struct { -- cgit v1.2.3-59-g8ed1b From afc4cc71cf78a8d691023da8ebcc31c3394a1674 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:13 +0100 Subject: efi/libstub/x86: Avoid thunking for native firmware calls We use special wrapper routines to invoke firmware services in the native case as well as the mixed mode case. For mixed mode, the need is obvious, but for the native cases, we can simply rely on the compiler to generate the indirect call, given that GCC now has support for the MS calling convention (and has had it for quite some time now). Note that on i386, the decompressor and the EFI stub are not built with -mregparm=3 like the rest of the i386 kernel, so we can safely allow the compiler to emit the indirect calls here as well. So drop all the wrappers and indirection, and switch to either native calls, or direct calls into the thunk routine for mixed mode. Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-14-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/Makefile | 2 +- arch/x86/boot/compressed/eboot.c | 3 +- arch/x86/boot/compressed/efi_stub_32.S | 87 ---------------------------------- arch/x86/boot/compressed/efi_stub_64.S | 5 -- arch/x86/boot/compressed/head_32.S | 6 --- arch/x86/boot/compressed/head_64.S | 23 +-------- arch/x86/include/asm/efi.h | 34 +++++++------ arch/x86/platform/efi/efi_64.c | 2 - 8 files changed, 24 insertions(+), 138 deletions(-) delete mode 100644 arch/x86/boot/compressed/efi_stub_32.S delete mode 100644 arch/x86/boot/compressed/efi_stub_64.S diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 1dac210f7d44..56aa5fa0a66b 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -89,7 +89,7 @@ vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone -vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o \ +vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o \ $(objtree)/drivers/firmware/efi/libstub/lib.a vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 2733bc263c04..36a26d6e2af0 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -44,7 +44,8 @@ BOOT_SERVICES(64); void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str) { efi_call_proto(efi_simple_text_output_protocol, output_string, - efi_early->text_output, str); + ((efi_simple_text_output_protocol_t *)(unsigned long) + efi_early->text_output), str); } static efi_status_t diff --git a/arch/x86/boot/compressed/efi_stub_32.S b/arch/x86/boot/compressed/efi_stub_32.S deleted file mode 100644 index ed6c351d34ed..000000000000 --- a/arch/x86/boot/compressed/efi_stub_32.S +++ /dev/null @@ -1,87 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * EFI call stub for IA32. - * - * This stub allows us to make EFI calls in physical mode with interrupts - * turned off. Note that this implementation is different from the one in - * arch/x86/platform/efi/efi_stub_32.S because we're _already_ in physical - * mode at this point. - */ - -#include -#include - -/* - * efi_call_phys(void *, ...) is a function with variable parameters. - * All the callers of this function assure that all the parameters are 4-bytes. - */ - -/* - * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save. - * So we'd better save all of them at the beginning of this function and restore - * at the end no matter how many we use, because we can not assure EFI runtime - * service functions will comply with gcc calling convention, too. - */ - -.text -SYM_FUNC_START(efi_call_phys) - /* - * 0. The function can only be called in Linux kernel. So CS has been - * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found - * the values of these registers are the same. And, the corresponding - * GDT entries are identical. So I will do nothing about segment reg - * and GDT, but change GDT base register in prelog and epilog. - */ - - /* - * 1. Because we haven't been relocated by this point we need to - * use relative addressing. - */ - call 1f -1: popl %edx - subl $1b, %edx - - /* - * 2. Now on the top of stack is the return - * address in the caller of efi_call_phys(), then parameter 1, - * parameter 2, ..., param n. To make things easy, we save the return - * address of efi_call_phys in a global variable. - */ - popl %ecx - movl %ecx, saved_return_addr(%edx) - /* get the function pointer into ECX*/ - popl %ecx - movl %ecx, efi_rt_function_ptr(%edx) - - /* - * 3. Call the physical function. - */ - call *%ecx - - /* - * 4. Balance the stack. And because EAX contain the return value, - * we'd better not clobber it. We need to calculate our address - * again because %ecx and %edx are not preserved across EFI function - * calls. - */ - call 1f -1: popl %edx - subl $1b, %edx - - movl efi_rt_function_ptr(%edx), %ecx - pushl %ecx - - /* - * 10. Push the saved return address onto the stack and return. - */ - movl saved_return_addr(%edx), %ecx - pushl %ecx - ret -SYM_FUNC_END(efi_call_phys) -.previous - -.data -saved_return_addr: - .long 0 -efi_rt_function_ptr: - .long 0 diff --git a/arch/x86/boot/compressed/efi_stub_64.S b/arch/x86/boot/compressed/efi_stub_64.S deleted file mode 100644 index 99494dff2113..000000000000 --- a/arch/x86/boot/compressed/efi_stub_64.S +++ /dev/null @@ -1,5 +0,0 @@ -#include -#include -#include - -#include "../../platform/efi/efi_stub_64.S" diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index f2dfd6d083ef..0b03dc7bba12 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -161,9 +161,7 @@ SYM_FUNC_START(efi_pe_entry) popl %ecx movl %ecx, efi32_config+8(%esi) /* EFI System table pointer */ - /* Relocate efi_config->call() */ leal efi32_config(%esi), %eax - add %esi, 40(%eax) pushl %eax call make_boot_params @@ -188,9 +186,7 @@ SYM_FUNC_START(efi32_stub_entry) movl %ecx, efi32_config(%esi) /* Handle */ movl %edx, efi32_config+8(%esi) /* EFI System table pointer */ - /* Relocate efi_config->call() */ leal efi32_config(%esi), %eax - add %esi, 40(%eax) pushl %eax 2: call efi_main @@ -266,8 +262,6 @@ SYM_FUNC_END(.Lrelocated) .data efi32_config: .fill 5,8,0 - .long efi_call_phys - .long 0 .byte 0 #endif diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index ee60b81944a7..ad57edeaeeb3 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -459,15 +459,6 @@ SYM_FUNC_START(efi_pe_entry) leaq efi64_config(%rip), %rax movq %rax, efi_config(%rip) - call 1f -1: popq %rbp - subq $1b, %rbp - - /* - * Relocate efi_config->call(). - */ - addq %rbp, efi64_config+40(%rip) - movq %rax, %rdi call make_boot_params cmpq $0,%rax @@ -475,20 +466,10 @@ SYM_FUNC_START(efi_pe_entry) mov %rax, %rsi leaq startup_32(%rip), %rax movl %eax, BP_code32_start(%rsi) - jmp 2f /* Skip the relocation */ handover_entry: - call 1f -1: popq %rbp - subq $1b, %rbp - - /* - * Relocate efi_config->call(). - */ - movq efi_config(%rip), %rax - addq %rbp, 40(%rax) -2: movq efi_config(%rip), %rdi + and $~0xf, %rsp /* realign the stack */ call efi_main movq %rax,%rsi cmpq $0,%rax @@ -688,14 +669,12 @@ SYM_DATA_LOCAL(efi_config, .quad 0) #ifdef CONFIG_EFI_MIXED SYM_DATA_START(efi32_config) .fill 5,8,0 - .quad efi64_thunk .byte 0 SYM_DATA_END(efi32_config) #endif SYM_DATA_START(efi64_config) .fill 5,8,0 - .quad efi_call .byte 1 SYM_DATA_END(efi64_config) #endif /* CONFIG_EFI_STUB */ diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index c27323cb49e5..001905daa110 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -152,6 +152,7 @@ struct efi_setup_data { extern u64 efi_setup; #ifdef CONFIG_EFI +extern efi_status_t efi64_thunk(u32, ...); static inline bool efi_is_mixed(void) { @@ -205,7 +206,6 @@ struct efi_config { u64 runtime_services; u64 boot_services; u64 text_output; - efi_status_t (*call)(unsigned long, ...); bool is64; } __packed; @@ -235,30 +235,36 @@ static inline bool efi_is_native(void) (unsigned long)(attr), (attr)) #define efi_table_attr(table, attr, instance) ({ \ - __typeof__(((table##_t *)0)->attr) __ret; \ + __typeof__(instance->attr) __ret; \ if (efi_is_native()) { \ - __ret = ((table##_t *)(unsigned long)instance)->attr; \ + __ret = instance->attr; \ } else { \ - __ret = (__typeof__(__ret))efi_mixed_mode_cast( \ - ((table##_t *)(unsigned long)instance)->mixed_mode.attr);\ + __ret = (__typeof__(__ret)) \ + efi_mixed_mode_cast(instance->mixed_mode.attr); \ } \ __ret; \ }) #define efi_call_proto(protocol, f, instance, ...) \ - __efi_early()->call((unsigned long) \ - efi_table_attr(protocol, f, instance), \ - instance, ##__VA_ARGS__) + (efi_is_native() \ + ? instance->f(instance, ##__VA_ARGS__) \ + : efi64_thunk(instance->mixed_mode.f, instance, ##__VA_ARGS__)) #define efi_call_early(f, ...) \ - __efi_early()->call((unsigned long) \ - efi_table_attr(efi_boot_services, f, \ - __efi_early()->boot_services), __VA_ARGS__) + (efi_is_native() \ + ? ((efi_boot_services_t *)(unsigned long) \ + __efi_early()->boot_services)->f(__VA_ARGS__) \ + : efi64_thunk(((efi_boot_services_t *)(unsigned long) \ + __efi_early()->boot_services)->mixed_mode.f, \ + __VA_ARGS__)) #define efi_call_runtime(f, ...) \ - __efi_early()->call((unsigned long) \ - efi_table_attr(efi_runtime_services, f, \ - __efi_early()->runtime_services), __VA_ARGS__) + (efi_is_native() \ + ? ((efi_runtime_services_t *)(unsigned long) \ + __efi_early()->runtime_services)->f(__VA_ARGS__)\ + : efi64_thunk(((efi_runtime_services_t *)(unsigned long)\ + __efi_early()->runtime_services)->mixed_mode.f, \ + __VA_ARGS__)) extern bool efi_reboot_required(void); extern bool efi_is_table_address(unsigned long phys_addr); diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 885e50a707a6..03c2ed3c645c 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -635,8 +635,6 @@ void efi_switch_mm(struct mm_struct *mm) } #ifdef CONFIG_EFI_MIXED -extern efi_status_t efi64_thunk(u32, ...); - static DEFINE_SPINLOCK(efi_runtime_lock); #define runtime_service32(func) \ -- cgit v1.2.3-59-g8ed1b From 14e900c7e4033d6ee3398b9f133e1716cc072401 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:14 +0100 Subject: efi/libstub: Avoid protocol wrapper for file I/O routines The EFI file I/O routines built on top of the file I/O firmware services are incompatible with mixed mode, so there is no need to obfuscate them by using protocol wrappers whose only purpose is to hide the mixed mode handling. So let's switch to plain indirect calls instead. This also means we can drop the mixed_mode aliases from the various types involved. Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-15-ardb@kernel.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/libstub/efi-stub-helper.c | 17 ++-- include/linux/efi.h | 118 ++++++++----------------- 2 files changed, 46 insertions(+), 89 deletions(-) diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index f91f4fdbe553..9bb74ad4b7fe 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -370,8 +370,7 @@ static efi_status_t efi_file_size(efi_system_table_t *sys_table_arg, void *__fh, efi_guid_t info_guid = EFI_FILE_INFO_ID; unsigned long info_sz; - status = efi_call_proto(efi_file_handle, open, fh, &h, filename_16, - EFI_FILE_MODE_READ, (u64)0); + status = fh->open(fh, &h, filename_16, EFI_FILE_MODE_READ, 0); if (status != EFI_SUCCESS) { efi_printk(sys_table_arg, "Failed to open file: "); efi_char16_printk(sys_table_arg, filename_16); @@ -382,8 +381,7 @@ static efi_status_t efi_file_size(efi_system_table_t *sys_table_arg, void *__fh, *handle = h; info_sz = 0; - status = efi_call_proto(efi_file_handle, get_info, h, &info_guid, - &info_sz, NULL); + status = h->get_info(h, &info_guid, &info_sz, NULL); if (status != EFI_BUFFER_TOO_SMALL) { efi_printk(sys_table_arg, "Failed to get file info size\n"); return status; @@ -397,8 +395,7 @@ grow: return status; } - status = efi_call_proto(efi_file_handle, get_info, h, &info_guid, - &info_sz, info); + status = h->get_info(h, &info_guid, &info_sz, info); if (status == EFI_BUFFER_TOO_SMALL) { efi_call_early(free_pool, info); goto grow; @@ -416,12 +413,12 @@ grow: static efi_status_t efi_file_read(efi_file_handle_t *handle, unsigned long *size, void *addr) { - return efi_call_proto(efi_file_handle, read, handle, size, addr); + return handle->read(handle, size, addr); } static efi_status_t efi_file_close(efi_file_handle_t *handle) { - return efi_call_proto(efi_file_handle, close, handle); + return handle->close(handle); } static efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg, @@ -432,7 +429,7 @@ static efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg, efi_file_handle_t *fh; efi_guid_t fs_proto = EFI_FILE_SYSTEM_GUID; efi_status_t status; - void *handle = efi_table_attr(efi_loaded_image, device_handle, image); + efi_handle_t handle = image->device_handle; status = efi_call_early(handle_protocol, handle, &fs_proto, (void **)&io); @@ -441,7 +438,7 @@ static efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg, return status; } - status = efi_call_proto(efi_file_io_interface, open_volume, io, &fh); + status = io->open_volume(io, &fh); if (status != EFI_SUCCESS) efi_printk(sys_table_arg, "Failed to open volume\n"); else diff --git a/include/linux/efi.h b/include/linux/efi.h index 2074b737aa17..14dd08ecf8a7 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -806,40 +806,21 @@ struct efi_fdt_params { u32 desc_ver; }; -typedef union efi_loaded_image efi_loaded_image_t; - -union efi_loaded_image { - struct { - u32 revision; - efi_handle_t parent_handle; - efi_system_table_t *system_table; - efi_handle_t device_handle; - void *file_path; - void *reserved; - u32 load_options_size; - void *load_options; - void *image_base; - __aligned_u64 image_size; - unsigned int image_code_type; - unsigned int image_data_type; - efi_status_t ( __efiapi *unload)(efi_handle_t image_handle); - }; - struct { - u32 revision; - u32 parent_handle; - u32 system_table; - u32 device_handle; - u32 file_path; - u32 reserved; - u32 load_options_size; - u32 load_options; - u32 image_base; - __aligned_u64 image_size; - unsigned int image_code_type; - unsigned int image_data_type; - u32 unload; - } mixed_mode; -}; +typedef struct { + u32 revision; + efi_handle_t parent_handle; + efi_system_table_t *system_table; + efi_handle_t device_handle; + void *file_path; + void *reserved; + u32 load_options_size; + void *load_options; + void *image_base; + __aligned_u64 image_size; + unsigned int image_code_type; + unsigned int image_data_type; + efi_status_t ( __efiapi *unload)(efi_handle_t image_handle); +} efi_loaded_image_t; typedef struct { u64 size; @@ -852,54 +833,33 @@ typedef struct { efi_char16_t filename[1]; } efi_file_info_t; -typedef union efi_file_handle efi_file_handle_t; - -union efi_file_handle { - struct { - u64 revision; - efi_status_t (__efiapi *open)(efi_file_handle_t *, - efi_file_handle_t **, - efi_char16_t *, u64, u64); - efi_status_t (__efiapi *close)(efi_file_handle_t *); - void *delete; - efi_status_t (__efiapi *read)(efi_file_handle_t *, - unsigned long *, void *); - void *write; - void *get_position; - void *set_position; - efi_status_t (__efiapi *get_info)(efi_file_handle_t *, - efi_guid_t *, unsigned long *, - void *); - void *set_info; - void *flush; - }; - struct { - u64 revision; - u32 open; - u32 close; - u32 delete; - u32 read; - u32 write; - u32 get_position; - u32 set_position; - u32 get_info; - u32 set_info; - u32 flush; - } mixed_mode; +typedef struct efi_file_handle efi_file_handle_t; + +struct efi_file_handle { + u64 revision; + efi_status_t (__efiapi *open)(efi_file_handle_t *, + efi_file_handle_t **, + efi_char16_t *, u64, u64); + efi_status_t (__efiapi *close)(efi_file_handle_t *); + void *delete; + efi_status_t (__efiapi *read)(efi_file_handle_t *, + unsigned long *, void *); + void *write; + void *get_position; + void *set_position; + efi_status_t (__efiapi *get_info)(efi_file_handle_t *, + efi_guid_t *, unsigned long *, + void *); + void *set_info; + void *flush; }; -typedef union efi_file_io_interface efi_file_io_interface_t; +typedef struct efi_file_io_interface efi_file_io_interface_t; -union efi_file_io_interface { - struct { - u64 revision; - int (__efiapi *open_volume)(efi_file_io_interface_t *, - efi_file_handle_t **); - }; - struct { - u64 revision; - u32 open_volume; - } mixed_mode; +struct efi_file_io_interface { + u64 revision; + int (__efiapi *open_volume)(efi_file_io_interface_t *, + efi_file_handle_t **); }; #define EFI_FILE_MODE_READ 0x0000000000000001 -- cgit v1.2.3-59-g8ed1b From 2fcdad2a80a6d6fd0f77205108232d1adc709a84 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:15 +0100 Subject: efi/libstub: Get rid of 'sys_table_arg' macro parameter The efi_call macros on ARM have a dependency on a variable 'sys_table_arg' existing in the scope of the macro instantiation. Since this variable always points to the same data structure, let's create a global getter for it and use that instead. Note that the use of a global variable with external linkage is avoided, given the problems we had in the past with early processing of the GOT tables. While at it, drop the redundant casts in the efi_table_attr and efi_call_proto macros. Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-16-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/arm/include/asm/efi.h | 8 ++++---- arch/arm64/include/asm/efi.h | 8 ++++---- arch/x86/boot/compressed/eboot.c | 5 +++++ drivers/firmware/efi/libstub/arm-stub.c | 11 ++++++++++- drivers/firmware/efi/libstub/efistub.h | 2 ++ drivers/firmware/efi/libstub/gop.c | 2 ++ 6 files changed, 27 insertions(+), 9 deletions(-) diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h index 9b0c64c28bff..555364b7bd2a 100644 --- a/arch/arm/include/asm/efi.h +++ b/arch/arm/include/asm/efi.h @@ -50,15 +50,15 @@ void efi_virtmap_unload(void); /* arch specific definitions used by the stub code */ -#define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__) -#define efi_call_runtime(f, ...) sys_table_arg->runtime->f(__VA_ARGS__) +#define efi_call_early(f, ...) efi_system_table()->boottime->f(__VA_ARGS__) +#define efi_call_runtime(f, ...) efi_system_table()->runtime->f(__VA_ARGS__) #define efi_is_native() (true) #define efi_table_attr(table, attr, instance) \ - ((table##_t *)instance)->attr + instance->attr #define efi_call_proto(protocol, f, instance, ...) \ - ((protocol##_t *)instance)->f(instance, ##__VA_ARGS__) + instance->f(instance, ##__VA_ARGS__) struct screen_info *alloc_screen_info(efi_system_table_t *sys_table_arg); void free_screen_info(efi_system_table_t *sys_table, struct screen_info *si); diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 189082c44c28..9aa518d67588 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -93,15 +93,15 @@ static inline unsigned long efi_get_max_initrd_addr(unsigned long dram_base, return (image_addr & ~(SZ_1G - 1UL)) + (1UL << (VA_BITS_MIN - 1)); } -#define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__) -#define efi_call_runtime(f, ...) sys_table_arg->runtime->f(__VA_ARGS__) +#define efi_call_early(f, ...) efi_system_table()->boottime->f(__VA_ARGS__) +#define efi_call_runtime(f, ...) efi_system_table()->runtime->f(__VA_ARGS__) #define efi_is_native() (true) #define efi_table_attr(table, attr, instance) \ - ((table##_t *)instance)->attr + instance->attr #define efi_call_proto(protocol, f, instance, ...) \ - ((protocol##_t *)instance)->f(instance, ##__VA_ARGS__) + instance->f(instance, ##__VA_ARGS__) #define alloc_screen_info(x...) &screen_info diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 36a26d6e2af0..3a7c900b9c66 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -27,6 +27,11 @@ __pure const struct efi_config *__efi_early(void) return efi_early; } +__pure efi_system_table_t *efi_system_table(void) +{ + return sys_table; +} + #define BOOT_SERVICES(bits) \ static void setup_boot_services##bits(struct efi_config *c) \ { \ diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c index 60a301e1c072..47f072ac3f30 100644 --- a/drivers/firmware/efi/libstub/arm-stub.c +++ b/drivers/firmware/efi/libstub/arm-stub.c @@ -37,6 +37,13 @@ static u64 virtmap_base = EFI_RT_VIRTUAL_BASE; +static efi_system_table_t *__section(.data) sys_table; + +__pure efi_system_table_t *efi_system_table(void) +{ + return sys_table; +} + void efi_char16_printk(efi_system_table_t *sys_table_arg, efi_char16_t *str) { @@ -110,7 +117,7 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table, * for both archictectures, with the arch-specific code provided in the * handle_kernel_image() function. */ -unsigned long efi_entry(void *handle, efi_system_table_t *sys_table, +unsigned long efi_entry(void *handle, efi_system_table_t *sys_table_arg, unsigned long *image_addr) { efi_loaded_image_t *image; @@ -131,6 +138,8 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table, enum efi_secureboot_mode secure_boot; struct screen_info *si; + sys_table = sys_table_arg; + /* Check if we were booted by the EFI firmware */ if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) goto fail; diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 05739ae013c8..e6775c16a97d 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -29,6 +29,8 @@ extern int __pure nokaslr(void); extern int __pure is_quiet(void); extern int __pure novamap(void); +extern __pure efi_system_table_t *efi_system_table(void); + #define pr_efi(sys_table, msg) do { \ if (!is_quiet()) efi_printk(sys_table, "EFI stub: "msg); \ } while (0) diff --git a/drivers/firmware/efi/libstub/gop.c b/drivers/firmware/efi/libstub/gop.c index 5f4fbc2ac687..6c49d0a9aa3f 100644 --- a/drivers/firmware/efi/libstub/gop.c +++ b/drivers/firmware/efi/libstub/gop.c @@ -10,6 +10,8 @@ #include #include +#include "efistub.h" + static void find_bits(unsigned long mask, u8 *pos, u8 *size) { u8 first, len; -- cgit v1.2.3-59-g8ed1b From dc29da14ed94a005ed2a590ec86aa8a572fea4a9 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:16 +0100 Subject: efi/libstub: Unify the efi_char16_printk implementations Use a single implementation for efi_char16_printk() across all architectures. Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-17-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/eboot.c | 7 ------- drivers/firmware/efi/libstub/arm-stub.c | 9 --------- drivers/firmware/efi/libstub/efi-stub-helper.c | 9 +++++++++ 3 files changed, 9 insertions(+), 16 deletions(-) diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 3a7c900b9c66..5fb12827bdc2 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -46,13 +46,6 @@ static void setup_boot_services##bits(struct efi_config *c) \ BOOT_SERVICES(32); BOOT_SERVICES(64); -void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str) -{ - efi_call_proto(efi_simple_text_output_protocol, output_string, - ((efi_simple_text_output_protocol_t *)(unsigned long) - efi_early->text_output), str); -} - static efi_status_t preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom) { diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c index 47f072ac3f30..b44b1bd4d480 100644 --- a/drivers/firmware/efi/libstub/arm-stub.c +++ b/drivers/firmware/efi/libstub/arm-stub.c @@ -44,15 +44,6 @@ __pure efi_system_table_t *efi_system_table(void) return sys_table; } -void efi_char16_printk(efi_system_table_t *sys_table_arg, - efi_char16_t *str) -{ - efi_simple_text_output_protocol_t *out; - - out = (efi_simple_text_output_protocol_t *)sys_table_arg->con_out; - out->output_string(out, str); -} - static struct screen_info *setup_graphics(efi_system_table_t *sys_table_arg) { efi_guid_t gop_proto = EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID; diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index 9bb74ad4b7fe..f74b514789d7 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -955,3 +955,12 @@ void *get_efi_config_table(efi_system_table_t *sys_table, efi_guid_t guid) } return NULL; } + +void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str) +{ + efi_call_proto(efi_simple_text_output_protocol, + output_string, + efi_table_attr(efi_system_table, con_out, + efi_system_table()), + str); +} -- cgit v1.2.3-59-g8ed1b From c3710de5065d63f8b16d160a118cc50ae09050b4 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:17 +0100 Subject: efi/libstub/x86: Drop __efi_early() export and efi_config struct The various pointers we stash in the efi_config struct which we retrieve using __efi_early() are simply copies of the ones in the EFI system table, which we have started accessing directly in the previous patch. So drop all the __efi_early() related plumbing, as well as all the assembly code dealing with efi_config, which allows us to move the PE/COFF entry point to C code as well. Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-18-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/eboot.c | 82 ++++++++++++++------------------------ arch/x86/boot/compressed/head_32.S | 58 +-------------------------- arch/x86/boot/compressed/head_64.S | 74 ++++++---------------------------- arch/x86/include/asm/efi.h | 38 +++++------------- 4 files changed, 54 insertions(+), 198 deletions(-) diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 5fb12827bdc2..2a7c6d58cc56 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -19,32 +19,17 @@ #include "eboot.h" static efi_system_table_t *sys_table; - -static struct efi_config *efi_early; - -__pure const struct efi_config *__efi_early(void) -{ - return efi_early; -} +static bool efi_is64 = IS_ENABLED(CONFIG_X86_64); __pure efi_system_table_t *efi_system_table(void) { return sys_table; } -#define BOOT_SERVICES(bits) \ -static void setup_boot_services##bits(struct efi_config *c) \ -{ \ - efi_system_table_##bits##_t *table; \ - \ - table = (typeof(table))sys_table; \ - \ - c->runtime_services = table->runtime; \ - c->boot_services = table->boottime; \ - c->text_output = table->con_out; \ +__pure bool efi_is_64bit(void) +{ + return efi_is64; } -BOOT_SERVICES(32); -BOOT_SERVICES(64); static efi_status_t preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom) @@ -367,21 +352,24 @@ void setup_graphics(struct boot_params *boot_params) } } +void startup_32(struct boot_params *boot_params); + +void __noreturn efi_stub_entry(efi_handle_t handle, + efi_system_table_t *sys_table_arg, + struct boot_params *boot_params); + /* * Because the x86 boot code expects to be passed a boot_params we * need to create one ourselves (usually the bootloader would create * one for us). - * - * The caller is responsible for filling out ->code32_start in the - * returned boot_params. */ -struct boot_params *make_boot_params(struct efi_config *c) +efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, + efi_system_table_t *sys_table_arg) { struct boot_params *boot_params; struct apm_bios_info *bi; struct setup_header *hdr; efi_loaded_image_t *image; - void *handle; efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID; int options_size = 0; efi_status_t status; @@ -389,31 +377,24 @@ struct boot_params *make_boot_params(struct efi_config *c) unsigned long ramdisk_addr; unsigned long ramdisk_size; - efi_early = c; - sys_table = (efi_system_table_t *)(unsigned long)efi_early->table; - handle = (void *)(unsigned long)efi_early->image_handle; + sys_table = sys_table_arg; /* Check if we were booted by the EFI firmware */ if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) - return NULL; - - if (efi_is_64bit()) - setup_boot_services64(efi_early); - else - setup_boot_services32(efi_early); + return EFI_INVALID_PARAMETER; status = efi_call_early(handle_protocol, handle, &proto, (void *)&image); if (status != EFI_SUCCESS) { efi_printk(sys_table, "Failed to get handle for LOADED_IMAGE_PROTOCOL\n"); - return NULL; + return status; } status = efi_low_alloc(sys_table, 0x4000, 1, (unsigned long *)&boot_params); if (status != EFI_SUCCESS) { efi_printk(sys_table, "Failed to allocate lowmem for boot params\n"); - return NULL; + return status; } memset(boot_params, 0x0, 0x4000); @@ -474,14 +455,17 @@ struct boot_params *make_boot_params(struct efi_config *c) boot_params->ext_ramdisk_image = (u64)ramdisk_addr >> 32; boot_params->ext_ramdisk_size = (u64)ramdisk_size >> 32; - return boot_params; + hdr->code32_start = (u32)(unsigned long)startup_32; + + efi_stub_entry(handle, sys_table, boot_params); + /* not reached */ fail2: efi_free(sys_table, options_size, hdr->cmd_line_ptr); fail: efi_free(sys_table, 0x4000, (unsigned long)boot_params); - return NULL; + return status; } static void add_e820ext(struct boot_params *params, @@ -737,33 +721,26 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle) * On success we return a pointer to a boot_params structure, and NULL * on failure. */ -struct boot_params * -efi_main(struct efi_config *c, struct boot_params *boot_params) +struct boot_params *efi_main(efi_handle_t handle, + efi_system_table_t *sys_table_arg, + struct boot_params *boot_params, + bool is64) { struct desc_ptr *gdt = NULL; struct setup_header *hdr = &boot_params->hdr; efi_status_t status; struct desc_struct *desc; - void *handle; - efi_system_table_t *_table; unsigned long cmdline_paddr; - efi_early = c; - - _table = (efi_system_table_t *)(unsigned long)efi_early->table; - handle = (void *)(unsigned long)efi_early->image_handle; + sys_table = sys_table_arg; - sys_table = _table; + if (IS_ENABLED(CONFIG_EFI_MIXED)) + efi_is64 = is64; /* Check if we were booted by the EFI firmware */ if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) goto fail; - if (efi_is_64bit()) - setup_boot_services64(efi_early); - else - setup_boot_services32(efi_early); - /* * make_boot_params() may have been called before efi_main(), in which * case this is the second time we parse the cmdline. This is ok, @@ -925,5 +902,6 @@ efi_main(struct efi_config *c, struct boot_params *boot_params) fail: efi_printk(sys_table, "efi_main() failed!\n"); - return NULL; + for (;;) + asm("hlt"); } diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 0b03dc7bba12..e43ac17cb9fb 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -145,63 +145,16 @@ SYM_FUNC_START(startup_32) SYM_FUNC_END(startup_32) #ifdef CONFIG_EFI_STUB -/* - * We don't need the return address, so set up the stack so efi_main() can find - * its arguments. - */ -SYM_FUNC_START(efi_pe_entry) - add $0x4, %esp - - call 1f -1: popl %esi - subl $1b, %esi - - popl %ecx - movl %ecx, efi32_config(%esi) /* Handle */ - popl %ecx - movl %ecx, efi32_config+8(%esi) /* EFI System table pointer */ - - leal efi32_config(%esi), %eax - pushl %eax - - call make_boot_params - cmpl $0, %eax - je fail - movl %esi, BP_code32_start(%eax) - popl %ecx - pushl %eax - pushl %ecx - jmp 2f /* Skip efi_config initialization */ -SYM_FUNC_END(efi_pe_entry) - SYM_FUNC_START(efi32_stub_entry) +SYM_FUNC_START_ALIAS(efi_stub_entry) add $0x4, %esp - popl %ecx - popl %edx - - call 1f -1: popl %esi - subl $1b, %esi - - movl %ecx, efi32_config(%esi) /* Handle */ - movl %edx, efi32_config+8(%esi) /* EFI System table pointer */ - - leal efi32_config(%esi), %eax - pushl %eax -2: call efi_main - cmpl $0, %eax movl %eax, %esi - jne 2f -fail: - /* EFI init failed, so hang. */ - hlt - jmp fail -2: movl BP_code32_start(%esi), %eax leal startup_32(%eax), %eax jmp *%eax SYM_FUNC_END(efi32_stub_entry) +SYM_FUNC_END_ALIAS(efi_stub_entry) #endif .text @@ -258,13 +211,6 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) jmp *%eax SYM_FUNC_END(.Lrelocated) -#ifdef CONFIG_EFI_STUB - .data -efi32_config: - .fill 5,8,0 - .byte 0 -#endif - /* * Stack and heap for uncompression */ diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index ad57edeaeeb3..a6f3ee9ca61d 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -208,10 +208,14 @@ SYM_FUNC_START(startup_32) pushl $__KERNEL_CS leal startup_64(%ebp), %eax #ifdef CONFIG_EFI_MIXED - movl efi32_config(%ebp), %ebx + movl efi32_boot_args(%ebp), %ebx cmp $0, %ebx jz 1f leal handover_entry(%ebp), %eax + movl 0(%ebx), %edi + movl 4(%ebx), %esi + movl 8(%ebx), %edx + movl $0x0, %ecx 1: #endif pushl %eax @@ -228,22 +232,14 @@ SYM_FUNC_END(startup_32) .org 0x190 SYM_FUNC_START(efi32_stub_entry) add $0x4, %esp /* Discard return address */ - popl %ecx - popl %edx - popl %esi - leal (BP_scratch+4)(%esi), %esp call 1f 1: pop %ebp subl $1b, %ebp - movl %ecx, efi32_config(%ebp) - movl %edx, efi32_config+8(%ebp) + movl %esp, efi32_boot_args(%ebp) sgdtl efi32_boot_gdt(%ebp) - leal efi32_config(%ebp), %eax - movl %eax, efi_config(%ebp) - /* Disable paging */ movl %cr0, %eax btrl $X86_CR0_PG_BIT, %eax @@ -450,51 +446,19 @@ trampoline_return: SYM_CODE_END(startup_64) #ifdef CONFIG_EFI_STUB - -/* The entry point for the PE/COFF executable is efi_pe_entry. */ -SYM_FUNC_START(efi_pe_entry) - movq %rcx, efi64_config(%rip) /* Handle */ - movq %rdx, efi64_config+8(%rip) /* EFI System table pointer */ - - leaq efi64_config(%rip), %rax - movq %rax, efi_config(%rip) - - movq %rax, %rdi - call make_boot_params - cmpq $0,%rax - je fail - mov %rax, %rsi - leaq startup_32(%rip), %rax - movl %eax, BP_code32_start(%rsi) - -handover_entry: - movq efi_config(%rip), %rdi + .org 0x390 +SYM_FUNC_START(efi64_stub_entry) +SYM_FUNC_START_ALIAS(efi_stub_entry) + movq $1, %rcx +SYM_INNER_LABEL(handover_entry, SYM_L_LOCAL) and $~0xf, %rsp /* realign the stack */ call efi_main movq %rax,%rsi - cmpq $0,%rax - jne 2f -fail: - /* EFI init failed, so hang. */ - hlt - jmp fail -2: movl BP_code32_start(%esi), %eax leaq startup_64(%rax), %rax jmp *%rax -SYM_FUNC_END(efi_pe_entry) - - .org 0x390 -SYM_FUNC_START(efi64_stub_entry) - movq %rdi, efi64_config(%rip) /* Handle */ - movq %rsi, efi64_config+8(%rip) /* EFI System table pointer */ - - leaq efi64_config(%rip), %rax - movq %rax, efi_config(%rip) - - movq %rdx, %rsi - jmp handover_entry SYM_FUNC_END(efi64_stub_entry) +SYM_FUNC_END_ALIAS(efi_stub_entry) #endif .text @@ -663,22 +627,10 @@ SYM_DATA_START_LOCAL(gdt) .quad 0x0000000000000000 /* TS continued */ SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end) -#ifdef CONFIG_EFI_STUB -SYM_DATA_LOCAL(efi_config, .quad 0) - #ifdef CONFIG_EFI_MIXED -SYM_DATA_START(efi32_config) - .fill 5,8,0 - .byte 0 -SYM_DATA_END(efi32_config) +SYM_DATA_LOCAL(efi32_boot_args, .long 0) #endif -SYM_DATA_START(efi64_config) - .fill 5,8,0 - .byte 1 -SYM_DATA_END(efi64_config) -#endif /* CONFIG_EFI_STUB */ - /* * Stack and heap for uncompression */ diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 001905daa110..1817f350618e 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -200,32 +200,14 @@ static inline efi_status_t efi_thunk_set_virtual_address_map( /* arch specific definitions used by the stub code */ -struct efi_config { - u64 image_handle; - u64 table; - u64 runtime_services; - u64 boot_services; - u64 text_output; - bool is64; -} __packed; - -__pure const struct efi_config *__efi_early(void); - -static inline bool efi_is_64bit(void) -{ - if (!IS_ENABLED(CONFIG_X86_64)) - return false; - - if (!IS_ENABLED(CONFIG_EFI_MIXED)) - return true; - - return __efi_early()->is64; -} +__pure bool efi_is_64bit(void); static inline bool efi_is_native(void) { if (!IS_ENABLED(CONFIG_X86_64)) return true; + if (!IS_ENABLED(CONFIG_EFI_MIXED)) + return true; return efi_is_64bit(); } @@ -252,18 +234,16 @@ static inline bool efi_is_native(void) #define efi_call_early(f, ...) \ (efi_is_native() \ - ? ((efi_boot_services_t *)(unsigned long) \ - __efi_early()->boot_services)->f(__VA_ARGS__) \ - : efi64_thunk(((efi_boot_services_t *)(unsigned long) \ - __efi_early()->boot_services)->mixed_mode.f, \ + ? efi_system_table()->boottime->f(__VA_ARGS__) \ + : efi64_thunk(efi_table_attr(efi_boot_services, \ + boottime, efi_system_table())->mixed_mode.f, \ __VA_ARGS__)) #define efi_call_runtime(f, ...) \ (efi_is_native() \ - ? ((efi_runtime_services_t *)(unsigned long) \ - __efi_early()->runtime_services)->f(__VA_ARGS__)\ - : efi64_thunk(((efi_runtime_services_t *)(unsigned long)\ - __efi_early()->runtime_services)->mixed_mode.f, \ + ? efi_system_table()->runtime->f(__VA_ARGS__) \ + : efi64_thunk(efi_table_attr(efi_runtime_services, \ + runtime, efi_system_table())->mixed_mode.f, \ __VA_ARGS__)) extern bool efi_reboot_required(void); -- cgit v1.2.3-59-g8ed1b From 8173ec7905b5b07c989b06a105d171c169dde93b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:18 +0100 Subject: efi/libstub: Drop sys_table_arg from printk routines As a first step towards getting rid of the need to pass around a function parameter 'sys_table_arg' pointing to the EFI system table, remove the references to it in the printing code, which is represents the majority of the use cases. Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-19-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/eboot.c | 28 ++++++++++----------- drivers/firmware/efi/libstub/arm-stub.c | 28 ++++++++++----------- drivers/firmware/efi/libstub/arm32-stub.c | 14 +++++------ drivers/firmware/efi/libstub/arm64-stub.c | 12 ++++----- drivers/firmware/efi/libstub/efi-stub-helper.c | 34 +++++++++++++------------- drivers/firmware/efi/libstub/efistub.h | 9 ++++--- drivers/firmware/efi/libstub/fdt.c | 16 ++++++------ drivers/firmware/efi/libstub/secureboot.c | 4 +-- drivers/firmware/efi/libstub/tpm.c | 3 +-- include/linux/efi.h | 2 +- 10 files changed, 74 insertions(+), 76 deletions(-) diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 2a7c6d58cc56..1e072d487833 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -57,7 +57,7 @@ preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom) status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, (void **)&rom); if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to allocate memory for 'rom'\n"); + efi_printk("Failed to allocate memory for 'rom'\n"); return status; } @@ -74,7 +74,7 @@ preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom) &rom->vendor); if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to read rom->vendor\n"); + efi_printk("Failed to read rom->vendor\n"); goto free_struct; } @@ -83,7 +83,7 @@ preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom) &rom->devid); if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to read rom->devid\n"); + efi_printk("Failed to read rom->devid\n"); goto free_struct; } @@ -132,7 +132,7 @@ static void setup_efi_pci(struct boot_params *params) size, (void **)&pci_handle); if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to allocate memory for 'pci_handle'\n"); + efi_printk("Failed to allocate memory for 'pci_handle'\n"); return; } @@ -187,7 +187,7 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params) return; if (efi_table_attr(apple_properties_protocol, version, p) != 0x10000) { - efi_printk(sys_table, "Unsupported properties proto version\n"); + efi_printk("Unsupported properties proto version\n"); return; } @@ -200,7 +200,7 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params) size + sizeof(struct setup_data), (void **)&new); if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to allocate memory for 'properties'\n"); + efi_printk("Failed to allocate memory for 'properties'\n"); return; } @@ -386,14 +386,14 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, status = efi_call_early(handle_protocol, handle, &proto, (void *)&image); if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to get handle for LOADED_IMAGE_PROTOCOL\n"); + efi_printk("Failed to get handle for LOADED_IMAGE_PROTOCOL\n"); return status; } status = efi_low_alloc(sys_table, 0x4000, 1, (unsigned long *)&boot_params); if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to allocate lowmem for boot params\n"); + efi_printk("Failed to allocate lowmem for boot params\n"); return status; } @@ -441,7 +441,7 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, if (status != EFI_SUCCESS && hdr->xloadflags & XLF_CAN_BE_LOADED_ABOVE_4G) { - efi_printk(sys_table, "Trying to load files to higher address\n"); + efi_printk("Trying to load files to higher address\n"); status = handle_cmdline_files(sys_table, image, (char *)(unsigned long)hdr->cmd_line_ptr, "initrd=", -1UL, @@ -773,7 +773,7 @@ struct boot_params *efi_main(efi_handle_t handle, status = efi_call_early(allocate_pool, EFI_LOADER_DATA, sizeof(*gdt), (void **)&gdt); if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to allocate memory for 'gdt' structure\n"); + efi_printk("Failed to allocate memory for 'gdt' structure\n"); goto fail; } @@ -781,7 +781,7 @@ struct boot_params *efi_main(efi_handle_t handle, status = efi_low_alloc(sys_table, gdt->size, 8, (unsigned long *)&gdt->address); if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to allocate memory for 'gdt'\n"); + efi_printk("Failed to allocate memory for 'gdt'\n"); goto fail; } @@ -797,7 +797,7 @@ struct boot_params *efi_main(efi_handle_t handle, hdr->kernel_alignment, LOAD_PHYSICAL_ADDR); if (status != EFI_SUCCESS) { - efi_printk(sys_table, "efi_relocate_kernel() failed!\n"); + efi_printk("efi_relocate_kernel() failed!\n"); goto fail; } @@ -807,7 +807,7 @@ struct boot_params *efi_main(efi_handle_t handle, status = exit_boot(boot_params, handle); if (status != EFI_SUCCESS) { - efi_printk(sys_table, "exit_boot() failed!\n"); + efi_printk("exit_boot() failed!\n"); goto fail; } @@ -900,7 +900,7 @@ struct boot_params *efi_main(efi_handle_t handle, return boot_params; fail: - efi_printk(sys_table, "efi_main() failed!\n"); + efi_printk("efi_main() failed!\n"); for (;;) asm("hlt"); diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c index b44b1bd4d480..2ed4bd457e66 100644 --- a/drivers/firmware/efi/libstub/arm-stub.c +++ b/drivers/firmware/efi/libstub/arm-stub.c @@ -73,7 +73,7 @@ void install_memreserve_table(efi_system_table_t *sys_table_arg) status = efi_call_early(allocate_pool, EFI_LOADER_DATA, sizeof(*rsv), (void **)&rsv); if (status != EFI_SUCCESS) { - pr_efi_err(sys_table_arg, "Failed to allocate memreserve entry!\n"); + pr_efi_err("Failed to allocate memreserve entry!\n"); return; } @@ -85,7 +85,7 @@ void install_memreserve_table(efi_system_table_t *sys_table_arg) &memreserve_table_guid, rsv); if (status != EFI_SUCCESS) - pr_efi_err(sys_table_arg, "Failed to install memreserve config table!\n"); + pr_efi_err("Failed to install memreserve config table!\n"); } @@ -147,13 +147,13 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table_arg, status = sys_table->boottime->handle_protocol(handle, &loaded_image_proto, (void *)&image); if (status != EFI_SUCCESS) { - pr_efi_err(sys_table, "Failed to get loaded image protocol\n"); + pr_efi_err("Failed to get loaded image protocol\n"); goto fail; } dram_base = get_dram_base(sys_table); if (dram_base == EFI_ERROR) { - pr_efi_err(sys_table, "Failed to find DRAM base\n"); + pr_efi_err("Failed to find DRAM base\n"); goto fail; } @@ -164,7 +164,7 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table_arg, */ cmdline_ptr = efi_convert_cmdline(sys_table, image, &cmdline_size); if (!cmdline_ptr) { - pr_efi_err(sys_table, "getting command line via LOADED_IMAGE_PROTOCOL\n"); + pr_efi_err("getting command line via LOADED_IMAGE_PROTOCOL\n"); goto fail; } @@ -176,7 +176,7 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table_arg, if (!IS_ENABLED(CONFIG_CMDLINE_FORCE) && cmdline_size > 0) efi_parse_options(cmdline_ptr); - pr_efi(sys_table, "Booting Linux Kernel...\n"); + pr_efi("Booting Linux Kernel...\n"); si = setup_graphics(sys_table); @@ -185,7 +185,7 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table_arg, &reserve_size, dram_base, image); if (status != EFI_SUCCESS) { - pr_efi_err(sys_table, "Failed to relocate kernel\n"); + pr_efi_err("Failed to relocate kernel\n"); goto fail_free_cmdline; } @@ -204,29 +204,29 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table_arg, if (!IS_ENABLED(CONFIG_EFI_ARMSTUB_DTB_LOADER) || secure_boot != efi_secureboot_mode_disabled) { if (strstr(cmdline_ptr, "dtb=")) - pr_efi(sys_table, "Ignoring DTB from command line.\n"); + pr_efi("Ignoring DTB from command line.\n"); } else { status = handle_cmdline_files(sys_table, image, cmdline_ptr, "dtb=", ~0UL, &fdt_addr, &fdt_size); if (status != EFI_SUCCESS) { - pr_efi_err(sys_table, "Failed to load device tree!\n"); + pr_efi_err("Failed to load device tree!\n"); goto fail_free_image; } } if (fdt_addr) { - pr_efi(sys_table, "Using DTB from command line\n"); + pr_efi("Using DTB from command line\n"); } else { /* Look for a device tree configuration table entry. */ fdt_addr = (uintptr_t)get_fdt(sys_table, &fdt_size); if (fdt_addr) - pr_efi(sys_table, "Using DTB from configuration table\n"); + pr_efi("Using DTB from configuration table\n"); } if (!fdt_addr) - pr_efi(sys_table, "Generating empty DTB\n"); + pr_efi("Generating empty DTB\n"); status = handle_cmdline_files(sys_table, image, cmdline_ptr, "initrd=", efi_get_max_initrd_addr(dram_base, @@ -234,7 +234,7 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table_arg, (unsigned long *)&initrd_addr, (unsigned long *)&initrd_size); if (status != EFI_SUCCESS) - pr_efi_err(sys_table, "Failed initrd from command line!\n"); + pr_efi_err("Failed initrd from command line!\n"); efi_random_get_seed(sys_table); @@ -275,7 +275,7 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table_arg, if (status == EFI_SUCCESS) return new_fdt_addr; - pr_efi_err(sys_table, "Failed to update FDT and exit boot services\n"); + pr_efi_err("Failed to update FDT and exit boot services\n"); efi_free(sys_table, initrd_size, initrd_addr); efi_free(sys_table, fdt_size, fdt_addr); diff --git a/drivers/firmware/efi/libstub/arm32-stub.c b/drivers/firmware/efi/libstub/arm32-stub.c index 4566640de650..7b5c717ddfac 100644 --- a/drivers/firmware/efi/libstub/arm32-stub.c +++ b/drivers/firmware/efi/libstub/arm32-stub.c @@ -18,7 +18,7 @@ efi_status_t check_platform_features(efi_system_table_t *sys_table_arg) /* LPAE kernels need compatible hardware */ block = cpuid_feature_extract(CPUID_EXT_MMFR0, 0); if (block < 5) { - pr_efi_err(sys_table_arg, "This LPAE kernel is not supported by your CPU\n"); + pr_efi_err("This LPAE kernel is not supported by your CPU\n"); return EFI_UNSUPPORTED; } return EFI_SUCCESS; @@ -121,8 +121,7 @@ static efi_status_t reserve_kernel_base(efi_system_table_t *sys_table_arg, */ status = efi_get_memory_map(sys_table_arg, &map); if (status != EFI_SUCCESS) { - pr_efi_err(sys_table_arg, - "reserve_kernel_base(): Unable to retrieve memory map.\n"); + pr_efi_err("reserve_kernel_base(): Unable to retrieve memory map.\n"); return status; } @@ -164,8 +163,7 @@ static efi_status_t reserve_kernel_base(efi_system_table_t *sys_table_arg, (end - start) / EFI_PAGE_SIZE, &start); if (status != EFI_SUCCESS) { - pr_efi_err(sys_table_arg, - "reserve_kernel_base(): alloc failed.\n"); + pr_efi_err("reserve_kernel_base(): alloc failed.\n"); goto out; } break; @@ -224,7 +222,7 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table, status = reserve_kernel_base(sys_table, kernel_base, reserve_addr, reserve_size); if (status != EFI_SUCCESS) { - pr_efi_err(sys_table, "Unable to allocate memory for uncompressed kernel.\n"); + pr_efi_err("Unable to allocate memory for uncompressed kernel.\n"); return status; } @@ -237,7 +235,7 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table, *image_size, kernel_base + MAX_UNCOMP_KERNEL_SIZE, 0, 0); if (status != EFI_SUCCESS) { - pr_efi_err(sys_table, "Failed to relocate kernel.\n"); + pr_efi_err("Failed to relocate kernel.\n"); efi_free(sys_table, *reserve_size, *reserve_addr); *reserve_size = 0; return status; @@ -249,7 +247,7 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table, * address at which the zImage is loaded. */ if (*image_addr + *image_size > dram_base + ZIMAGE_OFFSET_LIMIT) { - pr_efi_err(sys_table, "Failed to relocate kernel, no low memory available.\n"); + pr_efi_err("Failed to relocate kernel, no low memory available.\n"); efi_free(sys_table, *reserve_size, *reserve_addr); *reserve_size = 0; efi_free(sys_table, *image_size, *image_addr); diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index 1550d244e996..b09dda600c78 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -32,9 +32,9 @@ efi_status_t check_platform_features(efi_system_table_t *sys_table_arg) tg = (read_cpuid(ID_AA64MMFR0_EL1) >> ID_AA64MMFR0_TGRAN_SHIFT) & 0xf; if (tg != ID_AA64MMFR0_TGRAN_SUPPORTED) { if (IS_ENABLED(CONFIG_ARM64_64K_PAGES)) - pr_efi_err(sys_table_arg, "This 64 KB granular kernel is not supported by your CPU\n"); + pr_efi_err("This 64 KB granular kernel is not supported by your CPU\n"); else - pr_efi_err(sys_table_arg, "This 16 KB granular kernel is not supported by your CPU\n"); + pr_efi_err("This 16 KB granular kernel is not supported by your CPU\n"); return EFI_UNSUPPORTED; } return EFI_SUCCESS; @@ -60,13 +60,13 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table_arg, sizeof(phys_seed), (u8 *)&phys_seed); if (status == EFI_NOT_FOUND) { - pr_efi(sys_table_arg, "EFI_RNG_PROTOCOL unavailable, no randomness supplied\n"); + pr_efi("EFI_RNG_PROTOCOL unavailable, no randomness supplied\n"); } else if (status != EFI_SUCCESS) { - pr_efi_err(sys_table_arg, "efi_get_random_bytes() failed\n"); + pr_efi_err("efi_get_random_bytes() failed\n"); return status; } } else { - pr_efi(sys_table_arg, "KASLR disabled on kernel command line\n"); + pr_efi("KASLR disabled on kernel command line\n"); } } @@ -143,7 +143,7 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table_arg, MIN_KIMG_ALIGN, reserve_addr); if (status != EFI_SUCCESS) { - pr_efi_err(sys_table_arg, "Failed to relocate kernel\n"); + pr_efi_err("Failed to relocate kernel\n"); *reserve_size = 0; return status; } diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index f74b514789d7..9f0d332954ab 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -58,7 +58,7 @@ struct file_info { u64 size; }; -void efi_printk(efi_system_table_t *sys_table_arg, char *str) +void efi_printk(char *str) { char *s8; @@ -68,10 +68,10 @@ void efi_printk(efi_system_table_t *sys_table_arg, char *str) ch[0] = *s8; if (*s8 == '\n') { efi_char16_t nl[2] = { '\r', 0 }; - efi_char16_printk(sys_table_arg, nl); + efi_char16_printk(nl); } - efi_char16_printk(sys_table_arg, ch); + efi_char16_printk(ch); } } @@ -372,9 +372,9 @@ static efi_status_t efi_file_size(efi_system_table_t *sys_table_arg, void *__fh, status = fh->open(fh, &h, filename_16, EFI_FILE_MODE_READ, 0); if (status != EFI_SUCCESS) { - efi_printk(sys_table_arg, "Failed to open file: "); - efi_char16_printk(sys_table_arg, filename_16); - efi_printk(sys_table_arg, "\n"); + efi_printk("Failed to open file: "); + efi_char16_printk(filename_16); + efi_printk("\n"); return status; } @@ -383,7 +383,7 @@ static efi_status_t efi_file_size(efi_system_table_t *sys_table_arg, void *__fh, info_sz = 0; status = h->get_info(h, &info_guid, &info_sz, NULL); if (status != EFI_BUFFER_TOO_SMALL) { - efi_printk(sys_table_arg, "Failed to get file info size\n"); + efi_printk("Failed to get file info size\n"); return status; } @@ -391,7 +391,7 @@ grow: status = efi_call_early(allocate_pool, EFI_LOADER_DATA, info_sz, (void **)&info); if (status != EFI_SUCCESS) { - efi_printk(sys_table_arg, "Failed to alloc mem for file info\n"); + efi_printk("Failed to alloc mem for file info\n"); return status; } @@ -405,7 +405,7 @@ grow: efi_call_early(free_pool, info); if (status != EFI_SUCCESS) - efi_printk(sys_table_arg, "Failed to get initrd info\n"); + efi_printk("Failed to get initrd info\n"); return status; } @@ -434,13 +434,13 @@ static efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg, status = efi_call_early(handle_protocol, handle, &fs_proto, (void **)&io); if (status != EFI_SUCCESS) { - efi_printk(sys_table_arg, "Failed to handle fs_proto\n"); + efi_printk("Failed to handle fs_proto\n"); return status; } status = io->open_volume(io, &fh); if (status != EFI_SUCCESS) - efi_printk(sys_table_arg, "Failed to open volume\n"); + efi_printk("Failed to open volume\n"); else *__fh = fh; @@ -569,7 +569,7 @@ efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, status = efi_call_early(allocate_pool, EFI_LOADER_DATA, nr_files * sizeof(*files), (void **)&files); if (status != EFI_SUCCESS) { - pr_efi_err(sys_table_arg, "Failed to alloc mem for file handle list\n"); + pr_efi_err("Failed to alloc mem for file handle list\n"); goto fail; } @@ -632,13 +632,13 @@ efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, status = efi_high_alloc(sys_table_arg, file_size_total, 0x1000, &file_addr, max_addr); if (status != EFI_SUCCESS) { - pr_efi_err(sys_table_arg, "Failed to alloc highmem for files\n"); + pr_efi_err("Failed to alloc highmem for files\n"); goto close_handles; } /* We've run out of free low memory. */ if (file_addr > max_addr) { - pr_efi_err(sys_table_arg, "We've run out of free low memory\n"); + pr_efi_err("We've run out of free low memory\n"); status = EFI_INVALID_PARAMETER; goto free_file_total; } @@ -660,7 +660,7 @@ efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, &chunksize, (void *)addr); if (status != EFI_SUCCESS) { - pr_efi_err(sys_table_arg, "Failed to read file\n"); + pr_efi_err("Failed to read file\n"); goto free_file_total; } addr += chunksize; @@ -746,7 +746,7 @@ efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg, alignment, &new_addr, min_addr); } if (status != EFI_SUCCESS) { - pr_efi_err(sys_table_arg, "Failed to allocate usable memory for kernel.\n"); + pr_efi_err("Failed to allocate usable memory for kernel.\n"); return status; } @@ -956,7 +956,7 @@ void *get_efi_config_table(efi_system_table_t *sys_table, efi_guid_t guid) return NULL; } -void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str) +void efi_char16_printk(efi_char16_t *str) { efi_call_proto(efi_simple_text_output_protocol, output_string, diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index e6775c16a97d..4a6acd28ce65 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -31,13 +31,14 @@ extern int __pure novamap(void); extern __pure efi_system_table_t *efi_system_table(void); -#define pr_efi(sys_table, msg) do { \ - if (!is_quiet()) efi_printk(sys_table, "EFI stub: "msg); \ +#define pr_efi(msg) do { \ + if (!is_quiet()) efi_printk("EFI stub: "msg); \ } while (0) -#define pr_efi_err(sys_table, msg) efi_printk(sys_table, "EFI stub: ERROR: "msg) +#define pr_efi_err(msg) efi_printk("EFI stub: ERROR: "msg) -void efi_char16_printk(efi_system_table_t *, efi_char16_t *); +void efi_char16_printk(efi_char16_t *); +void efi_char16_printk(efi_char16_t *); unsigned long get_dram_base(efi_system_table_t *sys_table_arg); diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c index 0bf0190917e0..1bf7edfd81ec 100644 --- a/drivers/firmware/efi/libstub/fdt.c +++ b/drivers/firmware/efi/libstub/fdt.c @@ -40,7 +40,7 @@ static efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, /* Do some checks on provided FDT, if it exists: */ if (orig_fdt) { if (fdt_check_header(orig_fdt)) { - pr_efi_err(sys_table, "Device Tree header not valid!\n"); + pr_efi_err("Device Tree header not valid!\n"); return EFI_LOAD_ERROR; } /* @@ -48,7 +48,7 @@ static efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, * configuration table: */ if (orig_fdt_size && fdt_totalsize(orig_fdt) > orig_fdt_size) { - pr_efi_err(sys_table, "Truncated device tree! foo!\n"); + pr_efi_err("Truncated device tree! foo!\n"); return EFI_LOAD_ERROR; } } @@ -277,17 +277,17 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, */ status = efi_get_memory_map(sys_table, &map); if (status != EFI_SUCCESS) { - pr_efi_err(sys_table, "Unable to retrieve UEFI memory map.\n"); + pr_efi_err("Unable to retrieve UEFI memory map.\n"); return status; } - pr_efi(sys_table, "Exiting boot services and installing virtual address map...\n"); + pr_efi("Exiting boot services and installing virtual address map...\n"); map.map = &memory_map; status = efi_high_alloc(sys_table, MAX_FDT_SIZE, EFI_FDT_ALIGN, new_fdt_addr, max_addr); if (status != EFI_SUCCESS) { - pr_efi_err(sys_table, "Unable to allocate memory for new device tree.\n"); + pr_efi_err("Unable to allocate memory for new device tree.\n"); goto fail; } @@ -304,7 +304,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, initrd_addr, initrd_size); if (status != EFI_SUCCESS) { - pr_efi_err(sys_table, "Unable to construct new device tree.\n"); + pr_efi_err("Unable to construct new device tree.\n"); goto fail_free_new_fdt; } @@ -350,7 +350,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, return EFI_SUCCESS; } - pr_efi_err(sys_table, "Exit boot services failed.\n"); + pr_efi_err("Exit boot services failed.\n"); fail_free_new_fdt: efi_free(sys_table, MAX_FDT_SIZE, *new_fdt_addr); @@ -371,7 +371,7 @@ void *get_fdt(efi_system_table_t *sys_table, unsigned long *fdt_size) return NULL; if (fdt_check_header(fdt) != 0) { - pr_efi_err(sys_table, "Invalid header detected on UEFI supplied FDT, ignoring ...\n"); + pr_efi_err("Invalid header detected on UEFI supplied FDT, ignoring ...\n"); return NULL; } *fdt_size = fdt_totalsize(fdt); diff --git a/drivers/firmware/efi/libstub/secureboot.c b/drivers/firmware/efi/libstub/secureboot.c index edba5e7a3743..9071b57da8fe 100644 --- a/drivers/firmware/efi/libstub/secureboot.c +++ b/drivers/firmware/efi/libstub/secureboot.c @@ -72,10 +72,10 @@ enum efi_secureboot_mode efi_get_secureboot(efi_system_table_t *sys_table_arg) return efi_secureboot_mode_disabled; secure_boot_enabled: - pr_efi(sys_table_arg, "UEFI Secure Boot is enabled.\n"); + pr_efi("UEFI Secure Boot is enabled.\n"); return efi_secureboot_mode_enabled; out_efi_err: - pr_efi_err(sys_table_arg, "Could not determine UEFI Secure Boot status.\n"); + pr_efi_err("Could not determine UEFI Secure Boot status.\n"); return efi_secureboot_mode_unknown; } diff --git a/drivers/firmware/efi/libstub/tpm.c b/drivers/firmware/efi/libstub/tpm.c index d270acd43de8..86658b005e85 100644 --- a/drivers/firmware/efi/libstub/tpm.c +++ b/drivers/firmware/efi/libstub/tpm.c @@ -131,8 +131,7 @@ void efi_retrieve_tpm2_eventlog(efi_system_table_t *sys_table_arg) (void **) &log_tbl); if (status != EFI_SUCCESS) { - efi_printk(sys_table_arg, - "Unable to allocate memory for event log\n"); + efi_printk("Unable to allocate memory for event log\n"); return; } diff --git a/include/linux/efi.h b/include/linux/efi.h index 14dd08ecf8a7..5b207db6ead0 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1493,7 +1493,7 @@ static inline int efi_runtime_map_copy(void *buf, size_t bufsz) /* prototypes shared between arch specific and generic stub code */ -void efi_printk(efi_system_table_t *sys_table_arg, char *str); +void efi_printk(char *str); void efi_free(efi_system_table_t *sys_table_arg, unsigned long size, unsigned long addr); -- cgit v1.2.3-59-g8ed1b From cd33a5c1d53e43bef1683c70dc3b68b6d9e8eca6 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:19 +0100 Subject: efi/libstub: Remove 'sys_table_arg' from all function prototypes We have a helper efi_system_table() that gives us the address of the EFI system table in memory, so there is no longer point in passing it around from each function to the next. Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-20-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/arm/include/asm/efi.h | 4 +- arch/arm64/include/asm/efi.h | 3 +- arch/x86/boot/compressed/eboot.c | 40 +++++++------- drivers/firmware/efi/libstub/arm-stub.c | 55 +++++++++---------- drivers/firmware/efi/libstub/arm32-stub.c | 26 ++++----- drivers/firmware/efi/libstub/arm64-stub.c | 12 ++--- drivers/firmware/efi/libstub/efi-stub-helper.c | 74 ++++++++++++-------------- drivers/firmware/efi/libstub/efistub.h | 17 +++--- drivers/firmware/efi/libstub/fdt.c | 37 ++++++------- drivers/firmware/efi/libstub/gop.c | 10 ++-- drivers/firmware/efi/libstub/random.c | 10 ++-- drivers/firmware/efi/libstub/secureboot.c | 2 +- drivers/firmware/efi/libstub/tpm.c | 7 ++- include/linux/efi.h | 43 ++++++--------- 14 files changed, 148 insertions(+), 192 deletions(-) diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h index 555364b7bd2a..58e5acc424a0 100644 --- a/arch/arm/include/asm/efi.h +++ b/arch/arm/include/asm/efi.h @@ -60,8 +60,8 @@ void efi_virtmap_unload(void); #define efi_call_proto(protocol, f, instance, ...) \ instance->f(instance, ##__VA_ARGS__) -struct screen_info *alloc_screen_info(efi_system_table_t *sys_table_arg); -void free_screen_info(efi_system_table_t *sys_table, struct screen_info *si); +struct screen_info *alloc_screen_info(void); +void free_screen_info(struct screen_info *si); static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt) { diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 9aa518d67588..d73693177f31 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -105,8 +105,7 @@ static inline unsigned long efi_get_max_initrd_addr(unsigned long dram_base, #define alloc_screen_info(x...) &screen_info -static inline void free_screen_info(efi_system_table_t *sys_table_arg, - struct screen_info *si) +static inline void free_screen_info(struct screen_info *si) { } diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 1e072d487833..f81dd66626ce 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -340,7 +340,7 @@ void setup_graphics(struct boot_params *boot_params) EFI_LOCATE_BY_PROTOCOL, &graphics_proto, NULL, &size, gop_handle); if (status == EFI_BUFFER_TOO_SMALL) - status = efi_setup_gop(NULL, si, &graphics_proto, size); + status = efi_setup_gop(si, &graphics_proto, size); if (status != EFI_SUCCESS) { size = 0; @@ -390,8 +390,7 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, return status; } - status = efi_low_alloc(sys_table, 0x4000, 1, - (unsigned long *)&boot_params); + status = efi_low_alloc(0x4000, 1, (unsigned long *)&boot_params); if (status != EFI_SUCCESS) { efi_printk("Failed to allocate lowmem for boot params\n"); return status; @@ -416,7 +415,7 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, hdr->type_of_loader = 0x21; /* Convert unicode cmdline to ascii */ - cmdline_ptr = efi_convert_cmdline(sys_table, image, &options_size); + cmdline_ptr = efi_convert_cmdline(image, &options_size); if (!cmdline_ptr) goto fail; @@ -434,7 +433,7 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, if (status != EFI_SUCCESS) goto fail2; - status = handle_cmdline_files(sys_table, image, + status = handle_cmdline_files(image, (char *)(unsigned long)hdr->cmd_line_ptr, "initrd=", hdr->initrd_addr_max, &ramdisk_addr, &ramdisk_size); @@ -442,7 +441,7 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, if (status != EFI_SUCCESS && hdr->xloadflags & XLF_CAN_BE_LOADED_ABOVE_4G) { efi_printk("Trying to load files to higher address\n"); - status = handle_cmdline_files(sys_table, image, + status = handle_cmdline_files(image, (char *)(unsigned long)hdr->cmd_line_ptr, "initrd=", -1UL, &ramdisk_addr, &ramdisk_size); @@ -461,9 +460,9 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, /* not reached */ fail2: - efi_free(sys_table, options_size, hdr->cmd_line_ptr); + efi_free(options_size, hdr->cmd_line_ptr); fail: - efi_free(sys_table, 0x4000, (unsigned long)boot_params); + efi_free(0x4000, (unsigned long)boot_params); return status; } @@ -630,7 +629,7 @@ static efi_status_t allocate_e820(struct boot_params *params, boot_map.key_ptr = NULL; boot_map.buff_size = &buff_size; - status = efi_get_memory_map(sys_table, &boot_map); + status = efi_get_memory_map(&boot_map); if (status != EFI_SUCCESS) return status; @@ -652,8 +651,7 @@ struct exit_boot_struct { struct efi_info *efi; }; -static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg, - struct efi_boot_memmap *map, +static efi_status_t exit_boot_func(struct efi_boot_memmap *map, void *priv) { const char *signature; @@ -663,14 +661,14 @@ static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg, : EFI32_LOADER_SIGNATURE; memcpy(&p->efi->efi_loader_signature, signature, sizeof(__u32)); - p->efi->efi_systab = (unsigned long)sys_table_arg; + p->efi->efi_systab = (unsigned long)efi_system_table(); p->efi->efi_memdesc_size = *map->desc_size; p->efi->efi_memdesc_version = *map->desc_ver; p->efi->efi_memmap = (unsigned long)*map->map; p->efi->efi_memmap_size = *map->map_size; #ifdef CONFIG_X86_64 - p->efi->efi_systab_hi = (unsigned long)sys_table_arg >> 32; + p->efi->efi_systab_hi = (unsigned long)efi_system_table() >> 32; p->efi->efi_memmap_hi = (unsigned long)*map->map >> 32; #endif @@ -702,8 +700,7 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle) return status; /* Might as well exit boot services now */ - status = efi_exit_boot_services(sys_table, handle, &map, &priv, - exit_boot_func); + status = efi_exit_boot_services(handle, &map, &priv, exit_boot_func); if (status != EFI_SUCCESS) return status; @@ -755,14 +752,14 @@ struct boot_params *efi_main(efi_handle_t handle, * otherwise we ask the BIOS. */ if (boot_params->secure_boot == efi_secureboot_mode_unset) - boot_params->secure_boot = efi_get_secureboot(sys_table); + boot_params->secure_boot = efi_get_secureboot(); /* Ask the firmware to clear memory on unclean shutdown */ - efi_enable_reset_attack_mitigation(sys_table); + efi_enable_reset_attack_mitigation(); - efi_random_get_seed(sys_table); + efi_random_get_seed(); - efi_retrieve_tpm2_eventlog(sys_table); + efi_retrieve_tpm2_eventlog(); setup_graphics(boot_params); @@ -778,8 +775,7 @@ struct boot_params *efi_main(efi_handle_t handle, } gdt->size = 0x800; - status = efi_low_alloc(sys_table, gdt->size, 8, - (unsigned long *)&gdt->address); + status = efi_low_alloc(gdt->size, 8, (unsigned long *)&gdt->address); if (status != EFI_SUCCESS) { efi_printk("Failed to allocate memory for 'gdt'\n"); goto fail; @@ -791,7 +787,7 @@ struct boot_params *efi_main(efi_handle_t handle, */ if (hdr->pref_address != hdr->code32_start) { unsigned long bzimage_addr = hdr->code32_start; - status = efi_relocate_kernel(sys_table, &bzimage_addr, + status = efi_relocate_kernel(&bzimage_addr, hdr->init_size, hdr->init_size, hdr->pref_address, hdr->kernel_alignment, diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c index 2ed4bd457e66..e1ec0b2cde29 100644 --- a/drivers/firmware/efi/libstub/arm-stub.c +++ b/drivers/firmware/efi/libstub/arm-stub.c @@ -44,7 +44,7 @@ __pure efi_system_table_t *efi_system_table(void) return sys_table; } -static struct screen_info *setup_graphics(efi_system_table_t *sys_table_arg) +static struct screen_info *setup_graphics(void) { efi_guid_t gop_proto = EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID; efi_status_t status; @@ -56,15 +56,15 @@ static struct screen_info *setup_graphics(efi_system_table_t *sys_table_arg) status = efi_call_early(locate_handle, EFI_LOCATE_BY_PROTOCOL, &gop_proto, NULL, &size, gop_handle); if (status == EFI_BUFFER_TOO_SMALL) { - si = alloc_screen_info(sys_table_arg); + si = alloc_screen_info(); if (!si) return NULL; - efi_setup_gop(sys_table_arg, si, &gop_proto, size); + efi_setup_gop(si, &gop_proto, size); } return si; } -void install_memreserve_table(efi_system_table_t *sys_table_arg) +void install_memreserve_table(void) { struct linux_efi_memreserve *rsv; efi_guid_t memreserve_table_guid = LINUX_EFI_MEMRESERVE_TABLE_GUID; @@ -95,8 +95,7 @@ void install_memreserve_table(efi_system_table_t *sys_table_arg) * must be reserved. On failure it is required to free all * all allocations it has made. */ -efi_status_t handle_kernel_image(efi_system_table_t *sys_table, - unsigned long *image_addr, +efi_status_t handle_kernel_image(unsigned long *image_addr, unsigned long *image_size, unsigned long *reserve_addr, unsigned long *reserve_size, @@ -135,7 +134,7 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table_arg, if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) goto fail; - status = check_platform_features(sys_table); + status = check_platform_features(); if (status != EFI_SUCCESS) goto fail; @@ -151,7 +150,7 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table_arg, goto fail; } - dram_base = get_dram_base(sys_table); + dram_base = get_dram_base(); if (dram_base == EFI_ERROR) { pr_efi_err("Failed to find DRAM base\n"); goto fail; @@ -162,7 +161,7 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table_arg, * protocol. We are going to copy the command line into the * device tree, so this can be allocated anywhere. */ - cmdline_ptr = efi_convert_cmdline(sys_table, image, &cmdline_size); + cmdline_ptr = efi_convert_cmdline(image, &cmdline_size); if (!cmdline_ptr) { pr_efi_err("getting command line via LOADED_IMAGE_PROTOCOL\n"); goto fail; @@ -178,9 +177,9 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table_arg, pr_efi("Booting Linux Kernel...\n"); - si = setup_graphics(sys_table); + si = setup_graphics(); - status = handle_kernel_image(sys_table, image_addr, &image_size, + status = handle_kernel_image(image_addr, &image_size, &reserve_addr, &reserve_size, dram_base, image); @@ -189,12 +188,12 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table_arg, goto fail_free_cmdline; } - efi_retrieve_tpm2_eventlog(sys_table); + efi_retrieve_tpm2_eventlog(); /* Ask the firmware to clear memory on unclean shutdown */ - efi_enable_reset_attack_mitigation(sys_table); + efi_enable_reset_attack_mitigation(); - secure_boot = efi_get_secureboot(sys_table); + secure_boot = efi_get_secureboot(); /* * Unauthenticated device tree data is a security hazard, so ignore @@ -206,8 +205,7 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table_arg, if (strstr(cmdline_ptr, "dtb=")) pr_efi("Ignoring DTB from command line.\n"); } else { - status = handle_cmdline_files(sys_table, image, cmdline_ptr, - "dtb=", + status = handle_cmdline_files(image, cmdline_ptr, "dtb=", ~0UL, &fdt_addr, &fdt_size); if (status != EFI_SUCCESS) { @@ -220,7 +218,7 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table_arg, pr_efi("Using DTB from command line\n"); } else { /* Look for a device tree configuration table entry. */ - fdt_addr = (uintptr_t)get_fdt(sys_table, &fdt_size); + fdt_addr = (uintptr_t)get_fdt(&fdt_size); if (fdt_addr) pr_efi("Using DTB from configuration table\n"); } @@ -228,7 +226,7 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table_arg, if (!fdt_addr) pr_efi("Generating empty DTB\n"); - status = handle_cmdline_files(sys_table, image, cmdline_ptr, "initrd=", + status = handle_cmdline_files(image, cmdline_ptr, "initrd=", efi_get_max_initrd_addr(dram_base, *image_addr), (unsigned long *)&initrd_addr, @@ -236,7 +234,7 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table_arg, if (status != EFI_SUCCESS) pr_efi_err("Failed initrd from command line!\n"); - efi_random_get_seed(sys_table); + efi_random_get_seed(); /* hibernation expects the runtime regions to stay in the same place */ if (!IS_ENABLED(CONFIG_HIBERNATION) && !nokaslr()) { @@ -251,18 +249,17 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table_arg, EFI_RT_VIRTUAL_SIZE; u32 rnd; - status = efi_get_random_bytes(sys_table, sizeof(rnd), - (u8 *)&rnd); + status = efi_get_random_bytes(sizeof(rnd), (u8 *)&rnd); if (status == EFI_SUCCESS) { virtmap_base = EFI_RT_VIRTUAL_BASE + (((headroom >> 21) * rnd) >> (32 - 21)); } } - install_memreserve_table(sys_table); + install_memreserve_table(); new_fdt_addr = fdt_addr; - status = allocate_new_fdt_and_exit_boot(sys_table, handle, + status = allocate_new_fdt_and_exit_boot(handle, &new_fdt_addr, efi_get_max_fdt_addr(dram_base), initrd_addr, initrd_size, cmdline_ptr, fdt_addr, fdt_size); @@ -277,15 +274,15 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table_arg, pr_efi_err("Failed to update FDT and exit boot services\n"); - efi_free(sys_table, initrd_size, initrd_addr); - efi_free(sys_table, fdt_size, fdt_addr); + efi_free(initrd_size, initrd_addr); + efi_free(fdt_size, fdt_addr); fail_free_image: - efi_free(sys_table, image_size, *image_addr); - efi_free(sys_table, reserve_size, reserve_addr); + efi_free(image_size, *image_addr); + efi_free(reserve_size, reserve_addr); fail_free_cmdline: - free_screen_info(sys_table, si); - efi_free(sys_table, cmdline_size, (unsigned long)cmdline_ptr); + free_screen_info(si); + efi_free(cmdline_size, (unsigned long)cmdline_ptr); fail: return EFI_ERROR; } diff --git a/drivers/firmware/efi/libstub/arm32-stub.c b/drivers/firmware/efi/libstub/arm32-stub.c index 7b5c717ddfac..e7a38d912749 100644 --- a/drivers/firmware/efi/libstub/arm32-stub.c +++ b/drivers/firmware/efi/libstub/arm32-stub.c @@ -7,7 +7,7 @@ #include "efistub.h" -efi_status_t check_platform_features(efi_system_table_t *sys_table_arg) +efi_status_t check_platform_features(void) { int block; @@ -26,7 +26,7 @@ efi_status_t check_platform_features(efi_system_table_t *sys_table_arg) static efi_guid_t screen_info_guid = LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID; -struct screen_info *alloc_screen_info(efi_system_table_t *sys_table_arg) +struct screen_info *alloc_screen_info(void) { struct screen_info *si; efi_status_t status; @@ -52,7 +52,7 @@ struct screen_info *alloc_screen_info(efi_system_table_t *sys_table_arg) return NULL; } -void free_screen_info(efi_system_table_t *sys_table_arg, struct screen_info *si) +void free_screen_info(struct screen_info *si) { if (!si) return; @@ -61,8 +61,7 @@ void free_screen_info(efi_system_table_t *sys_table_arg, struct screen_info *si) efi_call_early(free_pool, si); } -static efi_status_t reserve_kernel_base(efi_system_table_t *sys_table_arg, - unsigned long dram_base, +static efi_status_t reserve_kernel_base(unsigned long dram_base, unsigned long *reserve_addr, unsigned long *reserve_size) { @@ -119,7 +118,7 @@ static efi_status_t reserve_kernel_base(efi_system_table_t *sys_table_arg, * released to the OS after ExitBootServices(), the decompressor can * safely overwrite them. */ - status = efi_get_memory_map(sys_table_arg, &map); + status = efi_get_memory_map(&map); if (status != EFI_SUCCESS) { pr_efi_err("reserve_kernel_base(): Unable to retrieve memory map.\n"); return status; @@ -190,8 +189,7 @@ out: return status; } -efi_status_t handle_kernel_image(efi_system_table_t *sys_table, - unsigned long *image_addr, +efi_status_t handle_kernel_image(unsigned long *image_addr, unsigned long *image_size, unsigned long *reserve_addr, unsigned long *reserve_size, @@ -219,8 +217,7 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table, */ kernel_base += TEXT_OFFSET - 5 * PAGE_SIZE; - status = reserve_kernel_base(sys_table, kernel_base, reserve_addr, - reserve_size); + status = reserve_kernel_base(kernel_base, reserve_addr, reserve_size); if (status != EFI_SUCCESS) { pr_efi_err("Unable to allocate memory for uncompressed kernel.\n"); return status; @@ -231,12 +228,11 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table, * memory window. */ *image_size = image->image_size; - status = efi_relocate_kernel(sys_table, image_addr, *image_size, - *image_size, + status = efi_relocate_kernel(image_addr, *image_size, *image_size, kernel_base + MAX_UNCOMP_KERNEL_SIZE, 0, 0); if (status != EFI_SUCCESS) { pr_efi_err("Failed to relocate kernel.\n"); - efi_free(sys_table, *reserve_size, *reserve_addr); + efi_free(*reserve_size, *reserve_addr); *reserve_size = 0; return status; } @@ -248,9 +244,9 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table, */ if (*image_addr + *image_size > dram_base + ZIMAGE_OFFSET_LIMIT) { pr_efi_err("Failed to relocate kernel, no low memory available.\n"); - efi_free(sys_table, *reserve_size, *reserve_addr); + efi_free(*reserve_size, *reserve_addr); *reserve_size = 0; - efi_free(sys_table, *image_size, *image_addr); + efi_free(*image_size, *image_addr); *image_size = 0; return EFI_LOAD_ERROR; } diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index b09dda600c78..beba45e478c7 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -21,7 +21,7 @@ #include "efistub.h" -efi_status_t check_platform_features(efi_system_table_t *sys_table_arg) +efi_status_t check_platform_features(void) { u64 tg; @@ -40,8 +40,7 @@ efi_status_t check_platform_features(efi_system_table_t *sys_table_arg) return EFI_SUCCESS; } -efi_status_t handle_kernel_image(efi_system_table_t *sys_table_arg, - unsigned long *image_addr, +efi_status_t handle_kernel_image(unsigned long *image_addr, unsigned long *image_size, unsigned long *reserve_addr, unsigned long *reserve_size, @@ -56,8 +55,7 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table_arg, if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { if (!nokaslr()) { - status = efi_get_random_bytes(sys_table_arg, - sizeof(phys_seed), + status = efi_get_random_bytes(sizeof(phys_seed), (u8 *)&phys_seed); if (status == EFI_NOT_FOUND) { pr_efi("EFI_RNG_PROTOCOL unavailable, no randomness supplied\n"); @@ -108,7 +106,7 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table_arg, * locate the kernel at a randomized offset in physical memory. */ *reserve_size = kernel_memsize + offset; - status = efi_random_alloc(sys_table_arg, *reserve_size, + status = efi_random_alloc(*reserve_size, MIN_KIMG_ALIGN, reserve_addr, (u32)phys_seed); @@ -139,7 +137,7 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table_arg, if (status != EFI_SUCCESS) { *reserve_size = kernel_memsize + TEXT_OFFSET; - status = efi_low_alloc(sys_table_arg, *reserve_size, + status = efi_low_alloc(*reserve_size, MIN_KIMG_ALIGN, reserve_addr); if (status != EFI_SUCCESS) { diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index 9f0d332954ab..d4215571f05a 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -84,8 +84,7 @@ static inline bool mmap_has_headroom(unsigned long buff_size, return slack / desc_size >= EFI_MMAP_NR_SLACK_SLOTS; } -efi_status_t efi_get_memory_map(efi_system_table_t *sys_table_arg, - struct efi_boot_memmap *map) +efi_status_t efi_get_memory_map(struct efi_boot_memmap *map) { efi_memory_desc_t *m = NULL; efi_status_t status; @@ -135,7 +134,7 @@ fail: } -unsigned long get_dram_base(efi_system_table_t *sys_table_arg) +unsigned long get_dram_base(void) { efi_status_t status; unsigned long map_size, buff_size; @@ -151,7 +150,7 @@ unsigned long get_dram_base(efi_system_table_t *sys_table_arg) boot_map.key_ptr = NULL; boot_map.buff_size = &buff_size; - status = efi_get_memory_map(sys_table_arg, &boot_map); + status = efi_get_memory_map(&boot_map); if (status != EFI_SUCCESS) return membase; @@ -172,8 +171,7 @@ unsigned long get_dram_base(efi_system_table_t *sys_table_arg) /* * Allocate at the highest possible address that is not above 'max'. */ -efi_status_t efi_high_alloc(efi_system_table_t *sys_table_arg, - unsigned long size, unsigned long align, +efi_status_t efi_high_alloc(unsigned long size, unsigned long align, unsigned long *addr, unsigned long max) { unsigned long map_size, desc_size, buff_size; @@ -191,7 +189,7 @@ efi_status_t efi_high_alloc(efi_system_table_t *sys_table_arg, boot_map.key_ptr = NULL; boot_map.buff_size = &buff_size; - status = efi_get_memory_map(sys_table_arg, &boot_map); + status = efi_get_memory_map(&boot_map); if (status != EFI_SUCCESS) goto fail; @@ -271,8 +269,7 @@ fail: /* * Allocate at the lowest possible address that is not below 'min'. */ -efi_status_t efi_low_alloc_above(efi_system_table_t *sys_table_arg, - unsigned long size, unsigned long align, +efi_status_t efi_low_alloc_above(unsigned long size, unsigned long align, unsigned long *addr, unsigned long min) { unsigned long map_size, desc_size, buff_size; @@ -289,7 +286,7 @@ efi_status_t efi_low_alloc_above(efi_system_table_t *sys_table_arg, boot_map.key_ptr = NULL; boot_map.buff_size = &buff_size; - status = efi_get_memory_map(sys_table_arg, &boot_map); + status = efi_get_memory_map(&boot_map); if (status != EFI_SUCCESS) goto fail; @@ -348,8 +345,7 @@ fail: return status; } -void efi_free(efi_system_table_t *sys_table_arg, unsigned long size, - unsigned long addr) +void efi_free(unsigned long size, unsigned long addr) { unsigned long nr_pages; @@ -360,9 +356,8 @@ void efi_free(efi_system_table_t *sys_table_arg, unsigned long size, efi_call_early(free_pages, addr, nr_pages); } -static efi_status_t efi_file_size(efi_system_table_t *sys_table_arg, void *__fh, - efi_char16_t *filename_16, void **handle, - u64 *file_sz) +static efi_status_t efi_file_size(void *__fh, efi_char16_t *filename_16, + void **handle, u64 *file_sz) { efi_file_handle_t *h, *fh = __fh; efi_file_info_t *info; @@ -421,8 +416,7 @@ static efi_status_t efi_file_close(efi_file_handle_t *handle) return handle->close(handle); } -static efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg, - efi_loaded_image_t *image, +static efi_status_t efi_open_volume(efi_loaded_image_t *image, efi_file_handle_t **__fh) { efi_file_io_interface_t *io; @@ -516,8 +510,7 @@ efi_status_t efi_parse_options(char const *cmdline) * We only support loading a file from the same filesystem as * the kernel image. */ -efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, - efi_loaded_image_t *image, +efi_status_t handle_cmdline_files(efi_loaded_image_t *image, char *cmd_line, char *option_string, unsigned long max_addr, unsigned long *load_addr, @@ -608,13 +601,13 @@ efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, /* Only open the volume once. */ if (!i) { - status = efi_open_volume(sys_table_arg, image, &fh); + status = efi_open_volume(image, &fh); if (status != EFI_SUCCESS) goto free_files; } - status = efi_file_size(sys_table_arg, fh, filename_16, - (void **)&file->handle, &file->size); + status = efi_file_size(fh, filename_16, (void **)&file->handle, + &file->size); if (status != EFI_SUCCESS) goto close_handles; @@ -629,8 +622,8 @@ efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, * so allocate enough memory for all the files. This is used * for loading multiple files. */ - status = efi_high_alloc(sys_table_arg, file_size_total, 0x1000, - &file_addr, max_addr); + status = efi_high_alloc(file_size_total, 0x1000, &file_addr, + max_addr); if (status != EFI_SUCCESS) { pr_efi_err("Failed to alloc highmem for files\n"); goto close_handles; @@ -680,7 +673,7 @@ efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, return status; free_file_total: - efi_free(sys_table_arg, file_size_total, file_addr); + efi_free(file_size_total, file_addr); close_handles: for (k = j; k < i; k++) @@ -703,8 +696,7 @@ fail: * address is not available the lowest available address will * be used. */ -efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg, - unsigned long *image_addr, +efi_status_t efi_relocate_kernel(unsigned long *image_addr, unsigned long image_size, unsigned long alloc_size, unsigned long preferred_addr, @@ -742,8 +734,8 @@ efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg, * possible. */ if (status != EFI_SUCCESS) { - status = efi_low_alloc_above(sys_table_arg, alloc_size, - alignment, &new_addr, min_addr); + status = efi_low_alloc_above(alloc_size, alignment, &new_addr, + min_addr); } if (status != EFI_SUCCESS) { pr_efi_err("Failed to allocate usable memory for kernel.\n"); @@ -820,8 +812,7 @@ static u8 *efi_utf16_to_utf8(u8 *dst, const u16 *src, int n) * Size of memory allocated return in *cmd_line_len. * Returns NULL on error. */ -char *efi_convert_cmdline(efi_system_table_t *sys_table_arg, - efi_loaded_image_t *image, +char *efi_convert_cmdline(efi_loaded_image_t *image, int *cmd_line_len) { const u16 *s2; @@ -850,8 +841,8 @@ char *efi_convert_cmdline(efi_system_table_t *sys_table_arg, options_bytes++; /* NUL termination */ - status = efi_high_alloc(sys_table_arg, options_bytes, 0, - &cmdline_addr, MAX_CMDLINE_ADDRESS); + status = efi_high_alloc(options_bytes, 0, &cmdline_addr, + MAX_CMDLINE_ADDRESS); if (status != EFI_SUCCESS) return NULL; @@ -873,20 +864,19 @@ char *efi_convert_cmdline(efi_system_table_t *sys_table_arg, * specific structure may be passed to the function via priv. The client * function may be called multiple times. */ -efi_status_t efi_exit_boot_services(efi_system_table_t *sys_table_arg, - void *handle, +efi_status_t efi_exit_boot_services(void *handle, struct efi_boot_memmap *map, void *priv, efi_exit_boot_map_processing priv_func) { efi_status_t status; - status = efi_get_memory_map(sys_table_arg, map); + status = efi_get_memory_map(map); if (status != EFI_SUCCESS) goto fail; - status = priv_func(sys_table_arg, map, priv); + status = priv_func(map, priv); if (status != EFI_SUCCESS) goto free_map; @@ -918,7 +908,7 @@ efi_status_t efi_exit_boot_services(efi_system_table_t *sys_table_arg, if (status != EFI_SUCCESS) goto fail; - status = priv_func(sys_table_arg, map, priv); + status = priv_func(map, priv); /* exit_boot_services() was called, thus cannot free */ if (status != EFI_SUCCESS) goto fail; @@ -938,10 +928,12 @@ fail: return status; } -void *get_efi_config_table(efi_system_table_t *sys_table, efi_guid_t guid) +void *get_efi_config_table(efi_guid_t guid) { - unsigned long tables = efi_table_attr(efi_system_table, tables, sys_table); - int nr_tables = efi_table_attr(efi_system_table, nr_tables, sys_table); + unsigned long tables = efi_table_attr(efi_system_table, tables, + efi_system_table()); + int nr_tables = efi_table_attr(efi_system_table, nr_tables, + efi_system_table()); int i; for (i = 0; i < nr_tables; i++) { diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 4a6acd28ce65..b5d9c9e65213 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -40,10 +40,9 @@ extern __pure efi_system_table_t *efi_system_table(void); void efi_char16_printk(efi_char16_t *); void efi_char16_printk(efi_char16_t *); -unsigned long get_dram_base(efi_system_table_t *sys_table_arg); +unsigned long get_dram_base(void); -efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, - void *handle, +efi_status_t allocate_new_fdt_and_exit_boot(void *handle, unsigned long *new_fdt_addr, unsigned long max_addr, u64 initrd_addr, u64 initrd_size, @@ -51,22 +50,20 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, unsigned long fdt_addr, unsigned long fdt_size); -void *get_fdt(efi_system_table_t *sys_table, unsigned long *fdt_size); +void *get_fdt(unsigned long *fdt_size); void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size, unsigned long desc_size, efi_memory_desc_t *runtime_map, int *count); -efi_status_t efi_get_random_bytes(efi_system_table_t *sys_table, - unsigned long size, u8 *out); +efi_status_t efi_get_random_bytes(unsigned long size, u8 *out); -efi_status_t efi_random_alloc(efi_system_table_t *sys_table_arg, - unsigned long size, unsigned long align, +efi_status_t efi_random_alloc(unsigned long size, unsigned long align, unsigned long *addr, unsigned long random_seed); -efi_status_t check_platform_features(efi_system_table_t *sys_table_arg); +efi_status_t check_platform_features(void); -void *get_efi_config_table(efi_system_table_t *sys_table, efi_guid_t guid); +void *get_efi_config_table(efi_guid_t guid); /* Helper macros for the usual case of using simple C variables: */ #ifndef fdt_setprop_inplace_var diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c index 1bf7edfd81ec..0a91e5232127 100644 --- a/drivers/firmware/efi/libstub/fdt.c +++ b/drivers/firmware/efi/libstub/fdt.c @@ -16,7 +16,7 @@ #define EFI_DT_ADDR_CELLS_DEFAULT 2 #define EFI_DT_SIZE_CELLS_DEFAULT 2 -static void fdt_update_cell_size(efi_system_table_t *sys_table, void *fdt) +static void fdt_update_cell_size(void *fdt) { int offset; @@ -27,8 +27,7 @@ static void fdt_update_cell_size(efi_system_table_t *sys_table, void *fdt) fdt_setprop_u32(fdt, offset, "#size-cells", EFI_DT_SIZE_CELLS_DEFAULT); } -static efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, - unsigned long orig_fdt_size, +static efi_status_t update_fdt(void *orig_fdt, unsigned long orig_fdt_size, void *fdt, int new_fdt_size, char *cmdline_ptr, u64 initrd_addr, u64 initrd_size) { @@ -62,7 +61,7 @@ static efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, * Any failure from the following function is * non-critical: */ - fdt_update_cell_size(sys_table, fdt); + fdt_update_cell_size(fdt); } } @@ -111,7 +110,7 @@ static efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, /* Add FDT entries for EFI runtime services in chosen node. */ node = fdt_subnode_offset(fdt, 0, "chosen"); - fdt_val64 = cpu_to_fdt64((u64)(unsigned long)sys_table); + fdt_val64 = cpu_to_fdt64((u64)(unsigned long)efi_system_table()); status = fdt_setprop_var(fdt, node, "linux,uefi-system-table", fdt_val64); if (status) @@ -140,7 +139,7 @@ static efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { efi_status_t efi_status; - efi_status = efi_get_random_bytes(sys_table, sizeof(fdt_val64), + efi_status = efi_get_random_bytes(sizeof(fdt_val64), (u8 *)&fdt_val64); if (efi_status == EFI_SUCCESS) { status = fdt_setprop_var(fdt, node, "kaslr-seed", fdt_val64); @@ -210,8 +209,7 @@ struct exit_boot_struct { void *new_fdt_addr; }; -static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg, - struct efi_boot_memmap *map, +static efi_status_t exit_boot_func(struct efi_boot_memmap *map, void *priv) { struct exit_boot_struct *p = priv; @@ -244,8 +242,7 @@ static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg, * with the final memory map in it. */ -efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, - void *handle, +efi_status_t allocate_new_fdt_and_exit_boot(void *handle, unsigned long *new_fdt_addr, unsigned long max_addr, u64 initrd_addr, u64 initrd_size, @@ -275,7 +272,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, * subsequent allocations adding entries, since they could not affect * the number of EFI_MEMORY_RUNTIME regions. */ - status = efi_get_memory_map(sys_table, &map); + status = efi_get_memory_map(&map); if (status != EFI_SUCCESS) { pr_efi_err("Unable to retrieve UEFI memory map.\n"); return status; @@ -284,7 +281,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, pr_efi("Exiting boot services and installing virtual address map...\n"); map.map = &memory_map; - status = efi_high_alloc(sys_table, MAX_FDT_SIZE, EFI_FDT_ALIGN, + status = efi_high_alloc(MAX_FDT_SIZE, EFI_FDT_ALIGN, new_fdt_addr, max_addr); if (status != EFI_SUCCESS) { pr_efi_err("Unable to allocate memory for new device tree.\n"); @@ -295,11 +292,11 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, * Now that we have done our final memory allocation (and free) * we can get the memory map key needed for exit_boot_services(). */ - status = efi_get_memory_map(sys_table, &map); + status = efi_get_memory_map(&map); if (status != EFI_SUCCESS) goto fail_free_new_fdt; - status = update_fdt(sys_table, (void *)fdt_addr, fdt_size, + status = update_fdt((void *)fdt_addr, fdt_size, (void *)*new_fdt_addr, MAX_FDT_SIZE, cmdline_ptr, initrd_addr, initrd_size); @@ -313,7 +310,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, priv.runtime_entry_count = &runtime_entry_count; priv.new_fdt_addr = (void *)*new_fdt_addr; - status = efi_exit_boot_services(sys_table, handle, &map, &priv, exit_boot_func); + status = efi_exit_boot_services(handle, &map, &priv, exit_boot_func); if (status == EFI_SUCCESS) { efi_set_virtual_address_map_t *svam; @@ -322,7 +319,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, return EFI_SUCCESS; /* Install the new virtual address map */ - svam = sys_table->runtime->set_virtual_address_map; + svam = efi_system_table()->runtime->set_virtual_address_map; status = svam(runtime_entry_count * desc_size, desc_size, desc_ver, runtime_map); @@ -353,19 +350,19 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, pr_efi_err("Exit boot services failed.\n"); fail_free_new_fdt: - efi_free(sys_table, MAX_FDT_SIZE, *new_fdt_addr); + efi_free(MAX_FDT_SIZE, *new_fdt_addr); fail: - sys_table->boottime->free_pool(runtime_map); + efi_system_table()->boottime->free_pool(runtime_map); return EFI_LOAD_ERROR; } -void *get_fdt(efi_system_table_t *sys_table, unsigned long *fdt_size) +void *get_fdt(unsigned long *fdt_size) { void *fdt; - fdt = get_efi_config_table(sys_table, DEVICE_TREE_GUID); + fdt = get_efi_config_table(DEVICE_TREE_GUID); if (!fdt) return NULL; diff --git a/drivers/firmware/efi/libstub/gop.c b/drivers/firmware/efi/libstub/gop.c index 6c49d0a9aa3f..c3afe8d4a688 100644 --- a/drivers/firmware/efi/libstub/gop.c +++ b/drivers/firmware/efi/libstub/gop.c @@ -88,9 +88,8 @@ setup_pixel_info(struct screen_info *si, u32 pixels_per_scan_line, #define efi_gop_attr(table, attr, instance) \ (efi_table_attr(efi_graphics_output_protocol##table, attr, instance)) -static efi_status_t -setup_gop(efi_system_table_t *sys_table_arg, struct screen_info *si, - efi_guid_t *proto, unsigned long size, void **handles) +static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, + unsigned long size, void **handles) { efi_graphics_output_protocol_t *gop, *first_gop; u16 width, height; @@ -185,8 +184,7 @@ setup_gop(efi_system_table_t *sys_table_arg, struct screen_info *si, /* * See if we have Graphics Output Protocol */ -efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg, - struct screen_info *si, efi_guid_t *proto, +efi_status_t efi_setup_gop(struct screen_info *si, efi_guid_t *proto, unsigned long size) { efi_status_t status; @@ -203,7 +201,7 @@ efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg, if (status != EFI_SUCCESS) goto free_handle; - status = setup_gop(sys_table_arg, si, proto, size, gop_handle); + status = setup_gop(si, proto, size, gop_handle); free_handle: efi_call_early(free_pool, gop_handle); diff --git a/drivers/firmware/efi/libstub/random.c b/drivers/firmware/efi/libstub/random.c index 4f5c249c62dc..9b30d953d13b 100644 --- a/drivers/firmware/efi/libstub/random.c +++ b/drivers/firmware/efi/libstub/random.c @@ -26,8 +26,7 @@ union efi_rng_protocol { } mixed_mode; }; -efi_status_t efi_get_random_bytes(efi_system_table_t *sys_table_arg, - unsigned long size, u8 *out) +efi_status_t efi_get_random_bytes(unsigned long size, u8 *out) { efi_guid_t rng_proto = EFI_RNG_PROTOCOL_GUID; efi_status_t status; @@ -79,8 +78,7 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md, */ #define MD_NUM_SLOTS(md) ((md)->virt_addr) -efi_status_t efi_random_alloc(efi_system_table_t *sys_table_arg, - unsigned long size, +efi_status_t efi_random_alloc(unsigned long size, unsigned long align, unsigned long *addr, unsigned long random_seed) @@ -99,7 +97,7 @@ efi_status_t efi_random_alloc(efi_system_table_t *sys_table_arg, map.key_ptr = NULL; map.buff_size = &buff_size; - status = efi_get_memory_map(sys_table_arg, &map); + status = efi_get_memory_map(&map); if (status != EFI_SUCCESS) return status; @@ -155,7 +153,7 @@ efi_status_t efi_random_alloc(efi_system_table_t *sys_table_arg, return status; } -efi_status_t efi_random_get_seed(efi_system_table_t *sys_table_arg) +efi_status_t efi_random_get_seed(void) { efi_guid_t rng_proto = EFI_RNG_PROTOCOL_GUID; efi_guid_t rng_algo_raw = EFI_RNG_ALGORITHM_RAW; diff --git a/drivers/firmware/efi/libstub/secureboot.c b/drivers/firmware/efi/libstub/secureboot.c index 9071b57da8fe..0935d824a6ab 100644 --- a/drivers/firmware/efi/libstub/secureboot.c +++ b/drivers/firmware/efi/libstub/secureboot.c @@ -32,7 +32,7 @@ static const efi_char16_t shim_MokSBState_name[] = L"MokSBState"; * Please keep the logic in sync with * arch/x86/xen/efi.c:xen_efi_get_secureboot(). */ -enum efi_secureboot_mode efi_get_secureboot(efi_system_table_t *sys_table_arg) +enum efi_secureboot_mode efi_get_secureboot(void) { u32 attr; u8 secboot, setupmode, moksbstate; diff --git a/drivers/firmware/efi/libstub/tpm.c b/drivers/firmware/efi/libstub/tpm.c index 86658b005e85..f6fa1c9de77c 100644 --- a/drivers/firmware/efi/libstub/tpm.c +++ b/drivers/firmware/efi/libstub/tpm.c @@ -36,7 +36,7 @@ static const efi_char16_t efi_MemoryOverWriteRequest_name[] = * are cleared. If userland has ensured that all secrets have been removed * from RAM before reboot it can simply reset this variable. */ -void efi_enable_reset_attack_mitigation(efi_system_table_t *sys_table_arg) +void efi_enable_reset_attack_mitigation(void) { u8 val = 1; efi_guid_t var_guid = MEMORY_ONLY_RESET_CONTROL_GUID; @@ -57,7 +57,7 @@ void efi_enable_reset_attack_mitigation(efi_system_table_t *sys_table_arg) #endif -void efi_retrieve_tpm2_eventlog(efi_system_table_t *sys_table_arg) +void efi_retrieve_tpm2_eventlog(void) { efi_guid_t tcg2_guid = EFI_TCG2_PROTOCOL_GUID; efi_guid_t linux_eventlog_guid = LINUX_EFI_TPM_EVENT_LOG_GUID; @@ -139,8 +139,7 @@ void efi_retrieve_tpm2_eventlog(efi_system_table_t *sys_table_arg) * Figure out whether any events have already been logged to the * final events structure, and if so how much space they take up */ - final_events_table = get_efi_config_table(sys_table_arg, - LINUX_EFI_TPM_FINAL_LOG_GUID); + final_events_table = get_efi_config_table(LINUX_EFI_TPM_FINAL_LOG_GUID); if (final_events_table && final_events_table->nr_events) { struct tcg_pcr_event2_head *header; int offset; diff --git a/include/linux/efi.h b/include/linux/efi.h index 5b207db6ead0..726673e98990 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1495,22 +1495,17 @@ static inline int efi_runtime_map_copy(void *buf, size_t bufsz) void efi_printk(char *str); -void efi_free(efi_system_table_t *sys_table_arg, unsigned long size, - unsigned long addr); +void efi_free(unsigned long size, unsigned long addr); -char *efi_convert_cmdline(efi_system_table_t *sys_table_arg, - efi_loaded_image_t *image, int *cmd_line_len); +char *efi_convert_cmdline(efi_loaded_image_t *image, int *cmd_line_len); -efi_status_t efi_get_memory_map(efi_system_table_t *sys_table_arg, - struct efi_boot_memmap *map); +efi_status_t efi_get_memory_map(struct efi_boot_memmap *map); -efi_status_t efi_low_alloc_above(efi_system_table_t *sys_table_arg, - unsigned long size, unsigned long align, +efi_status_t efi_low_alloc_above(unsigned long size, unsigned long align, unsigned long *addr, unsigned long min); static inline -efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg, - unsigned long size, unsigned long align, +efi_status_t efi_low_alloc(unsigned long size, unsigned long align, unsigned long *addr) { /* @@ -1518,23 +1513,20 @@ efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg, * checks pointers against NULL. Skip the first 8 * bytes so we start at a nice even number. */ - return efi_low_alloc_above(sys_table_arg, size, align, addr, 0x8); + return efi_low_alloc_above(size, align, addr, 0x8); } -efi_status_t efi_high_alloc(efi_system_table_t *sys_table_arg, - unsigned long size, unsigned long align, +efi_status_t efi_high_alloc(unsigned long size, unsigned long align, unsigned long *addr, unsigned long max); -efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg, - unsigned long *image_addr, +efi_status_t efi_relocate_kernel(unsigned long *image_addr, unsigned long image_size, unsigned long alloc_size, unsigned long preferred_addr, unsigned long alignment, unsigned long min_addr); -efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, - efi_loaded_image_t *image, +efi_status_t handle_cmdline_files(efi_loaded_image_t *image, char *cmd_line, char *option_string, unsigned long max_addr, unsigned long *load_addr, @@ -1542,8 +1534,7 @@ efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, efi_status_t efi_parse_options(char const *cmdline); -efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg, - struct screen_info *si, efi_guid_t *proto, +efi_status_t efi_setup_gop(struct screen_info *si, efi_guid_t *proto, unsigned long size); #ifdef CONFIG_EFI @@ -1561,18 +1552,18 @@ enum efi_secureboot_mode { efi_secureboot_mode_disabled, efi_secureboot_mode_enabled, }; -enum efi_secureboot_mode efi_get_secureboot(efi_system_table_t *sys_table); +enum efi_secureboot_mode efi_get_secureboot(void); #ifdef CONFIG_RESET_ATTACK_MITIGATION -void efi_enable_reset_attack_mitigation(efi_system_table_t *sys_table_arg); +void efi_enable_reset_attack_mitigation(void); #else static inline void -efi_enable_reset_attack_mitigation(efi_system_table_t *sys_table_arg) { } +efi_enable_reset_attack_mitigation(void) { } #endif -efi_status_t efi_random_get_seed(efi_system_table_t *sys_table_arg); +efi_status_t efi_random_get_seed(void); -void efi_retrieve_tpm2_eventlog(efi_system_table_t *sys_table); +void efi_retrieve_tpm2_eventlog(void); /* * Arch code can implement the following three template macros, avoiding @@ -1624,12 +1615,10 @@ void efi_retrieve_tpm2_eventlog(efi_system_table_t *sys_table); }) typedef efi_status_t (*efi_exit_boot_map_processing)( - efi_system_table_t *sys_table_arg, struct efi_boot_memmap *map, void *priv); -efi_status_t efi_exit_boot_services(efi_system_table_t *sys_table, - void *handle, +efi_status_t efi_exit_boot_services(void *handle, struct efi_boot_memmap *map, void *priv, efi_exit_boot_map_processing priv_func); -- cgit v1.2.3-59-g8ed1b From 23e60394046a831d3245f83c0f5d46dee7d83326 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:20 +0100 Subject: efi/libstub/x86: Work around page freeing issue in mixed mode Mixed mode translates calls from the 64-bit kernel into the 32-bit firmware by wrapping them in a call to a thunking routine that pushes a 32-bit word onto the stack for each argument passed to the function, regardless of the argument type. This works surprisingly well for most services and protocols, with the exception of ones that take explicit 64-bit arguments. efi_free() invokes the FreePages() EFI boot service, which takes a efi_physical_addr_t as its address argument, and this is one of those 64-bit types. This means that the 32-bit firmware will interpret the (addr, size) pair as a single 64-bit quantity, and since it is guaranteed to have the high word set (as size > 0), it will always fail due to the fact that EFI memory allocations are always < 4 GB on 32-bit firmware. So let's fix this by giving the thunking code a little hand, and pass two values for the address, and a third one for the size. Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-21-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/eboot.c | 16 ++++++++++++++++ drivers/firmware/efi/libstub/efi-stub-helper.c | 5 ++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index f81dd66626ce..ec92c4decc86 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -901,3 +901,19 @@ fail: for (;;) asm("hlt"); } + +#ifdef CONFIG_EFI_MIXED +void efi_free_native(unsigned long size, unsigned long addr); + +void efi_free(unsigned long size, unsigned long addr) +{ + if (!size) + return; + + if (efi_is_native()) + efi_free_native(size, addr); + else + efi64_thunk(efi_system_table()->boottime->mixed_mode.free_pages, + addr, 0, DIV_ROUND_UP(size, EFI_PAGE_SIZE)); +} +#endif diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index d4215571f05a..b715ac6a0c94 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -346,6 +346,9 @@ fail: } void efi_free(unsigned long size, unsigned long addr) + __weak __alias(efi_free_native); + +void efi_free_native(unsigned long size, unsigned long addr) { unsigned long nr_pages; @@ -353,7 +356,7 @@ void efi_free(unsigned long size, unsigned long addr) return; nr_pages = round_up(size, EFI_ALLOC_ALIGN) / EFI_PAGE_SIZE; - efi_call_early(free_pages, addr, nr_pages); + efi_system_table()->boottime->free_pages(addr, nr_pages); } static efi_status_t efi_file_size(void *__fh, efi_char16_t *filename_16, -- cgit v1.2.3-59-g8ed1b From 47c0fd39b7b81f51cc8f767c34a57d12289bdc60 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:21 +0100 Subject: efi/libstub: Drop protocol argument from efi_call_proto() macro After refactoring the mixed mode support code, efi_call_proto() no longer uses its protocol argument in any of its implementation, so let's remove it altogether. Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-22-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/arm/include/asm/efi.h | 3 +-- arch/arm64/include/asm/efi.h | 3 +-- arch/x86/boot/compressed/eboot.c | 23 +++++++++-------------- arch/x86/include/asm/efi.h | 6 +++--- drivers/firmware/efi/libstub/efi-stub-helper.c | 6 ++---- drivers/firmware/efi/libstub/random.c | 8 ++++---- drivers/firmware/efi/libstub/tpm.c | 11 +++++------ 7 files changed, 25 insertions(+), 35 deletions(-) diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h index 58e5acc424a0..bdc5288cc643 100644 --- a/arch/arm/include/asm/efi.h +++ b/arch/arm/include/asm/efi.h @@ -57,8 +57,7 @@ void efi_virtmap_unload(void); #define efi_table_attr(table, attr, instance) \ instance->attr -#define efi_call_proto(protocol, f, instance, ...) \ - instance->f(instance, ##__VA_ARGS__) +#define efi_call_proto(inst, func, ...) inst->func(inst, ##__VA_ARGS__) struct screen_info *alloc_screen_info(void); void free_screen_info(struct screen_info *si); diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index d73693177f31..4bc1e89671ab 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -100,8 +100,7 @@ static inline unsigned long efi_get_max_initrd_addr(unsigned long dram_base, #define efi_table_attr(table, attr, instance) \ instance->attr -#define efi_call_proto(protocol, f, instance, ...) \ - instance->f(instance, ##__VA_ARGS__) +#define efi_call_proto(inst, func, ...) inst->func(inst, ##__VA_ARGS__) #define alloc_screen_info(x...) &screen_info diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index ec92c4decc86..751fd5fc3367 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -69,27 +69,24 @@ preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom) rom->pcilen = pci->romsize; *__rom = rom; - status = efi_call_proto(efi_pci_io_protocol, pci.read, pci, - EfiPciIoWidthUint16, PCI_VENDOR_ID, 1, - &rom->vendor); + status = efi_call_proto(pci, pci.read, EfiPciIoWidthUint16, + PCI_VENDOR_ID, 1, &rom->vendor); if (status != EFI_SUCCESS) { efi_printk("Failed to read rom->vendor\n"); goto free_struct; } - status = efi_call_proto(efi_pci_io_protocol, pci.read, pci, - EfiPciIoWidthUint16, PCI_DEVICE_ID, 1, - &rom->devid); + status = efi_call_proto(pci, pci.read, EfiPciIoWidthUint16, + PCI_DEVICE_ID, 1, &rom->devid); if (status != EFI_SUCCESS) { efi_printk("Failed to read rom->devid\n"); goto free_struct; } - status = efi_call_proto(efi_pci_io_protocol, get_location, pci, - &rom->segment, &rom->bus, &rom->device, - &rom->function); + status = efi_call_proto(pci, get_location, &rom->segment, &rom->bus, + &rom->device, &rom->function); if (status != EFI_SUCCESS) goto free_struct; @@ -191,7 +188,7 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params) return; } - efi_call_proto(apple_properties_protocol, get_all, p, NULL, &size); + efi_call_proto(p, get_all, NULL, &size); if (!size) return; @@ -204,8 +201,7 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params) return; } - status = efi_call_proto(apple_properties_protocol, get_all, p, - new->data, &size); + status = efi_call_proto(p, get_all, new->data, &size); if (status == EFI_BUFFER_TOO_SMALL) efi_call_early(free_pool, new); @@ -280,8 +276,7 @@ setup_uga(struct screen_info *si, efi_guid_t *uga_proto, unsigned long size) pciio = NULL; efi_call_early(handle_protocol, handle, &pciio_proto, &pciio); - status = efi_call_proto(efi_uga_draw_protocol, get_mode, uga, - &w, &h, &depth, &refresh); + status = efi_call_proto(uga, get_mode, &w, &h, &depth, &refresh); if (status == EFI_SUCCESS && (!first_uga || pciio)) { width = w; height = h; diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 1817f350618e..b7cd14e3a634 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -227,10 +227,10 @@ static inline bool efi_is_native(void) __ret; \ }) -#define efi_call_proto(protocol, f, instance, ...) \ +#define efi_call_proto(inst, func, ...) \ (efi_is_native() \ - ? instance->f(instance, ##__VA_ARGS__) \ - : efi64_thunk(instance->mixed_mode.f, instance, ##__VA_ARGS__)) + ? inst->func(inst, ##__VA_ARGS__) \ + : efi64_thunk(inst->mixed_mode.func, inst, ##__VA_ARGS__)) #define efi_call_early(f, ...) \ (efi_is_native() \ diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index b715ac6a0c94..48eab7b9d066 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -953,9 +953,7 @@ void *get_efi_config_table(efi_guid_t guid) void efi_char16_printk(efi_char16_t *str) { - efi_call_proto(efi_simple_text_output_protocol, - output_string, - efi_table_attr(efi_system_table, con_out, + efi_call_proto(efi_table_attr(efi_system_table, con_out, efi_system_table()), - str); + output_string, str); } diff --git a/drivers/firmware/efi/libstub/random.c b/drivers/firmware/efi/libstub/random.c index 9b30d953d13b..fbd5b5724b19 100644 --- a/drivers/firmware/efi/libstub/random.c +++ b/drivers/firmware/efi/libstub/random.c @@ -37,7 +37,7 @@ efi_status_t efi_get_random_bytes(unsigned long size, u8 *out) if (status != EFI_SUCCESS) return status; - return efi_call_proto(efi_rng_protocol, get_rng, rng, NULL, size, out); + return efi_call_proto(rng, get_rng, NULL, size, out); } /* @@ -173,7 +173,7 @@ efi_status_t efi_random_get_seed(void) if (status != EFI_SUCCESS) return status; - status = efi_call_proto(efi_rng_protocol, get_rng, rng, &rng_algo_raw, + status = efi_call_proto(rng, get_rng, &rng_algo_raw, EFI_RANDOM_SEED_SIZE, seed->bits); if (status == EFI_UNSUPPORTED) @@ -181,8 +181,8 @@ efi_status_t efi_random_get_seed(void) * Use whatever algorithm we have available if the raw algorithm * is not implemented. */ - status = efi_call_proto(efi_rng_protocol, get_rng, rng, NULL, - EFI_RANDOM_SEED_SIZE, seed->bits); + status = efi_call_proto(rng, get_rng, NULL, + EFI_RANDOM_SEED_SIZE, seed->bits); if (status != EFI_SUCCESS) goto err_freepool; diff --git a/drivers/firmware/efi/libstub/tpm.c b/drivers/firmware/efi/libstub/tpm.c index f6fa1c9de77c..4a0017a181bf 100644 --- a/drivers/firmware/efi/libstub/tpm.c +++ b/drivers/firmware/efi/libstub/tpm.c @@ -77,15 +77,14 @@ void efi_retrieve_tpm2_eventlog(void) if (status != EFI_SUCCESS) return; - status = efi_call_proto(efi_tcg2_protocol, get_event_log, - tcg2_protocol, version, &log_location, - &log_last_entry, &truncated); + status = efi_call_proto(tcg2_protocol, get_event_log, version, + &log_location, &log_last_entry, &truncated); if (status != EFI_SUCCESS || !log_location) { version = EFI_TCG2_EVENT_LOG_FORMAT_TCG_1_2; - status = efi_call_proto(efi_tcg2_protocol, get_event_log, - tcg2_protocol, version, &log_location, - &log_last_entry, &truncated); + status = efi_call_proto(tcg2_protocol, get_event_log, version, + &log_location, &log_last_entry, + &truncated); if (status != EFI_SUCCESS || !log_location) return; -- cgit v1.2.3-59-g8ed1b From 99ea8b1db2d23ac856bf3ee0673628df088a21ea Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:22 +0100 Subject: efi/libstub: Drop 'table' argument from efi_table_attr() macro None of the definitions of the efi_table_attr() still refer to their 'table' argument so let's get rid of it entirely. Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-23-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/arm/include/asm/efi.h | 3 +-- arch/arm64/include/asm/efi.h | 3 +-- arch/x86/boot/compressed/eboot.c | 8 ++++---- arch/x86/include/asm/efi.h | 25 +++++++++---------------- drivers/firmware/efi/libstub/efi-stub-helper.c | 11 ++++------- drivers/firmware/efi/libstub/gop.c | 9 +++------ 6 files changed, 22 insertions(+), 37 deletions(-) diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h index bdc5288cc643..bc720024a260 100644 --- a/arch/arm/include/asm/efi.h +++ b/arch/arm/include/asm/efi.h @@ -54,8 +54,7 @@ void efi_virtmap_unload(void); #define efi_call_runtime(f, ...) efi_system_table()->runtime->f(__VA_ARGS__) #define efi_is_native() (true) -#define efi_table_attr(table, attr, instance) \ - instance->attr +#define efi_table_attr(inst, attr) (inst->attr) #define efi_call_proto(inst, func, ...) inst->func(inst, ##__VA_ARGS__) diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 4bc1e89671ab..6f041ae446d2 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -97,8 +97,7 @@ static inline unsigned long efi_get_max_initrd_addr(unsigned long dram_base, #define efi_call_runtime(f, ...) efi_system_table()->runtime->f(__VA_ARGS__) #define efi_is_native() (true) -#define efi_table_attr(table, attr, instance) \ - instance->attr +#define efi_table_attr(inst, attr) (inst->attr) #define efi_call_proto(inst, func, ...) inst->func(inst, ##__VA_ARGS__) diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 751fd5fc3367..cccd9e16b329 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -47,8 +47,8 @@ preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom) * large romsize. The UEFI spec limits the size of option ROMs to 16 * MiB so we reject any ROMs over 16 MiB in size to catch this. */ - romimage = efi_table_attr(efi_pci_io_protocol, romimage, pci); - romsize = efi_table_attr(efi_pci_io_protocol, romsize, pci); + romimage = efi_table_attr(pci, romimage); + romsize = efi_table_attr(pci, romsize); if (!romimage || !romsize || romsize > SZ_16M) return EFI_INVALID_PARAMETER; @@ -183,7 +183,7 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params) if (status != EFI_SUCCESS) return; - if (efi_table_attr(apple_properties_protocol, version, p) != 0x10000) { + if (efi_table_attr(p, version) != 0x10000) { efi_printk("Unsupported properties proto version\n"); return; } @@ -226,7 +226,7 @@ static const efi_char16_t apple[] = L"Apple"; static void setup_quirks(struct boot_params *boot_params) { efi_char16_t *fw_vendor = (efi_char16_t *)(unsigned long) - efi_table_attr(efi_system_table, fw_vendor, sys_table); + efi_table_attr(efi_system_table(), fw_vendor); if (!memcmp(fw_vendor, apple, sizeof(apple))) { if (IS_ENABLED(CONFIG_APPLE_PROPERTIES)) diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index b7cd14e3a634..39814a0a92f7 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -216,16 +216,11 @@ static inline bool efi_is_native(void) __builtin_types_compatible_p(u32, __typeof__(attr)), \ (unsigned long)(attr), (attr)) -#define efi_table_attr(table, attr, instance) ({ \ - __typeof__(instance->attr) __ret; \ - if (efi_is_native()) { \ - __ret = instance->attr; \ - } else { \ - __ret = (__typeof__(__ret)) \ - efi_mixed_mode_cast(instance->mixed_mode.attr); \ - } \ - __ret; \ -}) +#define efi_table_attr(inst, attr) \ + (efi_is_native() \ + ? inst->attr \ + : (__typeof__(inst->attr)) \ + efi_mixed_mode_cast(inst->mixed_mode.attr)) #define efi_call_proto(inst, func, ...) \ (efi_is_native() \ @@ -235,16 +230,14 @@ static inline bool efi_is_native(void) #define efi_call_early(f, ...) \ (efi_is_native() \ ? efi_system_table()->boottime->f(__VA_ARGS__) \ - : efi64_thunk(efi_table_attr(efi_boot_services, \ - boottime, efi_system_table())->mixed_mode.f, \ - __VA_ARGS__)) + : efi64_thunk(efi_table_attr(efi_system_table(), \ + boottime)->mixed_mode.f, __VA_ARGS__)) #define efi_call_runtime(f, ...) \ (efi_is_native() \ ? efi_system_table()->runtime->f(__VA_ARGS__) \ - : efi64_thunk(efi_table_attr(efi_runtime_services, \ - runtime, efi_system_table())->mixed_mode.f, \ - __VA_ARGS__)) + : efi64_thunk(efi_table_attr(efi_system_table(), \ + runtime)->mixed_mode.f, __VA_ARGS__)) extern bool efi_reboot_required(void); extern bool efi_is_table_address(unsigned long phys_addr); diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index 48eab7b9d066..8754ec04788b 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -933,17 +933,15 @@ fail: void *get_efi_config_table(efi_guid_t guid) { - unsigned long tables = efi_table_attr(efi_system_table, tables, - efi_system_table()); - int nr_tables = efi_table_attr(efi_system_table, nr_tables, - efi_system_table()); + unsigned long tables = efi_table_attr(efi_system_table(), tables); + int nr_tables = efi_table_attr(efi_system_table(), nr_tables); int i; for (i = 0; i < nr_tables; i++) { efi_config_table_t *t = (void *)tables; if (efi_guidcmp(t->guid, guid) == 0) - return efi_table_attr(efi_config_table, table, t); + return efi_table_attr(t, table); tables += efi_is_native() ? sizeof(efi_config_table_t) : sizeof(efi_config_table_32_t); @@ -953,7 +951,6 @@ void *get_efi_config_table(efi_guid_t guid) void efi_char16_printk(efi_char16_t *str) { - efi_call_proto(efi_table_attr(efi_system_table, con_out, - efi_system_table()), + efi_call_proto(efi_table_attr(efi_system_table(), con_out), output_string, str); } diff --git a/drivers/firmware/efi/libstub/gop.c b/drivers/firmware/efi/libstub/gop.c index c3afe8d4a688..8f746282c219 100644 --- a/drivers/firmware/efi/libstub/gop.c +++ b/drivers/firmware/efi/libstub/gop.c @@ -85,9 +85,6 @@ setup_pixel_info(struct screen_info *si, u32 pixels_per_scan_line, } } -#define efi_gop_attr(table, attr, instance) \ - (efi_table_attr(efi_graphics_output_protocol##table, attr, instance)) - static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, unsigned long size, void **handles) { @@ -123,9 +120,9 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, if (status == EFI_SUCCESS) conout_found = true; - mode = (void *)(unsigned long)efi_gop_attr(, mode, gop); - info = (void *)(unsigned long)efi_gop_attr(_mode, info, mode); - current_fb_base = efi_gop_attr(_mode, frame_buffer_base, mode); + mode = efi_table_attr(gop, mode); + info = efi_table_attr(mode, info); + current_fb_base = efi_table_attr(mode, frame_buffer_base); if ((!first_gop || conout_found) && info->pixel_format != PIXEL_BLT_ONLY) { -- cgit v1.2.3-59-g8ed1b From 966291f6344d7eb6fc3204381a426bafa20a3d18 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:23 +0100 Subject: efi/libstub: Rename efi_call_early/_runtime macros to be more intuitive The macros efi_call_early and efi_call_runtime are used to call EFI boot services and runtime services, respectively. However, the naming is confusing, given that the early vs runtime distinction may suggest that these are used for calling the same set of services either early or late (== at runtime), while in reality, the sets of services they can be used with are completely disjoint, and efi_call_runtime is also only usable in 'early' code. So do a global sweep to replace all occurrences with efi_bs_call or efi_rt_call, respectively, where BS and RT match the idiom used by the UEFI spec to refer to boot time or runtime services. While at it, use 'func' as the macro parameter name for the function pointers, which is less likely to collide and cause weird build errors. Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-24-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/arm/include/asm/efi.h | 6 +- arch/arm64/include/asm/efi.h | 6 +- arch/x86/boot/compressed/eboot.c | 77 ++++++++++++-------------- arch/x86/include/asm/efi.h | 12 ++-- drivers/firmware/efi/libstub/arm-stub.c | 13 ++--- drivers/firmware/efi/libstub/arm32-stub.c | 30 +++++----- drivers/firmware/efi/libstub/arm64-stub.c | 8 +-- drivers/firmware/efi/libstub/efi-stub-helper.c | 70 +++++++++++------------ drivers/firmware/efi/libstub/efistub.h | 8 +++ drivers/firmware/efi/libstub/gop.c | 17 +++--- drivers/firmware/efi/libstub/random.c | 23 ++++---- drivers/firmware/efi/libstub/secureboot.c | 5 -- drivers/firmware/efi/libstub/tpm.c | 25 +++------ 13 files changed, 137 insertions(+), 163 deletions(-) diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h index bc720024a260..5ac46e2860bc 100644 --- a/arch/arm/include/asm/efi.h +++ b/arch/arm/include/asm/efi.h @@ -50,9 +50,9 @@ void efi_virtmap_unload(void); /* arch specific definitions used by the stub code */ -#define efi_call_early(f, ...) efi_system_table()->boottime->f(__VA_ARGS__) -#define efi_call_runtime(f, ...) efi_system_table()->runtime->f(__VA_ARGS__) -#define efi_is_native() (true) +#define efi_bs_call(func, ...) efi_system_table()->boottime->func(__VA_ARGS__) +#define efi_rt_call(func, ...) efi_system_table()->runtime->func(__VA_ARGS__) +#define efi_is_native() (true) #define efi_table_attr(inst, attr) (inst->attr) diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 6f041ae446d2..44531a69d32b 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -93,9 +93,9 @@ static inline unsigned long efi_get_max_initrd_addr(unsigned long dram_base, return (image_addr & ~(SZ_1G - 1UL)) + (1UL << (VA_BITS_MIN - 1)); } -#define efi_call_early(f, ...) efi_system_table()->boottime->f(__VA_ARGS__) -#define efi_call_runtime(f, ...) efi_system_table()->runtime->f(__VA_ARGS__) -#define efi_is_native() (true) +#define efi_bs_call(func, ...) efi_system_table()->boottime->func(__VA_ARGS__) +#define efi_rt_call(func, ...) efi_system_table()->runtime->func(__VA_ARGS__) +#define efi_is_native() (true) #define efi_table_attr(inst, attr) (inst->attr) diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index cccd9e16b329..da04948d75ed 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -54,8 +54,8 @@ preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom) size = romsize + sizeof(*rom); - status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, - (void **)&rom); + status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, size, + (void **)&rom); if (status != EFI_SUCCESS) { efi_printk("Failed to allocate memory for 'rom'\n"); return status; @@ -95,7 +95,7 @@ preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom) return status; free_struct: - efi_call_early(free_pool, rom); + efi_bs_call(free_pool, rom); return status; } @@ -119,23 +119,20 @@ static void setup_efi_pci(struct boot_params *params) efi_handle_t h; int i; - status = efi_call_early(locate_handle, - EFI_LOCATE_BY_PROTOCOL, - &pci_proto, NULL, &size, pci_handle); + status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL, + &pci_proto, NULL, &size, pci_handle); if (status == EFI_BUFFER_TOO_SMALL) { - status = efi_call_early(allocate_pool, - EFI_LOADER_DATA, - size, (void **)&pci_handle); + status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, size, + (void **)&pci_handle); if (status != EFI_SUCCESS) { efi_printk("Failed to allocate memory for 'pci_handle'\n"); return; } - status = efi_call_early(locate_handle, - EFI_LOCATE_BY_PROTOCOL, &pci_proto, - NULL, &size, pci_handle); + status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL, + &pci_proto, NULL, &size, pci_handle); } if (status != EFI_SUCCESS) @@ -150,8 +147,8 @@ static void setup_efi_pci(struct boot_params *params) efi_pci_io_protocol_t *pci = NULL; struct pci_setup_rom *rom; - status = efi_call_early(handle_protocol, h, - &pci_proto, (void **)&pci); + status = efi_bs_call(handle_protocol, h, &pci_proto, + (void **)&pci); if (status != EFI_SUCCESS || !pci) continue; @@ -168,7 +165,7 @@ static void setup_efi_pci(struct boot_params *params) } free_handle: - efi_call_early(free_pool, pci_handle); + efi_bs_call(free_pool, pci_handle); } static void retrieve_apple_device_properties(struct boot_params *boot_params) @@ -179,7 +176,7 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params) u32 size = 0; apple_properties_protocol_t *p; - status = efi_call_early(locate_protocol, &guid, NULL, (void **)&p); + status = efi_bs_call(locate_protocol, &guid, NULL, (void **)&p); if (status != EFI_SUCCESS) return; @@ -193,9 +190,9 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params) return; do { - status = efi_call_early(allocate_pool, EFI_LOADER_DATA, - size + sizeof(struct setup_data), - (void **)&new); + status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, + size + sizeof(struct setup_data), + (void **)&new); if (status != EFI_SUCCESS) { efi_printk("Failed to allocate memory for 'properties'\n"); return; @@ -204,7 +201,7 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params) status = efi_call_proto(p, get_all, new->data, &size); if (status == EFI_BUFFER_TOO_SMALL) - efi_call_early(free_pool, new); + efi_bs_call(free_pool, new); } while (status == EFI_BUFFER_TOO_SMALL); new->type = SETUP_APPLE_PROPERTIES; @@ -248,14 +245,13 @@ setup_uga(struct screen_info *si, efi_guid_t *uga_proto, unsigned long size) efi_handle_t handle; int i; - status = efi_call_early(allocate_pool, EFI_LOADER_DATA, - size, (void **)&uga_handle); + status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, size, + (void **)&uga_handle); if (status != EFI_SUCCESS) return status; - status = efi_call_early(locate_handle, - EFI_LOCATE_BY_PROTOCOL, - uga_proto, NULL, &size, uga_handle); + status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL, + uga_proto, NULL, &size, uga_handle); if (status != EFI_SUCCESS) goto free_handle; @@ -268,13 +264,13 @@ setup_uga(struct screen_info *si, efi_guid_t *uga_proto, unsigned long size) u32 w, h, depth, refresh; void *pciio; - status = efi_call_early(handle_protocol, handle, - uga_proto, (void **)&uga); + status = efi_bs_call(handle_protocol, handle, uga_proto, + (void **)&uga); if (status != EFI_SUCCESS) continue; pciio = NULL; - efi_call_early(handle_protocol, handle, &pciio_proto, &pciio); + efi_bs_call(handle_protocol, handle, &pciio_proto, &pciio); status = efi_call_proto(uga, get_mode, &w, &h, &depth, &refresh); if (status == EFI_SUCCESS && (!first_uga || pciio)) { @@ -312,7 +308,7 @@ setup_uga(struct screen_info *si, efi_guid_t *uga_proto, unsigned long size) si->rsvd_pos = 24; free_handle: - efi_call_early(free_pool, uga_handle); + efi_bs_call(free_pool, uga_handle); return status; } @@ -331,17 +327,15 @@ void setup_graphics(struct boot_params *boot_params) memset(si, 0, sizeof(*si)); size = 0; - status = efi_call_early(locate_handle, - EFI_LOCATE_BY_PROTOCOL, - &graphics_proto, NULL, &size, gop_handle); + status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL, + &graphics_proto, NULL, &size, gop_handle); if (status == EFI_BUFFER_TOO_SMALL) status = efi_setup_gop(si, &graphics_proto, size); if (status != EFI_SUCCESS) { size = 0; - status = efi_call_early(locate_handle, - EFI_LOCATE_BY_PROTOCOL, - &uga_proto, NULL, &size, uga_handle); + status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL, + &uga_proto, NULL, &size, uga_handle); if (status == EFI_BUFFER_TOO_SMALL) setup_uga(si, &uga_proto, size); } @@ -378,8 +372,7 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) return EFI_INVALID_PARAMETER; - status = efi_call_early(handle_protocol, handle, - &proto, (void *)&image); + status = efi_bs_call(handle_protocol, handle, &proto, (void *)&image); if (status != EFI_SUCCESS) { efi_printk("Failed to get handle for LOADED_IMAGE_PROTOCOL\n"); return status; @@ -594,13 +587,13 @@ static efi_status_t alloc_e820ext(u32 nr_desc, struct setup_data **e820ext, sizeof(struct e820_entry) * nr_desc; if (*e820ext) { - efi_call_early(free_pool, *e820ext); + efi_bs_call(free_pool, *e820ext); *e820ext = NULL; *e820ext_size = 0; } - status = efi_call_early(allocate_pool, EFI_LOADER_DATA, - size, (void **)e820ext); + status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, size, + (void **)e820ext); if (status == EFI_SUCCESS) *e820ext_size = size; @@ -762,8 +755,8 @@ struct boot_params *efi_main(efi_handle_t handle, setup_quirks(boot_params); - status = efi_call_early(allocate_pool, EFI_LOADER_DATA, - sizeof(*gdt), (void **)&gdt); + status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, sizeof(*gdt), + (void **)&gdt); if (status != EFI_SUCCESS) { efi_printk("Failed to allocate memory for 'gdt' structure\n"); goto fail; diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 39814a0a92f7..2d1378f19b74 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -227,17 +227,17 @@ static inline bool efi_is_native(void) ? inst->func(inst, ##__VA_ARGS__) \ : efi64_thunk(inst->mixed_mode.func, inst, ##__VA_ARGS__)) -#define efi_call_early(f, ...) \ +#define efi_bs_call(func, ...) \ (efi_is_native() \ - ? efi_system_table()->boottime->f(__VA_ARGS__) \ + ? efi_system_table()->boottime->func(__VA_ARGS__) \ : efi64_thunk(efi_table_attr(efi_system_table(), \ - boottime)->mixed_mode.f, __VA_ARGS__)) + boottime)->mixed_mode.func, __VA_ARGS__)) -#define efi_call_runtime(f, ...) \ +#define efi_rt_call(func, ...) \ (efi_is_native() \ - ? efi_system_table()->runtime->f(__VA_ARGS__) \ + ? efi_system_table()->runtime->func(__VA_ARGS__) \ : efi64_thunk(efi_table_attr(efi_system_table(), \ - runtime)->mixed_mode.f, __VA_ARGS__)) + runtime)->mixed_mode.func, __VA_ARGS__)) extern bool efi_reboot_required(void); extern bool efi_is_table_address(unsigned long phys_addr); diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c index e1ec0b2cde29..62280df09dd4 100644 --- a/drivers/firmware/efi/libstub/arm-stub.c +++ b/drivers/firmware/efi/libstub/arm-stub.c @@ -53,8 +53,8 @@ static struct screen_info *setup_graphics(void) struct screen_info *si = NULL; size = 0; - status = efi_call_early(locate_handle, EFI_LOCATE_BY_PROTOCOL, - &gop_proto, NULL, &size, gop_handle); + status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL, + &gop_proto, NULL, &size, gop_handle); if (status == EFI_BUFFER_TOO_SMALL) { si = alloc_screen_info(); if (!si) @@ -70,8 +70,8 @@ void install_memreserve_table(void) efi_guid_t memreserve_table_guid = LINUX_EFI_MEMRESERVE_TABLE_GUID; efi_status_t status; - status = efi_call_early(allocate_pool, EFI_LOADER_DATA, sizeof(*rsv), - (void **)&rsv); + status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, sizeof(*rsv), + (void **)&rsv); if (status != EFI_SUCCESS) { pr_efi_err("Failed to allocate memreserve entry!\n"); return; @@ -81,9 +81,8 @@ void install_memreserve_table(void) rsv->size = 0; atomic_set(&rsv->count, 0); - status = efi_call_early(install_configuration_table, - &memreserve_table_guid, - rsv); + status = efi_bs_call(install_configuration_table, + &memreserve_table_guid, rsv); if (status != EFI_SUCCESS) pr_efi_err("Failed to install memreserve config table!\n"); } diff --git a/drivers/firmware/efi/libstub/arm32-stub.c b/drivers/firmware/efi/libstub/arm32-stub.c index e7a38d912749..7b2a6382b647 100644 --- a/drivers/firmware/efi/libstub/arm32-stub.c +++ b/drivers/firmware/efi/libstub/arm32-stub.c @@ -37,18 +37,18 @@ struct screen_info *alloc_screen_info(void) * its contents while we hand over to the kernel proper from the * decompressor. */ - status = efi_call_early(allocate_pool, EFI_RUNTIME_SERVICES_DATA, - sizeof(*si), (void **)&si); + status = efi_bs_call(allocate_pool, EFI_RUNTIME_SERVICES_DATA, + sizeof(*si), (void **)&si); if (status != EFI_SUCCESS) return NULL; - status = efi_call_early(install_configuration_table, - &screen_info_guid, si); + status = efi_bs_call(install_configuration_table, + &screen_info_guid, si); if (status == EFI_SUCCESS) return si; - efi_call_early(free_pool, si); + efi_bs_call(free_pool, si); return NULL; } @@ -57,8 +57,8 @@ void free_screen_info(struct screen_info *si) if (!si) return; - efi_call_early(install_configuration_table, &screen_info_guid, NULL); - efi_call_early(free_pool, si); + efi_bs_call(install_configuration_table, &screen_info_guid, NULL); + efi_bs_call(free_pool, si); } static efi_status_t reserve_kernel_base(unsigned long dram_base, @@ -91,8 +91,8 @@ static efi_status_t reserve_kernel_base(unsigned long dram_base, */ alloc_addr = dram_base + MAX_UNCOMP_KERNEL_SIZE; nr_pages = MAX_UNCOMP_KERNEL_SIZE / EFI_PAGE_SIZE; - status = efi_call_early(allocate_pages, EFI_ALLOCATE_MAX_ADDRESS, - EFI_BOOT_SERVICES_DATA, nr_pages, &alloc_addr); + status = efi_bs_call(allocate_pages, EFI_ALLOCATE_MAX_ADDRESS, + EFI_BOOT_SERVICES_DATA, nr_pages, &alloc_addr); if (status == EFI_SUCCESS) { if (alloc_addr == dram_base) { *reserve_addr = alloc_addr; @@ -156,11 +156,11 @@ static efi_status_t reserve_kernel_base(unsigned long dram_base, start = max(start, (u64)dram_base); end = min(end, (u64)dram_base + MAX_UNCOMP_KERNEL_SIZE); - status = efi_call_early(allocate_pages, - EFI_ALLOCATE_ADDRESS, - EFI_LOADER_DATA, - (end - start) / EFI_PAGE_SIZE, - &start); + status = efi_bs_call(allocate_pages, + EFI_ALLOCATE_ADDRESS, + EFI_LOADER_DATA, + (end - start) / EFI_PAGE_SIZE, + &start); if (status != EFI_SUCCESS) { pr_efi_err("reserve_kernel_base(): alloc failed.\n"); goto out; @@ -185,7 +185,7 @@ static efi_status_t reserve_kernel_base(unsigned long dram_base, status = EFI_SUCCESS; out: - efi_call_early(free_pool, memory_map); + efi_bs_call(free_pool, memory_map); return status; } diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index beba45e478c7..2915b44132e6 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -129,10 +129,10 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, *image_addr = *reserve_addr = preferred_offset; *reserve_size = round_up(kernel_memsize, EFI_ALLOC_ALIGN); - status = efi_call_early(allocate_pages, EFI_ALLOCATE_ADDRESS, - EFI_LOADER_DATA, - *reserve_size / EFI_PAGE_SIZE, - (efi_physical_addr_t *)reserve_addr); + status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS, + EFI_LOADER_DATA, + *reserve_size / EFI_PAGE_SIZE, + (efi_physical_addr_t *)reserve_addr); } if (status != EFI_SUCCESS) { diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index 8754ec04788b..ef0ffa512c20 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -95,19 +95,19 @@ efi_status_t efi_get_memory_map(struct efi_boot_memmap *map) *map->map_size = *map->desc_size * 32; *map->buff_size = *map->map_size; again: - status = efi_call_early(allocate_pool, EFI_LOADER_DATA, - *map->map_size, (void **)&m); + status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, + *map->map_size, (void **)&m); if (status != EFI_SUCCESS) goto fail; *map->desc_size = 0; key = 0; - status = efi_call_early(get_memory_map, map->map_size, m, - &key, map->desc_size, &desc_version); + status = efi_bs_call(get_memory_map, map->map_size, m, + &key, map->desc_size, &desc_version); if (status == EFI_BUFFER_TOO_SMALL || !mmap_has_headroom(*map->buff_size, *map->map_size, *map->desc_size)) { - efi_call_early(free_pool, m); + efi_bs_call(free_pool, m); /* * Make sure there is some entries of headroom so that the * buffer can be reused for a new map after allocations are @@ -121,7 +121,7 @@ again: } if (status != EFI_SUCCESS) - efi_call_early(free_pool, m); + efi_bs_call(free_pool, m); if (map->key_ptr && status == EFI_SUCCESS) *map->key_ptr = key; @@ -163,7 +163,7 @@ unsigned long get_dram_base(void) } } - efi_call_early(free_pool, map.map); + efi_bs_call(free_pool, map.map); return membase; } @@ -249,9 +249,8 @@ again: if (!max_addr) status = EFI_NOT_FOUND; else { - status = efi_call_early(allocate_pages, - EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA, - nr_pages, &max_addr); + status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS, + EFI_LOADER_DATA, nr_pages, &max_addr); if (status != EFI_SUCCESS) { max = max_addr; max_addr = 0; @@ -261,7 +260,7 @@ again: *addr = max_addr; } - efi_call_early(free_pool, map); + efi_bs_call(free_pool, map); fail: return status; } @@ -328,9 +327,8 @@ efi_status_t efi_low_alloc_above(unsigned long size, unsigned long align, if ((start + size) > end) continue; - status = efi_call_early(allocate_pages, - EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA, - nr_pages, &start); + status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS, + EFI_LOADER_DATA, nr_pages, &start); if (status == EFI_SUCCESS) { *addr = start; break; @@ -340,7 +338,7 @@ efi_status_t efi_low_alloc_above(unsigned long size, unsigned long align, if (i == map_size / desc_size) status = EFI_NOT_FOUND; - efi_call_early(free_pool, map); + efi_bs_call(free_pool, map); fail: return status; } @@ -386,8 +384,8 @@ static efi_status_t efi_file_size(void *__fh, efi_char16_t *filename_16, } grow: - status = efi_call_early(allocate_pool, EFI_LOADER_DATA, - info_sz, (void **)&info); + status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, info_sz, + (void **)&info); if (status != EFI_SUCCESS) { efi_printk("Failed to alloc mem for file info\n"); return status; @@ -395,12 +393,12 @@ grow: status = h->get_info(h, &info_guid, &info_sz, info); if (status == EFI_BUFFER_TOO_SMALL) { - efi_call_early(free_pool, info); + efi_bs_call(free_pool, info); goto grow; } *file_sz = info->file_size; - efi_call_early(free_pool, info); + efi_bs_call(free_pool, info); if (status != EFI_SUCCESS) efi_printk("Failed to get initrd info\n"); @@ -428,8 +426,7 @@ static efi_status_t efi_open_volume(efi_loaded_image_t *image, efi_status_t status; efi_handle_t handle = image->device_handle; - status = efi_call_early(handle_protocol, handle, - &fs_proto, (void **)&io); + status = efi_bs_call(handle_protocol, handle, &fs_proto, (void **)&io); if (status != EFI_SUCCESS) { efi_printk("Failed to handle fs_proto\n"); return status; @@ -562,8 +559,8 @@ efi_status_t handle_cmdline_files(efi_loaded_image_t *image, if (!nr_files) return EFI_SUCCESS; - status = efi_call_early(allocate_pool, EFI_LOADER_DATA, - nr_files * sizeof(*files), (void **)&files); + status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, + nr_files * sizeof(*files), (void **)&files); if (status != EFI_SUCCESS) { pr_efi_err("Failed to alloc mem for file handle list\n"); goto fail; @@ -668,7 +665,7 @@ efi_status_t handle_cmdline_files(efi_loaded_image_t *image, } - efi_call_early(free_pool, files); + efi_bs_call(free_pool, files); *load_addr = file_addr; *load_size = file_size_total; @@ -682,7 +679,7 @@ close_handles: for (k = j; k < i; k++) efi_file_close(files[k].handle); free_files: - efi_call_early(free_pool, files); + efi_bs_call(free_pool, files); fail: *load_addr = 0; *load_size = 0; @@ -728,9 +725,8 @@ efi_status_t efi_relocate_kernel(unsigned long *image_addr, * as possible while respecting the required alignment. */ nr_pages = round_up(alloc_size, EFI_ALLOC_ALIGN) / EFI_PAGE_SIZE; - status = efi_call_early(allocate_pages, - EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA, - nr_pages, &efi_addr); + status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS, + EFI_LOADER_DATA, nr_pages, &efi_addr); new_addr = efi_addr; /* * If preferred address allocation failed allocate as low as @@ -883,7 +879,7 @@ efi_status_t efi_exit_boot_services(void *handle, if (status != EFI_SUCCESS) goto free_map; - status = efi_call_early(exit_boot_services, handle, *map->key_ptr); + status = efi_bs_call(exit_boot_services, handle, *map->key_ptr); if (status == EFI_INVALID_PARAMETER) { /* @@ -900,12 +896,12 @@ efi_status_t efi_exit_boot_services(void *handle, * to get_memory_map() is expected to succeed here. */ *map->map_size = *map->buff_size; - status = efi_call_early(get_memory_map, - map->map_size, - *map->map, - map->key_ptr, - map->desc_size, - map->desc_ver); + status = efi_bs_call(get_memory_map, + map->map_size, + *map->map, + map->key_ptr, + map->desc_size, + map->desc_ver); /* exit_boot_services() was called, thus cannot free */ if (status != EFI_SUCCESS) @@ -916,7 +912,7 @@ efi_status_t efi_exit_boot_services(void *handle, if (status != EFI_SUCCESS) goto fail; - status = efi_call_early(exit_boot_services, handle, *map->key_ptr); + status = efi_bs_call(exit_boot_services, handle, *map->key_ptr); } /* exit_boot_services() was called, thus cannot free */ @@ -926,7 +922,7 @@ efi_status_t efi_exit_boot_services(void *handle, return EFI_SUCCESS; free_map: - efi_call_early(free_pool, *map->map); + efi_bs_call(free_pool, *map->map); fail: return status; } diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index b5d9c9e65213..4e2b33fd6a43 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -76,4 +76,12 @@ void *get_efi_config_table(efi_guid_t guid); fdt_setprop((fdt), (node_offset), (name), &(var), sizeof(var)) #endif +#define get_efi_var(name, vendor, ...) \ + efi_rt_call(get_variable, (efi_char16_t *)(name), \ + (efi_guid_t *)(vendor), __VA_ARGS__) + +#define set_efi_var(name, vendor, ...) \ + efi_rt_call(set_variable, (efi_char16_t *)(name), \ + (efi_guid_t *)(vendor), __VA_ARGS__) + #endif diff --git a/drivers/firmware/efi/libstub/gop.c b/drivers/firmware/efi/libstub/gop.c index 8f746282c219..55e6b3f286fe 100644 --- a/drivers/firmware/efi/libstub/gop.c +++ b/drivers/firmware/efi/libstub/gop.c @@ -110,13 +110,11 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, void *dummy = NULL; efi_physical_addr_t current_fb_base; - status = efi_call_early(handle_protocol, h, - proto, (void **)&gop); + status = efi_bs_call(handle_protocol, h, proto, (void **)&gop); if (status != EFI_SUCCESS) continue; - status = efi_call_early(handle_protocol, h, - &conout_proto, &dummy); + status = efi_bs_call(handle_protocol, h, &conout_proto, &dummy); if (status == EFI_SUCCESS) conout_found = true; @@ -187,20 +185,19 @@ efi_status_t efi_setup_gop(struct screen_info *si, efi_guid_t *proto, efi_status_t status; void **gop_handle = NULL; - status = efi_call_early(allocate_pool, EFI_LOADER_DATA, - size, (void **)&gop_handle); + status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, size, + (void **)&gop_handle); if (status != EFI_SUCCESS) return status; - status = efi_call_early(locate_handle, - EFI_LOCATE_BY_PROTOCOL, - proto, NULL, &size, gop_handle); + status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL, proto, NULL, + &size, gop_handle); if (status != EFI_SUCCESS) goto free_handle; status = setup_gop(si, proto, size, gop_handle); free_handle: - efi_call_early(free_pool, gop_handle); + efi_bs_call(free_pool, gop_handle); return status; } diff --git a/drivers/firmware/efi/libstub/random.c b/drivers/firmware/efi/libstub/random.c index fbd5b5724b19..316ce9ff0193 100644 --- a/drivers/firmware/efi/libstub/random.c +++ b/drivers/firmware/efi/libstub/random.c @@ -32,8 +32,7 @@ efi_status_t efi_get_random_bytes(unsigned long size, u8 *out) efi_status_t status; efi_rng_protocol_t *rng = NULL; - status = efi_call_early(locate_protocol, &rng_proto, NULL, - (void **)&rng); + status = efi_bs_call(locate_protocol, &rng_proto, NULL, (void **)&rng); if (status != EFI_SUCCESS) return status; @@ -141,14 +140,14 @@ efi_status_t efi_random_alloc(unsigned long size, target = round_up(md->phys_addr, align) + target_slot * align; pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE; - status = efi_call_early(allocate_pages, EFI_ALLOCATE_ADDRESS, - EFI_LOADER_DATA, pages, &target); + status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS, + EFI_LOADER_DATA, pages, &target); if (status == EFI_SUCCESS) *addr = target; break; } - efi_call_early(free_pool, memory_map); + efi_bs_call(free_pool, memory_map); return status; } @@ -162,14 +161,13 @@ efi_status_t efi_random_get_seed(void) struct linux_efi_random_seed *seed = NULL; efi_status_t status; - status = efi_call_early(locate_protocol, &rng_proto, NULL, - (void **)&rng); + status = efi_bs_call(locate_protocol, &rng_proto, NULL, (void **)&rng); if (status != EFI_SUCCESS) return status; - status = efi_call_early(allocate_pool, EFI_RUNTIME_SERVICES_DATA, - sizeof(*seed) + EFI_RANDOM_SEED_SIZE, - (void **)&seed); + status = efi_bs_call(allocate_pool, EFI_RUNTIME_SERVICES_DATA, + sizeof(*seed) + EFI_RANDOM_SEED_SIZE, + (void **)&seed); if (status != EFI_SUCCESS) return status; @@ -188,14 +186,13 @@ efi_status_t efi_random_get_seed(void) goto err_freepool; seed->size = EFI_RANDOM_SEED_SIZE; - status = efi_call_early(install_configuration_table, &rng_table_guid, - seed); + status = efi_bs_call(install_configuration_table, &rng_table_guid, seed); if (status != EFI_SUCCESS) goto err_freepool; return EFI_SUCCESS; err_freepool: - efi_call_early(free_pool, seed); + efi_bs_call(free_pool, seed); return status; } diff --git a/drivers/firmware/efi/libstub/secureboot.c b/drivers/firmware/efi/libstub/secureboot.c index 0935d824a6ab..a765378ad18c 100644 --- a/drivers/firmware/efi/libstub/secureboot.c +++ b/drivers/firmware/efi/libstub/secureboot.c @@ -21,11 +21,6 @@ static const efi_char16_t efi_SetupMode_name[] = L"SetupMode"; static const efi_guid_t shim_guid = EFI_SHIM_LOCK_GUID; static const efi_char16_t shim_MokSBState_name[] = L"MokSBState"; -#define get_efi_var(name, vendor, ...) \ - efi_call_runtime(get_variable, \ - (efi_char16_t *)(name), (efi_guid_t *)(vendor), \ - __VA_ARGS__); - /* * Determine whether we're in secure boot mode. * diff --git a/drivers/firmware/efi/libstub/tpm.c b/drivers/firmware/efi/libstub/tpm.c index 4a0017a181bf..1d59e103a2e3 100644 --- a/drivers/firmware/efi/libstub/tpm.c +++ b/drivers/firmware/efi/libstub/tpm.c @@ -20,16 +20,6 @@ static const efi_char16_t efi_MemoryOverWriteRequest_name[] = #define MEMORY_ONLY_RESET_CONTROL_GUID \ EFI_GUID(0xe20939be, 0x32d4, 0x41be, 0xa1, 0x50, 0x89, 0x7f, 0x85, 0xd4, 0x98, 0x29) -#define get_efi_var(name, vendor, ...) \ - efi_call_runtime(get_variable, \ - (efi_char16_t *)(name), (efi_guid_t *)(vendor), \ - __VA_ARGS__) - -#define set_efi_var(name, vendor, ...) \ - efi_call_runtime(set_variable, \ - (efi_char16_t *)(name), (efi_guid_t *)(vendor), \ - __VA_ARGS__) - /* * Enable reboot attack mitigation. This requests that the firmware clear the * RAM on next reboot before proceeding with boot, ensuring that any secrets @@ -72,8 +62,8 @@ void efi_retrieve_tpm2_eventlog(void) efi_tcg2_protocol_t *tcg2_protocol = NULL; int final_events_size = 0; - status = efi_call_early(locate_protocol, &tcg2_guid, NULL, - (void **)&tcg2_protocol); + status = efi_bs_call(locate_protocol, &tcg2_guid, NULL, + (void **)&tcg2_protocol); if (status != EFI_SUCCESS) return; @@ -125,9 +115,8 @@ void efi_retrieve_tpm2_eventlog(void) } /* Allocate space for the logs and copy them. */ - status = efi_call_early(allocate_pool, EFI_LOADER_DATA, - sizeof(*log_tbl) + log_size, - (void **) &log_tbl); + status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, + sizeof(*log_tbl) + log_size, (void **)&log_tbl); if (status != EFI_SUCCESS) { efi_printk("Unable to allocate memory for event log\n"); @@ -166,12 +155,12 @@ void efi_retrieve_tpm2_eventlog(void) log_tbl->version = version; memcpy(log_tbl->log, (void *) first_entry_addr, log_size); - status = efi_call_early(install_configuration_table, - &linux_eventlog_guid, log_tbl); + status = efi_bs_call(install_configuration_table, + &linux_eventlog_guid, log_tbl); if (status != EFI_SUCCESS) goto err_free; return; err_free: - efi_call_early(free_pool, log_tbl); + efi_bs_call(free_pool, log_tbl); } -- cgit v1.2.3-59-g8ed1b From 7d4e323d02c9a37d68c067f5a951307f3af14694 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:24 +0100 Subject: efi/libstub: Tidy up types and names of global cmdline variables Drop leading underscores and use bool not int for true/false variables set on the command line. Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-25-ardb@kernel.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/libstub/arm-stub.c | 2 +- drivers/firmware/efi/libstub/efi-stub-helper.c | 36 +++++++++++++------------- drivers/firmware/efi/libstub/efistub.h | 12 ++++++--- 3 files changed, 28 insertions(+), 22 deletions(-) diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c index 62280df09dd4..7bbef4a67350 100644 --- a/drivers/firmware/efi/libstub/arm-stub.c +++ b/drivers/firmware/efi/libstub/arm-stub.c @@ -37,7 +37,7 @@ static u64 virtmap_base = EFI_RT_VIRTUAL_BASE; -static efi_system_table_t *__section(.data) sys_table; +static efi_system_table_t *__efistub_global sys_table; __pure efi_system_table_t *efi_system_table(void) { diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index ef0ffa512c20..f1b9c36934e9 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -27,24 +27,24 @@ */ #define EFI_READ_CHUNK_SIZE (1024 * 1024) -static unsigned long __chunk_size = EFI_READ_CHUNK_SIZE; +static unsigned long efi_chunk_size = EFI_READ_CHUNK_SIZE; -static int __section(.data) __nokaslr; -static int __section(.data) __quiet; -static int __section(.data) __novamap; -static bool __section(.data) efi_nosoftreserve; +static bool __efistub_global efi_nokaslr; +static bool __efistub_global efi_quiet; +static bool __efistub_global efi_novamap; +static bool __efistub_global efi_nosoftreserve; -int __pure nokaslr(void) +bool __pure nokaslr(void) { - return __nokaslr; + return efi_nokaslr; } -int __pure is_quiet(void) +bool __pure is_quiet(void) { - return __quiet; + return efi_quiet; } -int __pure novamap(void) +bool __pure novamap(void) { - return __novamap; + return efi_novamap; } bool __pure __efi_soft_reserve_enabled(void) { @@ -455,11 +455,11 @@ efi_status_t efi_parse_options(char const *cmdline) str = strstr(cmdline, "nokaslr"); if (str == cmdline || (str && str > cmdline && *(str - 1) == ' ')) - __nokaslr = 1; + efi_nokaslr = true; str = strstr(cmdline, "quiet"); if (str == cmdline || (str && str > cmdline && *(str - 1) == ' ')) - __quiet = 1; + efi_quiet = true; /* * If no EFI parameters were specified on the cmdline we've got @@ -479,18 +479,18 @@ efi_status_t efi_parse_options(char const *cmdline) while (*str && *str != ' ') { if (!strncmp(str, "nochunk", 7)) { str += strlen("nochunk"); - __chunk_size = -1UL; + efi_chunk_size = -1UL; } if (!strncmp(str, "novamap", 7)) { str += strlen("novamap"); - __novamap = 1; + efi_novamap = true; } if (IS_ENABLED(CONFIG_EFI_SOFT_RESERVE) && !strncmp(str, "nosoftreserve", 7)) { str += strlen("nosoftreserve"); - efi_nosoftreserve = 1; + efi_nosoftreserve = true; } /* Group words together, delimited by "," */ @@ -644,8 +644,8 @@ efi_status_t handle_cmdline_files(efi_loaded_image_t *image, while (size) { unsigned long chunksize; - if (IS_ENABLED(CONFIG_X86) && size > __chunk_size) - chunksize = __chunk_size; + if (IS_ENABLED(CONFIG_X86) && size > efi_chunk_size) + chunksize = efi_chunk_size; else chunksize = size; diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 4e2b33fd6a43..c244b165005e 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -25,9 +25,15 @@ #define EFI_ALLOC_ALIGN EFI_PAGE_SIZE #endif -extern int __pure nokaslr(void); -extern int __pure is_quiet(void); -extern int __pure novamap(void); +#ifdef CONFIG_ARM +#define __efistub_global __section(.data) +#else +#define __efistub_global +#endif + +extern bool __pure nokaslr(void); +extern bool __pure is_quiet(void); +extern bool __pure novamap(void); extern __pure efi_system_table_t *efi_system_table(void); -- cgit v1.2.3-59-g8ed1b From 0679715e714345d273c0e1eb78078535ffc4b2a1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:25 +0100 Subject: efi/libstub/x86: Avoid globals to store context during mixed mode calls Instead of storing the return address in a global variable when calling a 32-bit EFI service from the 64-bit stub, avoid the indirection via efi_exit32, and take the return address from the stack. Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-26-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/efi_thunk_64.S | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S index 593913692d16..6d95eb6b8912 100644 --- a/arch/x86/boot/compressed/efi_thunk_64.S +++ b/arch/x86/boot/compressed/efi_thunk_64.S @@ -10,7 +10,7 @@ * needs to be able to service interrupts. * * On the plus side, we don't have to worry about mangling 64-bit - * addresses into 32-bits because we're executing with an identify + * addresses into 32-bits because we're executing with an identity * mapped pagetable and haven't transitioned to 64-bit virtual addresses * yet. */ @@ -28,7 +28,7 @@ SYM_FUNC_START(efi64_thunk) push %rbx subq $8, %rsp - leaq efi_exit32(%rip), %rax + leaq 1f(%rip), %rax movl %eax, 4(%rsp) leaq efi_gdt64(%rip), %rax movl %eax, (%rsp) @@ -55,9 +55,6 @@ SYM_FUNC_START(efi64_thunk) sgdt save_gdt(%rip) - leaq 1f(%rip), %rbx - movq %rbx, func_rt_ptr(%rip) - /* * Switch to gdt with 32-bit segments. This is the firmware GDT * that was installed when the kernel started executing. This @@ -72,6 +69,7 @@ SYM_FUNC_START(efi64_thunk) lretq 1: addq $32, %rsp + movq %rdi, %rax lgdt save_gdt(%rip) @@ -99,13 +97,6 @@ SYM_FUNC_START(efi64_thunk) ret SYM_FUNC_END(efi64_thunk) -SYM_FUNC_START_LOCAL(efi_exit32) - movq func_rt_ptr(%rip), %rax - push %rax - mov %rdi, %rax - ret -SYM_FUNC_END(efi_exit32) - .code32 /* * EFI service pointer must be in %edi. @@ -186,8 +177,6 @@ SYM_DATA_START_LOCAL(save_gdt) .quad 0 SYM_DATA_END(save_gdt) -SYM_DATA_LOCAL(func_rt_ptr, .quad 0) - SYM_DATA_START(efi_gdt64) .word efi_gdt64_end - efi_gdt64 .long 0 /* Filled out by user */ -- cgit v1.2.3-59-g8ed1b From 12dc9e154dbe18db337c9f7e4b1d40128e675f0f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 3 Jan 2020 12:39:34 +0100 Subject: efi/libstub: Fix boot argument handling in mixed mode entry code The mixed mode refactor actually broke mixed mode by failing to pass the bootparam structure to startup_32(). This went unnoticed because it apparently has a high tolerance for being passed random junk, and still boots fine in some cases. So let's fix this by populating %esi as required when entering via efi32_stub_entry, and while at it, preserve the arguments themselves instead of their address in memory (via the stack pointer) since that memory could be clobbered before we get to it. Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Arvind Sankar Cc: Matthew Garrett Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20200103113953.9571-2-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/head_64.S | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index a6f3ee9ca61d..44a6bb6964b5 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -208,13 +208,12 @@ SYM_FUNC_START(startup_32) pushl $__KERNEL_CS leal startup_64(%ebp), %eax #ifdef CONFIG_EFI_MIXED - movl efi32_boot_args(%ebp), %ebx - cmp $0, %ebx + movl efi32_boot_args(%ebp), %edi + cmp $0, %edi jz 1f leal handover_entry(%ebp), %eax - movl 0(%ebx), %edi - movl 4(%ebx), %esi - movl 8(%ebx), %edx + movl %esi, %edx + movl efi32_boot_args+4(%ebp), %esi movl $0x0, %ecx 1: #endif @@ -232,12 +231,16 @@ SYM_FUNC_END(startup_32) .org 0x190 SYM_FUNC_START(efi32_stub_entry) add $0x4, %esp /* Discard return address */ + popl %ecx + popl %edx + popl %esi call 1f 1: pop %ebp subl $1b, %ebp - movl %esp, efi32_boot_args(%ebp) + movl %ecx, efi32_boot_args(%ebp) + movl %edx, efi32_boot_args+4(%ebp) sgdtl efi32_boot_gdt(%ebp) /* Disable paging */ @@ -628,7 +631,7 @@ SYM_DATA_START_LOCAL(gdt) SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end) #ifdef CONFIG_EFI_MIXED -SYM_DATA_LOCAL(efi32_boot_args, .long 0) +SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0) #endif /* -- cgit v1.2.3-59-g8ed1b From bc7c9d62052b32b1f1fe5a141165a0f4d82103ac Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 3 Jan 2020 12:39:35 +0100 Subject: efi/libstub/x86: Force 'hidden' visibility for extern declarations Commit c3710de5065d ("efi/libstub/x86: Drop __efi_early() export and efi_config struct") introduced a reference from C code in eboot.c to the startup_32 symbol defined in the .S startup code. This results in a GOT based reference to startup_32, and since GOT entries carry absolute addresses, they need to be fixed up before they can be used. On modern toolchains (binutils 2.26 or later), this reference is relaxed into a R_386_GOTOFF relocation (or the analogous X86_64 one) which never uses the absolute address in the entry, and so we get away with not fixing up the GOT table before calling the EFI entry point. However, GCC 4.6 combined with a binutils of the era (2.24) will produce a true GOT indirected reference, resulting in a wrong value to be returned for the address of startup_32() if the boot code is not running at the address it was linked at. Fortunately, we can easily override this behavior, and force GCC to emit the GOTOFF relocations explicitly, by setting the visibility pragma 'hidden'. Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arvind Sankar Cc: Matthew Garrett Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20200103113953.9571-3-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/eboot.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index da04948d75ed..565ee4733579 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -6,6 +6,8 @@ * * ----------------------------------------------------------------------- */ +#pragma GCC visibility push(hidden) + #include #include -- cgit v1.2.3-59-g8ed1b From 6cfcd6f001b42fdbe6948cd113a5024945a8f50f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 3 Jan 2020 12:39:36 +0100 Subject: efi/x86: Re-disable RT services for 32-bit kernels running on 64-bit EFI Commit a8147dba75b1 ("efi/x86: Rename efi_is_native() to efi_is_mixed()") renamed and refactored efi_is_native() into efi_is_mixed(), but failed to take into account that these are not diametrical opposites. Mixed mode is a construct that permits 64-bit kernels to boot on 32-bit firmware, but there is another non-native combination which is supported, i.e., 32-bit kernels booting on 64-bit firmware, but only for boot and not for runtime services. Also, mixed mode can be disabled in Kconfig, in which case the 64-bit kernel can still be booted from 32-bit firmware, but without access to runtime services. Due to this oversight, efi_runtime_supported() now incorrectly returns true for such configurations, resulting in crashes at boot. So fix this by making efi_runtime_supported() aware of this. As a side effect, some efi_thunk_xxx() stubs have become obsolete, so remove them as well. Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arvind Sankar Cc: Matthew Garrett Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20200103113953.9571-4-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/efi.h | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 2d1378f19b74..b35b5d423e9d 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -163,10 +163,10 @@ static inline bool efi_is_mixed(void) static inline bool efi_runtime_supported(void) { - if (!efi_is_mixed()) + if (IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT)) return true; - if (!efi_enabled(EFI_OLD_MEMMAP)) + if (IS_ENABLED(CONFIG_EFI_MIXED) && !efi_enabled(EFI_OLD_MEMMAP)) return true; return false; @@ -176,7 +176,6 @@ extern void parse_efi_setup(u64 phys_addr, u32 data_len); extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt); -#ifdef CONFIG_EFI_MIXED extern void efi_thunk_runtime_setup(void); extern efi_status_t efi_thunk_set_virtual_address_map( void *phys_set_virtual_address_map, @@ -184,19 +183,6 @@ extern efi_status_t efi_thunk_set_virtual_address_map( unsigned long descriptor_size, u32 descriptor_version, efi_memory_desc_t *virtual_map); -#else -static inline void efi_thunk_runtime_setup(void) {} -static inline efi_status_t efi_thunk_set_virtual_address_map( - void *phys_set_virtual_address_map, - unsigned long memory_map_size, - unsigned long descriptor_size, - u32 descriptor_version, - efi_memory_desc_t *virtual_map) -{ - return EFI_SUCCESS; -} -#endif /* CONFIG_EFI_MIXED */ - /* arch specific definitions used by the stub code */ -- cgit v1.2.3-59-g8ed1b From ffc2760bcf2dba0dbef74013ed73eea8310cc52c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 3 Jan 2020 12:39:37 +0100 Subject: efi/x86: Map the entire EFI vendor string before copying it Fix a couple of issues with the way we map and copy the vendor string: - we map only 2 bytes, which usually works since you get at least a page, but if the vendor string happens to cross a page boundary, a crash will result - only call early_memunmap() if early_memremap() succeeded, or we will call it with a NULL address which it doesn't like, - while at it, switch to early_memremap_ro(), and array indexing rather than pointer dereferencing to read the CHAR16 characters. Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arvind Sankar Cc: Matthew Garrett Cc: linux-efi@vger.kernel.org Fixes: 5b83683f32b1 ("x86: EFI runtime service support") Link: https://lkml.kernel.org/r/20200103113953.9571-5-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index d96953d9d4e7..3ce32c31bb61 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -541,7 +541,6 @@ void __init efi_init(void) efi_char16_t *c16; char vendor[100] = "unknown"; int i = 0; - void *tmp; #ifdef CONFIG_X86_32 if (boot_params.efi_info.efi_systab_hi || @@ -566,14 +565,16 @@ void __init efi_init(void) /* * Show what we know for posterity */ - c16 = tmp = early_memremap(efi.systab->fw_vendor, 2); + c16 = early_memremap_ro(efi.systab->fw_vendor, + sizeof(vendor) * sizeof(efi_char16_t)); if (c16) { - for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i) - vendor[i] = *c16++; + for (i = 0; i < sizeof(vendor) - 1 && c16[i]; ++i) + vendor[i] = c16[i]; vendor[i] = '\0'; - } else + early_memunmap(c16, sizeof(vendor) * sizeof(efi_char16_t)); + } else { pr_err("Could not map the firmware vendor!\n"); - early_memunmap(tmp, 2); + } pr_info("EFI v%u.%.02u by %s\n", efi.systab->hdr.revision >> 16, -- cgit v1.2.3-59-g8ed1b From 89ed486532c4d155565cc4b7984a918ee3c58f80 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 3 Jan 2020 12:39:38 +0100 Subject: efi/x86: Avoid redundant cast of EFI firmware service pointer All EFI firmware call prototypes have been annotated as __efiapi, permitting us to attach attributes regarding the calling convention by overriding __efiapi to an architecture specific value. On 32-bit x86, EFI firmware calls use the plain calling convention where all arguments are passed via the stack, and cleaned up by the caller. Let's add this to the __efiapi definition so we no longer need to cast the function pointers before invoking them. Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arvind Sankar Cc: Matthew Garrett Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20200103113953.9571-6-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/efi.h | 8 +------- include/linux/efi.h | 4 +++- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index b35b5d423e9d..09c3fc468793 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -51,13 +51,7 @@ extern asmlinkage unsigned long efi_call_phys(void *, ...); }) -/* - * Wrap all the virtual calls in a way that forces the parameters on the stack. - */ -#define arch_efi_call_virt(p, f, args...) \ -({ \ - ((efi_##f##_t __attribute__((regparm(0)))*) p->f)(args); \ -}) +#define arch_efi_call_virt(p, f, args...) p->f(args) #define efi_ioremap(addr, size, type, attr) ioremap_cache(addr, size) diff --git a/include/linux/efi.h b/include/linux/efi.h index 726673e98990..952c1659dfd9 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -48,8 +48,10 @@ typedef u16 efi_char16_t; /* UNICODE character */ typedef u64 efi_physical_addr_t; typedef void *efi_handle_t; -#ifdef CONFIG_X86_64 +#if defined(CONFIG_X86_64) #define __efiapi __attribute__((ms_abi)) +#elif defined(CONFIG_X86_32) +#define __efiapi __attribute__((regparm(0))) #else #define __efiapi #endif -- cgit v1.2.3-59-g8ed1b From 98dd0e3a0ceebd594942777e4aca9791177e3f2b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 3 Jan 2020 12:39:39 +0100 Subject: efi/x86: Split off some old memmap handling into separate routines In a subsequent patch, we will fold the prolog/epilog routines that are part of the support code to call SetVirtualAddressMap() with a 1:1 mapping into the callers. However, the 64-bit version mostly consists of ugly mapping code that is only used when efi=old_map is in effect, which is extremely rare. So let's move this code out of the way so it does not clutter the common code. Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arvind Sankar Cc: Matthew Garrett Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20200103113953.9571-7-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi_64.c | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 03c2ed3c645c..a72bbabbc595 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -72,7 +72,9 @@ static void __init early_code_mapping_set_exec(int executable) } } -pgd_t * __init efi_call_phys_prolog(void) +void __init efi_old_memmap_phys_epilog(pgd_t *save_pgd); + +pgd_t * __init efi_old_memmap_phys_prolog(void) { unsigned long vaddr, addr_pgd, addr_p4d, addr_pud; pgd_t *save_pgd, *pgd_k, *pgd_efi; @@ -82,11 +84,6 @@ pgd_t * __init efi_call_phys_prolog(void) int pgd; int n_pgds, i, j; - if (!efi_enabled(EFI_OLD_MEMMAP)) { - efi_switch_mm(&efi_mm); - return efi_mm.pgd; - } - early_code_mapping_set_exec(1); n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE); @@ -143,11 +140,11 @@ pgd_t * __init efi_call_phys_prolog(void) __flush_tlb_all(); return save_pgd; out: - efi_call_phys_epilog(save_pgd); + efi_old_memmap_phys_epilog(save_pgd); return NULL; } -void __init efi_call_phys_epilog(pgd_t *save_pgd) +void __init efi_old_memmap_phys_epilog(pgd_t *save_pgd) { /* * After the lock is released, the original page table is restored. @@ -158,11 +155,6 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd) p4d_t *p4d; pud_t *pud; - if (!efi_enabled(EFI_OLD_MEMMAP)) { - efi_switch_mm(efi_scratch.prev_mm); - return; - } - nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) { @@ -193,6 +185,23 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd) early_code_mapping_set_exec(0); } +pgd_t * __init efi_call_phys_prolog(void) +{ + if (efi_enabled(EFI_OLD_MEMMAP)) + return efi_old_memmap_phys_prolog(); + + efi_switch_mm(&efi_mm); + return efi_mm.pgd; +} + +void __init efi_call_phys_epilog(pgd_t *save_pgd) +{ + if (efi_enabled(EFI_OLD_MEMMAP)) + efi_old_memmap_phys_epilog(save_pgd); + else + efi_switch_mm(efi_scratch.prev_mm); +} + EXPORT_SYMBOL_GPL(efi_mm); /* -- cgit v1.2.3-59-g8ed1b From 6982947045734480b8b57521e8068073fe36bd14 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 3 Jan 2020 12:39:40 +0100 Subject: efi/x86: Split SetVirtualAddresMap() wrappers into 32 and 64 bit versions Split the phys_efi_set_virtual_address_map() routine into 32 and 64 bit versions, so we can simplify them individually in subsequent patches. There is very little overlap between the logic anyway, and this has already been factored out in prolog/epilog routines which are completely different between 32 bit and 64 bit. So let's take it one step further, and get rid of the overlap completely. Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arvind Sankar Cc: Matthew Garrett Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20200103113953.9571-8-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/efi.h | 8 +++--- arch/x86/platform/efi/efi.c | 30 ++-------------------- arch/x86/platform/efi/efi_32.c | 21 ++++++++++++---- arch/x86/platform/efi/efi_64.c | 56 +++++++++++++++++++++++++++--------------- 4 files changed, 58 insertions(+), 57 deletions(-) diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 09c3fc468793..e29e5dc0b750 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -61,8 +61,6 @@ extern asmlinkage unsigned long efi_call_phys(void *, ...); extern asmlinkage u64 efi_call(void *fp, ...); -#define efi_call_phys(f, args...) efi_call((f), args) - /* * struct efi_scratch - Scratch space used while switching to/from efi_mm * @phys_stack: stack used during EFI Mixed Mode @@ -115,8 +113,6 @@ extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size, extern struct efi_scratch efi_scratch; extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable); extern int __init efi_memblock_x86_reserve_range(void); -extern pgd_t * __init efi_call_phys_prolog(void); -extern void __init efi_call_phys_epilog(pgd_t *save_pgd); extern void __init efi_print_memmap(void); extern void __init efi_memory_uc(u64 addr, unsigned long size); extern void __init efi_map_region(efi_memory_desc_t *md); @@ -177,6 +173,10 @@ extern efi_status_t efi_thunk_set_virtual_address_map( unsigned long descriptor_size, u32 descriptor_version, efi_memory_desc_t *virtual_map); +efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size, + unsigned long descriptor_size, + u32 descriptor_version, + efi_memory_desc_t *virtual_map); /* arch specific definitions used by the stub code */ diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 3ce32c31bb61..50f8123e658a 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -54,7 +54,7 @@ #include #include -static struct efi efi_phys __initdata; +struct efi efi_phys __initdata; static efi_system_table_t efi_systab __initdata; static efi_config_table_type_t arch_tables[] __initdata = { @@ -97,32 +97,6 @@ static int __init setup_add_efi_memmap(char *arg) } early_param("add_efi_memmap", setup_add_efi_memmap); -static efi_status_t __init phys_efi_set_virtual_address_map( - unsigned long memory_map_size, - unsigned long descriptor_size, - u32 descriptor_version, - efi_memory_desc_t *virtual_map) -{ - efi_status_t status; - unsigned long flags; - pgd_t *save_pgd; - - save_pgd = efi_call_phys_prolog(); - if (!save_pgd) - return EFI_ABORTED; - - /* Disable interrupts around EFI calls: */ - local_irq_save(flags); - status = efi_call_phys(efi_phys.set_virtual_address_map, - memory_map_size, descriptor_size, - descriptor_version, virtual_map); - local_irq_restore(flags); - - efi_call_phys_epilog(save_pgd); - - return status; -} - void __init efi_find_mirror(void) { efi_memory_desc_t *md; @@ -1042,7 +1016,7 @@ static void __init __efi_enter_virtual_mode(void) efi_sync_low_kernel_mappings(); if (!efi_is_mixed()) { - status = phys_efi_set_virtual_address_map( + status = efi_set_virtual_address_map( efi.memmap.desc_size * count, efi.memmap.desc_size, efi.memmap.desc_version, diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 9959657127f4..185950ade0e9 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -66,9 +66,16 @@ void __init efi_map_region(efi_memory_desc_t *md) void __init efi_map_region_fixed(efi_memory_desc_t *md) {} void __init parse_efi_setup(u64 phys_addr, u32 data_len) {} -pgd_t * __init efi_call_phys_prolog(void) +extern struct efi efi_phys; + +efi_status_t __init efi_set_virtual_address_map(unsigned long memory_map_size, + unsigned long descriptor_size, + u32 descriptor_version, + efi_memory_desc_t *virtual_map) { struct desc_ptr gdt_descr; + efi_status_t status; + unsigned long flags; pgd_t *save_pgd; /* Current pgd is swapper_pg_dir, we'll restore it later: */ @@ -80,14 +87,18 @@ pgd_t * __init efi_call_phys_prolog(void) gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); - return save_pgd; -} + /* Disable interrupts around EFI calls: */ + local_irq_save(flags); + status = efi_call_phys(efi_phys.set_virtual_address_map, + memory_map_size, descriptor_size, + descriptor_version, virtual_map); + local_irq_restore(flags); -void __init efi_call_phys_epilog(pgd_t *save_pgd) -{ load_fixmap_gdt(0); load_cr3(save_pgd); __flush_tlb_all(); + + return status; } void __init efi_runtime_update_mappings(void) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index a72bbabbc595..a7f11d1ff7c4 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -72,9 +72,9 @@ static void __init early_code_mapping_set_exec(int executable) } } -void __init efi_old_memmap_phys_epilog(pgd_t *save_pgd); +static void __init efi_old_memmap_phys_epilog(pgd_t *save_pgd); -pgd_t * __init efi_old_memmap_phys_prolog(void) +static pgd_t * __init efi_old_memmap_phys_prolog(void) { unsigned long vaddr, addr_pgd, addr_p4d, addr_pud; pgd_t *save_pgd, *pgd_k, *pgd_efi; @@ -144,7 +144,7 @@ out: return NULL; } -void __init efi_old_memmap_phys_epilog(pgd_t *save_pgd) +static void __init efi_old_memmap_phys_epilog(pgd_t *save_pgd) { /* * After the lock is released, the original page table is restored. @@ -185,23 +185,6 @@ void __init efi_old_memmap_phys_epilog(pgd_t *save_pgd) early_code_mapping_set_exec(0); } -pgd_t * __init efi_call_phys_prolog(void) -{ - if (efi_enabled(EFI_OLD_MEMMAP)) - return efi_old_memmap_phys_prolog(); - - efi_switch_mm(&efi_mm); - return efi_mm.pgd; -} - -void __init efi_call_phys_epilog(pgd_t *save_pgd) -{ - if (efi_enabled(EFI_OLD_MEMMAP)) - efi_old_memmap_phys_epilog(save_pgd); - else - efi_switch_mm(efi_scratch.prev_mm); -} - EXPORT_SYMBOL_GPL(efi_mm); /* @@ -1018,3 +1001,36 @@ void efi_thunk_runtime_setup(void) efi.query_capsule_caps = efi_thunk_query_capsule_caps; } #endif /* CONFIG_EFI_MIXED */ + +efi_status_t __init efi_set_virtual_address_map(unsigned long memory_map_size, + unsigned long descriptor_size, + u32 descriptor_version, + efi_memory_desc_t *virtual_map) +{ + efi_status_t status; + unsigned long flags; + pgd_t *save_pgd = NULL; + + if (efi_enabled(EFI_OLD_MEMMAP)) { + save_pgd = efi_old_memmap_phys_prolog(); + if (!save_pgd) + return EFI_ABORTED; + } else { + efi_switch_mm(&efi_mm); + } + + /* Disable interrupts around EFI calls: */ + local_irq_save(flags); + status = efi_call(efi.systab->runtime->set_virtual_address_map, + memory_map_size, descriptor_size, + descriptor_version, virtual_map); + local_irq_restore(flags); + + + if (save_pgd) + efi_old_memmap_phys_epilog(save_pgd); + else + efi_switch_mm(efi_scratch.prev_mm); + + return status; +} -- cgit v1.2.3-59-g8ed1b From a46d674068b69b3897fc0d659e25f74b7ab52647 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 3 Jan 2020 12:39:41 +0100 Subject: efi/x86: Simplify i386 efi_call_phys() firmware call wrapper The variadic efi_call_phys() wrapper that exists on i386 was originally created to call into any EFI firmware runtime service, but in practice, we only use it once, to call SetVirtualAddressMap() during early boot. The flexibility provided by the variadic nature also makes it type unsafe, and makes the assembler code more complicated than needed, since it has to deal with an unknown number of arguments living on the stack. So clean this up, by renaming the helper to efi_call_svam(), and dropping the unneeded complexity. Let's also drop the reference to the efi_phys struct and grab the address from the EFI system table directly. Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arvind Sankar Cc: Matthew Garrett Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20200103113953.9571-9-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/efi.h | 3 - arch/x86/platform/efi/efi_32.c | 5 +- arch/x86/platform/efi/efi_stub_32.S | 109 ++++++------------------------------ 3 files changed, 20 insertions(+), 97 deletions(-) diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index e29e5dc0b750..cb08035b89a0 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -35,9 +35,6 @@ #define ARCH_EFI_IRQ_FLAGS_MASK X86_EFLAGS_IF #ifdef CONFIG_X86_32 - -extern asmlinkage unsigned long efi_call_phys(void *, ...); - #define arch_efi_call_virt_setup() \ ({ \ kernel_fpu_begin(); \ diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 185950ade0e9..71dddd1620f9 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -66,7 +66,8 @@ void __init efi_map_region(efi_memory_desc_t *md) void __init efi_map_region_fixed(efi_memory_desc_t *md) {} void __init parse_efi_setup(u64 phys_addr, u32 data_len) {} -extern struct efi efi_phys; +efi_status_t efi_call_svam(efi_set_virtual_address_map_t *__efiapi *, + u32, u32, u32, void *); efi_status_t __init efi_set_virtual_address_map(unsigned long memory_map_size, unsigned long descriptor_size, @@ -89,7 +90,7 @@ efi_status_t __init efi_set_virtual_address_map(unsigned long memory_map_size, /* Disable interrupts around EFI calls: */ local_irq_save(flags); - status = efi_call_phys(efi_phys.set_virtual_address_map, + status = efi_call_svam(&efi.systab->runtime->set_virtual_address_map, memory_map_size, descriptor_size, descriptor_version, virtual_map); local_irq_restore(flags); diff --git a/arch/x86/platform/efi/efi_stub_32.S b/arch/x86/platform/efi/efi_stub_32.S index eed8b5b441f8..75c46e7a809f 100644 --- a/arch/x86/platform/efi/efi_stub_32.S +++ b/arch/x86/platform/efi/efi_stub_32.S @@ -7,118 +7,43 @@ */ #include +#include #include -/* - * efi_call_phys(void *, ...) is a function with variable parameters. - * All the callers of this function assure that all the parameters are 4-bytes. - */ - -/* - * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save. - * So we'd better save all of them at the beginning of this function and restore - * at the end no matter how many we use, because we can not assure EFI runtime - * service functions will comply with gcc calling convention, too. - */ + __INIT +SYM_FUNC_START(efi_call_svam) + push 8(%esp) + push 8(%esp) + push %ecx + push %edx -.text -SYM_FUNC_START(efi_call_phys) /* - * 0. The function can only be called in Linux kernel. So CS has been - * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found - * the values of these registers are the same. And, the corresponding - * GDT entries are identical. So I will do nothing about segment reg - * and GDT, but change GDT base register in prolog and epilog. - */ - - /* - * 1. Now I am running with EIP = + PAGE_OFFSET. - * But to make it smoothly switch from virtual mode to flat mode. - * The mapping of lower virtual memory has been created in prolog and - * epilog. + * Switch to the flat mapped alias of this routine, by jumping to the + * address of label '1' after subtracting PAGE_OFFSET from it. */ movl $1f, %edx subl $__PAGE_OFFSET, %edx jmp *%edx 1: - /* - * 2. Now on the top of stack is the return - * address in the caller of efi_call_phys(), then parameter 1, - * parameter 2, ..., param n. To make things easy, we save the return - * address of efi_call_phys in a global variable. - */ - popl %edx - movl %edx, saved_return_addr - /* get the function pointer into ECX*/ - popl %ecx - movl %ecx, efi_rt_function_ptr - movl $2f, %edx - subl $__PAGE_OFFSET, %edx - pushl %edx - - /* - * 3. Clear PG bit in %CR0. - */ + /* disable paging */ movl %cr0, %edx andl $0x7fffffff, %edx movl %edx, %cr0 - jmp 1f -1: - /* - * 4. Adjust stack pointer. - */ + /* convert the stack pointer to a flat mapped address */ subl $__PAGE_OFFSET, %esp - /* - * 5. Call the physical function. - */ - jmp *%ecx + /* call the EFI routine */ + call *(%eax) -2: - /* - * 6. After EFI runtime service returns, control will return to - * following instruction. We'd better readjust stack pointer first. - */ - addl $__PAGE_OFFSET, %esp + /* convert ESP back to a kernel VA, and pop the outgoing args */ + addl $__PAGE_OFFSET + 16, %esp - /* - * 7. Restore PG bit - */ + /* re-enable paging */ movl %cr0, %edx orl $0x80000000, %edx movl %edx, %cr0 - jmp 1f -1: - /* - * 8. Now restore the virtual mode from flat mode by - * adding EIP with PAGE_OFFSET. - */ - movl $1f, %edx - jmp *%edx -1: - - /* - * 9. Balance the stack. And because EAX contain the return value, - * we'd better not clobber it. - */ - leal efi_rt_function_ptr, %edx - movl (%edx), %ecx - pushl %ecx - /* - * 10. Push the saved return address onto the stack and return. - */ - leal saved_return_addr, %edx - movl (%edx), %ecx - pushl %ecx ret -SYM_FUNC_END(efi_call_phys) -.previous - -.data -saved_return_addr: - .long 0 -efi_rt_function_ptr: - .long 0 +SYM_FUNC_END(efi_call_svam) -- cgit v1.2.3-59-g8ed1b From e5f930fe8dafd2055220c95958926af16ee20713 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 3 Jan 2020 12:39:42 +0100 Subject: efi/x86: Simplify 64-bit EFI firmware call wrapper The efi_call() wrapper used to invoke EFI runtime services serves a number of purposes: - realign the stack to 16 bytes - preserve FP and CR0 register state - translate from SysV to MS calling convention. Preserving CR0.TS is no longer necessary in Linux, and preserving the FP register state is also redundant in most cases, since efi_call() is almost always used from within the scope of a pair of kernel_fpu_begin()/ kernel_fpu_end() calls, with the exception of the early call to SetVirtualAddressMap() and the SGI UV support code. So let's add a pair of kernel_fpu_begin()/_end() calls there as well, and remove the unnecessary code from the assembly implementation of efi_call(), and only keep the pieces that deal with the stack alignment and the ABI translation. Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arvind Sankar Cc: Matthew Garrett Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20200103113953.9571-10-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/Makefile | 1 - arch/x86/platform/efi/efi_64.c | 3 +++ arch/x86/platform/efi/efi_stub_64.S | 39 ++++--------------------------------- arch/x86/platform/uv/bios_uv.c | 7 +++++-- 4 files changed, 12 insertions(+), 38 deletions(-) diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile index fe29f3f5d384..7ec3a8b31f8b 100644 --- a/arch/x86/platform/efi/Makefile +++ b/arch/x86/platform/efi/Makefile @@ -1,6 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 OBJECT_FILES_NON_STANDARD_efi_thunk_$(BITS).o := y -OBJECT_FILES_NON_STANDARD_efi_stub_$(BITS).o := y obj-$(CONFIG_EFI) += quirks.o efi.o efi_$(BITS).o efi_stub_$(BITS).o obj-$(CONFIG_EFI_MIXED) += efi_thunk_$(BITS).o diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index a7f11d1ff7c4..03565dad0c4b 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -1019,6 +1019,8 @@ efi_status_t __init efi_set_virtual_address_map(unsigned long memory_map_size, efi_switch_mm(&efi_mm); } + kernel_fpu_begin(); + /* Disable interrupts around EFI calls: */ local_irq_save(flags); status = efi_call(efi.systab->runtime->set_virtual_address_map, @@ -1026,6 +1028,7 @@ efi_status_t __init efi_set_virtual_address_map(unsigned long memory_map_size, descriptor_version, virtual_map); local_irq_restore(flags); + kernel_fpu_end(); if (save_pgd) efi_old_memmap_phys_epilog(save_pgd); diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index b1d2313fe3bf..e7e1020f4ccb 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S @@ -8,41 +8,12 @@ */ #include -#include -#include -#include -#include - -#define SAVE_XMM \ - mov %rsp, %rax; \ - subq $0x70, %rsp; \ - and $~0xf, %rsp; \ - mov %rax, (%rsp); \ - mov %cr0, %rax; \ - clts; \ - mov %rax, 0x8(%rsp); \ - movaps %xmm0, 0x60(%rsp); \ - movaps %xmm1, 0x50(%rsp); \ - movaps %xmm2, 0x40(%rsp); \ - movaps %xmm3, 0x30(%rsp); \ - movaps %xmm4, 0x20(%rsp); \ - movaps %xmm5, 0x10(%rsp) - -#define RESTORE_XMM \ - movaps 0x60(%rsp), %xmm0; \ - movaps 0x50(%rsp), %xmm1; \ - movaps 0x40(%rsp), %xmm2; \ - movaps 0x30(%rsp), %xmm3; \ - movaps 0x20(%rsp), %xmm4; \ - movaps 0x10(%rsp), %xmm5; \ - mov 0x8(%rsp), %rsi; \ - mov %rsi, %cr0; \ - mov (%rsp), %rsp +#include SYM_FUNC_START(efi_call) pushq %rbp movq %rsp, %rbp - SAVE_XMM + and $~0xf, %rsp mov 16(%rbp), %rax subq $48, %rsp mov %r9, 32(%rsp) @@ -50,9 +21,7 @@ SYM_FUNC_START(efi_call) mov %r8, %r9 mov %rcx, %r8 mov %rsi, %rcx - call *%rdi - addq $48, %rsp - RESTORE_XMM - popq %rbp + CALL_NOSPEC %rdi + leave ret SYM_FUNC_END(efi_call) diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c index ece9cb9c1189..5c0e2eb5d87c 100644 --- a/arch/x86/platform/uv/bios_uv.c +++ b/arch/x86/platform/uv/bios_uv.c @@ -34,10 +34,13 @@ static s64 __uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, * If EFI_OLD_MEMMAP is set, we need to fall back to using our old EFI * callback method, which uses efi_call() directly, with the kernel page tables: */ - if (unlikely(efi_enabled(EFI_OLD_MEMMAP))) + if (unlikely(efi_enabled(EFI_OLD_MEMMAP))) { + kernel_fpu_begin(); ret = efi_call((void *)__va(tab->function), (u64)which, a1, a2, a3, a4, a5); - else + kernel_fpu_end(); + } else { ret = efi_call_virt_pointer(tab, function, (u64)which, a1, a2, a3, a4, a5); + } return ret; } -- cgit v1.2.3-59-g8ed1b From ea5e1919b44f09fce72d919fbb87f9611fc700a6 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 3 Jan 2020 12:39:43 +0100 Subject: efi/x86: Simplify mixed mode call wrapper Calling 32-bit EFI runtime services from a 64-bit OS involves switching back to the flat mapping with a stack carved out of memory that is 32-bit addressable. There is no need to actually execute the 64-bit part of this routine from the flat mapping as well, as long as the entry and return address fit in 32 bits. There is also no need to preserve part of the calling context in global variables: we can simply push the old stack pointer value to the new stack, and keep the return address from the code32 section in EBX. While at it, move the conditional check whether to invoke the mixed mode version of SetVirtualAddressMap() into the 64-bit implementation of the wrapper routine. Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arvind Sankar Cc: Matthew Garrett Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20200103113953.9571-11-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/efi.h | 6 -- arch/x86/platform/efi/efi.c | 19 ++---- arch/x86/platform/efi/efi_64.c | 73 +++++++++++++-------- arch/x86/platform/efi/efi_thunk_64.S | 121 ++++++----------------------------- 4 files changed, 71 insertions(+), 148 deletions(-) diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index cb08035b89a0..e7e9c6e057f9 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -164,12 +164,6 @@ extern void parse_efi_setup(u64 phys_addr, u32 data_len); extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt); extern void efi_thunk_runtime_setup(void); -extern efi_status_t efi_thunk_set_virtual_address_map( - void *phys_set_virtual_address_map, - unsigned long memory_map_size, - unsigned long descriptor_size, - u32 descriptor_version, - efi_memory_desc_t *virtual_map); efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size, unsigned long descriptor_size, u32 descriptor_version, diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 50f8123e658a..e4d3afac7be3 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -1015,21 +1015,10 @@ static void __init __efi_enter_virtual_mode(void) efi_sync_low_kernel_mappings(); - if (!efi_is_mixed()) { - status = efi_set_virtual_address_map( - efi.memmap.desc_size * count, - efi.memmap.desc_size, - efi.memmap.desc_version, - (efi_memory_desc_t *)pa); - } else { - status = efi_thunk_set_virtual_address_map( - efi_phys.set_virtual_address_map, - efi.memmap.desc_size * count, - efi.memmap.desc_size, - efi.memmap.desc_version, - (efi_memory_desc_t *)pa); - } - + status = efi_set_virtual_address_map(efi.memmap.desc_size * count, + efi.memmap.desc_size, + efi.memmap.desc_version, + (efi_memory_desc_t *)pa); if (status != EFI_SUCCESS) { pr_alert("Unable to switch EFI into virtual mode (status=%lx)!\n", status); diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 03565dad0c4b..910e9ec03b09 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -626,61 +626,74 @@ void efi_switch_mm(struct mm_struct *mm) switch_mm(efi_scratch.prev_mm, mm, NULL); } -#ifdef CONFIG_EFI_MIXED static DEFINE_SPINLOCK(efi_runtime_lock); -#define runtime_service32(func) \ -({ \ - u32 table = (u32)(unsigned long)efi.systab; \ - u32 *rt, *___f; \ - \ - rt = (u32 *)(table + offsetof(efi_system_table_32_t, runtime)); \ - ___f = (u32 *)(*rt + offsetof(efi_runtime_services_32_t, func)); \ - *___f; \ +/* + * DS and ES contain user values. We need to save them. + * The 32-bit EFI code needs a valid DS, ES, and SS. There's no + * need to save the old SS: __KERNEL_DS is always acceptable. + */ +#define __efi_thunk(func, ...) \ +({ \ + efi_runtime_services_32_t *__rt; \ + unsigned short __ds, __es; \ + efi_status_t ____s; \ + \ + __rt = (void *)(unsigned long)efi.systab->mixed_mode.runtime; \ + \ + savesegment(ds, __ds); \ + savesegment(es, __es); \ + \ + loadsegment(ss, __KERNEL_DS); \ + loadsegment(ds, __KERNEL_DS); \ + loadsegment(es, __KERNEL_DS); \ + \ + ____s = efi64_thunk(__rt->func, __VA_ARGS__); \ + \ + loadsegment(ds, __ds); \ + loadsegment(es, __es); \ + \ + ____s ^= (____s & BIT(31)) | (____s & BIT_ULL(31)) << 32; \ + ____s; \ }) /* * Switch to the EFI page tables early so that we can access the 1:1 * runtime services mappings which are not mapped in any other page - * tables. This function must be called before runtime_service32(). + * tables. * * Also, disable interrupts because the IDT points to 64-bit handlers, * which aren't going to function correctly when we switch to 32-bit. */ -#define efi_thunk(f, ...) \ +#define efi_thunk(func...) \ ({ \ efi_status_t __s; \ - u32 __func; \ \ arch_efi_call_virt_setup(); \ \ - __func = runtime_service32(f); \ - __s = efi64_thunk(__func, __VA_ARGS__); \ + __s = __efi_thunk(func); \ \ arch_efi_call_virt_teardown(); \ \ __s; \ }) -efi_status_t efi_thunk_set_virtual_address_map( - void *phys_set_virtual_address_map, - unsigned long memory_map_size, - unsigned long descriptor_size, - u32 descriptor_version, - efi_memory_desc_t *virtual_map) +static efi_status_t __init +efi_thunk_set_virtual_address_map(unsigned long memory_map_size, + unsigned long descriptor_size, + u32 descriptor_version, + efi_memory_desc_t *virtual_map) { efi_status_t status; unsigned long flags; - u32 func; efi_sync_low_kernel_mappings(); local_irq_save(flags); efi_switch_mm(&efi_mm); - func = (u32)(unsigned long)phys_set_virtual_address_map; - status = efi64_thunk(func, memory_map_size, descriptor_size, - descriptor_version, virtual_map); + status = __efi_thunk(set_virtual_address_map, memory_map_size, + descriptor_size, descriptor_version, virtual_map); efi_switch_mm(efi_scratch.prev_mm); local_irq_restore(flags); @@ -983,8 +996,11 @@ efi_thunk_query_capsule_caps(efi_capsule_header_t **capsules, return EFI_UNSUPPORTED; } -void efi_thunk_runtime_setup(void) +void __init efi_thunk_runtime_setup(void) { + if (!IS_ENABLED(CONFIG_EFI_MIXED)) + return; + efi.get_time = efi_thunk_get_time; efi.set_time = efi_thunk_set_time; efi.get_wakeup_time = efi_thunk_get_wakeup_time; @@ -1000,7 +1016,6 @@ void efi_thunk_runtime_setup(void) efi.update_capsule = efi_thunk_update_capsule; efi.query_capsule_caps = efi_thunk_query_capsule_caps; } -#endif /* CONFIG_EFI_MIXED */ efi_status_t __init efi_set_virtual_address_map(unsigned long memory_map_size, unsigned long descriptor_size, @@ -1011,6 +1026,12 @@ efi_status_t __init efi_set_virtual_address_map(unsigned long memory_map_size, unsigned long flags; pgd_t *save_pgd = NULL; + if (efi_is_mixed()) + return efi_thunk_set_virtual_address_map(memory_map_size, + descriptor_size, + descriptor_version, + virtual_map); + if (efi_enabled(EFI_OLD_MEMMAP)) { save_pgd = efi_old_memmap_phys_prolog(); if (!save_pgd) diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S index 3189f1394701..162b35729633 100644 --- a/arch/x86/platform/efi/efi_thunk_64.S +++ b/arch/x86/platform/efi/efi_thunk_64.S @@ -25,15 +25,16 @@ .text .code64 -SYM_FUNC_START(efi64_thunk) +SYM_CODE_START(efi64_thunk) push %rbp push %rbx /* * Switch to 1:1 mapped 32-bit stack pointer. */ - movq %rsp, efi_saved_sp(%rip) + movq %rsp, %rax movq efi_scratch(%rip), %rsp + push %rax /* * Calculate the physical address of the kernel text. @@ -41,113 +42,31 @@ SYM_FUNC_START(efi64_thunk) movq $__START_KERNEL_map, %rax subq phys_base(%rip), %rax - /* - * Push some physical addresses onto the stack. This is easier - * to do now in a code64 section while the assembler can address - * 64-bit values. Note that all the addresses on the stack are - * 32-bit. - */ - subq $16, %rsp - leaq efi_exit32(%rip), %rbx - subq %rax, %rbx - movl %ebx, 8(%rsp) - - leaq __efi64_thunk(%rip), %rbx + leaq 1f(%rip), %rbp + leaq 2f(%rip), %rbx + subq %rax, %rbp subq %rax, %rbx - call *%rbx - - movq efi_saved_sp(%rip), %rsp - pop %rbx - pop %rbp - retq -SYM_FUNC_END(efi64_thunk) -/* - * We run this function from the 1:1 mapping. - * - * This function must be invoked with a 1:1 mapped stack. - */ -SYM_FUNC_START_LOCAL(__efi64_thunk) - movl %ds, %eax - push %rax - movl %es, %eax - push %rax - movl %ss, %eax - push %rax - - subq $32, %rsp - movl %esi, 0x0(%rsp) - movl %edx, 0x4(%rsp) - movl %ecx, 0x8(%rsp) - movq %r8, %rsi - movl %esi, 0xc(%rsp) - movq %r9, %rsi - movl %esi, 0x10(%rsp) - - leaq 1f(%rip), %rbx - movq %rbx, func_rt_ptr(%rip) + subq $28, %rsp + movl %ebx, 0x0(%rsp) /* return address */ + movl %esi, 0x4(%rsp) + movl %edx, 0x8(%rsp) + movl %ecx, 0xc(%rsp) + movl %r8d, 0x10(%rsp) + movl %r9d, 0x14(%rsp) /* Switch to 32-bit descriptor */ pushq $__KERNEL32_CS - leaq efi_enter32(%rip), %rax - pushq %rax + pushq %rdi /* EFI runtime service address */ lretq -1: addq $32, %rsp - +1: movq 24(%rsp), %rsp pop %rbx - movl %ebx, %ss - pop %rbx - movl %ebx, %es - pop %rbx - movl %ebx, %ds - - /* - * Convert 32-bit status code into 64-bit. - */ - test %rax, %rax - jz 1f - movl %eax, %ecx - andl $0x0fffffff, %ecx - andl $0xf0000000, %eax - shl $32, %rax - or %rcx, %rax -1: - ret -SYM_FUNC_END(__efi64_thunk) - -SYM_FUNC_START_LOCAL(efi_exit32) - movq func_rt_ptr(%rip), %rax - push %rax - mov %rdi, %rax - ret -SYM_FUNC_END(efi_exit32) + pop %rbp + retq .code32 -/* - * EFI service pointer must be in %edi. - * - * The stack should represent the 32-bit calling convention. - */ -SYM_FUNC_START_LOCAL(efi_enter32) - movl $__KERNEL_DS, %eax - movl %eax, %ds - movl %eax, %es - movl %eax, %ss - - call *%edi - - /* We must preserve return value */ - movl %eax, %edi - - movl 72(%esp), %eax - pushl $__KERNEL_CS - pushl %eax - +2: pushl $__KERNEL_CS + pushl %ebp lret -SYM_FUNC_END(efi_enter32) - - .data - .balign 8 -func_rt_ptr: .quad 0 -efi_saved_sp: .quad 0 +SYM_CODE_END(efi64_thunk) -- cgit v1.2.3-59-g8ed1b From 33b85447fa61946b94fea93dd4bc24772af14d54 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 3 Jan 2020 12:39:44 +0100 Subject: efi/x86: Drop two near identical versions of efi_runtime_init() The routines efi_runtime_init32() and efi_runtime_init64() are almost indistinguishable, and the only relevant difference is the offset in the runtime struct from where to obtain the physical address of the SetVirtualAddressMap() routine. However, this address is only used once, when installing the virtual address map that the OS will use to invoke EFI runtime services, and at the time of the call, we will necessarily be running with a 1:1 mapping, and so there is no need to do the map/unmap dance here to retrieve the address. In fact, in the preceding changes to these users, we stopped using the address recorded here entirely. So let's just get rid of all this code since it no longer serves a purpose. While at it, tweak the logic so that we handle unsupported and disable EFI runtime services in the same way, and unmap the EFI memory map in both cases. Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arvind Sankar Cc: Matthew Garrett Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20200103113953.9571-12-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi.c | 95 +++------------------------------------------ include/linux/efi.h | 19 --------- 2 files changed, 5 insertions(+), 109 deletions(-) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index e4d3afac7be3..67cb0fd18777 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -429,87 +429,6 @@ static int __init efi_systab_init(void *phys) return 0; } -static int __init efi_runtime_init32(void) -{ - efi_runtime_services_32_t *runtime; - - runtime = early_memremap((unsigned long)efi.systab->runtime, - sizeof(efi_runtime_services_32_t)); - if (!runtime) { - pr_err("Could not map the runtime service table!\n"); - return -ENOMEM; - } - - /* - * We will only need *early* access to the SetVirtualAddressMap - * EFI runtime service. All other runtime services will be called - * via the virtual mapping. - */ - efi_phys.set_virtual_address_map = - (efi_set_virtual_address_map_t *) - (unsigned long)runtime->set_virtual_address_map; - early_memunmap(runtime, sizeof(efi_runtime_services_32_t)); - - return 0; -} - -static int __init efi_runtime_init64(void) -{ - efi_runtime_services_64_t *runtime; - - runtime = early_memremap((unsigned long)efi.systab->runtime, - sizeof(efi_runtime_services_64_t)); - if (!runtime) { - pr_err("Could not map the runtime service table!\n"); - return -ENOMEM; - } - - /* - * We will only need *early* access to the SetVirtualAddressMap - * EFI runtime service. All other runtime services will be called - * via the virtual mapping. - */ - efi_phys.set_virtual_address_map = - (efi_set_virtual_address_map_t *) - (unsigned long)runtime->set_virtual_address_map; - early_memunmap(runtime, sizeof(efi_runtime_services_64_t)); - - return 0; -} - -static int __init efi_runtime_init(void) -{ - int rv; - - /* - * Check out the runtime services table. We need to map - * the runtime services table so that we can grab the physical - * address of several of the EFI runtime functions, needed to - * set the firmware into virtual mode. - * - * When EFI_PARAVIRT is in force then we could not map runtime - * service memory region because we do not have direct access to it. - * However, runtime services are available through proxy functions - * (e.g. in case of Xen dom0 EFI implementation they call special - * hypercall which executes relevant EFI functions) and that is why - * they are always enabled. - */ - - if (!efi_enabled(EFI_PARAVIRT)) { - if (efi_enabled(EFI_64BIT)) - rv = efi_runtime_init64(); - else - rv = efi_runtime_init32(); - - if (rv) - return rv; - } - - set_bit(EFI_RUNTIME_SERVICES, &efi.flags); - - return 0; -} - void __init efi_init(void) { efi_char16_t *c16; @@ -567,13 +486,13 @@ void __init efi_init(void) if (!efi_runtime_supported()) pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n"); - else { - if (efi_runtime_disabled() || efi_runtime_init()) { - efi_memmap_unmap(); - return; - } + + if (!efi_runtime_supported() || efi_runtime_disabled()) { + efi_memmap_unmap(); + return; } + set_bit(EFI_RUNTIME_SERVICES, &efi.flags); efi_clean_memmap(); if (efi_enabled(EFI_DBG)) @@ -934,8 +853,6 @@ static void __init kexec_enter_virtual_mode(void) efi_native_runtime_setup(); - efi.set_virtual_address_map = NULL; - if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX)) runtime_code_page_mkexec(); #endif @@ -1040,8 +957,6 @@ static void __init __efi_enter_virtual_mode(void) else efi_thunk_runtime_setup(); - efi.set_virtual_address_map = NULL; - /* * Apply more restrictive page table mapping attributes now that * SVAM() has been called and the firmware has performed all diff --git a/include/linux/efi.h b/include/linux/efi.h index 952c1659dfd9..ee68ea6f85ff 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -539,24 +539,6 @@ typedef struct { u32 query_variable_info; } efi_runtime_services_32_t; -typedef struct { - efi_table_hdr_t hdr; - u64 get_time; - u64 set_time; - u64 get_wakeup_time; - u64 set_wakeup_time; - u64 set_virtual_address_map; - u64 convert_pointer; - u64 get_variable; - u64 get_next_variable; - u64 set_variable; - u64 get_next_high_mono_count; - u64 reset_system; - u64 update_capsule; - u64 query_capsule_caps; - u64 query_variable_info; -} efi_runtime_services_64_t; - typedef efi_status_t efi_get_time_t (efi_time_t *tm, efi_time_cap_t *tc); typedef efi_status_t efi_set_time_t (efi_time_t *tm); typedef efi_status_t efi_get_wakeup_time_t (efi_bool_t *enabled, efi_bool_t *pending, @@ -946,7 +928,6 @@ extern struct efi { efi_query_capsule_caps_t *query_capsule_caps; efi_get_next_high_mono_count_t *get_next_high_mono_count; efi_reset_system_t *reset_system; - efi_set_virtual_address_map_t *set_virtual_address_map; struct efi_memory_map memmap; unsigned long flags; } efi; -- cgit v1.2.3-59-g8ed1b From 5b279a262f5413889529802b82e153e87ee6bef8 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 3 Jan 2020 12:39:45 +0100 Subject: efi/x86: Clean up efi_systab_init() routine for legibility Clean up the efi_systab_init() routine which maps the EFI system table and copies the relevant pieces of data out of it. The current routine is very difficult to read, so let's clean that up. Also, switch to a R/O mapping of the system table since that is all we need. Finally, use a plain u64 variable to record the physical address of the system table instead of pointlessly stashing it in a struct efi that is never used for anything else. Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arvind Sankar Cc: Matthew Garrett Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20200103113953.9571-13-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi.c | 166 ++++++++++++++++++++++---------------------- 1 file changed, 82 insertions(+), 84 deletions(-) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 67cb0fd18777..53ed0b123641 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -54,8 +54,8 @@ #include #include -struct efi efi_phys __initdata; static efi_system_table_t efi_systab __initdata; +static u64 efi_systab_phys __initdata; static efi_config_table_type_t arch_tables[] __initdata = { #ifdef CONFIG_X86_UV @@ -327,89 +327,90 @@ void __init efi_print_memmap(void) } } -static int __init efi_systab_init(void *phys) +static int __init efi_systab_init(u64 phys) { + int size = efi_enabled(EFI_64BIT) ? sizeof(efi_system_table_64_t) + : sizeof(efi_system_table_32_t); + bool over4g = false; + void *p; + + p = early_memremap_ro(phys, size); + if (p == NULL) { + pr_err("Couldn't map the system table!\n"); + return -ENOMEM; + } + if (efi_enabled(EFI_64BIT)) { - efi_system_table_64_t *systab64; - struct efi_setup_data *data = NULL; - u64 tmp = 0; + const efi_system_table_64_t *systab64 = p; + + efi_systab.hdr = systab64->hdr; + efi_systab.fw_vendor = systab64->fw_vendor; + efi_systab.fw_revision = systab64->fw_revision; + efi_systab.con_in_handle = systab64->con_in_handle; + efi_systab.con_in = systab64->con_in; + efi_systab.con_out_handle = systab64->con_out_handle; + efi_systab.con_out = (void *)(unsigned long)systab64->con_out; + efi_systab.stderr_handle = systab64->stderr_handle; + efi_systab.stderr = systab64->stderr; + efi_systab.runtime = (void *)(unsigned long)systab64->runtime; + efi_systab.boottime = (void *)(unsigned long)systab64->boottime; + efi_systab.nr_tables = systab64->nr_tables; + efi_systab.tables = systab64->tables; + + over4g = systab64->con_in_handle > U32_MAX || + systab64->con_in > U32_MAX || + systab64->con_out_handle > U32_MAX || + systab64->con_out > U32_MAX || + systab64->stderr_handle > U32_MAX || + systab64->stderr > U32_MAX || + systab64->boottime > U32_MAX; if (efi_setup) { - data = early_memremap(efi_setup, sizeof(*data)); - if (!data) + struct efi_setup_data *data; + + data = early_memremap_ro(efi_setup, sizeof(*data)); + if (!data) { + early_memunmap(p, size); return -ENOMEM; - } - systab64 = early_memremap((unsigned long)phys, - sizeof(*systab64)); - if (systab64 == NULL) { - pr_err("Couldn't map the system table!\n"); - if (data) - early_memunmap(data, sizeof(*data)); - return -ENOMEM; - } + } + + efi_systab.fw_vendor = (unsigned long)data->fw_vendor; + efi_systab.runtime = (void *)(unsigned long)data->runtime; + efi_systab.tables = (unsigned long)data->tables; + + over4g |= data->fw_vendor > U32_MAX || + data->runtime > U32_MAX || + data->tables > U32_MAX; - efi_systab.hdr = systab64->hdr; - efi_systab.fw_vendor = data ? (unsigned long)data->fw_vendor : - systab64->fw_vendor; - tmp |= data ? data->fw_vendor : systab64->fw_vendor; - efi_systab.fw_revision = systab64->fw_revision; - efi_systab.con_in_handle = systab64->con_in_handle; - tmp |= systab64->con_in_handle; - efi_systab.con_in = systab64->con_in; - tmp |= systab64->con_in; - efi_systab.con_out_handle = systab64->con_out_handle; - tmp |= systab64->con_out_handle; - efi_systab.con_out = (void *)(unsigned long)systab64->con_out; - tmp |= systab64->con_out; - efi_systab.stderr_handle = systab64->stderr_handle; - tmp |= systab64->stderr_handle; - efi_systab.stderr = systab64->stderr; - tmp |= systab64->stderr; - efi_systab.runtime = data ? - (void *)(unsigned long)data->runtime : - (void *)(unsigned long)systab64->runtime; - tmp |= data ? data->runtime : systab64->runtime; - efi_systab.boottime = (void *)(unsigned long)systab64->boottime; - tmp |= systab64->boottime; - efi_systab.nr_tables = systab64->nr_tables; - efi_systab.tables = data ? (unsigned long)data->tables : - systab64->tables; - tmp |= data ? data->tables : systab64->tables; - - early_memunmap(systab64, sizeof(*systab64)); - if (data) early_memunmap(data, sizeof(*data)); -#ifdef CONFIG_X86_32 - if (tmp >> 32) { - pr_err("EFI data located above 4GB, disabling EFI.\n"); - return -EINVAL; + } else { + over4g |= systab64->fw_vendor > U32_MAX || + systab64->runtime > U32_MAX || + systab64->tables > U32_MAX; } -#endif } else { - efi_system_table_32_t *systab32; - - systab32 = early_memremap((unsigned long)phys, - sizeof(*systab32)); - if (systab32 == NULL) { - pr_err("Couldn't map the system table!\n"); - return -ENOMEM; - } + const efi_system_table_32_t *systab32 = p; + + efi_systab.hdr = systab32->hdr; + efi_systab.fw_vendor = systab32->fw_vendor; + efi_systab.fw_revision = systab32->fw_revision; + efi_systab.con_in_handle = systab32->con_in_handle; + efi_systab.con_in = systab32->con_in; + efi_systab.con_out_handle = systab32->con_out_handle; + efi_systab.con_out = (void *)(unsigned long)systab32->con_out; + efi_systab.stderr_handle = systab32->stderr_handle; + efi_systab.stderr = systab32->stderr; + efi_systab.runtime = (void *)(unsigned long)systab32->runtime; + efi_systab.boottime = (void *)(unsigned long)systab32->boottime; + efi_systab.nr_tables = systab32->nr_tables; + efi_systab.tables = systab32->tables; + } - efi_systab.hdr = systab32->hdr; - efi_systab.fw_vendor = systab32->fw_vendor; - efi_systab.fw_revision = systab32->fw_revision; - efi_systab.con_in_handle = systab32->con_in_handle; - efi_systab.con_in = systab32->con_in; - efi_systab.con_out_handle = systab32->con_out_handle; - efi_systab.con_out = (void *)(unsigned long)systab32->con_out; - efi_systab.stderr_handle = systab32->stderr_handle; - efi_systab.stderr = systab32->stderr; - efi_systab.runtime = (void *)(unsigned long)systab32->runtime; - efi_systab.boottime = (void *)(unsigned long)systab32->boottime; - efi_systab.nr_tables = systab32->nr_tables; - efi_systab.tables = systab32->tables; + early_memunmap(p, size); - early_memunmap(systab32, sizeof(*systab32)); + if (IS_ENABLED(CONFIG_X86_32) && over4g) { + pr_err("EFI data located above 4GB, disabling EFI.\n"); + return -EINVAL; } efi.systab = &efi_systab; @@ -435,20 +436,17 @@ void __init efi_init(void) char vendor[100] = "unknown"; int i = 0; -#ifdef CONFIG_X86_32 - if (boot_params.efi_info.efi_systab_hi || - boot_params.efi_info.efi_memmap_hi) { + if (IS_ENABLED(CONFIG_X86_32) && + (boot_params.efi_info.efi_systab_hi || + boot_params.efi_info.efi_memmap_hi)) { pr_info("Table located above 4GB, disabling EFI.\n"); return; } - efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab; -#else - efi_phys.systab = (efi_system_table_t *) - (boot_params.efi_info.efi_systab | - ((__u64)boot_params.efi_info.efi_systab_hi<<32)); -#endif - if (efi_systab_init(efi_phys.systab)) + efi_systab_phys = boot_params.efi_info.efi_systab | + ((__u64)boot_params.efi_info.efi_systab_hi << 32); + + if (efi_systab_init(efi_systab_phys)) return; efi.config_table = (unsigned long)efi.systab->tables; @@ -601,7 +599,7 @@ static void __init get_systab_virt_addr(efi_memory_desc_t *md) size = md->num_pages << EFI_PAGE_SHIFT; end = md->phys_addr + size; - systab = (u64)(unsigned long)efi_phys.systab; + systab = efi_systab_phys; if (md->phys_addr <= systab && systab < end) { systab += md->virt_addr - md->phys_addr; efi.systab = (efi_system_table_t *)(unsigned long)systab; -- cgit v1.2.3-59-g8ed1b From e2d68a955e49d61fd0384f23e92058dc9b79be5e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 3 Jan 2020 12:39:46 +0100 Subject: efi/x86: Don't panic or BUG() on non-critical error conditions The logic in __efi_enter_virtual_mode() does a number of steps in sequence, all of which may fail in one way or the other. In most cases, we simply print an error and disable EFI runtime services support, but in some cases, we BUG() or panic() and bring down the system when encountering conditions that we could easily handle in the same way. While at it, replace a pointless page-to-virt-phys conversion with one that goes straight from struct page to physical. Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arvind Sankar Cc: Matthew Garrett Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20200103113953.9571-14-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi.c | 28 ++++++++++++++-------------- arch/x86/platform/efi/efi_64.c | 9 +++++---- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 53ed0b123641..4f539bfdc051 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -889,16 +889,14 @@ static void __init __efi_enter_virtual_mode(void) if (efi_alloc_page_tables()) { pr_err("Failed to allocate EFI page tables\n"); - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; + goto err; } efi_merge_regions(); new_memmap = efi_map_regions(&count, &pg_shift); if (!new_memmap) { pr_err("Error reallocating memory, EFI runtime non-functional!\n"); - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; + goto err; } pa = __pa(new_memmap); @@ -912,8 +910,7 @@ static void __init __efi_enter_virtual_mode(void) if (efi_memmap_init_late(pa, efi.memmap.desc_size * count)) { pr_err("Failed to remap late EFI memory map\n"); - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; + goto err; } if (efi_enabled(EFI_DBG)) { @@ -921,12 +918,11 @@ static void __init __efi_enter_virtual_mode(void) efi_print_memmap(); } - BUG_ON(!efi.systab); + if (WARN_ON(!efi.systab)) + goto err; - if (efi_setup_page_tables(pa, 1 << pg_shift)) { - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; - } + if (efi_setup_page_tables(pa, 1 << pg_shift)) + goto err; efi_sync_low_kernel_mappings(); @@ -935,9 +931,9 @@ static void __init __efi_enter_virtual_mode(void) efi.memmap.desc_version, (efi_memory_desc_t *)pa); if (status != EFI_SUCCESS) { - pr_alert("Unable to switch EFI into virtual mode (status=%lx)!\n", - status); - panic("EFI call to SetVirtualAddressMap() failed!"); + pr_err("Unable to switch EFI into virtual mode (status=%lx)!\n", + status); + goto err; } efi_free_boot_services(); @@ -964,6 +960,10 @@ static void __init __efi_enter_virtual_mode(void) /* clean DUMMY object */ efi_delete_dummy_variable(); + return; + +err: + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); } void __init efi_enter_virtual_mode(void) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 910e9ec03b09..c13fa2150976 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -384,11 +384,12 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) return 0; page = alloc_page(GFP_KERNEL|__GFP_DMA32); - if (!page) - panic("Unable to allocate EFI runtime stack < 4GB\n"); + if (!page) { + pr_err("Unable to allocate EFI runtime stack < 4GB\n"); + return 1; + } - efi_scratch.phys_stack = virt_to_phys(page_address(page)); - efi_scratch.phys_stack += PAGE_SIZE; /* stack grows down */ + efi_scratch.phys_stack = page_to_phys(page + 1); /* stack grows down */ npages = (_etext - _text) >> PAGE_SHIFT; text = __pa(_text); -- cgit v1.2.3-59-g8ed1b From 4684abe375927e745974bfddd85e171a3eb887a5 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 3 Jan 2020 12:39:47 +0100 Subject: efi/x86: Remove unreachable code in kexec_enter_virtual_mode() Remove some code that is guaranteed to be unreachable, given that we have already bailed by this time if EFI_OLD_MEMMAP is set. Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arvind Sankar Cc: Matthew Garrett Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20200103113953.9571-15-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 4f539bfdc051..b931c4bea284 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -850,9 +850,6 @@ static void __init kexec_enter_virtual_mode(void) efi.runtime_version = efi_systab.hdr.revision; efi_native_runtime_setup(); - - if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX)) - runtime_code_page_mkexec(); #endif } -- cgit v1.2.3-59-g8ed1b From 14b864f4b5c402fe1ca394042beeea6fdf54f8f5 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Fri, 3 Jan 2020 12:39:48 +0100 Subject: efi/x86: Check number of arguments to variadic functions On x86 we need to thunk through assembler stubs to call the EFI services for mixed mode, and for runtime services in 64-bit mode. The assembler stubs have limits on how many arguments it handles. Introduce a few macros to check that we do not try to pass too many arguments to the stubs. Signed-off-by: Arvind Sankar Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Matthew Garrett Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20200103113953.9571-16-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/efi_thunk_64.S | 4 +-- arch/x86/include/asm/efi.h | 54 +++++++++++++++++++++++++++++++-- arch/x86/platform/efi/efi_stub_64.S | 4 +-- arch/x86/platform/efi/efi_thunk_64.S | 4 +-- 4 files changed, 58 insertions(+), 8 deletions(-) diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S index 6d95eb6b8912..d040ff5458e5 100644 --- a/arch/x86/boot/compressed/efi_thunk_64.S +++ b/arch/x86/boot/compressed/efi_thunk_64.S @@ -23,7 +23,7 @@ .code64 .text -SYM_FUNC_START(efi64_thunk) +SYM_FUNC_START(__efi64_thunk) push %rbp push %rbx @@ -95,7 +95,7 @@ SYM_FUNC_START(efi64_thunk) pop %rbx pop %rbp ret -SYM_FUNC_END(efi64_thunk) +SYM_FUNC_END(__efi64_thunk) .code32 /* diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index e7e9c6e057f9..cfc450085584 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -8,6 +8,7 @@ #include #include #include +#include /* * We map the EFI regions needed for runtime services non-contiguously, @@ -34,6 +35,45 @@ #define ARCH_EFI_IRQ_FLAGS_MASK X86_EFLAGS_IF +/* + * The EFI services are called through variadic functions in many cases. These + * functions are implemented in assembler and support only a fixed number of + * arguments. The macros below allows us to check at build time that we don't + * try to call them with too many arguments. + * + * __efi_nargs() will return the number of arguments if it is 7 or less, and + * cause a BUILD_BUG otherwise. The limitations of the C preprocessor make it + * impossible to calculate the exact number of arguments beyond some + * pre-defined limit. The maximum number of arguments currently supported by + * any of the thunks is 7, so this is good enough for now and can be extended + * in the obvious way if we ever need more. + */ + +#define __efi_nargs(...) __efi_nargs_(__VA_ARGS__) +#define __efi_nargs_(...) __efi_nargs__(0, ##__VA_ARGS__, \ + __efi_arg_sentinel(7), __efi_arg_sentinel(6), \ + __efi_arg_sentinel(5), __efi_arg_sentinel(4), \ + __efi_arg_sentinel(3), __efi_arg_sentinel(2), \ + __efi_arg_sentinel(1), __efi_arg_sentinel(0)) +#define __efi_nargs__(_0, _1, _2, _3, _4, _5, _6, _7, n, ...) \ + __take_second_arg(n, \ + ({ BUILD_BUG_ON_MSG(1, "__efi_nargs limit exceeded"); 8; })) +#define __efi_arg_sentinel(n) , n + +/* + * __efi_nargs_check(f, n, ...) will cause a BUILD_BUG if the ellipsis + * represents more than n arguments. + */ + +#define __efi_nargs_check(f, n, ...) \ + __efi_nargs_check_(f, __efi_nargs(__VA_ARGS__), n) +#define __efi_nargs_check_(f, p, n) __efi_nargs_check__(f, p, n) +#define __efi_nargs_check__(f, p, n) ({ \ + BUILD_BUG_ON_MSG( \ + (p) > (n), \ + #f " called with too many arguments (" #p ">" #n ")"); \ +}) + #ifdef CONFIG_X86_32 #define arch_efi_call_virt_setup() \ ({ \ @@ -56,7 +96,12 @@ #define EFI_LOADER_SIGNATURE "EL64" -extern asmlinkage u64 efi_call(void *fp, ...); +extern asmlinkage u64 __efi_call(void *fp, ...); + +#define efi_call(...) ({ \ + __efi_nargs_check(efi_call, 7, __VA_ARGS__); \ + __efi_call(__VA_ARGS__); \ +}) /* * struct efi_scratch - Scratch space used while switching to/from efi_mm @@ -139,7 +184,12 @@ struct efi_setup_data { extern u64 efi_setup; #ifdef CONFIG_EFI -extern efi_status_t efi64_thunk(u32, ...); +extern efi_status_t __efi64_thunk(u32, ...); + +#define efi64_thunk(...) ({ \ + __efi_nargs_check(efi64_thunk, 6, __VA_ARGS__); \ + __efi64_thunk(__VA_ARGS__); \ +}) static inline bool efi_is_mixed(void) { diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index e7e1020f4ccb..15da118f04f0 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S @@ -10,7 +10,7 @@ #include #include -SYM_FUNC_START(efi_call) +SYM_FUNC_START(__efi_call) pushq %rbp movq %rsp, %rbp and $~0xf, %rsp @@ -24,4 +24,4 @@ SYM_FUNC_START(efi_call) CALL_NOSPEC %rdi leave ret -SYM_FUNC_END(efi_call) +SYM_FUNC_END(__efi_call) diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S index 162b35729633..26f0da238c1c 100644 --- a/arch/x86/platform/efi/efi_thunk_64.S +++ b/arch/x86/platform/efi/efi_thunk_64.S @@ -25,7 +25,7 @@ .text .code64 -SYM_CODE_START(efi64_thunk) +SYM_CODE_START(__efi64_thunk) push %rbp push %rbx @@ -69,4 +69,4 @@ SYM_CODE_START(efi64_thunk) 2: pushl $__KERNEL_CS pushl %ebp lret -SYM_CODE_END(efi64_thunk) +SYM_CODE_END(__efi64_thunk) -- cgit v1.2.3-59-g8ed1b From ea7d87f98fa9675076fb5ad208d889b217e83889 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Fri, 3 Jan 2020 12:39:49 +0100 Subject: efi/x86: Allow translating 64-bit arguments for mixed mode calls Introduce the ability to define macros to perform argument translation for the calls that need it, and define them for the boot services that we currently use. When calling 32-bit firmware methods in mixed mode, all output parameters that are 32-bit according to the firmware, but 64-bit in the kernel (ie OUT UINTN * or OUT VOID **) must be initialized in the kernel, or the upper 32 bits may contain garbage. Define macros that zero out the upper 32 bits of the output before invoking the firmware method. When a 32-bit EFI call takes 64-bit arguments, the mixed-mode call must push the two 32-bit halves as separate arguments onto the stack. This can be achieved by splitting the argument into its two halves when calling the assembler thunk. Define a macro to do this for the free_pages boot service. Signed-off-by: Arvind Sankar Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Matthew Garrett Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20200103113953.9571-17-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/eboot.c | 16 ------ arch/x86/include/asm/efi.h | 71 ++++++++++++++++++++++++-- drivers/firmware/efi/libstub/efi-stub-helper.c | 5 +- 3 files changed, 67 insertions(+), 25 deletions(-) diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 565ee4733579..4afd29eb5b34 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -891,19 +891,3 @@ fail: for (;;) asm("hlt"); } - -#ifdef CONFIG_EFI_MIXED -void efi_free_native(unsigned long size, unsigned long addr); - -void efi_free(unsigned long size, unsigned long addr) -{ - if (!size) - return; - - if (efi_is_native()) - efi_free_native(size, addr); - else - efi64_thunk(efi_system_table()->boottime->mixed_mode.free_pages, - addr, 0, DIV_ROUND_UP(size, EFI_PAGE_SIZE)); -} -#endif diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index cfc450085584..166f0386719e 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -243,22 +243,83 @@ static inline bool efi_is_native(void) : (__typeof__(inst->attr)) \ efi_mixed_mode_cast(inst->mixed_mode.attr)) +/* + * The following macros allow translating arguments if necessary from native to + * mixed mode. The use case for this is to initialize the upper 32 bits of + * output parameters, and where the 32-bit method requires a 64-bit argument, + * which must be split up into two arguments to be thunked properly. + * + * As examples, the AllocatePool boot service returns the address of the + * allocation, but it will not set the high 32 bits of the address. To ensure + * that the full 64-bit address is initialized, we zero-init the address before + * calling the thunk. + * + * The FreePages boot service takes a 64-bit physical address even in 32-bit + * mode. For the thunk to work correctly, a native 64-bit call of + * free_pages(addr, size) + * must be translated to + * efi64_thunk(free_pages, addr & U32_MAX, addr >> 32, size) + * so that the two 32-bit halves of addr get pushed onto the stack separately. + */ + +static inline void *efi64_zero_upper(void *p) +{ + ((u32 *)p)[1] = 0; + return p; +} + +#define __efi64_argmap_free_pages(addr, size) \ + ((addr), 0, (size)) + +#define __efi64_argmap_get_memory_map(mm_size, mm, key, size, ver) \ + ((mm_size), (mm), efi64_zero_upper(key), efi64_zero_upper(size), (ver)) + +#define __efi64_argmap_allocate_pool(type, size, buffer) \ + ((type), (size), efi64_zero_upper(buffer)) + +#define __efi64_argmap_handle_protocol(handle, protocol, interface) \ + ((handle), (protocol), efi64_zero_upper(interface)) + +#define __efi64_argmap_locate_protocol(protocol, reg, interface) \ + ((protocol), (reg), efi64_zero_upper(interface)) + +/* + * The macros below handle the plumbing for the argument mapping. To add a + * mapping for a specific EFI method, simply define a macro + * __efi64_argmap_, following the examples above. + */ + +#define __efi64_thunk_map(inst, func, ...) \ + efi64_thunk(inst->mixed_mode.func, \ + __efi64_argmap(__efi64_argmap_ ## func(__VA_ARGS__), \ + (__VA_ARGS__))) + +#define __efi64_argmap(mapped, args) \ + __PASTE(__efi64_argmap__, __efi_nargs(__efi_eat mapped))(mapped, args) +#define __efi64_argmap__0(mapped, args) __efi_eval mapped +#define __efi64_argmap__1(mapped, args) __efi_eval args + +#define __efi_eat(...) +#define __efi_eval(...) __VA_ARGS__ + +/* The three macros below handle dispatching via the thunk if needed */ + #define efi_call_proto(inst, func, ...) \ (efi_is_native() \ ? inst->func(inst, ##__VA_ARGS__) \ - : efi64_thunk(inst->mixed_mode.func, inst, ##__VA_ARGS__)) + : __efi64_thunk_map(inst, func, inst, ##__VA_ARGS__)) #define efi_bs_call(func, ...) \ (efi_is_native() \ ? efi_system_table()->boottime->func(__VA_ARGS__) \ - : efi64_thunk(efi_table_attr(efi_system_table(), \ - boottime)->mixed_mode.func, __VA_ARGS__)) + : __efi64_thunk_map(efi_table_attr(efi_system_table(), \ + boottime), func, __VA_ARGS__)) #define efi_rt_call(func, ...) \ (efi_is_native() \ ? efi_system_table()->runtime->func(__VA_ARGS__) \ - : efi64_thunk(efi_table_attr(efi_system_table(), \ - runtime)->mixed_mode.func, __VA_ARGS__)) + : __efi64_thunk_map(efi_table_attr(efi_system_table(), \ + runtime), func, __VA_ARGS__)) extern bool efi_reboot_required(void); extern bool efi_is_table_address(unsigned long phys_addr); diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index f1b9c36934e9..fcc45ee94e02 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -344,9 +344,6 @@ fail: } void efi_free(unsigned long size, unsigned long addr) - __weak __alias(efi_free_native); - -void efi_free_native(unsigned long size, unsigned long addr) { unsigned long nr_pages; @@ -354,7 +351,7 @@ void efi_free_native(unsigned long size, unsigned long addr) return; nr_pages = round_up(size, EFI_ALLOC_ALIGN) / EFI_PAGE_SIZE; - efi_system_table()->boottime->free_pages(addr, nr_pages); + efi_bs_call(free_pages, addr, nr_pages); } static efi_status_t efi_file_size(void *__fh, efi_char16_t *filename_16, -- cgit v1.2.3-59-g8ed1b From 4444f8541dad16fefd9b8807ad1451e806ef1d94 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Fri, 3 Jan 2020 12:39:50 +0100 Subject: efi: Allow disabling PCI busmastering on bridges during boot Add an option to disable the busmaster bit in the control register on all PCI bridges before calling ExitBootServices() and passing control to the runtime kernel. System firmware may configure the IOMMU to prevent malicious PCI devices from being able to attack the OS via DMA. However, since firmware can't guarantee that the OS is IOMMU-aware, it will tear down IOMMU configuration when ExitBootServices() is called. This leaves a window between where a hostile device could still cause damage before Linux configures the IOMMU again. If CONFIG_EFI_DISABLE_PCI_DMA is enabled or "efi=disable_early_pci_dma" is passed on the command line, the EFI stub will clear the busmaster bit on all PCI bridges before ExitBootServices() is called. This will prevent any malicious PCI devices from being able to perform DMA until the kernel reenables busmastering after configuring the IOMMU. This option may cause failures with some poorly behaved hardware and should not be enabled without testing. The kernel commandline options "efi=disable_early_pci_dma" or "efi=no_disable_early_pci_dma" may be used to override the default. Note that PCI devices downstream from PCI bridges are disconnected from their drivers first, using the UEFI driver model API, so that DMA can be disabled safely at the bridge level. [ardb: disconnect PCI I/O handles first, as suggested by Arvind] Co-developed-by: Matthew Garrett Signed-off-by: Matthew Garrett Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arvind Sankar Cc: Matthew Garrett Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20200103113953.9571-18-ardb@kernel.org Signed-off-by: Ingo Molnar --- Documentation/admin-guide/kernel-parameters.txt | 7 +- arch/x86/include/asm/efi.h | 5 ++ drivers/firmware/efi/Kconfig | 22 +++++ drivers/firmware/efi/libstub/Makefile | 2 +- drivers/firmware/efi/libstub/efi-stub-helper.c | 15 ++++ drivers/firmware/efi/libstub/pci.c | 114 ++++++++++++++++++++++++ include/linux/efi.h | 6 +- 7 files changed, 168 insertions(+), 3 deletions(-) create mode 100644 drivers/firmware/efi/libstub/pci.c diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index ade4e6ec23e0..994632ae48de 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1165,7 +1165,8 @@ efi= [EFI] Format: { "old_map", "nochunk", "noruntime", "debug", - "nosoftreserve" } + "nosoftreserve", "disable_early_pci_dma", + "no_disable_early_pci_dma" } old_map [X86-64]: switch to the old ioremap-based EFI runtime services mapping. 32-bit still uses this one by default. @@ -1180,6 +1181,10 @@ claim. Specify efi=nosoftreserve to disable this reservation and treat the memory by its base type (i.e. EFI_CONVENTIONAL_MEMORY / "System RAM"). + disable_early_pci_dma: Disable the busmaster bit on all + PCI bridges while in the EFI boot stub + no_disable_early_pci_dma: Leave the busmaster bit set + on all PCI bridges while in the EFI boot stub efi_no_storage_paranoia [EFI; X86] Using this parameter you can use more than 50% of diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 166f0386719e..383f7a0fc170 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -283,6 +283,11 @@ static inline void *efi64_zero_upper(void *p) #define __efi64_argmap_locate_protocol(protocol, reg, interface) \ ((protocol), (reg), efi64_zero_upper(interface)) +/* PCI I/O */ +#define __efi64_argmap_get_location(protocol, seg, bus, dev, func) \ + ((protocol), efi64_zero_upper(seg), efi64_zero_upper(bus), \ + efi64_zero_upper(dev), efi64_zero_upper(func)) + /* * The macros below handle the plumbing for the argument mapping. To add a * mapping for a specific EFI method, simply define a macro diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig index bcc378c19ebe..ecc83e2f032c 100644 --- a/drivers/firmware/efi/Kconfig +++ b/drivers/firmware/efi/Kconfig @@ -215,6 +215,28 @@ config EFI_RCI2_TABLE Say Y here for Dell EMC PowerEdge systems. +config EFI_DISABLE_PCI_DMA + bool "Clear Busmaster bit on PCI bridges during ExitBootServices()" + help + Disable the busmaster bit in the control register on all PCI bridges + while calling ExitBootServices() and passing control to the runtime + kernel. System firmware may configure the IOMMU to prevent malicious + PCI devices from being able to attack the OS via DMA. However, since + firmware can't guarantee that the OS is IOMMU-aware, it will tear + down IOMMU configuration when ExitBootServices() is called. This + leaves a window between where a hostile device could still cause + damage before Linux configures the IOMMU again. + + If you say Y here, the EFI stub will clear the busmaster bit on all + PCI bridges before ExitBootServices() is called. This will prevent + any malicious PCI devices from being able to perform DMA until the + kernel reenables busmastering after configuring the IOMMU. + + This option will cause failures with some poorly behaved hardware + and should not be enabled without testing. The kernel commandline + options "efi=disable_early_pci_dma" or "efi=no_disable_early_pci_dma" + may be used to override this option. + endmenu config UEFI_CPER diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index c35f893897e1..98a81576213d 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -39,7 +39,7 @@ OBJECT_FILES_NON_STANDARD := y KCOV_INSTRUMENT := n lib-y := efi-stub-helper.o gop.o secureboot.o tpm.o \ - random.o + random.o pci.o # include the stub's generic dependencies from lib/ when building for ARM/arm64 arm-deps-y := fdt_rw.c fdt_ro.c fdt_wip.c fdt.c fdt_empty_tree.c fdt_sw.c diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index fcc45ee94e02..74ddfb496140 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -33,6 +33,8 @@ static bool __efistub_global efi_nokaslr; static bool __efistub_global efi_quiet; static bool __efistub_global efi_novamap; static bool __efistub_global efi_nosoftreserve; +static bool __efistub_global efi_disable_pci_dma = + IS_ENABLED(CONFIG_EFI_DISABLE_PCI_DMA); bool __pure nokaslr(void) { @@ -490,6 +492,16 @@ efi_status_t efi_parse_options(char const *cmdline) efi_nosoftreserve = true; } + if (!strncmp(str, "disable_early_pci_dma", 21)) { + str += strlen("disable_early_pci_dma"); + efi_disable_pci_dma = true; + } + + if (!strncmp(str, "no_disable_early_pci_dma", 24)) { + str += strlen("no_disable_early_pci_dma"); + efi_disable_pci_dma = false; + } + /* Group words together, delimited by "," */ while (*str && *str != ' ' && *str != ',') str++; @@ -876,6 +888,9 @@ efi_status_t efi_exit_boot_services(void *handle, if (status != EFI_SUCCESS) goto free_map; + if (efi_disable_pci_dma) + efi_pci_disable_bridge_busmaster(); + status = efi_bs_call(exit_boot_services, handle, *map->key_ptr); if (status == EFI_INVALID_PARAMETER) { diff --git a/drivers/firmware/efi/libstub/pci.c b/drivers/firmware/efi/libstub/pci.c new file mode 100644 index 000000000000..b025e59b94df --- /dev/null +++ b/drivers/firmware/efi/libstub/pci.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * PCI-related functions used by the EFI stub on multiple + * architectures. + * + * Copyright 2019 Google, LLC + */ + +#include +#include + +#include + +#include "efistub.h" + +void efi_pci_disable_bridge_busmaster(void) +{ + efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID; + unsigned long pci_handle_size = 0; + efi_handle_t *pci_handle = NULL; + efi_handle_t handle; + efi_status_t status; + u16 class, command; + int i; + + status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL, &pci_proto, + NULL, &pci_handle_size, NULL); + + if (status != EFI_BUFFER_TOO_SMALL) { + if (status != EFI_SUCCESS && status != EFI_NOT_FOUND) + pr_efi_err("Failed to locate PCI I/O handles'\n"); + return; + } + + status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, pci_handle_size, + (void **)&pci_handle); + if (status != EFI_SUCCESS) { + pr_efi_err("Failed to allocate memory for 'pci_handle'\n"); + return; + } + + status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL, &pci_proto, + NULL, &pci_handle_size, pci_handle); + if (status != EFI_SUCCESS) { + pr_efi_err("Failed to locate PCI I/O handles'\n"); + goto free_handle; + } + + for_each_efi_handle(handle, pci_handle, pci_handle_size, i) { + efi_pci_io_protocol_t *pci; + unsigned long segment_nr, bus_nr, device_nr, func_nr; + + status = efi_bs_call(handle_protocol, handle, &pci_proto, + (void **)&pci); + if (status != EFI_SUCCESS) + continue; + + /* + * Disregard devices living on bus 0 - these are not behind a + * bridge so no point in disconnecting them from their drivers. + */ + status = efi_call_proto(pci, get_location, &segment_nr, &bus_nr, + &device_nr, &func_nr); + if (status != EFI_SUCCESS || bus_nr == 0) + continue; + + /* + * Don't disconnect VGA controllers so we don't risk losing + * access to the framebuffer. Drivers for true PCIe graphics + * controllers that are behind a PCIe root port do not use + * DMA to implement the GOP framebuffer anyway [although they + * may use it in their implentation of Gop->Blt()], and so + * disabling DMA in the PCI bridge should not interfere with + * normal operation of the device. + */ + status = efi_call_proto(pci, pci.read, EfiPciIoWidthUint16, + PCI_CLASS_DEVICE, 1, &class); + if (status != EFI_SUCCESS || class == PCI_CLASS_DISPLAY_VGA) + continue; + + /* Disconnect this handle from all its drivers */ + efi_bs_call(disconnect_controller, handle, NULL, NULL); + } + + for_each_efi_handle(handle, pci_handle, pci_handle_size, i) { + efi_pci_io_protocol_t *pci; + + status = efi_bs_call(handle_protocol, handle, &pci_proto, + (void **)&pci); + if (status != EFI_SUCCESS || !pci) + continue; + + status = efi_call_proto(pci, pci.read, EfiPciIoWidthUint16, + PCI_CLASS_DEVICE, 1, &class); + + if (status != EFI_SUCCESS || class != PCI_CLASS_BRIDGE_PCI) + continue; + + /* Disable busmastering */ + status = efi_call_proto(pci, pci.read, EfiPciIoWidthUint16, + PCI_COMMAND, 1, &command); + if (status != EFI_SUCCESS || !(command & PCI_COMMAND_MASTER)) + continue; + + command &= ~PCI_COMMAND_MASTER; + status = efi_call_proto(pci, pci.write, EfiPciIoWidthUint16, + PCI_COMMAND, 1, &command); + if (status != EFI_SUCCESS) + pr_efi_err("Failed to disable PCI busmastering\n"); + } + +free_handle: + efi_bs_call(free_pool, pci_handle); +} diff --git a/include/linux/efi.h b/include/linux/efi.h index ee68ea6f85ff..7e8e25b1d11c 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -319,7 +319,9 @@ typedef union { void *stall; void *set_watchdog_timer; void *connect_controller; - void *disconnect_controller; + efi_status_t (__efiapi *disconnect_controller)(efi_handle_t, + efi_handle_t, + efi_handle_t); void *open_protocol; void *close_protocol; void *open_protocol_information; @@ -1692,4 +1694,6 @@ struct linux_efi_memreserve { #define EFI_MEMRESERVE_COUNT(size) (((size) - sizeof(struct linux_efi_memreserve)) \ / sizeof(((struct linux_efi_memreserve *)0)->entry[0])) +void efi_pci_disable_bridge_busmaster(void); + #endif /* _LINUX_EFI_H */ -- cgit v1.2.3-59-g8ed1b From 796eb8d26a57f917bf22d781666aeab491f5c4f1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 13 Jan 2020 18:22:33 +0100 Subject: efi/libstub/x86: Use const attribute for efi_is_64bit() Reshuffle the x86 stub code a bit so that we can tag the efi_is_64bit() function with the 'const' attribute, which permits the compiler to optimize away any redundant calls. Since we have two different entry points for 32 and 64 bit firmware in the startup code, this also simplifies the C code since we'll enter it with the efi_is64 variable already set. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200113172245.27925-2-ardb@kernel.org --- arch/x86/boot/compressed/eboot.c | 14 ++++++-------- arch/x86/boot/compressed/head_64.S | 7 +++---- arch/x86/include/asm/efi.h | 2 +- 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 4afd29eb5b34..ab3a40283db7 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -21,16 +21,18 @@ #include "eboot.h" static efi_system_table_t *sys_table; -static bool efi_is64 = IS_ENABLED(CONFIG_X86_64); +extern const bool efi_is64; __pure efi_system_table_t *efi_system_table(void) { return sys_table; } -__pure bool efi_is_64bit(void) +__attribute_const__ bool efi_is_64bit(void) { - return efi_is64; + if (IS_ENABLED(CONFIG_EFI_MIXED)) + return efi_is64; + return IS_ENABLED(CONFIG_X64_64); } static efi_status_t @@ -710,8 +712,7 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle) */ struct boot_params *efi_main(efi_handle_t handle, efi_system_table_t *sys_table_arg, - struct boot_params *boot_params, - bool is64) + struct boot_params *boot_params) { struct desc_ptr *gdt = NULL; struct setup_header *hdr = &boot_params->hdr; @@ -721,9 +722,6 @@ struct boot_params *efi_main(efi_handle_t handle, sys_table = sys_table_arg; - if (IS_ENABLED(CONFIG_EFI_MIXED)) - efi_is64 = is64; - /* Check if we were booted by the EFI firmware */ if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) goto fail; diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 44a6bb6964b5..1f1f6c8139b3 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -211,10 +211,9 @@ SYM_FUNC_START(startup_32) movl efi32_boot_args(%ebp), %edi cmp $0, %edi jz 1f - leal handover_entry(%ebp), %eax + leal efi64_stub_entry(%ebp), %eax movl %esi, %edx movl efi32_boot_args+4(%ebp), %esi - movl $0x0, %ecx 1: #endif pushl %eax @@ -242,6 +241,7 @@ SYM_FUNC_START(efi32_stub_entry) movl %ecx, efi32_boot_args(%ebp) movl %edx, efi32_boot_args+4(%ebp) sgdtl efi32_boot_gdt(%ebp) + movb $0, efi_is64(%ebp) /* Disable paging */ movl %cr0, %eax @@ -452,8 +452,6 @@ SYM_CODE_END(startup_64) .org 0x390 SYM_FUNC_START(efi64_stub_entry) SYM_FUNC_START_ALIAS(efi_stub_entry) - movq $1, %rcx -SYM_INNER_LABEL(handover_entry, SYM_L_LOCAL) and $~0xf, %rsp /* realign the stack */ call efi_main movq %rax,%rsi @@ -632,6 +630,7 @@ SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end) #ifdef CONFIG_EFI_MIXED SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0) +SYM_DATA(efi_is64, .byte 1) #endif /* diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 383f7a0fc170..0a58468a7203 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -221,7 +221,7 @@ efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size, /* arch specific definitions used by the stub code */ -__pure bool efi_is_64bit(void); +__attribute_const__ bool efi_is_64bit(void); static inline bool efi_is_native(void) { -- cgit v1.2.3-59-g8ed1b From ac3c76cc6d6deef573dd8c14232f20c6aa744f83 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 13 Jan 2020 18:22:34 +0100 Subject: efi/libstub/x86: Use mandatory 16-byte stack alignment in mixed mode Reduce the stack frame of the EFI stub's mixed mode thunk routine by 8 bytes, by moving the GDT and return addresses to EBP and EBX, which we need to preserve anyway, since their top halves will be cleared by the call into 32-bit firmware code. Doing so results in the UEFI code being entered with a 16 byte aligned stack, as mandated by the UEFI spec, fixing the last occurrence in the 64-bit kernel where we violate this requirement. Also, move the saved GDT from a global variable to an unused part of the stack frame, and touch up some other parts of the code. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200113172245.27925-3-ardb@kernel.org --- arch/x86/boot/compressed/efi_thunk_64.S | 46 ++++++++++----------------------- 1 file changed, 13 insertions(+), 33 deletions(-) diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S index d040ff5458e5..8fb7f6799c52 100644 --- a/arch/x86/boot/compressed/efi_thunk_64.S +++ b/arch/x86/boot/compressed/efi_thunk_64.S @@ -27,12 +27,9 @@ SYM_FUNC_START(__efi64_thunk) push %rbp push %rbx - subq $8, %rsp - leaq 1f(%rip), %rax - movl %eax, 4(%rsp) - leaq efi_gdt64(%rip), %rax - movl %eax, (%rsp) - movl %eax, 2(%rax) /* Fixup the gdt base address */ + leaq 1f(%rip), %rbp + leaq efi_gdt64(%rip), %rbx + movl %ebx, 2(%rbx) /* Fixup the gdt base address */ movl %ds, %eax push %rax @@ -48,12 +45,10 @@ SYM_FUNC_START(__efi64_thunk) movl %esi, 0x0(%rsp) movl %edx, 0x4(%rsp) movl %ecx, 0x8(%rsp) - movq %r8, %rsi - movl %esi, 0xc(%rsp) - movq %r9, %rsi - movl %esi, 0x10(%rsp) + movl %r8d, 0xc(%rsp) + movl %r9d, 0x10(%rsp) - sgdt save_gdt(%rip) + sgdt 0x14(%rsp) /* * Switch to gdt with 32-bit segments. This is the firmware GDT @@ -68,11 +63,10 @@ SYM_FUNC_START(__efi64_thunk) pushq %rax lretq -1: addq $32, %rsp +1: lgdt 0x14(%rsp) + addq $32, %rsp movq %rdi, %rax - lgdt save_gdt(%rip) - pop %rbx movl %ebx, %ss pop %rbx @@ -83,15 +77,9 @@ SYM_FUNC_START(__efi64_thunk) /* * Convert 32-bit status code into 64-bit. */ - test %rax, %rax - jz 1f - movl %eax, %ecx - andl $0x0fffffff, %ecx - andl $0xf0000000, %eax - shl $32, %rax - or %rcx, %rax -1: - addq $8, %rsp + roll $1, %eax + rorq $1, %rax + pop %rbx pop %rbp ret @@ -135,9 +123,7 @@ SYM_FUNC_START_LOCAL(efi_enter32) */ cli - movl 56(%esp), %eax - movl %eax, 2(%eax) - lgdtl (%eax) + lgdtl (%ebx) movl %cr4, %eax btsl $(X86_CR4_PAE_BIT), %eax @@ -154,9 +140,8 @@ SYM_FUNC_START_LOCAL(efi_enter32) xorl %eax, %eax lldt %ax - movl 60(%esp), %eax pushl $__KERNEL_CS - pushl %eax + pushl %ebp /* Enable paging */ movl %cr0, %eax @@ -172,11 +157,6 @@ SYM_DATA_START(efi32_boot_gdt) .quad 0 SYM_DATA_END(efi32_boot_gdt) -SYM_DATA_START_LOCAL(save_gdt) - .word 0 - .quad 0 -SYM_DATA_END(save_gdt) - SYM_DATA_START(efi_gdt64) .word efi_gdt64_end - efi_gdt64 .long 0 /* Filled out by user */ -- cgit v1.2.3-59-g8ed1b From bd1d7093a8086df37d260bd1f1d896c0922d34ef Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 13 Jan 2020 18:22:35 +0100 Subject: efi/libstub/x86: Fix unused-variable warning The only users of these got removed, so they also need to be removed to avoid warnings: arch/x86/boot/compressed/eboot.c: In function 'setup_efi_pci': arch/x86/boot/compressed/eboot.c:117:16: error: unused variable 'nr_pci' [-Werror=unused-variable] unsigned long nr_pci; ^~~~~~ arch/x86/boot/compressed/eboot.c: In function 'setup_uga': arch/x86/boot/compressed/eboot.c:244:16: error: unused variable 'nr_ugas' [-Werror=unused-variable] unsigned long nr_ugas; ^~~~~~~ Fixes: 2732ea0d5c0a ("efi/libstub: Use a helper to iterate over a EFI handle array") Signed-off-by: Arnd Bergmann Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200113172245.27925-4-ardb@kernel.org --- arch/x86/boot/compressed/eboot.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index ab3a40283db7..82e26d0ff075 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -118,7 +118,6 @@ static void setup_efi_pci(struct boot_params *params) void **pci_handle = NULL; efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID; unsigned long size = 0; - unsigned long nr_pci; struct setup_data *data; efi_handle_t h; int i; @@ -245,7 +244,6 @@ setup_uga(struct screen_info *si, efi_guid_t *uga_proto, unsigned long size) u32 width, height; void **uga_handle = NULL; efi_uga_draw_protocol_t *uga = NULL, *first_uga; - unsigned long nr_ugas; efi_handle_t handle; int i; -- cgit v1.2.3-59-g8ed1b From 75fbef0a8b6b4bb19b9a91b5214f846c2dc5139e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 13 Jan 2020 18:22:36 +0100 Subject: x86/mm: Fix NX bit clearing issue in kernel_map_pages_in_pgd The following commit: 15f003d20782 ("x86/mm/pat: Don't implicitly allow _PAGE_RW in kernel_map_pages_in_pgd()") modified kernel_map_pages_in_pgd() to manage writable permissions of memory mappings in the EFI page table in a different way, but in the process, it removed the ability to clear NX attributes from read-only mappings, by clobbering the clear mask if _PAGE_RW is not being requested. Failure to remove the NX attribute from read-only mappings is unlikely to be a security issue, but it does prevent us from tightening the permissions in the EFI page tables going forward, so let's fix it now. Fixes: 15f003d20782 ("x86/mm/pat: Don't implicitly allow _PAGE_RW in kernel_map_pages_in_pgd() Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200113172245.27925-5-ardb@kernel.org --- arch/x86/mm/pat/set_memory.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 20823392f4f2..62a8ebe72a52 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -2215,7 +2215,7 @@ int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, .pgd = pgd, .numpages = numpages, .mask_set = __pgprot(0), - .mask_clr = __pgprot(0), + .mask_clr = __pgprot(~page_flags & (_PAGE_NX|_PAGE_RW)), .flags = 0, }; @@ -2224,12 +2224,6 @@ int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, if (!(__supported_pte_mask & _PAGE_NX)) goto out; - if (!(page_flags & _PAGE_NX)) - cpa.mask_clr = __pgprot(_PAGE_NX); - - if (!(page_flags & _PAGE_RW)) - cpa.mask_clr = __pgprot(_PAGE_RW); - if (!(page_flags & _PAGE_ENC)) cpa.mask_clr = pgprot_encrypted(cpa.mask_clr); -- cgit v1.2.3-59-g8ed1b From d9e3d2c4f103200d87f2c243a84c1fd3b3bfea8c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 13 Jan 2020 18:22:37 +0100 Subject: efi/x86: Don't map the entire kernel text RW for mixed mode The mixed mode thunking routine requires a part of it to be mapped 1:1, and for this reason, we currently map the entire kernel .text read/write in the EFI page tables, which is bad. In fact, the kernel_map_pages_in_pgd() invocation that installs this mapping is entirely redundant, since all of DRAM is already 1:1 mapped read/write in the EFI page tables when we reach this point, which means that .rodata is mapped read-write as well. So let's remap both .text and .rodata read-only in the EFI page tables. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200113172245.27925-6-ardb@kernel.org --- arch/x86/platform/efi/efi_64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index c13fa2150976..6ec58ff60b56 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -391,11 +391,11 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) efi_scratch.phys_stack = page_to_phys(page + 1); /* stack grows down */ - npages = (_etext - _text) >> PAGE_SHIFT; + npages = (__end_rodata_aligned - _text) >> PAGE_SHIFT; text = __pa(_text); pfn = text >> PAGE_SHIFT; - pf = _PAGE_RW | _PAGE_ENC; + pf = _PAGE_ENC; if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, pf)) { pr_err("Failed to map kernel text 1:1\n"); return 1; -- cgit v1.2.3-59-g8ed1b From 97bb9cdc32108036170d9d0d208257168f80d9e9 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 13 Jan 2020 18:22:38 +0100 Subject: efi/x86: Avoid RWX mappings for all of DRAM The EFI code creates RWX mappings for all memory regions that are occupied after the stub completes, and in the mixed mode case, it even creates RWX mappings for all of the remaining DRAM as well. Let's try to avoid this, by setting the NX bit for all memory regions except the ones that are marked as EFI runtime services code [which means text+rodata+data in practice, so we cannot mark them read-only right away]. For cases of buggy firmware where boot services code is called during SetVirtualAddressMap(), map those regions with exec permissions as well - they will be unmapped in efi_free_boot_services(). Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200113172245.27925-7-ardb@kernel.org --- arch/x86/platform/efi/efi_64.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 6ec58ff60b56..3eb23966e30a 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -365,10 +365,6 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) * as trim_bios_range() will reserve the first page and isolate it away * from memory allocators anyway. */ - pf = _PAGE_RW; - if (sev_active()) - pf |= _PAGE_ENC; - if (kernel_map_pages_in_pgd(pgd, 0x0, 0x0, 1, pf)) { pr_err("Failed to create 1:1 mapping for the first page!\n"); return 1; @@ -410,6 +406,22 @@ static void __init __map_region(efi_memory_desc_t *md, u64 va) unsigned long pfn; pgd_t *pgd = efi_mm.pgd; + /* + * EFI_RUNTIME_SERVICES_CODE regions typically cover PE/COFF + * executable images in memory that consist of both R-X and + * RW- sections, so we cannot apply read-only or non-exec + * permissions just yet. However, modern EFI systems provide + * a memory attributes table that describes those sections + * with the appropriate restricted permissions, which are + * applied in efi_runtime_update_mappings() below. All other + * regions can be mapped non-executable at this point, with + * the exception of boot services code regions, but those will + * be unmapped again entirely in efi_free_boot_services(). + */ + if (md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_RUNTIME_SERVICES_CODE) + flags |= _PAGE_NX; + if (!(md->attribute & EFI_MEMORY_WB)) flags |= _PAGE_PCD; -- cgit v1.2.3-59-g8ed1b From 1f299fad1e312947c974c6a1d8a3a484f27a6111 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 13 Jan 2020 18:22:39 +0100 Subject: efi/x86: Limit EFI old memory map to SGI UV machines We carry a quirk in the x86 EFI code to switch back to an older method of mapping the EFI runtime services memory regions, because it was deemed risky at the time to implement a new method without providing a fallback to the old method in case problems arose. Such problems did arise, but they appear to be limited to SGI UV1 machines, and so these are the only ones for which the fallback gets enabled automatically (via a DMI quirk). The fallback can be enabled manually as well, by passing efi=old_map, but there is very little evidence that suggests that this is something that is being relied upon in the field. Given that UV1 support is not enabled by default by the distros (Ubuntu, Fedora), there is no point in carrying this fallback code all the time if there are no other users. So let's move it into the UV support code, and document that efi=old_map now requires this support code to be enabled. Note that efi=old_map has been used in the past on other SGI UV machines to work around kernel regressions in production, so we keep the option to enable it by hand, but only if the kernel was built with UV support. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200113172245.27925-8-ardb@kernel.org --- Documentation/admin-guide/kernel-parameters.txt | 3 +- arch/x86/include/asm/efi.h | 26 ++-- arch/x86/kernel/kexec-bzimage64.c | 2 +- arch/x86/platform/efi/efi.c | 30 ++--- arch/x86/platform/efi/efi_64.c | 166 ++---------------------- arch/x86/platform/efi/quirks.c | 21 ++- arch/x86/platform/uv/bios_uv.c | 164 ++++++++++++++++++++++- 7 files changed, 211 insertions(+), 201 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 994632ae48de..e5f043f0342a 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1168,8 +1168,7 @@ "nosoftreserve", "disable_early_pci_dma", "no_disable_early_pci_dma" } old_map [X86-64]: switch to the old ioremap-based EFI - runtime services mapping. 32-bit still uses this one by - default. + runtime services mapping. [Needs CONFIG_X86_UV=y] nochunk: disable reading files in "chunks" in the EFI boot stub, as chunking can cause problems with some firmware implementations. diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 0a58468a7203..86169a24b0d8 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -20,13 +20,16 @@ * This is the main reason why we're doing stable VA mappings for RT * services. * - * This flag is used in conjunction with a chicken bit called - * "efi=old_map" which can be used as a fallback to the old runtime - * services mapping method in case there's some b0rkage with a - * particular EFI implementation (haha, it is hard to hold up the - * sarcasm here...). + * SGI UV1 machines are known to be incompatible with this scheme, so we + * provide an opt-out for these machines via a DMI quirk that sets the + * attribute below. */ -#define EFI_OLD_MEMMAP EFI_ARCH_1 +#define EFI_UV1_MEMMAP EFI_ARCH_1 + +static inline bool efi_have_uv1_memmap(void) +{ + return IS_ENABLED(CONFIG_X86_UV) && efi_enabled(EFI_UV1_MEMMAP); +} #define EFI32_LOADER_SIGNATURE "EL32" #define EFI64_LOADER_SIGNATURE "EL64" @@ -119,7 +122,7 @@ struct efi_scratch { kernel_fpu_begin(); \ firmware_restrict_branch_speculation_start(); \ \ - if (!efi_enabled(EFI_OLD_MEMMAP)) \ + if (!efi_have_uv1_memmap()) \ efi_switch_mm(&efi_mm); \ }) @@ -128,7 +131,7 @@ struct efi_scratch { #define arch_efi_call_virt_teardown() \ ({ \ - if (!efi_enabled(EFI_OLD_MEMMAP)) \ + if (!efi_have_uv1_memmap()) \ efi_switch_mm(efi_scratch.prev_mm); \ \ firmware_restrict_branch_speculation_end(); \ @@ -172,6 +175,8 @@ extern void efi_delete_dummy_variable(void); extern void efi_switch_mm(struct mm_struct *mm); extern void efi_recover_from_page_fault(unsigned long phys_addr); extern void efi_free_boot_services(void); +extern pgd_t * __init efi_uv1_memmap_phys_prolog(void); +extern void __init efi_uv1_memmap_phys_epilog(pgd_t *save_pgd); struct efi_setup_data { u64 fw_vendor; @@ -203,10 +208,7 @@ static inline bool efi_runtime_supported(void) if (IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT)) return true; - if (IS_ENABLED(CONFIG_EFI_MIXED) && !efi_enabled(EFI_OLD_MEMMAP)) - return true; - - return false; + return IS_ENABLED(CONFIG_EFI_MIXED); } extern void parse_efi_setup(u64 phys_addr, u32 data_len); diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index d2f4e706a428..f293d872602a 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -177,7 +177,7 @@ setup_efi_state(struct boot_params *params, unsigned long params_load_addr, * acpi_rsdp= on kernel command line to make second kernel boot * without efi. */ - if (efi_enabled(EFI_OLD_MEMMAP)) + if (efi_have_uv1_memmap()) return 0; params->secure_boot = boot_params.secure_boot; diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index b931c4bea284..4e46d2d24352 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -497,6 +497,8 @@ void __init efi_init(void) efi_print_memmap(); } +#if defined(CONFIG_X86_32) || defined(CONFIG_X86_UV) + void __init efi_set_executable(efi_memory_desc_t *md, bool executable) { u64 addr, npages; @@ -561,6 +563,8 @@ void __init old_map_region(efi_memory_desc_t *md) (unsigned long long)md->phys_addr); } +#endif + /* Merge contiguous regions of the same type and attribute */ static void __init efi_merge_regions(void) { @@ -659,7 +663,7 @@ static inline void *efi_map_next_entry_reverse(void *entry) */ static void *efi_map_next_entry(void *entry) { - if (!efi_enabled(EFI_OLD_MEMMAP) && efi_enabled(EFI_64BIT)) { + if (!efi_have_uv1_memmap() && efi_enabled(EFI_64BIT)) { /* * Starting in UEFI v2.5 the EFI_PROPERTIES_TABLE * config table feature requires us to map all entries @@ -791,11 +795,11 @@ static void __init kexec_enter_virtual_mode(void) /* * We don't do virtual mode, since we don't do runtime services, on - * non-native EFI. With efi=old_map, we don't do runtime services in + * non-native EFI. With the UV1 memmap, we don't do runtime services in * kexec kernel because in the initial boot something else might * have been mapped at these virtual addresses. */ - if (efi_is_mixed() || efi_enabled(EFI_OLD_MEMMAP)) { + if (efi_is_mixed() || efi_have_uv1_memmap()) { efi_memmap_unmap(); clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); return; @@ -861,9 +865,9 @@ static void __init kexec_enter_virtual_mode(void) * * The old method which used to update that memory descriptor with the * virtual address obtained from ioremap() is still supported when the - * kernel is booted with efi=old_map on its command line. Same old - * method enabled the runtime services to be called without having to - * thunk back into physical mode for every invocation. + * kernel is booted on SG1 UV1 hardware. Same old method enabled the + * runtime services to be called without having to thunk back into + * physical mode for every invocation. * * The new method does a pagetable switch in a preemption-safe manner * so that we're in a different address space when calling a runtime @@ -976,20 +980,6 @@ void __init efi_enter_virtual_mode(void) efi_dump_pagetable(); } -static int __init arch_parse_efi_cmdline(char *str) -{ - if (!str) { - pr_warn("need at least one option\n"); - return -EINVAL; - } - - if (parse_option_str(str, "old_map")) - set_bit(EFI_OLD_MEMMAP, &efi.flags); - - return 0; -} -early_param("efi", arch_parse_efi_cmdline); - bool efi_is_table_address(unsigned long phys_addr) { unsigned int i; diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 3eb23966e30a..8d1869ff1033 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -57,134 +57,6 @@ static u64 efi_va = EFI_VA_START; struct efi_scratch efi_scratch; -static void __init early_code_mapping_set_exec(int executable) -{ - efi_memory_desc_t *md; - - if (!(__supported_pte_mask & _PAGE_NX)) - return; - - /* Make EFI service code area executable */ - for_each_efi_memory_desc(md) { - if (md->type == EFI_RUNTIME_SERVICES_CODE || - md->type == EFI_BOOT_SERVICES_CODE) - efi_set_executable(md, executable); - } -} - -static void __init efi_old_memmap_phys_epilog(pgd_t *save_pgd); - -static pgd_t * __init efi_old_memmap_phys_prolog(void) -{ - unsigned long vaddr, addr_pgd, addr_p4d, addr_pud; - pgd_t *save_pgd, *pgd_k, *pgd_efi; - p4d_t *p4d, *p4d_k, *p4d_efi; - pud_t *pud; - - int pgd; - int n_pgds, i, j; - - early_code_mapping_set_exec(1); - - n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE); - save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL); - if (!save_pgd) - return NULL; - - /* - * Build 1:1 identity mapping for efi=old_map usage. Note that - * PAGE_OFFSET is PGDIR_SIZE aligned when KASLR is disabled, while - * it is PUD_SIZE ALIGNED with KASLR enabled. So for a given physical - * address X, the pud_index(X) != pud_index(__va(X)), we can only copy - * PUD entry of __va(X) to fill in pud entry of X to build 1:1 mapping. - * This means here we can only reuse the PMD tables of the direct mapping. - */ - for (pgd = 0; pgd < n_pgds; pgd++) { - addr_pgd = (unsigned long)(pgd * PGDIR_SIZE); - vaddr = (unsigned long)__va(pgd * PGDIR_SIZE); - pgd_efi = pgd_offset_k(addr_pgd); - save_pgd[pgd] = *pgd_efi; - - p4d = p4d_alloc(&init_mm, pgd_efi, addr_pgd); - if (!p4d) { - pr_err("Failed to allocate p4d table!\n"); - goto out; - } - - for (i = 0; i < PTRS_PER_P4D; i++) { - addr_p4d = addr_pgd + i * P4D_SIZE; - p4d_efi = p4d + p4d_index(addr_p4d); - - pud = pud_alloc(&init_mm, p4d_efi, addr_p4d); - if (!pud) { - pr_err("Failed to allocate pud table!\n"); - goto out; - } - - for (j = 0; j < PTRS_PER_PUD; j++) { - addr_pud = addr_p4d + j * PUD_SIZE; - - if (addr_pud > (max_pfn << PAGE_SHIFT)) - break; - - vaddr = (unsigned long)__va(addr_pud); - - pgd_k = pgd_offset_k(vaddr); - p4d_k = p4d_offset(pgd_k, vaddr); - pud[j] = *pud_offset(p4d_k, vaddr); - } - } - pgd_offset_k(pgd * PGDIR_SIZE)->pgd &= ~_PAGE_NX; - } - - __flush_tlb_all(); - return save_pgd; -out: - efi_old_memmap_phys_epilog(save_pgd); - return NULL; -} - -static void __init efi_old_memmap_phys_epilog(pgd_t *save_pgd) -{ - /* - * After the lock is released, the original page table is restored. - */ - int pgd_idx, i; - int nr_pgds; - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - - nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); - - for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) { - pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE); - set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]); - - if (!pgd_present(*pgd)) - continue; - - for (i = 0; i < PTRS_PER_P4D; i++) { - p4d = p4d_offset(pgd, - pgd_idx * PGDIR_SIZE + i * P4D_SIZE); - - if (!p4d_present(*p4d)) - continue; - - pud = (pud_t *)p4d_page_vaddr(*p4d); - pud_free(&init_mm, pud); - } - - p4d = (p4d_t *)pgd_page_vaddr(*pgd); - p4d_free(&init_mm, p4d); - } - - kfree(save_pgd); - - __flush_tlb_all(); - early_code_mapping_set_exec(0); -} - EXPORT_SYMBOL_GPL(efi_mm); /* @@ -203,7 +75,7 @@ int __init efi_alloc_page_tables(void) pud_t *pud; gfp_t gfp_mask; - if (efi_enabled(EFI_OLD_MEMMAP)) + if (efi_have_uv1_memmap()) return 0; gfp_mask = GFP_KERNEL | __GFP_ZERO; @@ -244,7 +116,7 @@ void efi_sync_low_kernel_mappings(void) pud_t *pud_k, *pud_efi; pgd_t *efi_pgd = efi_mm.pgd; - if (efi_enabled(EFI_OLD_MEMMAP)) + if (efi_have_uv1_memmap()) return; /* @@ -338,7 +210,7 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) unsigned npages; pgd_t *pgd = efi_mm.pgd; - if (efi_enabled(EFI_OLD_MEMMAP)) + if (efi_have_uv1_memmap()) return 0; /* @@ -439,7 +311,7 @@ void __init efi_map_region(efi_memory_desc_t *md) unsigned long size = md->num_pages << PAGE_SHIFT; u64 pa = md->phys_addr; - if (efi_enabled(EFI_OLD_MEMMAP)) + if (efi_have_uv1_memmap()) return old_map_region(md); /* @@ -496,26 +368,6 @@ void __init efi_map_region_fixed(efi_memory_desc_t *md) __map_region(md, md->virt_addr); } -void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, - u32 type, u64 attribute) -{ - unsigned long last_map_pfn; - - if (type == EFI_MEMORY_MAPPED_IO) - return ioremap(phys_addr, size); - - last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); - if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) { - unsigned long top = last_map_pfn << PAGE_SHIFT; - efi_ioremap(top, size - (top - phys_addr), type, attribute); - } - - if (!(attribute & EFI_MEMORY_WB)) - efi_memory_uc((u64)(unsigned long)__va(phys_addr), size); - - return (void __iomem *)__va(phys_addr); -} - void __init parse_efi_setup(u64 phys_addr, u32 data_len) { efi_setup = phys_addr + sizeof(struct setup_data); @@ -564,7 +416,7 @@ void __init efi_runtime_update_mappings(void) { efi_memory_desc_t *md; - if (efi_enabled(EFI_OLD_MEMMAP)) { + if (efi_have_uv1_memmap()) { if (__supported_pte_mask & _PAGE_NX) runtime_code_page_mkexec(); return; @@ -618,7 +470,7 @@ void __init efi_runtime_update_mappings(void) void __init efi_dump_pagetable(void) { #ifdef CONFIG_EFI_PGT_DUMP - if (efi_enabled(EFI_OLD_MEMMAP)) + if (efi_have_uv1_memmap()) ptdump_walk_pgd_level(NULL, swapper_pg_dir); else ptdump_walk_pgd_level(NULL, efi_mm.pgd); @@ -1045,8 +897,8 @@ efi_status_t __init efi_set_virtual_address_map(unsigned long memory_map_size, descriptor_version, virtual_map); - if (efi_enabled(EFI_OLD_MEMMAP)) { - save_pgd = efi_old_memmap_phys_prolog(); + if (efi_have_uv1_memmap()) { + save_pgd = efi_uv1_memmap_phys_prolog(); if (!save_pgd) return EFI_ABORTED; } else { @@ -1065,7 +917,7 @@ efi_status_t __init efi_set_virtual_address_map(unsigned long memory_map_size, kernel_fpu_end(); if (save_pgd) - efi_old_memmap_phys_epilog(save_pgd); + efi_uv1_memmap_phys_epilog(save_pgd); else efi_switch_mm(efi_scratch.prev_mm); diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index eb421cb35108..fe46ddf6c761 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -384,10 +384,10 @@ static void __init efi_unmap_pages(efi_memory_desc_t *md) /* * To Do: Remove this check after adding functionality to unmap EFI boot - * services code/data regions from direct mapping area because - * "efi=old_map" maps EFI regions in swapper_pg_dir. + * services code/data regions from direct mapping area because the UV1 + * memory map maps EFI regions in swapper_pg_dir. */ - if (efi_enabled(EFI_OLD_MEMMAP)) + if (efi_have_uv1_memmap()) return; /* @@ -558,7 +558,7 @@ out: return ret; } -static const struct dmi_system_id sgi_uv1_dmi[] = { +static const struct dmi_system_id sgi_uv1_dmi[] __initconst = { { NULL, "SGI UV1", { DMI_MATCH(DMI_PRODUCT_NAME, "Stoutland Platform"), DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"), @@ -581,8 +581,15 @@ void __init efi_apply_memmap_quirks(void) } /* UV2+ BIOS has a fix for this issue. UV1 still needs the quirk. */ - if (dmi_check_system(sgi_uv1_dmi)) - set_bit(EFI_OLD_MEMMAP, &efi.flags); + if (dmi_check_system(sgi_uv1_dmi)) { + if (IS_ENABLED(CONFIG_X86_UV)) { + set_bit(EFI_UV1_MEMMAP, &efi.flags); + } else { + pr_warn("EFI runtime disabled, needs CONFIG_X86_UV=y on UV1\n"); + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + efi_memmap_unmap(); + } + } } /* @@ -720,7 +727,7 @@ void efi_recover_from_page_fault(unsigned long phys_addr) /* * Make sure that an efi runtime service caused the page fault. * "efi_mm" cannot be used to check if the page fault had occurred - * in the firmware context because efi=old_map doesn't use efi_pgd. + * in the firmware context because the UV1 memmap doesn't use efi_pgd. */ if (efi_rts_work.efi_rts_id == EFI_NONE) return; diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c index 5c0e2eb5d87c..7c2b8c5d0b49 100644 --- a/arch/x86/platform/uv/bios_uv.c +++ b/arch/x86/platform/uv/bios_uv.c @@ -31,10 +31,10 @@ static s64 __uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, return BIOS_STATUS_UNIMPLEMENTED; /* - * If EFI_OLD_MEMMAP is set, we need to fall back to using our old EFI + * If EFI_UV1_MEMMAP is set, we need to fall back to using our old EFI * callback method, which uses efi_call() directly, with the kernel page tables: */ - if (unlikely(efi_enabled(EFI_OLD_MEMMAP))) { + if (unlikely(efi_enabled(EFI_UV1_MEMMAP))) { kernel_fpu_begin(); ret = efi_call((void *)__va(tab->function), (u64)which, a1, a2, a3, a4, a5); kernel_fpu_end(); @@ -217,3 +217,163 @@ int uv_bios_init(void) pr_info("UV: UVsystab: Revision:%x\n", uv_systab->revision); return 0; } + +static void __init early_code_mapping_set_exec(int executable) +{ + efi_memory_desc_t *md; + + if (!(__supported_pte_mask & _PAGE_NX)) + return; + + /* Make EFI service code area executable */ + for_each_efi_memory_desc(md) { + if (md->type == EFI_RUNTIME_SERVICES_CODE || + md->type == EFI_BOOT_SERVICES_CODE) + efi_set_executable(md, executable); + } +} + +void __init efi_uv1_memmap_phys_epilog(pgd_t *save_pgd) +{ + /* + * After the lock is released, the original page table is restored. + */ + int pgd_idx, i; + int nr_pgds; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + + nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); + + for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) { + pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE); + set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]); + + if (!pgd_present(*pgd)) + continue; + + for (i = 0; i < PTRS_PER_P4D; i++) { + p4d = p4d_offset(pgd, + pgd_idx * PGDIR_SIZE + i * P4D_SIZE); + + if (!p4d_present(*p4d)) + continue; + + pud = (pud_t *)p4d_page_vaddr(*p4d); + pud_free(&init_mm, pud); + } + + p4d = (p4d_t *)pgd_page_vaddr(*pgd); + p4d_free(&init_mm, p4d); + } + + kfree(save_pgd); + + __flush_tlb_all(); + early_code_mapping_set_exec(0); +} + +pgd_t * __init efi_uv1_memmap_phys_prolog(void) +{ + unsigned long vaddr, addr_pgd, addr_p4d, addr_pud; + pgd_t *save_pgd, *pgd_k, *pgd_efi; + p4d_t *p4d, *p4d_k, *p4d_efi; + pud_t *pud; + + int pgd; + int n_pgds, i, j; + + early_code_mapping_set_exec(1); + + n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE); + save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL); + if (!save_pgd) + return NULL; + + /* + * Build 1:1 identity mapping for UV1 memmap usage. Note that + * PAGE_OFFSET is PGDIR_SIZE aligned when KASLR is disabled, while + * it is PUD_SIZE ALIGNED with KASLR enabled. So for a given physical + * address X, the pud_index(X) != pud_index(__va(X)), we can only copy + * PUD entry of __va(X) to fill in pud entry of X to build 1:1 mapping. + * This means here we can only reuse the PMD tables of the direct mapping. + */ + for (pgd = 0; pgd < n_pgds; pgd++) { + addr_pgd = (unsigned long)(pgd * PGDIR_SIZE); + vaddr = (unsigned long)__va(pgd * PGDIR_SIZE); + pgd_efi = pgd_offset_k(addr_pgd); + save_pgd[pgd] = *pgd_efi; + + p4d = p4d_alloc(&init_mm, pgd_efi, addr_pgd); + if (!p4d) { + pr_err("Failed to allocate p4d table!\n"); + goto out; + } + + for (i = 0; i < PTRS_PER_P4D; i++) { + addr_p4d = addr_pgd + i * P4D_SIZE; + p4d_efi = p4d + p4d_index(addr_p4d); + + pud = pud_alloc(&init_mm, p4d_efi, addr_p4d); + if (!pud) { + pr_err("Failed to allocate pud table!\n"); + goto out; + } + + for (j = 0; j < PTRS_PER_PUD; j++) { + addr_pud = addr_p4d + j * PUD_SIZE; + + if (addr_pud > (max_pfn << PAGE_SHIFT)) + break; + + vaddr = (unsigned long)__va(addr_pud); + + pgd_k = pgd_offset_k(vaddr); + p4d_k = p4d_offset(pgd_k, vaddr); + pud[j] = *pud_offset(p4d_k, vaddr); + } + } + pgd_offset_k(pgd * PGDIR_SIZE)->pgd &= ~_PAGE_NX; + } + + __flush_tlb_all(); + return save_pgd; +out: + efi_uv1_memmap_phys_epilog(save_pgd); + return NULL; +} + +void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, + u32 type, u64 attribute) +{ + unsigned long last_map_pfn; + + if (type == EFI_MEMORY_MAPPED_IO) + return ioremap(phys_addr, size); + + last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); + if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) { + unsigned long top = last_map_pfn << PAGE_SHIFT; + efi_ioremap(top, size - (top - phys_addr), type, attribute); + } + + if (!(attribute & EFI_MEMORY_WB)) + efi_memory_uc((u64)(unsigned long)__va(phys_addr), size); + + return (void __iomem *)__va(phys_addr); +} + +static int __init arch_parse_efi_cmdline(char *str) +{ + if (!str) { + pr_warn("need at least one option\n"); + return -EINVAL; + } + + if (parse_option_str(str, "old_map")) + set_bit(EFI_UV1_MEMMAP, &efi.flags); + + return 0; +} +early_param("efi", arch_parse_efi_cmdline); -- cgit v1.2.3-59-g8ed1b From 64c8a0cd0a535891d5905c3a1651150f0f141439 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 13 Jan 2020 18:22:40 +0100 Subject: efi/arm: Defer probe of PCIe backed efifb on DT systems The new of_devlink support breaks PCIe probing on ARM platforms booting via UEFI if the firmware exposes a EFI framebuffer that is backed by a PCI device. The reason is that the probing order gets reversed, resulting in a resource conflict on the framebuffer memory window when the PCIe probes last, causing it to give up entirely. Given that we rely on PCI quirks to deal with EFI framebuffers that get moved around in memory, we cannot simply drop the memory reservation, so instead, let's use the device link infrastructure to register this dependency, and force the probing to occur in the expected order. Co-developed-by: Saravana Kannan Signed-off-by: Ard Biesheuvel Signed-off-by: Saravana Kannan Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200113172245.27925-9-ardb@kernel.org --- drivers/firmware/efi/arm-init.c | 107 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 103 insertions(+), 4 deletions(-) diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c index 904fa09e6a6b..d99f5b0c8a09 100644 --- a/drivers/firmware/efi/arm-init.c +++ b/drivers/firmware/efi/arm-init.c @@ -10,10 +10,12 @@ #define pr_fmt(fmt) "efi: " fmt #include +#include #include #include #include #include +#include #include #include #include @@ -276,15 +278,112 @@ void __init efi_init(void) efi_memmap_unmap(); } +static bool efifb_overlaps_pci_range(const struct of_pci_range *range) +{ + u64 fb_base = screen_info.lfb_base; + + if (screen_info.capabilities & VIDEO_CAPABILITY_64BIT_BASE) + fb_base |= (u64)(unsigned long)screen_info.ext_lfb_base << 32; + + return fb_base >= range->cpu_addr && + fb_base < (range->cpu_addr + range->size); +} + +static struct device_node *find_pci_overlap_node(void) +{ + struct device_node *np; + + for_each_node_by_type(np, "pci") { + struct of_pci_range_parser parser; + struct of_pci_range range; + int err; + + err = of_pci_range_parser_init(&parser, np); + if (err) { + pr_warn("of_pci_range_parser_init() failed: %d\n", err); + continue; + } + + for_each_of_pci_range(&parser, &range) + if (efifb_overlaps_pci_range(&range)) + return np; + } + return NULL; +} + +/* + * If the efifb framebuffer is backed by a PCI graphics controller, we have + * to ensure that this relation is expressed using a device link when + * running in DT mode, or the probe order may be reversed, resulting in a + * resource reservation conflict on the memory window that the efifb + * framebuffer steals from the PCIe host bridge. + */ +static int efifb_add_links(const struct fwnode_handle *fwnode, + struct device *dev) +{ + struct device_node *sup_np; + struct device *sup_dev; + + sup_np = find_pci_overlap_node(); + + /* + * If there's no PCI graphics controller backing the efifb, we are + * done here. + */ + if (!sup_np) + return 0; + + sup_dev = get_dev_from_fwnode(&sup_np->fwnode); + of_node_put(sup_np); + + /* + * Return -ENODEV if the PCI graphics controller device hasn't been + * registered yet. This ensures that efifb isn't allowed to probe + * and this function is retried again when new devices are + * registered. + */ + if (!sup_dev) + return -ENODEV; + + /* + * If this fails, retrying this function at a later point won't + * change anything. So, don't return an error after this. + */ + if (!device_link_add(dev, sup_dev, 0)) + dev_warn(dev, "device_link_add() failed\n"); + + put_device(sup_dev); + + return 0; +} + +static const struct fwnode_operations efifb_fwnode_ops = { + .add_links = efifb_add_links, +}; + +static struct fwnode_handle efifb_fwnode = { + .ops = &efifb_fwnode_ops, +}; + static int __init register_gop_device(void) { - void *pd; + struct platform_device *pd; + int err; if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI) return 0; - pd = platform_device_register_data(NULL, "efi-framebuffer", 0, - &screen_info, sizeof(screen_info)); - return PTR_ERR_OR_ZERO(pd); + pd = platform_device_alloc("efi-framebuffer", 0); + if (!pd) + return -ENOMEM; + + if (IS_ENABLED(CONFIG_PCI)) + pd->dev.fwnode = &efifb_fwnode; + + err = platform_device_add_data(pd, &screen_info, sizeof(screen_info)); + if (err) + return err; + + return platform_device_add(pd); } subsys_initcall(register_gop_device); -- cgit v1.2.3-59-g8ed1b From 62b605b53ad4dc6d9ec11ab4c7aa61df10b76af6 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 13 Jan 2020 18:22:41 +0100 Subject: efi: Fix comment for efi_mem_type() wrt absent physical addresses A previous commit f99afd08a45f ("efi: Update efi_mem_type() to return an error rather than 0") changed the return value from EFI_RESERVED_TYPE to -EINVAL when the searched physical address is not present in any memory descriptor. But the comment preceding the function never changed. Let's change the comment now to reflect the new return value -EINVAL. Signed-off-by: Anshuman Khandual Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200113172245.27925-10-ardb@kernel.org --- drivers/firmware/efi/efi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 2b02cb165f16..621220ab3d0e 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -908,7 +908,7 @@ u64 efi_mem_attributes(unsigned long phys_addr) * * Search in the EFI memory map for the region covering @phys_addr. * Returns the EFI memory type if the region was found in the memory - * map, EFI_RESERVED_TYPE (zero) otherwise. + * map, -EINVAL otherwise. */ int efi_mem_type(unsigned long phys_addr) { -- cgit v1.2.3-59-g8ed1b From 26c0e44a213b272abec0e8fba4a5a2801f95208e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 13 Jan 2020 18:22:42 +0100 Subject: efi: Add a flags parameter to efi_memory_map In preparation for garbage collecting dynamically allocated EFI memory maps, where the allocation method of memblock vs slab needs to be recalled, convert the existing 'late' flag into a 'flags' bitmask. Arrange for the flag to be passed via 'struct efi_memory_map_data'. This structure grows additional flags in follow-on changes. Signed-off-by: Dan Williams Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200113172245.27925-11-ardb@kernel.org --- drivers/firmware/efi/memmap.c | 31 +++++++++++++++++-------------- include/linux/efi.h | 4 +++- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/drivers/firmware/efi/memmap.c b/drivers/firmware/efi/memmap.c index 38b686c67b17..4f98eb12c172 100644 --- a/drivers/firmware/efi/memmap.c +++ b/drivers/firmware/efi/memmap.c @@ -52,21 +52,20 @@ phys_addr_t __init efi_memmap_alloc(unsigned int num_entries) /** * __efi_memmap_init - Common code for mapping the EFI memory map * @data: EFI memory map data - * @late: Use early or late mapping function? * * This function takes care of figuring out which function to use to * map the EFI memory map in efi.memmap based on how far into the boot * we are. * - * During bootup @late should be %false since we only have access to - * the early_memremap*() functions as the vmalloc space isn't setup. - * Once the kernel is fully booted we can fallback to the more robust - * memremap*() API. + * During bootup EFI_MEMMAP_LATE in data->flags should be clear since we + * only have access to the early_memremap*() functions as the vmalloc + * space isn't setup. Once the kernel is fully booted we can fallback + * to the more robust memremap*() API. * * Returns zero on success, a negative error code on failure. */ static int __init -__efi_memmap_init(struct efi_memory_map_data *data, bool late) +__efi_memmap_init(struct efi_memory_map_data *data) { struct efi_memory_map map; phys_addr_t phys_map; @@ -76,7 +75,7 @@ __efi_memmap_init(struct efi_memory_map_data *data, bool late) phys_map = data->phys_map; - if (late) + if (data->flags & EFI_MEMMAP_LATE) map.map = memremap(phys_map, data->size, MEMREMAP_WB); else map.map = early_memremap(phys_map, data->size); @@ -92,7 +91,7 @@ __efi_memmap_init(struct efi_memory_map_data *data, bool late) map.desc_version = data->desc_version; map.desc_size = data->desc_size; - map.late = late; + map.flags = data->flags; set_bit(EFI_MEMMAP, &efi.flags); @@ -111,9 +110,10 @@ __efi_memmap_init(struct efi_memory_map_data *data, bool late) int __init efi_memmap_init_early(struct efi_memory_map_data *data) { /* Cannot go backwards */ - WARN_ON(efi.memmap.late); + WARN_ON(efi.memmap.flags & EFI_MEMMAP_LATE); - return __efi_memmap_init(data, false); + data->flags = 0; + return __efi_memmap_init(data); } void __init efi_memmap_unmap(void) @@ -121,7 +121,7 @@ void __init efi_memmap_unmap(void) if (!efi_enabled(EFI_MEMMAP)) return; - if (!efi.memmap.late) { + if (!(efi.memmap.flags & EFI_MEMMAP_LATE)) { unsigned long size; size = efi.memmap.desc_size * efi.memmap.nr_map; @@ -162,13 +162,14 @@ int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size) struct efi_memory_map_data data = { .phys_map = addr, .size = size, + .flags = EFI_MEMMAP_LATE, }; /* Did we forget to unmap the early EFI memmap? */ WARN_ON(efi.memmap.map); /* Were we already called? */ - WARN_ON(efi.memmap.late); + WARN_ON(efi.memmap.flags & EFI_MEMMAP_LATE); /* * It makes no sense to allow callers to register different @@ -178,7 +179,7 @@ int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size) data.desc_version = efi.memmap.desc_version; data.desc_size = efi.memmap.desc_size; - return __efi_memmap_init(&data, true); + return __efi_memmap_init(&data); } /** @@ -195,6 +196,7 @@ int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size) int __init efi_memmap_install(phys_addr_t addr, unsigned int nr_map) { struct efi_memory_map_data data; + unsigned long flags; efi_memmap_unmap(); @@ -202,8 +204,9 @@ int __init efi_memmap_install(phys_addr_t addr, unsigned int nr_map) data.size = efi.memmap.desc_size * nr_map; data.desc_version = efi.memmap.desc_version; data.desc_size = efi.memmap.desc_size; + data.flags = efi.memmap.flags & EFI_MEMMAP_LATE; - return __efi_memmap_init(&data, efi.memmap.late); + return __efi_memmap_init(&data); } /** diff --git a/include/linux/efi.h b/include/linux/efi.h index 7e8e25b1d11c..f117d68c314e 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -767,6 +767,7 @@ struct efi_memory_map_data { unsigned long size; unsigned long desc_version; unsigned long desc_size; + unsigned long flags; }; struct efi_memory_map { @@ -776,7 +777,8 @@ struct efi_memory_map { int nr_map; unsigned long desc_version; unsigned long desc_size; - bool late; +#define EFI_MEMMAP_LATE (1UL << 0) + unsigned long flags; }; struct efi_mem_range { -- cgit v1.2.3-59-g8ed1b From 1db91035d01aa8bfa2350c00ccb63d629b4041ad Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 13 Jan 2020 18:22:43 +0100 Subject: efi: Add tracking for dynamically allocated memmaps In preparation for fixing efi_memmap_alloc() leaks, add support for recording whether the memmap was dynamically allocated from slab, memblock, or is the original physical memmap provided by the platform. Given this tracking is established in efi_memmap_alloc() and needs to be carried to efi_memmap_install(), use 'struct efi_memory_map_data' to convey the flags. Some small cleanups result from this reorganization, specifically the removal of local variables for 'phys' and 'size' that are already tracked in @data. Signed-off-by: Dan Williams Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200113172245.27925-12-ardb@kernel.org --- arch/x86/platform/efi/efi.c | 10 +++++++-- arch/x86/platform/efi/quirks.c | 23 ++++++++------------ drivers/firmware/efi/fake_mem.c | 14 ++++++------ drivers/firmware/efi/memmap.c | 47 +++++++++++++++++++++++------------------ include/linux/efi.h | 11 ++++++---- 5 files changed, 56 insertions(+), 49 deletions(-) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 4e46d2d24352..59f7f6d60cf6 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -304,10 +304,16 @@ static void __init efi_clean_memmap(void) } if (n_removal > 0) { - u64 size = efi.memmap.nr_map - n_removal; + struct efi_memory_map_data data = { + .phys_map = efi.memmap.phys_map, + .desc_version = efi.memmap.desc_version, + .desc_size = efi.memmap.desc_size, + .size = data.desc_size * (efi.memmap.nr_map - n_removal), + .flags = 0, + }; pr_warn("Removing %d invalid memory map entries.\n", n_removal); - efi_memmap_install(efi.memmap.phys_map, size); + efi_memmap_install(&data); } } diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index fe46ddf6c761..46807b7606da 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -243,7 +243,7 @@ EXPORT_SYMBOL_GPL(efi_query_variable_store); */ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size) { - phys_addr_t new_phys, new_size; + struct efi_memory_map_data data = { 0 }; struct efi_mem_range mr; efi_memory_desc_t md; int num_entries; @@ -271,24 +271,21 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size) num_entries = efi_memmap_split_count(&md, &mr.range); num_entries += efi.memmap.nr_map; - new_size = efi.memmap.desc_size * num_entries; - - new_phys = efi_memmap_alloc(num_entries); - if (!new_phys) { + if (efi_memmap_alloc(num_entries, &data) != 0) { pr_err("Could not allocate boot services memmap\n"); return; } - new = early_memremap(new_phys, new_size); + new = early_memremap(data.phys_map, data.size); if (!new) { pr_err("Failed to map new boot services memmap\n"); return; } efi_memmap_insert(&efi.memmap, new, &mr); - early_memunmap(new, new_size); + early_memunmap(new, data.size); - efi_memmap_install(new_phys, num_entries); + efi_memmap_install(&data); e820__range_update(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); e820__update_table(e820_table); } @@ -407,7 +404,7 @@ static void __init efi_unmap_pages(efi_memory_desc_t *md) void __init efi_free_boot_services(void) { - phys_addr_t new_phys, new_size; + struct efi_memory_map_data data = { 0 }; efi_memory_desc_t *md; int num_entries = 0; void *new, *new_md; @@ -462,14 +459,12 @@ void __init efi_free_boot_services(void) if (!num_entries) return; - new_size = efi.memmap.desc_size * num_entries; - new_phys = efi_memmap_alloc(num_entries); - if (!new_phys) { + if (efi_memmap_alloc(num_entries, &data) != 0) { pr_err("Failed to allocate new EFI memmap\n"); return; } - new = memremap(new_phys, new_size, MEMREMAP_WB); + new = memremap(data.phys_map, data.size, MEMREMAP_WB); if (!new) { pr_err("Failed to map new EFI memmap\n"); return; @@ -493,7 +488,7 @@ void __init efi_free_boot_services(void) memunmap(new); - if (efi_memmap_install(new_phys, num_entries)) { + if (efi_memmap_install(&data) != 0) { pr_err("Could not install new EFI memmap\n"); return; } diff --git a/drivers/firmware/efi/fake_mem.c b/drivers/firmware/efi/fake_mem.c index bb9fc70d0cfa..a8d20568d532 100644 --- a/drivers/firmware/efi/fake_mem.c +++ b/drivers/firmware/efi/fake_mem.c @@ -36,9 +36,9 @@ static int __init cmp_fake_mem(const void *x1, const void *x2) void __init efi_fake_memmap(void) { + struct efi_memory_map_data data = { 0 }; int new_nr_map = efi.memmap.nr_map; efi_memory_desc_t *md; - phys_addr_t new_memmap_phy; void *new_memmap; int i; @@ -55,15 +55,13 @@ void __init efi_fake_memmap(void) } /* allocate memory for new EFI memmap */ - new_memmap_phy = efi_memmap_alloc(new_nr_map); - if (!new_memmap_phy) + if (efi_memmap_alloc(new_nr_map, &data) != 0) return; /* create new EFI memmap */ - new_memmap = early_memremap(new_memmap_phy, - efi.memmap.desc_size * new_nr_map); + new_memmap = early_memremap(data.phys_map, data.size); if (!new_memmap) { - memblock_free(new_memmap_phy, efi.memmap.desc_size * new_nr_map); + memblock_free(data.phys_map, data.size); return; } @@ -71,9 +69,9 @@ void __init efi_fake_memmap(void) efi_memmap_insert(&efi.memmap, new_memmap, &efi_fake_mems[i]); /* swap into new EFI memmap */ - early_memunmap(new_memmap, efi.memmap.desc_size * new_nr_map); + early_memunmap(new_memmap, data.size); - efi_memmap_install(new_memmap_phy, new_nr_map); + efi_memmap_install(&data); /* print new EFI memmap */ efi_print_memmap(); diff --git a/drivers/firmware/efi/memmap.c b/drivers/firmware/efi/memmap.c index 4f98eb12c172..04dfa56b994b 100644 --- a/drivers/firmware/efi/memmap.c +++ b/drivers/firmware/efi/memmap.c @@ -32,6 +32,7 @@ static phys_addr_t __init __efi_memmap_alloc_late(unsigned long size) /** * efi_memmap_alloc - Allocate memory for the EFI memory map * @num_entries: Number of entries in the allocated map. + * @data: efi memmap installation parameters * * Depending on whether mm_init() has already been invoked or not, * either memblock or "normal" page allocation is used. @@ -39,14 +40,29 @@ static phys_addr_t __init __efi_memmap_alloc_late(unsigned long size) * Returns the physical address of the allocated memory map on * success, zero on failure. */ -phys_addr_t __init efi_memmap_alloc(unsigned int num_entries) +int __init efi_memmap_alloc(unsigned int num_entries, + struct efi_memory_map_data *data) { - unsigned long size = num_entries * efi.memmap.desc_size; - - if (slab_is_available()) - return __efi_memmap_alloc_late(size); + /* Expect allocation parameters are zero initialized */ + WARN_ON(data->phys_map || data->size); + + data->size = num_entries * efi.memmap.desc_size; + data->desc_version = efi.memmap.desc_version; + data->desc_size = efi.memmap.desc_size; + data->flags &= ~(EFI_MEMMAP_SLAB | EFI_MEMMAP_MEMBLOCK); + data->flags |= efi.memmap.flags & EFI_MEMMAP_LATE; + + if (slab_is_available()) { + data->flags |= EFI_MEMMAP_SLAB; + data->phys_map = __efi_memmap_alloc_late(data->size); + } else { + data->flags |= EFI_MEMMAP_MEMBLOCK; + data->phys_map = __efi_memmap_alloc_early(data->size); + } - return __efi_memmap_alloc_early(size); + if (!data->phys_map) + return -ENOMEM; + return 0; } /** @@ -64,8 +80,7 @@ phys_addr_t __init efi_memmap_alloc(unsigned int num_entries) * * Returns zero on success, a negative error code on failure. */ -static int __init -__efi_memmap_init(struct efi_memory_map_data *data) +static int __init __efi_memmap_init(struct efi_memory_map_data *data) { struct efi_memory_map map; phys_addr_t phys_map; @@ -184,8 +199,7 @@ int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size) /** * efi_memmap_install - Install a new EFI memory map in efi.memmap - * @addr: Physical address of the memory map - * @nr_map: Number of entries in the memory map + * @ctx: map allocation parameters (address, size, flags) * * Unlike efi_memmap_init_*(), this function does not allow the caller * to switch from early to late mappings. It simply uses the existing @@ -193,20 +207,11 @@ int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size) * * Returns zero on success, a negative error code on failure. */ -int __init efi_memmap_install(phys_addr_t addr, unsigned int nr_map) +int __init efi_memmap_install(struct efi_memory_map_data *data) { - struct efi_memory_map_data data; - unsigned long flags; - efi_memmap_unmap(); - data.phys_map = addr; - data.size = efi.memmap.desc_size * nr_map; - data.desc_version = efi.memmap.desc_version; - data.desc_size = efi.memmap.desc_size; - data.flags = efi.memmap.flags & EFI_MEMMAP_LATE; - - return __efi_memmap_init(&data); + return __efi_memmap_init(data); } /** diff --git a/include/linux/efi.h b/include/linux/efi.h index f117d68c314e..adbe421835c1 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -759,8 +759,8 @@ typedef union { /* * Architecture independent structure for describing a memory map for the - * benefit of efi_memmap_init_early(), saving us the need to pass four - * parameters. + * benefit of efi_memmap_init_early(), and for passing context between + * efi_memmap_alloc() and efi_memmap_install(). */ struct efi_memory_map_data { phys_addr_t phys_map; @@ -778,6 +778,8 @@ struct efi_memory_map { unsigned long desc_version; unsigned long desc_size; #define EFI_MEMMAP_LATE (1UL << 0) +#define EFI_MEMMAP_MEMBLOCK (1UL << 1) +#define EFI_MEMMAP_SLAB (1UL << 2) unsigned long flags; }; @@ -972,11 +974,12 @@ static inline efi_status_t efi_query_variable_store(u32 attributes, #endif extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr); -extern phys_addr_t __init efi_memmap_alloc(unsigned int num_entries); +extern int __init efi_memmap_alloc(unsigned int num_entries, + struct efi_memory_map_data *data); extern int __init efi_memmap_init_early(struct efi_memory_map_data *data); extern int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size); extern void __init efi_memmap_unmap(void); -extern int __init efi_memmap_install(phys_addr_t addr, unsigned int nr_map); +extern int __init efi_memmap_install(struct efi_memory_map_data *data); extern int __init efi_memmap_split_count(efi_memory_desc_t *md, struct range *range); extern void __init efi_memmap_insert(struct efi_memory_map *old_memmap, -- cgit v1.2.3-59-g8ed1b From f0ef6523475f18ccd213e22ee593dfd131a2c5ea Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 13 Jan 2020 18:22:44 +0100 Subject: efi: Fix efi_memmap_alloc() leaks With efi_fake_memmap() and efi_arch_mem_reserve() the efi table may be updated and replaced multiple times. When that happens a previous dynamically allocated efi memory map can be garbage collected. Use the new EFI_MEMMAP_{SLAB,MEMBLOCK} flags to detect when a dynamically allocated memory map is being replaced. Debug statements in efi_memmap_free() reveal: efi: __efi_memmap_free:37: phys: 0x23ffdd580 size: 2688 flags: 0x2 efi: __efi_memmap_free:37: phys: 0x9db00 size: 2640 flags: 0x2 efi: __efi_memmap_free:37: phys: 0x9e580 size: 2640 flags: 0x2 ...a savings of 7968 bytes on a qemu boot with 2 entries specified to efi_fake_mem=. [ ardb: added a comment to clarify that efi_memmap_free() does nothing when called from efi_clean_memmap(), i.e., with data->flags == 0x0 ] Signed-off-by: Dan Williams Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200113172245.27925-13-ardb@kernel.org --- drivers/firmware/efi/memmap.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/firmware/efi/memmap.c b/drivers/firmware/efi/memmap.c index 04dfa56b994b..501672166502 100644 --- a/drivers/firmware/efi/memmap.c +++ b/drivers/firmware/efi/memmap.c @@ -29,6 +29,28 @@ static phys_addr_t __init __efi_memmap_alloc_late(unsigned long size) return PFN_PHYS(page_to_pfn(p)); } +static void __init __efi_memmap_free(u64 phys, unsigned long size, unsigned long flags) +{ + if (flags & EFI_MEMMAP_MEMBLOCK) { + if (slab_is_available()) + memblock_free_late(phys, size); + else + memblock_free(phys, size); + } else if (flags & EFI_MEMMAP_SLAB) { + struct page *p = pfn_to_page(PHYS_PFN(phys)); + unsigned int order = get_order(size); + + free_pages((unsigned long) page_address(p), order); + } +} + +static void __init efi_memmap_free(void) +{ + __efi_memmap_free(efi.memmap.phys_map, + efi.memmap.desc_size * efi.memmap.nr_map, + efi.memmap.flags); +} + /** * efi_memmap_alloc - Allocate memory for the EFI memory map * @num_entries: Number of entries in the allocated map. @@ -100,6 +122,9 @@ static int __init __efi_memmap_init(struct efi_memory_map_data *data) return -ENOMEM; } + /* NOP if data->flags & (EFI_MEMMAP_MEMBLOCK | EFI_MEMMAP_SLAB) == 0 */ + efi_memmap_free(); + map.phys_map = data->phys_map; map.nr_map = data->size / data->desc_size; map.map_end = map.map + data->size; -- cgit v1.2.3-59-g8ed1b From 484a418d075488c6999528247cc711d12c373447 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 13 Jan 2020 18:22:45 +0100 Subject: efi: Fix handling of multiple efi_fake_mem= entries Dave noticed that when specifying multiple efi_fake_mem= entries only the last entry was successfully being reflected in the efi memory map. This is due to the fact that the efi_memmap_insert() is being called multiple times, but on successive invocations the insertion should be applied to the last new memmap rather than the original map at efi_fake_memmap() entry. Rework efi_fake_memmap() to install the new memory map after each efi_fake_mem= entry is parsed. This also fixes an issue in efi_fake_memmap() that caused it to litter emtpy entries into the end of the efi memory map. An empty entry causes efi_memmap_insert() to attempt more memmap splits / copies than efi_memmap_split_count() accounted for when sizing the new map. When that happens efi_memmap_insert() may overrun its allocation, and if you are lucky will spill over to an unmapped page leading to crash signature like the following rather than silent corruption: BUG: unable to handle page fault for address: ffffffffff281000 [..] RIP: 0010:efi_memmap_insert+0x11d/0x191 [..] Call Trace: ? bgrt_init+0xbe/0xbe ? efi_arch_mem_reserve+0x1cb/0x228 ? acpi_parse_bgrt+0xa/0xd ? acpi_table_parse+0x86/0xb8 ? acpi_boot_init+0x494/0x4e3 ? acpi_parse_x2apic+0x87/0x87 ? setup_acpi_sci+0xa2/0xa2 ? setup_arch+0x8db/0x9e1 ? start_kernel+0x6a/0x547 ? secondary_startup_64+0xb6/0xc0 Commit af1648984828 "x86/efi: Update e820 with reserved EFI boot services data to fix kexec breakage" introduced more occurrences where efi_memmap_insert() is invoked after an efi_fake_mem= configuration has been parsed. Previously the side effects of vestigial empty entries were benign, but with commit af1648984828 that follow-on efi_memmap_insert() invocation triggers efi_memmap_insert() overruns. Reported-by: Dave Young Signed-off-by: Dan Williams Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20191231014630.GA24942@dhcp-128-65.nay.redhat.com Link: https://lore.kernel.org/r/20200113172245.27925-14-ardb@kernel.org --- drivers/firmware/efi/fake_mem.c | 31 ++++++++++++++++--------------- drivers/firmware/efi/memmap.c | 2 +- include/linux/efi.h | 2 ++ 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/drivers/firmware/efi/fake_mem.c b/drivers/firmware/efi/fake_mem.c index a8d20568d532..6e0f34a38171 100644 --- a/drivers/firmware/efi/fake_mem.c +++ b/drivers/firmware/efi/fake_mem.c @@ -34,25 +34,16 @@ static int __init cmp_fake_mem(const void *x1, const void *x2) return 0; } -void __init efi_fake_memmap(void) +static void __init efi_fake_range(struct efi_mem_range *efi_range) { struct efi_memory_map_data data = { 0 }; int new_nr_map = efi.memmap.nr_map; efi_memory_desc_t *md; void *new_memmap; - int i; - - if (!efi_enabled(EFI_MEMMAP) || !nr_fake_mem) - return; /* count up the number of EFI memory descriptor */ - for (i = 0; i < nr_fake_mem; i++) { - for_each_efi_memory_desc(md) { - struct range *r = &efi_fake_mems[i].range; - - new_nr_map += efi_memmap_split_count(md, r); - } - } + for_each_efi_memory_desc(md) + new_nr_map += efi_memmap_split_count(md, &efi_range->range); /* allocate memory for new EFI memmap */ if (efi_memmap_alloc(new_nr_map, &data) != 0) @@ -61,17 +52,27 @@ void __init efi_fake_memmap(void) /* create new EFI memmap */ new_memmap = early_memremap(data.phys_map, data.size); if (!new_memmap) { - memblock_free(data.phys_map, data.size); + __efi_memmap_free(data.phys_map, data.size, data.flags); return; } - for (i = 0; i < nr_fake_mem; i++) - efi_memmap_insert(&efi.memmap, new_memmap, &efi_fake_mems[i]); + efi_memmap_insert(&efi.memmap, new_memmap, efi_range); /* swap into new EFI memmap */ early_memunmap(new_memmap, data.size); efi_memmap_install(&data); +} + +void __init efi_fake_memmap(void) +{ + int i; + + if (!efi_enabled(EFI_MEMMAP) || !nr_fake_mem) + return; + + for (i = 0; i < nr_fake_mem; i++) + efi_fake_range(&efi_fake_mems[i]); /* print new EFI memmap */ efi_print_memmap(); diff --git a/drivers/firmware/efi/memmap.c b/drivers/firmware/efi/memmap.c index 501672166502..2ff1883dc788 100644 --- a/drivers/firmware/efi/memmap.c +++ b/drivers/firmware/efi/memmap.c @@ -29,7 +29,7 @@ static phys_addr_t __init __efi_memmap_alloc_late(unsigned long size) return PFN_PHYS(page_to_pfn(p)); } -static void __init __efi_memmap_free(u64 phys, unsigned long size, unsigned long flags) +void __init __efi_memmap_free(u64 phys, unsigned long size, unsigned long flags) { if (flags & EFI_MEMMAP_MEMBLOCK) { if (slab_is_available()) diff --git a/include/linux/efi.h b/include/linux/efi.h index adbe421835c1..7efd7072cca5 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -976,6 +976,8 @@ extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr); extern int __init efi_memmap_alloc(unsigned int num_entries, struct efi_memory_map_data *data); +extern void __efi_memmap_free(u64 phys, unsigned long size, + unsigned long flags); extern int __init efi_memmap_init_early(struct efi_memory_map_data *data); extern int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size); extern void __init efi_memmap_unmap(void); -- cgit v1.2.3-59-g8ed1b From 3cc028619e284188cdde652631e1c3c5a83692b9 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 18 Jan 2020 17:57:03 +0100 Subject: efi/x86: avoid KASAN false positives when accessing the 1: 1 mapping When installing the EFI virtual address map during early boot, we access the EFI system table to retrieve the 1:1 mapped address of the SetVirtualAddressMap() EFI runtime service. This memory is not known to KASAN, so on KASAN enabled builds, this may result in a splat like ================================================================== BUG: KASAN: user-memory-access in efi_set_virtual_address_map+0x141/0x354 Read of size 4 at addr 000000003fbeef38 by task swapper/0/0 CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.5.0-rc5+ #758 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 Call Trace: dump_stack+0x8b/0xbb ? efi_set_virtual_address_map+0x141/0x354 ? efi_set_virtual_address_map+0x141/0x354 __kasan_report+0x176/0x192 ? efi_set_virtual_address_map+0x141/0x354 kasan_report+0xe/0x20 efi_set_virtual_address_map+0x141/0x354 ? efi_thunk_runtime_setup+0x148/0x148 ? __inc_numa_state+0x19/0x90 ? memcpy+0x34/0x50 efi_enter_virtual_mode+0x5fd/0x67d start_kernel+0x5cd/0x682 ? mem_encrypt_init+0x6/0x6 ? x86_family+0x5/0x20 ? load_ucode_bsp+0x46/0x154 secondary_startup_64+0xa4/0xb0 ================================================================== Since this code runs only a single time during early boot, let's annotate it as __no_sanitize_address so KASAN disregards it entirely. Fixes: 698294704573 ("efi/x86: Split SetVirtualAddresMap() wrappers into ...") Reported-by: Qian Cai Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi_64.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 8d1869ff1033..e2accfe636bd 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -543,7 +543,7 @@ static DEFINE_SPINLOCK(efi_runtime_lock); __s; \ }) -static efi_status_t __init +static efi_status_t __init __no_sanitize_address efi_thunk_set_virtual_address_map(unsigned long memory_map_size, unsigned long descriptor_size, u32 descriptor_version, @@ -882,10 +882,11 @@ void __init efi_thunk_runtime_setup(void) efi.query_capsule_caps = efi_thunk_query_capsule_caps; } -efi_status_t __init efi_set_virtual_address_map(unsigned long memory_map_size, - unsigned long descriptor_size, - u32 descriptor_version, - efi_memory_desc_t *virtual_map) +efi_status_t __init __no_sanitize_address +efi_set_virtual_address_map(unsigned long memory_map_size, + unsigned long descriptor_size, + u32 descriptor_version, + efi_memory_desc_t *virtual_map) { efi_status_t status; unsigned long flags; -- cgit v1.2.3-59-g8ed1b From bc310baf2ba381c648983c7f4748327f17324562 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 18 Jan 2020 17:57:04 +0100 Subject: x86/boot/compressed: Relax sed symbol type regex for LLVM ld.lld The final build stage of the x86 kernel captures some symbol addresses from the decompressor binary and copies them into zoffset.h. It uses sed with a regular expression that matches the address, symbol type and symbol name, and mangles the captured addresses and the names of symbols of interest into #define directives that are added to zoffset.h The symbol type is indicated by a single letter, which we match strictly: only letters in the set 'ABCDGRSTVW' are matched, even though the actual symbol type is relevant and therefore ignored. Commit bc7c9d620 ("efi/libstub/x86: Force 'hidden' visibility for extern declarations") made a change to the way external symbol references are classified, resulting in 'startup_32' now being emitted as a hidden symbol. This prevents the use of GOT entries to refer to this symbol via its absolute address, which recent toolchains (including Clang based ones) already avoid by default, making this change a no-op in the majority of cases. However, as it turns out, the LLVM linker classifies such hidden symbols as symbols with static linkage in fully linked ELF binaries, causing tools such as NM to output a lowercase 't' rather than an upper case 'T' for the type of such symbols. Since our sed expression only matches upper case letters for the symbol type, the line describing startup_32 is disregarded, resulting in a build error like the following arch/x86/boot/header.S:568:18: error: symbol 'ZO_startup_32' can not be undefined in a subtraction expression init_size: .long (0x00000000008fd000 - ZO_startup_32 + (((0x0000000001f6361c + ((0x0000000001f6361c >> 8) + 65536) - 0x00000000008c32e5) + 4095) & ~4095)) # kernel initialization size Given that we are only interested in the value of the symbol, let's match any character in the set 'a-zA-Z' instead. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Tested-by: Nathan Chancellor --- arch/x86/boot/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 95410d6ee2ff..748b6d28a91d 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -88,7 +88,7 @@ $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE SETUP_OBJS = $(addprefix $(obj)/,$(setup-y)) -sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|kernel_info\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p' +sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [a-zA-Z] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|kernel_info\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p' quiet_cmd_zoffset = ZOFFSET $@ cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@ -- cgit v1.2.3-59-g8ed1b From 0779221e7166c6865555bb6d29bf6af76fc316bd Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 18 Jan 2020 17:57:05 +0100 Subject: efi/x86: Disallow efi=old_map in mixed mode Before: 1f299fad1e31: ("efi/x86: Limit EFI old memory map to SGI UV machines") enabling the old EFI memory map on mixed mode systems disabled EFI runtime services altogether. Given that efi=old_map is a debug feature designed to work around firmware problems related to EFI runtime services, and disabling them can be achieved more straightforwardly using 'noefi' or 'efi=noruntime', it makes more sense to ignore efi=old_map on mixed mode systems. Currently, we do neither, and try to use the old memory map in combination with mixed mode routines, which results in crashes, so let's fix this by making efi=old_map functional on native systems only. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar --- arch/x86/platform/uv/bios_uv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c index 7c2b8c5d0b49..607f58147311 100644 --- a/arch/x86/platform/uv/bios_uv.c +++ b/arch/x86/platform/uv/bios_uv.c @@ -371,7 +371,7 @@ static int __init arch_parse_efi_cmdline(char *str) return -EINVAL; } - if (parse_option_str(str, "old_map")) + if (!efi_is_mixed() && parse_option_str(str, "old_map")) set_bit(EFI_UV1_MEMMAP, &efi.flags); return 0; -- cgit v1.2.3-59-g8ed1b From cada0b6dbb8c07ab353d209437243bec8cc7c64a Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Wed, 22 Jan 2020 14:14:30 -0500 Subject: efi/libstub/x86: Fix EFI server boot failure x86_64 EFI systems are unable to boot due to a typo in a recent commit: EFI config tables not found. -- System halted This was probably due to the absense of CONFIG_EFI_MIXED=y in testing. Fixes: 796eb8d26a57 ("efi/libstub/x86: Use const attribute for efi_is_64bit()") Signed-off-by: Qian Cai Acked-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Cc: tglx@linutronix.de Cc: linux-efi@vger.kernel.org Link: https://lore.kernel.org/r/20200122191430.4888-1-cai@lca.pw --- arch/x86/boot/compressed/eboot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 82e26d0ff075..287393d725f0 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -32,7 +32,7 @@ __attribute_const__ bool efi_is_64bit(void) { if (IS_ENABLED(CONFIG_EFI_MIXED)) return efi_is64; - return IS_ENABLED(CONFIG_X64_64); + return IS_ENABLED(CONFIG_X86_64); } static efi_status_t -- cgit v1.2.3-59-g8ed1b From ac6119e7f25b842fc061e8aec88c4f32d3bc28ef Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 21 Jan 2020 10:39:12 +0100 Subject: efi/x86: Disable instrumentation in the EFI runtime handling code We already disable KASAN instrumentation specifically for the EFI routines that are known to dereference memory addresses that KASAN does not know about, avoiding false positive KASAN splats. However, as it turns out, having GCOV or KASAN instrumentation enabled interferes with the compiler's ability to optimize away function calls that are guarded by IS_ENABLED() checks that should have resulted in those references to have been const-propagated out of existence. But with instrumenation enabled, we may get build errors like: ld: arch/x86/platform/efi/efi_64.o: in function `efi_thunk_set_virtual_address_map': ld: arch/x86/platform/efi/efi_64.o: in function `efi_set_virtual_address_map': in builds where CONFIG_EFI=y but CONFIG_EFI_MIXED or CONFIG_X86_UV are not defined, even though the invocations are conditional on IS_ENABLED() checks against the respective Kconfig symbols. So let's disable instrumentation entirely for this subdirectory, which isn't that useful here to begin with. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Cc: linux-efi@vger.kernel.org --- arch/x86/platform/efi/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile index 7ec3a8b31f8b..84b09c230cbd 100644 --- a/arch/x86/platform/efi/Makefile +++ b/arch/x86/platform/efi/Makefile @@ -1,5 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 OBJECT_FILES_NON_STANDARD_efi_thunk_$(BITS).o := y +KASAN_SANITIZE := n +GCOV_PROFILE := n obj-$(CONFIG_EFI) += quirks.o efi.o efi_$(BITS).o efi_stub_$(BITS).o obj-$(CONFIG_EFI_MIXED) += efi_thunk_$(BITS).o -- cgit v1.2.3-59-g8ed1b