From 43c75f933be26422f166d6d869a19997312f4732 Mon Sep 17 00:00:00 2001 From: Seth Jennings Date: Mon, 30 Nov 2015 10:47:43 -0600 Subject: x86/mm: Streamline and restore probe_memory_block_size() The cumulative effect of the following two commits: bdee237c0343 ("x86: mm: Use 2GB memory block size on large-memory x86-64 systems") 982792c782ef ("x86, mm: probe memory block size for generic x86 64bit") ... is some pretty convoluted code. The first commit also removed code for the UV case without a stated reason, which might lead to an unexpected change in behavior. This commit has no other (intended) functional change; it just seeks to simplify and make the code more understandable, beyond restoring the UV behavior. The whole section with the "tail size" doesn't seem to be reachable, since both the >= 64GB and < 64GB cases return, so it was removed. Signed-off-by: Seth Jennings Cc: Daniel J Blueman Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1448902063-18885-1-git-send-email-sjennings@variantweb.net [ Rewrote the title and changelog. ] Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 8829482d69ec..8f18fec74e67 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include "mm_internal.h" @@ -1193,26 +1194,13 @@ int kern_addr_valid(unsigned long addr) static unsigned long probe_memory_block_size(void) { - /* start from 2g */ - unsigned long bz = 1UL<<31; + unsigned long bz = MIN_MEMORY_BLOCK_SIZE; - if (totalram_pages >= (64ULL << (30 - PAGE_SHIFT))) { - pr_info("Using 2GB memory block size for large-memory system\n"); - return 2UL * 1024 * 1024 * 1024; - } - - /* less than 64g installed */ - if ((max_pfn << PAGE_SHIFT) < (16UL << 32)) - return MIN_MEMORY_BLOCK_SIZE; - - /* get the tail size */ - while (bz > MIN_MEMORY_BLOCK_SIZE) { - if (!((max_pfn << PAGE_SHIFT) & (bz - 1))) - break; - bz >>= 1; - } + /* if system is UV or has 64GB of RAM or more, use large blocks */ + if (is_uv_system() || ((max_pfn << PAGE_SHIFT) >= (64UL << 30))) + bz = 2UL << 30; /* 2GB */ - printk(KERN_DEBUG "memory block size : %ldMB\n", bz >> 20); + pr_info("x86/mm: Memory block size: %ldMB\n", bz >> 20); return bz; } -- cgit v1.2.3-59-g8ed1b From 320d25b6a05f8b73c23fc21025d2906ecdd2d4fc Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 19 Jan 2016 13:38:58 -0800 Subject: x86/mm/32: Set NX in __supported_pte_mask before enabling paging There's a short window in which very early mappings can end up with NX clear because they are created before we've noticed that we have NX. It turns out that we detect NX very early, so there's no need to defer __supported_pte_mask setup. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Mike Galbraith Cc: Pavel Machek Cc: Peter Zijlstra Cc: Stephen Smalley Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/2b544627345f7110160545a3f47031eb45c3ad4f.1453239349.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_32.S | 6 ++++++ arch/x86/mm/setup_nx.c | 5 ++--- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 6bc9ae24b6d2..57fc3f8c85fd 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -389,6 +389,12 @@ default_entry: /* Make changes effective */ wrmsr + /* + * And make sure that all the mappings we set up have NX set from + * the beginning. + */ + orl $(1 << (_PAGE_BIT_NX - 32)), pa(__supported_pte_mask + 4) + enable_paging: /* diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c index 92e2eacb3321..78f5d5907f98 100644 --- a/arch/x86/mm/setup_nx.c +++ b/arch/x86/mm/setup_nx.c @@ -31,9 +31,8 @@ early_param("noexec", noexec_setup); void x86_configure_nx(void) { - if (boot_cpu_has(X86_FEATURE_NX) && !disable_nx) - __supported_pte_mask |= _PAGE_NX; - else + /* If disable_nx is set, clear NX on all new mappings going forward. */ + if (disable_nx) __supported_pte_mask &= ~_PAGE_NX; } -- cgit v1.2.3-59-g8ed1b
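The head_32.S hunk above sets the NX bit through the high half of the 64-bit __supported_pte_mask: on little-endian x86, bit 63 (_PAGE_BIT_NX) of a u64 is bit 31 of the 32-bit word at byte offset +4, which is exactly what "orl $(1 << (_PAGE_BIT_NX - 32)), pa(__supported_pte_mask + 4)" encodes. A user-space C sketch of that same dword arithmetic (illustrative only, not kernel code):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	uint64_t mask = 0;
	unsigned char *p = (unsigned char *)&mask;
	uint32_t hi;

	/* Set bit 63 of the u64 by OR-ing bit 31 into the dword at +4,
	 * the little-endian layout the asm relies on. */
	memcpy(&hi, p + 4, sizeof(hi));
	hi |= 1u << (63 - 32);
	memcpy(p + 4, &hi, sizeof(hi));

	printf("mask = %#llx\n", (unsigned long long)mask);	/* 0x8000000000000000 */
	return 0;
}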
From 7c360572b430a0e9757bafc0c20f26c920f2a07f Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 19 Jan 2016 13:38:59 -0800 Subject: x86/mm: Make kmap_prot into a #define The value (once we initialize it) is a foregone conclusion. Make it a #define to save a tiny amount of text and data size and to make it more comprehensible. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Mike Galbraith Cc: Pavel Machek Cc: Peter Zijlstra Cc: Stephen Smalley Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/0850eb0213de9da88544ff7fae72dc6d06d2b441.1453239349.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fixmap.h | 2 +- arch/x86/mm/init_32.c | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 6d7d0e52ed5a..8554f960e21b 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -138,7 +138,7 @@ extern void reserve_top_address(unsigned long reserve); extern int fixmaps_set; extern pte_t *kmap_pte; -extern pgprot_t kmap_prot; +#define kmap_prot PAGE_KERNEL extern pte_t *pkmap_page_table; void __native_set_fixmap(enum fixed_addresses idx, pte_t pte); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index cb4ef3de61f9..a4bb1c7ab65e 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -388,7 +388,6 @@ repeat: } pte_t *kmap_pte; -pgprot_t kmap_prot; static inline pte_t *kmap_get_fixmap_pte(unsigned long vaddr) { @@ -405,8 +404,6 @@ static void __init kmap_init(void) */ kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); kmap_pte = kmap_get_fixmap_pte(kmap_vstart); - - kmap_prot = PAGE_KERNEL; } #ifdef CONFIG_HIGHMEM -- cgit v1.2.3-59-g8ed1b From c1a0bf347c40dd4b0a5bb10fdf4de76a1fbbbe8c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 8 Feb 2016 09:57:34 +0100 Subject: x86/mm/numa: Clean up numa_clear_kernel_node_hotplug() So we fixed an overflow bug in numa_clear_kernel_node_hotplug(): 2b54ab3c66d4 ("x86/mm/numa: Fix memory corruption on 32-bit NUMA kernels") ...
and the bug was indirectly caused by poor coding style, such as using start/end local variables unnecessarily, which lost the phys_addr_t type. So make the code more readable and try to fully comment all the thinking behind the logic. No change in functionality. Cc: Andrew Morton Cc: Brad Spengler Cc: Chen Tang Cc: "H. Peter Anvin" Cc: Lai Jiangshan Cc: Linus Torvalds Cc: PaX Team Cc: Taku Izumi Cc: Tang Chen Cc: Thomas Gleixner Cc: Wen Congyang Cc: Yasuaki Ishimatsu Cc: y14sg1 Cc: Zhang Yanfei Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/numa.c | 65 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 42 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index d04f8094bc23..ede4506abb18 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -465,46 +465,65 @@ static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi) return true; } +/* + * Mark all currently memblock-reserved physical memory (which covers the + * kernel's own memory ranges) as hot-unswappable. + */ static void __init numa_clear_kernel_node_hotplug(void) { - int i, nid; - nodemask_t numa_kernel_nodes = NODE_MASK_NONE; - phys_addr_t start, end; - struct memblock_region *r; + nodemask_t reserved_nodemask = NODE_MASK_NONE; + struct memblock_region *mb_region; + int i; /* + * We have to do some preprocessing of memblock regions, to + * make them suitable for reservation. + * * At this time, all memory regions reserved by memblock are - * used by the kernel. Set the nid in memblock.reserved will - * mark out all the nodes the kernel resides in. + * used by the kernel, but those regions are not split up + * along node boundaries yet, and don't necessarily have their + * node ID set yet either. + * + * So iterate over all memory known to the x86 architecture, + * and use those ranges to set the nid in memblock.reserved. + * This will split up the memblock regions along node + * boundaries and will set the node IDs as well. */ for (i = 0; i < numa_meminfo.nr_blks; i++) { - struct numa_memblk *mb = &numa_meminfo.blk[i]; + struct numa_memblk *mb = numa_meminfo.blk + i; - memblock_set_node(mb->start, mb->end - mb->start, - &memblock.reserved, mb->nid); + memblock_set_node(mb->start, mb->end - mb->start, &memblock.reserved, mb->nid); } /* - * Mark all kernel nodes. + * Now go over all reserved memblock regions, to construct a + * node mask of all kernel reserved memory areas. * - * When booting with mem=nn[kMG] or in a kdump kernel, numa_meminfo - * may not include all the memblock.reserved memory ranges because - * trim_snb_memory() reserves specific pages for Sandy Bridge graphics. + * [ Note, when booting with mem=nn[kMG] or in a kdump kernel, + * numa_meminfo might not include all memblock.reserved + * memory ranges, because quirks such as trim_snb_memory() + * reserve specific pages for Sandy Bridge graphics. ] */ - for_each_memblock(reserved, r) - if (r->nid != MAX_NUMNODES) - node_set(r->nid, numa_kernel_nodes); + for_each_memblock(reserved, mb_region) { + if (mb_region->nid != MAX_NUMNODES) + node_set(mb_region->nid, reserved_nodemask); + } - /* Clear MEMBLOCK_HOTPLUG flag for memory in kernel nodes. */ + /* + * Finally, clear the MEMBLOCK_HOTPLUG flag for all memory + * belonging to the reserved node mask. + * + * Note that this will include memory regions that reside + * on nodes that contain kernel memory - entire nodes + * become hot-unpluggable: + */ for (i = 0; i < numa_meminfo.nr_blks; i++) { - nid = numa_meminfo.blk[i].nid; - if (!node_isset(nid, numa_kernel_nodes)) - continue; + struct numa_memblk *mb = numa_meminfo.blk + i; - start = numa_meminfo.blk[i].start; - end = numa_meminfo.blk[i].end; + if (!node_isset(mb->nid, reserved_nodemask)) + continue; - memblock_clear_hotplug(start, end - start); + memblock_clear_hotplug(mb->start, mb->end - mb->start); } } -- cgit v1.2.3-59-g8ed1b
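The hazard of "losing the phys_addr_t type" that the changelog mentions is easy to see in isolation: on 32-bit kernels, unsigned long is 32 bits wide while phys_addr_t stays 64 bits wide under PAE, so round-tripping a >4GB physical address through a variable of the narrower type silently drops the high bits. A minimal user-space sketch of that kind of truncation (uint64_t/uint32_t stand in for phys_addr_t/unsigned long; illustrative only, not the actual kernel bug path):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* A physical address above 4GB, as a PAE machine can have: */
	uint64_t phys = 0x100000000ULL;		/* what phys_addr_t holds */
	uint32_t trunc = (uint32_t)phys;	/* what a 32-bit unsigned long keeps */

	printf("phys_addr_t value: %#llx\n", (unsigned long long)phys);
	printf("after truncation:  %#x\n", trunc);	/* prints 0 */
	return 0;
}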
From 5f7ee246850ba18a6a7bcb3d5eddb6db68354688 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 8 Feb 2016 12:14:55 +0100 Subject: x86/mm/numa: Check for failures in numa_clear_kernel_node_hotplug() numa_clear_kernel_node_hotplug() uses memblock_set_node() without checking for failures. memblock_set_node() is a complex function that might extend the memblock array - which extension might fail - so check for this possibility. It's not supposed to happen (because realistically if we have so little memory that this fails then we likely won't be able to boot anyway), but do the check nevertheless. Cc: Andrew Morton Cc: Brad Spengler Cc: Chen Tang Cc: "H. Peter Anvin" Cc: Lai Jiangshan Cc: Linus Torvalds Cc: PaX Team Cc: Taku Izumi Cc: Tang Chen Cc: Thomas Gleixner Cc: Wen Congyang Cc: Yasuaki Ishimatsu Cc: y14sg1 Cc: Zhang Yanfei Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/numa.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index ede4506abb18..f70c1ff46125 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -491,8 +491,10 @@ static void __init numa_clear_kernel_node_hotplug(void) */ for (i = 0; i < numa_meminfo.nr_blks; i++) { struct numa_memblk *mb = numa_meminfo.blk + i; + int ret; - memblock_set_node(mb->start, mb->end - mb->start, &memblock.reserved, mb->nid); + ret = memblock_set_node(mb->start, mb->end - mb->start, &memblock.reserved, mb->nid); + WARN_ON_ONCE(ret); } /* -- cgit v1.2.3-59-g8ed1b From 69e0210fd01ff157d332102219aaf5c26ca8069b Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Mon, 11 Jan 2016 15:51:18 +0300 Subject: x86/kasan: Clear kasan_zero_page after TLB flush Currently we clear kasan_zero_page before __flush_tlb_all(). This works with the current implementation of native_flush_tlb[_global]() because it doesn't do any writes to kasan shadow memory. But any subtle change made in native_flush_tlb*() could break this. Also, the current code doesn't seem to work for paravirt guests (lguest). Only after the TLB flush can we be sure that kasan_zero_page is not used as early shadow anymore (instrumented code will not write to it). So clear it only after the TLB flush. Signed-off-by: Andrey Ryabinin Reviewed-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R.
Rodriguez Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/1452516679-32040-2-git-send-email-aryabinin@virtuozzo.com Signed-off-by: Ingo Molnar --- arch/x86/mm/kasan_init_64.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index d470cf219a2d..303e47045864 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -120,11 +120,16 @@ void __init kasan_init(void) kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END), (void *)KASAN_SHADOW_END); - memset(kasan_zero_page, 0, PAGE_SIZE); - load_cr3(init_level4_pgt); __flush_tlb_all(); - init_task.kasan_depth = 0; + /* + * kasan_zero_page has been used as early shadow memory, thus it may + * contain some garbage. Now we can clear it, since after the TLB flush + * no one should write to it. + */ + memset(kasan_zero_page, 0, PAGE_SIZE); + + init_task.kasan_depth = 0; pr_info("KernelAddressSanitizer initialized\n"); } -- cgit v1.2.3-59-g8ed1b From 063fb3e56f6dd29b2633b678b837e1d904200e6f Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Mon, 11 Jan 2016 15:51:19 +0300 Subject: x86/kasan: Write protect kasan zero shadow After kasan_init() has executed, no one is allowed to write to kasan_zero_page, so write protect it. Signed-off-by: Andrey Ryabinin Reviewed-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R.
Rodriguez Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/1452516679-32040-3-git-send-email-aryabinin@virtuozzo.com Signed-off-by: Ingo Molnar --- arch/x86/mm/kasan_init_64.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 303e47045864..1b1110fa0057 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -125,10 +125,16 @@ void __init kasan_init(void) /* * kasan_zero_page has been used as early shadow memory, thus it may - * contain some garbage. Now we can clear it, since after the TLB flush - * no one should write to it. + * contain some garbage. Now we can clear and write protect it, since + * after the TLB flush no one should write to it. */ memset(kasan_zero_page, 0, PAGE_SIZE); + for (i = 0; i < PTRS_PER_PTE; i++) { + pte_t pte = __pte(__pa(kasan_zero_page) | __PAGE_KERNEL_RO); + set_pte(&kasan_zero_pte[i], pte); + } + /* Flush TLBs again to be sure that write protection applied. */ + __flush_tlb_all(); init_task.kasan_depth = 0; pr_info("KernelAddressSanitizer initialized\n"); -- cgit v1.2.3-59-g8ed1b From 060a402a1ddb551455ee410de2eadd3349f2801b Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 29 Jan 2016 11:42:57 -0800 Subject: x86/mm: Add INVPCID helpers This adds helpers for each of the four currently-specified INVPCID modes. Signed-off-by: Andy Lutomirski Reviewed-by: Borislav Petkov Cc: Andrew Morton Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/8a62b23ad686888cee01da134c91409e22064db9.1454096309.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/tlbflush.h | 48 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 6df2029405a3..8b576832777e 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -7,6 +7,54 @@ #include #include +static inline void __invpcid(unsigned long pcid, unsigned long addr, + unsigned long type) +{ + u64 desc[2] = { pcid, addr }; + + /* + * The memory clobber is because the whole point is to invalidate + * stale TLB entries and, especially if we're flushing global + * mappings, we don't want the compiler to reorder any subsequent + * memory accesses before the TLB flush. + * + * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and + * invpcid (%rcx), %rax in long mode. + */ + asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01" + : : "m" (desc), "a" (type), "c" (desc) : "memory"); +} + +#define INVPCID_TYPE_INDIV_ADDR 0 +#define INVPCID_TYPE_SINGLE_CTXT 1 +#define INVPCID_TYPE_ALL_INCL_GLOBAL 2 +#define INVPCID_TYPE_ALL_NON_GLOBAL 3 + +/* Flush all mappings for a given pcid and addr, not including globals. */ +static inline void invpcid_flush_one(unsigned long pcid, + unsigned long addr) +{ + __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR); +} + +/* Flush all mappings for a given PCID, not including globals. */ +static inline void invpcid_flush_single_context(unsigned long pcid) +{ + __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT); +} + +/* Flush all mappings, including globals, for all PCIDs. */ +static inline void invpcid_flush_all(void) +{ + __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL); +} + +/* Flush all mappings for all PCIDs except globals. */ +static inline void invpcid_flush_all_nonglobals(void) +{ + __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL); +} + #ifdef CONFIG_PARAVIRT #include #else -- cgit v1.2.3-59-g8ed1b
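Callers are expected to gate these helpers on the INVPCID feature bit; the kernel checks X86_FEATURE_INVPCID, which reflects CPUID leaf 7, subleaf 0, EBX bit 10. A user-space sketch of that same detection (assumes GCC/Clang's cpuid.h):

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (__get_cpuid_max(0, NULL) < 7) {
		puts("CPUID leaf 7 not available");
		return 1;
	}
	/* Structured extended feature flags: CPUID.(EAX=7, ECX=0):EBX bit 10 */
	__cpuid_count(7, 0, eax, ebx, ecx, edx);
	printf("INVPCID %ssupported\n", (ebx & (1u << 10)) ? "" : "not ");
	return 0;
}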
From d12a72b844a49d4162f24cefdab30bed3f86730e Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 29 Jan 2016 11:42:58 -0800 Subject: x86/mm: Add a 'noinvpcid' boot option to turn off INVPCID This adds a chicken bit to turn off INVPCID in case something goes wrong. It's an early_param() because we do TLB flushes before we parse __setup() parameters. Signed-off-by: Andy Lutomirski Reviewed-by: Borislav Petkov Cc: Andrew Morton Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/f586317ed1bc2b87aee652267e515b90051af385.1454096309.git.luto@kernel.org Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 2 ++ arch/x86/kernel/cpu/common.c | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) (limited to 'arch') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 551ecf09c8dd..e4c4d2a5a28d 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2566,6 +2566,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. nointroute [IA-64] + noinvpcid [X86] Disable the INVPCID cpu feature. + nojitter [IA-64] Disables jitter checking for ITC timers. no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 37830de8f60a..f4d0aa64d934 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -162,6 +162,22 @@ static int __init x86_mpx_setup(char *s) } __setup("nompx", x86_mpx_setup); +static int __init x86_noinvpcid_setup(char *s) +{ + /* noinvpcid doesn't accept parameters */ + if (s) + return -EINVAL; + + /* do not emit a message if the feature is not present */ + if (!boot_cpu_has(X86_FEATURE_INVPCID)) + return 0; + + setup_clear_cpu_cap(X86_FEATURE_INVPCID); + pr_info("noinvpcid: INVPCID feature disabled\n"); + return 0; +} +early_param("noinvpcid", x86_noinvpcid_setup); + #ifdef CONFIG_X86_32 static int cachesize_override = -1; static int disable_x86_serial_nr = 1; -- cgit v1.2.3-59-g8ed1b From d8bced79af1db6734f66b42064cc773cada2ce99 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 29 Jan 2016 11:42:59 -0800 Subject: x86/mm: If INVPCID is available, use it to flush global mappings On my Skylake laptop, INVPCID function 2 (flush absolutely everything) takes about 376ns, whereas saving flags, twiddling CR4.PGE to flush global mappings, and restoring flags takes about 539ns. Signed-off-by: Andy Lutomirski Reviewed-by: Borislav Petkov Cc: Andrew Morton Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/ed0ef62581c0ea9c99b9bf6df726015e96d44743.1454096309.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/tlbflush.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 8b576832777e..fc9a2fda1404 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -152,6 +152,15 @@ static inline void __native_flush_tlb_global(void) { unsigned long flags; + if (static_cpu_has(X86_FEATURE_INVPCID)) { + /* + * Using INVPCID is considerably faster than a pair of writes + * to CR4 sandwiched inside an IRQ flag save/restore. + */ + invpcid_flush_all(); + return; + } + /* * Read-modify-write to CR4 - protect it from preemption and * from interrupts. (Use the raw variant because this code can -- cgit v1.2.3-59-g8ed1b From ce1143aa60273220a9f89012f2aaaed04f97e9a2 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 25 Jan 2016 23:06:49 -0800 Subject: x86/dmi: Switch dmi_remap() from ioremap() [uncached] to ioremap_cache() DMI cacheability is very confused on x86. dmi_early_remap() uses early_ioremap(), which uses FIXMAP_PAGE_IO, which is __PAGE_KERNEL_IO, which is __PAGE_KERNEL, which is cached. Don't ask me why this makes any sense. dmi_remap() uses ioremap(), which requests an uncached mapping. However, on non-EFI systems, the DMI data generally lives between 0xf0000 and 0x100000, which is in the legacy ISA range, which triggers a special case in the PAT code that overrides the cache mode requested by ioremap() and forces a WB mapping. On a UEFI boot, however, the DMI table can live at any physical address. On my laptop, it's around 0x77dd0000. That's nowhere near the legacy ISA range, so the ioremap() implicit uncached type is honored and we end up with a UC- mapping.
UC- is a very, very slow way to read from main memory, so dmi_walk() is likely to take much longer than necessary. Given that we do early cached DMI reads even on UEFI, it seems safe to just ask for cached access. Switch to ioremap_cache(). I haven't tried to benchmark this, but I'd guess it saves several milliseconds of boot time. Signed-off-by: Andy Lutomirski Cc: Andrew Morton Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jean Delvare Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Link: http://lkml.kernel.org/r/3147c38e51f439f3c8911db34c7d4ab22d854915.1453791969.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/dmi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/dmi.h b/arch/x86/include/asm/dmi.h index 535192f6bfad..3c69fed215c5 100644 --- a/arch/x86/include/asm/dmi.h +++ b/arch/x86/include/asm/dmi.h @@ -15,7 +15,7 @@ static __always_inline __init void *dmi_alloc(unsigned len) /* Use early IO mappings for DMI because it's initialized early */ #define dmi_early_remap early_ioremap #define dmi_early_unmap early_iounmap -#define dmi_remap ioremap +#define dmi_remap ioremap_cache #define dmi_unmap iounmap #endif /* _ASM_X86_DMI_H */ -- cgit v1.2.3-59-g8ed1b From e2c7698cd61f11d4077fdb28148b2d31b82ac848 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 10 Feb 2016 15:51:16 +0100 Subject: x86/mm: Fix INVPCID asm constraint So we want to specify the dependency on both @pcid and @addr so that the compiler doesn't reorder accesses to them *before* the TLB flush. But for that to work, we need to express this properly in the inline asm and deref the whole desc array, not the pointer to it. See clwb() for an example. This fixes the build error on 32-bit: arch/x86/include/asm/tlbflush.h: In function ‘__invpcid’: arch/x86/include/asm/tlbflush.h:26:18: error: memory input 0 is not directly addressable which gcc4.7 caught but 5.x didn't. Which is strange. :-\ Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Michael Matz Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/tlbflush.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index fc9a2fda1404..d0cce90b0855 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -10,7 +10,7 @@ static inline void __invpcid(unsigned long pcid, unsigned long addr, unsigned long type) { - u64 desc[2] = { pcid, addr }; + struct { u64 d[2]; } desc = { { pcid, addr } }; /* * The memory clobber is because the whole point is to invalidate * stale TLB entries and, especially if we're flushing global * mappings, we don't want the compiler to reorder any subsequent * memory accesses before the TLB flush. * * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and * invpcid (%rcx), %rax in long mode.
*/ asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01" - : : "m" (desc), "a" (type), "c" (desc) : "memory"); + : : "m" (desc), "a" (type), "c" (&desc) : "memory"); } #define INVPCID_TYPE_INDIV_ADDR 0 -- cgit v1.2.3-59-g8ed1b From b176862fca8625b0a8bee207bca9b611413e5e24 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 18 Feb 2016 21:00:41 +0100 Subject: x86/mm/ptdump: Remove paravirt_enabled() is_hypervisor_range() can simply check if the PGD index is within ffff800000000000 - ffff87ffffffffff, which is the range reserved for a hypervisor. That range is practically an ABI, see Documentation/x86/x86_64/mm.txt. Tested-by: Boris Ostrovsky # Under Xen, as PV guest Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1455825641-19585-1-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/mm/dump_pagetables.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 4a6f1d9b5106..99bfb192803f 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -358,20 +358,19 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr, #define pgd_none(a) pud_none(__pud(pgd_val(a))) #endif -#ifdef CONFIG_X86_64 static inline bool is_hypervisor_range(int idx) { +#ifdef CONFIG_X86_64 /* * ffff800000000000 - ffff87ffffffffff is reserved for * the hypervisor. */ - return paravirt_enabled() && - (idx >= pgd_index(__PAGE_OFFSET) - 16) && - (idx < pgd_index(__PAGE_OFFSET)); -} + return (idx >= pgd_index(__PAGE_OFFSET) - 16) && + (idx < pgd_index(__PAGE_OFFSET)); #else -static inline bool is_hypervisor_range(int idx) { return false; } + return false; #endif +} static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, bool checkwx) -- cgit v1.2.3-59-g8ed1b From 405e1133d00e0271cedef75c17ecb773ff3e2732 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 10 Feb 2016 02:03:00 -0700 Subject: x86/mm: Avoid premature success when changing page attributes set_memory_nx() (and set_memory_x()) currently differ in behavior from all other set_memory_*() functions when encountering a virtual address space hole within the kernel address range: They stop processing at the hole, but nevertheless report success (making the caller believe the operation was carried out on the entire range). While observed to be a problem - triggering the CONFIG_DEBUG_WX warning - only with out of tree code, I suspect (but didn't check) that on x86-64 the CONFIG_DEBUG_PAGEALLOC logic in free_init_pages() would, when called from free_initmem(), have the same effect on the set_memory_nx() called from mark_rodata_ro(). This unexpected behavior is a result of change_page_attr_set_clr() special casing changes to only the NX bit, in that it passes "false" as the "checkalias" argument to __change_page_attr_set_clr(). Since this flag becomes the "primary" argument of both __change_page_attr() and __cpa_process_fault(), the latter would so far return success without adjusting cpa->numpages. Success to the higher level callers, however, means that whatever cpa->numpages currently holds is the count of successfully processed pages. The cases when __change_page_attr() calls __cpa_process_fault(), otoh, don't generally mean the entire range got processed (as can be seen from one of the two success return paths in __cpa_process_fault() already adjusting ->numpages). Signed-off-by: Jan Beulich Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/56BB0AD402000078000D05BF@prv-mh.provo.novell.com Signed-off-by: Thomas Gleixner --- arch/x86/mm/pageattr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 2440814b0069..3dd6afd2c0e5 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1122,8 +1122,10 @@ static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr, /* * Ignore all non primary paths. */ - if (!primary) + if (!primary) { + cpa->numpages = 1; return 0; + } /* * Ignore the NULL PTE for kernel identity mapping, as it is expected -- cgit v1.2.3-59-g8ed1b
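The key to the bug described above is that cpa->numpages is an in/out progress counter: the caller loads it with the remaining page count and subtracts whatever comes back. A toy model of that consuming loop (not the real pageattr.c code; page 8 stands in for an address-space hole) shows how returning success without setting ->numpages makes the loop believe the whole remainder was processed:

#include <stdio.h>

struct cpa_data { unsigned long vaddr; int numpages; };

static int pages_actually_changed;

/* Stand-in for __change_page_attr(): page 8 takes the fault path. */
static int change_one(struct cpa_data *cpa, int fixed)
{
	if (cpa->vaddr == 8) {			/* the __cpa_process_fault() case */
		if (fixed)
			cpa->numpages = 1;	/* the fix: report one page, not all */
		return 0;			/* "success" either way */
	}
	pages_actually_changed++;
	cpa->numpages = 1;			/* one page really processed */
	return 0;
}

/* Stand-in for the __change_page_attr_set_clr() loop. */
static void change_range(int pages, int fixed)
{
	struct cpa_data cpa = { .vaddr = 0 };
	int remaining = pages;

	pages_actually_changed = 0;
	while (remaining) {
		cpa.numpages = remaining;
		if (change_one(&cpa, fixed))
			return;
		remaining -= cpa.numpages;	/* trusts ->numpages as progress */
		cpa.vaddr += cpa.numpages;
	}
	printf("%s: reported %d pages done, actually changed %d\n",
	       fixed ? "fixed" : "broken", pages, pages_actually_changed);
}

int main(void)
{
	change_range(16, 0);	/* broken: reports 16, actually changed 8 */
	change_range(16, 1);	/* fixed: steps over the hole page by page */
	return 0;
}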
From cfa52c0cfa4d727aa3e457bf29aeff296c528a08 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Thu, 3 Mar 2016 02:03:11 +0100 Subject: x86/mm/kmmio: Fix mmiotrace for hugepages Because Linux might use pages bigger than 4K to handle those mmio ioremaps, the kmmio code shouldn't rely on the page id as it currently does. Using the memory address instead of the page id lets us look up how big the page is and what its base address is, so that we won't get a page fault within the same page twice anymore. Tested-by: Pierre Moreau Signed-off-by: Karol Herbst Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Cc: linux-mm@kvack.org Cc: linux-x86_64@vger.kernel.org Cc: nouveau@lists.freedesktop.org Cc: pq@iki.fi Cc: rostedt@goodmis.org Link: http://lkml.kernel.org/r/1456966991-6861-1-git-send-email-nouveau@karolherbst.de Signed-off-by: Ingo Molnar --- arch/x86/mm/kmmio.c | 88 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 59 insertions(+), 29 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 637ab34ed632..ddb2244b06a1 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -33,7 +33,7 @@ struct kmmio_fault_page { struct list_head list; struct kmmio_fault_page *release_next; - unsigned long page; /* location of the fault page */ + unsigned long addr; /* the requested address */ pteval_t old_presence; /* page presence prior to arming */ bool armed; @@ -70,9 +70,16 @@ unsigned int kmmio_count; static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE]; static LIST_HEAD(kmmio_probes); -static struct list_head *kmmio_page_list(unsigned long page) +static struct list_head *kmmio_page_list(unsigned long addr) { - return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)]; + unsigned int l; + pte_t *pte = lookup_address(addr, &l); + + if (!pte) + return NULL; + addr &= page_level_mask(l); + + return &kmmio_page_table[hash_long(addr, KMMIO_PAGE_HASH_BITS)]; } /* Accessed per-cpu */ @@ -98,15 +105,19 @@ static struct kmmio_probe *get_kmmio_probe(unsigned long addr) } /* You must be holding RCU read lock.
*/ -static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) +static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long addr) { struct list_head *head; struct kmmio_fault_page *f; + unsigned int l; + pte_t *pte = lookup_address(addr, &l); - page &= PAGE_MASK; - head = kmmio_page_list(page); + if (!pte) + return NULL; + addr &= page_level_mask(l); + head = kmmio_page_list(addr); list_for_each_entry_rcu(f, head, list) { - if (f->page == page) + if (f->addr == addr) return f; } return NULL; @@ -137,10 +148,10 @@ static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old) static int clear_page_presence(struct kmmio_fault_page *f, bool clear) { unsigned int level; - pte_t *pte = lookup_address(f->page, &level); + pte_t *pte = lookup_address(f->addr, &level); if (!pte) { - pr_err("no pte for page 0x%08lx\n", f->page); + pr_err("no pte for addr 0x%08lx\n", f->addr); return -1; } @@ -156,7 +167,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear) return -1; } - __flush_tlb_one(f->page); + __flush_tlb_one(f->addr); return 0; } @@ -176,12 +187,12 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f) int ret; WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n")); if (f->armed) { - pr_warning("double-arm: page 0x%08lx, ref %d, old %d\n", - f->page, f->count, !!f->old_presence); + pr_warning("double-arm: addr 0x%08lx, ref %d, old %d\n", + f->addr, f->count, !!f->old_presence); } ret = clear_page_presence(f, true); - WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming 0x%08lx failed.\n"), - f->page); + WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming at 0x%08lx failed.\n"), + f->addr); f->armed = true; return ret; } @@ -191,7 +202,7 @@ static void disarm_kmmio_fault_page(struct kmmio_fault_page *f) { int ret = clear_page_presence(f, false); WARN_ONCE(ret < 0, - KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page); + KERN_ERR "kmmio disarming at 0x%08lx failed.\n", f->addr); f->armed = false; } @@ -215,6 +226,12 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) struct kmmio_context *ctx; struct kmmio_fault_page *faultpage; int ret = 0; /* default to fault not handled */ + unsigned long page_base = addr; + unsigned int l; + pte_t *pte = lookup_address(addr, &l); + if (!pte) + return -EINVAL; + page_base &= page_level_mask(l); /* * Preemption is now disabled to prevent process switch during @@ -227,7 +244,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) preempt_disable(); rcu_read_lock(); - faultpage = get_kmmio_fault_page(addr); + faultpage = get_kmmio_fault_page(page_base); if (!faultpage) { /* * Either this page fault is not caused by kmmio, or @@ -239,7 +256,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) ctx = &get_cpu_var(kmmio_ctx); if (ctx->active) { - if (addr == ctx->addr) { + if (page_base == ctx->addr) { /* * A second fault on the same page means some other * condition needs handling by do_page_fault(), the @@ -267,9 +284,9 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) ctx->active++; ctx->fpage = faultpage; - ctx->probe = get_kmmio_probe(addr); + ctx->probe = get_kmmio_probe(page_base); ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF)); - ctx->addr = addr; + ctx->addr = page_base; if (ctx->probe && ctx->probe->pre_handler) ctx->probe->pre_handler(ctx->probe, regs, addr); @@ -354,12 +371,11 @@ out: } /* You must be holding kmmio_lock. 
*/ -static int add_kmmio_fault_page(unsigned long page) +static int add_kmmio_fault_page(unsigned long addr) { struct kmmio_fault_page *f; - page &= PAGE_MASK; - f = get_kmmio_fault_page(page); + f = get_kmmio_fault_page(addr); if (f) { if (!f->count) arm_kmmio_fault_page(f); @@ -372,26 +388,25 @@ static int add_kmmio_fault_page(unsigned long page) return -1; f->count = 1; - f->page = page; + f->addr = addr; if (arm_kmmio_fault_page(f)) { kfree(f); return -1; } - list_add_rcu(&f->list, kmmio_page_list(f->page)); + list_add_rcu(&f->list, kmmio_page_list(f->addr)); return 0; } /* You must be holding kmmio_lock. */ -static void release_kmmio_fault_page(unsigned long page, +static void release_kmmio_fault_page(unsigned long addr, struct kmmio_fault_page **release_list) { struct kmmio_fault_page *f; - page &= PAGE_MASK; - f = get_kmmio_fault_page(page); + f = get_kmmio_fault_page(addr); if (!f) return; @@ -420,18 +435,27 @@ int register_kmmio_probe(struct kmmio_probe *p) int ret = 0; unsigned long size = 0; const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK); + unsigned int l; + pte_t *pte; spin_lock_irqsave(&kmmio_lock, flags); if (get_kmmio_probe(p->addr)) { ret = -EEXIST; goto out; } + + pte = lookup_address(p->addr, &l); + if (!pte) { + ret = -EINVAL; + goto out; + } + kmmio_count++; list_add_rcu(&p->list, &kmmio_probes); while (size < size_lim) { if (add_kmmio_fault_page(p->addr + size)) pr_err("Unable to set page fault.\n"); - size += PAGE_SIZE; + size += page_level_size(l); } out: spin_unlock_irqrestore(&kmmio_lock, flags); @@ -506,11 +530,17 @@ void unregister_kmmio_probe(struct kmmio_probe *p) const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK); struct kmmio_fault_page *release_list = NULL; struct kmmio_delayed_release *drelease; + unsigned int l; + pte_t *pte; + + pte = lookup_address(p->addr, &l); + if (!pte) + return; spin_lock_irqsave(&kmmio_lock, flags); while (size < size_lim) { release_kmmio_fault_page(p->addr + size, &release_list); - size += PAGE_SIZE; + size += page_level_size(l); } list_del_rcu(&p->list); kmmio_count--; -- cgit v1.2.3-59-g8ed1b
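The fix keys everything off the page that actually backs an address: lookup_address() reports the mapping level, and page_level_mask()/page_level_size() turn that level into a base mask and an arming stride. A stand-alone sketch of that arithmetic (a toy re-implementation with made-up values, not the kernel's own helpers):

#include <stdio.h>

enum pg_level { PG_LEVEL_4K = 1, PG_LEVEL_2M, PG_LEVEL_1G };	/* mirrors the x86 levels */

static unsigned long page_level_size(enum pg_level l)
{
	return 1UL << (12 + (l - PG_LEVEL_4K) * 9);	/* 4K, 2M, 1G */
}

static unsigned long page_level_mask(enum pg_level l)
{
	return ~(page_level_size(l) - 1);
}

int main(void)
{
	unsigned long addr = 0xd0201234UL;	/* hypothetical ioremapped address */
	enum pg_level l = PG_LEVEL_2M;		/* as if lookup_address() reported 2M */

	printf("page base: %#lx, arming stride: %#lx\n",
	       addr & page_level_mask(l), page_level_size(l));	/* 0xd0200000, 0x200000 */
	return 0;
}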
From 8b8addf891de8a00e4d39fc32f93f7c5eb8feceb Mon Sep 17 00:00:00 2001 From: Hector Marco-Gisbert Date: Thu, 10 Mar 2016 20:51:00 +0100 Subject: x86/mm/32: Enable full randomization on i386 and X86_32 Currently on i386 and on X86_64 when emulating X86_32 in legacy mode, only the stack and the executable are randomized but not other mmapped files (libraries, vDSO, etc.). This patch enables randomization for the libraries, vDSO and mmap requests on i386 and in X86_32 in legacy mode. By default on i386 there are 8 bits for the randomization of the libraries, vDSO and mmaps which only uses 1MB of VA. This patch preserves the original randomness, using 1MB of VA out of 3GB or 4GB. We think that 1MB out of 3GB is not a big cost for having the ASLR. The first obvious security benefit is that all objects are randomized (not only the stack and the executable) in legacy mode, which highly increases the ASLR effectiveness; otherwise the attackers may use these non-randomized areas. But also, sensitive setuid/setgid applications are more secure because currently attackers can disable the randomization of these applications by setting the stack ulimit to "unlimited". This is a very old and widely known trick to disable the ASLR on i386 which has been allowed for too long. Another trick used to disable the ASLR was to set the ADDR_NO_RANDOMIZE personality flag, but fortunately this doesn't work on setuid/setgid applications because there are security checks which clear security-relevant flags. This patch always randomizes the mmap_legacy_base address, removing the possibility to disable the ASLR by setting the stack to "unlimited". Signed-off-by: Hector Marco-Gisbert Acked-by: Ismael Ripoll Ripoll Acked-by: Kees Cook Acked-by: Arjan van de Ven Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: Kees Cook Link: http://lkml.kernel.org/r/1457639460-5242-1-git-send-email-hecmargi@upv.es Signed-off-by: Ingo Molnar --- arch/x86/mm/mmap.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 96bd1e2bffaf..389939f74dd5 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -93,18 +93,6 @@ static unsigned long mmap_base(unsigned long rnd) return PAGE_ALIGN(TASK_SIZE - gap - rnd); } -/* - * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64 - * does, but not when emulating X86_32 - */ -static unsigned long mmap_legacy_base(unsigned long rnd) -{ - if (mmap_is_ia32()) - return TASK_UNMAPPED_BASE; - else - return TASK_UNMAPPED_BASE + rnd; -} - /* * This function, called very early during the creation of a new * process VM image, sets up which VM layout function to use: @@ -116,7 +104,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm) if (current->flags & PF_RANDOMIZE) random_factor = arch_mmap_rnd(); - mm->mmap_legacy_base = mmap_legacy_base(random_factor); + mm->mmap_legacy_base = TASK_UNMAPPED_BASE + random_factor; if (mmap_is_legacy()) { mm->mmap_base = mm->mmap_legacy_base; -- cgit v1.2.3-59-g8ed1b
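The user-visible effect of this last change is that mmap() placements vary from run to run even in the legacy layout. A minimal probe (run it a few times and compare the printed addresses; assumes ASLR has not been disabled via the norandmaps boot option or an ADDR_NO_RANDOMIZE personality):

#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	/* Let the kernel pick the placement; with mmap ASLR active,
	 * this address changes from run to run. */
	void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	printf("anonymous mapping at %p\n", p);
	munmap(p, 4096);
	return 0;
}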