aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--arch/x86/mm/dump_pagetables.c11
-rw-r--r--arch/x86/mm/extable.c100
-rw-r--r--arch/x86/mm/fault.c17
-rw-r--r--arch/x86/mm/gup.c4
-rw-r--r--arch/x86/mm/init.c36
-rw-r--r--arch/x86/mm/init_32.c6
-rw-r--r--arch/x86/mm/init_64.c27
-rw-r--r--arch/x86/mm/kasan_init_64.c17
-rw-r--r--arch/x86/mm/kmmio.c88
-rw-r--r--arch/x86/mm/mmap.c20
-rw-r--r--arch/x86/mm/mpx.c2
-rw-r--r--arch/x86/mm/numa.c67
-rw-r--r--arch/x86/mm/pageattr.c34
-rw-r--r--arch/x86/mm/pat.c4
-rw-r--r--arch/x86/mm/setup_nx.c6
15 files changed, 261 insertions, 178 deletions
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 4a6f1d9b5106..99bfb192803f 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -358,20 +358,19 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
#define pgd_none(a) pud_none(__pud(pgd_val(a)))
#endif
-#ifdef CONFIG_X86_64
static inline bool is_hypervisor_range(int idx)
{
+#ifdef CONFIG_X86_64
/*
* ffff800000000000 - ffff87ffffffffff is reserved for
* the hypervisor.
*/
- return paravirt_enabled() &&
- (idx >= pgd_index(__PAGE_OFFSET) - 16) &&
- (idx < pgd_index(__PAGE_OFFSET));
-}
+ return (idx >= pgd_index(__PAGE_OFFSET) - 16) &&
+ (idx < pgd_index(__PAGE_OFFSET));
#else
-static inline bool is_hypervisor_range(int idx) { return false; }
+ return false;
#endif
+}
static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
bool checkwx)
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 903ec1e9c326..9dd7e4b7fcde 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -3,6 +3,9 @@
#include <linux/sort.h>
#include <asm/uaccess.h>
+typedef bool (*ex_handler_t)(const struct exception_table_entry *,
+ struct pt_regs *, int);
+
static inline unsigned long
ex_insn_addr(const struct exception_table_entry *x)
{
@@ -13,11 +16,56 @@ ex_fixup_addr(const struct exception_table_entry *x)
{
return (unsigned long)&x->fixup + x->fixup;
}
+static inline ex_handler_t
+ex_fixup_handler(const struct exception_table_entry *x)
+{
+ return (ex_handler_t)((unsigned long)&x->handler + x->handler);
+}
-int fixup_exception(struct pt_regs *regs)
+bool ex_handler_default(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
{
- const struct exception_table_entry *fixup;
- unsigned long new_ip;
+ regs->ip = ex_fixup_addr(fixup);
+ return true;
+}
+EXPORT_SYMBOL(ex_handler_default);
+
+bool ex_handler_fault(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
+{
+ regs->ip = ex_fixup_addr(fixup);
+ regs->ax = trapnr;
+ return true;
+}
+EXPORT_SYMBOL_GPL(ex_handler_fault);
+
+bool ex_handler_ext(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
+{
+ /* Special hack for uaccess_err */
+ current_thread_info()->uaccess_err = 1;
+ regs->ip = ex_fixup_addr(fixup);
+ return true;
+}
+EXPORT_SYMBOL(ex_handler_ext);
+
+bool ex_has_fault_handler(unsigned long ip)
+{
+ const struct exception_table_entry *e;
+ ex_handler_t handler;
+
+ e = search_exception_tables(ip);
+ if (!e)
+ return false;
+ handler = ex_fixup_handler(e);
+
+ return handler == ex_handler_fault;
+}
+
+int fixup_exception(struct pt_regs *regs, int trapnr)
+{
+ const struct exception_table_entry *e;
+ ex_handler_t handler;
#ifdef CONFIG_PNPBIOS
if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) {
@@ -33,42 +81,34 @@ int fixup_exception(struct pt_regs *regs)
}
#endif
- fixup = search_exception_tables(regs->ip);
- if (fixup) {
- new_ip = ex_fixup_addr(fixup);
-
- if (fixup->fixup - fixup->insn >= 0x7ffffff0 - 4) {
- /* Special hack for uaccess_err */
- current_thread_info()->uaccess_err = 1;
- new_ip -= 0x7ffffff0;
- }
- regs->ip = new_ip;
- return 1;
- }
+ e = search_exception_tables(regs->ip);
+ if (!e)
+ return 0;
- return 0;
+ handler = ex_fixup_handler(e);
+ return handler(e, regs, trapnr);
}
/* Restricted version used during very early boot */
int __init early_fixup_exception(unsigned long *ip)
{
- const struct exception_table_entry *fixup;
+ const struct exception_table_entry *e;
unsigned long new_ip;
+ ex_handler_t handler;
- fixup = search_exception_tables(*ip);
- if (fixup) {
- new_ip = ex_fixup_addr(fixup);
+ e = search_exception_tables(*ip);
+ if (!e)
+ return 0;
- if (fixup->fixup - fixup->insn >= 0x7ffffff0 - 4) {
- /* uaccess handling not supported during early boot */
- return 0;
- }
+ new_ip = ex_fixup_addr(e);
+ handler = ex_fixup_handler(e);
- *ip = new_ip;
- return 1;
- }
+ /* special handling not supported during early boot */
+ if (handler != ex_handler_default)
+ return 0;
- return 0;
+ *ip = new_ip;
+ return 1;
}
/*
@@ -133,6 +173,8 @@ void sort_extable(struct exception_table_entry *start,
i += 4;
p->fixup += i;
i += 4;
+ p->handler += i;
+ i += 4;
}
sort(start, finish - start, sizeof(struct exception_table_entry),
@@ -145,6 +187,8 @@ void sort_extable(struct exception_table_entry *start,
i += 4;
p->fixup -= i;
i += 4;
+ p->handler -= i;
+ i += 4;
}
}
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index eef44d9a3f77..03898aea6e0f 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -287,6 +287,9 @@ static noinline int vmalloc_fault(unsigned long address)
if (!pmd_k)
return -1;
+ if (pmd_huge(*pmd_k))
+ return 0;
+
pte_k = pte_offset_kernel(pmd_k, address);
if (!pte_present(*pte_k))
return -1;
@@ -360,8 +363,6 @@ void vmalloc_sync_all(void)
* 64-bit:
*
* Handle a fault on the vmalloc area
- *
- * This assumes no large pages in there.
*/
static noinline int vmalloc_fault(unsigned long address)
{
@@ -403,17 +404,23 @@ static noinline int vmalloc_fault(unsigned long address)
if (pud_none(*pud_ref))
return -1;
- if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
+ if (pud_none(*pud) || pud_pfn(*pud) != pud_pfn(*pud_ref))
BUG();
+ if (pud_huge(*pud))
+ return 0;
+
pmd = pmd_offset(pud, address);
pmd_ref = pmd_offset(pud_ref, address);
if (pmd_none(*pmd_ref))
return -1;
- if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
+ if (pmd_none(*pmd) || pmd_pfn(*pmd) != pmd_pfn(*pmd_ref))
BUG();
+ if (pmd_huge(*pmd))
+ return 0;
+
pte_ref = pte_offset_kernel(pmd_ref, address);
if (!pte_present(*pte_ref))
return -1;
@@ -656,7 +663,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
int sig;
/* Are we prepared to handle this kernel fault? */
- if (fixup_exception(regs)) {
+ if (fixup_exception(regs, X86_TRAP_PF)) {
/*
* Any interrupt that takes a fault gets the fixup. This makes
* the below recursive fault logic only apply to a faults from
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 6d5eb5900372..f8d0b5e8bdfd 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -102,7 +102,6 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
return 0;
}
- page = pte_page(pte);
if (pte_devmap(pte)) {
pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
if (unlikely(!pgmap)) {
@@ -115,6 +114,7 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
return 0;
}
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+ page = pte_page(pte);
get_page(page);
put_dev_pagemap(pgmap);
SetPageReferenced(page);
@@ -131,7 +131,7 @@ static inline void get_head_page_multiple(struct page *page, int nr)
{
VM_BUG_ON_PAGE(page != compound_head(page), page);
VM_BUG_ON_PAGE(page_count(page) == 0, page);
- atomic_add(nr, &page->_count);
+ page_ref_add(page, nr);
SetPageReferenced(page);
}
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 493f54172b4a..9d56f271d519 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -150,13 +150,14 @@ static int page_size_mask;
static void __init probe_page_size_mask(void)
{
-#if !defined(CONFIG_DEBUG_PAGEALLOC) && !defined(CONFIG_KMEMCHECK)
+#if !defined(CONFIG_KMEMCHECK)
/*
- * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
+ * For CONFIG_KMEMCHECK or pagealloc debugging, identity mapping will
+ * use small pages.
* This will simplify cpa(), which otherwise needs to support splitting
* large pages into small in interrupt context, etc.
*/
- if (cpu_has_pse)
+ if (cpu_has_pse && !debug_pagealloc_enabled())
page_size_mask |= 1 << PG_LEVEL_2M;
#endif
@@ -666,21 +667,22 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
* mark them not present - any buggy init-section access will
* create a kernel page fault:
*/
-#ifdef CONFIG_DEBUG_PAGEALLOC
- printk(KERN_INFO "debug: unmapping init [mem %#010lx-%#010lx]\n",
- begin, end - 1);
- set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
-#else
- /*
- * We just marked the kernel text read only above, now that
- * we are going to free part of that, we need to make that
- * writeable and non-executable first.
- */
- set_memory_nx(begin, (end - begin) >> PAGE_SHIFT);
- set_memory_rw(begin, (end - begin) >> PAGE_SHIFT);
+ if (debug_pagealloc_enabled()) {
+ pr_info("debug: unmapping init [mem %#010lx-%#010lx]\n",
+ begin, end - 1);
+ set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
+ } else {
+ /*
+ * We just marked the kernel text read only above, now that
+ * we are going to free part of that, we need to make that
+ * writeable and non-executable first.
+ */
+ set_memory_nx(begin, (end - begin) >> PAGE_SHIFT);
+ set_memory_rw(begin, (end - begin) >> PAGE_SHIFT);
- free_reserved_area((void *)begin, (void *)end, POISON_FREE_INITMEM, what);
-#endif
+ free_reserved_area((void *)begin, (void *)end,
+ POISON_FREE_INITMEM, what);
+ }
}
void free_initmem(void)
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index cb4ef3de61f9..bd7a9b9e2e14 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -388,7 +388,6 @@ repeat:
}
pte_t *kmap_pte;
-pgprot_t kmap_prot;
static inline pte_t *kmap_get_fixmap_pte(unsigned long vaddr)
{
@@ -405,8 +404,6 @@ static void __init kmap_init(void)
*/
kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
-
- kmap_prot = PAGE_KERNEL;
}
#ifdef CONFIG_HIGHMEM
@@ -871,7 +868,6 @@ static noinline int do_test_wp_bit(void)
return flag;
}
-#ifdef CONFIG_DEBUG_RODATA
const int rodata_test_data = 0xC3;
EXPORT_SYMBOL_GPL(rodata_test_data);
@@ -960,5 +956,3 @@ void mark_rodata_ro(void)
if (__supported_pte_mask & _PAGE_NX)
debug_checkwx();
}
-#endif
-
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 5488d21123bd..214afda97911 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -53,6 +53,7 @@
#include <asm/numa.h>
#include <asm/cacheflush.h>
#include <asm/init.h>
+#include <asm/uv/uv.h>
#include <asm/setup.h>
#include "mm_internal.h"
@@ -1074,7 +1075,6 @@ void __init mem_init(void)
mem_init_print_info(NULL);
}
-#ifdef CONFIG_DEBUG_RODATA
const int rodata_test_data = 0xC3;
EXPORT_SYMBOL_GPL(rodata_test_data);
@@ -1166,8 +1166,6 @@ void mark_rodata_ro(void)
debug_checkwx();
}
-#endif
-
int kern_addr_valid(unsigned long addr)
{
unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
@@ -1206,26 +1204,13 @@ int kern_addr_valid(unsigned long addr)
static unsigned long probe_memory_block_size(void)
{
- /* start from 2g */
- unsigned long bz = 1UL<<31;
-
- if (totalram_pages >= (64ULL << (30 - PAGE_SHIFT))) {
- pr_info("Using 2GB memory block size for large-memory system\n");
- return 2UL * 1024 * 1024 * 1024;
- }
-
- /* less than 64g installed */
- if ((max_pfn << PAGE_SHIFT) < (16UL << 32))
- return MIN_MEMORY_BLOCK_SIZE;
+ unsigned long bz = MIN_MEMORY_BLOCK_SIZE;
- /* get the tail size */
- while (bz > MIN_MEMORY_BLOCK_SIZE) {
- if (!((max_pfn << PAGE_SHIFT) & (bz - 1)))
- break;
- bz >>= 1;
- }
+ /* if system is UV or has 64GB of RAM or more, use large blocks */
+ if (is_uv_system() || ((max_pfn << PAGE_SHIFT) >= (64UL << 30)))
+ bz = 2UL << 30; /* 2GB */
- printk(KERN_DEBUG "memory block size : %ldMB\n", bz >> 20);
+ pr_info("x86/mm: Memory block size: %ldMB\n", bz >> 20);
return bz;
}
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index d470cf219a2d..1b1110fa0057 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -120,11 +120,22 @@ void __init kasan_init(void)
kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
(void *)KASAN_SHADOW_END);
- memset(kasan_zero_page, 0, PAGE_SIZE);
-
load_cr3(init_level4_pgt);
__flush_tlb_all();
- init_task.kasan_depth = 0;
+ /*
+ * kasan_zero_page has been used as early shadow memory, thus it may
+ * contain some garbage. Now we can clear and write protect it, since
+ * after the TLB flush no one should write to it.
+ */
+ memset(kasan_zero_page, 0, PAGE_SIZE);
+ for (i = 0; i < PTRS_PER_PTE; i++) {
+ pte_t pte = __pte(__pa(kasan_zero_page) | __PAGE_KERNEL_RO);
+ set_pte(&kasan_zero_pte[i], pte);
+ }
+ /* Flush TLBs again to be sure that write protection applied. */
+ __flush_tlb_all();
+
+ init_task.kasan_depth = 0;
pr_info("KernelAddressSanitizer initialized\n");
}
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 637ab34ed632..ddb2244b06a1 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -33,7 +33,7 @@
struct kmmio_fault_page {
struct list_head list;
struct kmmio_fault_page *release_next;
- unsigned long page; /* location of the fault page */
+ unsigned long addr; /* the requested address */
pteval_t old_presence; /* page presence prior to arming */
bool armed;
@@ -70,9 +70,16 @@ unsigned int kmmio_count;
static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE];
static LIST_HEAD(kmmio_probes);
-static struct list_head *kmmio_page_list(unsigned long page)
+static struct list_head *kmmio_page_list(unsigned long addr)
{
- return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)];
+ unsigned int l;
+ pte_t *pte = lookup_address(addr, &l);
+
+ if (!pte)
+ return NULL;
+ addr &= page_level_mask(l);
+
+ return &kmmio_page_table[hash_long(addr, KMMIO_PAGE_HASH_BITS)];
}
/* Accessed per-cpu */
@@ -98,15 +105,19 @@ static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
}
/* You must be holding RCU read lock. */
-static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
+static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long addr)
{
struct list_head *head;
struct kmmio_fault_page *f;
+ unsigned int l;
+ pte_t *pte = lookup_address(addr, &l);
- page &= PAGE_MASK;
- head = kmmio_page_list(page);
+ if (!pte)
+ return NULL;
+ addr &= page_level_mask(l);
+ head = kmmio_page_list(addr);
list_for_each_entry_rcu(f, head, list) {
- if (f->page == page)
+ if (f->addr == addr)
return f;
}
return NULL;
@@ -137,10 +148,10 @@ static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old)
static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
{
unsigned int level;
- pte_t *pte = lookup_address(f->page, &level);
+ pte_t *pte = lookup_address(f->addr, &level);
if (!pte) {
- pr_err("no pte for page 0x%08lx\n", f->page);
+ pr_err("no pte for addr 0x%08lx\n", f->addr);
return -1;
}
@@ -156,7 +167,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
return -1;
}
- __flush_tlb_one(f->page);
+ __flush_tlb_one(f->addr);
return 0;
}
@@ -176,12 +187,12 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
int ret;
WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n"));
if (f->armed) {
- pr_warning("double-arm: page 0x%08lx, ref %d, old %d\n",
- f->page, f->count, !!f->old_presence);
+ pr_warning("double-arm: addr 0x%08lx, ref %d, old %d\n",
+ f->addr, f->count, !!f->old_presence);
}
ret = clear_page_presence(f, true);
- WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming 0x%08lx failed.\n"),
- f->page);
+ WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming at 0x%08lx failed.\n"),
+ f->addr);
f->armed = true;
return ret;
}
@@ -191,7 +202,7 @@ static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
{
int ret = clear_page_presence(f, false);
WARN_ONCE(ret < 0,
- KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page);
+ KERN_ERR "kmmio disarming at 0x%08lx failed.\n", f->addr);
f->armed = false;
}
@@ -215,6 +226,12 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
struct kmmio_context *ctx;
struct kmmio_fault_page *faultpage;
int ret = 0; /* default to fault not handled */
+ unsigned long page_base = addr;
+ unsigned int l;
+ pte_t *pte = lookup_address(addr, &l);
+ if (!pte)
+ return -EINVAL;
+ page_base &= page_level_mask(l);
/*
* Preemption is now disabled to prevent process switch during
@@ -227,7 +244,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
preempt_disable();
rcu_read_lock();
- faultpage = get_kmmio_fault_page(addr);
+ faultpage = get_kmmio_fault_page(page_base);
if (!faultpage) {
/*
* Either this page fault is not caused by kmmio, or
@@ -239,7 +256,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
ctx = &get_cpu_var(kmmio_ctx);
if (ctx->active) {
- if (addr == ctx->addr) {
+ if (page_base == ctx->addr) {
/*
* A second fault on the same page means some other
* condition needs handling by do_page_fault(), the
@@ -267,9 +284,9 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
ctx->active++;
ctx->fpage = faultpage;
- ctx->probe = get_kmmio_probe(addr);
+ ctx->probe = get_kmmio_probe(page_base);
ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
- ctx->addr = addr;
+ ctx->addr = page_base;
if (ctx->probe && ctx->probe->pre_handler)
ctx->probe->pre_handler(ctx->probe, regs, addr);
@@ -354,12 +371,11 @@ out:
}
/* You must be holding kmmio_lock. */
-static int add_kmmio_fault_page(unsigned long page)
+static int add_kmmio_fault_page(unsigned long addr)
{
struct kmmio_fault_page *f;
- page &= PAGE_MASK;
- f = get_kmmio_fault_page(page);
+ f = get_kmmio_fault_page(addr);
if (f) {
if (!f->count)
arm_kmmio_fault_page(f);
@@ -372,26 +388,25 @@ static int add_kmmio_fault_page(unsigned long page)
return -1;
f->count = 1;
- f->page = page;
+ f->addr = addr;
if (arm_kmmio_fault_page(f)) {
kfree(f);
return -1;
}
- list_add_rcu(&f->list, kmmio_page_list(f->page));
+ list_add_rcu(&f->list, kmmio_page_list(f->addr));
return 0;
}
/* You must be holding kmmio_lock. */
-static void release_kmmio_fault_page(unsigned long page,
+static void release_kmmio_fault_page(unsigned long addr,
struct kmmio_fault_page **release_list)
{
struct kmmio_fault_page *f;
- page &= PAGE_MASK;
- f = get_kmmio_fault_page(page);
+ f = get_kmmio_fault_page(addr);
if (!f)
return;
@@ -420,18 +435,27 @@ int register_kmmio_probe(struct kmmio_probe *p)
int ret = 0;
unsigned long size = 0;
const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
+ unsigned int l;
+ pte_t *pte;
spin_lock_irqsave(&kmmio_lock, flags);
if (get_kmmio_probe(p->addr)) {
ret = -EEXIST;
goto out;
}
+
+ pte = lookup_address(p->addr, &l);
+ if (!pte) {
+ ret = -EINVAL;
+ goto out;
+ }
+
kmmio_count++;
list_add_rcu(&p->list, &kmmio_probes);
while (size < size_lim) {
if (add_kmmio_fault_page(p->addr + size))
pr_err("Unable to set page fault.\n");
- size += PAGE_SIZE;
+ size += page_level_size(l);
}
out:
spin_unlock_irqrestore(&kmmio_lock, flags);
@@ -506,11 +530,17 @@ void unregister_kmmio_probe(struct kmmio_probe *p)
const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
struct kmmio_fault_page *release_list = NULL;
struct kmmio_delayed_release *drelease;
+ unsigned int l;
+ pte_t *pte;
+
+ pte = lookup_address(p->addr, &l);
+ if (!pte)
+ return;
spin_lock_irqsave(&kmmio_lock, flags);
while (size < size_lim) {
release_kmmio_fault_page(p->addr + size, &release_list);
- size += PAGE_SIZE;
+ size += page_level_size(l);
}
list_del_rcu(&p->list);
kmmio_count--;
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 96bd1e2bffaf..d2dc0438d654 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -71,12 +71,12 @@ unsigned long arch_mmap_rnd(void)
if (mmap_is_ia32())
#ifdef CONFIG_COMPAT
- rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_compat_bits) - 1);
+ rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
#else
- rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);
+ rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
#endif
else
- rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);
+ rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
return rnd << PAGE_SHIFT;
}
@@ -94,18 +94,6 @@ static unsigned long mmap_base(unsigned long rnd)
}
/*
- * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64
- * does, but not when emulating X86_32
- */
-static unsigned long mmap_legacy_base(unsigned long rnd)
-{
- if (mmap_is_ia32())
- return TASK_UNMAPPED_BASE;
- else
- return TASK_UNMAPPED_BASE + rnd;
-}
-
-/*
* This function, called very early during the creation of a new
* process VM image, sets up which VM layout function to use:
*/
@@ -116,7 +104,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
if (current->flags & PF_RANDOMIZE)
random_factor = arch_mmap_rnd();
- mm->mmap_legacy_base = mmap_legacy_base(random_factor);
+ mm->mmap_legacy_base = TASK_UNMAPPED_BASE + random_factor;
if (mmap_is_legacy()) {
mm->mmap_base = mm->mmap_legacy_base;
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index b2fd67da1701..ef05755a1900 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -123,7 +123,7 @@ static int get_reg_offset(struct insn *insn, struct pt_regs *regs,
break;
}
- if (regno > nr_registers) {
+ if (regno >= nr_registers) {
WARN_ONCE(1, "decoded an instruction with an invalid register");
return -EINVAL;
}
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index d04f8094bc23..f70c1ff46125 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -465,46 +465,67 @@ static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
return true;
}
+/*
+ * Mark all currently memblock-reserved physical memory (which covers the
+ * kernel's own memory ranges) as hot-unswappable.
+ */
static void __init numa_clear_kernel_node_hotplug(void)
{
- int i, nid;
- nodemask_t numa_kernel_nodes = NODE_MASK_NONE;
- phys_addr_t start, end;
- struct memblock_region *r;
+ nodemask_t reserved_nodemask = NODE_MASK_NONE;
+ struct memblock_region *mb_region;
+ int i;
/*
+ * We have to do some preprocessing of memblock regions, to
+ * make them suitable for reservation.
+ *
* At this time, all memory regions reserved by memblock are
- * used by the kernel. Set the nid in memblock.reserved will
- * mark out all the nodes the kernel resides in.
+ * used by the kernel, but those regions are not split up
+ * along node boundaries yet, and don't necessarily have their
+ * node ID set yet either.
+ *
+ * So iterate over all memory known to the x86 architecture,
+ * and use those ranges to set the nid in memblock.reserved.
+ * This will split up the memblock regions along node
+ * boundaries and will set the node IDs as well.
*/
for (i = 0; i < numa_meminfo.nr_blks; i++) {
- struct numa_memblk *mb = &numa_meminfo.blk[i];
+ struct numa_memblk *mb = numa_meminfo.blk + i;
+ int ret;
- memblock_set_node(mb->start, mb->end - mb->start,
- &memblock.reserved, mb->nid);
+ ret = memblock_set_node(mb->start, mb->end - mb->start, &memblock.reserved, mb->nid);
+ WARN_ON_ONCE(ret);
}
/*
- * Mark all kernel nodes.
+ * Now go over all reserved memblock regions, to construct a
+ * node mask of all kernel reserved memory areas.
*
- * When booting with mem=nn[kMG] or in a kdump kernel, numa_meminfo
- * may not include all the memblock.reserved memory ranges because
- * trim_snb_memory() reserves specific pages for Sandy Bridge graphics.
+ * [ Note, when booting with mem=nn[kMG] or in a kdump kernel,
+ * numa_meminfo might not include all memblock.reserved
+ * memory ranges, because quirks such as trim_snb_memory()
+ * reserve specific pages for Sandy Bridge graphics. ]
*/
- for_each_memblock(reserved, r)
- if (r->nid != MAX_NUMNODES)
- node_set(r->nid, numa_kernel_nodes);
+ for_each_memblock(reserved, mb_region) {
+ if (mb_region->nid != MAX_NUMNODES)
+ node_set(mb_region->nid, reserved_nodemask);
+ }
- /* Clear MEMBLOCK_HOTPLUG flag for memory in kernel nodes. */
+ /*
+ * Finally, clear the MEMBLOCK_HOTPLUG flag for all memory
+ * belonging to the reserved node mask.
+ *
+ * Note that this will include memory regions that reside
+ * on nodes that contain kernel memory - entire nodes
+ * become hot-unpluggable:
+ */
for (i = 0; i < numa_meminfo.nr_blks; i++) {
- nid = numa_meminfo.blk[i].nid;
- if (!node_isset(nid, numa_kernel_nodes))
- continue;
+ struct numa_memblk *mb = numa_meminfo.blk + i;
- start = numa_meminfo.blk[i].start;
- end = numa_meminfo.blk[i].end;
+ if (!node_isset(mb->nid, reserved_nodemask))
+ continue;
- memblock_clear_hotplug(start, end - start);
+ memblock_clear_hotplug(mb->start, mb->end - mb->start);
}
}
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 2440814b0069..4d0b26253042 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -106,12 +106,6 @@ static inline unsigned long highmap_end_pfn(void)
#endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
-# define debug_pagealloc 1
-#else
-# define debug_pagealloc 0
-#endif
-
static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{
@@ -283,7 +277,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
__pa_symbol(__end_rodata) >> PAGE_SHIFT))
pgprot_val(forbidden) |= _PAGE_RW;
-#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
+#if defined(CONFIG_X86_64)
/*
* Once the kernel maps the text as RO (kernel_set_to_readonly is set),
* kernel text mappings for the large page aligned text, rodata sections
@@ -419,24 +413,30 @@ pmd_t *lookup_pmd_address(unsigned long address)
phys_addr_t slow_virt_to_phys(void *__virt_addr)
{
unsigned long virt_addr = (unsigned long)__virt_addr;
- unsigned long phys_addr, offset;
+ phys_addr_t phys_addr;
+ unsigned long offset;
enum pg_level level;
pte_t *pte;
pte = lookup_address(virt_addr, &level);
BUG_ON(!pte);
+ /*
+ * pXX_pfn() returns unsigned long, which must be cast to phys_addr_t
+ * before being left-shifted PAGE_SHIFT bits -- this trick is to
+ * make 32-PAE kernel work correctly.
+ */
switch (level) {
case PG_LEVEL_1G:
- phys_addr = pud_pfn(*(pud_t *)pte) << PAGE_SHIFT;
+ phys_addr = (phys_addr_t)pud_pfn(*(pud_t *)pte) << PAGE_SHIFT;
offset = virt_addr & ~PUD_PAGE_MASK;
break;
case PG_LEVEL_2M:
- phys_addr = pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT;
+ phys_addr = (phys_addr_t)pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT;
offset = virt_addr & ~PMD_PAGE_MASK;
break;
default:
- phys_addr = pte_pfn(*pte) << PAGE_SHIFT;
+ phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
offset = virt_addr & ~PAGE_MASK;
}
@@ -708,10 +708,10 @@ static int split_large_page(struct cpa_data *cpa, pte_t *kpte,
{
struct page *base;
- if (!debug_pagealloc)
+ if (!debug_pagealloc_enabled())
spin_unlock(&cpa_lock);
base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0);
- if (!debug_pagealloc)
+ if (!debug_pagealloc_enabled())
spin_lock(&cpa_lock);
if (!base)
return -ENOMEM;
@@ -1122,8 +1122,10 @@ static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
/*
* Ignore all non primary paths.
*/
- if (!primary)
+ if (!primary) {
+ cpa->numpages = 1;
return 0;
+ }
/*
* Ignore the NULL PTE for kernel identity mapping, as it is expected
@@ -1331,10 +1333,10 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
if (cpa->flags & (CPA_ARRAY | CPA_PAGES_ARRAY))
cpa->numpages = 1;
- if (!debug_pagealloc)
+ if (!debug_pagealloc_enabled())
spin_lock(&cpa_lock);
ret = __change_page_attr(cpa, checkalias);
- if (!debug_pagealloc)
+ if (!debug_pagealloc_enabled())
spin_unlock(&cpa_lock);
if (ret)
return ret;
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index f4ae536b0914..04e2e7144bee 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -943,7 +943,7 @@ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
return -EINVAL;
}
- *prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
+ *prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) |
cachemode2protval(pcm));
return 0;
@@ -959,7 +959,7 @@ int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
/* Set prot based on lookup */
pcm = lookup_memtype(pfn_t_to_phys(pfn));
- *prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
+ *prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) |
cachemode2protval(pcm));
return 0;
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
index 92e2eacb3321..8bea84724a7d 100644
--- a/arch/x86/mm/setup_nx.c
+++ b/arch/x86/mm/setup_nx.c
@@ -4,6 +4,7 @@
#include <asm/pgtable.h>
#include <asm/proto.h>
+#include <asm/cpufeature.h>
static int disable_nx;
@@ -31,9 +32,8 @@ early_param("noexec", noexec_setup);
void x86_configure_nx(void)
{
- if (boot_cpu_has(X86_FEATURE_NX) && !disable_nx)
- __supported_pte_mask |= _PAGE_NX;
- else
+ /* If disable_nx is set, clear NX on all new mappings going forward. */
+ if (disable_nx)
__supported_pte_mask &= ~_PAGE_NX;
}