aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/mm
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2009-02-25 16:40:10 +0100
committerIngo Molnar <mingo@elte.hu>2009-02-25 16:40:10 +0100
commit2e31add2a7e2a15d07f592c21ba35870fa9a1d1f (patch)
tree814e2c70fb5528f108114c0da6e5a1e219e6a0e1 /arch/x86/mm
parentx86 PAT: ioremap_wc should take resource_size_t parameter (diff)
parentgpu/drm, x86, PAT: PAT support for io_mapping_* (diff)
downloadlinux-dev-2e31add2a7e2a15d07f592c21ba35870fa9a1d1f.tar.xz
linux-dev-2e31add2a7e2a15d07f592c21ba35870fa9a1d1f.zip
Merge branch 'x86/urgent' into x86/pat
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--arch/x86/mm/fault.c8
-rw-r--r--arch/x86/mm/init_32.c48
-rw-r--r--arch/x86/mm/init_64.c2
-rw-r--r--arch/x86/mm/iomap_32.c66
-rw-r--r--arch/x86/mm/ioremap.c44
-rw-r--r--arch/x86/mm/numa_64.c2
-rw-r--r--arch/x86/mm/pageattr.c79
-rw-r--r--arch/x86/mm/pat.c104
8 files changed, 242 insertions, 111 deletions
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 90dfae511a41..c76ef1d701c9 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -603,8 +603,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
si_code = SEGV_MAPERR;
- if (notify_page_fault(regs))
- return;
if (unlikely(kmmio_fault(regs, address)))
return;
@@ -634,6 +632,9 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
if (spurious_fault(address, error_code))
return;
+ /* kprobes don't want to hook the spurious faults. */
+ if (notify_page_fault(regs))
+ return;
/*
* Don't take the mm semaphore here. If we fixup a prefetch
* fault we could otherwise deadlock.
@@ -641,6 +642,9 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
goto bad_area_nosemaphore;
}
+ /* kprobes don't want to hook the spurious faults. */
+ if (notify_page_fault(regs))
+ return;
/*
* It's safe to allow irq's after cr2 has been saved and the
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 88f1b10de3be..2cef05074413 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -138,6 +138,47 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
return pte_offset_kernel(pmd, 0);
}
+static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
+ unsigned long vaddr, pte_t *lastpte)
+{
+#ifdef CONFIG_HIGHMEM
+ /*
+ * Something (early fixmap) may already have put a pte
+ * page here, which causes the page table allocation
+ * to become nonlinear. Attempt to fix it, and if it
+ * is still nonlinear then we have to bug.
+ */
+ int pmd_idx_kmap_begin = fix_to_virt(FIX_KMAP_END) >> PMD_SHIFT;
+ int pmd_idx_kmap_end = fix_to_virt(FIX_KMAP_BEGIN) >> PMD_SHIFT;
+
+ if (pmd_idx_kmap_begin != pmd_idx_kmap_end
+ && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin
+ && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end
+ && ((__pa(pte) >> PAGE_SHIFT) < table_start
+ || (__pa(pte) >> PAGE_SHIFT) >= table_end)) {
+ pte_t *newpte;
+ int i;
+
+ BUG_ON(after_init_bootmem);
+ newpte = alloc_low_page();
+ for (i = 0; i < PTRS_PER_PTE; i++)
+ set_pte(newpte + i, pte[i]);
+
+ paravirt_alloc_pte(&init_mm, __pa(newpte) >> PAGE_SHIFT);
+ set_pmd(pmd, __pmd(__pa(newpte)|_PAGE_TABLE));
+ BUG_ON(newpte != pte_offset_kernel(pmd, 0));
+ __flush_tlb_all();
+
+ paravirt_release_pte(__pa(pte) >> PAGE_SHIFT);
+ pte = newpte;
+ }
+ BUG_ON(vaddr < fix_to_virt(FIX_KMAP_BEGIN - 1)
+ && vaddr > fix_to_virt(FIX_KMAP_END)
+ && lastpte && lastpte + PTRS_PER_PTE != pte);
+#endif
+ return pte;
+}
+
/*
* This function initializes a certain range of kernel virtual memory
* with new bootmem page tables, everywhere page tables are missing in
@@ -154,6 +195,7 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
unsigned long vaddr;
pgd_t *pgd;
pmd_t *pmd;
+ pte_t *pte = NULL;
vaddr = start;
pgd_idx = pgd_index(vaddr);
@@ -165,7 +207,8 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
pmd = pmd + pmd_index(vaddr);
for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end);
pmd++, pmd_idx++) {
- one_page_table_init(pmd);
+ pte = page_table_kmap_check(one_page_table_init(pmd),
+ pmd, vaddr, pte);
vaddr += PMD_SIZE;
}
@@ -508,7 +551,6 @@ static void __init early_ioremap_page_table_range_init(pgd_t *pgd_base)
* Fixed mappings, only the page table structure has to be
* created - mappings will be set by set_fixmap():
*/
- early_ioremap_clear();
vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
page_table_range_init(vaddr, end, pgd_base);
@@ -801,7 +843,7 @@ static void __init find_early_table_space(unsigned long end, int use_pse)
tables += PAGE_ALIGN(ptes * sizeof(pte_t));
/* for fixmap */
- tables += PAGE_SIZE * 2;
+ tables += PAGE_ALIGN(__end_of_fixed_addresses * sizeof(pte_t));
/*
* RED-PEN putting page tables only on node 0 could
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 23f68e77ad1f..e6d36b490250 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -596,7 +596,7 @@ static void __init init_gbpages(void)
direct_gbpages = 0;
}
-static unsigned long __init kernel_physical_mapping_init(unsigned long start,
+static unsigned long __meminit kernel_physical_mapping_init(unsigned long start,
unsigned long end,
unsigned long page_size_mask)
{
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index d0151d8ce452..94596f794b1b 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -17,8 +17,65 @@
*/
#include <asm/iomap.h>
+#include <asm/pat.h>
#include <linux/module.h>
+#ifdef CONFIG_X86_PAE
+static int
+is_io_mapping_possible(resource_size_t base, unsigned long size)
+{
+ return 1;
+}
+#else
+static int
+is_io_mapping_possible(resource_size_t base, unsigned long size)
+{
+ /* There is no way to map greater than 1 << 32 address without PAE */
+ if (base + size > 0x100000000ULL)
+ return 0;
+
+ return 1;
+}
+#endif
+
+int
+reserve_io_memtype_wc(u64 base, unsigned long size, pgprot_t *prot)
+{
+ unsigned long ret_flag;
+
+ if (!is_io_mapping_possible(base, size))
+ goto out_err;
+
+ if (!pat_enabled) {
+ *prot = pgprot_noncached(PAGE_KERNEL);
+ return 0;
+ }
+
+ if (reserve_memtype(base, base + size, _PAGE_CACHE_WC, &ret_flag))
+ goto out_err;
+
+ if (ret_flag == _PAGE_CACHE_WB)
+ goto out_free;
+
+ if (kernel_map_sync_memtype(base, size, ret_flag))
+ goto out_free;
+
+ *prot = __pgprot(__PAGE_KERNEL | ret_flag);
+ return 0;
+
+out_free:
+ free_memtype(base, base + size);
+out_err:
+ return -EINVAL;
+}
+
+void
+free_io_memtype(u64 base, unsigned long size)
+{
+ if (pat_enabled)
+ free_memtype(base, base + size);
+}
+
/* Map 'pfn' using fixed map 'type' and protections 'prot'
*/
void *
@@ -29,6 +86,15 @@ iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
pagefault_disable();
+ /*
+ * For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS.
+ * PAGE_KERNEL_WC maps to PWT, which translates to uncached if the
+ * MTRR is UC or WC. UC_MINUS gets the real intention, of the
+ * user, which is "WC if the MTRR is WC, UC if you can't do that."
+ */
+ if (!pat_enabled && pgprot_val(prot) == pgprot_val(PAGE_KERNEL_WC))
+ prot = PAGE_KERNEL_UC_MINUS;
+
idx = type + KM_TYPE_NR*smp_processor_id();
vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
set_pte(kmap_pte-idx, pfn_pte(pfn, prot));
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 2ddb1e79a195..433f7bd4648a 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -134,25 +134,6 @@ int page_is_ram(unsigned long pagenr)
return 0;
}
-int pagerange_is_ram(unsigned long start, unsigned long end)
-{
- int ram_page = 0, not_rampage = 0;
- unsigned long page_nr;
-
- for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT);
- ++page_nr) {
- if (page_is_ram(page_nr))
- ram_page = 1;
- else
- not_rampage = 1;
-
- if (ram_page == not_rampage)
- return -1;
- }
-
- return ram_page;
-}
-
/*
* Fix up the linear direct mapping of the kernel to avoid cache attribute
* conflicts.
@@ -557,34 +538,9 @@ void __init early_ioremap_init(void)
}
}
-void __init early_ioremap_clear(void)
-{
- pmd_t *pmd;
-
- if (early_ioremap_debug)
- printk(KERN_INFO "early_ioremap_clear()\n");
-
- pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
- pmd_clear(pmd);
- paravirt_release_pte(__pa(bm_pte) >> PAGE_SHIFT);
- __flush_tlb_all();
-}
-
void __init early_ioremap_reset(void)
{
- enum fixed_addresses idx;
- unsigned long addr, phys;
- pte_t *pte;
-
after_paging_init = 1;
- for (idx = FIX_BTMAP_BEGIN; idx >= FIX_BTMAP_END; idx--) {
- addr = fix_to_virt(idx);
- pte = early_ioremap_pte(addr);
- if (pte_present(*pte)) {
- phys = pte_val(*pte) & PAGE_MASK;
- set_fixmap(idx, phys);
- }
- }
}
static void __init __early_set_fixmap(enum fixed_addresses idx,
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 71a14f89f89e..f3516da035d1 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -145,7 +145,7 @@ int __init compute_hash_shift(struct bootnode *nodes, int numnodes,
return shift;
}
-int early_pfn_to_nid(unsigned long pfn)
+int __meminit __early_pfn_to_nid(unsigned long pfn)
{
return phys_to_nid(pfn << PAGE_SHIFT);
}
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index e89d24815f26..7be47d1a97e4 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -508,18 +508,13 @@ static int split_large_page(pte_t *kpte, unsigned long address)
#endif
/*
- * Install the new, split up pagetable. Important details here:
+ * Install the new, split up pagetable.
*
- * On Intel the NX bit of all levels must be cleared to make a
- * page executable. See section 4.13.2 of Intel 64 and IA-32
- * Architectures Software Developer's Manual).
- *
- * Mark the entry present. The current mapping might be
- * set to not present, which we preserved above.
+ * We use the standard kernel pagetable protections for the new
+ * pagetable protections, the actual ptes set above control the
+ * primary protection behavior:
*/
- ref_prot = pte_pgprot(pte_mkexec(pte_clrhuge(*kpte)));
- pgprot_val(ref_prot) |= _PAGE_PRESENT;
- __set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
+ __set_pmd_pte(kpte, address, mk_pte(base, __pgprot(_KERNPG_TABLE)));
base = NULL;
out_unlock:
@@ -534,6 +529,36 @@ out_unlock:
return 0;
}
+static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
+ int primary)
+{
+ /*
+ * Ignore all non primary paths.
+ */
+ if (!primary)
+ return 0;
+
+ /*
+ * Ignore the NULL PTE for kernel identity mapping, as it is expected
+ * to have holes.
+ * Also set numpages to '1' indicating that we processed cpa req for
+ * one virtual address page and its pfn. TBD: numpages can be set based
+ * on the initial value and the level returned by lookup_address().
+ */
+ if (within(vaddr, PAGE_OFFSET,
+ PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) {
+ cpa->numpages = 1;
+ cpa->pfn = __pa(vaddr) >> PAGE_SHIFT;
+ return 0;
+ } else {
+ WARN(1, KERN_WARNING "CPA: called for zero pte. "
+ "vaddr = %lx cpa->vaddr = %lx\n", vaddr,
+ *cpa->vaddr);
+
+ return -EFAULT;
+ }
+}
+
static int __change_page_attr(struct cpa_data *cpa, int primary)
{
unsigned long address;
@@ -545,21 +570,14 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
address = cpa->vaddr[cpa->curpage];
else
address = *cpa->vaddr;
-
repeat:
kpte = lookup_address(address, &level);
if (!kpte)
- return 0;
+ return __cpa_process_fault(cpa, address, primary);
old_pte = *kpte;
- if (!pte_val(old_pte)) {
- if (!primary)
- return 0;
- WARN(1, KERN_WARNING "CPA: called for zero pte. "
- "vaddr = %lx cpa->vaddr = %lx\n", address,
- *cpa->vaddr);
- return -EINVAL;
- }
+ if (!pte_val(old_pte))
+ return __cpa_process_fault(cpa, address, primary);
if (level == PG_LEVEL_4K) {
pte_t new_pte;
@@ -657,12 +675,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
vaddr = *cpa->vaddr;
if (!(within(vaddr, PAGE_OFFSET,
- PAGE_OFFSET + (max_low_pfn_mapped << PAGE_SHIFT))
-#ifdef CONFIG_X86_64
- || within(vaddr, PAGE_OFFSET + (1UL<<32),
- PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))
-#endif
- )) {
+ PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)))) {
alias_cpa = *cpa;
temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
@@ -793,6 +806,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
vm_unmap_aliases();
+ /*
+ * If we're called with lazy mmu updates enabled, the
+ * in-memory pte state may be stale. Flush pending updates to
+ * bring them up to date.
+ */
+ arch_flush_lazy_mmu_mode();
+
cpa.vaddr = addr;
cpa.numpages = numpages;
cpa.mask_set = mask_set;
@@ -835,6 +855,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
} else
cpa_flush_all(cache);
+ /*
+ * If we've been called with lazy mmu updates enabled, then
+ * make sure that everything gets flushed out before we
+ * return.
+ */
+ arch_flush_lazy_mmu_mode();
+
out:
return ret;
}
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 8b08fb955274..399383c6f614 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -211,6 +211,33 @@ chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type)
static struct memtype *cached_entry;
static u64 cached_start;
+static int pat_pagerange_is_ram(unsigned long start, unsigned long end)
+{
+ int ram_page = 0, not_rampage = 0;
+ unsigned long page_nr;
+
+ for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT);
+ ++page_nr) {
+ /*
+ * For legacy reasons, physical address range in the legacy ISA
+ * region is tracked as non-RAM. This will allow users of
+ * /dev/mem to map portions of legacy ISA region, even when
+ * some of those portions are listed(or not even listed) with
+ * different e820 types(RAM/reserved/..)
+ */
+ if (page_nr >= (ISA_END_ADDRESS >> PAGE_SHIFT) &&
+ page_is_ram(page_nr))
+ ram_page = 1;
+ else
+ not_rampage = 1;
+
+ if (ram_page == not_rampage)
+ return -1;
+ }
+
+ return ram_page;
+}
+
/*
* For RAM pages, mark the pages as non WB memory type using
* PageNonWB (PG_arch_1). We allow only one set_memory_uc() or
@@ -333,9 +360,13 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
req_type & _PAGE_CACHE_MASK);
}
- is_range_ram = pagerange_is_ram(start, end);
+ if (new_type)
+ *new_type = actual_type;
+
+ is_range_ram = pat_pagerange_is_ram(start, end);
if (is_range_ram == 1)
- return reserve_ram_pages_type(start, end, req_type, new_type);
+ return reserve_ram_pages_type(start, end, req_type,
+ new_type);
else if (is_range_ram < 0)
return -EINVAL;
@@ -347,9 +378,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
new->end = end;
new->type = actual_type;
- if (new_type)
- *new_type = actual_type;
-
spin_lock(&memtype_lock);
if (cached_entry && start >= cached_start)
@@ -437,7 +465,7 @@ int free_memtype(u64 start, u64 end)
if (is_ISA_range(start, end - 1))
return 0;
- is_range_ram = pagerange_is_ram(start, end);
+ is_range_ram = pat_pagerange_is_ram(start, end);
if (is_range_ram == 1)
return free_ram_pages_type(start, end);
else if (is_range_ram < 0)
@@ -597,6 +625,33 @@ void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
}
/*
+ * Change the memory type for the physial address range in kernel identity
+ * mapping space if that range is a part of identity map.
+ */
+int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags)
+{
+ unsigned long id_sz;
+
+ if (!pat_enabled || base >= __pa(high_memory))
+ return 0;
+
+ id_sz = (__pa(high_memory) < base + size) ?
+ __pa(high_memory) - base :
+ size;
+
+ if (ioremap_change_attr((unsigned long)__va(base), id_sz, flags) < 0) {
+ printk(KERN_INFO
+ "%s:%d ioremap_change_attr failed %s "
+ "for %Lx-%Lx\n",
+ current->comm, current->pid,
+ cattr_name(flags),
+ base, (unsigned long long)(base + size));
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/*
* Internal interface to reserve a range of physical memory with prot.
* Reserved non RAM regions only and after successful reserve_memtype,
* this func also keeps identity mapping (if any) in sync with this new prot.
@@ -605,21 +660,17 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
int strict_prot)
{
int is_ram = 0;
- int id_sz, ret;
+ int ret;
unsigned long flags;
unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK);
- is_ram = pagerange_is_ram(paddr, paddr + size);
+ is_ram = pat_pagerange_is_ram(paddr, paddr + size);
- if (is_ram != 0) {
- /*
- * For mapping RAM pages, drivers need to call
- * set_memory_[uc|wc|wb] directly, for reserve and free, before
- * setting up the PTE.
- */
- WARN_ON_ONCE(1);
- return 0;
- }
+ /*
+ * reserve_pfn_range() doesn't support RAM pages.
+ */
+ if (is_ram != 0)
+ return -EINVAL;
ret = reserve_memtype(paddr, paddr + size, want_flags, &flags);
if (ret)
@@ -646,23 +697,8 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
flags);
}
- /* Need to keep identity mapping in sync */
- if (paddr >= __pa(high_memory))
- return 0;
-
- id_sz = (__pa(high_memory) < paddr + size) ?
- __pa(high_memory) - paddr :
- size;
-
- if (ioremap_change_attr((unsigned long)__va(paddr), id_sz, flags) < 0) {
+ if (kernel_map_sync_memtype(paddr, size, flags) < 0) {
free_memtype(paddr, paddr + size);
- printk(KERN_ERR
- "%s:%d reserve_pfn_range ioremap_change_attr failed %s "
- "for %Lx-%Lx\n",
- current->comm, current->pid,
- cattr_name(flags),
- (unsigned long long)paddr,
- (unsigned long long)(paddr + size));
return -EINVAL;
}
return 0;
@@ -676,7 +712,7 @@ static void free_pfn_range(u64 paddr, unsigned long size)
{
int is_ram;
- is_ram = pagerange_is_ram(paddr, paddr + size);
+ is_ram = pat_pagerange_is_ram(paddr, paddr + size);
if (is_ram == 0)
free_memtype(paddr, paddr + size);
}