aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--arch/powerpc/mm/Makefile1
-rw-r--r--arch/powerpc/mm/copro_fault.c149
-rw-r--r--arch/powerpc/mm/fault.c48
-rw-r--r--arch/powerpc/mm/hash_native_64.c6
-rw-r--r--arch/powerpc/mm/hash_utils_64.c160
-rw-r--r--arch/powerpc/mm/init_32.c4
-rw-r--r--arch/powerpc/mm/init_64.c3
-rw-r--r--arch/powerpc/mm/mem.c68
-rw-r--r--arch/powerpc/mm/numa.c68
-rw-r--r--arch/powerpc/mm/pgtable.c2
-rw-r--r--arch/powerpc/mm/slb.c3
-rw-r--r--arch/powerpc/mm/slice.c12
12 files changed, 387 insertions, 137 deletions
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index d0130fff20e5..325e861616a1 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -34,3 +34,4 @@ obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hugepage-hash64.o
obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o
obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
obj-$(CONFIG_HIGHMEM) += highmem.o
+obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o
diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c
new file mode 100644
index 000000000000..0f9939e693df
--- /dev/null
+++ b/arch/powerpc/mm/copro_fault.c
@@ -0,0 +1,149 @@
+/*
+ * CoProcessor (SPU/AFU) mm fault handler
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2007
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ * Author: Jeremy Kerr <jk@ozlabs.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/export.h>
+#include <asm/reg.h>
+#include <asm/copro.h>
+#include <asm/spu.h>
+#include <misc/cxl.h>
+
+/*
+ * Resolve a page fault taken by a coprocessor (SPU/AFU) against @mm.
+ *
+ * @mm:    address space to fault against; a NULL mm, or one without a
+ *         pgd, is rejected with -EFAULT before any lock is taken
+ * @ea:    faulting effective address
+ * @dsisr: fault status bits; DSISR_ISSTORE and DSISR_PROTFAULT are examined
+ * @flt:   receives the raw handle_mm_fault() result; it is only written
+ *         once a usable VMA has been found and the access check passed
+ *
+ * mm->mmap_sem is held for reading from the VMA lookup until return.
+ *
+ * Returns 0 when the fault was handled, -ENOMEM when handle_mm_fault()
+ * reports VM_FAULT_OOM, and -EFAULT when there is no suitable VMA, the
+ * access is not permitted, or handle_mm_fault() reports a bus error or a
+ * hardware-poisoned page.
+ *
+ * This ought to be kept in sync with the powerpc specific do_page_fault
+ * function. Currently, there are a few corner cases that we haven't had
+ * to handle fortunately.
+ */
+int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
+		unsigned long dsisr, unsigned *flt)
+{
+	struct vm_area_struct *vma;
+	unsigned long is_write;
+	int ret;
+
+	if (mm == NULL)
+		return -EFAULT;
+
+	if (mm->pgd == NULL)
+		return -EFAULT;
+
+	down_read(&mm->mmap_sem);
+	ret = -EFAULT;
+	vma = find_vma(mm, ea);
+	if (!vma)
+		goto out_unlock;
+
+	/*
+	 * The VMA found may start above ea; that is only acceptable when it
+	 * is a grows-down mapping that can be expanded to cover the address.
+	 */
+	if (ea < vma->vm_start) {
+		if (!(vma->vm_flags & VM_GROWSDOWN))
+			goto out_unlock;
+		if (expand_stack(vma, ea))
+			goto out_unlock;
+	}
+
+	is_write = dsisr & DSISR_ISSTORE;
+	if (is_write) {
+		if (!(vma->vm_flags & VM_WRITE))
+			goto out_unlock;
+	} else {
+		/* a protection fault on a non-store access is always refused */
+		if (dsisr & DSISR_PROTFAULT)
+			goto out_unlock;
+		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+			goto out_unlock;
+	}
+
+	ret = 0;
+	*flt = handle_mm_fault(mm, vma, ea, is_write ? FAULT_FLAG_WRITE : 0);
+	if (unlikely(*flt & VM_FAULT_ERROR)) {
+		if (*flt & VM_FAULT_OOM) {
+			ret = -ENOMEM;
+			goto out_unlock;
+		} else if (*flt & (VM_FAULT_SIGBUS | VM_FAULT_HWPOISON |
+				   VM_FAULT_HWPOISON_LARGE)) {
+			/*
+			 * mm_fault_error() in arch/powerpc/mm/fault.c treats
+			 * hardware-poisoned pages like a bus error; report
+			 * them the same way here instead of hitting BUG().
+			 */
+			ret = -EFAULT;
+			goto out_unlock;
+		}
+		/* any other error bit is not understood */
+		BUG();
+	}
+
+	/* the fault is accounted to the task running this code */
+	if (*flt & VM_FAULT_MAJOR)
+		current->maj_flt++;
+	else
+		current->min_flt++;
+
+out_unlock:
+	up_read(&mm->mmap_sem);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(copro_handle_mm_fault);
+
+/*
+ * Fill @slb with the ESID/VSID pair that maps @ea.
+ *
+ * slb->esid is written unconditionally. slb->vsid is written only when
+ * @ea lies in the user, vmalloc or kernel linear region; @mm is consulted
+ * for user-region addresses only.
+ *
+ * Returns 0 on success, 1 when @ea is in none of those regions.
+ */
+int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
+{
+	int page_size, seg_size;
+	u64 vsid_word;
+
+	slb->esid = (ea & ESID_MASK) | SLB_ESID_V;
+
+	switch (REGION_ID(ea)) {
+	case USER_REGION_ID:
+		pr_devel("%s: 0x%llx -- USER_REGION_ID\n", __func__, ea);
+		page_size = get_slice_psize(mm, ea);
+		seg_size = user_segment_size(ea);
+		vsid_word = get_vsid(mm->context.id, ea, seg_size);
+		break;
+	case VMALLOC_REGION_ID:
+		pr_devel("%s: 0x%llx -- VMALLOC_REGION_ID\n", __func__, ea);
+		/* addresses at or above VMALLOC_END use the I/O page size */
+		page_size = (ea < VMALLOC_END) ? mmu_vmalloc_psize
+					       : mmu_io_psize;
+		seg_size = mmu_kernel_ssize;
+		vsid_word = get_kernel_vsid(ea, mmu_kernel_ssize);
+		break;
+	case KERNEL_REGION_ID:
+		pr_devel("%s: 0x%llx -- KERNEL_REGION_ID\n", __func__, ea);
+		page_size = mmu_linear_psize;
+		seg_size = mmu_kernel_ssize;
+		vsid_word = get_kernel_vsid(ea, mmu_kernel_ssize);
+		break;
+	default:
+		pr_debug("%s: invalid region access at %016llx\n", __func__, ea);
+		return 1;
+	}
+
+	/* position the VSID, then OR in the user, page-size and segment-size bits */
+	vsid_word <<= slb_vsid_shift(seg_size);
+	vsid_word |= SLB_VSID_USER;
+	vsid_word |= mmu_psize_defs[page_size].sllp;
+	if (seg_size == MMU_SEGSIZE_1T)
+		vsid_word |= SLB_VSID_B_1T;
+
+	slb->vsid = vsid_word;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(copro_calculate_slb);
+
+/*
+ * Single entry point for SLB flushing on behalf of coprocessors using @mm.
+ * Calls spu_flush_all_slbs() only when CONFIG_SPU_BASE is configured, and
+ * cxl_slbia() unconditionally.
+ * NOTE(review): both callees presumably invalidate the SLB entries their
+ * contexts hold for @mm -- confirm against asm/spu.h and misc/cxl.h.
+ */
+void copro_flush_all_slbs(struct mm_struct *mm)
+{
+#ifdef CONFIG_SPU_BASE
+ spu_flush_all_slbs(mm);
+#endif
+ cxl_slbia(mm);
+}
+EXPORT_SYMBOL_GPL(copro_flush_all_slbs);
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 51ab9e7e6c39..08d659a9fcdb 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -30,9 +30,9 @@
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/perf_event.h>
-#include <linux/magic.h>
#include <linux/ratelimit.h>
#include <linux/context_tracking.h>
+#include <linux/hugetlb.h>
#include <asm/firmware.h>
#include <asm/page.h>
@@ -114,22 +114,37 @@ static int store_updates_sp(struct pt_regs *regs)
#define MM_FAULT_CONTINUE -1
#define MM_FAULT_ERR(sig) (sig)
-static int do_sigbus(struct pt_regs *regs, unsigned long address)
+static int do_sigbus(struct pt_regs *regs, unsigned long address,
+ unsigned int fault)
{
siginfo_t info;
+ unsigned int lsb = 0;
up_read(&current->mm->mmap_sem);
- if (user_mode(regs)) {
- current->thread.trap_nr = BUS_ADRERR;
- info.si_signo = SIGBUS;
- info.si_errno = 0;
- info.si_code = BUS_ADRERR;
- info.si_addr = (void __user *)address;
- force_sig_info(SIGBUS, &info, current);
- return MM_FAULT_RETURN;
+ if (!user_mode(regs))
+ return MM_FAULT_ERR(SIGBUS);
+
+ current->thread.trap_nr = BUS_ADRERR;
+ info.si_signo = SIGBUS;
+ info.si_errno = 0;
+ info.si_code = BUS_ADRERR;
+ info.si_addr = (void __user *)address;
+#ifdef CONFIG_MEMORY_FAILURE
+ if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) {
+ pr_err("MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
+ current->comm, current->pid, address);
+ info.si_code = BUS_MCEERR_AR;
}
- return MM_FAULT_ERR(SIGBUS);
+
+ if (fault & VM_FAULT_HWPOISON_LARGE)
+ lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
+ if (fault & VM_FAULT_HWPOISON)
+ lsb = PAGE_SHIFT;
+#endif
+ info.si_addr_lsb = lsb;
+ force_sig_info(SIGBUS, &info, current);
+ return MM_FAULT_RETURN;
}
static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
@@ -170,11 +185,8 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
return MM_FAULT_RETURN;
}
- /* Bus error. x86 handles HWPOISON here, we'll add this if/when
- * we support the feature in HW
- */
- if (fault & VM_FAULT_SIGBUS)
- return do_sigbus(regs, addr);
+ if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE))
+ return do_sigbus(regs, addr, fault);
/* We don't understand the fault code, this is fatal */
BUG();
@@ -508,7 +520,6 @@ bail:
void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
{
const struct exception_table_entry *entry;
- unsigned long *stackend;
/* Are we prepared to handle this fault? */
if ((entry = search_exception_tables(regs->nip)) != NULL) {
@@ -537,8 +548,7 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
printk(KERN_ALERT "Faulting instruction address: 0x%08lx\n",
regs->nip);
- stackend = end_of_stack(current);
- if (current != &init_task && *stackend != STACK_END_MAGIC)
+ if (task_stack_end_corrupted(current))
printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");
die("Kernel access of bad area", regs, sig);
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index afc0a8295f84..ae4962a06476 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -29,6 +29,8 @@
#include <asm/kexec.h>
#include <asm/ppc-opcode.h>
+#include <misc/cxl.h>
+
#ifdef DEBUG_LOW
#define DBG_LOW(fmt...) udbg_printf(fmt)
#else
@@ -149,9 +151,11 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
static inline void tlbie(unsigned long vpn, int psize, int apsize,
int ssize, int local)
{
- unsigned int use_local = local && mmu_has_feature(MMU_FTR_TLBIEL);
+ unsigned int use_local;
int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
+ use_local = local && mmu_has_feature(MMU_FTR_TLBIEL) && !cxl_ctx_in_use();
+
if (use_local)
use_local = mmu_psize_defs[psize].tlbiel;
if (lock_tlbie && !use_local)
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index daee7f4e5a14..d5339a3b9945 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -51,7 +51,7 @@
#include <asm/cacheflush.h>
#include <asm/cputable.h>
#include <asm/sections.h>
-#include <asm/spu.h>
+#include <asm/copro.h>
#include <asm/udbg.h>
#include <asm/code-patching.h>
#include <asm/fadump.h>
@@ -92,12 +92,14 @@ extern unsigned long dart_tablebase;
static unsigned long _SDR1;
struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
+EXPORT_SYMBOL_GPL(mmu_psize_defs);
struct hash_pte *htab_address;
unsigned long htab_size_bytes;
unsigned long htab_hash_mask;
EXPORT_SYMBOL_GPL(htab_hash_mask);
int mmu_linear_psize = MMU_PAGE_4K;
+EXPORT_SYMBOL_GPL(mmu_linear_psize);
int mmu_virtual_psize = MMU_PAGE_4K;
int mmu_vmalloc_psize = MMU_PAGE_4K;
#ifdef CONFIG_SPARSEMEM_VMEMMAP
@@ -105,6 +107,7 @@ int mmu_vmemmap_psize = MMU_PAGE_4K;
#endif
int mmu_io_psize = MMU_PAGE_4K;
int mmu_kernel_ssize = MMU_SEGSIZE_256M;
+EXPORT_SYMBOL_GPL(mmu_kernel_ssize);
int mmu_highuser_ssize = MMU_SEGSIZE_256M;
u16 mmu_slb_size = 64;
EXPORT_SYMBOL_GPL(mmu_slb_size);
@@ -333,70 +336,69 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,
return 0;
prop = of_get_flat_dt_prop(node, "ibm,segment-page-sizes", &size);
- if (prop != NULL) {
- pr_info("Page sizes from device-tree:\n");
- size /= 4;
- cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE);
- while(size > 0) {
- unsigned int base_shift = be32_to_cpu(prop[0]);
- unsigned int slbenc = be32_to_cpu(prop[1]);
- unsigned int lpnum = be32_to_cpu(prop[2]);
- struct mmu_psize_def *def;
- int idx, base_idx;
-
- size -= 3; prop += 3;
- base_idx = get_idx_from_shift(base_shift);
- if (base_idx < 0) {
- /*
- * skip the pte encoding also
- */
- prop += lpnum * 2; size -= lpnum * 2;
+ if (!prop)
+ return 0;
+
+ pr_info("Page sizes from device-tree:\n");
+ size /= 4;
+ cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE);
+ while(size > 0) {
+ unsigned int base_shift = be32_to_cpu(prop[0]);
+ unsigned int slbenc = be32_to_cpu(prop[1]);
+ unsigned int lpnum = be32_to_cpu(prop[2]);
+ struct mmu_psize_def *def;
+ int idx, base_idx;
+
+ size -= 3; prop += 3;
+ base_idx = get_idx_from_shift(base_shift);
+ if (base_idx < 0) {
+ /* skip the pte encoding also */
+ prop += lpnum * 2; size -= lpnum * 2;
+ continue;
+ }
+ def = &mmu_psize_defs[base_idx];
+ if (base_idx == MMU_PAGE_16M)
+ cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE;
+
+ def->shift = base_shift;
+ if (base_shift <= 23)
+ def->avpnm = 0;
+ else
+ def->avpnm = (1 << (base_shift - 23)) - 1;
+ def->sllp = slbenc;
+ /*
+ * We don't know for sure what's up with tlbiel, so
+ * for now we only set it for 4K and 64K pages
+ */
+ if (base_idx == MMU_PAGE_4K || base_idx == MMU_PAGE_64K)
+ def->tlbiel = 1;
+ else
+ def->tlbiel = 0;
+
+ while (size > 0 && lpnum) {
+ unsigned int shift = be32_to_cpu(prop[0]);
+ int penc = be32_to_cpu(prop[1]);
+
+ prop += 2; size -= 2;
+ lpnum--;
+
+ idx = get_idx_from_shift(shift);
+ if (idx < 0)
continue;
- }
- def = &mmu_psize_defs[base_idx];
- if (base_idx == MMU_PAGE_16M)
- cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE;
-
- def->shift = base_shift;
- if (base_shift <= 23)
- def->avpnm = 0;
- else
- def->avpnm = (1 << (base_shift - 23)) - 1;
- def->sllp = slbenc;
- /*
- * We don't know for sure what's up with tlbiel, so
- * for now we only set it for 4K and 64K pages
- */
- if (base_idx == MMU_PAGE_4K || base_idx == MMU_PAGE_64K)
- def->tlbiel = 1;
- else
- def->tlbiel = 0;
-
- while (size > 0 && lpnum) {
- unsigned int shift = be32_to_cpu(prop[0]);
- int penc = be32_to_cpu(prop[1]);
-
- prop += 2; size -= 2;
- lpnum--;
-
- idx = get_idx_from_shift(shift);
- if (idx < 0)
- continue;
-
- if (penc == -1)
- pr_err("Invalid penc for base_shift=%d "
- "shift=%d\n", base_shift, shift);
-
- def->penc[idx] = penc;
- pr_info("base_shift=%d: shift=%d, sllp=0x%04lx,"
- " avpnm=0x%08lx, tlbiel=%d, penc=%d\n",
- base_shift, shift, def->sllp,
- def->avpnm, def->tlbiel, def->penc[idx]);
- }
+
+ if (penc == -1)
+ pr_err("Invalid penc for base_shift=%d "
+ "shift=%d\n", base_shift, shift);
+
+ def->penc[idx] = penc;
+ pr_info("base_shift=%d: shift=%d, sllp=0x%04lx,"
+ " avpnm=0x%08lx, tlbiel=%d, penc=%d\n",
+ base_shift, shift, def->sllp,
+ def->avpnm, def->tlbiel, def->penc[idx]);
}
- return 1;
}
- return 0;
+
+ return 1;
}
#ifdef CONFIG_HUGETLB_PAGE
@@ -867,7 +869,7 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
}
#ifdef CONFIG_PPC_MM_SLICES
-unsigned int get_paca_psize(unsigned long addr)
+static unsigned int get_paca_psize(unsigned long addr)
{
u64 lpsizes;
unsigned char *hpsizes;
@@ -901,10 +903,8 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
if (get_slice_psize(mm, addr) == MMU_PAGE_4K)
return;
slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K);
-#ifdef CONFIG_SPU_BASE
- spu_flush_all_slbs(mm);
-#endif
- if (get_paca_psize(addr) != MMU_PAGE_4K) {
+ copro_flush_all_slbs(mm);
+ if ((get_paca_psize(addr) != MMU_PAGE_4K) && (current->mm == mm)) {
get_paca()->context = mm->context;
slb_flush_and_rebolt();
}
@@ -989,12 +989,11 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm,
* -1 - critical hash insertion error
* -2 - access not permitted by subpage protection mechanism
*/
-int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
+int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap)
{
enum ctx_state prev_state = exception_enter();
pgd_t *pgdir;
unsigned long vsid;
- struct mm_struct *mm;
pte_t *ptep;
unsigned hugeshift;
const struct cpumask *tmp;
@@ -1008,7 +1007,6 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
switch (REGION_ID(ea)) {
case USER_REGION_ID:
user_region = 1;
- mm = current->mm;
if (! mm) {
DBG_LOW(" user region with no mm !\n");
rc = 1;
@@ -1019,7 +1017,6 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
vsid = get_vsid(mm->context.id, ea, ssize);
break;
case VMALLOC_REGION_ID:
- mm = &init_mm;
vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
if (ea < VMALLOC_END)
psize = mmu_vmalloc_psize;
@@ -1104,7 +1101,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
WARN_ON(1);
}
#endif
- check_paca_psize(ea, mm, psize, user_region);
+ if (current->mm == mm)
+ check_paca_psize(ea, mm, psize, user_region);
goto bail;
}
@@ -1141,13 +1139,12 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
"to 4kB pages because of "
"non-cacheable mapping\n");
psize = mmu_vmalloc_psize = MMU_PAGE_4K;
-#ifdef CONFIG_SPU_BASE
- spu_flush_all_slbs(mm);
-#endif
+ copro_flush_all_slbs(mm);
}
}
- check_paca_psize(ea, mm, psize, user_region);
+ if (current->mm == mm)
+ check_paca_psize(ea, mm, psize, user_region);
#endif /* CONFIG_PPC_64K_PAGES */
#ifdef CONFIG_PPC_HAS_HASH_64K
@@ -1182,6 +1179,17 @@ bail:
exception_exit(prev_state);
return rc;
}
+EXPORT_SYMBOL_GPL(hash_page_mm);
+
+/*
+ * Wrapper around hash_page_mm() that picks the mm from the address:
+ * init_mm for vmalloc-region addresses, current->mm for everything else.
+ * The return value is whatever hash_page_mm() returns.
+ */
+int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
+{
+	if (REGION_ID(ea) == VMALLOC_REGION_ID)
+		return hash_page_mm(&init_mm, ea, access, trap);
+
+	return hash_page_mm(current->mm, ea, access, trap);
+}
EXPORT_SYMBOL_GPL(hash_page);
void hash_preload(struct mm_struct *mm, unsigned long ea,
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index cff59f1bec23..cad68ff8eca5 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -106,11 +106,11 @@ unsigned long __max_low_memory = MAX_LOW_MEM;
void MMU_setup(void)
{
/* Check for nobats option (used in mapin_ram). */
- if (strstr(cmd_line, "nobats")) {
+ if (strstr(boot_command_line, "nobats")) {
__map_without_bats = 1;
}
- if (strstr(cmd_line, "noltlbs")) {
+ if (strstr(boot_command_line, "noltlbs")) {
__map_without_ltlbs = 1;
}
#ifdef CONFIG_DEBUG_PAGEALLOC
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 253b4b971c8a..3481556a1880 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -233,9 +233,6 @@ static void __meminit vmemmap_create_mapping(unsigned long start,
}
#ifdef CONFIG_MEMORY_HOTPLUG
-extern int htab_remove_mapping(unsigned long vstart, unsigned long vend,
- int psize, int ssize);
-
static void vmemmap_remove_mapping(unsigned long start,
unsigned long page_size)
{
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index e0f7a189c48e..8ebaac75c940 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -260,6 +260,60 @@ static int __init mark_nonram_nosave(void)
}
return 0;
}
+#else /* CONFIG_NEED_MULTIPLE_NODES */
+/*
+ * Stub for the CONFIG_NEED_MULTIPLE_NODES side of the #else above:
+ * does nothing and returns 0.
+ */
+static int __init mark_nonram_nosave(void)
+{
+ return 0;
+}
+#endif
+
+/* Becomes true in paging_init(); limit_zone_pfn() refuses to run afterwards. */
+static bool zone_limits_final;
+
+/* Upper pfn bound per zone; every entry starts out at ~0UL. */
+static unsigned long max_zone_pfns[MAX_NR_ZONES] = {
+	[0 ... MAX_NR_ZONES - 1] = ~0UL
+};
+
+/*
+ * Cap @zone, and every zone with a lower index, at @pfn_limit. A zone
+ * whose current bound is already at or below @pfn_limit keeps it.
+ *
+ * Must not be called after paging_init(): a late call triggers a WARN
+ * and leaves the limits unchanged.
+ */
+void __init limit_zone_pfn(enum zone_type zone, unsigned long pfn_limit)
+{
+	int idx = zone;
+
+	if (WARN_ON(zone_limits_final))
+		return;
+
+	while (idx >= 0) {
+		if (pfn_limit < max_zone_pfns[idx])
+			max_zone_pfns[idx] = pfn_limit;
+		idx--;
+	}
+}
+
+/*
+ * Pick the highest-indexed zone whose upper bound does not exceed
+ * @pfn_limit, searching downwards from ZONE_HIGHMEM when CONFIG_HIGHMEM
+ * is set and from ZONE_NORMAL otherwise.
+ *
+ * @pfn_limit is a u64 because a DMA limit can be wider than 32 bits even
+ * on 32-bit systems, i.e. larger than any real pfn there.
+ *
+ * Returns the zone index, or -EPERM when no zone fits.
+ */
+int dma_pfn_limit_to_zone(u64 pfn_limit)
+{
+#ifdef CONFIG_HIGHMEM
+	int zone = ZONE_HIGHMEM;
+#else
+	int zone = ZONE_NORMAL;
+#endif
+
+	for (; zone >= 0; zone--) {
+		if (max_zone_pfns[zone] <= pfn_limit)
+			return zone;
+	}
+
+	return -EPERM;
+}
/*
* paging_init() sets up the page tables - in fact we've already done this.
@@ -268,7 +322,7 @@ void __init paging_init(void)
{
unsigned long long total_ram = memblock_phys_mem_size();
phys_addr_t top_of_ram = memblock_end_of_DRAM();
- unsigned long max_zone_pfns[MAX_NR_ZONES];
+ enum zone_type top_zone;
#ifdef CONFIG_PPC32
unsigned long v = __fix_to_virt(__end_of_fixed_addresses - 1);
@@ -290,18 +344,20 @@ void __init paging_init(void)
(unsigned long long)top_of_ram, total_ram);
printk(KERN_DEBUG "Memory hole size: %ldMB\n",
(long int)((top_of_ram - total_ram) >> 20));
- memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
+
#ifdef CONFIG_HIGHMEM
- max_zone_pfns[ZONE_DMA] = lowmem_end_addr >> PAGE_SHIFT;
- max_zone_pfns[ZONE_HIGHMEM] = top_of_ram >> PAGE_SHIFT;
+ top_zone = ZONE_HIGHMEM;
+ limit_zone_pfn(ZONE_NORMAL, lowmem_end_addr >> PAGE_SHIFT);
#else
- max_zone_pfns[ZONE_DMA] = top_of_ram >> PAGE_SHIFT;
+ top_zone = ZONE_NORMAL;
#endif
+
+ limit_zone_pfn(top_zone, top_of_ram >> PAGE_SHIFT);
+ zone_limits_final = true;
free_area_init_nodes(max_zone_pfns);
mark_nonram_nosave();
}
-#endif /* ! CONFIG_NEED_MULTIPLE_NODES */
static void __init register_page_bootmem_info(void)
{
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index d7737a542fd7..e5236c24dc07 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -8,6 +8,8 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
+#define pr_fmt(fmt) "numa: " fmt
+
#include <linux/threads.h>
#include <linux/bootmem.h>
#include <linux/init.h>
@@ -538,7 +540,7 @@ static int of_drconf_to_nid_single(struct of_drconf_cell *drmem,
*/
static int numa_setup_cpu(unsigned long lcpu)
{
- int nid;
+ int nid = -1;
struct device_node *cpu;
/*
@@ -555,19 +557,21 @@ static int numa_setup_cpu(unsigned long lcpu)
if (!cpu) {
WARN_ON(1);
- nid = 0;
- goto out;
+ if (cpu_present(lcpu))
+ goto out_present;
+ else
+ goto out;
}
nid = of_node_to_nid_single(cpu);
+out_present:
if (nid < 0 || !node_online(nid))
nid = first_online_node;
-out:
- map_cpu_to_node(lcpu, nid);
+ map_cpu_to_node(lcpu, nid);
of_node_put(cpu);
-
+out:
return nid;
}
@@ -1127,20 +1131,11 @@ void __init do_init_bootmem(void)
* even before we online them, so that we can use cpu_to_{node,mem}
* early in boot, cf. smp_prepare_cpus().
*/
- for_each_possible_cpu(cpu) {
- cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE,
- (void *)(unsigned long)cpu);
+ for_each_present_cpu(cpu) {
+ numa_setup_cpu((unsigned long)cpu);
}
}
-void __init paging_init(void)
-{
- unsigned long max_zone_pfns[MAX_NR_ZONES];
- memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
- max_zone_pfns[ZONE_DMA] = memblock_end_of_DRAM() >> PAGE_SHIFT;
- free_area_init_nodes(max_zone_pfns);
-}
-
static int __init early_numa(char *p)
{
if (!p)
@@ -1160,6 +1155,22 @@ static int __init early_numa(char *p)
}
early_param("numa", early_numa);
+/* Cleared when "topology_updates=off" is given at boot. */
+static bool topology_updates_enabled = true;
+
+/*
+ * Early-param handler for "topology_updates". Only the exact value "off"
+ * has an effect; a missing value or any other string is ignored.
+ * Always returns 0.
+ */
+static int __init early_topology_updates(char *p)
+{
+	if (p && strcmp(p, "off") == 0) {
+		pr_info("Disabling topology updates\n");
+		topology_updates_enabled = false;
+	}
+
+	return 0;
+}
+early_param("topology_updates", early_topology_updates);
+
#ifdef CONFIG_MEMORY_HOTPLUG
/*
* Find the node associated with a hot added memory section for
@@ -1449,8 +1460,11 @@ static long hcall_vphn(unsigned long cpu, __be32 *associativity)
long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
u64 flags = 1;
int hwcpu = get_hard_smp_processor_id(cpu);
+ int i;
rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu);
+ for (i = 0; i < 6; i++)
+ retbuf[i] = cpu_to_be64(retbuf[i]);
vphn_unpack_associativity(retbuf, associativity);
return rc;
@@ -1546,6 +1560,9 @@ int arch_update_cpu_topology(void)
struct device *dev;
int weight, new_nid, i = 0;
+ if (!prrn_enabled && !vphn_enabled)
+ return 0;
+
weight = cpumask_weight(&cpu_associativity_changes_mask);
if (!weight)
return 0;
@@ -1599,6 +1616,15 @@ int arch_update_cpu_topology(void)
cpu = cpu_last_thread_sibling(cpu);
}
+ pr_debug("Topology update for the following CPUs:\n");
+ if (cpumask_weight(&updated_cpus)) {
+ for (ud = &updates[0]; ud; ud = ud->next) {
+ pr_debug("cpu %d moving from node %d "
+ "to %d\n", ud->cpu,
+ ud->old_nid, ud->new_nid);
+ }
+ }
+
/*
* In cases where we have nothing to update (because the updates list
* is too short or because the new topology is same as the old one),
@@ -1807,8 +1833,12 @@ static const struct file_operations topology_ops = {
static int topology_update_init(void)
{
- start_topology_update();
- proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops);
+ /* Do not poll for changes if disabled at boot */
+ if (topology_updates_enabled)
+ start_topology_update();
+
+ if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops))
+ return -ENOMEM;
return 0;
}
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index c695943a513c..c90e602677c9 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -48,7 +48,7 @@ static inline int pte_looks_normal(pte_t pte)
(_PAGE_PRESENT | _PAGE_USER);
}
-struct page * maybe_pte_to_page(pte_t pte)
+static struct page *maybe_pte_to_page(pte_t pte)
{
unsigned long pfn = pte_pfn(pte);
struct page *page;
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 0399a6702958..6e450ca66526 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -46,9 +46,6 @@ static inline unsigned long mk_esid_data(unsigned long ea, int ssize,
return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | slot;
}
-#define slb_vsid_shift(ssize) \
- ((ssize) == MMU_SEGSIZE_256M? SLB_VSID_SHIFT: SLB_VSID_SHIFT_1T)
-
static inline unsigned long mk_vsid_data(unsigned long ea, int ssize,
unsigned long flags)
{
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index b0c75cc15efc..8d7bda94d196 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -30,9 +30,11 @@
#include <linux/err.h>
#include <linux/spinlock.h>
#include <linux/export.h>
+#include <linux/hugetlb.h>
#include <asm/mman.h>
#include <asm/mmu.h>
-#include <asm/spu.h>
+#include <asm/copro.h>
+#include <asm/hugetlb.h>
/* some sanity checks */
#if (PGTABLE_RANGE >> 43) > SLICE_MASK_SIZE
@@ -232,9 +234,7 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
spin_unlock_irqrestore(&slice_convert_lock, flags);
-#ifdef CONFIG_SPU_BASE
- spu_flush_all_slbs(mm);
-#endif
+ copro_flush_all_slbs(mm);
}
/*
@@ -671,9 +671,7 @@ void slice_set_psize(struct mm_struct *mm, unsigned long address,
spin_unlock_irqrestore(&slice_convert_lock, flags);
-#ifdef CONFIG_SPU_BASE
- spu_flush_all_slbs(mm);
-#endif
+ copro_flush_all_slbs(mm);
}
void slice_set_range_psize(struct mm_struct *mm, unsigned long start,