Diffstat (limited to 'arch/powerpc/mm')
 arch/powerpc/mm/hash_low_32.S    |  22
 arch/powerpc/mm/hash_low_64.S    |   5
 arch/powerpc/mm/hash_native_64.c |   2
 arch/powerpc/mm/hash_utils_64.c  | 127
 arch/powerpc/mm/hugetlbpage.c    |  20
 arch/powerpc/mm/init_32.c        |   4
 arch/powerpc/mm/lmb.c            |   4
 arch/powerpc/mm/mem.c            |   3
 arch/powerpc/mm/mmu_decl.h       |   5
 arch/powerpc/mm/numa.c           |  26
 arch/powerpc/mm/pgtable_32.c     |  76
 arch/powerpc/mm/pgtable_64.c     |   2
 arch/powerpc/mm/ppc_mmu_32.c     |   4
 arch/powerpc/mm/tlb_64.c         |  68
 14 files changed, 288 insertions(+), 80 deletions(-)
diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S
index bd68df5fa78a..ddceefc06ecc 100644
--- a/arch/powerpc/mm/hash_low_32.S
+++ b/arch/powerpc/mm/hash_low_32.S
@@ -283,6 +283,7 @@ Hash_msk = (((1 << Hash_bits) - 1) * 64)
#define PTEG_SIZE 64
#define LG_PTEG_SIZE 6
#define LDPTEu lwzu
+#define LDPTE lwz
#define STPTE stw
#define CMPPTE cmpw
#define PTE_H 0x40
@@ -389,13 +390,30 @@ _GLOBAL(hash_page_patch_C)
* and we know there is a definite (although small) speed
* advantage to putting the PTE in the primary PTEG, we always
* put the PTE in the primary PTEG.
+ *
+ * In addition, we skip any slot that is mapping kernel text in
+ * order to avoid a deadlock when not using BAT mappings if
+ * trying to hash in the kernel hash code itself after it has
+ * already taken the hash table lock. This works in conjunction
+ * with pre-faulting of the kernel text.
+ *
+ * If the hash table bucket is full of kernel text entries, we'll
+ * lock up here, but that shouldn't happen.
*/
- addis r4,r7,next_slot@ha
+
+1: addis r4,r7,next_slot@ha /* get next evict slot */
lwz r6,next_slot@l(r4)
- addi r6,r6,PTE_SIZE
+ addi r6,r6,PTE_SIZE /* search for candidate */
andi. r6,r6,7*PTE_SIZE
stw r6,next_slot@l(r4)
add r4,r3,r6
+ LDPTE r0,PTE_SIZE/2(r4) /* get PTE second word */
+ clrrwi r0,r0,12
+ lis r6,etext@h
+ ori r6,r6,etext@l /* get etext */
+ tophys(r6,r6)
+ cmpl cr0,r0,r6 /* compare and try again */
+ blt 1b
#ifndef CONFIG_SMP
/* Store PTE in PTEG */
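
The comment added above states the policy in prose; the same slot-selection loop is easier to see in C. Below is a small illustrative model, not kernel code: next_slot round-robins through the eight slots of the primary PTEG, and any slot whose PTE maps kernel text (physical address below tophys(etext)) is skipped, so the hash-miss handler can never evict the code it is itself running from. The names pteg_word1, etext_phys and pick_victim_slot are invented for this sketch.

/* Illustrative C model of the eviction-slot scan in hash_page_patch_C. */
#include <stdint.h>

#define PTE_SLOTS 8                     /* slots per PTEG */

static unsigned int next_slot;          /* models the next_slot variable */

static unsigned int pick_victim_slot(const uint32_t pteg_word1[PTE_SLOTS],
                                     uint32_t etext_phys)
{
        unsigned int slot;

        do {
                next_slot = (next_slot + 1) % PTE_SLOTS;   /* round robin */
                slot = next_slot;
                /* clrrwi r0,r0,12: keep only the physical page address,
                 * then compare against tophys(etext); anything below it
                 * maps kernel text and must not be evicted. */
        } while ((pteg_word1[slot] & ~0xfffu) < etext_phys);

        return slot;
}

/* As the comment above notes, a PTEG full of kernel-text entries would spin
 * here forever; pre-faulting of the kernel text keeps that from happening. */
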
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index 9bc0a9c2b9bc..e64ce3eec36e 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -445,9 +445,12 @@ END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
htab_insert_pte:
/* real page number in r5, PTE RPN value + index */
- rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
+ andis. r0,r31,_PAGE_4K_PFN@h
+ srdi r5,r31,PTE_RPN_SHIFT
+ bne- htab_special_pfn
sldi r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT
add r5,r5,r25
+htab_special_pfn:
sldi r5,r5,HW_PAGE_SHIFT
/* Calculate primary group hash */
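
In C terms, the hunk above changes how htab_insert_pte derives the hardware (4K) real address from a Linux PTE on a 64K-page kernel: normally the PTE's RPN counts 64K pages and must be scaled and combined with the 4K sub-page index held in r25, but when _PAGE_4K_PFN is set the RPN is already a 4K PFN and the scaling is skipped. The following is only an illustrative model; the constant values and the name hw_real_addr are assumptions, not the kernel's definitions.

/* Illustrative model of the RPN handling added above (not kernel code). */
#include <stdint.h>

#define PAGE_SHIFT      16              /* 64K Linux pages (assumed config) */
#define HW_PAGE_SHIFT   12              /* 4K hardware pages */
#define PTE_RPN_SHIFT   30              /* placeholder value, illustrative */
#define PAGE_4K_PFN     (1ull << 62)    /* placeholder for _PAGE_4K_PFN */

static uint64_t hw_real_addr(uint64_t pte, unsigned int subpage_index)
{
        uint64_t rpn = pte >> PTE_RPN_SHIFT;    /* srdi r5,r31,PTE_RPN_SHIFT */

        if (!(pte & PAGE_4K_PFN)) {
                /* RPN counts 64K pages: scale to 4K units and add the
                 * 4K sub-page index kept in r25. */
                rpn = (rpn << (PAGE_SHIFT - HW_PAGE_SHIFT)) + subpage_index;
        }
        /* with _PAGE_4K_PFN the RPN already is a 4K PFN: use it as-is */

        return rpn << HW_PAGE_SHIFT;            /* sldi r5,r5,HW_PAGE_SHIFT */
}
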
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 6f1016acdbf6..79aedaf36f2b 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -505,7 +505,7 @@ static inline int tlb_batching_enabled(void)
int enabled = 1;
if (root) {
- const char *model = get_property(root, "model", NULL);
+ const char *model = of_get_property(root, "model", NULL);
if (model && !strcmp(model, "IBM,9076-N81"))
enabled = 0;
of_node_put(root);
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index c0d2a694fa30..49618461defb 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -100,6 +100,11 @@ unsigned int HPAGE_SHIFT;
#ifdef CONFIG_PPC_64K_PAGES
int mmu_ci_restrictions;
#endif
+#ifdef CONFIG_DEBUG_PAGEALLOC
+static u8 *linear_map_hash_slots;
+static unsigned long linear_map_hash_count;
+static DEFINE_SPINLOCK(linear_map_hash_lock);
+#endif /* CONFIG_DEBUG_PAGEALLOC */
/* There are definitions of page sizes arrays to be used when none
* is provided by the firmware.
@@ -152,11 +157,10 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
for (vaddr = vstart, paddr = pstart; vaddr < vend;
vaddr += step, paddr += step) {
- unsigned long vpn, hash, hpteg;
+ unsigned long hash, hpteg;
unsigned long vsid = get_kernel_vsid(vaddr);
unsigned long va = (vsid << 28) | (vaddr & 0x0fffffff);
- vpn = va >> shift;
tmp_mode = mode;
/* Make non-kernel text non-executable */
@@ -174,6 +178,10 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
if (ret < 0)
break;
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ if ((paddr >> PAGE_SHIFT) < linear_map_hash_count)
+ linear_map_hash_slots[paddr >> PAGE_SHIFT] = ret | 0x80;
+#endif /* CONFIG_DEBUG_PAGEALLOC */
}
return ret < 0 ? ret : 0;
}
@@ -281,6 +289,7 @@ static void __init htab_init_page_sizes(void)
memcpy(mmu_psize_defs, mmu_psize_defaults_gp,
sizeof(mmu_psize_defaults_gp));
found:
+#ifndef CONFIG_DEBUG_PAGEALLOC
/*
* Pick a size for the linear mapping. Currently, we only support
* 16M, 1M and 4K which is the default
@@ -289,6 +298,7 @@ static void __init htab_init_page_sizes(void)
mmu_linear_psize = MMU_PAGE_16M;
else if (mmu_psize_defs[MMU_PAGE_1M].shift)
mmu_linear_psize = MMU_PAGE_1M;
+#endif /* CONFIG_DEBUG_PAGEALLOC */
#ifdef CONFIG_PPC_64K_PAGES
/*
@@ -303,12 +313,14 @@ static void __init htab_init_page_sizes(void)
if (mmu_psize_defs[MMU_PAGE_64K].shift) {
mmu_virtual_psize = MMU_PAGE_64K;
mmu_vmalloc_psize = MMU_PAGE_64K;
+ if (mmu_linear_psize == MMU_PAGE_4K)
+ mmu_linear_psize = MMU_PAGE_64K;
if (cpu_has_feature(CPU_FTR_CI_LARGE_PAGE))
mmu_io_psize = MMU_PAGE_64K;
else
mmu_ci_restrictions = 1;
}
-#endif
+#endif /* CONFIG_PPC_64K_PAGES */
printk(KERN_DEBUG "Page orders: linear mapping = %d, "
"virtual = %d, io = %d\n",
@@ -476,6 +488,13 @@ void __init htab_initialize(void)
mode_rw = _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_COHERENT | PP_RWXX;
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ linear_map_hash_count = lmb_end_of_DRAM() >> PAGE_SHIFT;
+ linear_map_hash_slots = __va(lmb_alloc_base(linear_map_hash_count,
+ 1, lmb.rmo_size));
+ memset(linear_map_hash_slots, 0, linear_map_hash_count);
+#endif /* CONFIG_DEBUG_PAGEALLOC */
+
/* On U3 based machines, we need to reserve the DART area and
* _NOT_ map it to avoid cache paradoxes as it's remapped non
* cacheable later on
@@ -573,6 +592,27 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
return pp;
}
+/*
+ * Demote a segment to using 4k pages.
+ * For now this makes the whole process use 4k pages.
+ */
+void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
+{
+#ifdef CONFIG_PPC_64K_PAGES
+ if (mm->context.user_psize == MMU_PAGE_4K)
+ return;
+ mm->context.user_psize = MMU_PAGE_4K;
+ mm->context.sllp = SLB_VSID_USER | mmu_psize_defs[MMU_PAGE_4K].sllp;
+ get_paca()->context = mm->context;
+ slb_flush_and_rebolt();
+#ifdef CONFIG_SPU_BASE
+ spu_flush_all_slbs(mm);
+#endif
+#endif
+}
+
+EXPORT_SYMBOL_GPL(demote_segment_4k);
+
/* Result code is:
* 0 - handled
* 1 - normal page fault
@@ -665,15 +705,19 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
#ifndef CONFIG_PPC_64K_PAGES
rc = __hash_page_4K(ea, access, vsid, ptep, trap, local);
#else
+ /* If _PAGE_4K_PFN is set, make sure this is a 4k segment */
+ if (pte_val(*ptep) & _PAGE_4K_PFN) {
+ demote_segment_4k(mm, ea);
+ psize = MMU_PAGE_4K;
+ }
+
if (mmu_ci_restrictions) {
/* If this PTE is non-cacheable, switch to 4k */
if (psize == MMU_PAGE_64K &&
(pte_val(*ptep) & _PAGE_NO_CACHE)) {
if (user_region) {
+ demote_segment_4k(mm, ea);
psize = MMU_PAGE_4K;
- mm->context.user_psize = MMU_PAGE_4K;
- mm->context.sllp = SLB_VSID_USER |
- mmu_psize_defs[MMU_PAGE_4K].sllp;
} else if (ea < VMALLOC_END) {
/*
* some driver did a non-cacheable mapping
@@ -685,6 +729,9 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
"non-cacheable mapping\n");
psize = mmu_vmalloc_psize = MMU_PAGE_4K;
}
+#ifdef CONFIG_SPU_BASE
+ spu_flush_all_slbs(mm);
+#endif
}
if (user_region) {
if (psize != get_paca()->context.user_psize) {
@@ -753,13 +800,8 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
if (mmu_ci_restrictions) {
/* If this PTE is non-cacheable, switch to 4k */
if (mm->context.user_psize == MMU_PAGE_64K &&
- (pte_val(*ptep) & _PAGE_NO_CACHE)) {
- mm->context.user_psize = MMU_PAGE_4K;
- mm->context.sllp = SLB_VSID_USER |
- mmu_psize_defs[MMU_PAGE_4K].sllp;
- get_paca()->context = mm->context;
- slb_flush_and_rebolt();
- }
+ (pte_val(*ptep) & _PAGE_NO_CACHE))
+ demote_segment_4k(mm, ea);
}
if (mm->context.user_psize == MMU_PAGE_64K)
__hash_page_64K(ea, access, vsid, ptep, trap, local);
@@ -819,3 +861,62 @@ void low_hash_fault(struct pt_regs *regs, unsigned long address)
}
bad_page_fault(regs, address, SIGBUS);
}
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
+{
+ unsigned long hash, hpteg, vsid = get_kernel_vsid(vaddr);
+ unsigned long va = (vsid << 28) | (vaddr & 0x0fffffff);
+ unsigned long mode = _PAGE_ACCESSED | _PAGE_DIRTY |
+ _PAGE_COHERENT | PP_RWXX | HPTE_R_N;
+ int ret;
+
+ hash = hpt_hash(va, PAGE_SHIFT);
+ hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
+
+ ret = ppc_md.hpte_insert(hpteg, va, __pa(vaddr),
+ mode, HPTE_V_BOLTED, mmu_linear_psize);
+ BUG_ON (ret < 0);
+ spin_lock(&linear_map_hash_lock);
+ BUG_ON(linear_map_hash_slots[lmi] & 0x80);
+ linear_map_hash_slots[lmi] = ret | 0x80;
+ spin_unlock(&linear_map_hash_lock);
+}
+
+static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
+{
+ unsigned long hash, hidx, slot, vsid = get_kernel_vsid(vaddr);
+ unsigned long va = (vsid << 28) | (vaddr & 0x0fffffff);
+
+ hash = hpt_hash(va, PAGE_SHIFT);
+ spin_lock(&linear_map_hash_lock);
+ BUG_ON(!(linear_map_hash_slots[lmi] & 0x80));
+ hidx = linear_map_hash_slots[lmi] & 0x7f;
+ linear_map_hash_slots[lmi] = 0;
+ spin_unlock(&linear_map_hash_lock);
+ if (hidx & _PTEIDX_SECONDARY)
+ hash = ~hash;
+ slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+ slot += hidx & _PTEIDX_GROUP_IX;
+ ppc_md.hpte_invalidate(slot, va, mmu_linear_psize, 0);
+}
+
+void kernel_map_pages(struct page *page, int numpages, int enable)
+{
+ unsigned long flags, vaddr, lmi;
+ int i;
+
+ local_irq_save(flags);
+ for (i = 0; i < numpages; i++, page++) {
+ vaddr = (unsigned long)page_address(page);
+ lmi = __pa(vaddr) >> PAGE_SHIFT;
+ if (lmi >= linear_map_hash_count)
+ continue;
+ if (enable)
+ kernel_map_linear_page(vaddr, lmi);
+ else
+ kernel_unmap_linear_page(vaddr, lmi);
+ }
+ local_irq_restore(flags);
+}
+#endif /* CONFIG_DEBUG_PAGEALLOC */
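
The DEBUG_PAGEALLOC code above keeps one byte per linear-mapping page in linear_map_hash_slots[]: bit 7 records that the page currently has a bolted HPTE, and the low bits hold the slot index returned by hpte_insert, which kernel_unmap_linear_page() later combines with the hash to find the entry again. Here is a small userspace model of that bookkeeping; the constants mirror _PTEIDX_SECONDARY/_PTEIDX_GROUP_IX but are restated as assumptions.

/* Userspace model of the linear_map_hash_slots[] encoding (illustrative). */
#include <stdint.h>

#define LMH_VALID        0x80u  /* bit 7: page currently has an HPTE */
#define PTEIDX_SECONDARY 0x8u   /* assumed value of _PTEIDX_SECONDARY */
#define PTEIDX_GROUP_IX  0x7u   /* assumed value of _PTEIDX_GROUP_IX */
#define HPTES_PER_GROUP  8u

/* kernel_map_linear_page(): remember which slot the HPTE went into */
static void record_slot(uint8_t *slots, unsigned long lmi, unsigned int hidx)
{
        slots[lmi] = LMH_VALID | (hidx & 0x7fu);
}

/* kernel_unmap_linear_page(): recover the absolute slot to invalidate */
static unsigned long forget_slot(uint8_t *slots, unsigned long lmi,
                                 unsigned long hash, unsigned long hash_mask)
{
        unsigned int hidx = slots[lmi] & 0x7fu;

        slots[lmi] = 0;                         /* page is now unmapped */
        if (hidx & PTEIDX_SECONDARY)
                hash = ~hash;                   /* entry is in the secondary bucket */
        return (hash & hash_mask) * HPTES_PER_GROUP + (hidx & PTEIDX_GROUP_IX);
}
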
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 8c77c791f87e..8508f973d9cc 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -24,6 +24,7 @@
#include <asm/machdep.h>
#include <asm/cputable.h>
#include <asm/tlb.h>
+#include <asm/spu.h>
#include <linux/sysctl.h>
@@ -315,12 +316,11 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
{
if (pte_present(*ptep)) {
/* We open-code pte_clear because we need to pass the right
- * argument to hpte_update (huge / !huge)
+ * argument to hpte_need_flush (huge / !huge). Might not be
+ * necessary anymore if we make hpte_need_flush() get the
+ * page size from the slices
*/
- unsigned long old = pte_update(ptep, ~0UL);
- if (old & _PAGE_HASHPTE)
- hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
- flush_tlb_pending();
+ pte_update(mm, addr & HPAGE_MASK, ptep, ~0UL, 1);
}
*ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
}
@@ -328,12 +328,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
- unsigned long old = pte_update(ptep, ~0UL);
-
- if (old & _PAGE_HASHPTE)
- hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
- *ptep = __pte(0);
-
+ unsigned long old = pte_update(mm, addr, ptep, ~0UL, 1);
return __pte(old);
}
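
Both hugetlb hunks above now rely on the 64-bit pte_update() taking the mm, the address and a "huge" flag and doing the hash notification itself, instead of the caller pairing pte_update() with hpte_update()/flush_tlb_pending(). Roughly, the behaviour these callers expect looks like the sketch below; this is a paraphrase for readability, not the real helper (which clears the bits with an atomic ldarx/stdcx. loop):

/* Sketch of the behaviour expected from the new pte_update() (paraphrase). */
static inline unsigned long pte_update_sketch(struct mm_struct *mm,
                                              unsigned long addr, pte_t *ptep,
                                              unsigned long clr, int huge)
{
        unsigned long old = pte_val(*ptep);

        *ptep = __pte(old & ~clr);      /* real code clears bits atomically */

        if (old & _PAGE_HASHPTE)        /* entry may be in the hash table... */
                hpte_need_flush(mm, addr, ptep, old, huge);     /* ...flush it */

        return old;
}
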
@@ -513,6 +508,9 @@ int prepare_hugepage_range(unsigned long addr, unsigned long len, pgoff_t pgoff)
if ((addr + len) > 0x100000000UL)
err = open_high_hpage_areas(current->mm,
HTLB_AREA_MASK(addr, len));
+#ifdef CONFIG_SPU_BASE
+ spu_flush_all_slbs(current->mm);
+#endif
if (err) {
printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)"
" failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n",
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 0e53ca8f02fb..5fce6ccecb8d 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -115,6 +115,10 @@ void MMU_setup(void)
if (strstr(cmd_line, "noltlbs")) {
__map_without_ltlbs = 1;
}
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ __map_without_bats = 1;
+ __map_without_ltlbs = 1;
+#endif
}
/*
diff --git a/arch/powerpc/mm/lmb.c b/arch/powerpc/mm/lmb.c
index 716a2906a24d..e3a1e8dc536a 100644
--- a/arch/powerpc/mm/lmb.c
+++ b/arch/powerpc/mm/lmb.c
@@ -146,6 +146,10 @@ static long __init lmb_add_region(struct lmb_region *rgn, unsigned long base,
unsigned long rgnbase = rgn->region[i].base;
unsigned long rgnsize = rgn->region[i].size;
+ if ((rgnbase == base) && (rgnsize == size))
+ /* Already have this region, so we're done */
+ return 0;
+
adjacent = lmb_addrs_adjacent(base,size,rgnbase,rgnsize);
if ( adjacent > 0 ) {
rgn->region[i].base -= size;
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 52f397c108a7..c4bcd7546424 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -58,9 +58,6 @@ int init_bootmem_done;
int mem_init_done;
unsigned long memory_limit;
-extern void hash_preload(struct mm_struct *mm, unsigned long ea,
- unsigned long access, unsigned long trap);
-
int page_is_ram(unsigned long pfn)
{
unsigned long paddr = (pfn << PAGE_SHIFT);
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index bea2d21ac6f7..9c4538bb04b0 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -19,9 +19,14 @@
* 2 of the License, or (at your option) any later version.
*
*/
+#include <linux/mm.h>
#include <asm/tlbflush.h>
#include <asm/mmu.h>
+extern void hash_preload(struct mm_struct *mm, unsigned long ea,
+ unsigned long access, unsigned long trap);
+
+
#ifdef CONFIG_PPC32
extern void mapin_ram(void);
extern int map_page(unsigned long va, phys_addr_t pa, int flags);
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 262790910ff2..b3a592b25ab3 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -74,7 +74,7 @@ static struct device_node * __cpuinit find_cpu_node(unsigned int cpu)
while ((cpu_node = of_find_node_by_type(cpu_node, "cpu")) != NULL) {
/* Try interrupt server first */
- interrupt_server = get_property(cpu_node,
+ interrupt_server = of_get_property(cpu_node,
"ibm,ppc-interrupt-server#s", &len);
len = len / sizeof(u32);
@@ -85,7 +85,7 @@ static struct device_node * __cpuinit find_cpu_node(unsigned int cpu)
return cpu_node;
}
} else {
- reg = get_property(cpu_node, "reg", &len);
+ reg = of_get_property(cpu_node, "reg", &len);
if (reg && (len > 0) && (reg[0] == hw_cpuid))
return cpu_node;
}
@@ -97,7 +97,7 @@ static struct device_node * __cpuinit find_cpu_node(unsigned int cpu)
/* must hold reference to node during call */
static const int *of_get_associativity(struct device_node *dev)
{
- return get_property(dev, "ibm,associativity", NULL);
+ return of_get_property(dev, "ibm,associativity", NULL);
}
/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
@@ -154,7 +154,7 @@ EXPORT_SYMBOL_GPL(of_node_to_nid);
* characteristics relative to its multiple connections. We ignore
* this for now. We also assume that all cpu and memory sets have
* their distances represented at a common level. This won't be
- * true for heirarchical NUMA.
+ * true for hierarchical NUMA.
*
* In any case the ibm,associativity-reference-points should give
* the correct depth for a normal NUMA system.
@@ -179,7 +179,7 @@ static int __init find_min_common_depth(void)
* configuration (should be all 0's) and the second is for a normal
* NUMA configuration.
*/
- ref_points = get_property(rtas_root,
+ ref_points = of_get_property(rtas_root,
"ibm,associativity-reference-points", &len);
if ((len >= 1) && ref_points) {
@@ -201,8 +201,8 @@ static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells)
if (!memory)
panic("numa.c: No memory nodes found!");
- *n_addr_cells = prom_n_addr_cells(memory);
- *n_size_cells = prom_n_size_cells(memory);
+ *n_addr_cells = of_n_addr_cells(memory);
+ *n_size_cells = of_n_size_cells(memory);
of_node_put(memory);
}
@@ -308,9 +308,9 @@ static void __init parse_drconf_memory(struct device_node *memory)
int nid, default_nid = 0;
unsigned int start, ai, flags;
- lm = get_property(memory, "ibm,lmb-size", &ls);
- dm = get_property(memory, "ibm,dynamic-memory", &ld);
- aa = get_property(memory, "ibm,associativity-lookup-arrays", &la);
+ lm = of_get_property(memory, "ibm,lmb-size", &ls);
+ dm = of_get_property(memory, "ibm,dynamic-memory", &ld);
+ aa = of_get_property(memory, "ibm,associativity-lookup-arrays", &la);
if (!lm || !dm || !aa ||
ls < sizeof(unsigned int) || ld < sizeof(unsigned int) ||
la < 2 * sizeof(unsigned int))
@@ -404,10 +404,10 @@ static int __init parse_numa_properties(void)
const unsigned int *memcell_buf;
unsigned int len;
- memcell_buf = get_property(memory,
+ memcell_buf = of_get_property(memory,
"linux,usable-memory", &len);
if (!memcell_buf || len <= 0)
- memcell_buf = get_property(memory, "reg", &len);
+ memcell_buf = of_get_property(memory, "reg", &len);
if (!memcell_buf || len <= 0)
continue;
@@ -725,7 +725,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
const unsigned int *memcell_buf;
unsigned int len;
- memcell_buf = get_property(memory, "reg", &len);
+ memcell_buf = of_get_property(memory, "reg", &len);
if (!memcell_buf || len <= 0)
continue;
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index c284bdac9947..bca560374927 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -183,8 +183,8 @@ __ioremap(phys_addr_t addr, unsigned long size, unsigned long flags)
* mem_init() sets high_memory so only do the check after that.
*/
if (mem_init_done && (p < virt_to_phys(high_memory))) {
- printk("__ioremap(): phys addr "PHYS_FMT" is RAM lr %p\n", p,
- __builtin_return_address(0));
+ printk("__ioremap(): phys addr 0x%llx is RAM lr %p\n",
+ (unsigned long long)p, __builtin_return_address(0));
return NULL;
}
@@ -266,9 +266,12 @@ int map_page(unsigned long va, phys_addr_t pa, int flags)
pg = pte_alloc_kernel(pd, va);
if (pg != 0) {
err = 0;
- set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags)));
- if (mem_init_done)
- flush_HPTE(0, va, pmd_val(*pd));
+ /* The PTE should never be already set nor present in the
+ * hash table
+ */
+ BUG_ON(pte_val(*pg) & (_PAGE_PRESENT | _PAGE_HASHPTE));
+ set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT,
+ __pgprot(flags)));
}
return err;
}
@@ -279,16 +282,19 @@ int map_page(unsigned long va, phys_addr_t pa, int flags)
void __init mapin_ram(void)
{
unsigned long v, p, s, f;
+ int ktext;
s = mmu_mapin_ram();
v = KERNELBASE + s;
p = PPC_MEMSTART + s;
for (; s < total_lowmem; s += PAGE_SIZE) {
- if ((char *) v >= _stext && (char *) v < etext)
- f = _PAGE_RAM_TEXT;
- else
- f = _PAGE_RAM;
+ ktext = ((char *) v >= _stext && (char *) v < etext);
+ f = ktext ? _PAGE_RAM_TEXT : _PAGE_RAM;
map_page(v, p, f);
+#ifdef CONFIG_PPC_STD_MMU_32
+ if (ktext)
+ hash_preload(&init_mm, v, 0, 0x300);
+#endif
v += PAGE_SIZE;
p += PAGE_SIZE;
}
@@ -445,3 +451,55 @@ exit:
return ret;
}
+#ifdef CONFIG_DEBUG_PAGEALLOC
+
+static int __change_page_attr(struct page *page, pgprot_t prot)
+{
+ pte_t *kpte;
+ pmd_t *kpmd;
+ unsigned long address;
+
+ BUG_ON(PageHighMem(page));
+ address = (unsigned long)page_address(page);
+
+ if (v_mapped_by_bats(address) || v_mapped_by_tlbcam(address))
+ return 0;
+ if (!get_pteptr(&init_mm, address, &kpte, &kpmd))
+ return -EINVAL;
+ set_pte_at(&init_mm, address, kpte, mk_pte(page, prot));
+ wmb();
+ flush_HPTE(0, address, pmd_val(*kpmd));
+ pte_unmap(kpte);
+
+ return 0;
+}
+
+/*
+ * Change the page attributes of a page in the linear mapping.
+ *
+ * THIS CONFLICTS WITH BAT MAPPINGS, DEBUG USE ONLY
+ */
+static int change_page_attr(struct page *page, int numpages, pgprot_t prot)
+{
+ int i, err = 0;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ for (i = 0; i < numpages; i++, page++) {
+ err = __change_page_attr(page, prot);
+ if (err)
+ break;
+ }
+ local_irq_restore(flags);
+ return err;
+}
+
+
+void kernel_map_pages(struct page *page, int numpages, int enable)
+{
+ if (PageHighMem(page))
+ return;
+
+ change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
+}
+#endif /* CONFIG_DEBUG_PAGEALLOC */
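
For context (illustrative, not part of the patch): with CONFIG_DEBUG_PAGEALLOC the core page allocator invokes the kernel_map_pages() hook as pages move in and out of the free lists, so both implementations added in this series (HPTE removal in hash_utils_64.c above, PTE rewrite here on 32-bit) turn stale accesses to freed pages into faults. Schematically:

/* Schematic use of the hook (illustrative; not a quote from mm/page_alloc.c). */
static void on_free_pages(struct page *page, int order)
{
        kernel_map_pages(page, 1 << order, 0);  /* unmap: catch use-after-free */
}

static void on_alloc_pages(struct page *page, int order)
{
        kernel_map_pages(page, 1 << order, 1);  /* map again before handing out */
}
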
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 16e4ee1c2318..1d443407423c 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -103,7 +103,7 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags)
*
*/
if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags,
- mmu_virtual_psize)) {
+ mmu_io_psize)) {
printk(KERN_ERR "Failed to do bolted mapping IO "
"memory at %016lx !\n", pa);
return -ENOMEM;
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index 7cceb2c44cb9..05066674a7a0 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -85,8 +85,10 @@ unsigned long __init mmu_mapin_ram(void)
unsigned long max_size = (256<<20);
unsigned long align;
- if (__map_without_bats)
+ if (__map_without_bats) {
+ printk(KERN_DEBUG "RAM mapped without BATs\n");
return 0;
+ }
/* Set up BAT2 and if necessary BAT3 to cover RAM. */
diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c
index b58baa65c4a7..fd8d08c325eb 100644
--- a/arch/powerpc/mm/tlb_64.c
+++ b/arch/powerpc/mm/tlb_64.c
@@ -120,17 +120,20 @@ void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
}
/*
- * Update the MMU hash table to correspond with a change to
- * a Linux PTE. If wrprot is true, it is permissible to
- * change the existing HPTE to read-only rather than removing it
- * (if we remove it we should clear the _PTE_HPTEFLAGS bits).
+ * A linux PTE was changed and the corresponding hash table entry
+ * needs to be flushed. This function will either perform the flush
+ * immediately or will batch it up if the current CPU has an active
+ * batch on it.
+ *
+ * Must be called from within some kind of spinlock/non-preempt region...
*/
-void hpte_update(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, unsigned long pte, int huge)
+void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned long pte, int huge)
{
struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
- unsigned long vsid;
+ unsigned long vsid, vaddr;
unsigned int psize;
+ real_pte_t rpte;
int i;
i = batch->index;
@@ -151,6 +154,26 @@ void hpte_update(struct mm_struct *mm, unsigned long addr,
} else
psize = pte_pagesize_index(pte);
+ /* Build full vaddr */
+ if (!is_kernel_addr(addr)) {
+ vsid = get_vsid(mm->context.id, addr);
+ WARN_ON(vsid == 0);
+ } else
+ vsid = get_kernel_vsid(addr);
+ vaddr = (vsid << 28 ) | (addr & 0x0fffffff);
+ rpte = __real_pte(__pte(pte), ptep);
+
+ /*
+ * Check if we have an active batch on this CPU. If not, just
+ * flush now and return. For now, we do global invalidates
+ * in that case; it might be worth testing the mm cpu mask
+ * and deciding to use local invalidates instead...
+ */
+ if (!batch->active) {
+ flush_hash_page(vaddr, rpte, psize, 0);
+ return;
+ }
+
/*
* This can happen when we are in the middle of a TLB batch and
* we encounter memory pressure (eg copy_page_range when it tries
@@ -162,47 +185,42 @@ void hpte_update(struct mm_struct *mm, unsigned long addr,
* batch
*/
if (i != 0 && (mm != batch->mm || batch->psize != psize)) {
- flush_tlb_pending();
+ __flush_tlb_pending(batch);
i = 0;
}
if (i == 0) {
batch->mm = mm;
batch->psize = psize;
}
- if (!is_kernel_addr(addr)) {
- vsid = get_vsid(mm->context.id, addr);
- WARN_ON(vsid == 0);
- } else
- vsid = get_kernel_vsid(addr);
- batch->vaddr[i] = (vsid << 28 ) | (addr & 0x0fffffff);
- batch->pte[i] = __real_pte(__pte(pte), ptep);
+ batch->pte[i] = rpte;
+ batch->vaddr[i] = vaddr;
batch->index = ++i;
if (i >= PPC64_TLB_BATCH_NR)
- flush_tlb_pending();
+ __flush_tlb_pending(batch);
}
+/*
+ * This function is called when terminating an mmu batch or when a batch
+ * is full. It will perform the flush of all the entries currently stored
+ * in a batch.
+ *
+ * Must be called from within some kind of spinlock/non-preempt region...
+ */
void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
{
- int i;
- int cpu;
cpumask_t tmp;
- int local = 0;
+ int i, local = 0;
- BUG_ON(in_interrupt());
-
- cpu = get_cpu();
i = batch->index;
- tmp = cpumask_of_cpu(cpu);
+ tmp = cpumask_of_cpu(smp_processor_id());
if (cpus_equal(batch->mm->cpu_vm_mask, tmp))
local = 1;
-
if (i == 1)
flush_hash_page(batch->vaddr[0], batch->pte[0],
batch->psize, local);
else
flush_hash_range(i, local);
batch->index = 0;
- put_cpu();
}
void pte_free_finish(void)
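
To summarise the new flow in tlb_64.c: hpte_need_flush() now computes the full virtual address and real PTE up front, flushes immediately when no batch is active, flushes the pending batch when the mm or page size changes, and otherwise queues the entry until the batch fills or is terminated by __flush_tlb_pending(). Below is a self-contained model of that policy; the names and the batch size are stand-ins, not the kernel's.

/* Userspace model of the batching policy hpte_need_flush() implements. */
#include <stddef.h>

#define TLB_BATCH_NR 192                /* stand-in for PPC64_TLB_BATCH_NR */

struct tlb_batch {
        int active;                     /* set around batched regions */
        size_t index;
        const void *mm;                 /* owning mm */
        int psize;                      /* page size of queued entries */
        unsigned long vaddr[TLB_BATCH_NR];
};

static void flush_one(unsigned long vaddr)
{
        (void)vaddr;                    /* flush_hash_page() stand-in */
}

static void flush_pending(struct tlb_batch *b)
{
        for (size_t i = 0; i < b->index; i++)   /* __flush_tlb_pending() stand-in */
                flush_one(b->vaddr[i]);
        b->index = 0;
}

static void need_flush(struct tlb_batch *b, const void *mm, int psize,
                       unsigned long vaddr)
{
        if (!b->active) {               /* no open batch: flush immediately */
                flush_one(vaddr);
                return;
        }
        if (b->index && (b->mm != mm || b->psize != psize))
                flush_pending(b);       /* never mix mms or page sizes */
        if (b->index == 0) {
                b->mm = mm;
                b->psize = psize;
        }
        b->vaddr[b->index++] = vaddr;
        if (b->index >= TLB_BATCH_NR)   /* batch full */
                flush_pending(b);
}
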