29 files changed, 243 insertions, 1510 deletions
diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig
index 5c8a2ebfc720..411fdc0901f7 100644
--- a/arch/sh/mm/Kconfig
+++ b/arch/sh/mm/Kconfig
@@ -15,11 +15,10 @@ config MMU
 
 config PAGE_OFFSET
 	hex
-	default "0x80000000" if MMU && SUPERH32
-	default "0x20000000" if MMU && SUPERH64
+	default "0x80000000" if MMU
 	default "0x00000000"
 
-config FORCE_MAX_ZONEORDER
+config ARCH_FORCE_MAX_ORDER
 	int "Maximum zone order"
 	range 9 64 if PAGE_SIZE_16KB
 	default "9" if PAGE_SIZE_16KB
@@ -45,7 +44,7 @@ config FORCE_MAX_ZONEORDER
 config MEMORY_START
 	hex "Physical memory start address"
 	default "0x08000000"
-	---help---
+	help
 	  Computers built with Hitachi SuperH processors always
 	  map the ROM starting at address zero.  But the processor
 	  does not specify the range that RAM takes.
@@ -72,12 +71,11 @@ config MEMORY_SIZE
 
 config 29BIT
 	def_bool !32BIT
-	depends on SUPERH32
 	select UNCACHED_MAPPING
 
 config 32BIT
 	bool
-	default y if CPU_SH5 || !MMU
+	default !MMU
 
 config PMB
 	bool "Support 32-bit physical addressing through PMB"
@@ -107,7 +105,7 @@ config VSYSCALL
 	  (the default value) say Y.
 
 config NUMA
-	bool "Non Uniform Memory Access (NUMA) Support"
+	bool "Non-Uniform Memory Access (NUMA) Support"
 	depends on MMU && SYS_SUPPORTS_NUMA
 	select ARCH_WANT_NUMA_VARIABLE_LOCALITY
 	default n
@@ -122,7 +120,7 @@ config NODES_SHIFT
 	int
 	default "3" if CPU_SUBTYPE_SHX3
 	default "1"
-	depends on NEED_MULTIPLE_NODES
+	depends on NUMA
 
 config ARCH_FLATMEM_ENABLE
 	def_bool y
@@ -138,21 +136,13 @@ config ARCH_SPARSEMEM_DEFAULT
 config ARCH_SELECT_MEMORY_MODEL
 	def_bool y
 
-config ARCH_ENABLE_MEMORY_HOTPLUG
-	def_bool y
-	depends on SPARSEMEM && MMU
-
-config ARCH_ENABLE_MEMORY_HOTREMOVE
-	def_bool y
-	depends on SPARSEMEM && MMU
-
 config ARCH_MEMORY_PROBE
 	def_bool y
 	depends on MEMORY_HOTPLUG
 
 config IOREMAP_FIXED
        def_bool y
-       depends on X2TLB || SUPERH64
+       depends on X2TLB
 
 config UNCACHED_MAPPING
 	bool
@@ -184,7 +174,7 @@ config PAGE_SIZE_16KB
 
 config PAGE_SIZE_64KB
 	bool "64kB"
-	depends on !MMU || CPU_SH4 || CPU_SH5
+	depends on !MMU || CPU_SH4
 	help
 	  This enables support for 64kB pages, possible on all SH-4
 	  CPUs and later.
@@ -216,10 +206,6 @@ config HUGETLB_PAGE_SIZE_64MB
 	bool "64MB"
 	depends on X2TLB
 
-config HUGETLB_PAGE_SIZE_512MB
-	bool "512MB"
-	depends on CPU_SH5
-
 endchoice
 
 config SCHED_MC
@@ -242,7 +228,7 @@ config SH7705_CACHE_32KB
 
 choice
 	prompt "Cache mode"
-	default CACHE_WRITEBACK if CPU_SH2A || CPU_SH3 || CPU_SH4 || CPU_SH5
+	default CACHE_WRITEBACK if CPU_SH2A || CPU_SH3 || CPU_SH4
 	default CACHE_WRITETHROUGH if (CPU_SH2 && !CPU_SH2A)
 
 config CACHE_WRITEBACK
diff --git a/arch/sh/mm/Makefile b/arch/sh/mm/Makefile
index 5051b38fd5b6..f69ddc70b146 100644
--- a/arch/sh/mm/Makefile
+++ b/arch/sh/mm/Makefile
@@ -10,15 +10,14 @@ cacheops-$(CONFIG_CPU_SUBTYPE_SH7619)	:= cache-sh2.o
 cacheops-$(CONFIG_CPU_SH2A)		:= cache-sh2a.o
 cacheops-$(CONFIG_CPU_SH3)		:= cache-sh3.o
 cacheops-$(CONFIG_CPU_SH4)		:= cache-sh4.o flush-sh4.o
-cacheops-$(CONFIG_CPU_SH5)		:= cache-sh5.o flush-sh4.o
 cacheops-$(CONFIG_SH7705_CACHE_32KB)	+= cache-sh7705.o
 cacheops-$(CONFIG_CPU_SHX3)		+= cache-shx3.o
 
 obj-y			+= $(cacheops-y)
 
 mmu-y			:= nommu.o extable_32.o
-mmu-$(CONFIG_MMU)	:= extable_$(BITS).o fault.o ioremap.o kmap.o \
-			   pgtable.o tlbex_$(BITS).o tlbflush_$(BITS).o
+mmu-$(CONFIG_MMU)	:= extable_32.o fault.o ioremap.o kmap.o \
+			   pgtable.o tlbex_32.o tlbflush_32.o
 
 obj-y			+= $(mmu-y)
 
@@ -31,7 +30,6 @@ ifdef CONFIG_MMU
 debugfs-$(CONFIG_CPU_SH4)	+= tlb-debugfs.o
 tlb-$(CONFIG_CPU_SH3)		:= tlb-sh3.o
 tlb-$(CONFIG_CPU_SH4)		:= tlb-sh4.o tlb-urb.o
-tlb-$(CONFIG_CPU_SH5)		:= tlb-sh5.o
 tlb-$(CONFIG_CPU_HAS_PTEAEX)	:= tlb-pteaex.o tlb-urb.o
 obj-y				+= $(tlb-y)
 endif
@@ -45,30 +43,3 @@ obj-$(CONFIG_UNCACHED_MAPPING)	+= uncached.o
 obj-$(CONFIG_HAVE_SRAM_POOL)	+= sram.o
 
 GCOV_PROFILE_pmb.o := n
-
-# Special flags for tlbex_64.o.  This puts restrictions on the number of
-# caller-save registers that the compiler can target when building this file.
-# This is required because the code is called from a context in entry.S where
-# very few registers have been saved in the exception handler (for speed
-# reasons).
-# The caller save registers that have been saved and which can be used are
-# r2,r3,r4,r5 : argument passing
-# r15, r18 : SP and LINK
-# tr0-4 : allow all caller-save TR's.  The compiler seems to be able to make
-#         use of them, so it's probably beneficial to performance to save them
-#         and have them available for it.
-#
-# The resources not listed below are callee save, i.e. the compiler is free to
-# use any of them and will spill them to the stack itself.
-
-CFLAGS_tlbex_64.o += -ffixed-r7 \
-	-ffixed-r8 -ffixed-r9 -ffixed-r10 -ffixed-r11 -ffixed-r12 \
-	-ffixed-r13 -ffixed-r14 -ffixed-r16 -ffixed-r17 -ffixed-r19 \
-	-ffixed-r20 -ffixed-r21 -ffixed-r22 -ffixed-r23 \
-	-ffixed-r24 -ffixed-r25 -ffixed-r26 -ffixed-r27 \
-	-ffixed-r36 -ffixed-r37 -ffixed-r38 -ffixed-r39 -ffixed-r40 \
-	-ffixed-r41 -ffixed-r42 -ffixed-r43  \
-	-ffixed-r60 -ffixed-r61 -ffixed-r62 \
-	-fomit-frame-pointer
-
-ccflags-y := -Werror
diff --git a/arch/sh/mm/alignment.c b/arch/sh/mm/alignment.c
index fb517b82a87b..3a76a766f423 100644
--- a/arch/sh/mm/alignment.c
+++ b/arch/sh/mm/alignment.c
@@ -140,7 +140,7 @@ static int alignment_proc_open(struct inode *inode, struct file *file)
 static ssize_t alignment_proc_write(struct file *file,
 		const char __user *buffer, size_t count, loff_t *pos)
 {
-	int *data = PDE_DATA(file_inode(file));
+	int *data = pde_data(file_inode(file));
 	char mode;
 
 	if (count > 0) {
@@ -161,7 +161,7 @@ static const struct proc_ops alignment_proc_ops = {
 };
 
 /*
- * This needs to be done after sysctl_init, otherwise sys/ will be
+ * This needs to be done after sysctl_init_bases(), otherwise sys/ will be
  * overwritten.  Actually, this shouldn't be in sys/ at all since
  * it isn't a sysctl, and it doesn't contain sysctl information.
  * We now locate it in /proc/cpu/alignment instead.
diff --git a/arch/sh/mm/asids-debugfs.c b/arch/sh/mm/asids-debugfs.c
index 4c1ca197e9c5..d16d6f5ec774 100644
--- a/arch/sh/mm/asids-debugfs.c
+++ b/arch/sh/mm/asids-debugfs.c
@@ -26,7 +26,7 @@
 #include <asm/processor.h>
 #include <asm/mmu_context.h>
 
-static int asids_seq_show(struct seq_file *file, void *iter)
+static int asids_debugfs_show(struct seq_file *file, void *iter)
 {
 	struct task_struct *p;
 
@@ -48,18 +48,7 @@ static int asids_seq_show(struct seq_file *file, void *iter)
 	return 0;
 }
 
-static int asids_debugfs_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, asids_seq_show, inode->i_private);
-}
-
-static const struct file_operations asids_debugfs_fops = {
-	.owner		= THIS_MODULE,
-	.open		= asids_debugfs_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(asids_debugfs);
 
 static int __init asids_debugfs_init(void)
 {
diff --git a/arch/sh/mm/cache-debugfs.c b/arch/sh/mm/cache-debugfs.c
index 17d780794497..b0f185169dfa 100644
--- a/arch/sh/mm/cache-debugfs.c
+++ b/arch/sh/mm/cache-debugfs.c
@@ -22,7 +22,7 @@ enum cache_type {
 	CACHE_TYPE_UNIFIED,
 };
 
-static int cache_seq_show(struct seq_file *file, void *iter)
+static int cache_debugfs_show(struct seq_file *file, void *iter)
 {
 	unsigned int cache_type = (unsigned int)file->private;
 	struct cache_info *cache;
@@ -94,18 +94,7 @@ static int cache_seq_show(struct seq_file *file, void *iter)
 	return 0;
 }
 
-static int cache_debugfs_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, cache_seq_show, inode->i_private);
-}
-
-static const struct file_operations cache_debugfs_fops = {
-	.owner		= THIS_MODULE,
-	.open		= cache_debugfs_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(cache_debugfs);
 
 static int __init cache_debugfs_init(void)
 {
diff --git a/arch/sh/mm/cache-sh3.c b/arch/sh/mm/cache-sh3.c
index 8172a171d727..bc595982d396 100644
--- a/arch/sh/mm/cache-sh3.c
+++ b/arch/sh/mm/cache-sh3.c
@@ -12,12 +12,10 @@
 #include <linux/threads.h>
 #include <asm/addrspace.h>
 #include <asm/page.h>
-#include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/cache.h>
 #include <asm/io.h>
 #include <linux/uaccess.h>
-#include <asm/pgalloc.h>
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 
diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c
index eee911422cf9..72c2e1b46c08 100644
--- a/arch/sh/mm/cache-sh4.c
+++ b/arch/sh/mm/cache-sh4.c
@@ -16,7 +16,7 @@
 #include <linux/mutex.h>
 #include <linux/fs.h>
 #include <linux/highmem.h>
-#include <asm/pgtable.h>
+#include <linux/pagemap.h>
 #include <asm/mmu_context.h>
 #include <asm/cache_insns.h>
 #include <asm/cacheflush.h>
@@ -183,7 +183,7 @@ static void sh4_flush_cache_all(void *unused)
  * accessed with (hence cache set) is in accord with the physical
  * address (i.e. tag).  It's no different here.
  *
- * Caller takes mm->mmap_sem.
+ * Caller takes mm->mmap_lock.
  */
 static void sh4_flush_cache_mm(void *arg)
 {
@@ -208,8 +208,6 @@ static void sh4_flush_cache_page(void *args)
 	struct page *page;
 	unsigned long address, pfn, phys;
 	int map_coherent = 0;
-	pgd_t *pgd;
-	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
 	void *vaddr;
@@ -223,9 +221,7 @@ static void sh4_flush_cache_page(void *args)
 	if (cpu_context(smp_processor_id(), vma->vm_mm) == NO_CONTEXT)
 		return;
 
-	pgd = pgd_offset(vma->vm_mm, address);
-	pud = pud_offset(pgd, address);
-	pmd = pmd_offset(pud, address);
+	pmd = pmd_off(vma->vm_mm, address);
 	pte = pte_offset_kernel(pmd, address);
 
 	/* If the page isn't present, there is nothing to do here. */
diff --git a/arch/sh/mm/cache-sh5.c b/arch/sh/mm/cache-sh5.c
deleted file mode 100644
index 445b5e69b73c..000000000000
--- a/arch/sh/mm/cache-sh5.c
+++ /dev/null
@@ -1,621 +0,0 @@
-/*
- * arch/sh/mm/cache-sh5.c
- *
- * Copyright (C) 2000, 2001  Paolo Alberelli
- * Copyright (C) 2002  Benedict Gaster
- * Copyright (C) 2003  Richard Curnow
- * Copyright (C) 2003 - 2008  Paul Mundt
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include <linux/init.h>
-#include <linux/mman.h>
-#include <linux/mm.h>
-#include <asm/tlb.h>
-#include <asm/processor.h>
-#include <asm/cache.h>
-#include <asm/pgalloc.h>
-#include <linux/uaccess.h>
-#include <asm/mmu_context.h>
-
-extern void __weak sh4__flush_region_init(void);
-
-/* Wired TLB entry for the D-cache */
-static unsigned long long dtlb_cache_slot;
-
-/*
- * The following group of functions deal with mapping and unmapping a
- * temporary page into a DTLB slot that has been set aside for exclusive
- * use.
- */
-static inline void
-sh64_setup_dtlb_cache_slot(unsigned long eaddr, unsigned long asid,
-			   unsigned long paddr)
-{
-	local_irq_disable();
-	sh64_setup_tlb_slot(dtlb_cache_slot, eaddr, asid, paddr);
-}
-
-static inline void sh64_teardown_dtlb_cache_slot(void)
-{
-	sh64_teardown_tlb_slot(dtlb_cache_slot);
-	local_irq_enable();
-}
-
-static inline void sh64_icache_inv_all(void)
-{
-	unsigned long long addr, flag, data;
-	unsigned long flags;
-
-	addr = ICCR0;
-	flag = ICCR0_ICI;
-	data = 0;
-
-	/* Make this a critical section for safety (probably not strictly necessary.) */
-	local_irq_save(flags);
-
-	/* Without %1 it gets unexplicably wrong */
-	__asm__ __volatile__ (
-		"getcfg	%3, 0, %0\n\t"
-		"or	%0, %2, %0\n\t"
-		"putcfg	%3, 0, %0\n\t"
-		"synci"
-		: "=&r" (data)
-		: "0" (data), "r" (flag), "r" (addr));
-
-	local_irq_restore(flags);
-}
-
-static void sh64_icache_inv_kernel_range(unsigned long start, unsigned long end)
-{
-	/* Invalidate range of addresses [start,end] from the I-cache, where
-	 * the addresses lie in the kernel superpage. */
-
-	unsigned long long ullend, addr, aligned_start;
-	aligned_start = (unsigned long long)(signed long long)(signed long) start;
-	addr = L1_CACHE_ALIGN(aligned_start);
-	ullend = (unsigned long long) (signed long long) (signed long) end;
-
-	while (addr <= ullend) {
-		__asm__ __volatile__ ("icbi %0, 0" : : "r" (addr));
-		addr += L1_CACHE_BYTES;
-	}
-}
-
-static void sh64_icache_inv_user_page(struct vm_area_struct *vma, unsigned long eaddr)
-{
-	/* If we get called, we know that vma->vm_flags contains VM_EXEC.
-	   Also, eaddr is page-aligned. */
-	unsigned int cpu = smp_processor_id();
-	unsigned long long addr, end_addr;
-	unsigned long flags = 0;
-	unsigned long running_asid, vma_asid;
-	addr = eaddr;
-	end_addr = addr + PAGE_SIZE;
-
-	/* Check whether we can use the current ASID for the I-cache
-	   invalidation.  For example, if we're called via
-	   access_process_vm->flush_cache_page->here, (e.g. when reading from
-	   /proc), 'running_asid' will be that of the reader, not of the
-	   victim.
-
-	   Also, note the risk that we might get pre-empted between the ASID
-	   compare and blocking IRQs, and before we regain control, the
-	   pid->ASID mapping changes.  However, the whole cache will get
-	   invalidated when the mapping is renewed, so the worst that can
-	   happen is that the loop below ends up invalidating somebody else's
-	   cache entries.
-	*/
-
-	running_asid = get_asid();
-	vma_asid = cpu_asid(cpu, vma->vm_mm);
-	if (running_asid != vma_asid) {
-		local_irq_save(flags);
-		switch_and_save_asid(vma_asid);
-	}
-	while (addr < end_addr) {
-		/* Worth unrolling a little */
-		__asm__ __volatile__("icbi %0,  0" : : "r" (addr));
-		__asm__ __volatile__("icbi %0, 32" : : "r" (addr));
-		__asm__ __volatile__("icbi %0, 64" : : "r" (addr));
-		__asm__ __volatile__("icbi %0, 96" : : "r" (addr));
-		addr += 128;
-	}
-	if (running_asid != vma_asid) {
-		switch_and_save_asid(running_asid);
-		local_irq_restore(flags);
-	}
-}
-
-static void sh64_icache_inv_user_page_range(struct mm_struct *mm,
-			  unsigned long start, unsigned long end)
-{
-	/* Used for invalidating big chunks of I-cache, i.e. assume the range
-	   is whole pages.  If 'start' or 'end' is not page aligned, the code
-	   is conservative and invalidates to the ends of the enclosing pages.
-	   This is functionally OK, just a performance loss. */
-
-	/* See the comments below in sh64_dcache_purge_user_range() regarding
-	   the choice of algorithm.  However, for the I-cache option (2) isn't
-	   available because there are no physical tags so aliases can't be
-	   resolved.  The icbi instruction has to be used through the user
-	   mapping.   Because icbi is cheaper than ocbp on a cache hit, it
-	   would be cheaper to use the selective code for a large range than is
-	   possible with the D-cache.  Just assume 64 for now as a working
-	   figure.
-	   */
-	int n_pages;
-
-	if (!mm)
-		return;
-
-	n_pages = ((end - start) >> PAGE_SHIFT);
-	if (n_pages >= 64) {
-		sh64_icache_inv_all();
-	} else {
-		unsigned long aligned_start;
-		unsigned long eaddr;
-		unsigned long after_last_page_start;
-		unsigned long mm_asid, current_asid;
-		unsigned long flags = 0;
-
-		mm_asid = cpu_asid(smp_processor_id(), mm);
-		current_asid = get_asid();
-
-		if (mm_asid != current_asid) {
-			/* Switch ASID and run the invalidate loop under cli */
-			local_irq_save(flags);
-			switch_and_save_asid(mm_asid);
-		}
-
-		aligned_start = start & PAGE_MASK;
-		after_last_page_start = PAGE_SIZE + ((end - 1) & PAGE_MASK);
-
-		while (aligned_start < after_last_page_start) {
-			struct vm_area_struct *vma;
-			unsigned long vma_end;
-			vma = find_vma(mm, aligned_start);
-			if (!vma || (aligned_start <= vma->vm_end)) {
-				/* Avoid getting stuck in an error condition */
-				aligned_start += PAGE_SIZE;
-				continue;
-			}
-			vma_end = vma->vm_end;
-			if (vma->vm_flags & VM_EXEC) {
-				/* Executable */
-				eaddr = aligned_start;
-				while (eaddr < vma_end) {
-					sh64_icache_inv_user_page(vma, eaddr);
-					eaddr += PAGE_SIZE;
-				}
-			}
-			aligned_start = vma->vm_end; /* Skip to start of next region */
-		}
-
-		if (mm_asid != current_asid) {
-			switch_and_save_asid(current_asid);
-			local_irq_restore(flags);
-		}
-	}
-}
-
-static void sh64_icache_inv_current_user_range(unsigned long start, unsigned long end)
-{
-	/* The icbi instruction never raises ITLBMISS.  i.e. if there's not a
-	   cache hit on the virtual tag the instruction ends there, without a
-	   TLB lookup. */
-
-	unsigned long long aligned_start;
-	unsigned long long ull_end;
-	unsigned long long addr;
-
-	ull_end = end;
-
-	/* Just invalidate over the range using the natural addresses.  TLB
-	   miss handling will be OK (TBC).  Since it's for the current process,
-	   either we're already in the right ASID context, or the ASIDs have
-	   been recycled since we were last active in which case we might just
-	   invalidate another processes I-cache entries : no worries, just a
-	   performance drop for him. */
-	aligned_start = L1_CACHE_ALIGN(start);
-	addr = aligned_start;
-	while (addr < ull_end) {
-		__asm__ __volatile__ ("icbi %0, 0" : : "r" (addr));
-		__asm__ __volatile__ ("nop");
-		__asm__ __volatile__ ("nop");
-		addr += L1_CACHE_BYTES;
-	}
-}
-
-/* Buffer used as the target of alloco instructions to purge data from cache
-   sets by natural eviction. -- RPC */
-#define DUMMY_ALLOCO_AREA_SIZE ((L1_CACHE_BYTES << 10) + (1024 * 4))
-static unsigned char dummy_alloco_area[DUMMY_ALLOCO_AREA_SIZE] __cacheline_aligned = { 0, };
-
-static inline void sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets)
-{
-	/* Purge all ways in a particular block of sets, specified by the base
-	   set number and number of sets.  Can handle wrap-around, if that's
-	   needed.  */
-
-	int dummy_buffer_base_set;
-	unsigned long long eaddr, eaddr0, eaddr1;
-	int j;
-	int set_offset;
-
-	dummy_buffer_base_set = ((int)&dummy_alloco_area &
-				 cpu_data->dcache.entry_mask) >>
-				 cpu_data->dcache.entry_shift;
-	set_offset = sets_to_purge_base - dummy_buffer_base_set;
-
-	for (j = 0; j < n_sets; j++, set_offset++) {
-		set_offset &= (cpu_data->dcache.sets - 1);
-		eaddr0 = (unsigned long long)dummy_alloco_area +
-			(set_offset << cpu_data->dcache.entry_shift);
-
-		/*
-		 * Do one alloco which hits the required set per cache
-		 * way.  For write-back mode, this will purge the #ways
-		 * resident lines.  There's little point unrolling this
-		 * loop because the allocos stall more if they're too
-		 * close together.
-		 */
-		eaddr1 = eaddr0 + cpu_data->dcache.way_size *
-				  cpu_data->dcache.ways;
-
-		for (eaddr = eaddr0; eaddr < eaddr1;
-		     eaddr += cpu_data->dcache.way_size) {
-			__asm__ __volatile__ ("alloco %0, 0" : : "r" (eaddr));
-			__asm__ __volatile__ ("synco"); /* TAKum03020 */
-		}
-
-		eaddr1 = eaddr0 + cpu_data->dcache.way_size *
-				  cpu_data->dcache.ways;
-
-		for (eaddr = eaddr0; eaddr < eaddr1;
-		     eaddr += cpu_data->dcache.way_size) {
-			/*
-			 * Load from each address.  Required because
-			 * alloco is a NOP if the cache is write-through.
-			 */
-			if (test_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags)))
-				__raw_readb((unsigned long)eaddr);
-		}
-	}
-
-	/*
-	 * Don't use OCBI to invalidate the lines.  That costs cycles
-	 * directly.  If the dummy block is just left resident, it will
-	 * naturally get evicted as required.
-	 */
-}
-
-/*
- * Purge the entire contents of the dcache.  The most efficient way to
- * achieve this is to use alloco instructions on a region of unused
- * memory equal in size to the cache, thereby causing the current
- * contents to be discarded by natural eviction.  The alternative, namely
- * reading every tag, setting up a mapping for the corresponding page and
- * doing an OCBP for the line, would be much more expensive.
- */
-static void sh64_dcache_purge_all(void)
-{
-
-	sh64_dcache_purge_sets(0, cpu_data->dcache.sets);
-}
-
-
-/* Assumes this address (+ (2**n_synbits) pages up from it) aren't used for
-   anything else in the kernel */
-#define MAGIC_PAGE0_START 0xffffffffec000000ULL
-
-/* Purge the physical page 'paddr' from the cache.  It's known that any
- * cache lines requiring attention have the same page colour as the the
- * address 'eaddr'.
- *
- * This relies on the fact that the D-cache matches on physical tags when
- * no virtual tag matches.  So we create an alias for the original page
- * and purge through that.  (Alternatively, we could have done this by
- * switching ASID to match the original mapping and purged through that,
- * but that involves ASID switching cost + probably a TLBMISS + refill
- * anyway.)
- */
-static void sh64_dcache_purge_coloured_phy_page(unsigned long paddr,
-					        unsigned long eaddr)
-{
-	unsigned long long magic_page_start;
-	unsigned long long magic_eaddr, magic_eaddr_end;
-
-	magic_page_start = MAGIC_PAGE0_START + (eaddr & CACHE_OC_SYN_MASK);
-
-	/* As long as the kernel is not pre-emptible, this doesn't need to be
-	   under cli/sti. */
-	sh64_setup_dtlb_cache_slot(magic_page_start, get_asid(), paddr);
-
-	magic_eaddr = magic_page_start;
-	magic_eaddr_end = magic_eaddr + PAGE_SIZE;
-
-	while (magic_eaddr < magic_eaddr_end) {
-		/* Little point in unrolling this loop - the OCBPs are blocking
-		   and won't go any quicker (i.e. the loop overhead is parallel
-		   to part of the OCBP execution.) */
-		__asm__ __volatile__ ("ocbp %0, 0" : : "r" (magic_eaddr));
-		magic_eaddr += L1_CACHE_BYTES;
-	}
-
-	sh64_teardown_dtlb_cache_slot();
-}
-
-/*
- * Purge a page given its physical start address, by creating a temporary
- * 1 page mapping and purging across that.  Even if we know the virtual
- * address (& vma or mm) of the page, the method here is more elegant
- * because it avoids issues of coping with page faults on the purge
- * instructions (i.e. no special-case code required in the critical path
- * in the TLB miss handling).
- */
-static void sh64_dcache_purge_phy_page(unsigned long paddr)
-{
-	unsigned long long eaddr_start, eaddr, eaddr_end;
-	int i;
-
-	/* As long as the kernel is not pre-emptible, this doesn't need to be
-	   under cli/sti. */
-	eaddr_start = MAGIC_PAGE0_START;
-	for (i = 0; i < (1 << CACHE_OC_N_SYNBITS); i++) {
-		sh64_setup_dtlb_cache_slot(eaddr_start, get_asid(), paddr);
-
-		eaddr = eaddr_start;
-		eaddr_end = eaddr + PAGE_SIZE;
-		while (eaddr < eaddr_end) {
-			__asm__ __volatile__ ("ocbp %0, 0" : : "r" (eaddr));
-			eaddr += L1_CACHE_BYTES;
-		}
-
-		sh64_teardown_dtlb_cache_slot();
-		eaddr_start += PAGE_SIZE;
-	}
-}
-
-static void sh64_dcache_purge_user_pages(struct mm_struct *mm,
-				unsigned long addr, unsigned long end)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-	pte_t entry;
-	spinlock_t *ptl;
-	unsigned long paddr;
-
-	if (!mm)
-		return; /* No way to find physical address of page */
-
-	pgd = pgd_offset(mm, addr);
-	if (pgd_bad(*pgd))
-		return;
-
-	pud = pud_offset(pgd, addr);
-	if (pud_none(*pud) || pud_bad(*pud))
-		return;
-
-	pmd = pmd_offset(pud, addr);
-	if (pmd_none(*pmd) || pmd_bad(*pmd))
-		return;
-
-	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
-	do {
-		entry = *pte;
-		if (pte_none(entry) || !pte_present(entry))
-			continue;
-		paddr = pte_val(entry) & PAGE_MASK;
-		sh64_dcache_purge_coloured_phy_page(paddr, addr);
-	} while (pte++, addr += PAGE_SIZE, addr != end);
-	pte_unmap_unlock(pte - 1, ptl);
-}
-
-/*
- * There are at least 5 choices for the implementation of this, with
- * pros (+), cons(-), comments(*):
- *
- * 1. ocbp each line in the range through the original user's ASID
- *    + no lines spuriously evicted
- *    - tlbmiss handling (must either handle faults on demand => extra
- *	special-case code in tlbmiss critical path), or map the page in
- *	advance (=> flush_tlb_range in advance to avoid multiple hits)
- *    - ASID switching
- *    - expensive for large ranges
- *
- * 2. temporarily map each page in the range to a special effective
- *    address and ocbp through the temporary mapping; relies on the
- *    fact that SH-5 OCB* always do TLB lookup and match on ptags (they
- *    never look at the etags)
- *    + no spurious evictions
- *    - expensive for large ranges
- *    * surely cheaper than (1)
- *
- * 3. walk all the lines in the cache, check the tags, if a match
- *    occurs create a page mapping to ocbp the line through
- *    + no spurious evictions
- *    - tag inspection overhead
- *    - (especially for small ranges)
- *    - potential cost of setting up/tearing down page mapping for
- *	every line that matches the range
- *    * cost partly independent of range size
- *
- * 4. walk all the lines in the cache, check the tags, if a match
- *    occurs use 4 * alloco to purge the line (+3 other probably
- *    innocent victims) by natural eviction
- *    + no tlb mapping overheads
- *    - spurious evictions
- *    - tag inspection overhead
- *
- * 5. implement like flush_cache_all
- *    + no tag inspection overhead
- *    - spurious evictions
- *    - bad for small ranges
- *
- * (1) can be ruled out as more expensive than (2).  (2) appears best
- * for small ranges.  The choice between (3), (4) and (5) for large
- * ranges and the range size for the large/small boundary need
- * benchmarking to determine.
- *
- * For now use approach (2) for small ranges and (5) for large ones.
- */
-static void sh64_dcache_purge_user_range(struct mm_struct *mm,
-			  unsigned long start, unsigned long end)
-{
-	int n_pages = ((end - start) >> PAGE_SHIFT);
-
-	if (n_pages >= 64 || ((start ^ (end - 1)) & PMD_MASK)) {
-		sh64_dcache_purge_all();
-	} else {
-		/* Small range, covered by a single page table page */
-		start &= PAGE_MASK;	/* should already be so */
-		end = PAGE_ALIGN(end);	/* should already be so */
-		sh64_dcache_purge_user_pages(mm, start, end);
-	}
-}
-
-/*
- * Invalidate the entire contents of both caches, after writing back to
- * memory any dirty data from the D-cache.
- */
-static void sh5_flush_cache_all(void *unused)
-{
-	sh64_dcache_purge_all();
-	sh64_icache_inv_all();
-}
-
-/*
- * Invalidate an entire user-address space from both caches, after
- * writing back dirty data (e.g. for shared mmap etc).
- *
- * This could be coded selectively by inspecting all the tags then
- * doing 4*alloco on any set containing a match (as for
- * flush_cache_range), but fork/exit/execve (where this is called from)
- * are expensive anyway.
- *
- * Have to do a purge here, despite the comments re I-cache below.
- * There could be odd-coloured dirty data associated with the mm still
- * in the cache - if this gets written out through natural eviction
- * after the kernel has reused the page there will be chaos.
- *
- * The mm being torn down won't ever be active again, so any Icache
- * lines tagged with its ASID won't be visible for the rest of the
- * lifetime of this ASID cycle.  Before the ASID gets reused, there
- * will be a flush_cache_all.  Hence we don't need to touch the
- * I-cache.  This is similar to the lack of action needed in
- * flush_tlb_mm - see fault.c.
- */
-static void sh5_flush_cache_mm(void *unused)
-{
-	sh64_dcache_purge_all();
-}
-
-/*
- * Invalidate (from both caches) the range [start,end) of virtual
- * addresses from the user address space specified by mm, after writing
- * back any dirty data.
- *
- * Note, 'end' is 1 byte beyond the end of the range to flush.
- */
-static void sh5_flush_cache_range(void *args)
-{
-	struct flusher_data *data = args;
-	struct vm_area_struct *vma;
-	unsigned long start, end;
-
-	vma = data->vma;
-	start = data->addr1;
-	end = data->addr2;
-
-	sh64_dcache_purge_user_range(vma->vm_mm, start, end);
-	sh64_icache_inv_user_page_range(vma->vm_mm, start, end);
-}
-
-/*
- * Invalidate any entries in either cache for the vma within the user
- * address space vma->vm_mm for the page starting at virtual address
- * 'eaddr'.   This seems to be used primarily in breaking COW.  Note,
- * the I-cache must be searched too in case the page in question is
- * both writable and being executed from (e.g. stack trampolines.)
- *
- * Note, this is called with pte lock held.
- */
-static void sh5_flush_cache_page(void *args)
-{
-	struct flusher_data *data = args;
-	struct vm_area_struct *vma;
-	unsigned long eaddr, pfn;
-
-	vma = data->vma;
-	eaddr = data->addr1;
-	pfn = data->addr2;
-
-	sh64_dcache_purge_phy_page(pfn << PAGE_SHIFT);
-
-	if (vma->vm_flags & VM_EXEC)
-		sh64_icache_inv_user_page(vma, eaddr);
-}
-
-static void sh5_flush_dcache_page(void *page)
-{
-	sh64_dcache_purge_phy_page(page_to_phys((struct page *)page));
-	wmb();
-}
-
-/*
- * Flush the range [start,end] of kernel virtual address space from
- * the I-cache.  The corresponding range must be purged from the
- * D-cache also because the SH-5 doesn't have cache snooping between
- * the caches.  The addresses will be visible through the superpage
- * mapping, therefore it's guaranteed that there no cache entries for
- * the range in cache sets of the wrong colour.
- */
-static void sh5_flush_icache_range(void *args)
-{
-	struct flusher_data *data = args;
-	unsigned long start, end;
-
-	start = data->addr1;
-	end = data->addr2;
-
-	__flush_purge_region((void *)start, end);
-	wmb();
-	sh64_icache_inv_kernel_range(start, end);
-}
-
-/*
- * For the address range [start,end), write back the data from the
- * D-cache and invalidate the corresponding region of the I-cache for the
- * current process.  Used to flush signal trampolines on the stack to
- * make them executable.
- */
-static void sh5_flush_cache_sigtramp(void *vaddr)
-{
-	unsigned long end = (unsigned long)vaddr + L1_CACHE_BYTES;
-
-	__flush_wback_region(vaddr, L1_CACHE_BYTES);
-	wmb();
-	sh64_icache_inv_current_user_range((unsigned long)vaddr, end);
-}
-
-void __init sh5_cache_init(void)
-{
-	local_flush_cache_all		= sh5_flush_cache_all;
-	local_flush_cache_mm		= sh5_flush_cache_mm;
-	local_flush_cache_dup_mm	= sh5_flush_cache_mm;
-	local_flush_cache_page		= sh5_flush_cache_page;
-	local_flush_cache_range		= sh5_flush_cache_range;
-	local_flush_dcache_page		= sh5_flush_dcache_page;
-	local_flush_icache_range	= sh5_flush_icache_range;
-	local_flush_cache_sigtramp	= sh5_flush_cache_sigtramp;
-
-	/* Reserve a slot for dcache colouring in the DTLB */
-	dtlb_cache_slot	= sh64_get_wired_dtlb_entry();
-
-	sh4__flush_region_init();
-}
diff --git a/arch/sh/mm/cache-sh7705.c b/arch/sh/mm/cache-sh7705.c
index ed25eba80667..9b63a53a5e46 100644
--- a/arch/sh/mm/cache-sh7705.c
+++ b/arch/sh/mm/cache-sh7705.c
@@ -13,15 +13,14 @@
 #include <linux/mman.h>
 #include <linux/mm.h>
 #include <linux/fs.h>
+#include <linux/pagemap.h>
 #include <linux/threads.h>
 #include <asm/addrspace.h>
 #include <asm/page.h>
-#include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/cache.h>
 #include <asm/io.h>
 #include <linux/uaccess.h>
-#include <asm/pgalloc.h>
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 
diff --git a/arch/sh/mm/cache.c b/arch/sh/mm/cache.c
index 464f160a9576..3aef78ceb820 100644
--- a/arch/sh/mm/cache.c
+++ b/arch/sh/mm/cache.c
@@ -355,12 +355,6 @@ void __init cpu_cache_init(void)
 		}
 	}
 
-	if (boot_cpu_data.family == CPU_FAMILY_SH5) {
-		extern void __weak sh5_cache_init(void);
-
-		sh5_cache_init();
-	}
-
 skip:
 	emit_cache_params();
 }
diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c
index 3169a343a5ab..0de206c1acfe 100644
--- a/arch/sh/mm/consistent.c
+++ b/arch/sh/mm/consistent.c
@@ -57,8 +57,6 @@ int __init platform_resource_setup_memory(struct platform_device *pdev,
 		return -ENOMEM;
 	}
 
-	memset(buf, 0, memsize);
-
 	r->flags = IORESOURCE_MEM;
 	r->start = dma_handle;
 	r->end = r->start + memsize - 1;
diff --git a/arch/sh/mm/extable_64.c b/arch/sh/mm/extable_64.c
deleted file mode 100644
index 7a3b4d33d2e7..000000000000
--- a/arch/sh/mm/extable_64.c
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * arch/sh/mm/extable_64.c
- *
- * Copyright (C) 2003 Richard Curnow
- * Copyright (C) 2003, 2004  Paul Mundt
- *
- * Cloned from the 2.5 SH version..
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include <linux/bsearch.h>
-#include <linux/rwsem.h>
-#include <linux/extable.h>
-#include <linux/uaccess.h>
-
-extern unsigned long copy_user_memcpy, copy_user_memcpy_end;
-extern void __copy_user_fixup(void);
-
-static const struct exception_table_entry __copy_user_fixup_ex = {
-	.fixup = (unsigned long)&__copy_user_fixup,
-};
-
-/*
- * Some functions that may trap due to a bad user-mode address have too
- * many loads and stores in them to make it at all practical to label
- * each one and put them all in the main exception table.
- *
- * In particular, the fast memcpy routine is like this.  It's fix-up is
- * just to fall back to a slow byte-at-a-time copy, which is handled the
- * conventional way.  So it's functionally OK to just handle any trap
- * occurring in the fast memcpy with that fixup.
- */
-static const struct exception_table_entry *check_exception_ranges(unsigned long addr)
-{
-	if ((addr >= (unsigned long)&copy_user_memcpy) &&
-	    (addr <= (unsigned long)&copy_user_memcpy_end))
-		return &__copy_user_fixup_ex;
-
-	return NULL;
-}
-
-static int cmp_ex_search(const void *key, const void *elt)
-{
-	const struct exception_table_entry *_elt = elt;
-	unsigned long _key = *(unsigned long *)key;
-
-	/* avoid overflow */
-	if (_key > _elt->insn)
-		return 1;
-	if (_key < _elt->insn)
-		return -1;
-	return 0;
-}
-
-/* Simple binary search */
-const struct exception_table_entry *
-search_extable(const struct exception_table_entry *base,
-		 const size_t num,
-		 unsigned long value)
-{
-	const struct exception_table_entry *mid;
-
-	mid = check_exception_ranges(value);
-	if (mid)
-		return mid;
-
-	return bsearch(&value, base, num,
-		       sizeof(struct exception_table_entry), cmp_ex_search);
-}
-
-int fixup_exception(struct pt_regs *regs)
-{
-	const struct exception_table_entry *fixup;
-
-	fixup = search_exception_tables(regs->pc);
-	if (fixup) {
-		regs->pc = fixup->fixup;
-		return 1;
-	}
-
-	return 0;
-}
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
index 5f51456f4fc7..acd2f5e50bfc 100644
--- a/arch/sh/mm/fault.c
+++ b/arch/sh/mm/fault.c
@@ -47,12 +47,13 @@ static void show_pte(struct mm_struct *mm, unsigned long addr)
 			pgd = swapper_pg_dir;
 	}
 
-	printk(KERN_ALERT "pgd = %p\n", pgd);
+	pr_alert("pgd = %p\n", pgd);
 	pgd += pgd_index(addr);
-	printk(KERN_ALERT "[%08lx] *pgd=%0*Lx", addr,
-	       (u32)(sizeof(*pgd) * 2), (u64)pgd_val(*pgd));
+	pr_alert("[%08lx] *pgd=%0*llx", addr, (u32)(sizeof(*pgd) * 2),
+		 (u64)pgd_val(*pgd));
 
 	do {
+		p4d_t *p4d;
 		pud_t *pud;
 		pmd_t *pmd;
 		pte_t *pte;
@@ -61,33 +62,46 @@ static void show_pte(struct mm_struct *mm, unsigned long addr)
 			break;
 
 		if (pgd_bad(*pgd)) {
-			printk("(bad)");
+			pr_cont("(bad)");
 			break;
 		}
 
-		pud = pud_offset(pgd, addr);
+		p4d = p4d_offset(pgd, addr);
+		if (PTRS_PER_P4D != 1)
+			pr_cont(", *p4d=%0*llx", (u32)(sizeof(*p4d) * 2),
+				(u64)p4d_val(*p4d));
+
+		if (p4d_none(*p4d))
+			break;
+
+		if (p4d_bad(*p4d)) {
+			pr_cont("(bad)");
+			break;
+		}
+
+		pud = pud_offset(p4d, addr);
 		if (PTRS_PER_PUD != 1)
-			printk(", *pud=%0*Lx", (u32)(sizeof(*pud) * 2),
-			       (u64)pud_val(*pud));
+			pr_cont(", *pud=%0*llx", (u32)(sizeof(*pud) * 2),
+				(u64)pud_val(*pud));
 
 		if (pud_none(*pud))
 			break;
 
 		if (pud_bad(*pud)) {
-			printk("(bad)");
+			pr_cont("(bad)");
 			break;
 		}
 
 		pmd = pmd_offset(pud, addr);
 		if (PTRS_PER_PMD != 1)
-			printk(", *pmd=%0*Lx", (u32)(sizeof(*pmd) * 2),
-			       (u64)pmd_val(*pmd));
+			pr_cont(", *pmd=%0*llx", (u32)(sizeof(*pmd) * 2),
+				(u64)pmd_val(*pmd));
 
 		if (pmd_none(*pmd))
 			break;
 
 		if (pmd_bad(*pmd)) {
-			printk("(bad)");
+			pr_cont("(bad)");
 			break;
 		}
 
@@ -96,17 +110,18 @@ static void show_pte(struct mm_struct *mm, unsigned long addr)
 			break;
 
 		pte = pte_offset_kernel(pmd, addr);
-		printk(", *pte=%0*Lx", (u32)(sizeof(*pte) * 2),
-		       (u64)pte_val(*pte));
+		pr_cont(", *pte=%0*llx", (u32)(sizeof(*pte) * 2),
+			(u64)pte_val(*pte));
 	} while (0);
 
-	printk("\n");
+	pr_cont("\n");
 }
 
 static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
 {
 	unsigned index = pgd_index(address);
 	pgd_t *pgd_k;
+	p4d_t *p4d, *p4d_k;
 	pud_t *pud, *pud_k;
 	pmd_t *pmd, *pmd_k;
 
@@ -116,8 +131,13 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
 	if (!pgd_present(*pgd_k))
 		return NULL;
 
-	pud = pud_offset(pgd, address);
-	pud_k = pud_offset(pgd_k, address);
+	p4d = p4d_offset(pgd, address);
+	p4d_k = p4d_offset(pgd_k, address);
+	if (!p4d_present(*p4d_k))
+		return NULL;
+
+	pud = pud_offset(p4d, address);
+	pud_k = pud_offset(p4d_k, address);
 	if (!pud_present(*pud_k))
 		return NULL;
 
@@ -188,14 +208,11 @@ show_fault_oops(struct pt_regs *regs, unsigned long address)
 	if (!oops_may_print())
 		return;
 
-	printk(KERN_ALERT "BUG: unable to handle kernel ");
-	if (address < PAGE_SIZE)
-		printk(KERN_CONT "NULL pointer dereference");
-	else
-		printk(KERN_CONT "paging request");
-
-	printk(KERN_CONT " at %08lx\n", address);
-	printk(KERN_ALERT "PC:");
+	pr_alert("BUG: unable to handle kernel %s at %08lx\n",
+		 address < PAGE_SIZE ? "NULL pointer dereference"
+				     : "paging request",
+		 address);
+	pr_alert("PC:");
 	printk_address(regs->pc, 1);
 
 	show_pte(NULL, address);
@@ -221,8 +238,6 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 	show_fault_oops(regs, address);
 
 	die("Oops", regs, error_code);
-	bust_spinlocks(0);
-	do_exit(SIGKILL);
 }
 
 static void
@@ -261,7 +276,7 @@ __bad_area(struct pt_regs *regs, unsigned long error_code,
 	 * Something tried to access memory that isn't in our memory map..
 	 * Fix it, but check if it's kernel or user first..
 	 */
-	up_read(&mm->mmap_sem);
+	mmap_read_unlock(mm);
 
 	__bad_area_nosemaphore(regs, error_code, address, si_code);
 }
@@ -285,7 +300,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address)
 	struct task_struct *tsk = current;
 	struct mm_struct *mm = tsk->mm;
 
-	up_read(&mm->mmap_sem);
+	mmap_read_unlock(mm);
 
 	/* Kernel mode? Handle exceptions or die: */
 	if (!user_mode(regs))
@@ -302,25 +317,25 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
 	 * Pagefault was interrupted by SIGKILL. We have no reason to
 	 * continue pagefault.
 	 */
-	if (fatal_signal_pending(current)) {
-		if (!(fault & VM_FAULT_RETRY))
-			up_read(&current->mm->mmap_sem);
+	if (fault_signal_pending(fault, regs)) {
 		if (!user_mode(regs))
 			no_context(regs, error_code, address);
 		return 1;
 	}
 
+	/* Release mmap_lock first if necessary */
+	if (!(fault & VM_FAULT_RETRY))
+		mmap_read_unlock(current->mm);
+
 	if (!(fault & VM_FAULT_ERROR))
 		return 0;
 
 	if (fault & VM_FAULT_OOM) {
 		/* Kernel mode? Handle exceptions or die: */
 		if (!user_mode(regs)) {
-			up_read(&current->mm->mmap_sem);
 			no_context(regs, error_code, address);
 			return 1;
 		}
-		up_read(&current->mm->mmap_sem);
 
 		/*
 		 * We ran out of memory, call the OOM killer, and return the
@@ -355,7 +370,7 @@ static inline int access_error(int error_code, struct vm_area_struct *vma)
 		return 1;
 
 	/* read, not present: */
-	if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
+	if (unlikely(!vma_is_accessible(vma)))
 		return 1;
 
 	return 0;
@@ -380,7 +395,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 	struct mm_struct *mm;
 	struct vm_area_struct * vma;
 	vm_fault_t fault;
-	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+	unsigned int flags = FAULT_FLAG_DEFAULT;
 
 	tsk = current;
 	mm = tsk->mm;
@@ -424,7 +439,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 	}
 
 retry:
-	down_read(&mm->mmap_sem);
+	mmap_read_lock(mm);
 
 	vma = find_vma(mm, address);
 	if (unlikely(!vma)) {
@@ -464,34 +479,26 @@ good_area:
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault.
 	 */
-	fault = handle_mm_fault(vma, address, flags);
+	fault = handle_mm_fault(vma, address, flags, regs);
 
 	if (unlikely(fault & (VM_FAULT_RETRY | VM_FAULT_ERROR)))
 		if (mm_fault_error(regs, error_code, address, fault))
 			return;
 
-	if (flags & FAULT_FLAG_ALLOW_RETRY) {
-		if (fault & VM_FAULT_MAJOR) {
-			tsk->maj_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
-				      regs, address);
-		} else {
-			tsk->min_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
-				      regs, address);
-		}
-		if (fault & VM_FAULT_RETRY) {
-			flags &= ~FAULT_FLAG_ALLOW_RETRY;
-			flags |= FAULT_FLAG_TRIED;
-
-			/*
-			 * No need to up_read(&mm->mmap_sem) as we would
-			 * have already released it in __lock_page_or_retry
-			 * in mm/filemap.c.
-			 */
-			goto retry;
-		}
+	/* The fault is fully completed (including releasing mmap lock) */
+	if (fault & VM_FAULT_COMPLETED)
+		return;
+
+	if (fault & VM_FAULT_RETRY) {
+		flags |= FAULT_FLAG_TRIED;
+
+		/*
+		 * No need to mmap_read_unlock(mm) as we would
+		 * have already released it in __lock_page_or_retry
+		 * in mm/filemap.c.
+		 */
+		goto retry;
 	}
 
-	up_read(&mm->mmap_sem);
+	mmap_read_unlock(mm);
 }
diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c
index 960deb1f24a1..999ab5916e69 100644
--- a/arch/sh/mm/hugetlbpage.c
+++ b/arch/sh/mm/hugetlbpage.c
@@ -17,26 +17,29 @@
 #include <linux/sysctl.h>
 
 #include <asm/mman.h>
-#include <asm/pgalloc.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/cacheflush.h>
 
-pte_t *huge_pte_alloc(struct mm_struct *mm,
+pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long addr, unsigned long sz)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte = NULL;
 
 	pgd = pgd_offset(mm, addr);
 	if (pgd) {
-		pud = pud_alloc(mm, pgd, addr);
-		if (pud) {
-			pmd = pmd_alloc(mm, pud, addr);
-			if (pmd)
-				pte = pte_alloc_map(mm, pmd, addr);
+		p4d = p4d_alloc(mm, pgd, addr);
+		if (p4d) {
+			pud = pud_alloc(mm, p4d, addr);
+			if (pud) {
+				pmd = pmd_alloc(mm, pud, addr);
+				if (pmd)
+					pte = pte_alloc_map(mm, pmd, addr);
+			}
 		}
 	}
 
@@ -47,17 +50,21 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
 		       unsigned long addr, unsigned long sz)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte = NULL;
 
 	pgd = pgd_offset(mm, addr);
 	if (pgd) {
-		pud = pud_offset(pgd, addr);
-		if (pud) {
-			pmd = pmd_offset(pud, addr);
-			if (pmd)
-				pte = pte_offset_map(pmd, addr);
+		p4d = p4d_offset(pgd, addr);
+		if (p4d) {
+			pud = pud_offset(p4d, addr);
+			if (pud) {
+				pmd = pmd_offset(pud, addr);
+				if (pmd)
+					pte = pte_offset_map(pmd, addr);
+			}
 		}
 	}
 
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index d1b1ff2be17a..506784702430 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -27,7 +27,9 @@
 #include <asm/sections.h>
 #include <asm/setup.h>
 #include <asm/cache.h>
+#include <asm/pgalloc.h>
 #include <linux/sizes.h>
+#include "ioremap.h"
 
 pgd_t swapper_pg_dir[PTRS_PER_PGD];
 
@@ -45,6 +47,7 @@ void __init __weak plat_mem_setup(void)
 static pte_t *__get_pte_phys(unsigned long addr)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 
@@ -54,7 +57,13 @@ static pte_t *__get_pte_phys(unsigned long addr)
 		return NULL;
 	}
 
-	pud = pud_alloc(NULL, pgd, addr);
+	p4d = p4d_alloc(NULL, pgd, addr);
+	if (unlikely(!p4d)) {
+		p4d_ERROR(*p4d);
+		return NULL;
+	}
+
+	pud = pud_alloc(NULL, p4d, addr);
 	if (unlikely(!pud)) {
 		pud_ERROR(*pud);
 		return NULL;
@@ -172,9 +181,9 @@ void __init page_table_range_init(unsigned long start, unsigned long end,
 	unsigned long vaddr;
 
 	vaddr = start;
-	i = __pgd_offset(vaddr);
-	j = __pud_offset(vaddr);
-	k = __pmd_offset(vaddr);
+	i = pgd_index(vaddr);
+	j = pud_index(vaddr);
+	k = pmd_index(vaddr);
 	pgd = pgd_base + i;
 
 	for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) {
@@ -202,7 +211,7 @@ void __init allocate_pgdat(unsigned int nid)
 
 	get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
 
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
 	NODE_DATA(nid) = memblock_alloc_try_nid(
 				sizeof(struct pglist_data),
 				SMP_CACHE_BYTES, MEMBLOCK_LOW_LIMIT,
@@ -217,15 +226,12 @@ void __init allocate_pgdat(unsigned int nid)
 
 static void __init do_init_bootmem(void)
 {
-	struct memblock_region *reg;
+	unsigned long start_pfn, end_pfn;
+	int i;
 
 	/* Add active regions with valid PFNs. */
-	for_each_memblock(memory, reg) {
-		unsigned long start_pfn, end_pfn;
-		start_pfn = memblock_region_memory_base_pfn(reg);
-		end_pfn = memblock_region_memory_end_pfn(reg);
+	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL)
 		__add_active_range(0, start_pfn, end_pfn);
-	}
 
 	/* All of system RAM sits in node 0 for the non-NUMA case */
 	allocate_pgdat(0);
@@ -233,12 +239,6 @@ static void __init do_init_bootmem(void)
 
 	plat_mem_setup();
 
-	for_each_memblock(memory, reg) {
-		int nid = memblock_get_region_node(reg);
-
-		memory_present(nid, memblock_region_memory_base_pfn(reg),
-			memblock_region_memory_end_pfn(reg));
-	}
 	sparse_init();
 }
 
@@ -334,7 +334,7 @@ void __init paging_init(void)
 
 	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
 	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
-	free_area_init_nodes(max_zone_pfns);
+	free_area_init(max_zone_pfns);
 }
 
 unsigned int mem_init_done = 0;
@@ -359,12 +359,8 @@ void __init mem_init(void)
 
 	vsyscall_init();
 
-	mem_init_print_info(NULL);
 	pr_info("virtual kernel memory layout:\n"
 		"    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
-#ifdef CONFIG_HIGHMEM
-		"    pkmap   : 0x%08lx - 0x%08lx   (%4ld kB)\n"
-#endif
 		"    vmalloc : 0x%08lx - 0x%08lx   (%4ld MB)\n"
 		"    lowmem  : 0x%08lx - 0x%08lx   (%4ld MB) (cached)\n"
 #ifdef CONFIG_UNCACHED_MAPPING
@@ -376,11 +372,6 @@ void __init mem_init(void)
 		FIXADDR_START, FIXADDR_TOP,
 		(FIXADDR_TOP - FIXADDR_START) >> 10,
 
-#ifdef CONFIG_HIGHMEM
-		PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
-		(LAST_PKMAP*PAGE_SIZE) >> 10,
-#endif
-
 		(unsigned long)VMALLOC_START, VMALLOC_END,
 		(VMALLOC_END - VMALLOC_START) >> 20,
 
@@ -406,31 +397,24 @@ void __init mem_init(void)
 
 #ifdef CONFIG_MEMORY_HOTPLUG
 int arch_add_memory(int nid, u64 start, u64 size,
-			struct mhp_restrictions *restrictions)
+		    struct mhp_params *params)
 {
 	unsigned long start_pfn = PFN_DOWN(start);
 	unsigned long nr_pages = size >> PAGE_SHIFT;
 	int ret;
 
+	if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot))
+		return -EINVAL;
+
 	/* We only have ZONE_NORMAL, so this is easy.. */
-	ret = __add_pages(nid, start_pfn, nr_pages, restrictions);
+	ret = __add_pages(nid, start_pfn, nr_pages, params);
 	if (unlikely(ret))
 		printk("%s: Failed, __add_pages() == %d\n", __func__, ret);
 
 	return ret;
 }
 
-#ifdef CONFIG_NUMA
-int memory_add_physaddr_to_nid(u64 addr)
-{
-	/* Node 0 for now.. */
-	return 0;
-}
-EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
-#endif
-
-void arch_remove_memory(int nid, u64 start, u64 size,
-			struct vmem_altmap *altmap)
+void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
 {
 	unsigned long start_pfn = PFN_DOWN(start);
 	unsigned long nr_pages = size >> PAGE_SHIFT;
diff --git a/arch/sh/mm/ioremap.c b/arch/sh/mm/ioremap.c
index f6d02246d665..21342581144d 100644
--- a/arch/sh/mm/ioremap.c
+++ b/arch/sh/mm/ioremap.c
@@ -18,12 +18,59 @@
 #include <linux/mm.h>
 #include <linux/pci.h>
 #include <linux/io.h>
+#include <asm/io_trapped.h>
 #include <asm/page.h>
 #include <asm/pgalloc.h>
 #include <asm/addrspace.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu.h>
+#include "ioremap.h"
+
+/*
+ * On 32-bit SH, we traditionally have the whole physical address space mapped
+ * at all times (as MIPS does), so "ioremap()" and "iounmap()" do not need to do
+ * anything but place the address in the proper segment.  This is true for P1
+ * and P2 addresses, as well as some P3 ones.  However, most of the P3 addresses
+ * and newer cores using extended addressing need to map through page tables, so
+ * the ioremap() implementation becomes a bit more complicated.
+ */
+#ifdef CONFIG_29BIT
+static void __iomem *
+__ioremap_29bit(phys_addr_t offset, unsigned long size, pgprot_t prot)
+{
+	phys_addr_t last_addr = offset + size - 1;
+
+	/*
+	 * For P1 and P2 space this is trivial, as everything is already
+	 * mapped. Uncached accesses to P1 addresses are done through P2.
+	 * In the P3 case or for addresses outside of the 29-bit space,
+	 * mapping must be done by the PMB or by using page tables.
+	 */
+	if (likely(PXSEG(offset) < P3SEG && PXSEG(last_addr) < P3SEG)) {
+		u64 flags = pgprot_val(prot);
+
+		/*
+		 * Anything using the legacy PTEA space attributes needs
+		 * to be kicked down to page table mappings.
+		 */
+		if (unlikely(flags & _PAGE_PCC_MASK))
+			return NULL;
+		if (unlikely(flags & _PAGE_CACHABLE))
+			return (void __iomem *)P1SEGADDR(offset);
+
+		return (void __iomem *)P2SEGADDR(offset);
+	}
+
+	/* P4 addresses above the store queues are always mapped. */
+	if (unlikely(offset >= P3_ADDR_MAX))
+		return (void __iomem *)P4SEGADDR(offset);
+
+	return NULL;
+}
+#else
+#define __ioremap_29bit(offset, size, prot)		NULL
+#endif /* CONFIG_29BIT */
 
 /*
  * Remap an arbitrary physical address space into the kernel virtual
@@ -42,6 +89,14 @@ __ioremap_caller(phys_addr_t phys_addr, unsigned long size,
 	unsigned long offset, last_addr, addr, orig_addr;
 	void __iomem *mapped;
 
+	mapped = __ioremap_trapped(phys_addr, size);
+	if (mapped)
+		return mapped;
+
+	mapped = __ioremap_29bit(phys_addr, size, pgprot);
+	if (mapped)
+		return mapped;
+
 	/* Don't allow wraparound or zero size */
 	last_addr = phys_addr + size - 1;
 	if (!size || last_addr < phys_addr)
diff --git a/arch/sh/mm/ioremap.h b/arch/sh/mm/ioremap.h
new file mode 100644
index 000000000000..f2544e721a35
--- /dev/null
+++ b/arch/sh/mm/ioremap.h
@@ -0,0 +1,23 @@
+#ifndef _SH_MM_IOREMAP_H
+#define _SH_MM_IOREMAP_H 1
+
+#ifdef CONFIG_IOREMAP_FIXED
+void __iomem *ioremap_fixed(phys_addr_t, unsigned long, pgprot_t);
+int iounmap_fixed(void __iomem *);
+void ioremap_fixed_init(void);
+#else
+static inline void __iomem *
+ioremap_fixed(phys_addr_t phys_addr, unsigned long size, pgprot_t prot)
+{
+	BUG();
+	return NULL;
+}
+static inline void ioremap_fixed_init(void)
+{
+}
+static inline int iounmap_fixed(void __iomem *addr)
+{
+	return -EINVAL;
+}
+#endif /* CONFIG_IOREMAP_FIXED */
+#endif /* _SH_MM_IOREMAP_H */
diff --git a/arch/sh/mm/ioremap_fixed.c b/arch/sh/mm/ioremap_fixed.c
index 07e744d75fa0..136113bcac25 100644
--- a/arch/sh/mm/ioremap_fixed.c
+++ b/arch/sh/mm/ioremap_fixed.c
@@ -18,12 +18,12 @@
 #include <linux/proc_fs.h>
 #include <asm/fixmap.h>
 #include <asm/page.h>
-#include <asm/pgalloc.h>
 #include <asm/addrspace.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu.h>
 #include <asm/mmu_context.h>
+#include "ioremap.h"
 
 struct ioremap_map {
 	void __iomem *addr;
diff --git a/arch/sh/mm/kmap.c b/arch/sh/mm/kmap.c
index 9e6b38b03cf7..73fd7cc99430 100644
--- a/arch/sh/mm/kmap.c
+++ b/arch/sh/mm/kmap.c
@@ -14,9 +14,6 @@
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 
-#define kmap_get_fixmap_pte(vaddr)                                     \
-	pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)), (vaddr))
-
 static pte_t *kmap_coherent_pte;
 
 void __init kmap_coherent_init(void)
@@ -25,7 +22,7 @@ void __init kmap_coherent_init(void)
 
 	/* cache the first coherent kmap pte */
 	vaddr = __fix_to_virt(FIX_CMAP_BEGIN);
-	kmap_coherent_pte = kmap_get_fixmap_pte(vaddr);
+	kmap_coherent_pte = virt_to_kpte(vaddr);
 }
 
 void *kmap_coherent(struct page *page, unsigned long addr)
diff --git a/arch/sh/mm/mmap.c b/arch/sh/mm/mmap.c
index 6a1a1297baae..b82199878b45 100644
--- a/arch/sh/mm/mmap.c
+++ b/arch/sh/mm/mmap.c
@@ -19,6 +19,26 @@ unsigned long shm_align_mask = PAGE_SIZE - 1;	/* Sane caches */
 EXPORT_SYMBOL(shm_align_mask);
 
 #ifdef CONFIG_MMU
+static const pgprot_t protection_map[16] = {
+	[VM_NONE]					= PAGE_NONE,
+	[VM_READ]					= PAGE_READONLY,
+	[VM_WRITE]					= PAGE_COPY,
+	[VM_WRITE | VM_READ]				= PAGE_COPY,
+	[VM_EXEC]					= PAGE_EXECREAD,
+	[VM_EXEC | VM_READ]				= PAGE_EXECREAD,
+	[VM_EXEC | VM_WRITE]				= PAGE_COPY,
+	[VM_EXEC | VM_WRITE | VM_READ]			= PAGE_COPY,
+	[VM_SHARED]					= PAGE_NONE,
+	[VM_SHARED | VM_READ]				= PAGE_READONLY,
+	[VM_SHARED | VM_WRITE]				= PAGE_WRITEONLY,
+	[VM_SHARED | VM_WRITE | VM_READ]		= PAGE_SHARED,
+	[VM_SHARED | VM_EXEC]				= PAGE_EXECREAD,
+	[VM_SHARED | VM_EXEC | VM_READ]			= PAGE_EXECREAD,
+	[VM_SHARED | VM_EXEC | VM_WRITE]		= PAGE_RWX,
+	[VM_SHARED | VM_EXEC | VM_WRITE | VM_READ]	= PAGE_RWX
+};
+DECLARE_VM_GET_PAGE_PROT
+
 /*
  * To avoid cache aliases, we map the shared page with same color.
  */
diff --git a/arch/sh/mm/nommu.c b/arch/sh/mm/nommu.c
index dca946f426c6..78c4b6e6d33b 100644
--- a/arch/sh/mm/nommu.c
+++ b/arch/sh/mm/nommu.c
@@ -10,7 +10,6 @@
 #include <linux/init.h>
 #include <linux/string.h>
 #include <linux/mm.h>
-#include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/page.h>
 #include <linux/uaccess.h>
@@ -29,9 +28,9 @@ __kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n)
 	return 0;
 }
 
-__kernel_size_t __clear_user(void *to, __kernel_size_t n)
+__kernel_size_t __clear_user(void __user *to, __kernel_size_t n)
 {
-	memset(to, 0, n);
+	memset((__force void *)to, 0, n);
 	return 0;
 }
 
diff --git a/arch/sh/mm/numa.c b/arch/sh/mm/numa.c
index f7e4439deb17..50f0dc1744d0 100644
--- a/arch/sh/mm/numa.c
+++ b/arch/sh/mm/numa.c
@@ -53,7 +53,4 @@ void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end)
 
 	/* It's up */
 	node_set_online(nid);
-
-	/* Kick sparsemem */
-	sparse_memory_present_with_active_regions(nid);
 }
diff --git a/arch/sh/mm/pgtable.c b/arch/sh/mm/pgtable.c
index 5c8f9247c3c2..cf7ce4b57359 100644
--- a/arch/sh/mm/pgtable.c
+++ b/arch/sh/mm/pgtable.c
@@ -2,8 +2,6 @@
 #include <linux/mm.h>
 #include <linux/slab.h>
 
-#define PGALLOC_GFP GFP_KERNEL | __GFP_ZERO
-
 static struct kmem_cache *pgd_cachep;
 #if PAGETABLE_LEVELS > 2
 static struct kmem_cache *pmd_cachep;
@@ -13,6 +11,7 @@ void pgd_ctor(void *x)
 {
 	pgd_t *pgd = x;
 
+	memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
 	memcpy(pgd + USER_PTRS_PER_PGD,
 	       swapper_pg_dir + USER_PTRS_PER_PGD,
 	       (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
@@ -32,7 +31,7 @@ void pgtable_cache_init(void)
 
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	return kmem_cache_alloc(pgd_cachep, PGALLOC_GFP);
+	return kmem_cache_alloc(pgd_cachep, GFP_KERNEL);
 }
 
 void pgd_free(struct mm_struct *mm, pgd_t *pgd)
@@ -48,7 +47,7 @@ void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 
 pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
 {
-	return kmem_cache_alloc(pmd_cachep, PGALLOC_GFP);
+	return kmem_cache_alloc(pmd_cachep, GFP_KERNEL | __GFP_ZERO);
 }
 
 void pmd_free(struct mm_struct *mm, pmd_t *pmd)
diff --git a/arch/sh/mm/pmb.c b/arch/sh/mm/pmb.c
index b59bad86b31e..68eb7cc6e564 100644
--- a/arch/sh/mm/pmb.c
+++ b/arch/sh/mm/pmb.c
@@ -23,10 +23,10 @@
 #include <linux/io.h>
 #include <linux/spinlock.h>
 #include <linux/vmalloc.h>
+#include <linux/pgtable.h>
 #include <asm/cacheflush.h>
 #include <linux/sizes.h>
 #include <linux/uaccess.h>
-#include <asm/pgtable.h>
 #include <asm/page.h>
 #include <asm/mmu.h>
 #include <asm/mmu_context.h>
@@ -812,7 +812,7 @@ bool __in_29bit_mode(void)
         return (__raw_readl(PMB_PASCR) & PASCR_SE) == 0;
 }
 
-static int pmb_seq_show(struct seq_file *file, void *iter)
+static int pmb_debugfs_show(struct seq_file *file, void *iter)
 {
 	int i;
 
@@ -846,18 +846,7 @@ static int pmb_seq_show(struct seq_file *file, void *iter)
 	return 0;
 }
 
-static int pmb_debugfs_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, pmb_seq_show, NULL);
-}
-
-static const struct file_operations pmb_debugfs_fops = {
-	.owner		= THIS_MODULE,
-	.open		= pmb_debugfs_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(pmb_debugfs);
 
 static int __init pmb_debugfs_init(void)
 {
diff --git a/arch/sh/mm/tlb-sh3.c b/arch/sh/mm/tlb-sh3.c
index 869243518bb3..fb400afc2a49 100644
--- a/arch/sh/mm/tlb-sh3.c
+++ b/arch/sh/mm/tlb-sh3.c
@@ -21,7 +21,6 @@
 
 #include <asm/io.h>
 #include <linux/uaccess.h>
-#include <asm/pgalloc.h>
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 
diff --git a/arch/sh/mm/tlb-sh5.c b/arch/sh/mm/tlb-sh5.c
deleted file mode 100644
index e4bb2a8e0a69..000000000000
--- a/arch/sh/mm/tlb-sh5.c
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * arch/sh/mm/tlb-sh5.c
- *
- * Copyright (C) 2003  Paul Mundt <lethal@linux-sh.org>
- * Copyright (C) 2003  Richard Curnow <richard.curnow@superh.com>
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include <linux/mm.h>
-#include <linux/init.h>
-#include <asm/page.h>
-#include <asm/tlb.h>
-#include <asm/mmu_context.h>
-
-/**
- * sh64_tlb_init - Perform initial setup for the DTLB and ITLB.
- */
-int sh64_tlb_init(void)
-{
-	/* Assign some sane DTLB defaults */
-	cpu_data->dtlb.entries	= 64;
-	cpu_data->dtlb.step	= 0x10;
-
-	cpu_data->dtlb.first	= DTLB_FIXED | cpu_data->dtlb.step;
-	cpu_data->dtlb.next	= cpu_data->dtlb.first;
-
-	cpu_data->dtlb.last	= DTLB_FIXED |
-				  ((cpu_data->dtlb.entries - 1) *
-				   cpu_data->dtlb.step);
-
-	/* And again for the ITLB */
-	cpu_data->itlb.entries	= 64;
-	cpu_data->itlb.step	= 0x10;
-
-	cpu_data->itlb.first	= ITLB_FIXED | cpu_data->itlb.step;
-	cpu_data->itlb.next	= cpu_data->itlb.first;
-	cpu_data->itlb.last	= ITLB_FIXED |
-				  ((cpu_data->itlb.entries - 1) *
-				   cpu_data->itlb.step);
-
-	return 0;
-}
-
-/**
- * sh64_next_free_dtlb_entry - Find the next available DTLB entry
- */
-unsigned long long sh64_next_free_dtlb_entry(void)
-{
-	return cpu_data->dtlb.next;
-}
-
-/**
- * sh64_get_wired_dtlb_entry - Allocate a wired (locked-in) entry in the DTLB
- */
-unsigned long long sh64_get_wired_dtlb_entry(void)
-{
-	unsigned long long entry = sh64_next_free_dtlb_entry();
-
-	cpu_data->dtlb.first += cpu_data->dtlb.step;
-	cpu_data->dtlb.next  += cpu_data->dtlb.step;
-
-	return entry;
-}
-
-/**
- * sh64_put_wired_dtlb_entry - Free a wired (locked-in) entry in the DTLB.
- *
- * @entry:	Address of TLB slot.
- *
- * Works like a stack, last one to allocate must be first one to free.
- */
-int sh64_put_wired_dtlb_entry(unsigned long long entry)
-{
-	__flush_tlb_slot(entry);
-
-	/*
-	 * We don't do any particularly useful tracking of wired entries,
-	 * so this approach works like a stack .. last one to be allocated
-	 * has to be the first one to be freed.
-	 *
-	 * We could potentially load wired entries into a list and work on
-	 * rebalancing the list periodically (which also entails moving the
-	 * contents of a TLB entry) .. though I have a feeling that this is
-	 * more trouble than it's worth.
-	 */
-
-	/*
-	 * Entry must be valid .. we don't want any ITLB addresses!
-	 */
-	if (entry <= DTLB_FIXED)
-		return -EINVAL;
-
-	/*
-	 * Next, check if we're within range to be freed. (ie, must be the
-	 * entry beneath the first 'free' entry!
-	 */
-	if (entry < (cpu_data->dtlb.first - cpu_data->dtlb.step))
-		return -EINVAL;
-
-	/* If we are, then bring this entry back into the list */
-	cpu_data->dtlb.first	-= cpu_data->dtlb.step;
-	cpu_data->dtlb.next	= entry;
-
-	return 0;
-}
-
-/**
- * sh64_setup_tlb_slot - Load up a translation in a wired slot.
- *
- * @config_addr:	Address of TLB slot.
- * @eaddr:		Virtual address.
- * @asid:		Address Space Identifier.
- * @paddr:		Physical address.
- *
- * Load up a virtual<->physical translation for @eaddr<->@paddr in the
- * pre-allocated TLB slot @config_addr (see sh64_get_wired_dtlb_entry).
- */
-void sh64_setup_tlb_slot(unsigned long long config_addr, unsigned long eaddr,
-			 unsigned long asid, unsigned long paddr)
-{
-	unsigned long long pteh, ptel;
-
-	pteh = neff_sign_extend(eaddr);
-	pteh &= PAGE_MASK;
-	pteh |= (asid << PTEH_ASID_SHIFT) | PTEH_VALID;
-	ptel = neff_sign_extend(paddr);
-	ptel &= PAGE_MASK;
-	ptel |= (_PAGE_CACHABLE | _PAGE_READ | _PAGE_WRITE);
-
-	asm volatile("putcfg %0, 1, %1\n\t"
-			"putcfg %0, 0, %2\n"
-			: : "r" (config_addr), "r" (ptel), "r" (pteh));
-}
-
-/**
- * sh64_teardown_tlb_slot - Teardown a translation.
- *
- * @config_addr:	Address of TLB slot.
- *
- * Teardown any existing mapping in the TLB slot @config_addr.
- */
-void sh64_teardown_tlb_slot(unsigned long long config_addr)
-	__attribute__ ((alias("__flush_tlb_slot")));
-
-static int dtlb_entry;
-static unsigned long long dtlb_entries[64];
-
-void tlb_wire_entry(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
-{
-	unsigned long long entry;
-	unsigned long paddr, flags;
-
-	BUG_ON(dtlb_entry == ARRAY_SIZE(dtlb_entries));
-
-	local_irq_save(flags);
-
-	entry = sh64_get_wired_dtlb_entry();
-	dtlb_entries[dtlb_entry++] = entry;
-
-	paddr = pte_val(pte) & _PAGE_FLAGS_HARDWARE_MASK;
-	paddr &= ~PAGE_MASK;
-
-	sh64_setup_tlb_slot(entry, addr, get_asid(), paddr);
-
-	local_irq_restore(flags);
-}
-
-void tlb_unwire_entry(void)
-{
-	unsigned long long entry;
-	unsigned long flags;
-
-	BUG_ON(!dtlb_entry);
-
-	local_irq_save(flags);
-	entry = dtlb_entries[dtlb_entry--];
-
-	sh64_teardown_tlb_slot(entry);
-	sh64_put_wired_dtlb_entry(entry);
-
-	local_irq_restore(flags);
-}
-
-void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
-{
-	unsigned long long ptel;
-	unsigned long long pteh=0;
-	struct tlb_info *tlbp;
-	unsigned long long next;
-	unsigned int fault_code = get_thread_fault_code();
-
-	/* Get PTEL first */
-	ptel = pte.pte_low;
-
-	/*
-	 * Set PTEH register
-	 */
-	pteh = neff_sign_extend(address & MMU_VPN_MASK);
-
-	/* Set the ASID. */
-	pteh |= get_asid() << PTEH_ASID_SHIFT;
-	pteh |= PTEH_VALID;
-
-	/* Set PTEL register, set_pte has performed the sign extension */
-	ptel &= _PAGE_FLAGS_HARDWARE_MASK; /* drop software flags */
-
-	if (fault_code & FAULT_CODE_ITLB)
-		tlbp = &cpu_data->itlb;
-	else
-		tlbp = &cpu_data->dtlb;
-
-	next = tlbp->next;
-	__flush_tlb_slot(next);
-	asm volatile ("putcfg %0,1,%2\n\n\t"
-		      "putcfg %0,0,%1\n"
-		      :  : "r" (next), "r" (pteh), "r" (ptel) );
-
-	next += TLB_STEP;
-	if (next > tlbp->last)
-		next = tlbp->first;
-	tlbp->next = next;
-}
diff --git a/arch/sh/mm/tlbex_32.c b/arch/sh/mm/tlbex_32.c
index 382262dc0c4b..1c53868632ee 100644
--- a/arch/sh/mm/tlbex_32.c
+++ b/arch/sh/mm/tlbex_32.c
@@ -23,6 +23,7 @@ handle_tlbmiss(struct pt_regs *regs, unsigned long error_code,
 	       unsigned long address)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
@@ -42,7 +43,10 @@ handle_tlbmiss(struct pt_regs *regs, unsigned long error_code,
 		pgd = pgd_offset(current->mm, address);
 	}
 
-	pud = pud_offset(pgd, address);
+	p4d = p4d_offset(pgd, address);
+	if (p4d_none_or_clear_bad(p4d))
+		return 1;
+	pud = pud_offset(p4d, address);
 	if (pud_none_or_clear_bad(pud))
 		return 1;
 	pmd = pmd_offset(pud, address);
diff --git a/arch/sh/mm/tlbex_64.c b/arch/sh/mm/tlbex_64.c
deleted file mode 100644
index 8ff966dd0c74..000000000000
--- a/arch/sh/mm/tlbex_64.c
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * The SH64 TLB miss.
- *
- * Original code from fault.c
- * Copyright (C) 2000, 2001  Paolo Alberelli
- *
- * Fast PTE->TLB refill path
- * Copyright (C) 2003 Richard.Curnow@superh.com
- *
- * IMPORTANT NOTES :
- * The do_fast_page_fault function is called from a context in entry.S
- * where very few registers have been saved.  In particular, the code in
- * this file must be compiled not to use ANY caller-save registers that
- * are not part of the restricted save set.  Also, it means that code in
- * this file must not make calls to functions elsewhere in the kernel, or
- * else the excepting context will see corruption in its caller-save
- * registers.  Plus, the entry.S save area is non-reentrant, so this code
- * has to run with SR.BL==1, i.e. no interrupts taken inside it and panic
- * on any exception.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/ptrace.h>
-#include <linux/mman.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/interrupt.h>
-#include <linux/kprobes.h>
-#include <asm/tlb.h>
-#include <asm/io.h>
-#include <linux/uaccess.h>
-#include <asm/pgalloc.h>
-#include <asm/mmu_context.h>
-
-static int handle_tlbmiss(unsigned long long protection_flags,
-			  unsigned long address)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-	pte_t entry;
-
-	if (is_vmalloc_addr((void *)address)) {
-		pgd = pgd_offset_k(address);
-	} else {
-		if (unlikely(address >= TASK_SIZE || !current->mm))
-			return 1;
-
-		pgd = pgd_offset(current->mm, address);
-	}
-
-	pud = pud_offset(pgd, address);
-	if (pud_none(*pud) || !pud_present(*pud))
-		return 1;
-
-	pmd = pmd_offset(pud, address);
-	if (pmd_none(*pmd) || !pmd_present(*pmd))
-		return 1;
-
-	pte = pte_offset_kernel(pmd, address);
-	entry = *pte;
-	if (pte_none(entry) || !pte_present(entry))
-		return 1;
-
-	/*
-	 * If the page doesn't have sufficient protection bits set to
-	 * service the kind of fault being handled, there's not much
-	 * point doing the TLB refill.  Punt the fault to the general
-	 * handler.
-	 */
-	if ((pte_val(entry) & protection_flags) != protection_flags)
-		return 1;
-
-	update_mmu_cache(NULL, address, pte);
-
-	return 0;
-}
-
-/*
- * Put all this information into one structure so that everything is just
- * arithmetic relative to a single base address.  This reduces the number
- * of movi/shori pairs needed just to load addresses of static data.
- */
-struct expevt_lookup {
-	unsigned short protection_flags[8];
-	unsigned char  is_text_access[8];
-	unsigned char  is_write_access[8];
-};
-
-#define PRU (1<<9)
-#define PRW (1<<8)
-#define PRX (1<<7)
-#define PRR (1<<6)
-
-/* Sized as 8 rather than 4 to allow checking the PTE's PRU bit against whether
-   the fault happened in user mode or privileged mode. */
-static struct expevt_lookup expevt_lookup_table = {
-	.protection_flags = {PRX, PRX, 0, 0, PRR, PRR, PRW, PRW},
-	.is_text_access   = {1,   1,   0, 0, 0,   0,   0,   0}
-};
-
-static inline unsigned int
-expevt_to_fault_code(unsigned long expevt)
-{
-	if (expevt == 0xa40)
-		return FAULT_CODE_ITLB;
-	else if (expevt == 0x060)
-		return FAULT_CODE_WRITE;
-
-	return 0;
-}
-
-/*
-   This routine handles page faults that can be serviced just by refilling a
-   TLB entry from an existing page table entry.  (This case represents a very
-   large majority of page faults.) Return 1 if the fault was successfully
-   handled.  Return 0 if the fault could not be handled.  (This leads into the
-   general fault handling in fault.c which deals with mapping file-backed
-   pages, stack growth, segmentation faults, swapping etc etc)
- */
-asmlinkage int __kprobes
-do_fast_page_fault(unsigned long long ssr_md, unsigned long long expevt,
-		   unsigned long address)
-{
-	unsigned long long protection_flags;
-	unsigned long long index;
-	unsigned long long expevt4;
-	unsigned int fault_code;
-
-	/* The next few lines implement a way of hashing EXPEVT into a
-	 * small array index which can be used to lookup parameters
-	 * specific to the type of TLBMISS being handled.
-	 *
-	 * Note:
-	 *	ITLBMISS has EXPEVT==0xa40
-	 *	RTLBMISS has EXPEVT==0x040
-	 *	WTLBMISS has EXPEVT==0x060
-	 */
-	expevt4 = (expevt >> 4);
-	/* TODO : xor ssr_md into this expression too. Then we can check
-	 * that PRU is set when it needs to be. */
-	index = expevt4 ^ (expevt4 >> 5);
-	index &= 7;
-
-	fault_code = expevt_to_fault_code(expevt);
-
-	protection_flags = expevt_lookup_table.protection_flags[index];
-
-	if (expevt_lookup_table.is_text_access[index])
-		fault_code |= FAULT_CODE_ITLB;
-	if (!ssr_md)
-		fault_code |= FAULT_CODE_USER;
-
-	set_thread_fault_code(fault_code);
-
-	return handle_tlbmiss(protection_flags, address);
-}
diff --git a/arch/sh/mm/tlbflush_64.c b/arch/sh/mm/tlbflush_64.c
deleted file mode 100644
index bd0715d5dca4..000000000000
--- a/arch/sh/mm/tlbflush_64.c
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * arch/sh/mm/tlb-flush_64.c
- *
- * Copyright (C) 2000, 2001  Paolo Alberelli
- * Copyright (C) 2003  Richard Curnow (/proc/tlb, bug fixes)
- * Copyright (C) 2003 - 2012 Paul Mundt
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include <linux/signal.h>
-#include <linux/rwsem.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/ptrace.h>
-#include <linux/mman.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/perf_event.h>
-#include <linux/interrupt.h>
-#include <asm/io.h>
-#include <asm/tlb.h>
-#include <linux/uaccess.h>
-#include <asm/pgalloc.h>
-#include <asm/mmu_context.h>
-
-void local_flush_tlb_one(unsigned long asid, unsigned long page)
-{
-	unsigned long long match, pteh=0, lpage;
-	unsigned long tlb;
-
-	/*
-	 * Sign-extend based on neff.
-	 */
-	lpage = neff_sign_extend(page);
-	match = (asid << PTEH_ASID_SHIFT) | PTEH_VALID;
-	match |= lpage;
-
-	for_each_itlb_entry(tlb) {
-		asm volatile ("getcfg	%1, 0, %0"
-			      : "=r" (pteh)
-			      : "r" (tlb) );
-
-		if (pteh == match) {
-			__flush_tlb_slot(tlb);
-			break;
-		}
-	}
-
-	for_each_dtlb_entry(tlb) {
-		asm volatile ("getcfg	%1, 0, %0"
-			      : "=r" (pteh)
-			      : "r" (tlb) );
-
-		if (pteh == match) {
-			__flush_tlb_slot(tlb);
-			break;
-		}
-
-	}
-}
-
-void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
-{
-	unsigned long flags;
-
-	if (vma->vm_mm) {
-		page &= PAGE_MASK;
-		local_irq_save(flags);
-		local_flush_tlb_one(get_asid(), page);
-		local_irq_restore(flags);
-	}
-}
-
-void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
-			   unsigned long end)
-{
-	unsigned long flags;
-	unsigned long long match, pteh=0, pteh_epn, pteh_low;
-	unsigned long tlb;
-	unsigned int cpu = smp_processor_id();
-	struct mm_struct *mm;
-
-	mm = vma->vm_mm;
-	if (cpu_context(cpu, mm) == NO_CONTEXT)
-		return;
-
-	local_irq_save(flags);
-
-	start &= PAGE_MASK;
-	end &= PAGE_MASK;
-
-	match = (cpu_asid(cpu, mm) << PTEH_ASID_SHIFT) | PTEH_VALID;
-
-	/* Flush ITLB */
-	for_each_itlb_entry(tlb) {
-		asm volatile ("getcfg	%1, 0, %0"
-			      : "=r" (pteh)
-			      : "r" (tlb) );
-
-		pteh_epn = pteh & PAGE_MASK;
-		pteh_low = pteh & ~PAGE_MASK;
-
-		if (pteh_low == match && pteh_epn >= start && pteh_epn <= end)
-			__flush_tlb_slot(tlb);
-	}
-
-	/* Flush DTLB */
-	for_each_dtlb_entry(tlb) {
-		asm volatile ("getcfg	%1, 0, %0"
-			      : "=r" (pteh)
-			      : "r" (tlb) );
-
-		pteh_epn = pteh & PAGE_MASK;
-		pteh_low = pteh & ~PAGE_MASK;
-
-		if (pteh_low == match && pteh_epn >= start && pteh_epn <= end)
-			__flush_tlb_slot(tlb);
-	}
-
-	local_irq_restore(flags);
-}
-
-void local_flush_tlb_mm(struct mm_struct *mm)
-{
-	unsigned long flags;
-	unsigned int cpu = smp_processor_id();
-
-	if (cpu_context(cpu, mm) == NO_CONTEXT)
-		return;
-
-	local_irq_save(flags);
-
-	cpu_context(cpu, mm) = NO_CONTEXT;
-	if (mm == current->mm)
-		activate_context(mm, cpu);
-
-	local_irq_restore(flags);
-}
-
-void local_flush_tlb_all(void)
-{
-	/* Invalidate all, including shared pages, excluding fixed TLBs */
-	unsigned long flags, tlb;
-
-	local_irq_save(flags);
-
-	/* Flush each ITLB entry */
-	for_each_itlb_entry(tlb)
-		__flush_tlb_slot(tlb);
-
-	/* Flush each DTLB entry */
-	for_each_dtlb_entry(tlb)
-		__flush_tlb_slot(tlb);
-
-	local_irq_restore(flags);
-}
-
-void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
-{
-        /* FIXME: Optimize this later.. */
-        flush_tlb_all();
-}
-
-void __flush_tlb_global(void)
-{
-	flush_tlb_all();
-}