aboutsummaryrefslogtreecommitdiffstats
path: root/arch/arm/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm/mm')
-rw-r--r--arch/arm/mm/Kconfig51
-rw-r--r--arch/arm/mm/Makefile6
-rw-r--r--arch/arm/mm/cache-fa.S220
-rw-r--r--arch/arm/mm/cache-feroceon-l2.c54
-rw-r--r--arch/arm/mm/cache-xsc3l2.c107
-rw-r--r--arch/arm/mm/copypage-fa.c86
-rw-r--r--arch/arm/mm/dma-mapping.c72
-rw-r--r--arch/arm/mm/flush.c2
-rw-r--r--arch/arm/mm/highmem.c116
-rw-r--r--arch/arm/mm/init.c21
-rw-r--r--arch/arm/mm/mm.h3
-rw-r--r--arch/arm/mm/mmu.c43
-rw-r--r--arch/arm/mm/proc-fa526.S248
-rw-r--r--arch/arm/mm/proc-mohawk.S416
-rw-r--r--arch/arm/mm/tlb-fa.S75
15 files changed, 1464 insertions, 56 deletions
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index d490f3773c01..20979564e7ee 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -186,6 +186,24 @@ config CPU_ARM926T
Say Y if you want support for the ARM926T processor.
Otherwise, say N.
+# FA526
+config CPU_FA526
+ bool
+ select CPU_32v4
+ select CPU_ABRT_EV4
+ select CPU_PABRT_NOIFAR
+ select CPU_CACHE_VIVT
+ select CPU_CP15_MMU
+ select CPU_CACHE_FA
+ select CPU_COPY_FA if MMU
+ select CPU_TLB_FA if MMU
+ help
+ The FA526 is a version of the ARMv4 compatible processor with
+ Branch Target Buffer, Unified TLB and cache line size 16.
+
+ Say Y if you want support for the FA526 processor.
+ Otherwise, say N.
+
# ARM940T
config CPU_ARM940T
bool "Support ARM940T processor" if ARCH_INTEGRATOR
@@ -340,6 +358,17 @@ config CPU_XSC3
select CPU_TLB_V4WBI if MMU
select IO_36
+# Marvell PJ1 (Mohawk)
+config CPU_MOHAWK
+ bool
+ select CPU_32v5
+ select CPU_ABRT_EV5T
+ select CPU_PABRT_NOIFAR
+ select CPU_CACHE_VIVT
+ select CPU_CP15_MMU
+ select CPU_TLB_V4WBI if MMU
+ select CPU_COPY_V4WB if MMU
+
# Feroceon
config CPU_FEROCEON
bool
@@ -484,6 +513,9 @@ config CPU_CACHE_VIVT
config CPU_CACHE_VIPT
bool
+config CPU_CACHE_FA
+ bool
+
if MMU
# The copy-page model
config CPU_COPY_V3
@@ -498,6 +530,9 @@ config CPU_COPY_V4WB
config CPU_COPY_FEROCEON
bool
+config CPU_COPY_FA
+ bool
+
config CPU_COPY_V6
bool
@@ -528,6 +563,13 @@ config CPU_TLB_FEROCEON
help
Feroceon TLB (v4wbi with non-outer-cachable page table walks).
+config CPU_TLB_FA
+ bool
+ help
+ Faraday ARM FA526 architecture, unified TLB with writeback cache
+ and invalidate instruction cache entry. Branch target buffer is
+ also supported.
+
config CPU_TLB_V6
bool
@@ -569,7 +611,7 @@ comment "Processor Features"
config ARM_THUMB
bool "Support Thumb user binaries"
- depends on CPU_ARM720T || CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020 || CPU_ARM1020E || CPU_ARM1022 || CPU_ARM1026 || CPU_XSCALE || CPU_XSC3 || CPU_V6 || CPU_V7 || CPU_FEROCEON
+ depends on CPU_ARM720T || CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020 || CPU_ARM1020E || CPU_ARM1022 || CPU_ARM1026 || CPU_XSCALE || CPU_XSC3 || CPU_MOHAWK || CPU_V6 || CPU_V7 || CPU_FEROCEON
default y
help
Say Y if you want to include kernel support for running user space
@@ -638,7 +680,7 @@ config CPU_DCACHE_SIZE
config CPU_DCACHE_WRITETHROUGH
bool "Force write through D-cache"
- depends on (CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020) && !CPU_DCACHE_DISABLE
+ depends on (CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020 || CPU_FA526) && !CPU_DCACHE_DISABLE
default y if CPU_ARM925T
help
Say Y here to use the data cache in writethrough mode. Unless you
@@ -653,7 +695,7 @@ config CPU_CACHE_ROUND_ROBIN
config CPU_BPREDICT_DISABLE
bool "Disable branch prediction"
- depends on CPU_ARM1020 || CPU_V6 || CPU_XSC3 || CPU_V7
+ depends on CPU_ARM1020 || CPU_V6 || CPU_MOHAWK || CPU_XSC3 || CPU_V7 || CPU_FA526
help
Say Y here to disable branch prediction. If unsure, say N.
@@ -704,7 +746,8 @@ config CACHE_FEROCEON_L2_WRITETHROUGH
config CACHE_L2X0
bool "Enable the L2x0 outer cache controller"
- depends on REALVIEW_EB_ARM11MP || MACH_REALVIEW_PB11MP || MACH_REALVIEW_PB1176 || REALVIEW_EB_A9MP
+ depends on REALVIEW_EB_ARM11MP || MACH_REALVIEW_PB11MP || MACH_REALVIEW_PB1176 || \
+ REALVIEW_EB_A9MP || ARCH_MX35 || ARCH_MX31
default y
select OUTER_CACHE
help
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index 480f78a3611a..63e3f6dd0e21 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_MODULES) += proc-syms.o
obj-$(CONFIG_ALIGNMENT_TRAP) += alignment.o
obj-$(CONFIG_DISCONTIGMEM) += discontig.o
+obj-$(CONFIG_HIGHMEM) += highmem.o
obj-$(CONFIG_CPU_ABRT_NOMMU) += abort-nommu.o
obj-$(CONFIG_CPU_ABRT_EV4) += abort-ev4.o
@@ -32,6 +33,7 @@ obj-$(CONFIG_CPU_CACHE_V4WT) += cache-v4wt.o
obj-$(CONFIG_CPU_CACHE_V4WB) += cache-v4wb.o
obj-$(CONFIG_CPU_CACHE_V6) += cache-v6.o
obj-$(CONFIG_CPU_CACHE_V7) += cache-v7.o
+obj-$(CONFIG_CPU_CACHE_FA) += cache-fa.o
obj-$(CONFIG_CPU_COPY_V3) += copypage-v3.o
obj-$(CONFIG_CPU_COPY_V4WT) += copypage-v4wt.o
@@ -41,6 +43,7 @@ obj-$(CONFIG_CPU_COPY_V6) += copypage-v6.o context.o
obj-$(CONFIG_CPU_SA1100) += copypage-v4mc.o
obj-$(CONFIG_CPU_XSCALE) += copypage-xscale.o
obj-$(CONFIG_CPU_XSC3) += copypage-xsc3.o
+obj-$(CONFIG_CPU_COPY_FA) += copypage-fa.o
obj-$(CONFIG_CPU_TLB_V3) += tlb-v3.o
obj-$(CONFIG_CPU_TLB_V4WT) += tlb-v4.o
@@ -49,6 +52,7 @@ obj-$(CONFIG_CPU_TLB_V4WBI) += tlb-v4wbi.o
obj-$(CONFIG_CPU_TLB_FEROCEON) += tlb-v4wbi.o # reuse v4wbi TLB functions
obj-$(CONFIG_CPU_TLB_V6) += tlb-v6.o
obj-$(CONFIG_CPU_TLB_V7) += tlb-v7.o
+obj-$(CONFIG_CPU_TLB_FA) += tlb-fa.o
obj-$(CONFIG_CPU_ARM610) += proc-arm6_7.o
obj-$(CONFIG_CPU_ARM710) += proc-arm6_7.o
@@ -62,6 +66,7 @@ obj-$(CONFIG_CPU_ARM925T) += proc-arm925.o
obj-$(CONFIG_CPU_ARM926T) += proc-arm926.o
obj-$(CONFIG_CPU_ARM940T) += proc-arm940.o
obj-$(CONFIG_CPU_ARM946E) += proc-arm946.o
+obj-$(CONFIG_CPU_FA526) += proc-fa526.o
obj-$(CONFIG_CPU_ARM1020) += proc-arm1020.o
obj-$(CONFIG_CPU_ARM1020E) += proc-arm1020e.o
obj-$(CONFIG_CPU_ARM1022) += proc-arm1022.o
@@ -70,6 +75,7 @@ obj-$(CONFIG_CPU_SA110) += proc-sa110.o
obj-$(CONFIG_CPU_SA1100) += proc-sa1100.o
obj-$(CONFIG_CPU_XSCALE) += proc-xscale.o
obj-$(CONFIG_CPU_XSC3) += proc-xsc3.o
+obj-$(CONFIG_CPU_MOHAWK) += proc-mohawk.o
obj-$(CONFIG_CPU_FEROCEON) += proc-feroceon.o
obj-$(CONFIG_CPU_V6) += proc-v6.o
obj-$(CONFIG_CPU_V7) += proc-v7.o
diff --git a/arch/arm/mm/cache-fa.S b/arch/arm/mm/cache-fa.S
new file mode 100644
index 000000000000..b63a8f7b95cf
--- /dev/null
+++ b/arch/arm/mm/cache-fa.S
@@ -0,0 +1,220 @@
+/*
+ * linux/arch/arm/mm/cache-fa.S
+ *
+ * Copyright (C) 2005 Faraday Corp.
+ * Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
+ *
+ * Based on cache-v4wb.S:
+ * Copyright (C) 1997-2002 Russell king
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Processors: FA520 FA526 FA626
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/memory.h>
+#include <asm/page.h>
+
+#include "proc-macros.S"
+
+/*
+ * The size of one data cache line.
+ */
+#define CACHE_DLINESIZE 16
+
+/*
+ * The total size of the data cache.
+ */
+#ifdef CONFIG_ARCH_GEMINI
+#define CACHE_DSIZE 8192
+#else
+#define CACHE_DSIZE 16384
+#endif
+
+/* FIXME: put optimal value here. Current one is just estimation */
+#define CACHE_DLIMIT (CACHE_DSIZE * 2)
+
+/*
+ * flush_user_cache_all()
+ *
+ * Clean and invalidate all cache entries in a particular address
+ * space.
+ */
+ENTRY(fa_flush_user_cache_all)
+ /* FALLTHROUGH */
+/*
+ * flush_kern_cache_all()
+ *
+ * Clean and invalidate the entire cache.
+ */
+ENTRY(fa_flush_kern_cache_all)
+ mov ip, #0
+ mov r2, #VM_EXEC
+__flush_whole_cache:
+ mcr p15, 0, ip, c7, c14, 0 @ clean/invalidate D cache
+ tst r2, #VM_EXEC
+ mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
+ mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB
+ mcrne p15, 0, ip, c7, c10, 4 @ drain write buffer
+ mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush
+ mov pc, lr
+
+/*
+ * flush_user_cache_range(start, end, flags)
+ *
+ * Invalidate a range of cache entries in the specified
+ * address space.
+ *
+ * - start - start address (inclusive, page aligned)
+ * - end - end address (exclusive, page aligned)
+ * - flags - vma_area_struct flags describing address space
+ */
+ENTRY(fa_flush_user_cache_range)
+ mov ip, #0
+ sub r3, r1, r0 @ calculate total size
+ cmp r3, #CACHE_DLIMIT @ total size >= limit?
+ bhs __flush_whole_cache @ flush whole D cache
+
+1: tst r2, #VM_EXEC
+ mcrne p15, 0, r0, c7, c5, 1 @ invalidate I line
+ mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry
+ add r0, r0, #CACHE_DLINESIZE
+ cmp r0, r1
+ blo 1b
+ tst r2, #VM_EXEC
+ mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB
+ mcrne p15, 0, ip, c7, c10, 4 @ data write barrier
+ mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush
+ mov pc, lr
+
+/*
+ * coherent_kern_range(start, end)
+ *
+ * Ensure coherency between the Icache and the Dcache in the
+ * region described by start. If you have non-snooping
+ * Harvard caches, you need to implement this function.
+ *
+ * - start - virtual start address
+ * - end - virtual end address
+ */
+ENTRY(fa_coherent_kern_range)
+ /* fall through */
+
+/*
+ * coherent_user_range(start, end)
+ *
+ * Ensure coherency between the Icache and the Dcache in the
+ * region described by start. If you have non-snooping
+ * Harvard caches, you need to implement this function.
+ *
+ * - start - virtual start address
+ * - end - virtual end address
+ */
+ENTRY(fa_coherent_user_range)
+ bic r0, r0, #CACHE_DLINESIZE - 1
+1: mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry
+ mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry
+ add r0, r0, #CACHE_DLINESIZE
+ cmp r0, r1
+ blo 1b
+ mov r0, #0
+ mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB
+ mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
+ mcr p15, 0, r0, c7, c5, 4 @ prefetch flush
+ mov pc, lr
+
+/*
+ * flush_kern_dcache_page(kaddr)
+ *
+ * Ensure that the data held in the page kaddr is written back
+ * to the page in question.
+ *
+ * - kaddr - kernel address (guaranteed to be page aligned)
+ */
+ENTRY(fa_flush_kern_dcache_page)
+ add r1, r0, #PAGE_SZ
+1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line
+ add r0, r0, #CACHE_DLINESIZE
+ cmp r0, r1
+ blo 1b
+ mov r0, #0
+ mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
+ mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
+ mov pc, lr
+
+/*
+ * dma_inv_range(start, end)
+ *
+ * Invalidate (discard) the specified virtual address range.
+ * May not write back any entries. If 'start' or 'end'
+ * are not cache line aligned, those lines must be written
+ * back.
+ *
+ * - start - virtual start address
+ * - end - virtual end address
+ */
+ENTRY(fa_dma_inv_range)
+ tst r0, #CACHE_DLINESIZE - 1
+ bic r0, r0, #CACHE_DLINESIZE - 1
+ mcrne p15, 0, r0, c7, c14, 1 @ clean & invalidate D entry
+ tst r1, #CACHE_DLINESIZE - 1
+ bic r1, r1, #CACHE_DLINESIZE - 1
+ mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D entry
+1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
+ add r0, r0, #CACHE_DLINESIZE
+ cmp r0, r1
+ blo 1b
+ mov r0, #0
+ mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
+ mov pc, lr
+
+/*
+ * dma_clean_range(start, end)
+ *
+ * Clean (write back) the specified virtual address range.
+ *
+ * - start - virtual start address
+ * - end - virtual end address
+ */
+ENTRY(fa_dma_clean_range)
+ bic r0, r0, #CACHE_DLINESIZE - 1
+1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
+ add r0, r0, #CACHE_DLINESIZE
+ cmp r0, r1
+ blo 1b
+ mov r0, #0
+ mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
+ mov pc, lr
+
+/*
+ * dma_flush_range(start,end)
+ * - start - virtual start address of region
+ * - end - virtual end address of region
+ */
+ENTRY(fa_dma_flush_range)
+ bic r0, r0, #CACHE_DLINESIZE - 1
+1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D entry
+ add r0, r0, #CACHE_DLINESIZE
+ cmp r0, r1
+ blo 1b
+ mov r0, #0
+ mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
+ mov pc, lr
+
+ __INITDATA
+
+ .type fa_cache_fns, #object
+ENTRY(fa_cache_fns)
+ .long fa_flush_kern_cache_all
+ .long fa_flush_user_cache_all
+ .long fa_flush_user_cache_range
+ .long fa_coherent_kern_range
+ .long fa_coherent_user_range
+ .long fa_flush_kern_dcache_page
+ .long fa_dma_inv_range
+ .long fa_dma_clean_range
+ .long fa_dma_flush_range
+ .size fa_cache_fns, . - fa_cache_fns
diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c
index 80cd207cbaea..d6dd83826f8a 100644
--- a/arch/arm/mm/cache-feroceon-l2.c
+++ b/arch/arm/mm/cache-feroceon-l2.c
@@ -14,8 +14,12 @@
#include <linux/init.h>
#include <asm/cacheflush.h>
+#include <asm/kmap_types.h>
+#include <asm/fixmap.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
#include <plat/cache-feroceon-l2.h>
-
+#include "mm.h"
/*
* Low-level cache maintenance operations.
@@ -34,14 +38,36 @@
* The range operations require two successive cp15 writes, in
* between which we don't want to be preempted.
*/
+
+static inline unsigned long l2_start_va(unsigned long paddr)
+{
+#ifdef CONFIG_HIGHMEM
+ /*
+ * Let's do our own fixmap stuff in a minimal way here.
+ * Because range ops can't be done on physical addresses,
+ * we simply install a virtual mapping for it only for the
+ * TLB lookup to occur, hence no need to flush the untouched
+ * memory mapping. This is protected with the disabling of
+ * interrupts by the caller.
+ */
+ unsigned long idx = KM_L2_CACHE + KM_TYPE_NR * smp_processor_id();
+ unsigned long vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+ set_pte_ext(TOP_PTE(vaddr), pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL), 0);
+ local_flush_tlb_kernel_page(vaddr);
+ return vaddr + (paddr & ~PAGE_MASK);
+#else
+ return __phys_to_virt(paddr);
+#endif
+}
+
static inline void l2_clean_pa(unsigned long addr)
{
__asm__("mcr p15, 1, %0, c15, c9, 3" : : "r" (addr));
}
-static inline void l2_clean_mva_range(unsigned long start, unsigned long end)
+static inline void l2_clean_pa_range(unsigned long start, unsigned long end)
{
- unsigned long flags;
+ unsigned long va_start, va_end, flags;
/*
* Make sure 'start' and 'end' reference the same page, as
@@ -51,17 +77,14 @@ static inline void l2_clean_mva_range(unsigned long start, unsigned long end)
BUG_ON((start ^ end) >> PAGE_SHIFT);
raw_local_irq_save(flags);
+ va_start = l2_start_va(start);
+ va_end = va_start + (end - start);
__asm__("mcr p15, 1, %0, c15, c9, 4\n\t"
"mcr p15, 1, %1, c15, c9, 5"
- : : "r" (start), "r" (end));
+ : : "r" (va_start), "r" (va_end));
raw_local_irq_restore(flags);
}
-static inline void l2_clean_pa_range(unsigned long start, unsigned long end)
-{
- l2_clean_mva_range(__phys_to_virt(start), __phys_to_virt(end));
-}
-
static inline void l2_clean_inv_pa(unsigned long addr)
{
__asm__("mcr p15, 1, %0, c15, c10, 3" : : "r" (addr));
@@ -72,9 +95,9 @@ static inline void l2_inv_pa(unsigned long addr)
__asm__("mcr p15, 1, %0, c15, c11, 3" : : "r" (addr));
}
-static inline void l2_inv_mva_range(unsigned long start, unsigned long end)
+static inline void l2_inv_pa_range(unsigned long start, unsigned long end)
{
- unsigned long flags;
+ unsigned long va_start, va_end, flags;
/*
* Make sure 'start' and 'end' reference the same page, as
@@ -84,17 +107,14 @@ static inline void l2_inv_mva_range(unsigned long start, unsigned long end)
BUG_ON((start ^ end) >> PAGE_SHIFT);
raw_local_irq_save(flags);
+ va_start = l2_start_va(start);
+ va_end = va_start + (end - start);
__asm__("mcr p15, 1, %0, c15, c11, 4\n\t"
"mcr p15, 1, %1, c15, c11, 5"
- : : "r" (start), "r" (end));
+ : : "r" (va_start), "r" (va_end));
raw_local_irq_restore(flags);
}
-static inline void l2_inv_pa_range(unsigned long start, unsigned long end)
-{
- l2_inv_mva_range(__phys_to_virt(start), __phys_to_virt(end));
-}
-
/*
* Linux primitives.
diff --git a/arch/arm/mm/cache-xsc3l2.c b/arch/arm/mm/cache-xsc3l2.c
index 464de893a988..5d180cb0bd94 100644
--- a/arch/arm/mm/cache-xsc3l2.c
+++ b/arch/arm/mm/cache-xsc3l2.c
@@ -17,12 +17,14 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/init.h>
-#include <linux/spinlock.h>
-#include <linux/io.h>
-
#include <asm/system.h>
#include <asm/cputype.h>
#include <asm/cacheflush.h>
+#include <asm/kmap_types.h>
+#include <asm/fixmap.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+#include "mm.h"
#define CR_L2 (1 << 26)
@@ -47,21 +49,11 @@ static inline void xsc3_l2_clean_mva(unsigned long addr)
__asm__("mcr p15, 1, %0, c7, c11, 1" : : "r" (addr));
}
-static inline void xsc3_l2_clean_pa(unsigned long addr)
-{
- xsc3_l2_clean_mva(__phys_to_virt(addr));
-}
-
static inline void xsc3_l2_inv_mva(unsigned long addr)
{
__asm__("mcr p15, 1, %0, c7, c7, 1" : : "r" (addr));
}
-static inline void xsc3_l2_inv_pa(unsigned long addr)
-{
- xsc3_l2_inv_mva(__phys_to_virt(addr));
-}
-
static inline void xsc3_l2_inv_all(void)
{
unsigned long l2ctype, set_way;
@@ -79,50 +71,103 @@ static inline void xsc3_l2_inv_all(void)
dsb();
}
+#ifdef CONFIG_HIGHMEM
+#define l2_map_save_flags(x) raw_local_save_flags(x)
+#define l2_map_restore_flags(x) raw_local_irq_restore(x)
+#else
+#define l2_map_save_flags(x) ((x) = 0)
+#define l2_map_restore_flags(x) ((void)(x))
+#endif
+
+static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va,
+ unsigned long flags)
+{
+#ifdef CONFIG_HIGHMEM
+ unsigned long va = prev_va & PAGE_MASK;
+ unsigned long pa_offset = pa << (32 - PAGE_SHIFT);
+ if (unlikely(pa_offset < (prev_va << (32 - PAGE_SHIFT)))) {
+ /*
+ * Switching to a new page. Because cache ops are
+ * using virtual addresses only, we must put a mapping
+ * in place for it. We also enable interrupts for a
+ * short while and disable them again to protect this
+ * mapping.
+ */
+ unsigned long idx;
+ raw_local_irq_restore(flags);
+ idx = KM_L2_CACHE + KM_TYPE_NR * smp_processor_id();
+ va = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+ raw_local_irq_restore(flags | PSR_I_BIT);
+ set_pte_ext(TOP_PTE(va), pfn_pte(pa >> PAGE_SHIFT, PAGE_KERNEL), 0);
+ local_flush_tlb_kernel_page(va);
+ }
+ return va + (pa_offset >> (32 - PAGE_SHIFT));
+#else
+ return __phys_to_virt(pa);
+#endif
+}
+
static void xsc3_l2_inv_range(unsigned long start, unsigned long end)
{
+ unsigned long vaddr, flags;
+
if (start == 0 && end == -1ul) {
xsc3_l2_inv_all();
return;
}
+ vaddr = -1; /* to force the first mapping */
+ l2_map_save_flags(flags);
+
/*
* Clean and invalidate partial first cache line.
*/
if (start & (CACHE_LINE_SIZE - 1)) {
- xsc3_l2_clean_pa(start & ~(CACHE_LINE_SIZE - 1));
- xsc3_l2_inv_pa(start & ~(CACHE_LINE_SIZE - 1));
+ vaddr = l2_map_va(start & ~(CACHE_LINE_SIZE - 1), vaddr, flags);
+ xsc3_l2_clean_mva(vaddr);
+ xsc3_l2_inv_mva(vaddr);
start = (start | (CACHE_LINE_SIZE - 1)) + 1;
}
/*
- * Clean and invalidate partial last cache line.
+ * Invalidate all full cache lines between 'start' and 'end'.
*/
- if (start < end && (end & (CACHE_LINE_SIZE - 1))) {
- xsc3_l2_clean_pa(end & ~(CACHE_LINE_SIZE - 1));
- xsc3_l2_inv_pa(end & ~(CACHE_LINE_SIZE - 1));
- end &= ~(CACHE_LINE_SIZE - 1);
+ while (start < (end & ~(CACHE_LINE_SIZE - 1))) {
+ vaddr = l2_map_va(start, vaddr, flags);
+ xsc3_l2_inv_mva(vaddr);
+ start += CACHE_LINE_SIZE;
}
/*
- * Invalidate all full cache lines between 'start' and 'end'.
+ * Clean and invalidate partial last cache line.
*/
- while (start < end) {
- xsc3_l2_inv_pa(start);
- start += CACHE_LINE_SIZE;
+ if (start < end) {
+ vaddr = l2_map_va(start, vaddr, flags);
+ xsc3_l2_clean_mva(vaddr);
+ xsc3_l2_inv_mva(vaddr);
}
+ l2_map_restore_flags(flags);
+
dsb();
}
static void xsc3_l2_clean_range(unsigned long start, unsigned long end)
{
+ unsigned long vaddr, flags;
+
+ vaddr = -1; /* to force the first mapping */
+ l2_map_save_flags(flags);
+
start &= ~(CACHE_LINE_SIZE - 1);
while (start < end) {
- xsc3_l2_clean_pa(start);
+ vaddr = l2_map_va(start, vaddr, flags);
+ xsc3_l2_clean_mva(vaddr);
start += CACHE_LINE_SIZE;
}
+ l2_map_restore_flags(flags);
+
dsb();
}
@@ -148,18 +193,26 @@ static inline void xsc3_l2_flush_all(void)
static void xsc3_l2_flush_range(unsigned long start, unsigned long end)
{
+ unsigned long vaddr, flags;
+
if (start == 0 && end == -1ul) {
xsc3_l2_flush_all();
return;
}
+ vaddr = -1; /* to force the first mapping */
+ l2_map_save_flags(flags);
+
start &= ~(CACHE_LINE_SIZE - 1);
while (start < end) {
- xsc3_l2_clean_pa(start);
- xsc3_l2_inv_pa(start);
+ vaddr = l2_map_va(start, vaddr, flags);
+ xsc3_l2_clean_mva(vaddr);
+ xsc3_l2_inv_mva(vaddr);
start += CACHE_LINE_SIZE;
}
+ l2_map_restore_flags(flags);
+
dsb();
}
diff --git a/arch/arm/mm/copypage-fa.c b/arch/arm/mm/copypage-fa.c
new file mode 100644
index 000000000000..b2a6008b0111
--- /dev/null
+++ b/arch/arm/mm/copypage-fa.c
@@ -0,0 +1,86 @@
+/*
+ * linux/arch/arm/lib/copypage-fa.S
+ *
+ * Copyright (C) 2005 Faraday Corp.
+ * Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
+ *
+ * Based on copypage-v4wb.S:
+ * Copyright (C) 1995-1999 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/init.h>
+#include <linux/highmem.h>
+
+/*
+ * Faraday optimised copy_user_page
+ */
+static void __naked
+fa_copy_user_page(void *kto, const void *kfrom)
+{
+ asm("\
+ stmfd sp!, {r4, lr} @ 2\n\
+ mov r2, %0 @ 1\n\
+1: ldmia r1!, {r3, r4, ip, lr} @ 4\n\
+ stmia r0, {r3, r4, ip, lr} @ 4\n\
+ mcr p15, 0, r0, c7, c14, 1 @ 1 clean and invalidate D line\n\
+ add r0, r0, #16 @ 1\n\
+ ldmia r1!, {r3, r4, ip, lr} @ 4\n\
+ stmia r0, {r3, r4, ip, lr} @ 4\n\
+ mcr p15, 0, r0, c7, c14, 1 @ 1 clean and invalidate D line\n\
+ add r0, r0, #16 @ 1\n\
+ subs r2, r2, #1 @ 1\n\
+ bne 1b @ 1\n\
+ mcr p15, 0, r2, c7, c10, 4 @ 1 drain WB\n\
+ ldmfd sp!, {r4, pc} @ 3"
+ :
+ : "I" (PAGE_SIZE / 32));
+}
+
+void fa_copy_user_highpage(struct page *to, struct page *from,
+ unsigned long vaddr)
+{
+ void *kto, *kfrom;
+
+ kto = kmap_atomic(to, KM_USER0);
+ kfrom = kmap_atomic(from, KM_USER1);
+ fa_copy_user_page(kto, kfrom);
+ kunmap_atomic(kfrom, KM_USER1);
+ kunmap_atomic(kto, KM_USER0);
+}
+
+/*
+ * Faraday optimised clear_user_page
+ *
+ * Same story as above.
+ */
+void fa_clear_user_highpage(struct page *page, unsigned long vaddr)
+{
+ void *ptr, *kaddr = kmap_atomic(page, KM_USER0);
+ asm volatile("\
+ mov r1, %2 @ 1\n\
+ mov r2, #0 @ 1\n\
+ mov r3, #0 @ 1\n\
+ mov ip, #0 @ 1\n\
+ mov lr, #0 @ 1\n\
+1: stmia %0, {r2, r3, ip, lr} @ 4\n\
+ mcr p15, 0, %0, c7, c14, 1 @ 1 clean and invalidate D line\n\
+ add %0, %0, #16 @ 1\n\
+ stmia %0, {r2, r3, ip, lr} @ 4\n\
+ mcr p15, 0, %0, c7, c14, 1 @ 1 clean and invalidate D line\n\
+ add %0, %0, #16 @ 1\n\
+ subs r1, r1, #1 @ 1\n\
+ bne 1b @ 1\n\
+ mcr p15, 0, r1, c7, c10, 4 @ 1 drain WB"
+ : "=r" (ptr)
+ : "0" (kaddr), "I" (PAGE_SIZE / 32)
+ : "r1", "r2", "r3", "ip", "lr");
+ kunmap_atomic(kaddr, KM_USER0);
+}
+
+struct cpu_user_fns fa_user_fns __initdata = {
+ .cpu_clear_user_highpage = fa_clear_user_highpage,
+ .cpu_copy_user_highpage = fa_copy_user_highpage,
+};
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index f1ef5613ccd4..510c179b0ac8 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -19,6 +19,7 @@
#include <linux/dma-mapping.h>
#include <asm/memory.h>
+#include <asm/highmem.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/sizes.h>
@@ -517,6 +518,74 @@ void dma_cache_maint(const void *start, size_t size, int direction)
}
EXPORT_SYMBOL(dma_cache_maint);
+static void dma_cache_maint_contiguous(struct page *page, unsigned long offset,
+ size_t size, int direction)
+{
+ void *vaddr;
+ unsigned long paddr;
+ void (*inner_op)(const void *, const void *);
+ void (*outer_op)(unsigned long, unsigned long);
+
+ switch (direction) {
+ case DMA_FROM_DEVICE: /* invalidate only */
+ inner_op = dmac_inv_range;
+ outer_op = outer_inv_range;
+ break;
+ case DMA_TO_DEVICE: /* writeback only */
+ inner_op = dmac_clean_range;
+ outer_op = outer_clean_range;
+ break;
+ case DMA_BIDIRECTIONAL: /* writeback and invalidate */
+ inner_op = dmac_flush_range;
+ outer_op = outer_flush_range;
+ break;
+ default:
+ BUG();
+ }
+
+ if (!PageHighMem(page)) {
+ vaddr = page_address(page) + offset;
+ inner_op(vaddr, vaddr + size);
+ } else {
+ vaddr = kmap_high_get(page);
+ if (vaddr) {
+ vaddr += offset;
+ inner_op(vaddr, vaddr + size);
+ kunmap_high(page);
+ }
+ }
+
+ paddr = page_to_phys(page) + offset;
+ outer_op(paddr, paddr + size);
+}
+
+void dma_cache_maint_page(struct page *page, unsigned long offset,
+ size_t size, int dir)
+{
+ /*
+ * A single sg entry may refer to multiple physically contiguous
+ * pages. But we still need to process highmem pages individually.
+ * If highmem is not configured then the bulk of this loop gets
+ * optimized out.
+ */
+ size_t left = size;
+ do {
+ size_t len = left;
+ if (PageHighMem(page) && len + offset > PAGE_SIZE) {
+ if (offset >= PAGE_SIZE) {
+ page += offset / PAGE_SIZE;
+ offset %= PAGE_SIZE;
+ }
+ len = PAGE_SIZE - offset;
+ }
+ dma_cache_maint_contiguous(page, offset, len, dir);
+ offset = 0;
+ page++;
+ left -= len;
+ } while (left);
+}
+EXPORT_SYMBOL(dma_cache_maint_page);
+
/**
* dma_map_sg - map a set of SG buffers for streaming mode DMA
* @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
@@ -614,7 +683,8 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
continue;
if (!arch_is_coherent())
- dma_cache_maint(sg_virt(s), s->length, dir);
+ dma_cache_maint_page(sg_page(s), s->offset,
+ s->length, dir);
}
}
EXPORT_SYMBOL(dma_sync_sg_for_device);
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 0fa9bf388f0b..4e283481cee1 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -192,7 +192,7 @@ void flush_dcache_page(struct page *page)
struct address_space *mapping = page_mapping(page);
#ifndef CONFIG_SMP
- if (mapping && !mapping_mapped(mapping))
+ if (!PageHighMem(page) && mapping && !mapping_mapped(mapping))
set_bit(PG_dcache_dirty, &page->flags);
else
#endif
diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c
new file mode 100644
index 000000000000..a34954d9df7d
--- /dev/null
+++ b/arch/arm/mm/highmem.c
@@ -0,0 +1,116 @@
+/*
+ * arch/arm/mm/highmem.c -- ARM highmem support
+ *
+ * Author: Nicolas Pitre
+ * Created: september 8, 2008
+ * Copyright: Marvell Semiconductors Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/highmem.h>
+#include <linux/interrupt.h>
+#include <asm/fixmap.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include "mm.h"
+
+void *kmap(struct page *page)
+{
+ might_sleep();
+ if (!PageHighMem(page))
+ return page_address(page);
+ return kmap_high(page);
+}
+EXPORT_SYMBOL(kmap);
+
+void kunmap(struct page *page)
+{
+ BUG_ON(in_interrupt());
+ if (!PageHighMem(page))
+ return;
+ kunmap_high(page);
+}
+EXPORT_SYMBOL(kunmap);
+
+void *kmap_atomic(struct page *page, enum km_type type)
+{
+ unsigned int idx;
+ unsigned long vaddr;
+
+ pagefault_disable();
+ if (!PageHighMem(page))
+ return page_address(page);
+
+ idx = type + KM_TYPE_NR * smp_processor_id();
+ vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+#ifdef CONFIG_DEBUG_HIGHMEM
+ /*
+ * With debugging enabled, kunmap_atomic forces that entry to 0.
+ * Make sure it was indeed properly unmapped.
+ */
+ BUG_ON(!pte_none(*(TOP_PTE(vaddr))));
+#endif
+ set_pte_ext(TOP_PTE(vaddr), mk_pte(page, kmap_prot), 0);
+ /*
+ * When debugging is off, kunmap_atomic leaves the previous mapping
+ * in place, so this TLB flush ensures the TLB is updated with the
+ * new mapping.
+ */
+ local_flush_tlb_kernel_page(vaddr);
+
+ return (void *)vaddr;
+}
+EXPORT_SYMBOL(kmap_atomic);
+
+void kunmap_atomic(void *kvaddr, enum km_type type)
+{
+ unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
+ unsigned int idx = type + KM_TYPE_NR * smp_processor_id();
+
+ if (kvaddr >= (void *)FIXADDR_START) {
+ __cpuc_flush_dcache_page((void *)vaddr);
+#ifdef CONFIG_DEBUG_HIGHMEM
+ BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
+ set_pte_ext(TOP_PTE(vaddr), __pte(0), 0);
+ local_flush_tlb_kernel_page(vaddr);
+#else
+ (void) idx; /* to kill a warning */
+#endif
+ }
+ pagefault_enable();
+}
+EXPORT_SYMBOL(kunmap_atomic);
+
+void *kmap_atomic_pfn(unsigned long pfn, enum km_type type)
+{
+ unsigned int idx;
+ unsigned long vaddr;
+
+ pagefault_disable();
+
+ idx = type + KM_TYPE_NR * smp_processor_id();
+ vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+#ifdef CONFIG_DEBUG_HIGHMEM
+ BUG_ON(!pte_none(*(TOP_PTE(vaddr))));
+#endif
+ set_pte_ext(TOP_PTE(vaddr), pfn_pte(pfn, kmap_prot), 0);
+ local_flush_tlb_kernel_page(vaddr);
+
+ return (void *)vaddr;
+}
+
+struct page *kmap_atomic_to_page(const void *ptr)
+{
+ unsigned long vaddr = (unsigned long)ptr;
+ pte_t *pte;
+
+ if (vaddr < FIXADDR_START)
+ return virt_to_page(ptr);
+
+ pte = TOP_PTE(vaddr);
+ return pte_page(*pte);
+}
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 80fd3b69ae1f..8277802ec859 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -15,6 +15,7 @@
#include <linux/mman.h>
#include <linux/nodemask.h>
#include <linux/initrd.h>
+#include <linux/highmem.h>
#include <asm/mach-types.h>
#include <asm/sections.h>
@@ -485,7 +486,7 @@ void __init mem_init(void)
int i, node;
#ifndef CONFIG_DISCONTIGMEM
- max_mapnr = virt_to_page(high_memory) - mem_map;
+ max_mapnr = pfn_to_page(max_pfn + PHYS_PFN_OFFSET) - mem_map;
#endif
/* this will put all unused low memory onto the freelists */
@@ -504,6 +505,19 @@ void __init mem_init(void)
__phys_to_pfn(__pa(swapper_pg_dir)), NULL);
#endif
+#ifdef CONFIG_HIGHMEM
+ /* set highmem page free */
+ for_each_online_node(node) {
+ for_each_nodebank (i, &meminfo, node) {
+ unsigned long start = bank_pfn_start(&meminfo.bank[i]);
+ unsigned long end = bank_pfn_end(&meminfo.bank[i]);
+ if (start >= max_low_pfn + PHYS_PFN_OFFSET)
+ totalhigh_pages += free_area(start, end, NULL);
+ }
+ }
+ totalram_pages += totalhigh_pages;
+#endif
+
/*
* Since our memory may not be contiguous, calculate the
* real number of pages we have in this system
@@ -521,9 +535,10 @@ void __init mem_init(void)
initsize = __init_end - __init_begin;
printk(KERN_NOTICE "Memory: %luKB available (%dK code, "
- "%dK data, %dK init)\n",
+ "%dK data, %dK init, %luK highmem)\n",
(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
- codesize >> 10, datasize >> 10, initsize >> 10);
+ codesize >> 10, datasize >> 10, initsize >> 10,
+ (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)));
if (PAGE_SIZE >= 16384 && num_physpages <= 128) {
extern int sysctl_overcommit_memory;
diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h
index 95bbe112965e..c4f6f05198e0 100644
--- a/arch/arm/mm/mm.h
+++ b/arch/arm/mm/mm.h
@@ -1,7 +1,6 @@
-/* the upper-most page table pointer */
-
#ifdef CONFIG_MMU
+/* the upper-most page table pointer */
extern pmd_t *top_pmd;
#define TOP_PTE(x) pte_offset_kernel(top_pmd, x)
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index d4d082c5c2d4..b438fc4fb77b 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -18,9 +18,11 @@
#include <asm/cputype.h>
#include <asm/mach-types.h>
#include <asm/sections.h>
+#include <asm/cachetype.h>
#include <asm/setup.h>
#include <asm/sizes.h>
#include <asm/tlb.h>
+#include <asm/highmem.h>
#include <asm/mach/arch.h>
#include <asm/mach/map.h>
@@ -243,6 +245,10 @@ static struct mem_type mem_types[] = {
.prot_sect = PMD_TYPE_SECT,
.domain = DOMAIN_KERNEL,
},
+ [MT_MEMORY_NONCACHED] = {
+ .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
+ .domain = DOMAIN_KERNEL,
+ },
};
const struct mem_type *get_mem_type(unsigned int type)
@@ -406,9 +412,28 @@ static void __init build_mem_type_table(void)
kern_pgprot |= L_PTE_SHARED;
vecs_pgprot |= L_PTE_SHARED;
mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
+ mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S;
#endif
}
+ /*
+ * Non-cacheable Normal - intended for memory areas that must
+ * not cause dirty cache line writebacks when used
+ */
+ if (cpu_arch >= CPU_ARCH_ARMv6) {
+ if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
+ /* Non-cacheable Normal is XCB = 001 */
+ mem_types[MT_MEMORY_NONCACHED].prot_sect |=
+ PMD_SECT_BUFFERED;
+ } else {
+ /* For both ARMv6 and non-TEX-remapping ARMv7 */
+ mem_types[MT_MEMORY_NONCACHED].prot_sect |=
+ PMD_SECT_TEX(1);
+ }
+ } else {
+ mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE;
+ }
+
for (i = 0; i < 16; i++) {
unsigned long v = pgprot_val(protection_map[i]);
protection_map[i] = __pgprot(v | user_pgprot);
@@ -677,6 +702,10 @@ static void __init sanity_check_meminfo(void)
if (meminfo.nr_banks >= NR_BANKS) {
printk(KERN_CRIT "NR_BANKS too low, "
"ignoring high memory\n");
+ } else if (cache_is_vipt_aliasing()) {
+ printk(KERN_CRIT "HIGHMEM is not yet supported "
+ "with VIPT aliasing cache, "
+ "ignoring high memory\n");
} else {
memmove(bank + 1, bank,
(meminfo.nr_banks - i) * sizeof(*bank));
@@ -694,7 +723,7 @@ static void __init sanity_check_meminfo(void)
* the vmalloc area.
*/
if (__va(bank->start) >= VMALLOC_MIN ||
- __va(bank->start) < PAGE_OFFSET) {
+ __va(bank->start) < (void *)PAGE_OFFSET) {
printk(KERN_NOTICE "Ignoring RAM at %.8lx-%.8lx "
"(vmalloc region overlap).\n",
bank->start, bank->start + bank->size - 1);
@@ -895,6 +924,17 @@ static void __init devicemaps_init(struct machine_desc *mdesc)
flush_cache_all();
}
+static void __init kmap_init(void)
+{
+#ifdef CONFIG_HIGHMEM
+ pmd_t *pmd = pmd_off_k(PKMAP_BASE);
+ pte_t *pte = alloc_bootmem_low_pages(2 * PTRS_PER_PTE * sizeof(pte_t));
+ BUG_ON(!pmd_none(*pmd) || !pte);
+ __pmd_populate(pmd, __pa(pte) | _PAGE_KERNEL_TABLE);
+ pkmap_page_table = pte + PTRS_PER_PTE;
+#endif
+}
+
/*
* paging_init() sets up the page tables, initialises the zone memory
* maps, and sets up the zero page, bad page and bad page tables.
@@ -908,6 +948,7 @@ void __init paging_init(struct machine_desc *mdesc)
prepare_page_table();
bootmem_init();
devicemaps_init(mdesc);
+ kmap_init();
top_pmd = pmd_off_k(0xffff0000);
diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S
new file mode 100644
index 000000000000..08b8a955d5d7
--- /dev/null
+++ b/arch/arm/mm/proc-fa526.S
@@ -0,0 +1,248 @@
+/*
+ * linux/arch/arm/mm/proc-fa526.S: MMU functions for FA526
+ *
+ * Written by : Luke Lee
+ * Copyright (C) 2005 Faraday Corp.
+ * Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ *
+ * These are the low level assembler for performing cache and TLB
+ * functions on the fa526.
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/hwcap.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/pgtable.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+
+#include "proc-macros.S"
+
+#define CACHE_DLINESIZE 16
+
+ .text
+/*
+ * cpu_fa526_proc_init()
+ */
+ENTRY(cpu_fa526_proc_init)
+ mov pc, lr
+
+/*
+ * cpu_fa526_proc_fin()
+ */
+ENTRY(cpu_fa526_proc_fin)
+ stmfd sp!, {lr}
+ mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
+ msr cpsr_c, ip
+ bl fa_flush_kern_cache_all
+ mrc p15, 0, r0, c1, c0, 0 @ ctrl register
+ bic r0, r0, #0x1000 @ ...i............
+ bic r0, r0, #0x000e @ ............wca.
+ mcr p15, 0, r0, c1, c0, 0 @ disable caches
+ nop
+ nop
+ ldmfd sp!, {pc}
+
+/*
+ * cpu_fa526_reset(loc)
+ *
+ * Perform a soft reset of the system. Put the CPU into the
+ * same state as it would be if it had been reset, and branch
+ * to what would be the reset vector.
+ *
+ * loc: location to jump to for soft reset
+ */
+ .align 4
+ENTRY(cpu_fa526_reset)
+/* TODO: Use CP8 if possible... */
+ mov ip, #0
+ mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches
+ mcr p15, 0, ip, c7, c10, 4 @ drain WB
+#ifdef CONFIG_MMU
+ mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
+#endif
+ mrc p15, 0, ip, c1, c0, 0 @ ctrl register
+ bic ip, ip, #0x000f @ ............wcam
+ bic ip, ip, #0x1100 @ ...i...s........
+ bic ip, ip, #0x0800 @ BTB off
+ mcr p15, 0, ip, c1, c0, 0 @ ctrl register
+ nop
+ nop
+ mov pc, r0
+
+/*
+ * cpu_fa526_do_idle()
+ */
+ .align 4
+ENTRY(cpu_fa526_do_idle)
+ mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
+ mov pc, lr
+
+
+ENTRY(cpu_fa526_dcache_clean_area)
+1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
+ add r0, r0, #CACHE_DLINESIZE
+ subs r1, r1, #CACHE_DLINESIZE
+ bhi 1b
+ mcr p15, 0, r0, c7, c10, 4 @ drain WB
+ mov pc, lr
+
+/* =============================== PageTable ============================== */
+
+/*
+ * cpu_fa526_switch_mm(pgd)
+ *
+ * Set the translation base pointer to be as described by pgd.
+ *
+ * pgd: new page tables
+ */
+ .align 4
+ENTRY(cpu_fa526_switch_mm)
+#ifdef CONFIG_MMU
+ mov ip, #0
+#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
+ mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache
+#else
+ mcr p15, 0, ip, c7, c14, 0 @ clean and invalidate whole D cache
+#endif
+ mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache
+ mcr p15, 0, ip, c7, c5, 6 @ invalidate BTB since mm changed
+ mcr p15, 0, ip, c7, c10, 4 @ data write barrier
+ mcr p15, 0, ip, c7, c5, 4 @ prefetch flush
+ mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
+ mcr p15, 0, ip, c8, c7, 0 @ invalidate UTLB
+#endif
+ mov pc, lr
+
+/*
+ * cpu_fa526_set_pte_ext(ptep, pte, ext)
+ *
+ * Set a PTE and flush it out
+ */
+ .align 4
+ENTRY(cpu_fa526_set_pte_ext)
+#ifdef CONFIG_MMU
+ armv3_set_pte_ext
+ mov r0, r0
+ mcr p15, 0, r0, c7, c10, 1 @ clean D entry
+ mov r0, #0
+ mcr p15, 0, r0, c7, c10, 4 @ drain WB
+#endif
+ mov pc, lr
+
+ __INIT
+
+ .type __fa526_setup, #function
+__fa526_setup:
+ /* On return of this routine, r0 must carry correct flags for CFG register */
+ mov r0, #0
+ mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4
+ mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4
+#ifdef CONFIG_MMU
+ mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4
+#endif
+ mcr p15, 0, r0, c7, c5, 5 @ invalidate IScratchpad RAM
+
+ mov r0, #1
+ mcr p15, 0, r0, c1, c1, 0 @ turn-on ECR
+
+ mov r0, #0
+ mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB All
+ mcr p15, 0, r0, c7, c10, 4 @ data write barrier
+ mcr p15, 0, r0, c7, c5, 4 @ prefetch flush
+
+ mov r0, #0x1f @ Domains 0, 1 = manager, 2 = client
+ mcr p15, 0, r0, c3, c0 @ load domain access register
+
+ mrc p15, 0, r0, c1, c0 @ get control register v4
+ ldr r5, fa526_cr1_clear
+ bic r0, r0, r5
+ ldr r5, fa526_cr1_set
+ orr r0, r0, r5
+ mov pc, lr
+ .size __fa526_setup, . - __fa526_setup
+
+ /*
+ * .RVI ZFRS BLDP WCAM
+ * ..11 1001 .111 1101
+ *
+ */
+ .type fa526_cr1_clear, #object
+ .type fa526_cr1_set, #object
+fa526_cr1_clear:
+ .word 0x3f3f
+fa526_cr1_set:
+ .word 0x397D
+
+ __INITDATA
+
+/*
+ * Purpose : Function pointers used to access above functions - all calls
+ * come through these
+ */
+ .type fa526_processor_functions, #object
+fa526_processor_functions:
+ .word v4_early_abort
+ .word pabort_noifar
+ .word cpu_fa526_proc_init
+ .word cpu_fa526_proc_fin
+ .word cpu_fa526_reset
+ .word cpu_fa526_do_idle
+ .word cpu_fa526_dcache_clean_area
+ .word cpu_fa526_switch_mm
+ .word cpu_fa526_set_pte_ext
+ .size fa526_processor_functions, . - fa526_processor_functions
+
+ .section ".rodata"
+
+ .type cpu_arch_name, #object
+cpu_arch_name:
+ .asciz "armv4"
+ .size cpu_arch_name, . - cpu_arch_name
+
+ .type cpu_elf_name, #object
+cpu_elf_name:
+ .asciz "v4"
+ .size cpu_elf_name, . - cpu_elf_name
+
+ .type cpu_fa526_name, #object
+cpu_fa526_name:
+ .asciz "FA526"
+ .size cpu_fa526_name, . - cpu_fa526_name
+
+ .align
+
+ .section ".proc.info.init", #alloc, #execinstr
+
+ .type __fa526_proc_info,#object
+__fa526_proc_info:
+ .long 0x66015261
+ .long 0xff01fff1
+ .long PMD_TYPE_SECT | \
+ PMD_SECT_BUFFERABLE | \
+ PMD_SECT_CACHEABLE | \
+ PMD_BIT4 | \
+ PMD_SECT_AP_WRITE | \
+ PMD_SECT_AP_READ
+ .long PMD_TYPE_SECT | \
+ PMD_BIT4 | \
+ PMD_SECT_AP_WRITE | \
+ PMD_SECT_AP_READ
+ b __fa526_setup
+ .long cpu_arch_name
+ .long cpu_elf_name
+ .long HWCAP_SWP | HWCAP_HALF
+ .long cpu_fa526_name
+ .long fa526_processor_functions
+ .long fa_tlb_fns
+ .long fa_user_fns
+ .long fa_cache_fns
+ .size __fa526_proc_info, . - __fa526_proc_info
diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S
new file mode 100644
index 000000000000..540f5078496b
--- /dev/null
+++ b/arch/arm/mm/proc-mohawk.S
@@ -0,0 +1,416 @@
+/*
+ * linux/arch/arm/mm/proc-mohawk.S: MMU functions for Marvell PJ1 core
+ *
+ * PJ1 (codename Mohawk) is a hybrid of the xscale3 and Marvell's own core.
+ *
+ * Heavily based on proc-arm926.S and proc-xsc3.S
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/hwcap.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/pgtable.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include "proc-macros.S"
+
+/*
+ * This is the maximum size of an area which will be flushed. If the
+ * area is larger than this, then we flush the whole cache.
+ */
+#define CACHE_DLIMIT 32768
+
+/*
+ * The cache line size of the L1 D cache.
+ */
+#define CACHE_DLINESIZE 32
+
+/*
+ * cpu_mohawk_proc_init()
+ */
+ENTRY(cpu_mohawk_proc_init)
+ mov pc, lr
+
+/*
+ * cpu_mohawk_proc_fin()
+ */
+ENTRY(cpu_mohawk_proc_fin)
+ stmfd sp!, {lr}
+ mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
+ msr cpsr_c, ip
+ bl mohawk_flush_kern_cache_all
+ mrc p15, 0, r0, c1, c0, 0 @ ctrl register
+ bic r0, r0, #0x1800 @ ...iz...........
+ bic r0, r0, #0x0006 @ .............ca.
+ mcr p15, 0, r0, c1, c0, 0 @ disable caches
+ ldmfd sp!, {pc}
+
+/*
+ * cpu_mohawk_reset(loc)
+ *
+ * Perform a soft reset of the system. Put the CPU into the
+ * same state as it would be if it had been reset, and branch
+ * to what would be the reset vector.
+ *
+ * loc: location to jump to for soft reset
+ *
+ * (same as arm926)
+ */
+ .align 5
+ENTRY(cpu_mohawk_reset)
+ mov ip, #0
+ mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches
+ mcr p15, 0, ip, c7, c10, 4 @ drain WB
+ mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
+ mrc p15, 0, ip, c1, c0, 0 @ ctrl register
+ bic ip, ip, #0x0007 @ .............cam
+ bic ip, ip, #0x1100 @ ...i...s........
+ mcr p15, 0, ip, c1, c0, 0 @ ctrl register
+ mov pc, r0
+
+/*
+ * cpu_mohawk_do_idle()
+ *
+ * Called with IRQs disabled
+ */
+ .align 5
+ENTRY(cpu_mohawk_do_idle)
+ mov r0, #0
+ mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
+ mcr p15, 0, r0, c7, c0, 4 @ wait for interrupt
+ mov pc, lr
+
+/*
+ * flush_user_cache_all()
+ *
+ * Clean and invalidate all cache entries in a particular
+ * address space.
+ */
+ENTRY(mohawk_flush_user_cache_all)
+ /* FALLTHROUGH */
+
+/*
+ * flush_kern_cache_all()
+ *
+ * Clean and invalidate the entire cache.
+ */
+ENTRY(mohawk_flush_kern_cache_all)
+ mov r2, #VM_EXEC
+ mov ip, #0
+__flush_whole_cache:
+ mcr p15, 0, ip, c7, c14, 0 @ clean & invalidate all D cache
+ tst r2, #VM_EXEC
+ mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
+ mcrne p15, 0, ip, c7, c10, 0 @ drain write buffer
+ mov pc, lr
+
+/*
+ * flush_user_cache_range(start, end, flags)
+ *
+ * Clean and invalidate a range of cache entries in the
+ * specified address range.
+ *
+ * - start - start address (inclusive)
+ * - end - end address (exclusive)
+ * - flags - vm_flags describing address space
+ *
+ * (same as arm926)
+ */
+ENTRY(mohawk_flush_user_cache_range)
+ mov ip, #0
+ sub r3, r1, r0 @ calculate total size
+ cmp r3, #CACHE_DLIMIT
+ bgt __flush_whole_cache
+1: tst r2, #VM_EXEC
+ mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry
+ mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry
+ add r0, r0, #CACHE_DLINESIZE
+ mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry
+ mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry
+ add r0, r0, #CACHE_DLINESIZE
+ cmp r0, r1
+ blo 1b
+ tst r2, #VM_EXEC
+ mcrne p15, 0, ip, c7, c10, 4 @ drain WB
+ mov pc, lr
+
+/*
+ * coherent_kern_range(start, end)
+ *
+ * Ensure coherency between the Icache and the Dcache in the
+ * region described by start, end. If you have non-snooping
+ * Harvard caches, you need to implement this function.
+ *
+ * - start - virtual start address
+ * - end - virtual end address
+ */
+ENTRY(mohawk_coherent_kern_range)
+ /* FALLTHROUGH */
+
+/*
+ * coherent_user_range(start, end)
+ *
+ * Ensure coherency between the Icache and the Dcache in the
+ * region described by start, end. If you have non-snooping
+ * Harvard caches, you need to implement this function.
+ *
+ * - start - virtual start address
+ * - end - virtual end address
+ *
+ * (same as arm926)
+ */
+ENTRY(mohawk_coherent_user_range)
+ bic r0, r0, #CACHE_DLINESIZE - 1
+1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
+ mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry
+ add r0, r0, #CACHE_DLINESIZE
+ cmp r0, r1
+ blo 1b
+ mcr p15, 0, r0, c7, c10, 4 @ drain WB
+ mov pc, lr
+
+/*
+ * flush_kern_dcache_page(void *page)
+ *
+ * Ensure no D cache aliasing occurs, either with itself or
+ * the I cache
+ *
+ * - addr - page aligned address
+ */
+ENTRY(mohawk_flush_kern_dcache_page)
+ add r1, r0, #PAGE_SZ
+1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry
+ add r0, r0, #CACHE_DLINESIZE
+ cmp r0, r1
+ blo 1b
+ mov r0, #0
+ mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
+ mcr p15, 0, r0, c7, c10, 4 @ drain WB
+ mov pc, lr
+
+/*
+ * dma_inv_range(start, end)
+ *
+ * Invalidate (discard) the specified virtual address range.
+ * May not write back any entries. If 'start' or 'end'
+ * are not cache line aligned, those lines must be written
+ * back.
+ *
+ * - start - virtual start address
+ * - end - virtual end address
+ *
+ * (same as v4wb)
+ */
+ENTRY(mohawk_dma_inv_range)
+ tst r0, #CACHE_DLINESIZE - 1
+ mcrne p15, 0, r0, c7, c10, 1 @ clean D entry
+ tst r1, #CACHE_DLINESIZE - 1
+ mcrne p15, 0, r1, c7, c10, 1 @ clean D entry
+ bic r0, r0, #CACHE_DLINESIZE - 1
+1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
+ add r0, r0, #CACHE_DLINESIZE
+ cmp r0, r1
+ blo 1b
+ mcr p15, 0, r0, c7, c10, 4 @ drain WB
+ mov pc, lr
+
+/*
+ * dma_clean_range(start, end)
+ *
+ * Clean the specified virtual address range.
+ *
+ * - start - virtual start address
+ * - end - virtual end address
+ *
+ * (same as v4wb)
+ */
+ENTRY(mohawk_dma_clean_range)
+ bic r0, r0, #CACHE_DLINESIZE - 1
+1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
+ add r0, r0, #CACHE_DLINESIZE
+ cmp r0, r1
+ blo 1b
+ mcr p15, 0, r0, c7, c10, 4 @ drain WB
+ mov pc, lr
+
+/*
+ * dma_flush_range(start, end)
+ *
+ * Clean and invalidate the specified virtual address range.
+ *
+ * - start - virtual start address
+ * - end - virtual end address
+ */
+ENTRY(mohawk_dma_flush_range)
+ bic r0, r0, #CACHE_DLINESIZE - 1
+1:
+ mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry
+ add r0, r0, #CACHE_DLINESIZE
+ cmp r0, r1
+ blo 1b
+ mcr p15, 0, r0, c7, c10, 4 @ drain WB
+ mov pc, lr
+
+ENTRY(mohawk_cache_fns)
+ .long mohawk_flush_kern_cache_all
+ .long mohawk_flush_user_cache_all
+ .long mohawk_flush_user_cache_range
+ .long mohawk_coherent_kern_range
+ .long mohawk_coherent_user_range
+ .long mohawk_flush_kern_dcache_page
+ .long mohawk_dma_inv_range
+ .long mohawk_dma_clean_range
+ .long mohawk_dma_flush_range
+
+ENTRY(cpu_mohawk_dcache_clean_area)
+1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
+ add r0, r0, #CACHE_DLINESIZE
+ subs r1, r1, #CACHE_DLINESIZE
+ bhi 1b
+ mcr p15, 0, r0, c7, c10, 4 @ drain WB
+ mov pc, lr
+
+/*
+ * cpu_mohawk_switch_mm(pgd)
+ *
+ * Set the translation base pointer to be as described by pgd.
+ *
+ * pgd: new page tables
+ */
+ .align 5
+ENTRY(cpu_mohawk_switch_mm)
+ mov ip, #0
+ mcr p15, 0, ip, c7, c14, 0 @ clean & invalidate all D cache
+ mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache
+ mcr p15, 0, ip, c7, c10, 4 @ drain WB
+ orr r0, r0, #0x18 @ cache the page table in L2
+ mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
+ mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
+ mov pc, lr
+
+/*
+ * cpu_mohawk_set_pte_ext(ptep, pte, ext)
+ *
+ * Set a PTE and flush it out
+ */
+ .align 5
+ENTRY(cpu_mohawk_set_pte_ext)
+ armv3_set_pte_ext
+ mov r0, r0
+ mcr p15, 0, r0, c7, c10, 1 @ clean D entry
+ mcr p15, 0, r0, c7, c10, 4 @ drain WB
+ mov pc, lr
+
+ __INIT
+
+ .type __mohawk_setup, #function
+__mohawk_setup:
+ mov r0, #0
+ mcr p15, 0, r0, c7, c7 @ invalidate I,D caches
+ mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
+ mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs
+ orr r4, r4, #0x18 @ cache the page table in L2
+ mcr p15, 0, r4, c2, c0, 0 @ load page table pointer
+
+ mov r0, #0 @ don't allow CP access
+ mcr p15, 0, r0, c15, c1, 0 @ write CP access register
+
+ adr r5, mohawk_crval
+ ldmia r5, {r5, r6}
+ mrc p15, 0, r0, c1, c0 @ get control register
+ bic r0, r0, r5
+ orr r0, r0, r6
+ mov pc, lr
+
+ .size __mohawk_setup, . - __mohawk_setup
+
+ /*
+ * R
+ * .RVI ZFRS BLDP WCAM
+ * .011 1001 ..00 0101
+ *
+ */
+ .type mohawk_crval, #object
+mohawk_crval:
+ crval clear=0x00007f3f, mmuset=0x00003905, ucset=0x00001134
+
+ __INITDATA
+
+/*
+ * Purpose : Function pointers used to access above functions - all calls
+ * come through these
+ */
+ .type mohawk_processor_functions, #object
+mohawk_processor_functions:
+ .word v5t_early_abort
+ .word pabort_noifar
+ .word cpu_mohawk_proc_init
+ .word cpu_mohawk_proc_fin
+ .word cpu_mohawk_reset
+ .word cpu_mohawk_do_idle
+ .word cpu_mohawk_dcache_clean_area
+ .word cpu_mohawk_switch_mm
+ .word cpu_mohawk_set_pte_ext
+ .size mohawk_processor_functions, . - mohawk_processor_functions
+
+ .section ".rodata"
+
+ .type cpu_arch_name, #object
+cpu_arch_name:
+ .asciz "armv5te"
+ .size cpu_arch_name, . - cpu_arch_name
+
+ .type cpu_elf_name, #object
+cpu_elf_name:
+ .asciz "v5"
+ .size cpu_elf_name, . - cpu_elf_name
+
+ .type cpu_mohawk_name, #object
+cpu_mohawk_name:
+ .asciz "Marvell 88SV331x"
+ .size cpu_mohawk_name, . - cpu_mohawk_name
+
+ .align
+
+ .section ".proc.info.init", #alloc, #execinstr
+
+ .type __88sv331x_proc_info,#object
+__88sv331x_proc_info:
+ .long 0x56158000 @ Marvell 88SV331x (MOHAWK)
+ .long 0xfffff000
+ .long PMD_TYPE_SECT | \
+ PMD_SECT_BUFFERABLE | \
+ PMD_SECT_CACHEABLE | \
+ PMD_BIT4 | \
+ PMD_SECT_AP_WRITE | \
+ PMD_SECT_AP_READ
+ .long PMD_TYPE_SECT | \
+ PMD_BIT4 | \
+ PMD_SECT_AP_WRITE | \
+ PMD_SECT_AP_READ
+ b __mohawk_setup
+ .long cpu_arch_name
+ .long cpu_elf_name
+ .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
+ .long cpu_mohawk_name
+ .long mohawk_processor_functions
+ .long v4wbi_tlb_fns
+ .long v4wb_user_fns
+ .long mohawk_cache_fns
+ .size __88sv331x_proc_info, . - __88sv331x_proc_info
diff --git a/arch/arm/mm/tlb-fa.S b/arch/arm/mm/tlb-fa.S
new file mode 100644
index 000000000000..9694f1f6f485
--- /dev/null
+++ b/arch/arm/mm/tlb-fa.S
@@ -0,0 +1,75 @@
+/*
+ * linux/arch/arm/mm/tlb-fa.S
+ *
+ * Copyright (C) 2005 Faraday Corp.
+ * Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
+ *
+ * Based on tlb-v4wbi.S:
+ * Copyright (C) 1997-2002 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * ARM architecture version 4, Faraday variation.
+ * This assume an unified TLBs, with a write buffer, and branch target buffer (BTB)
+ *
+ * Processors: FA520 FA526 FA626
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/asm-offsets.h>
+#include <asm/tlbflush.h>
+#include "proc-macros.S"
+
+
+/*
+ * flush_user_tlb_range(start, end, mm)
+ *
+ * Invalidate a range of TLB entries in the specified address space.
+ *
+ * - start - range start address
+ * - end - range end address
+ * - mm - mm_struct describing address space
+ */
+ .align 4
+ENTRY(fa_flush_user_tlb_range)
+ vma_vm_mm ip, r2
+ act_mm r3 @ get current->active_mm
+ eors r3, ip, r3 @ == mm ?
+ movne pc, lr @ no, we dont do anything
+ mov r3, #0
+ mcr p15, 0, r3, c7, c10, 4 @ drain WB
+ bic r0, r0, #0x0ff
+ bic r0, r0, #0xf00
+1: mcr p15, 0, r0, c8, c7, 1 @ invalidate UTLB entry
+ add r0, r0, #PAGE_SZ
+ cmp r0, r1
+ blo 1b
+ mcr p15, 0, r3, c7, c5, 6 @ invalidate BTB
+ mcr p15, 0, r3, c7, c10, 4 @ data write barrier
+ mov pc, lr
+
+
+ENTRY(fa_flush_kern_tlb_range)
+ mov r3, #0
+ mcr p15, 0, r3, c7, c10, 4 @ drain WB
+ bic r0, r0, #0x0ff
+ bic r0, r0, #0xf00
+1: mcr p15, 0, r0, c8, c7, 1 @ invalidate UTLB entry
+ add r0, r0, #PAGE_SZ
+ cmp r0, r1
+ blo 1b
+ mcr p15, 0, r3, c7, c5, 6 @ invalidate BTB
+ mcr p15, 0, r3, c7, c10, 4 @ data write barrier
+ mcr p15, 0, r3, c7, c5, 4 @ prefetch flush
+ mov pc, lr
+
+ __INITDATA
+
+ .type fa_tlb_fns, #object
+ENTRY(fa_tlb_fns)
+ .long fa_flush_user_tlb_range
+ .long fa_flush_kern_tlb_range
+ .long fa_tlb_flags
+ .size fa_tlb_fns, . - fa_tlb_fns