aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--arch/powerpc/mm/fsl_booke_mmu.c80
-rw-r--r--arch/powerpc/mm/gup.c13
-rw-r--r--arch/powerpc/mm/hash_low_64.S59
-rw-r--r--arch/powerpc/mm/hash_native_64.c38
-rw-r--r--arch/powerpc/mm/hash_utils_64.c139
-rw-r--r--arch/powerpc/mm/hugepage-hash64.c6
-rw-r--r--arch/powerpc/mm/hugetlbpage-book3e.c54
-rw-r--r--arch/powerpc/mm/hugetlbpage-hash64.c4
-rw-r--r--arch/powerpc/mm/hugetlbpage.c14
-rw-r--r--arch/powerpc/mm/mem.c17
-rw-r--r--arch/powerpc/mm/mmu_context_nohash.c12
-rw-r--r--arch/powerpc/mm/mmu_decl.h2
-rw-r--r--arch/powerpc/mm/numa.c122
-rw-r--r--arch/powerpc/mm/pgtable.c3
-rw-r--r--arch/powerpc/mm/pgtable_32.c1
-rw-r--r--arch/powerpc/mm/pgtable_64.c32
-rw-r--r--arch/powerpc/mm/slb.c14
-rw-r--r--arch/powerpc/mm/slb_low.S14
-rw-r--r--arch/powerpc/mm/slice.c2
-rw-r--r--arch/powerpc/mm/subpage-prot.c2
-rw-r--r--arch/powerpc/mm/tlb_hash64.c1
-rw-r--r--arch/powerpc/mm/tlb_low_64e.S227
-rw-r--r--arch/powerpc/mm/tlb_nohash.c132
-rw-r--r--arch/powerpc/mm/tlb_nohash_low.S4
24 files changed, 765 insertions, 227 deletions
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index 07ba45b0f07c..94cd728166d3 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -52,6 +52,7 @@
#include <asm/smp.h>
#include <asm/machdep.h>
#include <asm/setup.h>
+#include <asm/paca.h>
#include "mmu_decl.h"
@@ -171,11 +172,10 @@ unsigned long calc_cam_sz(unsigned long ram, unsigned long virt,
return 1UL << camsize;
}
-unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx)
+static unsigned long map_mem_in_cams_addr(phys_addr_t phys, unsigned long virt,
+ unsigned long ram, int max_cam_idx)
{
int i;
- unsigned long virt = PAGE_OFFSET;
- phys_addr_t phys = memstart_addr;
unsigned long amount_mapped = 0;
/* Calculate CAM values */
@@ -192,9 +192,23 @@ unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx)
}
tlbcam_index = i;
+#ifdef CONFIG_PPC64
+ get_paca()->tcd.esel_next = i;
+ get_paca()->tcd.esel_max = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;
+ get_paca()->tcd.esel_first = i;
+#endif
+
return amount_mapped;
}
+unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx)
+{
+ unsigned long virt = PAGE_OFFSET;
+ phys_addr_t phys = memstart_addr;
+
+ return map_mem_in_cams_addr(phys, virt, ram, max_cam_idx);
+}
+
#ifdef CONFIG_PPC32
#if defined(CONFIG_LOWMEM_CAM_NUM_BOOL) && (CONFIG_LOWMEM_CAM_NUM >= NUM_TLBCAMS)
@@ -222,7 +236,9 @@ void __init adjust_total_lowmem(void)
/* adjust lowmem size to __max_low_memory */
ram = min((phys_addr_t)__max_low_memory, (phys_addr_t)total_lowmem);
+ i = switch_to_as1();
__max_low_memory = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM);
+ restore_to_as0(i, 0, 0, 1);
pr_info("Memory CAM mapping: ");
for (i = 0; i < tlbcam_index - 1; i++)
@@ -241,4 +257,62 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base,
/* 64M mapped initially according to head_fsl_booke.S */
memblock_set_current_limit(min_t(u64, limit, 0x04000000));
}
+
+#ifdef CONFIG_RELOCATABLE
+int __initdata is_second_reloc;
+notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start)
+{
+ unsigned long base = KERNELBASE;
+
+ kernstart_addr = start;
+ if (is_second_reloc) {
+ virt_phys_offset = PAGE_OFFSET - memstart_addr;
+ return;
+ }
+
+ /*
+ * Relocatable kernel support based on processing of dynamic
+ * relocation entries. Before we get the real memstart_addr,
+ * We will compute the virt_phys_offset like this:
+ * virt_phys_offset = stext.run - kernstart_addr
+ *
+ * stext.run = (KERNELBASE & ~0x3ffffff) +
+ * (kernstart_addr & 0x3ffffff)
+ * When we relocate, we have :
+ *
+ * (kernstart_addr & 0x3ffffff) = (stext.run & 0x3ffffff)
+ *
+ * hence:
+ * virt_phys_offset = (KERNELBASE & ~0x3ffffff) -
+ * (kernstart_addr & ~0x3ffffff)
+ *
+ */
+ start &= ~0x3ffffff;
+ base &= ~0x3ffffff;
+ virt_phys_offset = base - start;
+ early_get_first_memblock_info(__va(dt_ptr), NULL);
+ /*
+ * We now get the memstart_addr, then we should check if this
+ * address is the same as what the PAGE_OFFSET map to now. If
+ * not we have to change the map of PAGE_OFFSET to memstart_addr
+ * and do a second relocation.
+ */
+ if (start != memstart_addr) {
+ int n;
+ long offset = start - memstart_addr;
+
+ is_second_reloc = 1;
+ n = switch_to_as1();
+ /* map a 64M area for the second relocation */
+ if (memstart_addr > start)
+ map_mem_in_cams(0x4000000, CONFIG_LOWMEM_CAM_NUM);
+ else
+ map_mem_in_cams_addr(start, PAGE_OFFSET + offset,
+ 0x4000000, CONFIG_LOWMEM_CAM_NUM);
+ restore_to_as0(n, offset, __va(dt_ptr), 1);
+ /* We should never reach here */
+ panic("Relocation error");
+ }
+}
+#endif
#endif
diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c
index c5f734e20b0f..d8746684f606 100644
--- a/arch/powerpc/mm/gup.c
+++ b/arch/powerpc/mm/gup.c
@@ -36,6 +36,11 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
do {
pte_t pte = ACCESS_ONCE(*ptep);
struct page *page;
+ /*
+ * Similar to the PMD case, NUMA hinting must take slow path
+ */
+ if (pte_numa(pte))
+ return 0;
if ((pte_val(pte) & mask) != result)
return 0;
@@ -75,6 +80,14 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
if (pmd_none(pmd) || pmd_trans_splitting(pmd))
return 0;
if (pmd_huge(pmd) || pmd_large(pmd)) {
+ /*
+ * NUMA hinting faults need to be handled in the GUP
+ * slowpath for accounting purposes and so that they
+ * can be serialised against THP migration.
+ */
+ if (pmd_numa(pmd))
+ return 0;
+
if (!gup_hugepte((pte_t *)pmdp, PMD_SIZE, addr, next,
write, pages, nr))
return 0;
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index d3cbda62857b..057cbbb4c576 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -148,7 +148,10 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
andc r0,r30,r0 /* r0 = pte & ~r0 */
rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
- ori r3,r3,HPTE_R_C /* Always add "C" bit for perf. */
+ /*
+ * Always add "C" bit for perf. Memory coherence is always enabled
+ */
+ ori r3,r3,HPTE_R_C | HPTE_R_M
/* We eventually do the icache sync here (maybe inline that
* code rather than call a C function...)
@@ -156,7 +159,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
BEGIN_FTR_SECTION
mr r4,r30
mr r5,r7
- bl .hash_page_do_lazy_icache
+ bl hash_page_do_lazy_icache
END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
/* At this point, r3 contains new PP bits, save them in
@@ -198,7 +201,8 @@ htab_insert_pte:
li r8,MMU_PAGE_4K /* page size */
li r9,MMU_PAGE_4K /* actual page size */
ld r10,STK_PARAM(R9)(r1) /* segment size */
-_GLOBAL(htab_call_hpte_insert1)
+.globl htab_call_hpte_insert1
+htab_call_hpte_insert1:
bl . /* Patched by htab_finish_init() */
cmpdi 0,r3,0
bge htab_pte_insert_ok /* Insertion successful */
@@ -222,7 +226,8 @@ _GLOBAL(htab_call_hpte_insert1)
li r8,MMU_PAGE_4K /* page size */
li r9,MMU_PAGE_4K /* actual page size */
ld r10,STK_PARAM(R9)(r1) /* segment size */
-_GLOBAL(htab_call_hpte_insert2)
+.globl htab_call_hpte_insert2
+htab_call_hpte_insert2:
bl . /* Patched by htab_finish_init() */
cmpdi 0,r3,0
bge+ htab_pte_insert_ok /* Insertion successful */
@@ -239,7 +244,8 @@ _GLOBAL(htab_call_hpte_insert2)
2: and r0,r5,r27
rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
/* Call ppc_md.hpte_remove */
-_GLOBAL(htab_call_hpte_remove)
+.globl htab_call_hpte_remove
+htab_call_hpte_remove:
bl . /* Patched by htab_finish_init() */
/* Try all again */
@@ -293,7 +299,8 @@ htab_modify_pte:
li r7,MMU_PAGE_4K /* actual page size */
ld r8,STK_PARAM(R9)(r1) /* segment size */
ld r9,STK_PARAM(R8)(r1) /* get "local" param */
-_GLOBAL(htab_call_hpte_updatepp)
+.globl htab_call_hpte_updatepp
+htab_call_hpte_updatepp:
bl . /* Patched by htab_finish_init() */
/* if we failed because typically the HPTE wasn't really here
@@ -457,7 +464,10 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
andc r0,r3,r0 /* r0 = pte & ~r0 */
rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
- ori r3,r3,HPTE_R_C /* Always add "C" bit for perf. */
+ /*
+ * Always add "C" bit for perf. Memory coherence is always enabled
+ */
+ ori r3,r3,HPTE_R_C | HPTE_R_M
/* We eventually do the icache sync here (maybe inline that
* code rather than call a C function...)
@@ -465,7 +475,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
BEGIN_FTR_SECTION
mr r4,r30
mr r5,r7
- bl .hash_page_do_lazy_icache
+ bl hash_page_do_lazy_icache
END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
/* At this point, r3 contains new PP bits, save them in
@@ -520,7 +530,8 @@ htab_special_pfn:
li r8,MMU_PAGE_4K /* page size */
li r9,MMU_PAGE_4K /* actual page size */
ld r10,STK_PARAM(R9)(r1) /* segment size */
-_GLOBAL(htab_call_hpte_insert1)
+.globl htab_call_hpte_insert1
+htab_call_hpte_insert1:
bl . /* patched by htab_finish_init() */
cmpdi 0,r3,0
bge htab_pte_insert_ok /* Insertion successful */
@@ -548,7 +559,8 @@ _GLOBAL(htab_call_hpte_insert1)
li r8,MMU_PAGE_4K /* page size */
li r9,MMU_PAGE_4K /* actual page size */
ld r10,STK_PARAM(R9)(r1) /* segment size */
-_GLOBAL(htab_call_hpte_insert2)
+.globl htab_call_hpte_insert2
+htab_call_hpte_insert2:
bl . /* patched by htab_finish_init() */
cmpdi 0,r3,0
bge+ htab_pte_insert_ok /* Insertion successful */
@@ -565,7 +577,8 @@ _GLOBAL(htab_call_hpte_insert2)
2: and r0,r5,r27
rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
/* Call ppc_md.hpte_remove */
-_GLOBAL(htab_call_hpte_remove)
+.globl htab_call_hpte_remove
+htab_call_hpte_remove:
bl . /* patched by htab_finish_init() */
/* Try all again */
@@ -582,7 +595,7 @@ htab_inval_old_hpte:
li r6,MMU_PAGE_64K /* psize */
ld r7,STK_PARAM(R9)(r1) /* ssize */
ld r8,STK_PARAM(R8)(r1) /* local */
- bl .flush_hash_page
+ bl flush_hash_page
/* Clear out _PAGE_HPTE_SUB bits in the new linux PTE */
lis r0,_PAGE_HPTE_SUB@h
ori r0,r0,_PAGE_HPTE_SUB@l
@@ -654,7 +667,8 @@ htab_modify_pte:
li r7,MMU_PAGE_4K /* actual page size */
ld r8,STK_PARAM(R9)(r1) /* segment size */
ld r9,STK_PARAM(R8)(r1) /* get "local" param */
-_GLOBAL(htab_call_hpte_updatepp)
+.globl htab_call_hpte_updatepp
+htab_call_hpte_updatepp:
bl . /* patched by htab_finish_init() */
/* if we failed because typically the HPTE wasn't really here
@@ -795,7 +809,10 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
andc r0,r30,r0 /* r0 = pte & ~r0 */
rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
- ori r3,r3,HPTE_R_C /* Always add "C" bit for perf. */
+ /*
+ * Always add "C" bit for perf. Memory coherence is always enabled
+ */
+ ori r3,r3,HPTE_R_C | HPTE_R_M
/* We eventually do the icache sync here (maybe inline that
* code rather than call a C function...)
@@ -803,7 +820,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
BEGIN_FTR_SECTION
mr r4,r30
mr r5,r7
- bl .hash_page_do_lazy_icache
+ bl hash_page_do_lazy_icache
END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
/* At this point, r3 contains new PP bits, save them in
@@ -848,7 +865,8 @@ ht64_insert_pte:
li r8,MMU_PAGE_64K
li r9,MMU_PAGE_64K /* actual page size */
ld r10,STK_PARAM(R9)(r1) /* segment size */
-_GLOBAL(ht64_call_hpte_insert1)
+.globl ht64_call_hpte_insert1
+ht64_call_hpte_insert1:
bl . /* patched by htab_finish_init() */
cmpdi 0,r3,0
bge ht64_pte_insert_ok /* Insertion successful */
@@ -872,7 +890,8 @@ _GLOBAL(ht64_call_hpte_insert1)
li r8,MMU_PAGE_64K
li r9,MMU_PAGE_64K /* actual page size */
ld r10,STK_PARAM(R9)(r1) /* segment size */
-_GLOBAL(ht64_call_hpte_insert2)
+.globl ht64_call_hpte_insert2
+ht64_call_hpte_insert2:
bl . /* patched by htab_finish_init() */
cmpdi 0,r3,0
bge+ ht64_pte_insert_ok /* Insertion successful */
@@ -889,7 +908,8 @@ _GLOBAL(ht64_call_hpte_insert2)
2: and r0,r5,r27
rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
/* Call ppc_md.hpte_remove */
-_GLOBAL(ht64_call_hpte_remove)
+.globl ht64_call_hpte_remove
+ht64_call_hpte_remove:
bl . /* patched by htab_finish_init() */
/* Try all again */
@@ -943,7 +963,8 @@ ht64_modify_pte:
li r7,MMU_PAGE_64K /* actual page size */
ld r8,STK_PARAM(R9)(r1) /* segment size */
ld r9,STK_PARAM(R8)(r1) /* get "local" param */
-_GLOBAL(ht64_call_hpte_updatepp)
+.globl ht64_call_hpte_updatepp
+ht64_call_hpte_updatepp:
bl . /* patched by htab_finish_init() */
/* if we failed because typically the HPTE wasn't really here
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 3ea26c25590b..cf1d325eae8b 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -82,17 +82,14 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
va |= penc << 12;
va |= ssize << 8;
- /* Add AVAL part */
- if (psize != apsize) {
- /*
- * MPSS, 64K base page size and 16MB parge page size
- * We don't need all the bits, but rest of the bits
- * must be ignored by the processor.
- * vpn cover upto 65 bits of va. (0...65) and we need
- * 58..64 bits of va.
- */
- va |= (vpn & 0xfe);
- }
+ /*
+ * AVAL bits:
+ * We don't need all the bits, but rest of the bits
+ * must be ignored by the processor.
+ * vpn cover upto 65 bits of va. (0...65) and we need
+ * 58..64 bits of va.
+ */
+ va |= (vpn & 0xfe); /* AVAL */
va |= 1; /* L */
asm volatile(ASM_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), %2)
: : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
@@ -133,17 +130,14 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
va |= penc << 12;
va |= ssize << 8;
- /* Add AVAL part */
- if (psize != apsize) {
- /*
- * MPSS, 64K base page size and 16MB parge page size
- * We don't need all the bits, but rest of the bits
- * must be ignored by the processor.
- * vpn cover upto 65 bits of va. (0...65) and we need
- * 58..64 bits of va.
- */
- va |= (vpn & 0xfe);
- }
+ /*
+ * AVAL bits:
+ * We don't need all the bits, but rest of the bits
+ * must be ignored by the processor.
+ * vpn cover upto 65 bits of va. (0...65) and we need
+ * 58..64 bits of va.
+ */
+ va |= (vpn & 0xfe);
va |= 1; /* L */
asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)"
: : "r"(va) : "memory");
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 6176b3cdf579..88fdd9d25077 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -169,9 +169,10 @@ static unsigned long htab_convert_pte_flags(unsigned long pteflags)
if ((pteflags & _PAGE_USER) && !((pteflags & _PAGE_RW) &&
(pteflags & _PAGE_DIRTY)))
rflags |= 1;
-
- /* Always add C */
- return rflags | HPTE_R_C;
+ /*
+ * Always add "C" bit for perf. Memory coherence is always enabled
+ */
+ return rflags | HPTE_R_C | HPTE_R_M;
}
int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
@@ -206,6 +207,24 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
if (overlaps_kernel_text(vaddr, vaddr + step))
tprot &= ~HPTE_R_N;
+ /* Make kvm guest trampolines executable */
+ if (overlaps_kvm_tmp(vaddr, vaddr + step))
+ tprot &= ~HPTE_R_N;
+
+ /*
+ * If relocatable, check if it overlaps interrupt vectors that
+ * are copied down to real 0. For relocatable kernel
+ * (e.g. kdump case) we copy interrupt vectors down to real
+ * address 0. Mark that region as executable. This is
+ * because on p8 system with relocation on exception feature
+ * enabled, exceptions are raised with MMU (IR=DR=1) ON. Hence
+ * in order to execute the interrupt handlers in virtual
+ * mode the vector region need to be marked as executable.
+ */
+ if ((PHYSICAL_START > MEMORY_START) &&
+ overlaps_interrupt_vector_text(vaddr, vaddr + step))
+ tprot &= ~HPTE_R_N;
+
hash = hpt_hash(vpn, shift, ssize);
hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
@@ -250,9 +269,9 @@ static int __init htab_dt_scan_seg_sizes(unsigned long node,
const char *uname, int depth,
void *data)
{
- char *type = of_get_flat_dt_prop(node, "device_type", NULL);
- __be32 *prop;
- unsigned long size = 0;
+ const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+ const __be32 *prop;
+ int size = 0;
/* We are scanning "cpu" nodes only */
if (type == NULL || strcmp(type, "cpu") != 0)
@@ -305,9 +324,9 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,
const char *uname, int depth,
void *data)
{
- char *type = of_get_flat_dt_prop(node, "device_type", NULL);
- __be32 *prop;
- unsigned long size = 0;
+ const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+ const __be32 *prop;
+ int size = 0;
/* We are scanning "cpu" nodes only */
if (type == NULL || strcmp(type, "cpu") != 0)
@@ -387,9 +406,9 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,
static int __init htab_dt_scan_hugepage_blocks(unsigned long node,
const char *uname, int depth,
void *data) {
- char *type = of_get_flat_dt_prop(node, "device_type", NULL);
- __be64 *addr_prop;
- __be32 *page_count_prop;
+ const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+ const __be64 *addr_prop;
+ const __be32 *page_count_prop;
unsigned int expected_pages;
long unsigned int phys_addr;
long unsigned int block_size;
@@ -430,6 +449,24 @@ static void mmu_psize_set_default_penc(void)
mmu_psize_defs[bpsize].penc[apsize] = -1;
}
+#ifdef CONFIG_PPC_64K_PAGES
+
+static bool might_have_hea(void)
+{
+ /*
+ * The HEA ethernet adapter requires awareness of the
+ * GX bus. Without that awareness we can easily assume
+ * we will never see an HEA ethernet device.
+ */
+#ifdef CONFIG_IBMEBUS
+ return !cpu_has_feature(CPU_FTR_ARCH_207S);
+#else
+ return false;
+#endif
+}
+
+#endif /* #ifdef CONFIG_PPC_64K_PAGES */
+
static void __init htab_init_page_sizes(void)
{
int rc;
@@ -484,10 +521,11 @@ static void __init htab_init_page_sizes(void)
mmu_linear_psize = MMU_PAGE_64K;
if (mmu_has_feature(MMU_FTR_CI_LARGE_PAGE)) {
/*
- * Don't use 64k pages for ioremap on pSeries, since
- * that would stop us accessing the HEA ethernet.
+ * When running on pSeries using 64k pages for ioremap
+ * would stop us accessing the HEA ethernet. So if we
+ * have the chance of ever seeing one, stay at 4k.
*/
- if (!machine_is(pseries))
+ if (!might_have_hea() || !machine_is(pseries))
mmu_io_psize = MMU_PAGE_64K;
} else
mmu_ci_restrictions = 1;
@@ -531,8 +569,8 @@ static int __init htab_dt_scan_pftsize(unsigned long node,
const char *uname, int depth,
void *data)
{
- char *type = of_get_flat_dt_prop(node, "device_type", NULL);
- __be32 *prop;
+ const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+ const __be32 *prop;
/* We are scanning "cpu" nodes only */
if (type == NULL || strcmp(type, "cpu") != 0)
@@ -588,47 +626,43 @@ int remove_section_mapping(unsigned long start, unsigned long end)
}
#endif /* CONFIG_MEMORY_HOTPLUG */
-#define FUNCTION_TEXT(A) ((*(unsigned long *)(A)))
+extern u32 htab_call_hpte_insert1[];
+extern u32 htab_call_hpte_insert2[];
+extern u32 htab_call_hpte_remove[];
+extern u32 htab_call_hpte_updatepp[];
+extern u32 ht64_call_hpte_insert1[];
+extern u32 ht64_call_hpte_insert2[];
+extern u32 ht64_call_hpte_remove[];
+extern u32 ht64_call_hpte_updatepp[];
static void __init htab_finish_init(void)
{
- extern unsigned int *htab_call_hpte_insert1;
- extern unsigned int *htab_call_hpte_insert2;
- extern unsigned int *htab_call_hpte_remove;
- extern unsigned int *htab_call_hpte_updatepp;
-
#ifdef CONFIG_PPC_HAS_HASH_64K
- extern unsigned int *ht64_call_hpte_insert1;
- extern unsigned int *ht64_call_hpte_insert2;
- extern unsigned int *ht64_call_hpte_remove;
- extern unsigned int *ht64_call_hpte_updatepp;
-
patch_branch(ht64_call_hpte_insert1,
- FUNCTION_TEXT(ppc_md.hpte_insert),
+ ppc_function_entry(ppc_md.hpte_insert),
BRANCH_SET_LINK);
patch_branch(ht64_call_hpte_insert2,
- FUNCTION_TEXT(ppc_md.hpte_insert),
+ ppc_function_entry(ppc_md.hpte_insert),
BRANCH_SET_LINK);
patch_branch(ht64_call_hpte_remove,
- FUNCTION_TEXT(ppc_md.hpte_remove),
+ ppc_function_entry(ppc_md.hpte_remove),
BRANCH_SET_LINK);
patch_branch(ht64_call_hpte_updatepp,
- FUNCTION_TEXT(ppc_md.hpte_updatepp),
+ ppc_function_entry(ppc_md.hpte_updatepp),
BRANCH_SET_LINK);
-
#endif /* CONFIG_PPC_HAS_HASH_64K */
patch_branch(htab_call_hpte_insert1,
- FUNCTION_TEXT(ppc_md.hpte_insert),
+ ppc_function_entry(ppc_md.hpte_insert),
BRANCH_SET_LINK);
patch_branch(htab_call_hpte_insert2,
- FUNCTION_TEXT(ppc_md.hpte_insert),
+ ppc_function_entry(ppc_md.hpte_insert),
BRANCH_SET_LINK);
patch_branch(htab_call_hpte_remove,
- FUNCTION_TEXT(ppc_md.hpte_remove),
+ ppc_function_entry(ppc_md.hpte_remove),
BRANCH_SET_LINK);
patch_branch(htab_call_hpte_updatepp,
- FUNCTION_TEXT(ppc_md.hpte_updatepp),
+ ppc_function_entry(ppc_md.hpte_updatepp),
BRANCH_SET_LINK);
}
@@ -945,6 +979,22 @@ void hash_failure_debug(unsigned long ea, unsigned long access,
trap, vsid, ssize, psize, lpsize, pte);
}
+static void check_paca_psize(unsigned long ea, struct mm_struct *mm,
+ int psize, bool user_region)
+{
+ if (user_region) {
+ if (psize != get_paca_psize(ea)) {
+ get_paca()->context = mm->context;
+ slb_flush_and_rebolt();
+ }
+ } else if (get_paca()->vmalloc_sllp !=
+ mmu_psize_defs[mmu_vmalloc_psize].sllp) {
+ get_paca()->vmalloc_sllp =
+ mmu_psize_defs[mmu_vmalloc_psize].sllp;
+ slb_vmalloc_update();
+ }
+}
+
/* Result code is:
* 0 - handled
* 1 - normal page fault
@@ -1066,6 +1116,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
WARN_ON(1);
}
#endif
+ check_paca_psize(ea, mm, psize, user_region);
+
goto bail;
}
@@ -1106,17 +1158,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
#endif
}
}
- if (user_region) {
- if (psize != get_paca_psize(ea)) {
- get_paca()->context = mm->context;
- slb_flush_and_rebolt();
- }
- } else if (get_paca()->vmalloc_sllp !=
- mmu_psize_defs[mmu_vmalloc_psize].sllp) {
- get_paca()->vmalloc_sllp =
- mmu_psize_defs[mmu_vmalloc_psize].sllp;
- slb_vmalloc_update();
- }
+
+ check_paca_psize(ea, mm, psize, user_region);
#endif /* CONFIG_PPC_64K_PAGES */
#ifdef CONFIG_PPC_HAS_HASH_64K
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
index 34de9e0cdc34..826893fcb3a7 100644
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/hugepage-hash64.c
@@ -127,7 +127,11 @@ repeat:
/* Add in WIMG bits */
rflags |= (new_pmd & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
- _PAGE_COHERENT | _PAGE_GUARDED));
+ _PAGE_GUARDED));
+ /*
+ * enable the memory coherence always
+ */
+ rflags |= HPTE_R_M;
/* Insert into the hash table, primary slot */
slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0,
diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c
index 74551b5e41e5..5e4ee2573903 100644
--- a/arch/powerpc/mm/hugetlbpage-book3e.c
+++ b/arch/powerpc/mm/hugetlbpage-book3e.c
@@ -8,6 +8,44 @@
#include <linux/mm.h>
#include <linux/hugetlb.h>
+#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC64
+static inline int tlb1_next(void)
+{
+ struct paca_struct *paca = get_paca();
+ struct tlb_core_data *tcd;
+ int this, next;
+
+ tcd = paca->tcd_ptr;
+ this = tcd->esel_next;
+
+ next = this + 1;
+ if (next >= tcd->esel_max)
+ next = tcd->esel_first;
+
+ tcd->esel_next = next;
+ return this;
+}
+#else
+static inline int tlb1_next(void)
+{
+ int index, ncams;
+
+ ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;
+
+ index = __get_cpu_var(next_tlbcam_idx);
+
+ /* Just round-robin the entries and wrap when we hit the end */
+ if (unlikely(index == ncams - 1))
+ __get_cpu_var(next_tlbcam_idx) = tlbcam_index;
+ else
+ __get_cpu_var(next_tlbcam_idx)++;
+
+ return index;
+}
+#endif /* !PPC64 */
+#endif /* FSL */
+
static inline int mmu_get_tsize(int psize)
{
return mmu_psize_defs[psize].enc;
@@ -47,7 +85,7 @@ void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
struct mm_struct *mm;
#ifdef CONFIG_PPC_FSL_BOOK3E
- int index, ncams;
+ int index;
#endif
if (unlikely(is_kernel_addr(ea)))
@@ -77,18 +115,11 @@ void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
}
#ifdef CONFIG_PPC_FSL_BOOK3E
- ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;
-
/* We have to use the CAM(TLB1) on FSL parts for hugepages */
- index = __get_cpu_var(next_tlbcam_idx);
+ index = tlb1_next();
mtspr(SPRN_MAS0, MAS0_ESEL(index) | MAS0_TLBSEL(1));
-
- /* Just round-robin the entries and wrap when we hit the end */
- if (unlikely(index == ncams - 1))
- __get_cpu_var(next_tlbcam_idx) = tlbcam_index;
- else
- __get_cpu_var(next_tlbcam_idx)++;
#endif
+
mas1 = MAS1_VALID | MAS1_TID(mm->context.id) | MAS1_TSIZE(tsize);
mas2 = ea & ~((1UL << shift) - 1);
mas2 |= (pte_val(pte) >> PTE_WIMGE_SHIFT) & MAS2_WIMGE_MASK;
@@ -103,7 +134,8 @@ void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
if (mmu_has_feature(MMU_FTR_USE_PAIRED_MAS)) {
mtspr(SPRN_MAS7_MAS3, mas7_3);
} else {
- mtspr(SPRN_MAS7, upper_32_bits(mas7_3));
+ if (mmu_has_feature(MMU_FTR_BIG_PHYS))
+ mtspr(SPRN_MAS7, upper_32_bits(mas7_3));
mtspr(SPRN_MAS3, lower_32_bits(mas7_3));
}
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index 0b7fb6761015..a5bcf9301196 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -99,6 +99,10 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
/* Add in WIMG bits */
rflags |= (new_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
_PAGE_COHERENT | _PAGE_GUARDED));
+ /*
+ * enable the memory coherence always
+ */
+ rflags |= HPTE_R_M;
slot = hpte_insert_repeating(hash, vpn, pa, rflags, 0,
mmu_psize, ssize);
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 90bb6d9409bf..7e70ae968e5f 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -86,11 +86,6 @@ int pgd_huge(pgd_t pgd)
*/
return ((pgd_val(pgd) & 0x3) != 0x0);
}
-
-int pmd_huge_support(void)
-{
- return 1;
-}
#else
int pmd_huge(pmd_t pmd)
{
@@ -106,11 +101,6 @@ int pgd_huge(pgd_t pgd)
{
return 0;
}
-
-int pmd_huge_support(void)
-{
- return 0;
-}
#endif
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
@@ -472,12 +462,13 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte)
{
struct hugepd_freelist **batchp;
- batchp = &__get_cpu_var(hugepd_freelist_cur);
+ batchp = &get_cpu_var(hugepd_freelist_cur);
if (atomic_read(&tlb->mm->mm_users) < 2 ||
cpumask_equal(mm_cpumask(tlb->mm),
cpumask_of(smp_processor_id()))) {
kmem_cache_free(hugepte_cache, hugepte);
+ put_cpu_var(hugepd_freelist_cur);
return;
}
@@ -491,6 +482,7 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte)
call_rcu_sched(&(*batchp)->rcu, hugepd_free_rcu_callback);
*batchp = NULL;
}
+ put_cpu_var(hugepd_freelist_cur);
}
#endif
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 3fa93dc7fe75..2c8e90f5789e 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -139,9 +139,14 @@ int arch_remove_memory(u64 start, u64 size)
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
struct zone *zone;
+ int ret;
zone = page_zone(pfn_to_page(start_pfn));
- return __remove_pages(zone, start_pfn, nr_pages);
+ ret = __remove_pages(zone, start_pfn, nr_pages);
+ if (!ret && (ppc_md.remove_memory))
+ ret = ppc_md.remove_memory(start, size);
+
+ return ret;
}
#endif
#endif /* CONFIG_MEMORY_HOTPLUG */
@@ -209,7 +214,7 @@ void __init do_init_bootmem(void)
/* Place all memblock_regions in the same node and merge contiguous
* memblock_regions
*/
- memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
+ memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0);
/* Add all physical memory to the bootmem map, mark each area
* present.
@@ -307,6 +312,12 @@ static void __init register_page_bootmem_info(void)
void __init mem_init(void)
{
+ /*
+ * book3s is limited to 16 page sizes due to encoding this in
+ * a 4-bit field for slices.
+ */
+ BUILD_BUG_ON(MMU_PAGE_COUNT > 16);
+
#ifdef CONFIG_SWIOTLB
swiotlb_init(0);
#endif
@@ -507,7 +518,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
* System memory should not be in /proc/iomem but various tools expect it
* (eg kdump).
*/
-static int add_system_ram_resources(void)
+static int __init add_system_ram_resources(void)
{
struct memblock_region *reg;
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index af3d78e19302..928ebe79668b 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -410,17 +410,7 @@ void __init mmu_context_init(void)
} else if (mmu_has_feature(MMU_FTR_TYPE_47x)) {
first_context = 1;
last_context = 65535;
- } else
-#ifdef CONFIG_PPC_BOOK3E_MMU
- if (mmu_has_feature(MMU_FTR_TYPE_3E)) {
- u32 mmucfg = mfspr(SPRN_MMUCFG);
- u32 pid_bits = (mmucfg & MMUCFG_PIDSIZE_MASK)
- >> MMUCFG_PIDSIZE_SHIFT;
- first_context = 1;
- last_context = (1UL << (pid_bits + 1)) - 1;
- } else
-#endif
- {
+ } else {
first_context = 1;
last_context = 255;
}
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 83eb5d5f53d5..9615d82919b8 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -148,6 +148,8 @@ extern unsigned long calc_cam_sz(unsigned long ram, unsigned long virt,
extern void MMU_init_hw(void);
extern unsigned long mmu_mapin_ram(unsigned long top);
extern void adjust_total_lowmem(void);
+extern int switch_to_as1(void);
+extern void restore_to_as0(int esel, int offset, void *dt_ptr, int bootcpu);
#endif
extern void loadcam_entry(unsigned int index);
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 078d3e00a616..3b181b22cd46 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -31,6 +31,8 @@
#include <asm/sparsemem.h>
#include <asm/prom.h>
#include <asm/smp.h>
+#include <asm/cputhreads.h>
+#include <asm/topology.h>
#include <asm/firmware.h>
#include <asm/paca.h>
#include <asm/hvcall.h>
@@ -152,9 +154,22 @@ static void __init get_node_active_region(unsigned long pfn,
}
}
-static void map_cpu_to_node(int cpu, int node)
+static void reset_numa_cpu_lookup_table(void)
+{
+ unsigned int cpu;
+
+ for_each_possible_cpu(cpu)
+ numa_cpu_lookup_table[cpu] = -1;
+}
+
+static void update_numa_cpu_lookup_table(unsigned int cpu, int node)
{
numa_cpu_lookup_table[cpu] = node;
+}
+
+static void map_cpu_to_node(int cpu, int node)
+{
+ update_numa_cpu_lookup_table(cpu, node);
dbg("adding cpu %d to node %d\n", cpu, node);
@@ -217,6 +232,7 @@ int __node_distance(int a, int b)
return distance;
}
+EXPORT_SYMBOL(__node_distance);
static void initialize_distance_lookup_table(int nid,
const __be32 *associativity)
@@ -522,11 +538,24 @@ static int of_drconf_to_nid_single(struct of_drconf_cell *drmem,
*/
static int numa_setup_cpu(unsigned long lcpu)
{
- int nid = 0;
- struct device_node *cpu = of_get_cpu_node(lcpu, NULL);
+ int nid;
+ struct device_node *cpu;
+
+ /*
+ * If a valid cpu-to-node mapping is already available, use it
+ * directly instead of querying the firmware, since it represents
+ * the most recent mapping notified to us by the platform (eg: VPHN).
+ */
+ if ((nid = numa_cpu_lookup_table[lcpu]) >= 0) {
+ map_cpu_to_node(lcpu, nid);
+ return nid;
+ }
+
+ cpu = of_get_cpu_node(lcpu, NULL);
if (!cpu) {
WARN_ON(1);
+ nid = 0;
goto out;
}
@@ -542,16 +571,38 @@ out:
return nid;
}
+static void verify_cpu_node_mapping(int cpu, int node)
+{
+ int base, sibling, i;
+
+ /* Verify that all the threads in the core belong to the same node */
+ base = cpu_first_thread_sibling(cpu);
+
+ for (i = 0; i < threads_per_core; i++) {
+ sibling = base + i;
+
+ if (sibling == cpu || cpu_is_offline(sibling))
+ continue;
+
+ if (cpu_to_node(sibling) != node) {
+ WARN(1, "CPU thread siblings %d and %d don't belong"
+ " to the same node!\n", cpu, sibling);
+ break;
+ }
+ }
+}
+
static int cpu_numa_callback(struct notifier_block *nfb, unsigned long action,
void *hcpu)
{
unsigned long lcpu = (unsigned long)hcpu;
- int ret = NOTIFY_DONE;
+ int ret = NOTIFY_DONE, nid;
switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
- numa_setup_cpu(lcpu);
+ nid = numa_setup_cpu(lcpu);
+ verify_cpu_node_mapping((int)lcpu, nid);
ret = NOTIFY_OK;
break;
#ifdef CONFIG_HOTPLUG_CPU
@@ -670,7 +721,8 @@ static void __init parse_drconf_memory(struct device_node *memory)
node_set_online(nid);
sz = numa_enforce_memory_limit(base, size);
if (sz)
- memblock_set_node(base, sz, nid);
+ memblock_set_node(base, sz,
+ &memblock.memory, nid);
} while (--ranges);
}
}
@@ -760,7 +812,7 @@ new_range:
continue;
}
- memblock_set_node(start, size, nid);
+ memblock_set_node(start, size, &memblock.memory, nid);
if (--ranges)
goto new_range;
@@ -797,7 +849,8 @@ static void __init setup_nonnuma(void)
fake_numa_create_new_node(end_pfn, &nid);
memblock_set_node(PFN_PHYS(start_pfn),
- PFN_PHYS(end_pfn - start_pfn), nid);
+ PFN_PHYS(end_pfn - start_pfn),
+ &memblock.memory, nid);
node_set_online(nid);
}
}
@@ -1067,6 +1120,7 @@ void __init do_init_bootmem(void)
*/
setup_node_to_cpumask_map();
+ reset_numa_cpu_lookup_table();
register_cpu_notifier(&ppc64_numa_nb);
cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE,
(void *)(unsigned long)boot_cpuid);
@@ -1445,6 +1499,33 @@ static int update_cpu_topology(void *data)
return 0;
}
+static int update_lookup_table(void *data)
+{
+ struct topology_update_data *update;
+
+ if (!data)
+ return -EINVAL;
+
+ /*
+ * Upon topology update, the numa-cpu lookup table needs to be updated
+ * for all threads in the core, including offline CPUs, to ensure that
+ * future hotplug operations respect the cpu-to-node associativity
+ * properly.
+ */
+ for (update = data; update; update = update->next) {
+ int nid, base, j;
+
+ nid = update->new_nid;
+ base = cpu_first_thread_sibling(update->cpu);
+
+ for (j = 0; j < threads_per_core; j++) {
+ update_numa_cpu_lookup_table(base + j, nid);
+ }
+ }
+
+ return 0;
+}
+
/*
* Update the node maps and sysfs entries for each cpu whose home node
* has changed. Returns 1 when the topology has changed, and 0 otherwise.
@@ -1511,8 +1592,30 @@ int arch_update_cpu_topology(void)
cpu = cpu_last_thread_sibling(cpu);
}
+ /*
+ * In cases where we have nothing to update (because the updates list
+ * is too short or because the new topology is same as the old one),
+ * skip invoking update_cpu_topology() via stop-machine(). This is
+ * necessary (and not just a fast-path optimization) since stop-machine
+ * can end up electing a random CPU to run update_cpu_topology(), and
+ * thus trick us into setting up incorrect cpu-node mappings (since
+ * 'updates' is kzalloc()'ed).
+ *
+ * And for the similar reason, we will skip all the following updating.
+ */
+ if (!cpumask_weight(&updated_cpus))
+ goto out;
+
stop_machine(update_cpu_topology, &updates[0], &updated_cpus);
+ /*
+ * Update the numa-cpu lookup table with the new mappings, even for
+ * offline CPUs. It is best to perform this update from the stop-
+ * machine context.
+ */
+ stop_machine(update_lookup_table, &updates[0],
+ cpumask_of(raw_smp_processor_id()));
+
for (ud = &updates[0]; ud; ud = ud->next) {
unregister_cpu_under_node(ud->cpu, ud->old_nid);
register_cpu_under_node(ud->cpu, ud->new_nid);
@@ -1524,6 +1627,7 @@ int arch_update_cpu_topology(void)
changed = 1;
}
+out:
kfree(updates);
return changed;
}
@@ -1697,7 +1801,7 @@ static const struct file_operations topology_ops = {
static int topology_update_init(void)
{
start_topology_update();
- proc_create("powerpc/topology_updates", 644, NULL, &topology_ops);
+ proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops);
return 0;
}
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 841e0d00863c..c695943a513c 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -24,7 +24,6 @@
#include <linux/kernel.h>
#include <linux/gfp.h>
#include <linux/mm.h>
-#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/hugetlb.h>
@@ -174,7 +173,7 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
pte_t pte)
{
#ifdef CONFIG_DEBUG_VM
- WARN_ON(pte_present(*ptep));
+ WARN_ON(pte_val(*ptep) & _PAGE_PRESENT);
#endif
/* Note: mm->context.id might not yet have been assigned as
* this context might not have been activated yet when this
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 5b9601715289..343a87fa78b5 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -299,6 +299,7 @@ int map_page(unsigned long va, phys_addr_t pa, int flags)
set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT,
__pgprot(flags)));
}
+ smp_wmb();
return err;
}
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 9d95786aa80f..f6ce1f111f5b 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -33,7 +33,6 @@
#include <linux/swap.h>
#include <linux/stddef.h>
#include <linux/vmalloc.h>
-#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/memblock.h>
#include <linux/slab.h>
@@ -153,6 +152,18 @@ int map_kernel_page(unsigned long ea, unsigned long pa, int flags)
}
#endif /* !CONFIG_PPC_MMU_NOHASH */
}
+
+#ifdef CONFIG_PPC_BOOK3E_64
+ /*
+ * With hardware tablewalk, a sync is needed to ensure that
+ * subsequent accesses see the PTE we just wrote. Unlike userspace
+ * mappings, we can't tolerate spurious faults, so make sure
+ * the new PTE will be seen the first time.
+ */
+ mb();
+#else
+ smp_wmb();
+#endif
return 0;
}
@@ -499,7 +510,8 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
}
unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp, unsigned long clr)
+ pmd_t *pmdp, unsigned long clr,
+ unsigned long set)
{
unsigned long old, tmp;
@@ -515,14 +527,15 @@ unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
andi. %1,%0,%6\n\
bne- 1b \n\
andc %1,%0,%4 \n\
+ or %1,%1,%7\n\
stdcx. %1,0,%3 \n\
bne- 1b"
: "=&r" (old), "=&r" (tmp), "=m" (*pmdp)
- : "r" (pmdp), "r" (clr), "m" (*pmdp), "i" (_PAGE_BUSY)
+ : "r" (pmdp), "r" (clr), "m" (*pmdp), "i" (_PAGE_BUSY), "r" (set)
: "cc" );
#else
old = pmd_val(*pmdp);
- *pmdp = __pmd(old & ~clr);
+ *pmdp = __pmd((old & ~clr) | set);
#endif
if (old & _PAGE_HASHPTE)
hpte_do_hugepage_flush(mm, addr, pmdp);
@@ -634,6 +647,11 @@ void pmdp_splitting_flush(struct vm_area_struct *vma,
if (old & _PAGE_HASHPTE)
hpte_do_hugepage_flush(vma->vm_mm, address, pmdp);
}
+ /*
+ * This ensures that generic code that rely on IRQ disabling
+ * to prevent a parallel THP split work as expected.
+ */
+ kick_all_cpus_sync();
}
/*
@@ -687,7 +705,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd)
{
#ifdef CONFIG_DEBUG_VM
- WARN_ON(!pmd_none(*pmdp));
+ WARN_ON(pmd_val(*pmdp) & _PAGE_PRESENT);
assert_spin_locked(&mm->page_table_lock);
WARN_ON(!pmd_trans_huge(pmd));
#endif
@@ -697,7 +715,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp)
{
- pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT);
+ pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);
}
/*
@@ -824,7 +842,7 @@ pmd_t pmdp_get_and_clear(struct mm_struct *mm,
unsigned long old;
pgtable_t *pgtable_slot;
- old = pmd_hugepage_update(mm, addr, pmdp, ~0UL);
+ old = pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
old_pmd = __pmd(old);
/*
* We have pmd == none and we are holding page_table_lock.
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 9d1d33cd2be5..0399a6702958 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -97,7 +97,7 @@ static inline void create_shadowed_slbe(unsigned long ea, int ssize,
static void __slb_flush_and_rebolt(void)
{
/* If you change this make sure you change SLB_NUM_BOLTED
- * appropriately too. */
+ * and PR KVM appropriately too. */
unsigned long linear_llp, vmalloc_llp, lflags, vflags;
unsigned long ksp_esid_data, ksp_vsid_data;
@@ -256,10 +256,14 @@ static inline void patch_slb_encoding(unsigned int *insn_addr,
patch_instruction(insn_addr, insn);
}
+extern u32 slb_compare_rr_to_size[];
+extern u32 slb_miss_kernel_load_linear[];
+extern u32 slb_miss_kernel_load_io[];
+extern u32 slb_compare_rr_to_size[];
+extern u32 slb_miss_kernel_load_vmemmap[];
+
void slb_set_size(u16 size)
{
- extern unsigned int *slb_compare_rr_to_size;
-
if (mmu_slb_size == size)
return;
@@ -272,11 +276,7 @@ void slb_initialize(void)
unsigned long linear_llp, vmalloc_llp, io_llp;
unsigned long lflags, vflags;
static int slb_encoding_inited;
- extern unsigned int *slb_miss_kernel_load_linear;
- extern unsigned int *slb_miss_kernel_load_io;
- extern unsigned int *slb_compare_rr_to_size;
#ifdef CONFIG_SPARSEMEM_VMEMMAP
- extern unsigned int *slb_miss_kernel_load_vmemmap;
unsigned long vmemmap_llp;
#endif
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index 17aa6dfceb34..736d18b3cefd 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -35,7 +35,7 @@ _GLOBAL(slb_allocate_realmode)
* check for bad kernel/user address
* (ea & ~REGION_MASK) >= PGTABLE_RANGE
*/
- rldicr. r9,r3,4,(63 - 46 - 4)
+ rldicr. r9,r3,4,(63 - PGTABLE_EADDR_SIZE - 4)
bne- 8f
srdi r9,r3,60 /* get region */
@@ -59,7 +59,8 @@ _GLOBAL(slb_allocate_realmode)
/* Linear mapping encoding bits, the "li" instruction below will
* be patched by the kernel at boot
*/
-_GLOBAL(slb_miss_kernel_load_linear)
+.globl slb_miss_kernel_load_linear
+slb_miss_kernel_load_linear:
li r11,0
/*
* context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1
@@ -79,7 +80,8 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
/* Check virtual memmap region. To be patches at kernel boot */
cmpldi cr0,r9,0xf
bne 1f
-_GLOBAL(slb_miss_kernel_load_vmemmap)
+.globl slb_miss_kernel_load_vmemmap
+slb_miss_kernel_load_vmemmap:
li r11,0
b 6f
1:
@@ -95,7 +97,8 @@ _GLOBAL(slb_miss_kernel_load_vmemmap)
b 6f
5:
/* IO mapping */
- _GLOBAL(slb_miss_kernel_load_io)
+.globl slb_miss_kernel_load_io
+slb_miss_kernel_load_io:
li r11,0
6:
/*
@@ -250,7 +253,8 @@ slb_finish_load:
7: ld r10,PACASTABRR(r13)
addi r10,r10,1
/* This gets soft patched on boot. */
-_GLOBAL(slb_compare_rr_to_size)
+.globl slb_compare_rr_to_size
+slb_compare_rr_to_size:
cmpldi r10,0
blt+ 4f
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 7ce9cf3b6988..b0c75cc15efc 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -408,7 +408,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
if (fixed && (addr & ((1ul << pshift) - 1)))
return -EINVAL;
if (fixed && addr > (mm->task_size - len))
- return -EINVAL;
+ return -ENOMEM;
/* If hint, make sure it matches our alignment restrictions */
if (!fixed && addr) {
diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c
index a770df2dae70..6c0b1f5f8d2c 100644
--- a/arch/powerpc/mm/subpage-prot.c
+++ b/arch/powerpc/mm/subpage-prot.c
@@ -78,7 +78,7 @@ static void hpte_flush_range(struct mm_struct *mm, unsigned long addr,
pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
arch_enter_lazy_mmu_mode();
for (; npages > 0; --npages) {
- pte_update(mm, addr, pte, 0, 0);
+ pte_update(mm, addr, pte, 0, 0, 0);
addr += PAGE_SIZE;
++pte;
}
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index 36e44b4260eb..c99f6510a0b2 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -23,7 +23,6 @@
#include <linux/kernel.h>
#include <linux/mm.h>
-#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <asm/pgalloc.h>
diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S
index b4113bf86353..356e8b41fb09 100644
--- a/arch/powerpc/mm/tlb_low_64e.S
+++ b/arch/powerpc/mm/tlb_low_64e.S
@@ -39,37 +39,49 @@
* *
**********************************************************************/
+/*
+ * Note that, unlike non-bolted handlers, TLB_EXFRAME is not
+ * modified by the TLB miss handlers themselves, since the TLB miss
+ * handler code will not itself cause a recursive TLB miss.
+ *
+ * TLB_EXFRAME will be modified when crit/mc/debug exceptions are
+ * entered/exited.
+ */
.macro tlb_prolog_bolted intnum addr
- mtspr SPRN_SPRG_GEN_SCRATCH,r13
+ mtspr SPRN_SPRG_GEN_SCRATCH,r12
+ mfspr r12,SPRN_SPRG_TLB_EXFRAME
+ std r13,EX_TLB_R13(r12)
+ std r10,EX_TLB_R10(r12)
mfspr r13,SPRN_SPRG_PACA
- std r10,PACA_EXTLB+EX_TLB_R10(r13)
+
mfcr r10
- std r11,PACA_EXTLB+EX_TLB_R11(r13)
+ std r11,EX_TLB_R11(r12)
#ifdef CONFIG_KVM_BOOKE_HV
BEGIN_FTR_SECTION
mfspr r11, SPRN_SRR1
END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
#endif
DO_KVM \intnum, SPRN_SRR1
- std r16,PACA_EXTLB+EX_TLB_R16(r13)
+ std r16,EX_TLB_R16(r12)
mfspr r16,\addr /* get faulting address */
- std r14,PACA_EXTLB+EX_TLB_R14(r13)
+ std r14,EX_TLB_R14(r12)
ld r14,PACAPGD(r13)
- std r15,PACA_EXTLB+EX_TLB_R15(r13)
- std r10,PACA_EXTLB+EX_TLB_CR(r13)
- TLB_MISS_PROLOG_STATS_BOLTED
+ std r15,EX_TLB_R15(r12)
+ std r10,EX_TLB_CR(r12)
+ TLB_MISS_PROLOG_STATS
.endm
.macro tlb_epilog_bolted
- ld r14,PACA_EXTLB+EX_TLB_CR(r13)
- ld r10,PACA_EXTLB+EX_TLB_R10(r13)
- ld r11,PACA_EXTLB+EX_TLB_R11(r13)
+ ld r14,EX_TLB_CR(r12)
+ ld r10,EX_TLB_R10(r12)
+ ld r11,EX_TLB_R11(r12)
+ ld r13,EX_TLB_R13(r12)
mtcr r14
- ld r14,PACA_EXTLB+EX_TLB_R14(r13)
- ld r15,PACA_EXTLB+EX_TLB_R15(r13)
- TLB_MISS_RESTORE_STATS_BOLTED
- ld r16,PACA_EXTLB+EX_TLB_R16(r13)
- mfspr r13,SPRN_SPRG_GEN_SCRATCH
+ ld r14,EX_TLB_R14(r12)
+ ld r15,EX_TLB_R15(r12)
+ TLB_MISS_RESTORE_STATS
+ ld r16,EX_TLB_R16(r12)
+ mfspr r12,SPRN_SPRG_GEN_SCRATCH
.endm
/* Data TLB miss */
@@ -136,7 +148,7 @@ BEGIN_MMU_FTR_SECTION
*/
PPC_TLBSRX_DOT(0,R16)
ldx r14,r14,r15 /* grab pgd entry */
- beq normal_tlb_miss_done /* tlb exists already, bail */
+ beq tlb_miss_done_bolted /* tlb exists already, bail */
MMU_FTR_SECTION_ELSE
ldx r14,r14,r15 /* grab pgd entry */
ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV)
@@ -192,6 +204,7 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV)
mtspr SPRN_MAS7_MAS3,r15
tlbwe
+tlb_miss_done_bolted:
TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK)
tlb_epilog_bolted
rfi
@@ -239,6 +252,183 @@ itlb_miss_fault_bolted:
beq tlb_miss_common_bolted
b itlb_miss_kernel_bolted
+#ifdef CONFIG_PPC_FSL_BOOK3E
+/*
+ * TLB miss handling for e6500 and derivatives, using hardware tablewalk.
+ *
+ * Linear mapping is bolted: no virtual page table or nested TLB misses
+ * Indirect entries in TLB1, hardware loads resulting direct entries
+ * into TLB0
+ * No HES or NV hint on TLB1, so we need to do software round-robin
+ * No tlbsrx. so we need a spinlock, and we have to deal
+ * with MAS-damage caused by tlbsx
+ * 4K pages only
+ */
+
+ START_EXCEPTION(instruction_tlb_miss_e6500)
+ tlb_prolog_bolted BOOKE_INTERRUPT_ITLB_MISS SPRN_SRR0
+
+ ld r11,PACA_TCD_PTR(r13)
+ srdi. r15,r16,60 /* get region */
+ ori r16,r16,1
+
+ TLB_MISS_STATS_SAVE_INFO_BOLTED
+ bne tlb_miss_kernel_e6500 /* user/kernel test */
+
+ b tlb_miss_common_e6500
+
+ START_EXCEPTION(data_tlb_miss_e6500)
+ tlb_prolog_bolted BOOKE_INTERRUPT_DTLB_MISS SPRN_DEAR
+
+ ld r11,PACA_TCD_PTR(r13)
+ srdi. r15,r16,60 /* get region */
+ rldicr r16,r16,0,62
+
+ TLB_MISS_STATS_SAVE_INFO_BOLTED
+ bne tlb_miss_kernel_e6500 /* user vs kernel check */
+
+/*
+ * This is the guts of the TLB miss handler for e6500 and derivatives.
+ * We are entered with:
+ *
+ * r16 = page of faulting address (low bit 0 if data, 1 if instruction)
+ * r15 = crap (free to use)
+ * r14 = page table base
+ * r13 = PACA
+ * r11 = tlb_per_core ptr
+ * r10 = cpu number
+ */
+tlb_miss_common_e6500:
+ /*
+ * Search if we already have an indirect entry for that virtual
+ * address, and if we do, bail out.
+ *
+ * MAS6:IND should be already set based on MAS4
+ */
+1: lbarx r15,0,r11
+ lhz r10,PACAPACAINDEX(r13)
+ cmpdi r15,0
+ cmpdi cr1,r15,1 /* set cr1.eq = 0 for non-recursive */
+ bne 2f
+ stbcx. r10,0,r11
+ bne 1b
+3:
+ .subsection 1
+2: cmpd cr1,r15,r10 /* recursive lock due to mcheck/crit/etc? */
+ beq cr1,3b /* unlock will happen if cr1.eq = 0 */
+ lbz r15,0(r11)
+ cmpdi r15,0
+ bne 2b
+ b 1b
+ .previous
+
+ mfspr r15,SPRN_MAS2
+
+ tlbsx 0,r16
+ mfspr r10,SPRN_MAS1
+ andis. r10,r10,MAS1_VALID@h
+ bne tlb_miss_done_e6500
+
+ /* Undo MAS-damage from the tlbsx */
+ mfspr r10,SPRN_MAS1
+ oris r10,r10,MAS1_VALID@h
+ mtspr SPRN_MAS1,r10
+ mtspr SPRN_MAS2,r15
+
+ /* Now, we need to walk the page tables. First check if we are in
+ * range.
+ */
+ rldicl. r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4
+ bne- tlb_miss_fault_e6500
+
+ rldicl r15,r16,64-PGDIR_SHIFT+3,64-PGD_INDEX_SIZE-3
+ cmpldi cr0,r14,0
+ clrrdi r15,r15,3
+ beq- tlb_miss_fault_e6500 /* No PGDIR, bail */
+ ldx r14,r14,r15 /* grab pgd entry */
+
+ rldicl r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3
+ clrrdi r15,r15,3
+ cmpdi cr0,r14,0
+ bge tlb_miss_fault_e6500 /* Bad pgd entry or hugepage; bail */
+ ldx r14,r14,r15 /* grab pud entry */
+
+ rldicl r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3
+ clrrdi r15,r15,3
+ cmpdi cr0,r14,0
+ bge tlb_miss_fault_e6500
+ ldx r14,r14,r15 /* Grab pmd entry */
+
+ mfspr r10,SPRN_MAS0
+ cmpdi cr0,r14,0
+ bge tlb_miss_fault_e6500
+
+ /* Now we build the MAS for a 2M indirect page:
+ *
+ * MAS 0 : ESEL needs to be filled by software round-robin
+ * MAS 1 : Fully set up
+ * - PID already updated by caller if necessary
+ * - TSIZE for now is base ind page size always
+ * - TID already cleared if necessary
+ * MAS 2 : Default not 2M-aligned, need to be redone
+ * MAS 3+7 : Needs to be done
+ */
+
+ ori r14,r14,(BOOK3E_PAGESZ_4K << MAS3_SPSIZE_SHIFT)
+ mtspr SPRN_MAS7_MAS3,r14
+
+ clrrdi r15,r16,21 /* make EA 2M-aligned */
+ mtspr SPRN_MAS2,r15
+
+ lbz r15,TCD_ESEL_NEXT(r11)
+ lbz r16,TCD_ESEL_MAX(r11)
+ lbz r14,TCD_ESEL_FIRST(r11)
+ rlwimi r10,r15,16,0x00ff0000 /* insert esel_next into MAS0 */
+ addi r15,r15,1 /* increment esel_next */
+ mtspr SPRN_MAS0,r10
+ cmpw r15,r16
+ iseleq r15,r14,r15 /* if next == last use first */
+ stb r15,TCD_ESEL_NEXT(r11)
+
+ tlbwe
+
+tlb_miss_done_e6500:
+ .macro tlb_unlock_e6500
+ beq cr1,1f /* no unlock if lock was recursively grabbed */
+ li r15,0
+ isync
+ stb r15,0(r11)
+1:
+ .endm
+
+ tlb_unlock_e6500
+ TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK)
+ tlb_epilog_bolted
+ rfi
+
+tlb_miss_kernel_e6500:
+ mfspr r10,SPRN_MAS1
+ ld r14,PACA_KERNELPGD(r13)
+ cmpldi cr0,r15,8 /* Check for vmalloc region */
+ rlwinm r10,r10,0,16,1 /* Clear TID */
+ mtspr SPRN_MAS1,r10
+ beq+ tlb_miss_common_e6500
+
+tlb_miss_fault_e6500:
+ tlb_unlock_e6500
+ /* We need to check if it was an instruction miss */
+ andi. r16,r16,1
+ bne itlb_miss_fault_e6500
+dtlb_miss_fault_e6500:
+ TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
+ tlb_epilog_bolted
+ b exc_data_storage_book3e
+itlb_miss_fault_e6500:
+ TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
+ tlb_epilog_bolted
+ b exc_instruction_storage_book3e
+#endif /* CONFIG_PPC_FSL_BOOK3E */
+
/**********************************************************************
* *
* TLB miss handling for Book3E with TLB reservation and HES support *
@@ -918,7 +1108,8 @@ tlb_load_linear:
ld r11,PACATOC(r13)
ld r11,linear_map_top@got(r11)
ld r10,0(r11)
- cmpld cr0,r10,r16
+ tovirt(10,10)
+ cmpld cr0,r16,r10
bge tlb_load_linear_fault
/* MAS1 need whole new setup. */
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index 358d74303138..92cb18d52ea8 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -43,6 +43,7 @@
#include <asm/tlb.h>
#include <asm/code-patching.h>
#include <asm/hugetlb.h>
+#include <asm/paca.h>
#include "mmu_decl.h"
@@ -58,6 +59,10 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
.shift = 12,
.enc = BOOK3E_PAGESZ_4K,
},
+ [MMU_PAGE_2M] = {
+ .shift = 21,
+ .enc = BOOK3E_PAGESZ_2M,
+ },
[MMU_PAGE_4M] = {
.shift = 22,
.enc = BOOK3E_PAGESZ_4M,
@@ -136,9 +141,18 @@ static inline int mmu_get_tsize(int psize)
int mmu_linear_psize; /* Page size used for the linear mapping */
int mmu_pte_psize; /* Page size used for PTE pages */
int mmu_vmemmap_psize; /* Page size used for the virtual mem map */
-int book3e_htw_enabled; /* Is HW tablewalk enabled ? */
+int book3e_htw_mode; /* HW tablewalk? Value is PPC_HTW_* */
unsigned long linear_map_top; /* Top of linear mapping */
+
+/*
+ * Number of bytes to add to SPRN_SPRG_TLB_EXFRAME on crit/mcheck/debug
+ * exceptions. This is used for bolted and e6500 TLB miss handlers which
+ * do not modify this SPRG in the TLB miss code; for other TLB miss handlers,
+ * this is set to zero.
+ */
+int extlb_level_exc;
+
#endif /* CONFIG_PPC64 */
#ifdef CONFIG_PPC_FSL_BOOK3E
@@ -377,7 +391,7 @@ void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address)
{
int tsize = mmu_psize_defs[mmu_pte_psize].enc;
- if (book3e_htw_enabled) {
+ if (book3e_htw_mode != PPC_HTW_NONE) {
unsigned long start = address & PMD_MASK;
unsigned long end = address + PMD_SIZE;
unsigned long size = 1UL << mmu_psize_defs[mmu_pte_psize].shift;
@@ -430,7 +444,7 @@ static void setup_page_sizes(void)
def = &mmu_psize_defs[psize];
shift = def->shift;
- if (shift == 0)
+ if (shift == 0 || shift & 1)
continue;
/* adjust to be in terms of 4^shift Kb */
@@ -440,21 +454,40 @@ static void setup_page_sizes(void)
def->flags |= MMU_PAGE_SIZE_DIRECT;
}
- goto no_indirect;
+ goto out;
}
if (fsl_mmu && (mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2) {
- u32 tlb1ps = mfspr(SPRN_TLB1PS);
+ u32 tlb1cfg, tlb1ps;
+
+ tlb0cfg = mfspr(SPRN_TLB0CFG);
+ tlb1cfg = mfspr(SPRN_TLB1CFG);
+ tlb1ps = mfspr(SPRN_TLB1PS);
+ eptcfg = mfspr(SPRN_EPTCFG);
+
+ if ((tlb1cfg & TLBnCFG_IND) && (tlb0cfg & TLBnCFG_PT))
+ book3e_htw_mode = PPC_HTW_E6500;
+
+ /*
+ * We expect 4K subpage size and unrestricted indirect size.
+ * The lack of a restriction on indirect size is a Freescale
+ * extension, indicated by PSn = 0 but SPSn != 0.
+ */
+ if (eptcfg != 2)
+ book3e_htw_mode = PPC_HTW_NONE;
for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
struct mmu_psize_def *def = &mmu_psize_defs[psize];
if (tlb1ps & (1U << (def->shift - 10))) {
def->flags |= MMU_PAGE_SIZE_DIRECT;
+
+ if (book3e_htw_mode && psize == MMU_PAGE_2M)
+ def->flags |= MMU_PAGE_SIZE_INDIRECT;
}
}
- goto no_indirect;
+ goto out;
}
#endif
@@ -471,8 +504,11 @@ static void setup_page_sizes(void)
}
/* Indirect page sizes supported ? */
- if ((tlb0cfg & TLBnCFG_IND) == 0)
- goto no_indirect;
+ if ((tlb0cfg & TLBnCFG_IND) == 0 ||
+ (tlb0cfg & TLBnCFG_PT) == 0)
+ goto out;
+
+ book3e_htw_mode = PPC_HTW_IBM;
/* Now, we only deal with one IND page size for each
* direct size. Hopefully all implementations today are
@@ -497,8 +533,8 @@ static void setup_page_sizes(void)
def->ind = ps + 10;
}
}
- no_indirect:
+out:
/* Cleanup array and print summary */
pr_info("MMU: Supported page sizes\n");
for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
@@ -518,44 +554,28 @@ static void setup_page_sizes(void)
}
}
-static void __patch_exception(int exc, unsigned long addr)
-{
- extern unsigned int interrupt_base_book3e;
- unsigned int *ibase = &interrupt_base_book3e;
-
- /* Our exceptions vectors start with a NOP and -then- a branch
- * to deal with single stepping from userspace which stops on
- * the second instruction. Thus we need to patch the second
- * instruction of the exception, not the first one
- */
-
- patch_branch(ibase + (exc / 4) + 1, addr, 0);
-}
-
-#define patch_exception(exc, name) do { \
- extern unsigned int name; \
- __patch_exception((exc), (unsigned long)&name); \
-} while (0)
-
static void setup_mmu_htw(void)
{
- /* Check if HW tablewalk is present, and if yes, enable it by:
- *
- * - patching the TLB miss handlers to branch to the
- * one dedicates to it
- *
- * - setting the global book3e_htw_enabled
- */
- unsigned int tlb0cfg = mfspr(SPRN_TLB0CFG);
+ /*
+ * If we want to use HW tablewalk, enable it by patching the TLB miss
+ * handlers to branch to the one dedicated to it.
+ */
- if ((tlb0cfg & TLBnCFG_IND) &&
- (tlb0cfg & TLBnCFG_PT)) {
+ switch (book3e_htw_mode) {
+ case PPC_HTW_IBM:
patch_exception(0x1c0, exc_data_tlb_miss_htw_book3e);
patch_exception(0x1e0, exc_instruction_tlb_miss_htw_book3e);
- book3e_htw_enabled = 1;
+ break;
+#ifdef CONFIG_PPC_FSL_BOOK3E
+ case PPC_HTW_E6500:
+ extlb_level_exc = EX_TLB_SIZE;
+ patch_exception(0x1c0, exc_data_tlb_miss_e6500_book3e);
+ patch_exception(0x1e0, exc_instruction_tlb_miss_e6500_book3e);
+ break;
+#endif
}
pr_info("MMU: Book3E HW tablewalk %s\n",
- book3e_htw_enabled ? "enabled" : "not supported");
+ book3e_htw_mode != PPC_HTW_NONE ? "enabled" : "not supported");
}
/*
@@ -576,8 +596,13 @@ static void __early_init_mmu(int boot_cpu)
/* XXX This should be decided at runtime based on supported
* page sizes in the TLB, but for now let's assume 16M is
* always there and a good fit (which it probably is)
+ *
+ * Freescale booke only supports 4K pages in TLB0, so use that.
*/
- mmu_vmemmap_psize = MMU_PAGE_16M;
+ if (mmu_has_feature(MMU_FTR_TYPE_FSL_E))
+ mmu_vmemmap_psize = MMU_PAGE_4K;
+ else
+ mmu_vmemmap_psize = MMU_PAGE_16M;
/* XXX This code only checks for TLB 0 capabilities and doesn't
* check what page size combos are supported by the HW. It
@@ -595,8 +620,16 @@ static void __early_init_mmu(int boot_cpu)
/* Set MAS4 based on page table setting */
mas4 = 0x4 << MAS4_WIMGED_SHIFT;
- if (book3e_htw_enabled) {
- mas4 |= mas4 | MAS4_INDD;
+ switch (book3e_htw_mode) {
+ case PPC_HTW_E6500:
+ mas4 |= MAS4_INDD;
+ mas4 |= BOOK3E_PAGESZ_2M << MAS4_TSIZED_SHIFT;
+ mas4 |= MAS4_TLBSELD(1);
+ mmu_pte_psize = MMU_PAGE_2M;
+ break;
+
+ case PPC_HTW_IBM:
+ mas4 |= MAS4_INDD;
#ifdef CONFIG_PPC_64K_PAGES
mas4 |= BOOK3E_PAGESZ_256M << MAS4_TSIZED_SHIFT;
mmu_pte_psize = MMU_PAGE_256M;
@@ -604,13 +637,16 @@ static void __early_init_mmu(int boot_cpu)
mas4 |= BOOK3E_PAGESZ_1M << MAS4_TSIZED_SHIFT;
mmu_pte_psize = MMU_PAGE_1M;
#endif
- } else {
+ break;
+
+ case PPC_HTW_NONE:
#ifdef CONFIG_PPC_64K_PAGES
mas4 |= BOOK3E_PAGESZ_64K << MAS4_TSIZED_SHIFT;
#else
mas4 |= BOOK3E_PAGESZ_4K << MAS4_TSIZED_SHIFT;
#endif
mmu_pte_psize = mmu_virtual_psize;
+ break;
}
mtspr(SPRN_MAS4, mas4);
@@ -630,8 +666,12 @@ static void __early_init_mmu(int boot_cpu)
/* limit memory so we dont have linear faults */
memblock_enforce_memory_limit(linear_map_top);
- patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e);
- patch_exception(0x1e0, exc_instruction_tlb_miss_bolted_book3e);
+ if (book3e_htw_mode == PPC_HTW_NONE) {
+ extlb_level_exc = EX_TLB_SIZE;
+ patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e);
+ patch_exception(0x1e0,
+ exc_instruction_tlb_miss_bolted_book3e);
+ }
}
#endif
diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S
index 626ad081639f..43ff3c797fbf 100644
--- a/arch/powerpc/mm/tlb_nohash_low.S
+++ b/arch/powerpc/mm/tlb_nohash_low.S
@@ -402,7 +402,9 @@ _GLOBAL(set_context)
* Load TLBCAM[index] entry in to the L2 CAM MMU
*/
_GLOBAL(loadcam_entry)
- LOAD_REG_ADDR(r4, TLBCAM)
+ mflr r5
+ LOAD_REG_ADDR_PIC(r4, TLBCAM)
+ mtlr r5
mulli r5,r3,TLBCAM_SIZE
add r3,r5,r4
lwz r4,TLBCAM_MAS0(r3)