aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--arch/powerpc/mm/Makefile5
-rw-r--r--arch/powerpc/mm/copro_fault.c3
-rw-r--r--arch/powerpc/mm/dump_hashpagetable.c551
-rw-r--r--arch/powerpc/mm/dump_linuxpagetables.c442
-rw-r--r--arch/powerpc/mm/fault.c17
-rw-r--r--arch/powerpc/mm/hash_native_64.c40
-rw-r--r--arch/powerpc/mm/hash_utils_64.c28
-rw-r--r--arch/powerpc/mm/hugetlbpage.c216
-rw-r--r--arch/powerpc/mm/init-common.c107
-rw-r--r--arch/powerpc/mm/init_64.c77
-rw-r--r--arch/powerpc/mm/mmu_context_book3s64.c6
-rw-r--r--arch/powerpc/mm/mmu_context_iommu.c60
-rw-r--r--arch/powerpc/mm/numa.c13
-rw-r--r--arch/powerpc/mm/pgtable-book3s64.c3
-rw-r--r--arch/powerpc/mm/pgtable-radix.c58
-rw-r--r--arch/powerpc/mm/pgtable.c2
-rw-r--r--arch/powerpc/mm/pgtable_32.c37
-rw-r--r--arch/powerpc/mm/pgtable_64.c34
-rw-r--r--arch/powerpc/mm/tlb-radix.c18
-rw-r--r--arch/powerpc/mm/tlb_nohash.c21
20 files changed, 1394 insertions, 344 deletions
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 1a4e570f7894..7414034df1c3 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -7,7 +7,8 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
obj-y := fault.o mem.o pgtable.o mmap.o \
- init_$(BITS).o pgtable_$(BITS).o
+ init_$(BITS).o pgtable_$(BITS).o \
+ init-common.o
obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \
tlb_nohash_low.o
obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(BITS)e.o
@@ -42,3 +43,5 @@ obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
obj-$(CONFIG_HIGHMEM) += highmem.o
obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o
obj-$(CONFIG_SPAPR_TCE_IOMMU) += mmu_context_iommu.o
+obj-$(CONFIG_PPC_PTDUMP) += dump_linuxpagetables.o
+obj-$(CONFIG_PPC_HTDUMP) += dump_hashpagetable.o
diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c
index 362954f98029..aaa7ec6788b9 100644
--- a/arch/powerpc/mm/copro_fault.c
+++ b/arch/powerpc/mm/copro_fault.c
@@ -134,6 +134,9 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
pr_debug("%s: invalid region access at %016llx\n", __func__, ea);
return 1;
}
+ /* Bad address */
+ if (!vsid)
+ return 1;
vsid = (vsid << slb_vsid_shift(ssize)) | vsidkey;
diff --git a/arch/powerpc/mm/dump_hashpagetable.c b/arch/powerpc/mm/dump_hashpagetable.c
new file mode 100644
index 000000000000..d979709a0239
--- /dev/null
+++ b/arch/powerpc/mm/dump_hashpagetable.c
@@ -0,0 +1,551 @@
+/*
+ * Copyright 2016, Rashmica Gupta, IBM Corp.
+ *
+ * This traverses the kernel virtual memory and dumps the pages that are in
+ * the hash pagetable, along with their flags to
+ * /sys/kernel/debug/kernel_hash_pagetable.
+ *
+ * If radix is enabled then there is no hash page table and so no debugfs file
+ * is generated.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#include <linux/debugfs.h>
+#include <linux/fs.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <asm/fixmap.h>
+#include <asm/pgtable.h>
+#include <linux/const.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/plpar_wrappers.h>
+#include <linux/memblock.h>
+#include <asm/firmware.h>
+
+struct pg_state {
+ struct seq_file *seq;
+ const struct addr_marker *marker;
+ unsigned long start_address;
+ unsigned int level;
+ u64 current_flags;
+};
+
+struct addr_marker {
+ unsigned long start_address;
+ const char *name;
+};
+
+static struct addr_marker address_markers[] = {
+ { 0, "Start of kernel VM" },
+ { 0, "vmalloc() Area" },
+ { 0, "vmalloc() End" },
+ { 0, "isa I/O start" },
+ { 0, "isa I/O end" },
+ { 0, "phb I/O start" },
+ { 0, "phb I/O end" },
+ { 0, "I/O remap start" },
+ { 0, "I/O remap end" },
+ { 0, "vmemmap start" },
+ { -1, NULL },
+};
+
+struct flag_info {
+ u64 mask;
+ u64 val;
+ const char *set;
+ const char *clear;
+ bool is_val;
+ int shift;
+};
+
+static const struct flag_info v_flag_array[] = {
+ {
+ .mask = SLB_VSID_B,
+ .val = SLB_VSID_B_256M,
+ .set = "ssize: 256M",
+ .clear = "ssize: 1T ",
+ }, {
+ .mask = HPTE_V_SECONDARY,
+ .val = HPTE_V_SECONDARY,
+ .set = "secondary",
+ .clear = "primary ",
+ }, {
+ .mask = HPTE_V_VALID,
+ .val = HPTE_V_VALID,
+ .set = "valid ",
+ .clear = "invalid",
+ }, {
+ .mask = HPTE_V_BOLTED,
+ .val = HPTE_V_BOLTED,
+ .set = "bolted",
+ .clear = "",
+ }
+};
+
+static const struct flag_info r_flag_array[] = {
+ {
+ .mask = HPTE_R_PP0 | HPTE_R_PP,
+ .val = PP_RWXX,
+ .set = "prot:RW--",
+ }, {
+ .mask = HPTE_R_PP0 | HPTE_R_PP,
+ .val = PP_RWRX,
+ .set = "prot:RWR-",
+ }, {
+ .mask = HPTE_R_PP0 | HPTE_R_PP,
+ .val = PP_RWRW,
+ .set = "prot:RWRW",
+ }, {
+ .mask = HPTE_R_PP0 | HPTE_R_PP,
+ .val = PP_RXRX,
+ .set = "prot:R-R-",
+ }, {
+ .mask = HPTE_R_PP0 | HPTE_R_PP,
+ .val = PP_RXXX,
+ .set = "prot:R---",
+ }, {
+ .mask = HPTE_R_KEY_HI | HPTE_R_KEY_LO,
+ .val = HPTE_R_KEY_HI | HPTE_R_KEY_LO,
+ .set = "key",
+ .clear = "",
+ .is_val = true,
+ }, {
+ .mask = HPTE_R_R,
+ .val = HPTE_R_R,
+ .set = "ref",
+ .clear = " ",
+ }, {
+ .mask = HPTE_R_C,
+ .val = HPTE_R_C,
+ .set = "changed",
+ .clear = " ",
+ }, {
+ .mask = HPTE_R_N,
+ .val = HPTE_R_N,
+ .set = "no execute",
+ }, {
+ .mask = HPTE_R_WIMG,
+ .val = HPTE_R_W,
+ .set = "writethru",
+ }, {
+ .mask = HPTE_R_WIMG,
+ .val = HPTE_R_I,
+ .set = "no cache",
+ }, {
+ .mask = HPTE_R_WIMG,
+ .val = HPTE_R_G,
+ .set = "guarded",
+ }
+};
+
+static int calculate_pagesize(struct pg_state *st, int ps, char s[])
+{
+ static const char units[] = "BKMGTPE";
+ const char *unit = units;
+
+ while (ps > 9 && unit[1]) {
+ ps -= 10;
+ unit++;
+ }
+ seq_printf(st->seq, " %s_ps: %i%c\t", s, 1<<ps, *unit);
+ return ps;
+}
+
+static void dump_flag_info(struct pg_state *st, const struct flag_info
+ *flag, u64 pte, int num)
+{
+ unsigned int i;
+
+ for (i = 0; i < num; i++, flag++) {
+ const char *s = NULL;
+ u64 val;
+
+ /* flag not defined so don't check it */
+ if (flag->mask == 0)
+ continue;
+ /* Some 'flags' are actually values */
+ if (flag->is_val) {
+ val = pte & flag->val;
+ if (flag->shift)
+ val = val >> flag->shift;
+ seq_printf(st->seq, " %s:%llx", flag->set, val);
+ } else {
+ if ((pte & flag->mask) == flag->val)
+ s = flag->set;
+ else
+ s = flag->clear;
+ if (s)
+ seq_printf(st->seq, " %s", s);
+ }
+ }
+}
+
+static void dump_hpte_info(struct pg_state *st, unsigned long ea, u64 v, u64 r,
+ unsigned long rpn, int bps, int aps, unsigned long lp)
+{
+ int aps_index;
+
+ while (ea >= st->marker[1].start_address) {
+ st->marker++;
+ seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+ }
+ seq_printf(st->seq, "0x%lx:\t", ea);
+ seq_printf(st->seq, "AVPN:%llx\t", HPTE_V_AVPN_VAL(v));
+ dump_flag_info(st, v_flag_array, v, ARRAY_SIZE(v_flag_array));
+ seq_printf(st->seq, " rpn: %lx\t", rpn);
+ dump_flag_info(st, r_flag_array, r, ARRAY_SIZE(r_flag_array));
+
+ calculate_pagesize(st, bps, "base");
+ aps_index = calculate_pagesize(st, aps, "actual");
+ if (aps_index != 2)
+ seq_printf(st->seq, "LP enc: %lx", lp);
+ seq_puts(st->seq, "\n");
+}
+
+
+static int native_find(unsigned long ea, int psize, bool primary, u64 *v, u64
+ *r)
+{
+ struct hash_pte *hptep;
+ unsigned long hash, vsid, vpn, hpte_group, want_v, hpte_v;
+ int i, ssize = mmu_kernel_ssize;
+ unsigned long shift = mmu_psize_defs[psize].shift;
+
+ /* calculate hash */
+ vsid = get_kernel_vsid(ea, ssize);
+ vpn = hpt_vpn(ea, vsid, ssize);
+ hash = hpt_hash(vpn, shift, ssize);
+ want_v = hpte_encode_avpn(vpn, psize, ssize);
+
+ /* to check in the secondary hash table, we invert the hash */
+ if (!primary)
+ hash = ~hash;
+ hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+ for (i = 0; i < HPTES_PER_GROUP; i++) {
+ hptep = htab_address + hpte_group;
+ hpte_v = be64_to_cpu(hptep->v);
+
+ if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
+ /* HPTE matches */
+ *v = be64_to_cpu(hptep->v);
+ *r = be64_to_cpu(hptep->r);
+ return 0;
+ }
+ ++hpte_group;
+ }
+ return -1;
+}
+
+#ifdef CONFIG_PPC_PSERIES
+static int pseries_find(unsigned long ea, int psize, bool primary, u64 *v, u64 *r)
+{
+ struct hash_pte ptes[4];
+ unsigned long vsid, vpn, hash, hpte_group, want_v;
+ int i, j, ssize = mmu_kernel_ssize;
+ long lpar_rc = 0;
+ unsigned long shift = mmu_psize_defs[psize].shift;
+
+ /* calculate hash */
+ vsid = get_kernel_vsid(ea, ssize);
+ vpn = hpt_vpn(ea, vsid, ssize);
+ hash = hpt_hash(vpn, shift, ssize);
+ want_v = hpte_encode_avpn(vpn, psize, ssize);
+
+ /* to check in the secondary hash table, we invert the hash */
+ if (!primary)
+ hash = ~hash;
+ hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
+ /* see if we can find an entry in the hpte with this hash */
+ for (i = 0; i < HPTES_PER_GROUP; i += 4, hpte_group += 4) {
+ lpar_rc = plpar_pte_read_4(0, hpte_group, (void *)ptes);
+
+ if (lpar_rc != H_SUCCESS)
+ continue;
+ for (j = 0; j < 4; j++) {
+ if (HPTE_V_COMPARE(ptes[j].v, want_v) &&
+ (ptes[j].v & HPTE_V_VALID)) {
+ /* HPTE matches */
+ *v = ptes[j].v;
+ *r = ptes[j].r;
+ return 0;
+ }
+ }
+ }
+ return -1;
+}
+#endif
+
+static void decode_r(int bps, unsigned long r, unsigned long *rpn, int *aps,
+ unsigned long *lp_bits)
+{
+ struct mmu_psize_def entry;
+ unsigned long arpn, mask, lp;
+ int penc = -2, idx = 0, shift;
+
+ /*.
+ * The LP field has 8 bits. Depending on the actual page size, some of
+ * these bits are concatenated with the APRN to get the RPN. The rest
+ * of the bits in the LP field is the LP value and is an encoding for
+ * the base page size and the actual page size.
+ *
+ * - find the mmu entry for our base page size
+ * - go through all page encodings and use the associated mask to
+ * find an encoding that matches our encoding in the LP field.
+ */
+ arpn = (r & HPTE_R_RPN) >> HPTE_R_RPN_SHIFT;
+ lp = arpn & 0xff;
+
+ entry = mmu_psize_defs[bps];
+ while (idx < MMU_PAGE_COUNT) {
+ penc = entry.penc[idx];
+ if ((penc != -1) && (mmu_psize_defs[idx].shift)) {
+ shift = mmu_psize_defs[idx].shift - HPTE_R_RPN_SHIFT;
+ mask = (0x1 << (shift)) - 1;
+ if ((lp & mask) == penc) {
+ *aps = mmu_psize_to_shift(idx);
+ *lp_bits = lp & mask;
+ *rpn = arpn >> shift;
+ return;
+ }
+ }
+ idx++;
+ }
+}
+
+static int base_hpte_find(unsigned long ea, int psize, bool primary, u64 *v,
+ u64 *r)
+{
+#ifdef CONFIG_PPC_PSERIES
+ if (firmware_has_feature(FW_FEATURE_LPAR))
+ return pseries_find(ea, psize, primary, v, r);
+#endif
+ return native_find(ea, psize, primary, v, r);
+}
+
+static unsigned long hpte_find(struct pg_state *st, unsigned long ea, int psize)
+{
+ unsigned long slot;
+ u64 v = 0, r = 0;
+ unsigned long rpn, lp_bits;
+ int base_psize = 0, actual_psize = 0;
+
+ if (ea <= PAGE_OFFSET)
+ return -1;
+
+ /* Look in primary table */
+ slot = base_hpte_find(ea, psize, true, &v, &r);
+
+ /* Look in secondary table */
+ if (slot == -1)
+ slot = base_hpte_find(ea, psize, true, &v, &r);
+
+ /* No entry found */
+ if (slot == -1)
+ return -1;
+
+ /*
+ * We found an entry in the hash page table:
+ * - check that this has the same base page
+ * - find the actual page size
+ * - find the RPN
+ */
+ base_psize = mmu_psize_to_shift(psize);
+
+ if ((v & HPTE_V_LARGE) == HPTE_V_LARGE) {
+ decode_r(psize, r, &rpn, &actual_psize, &lp_bits);
+ } else {
+ /* 4K actual page size */
+ actual_psize = 12;
+ rpn = (r & HPTE_R_RPN) >> HPTE_R_RPN_SHIFT;
+ /* In this case there are no LP bits */
+ lp_bits = -1;
+ }
+ /*
+ * We didn't find a matching encoding, so the PTE we found isn't for
+ * this address.
+ */
+ if (actual_psize == -1)
+ return -1;
+
+ dump_hpte_info(st, ea, v, r, rpn, base_psize, actual_psize, lp_bits);
+ return 0;
+}
+
+static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
+{
+ pte_t *pte = pte_offset_kernel(pmd, 0);
+ unsigned long addr, pteval, psize;
+ int i, status;
+
+ for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
+ addr = start + i * PAGE_SIZE;
+ pteval = pte_val(*pte);
+
+ if (addr < VMALLOC_END)
+ psize = mmu_vmalloc_psize;
+ else
+ psize = mmu_io_psize;
+#ifdef CONFIG_PPC_64K_PAGES
+ /* check for secret 4K mappings */
+ if (((pteval & H_PAGE_COMBO) == H_PAGE_COMBO) ||
+ ((pteval & H_PAGE_4K_PFN) == H_PAGE_4K_PFN))
+ psize = mmu_io_psize;
+#endif
+ /* check for hashpte */
+ status = hpte_find(st, addr, psize);
+
+ if (((pteval & H_PAGE_HASHPTE) != H_PAGE_HASHPTE)
+ && (status != -1)) {
+ /* found a hpte that is not in the linux page tables */
+ seq_printf(st->seq, "page probably bolted before linux"
+ " pagetables were set: addr:%lx, pteval:%lx\n",
+ addr, pteval);
+ }
+ }
+}
+
+static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
+{
+ pmd_t *pmd = pmd_offset(pud, 0);
+ unsigned long addr;
+ unsigned int i;
+
+ for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
+ addr = start + i * PMD_SIZE;
+ if (!pmd_none(*pmd))
+ /* pmd exists */
+ walk_pte(st, pmd, addr);
+ }
+}
+
+static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
+{
+ pud_t *pud = pud_offset(pgd, 0);
+ unsigned long addr;
+ unsigned int i;
+
+ for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
+ addr = start + i * PUD_SIZE;
+ if (!pud_none(*pud))
+ /* pud exists */
+ walk_pmd(st, pud, addr);
+ }
+}
+
+static void walk_pagetables(struct pg_state *st)
+{
+ pgd_t *pgd = pgd_offset_k(0UL);
+ unsigned int i;
+ unsigned long addr;
+
+ /*
+ * Traverse the linux pagetable structure and dump pages that are in
+ * the hash pagetable.
+ */
+ for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
+ addr = KERN_VIRT_START + i * PGDIR_SIZE;
+ if (!pgd_none(*pgd))
+ /* pgd exists */
+ walk_pud(st, pgd, addr);
+ }
+}
+
+
+static void walk_linearmapping(struct pg_state *st)
+{
+ unsigned long addr;
+
+ /*
+ * Traverse the linear mapping section of virtual memory and dump pages
+ * that are in the hash pagetable.
+ */
+ unsigned long psize = 1 << mmu_psize_defs[mmu_linear_psize].shift;
+
+ for (addr = PAGE_OFFSET; addr < PAGE_OFFSET +
+ memblock_phys_mem_size(); addr += psize)
+ hpte_find(st, addr, mmu_linear_psize);
+}
+
+static void walk_vmemmap(struct pg_state *st)
+{
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+ struct vmemmap_backing *ptr = vmemmap_list;
+
+ /*
+ * Traverse the vmemmaped memory and dump pages that are in the hash
+ * pagetable.
+ */
+ while (ptr->list) {
+ hpte_find(st, ptr->virt_addr, mmu_vmemmap_psize);
+ ptr = ptr->list;
+ }
+ seq_puts(st->seq, "---[ vmemmap end ]---\n");
+#endif
+}
+
+static void populate_markers(void)
+{
+ address_markers[0].start_address = PAGE_OFFSET;
+ address_markers[1].start_address = VMALLOC_START;
+ address_markers[2].start_address = VMALLOC_END;
+ address_markers[3].start_address = ISA_IO_BASE;
+ address_markers[4].start_address = ISA_IO_END;
+ address_markers[5].start_address = PHB_IO_BASE;
+ address_markers[6].start_address = PHB_IO_END;
+ address_markers[7].start_address = IOREMAP_BASE;
+ address_markers[8].start_address = IOREMAP_END;
+#ifdef CONFIG_PPC_STD_MMU_64
+ address_markers[9].start_address = H_VMEMMAP_BASE;
+#else
+ address_markers[9].start_address = VMEMMAP_BASE;
+#endif
+}
+
+static int ptdump_show(struct seq_file *m, void *v)
+{
+ struct pg_state st = {
+ .seq = m,
+ .start_address = PAGE_OFFSET,
+ .marker = address_markers,
+ };
+ /*
+ * Traverse the 0xc, 0xd and 0xf areas of the kernel virtual memory and
+ * dump pages that are in the hash pagetable.
+ */
+ walk_linearmapping(&st);
+ walk_pagetables(&st);
+ walk_vmemmap(&st);
+ return 0;
+}
+
+static int ptdump_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, ptdump_show, NULL);
+}
+
+static const struct file_operations ptdump_fops = {
+ .open = ptdump_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int ptdump_init(void)
+{
+ struct dentry *debugfs_file;
+
+ if (!radix_enabled()) {
+ populate_markers();
+ debugfs_file = debugfs_create_file("kernel_hash_pagetable",
+ 0400, NULL, NULL, &ptdump_fops);
+ return debugfs_file ? 0 : -ENOMEM;
+ }
+ return 0;
+}
+device_initcall(ptdump_init);
diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/dump_linuxpagetables.c
new file mode 100644
index 000000000000..49abaf4dc8e3
--- /dev/null
+++ b/arch/powerpc/mm/dump_linuxpagetables.c
@@ -0,0 +1,442 @@
+/*
+ * Copyright 2016, Rashmica Gupta, IBM Corp.
+ *
+ * This traverses the kernel pagetables and dumps the
+ * information about the used sections of memory to
+ * /sys/kernel/debug/kernel_pagetables.
+ *
+ * Derived from the arm64 implementation:
+ * Copyright (c) 2014, The Linux Foundation, Laura Abbott.
+ * (C) Copyright 2008 Intel Corporation, Arjan van de Ven.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#include <linux/debugfs.h>
+#include <linux/fs.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <asm/fixmap.h>
+#include <asm/pgtable.h>
+#include <linux/const.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+
+/*
+ * To visualise what is happening,
+ *
+ * - PTRS_PER_P** = how many entries there are in the corresponding P**
+ * - P**_SHIFT = how many bits of the address we use to index into the
+ * corresponding P**
+ * - P**_SIZE is how much memory we can access through the table - not the
+ * size of the table itself.
+ * P**={PGD, PUD, PMD, PTE}
+ *
+ *
+ * Each entry of the PGD points to a PUD. Each entry of a PUD points to a
+ * PMD. Each entry of a PMD points to a PTE. And every PTE entry points to
+ * a page.
+ *
+ * In the case where there are only 3 levels, the PUD is folded into the
+ * PGD: every PUD has only one entry which points to the PMD.
+ *
+ * The page dumper groups page table entries of the same type into a single
+ * description. It uses pg_state to track the range information while
+ * iterating over the PTE entries. When the continuity is broken it then
+ * dumps out a description of the range - ie PTEs that are virtually contiguous
+ * with the same PTE flags are chunked together. This is to make it clear how
+ * different areas of the kernel virtual memory are used.
+ *
+ */
+struct pg_state {
+ struct seq_file *seq;
+ const struct addr_marker *marker;
+ unsigned long start_address;
+ unsigned int level;
+ u64 current_flags;
+};
+
+struct addr_marker {
+ unsigned long start_address;
+ const char *name;
+};
+
+static struct addr_marker address_markers[] = {
+ { 0, "Start of kernel VM" },
+ { 0, "vmalloc() Area" },
+ { 0, "vmalloc() End" },
+ { 0, "isa I/O start" },
+ { 0, "isa I/O end" },
+ { 0, "phb I/O start" },
+ { 0, "phb I/O end" },
+ { 0, "I/O remap start" },
+ { 0, "I/O remap end" },
+ { 0, "vmemmap start" },
+ { -1, NULL },
+};
+
+struct flag_info {
+ u64 mask;
+ u64 val;
+ const char *set;
+ const char *clear;
+ bool is_val;
+ int shift;
+};
+
+static const struct flag_info flag_array[] = {
+ {
+#ifdef CONFIG_PPC_STD_MMU_64
+ .mask = _PAGE_PRIVILEGED,
+ .val = 0,
+#else
+ .mask = _PAGE_USER,
+ .val = _PAGE_USER,
+#endif
+ .set = "user",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_RW,
+ .val = _PAGE_RW,
+ .set = "rw",
+ .clear = "ro",
+ }, {
+ .mask = _PAGE_EXEC,
+ .val = _PAGE_EXEC,
+ .set = " X ",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_PTE,
+ .val = _PAGE_PTE,
+ .set = "pte",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_PRESENT,
+ .val = _PAGE_PRESENT,
+ .set = "present",
+ .clear = " ",
+ }, {
+#ifdef CONFIG_PPC_STD_MMU_64
+ .mask = H_PAGE_HASHPTE,
+ .val = H_PAGE_HASHPTE,
+#else
+ .mask = _PAGE_HASHPTE,
+ .val = _PAGE_HASHPTE,
+#endif
+ .set = "hpte",
+ .clear = " ",
+ }, {
+#ifndef CONFIG_PPC_STD_MMU_64
+ .mask = _PAGE_GUARDED,
+ .val = _PAGE_GUARDED,
+ .set = "guarded",
+ .clear = " ",
+ }, {
+#endif
+ .mask = _PAGE_DIRTY,
+ .val = _PAGE_DIRTY,
+ .set = "dirty",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_ACCESSED,
+ .val = _PAGE_ACCESSED,
+ .set = "accessed",
+ .clear = " ",
+ }, {
+#ifndef CONFIG_PPC_STD_MMU_64
+ .mask = _PAGE_WRITETHRU,
+ .val = _PAGE_WRITETHRU,
+ .set = "write through",
+ .clear = " ",
+ }, {
+#endif
+ .mask = _PAGE_NO_CACHE,
+ .val = _PAGE_NO_CACHE,
+ .set = "no cache",
+ .clear = " ",
+ }, {
+#ifdef CONFIG_PPC_BOOK3S_64
+ .mask = H_PAGE_BUSY,
+ .val = H_PAGE_BUSY,
+ .set = "busy",
+ }, {
+#ifdef CONFIG_PPC_64K_PAGES
+ .mask = H_PAGE_COMBO,
+ .val = H_PAGE_COMBO,
+ .set = "combo",
+ }, {
+ .mask = H_PAGE_4K_PFN,
+ .val = H_PAGE_4K_PFN,
+ .set = "4K_pfn",
+ }, {
+#endif
+ .mask = H_PAGE_F_GIX,
+ .val = H_PAGE_F_GIX,
+ .set = "f_gix",
+ .is_val = true,
+ .shift = H_PAGE_F_GIX_SHIFT,
+ }, {
+ .mask = H_PAGE_F_SECOND,
+ .val = H_PAGE_F_SECOND,
+ .set = "f_second",
+ }, {
+#endif
+ .mask = _PAGE_SPECIAL,
+ .val = _PAGE_SPECIAL,
+ .set = "special",
+ }
+};
+
+struct pgtable_level {
+ const struct flag_info *flag;
+ size_t num;
+ u64 mask;
+};
+
+static struct pgtable_level pg_level[] = {
+ {
+ }, { /* pgd */
+ .flag = flag_array,
+ .num = ARRAY_SIZE(flag_array),
+ }, { /* pud */
+ .flag = flag_array,
+ .num = ARRAY_SIZE(flag_array),
+ }, { /* pmd */
+ .flag = flag_array,
+ .num = ARRAY_SIZE(flag_array),
+ }, { /* pte */
+ .flag = flag_array,
+ .num = ARRAY_SIZE(flag_array),
+ },
+};
+
+static void dump_flag_info(struct pg_state *st, const struct flag_info
+ *flag, u64 pte, int num)
+{
+ unsigned int i;
+
+ for (i = 0; i < num; i++, flag++) {
+ const char *s = NULL;
+ u64 val;
+
+ /* flag not defined so don't check it */
+ if (flag->mask == 0)
+ continue;
+ /* Some 'flags' are actually values */
+ if (flag->is_val) {
+ val = pte & flag->val;
+ if (flag->shift)
+ val = val >> flag->shift;
+ seq_printf(st->seq, " %s:%llx", flag->set, val);
+ } else {
+ if ((pte & flag->mask) == flag->val)
+ s = flag->set;
+ else
+ s = flag->clear;
+ if (s)
+ seq_printf(st->seq, " %s", s);
+ }
+ st->current_flags &= ~flag->mask;
+ }
+ if (st->current_flags != 0)
+ seq_printf(st->seq, " unknown flags:%llx", st->current_flags);
+}
+
+static void dump_addr(struct pg_state *st, unsigned long addr)
+{
+ static const char units[] = "KMGTPE";
+ const char *unit = units;
+ unsigned long delta;
+
+ seq_printf(st->seq, "0x%016lx-0x%016lx ", st->start_address, addr-1);
+ delta = (addr - st->start_address) >> 10;
+ /* Work out what appropriate unit to use */
+ while (!(delta & 1023) && unit[1]) {
+ delta >>= 10;
+ unit++;
+ }
+ seq_printf(st->seq, "%9lu%c", delta, *unit);
+
+}
+
+static void note_page(struct pg_state *st, unsigned long addr,
+ unsigned int level, u64 val)
+{
+ u64 flag = val & pg_level[level].mask;
+ /* At first no level is set */
+ if (!st->level) {
+ st->level = level;
+ st->current_flags = flag;
+ st->start_address = addr;
+ seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+ /*
+ * Dump the section of virtual memory when:
+ * - the PTE flags from one entry to the next differs.
+ * - we change levels in the tree.
+ * - the address is in a different section of memory and is thus
+ * used for a different purpose, regardless of the flags.
+ */
+ } else if (flag != st->current_flags || level != st->level ||
+ addr >= st->marker[1].start_address) {
+
+ /* Check the PTE flags */
+ if (st->current_flags) {
+ dump_addr(st, addr);
+
+ /* Dump all the flags */
+ if (pg_level[st->level].flag)
+ dump_flag_info(st, pg_level[st->level].flag,
+ st->current_flags,
+ pg_level[st->level].num);
+
+ seq_puts(st->seq, "\n");
+ }
+
+ /*
+ * Address indicates we have passed the end of the
+ * current section of virtual memory
+ */
+ while (addr >= st->marker[1].start_address) {
+ st->marker++;
+ seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+ }
+ st->start_address = addr;
+ st->current_flags = flag;
+ st->level = level;
+ }
+}
+
+static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
+{
+ pte_t *pte = pte_offset_kernel(pmd, 0);
+ unsigned long addr;
+ unsigned int i;
+
+ for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
+ addr = start + i * PAGE_SIZE;
+ note_page(st, addr, 4, pte_val(*pte));
+
+ }
+}
+
+static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
+{
+ pmd_t *pmd = pmd_offset(pud, 0);
+ unsigned long addr;
+ unsigned int i;
+
+ for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
+ addr = start + i * PMD_SIZE;
+ if (!pmd_none(*pmd))
+ /* pmd exists */
+ walk_pte(st, pmd, addr);
+ else
+ note_page(st, addr, 3, pmd_val(*pmd));
+ }
+}
+
+static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
+{
+ pud_t *pud = pud_offset(pgd, 0);
+ unsigned long addr;
+ unsigned int i;
+
+ for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
+ addr = start + i * PUD_SIZE;
+ if (!pud_none(*pud))
+ /* pud exists */
+ walk_pmd(st, pud, addr);
+ else
+ note_page(st, addr, 2, pud_val(*pud));
+ }
+}
+
+static void walk_pagetables(struct pg_state *st)
+{
+ pgd_t *pgd = pgd_offset_k(0UL);
+ unsigned int i;
+ unsigned long addr;
+
+ /*
+ * Traverse the linux pagetable structure and dump pages that are in
+ * the hash pagetable.
+ */
+ for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
+ addr = KERN_VIRT_START + i * PGDIR_SIZE;
+ if (!pgd_none(*pgd))
+ /* pgd exists */
+ walk_pud(st, pgd, addr);
+ else
+ note_page(st, addr, 1, pgd_val(*pgd));
+ }
+}
+
+static void populate_markers(void)
+{
+ address_markers[0].start_address = PAGE_OFFSET;
+ address_markers[1].start_address = VMALLOC_START;
+ address_markers[2].start_address = VMALLOC_END;
+ address_markers[3].start_address = ISA_IO_BASE;
+ address_markers[4].start_address = ISA_IO_END;
+ address_markers[5].start_address = PHB_IO_BASE;
+ address_markers[6].start_address = PHB_IO_END;
+ address_markers[7].start_address = IOREMAP_BASE;
+ address_markers[8].start_address = IOREMAP_END;
+#ifdef CONFIG_PPC_STD_MMU_64
+ address_markers[9].start_address = H_VMEMMAP_BASE;
+#else
+ address_markers[9].start_address = VMEMMAP_BASE;
+#endif
+}
+
+static int ptdump_show(struct seq_file *m, void *v)
+{
+ struct pg_state st = {
+ .seq = m,
+ .start_address = KERN_VIRT_START,
+ .marker = address_markers,
+ };
+ /* Traverse kernel page tables */
+ walk_pagetables(&st);
+ note_page(&st, 0, 0, 0);
+ return 0;
+}
+
+
+static int ptdump_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, ptdump_show, NULL);
+}
+
+static const struct file_operations ptdump_fops = {
+ .open = ptdump_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static void build_pgtable_complete_mask(void)
+{
+ unsigned int i, j;
+
+ for (i = 0; i < ARRAY_SIZE(pg_level); i++)
+ if (pg_level[i].flag)
+ for (j = 0; j < pg_level[i].num; j++)
+ pg_level[i].mask |= pg_level[i].flag[j].mask;
+}
+
+static int ptdump_init(void)
+{
+ struct dentry *debugfs_file;
+
+ populate_markers();
+ build_pgtable_complete_mask();
+ debugfs_file = debugfs_create_file("kernel_pagetables", 0400, NULL,
+ NULL, &ptdump_fops);
+ return debugfs_file ? 0 : -ENOMEM;
+}
+device_initcall(ptdump_init);
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index d0b137d96df1..6fd30ac7d14a 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -391,6 +391,20 @@ good_area:
if (is_exec) {
/*
+ * An execution fault + no execute ?
+ *
+ * On CPUs that don't have CPU_FTR_COHERENT_ICACHE we
+ * deliberately create NX mappings, and use the fault to do the
+ * cache flush. This is usually handled in hash_page_do_lazy_icache()
+ * but we could end up here if that races with a concurrent PTE
+ * update. In that case we need to fall through here to the VMA
+ * check below.
+ */
+ if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE) &&
+ (regs->msr & SRR1_ISI_N_OR_G))
+ goto bad_area;
+
+ /*
* Allow execution from readable areas if the MMU does not
* provide separate controls over reading and executing.
*
@@ -404,6 +418,7 @@ good_area:
(cpu_has_feature(CPU_FTR_NOEXECUTE) ||
!(vma->vm_flags & (VM_READ | VM_WRITE))))
goto bad_area;
+
#ifdef CONFIG_PPC_STD_MMU
/*
* protfault should only happen due to us
@@ -512,7 +527,7 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
/* Are we prepared to handle this fault? */
if ((entry = search_exception_tables(regs->nip)) != NULL) {
- regs->nip = entry->fixup;
+ regs->nip = extable_fixup(entry);
return;
}
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 83ddc0e171b0..cc332608e656 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -123,8 +123,9 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
va |= ssize << 8;
sllp = get_sllp_encoding(apsize);
va |= sllp << 5;
- asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
- : : "r"(va) : "memory");
+ asm volatile(ASM_FTR_IFSET("tlbiel %0", "tlbiel %0,0", %1)
+ : : "r" (va), "i" (CPU_FTR_ARCH_206)
+ : "memory");
break;
default:
/* We need 14 to 14 + i bits of va */
@@ -141,8 +142,9 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
*/
va |= (vpn & 0xfe);
va |= 1; /* L */
- asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)"
- : : "r"(va) : "memory");
+ asm volatile(ASM_FTR_IFSET("tlbiel %0", "tlbiel %0,1", %1)
+ : : "r" (va), "i" (CPU_FTR_ARCH_206)
+ : "memory");
break;
}
@@ -221,13 +223,18 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
return -1;
hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
- hpte_r = hpte_encode_r(pa, psize, apsize, ssize) | rflags;
+ hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
if (!(vflags & HPTE_V_BOLTED)) {
DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
i, hpte_v, hpte_r);
}
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ hpte_r = hpte_old_to_new_r(hpte_v, hpte_r);
+ hpte_v = hpte_old_to_new_v(hpte_v);
+ }
+
hptep->r = cpu_to_be64(hpte_r);
/* Guarantee the second dword is visible before the valid bit */
eieio();
@@ -295,6 +302,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
vpn, want_v & HPTE_V_AVPN, slot, newpp);
hpte_v = be64_to_cpu(hptep->v);
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r));
/*
* We need to invalidate the TLB always because hpte_remove doesn't do
* a tlb invalidate. If a hash bucket gets full, we "evict" a more/less
@@ -309,6 +318,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
native_lock_hpte(hptep);
/* recheck with locks held */
hpte_v = be64_to_cpu(hptep->v);
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r));
if (unlikely(!HPTE_V_COMPARE(hpte_v, want_v) ||
!(hpte_v & HPTE_V_VALID))) {
ret = -1;
@@ -350,6 +361,8 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize)
for (i = 0; i < HPTES_PER_GROUP; i++) {
hptep = htab_address + slot;
hpte_v = be64_to_cpu(hptep->v);
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r));
if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
/* HPTE matches */
@@ -409,6 +422,8 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
want_v = hpte_encode_avpn(vpn, bpsize, ssize);
native_lock_hpte(hptep);
hpte_v = be64_to_cpu(hptep->v);
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r));
/*
* We need to invalidate the TLB always because hpte_remove doesn't do
@@ -467,6 +482,8 @@ static void native_hugepage_invalidate(unsigned long vsid,
want_v = hpte_encode_avpn(vpn, psize, ssize);
native_lock_hpte(hptep);
hpte_v = be64_to_cpu(hptep->v);
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r));
/* Even if we miss, we need to invalidate the TLB */
if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
@@ -504,6 +521,10 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
/* Look at the 8 bit LP value */
unsigned int lp = (hpte_r >> LP_SHIFT) & ((1 << LP_BITS) - 1);
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ hpte_v = hpte_new_to_old_v(hpte_v, hpte_r);
+ hpte_r = hpte_new_to_old_r(hpte_r);
+ }
if (!(hpte_v & HPTE_V_LARGE)) {
size = MMU_PAGE_4K;
a_size = MMU_PAGE_4K;
@@ -512,11 +533,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
a_size = hpte_page_sizes[lp] >> 4;
}
/* This works for all page sizes, and for 256M and 1T segments */
- if (cpu_has_feature(CPU_FTR_ARCH_300))
- *ssize = hpte_r >> HPTE_R_3_0_SSIZE_SHIFT;
- else
- *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
-
+ *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
shift = mmu_psize_defs[size].shift;
avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm);
@@ -639,6 +656,9 @@ static void native_flush_hash_range(unsigned long number, int local)
want_v = hpte_encode_avpn(vpn, psize, ssize);
native_lock_hpte(hptep);
hpte_v = be64_to_cpu(hptep->v);
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ hpte_v = hpte_new_to_old_v(hpte_v,
+ be64_to_cpu(hptep->r));
if (!HPTE_V_COMPARE(hpte_v, want_v) ||
!(hpte_v & HPTE_V_VALID))
native_unlock_hpte(hptep);
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 78dabf065ba9..8410b4bb36ed 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -796,37 +796,17 @@ static void update_hid_for_hash(void)
static void __init hash_init_partition_table(phys_addr_t hash_table,
unsigned long htab_size)
{
- unsigned long ps_field;
- unsigned long patb_size = 1UL << PATB_SIZE_SHIFT;
+ mmu_partition_table_init();
/*
- * slb llp encoding for the page size used in VPM real mode.
- * We can ignore that for lpid 0
+ * PS field (VRMA page size) is not used for LPID 0, hence set to 0.
+ * For now, UPRT is 0 and we have no segment table.
*/
- ps_field = 0;
htab_size = __ilog2(htab_size) - 18;
-
- BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large.");
- partition_tb = __va(memblock_alloc_base(patb_size, patb_size,
- MEMBLOCK_ALLOC_ANYWHERE));
-
- /* Initialize the Partition Table with no entries */
- memset((void *)partition_tb, 0, patb_size);
- partition_tb->patb0 = cpu_to_be64(ps_field | hash_table | htab_size);
- /*
- * FIXME!! This should be done via update_partition table
- * For now UPRT is 0 for us.
- */
- partition_tb->patb1 = 0;
+ mmu_partition_table_set_entry(0, hash_table | htab_size, 0);
pr_info("Partition table %p\n", partition_tb);
if (cpu_has_feature(CPU_FTR_POWER9_DD1))
update_hid_for_hash();
- /*
- * update partition table control register,
- * 64 K size.
- */
- mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
-
}
static void __init htab_initialize(void)
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index a5d3ecdabc44..289df38fb7e0 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -26,6 +26,8 @@
#ifdef CONFIG_HUGETLB_PAGE
#define PAGE_SHIFT_64K 16
+#define PAGE_SHIFT_512K 19
+#define PAGE_SHIFT_8M 23
#define PAGE_SHIFT_16M 24
#define PAGE_SHIFT_16G 34
@@ -38,7 +40,7 @@ unsigned int HPAGE_SHIFT;
* implementations may have more than one gpage size, so we need multiple
* arrays
*/
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
#define MAX_NUMBER_GPAGES 128
struct psize_gpages {
u64 gpage_list[MAX_NUMBER_GPAGES];
@@ -64,14 +66,16 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
{
struct kmem_cache *cachep;
pte_t *new;
-
-#ifdef CONFIG_PPC_FSL_BOOK3E
int i;
- int num_hugepd = 1 << (pshift - pdshift);
- cachep = hugepte_cache;
-#else
- cachep = PGT_CACHE(pdshift - pshift);
-#endif
+ int num_hugepd;
+
+ if (pshift >= pdshift) {
+ cachep = hugepte_cache;
+ num_hugepd = 1 << (pshift - pdshift);
+ } else {
+ cachep = PGT_CACHE(pdshift - pshift);
+ num_hugepd = 1;
+ }
new = kmem_cache_zalloc(cachep, GFP_KERNEL);
@@ -89,7 +93,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
smp_wmb();
spin_lock(&mm->page_table_lock);
-#ifdef CONFIG_PPC_FSL_BOOK3E
+
/*
* We have multiple higher-level entries that point to the same
* actual pte location. Fill in each as we go and backtrack on error.
@@ -100,8 +104,18 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
if (unlikely(!hugepd_none(*hpdp)))
break;
else
+#ifdef CONFIG_PPC_BOOK3S_64
+ hpdp->pd = __pa(new) |
+ (shift_to_mmu_psize(pshift) << 2);
+#elif defined(CONFIG_PPC_8xx)
+ hpdp->pd = __pa(new) |
+ (pshift == PAGE_SHIFT_8M ? _PMD_PAGE_8M :
+ _PMD_PAGE_512K) |
+ _PMD_PRESENT;
+#else
/* We use the old format for PPC_FSL_BOOK3E */
hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
+#endif
}
/* If we bailed from the for loop early, an error occurred, clean up */
if (i < num_hugepd) {
@@ -109,17 +123,6 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
hpdp->pd = 0;
kmem_cache_free(cachep, new);
}
-#else
- if (!hugepd_none(*hpdp))
- kmem_cache_free(cachep, new);
- else {
-#ifdef CONFIG_PPC_BOOK3S_64
- hpdp->pd = __pa(new) | (shift_to_mmu_psize(pshift) << 2);
-#else
- hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
-#endif
- }
-#endif
spin_unlock(&mm->page_table_lock);
return 0;
}
@@ -128,7 +131,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
* These macros define how to determine which level of the page table holds
* the hpdp.
*/
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
#define HUGEPD_PGD_SHIFT PGDIR_SHIFT
#define HUGEPD_PUD_SHIFT PUD_SHIFT
#else
@@ -136,7 +139,6 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
#define HUGEPD_PUD_SHIFT PMD_SHIFT
#endif
-#ifdef CONFIG_PPC_BOOK3S_64
/*
* At this point we do the placement change only for BOOK3S 64. This would
* possibly work on other subarchs.
@@ -153,6 +155,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
addr &= ~(sz-1);
pg = pgd_offset(mm, addr);
+#ifdef CONFIG_PPC_BOOK3S_64
if (pshift == PGDIR_SHIFT)
/* 16GB huge page */
return (pte_t *) pg;
@@ -178,32 +181,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
hpdp = (hugepd_t *)pm;
}
}
- if (!hpdp)
- return NULL;
-
- BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));
-
- if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
- return NULL;
-
- return hugepte_offset(*hpdp, addr, pdshift);
-}
-
#else
-
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
-{
- pgd_t *pg;
- pud_t *pu;
- pmd_t *pm;
- hugepd_t *hpdp = NULL;
- unsigned pshift = __ffs(sz);
- unsigned pdshift = PGDIR_SHIFT;
-
- addr &= ~(sz-1);
-
- pg = pgd_offset(mm, addr);
-
if (pshift >= HUGEPD_PGD_SHIFT) {
hpdp = (hugepd_t *)pg;
} else {
@@ -217,7 +195,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
hpdp = (hugepd_t *)pm;
}
}
-
+#endif
if (!hpdp)
return NULL;
@@ -228,9 +206,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
return hugepte_offset(*hpdp, addr, pdshift);
}
-#endif
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
/* Build list of addresses of gigantic pages. This function is used in early
* boot before the buddy allocator is setup.
*/
@@ -310,7 +287,11 @@ static int __init do_gpage_early_setup(char *param, char *val,
npages = 0;
if (npages > MAX_NUMBER_GPAGES) {
pr_warn("MMU: %lu pages requested for page "
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
"size %llu KB, limiting to "
+#else
+ "size %u KB, limiting to "
+#endif
__stringify(MAX_NUMBER_GPAGES) "\n",
npages, size / 1024);
npages = MAX_NUMBER_GPAGES;
@@ -392,7 +373,7 @@ int alloc_bootmem_huge_page(struct hstate *hstate)
}
#endif
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
#define HUGEPD_FREELIST_SIZE \
((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t))
@@ -442,6 +423,8 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte)
}
put_cpu_var(hugepd_freelist_cur);
}
+#else
+static inline void hugepd_free(struct mmu_gather *tlb, void *hugepte) {}
#endif
static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift,
@@ -453,13 +436,11 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif
unsigned long pdmask = ~((1UL << pdshift) - 1);
unsigned int num_hugepd = 1;
+ unsigned int shift = hugepd_shift(*hpdp);
-#ifdef CONFIG_PPC_FSL_BOOK3E
/* Note: On fsl the hpdp may be the first of several */
- num_hugepd = (1 << (hugepd_shift(*hpdp) - pdshift));
-#else
- unsigned int shift = hugepd_shift(*hpdp);
-#endif
+ if (shift > pdshift)
+ num_hugepd = 1 << (shift - pdshift);
start &= pdmask;
if (start < floor)
@@ -475,11 +456,10 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif
for (i = 0; i < num_hugepd; i++, hpdp++)
hpdp->pd = 0;
-#ifdef CONFIG_PPC_FSL_BOOK3E
- hugepd_free(tlb, hugepte);
-#else
- pgtable_free_tlb(tlb, hugepte, pdshift - shift);
-#endif
+ if (shift >= pdshift)
+ hugepd_free(tlb, hugepte);
+ else
+ pgtable_free_tlb(tlb, hugepte, pdshift - shift);
}
static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
@@ -492,6 +472,8 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
start = addr;
do {
+ unsigned long more;
+
pmd = pmd_offset(pud, addr);
next = pmd_addr_end(addr, end);
if (!is_hugepd(__hugepd(pmd_val(*pmd)))) {
@@ -502,15 +484,16 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
WARN_ON(!pmd_none_or_clear_bad(pmd));
continue;
}
-#ifdef CONFIG_PPC_FSL_BOOK3E
/*
* Increment next by the size of the huge mapping since
* there may be more than one entry at this level for a
* single hugepage, but all of them point to
* the same kmem cache that holds the hugepte.
*/
- next = addr + (1 << hugepd_shift(*(hugepd_t *)pmd));
-#endif
+ more = addr + (1 << hugepd_shift(*(hugepd_t *)pmd));
+ if (more > next)
+ next = more;
+
free_hugepd_range(tlb, (hugepd_t *)pmd, PMD_SHIFT,
addr, next, floor, ceiling);
} while (addr = next, addr != end);
@@ -550,15 +533,17 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
ceiling);
} else {
-#ifdef CONFIG_PPC_FSL_BOOK3E
+ unsigned long more;
/*
* Increment next by the size of the huge mapping since
* there may be more than one entry at this level for a
* single hugepage, but all of them point to
* the same kmem cache that holds the hugepte.
*/
- next = addr + (1 << hugepd_shift(*(hugepd_t *)pud));
-#endif
+ more = addr + (1 << hugepd_shift(*(hugepd_t *)pud));
+ if (more > next)
+ next = more;
+
free_hugepd_range(tlb, (hugepd_t *)pud, PUD_SHIFT,
addr, next, floor, ceiling);
}
@@ -615,15 +600,17 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
continue;
hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
} else {
-#ifdef CONFIG_PPC_FSL_BOOK3E
+ unsigned long more;
/*
* Increment next by the size of the huge mapping since
* there may be more than one entry at the pgd level
* for a single hugepage, but all of them point to the
* same kmem cache that holds the hugepte.
*/
- next = addr + (1 << hugepd_shift(*(hugepd_t *)pgd));
-#endif
+ more = addr + (1 << hugepd_shift(*(hugepd_t *)pgd));
+ if (more > next)
+ next = more;
+
free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT,
addr, next, floor, ceiling);
}
@@ -753,12 +740,13 @@ static int __init add_huge_page_size(unsigned long long size)
/* Check that it is a page size supported by the hardware and
* that it fits within pagetable and slice limits. */
-#ifdef CONFIG_PPC_FSL_BOOK3E
- if ((size < PAGE_SIZE) || !is_power_of_4(size))
+ if (size <= PAGE_SIZE)
return -EINVAL;
-#else
- if (!is_power_of_2(size)
- || (shift > SLICE_HIGH_SHIFT) || (shift <= PAGE_SHIFT))
+#if defined(CONFIG_PPC_FSL_BOOK3E)
+ if (!is_power_of_4(size))
+ return -EINVAL;
+#elif !defined(CONFIG_PPC_8xx)
+ if (!is_power_of_2(size) || (shift > SLICE_HIGH_SHIFT))
return -EINVAL;
#endif
@@ -791,53 +779,15 @@ static int __init hugepage_setup_sz(char *str)
}
__setup("hugepagesz=", hugepage_setup_sz);
-#ifdef CONFIG_PPC_FSL_BOOK3E
struct kmem_cache *hugepte_cache;
static int __init hugetlbpage_init(void)
{
int psize;
- for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
- unsigned shift;
-
- if (!mmu_psize_defs[psize].shift)
- continue;
-
- shift = mmu_psize_to_shift(psize);
-
- /* Don't treat normal page sizes as huge... */
- if (shift != PAGE_SHIFT)
- if (add_huge_page_size(1ULL << shift) < 0)
- continue;
- }
-
- /*
- * Create a kmem cache for hugeptes. The bottom bits in the pte have
- * size information encoded in them, so align them to allow this
- */
- hugepte_cache = kmem_cache_create("hugepte-cache", sizeof(pte_t),
- HUGEPD_SHIFT_MASK + 1, 0, NULL);
- if (hugepte_cache == NULL)
- panic("%s: Unable to create kmem cache for hugeptes\n",
- __func__);
-
- /* Default hpage size = 4M */
- if (mmu_psize_defs[MMU_PAGE_4M].shift)
- HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift;
- else
- panic("%s: Unable to set default huge page size\n", __func__);
-
-
- return 0;
-}
-#else
-static int __init hugetlbpage_init(void)
-{
- int psize;
-
+#if !defined(CONFIG_PPC_FSL_BOOK3E) && !defined(CONFIG_PPC_8xx)
if (!radix_enabled() && !mmu_has_feature(MMU_FTR_16M_PAGE))
return -ENODEV;
-
+#endif
for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
unsigned shift;
unsigned pdshift;
@@ -850,9 +800,9 @@ static int __init hugetlbpage_init(void)
if (add_huge_page_size(1ULL << shift) < 0)
continue;
- if (shift < PMD_SHIFT)
+ if (shift < HUGEPD_PUD_SHIFT)
pdshift = PMD_SHIFT;
- else if (shift < PUD_SHIFT)
+ else if (shift < HUGEPD_PGD_SHIFT)
pdshift = PUD_SHIFT;
else
pdshift = PGDIR_SHIFT;
@@ -860,14 +810,38 @@ static int __init hugetlbpage_init(void)
* if we have pdshift and shift value same, we don't
* use pgt cache for hugepd.
*/
- if (pdshift != shift) {
+ if (pdshift > shift) {
pgtable_cache_add(pdshift - shift, NULL);
if (!PGT_CACHE(pdshift - shift))
panic("hugetlbpage_init(): could not create "
"pgtable cache for %d bit pagesize\n", shift);
}
+#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
+ else if (!hugepte_cache) {
+ /*
+ * Create a kmem cache for hugeptes. The bottom bits in
+ * the pte have size information encoded in them, so
+ * align them to allow this
+ */
+ hugepte_cache = kmem_cache_create("hugepte-cache",
+ sizeof(pte_t),
+ HUGEPD_SHIFT_MASK + 1,
+ 0, NULL);
+ if (hugepte_cache == NULL)
+ panic("%s: Unable to create kmem cache "
+ "for hugeptes\n", __func__);
+
+ }
+#endif
}
+#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
+ /* Default hpage size = 4M on FSL_BOOK3E and 512k on 8xx */
+ if (mmu_psize_defs[MMU_PAGE_4M].shift)
+ HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift;
+ else if (mmu_psize_defs[MMU_PAGE_512K].shift)
+ HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_512K].shift;
+#else
/* Set default large page size. Currently, we pick 16M or 1M
* depending on what is available
*/
@@ -877,11 +851,13 @@ static int __init hugetlbpage_init(void)
HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift;
else if (mmu_psize_defs[MMU_PAGE_2M].shift)
HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_2M].shift;
-
+#endif
+ else
+ panic("%s: Unable to set default huge page size\n", __func__);
return 0;
}
-#endif
+
arch_initcall(hugetlbpage_init);
void flush_dcache_icache_hugepage(struct page *page)
diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c
new file mode 100644
index 000000000000..a175cd82ae8c
--- /dev/null
+++ b/arch/powerpc/mm/init-common.c
@@ -0,0 +1,107 @@
+/*
+ * PowerPC version
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ * and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ * Copyright (C) 1996 Paul Mackerras
+ *
+ * Derived from "arch/i386/mm/init.c"
+ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
+ *
+ * Dave Engebretsen <engebret@us.ibm.com>
+ * Rework for PPC64 port.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#undef DEBUG
+
+#include <linux/string.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+
+static void pgd_ctor(void *addr)
+{
+ memset(addr, 0, PGD_TABLE_SIZE);
+}
+
+static void pud_ctor(void *addr)
+{
+ memset(addr, 0, PUD_TABLE_SIZE);
+}
+
+static void pmd_ctor(void *addr)
+{
+ memset(addr, 0, PMD_TABLE_SIZE);
+}
+
+struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE];
+
+/*
+ * Create a kmem_cache() for pagetables. This is not used for PTE
+ * pages - they're linked to struct page, come from the normal free
+ * pages pool and have a different entry size (see real_pte_t) to
+ * everything else. Caches created by this function are used for all
+ * the higher level pagetables, and for hugepage pagetables.
+ */
+void pgtable_cache_add(unsigned shift, void (*ctor)(void *))
+{
+ char *name;
+ unsigned long table_size = sizeof(void *) << shift;
+ unsigned long align = table_size;
+
+ /* When batching pgtable pointers for RCU freeing, we store
+ * the index size in the low bits. Table alignment must be
+ * big enough to fit it.
+ *
+ * Likewise, hugeapge pagetable pointers contain a (different)
+ * shift value in the low bits. All tables must be aligned so
+ * as to leave enough 0 bits in the address to contain it. */
+ unsigned long minalign = max(MAX_PGTABLE_INDEX_SIZE + 1,
+ HUGEPD_SHIFT_MASK + 1);
+ struct kmem_cache *new;
+
+ /* It would be nice if this was a BUILD_BUG_ON(), but at the
+ * moment, gcc doesn't seem to recognize is_power_of_2 as a
+ * constant expression, so so much for that. */
+ BUG_ON(!is_power_of_2(minalign));
+ BUG_ON((shift < 1) || (shift > MAX_PGTABLE_INDEX_SIZE));
+
+ if (PGT_CACHE(shift))
+ return; /* Already have a cache of this size */
+
+ align = max_t(unsigned long, align, minalign);
+ name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift);
+ new = kmem_cache_create(name, table_size, align, 0, ctor);
+ kfree(name);
+ pgtable_cache[shift - 1] = new;
+ pr_debug("Allocated pgtable cache for order %d\n", shift);
+}
+
+
+void pgtable_cache_init(void)
+{
+ pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor);
+
+ if (PMD_INDEX_SIZE && !PGT_CACHE(PMD_INDEX_SIZE))
+ pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor);
+ /*
+ * In all current configs, when the PUD index exists it's the
+ * same size as either the pgd or pmd index except with THP enabled
+ * on book3s 64
+ */
+ if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE))
+ pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor);
+
+ if (!PGT_CACHE(PGD_INDEX_SIZE))
+ panic("Couldn't allocate pgd cache");
+ if (PMD_INDEX_SIZE && !PGT_CACHE(PMD_INDEX_SIZE))
+ panic("Couldn't allocate pmd pgtable caches");
+ if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE))
+ panic("Couldn't allocate pud pgtable caches");
+}
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 16ada1eb7e26..a000c3585390 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -80,83 +80,6 @@ EXPORT_SYMBOL_GPL(memstart_addr);
phys_addr_t kernstart_addr;
EXPORT_SYMBOL_GPL(kernstart_addr);
-static void pgd_ctor(void *addr)
-{
- memset(addr, 0, PGD_TABLE_SIZE);
-}
-
-static void pud_ctor(void *addr)
-{
- memset(addr, 0, PUD_TABLE_SIZE);
-}
-
-static void pmd_ctor(void *addr)
-{
- memset(addr, 0, PMD_TABLE_SIZE);
-}
-
-struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE];
-
-/*
- * Create a kmem_cache() for pagetables. This is not used for PTE
- * pages - they're linked to struct page, come from the normal free
- * pages pool and have a different entry size (see real_pte_t) to
- * everything else. Caches created by this function are used for all
- * the higher level pagetables, and for hugepage pagetables.
- */
-void pgtable_cache_add(unsigned shift, void (*ctor)(void *))
-{
- char *name;
- unsigned long table_size = sizeof(void *) << shift;
- unsigned long align = table_size;
-
- /* When batching pgtable pointers for RCU freeing, we store
- * the index size in the low bits. Table alignment must be
- * big enough to fit it.
- *
- * Likewise, hugeapge pagetable pointers contain a (different)
- * shift value in the low bits. All tables must be aligned so
- * as to leave enough 0 bits in the address to contain it. */
- unsigned long minalign = max(MAX_PGTABLE_INDEX_SIZE + 1,
- HUGEPD_SHIFT_MASK + 1);
- struct kmem_cache *new;
-
- /* It would be nice if this was a BUILD_BUG_ON(), but at the
- * moment, gcc doesn't seem to recognize is_power_of_2 as a
- * constant expression, so so much for that. */
- BUG_ON(!is_power_of_2(minalign));
- BUG_ON((shift < 1) || (shift > MAX_PGTABLE_INDEX_SIZE));
-
- if (PGT_CACHE(shift))
- return; /* Already have a cache of this size */
-
- align = max_t(unsigned long, align, minalign);
- name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift);
- new = kmem_cache_create(name, table_size, align, 0, ctor);
- kfree(name);
- pgtable_cache[shift - 1] = new;
- pr_debug("Allocated pgtable cache for order %d\n", shift);
-}
-
-
-void pgtable_cache_init(void)
-{
- pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor);
- pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor);
- /*
- * In all current configs, when the PUD index exists it's the
- * same size as either the pgd or pmd index except with THP enabled
- * on book3s 64
- */
- if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE))
- pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor);
-
- if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_CACHE_INDEX))
- panic("Couldn't allocate pgtable caches");
- if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE))
- panic("Couldn't allocate pud pgtable caches");
-}
-
#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
* Given an address within the vmemmap, determine the pfn of the page that
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index b114f8b93ec9..73bf6e14c3aa 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -115,7 +115,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
mm->context.pte_frag = NULL;
#endif
#ifdef CONFIG_SPAPR_TCE_IOMMU
- mm_iommu_init(&mm->context);
+ mm_iommu_init(mm);
#endif
return 0;
}
@@ -156,13 +156,11 @@ static inline void destroy_pagetable_page(struct mm_struct *mm)
}
#endif
-
void destroy_context(struct mm_struct *mm)
{
#ifdef CONFIG_SPAPR_TCE_IOMMU
- mm_iommu_cleanup(&mm->context);
+ WARN_ON_ONCE(!list_empty(&mm->context.iommu_group_mem_list));
#endif
-
#ifdef CONFIG_PPC_ICSWX
drop_cop(mm->context.acop, mm);
kfree(mm->context.cop_lockp);
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index e0f1c33601dd..104bad029ce9 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -56,7 +56,7 @@ static long mm_iommu_adjust_locked_vm(struct mm_struct *mm,
}
pr_debug("[%d] RLIMIT_MEMLOCK HASH64 %c%ld %ld/%ld\n",
- current->pid,
+ current ? current->pid : 0,
incr ? '+' : '-',
npages << PAGE_SHIFT,
mm->locked_vm << PAGE_SHIFT,
@@ -66,12 +66,9 @@ static long mm_iommu_adjust_locked_vm(struct mm_struct *mm,
return ret;
}
-bool mm_iommu_preregistered(void)
+bool mm_iommu_preregistered(struct mm_struct *mm)
{
- if (!current || !current->mm)
- return false;
-
- return !list_empty(&current->mm->context.iommu_group_mem_list);
+ return !list_empty(&mm->context.iommu_group_mem_list);
}
EXPORT_SYMBOL_GPL(mm_iommu_preregistered);
@@ -124,19 +121,16 @@ static int mm_iommu_move_page_from_cma(struct page *page)
return 0;
}
-long mm_iommu_get(unsigned long ua, unsigned long entries,
+long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
struct mm_iommu_table_group_mem_t **pmem)
{
struct mm_iommu_table_group_mem_t *mem;
long i, j, ret = 0, locked_entries = 0;
struct page *page = NULL;
- if (!current || !current->mm)
- return -ESRCH; /* process exited */
-
mutex_lock(&mem_list_mutex);
- list_for_each_entry_rcu(mem, &current->mm->context.iommu_group_mem_list,
+ list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list,
next) {
if ((mem->ua == ua) && (mem->entries == entries)) {
++mem->used;
@@ -154,7 +148,7 @@ long mm_iommu_get(unsigned long ua, unsigned long entries,
}
- ret = mm_iommu_adjust_locked_vm(current->mm, entries, true);
+ ret = mm_iommu_adjust_locked_vm(mm, entries, true);
if (ret)
goto unlock_exit;
@@ -215,11 +209,11 @@ populate:
mem->entries = entries;
*pmem = mem;
- list_add_rcu(&mem->next, &current->mm->context.iommu_group_mem_list);
+ list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list);
unlock_exit:
if (locked_entries && ret)
- mm_iommu_adjust_locked_vm(current->mm, locked_entries, false);
+ mm_iommu_adjust_locked_vm(mm, locked_entries, false);
mutex_unlock(&mem_list_mutex);
@@ -264,17 +258,13 @@ static void mm_iommu_free(struct rcu_head *head)
static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem)
{
list_del_rcu(&mem->next);
- mm_iommu_adjust_locked_vm(current->mm, mem->entries, false);
call_rcu(&mem->rcu, mm_iommu_free);
}
-long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem)
+long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
{
long ret = 0;
- if (!current || !current->mm)
- return -ESRCH; /* process exited */
-
mutex_lock(&mem_list_mutex);
if (mem->used == 0) {
@@ -297,6 +287,8 @@ long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem)
/* @mapped became 0 so now mappings are disabled, release the region */
mm_iommu_release(mem);
+ mm_iommu_adjust_locked_vm(mm, mem->entries, false);
+
unlock_exit:
mutex_unlock(&mem_list_mutex);
@@ -304,14 +296,12 @@ unlock_exit:
}
EXPORT_SYMBOL_GPL(mm_iommu_put);
-struct mm_iommu_table_group_mem_t *mm_iommu_lookup(unsigned long ua,
- unsigned long size)
+struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
+ unsigned long ua, unsigned long size)
{
struct mm_iommu_table_group_mem_t *mem, *ret = NULL;
- list_for_each_entry_rcu(mem,
- &current->mm->context.iommu_group_mem_list,
- next) {
+ list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
if ((mem->ua <= ua) &&
(ua + size <= mem->ua +
(mem->entries << PAGE_SHIFT))) {
@@ -324,14 +314,12 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup(unsigned long ua,
}
EXPORT_SYMBOL_GPL(mm_iommu_lookup);
-struct mm_iommu_table_group_mem_t *mm_iommu_find(unsigned long ua,
- unsigned long entries)
+struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
+ unsigned long ua, unsigned long entries)
{
struct mm_iommu_table_group_mem_t *mem, *ret = NULL;
- list_for_each_entry_rcu(mem,
- &current->mm->context.iommu_group_mem_list,
- next) {
+ list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
if ((mem->ua == ua) && (mem->entries == entries)) {
ret = mem;
break;
@@ -373,17 +361,7 @@ void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem)
}
EXPORT_SYMBOL_GPL(mm_iommu_mapped_dec);
-void mm_iommu_init(mm_context_t *ctx)
+void mm_iommu_init(struct mm_struct *mm)
{
- INIT_LIST_HEAD_RCU(&ctx->iommu_group_mem_list);
-}
-
-void mm_iommu_cleanup(mm_context_t *ctx)
-{
- struct mm_iommu_table_group_mem_t *mem, *tmp;
-
- list_for_each_entry_safe(mem, tmp, &ctx->iommu_group_mem_list, next) {
- list_del_rcu(&mem->next);
- mm_iommu_do_free(mem);
- }
+ INIT_LIST_HEAD_RCU(&mm->context.iommu_group_mem_list);
}
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index a51c188b81f3..0cb6bd8bfccf 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1085,7 +1085,7 @@ static int hot_add_node_scn_to_nid(unsigned long scn_addr)
int hot_add_scn_to_nid(unsigned long scn_addr)
{
struct device_node *memory = NULL;
- int nid, found = 0;
+ int nid;
if (!numa_enabled || (min_common_depth < 0))
return first_online_node;
@@ -1101,17 +1101,6 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
if (nid < 0 || !node_online(nid))
nid = first_online_node;
- if (NODE_DATA(nid)->node_spanned_pages)
- return nid;
-
- for_each_online_node(nid) {
- if (NODE_DATA(nid)->node_spanned_pages) {
- found = 1;
- break;
- }
- }
-
- BUG_ON(!found);
return nid;
}
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index f4f437cbabf1..ebf9782bacf9 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -35,7 +35,8 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
#endif
changed = !pmd_same(*(pmdp), entry);
if (changed) {
- __ptep_set_access_flags(vma->vm_mm, pmdp_ptep(pmdp), pmd_pte(entry));
+ __ptep_set_access_flags(vma->vm_mm, pmdp_ptep(pmdp),
+ pmd_pte(entry), address);
flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
}
return changed;
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 688b54517655..cfa53ccc8baf 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -159,7 +159,7 @@ redo:
* Allocate Partition table and process table for the
* host.
*/
- BUILD_BUG_ON_MSG((PRTB_SIZE_SHIFT > 23), "Process table size too large.");
+ BUILD_BUG_ON_MSG((PRTB_SIZE_SHIFT > 36), "Process table size too large.");
process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT);
/*
* Fill in the process table.
@@ -177,23 +177,15 @@ redo:
static void __init radix_init_partition_table(void)
{
- unsigned long rts_field;
+ unsigned long rts_field, dw0;
+ mmu_partition_table_init();
rts_field = radix__get_tree_size();
+ dw0 = rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE | PATB_HR;
+ mmu_partition_table_set_entry(0, dw0, 0);
- BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large.");
- partition_tb = early_alloc_pgtable(1UL << PATB_SIZE_SHIFT);
- partition_tb->patb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) |
- RADIX_PGD_INDEX_SIZE | PATB_HR);
pr_info("Initializing Radix MMU\n");
pr_info("Partition table %p\n", partition_tb);
-
- memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
- /*
- * update partition table control register,
- * 64 K size.
- */
- mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
}
void __init radix_init_native(void)
@@ -248,7 +240,7 @@ static int __init radix_dt_scan_page_sizes(unsigned long node,
/* top 3 bit is AP encoding */
shift = be32_to_cpu(prop[0]) & ~(0xe << 28);
ap = be32_to_cpu(prop[0]) >> 29;
- pr_info("Page size sift = %d AP=0x%x\n", shift, ap);
+ pr_info("Page size shift = %d AP=0x%x\n", shift, ap);
idx = get_idx_from_shift(shift);
if (idx < 0)
@@ -320,6 +312,38 @@ static void update_hid_for_radix(void)
cpu_relax();
}
+static void radix_init_amor(void)
+{
+ /*
+ * In HV mode, we init AMOR (Authority Mask Override Register) so that
+ * the hypervisor and guest can setup IAMR (Instruction Authority Mask
+ * Register), enable key 0 and set it to 1.
+ *
+ * AMOR = 0b1100 .... 0000 (Mask for key 0 is 11)
+ */
+ mtspr(SPRN_AMOR, (3ul << 62));
+}
+
+static void radix_init_iamr(void)
+{
+ unsigned long iamr;
+
+ /*
+ * The IAMR should set to 0 on DD1.
+ */
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+ iamr = 0;
+ else
+ iamr = (1ul << 62);
+
+ /*
+ * Radix always uses key0 of the IAMR to determine if an access is
+ * allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction
+ * fetch.
+ */
+ mtspr(SPRN_IAMR, iamr);
+}
+
void __init radix__early_init_mmu(void)
{
unsigned long lpcr;
@@ -376,8 +400,12 @@ void __init radix__early_init_mmu(void)
lpcr = mfspr(SPRN_LPCR);
mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
radix_init_partition_table();
+ radix_init_amor();
}
+ memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
+
+ radix_init_iamr();
radix_init_pgtable();
}
@@ -397,7 +425,9 @@ void radix__early_init_mmu_secondary(void)
mtspr(SPRN_PTCR,
__pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
+ radix_init_amor();
}
+ radix_init_iamr();
}
void radix__mmu_cleanup_all(void)
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 911fdfb63ec1..cb39c8bd2436 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -224,7 +224,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
if (changed) {
if (!is_vm_hugetlb_page(vma))
assert_pte_locked(vma->vm_mm, address);
- __ptep_set_access_flags(vma->vm_mm, ptep, entry);
+ __ptep_set_access_flags(vma->vm_mm, ptep, entry, address);
flush_tlb_page(vma, address);
}
return changed;
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 0ae0572bc239..a65c0b4c0669 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -42,43 +42,6 @@ EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */
extern char etext[], _stext[], _sinittext[], _einittext[];
-#define PGDIR_ORDER (32 + PGD_T_LOG2 - PGDIR_SHIFT)
-
-#ifndef CONFIG_PPC_4K_PAGES
-static struct kmem_cache *pgtable_cache;
-
-void pgtable_cache_init(void)
-{
- pgtable_cache = kmem_cache_create("PGDIR cache", 1 << PGDIR_ORDER,
- 1 << PGDIR_ORDER, 0, NULL);
- if (pgtable_cache == NULL)
- panic("Couldn't allocate pgtable caches");
-}
-#endif
-
-pgd_t *pgd_alloc(struct mm_struct *mm)
-{
- pgd_t *ret;
-
- /* pgdir take page or two with 4K pages and a page fraction otherwise */
-#ifndef CONFIG_PPC_4K_PAGES
- ret = kmem_cache_alloc(pgtable_cache, GFP_KERNEL | __GFP_ZERO);
-#else
- ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
- PGDIR_ORDER - PAGE_SHIFT);
-#endif
- return ret;
-}
-
-void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-#ifndef CONFIG_PPC_4K_PAGES
- kmem_cache_free(pgtable_cache, (void *)pgd);
-#else
- free_pages((unsigned long)pgd, PGDIR_ORDER - PAGE_SHIFT);
-#endif
-}
-
__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
pte_t *pte;
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index f5e8d4edb808..8bca7f58afc4 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -431,3 +431,37 @@ void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
}
}
#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+void __init mmu_partition_table_init(void)
+{
+ unsigned long patb_size = 1UL << PATB_SIZE_SHIFT;
+
+ BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 36), "Partition table size too large.");
+ partition_tb = __va(memblock_alloc_base(patb_size, patb_size,
+ MEMBLOCK_ALLOC_ANYWHERE));
+
+ /* Initialize the Partition Table with no entries */
+ memset((void *)partition_tb, 0, patb_size);
+
+ /*
+ * update partition table control register,
+ * 64 K size.
+ */
+ mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
+}
+
+void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
+ unsigned long dw1)
+{
+ partition_tb[lpid].patb0 = cpu_to_be64(dw0);
+ partition_tb[lpid].patb1 = cpu_to_be64(dw1);
+
+ /* Global flush of TLBs and partition table caches for this lpid */
+ asm volatile("ptesync" : : : "memory");
+ asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
+ "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
+ asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+}
+EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
+#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index 3493cf4e0452..61b79119065f 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -428,3 +428,21 @@ void radix__flush_tlb_all(void)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
+
+void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
+ unsigned long address)
+{
+ /*
+ * We track page size in pte only for DD1, So we can
+ * call this only on DD1.
+ */
+ if (!cpu_has_feature(CPU_FTR_POWER9_DD1)) {
+ VM_WARN_ON(1);
+ return;
+ }
+
+ if (old_pte & _PAGE_LARGE)
+ radix__flush_tlb_page_psize(mm, address, MMU_PAGE_2M);
+ else
+ radix__flush_tlb_page_psize(mm, address, mmu_virtual_psize);
+}
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index 050badc0ebd3..ba28fcb98597 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -53,7 +53,7 @@
* other sizes not listed here. The .ind field is only used on MMUs that have
* indirect page table entries.
*/
-#ifdef CONFIG_PPC_BOOK3E_MMU
+#if defined(CONFIG_PPC_BOOK3E_MMU) || defined(CONFIG_PPC_8xx)
#ifdef CONFIG_PPC_FSL_BOOK3E
struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
[MMU_PAGE_4K] = {
@@ -85,6 +85,25 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
.enc = BOOK3E_PAGESZ_1GB,
},
};
+#elif defined(CONFIG_PPC_8xx)
+struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
+ /* we only manage 4k and 16k pages as normal pages */
+#ifdef CONFIG_PPC_4K_PAGES
+ [MMU_PAGE_4K] = {
+ .shift = 12,
+ },
+#else
+ [MMU_PAGE_16K] = {
+ .shift = 14,
+ },
+#endif
+ [MMU_PAGE_512K] = {
+ .shift = 19,
+ },
+ [MMU_PAGE_8M] = {
+ .shift = 23,
+ },
+};
#else
struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
[MMU_PAGE_4K] = {