aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/include/asm/book3s/64
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/include/asm/book3s/64')
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash-4k.h23
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash-64k.h21
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash.h95
-rw-r--r--arch/powerpc/include/asm/book3s/64/hugetlb.h77
-rw-r--r--arch/powerpc/include/asm/book3s/64/kup-radix.h108
-rw-r--r--arch/powerpc/include/asm/book3s/64/mmu-hash.h70
-rw-r--r--arch/powerpc/include/asm/book3s/64/mmu.h104
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgalloc.h52
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable.h12
-rw-r--r--arch/powerpc/include/asm/book3s/64/radix-4k.h9
-rw-r--r--arch/powerpc/include/asm/book3s/64/radix-64k.h8
-rw-r--r--arch/powerpc/include/asm/book3s/64/radix.h40
-rw-r--r--arch/powerpc/include/asm/book3s/64/slice.h13
13 files changed, 435 insertions, 197 deletions
diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index cf5ba5254299..8fd8599c9395 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -2,10 +2,10 @@
#ifndef _ASM_POWERPC_BOOK3S_64_HASH_4K_H
#define _ASM_POWERPC_BOOK3S_64_HASH_4K_H
-#define H_PTE_INDEX_SIZE 9
-#define H_PMD_INDEX_SIZE 7
-#define H_PUD_INDEX_SIZE 9
-#define H_PGD_INDEX_SIZE 9
+#define H_PTE_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps: 2^9 x 4KB = 2MB
+#define H_PMD_INDEX_SIZE 7 // size: 8B << 7 = 1KB, maps: 2^7 x 2MB = 256MB
+#define H_PUD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps: 2^9 x 256MB = 128GB
+#define H_PGD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps: 2^9 x 128GB = 64TB
/*
* Each context is 512TB. But on 4k we restrict our max TASK size to 64TB
@@ -13,6 +13,21 @@
*/
#define MAX_EA_BITS_PER_CONTEXT 46
+#define REGION_SHIFT (MAX_EA_BITS_PER_CONTEXT - 2)
+
+/*
+ * Our page table limit us to 64TB. Hence for the kernel mapping,
+ * each MAP area is limited to 16 TB.
+ * The four map areas are: linear mapping, vmap, IO and vmemmap
+ */
+#define H_KERN_MAP_SIZE (ASM_CONST(1) << REGION_SHIFT)
+
+/*
+ * Define the address range of the kernel non-linear virtual area
+ * 16TB
+ */
+#define H_KERN_VIRT_START ASM_CONST(0xc000100000000000)
+
#ifndef __ASSEMBLY__
#define H_PTE_TABLE_SIZE (sizeof(pte_t) << H_PTE_INDEX_SIZE)
#define H_PMD_TABLE_SIZE (sizeof(pmd_t) << H_PMD_INDEX_SIZE)
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index f82ee8a3b561..d1d9177d9ebd 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -2,16 +2,29 @@
#ifndef _ASM_POWERPC_BOOK3S_64_HASH_64K_H
#define _ASM_POWERPC_BOOK3S_64_HASH_64K_H
-#define H_PTE_INDEX_SIZE 8
-#define H_PMD_INDEX_SIZE 10
-#define H_PUD_INDEX_SIZE 10
-#define H_PGD_INDEX_SIZE 8
+#define H_PTE_INDEX_SIZE 8 // size: 8B << 8 = 2KB, maps 2^8 x 64KB = 16MB
+#define H_PMD_INDEX_SIZE 10 // size: 8B << 10 = 8KB, maps 2^10 x 16MB = 16GB
+#define H_PUD_INDEX_SIZE 10 // size: 8B << 10 = 8KB, maps 2^10 x 16GB = 16TB
+#define H_PGD_INDEX_SIZE 8 // size: 8B << 8 = 2KB, maps 2^8 x 16TB = 4PB
+
/*
* Each context is 512TB size. SLB miss for first context/default context
* is handled in the hotpath.
*/
#define MAX_EA_BITS_PER_CONTEXT 49
+#define REGION_SHIFT MAX_EA_BITS_PER_CONTEXT
+
+/*
+ * We use one context for each MAP area.
+ */
+#define H_KERN_MAP_SIZE (1UL << MAX_EA_BITS_PER_CONTEXT)
+
+/*
+ * Define the address range of the kernel non-linear virtual area
+ * 2PB
+ */
+#define H_KERN_VIRT_START ASM_CONST(0xc008000000000000)
/*
* 64k aligned address free up few of the lower bits of RPN for us
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 54b7af6cd27f..1d1183048cfd 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -29,6 +29,10 @@
#define H_PGTABLE_EADDR_SIZE (H_PTE_INDEX_SIZE + H_PMD_INDEX_SIZE + \
H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT)
#define H_PGTABLE_RANGE (ASM_CONST(1) << H_PGTABLE_EADDR_SIZE)
+/*
+ * Top 2 bits are ignored in page table walk.
+ */
+#define EA_MASK (~(0xcUL << 60))
/*
* We store the slot details in the second half of page table.
@@ -42,59 +46,63 @@
#endif
/*
- * Define the address range of the kernel non-linear virtual area. In contrast
- * to the linear mapping, this is managed using the kernel page tables and then
- * inserted into the hash page table to actually take effect, similarly to user
- * mappings.
+ * +------------------------------+
+ * | |
+ * | |
+ * | |
+ * +------------------------------+ Kernel virtual map end (0xc00e000000000000)
+ * | |
+ * | |
+ * | 512TB/16TB of vmemmap |
+ * | |
+ * | |
+ * +------------------------------+ Kernel vmemmap start
+ * | |
+ * | 512TB/16TB of IO map |
+ * | |
+ * +------------------------------+ Kernel IO map start
+ * | |
+ * | 512TB/16TB of vmap |
+ * | |
+ * +------------------------------+ Kernel virt start (0xc008000000000000)
+ * | |
+ * | |
+ * | |
+ * +------------------------------+ Kernel linear (0xc.....)
*/
-#define H_KERN_VIRT_START ASM_CONST(0xD000000000000000)
-/*
- * Allow virtual mapping of one context size.
- * 512TB for 64K page size
- * 64TB for 4K page size
- */
-#define H_KERN_VIRT_SIZE (1UL << MAX_EA_BITS_PER_CONTEXT)
+#define H_VMALLOC_START H_KERN_VIRT_START
+#define H_VMALLOC_SIZE H_KERN_MAP_SIZE
+#define H_VMALLOC_END (H_VMALLOC_START + H_VMALLOC_SIZE)
-/*
- * 8TB IO mapping size
- */
-#define H_KERN_IO_SIZE ASM_CONST(0x80000000000) /* 8T */
+#define H_KERN_IO_START H_VMALLOC_END
+#define H_KERN_IO_SIZE H_KERN_MAP_SIZE
+#define H_KERN_IO_END (H_KERN_IO_START + H_KERN_IO_SIZE)
-/*
- * The vmalloc space starts at the beginning of the kernel non-linear virtual
- * region, and occupies 504T (64K) or 56T (4K)
- */
-#define H_VMALLOC_START H_KERN_VIRT_START
-#define H_VMALLOC_SIZE (H_KERN_VIRT_SIZE - H_KERN_IO_SIZE)
-#define H_VMALLOC_END (H_VMALLOC_START + H_VMALLOC_SIZE)
+#define H_VMEMMAP_START H_KERN_IO_END
+#define H_VMEMMAP_SIZE H_KERN_MAP_SIZE
+#define H_VMEMMAP_END (H_VMEMMAP_START + H_VMEMMAP_SIZE)
-#define H_KERN_IO_START H_VMALLOC_END
+#define NON_LINEAR_REGION_ID(ea) ((((unsigned long)ea - H_KERN_VIRT_START) >> REGION_SHIFT) + 2)
/*
* Region IDs
*/
-#define REGION_SHIFT 60UL
-#define REGION_MASK (0xfUL << REGION_SHIFT)
-#define REGION_ID(ea) (((unsigned long)(ea)) >> REGION_SHIFT)
-
-#define VMALLOC_REGION_ID (REGION_ID(H_VMALLOC_START))
-#define KERNEL_REGION_ID (REGION_ID(PAGE_OFFSET))
-#define VMEMMAP_REGION_ID (0xfUL) /* Server only */
-#define USER_REGION_ID (0UL)
+#define USER_REGION_ID 0
+#define LINEAR_MAP_REGION_ID 1
+#define VMALLOC_REGION_ID NON_LINEAR_REGION_ID(H_VMALLOC_START)
+#define IO_REGION_ID NON_LINEAR_REGION_ID(H_KERN_IO_START)
+#define VMEMMAP_REGION_ID NON_LINEAR_REGION_ID(H_VMEMMAP_START)
/*
* Defines the address of the vmemap area, in its own region on
* hash table CPUs.
*/
-#define H_VMEMMAP_BASE (VMEMMAP_REGION_ID << REGION_SHIFT)
-
#ifdef CONFIG_PPC_MM_SLICES
#define HAVE_ARCH_UNMAPPED_AREA
#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
#endif /* CONFIG_PPC_MM_SLICES */
-
/* PTEIDX nibble */
#define _PTEIDX_SECONDARY 0x8
#define _PTEIDX_GROUP_IX 0x7
@@ -103,6 +111,25 @@
#define H_PUD_BAD_BITS (PMD_TABLE_SIZE-1)
#ifndef __ASSEMBLY__
+static inline int get_region_id(unsigned long ea)
+{
+ int region_id;
+ int id = (ea >> 60UL);
+
+ if (id == 0)
+ return USER_REGION_ID;
+
+ if (ea < H_KERN_VIRT_START)
+ return LINEAR_MAP_REGION_ID;
+
+ VM_BUG_ON(id != 0xc);
+ BUILD_BUG_ON(NON_LINEAR_REGION_ID(H_VMALLOC_START) != 2);
+
+ region_id = NON_LINEAR_REGION_ID(ea);
+ VM_BUG_ON(region_id > VMEMMAP_REGION_ID);
+ return region_id;
+}
+
#define hash__pmd_bad(pmd) (pmd_val(pmd) & H_PMD_BAD_BITS)
#define hash__pud_bad(pud) (pud_val(pud) & H_PUD_BAD_BITS)
static inline int hash__pgd_bad(pgd_t pgd)
diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h
index ec2a55a553c7..12e150e615b7 100644
--- a/arch/powerpc/include/asm/book3s/64/hugetlb.h
+++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h
@@ -36,8 +36,8 @@ static inline int hstate_get_psize(struct hstate *hstate)
}
}
-#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
-static inline bool gigantic_page_supported(void)
+#define __HAVE_ARCH_GIGANTIC_PAGE_RUNTIME_SUPPORTED
+static inline bool gigantic_page_runtime_supported(void)
{
/*
* We used gigantic page reservation with hypervisor assist in some case.
@@ -49,7 +49,6 @@ static inline bool gigantic_page_supported(void)
return true;
}
-#endif
/* hugepd entry valid bit */
#define HUGEPD_VAL_BITS (0x8000000000000000UL)
@@ -62,4 +61,76 @@ extern pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
extern void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
pte_t old_pte, pte_t new_pte);
+/*
+ * This should work for other subarchs too. But right now we use the
+ * new format only for 64bit book3s
+ */
+static inline pte_t *hugepd_page(hugepd_t hpd)
+{
+ BUG_ON(!hugepd_ok(hpd));
+ /*
+ * We have only four bits to encode, MMU page size
+ */
+ BUILD_BUG_ON((MMU_PAGE_COUNT - 1) > 0xf);
+ return __va(hpd_val(hpd) & HUGEPD_ADDR_MASK);
+}
+
+static inline unsigned int hugepd_mmu_psize(hugepd_t hpd)
+{
+ return (hpd_val(hpd) & HUGEPD_SHIFT_MASK) >> 2;
+}
+
+static inline unsigned int hugepd_shift(hugepd_t hpd)
+{
+ return mmu_psize_to_shift(hugepd_mmu_psize(hpd));
+}
+static inline void flush_hugetlb_page(struct vm_area_struct *vma,
+ unsigned long vmaddr)
+{
+ if (radix_enabled())
+ return radix__flush_hugetlb_page(vma, vmaddr);
+}
+
+static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr,
+ unsigned int pdshift)
+{
+ unsigned long idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(hpd);
+
+ return hugepd_page(hpd) + idx;
+}
+
+static inline void hugepd_populate(hugepd_t *hpdp, pte_t *new, unsigned int pshift)
+{
+ *hpdp = __hugepd(__pa(new) | HUGEPD_VAL_BITS | (shift_to_mmu_psize(pshift) << 2));
+}
+
+void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+
+static inline int check_and_get_huge_psize(int shift)
+{
+ int mmu_psize;
+
+ if (shift > SLICE_HIGH_SHIFT)
+ return -EINVAL;
+
+ mmu_psize = shift_to_mmu_psize(shift);
+
+ /*
+ * We need to make sure that for different page sizes reported by
+ * firmware we only add hugetlb support for page sizes that can be
+ * supported by linux page table layout.
+ * For now we have
+ * Radix: 2M and 1G
+ * Hash: 16M and 16G
+ */
+ if (radix_enabled()) {
+ if (mmu_psize != MMU_PAGE_2M && mmu_psize != MMU_PAGE_1G)
+ return -EINVAL;
+ } else {
+ if (mmu_psize != MMU_PAGE_16M && mmu_psize != MMU_PAGE_16G)
+ return -EINVAL;
+ }
+ return mmu_psize;
+}
+
#endif
diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h
new file mode 100644
index 000000000000..f254de956d6a
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H
+#define _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H
+
+#include <linux/const.h>
+
+#define AMR_KUAP_BLOCK_READ UL(0x4000000000000000)
+#define AMR_KUAP_BLOCK_WRITE UL(0x8000000000000000)
+#define AMR_KUAP_BLOCKED (AMR_KUAP_BLOCK_READ | AMR_KUAP_BLOCK_WRITE)
+#define AMR_KUAP_SHIFT 62
+
+#ifdef __ASSEMBLY__
+
+.macro kuap_restore_amr gpr
+#ifdef CONFIG_PPC_KUAP
+ BEGIN_MMU_FTR_SECTION_NESTED(67)
+ ld \gpr, STACK_REGS_KUAP(r1)
+ mtspr SPRN_AMR, \gpr
+ END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67)
+#endif
+.endm
+
+.macro kuap_check_amr gpr1, gpr2
+#ifdef CONFIG_PPC_KUAP_DEBUG
+ BEGIN_MMU_FTR_SECTION_NESTED(67)
+ mfspr \gpr1, SPRN_AMR
+ li \gpr2, (AMR_KUAP_BLOCKED >> AMR_KUAP_SHIFT)
+ sldi \gpr2, \gpr2, AMR_KUAP_SHIFT
+999: tdne \gpr1, \gpr2
+ EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE)
+ END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67)
+#endif
+.endm
+
+.macro kuap_save_amr_and_lock gpr1, gpr2, use_cr, msr_pr_cr
+#ifdef CONFIG_PPC_KUAP
+ BEGIN_MMU_FTR_SECTION_NESTED(67)
+ .ifnb \msr_pr_cr
+ bne \msr_pr_cr, 99f
+ .endif
+ mfspr \gpr1, SPRN_AMR
+ std \gpr1, STACK_REGS_KUAP(r1)
+ li \gpr2, (AMR_KUAP_BLOCKED >> AMR_KUAP_SHIFT)
+ sldi \gpr2, \gpr2, AMR_KUAP_SHIFT
+ cmpd \use_cr, \gpr1, \gpr2
+ beq \use_cr, 99f
+ // We don't isync here because we very recently entered via rfid
+ mtspr SPRN_AMR, \gpr2
+ isync
+99:
+ END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67)
+#endif
+.endm
+
+#else /* !__ASSEMBLY__ */
+
+#ifdef CONFIG_PPC_KUAP
+
+#include <asm/reg.h>
+
+/*
+ * We support individually allowing read or write, but we don't support nesting
+ * because that would require an expensive read/modify write of the AMR.
+ */
+
+static inline void set_kuap(unsigned long value)
+{
+ if (!early_mmu_has_feature(MMU_FTR_RADIX_KUAP))
+ return;
+
+ /*
+ * ISA v3.0B says we need a CSI (Context Synchronising Instruction) both
+ * before and after the move to AMR. See table 6 on page 1134.
+ */
+ isync();
+ mtspr(SPRN_AMR, value);
+ isync();
+}
+
+static inline void allow_user_access(void __user *to, const void __user *from,
+ unsigned long size)
+{
+ // This is written so we can resolve to a single case at build time
+ if (__builtin_constant_p(to) && to == NULL)
+ set_kuap(AMR_KUAP_BLOCK_WRITE);
+ else if (__builtin_constant_p(from) && from == NULL)
+ set_kuap(AMR_KUAP_BLOCK_READ);
+ else
+ set_kuap(0);
+}
+
+static inline void prevent_user_access(void __user *to, const void __user *from,
+ unsigned long size)
+{
+ set_kuap(AMR_KUAP_BLOCKED);
+}
+
+static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write)
+{
+ return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) &&
+ (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)),
+ "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read");
+}
+#endif /* CONFIG_PPC_KUAP */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H */
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index a28a28079edb..1e4705516a54 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -588,7 +588,8 @@ extern void slb_set_size(u16 size);
#endif
#define MAX_VMALLOC_CTX_CNT 1
-#define MAX_MEMMAP_CTX_CNT 1
+#define MAX_IO_CTX_CNT 1
+#define MAX_VMEMMAP_CTX_CNT 1
/*
* 256MB segment
@@ -601,13 +602,10 @@ extern void slb_set_size(u16 size);
* would give a protovsid of 0x1fffffffff. That will result in a VSID 0
* because of the modulo operation in vsid scramble.
*
- * We add one extra context to MIN_USER_CONTEXT so that we can map kernel
- * context easily. The +1 is to map the unused 0xe region mapping.
*/
#define MAX_USER_CONTEXT ((ASM_CONST(1) << CONTEXT_BITS) - 2)
#define MIN_USER_CONTEXT (MAX_KERNEL_CTX_CNT + MAX_VMALLOC_CTX_CNT + \
- MAX_MEMMAP_CTX_CNT + 2)
-
+ MAX_IO_CTX_CNT + MAX_VMEMMAP_CTX_CNT)
/*
* For platforms that support on 65bit VA we limit the context bits
*/
@@ -657,8 +655,8 @@ extern void slb_set_size(u16 size);
/* 4 bits per slice and we have one slice per 1TB */
#define SLICE_ARRAY_SIZE (H_PGTABLE_RANGE >> 41)
-#define TASK_SLICE_ARRAY_SZ(x) ((x)->context.slb_addr_limit >> 41)
-
+#define LOW_SLICE_ARRAY_SZ (BITS_PER_LONG / BITS_PER_BYTE)
+#define TASK_SLICE_ARRAY_SZ(x) ((x)->hash_context->slb_addr_limit >> 41)
#ifndef __ASSEMBLY__
#ifdef CONFIG_PPC_SUBPAGE_PROT
@@ -687,12 +685,41 @@ struct subpage_prot_table {
#define SBP_L3_SHIFT (SBP_L2_SHIFT + SBP_L2_BITS)
extern void subpage_prot_free(struct mm_struct *mm);
-extern void subpage_prot_init_new_context(struct mm_struct *mm);
#else
static inline void subpage_prot_free(struct mm_struct *mm) {}
-static inline void subpage_prot_init_new_context(struct mm_struct *mm) { }
#endif /* CONFIG_PPC_SUBPAGE_PROT */
+/*
+ * One bit per slice. We have lower slices which cover 256MB segments
+ * upto 4G range. That gets us 16 low slices. For the rest we track slices
+ * in 1TB size.
+ */
+struct slice_mask {
+ u64 low_slices;
+ DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH);
+};
+
+struct hash_mm_context {
+ u16 user_psize; /* page size index */
+
+ /* SLB page size encodings*/
+ unsigned char low_slices_psize[LOW_SLICE_ARRAY_SZ];
+ unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
+ unsigned long slb_addr_limit;
+#ifdef CONFIG_PPC_64K_PAGES
+ struct slice_mask mask_64k;
+#endif
+ struct slice_mask mask_4k;
+#ifdef CONFIG_HUGETLB_PAGE
+ struct slice_mask mask_16m;
+ struct slice_mask mask_16g;
+#endif
+
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+ struct subpage_prot_table *spt;
+#endif /* CONFIG_PPC_SUBPAGE_PROT */
+};
+
#if 0
/*
* The code below is equivalent to this function for arguments
@@ -747,7 +774,7 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea,
/*
* Bad address. We return VSID 0 for that
*/
- if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE)
+ if ((ea & EA_MASK) >= H_PGTABLE_RANGE)
return 0;
if (!mmu_has_feature(MMU_FTR_68_BIT_VA))
@@ -774,28 +801,29 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea,
* 0x00002 - [ 0xc002000000000000 - 0xc003ffffffffffff]
* 0x00003 - [ 0xc004000000000000 - 0xc005ffffffffffff]
* 0x00004 - [ 0xc006000000000000 - 0xc007ffffffffffff]
-
- * 0x00005 - [ 0xd000000000000000 - 0xd001ffffffffffff ]
- * 0x00006 - Not used - Can map 0xe000000000000000 range.
- * 0x00007 - [ 0xf000000000000000 - 0xf001ffffffffffff ]
*
- * So we can compute the context from the region (top nibble) by
- * subtracting 11, or 0xc - 1.
+ * vmap, IO, vmemap
+ *
+ * 0x00005 - [ 0xc008000000000000 - 0xc009ffffffffffff]
+ * 0x00006 - [ 0xc00a000000000000 - 0xc00bffffffffffff]
+ * 0x00007 - [ 0xc00c000000000000 - 0xc00dffffffffffff]
+ *
*/
static inline unsigned long get_kernel_context(unsigned long ea)
{
- unsigned long region_id = REGION_ID(ea);
+ unsigned long region_id = get_region_id(ea);
unsigned long ctx;
/*
- * For linear mapping we do support multiple context
+ * Depending on Kernel config, kernel region can have one context
+ * or more.
*/
- if (region_id == KERNEL_REGION_ID) {
+ if (region_id == LINEAR_MAP_REGION_ID) {
/*
* We already verified ea to be not beyond the addr limit.
*/
- ctx = 1 + ((ea & ~REGION_MASK) >> MAX_EA_BITS_PER_CONTEXT);
+ ctx = 1 + ((ea & EA_MASK) >> MAX_EA_BITS_PER_CONTEXT);
} else
- ctx = (region_id - 0xc) + MAX_KERNEL_CTX_CNT;
+ ctx = region_id + MAX_KERNEL_CTX_CNT - 1;
return ctx;
}
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 1ceee000c18d..74d24201fc4f 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -25,15 +25,22 @@ struct mmu_psize_def {
};
};
extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
+#endif /* __ASSEMBLY__ */
/*
- * For BOOK3s 64 with 4k and 64K linux page size
- * we want to use pointers, because the page table
- * actually store pfn
+ * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS
+ * if we increase SECTIONS_WIDTH we will not store node details in page->flags and
+ * page_to_nid does a page->section->node lookup
+ * Hence only increase for VMEMMAP. Further depending on SPARSEMEM_EXTREME reduce
+ * memory requirements with large number of sections.
+ * 51 bits is the max physical real address on POWER9
*/
-typedef pte_t *pgtable_t;
-
-#endif /* __ASSEMBLY__ */
+#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) && \
+ defined(CONFIG_PPC_64K_PAGES)
+#define MAX_PHYSMEM_BITS 51
+#else
+#define MAX_PHYSMEM_BITS 46
+#endif
/* 64-bit classic hash table MMU */
#include <asm/book3s/64/mmu-hash.h>
@@ -89,16 +96,6 @@ struct spinlock;
/* Maximum possible number of NPUs in a system. */
#define NV_MAX_NPUS 8
-/*
- * One bit per slice. We have lower slices which cover 256MB segments
- * upto 4G range. That gets us 16 low slices. For the rest we track slices
- * in 1TB size.
- */
-struct slice_mask {
- u64 low_slices;
- DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH);
-};
-
typedef struct {
union {
/*
@@ -112,7 +109,6 @@ typedef struct {
mm_context_id_t id;
mm_context_id_t extended_id[TASK_SIZE_USER64/TASK_CONTEXT_SIZE];
};
- u16 user_psize; /* page size index */
/* Number of bits in the mm_cpumask */
atomic_t active_cpus;
@@ -122,27 +118,9 @@ typedef struct {
/* NPU NMMU context */
struct npu_context *npu_context;
+ struct hash_mm_context *hash_context;
-#ifdef CONFIG_PPC_MM_SLICES
- /* SLB page size encodings*/
- unsigned char low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
- unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
- unsigned long slb_addr_limit;
-# ifdef CONFIG_PPC_64K_PAGES
- struct slice_mask mask_64k;
-# endif
- struct slice_mask mask_4k;
-# ifdef CONFIG_HUGETLB_PAGE
- struct slice_mask mask_16m;
- struct slice_mask mask_16g;
-# endif
-#else
- u16 sllp; /* SLB page size encoding */
-#endif
unsigned long vdso_base;
-#ifdef CONFIG_PPC_SUBPAGE_PROT
- struct subpage_prot_table spt;
-#endif /* CONFIG_PPC_SUBPAGE_PROT */
/*
* pagetable fragment support
*/
@@ -163,6 +141,60 @@ typedef struct {
#endif
} mm_context_t;
+static inline u16 mm_ctx_user_psize(mm_context_t *ctx)
+{
+ return ctx->hash_context->user_psize;
+}
+
+static inline void mm_ctx_set_user_psize(mm_context_t *ctx, u16 user_psize)
+{
+ ctx->hash_context->user_psize = user_psize;
+}
+
+static inline unsigned char *mm_ctx_low_slices(mm_context_t *ctx)
+{
+ return ctx->hash_context->low_slices_psize;
+}
+
+static inline unsigned char *mm_ctx_high_slices(mm_context_t *ctx)
+{
+ return ctx->hash_context->high_slices_psize;
+}
+
+static inline unsigned long mm_ctx_slb_addr_limit(mm_context_t *ctx)
+{
+ return ctx->hash_context->slb_addr_limit;
+}
+
+static inline void mm_ctx_set_slb_addr_limit(mm_context_t *ctx, unsigned long limit)
+{
+ ctx->hash_context->slb_addr_limit = limit;
+}
+
+static inline struct slice_mask *slice_mask_for_size(mm_context_t *ctx, int psize)
+{
+#ifdef CONFIG_PPC_64K_PAGES
+ if (psize == MMU_PAGE_64K)
+ return &ctx->hash_context->mask_64k;
+#endif
+#ifdef CONFIG_HUGETLB_PAGE
+ if (psize == MMU_PAGE_16M)
+ return &ctx->hash_context->mask_16m;
+ if (psize == MMU_PAGE_16G)
+ return &ctx->hash_context->mask_16g;
+#endif
+ BUG_ON(psize != MMU_PAGE_4K);
+
+ return &ctx->hash_context->mask_4k;
+}
+
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+static inline struct subpage_prot_table *mm_ctx_subpage_prot(mm_context_t *ctx)
+{
+ return ctx->hash_context->spt;
+}
+#endif
+
/*
* The current system page and segment sizes
*/
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 138bc2ecc0c4..d45e4449619f 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -19,29 +19,7 @@ struct vmemmap_backing {
};
extern struct vmemmap_backing *vmemmap_list;
-/*
- * Functions that deal with pagetables that could be at any level of
- * the table need to be passed an "index_size" so they know how to
- * handle allocation. For PTE pages (which are linked to a struct
- * page for now, and drawn from the main get_free_pages() pool), the
- * allocation size will be (2^index_size * sizeof(pointer)) and
- * allocations are drawn from the kmem_cache in PGT_CACHE(index_size).
- *
- * The maximum index size needs to be big enough to allow any
- * pagetable sizes we need, but small enough to fit in the low bits of
- * any page table pointer. In other words all pagetables, even tiny
- * ones, must be aligned to allow at least enough low 0 bits to
- * contain this value. This value is also used as a mask, so it must
- * be one less than a power of two.
- */
-#define MAX_PGTABLE_INDEX_SIZE 0xf
-
-extern struct kmem_cache *pgtable_cache[];
-#define PGT_CACHE(shift) pgtable_cache[shift]
-
-extern pte_t *pte_fragment_alloc(struct mm_struct *, int);
extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long);
-extern void pte_fragment_free(unsigned long *, int);
extern void pmd_fragment_free(unsigned long *);
extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
#ifdef CONFIG_SMP
@@ -81,6 +59,9 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
pgtable_gfp_flags(mm, GFP_KERNEL));
+ if (unlikely(!pgd))
+ return pgd;
+
/*
* Don't scan the PGD for pointers, it contains references to PUDs but
* those references are not full pointers and so can't be recognised by
@@ -185,31 +166,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
*pmd = __pmd(__pgtable_ptr_val(pte_page) | PMD_VAL_BITS);
}
-static inline pgtable_t pmd_pgtable(pmd_t pmd)
-{
- return (pgtable_t)pmd_page_vaddr(pmd);
-}
-
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
- return (pte_t *)pte_fragment_alloc(mm, 1);
-}
-
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
-{
- return (pgtable_t)pte_fragment_alloc(mm, 0);
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- pte_fragment_free((unsigned long *)pte, 1);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
-{
- pte_fragment_free((unsigned long *)ptepage, 0);
-}
-
static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
unsigned long address)
{
@@ -221,8 +177,6 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
pgtable_free_tlb(tlb, table, PTE_INDEX);
}
-#define check_pgt_cache() do { } while (0)
-
extern atomic_long_t direct_pages_count[MMU_PAGE_COUNT];
static inline void update_page_count(int psize, long count)
{
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 581f91be9dd4..7dede2e34b70 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -277,9 +277,11 @@ extern unsigned long __vmalloc_end;
extern unsigned long __kernel_virt_start;
extern unsigned long __kernel_virt_size;
extern unsigned long __kernel_io_start;
+extern unsigned long __kernel_io_end;
#define KERN_VIRT_START __kernel_virt_start
-#define KERN_VIRT_SIZE __kernel_virt_size
#define KERN_IO_START __kernel_io_start
+#define KERN_IO_END __kernel_io_end
+
extern struct page *vmemmap;
extern unsigned long ioremap_bot;
extern unsigned long pci_io_base;
@@ -296,8 +298,7 @@ extern unsigned long pci_io_base;
#include <asm/barrier.h>
/*
- * The second half of the kernel virtual space is used for IO mappings,
- * it's itself carved into the PIO region (ISA and PHB IO space) and
+ * IO space itself carved into the PIO region (ISA and PHB IO space) and
* the ioremap space
*
* ISA_IO_BASE = KERN_IO_START, 64K reserved area
@@ -310,7 +311,7 @@ extern unsigned long pci_io_base;
#define PHB_IO_BASE (ISA_IO_END)
#define PHB_IO_END (KERN_IO_START + FULL_IO_SIZE)
#define IOREMAP_BASE (PHB_IO_END)
-#define IOREMAP_END (KERN_VIRT_START + KERN_VIRT_SIZE)
+#define IOREMAP_END (KERN_IO_END)
/* Advertise special mapping type for AGP */
#define HAVE_PAGE_AGP
@@ -992,7 +993,8 @@ extern struct page *pgd_page(pgd_t pgd);
(((pte_t *) pmd_page_vaddr(*(dir))) + pte_index(addr))
#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr))
-#define pte_unmap(pte) do { } while(0)
+
+static inline void pte_unmap(pte_t *pte) { }
/* to find an entry in a kernel page-table-directory */
/* This now only contains the vmalloc pages */
diff --git a/arch/powerpc/include/asm/book3s/64/radix-4k.h b/arch/powerpc/include/asm/book3s/64/radix-4k.h
index 863c3e8286fb..d5f5ab73dc7f 100644
--- a/arch/powerpc/include/asm/book3s/64/radix-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/radix-4k.h
@@ -5,10 +5,11 @@
/*
* For 4K page size supported index is 13/9/9/9
*/
-#define RADIX_PTE_INDEX_SIZE 9 /* 2MB huge page */
-#define RADIX_PMD_INDEX_SIZE 9 /* 1G huge page */
-#define RADIX_PUD_INDEX_SIZE 9
-#define RADIX_PGD_INDEX_SIZE 13
+#define RADIX_PTE_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 4K = 2MB
+#define RADIX_PMD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 2MB = 1GB
+#define RADIX_PUD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 1GB = 512GB
+#define RADIX_PGD_INDEX_SIZE 13 // size: 8B << 13 = 64KB, maps 2^13 x 512GB = 4PB
+
/*
* One fragment per per page
*/
diff --git a/arch/powerpc/include/asm/book3s/64/radix-64k.h b/arch/powerpc/include/asm/book3s/64/radix-64k.h
index ccb78ca9d0c5..54e33828b0fb 100644
--- a/arch/powerpc/include/asm/book3s/64/radix-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/radix-64k.h
@@ -5,10 +5,10 @@
/*
* For 64K page size supported index is 13/9/9/5
*/
-#define RADIX_PTE_INDEX_SIZE 5 /* 2MB huge page */
-#define RADIX_PMD_INDEX_SIZE 9 /* 1G huge page */
-#define RADIX_PUD_INDEX_SIZE 9
-#define RADIX_PGD_INDEX_SIZE 13
+#define RADIX_PTE_INDEX_SIZE 5 // size: 8B << 5 = 256B, maps 2^5 x 64K = 2MB
+#define RADIX_PMD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 2MB = 1GB
+#define RADIX_PUD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 1GB = 512GB
+#define RADIX_PGD_INDEX_SIZE 13 // size: 8B << 13 = 64KB, maps 2^13 x 512GB = 4PB
/*
* We use a 256 byte PTE page fragment in radix
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 5ab134eeed20..574eca33f893 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -72,19 +72,17 @@
* | |
* | |
* | |
- * +------------------------------+ Kernel IO map end (0xc010000000000000)
+ * +------------------------------+ Kernel vmemmap end (0xc010000000000000)
* | |
+ * | 512TB |
* | |
- * | 1/2 of virtual map |
+ * +------------------------------+ Kernel IO map end/vmemap start
* | |
+ * | 512TB |
* | |
- * +------------------------------+ Kernel IO map start
+ * +------------------------------+ Kernel vmap end/ IO map start
* | |
- * | 1/4 of virtual map |
- * | |
- * +------------------------------+ Kernel vmemap start
- * | |
- * | 1/4 of virtual map |
+ * | 512TB |
* | |
* +------------------------------+ Kernel virt start (0xc008000000000000)
* | |
@@ -93,24 +91,24 @@
* +------------------------------+ Kernel linear (0xc.....)
*/
-#define RADIX_KERN_VIRT_START ASM_CONST(0xc008000000000000)
-#define RADIX_KERN_VIRT_SIZE ASM_CONST(0x0008000000000000)
-
+#define RADIX_KERN_VIRT_START ASM_CONST(0xc008000000000000)
/*
- * The vmalloc space starts at the beginning of that region, and
- * occupies a quarter of it on radix config.
- * (we keep a quarter for the virtual memmap)
+ * 49 = MAX_EA_BITS_PER_CONTEXT (hash specific). To make sure we pick
+ * the same value as hash.
*/
+#define RADIX_KERN_MAP_SIZE (1UL << 49)
+
#define RADIX_VMALLOC_START RADIX_KERN_VIRT_START
-#define RADIX_VMALLOC_SIZE (RADIX_KERN_VIRT_SIZE >> 2)
+#define RADIX_VMALLOC_SIZE RADIX_KERN_MAP_SIZE
#define RADIX_VMALLOC_END (RADIX_VMALLOC_START + RADIX_VMALLOC_SIZE)
-/*
- * Defines the address of the vmemap area, in its own region on
- * hash table CPUs.
- */
-#define RADIX_VMEMMAP_BASE (RADIX_VMALLOC_END)
-#define RADIX_KERN_IO_START (RADIX_KERN_VIRT_START + (RADIX_KERN_VIRT_SIZE >> 1))
+#define RADIX_KERN_IO_START RADIX_VMALLOC_END
+#define RADIX_KERN_IO_SIZE RADIX_KERN_MAP_SIZE
+#define RADIX_KERN_IO_END (RADIX_KERN_IO_START + RADIX_KERN_IO_SIZE)
+
+#define RADIX_VMEMMAP_START RADIX_KERN_IO_END
+#define RADIX_VMEMMAP_SIZE RADIX_KERN_MAP_SIZE
+#define RADIX_VMEMMAP_END (RADIX_VMEMMAP_START + RADIX_VMEMMAP_SIZE)
#ifndef __ASSEMBLY__
#define RADIX_PTE_TABLE_SIZE (sizeof(pte_t) << RADIX_PTE_INDEX_SIZE)
diff --git a/arch/powerpc/include/asm/book3s/64/slice.h b/arch/powerpc/include/asm/book3s/64/slice.h
index db0dedab65ee..f0d3194ba41b 100644
--- a/arch/powerpc/include/asm/book3s/64/slice.h
+++ b/arch/powerpc/include/asm/book3s/64/slice.h
@@ -2,8 +2,6 @@
#ifndef _ASM_POWERPC_BOOK3S_64_SLICE_H
#define _ASM_POWERPC_BOOK3S_64_SLICE_H
-#ifdef CONFIG_PPC_MM_SLICES
-
#define SLICE_LOW_SHIFT 28
#define SLICE_LOW_TOP (0x100000000ul)
#define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
@@ -13,15 +11,6 @@
#define SLICE_NUM_HIGH (H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT)
#define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT)
-#else /* CONFIG_PPC_MM_SLICES */
-
-#define get_slice_psize(mm, addr) ((mm)->context.user_psize)
-#define slice_set_user_psize(mm, psize) \
-do { \
- (mm)->context.user_psize = (psize); \
- (mm)->context.sllp = SLB_VSID_USER | mmu_psize_defs[(psize)].sllp; \
-} while (0)
-
-#endif /* CONFIG_PPC_MM_SLICES */
+#define SLB_ADDR_LIMIT_DEFAULT DEFAULT_MAP_WINDOW_USER64
#endif /* _ASM_POWERPC_BOOK3S_64_SLICE_H */