aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--arch/powerpc/mm/Makefile2
-rw-r--r--arch/powerpc/mm/book3s32/mmu.c60
-rw-r--r--arch/powerpc/mm/book3s64/Makefile1
-rw-r--r--arch/powerpc/mm/book3s64/hash_native.c6
-rw-r--r--arch/powerpc/mm/book3s64/hash_utils.c105
-rw-r--r--arch/powerpc/mm/book3s64/iommu_api.c48
-rw-r--r--arch/powerpc/mm/book3s64/mmu_context.c1
-rw-r--r--arch/powerpc/mm/book3s64/pgtable.c114
-rw-r--r--arch/powerpc/mm/book3s64/radix_pgtable.c194
-rw-r--r--arch/powerpc/mm/book3s64/radix_tlb.c338
-rw-r--r--arch/powerpc/mm/book3s64/subpage_prot.c12
-rw-r--r--arch/powerpc/mm/book3s64/vphn.c73
-rw-r--r--arch/powerpc/mm/book3s64/vphn.h16
-rw-r--r--arch/powerpc/mm/dma-noncoherent.c312
-rw-r--r--arch/powerpc/mm/fault.c28
-rw-r--r--arch/powerpc/mm/hugetlbpage.c99
-rw-r--r--arch/powerpc/mm/init_64.c5
-rw-r--r--arch/powerpc/mm/ioremap.c99
-rw-r--r--arch/powerpc/mm/ioremap_32.c92
-rw-r--r--arch/powerpc/mm/ioremap_64.c113
-rw-r--r--arch/powerpc/mm/kasan/kasan_init_32.c30
-rw-r--r--arch/powerpc/mm/mem.c84
-rw-r--r--arch/powerpc/mm/mmu_decl.h7
-rw-r--r--arch/powerpc/mm/nohash/book3e_hugetlbpage.c16
-rw-r--r--arch/powerpc/mm/nohash/tlb.c3
-rw-r--r--arch/powerpc/mm/numa.c61
-rw-r--r--arch/powerpc/mm/pgtable-frag.c6
-rw-r--r--arch/powerpc/mm/pgtable.c16
-rw-r--r--arch/powerpc/mm/pgtable_32.c157
-rw-r--r--arch/powerpc/mm/pgtable_64.c202
-rw-r--r--arch/powerpc/mm/ptdump/bats.c2
-rw-r--r--arch/powerpc/mm/ptdump/hashpagetable.c24
-rw-r--r--arch/powerpc/mm/ptdump/ptdump.c43
33 files changed, 1058 insertions, 1311 deletions
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 0f499db315d6..5e147986400d 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -7,7 +7,7 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
obj-y := fault.o mem.o pgtable.o mmap.o \
init_$(BITS).o pgtable_$(BITS).o \
- pgtable-frag.o \
+ pgtable-frag.o ioremap.o ioremap_$(BITS).o \
init-common.o mmu_context.o drmem.o
obj-$(CONFIG_PPC_MMU_NOHASH) += nohash/
obj-$(CONFIG_PPC_BOOK3S_32) += book3s32/
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index e249fbf6b9c3..84d5fab94f8f 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -74,7 +74,7 @@ static int find_free_bat(void)
{
int b;
- if (cpu_has_feature(CPU_FTR_601)) {
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) {
for (b = 0; b < 4; b++) {
struct ppc_bat *bat = BATS[b];
@@ -106,7 +106,7 @@ static int find_free_bat(void)
*/
static unsigned int block_size(unsigned long base, unsigned long top)
{
- unsigned int max_size = (cpu_has_feature(CPU_FTR_601) ? 8 : 256) << 20;
+ unsigned int max_size = IS_ENABLED(CONFIG_PPC_BOOK3S_601) ? SZ_8M : SZ_256M;
unsigned int base_shift = (ffs(base) - 1) & 31;
unsigned int block_shift = (fls(top - base) - 1) & 31;
@@ -189,7 +189,7 @@ void mmu_mark_initmem_nx(void)
unsigned long top = (unsigned long)_etext - PAGE_OFFSET;
unsigned long size;
- if (cpu_has_feature(CPU_FTR_601))
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_601))
return;
for (i = 0; i < nb - 1 && base < top && top - base > (128 << 10);) {
@@ -227,7 +227,7 @@ void mmu_mark_rodata_ro(void)
int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
int i;
- if (cpu_has_feature(CPU_FTR_601))
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_601))
return;
for (i = 0; i < nb; i++) {
@@ -259,7 +259,7 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys,
flags &= ~_PAGE_COHERENT;
bl = (size >> 17) - 1;
- if (PVR_VER(mfspr(SPRN_PVR)) != 1) {
+ if (!IS_ENABLED(CONFIG_PPC_BOOK3S_601)) {
/* 603, 604, etc. */
/* Do DBAT first */
wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE
@@ -297,8 +297,7 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys,
/*
* Preload a translation in the hash table
*/
-void hash_preload(struct mm_struct *mm, unsigned long ea,
- bool is_exec, unsigned long trap)
+void hash_preload(struct mm_struct *mm, unsigned long ea)
{
pmd_t *pmd;
@@ -310,6 +309,39 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
}
/*
+ * This is called at the end of handling a user page fault, when the
+ * fault has been handled by updating a PTE in the linux page tables.
+ * We use it to preload an HPTE into the hash table corresponding to
+ * the updated linux PTE.
+ *
+ * This must always be called with the pte lock held.
+ */
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
+ pte_t *ptep)
+{
+ if (!mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ return;
+ /*
+ * We don't need to worry about _PAGE_PRESENT here because we are
+ * called with either mm->page_table_lock held or ptl lock held
+ */
+
+ /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
+ if (!pte_young(*ptep) || address >= TASK_SIZE)
+ return;
+
+ /* We have to test for regs NULL since init will get here first thing at boot */
+ if (!current->thread.regs)
+ return;
+
+ /* We also avoid filling the hash if not coming from a fault */
+ if (TRAP(current->thread.regs) != 0x300 && TRAP(current->thread.regs) != 0x400)
+ return;
+
+ hash_preload(vma->vm_mm, address);
+}
+
+/*
* Initialize the hash table and patch the instructions in hashtable.S.
*/
void __init MMU_init_hw(void)
@@ -358,6 +390,15 @@ void __init MMU_init_hw(void)
hash_mb2 = hash_mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg;
if (lg_n_hpteg > 16)
hash_mb2 = 16 - LG_HPTEG_SIZE;
+
+ /*
+ * When KASAN is selected, there is already an early temporary hash
+ * table and the switch to the final hash table is done later.
+ */
+ if (IS_ENABLED(CONFIG_KASAN))
+ return;
+
+ MMU_init_hw_patch();
}
void __init MMU_init_hw_patch(void)
@@ -400,7 +441,7 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base,
BUG_ON(first_memblock_base != 0);
/* 601 can only access 16MB at the moment */
- if (PVR_VER(mfspr(SPRN_PVR)) == 1)
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_601))
memblock_set_current_limit(min_t(u64, first_memblock_size, 0x01000000));
else /* Anything else has 256M mapped */
memblock_set_current_limit(min_t(u64, first_memblock_size, 0x10000000));
@@ -418,9 +459,6 @@ void __init setup_kuep(bool disabled)
{
pr_info("Activating Kernel Userspace Execution Prevention\n");
- if (cpu_has_feature(CPU_FTR_601))
- pr_warn("KUEP is not working on powerpc 601 (No NX bit in Seg Regs)\n");
-
if (disabled)
pr_warn("KUEP cannot be disabled yet on 6xx when compiled in\n");
}
diff --git a/arch/powerpc/mm/book3s64/Makefile b/arch/powerpc/mm/book3s64/Makefile
index 974b4fc19f4f..fd393b8be14f 100644
--- a/arch/powerpc/mm/book3s64/Makefile
+++ b/arch/powerpc/mm/book3s64/Makefile
@@ -10,7 +10,6 @@ obj-$(CONFIG_PPC_NATIVE) += hash_native.o
obj-$(CONFIG_PPC_RADIX_MMU) += radix_pgtable.o radix_tlb.o
obj-$(CONFIG_PPC_4K_PAGES) += hash_4k.o
obj-$(CONFIG_PPC_64K_PAGES) += hash_64k.o
-obj-$(CONFIG_PPC_SPLPAR) += vphn.o
obj-$(CONFIG_HUGETLB_PAGE) += hash_hugetlbpage.o
ifdef CONFIG_HUGETLB_PAGE
obj-$(CONFIG_PPC_RADIX_MMU) += radix_hugetlbpage.o
diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c
index 30d62ffe3310..90ab4f31e2b3 100644
--- a/arch/powerpc/mm/book3s64/hash_native.c
+++ b/arch/powerpc/mm/book3s64/hash_native.c
@@ -41,7 +41,7 @@
#define HPTE_LOCK_BIT (56+3)
#endif
-DEFINE_RAW_SPINLOCK(native_tlbie_lock);
+static DEFINE_RAW_SPINLOCK(native_tlbie_lock);
static inline void tlbiel_hash_set_isa206(unsigned int set, unsigned int is)
{
@@ -56,7 +56,7 @@ static inline void tlbiel_hash_set_isa206(unsigned int set, unsigned int is)
* tlbiel instruction for hash, set invalidation
* i.e., r=1 and is=01 or is=10 or is=11
*/
-static inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int is,
+static __always_inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int is,
unsigned int pid,
unsigned int ric, unsigned int prs)
{
@@ -112,7 +112,7 @@ static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
asm volatile("ptesync": : :"memory");
- asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
+ asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
}
void hash__tlbiel_all(unsigned int action)
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 28ced26f2a00..6c123760164e 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -34,6 +34,7 @@
#include <linux/libfdt.h>
#include <linux/pkeys.h>
#include <linux/hugetlb.h>
+#include <linux/cpu.h>
#include <asm/debugfs.h>
#include <asm/processor.h>
@@ -61,6 +62,7 @@
#include <asm/ps3.h>
#include <asm/pte-walk.h>
#include <asm/asm-prototypes.h>
+#include <asm/ultravisor.h>
#include <mm/mmu_decl.h>
@@ -271,10 +273,6 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
if (overlaps_kernel_text(vaddr, vaddr + step))
tprot &= ~HPTE_R_N;
- /* Make kvm guest trampolines executable */
- if (overlaps_kvm_tmp(vaddr, vaddr + step))
- tprot &= ~HPTE_R_N;
-
/*
* If relocatable, check if it overlaps interrupt vectors that
* are copied down to real 0. For relocatable kernel
@@ -684,10 +682,8 @@ static void __init htab_init_page_sizes(void)
if (mmu_psize_defs[MMU_PAGE_16M].shift &&
memblock_phys_mem_size() >= 0x40000000)
mmu_vmemmap_psize = MMU_PAGE_16M;
- else if (mmu_psize_defs[MMU_PAGE_64K].shift)
- mmu_vmemmap_psize = MMU_PAGE_64K;
else
- mmu_vmemmap_psize = MMU_PAGE_4K;
+ mmu_vmemmap_psize = mmu_virtual_psize;
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
printk(KERN_DEBUG "Page orders: linear mapping = %d, "
@@ -825,7 +821,7 @@ static void __init hash_init_partition_table(phys_addr_t hash_table,
* For now, UPRT is 0 and we have no segment table.
*/
htab_size = __ilog2(htab_size) - 18;
- mmu_partition_table_set_entry(0, hash_table | htab_size, 0);
+ mmu_partition_table_set_entry(0, hash_table | htab_size, 0, false);
pr_info("Partition table %p\n", partition_tb);
}
@@ -859,12 +855,6 @@ static void __init htab_initialize(void)
/* Using a hypervisor which owns the htab */
htab_address = NULL;
_SDR1 = 0;
- /*
- * On POWER9, we need to do a H_REGISTER_PROC_TBL hcall
- * to inform the hypervisor that we wish to use the HPT.
- */
- if (cpu_has_feature(CPU_FTR_ARCH_300))
- register_process_table(0, 0, 0);
#ifdef CONFIG_FA_DUMP
/*
* If firmware assisted dump is active firmware preserves
@@ -981,7 +971,7 @@ void __init hash__early_init_devtree(void)
htab_scan_page_sizes();
}
-struct hash_mm_context init_hash_mm_context;
+static struct hash_mm_context init_hash_mm_context;
void __init hash__early_init_mmu(void)
{
#ifndef CONFIG_PPC_64K_PAGES
@@ -1077,8 +1067,8 @@ void hash__early_init_mmu_secondary(void)
if (!cpu_has_feature(CPU_FTR_ARCH_300))
mtspr(SPRN_SDR1, _SDR1);
else
- mtspr(SPRN_PTCR,
- __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
+ set_ptcr_when_no_uv(__pa(partition_tb) |
+ (PATB_SIZE_SHIFT - 12));
}
/* Initialize SLB */
slb_initialize();
@@ -1462,8 +1452,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap,
}
EXPORT_SYMBOL_GPL(hash_page);
-int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap,
- unsigned long dsisr)
+int __hash_page(unsigned long trap, unsigned long ea, unsigned long dsisr,
+ unsigned long msr)
{
unsigned long access = _PAGE_PRESENT | _PAGE_READ;
unsigned long flags = 0;
@@ -1520,8 +1510,8 @@ static bool should_hash_preload(struct mm_struct *mm, unsigned long ea)
}
#endif
-void hash_preload(struct mm_struct *mm, unsigned long ea,
- bool is_exec, unsigned long trap)
+static void hash_preload(struct mm_struct *mm, unsigned long ea,
+ bool is_exec, unsigned long trap)
{
int hugepage_shift;
unsigned long vsid;
@@ -1601,6 +1591,57 @@ out_exit:
local_irq_restore(flags);
}
+/*
+ * This is called at the end of handling a user page fault, when the
+ * fault has been handled by updating a PTE in the linux page tables.
+ * We use it to preload an HPTE into the hash table corresponding to
+ * the updated linux PTE.
+ *
+ * This must always be called with the pte lock held.
+ */
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
+ pte_t *ptep)
+{
+ /*
+ * We don't need to worry about _PAGE_PRESENT here because we are
+ * called with either mm->page_table_lock held or ptl lock held
+ */
+ unsigned long trap;
+ bool is_exec;
+
+ if (radix_enabled()) {
+ prefetch((void *)address);
+ return;
+ }
+
+ /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
+ if (!pte_young(*ptep) || address >= TASK_SIZE)
+ return;
+
+ /*
+ * We try to figure out if we are coming from an instruction
+ * access fault and pass that down to __hash_page so we avoid
+ * double-faulting on execution of fresh text. We have to test
+ * for regs NULL since init will get here first thing at boot.
+ *
+ * We also avoid filling the hash if not coming from a fault.
+ */
+
+ trap = current->thread.regs ? TRAP(current->thread.regs) : 0UL;
+ switch (trap) {
+ case 0x300:
+ is_exec = false;
+ break;
+ case 0x400:
+ is_exec = true;
+ break;
+ default:
+ return;
+ }
+
+ hash_preload(vma->vm_mm, address, is_exec, trap);
+}
+
#ifdef CONFIG_PPC_MEM_KEYS
/*
* Return the protection key associated with the given address and the
@@ -1707,7 +1748,7 @@ void flush_hash_hugepage(unsigned long vsid, unsigned long addr,
/*
* IF we try to do a HUGE PTE update after a withdraw is done.
* we will find the below NULL. This happens when we do
- * split_huge_page_pmd
+ * split_huge_pmd
*/
if (!hpte_slot_array)
return;
@@ -1901,11 +1942,20 @@ void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base,
*
* For guests on platforms before POWER9, we clamp the it limit to 1G
* to avoid some funky things such as RTAS bugs etc...
+ *
+ * On POWER9 we limit to 1TB in case the host erroneously told us that
+ * the RMA was >1TB. Effective address bits 0:23 are treated as zero
+ * (meaning the access is aliased to zero i.e. addr = addr % 1TB)
+ * for virtual real mode addressing and so it doesn't make sense to
+ * have an area larger than 1TB as it can't be addressed.
*/
if (!early_cpu_has_feature(CPU_FTR_HVMODE)) {
ppc64_rma_size = first_memblock_size;
if (!early_cpu_has_feature(CPU_FTR_ARCH_300))
ppc64_rma_size = min_t(u64, ppc64_rma_size, 0x40000000);
+ else
+ ppc64_rma_size = min_t(u64, ppc64_rma_size,
+ 1UL << SID_SHIFT_1T);
/* Finally limit subsequent allocations */
memblock_set_current_limit(ppc64_rma_size);
@@ -1924,10 +1974,16 @@ static int hpt_order_get(void *data, u64 *val)
static int hpt_order_set(void *data, u64 val)
{
+ int ret;
+
if (!mmu_hash_ops.resize_hpt)
return -ENODEV;
- return mmu_hash_ops.resize_hpt(val);
+ cpus_read_lock();
+ ret = mmu_hash_ops.resize_hpt(val);
+ cpus_read_unlock();
+
+ return ret;
}
DEFINE_DEBUGFS_ATTRIBUTE(fops_hpt_order, hpt_order_get, hpt_order_set, "%llu\n");
@@ -1950,7 +2006,4 @@ void __init print_system_hash_info(void)
if (htab_hash_mask)
pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask);
- pr_info("kernel vmalloc start = 0x%lx\n", KERN_VIRT_START);
- pr_info("kernel IO start = 0x%lx\n", KERN_IO_START);
- pr_info("kernel vmemmap start = 0x%lx\n", (unsigned long)vmemmap);
}
diff --git a/arch/powerpc/mm/book3s64/iommu_api.c b/arch/powerpc/mm/book3s64/iommu_api.c
index 90ee3a89722c..56cc84520577 100644
--- a/arch/powerpc/mm/book3s64/iommu_api.c
+++ b/arch/powerpc/mm/book3s64/iommu_api.c
@@ -14,6 +14,7 @@
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/sizes.h>
+#include <linux/mm.h>
#include <asm/mmu_context.h>
#include <asm/pte-walk.h>
#include <linux/mm_inline.h>
@@ -46,40 +47,6 @@ struct mm_iommu_table_group_mem_t {
u64 dev_hpa; /* Device memory base address */
};
-static long mm_iommu_adjust_locked_vm(struct mm_struct *mm,
- unsigned long npages, bool incr)
-{
- long ret = 0, locked, lock_limit;
-
- if (!npages)
- return 0;
-
- down_write(&mm->mmap_sem);
-
- if (incr) {
- locked = mm->locked_vm + npages;
- lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
- if (locked > lock_limit && !capable(CAP_IPC_LOCK))
- ret = -ENOMEM;
- else
- mm->locked_vm += npages;
- } else {
- if (WARN_ON_ONCE(npages > mm->locked_vm))
- npages = mm->locked_vm;
- mm->locked_vm -= npages;
- }
-
- pr_debug("[%d] RLIMIT_MEMLOCK HASH64 %c%ld %ld/%ld\n",
- current ? current->pid : 0,
- incr ? '+' : '-',
- npages << PAGE_SHIFT,
- mm->locked_vm << PAGE_SHIFT,
- rlimit(RLIMIT_MEMLOCK));
- up_write(&mm->mmap_sem);
-
- return ret;
-}
-
bool mm_iommu_preregistered(struct mm_struct *mm)
{
return !list_empty(&mm->context.iommu_group_mem_list);
@@ -96,7 +63,7 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
unsigned long entry, chunk;
if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
- ret = mm_iommu_adjust_locked_vm(mm, entries, true);
+ ret = account_locked_vm(mm, entries, true);
if (ret)
return ret;
@@ -162,11 +129,8 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
* Allow to use larger than 64k IOMMU pages. Only do that
* if we are backed by hugetlb.
*/
- if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page)) {
- struct page *head = compound_head(page);
-
- pageshift = compound_order(head) + PAGE_SHIFT;
- }
+ if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page))
+ pageshift = page_shift(compound_head(page));
mem->pageshift = min(mem->pageshift, pageshift);
/*
* We don't need struct page reference any more, switch
@@ -211,7 +175,7 @@ free_exit:
kfree(mem);
unlock_exit:
- mm_iommu_adjust_locked_vm(mm, locked_entries, false);
+ account_locked_vm(mm, locked_entries, false);
return ret;
}
@@ -311,7 +275,7 @@ long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
unlock_exit:
mutex_unlock(&mem_list_mutex);
- mm_iommu_adjust_locked_vm(mm, unlock_entries, false);
+ account_locked_vm(mm, unlock_entries, false);
return ret;
}
diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c
index 794404d50a85..2d0cb5ba9a47 100644
--- a/arch/powerpc/mm/book3s64/mmu_context.c
+++ b/arch/powerpc/mm/book3s64/mmu_context.c
@@ -174,7 +174,6 @@ static int radix__init_new_context(struct mm_struct *mm)
*/
asm volatile("ptesync;isync" : : : "memory");
- mm->context.npu_context = NULL;
mm->context.hash_context = NULL;
return index;
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index 01bc9663360d..75483b40fcb1 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -8,10 +8,13 @@
#include <linux/memblock.h>
#include <misc/cxl-base.h>
+#include <asm/debugfs.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/trace.h>
#include <asm/powernv.h>
+#include <asm/firmware.h>
+#include <asm/ultravisor.h>
#include <mm/mmu_decl.h>
#include <trace/events/thp.h>
@@ -21,9 +24,6 @@ EXPORT_SYMBOL(__pmd_frag_nr);
unsigned long __pmd_frag_size_shift;
EXPORT_SYMBOL(__pmd_frag_size_shift);
-int (*register_process_table)(unsigned long base, unsigned long page_size,
- unsigned long tbl_size);
-
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
* This is called when relaxing access to a hugepage. It's also called in the page
@@ -72,7 +72,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
WARN_ON(pte_hw_valid(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
assert_spin_locked(pmd_lockptr(mm, pmdp));
- WARN_ON(!(pmd_large(pmd) || pmd_devmap(pmd)));
+ WARN_ON(!(pmd_large(pmd)));
#endif
trace_hugepage_set_pmd(addr, pmd_val(pmd));
return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
@@ -205,37 +205,61 @@ void __init mmu_partition_table_init(void)
* 64 K size.
*/
ptcr = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12);
- mtspr(SPRN_PTCR, ptcr);
+ set_ptcr_when_no_uv(ptcr);
powernv_set_nmmu_ptcr(ptcr);
}
+static void flush_partition(unsigned int lpid, bool radix)
+{
+ if (radix) {
+ radix__flush_all_lpid(lpid);
+ radix__flush_all_lpid_guest(lpid);
+ } else {
+ asm volatile("ptesync" : : : "memory");
+ asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
+ "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
+ /* do we need fixup here ?*/
+ asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+ trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0);
+ }
+}
+
void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
- unsigned long dw1)
+ unsigned long dw1, bool flush)
{
unsigned long old = be64_to_cpu(partition_tb[lpid].patb0);
+ /*
+ * When ultravisor is enabled, the partition table is stored in secure
+ * memory and can only be accessed doing an ultravisor call. However, we
+ * maintain a copy of the partition table in normal memory to allow Nest
+ * MMU translations to occur (for normal VMs).
+ *
+ * Therefore, here we always update partition_tb, regardless of whether
+ * we are running under an ultravisor or not.
+ */
partition_tb[lpid].patb0 = cpu_to_be64(dw0);
partition_tb[lpid].patb1 = cpu_to_be64(dw1);
/*
- * Global flush of TLBs and partition table caches for this lpid.
- * The type of flush (hash or radix) depends on what the previous
- * use of this partition ID was, not the new use.
+ * If ultravisor is enabled, we do an ultravisor call to register the
+ * partition table entry (PATE), which also do a global flush of TLBs
+ * and partition table caches for the lpid. Otherwise, just do the
+ * flush. The type of flush (hash or radix) depends on what the previous
+ * use of the partition ID was, not the new use.
*/
- asm volatile("ptesync" : : : "memory");
- if (old & PATB_HR) {
- asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : :
- "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
- asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
- "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
- trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1);
- } else {
- asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
- "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
- trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0);
+ if (firmware_has_feature(FW_FEATURE_ULTRAVISOR)) {
+ uv_register_pate(lpid, dw0, dw1);
+ pr_info("PATE registered by ultravisor: dw0 = 0x%lx, dw1 = 0x%lx\n",
+ dw0, dw1);
+ } else if (flush) {
+ /*
+ * Boot does not need to flush, because MMU is off and each
+ * CPU does a tlbiel_all() before switching them on, which
+ * flushes everything.
+ */
+ flush_partition(lpid, (old & PATB_HR));
}
- /* do we need fixup here ?*/
- asm volatile("eieio; tlbsync; ptesync" : : : "memory");
}
EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
@@ -446,3 +470,49 @@ int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
return true;
}
+
+/*
+ * Does the CPU support tlbie?
+ */
+bool tlbie_capable __read_mostly = true;
+EXPORT_SYMBOL(tlbie_capable);
+
+/*
+ * Should tlbie be used for management of CPU TLBs, for kernel and process
+ * address spaces? tlbie may still be used for nMMU accelerators, and for KVM
+ * guest address spaces.
+ */
+bool tlbie_enabled __read_mostly = true;
+
+static int __init setup_disable_tlbie(char *str)
+{
+ if (!radix_enabled()) {
+ pr_err("disable_tlbie: Unable to disable TLBIE with Hash MMU.\n");
+ return 1;
+ }
+
+ tlbie_capable = false;
+ tlbie_enabled = false;
+
+ return 1;
+}
+__setup("disable_tlbie", setup_disable_tlbie);
+
+static int __init pgtable_debugfs_setup(void)
+{
+ if (!tlbie_capable)
+ return 0;
+
+ /*
+ * There is no locking vs tlb flushing when changing this value.
+ * The tlb flushers will see one value or another, and use either
+ * tlbie or tlbiel with IPIs. In both cases the TLBs will be
+ * invalidated as expected.
+ */
+ debugfs_create_bool("tlbie_enabled", 0600,
+ powerpc_debugfs_root,
+ &tlbie_enabled);
+
+ return 0;
+}
+arch_initcall(pgtable_debugfs_setup);
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 273ae66a9a45..3a1fbf9cb8f8 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -7,6 +7,7 @@
#define pr_fmt(fmt) "radix-mmu: " fmt
+#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/sched/mm.h>
#include <linux/memblock.h>
@@ -26,25 +27,13 @@
#include <asm/sections.h>
#include <asm/trace.h>
#include <asm/uaccess.h>
+#include <asm/ultravisor.h>
#include <trace/events/thp.h>
unsigned int mmu_pid_bits;
unsigned int mmu_base_pid;
-static int native_register_process_table(unsigned long base, unsigned long pg_sz,
- unsigned long table_size)
-{
- unsigned long patb0, patb1;
-
- patb0 = be64_to_cpu(partition_tb[0].patb0);
- patb1 = base | table_size | PATB_GR;
-
- mmu_partition_table_set_entry(0, patb0, patb1);
-
- return 0;
-}
-
static __ref void *early_alloc_pgtable(unsigned long size, int nid,
unsigned long region_start, unsigned long region_end)
{
@@ -198,14 +187,14 @@ void radix__change_memory_range(unsigned long start, unsigned long end,
pudp = pud_alloc(&init_mm, pgdp, idx);
if (!pudp)
continue;
- if (pud_huge(*pudp)) {
+ if (pud_is_leaf(*pudp)) {
ptep = (pte_t *)pudp;
goto update_the_pte;
}
pmdp = pmd_alloc(&init_mm, pudp, idx);
if (!pmdp)
continue;
- if (pmd_huge(*pmdp)) {
+ if (pmd_is_leaf(*pmdp)) {
ptep = pmdp_ptep(pmdp);
goto update_the_pte;
}
@@ -319,7 +308,7 @@ static int __meminit create_physical_mapping(unsigned long start,
return 0;
}
-void __init radix_init_pgtable(void)
+static void __init radix_init_pgtable(void)
{
unsigned long rts_field;
struct memblock_region *reg;
@@ -379,18 +368,6 @@ void __init radix_init_pgtable(void)
*/
rts_field = radix__get_tree_size();
process_tb->prtb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE);
- /*
- * Fill in the partition table. We are suppose to use effective address
- * of process table here. But our linear mapping also enable us to use
- * physical address here.
- */
- register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12);
- pr_info("Process table %p and radix root for kernel: %p\n", process_tb, init_mm.pgd);
- asm volatile("ptesync" : : : "memory");
- asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
- "r" (TLBIEL_INVAL_SET_LPID), "r" (0));
- asm volatile("eieio; tlbsync; ptesync" : : : "memory");
- trace_tlbie(0, 0, TLBIEL_INVAL_SET_LPID, 0, 2, 1, 1);
/*
* The init_mm context is given the first available (non-zero) PID,
@@ -411,20 +388,15 @@ void __init radix_init_pgtable(void)
static void __init radix_init_partition_table(void)
{
- unsigned long rts_field, dw0;
+ unsigned long rts_field, dw0, dw1;
mmu_partition_table_init();
rts_field = radix__get_tree_size();
dw0 = rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE | PATB_HR;
- mmu_partition_table_set_entry(0, dw0, 0);
+ dw1 = __pa(process_tb) | (PRTB_SIZE_SHIFT - 12) | PATB_GR;
+ mmu_partition_table_set_entry(0, dw0, dw1, false);
pr_info("Initializing Radix MMU\n");
- pr_info("Partition table %p\n", partition_tb);
-}
-
-void __init radix_init_native(void)
-{
- register_process_table = native_register_process_table;
}
static int __init get_idx_from_shift(unsigned int shift)
@@ -515,14 +487,6 @@ void __init radix__early_init_devtree(void)
mmu_psize_defs[MMU_PAGE_64K].shift = 16;
mmu_psize_defs[MMU_PAGE_64K].ap = 0x5;
found:
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
- if (mmu_psize_defs[MMU_PAGE_2M].shift) {
- /*
- * map vmemmap using 2M if available
- */
- mmu_vmemmap_psize = MMU_PAGE_2M;
- }
-#endif /* CONFIG_SPARSEMEM_VMEMMAP */
return;
}
@@ -587,7 +551,13 @@ void __init radix__early_init_mmu(void)
#ifdef CONFIG_SPARSEMEM_VMEMMAP
/* vmemmap mapping */
- mmu_vmemmap_psize = mmu_virtual_psize;
+ if (mmu_psize_defs[MMU_PAGE_2M].shift) {
+ /*
+ * map vmemmap using 2M if available
+ */
+ mmu_vmemmap_psize = MMU_PAGE_2M;
+ } else
+ mmu_vmemmap_psize = mmu_virtual_psize;
#endif
/*
* initialize page table size
@@ -622,8 +592,9 @@ void __init radix__early_init_mmu(void)
__pmd_frag_nr = RADIX_PMD_FRAG_NR;
__pmd_frag_size_shift = RADIX_PMD_FRAG_SIZE_SHIFT;
+ radix_init_pgtable();
+
if (!firmware_has_feature(FW_FEATURE_LPAR)) {
- radix_init_native();
lpcr = mfspr(SPRN_LPCR);
mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
radix_init_partition_table();
@@ -634,11 +605,9 @@ void __init radix__early_init_mmu(void)
memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
- radix_init_pgtable();
/* Switch to the guard PID before turning on MMU */
radix__switch_mmu_context(NULL, &init_mm);
- if (cpu_has_feature(CPU_FTR_HVMODE))
- tlbiel_all();
+ tlbiel_all();
}
void radix__early_init_mmu_secondary(void)
@@ -651,14 +620,14 @@ void radix__early_init_mmu_secondary(void)
lpcr = mfspr(SPRN_LPCR);
mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
- mtspr(SPRN_PTCR,
- __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
+ set_ptcr_when_no_uv(__pa(partition_tb) |
+ (PATB_SIZE_SHIFT - 12));
+
radix_init_amor();
}
radix__switch_mmu_context(NULL, &init_mm);
- if (cpu_has_feature(CPU_FTR_HVMODE))
- tlbiel_all();
+ tlbiel_all();
}
void radix__mmu_cleanup_all(void)
@@ -668,7 +637,7 @@ void radix__mmu_cleanup_all(void)
if (!firmware_has_feature(FW_FEATURE_LPAR)) {
lpcr = mfspr(SPRN_LPCR);
mtspr(SPRN_LPCR, lpcr & ~LPCR_UPRT);
- mtspr(SPRN_PTCR, 0);
+ set_ptcr_when_no_uv(0);
powernv_set_nmmu_ptcr(0);
radix__flush_tlb_all();
}
@@ -738,8 +707,8 @@ static int __meminit stop_machine_change_mapping(void *data)
spin_unlock(&init_mm.page_table_lock);
pte_clear(&init_mm, params->aligned_start, params->pte);
- create_physical_mapping(params->aligned_start, params->start, -1);
- create_physical_mapping(params->end, params->aligned_end, -1);
+ create_physical_mapping(__pa(params->aligned_start), __pa(params->start), -1);
+ create_physical_mapping(__pa(params->end), __pa(params->aligned_end), -1);
spin_lock(&init_mm.page_table_lock);
return 0;
}
@@ -832,7 +801,7 @@ static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
if (!pmd_present(*pmd))
continue;
- if (pmd_huge(*pmd)) {
+ if (pmd_is_leaf(*pmd)) {
split_kernel_mapping(addr, end, PMD_SIZE, (pte_t *)pmd);
continue;
}
@@ -857,7 +826,7 @@ static void remove_pud_table(pud_t *pud_start, unsigned long addr,
if (!pud_present(*pud))
continue;
- if (pud_huge(*pud)) {
+ if (pud_is_leaf(*pud)) {
split_kernel_mapping(addr, end, PUD_SIZE, (pte_t *)pud);
continue;
}
@@ -883,7 +852,7 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end)
if (!pgd_present(*pgd))
continue;
- if (pgd_huge(*pgd)) {
+ if (pgd_is_leaf(*pgd)) {
split_kernel_mapping(addr, end, PGDIR_SIZE, (pte_t *)pgd);
continue;
}
@@ -903,7 +872,7 @@ int __meminit radix__create_section_mapping(unsigned long start, unsigned long e
return -1;
}
- return create_physical_mapping(start, end, nid);
+ return create_physical_mapping(__pa(start), __pa(end), nid);
}
int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end)
@@ -1118,3 +1087,108 @@ void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
set_pte_at(mm, addr, ptep, pte);
}
+
+int __init arch_ioremap_pud_supported(void)
+{
+ /* HPT does not cope with large pages in the vmalloc area */
+ return radix_enabled();
+}
+
+int __init arch_ioremap_pmd_supported(void)
+{
+ return radix_enabled();
+}
+
+int p4d_free_pud_page(p4d_t *p4d, unsigned long addr)
+{
+ return 0;
+}
+
+int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
+{
+ pte_t *ptep = (pte_t *)pud;
+ pte_t new_pud = pfn_pte(__phys_to_pfn(addr), prot);
+
+ if (!radix_enabled())
+ return 0;
+
+ set_pte_at(&init_mm, 0 /* radix unused */, ptep, new_pud);
+
+ return 1;
+}
+
+int pud_clear_huge(pud_t *pud)
+{
+ if (pud_huge(*pud)) {
+ pud_clear(pud);
+ return 1;
+ }
+
+ return 0;
+}
+
+int pud_free_pmd_page(pud_t *pud, unsigned long addr)
+{
+ pmd_t *pmd;
+ int i;
+
+ pmd = (pmd_t *)pud_page_vaddr(*pud);
+ pud_clear(pud);
+
+ flush_tlb_kernel_range(addr, addr + PUD_SIZE);
+
+ for (i = 0; i < PTRS_PER_PMD; i++) {
+ if (!pmd_none(pmd[i])) {
+ pte_t *pte;
+ pte = (pte_t *)pmd_page_vaddr(pmd[i]);
+
+ pte_free_kernel(&init_mm, pte);
+ }
+ }
+
+ pmd_free(&init_mm, pmd);
+
+ return 1;
+}
+
+int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
+{
+ pte_t *ptep = (pte_t *)pmd;
+ pte_t new_pmd = pfn_pte(__phys_to_pfn(addr), prot);
+
+ if (!radix_enabled())
+ return 0;
+
+ set_pte_at(&init_mm, 0 /* radix unused */, ptep, new_pmd);
+
+ return 1;
+}
+
+int pmd_clear_huge(pmd_t *pmd)
+{
+ if (pmd_huge(*pmd)) {
+ pmd_clear(pmd);
+ return 1;
+ }
+
+ return 0;
+}
+
+int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
+{
+ pte_t *pte;
+
+ pte = (pte_t *)pmd_page_vaddr(*pmd);
+ pmd_clear(pmd);
+
+ flush_tlb_kernel_range(addr, addr + PMD_SIZE);
+
+ pte_free_kernel(&init_mm, pte);
+
+ return 1;
+}
+
+int __init arch_ioremap_p4d_supported(void)
+{
+ return 0;
+}
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index bb9835681315..631be42abd33 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -25,7 +25,7 @@
* tlbiel instruction for radix, set invalidation
* i.e., r=1 and is=01 or is=10 or is=11
*/
-static inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
+static __always_inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
unsigned int pid,
unsigned int ric, unsigned int prs)
{
@@ -51,11 +51,15 @@ static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
* and partition table entries. Then flush the remaining sets of the
* TLB.
*/
- tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
- for (set = 1; set < num_sets; set++)
- tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0);
- /* Do the same for process scoped entries. */
+ if (early_cpu_has_feature(CPU_FTR_HVMODE)) {
+ /* MSR[HV] should flush partition scope translations first. */
+ tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
+ for (set = 1; set < num_sets; set++)
+ tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0);
+ }
+
+ /* Flush process scoped entries. */
tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);
for (set = 1; set < num_sets; set++)
tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);
@@ -83,7 +87,7 @@ void radix__tlbiel_all(unsigned int action)
else
WARN(1, "%s called on pre-POWER9 CPU\n", __func__);
- asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
+ asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
}
static __always_inline void __tlbiel_pid(unsigned long pid, int set,
@@ -116,22 +120,6 @@ static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
-static __always_inline void __tlbiel_lpid(unsigned long lpid, int set,
- unsigned long ric)
-{
- unsigned long rb,rs,prs,r;
-
- rb = PPC_BIT(52); /* IS = 2 */
- rb |= set << PPC_BITLSHIFT(51);
- rs = 0; /* LPID comes from LPIDR */
- prs = 0; /* partition scoped */
- r = 1; /* radix format */
-
- asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
- : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
- trace_tlbie(lpid, 1, rb, rs, ric, prs, r);
-}
-
static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
{
unsigned long rb,rs,prs,r;
@@ -146,25 +134,22 @@ static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}
-static inline void __tlbiel_lpid_guest(unsigned long lpid, int set,
- unsigned long ric)
+static __always_inline void __tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
unsigned long rb,rs,prs,r;
rb = PPC_BIT(52); /* IS = 2 */
- rb |= set << PPC_BITLSHIFT(51);
- rs = 0; /* LPID comes from LPIDR */
+ rs = lpid;
prs = 1; /* process scoped */
r = 1; /* radix format */
- asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
- trace_tlbie(lpid, 1, rb, rs, ric, prs, r);
+ trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}
-
-static inline void __tlbiel_va(unsigned long va, unsigned long pid,
- unsigned long ap, unsigned long ric)
+static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid,
+ unsigned long ap, unsigned long ric)
{
unsigned long rb,rs,prs,r;
@@ -179,8 +164,8 @@ static inline void __tlbiel_va(unsigned long va, unsigned long pid,
trace_tlbie(0, 1, rb, rs, ric, prs, r);
}
-static inline void __tlbie_va(unsigned long va, unsigned long pid,
- unsigned long ap, unsigned long ric)
+static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
+ unsigned long ap, unsigned long ric)
{
unsigned long rb,rs,prs,r;
@@ -195,8 +180,8 @@ static inline void __tlbie_va(unsigned long va, unsigned long pid,
trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
-static inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
- unsigned long ap, unsigned long ric)
+static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
+ unsigned long ap, unsigned long ric)
{
unsigned long rb,rs,prs,r;
@@ -235,7 +220,7 @@ static inline void fixup_tlbie_lpid(unsigned long lpid)
/*
* We use 128 set in radix mode and 256 set in hpt mode.
*/
-static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
+static __always_inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
{
int set;
@@ -258,7 +243,7 @@ static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
__tlbiel_pid(pid, set, RIC_FLUSH_TLB);
asm volatile("ptesync": : :"memory");
- asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
+ asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory");
}
static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
@@ -285,32 +270,37 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
-static inline void _tlbiel_lpid(unsigned long lpid, unsigned long ric)
+struct tlbiel_pid {
+ unsigned long pid;
+ unsigned long ric;
+};
+
+static void do_tlbiel_pid(void *info)
{
- int set;
+ struct tlbiel_pid *t = info;
- VM_BUG_ON(mfspr(SPRN_LPID) != lpid);
+ if (t->ric == RIC_FLUSH_TLB)
+ _tlbiel_pid(t->pid, RIC_FLUSH_TLB);
+ else if (t->ric == RIC_FLUSH_PWC)
+ _tlbiel_pid(t->pid, RIC_FLUSH_PWC);
+ else
+ _tlbiel_pid(t->pid, RIC_FLUSH_ALL);
+}
- asm volatile("ptesync": : :"memory");
+static inline void _tlbiel_pid_multicast(struct mm_struct *mm,
+ unsigned long pid, unsigned long ric)
+{
+ struct cpumask *cpus = mm_cpumask(mm);
+ struct tlbiel_pid t = { .pid = pid, .ric = ric };
+ on_each_cpu_mask(cpus, do_tlbiel_pid, &t, 1);
/*
- * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
- * also flush the entire Page Walk Cache.
+ * Always want the CPU translations to be invalidated with tlbiel in
+ * these paths, so while coprocessors must use tlbie, we can not
+ * optimise away the tlbiel component.
*/
- __tlbiel_lpid(lpid, 0, ric);
-
- /* For PWC, only one flush is needed */
- if (ric == RIC_FLUSH_PWC) {
- asm volatile("ptesync": : :"memory");
- return;
- }
-
- /* For the remaining sets, just flush the TLB */
- for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++)
- __tlbiel_lpid(lpid, set, RIC_FLUSH_TLB);
-
- asm volatile("ptesync": : :"memory");
- asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
+ if (atomic_read(&mm->context.copros) > 0)
+ _tlbie_pid(pid, RIC_FLUSH_ALL);
}
static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
@@ -337,35 +327,28 @@ static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
-static inline void _tlbiel_lpid_guest(unsigned long lpid, unsigned long ric)
+static __always_inline void _tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
- int set;
-
- VM_BUG_ON(mfspr(SPRN_LPID) != lpid);
-
- asm volatile("ptesync": : :"memory");
-
/*
- * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
- * also flush the entire Page Walk Cache.
+ * Workaround the fact that the "ric" argument to __tlbie_pid
+ * must be a compile-time contraint to match the "i" constraint
+ * in the asm statement.
*/
- __tlbiel_lpid_guest(lpid, 0, ric);
-
- /* For PWC, only one flush is needed */
- if (ric == RIC_FLUSH_PWC) {
- asm volatile("ptesync": : :"memory");
- return;
+ switch (ric) {
+ case RIC_FLUSH_TLB:
+ __tlbie_lpid_guest(lpid, RIC_FLUSH_TLB);
+ break;
+ case RIC_FLUSH_PWC:
+ __tlbie_lpid_guest(lpid, RIC_FLUSH_PWC);
+ break;
+ case RIC_FLUSH_ALL:
+ default:
+ __tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
}
-
- /* For the remaining sets, just flush the TLB */
- for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++)
- __tlbiel_lpid_guest(lpid, set, RIC_FLUSH_TLB);
-
- asm volatile("ptesync": : :"memory");
- asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
+ fixup_tlbie_lpid(lpid);
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
-
static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
unsigned long pid, unsigned long page_size,
unsigned long psize)
@@ -377,8 +360,8 @@ static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
__tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
}
-static inline void _tlbiel_va(unsigned long va, unsigned long pid,
- unsigned long psize, unsigned long ric)
+static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid,
+ unsigned long psize, unsigned long ric)
{
unsigned long ap = mmu_get_ap(psize);
@@ -409,8 +392,8 @@ static inline void __tlbie_va_range(unsigned long start, unsigned long end,
__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
}
-static inline void _tlbie_va(unsigned long va, unsigned long pid,
- unsigned long psize, unsigned long ric)
+static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
+ unsigned long psize, unsigned long ric)
{
unsigned long ap = mmu_get_ap(psize);
@@ -420,7 +403,54 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid,
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
-static inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
+struct tlbiel_va {
+ unsigned long pid;
+ unsigned long va;
+ unsigned long psize;
+ unsigned long ric;
+};
+
+static void do_tlbiel_va(void *info)
+{
+ struct tlbiel_va *t = info;
+
+ if (t->ric == RIC_FLUSH_TLB)
+ _tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_TLB);
+ else if (t->ric == RIC_FLUSH_PWC)
+ _tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_PWC);
+ else
+ _tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_ALL);
+}
+
+static inline void _tlbiel_va_multicast(struct mm_struct *mm,
+ unsigned long va, unsigned long pid,
+ unsigned long psize, unsigned long ric)
+{
+ struct cpumask *cpus = mm_cpumask(mm);
+ struct tlbiel_va t = { .va = va, .pid = pid, .psize = psize, .ric = ric };
+ on_each_cpu_mask(cpus, do_tlbiel_va, &t, 1);
+ if (atomic_read(&mm->context.copros) > 0)
+ _tlbie_va(va, pid, psize, RIC_FLUSH_TLB);
+}
+
+struct tlbiel_va_range {
+ unsigned long pid;
+ unsigned long start;
+ unsigned long end;
+ unsigned long page_size;
+ unsigned long psize;
+ bool also_pwc;
+};
+
+static void do_tlbiel_va_range(void *info)
+{
+ struct tlbiel_va_range *t = info;
+
+ _tlbiel_va_range(t->start, t->end, t->pid, t->page_size,
+ t->psize, t->also_pwc);
+}
+
+static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
unsigned long psize, unsigned long ric)
{
unsigned long ap = mmu_get_ap(psize);
@@ -443,6 +473,21 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end,
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
+static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
+ unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long page_size,
+ unsigned long psize, bool also_pwc)
+{
+ struct cpumask *cpus = mm_cpumask(mm);
+ struct tlbiel_va_range t = { .start = start, .end = end,
+ .pid = pid, .page_size = page_size,
+ .psize = psize, .also_pwc = also_pwc };
+
+ on_each_cpu_mask(cpus, do_tlbiel_va_range, &t, 1);
+ if (atomic_read(&mm->context.copros) > 0)
+ _tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
+}
+
/*
* Base TLB flushing operations:
*
@@ -580,10 +625,14 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
goto local;
}
- if (mm_needs_flush_escalation(mm))
- _tlbie_pid(pid, RIC_FLUSH_ALL);
- else
- _tlbie_pid(pid, RIC_FLUSH_TLB);
+ if (cputlb_use_tlbie()) {
+ if (mm_needs_flush_escalation(mm))
+ _tlbie_pid(pid, RIC_FLUSH_ALL);
+ else
+ _tlbie_pid(pid, RIC_FLUSH_TLB);
+ } else {
+ _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
+ }
} else {
local:
_tlbiel_pid(pid, RIC_FLUSH_TLB);
@@ -609,7 +658,10 @@ static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
goto local;
}
}
- _tlbie_pid(pid, RIC_FLUSH_ALL);
+ if (cputlb_use_tlbie())
+ _tlbie_pid(pid, RIC_FLUSH_ALL);
+ else
+ _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
} else {
local:
_tlbiel_pid(pid, RIC_FLUSH_ALL);
@@ -644,7 +696,10 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
exit_flush_lazy_tlbs(mm);
goto local;
}
- _tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
+ if (cputlb_use_tlbie())
+ _tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
+ else
+ _tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB);
} else {
local:
_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
@@ -666,9 +721,35 @@ EXPORT_SYMBOL(radix__flush_tlb_page);
#define radix__flush_all_mm radix__local_flush_all_mm
#endif /* CONFIG_SMP */
+static void do_tlbiel_kernel(void *info)
+{
+ _tlbiel_pid(0, RIC_FLUSH_ALL);
+}
+
+static inline void _tlbiel_kernel_broadcast(void)
+{
+ on_each_cpu(do_tlbiel_kernel, NULL, 1);
+ if (tlbie_capable) {
+ /*
+ * Coherent accelerators don't refcount kernel memory mappings,
+ * so have to always issue a tlbie for them. This is quite a
+ * slow path anyway.
+ */
+ _tlbie_pid(0, RIC_FLUSH_ALL);
+ }
+}
+
+/*
+ * If kernel TLBIs ever become local rather than global, then
+ * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it
+ * assumes kernel TLBIs are global.
+ */
void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
- _tlbie_pid(0, RIC_FLUSH_ALL);
+ if (cputlb_use_tlbie())
+ _tlbie_pid(0, RIC_FLUSH_ALL);
+ else
+ _tlbiel_kernel_broadcast();
}
EXPORT_SYMBOL(radix__flush_tlb_kernel_range);
@@ -724,10 +805,14 @@ is_local:
if (local) {
_tlbiel_pid(pid, RIC_FLUSH_TLB);
} else {
- if (mm_needs_flush_escalation(mm))
- _tlbie_pid(pid, RIC_FLUSH_ALL);
- else
- _tlbie_pid(pid, RIC_FLUSH_TLB);
+ if (cputlb_use_tlbie()) {
+ if (mm_needs_flush_escalation(mm))
+ _tlbie_pid(pid, RIC_FLUSH_ALL);
+ else
+ _tlbie_pid(pid, RIC_FLUSH_TLB);
+ } else {
+ _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
+ }
}
} else {
bool hflush = flush_all_sizes;
@@ -752,8 +837,8 @@ is_local:
gflush = false;
}
- asm volatile("ptesync": : :"memory");
if (local) {
+ asm volatile("ptesync": : :"memory");
__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
if (hflush)
__tlbiel_va_range(hstart, hend, pid,
@@ -762,7 +847,8 @@ is_local:
__tlbiel_va_range(gstart, gend, pid,
PUD_SIZE, MMU_PAGE_1G);
asm volatile("ptesync": : :"memory");
- } else {
+ } else if (cputlb_use_tlbie()) {
+ asm volatile("ptesync": : :"memory");
__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
if (hflush)
__tlbie_va_range(hstart, hend, pid,
@@ -772,6 +858,15 @@ is_local:
PUD_SIZE, MMU_PAGE_1G);
fixup_tlbie();
asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ } else {
+ _tlbiel_va_range_multicast(mm,
+ start, end, pid, page_size, mmu_virtual_psize, false);
+ if (hflush)
+ _tlbiel_va_range_multicast(mm,
+ hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, false);
+ if (gflush)
+ _tlbiel_va_range_multicast(mm,
+ gstart, gend, pid, PUD_SIZE, MMU_PAGE_1G, false);
}
}
preempt_enable();
@@ -830,32 +925,19 @@ EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);
/*
* Flush partition scoped translations from LPID (=LPIDR)
*/
-void radix__flush_tlb_lpid(unsigned int lpid)
+void radix__flush_all_lpid(unsigned int lpid)
{
_tlbie_lpid(lpid, RIC_FLUSH_ALL);
}
-EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid);
-
-/*
- * Flush partition scoped translations from LPID (=LPIDR)
- */
-void radix__local_flush_tlb_lpid(unsigned int lpid)
-{
- _tlbiel_lpid(lpid, RIC_FLUSH_ALL);
-}
-EXPORT_SYMBOL_GPL(radix__local_flush_tlb_lpid);
+EXPORT_SYMBOL_GPL(radix__flush_all_lpid);
/*
- * Flush process scoped translations from LPID (=LPIDR).
- * Important difference, the guest normally manages its own translations,
- * but some cases e.g., vCPU CPU migration require KVM to flush.
+ * Flush process scoped translations from LPID (=LPIDR)
*/
-void radix__local_flush_tlb_lpid_guest(unsigned int lpid)
+void radix__flush_all_lpid_guest(unsigned int lpid)
{
- _tlbiel_lpid_guest(lpid, RIC_FLUSH_ALL);
+ _tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
}
-EXPORT_SYMBOL_GPL(radix__local_flush_tlb_lpid_guest);
-
static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
unsigned long end, int psize);
@@ -961,16 +1043,26 @@ is_local:
if (local) {
_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
} else {
- if (mm_needs_flush_escalation(mm))
- also_pwc = true;
+ if (cputlb_use_tlbie()) {
+ if (mm_needs_flush_escalation(mm))
+ also_pwc = true;
+
+ _tlbie_pid(pid,
+ also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
+ } else {
+ _tlbiel_pid_multicast(mm, pid,
+ also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
+ }
- _tlbie_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
}
} else {
if (local)
_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
- else
+ else if (cputlb_use_tlbie())
_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
+ else
+ _tlbiel_va_range_multicast(mm,
+ start, end, pid, page_size, psize, also_pwc);
}
preempt_enable();
}
@@ -1012,7 +1104,11 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
exit_flush_lazy_tlbs(mm);
goto local;
}
- _tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
+ if (cputlb_use_tlbie())
+ _tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
+ else
+ _tlbiel_va_range_multicast(mm,
+ addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
} else {
local:
_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
diff --git a/arch/powerpc/mm/book3s64/subpage_prot.c b/arch/powerpc/mm/book3s64/subpage_prot.c
index 9ba07e55c489..2ef24a53f4c9 100644
--- a/arch/powerpc/mm/book3s64/subpage_prot.c
+++ b/arch/powerpc/mm/book3s64/subpage_prot.c
@@ -7,7 +7,7 @@
#include <linux/kernel.h>
#include <linux/gfp.h>
#include <linux/types.h>
-#include <linux/mm.h>
+#include <linux/pagewalk.h>
#include <linux/hugetlb.h>
#include <linux/syscalls.h>
@@ -139,14 +139,14 @@ static int subpage_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
return 0;
}
+static const struct mm_walk_ops subpage_walk_ops = {
+ .pmd_entry = subpage_walk_pmd_entry,
+};
+
static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
unsigned long len)
{
struct vm_area_struct *vma;
- struct mm_walk subpage_proto_walk = {
- .mm = mm,
- .pmd_entry = subpage_walk_pmd_entry,
- };
/*
* We don't try too hard, we just mark all the vma in that range
@@ -163,7 +163,7 @@ static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
if (vma->vm_start >= (addr + len))
break;
vma->vm_flags |= VM_NOHUGEPAGE;
- walk_page_vma(vma, &subpage_proto_walk);
+ walk_page_vma(vma, &subpage_walk_ops, NULL);
vma = vma->vm_next;
}
}
diff --git a/arch/powerpc/mm/book3s64/vphn.c b/arch/powerpc/mm/book3s64/vphn.c
deleted file mode 100644
index 0ee7734afb50..000000000000
--- a/arch/powerpc/mm/book3s64/vphn.c
+++ /dev/null
@@ -1,73 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <asm/byteorder.h>
-#include "vphn.h"
-
-/*
- * The associativity domain numbers are returned from the hypervisor as a
- * stream of mixed 16-bit and 32-bit fields. The stream is terminated by the
- * special value of "all ones" (aka. 0xffff) and its size may not exceed 48
- * bytes.
- *
- * --- 16-bit fields -->
- * _________________________
- * | 0 | 1 | 2 | 3 | be_packed[0]
- * ------+-----+-----+------
- * _________________________
- * | 4 | 5 | 6 | 7 | be_packed[1]
- * -------------------------
- * ...
- * _________________________
- * | 20 | 21 | 22 | 23 | be_packed[5]
- * -------------------------
- *
- * Convert to the sequence they would appear in the ibm,associativity property.
- */
-int vphn_unpack_associativity(const long *packed, __be32 *unpacked)
-{
- __be64 be_packed[VPHN_REGISTER_COUNT];
- int i, nr_assoc_doms = 0;
- const __be16 *field = (const __be16 *) be_packed;
- u16 last = 0;
- bool is_32bit = false;
-
-#define VPHN_FIELD_UNUSED (0xffff)
-#define VPHN_FIELD_MSB (0x8000)
-#define VPHN_FIELD_MASK (~VPHN_FIELD_MSB)
-
- /* Let's fix the values returned by plpar_hcall9() */
- for (i = 0; i < VPHN_REGISTER_COUNT; i++)
- be_packed[i] = cpu_to_be64(packed[i]);
-
- for (i = 1; i < VPHN_ASSOC_BUFSIZE; i++) {
- u16 new = be16_to_cpup(field++);
-
- if (is_32bit) {
- /*
- * Let's concatenate the 16 bits of this field to the
- * 15 lower bits of the previous field
- */
- unpacked[++nr_assoc_doms] =
- cpu_to_be32(last << 16 | new);
- is_32bit = false;
- } else if (new == VPHN_FIELD_UNUSED)
- /* This is the list terminator */
- break;
- else if (new & VPHN_FIELD_MSB) {
- /* Data is in the lower 15 bits of this field */
- unpacked[++nr_assoc_doms] =
- cpu_to_be32(new & VPHN_FIELD_MASK);
- } else {
- /*
- * Data is in the lower 15 bits of this field
- * concatenated with the next 16 bit field
- */
- last = new;
- is_32bit = true;
- }
- }
-
- /* The first cell contains the length of the property */
- unpacked[0] = cpu_to_be32(nr_assoc_doms);
-
- return nr_assoc_doms;
-}
diff --git a/arch/powerpc/mm/book3s64/vphn.h b/arch/powerpc/mm/book3s64/vphn.h
deleted file mode 100644
index f0b93c2dd578..000000000000
--- a/arch/powerpc/mm/book3s64/vphn.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ARCH_POWERPC_MM_VPHN_H_
-#define _ARCH_POWERPC_MM_VPHN_H_
-
-/* The H_HOME_NODE_ASSOCIATIVITY h_call returns 6 64-bit registers. */
-#define VPHN_REGISTER_COUNT 6
-
-/*
- * 6 64-bit registers unpacked into up to 24 be32 associativity values. To
- * form the complete property we have to add the length in the first cell.
- */
-#define VPHN_ASSOC_BUFSIZE (VPHN_REGISTER_COUNT*sizeof(u64)/sizeof(u16) + 1)
-
-extern int vphn_unpack_associativity(const long *packed, __be32 *unpacked);
-
-#endif
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
index c617282d5b2a..2a82984356f8 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -4,310 +4,18 @@
* Copyright (C) 2001 Dan Malek (dmalek@jlc.net)
*
* Copyright (C) 2000 Russell King
- *
- * Consistent memory allocators. Used for DMA devices that want to
- * share uncached memory with the processor core. The function return
- * is the virtual address and 'dma_handle' is the physical address.
- * Mostly stolen from the ARM port, with some changes for PowerPC.
- * -- Dan
- *
- * Reorganized to get rid of the arch-specific consistent_* functions
- * and provide non-coherent implementations for the DMA API. -Matt
- *
- * Added in_interrupt() safe dma_alloc_coherent()/dma_free_coherent()
- * implementation. This is pulled straight from ARM and barely
- * modified. -Matt
*/
-#include <linux/sched.h>
-#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/errno.h>
-#include <linux/string.h>
#include <linux/types.h>
#include <linux/highmem.h>
#include <linux/dma-direct.h>
#include <linux/dma-noncoherent.h>
-#include <linux/export.h>
#include <asm/tlbflush.h>
#include <asm/dma.h>
-#include <mm/mmu_decl.h>
-
-/*
- * This address range defaults to a value that is safe for all
- * platforms which currently set CONFIG_NOT_COHERENT_CACHE. It
- * can be further configured for specific applications under
- * the "Advanced Setup" menu. -Matt
- */
-#define CONSISTENT_BASE (IOREMAP_TOP)
-#define CONSISTENT_END (CONSISTENT_BASE + CONFIG_CONSISTENT_SIZE)
-#define CONSISTENT_OFFSET(x) (((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT)
-
-/*
- * This is the page table (2MB) covering uncached, DMA consistent allocations
- */
-static DEFINE_SPINLOCK(consistent_lock);
-
-/*
- * VM region handling support.
- *
- * This should become something generic, handling VM region allocations for
- * vmalloc and similar (ioremap, module space, etc).
- *
- * I envisage vmalloc()'s supporting vm_struct becoming:
- *
- * struct vm_struct {
- * struct vm_region region;
- * unsigned long flags;
- * struct page **pages;
- * unsigned int nr_pages;
- * unsigned long phys_addr;
- * };
- *
- * get_vm_area() would then call vm_region_alloc with an appropriate
- * struct vm_region head (eg):
- *
- * struct vm_region vmalloc_head = {
- * .vm_list = LIST_HEAD_INIT(vmalloc_head.vm_list),
- * .vm_start = VMALLOC_START,
- * .vm_end = VMALLOC_END,
- * };
- *
- * However, vmalloc_head.vm_start is variable (typically, it is dependent on
- * the amount of RAM found at boot time.) I would imagine that get_vm_area()
- * would have to initialise this each time prior to calling vm_region_alloc().
- */
-struct ppc_vm_region {
- struct list_head vm_list;
- unsigned long vm_start;
- unsigned long vm_end;
-};
-
-static struct ppc_vm_region consistent_head = {
- .vm_list = LIST_HEAD_INIT(consistent_head.vm_list),
- .vm_start = CONSISTENT_BASE,
- .vm_end = CONSISTENT_END,
-};
-
-static struct ppc_vm_region *
-ppc_vm_region_alloc(struct ppc_vm_region *head, size_t size, gfp_t gfp)
-{
- unsigned long addr = head->vm_start, end = head->vm_end - size;
- unsigned long flags;
- struct ppc_vm_region *c, *new;
-
- new = kmalloc(sizeof(struct ppc_vm_region), gfp);
- if (!new)
- goto out;
-
- spin_lock_irqsave(&consistent_lock, flags);
-
- list_for_each_entry(c, &head->vm_list, vm_list) {
- if ((addr + size) < addr)
- goto nospc;
- if ((addr + size) <= c->vm_start)
- goto found;
- addr = c->vm_end;
- if (addr > end)
- goto nospc;
- }
-
- found:
- /*
- * Insert this entry _before_ the one we found.
- */
- list_add_tail(&new->vm_list, &c->vm_list);
- new->vm_start = addr;
- new->vm_end = addr + size;
-
- spin_unlock_irqrestore(&consistent_lock, flags);
- return new;
-
- nospc:
- spin_unlock_irqrestore(&consistent_lock, flags);
- kfree(new);
- out:
- return NULL;
-}
-
-static struct ppc_vm_region *ppc_vm_region_find(struct ppc_vm_region *head, unsigned long addr)
-{
- struct ppc_vm_region *c;
-
- list_for_each_entry(c, &head->vm_list, vm_list) {
- if (c->vm_start == addr)
- goto out;
- }
- c = NULL;
- out:
- return c;
-}
-
-/*
- * Allocate DMA-coherent memory space and return both the kernel remapped
- * virtual and bus address for that space.
- */
-void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
- gfp_t gfp, unsigned long attrs)
-{
- struct page *page;
- struct ppc_vm_region *c;
- unsigned long order;
- u64 mask = ISA_DMA_THRESHOLD, limit;
-
- if (dev) {
- mask = dev->coherent_dma_mask;
-
- /*
- * Sanity check the DMA mask - it must be non-zero, and
- * must be able to be satisfied by a DMA allocation.
- */
- if (mask == 0) {
- dev_warn(dev, "coherent DMA mask is unset\n");
- goto no_page;
- }
-
- if ((~mask) & ISA_DMA_THRESHOLD) {
- dev_warn(dev, "coherent DMA mask %#llx is smaller "
- "than system GFP_DMA mask %#llx\n",
- mask, (unsigned long long)ISA_DMA_THRESHOLD);
- goto no_page;
- }
- }
-
-
- size = PAGE_ALIGN(size);
- limit = (mask + 1) & ~mask;
- if ((limit && size >= limit) ||
- size >= (CONSISTENT_END - CONSISTENT_BASE)) {
- printk(KERN_WARNING "coherent allocation too big (requested %#x mask %#Lx)\n",
- size, mask);
- return NULL;
- }
-
- order = get_order(size);
-
- /* Might be useful if we ever have a real legacy DMA zone... */
- if (mask != 0xffffffff)
- gfp |= GFP_DMA;
-
- page = alloc_pages(gfp, order);
- if (!page)
- goto no_page;
-
- /*
- * Invalidate any data that might be lurking in the
- * kernel direct-mapped region for device DMA.
- */
- {
- unsigned long kaddr = (unsigned long)page_address(page);
- memset(page_address(page), 0, size);
- flush_dcache_range(kaddr, kaddr + size);
- }
-
- /*
- * Allocate a virtual address in the consistent mapping region.
- */
- c = ppc_vm_region_alloc(&consistent_head, size,
- gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
- if (c) {
- unsigned long vaddr = c->vm_start;
- struct page *end = page + (1 << order);
-
- split_page(page, order);
-
- /*
- * Set the "dma handle"
- */
- *dma_handle = phys_to_dma(dev, page_to_phys(page));
-
- do {
- SetPageReserved(page);
- map_kernel_page(vaddr, page_to_phys(page),
- pgprot_noncached(PAGE_KERNEL));
- page++;
- vaddr += PAGE_SIZE;
- } while (size -= PAGE_SIZE);
-
- /*
- * Free the otherwise unused pages.
- */
- while (page < end) {
- __free_page(page);
- page++;
- }
-
- return (void *)c->vm_start;
- }
-
- if (page)
- __free_pages(page, order);
- no_page:
- return NULL;
-}
-
-/*
- * free a page as defined by the above mapping.
- */
-void arch_dma_free(struct device *dev, size_t size, void *vaddr,
- dma_addr_t dma_handle, unsigned long attrs)
-{
- struct ppc_vm_region *c;
- unsigned long flags, addr;
-
- size = PAGE_ALIGN(size);
-
- spin_lock_irqsave(&consistent_lock, flags);
-
- c = ppc_vm_region_find(&consistent_head, (unsigned long)vaddr);
- if (!c)
- goto no_area;
-
- if ((c->vm_end - c->vm_start) != size) {
- printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n",
- __func__, c->vm_end - c->vm_start, size);
- dump_stack();
- size = c->vm_end - c->vm_start;
- }
-
- addr = c->vm_start;
- do {
- pte_t *ptep;
- unsigned long pfn;
-
- ptep = pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(addr),
- addr),
- addr),
- addr);
- if (!pte_none(*ptep) && pte_present(*ptep)) {
- pfn = pte_pfn(*ptep);
- pte_clear(&init_mm, addr, ptep);
- if (pfn_valid(pfn)) {
- struct page *page = pfn_to_page(pfn);
- __free_reserved_page(page);
- }
- }
- addr += PAGE_SIZE;
- } while (size -= PAGE_SIZE);
-
- flush_tlb_kernel_range(c->vm_start, c->vm_end);
-
- list_del(&c->vm_list);
-
- spin_unlock_irqrestore(&consistent_lock, flags);
-
- kfree(c);
- return;
-
- no_area:
- spin_unlock_irqrestore(&consistent_lock, flags);
- printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n",
- __func__, vaddr);
- dump_stack();
-}
-
/*
* make an area consistent.
*/
@@ -408,23 +116,9 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
__dma_sync_page(paddr, size, dir);
}
-/*
- * Return the PFN for a given cpu virtual address returned by arch_dma_alloc.
- */
-long arch_dma_coherent_to_pfn(struct device *dev, void *vaddr,
- dma_addr_t dma_addr)
+void arch_dma_prep_coherent(struct page *page, size_t size)
{
- /* This should always be populated, so we don't test every
- * level. If that fails, we'll have a nice crash which
- * will be as good as a BUG_ON()
- */
- unsigned long cpu_addr = (unsigned long)vaddr;
- pgd_t *pgd = pgd_offset_k(cpu_addr);
- pud_t *pud = pud_offset(pgd, cpu_addr);
- pmd_t *pmd = pmd_offset(pud, cpu_addr);
- pte_t *ptep = pte_offset_kernel(pmd, cpu_addr);
+ unsigned long kaddr = (unsigned long)page_address(page);
- if (pte_none(*ptep) || !pte_present(*ptep))
- return 0;
- return pte_pfn(*ptep);
+ flush_dcache_range(kaddr, kaddr + size);
}
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index ec6b7ad70659..8432c281de92 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -42,26 +42,6 @@
#include <asm/debug.h>
#include <asm/kup.h>
-static inline bool notify_page_fault(struct pt_regs *regs)
-{
- bool ret = false;
-
-#ifdef CONFIG_KPROBES
- /* kprobe_running() needs smp_processor_id() */
- if (!user_mode(regs)) {
- preempt_disable();
- if (kprobe_running() && kprobe_fault_handler(regs, 11))
- ret = true;
- preempt_enable();
- }
-#endif /* CONFIG_KPROBES */
-
- if (unlikely(debugger_fault_handler(regs)))
- ret = true;
-
- return ret;
-}
-
/*
* Check whether the instruction inst is a store using
* an update addressing form which will update r1.
@@ -178,13 +158,12 @@ static int do_sigbus(struct pt_regs *regs, unsigned long address,
if (fault & VM_FAULT_HWPOISON)
lsb = PAGE_SHIFT;
- force_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb,
- current);
+ force_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb);
return 0;
}
#endif
- force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, current);
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
return 0;
}
@@ -462,8 +441,9 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
int is_write = page_fault_is_write(error_code);
vm_fault_t fault, major = 0;
bool must_retry = false;
+ bool kprobe_fault = kprobe_page_fault(regs, 11);
- if (notify_page_fault(regs))
+ if (unlikely(debugger_fault_handler(regs) || kprobe_fault))
return 0;
if (unlikely(page_fault_is_bad(error_code))) {
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index b5d92dc32844..73d4873fc7f8 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -61,12 +61,17 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
num_hugepd = 1;
}
+ if (!cachep) {
+ WARN_ONCE(1, "No page table cache created for hugetlb tables");
+ return -ENOMEM;
+ }
+
new = kmem_cache_alloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL));
BUG_ON(pshift > HUGEPD_SHIFT_MASK);
BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);
- if (! new)
+ if (!new)
return -ENOMEM;
/*
@@ -130,6 +135,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
} else {
pdshift = PUD_SHIFT;
pu = pud_alloc(mm, pg, addr);
+ if (!pu)
+ return NULL;
if (pshift == PUD_SHIFT)
return (pte_t *)pu;
else if (pshift > PMD_SHIFT) {
@@ -138,6 +145,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
} else {
pdshift = PMD_SHIFT;
pm = pmd_alloc(mm, pu, addr);
+ if (!pm)
+ return NULL;
if (pshift == PMD_SHIFT)
/* 16MB hugepage */
return (pte_t *)pm;
@@ -154,12 +163,16 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
} else {
pdshift = PUD_SHIFT;
pu = pud_alloc(mm, pg, addr);
+ if (!pu)
+ return NULL;
if (pshift >= PUD_SHIFT) {
ptl = pud_lockptr(mm, pu);
hpdp = (hugepd_t *)pu;
} else {
pdshift = PMD_SHIFT;
pm = pmd_alloc(mm, pu, addr);
+ if (!pm)
+ return NULL;
ptl = pmd_lockptr(mm, pm);
hpdp = (hugepd_t *)pm;
}
@@ -511,13 +524,6 @@ retry:
return page;
}
-static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
- unsigned long sz)
-{
- unsigned long __boundary = (addr + sz) & ~(sz-1);
- return (__boundary - 1 < end - 1) ? __boundary : end;
-}
-
#ifdef CONFIG_PPC_MM_SLICES
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
unsigned long len, unsigned long pgoff,
@@ -588,6 +594,7 @@ __setup("hugepagesz=", hugepage_setup_sz);
static int __init hugetlbpage_init(void)
{
+ bool configured = false;
int psize;
if (hugetlb_disabled) {
@@ -638,10 +645,15 @@ static int __init hugetlbpage_init(void)
pgtable_cache_add(pdshift - shift);
else if (IS_ENABLED(CONFIG_PPC_FSL_BOOK3E) || IS_ENABLED(CONFIG_PPC_8xx))
pgtable_cache_add(PTE_T_ORDER);
+
+ configured = true;
}
- if (IS_ENABLED(CONFIG_HUGETLB_PAGE_SIZE_VARIABLE))
- hugetlbpage_init_default();
+ if (configured) {
+ if (IS_ENABLED(CONFIG_HUGETLB_PAGE_SIZE_VARIABLE))
+ hugetlbpage_init_default();
+ } else
+ pr_info("Failed to initialize. Disabling HugeTLB");
return 0;
}
@@ -655,7 +667,7 @@ void flush_dcache_icache_hugepage(struct page *page)
BUG_ON(!PageCompound(page));
- for (i = 0; i < (1UL << compound_order(page)); i++) {
+ for (i = 0; i < compound_nr(page); i++) {
if (!PageHighMem(page)) {
__flush_dcache_icache(page_address(page+i));
} else {
@@ -665,68 +677,3 @@ void flush_dcache_icache_hugepage(struct page *page)
}
}
}
-
-static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr)
-{
- unsigned long pte_end;
- struct page *head, *page;
- pte_t pte;
- int refs;
-
- pte_end = (addr + sz) & ~(sz-1);
- if (pte_end < end)
- end = pte_end;
-
- pte = READ_ONCE(*ptep);
-
- if (!pte_access_permitted(pte, write))
- return 0;
-
- /* hugepages are never "special" */
- VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
-
- refs = 0;
- head = pte_page(pte);
-
- page = head + ((addr & (sz-1)) >> PAGE_SHIFT);
- do {
- VM_BUG_ON(compound_head(page) != head);
- pages[*nr] = page;
- (*nr)++;
- page++;
- refs++;
- } while (addr += PAGE_SIZE, addr != end);
-
- if (!page_cache_add_speculative(head, refs)) {
- *nr -= refs;
- return 0;
- }
-
- if (unlikely(pte_val(pte) != pte_val(*ptep))) {
- /* Could be optimized better */
- *nr -= refs;
- while (refs--)
- put_page(head);
- return 0;
- }
-
- return 1;
-}
-
-int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned int pdshift,
- unsigned long end, int write, struct page **pages, int *nr)
-{
- pte_t *ptep;
- unsigned long sz = 1UL << hugepd_shift(hugepd);
- unsigned long next;
-
- ptep = hugepte_offset(hugepd, addr, pdshift);
- do {
- next = hugepte_addr_end(addr, end, sz);
- if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr))
- return 0;
- } while (ptep++, addr = next, addr != end);
-
- return 1;
-}
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index a4e17a979e45..a44f6281ca3a 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -194,8 +194,11 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
* fail due to alignment issues when using 16MB hugepages, so
* fall back to system memory if the altmap allocation fail.
*/
- if (altmap)
+ if (altmap) {
p = altmap_alloc_block_buf(page_size, altmap);
+ if (!p)
+ pr_debug("altmap block allocation failed, falling back to system memory");
+ }
if (!p)
p = vmemmap_alloc_block_buf(page_size, node);
if (!p)
diff --git a/arch/powerpc/mm/ioremap.c b/arch/powerpc/mm/ioremap.c
new file mode 100644
index 000000000000..fc669643ce6a
--- /dev/null
+++ b/arch/powerpc/mm/ioremap.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <asm/io-workarounds.h>
+
+unsigned long ioremap_bot;
+EXPORT_SYMBOL(ioremap_bot);
+
+void __iomem *ioremap(phys_addr_t addr, unsigned long size)
+{
+ pgprot_t prot = pgprot_noncached(PAGE_KERNEL);
+ void *caller = __builtin_return_address(0);
+
+ if (iowa_is_active())
+ return iowa_ioremap(addr, size, prot, caller);
+ return __ioremap_caller(addr, size, prot, caller);
+}
+EXPORT_SYMBOL(ioremap);
+
+void __iomem *ioremap_wc(phys_addr_t addr, unsigned long size)
+{
+ pgprot_t prot = pgprot_noncached_wc(PAGE_KERNEL);
+ void *caller = __builtin_return_address(0);
+
+ if (iowa_is_active())
+ return iowa_ioremap(addr, size, prot, caller);
+ return __ioremap_caller(addr, size, prot, caller);
+}
+EXPORT_SYMBOL(ioremap_wc);
+
+void __iomem *ioremap_coherent(phys_addr_t addr, unsigned long size)
+{
+ pgprot_t prot = pgprot_cached(PAGE_KERNEL);
+ void *caller = __builtin_return_address(0);
+
+ if (iowa_is_active())
+ return iowa_ioremap(addr, size, prot, caller);
+ return __ioremap_caller(addr, size, prot, caller);
+}
+
+void __iomem *ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags)
+{
+ pte_t pte = __pte(flags);
+ void *caller = __builtin_return_address(0);
+
+ /* writeable implies dirty for kernel addresses */
+ if (pte_write(pte))
+ pte = pte_mkdirty(pte);
+
+ /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
+ pte = pte_exprotect(pte);
+ pte = pte_mkprivileged(pte);
+
+ if (iowa_is_active())
+ return iowa_ioremap(addr, size, pte_pgprot(pte), caller);
+ return __ioremap_caller(addr, size, pte_pgprot(pte), caller);
+}
+EXPORT_SYMBOL(ioremap_prot);
+
+int early_ioremap_range(unsigned long ea, phys_addr_t pa,
+ unsigned long size, pgprot_t prot)
+{
+ unsigned long i;
+
+ for (i = 0; i < size; i += PAGE_SIZE) {
+ int err = map_kernel_page(ea + i, pa + i, prot);
+
+ if (WARN_ON_ONCE(err)) /* Should clean up */
+ return err;
+ }
+
+ return 0;
+}
+
+void __iomem *do_ioremap(phys_addr_t pa, phys_addr_t offset, unsigned long size,
+ pgprot_t prot, void *caller)
+{
+ struct vm_struct *area;
+ int ret;
+ unsigned long va;
+
+ area = __get_vm_area_caller(size, VM_IOREMAP, IOREMAP_START, IOREMAP_END, caller);
+ if (area == NULL)
+ return NULL;
+
+ area->phys_addr = pa;
+ va = (unsigned long)area->addr;
+
+ ret = ioremap_page_range(va, va + size, pa, prot);
+ if (!ret)
+ return (void __iomem *)area->addr + offset;
+
+ unmap_kernel_range(va, size);
+ free_vm_area(area);
+
+ return NULL;
+}
diff --git a/arch/powerpc/mm/ioremap_32.c b/arch/powerpc/mm/ioremap_32.c
new file mode 100644
index 000000000000..f36121f25243
--- /dev/null
+++ b/arch/powerpc/mm/ioremap_32.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+#include <mm/mmu_decl.h>
+
+void __iomem *ioremap_wt(phys_addr_t addr, unsigned long size)
+{
+ pgprot_t prot = pgprot_cached_wthru(PAGE_KERNEL);
+
+ return __ioremap_caller(addr, size, prot, __builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_wt);
+
+void __iomem *
+__ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *caller)
+{
+ unsigned long v;
+ phys_addr_t p, offset;
+ int err;
+
+ /*
+ * Choose an address to map it to.
+ * Once the vmalloc system is running, we use it.
+ * Before then, we use space going down from IOREMAP_TOP
+ * (ioremap_bot records where we're up to).
+ */
+ p = addr & PAGE_MASK;
+ offset = addr & ~PAGE_MASK;
+ size = PAGE_ALIGN(addr + size) - p;
+
+ /*
+ * If the address lies within the first 16 MB, assume it's in ISA
+ * memory space
+ */
+ if (p < 16 * 1024 * 1024)
+ p += _ISA_MEM_BASE;
+
+#ifndef CONFIG_CRASH_DUMP
+ /*
+ * Don't allow anybody to remap normal RAM that we're using.
+ * mem_init() sets high_memory so only do the check after that.
+ */
+ if (slab_is_available() && p <= virt_to_phys(high_memory - 1) &&
+ page_is_ram(__phys_to_pfn(p))) {
+ pr_warn("%s(): phys addr 0x%llx is RAM lr %ps\n", __func__,
+ (unsigned long long)p, __builtin_return_address(0));
+ return NULL;
+ }
+#endif
+
+ if (size == 0)
+ return NULL;
+
+ /*
+ * Is it already mapped? Perhaps overlapped by a previous
+ * mapping.
+ */
+ v = p_block_mapped(p);
+ if (v)
+ return (void __iomem *)v + offset;
+
+ if (slab_is_available())
+ return do_ioremap(p, offset, size, prot, caller);
+
+ /*
+ * Should check if it is a candidate for a BAT mapping
+ */
+
+ err = early_ioremap_range(ioremap_bot - size, p, size, prot);
+ if (err)
+ return NULL;
+ ioremap_bot -= size;
+
+ return (void __iomem *)ioremap_bot + offset;
+}
+
+void iounmap(volatile void __iomem *addr)
+{
+ /*
+ * If mapped by BATs then there is nothing to do.
+ * Calling vfree() generates a benign warning.
+ */
+ if (v_block_mapped((unsigned long)addr))
+ return;
+
+ if (addr > high_memory && (unsigned long)addr < ioremap_bot)
+ vunmap((void *)(PAGE_MASK & (unsigned long)addr));
+}
+EXPORT_SYMBOL(iounmap);
diff --git a/arch/powerpc/mm/ioremap_64.c b/arch/powerpc/mm/ioremap_64.c
new file mode 100644
index 000000000000..fd29e51700cd
--- /dev/null
+++ b/arch/powerpc/mm/ioremap_64.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+/**
+ * Low level function to establish the page tables for an IO mapping
+ */
+void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_t prot)
+{
+ int ret;
+ unsigned long va = (unsigned long)ea;
+
+ /* We don't support the 4K PFN hack with ioremap */
+ if (pgprot_val(prot) & H_PAGE_4K_PFN)
+ return NULL;
+
+ if ((ea + size) >= (void *)IOREMAP_END) {
+ pr_warn("Outside the supported range\n");
+ return NULL;
+ }
+
+ WARN_ON(pa & ~PAGE_MASK);
+ WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
+ WARN_ON(size & ~PAGE_MASK);
+
+ if (slab_is_available()) {
+ ret = ioremap_page_range(va, va + size, pa, prot);
+ if (ret)
+ unmap_kernel_range(va, size);
+ } else {
+ ret = early_ioremap_range(va, pa, size, prot);
+ }
+
+ if (ret)
+ return NULL;
+
+ return (void __iomem *)ea;
+}
+EXPORT_SYMBOL(__ioremap_at);
+
+/**
+ * Low level function to tear down the page tables for an IO mapping. This is
+ * used for mappings that are manipulated manually, like partial unmapping of
+ * PCI IOs or ISA space.
+ */
+void __iounmap_at(void *ea, unsigned long size)
+{
+ WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
+ WARN_ON(size & ~PAGE_MASK);
+
+ unmap_kernel_range((unsigned long)ea, size);
+}
+EXPORT_SYMBOL(__iounmap_at);
+
+void __iomem *__ioremap_caller(phys_addr_t addr, unsigned long size,
+ pgprot_t prot, void *caller)
+{
+ phys_addr_t paligned, offset;
+ void __iomem *ret;
+ int err;
+
+ /* We don't support the 4K PFN hack with ioremap */
+ if (pgprot_val(prot) & H_PAGE_4K_PFN)
+ return NULL;
+
+ /*
+ * Choose an address to map it to. Once the vmalloc system is running,
+ * we use it. Before that, we map using addresses going up from
+ * ioremap_bot. vmalloc will use the addresses from IOREMAP_BASE
+ * through ioremap_bot.
+ */
+ paligned = addr & PAGE_MASK;
+ offset = addr & ~PAGE_MASK;
+ size = PAGE_ALIGN(addr + size) - paligned;
+
+ if (size == 0 || paligned == 0)
+ return NULL;
+
+ if (slab_is_available())
+ return do_ioremap(paligned, offset, size, prot, caller);
+
+ err = early_ioremap_range(ioremap_bot, paligned, size, prot);
+ if (err)
+ return NULL;
+
+ ret = (void __iomem *)ioremap_bot + offset;
+ ioremap_bot += size;
+
+ return ret;
+}
+
+/*
+ * Unmap an IO region and remove it from vmalloc'd list.
+ * Access to IO memory should be serialized by driver.
+ */
+void iounmap(volatile void __iomem *token)
+{
+ void *addr;
+
+ if (!slab_is_available())
+ return;
+
+ addr = (void *)((unsigned long __force)PCI_FIX_ADDR(token) & PAGE_MASK);
+
+ if ((unsigned long)addr < ioremap_bot) {
+ pr_warn("Attempt to iounmap early bolted mapping at 0x%p\n", addr);
+ return;
+ }
+ vunmap(addr);
+}
+EXPORT_SYMBOL(iounmap);
diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c
index 0d62be3cba47..802387b231ad 100644
--- a/arch/powerpc/mm/kasan/kasan_init_32.c
+++ b/arch/powerpc/mm/kasan/kasan_init_32.c
@@ -5,6 +5,7 @@
#include <linux/kasan.h>
#include <linux/printk.h>
#include <linux/memblock.h>
+#include <linux/moduleloader.h>
#include <linux/sched/task.h>
#include <linux/vmalloc.h>
#include <asm/pgalloc.h>
@@ -21,7 +22,7 @@ static void kasan_populate_pte(pte_t *ptep, pgprot_t prot)
__set_pte_at(&init_mm, va, ptep, pfn_pte(PHYS_PFN(pa), prot), 0);
}
-static int kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end)
+static int __ref kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end)
{
pmd_t *pmd;
unsigned long k_cur, k_next;
@@ -35,7 +36,10 @@ static int kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_
if ((void *)pmd_page_vaddr(*pmd) != kasan_early_shadow_pte)
continue;
- new = pte_alloc_one_kernel(&init_mm);
+ if (slab_is_available())
+ new = pte_alloc_one_kernel(&init_mm);
+ else
+ new = memblock_alloc(PTE_FRAG_SIZE, PTE_FRAG_SIZE);
if (!new)
return -ENOMEM;
@@ -43,7 +47,19 @@ static int kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_
kasan_populate_pte(new, PAGE_READONLY);
else
kasan_populate_pte(new, PAGE_KERNEL_RO);
- pmd_populate_kernel(&init_mm, pmd, new);
+
+ smp_wmb(); /* See comment in __pte_alloc */
+
+ spin_lock(&init_mm.page_table_lock);
+ /* Has another populated it ? */
+ if (likely((void *)pmd_page_vaddr(*pmd) == kasan_early_shadow_pte)) {
+ pmd_populate_kernel(&init_mm, pmd, new);
+ new = NULL;
+ }
+ spin_unlock(&init_mm.page_table_lock);
+
+ if (new && slab_is_available())
+ pte_free_kernel(&init_mm, new);
}
return 0;
}
@@ -71,7 +87,7 @@ static int __ref kasan_init_region(void *start, size_t size)
if (!slab_is_available())
block = memblock_alloc(k_end - k_start, PAGE_SIZE);
- for (k_cur = k_start; k_cur < k_end; k_cur += PAGE_SIZE) {
+ for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) {
pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur);
void *va = block ? block + k_cur - k_start : kasan_get_one_page();
pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL);
@@ -134,7 +150,11 @@ void __init kasan_init(void)
#ifdef CONFIG_MODULES
void *module_alloc(unsigned long size)
{
- void *base = vmalloc_exec(size);
+ void *base;
+
+ base = __vmalloc_node_range(size, MODULE_ALIGN, VMALLOC_START, VMALLOC_END,
+ GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS,
+ NUMA_NO_NODE, __builtin_return_address(0));
if (!base)
return NULL;
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 2540d3b2588c..be941d382c8d 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -120,33 +120,24 @@ int __ref arch_add_memory(int nid, u64 start, u64 size,
start, start + size, rc);
return -EFAULT;
}
- flush_inval_dcache_range(start, start + size);
+ flush_dcache_range(start, start + size);
return __add_pages(nid, start_pfn, nr_pages, restrictions);
}
-#ifdef CONFIG_MEMORY_HOTREMOVE
void __ref arch_remove_memory(int nid, u64 start, u64 size,
struct vmem_altmap *altmap)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
- struct page *page;
+ struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap);
int ret;
- /*
- * If we have an altmap then we need to skip over any reserved PFNs
- * when querying the zone.
- */
- page = pfn_to_page(start_pfn);
- if (altmap)
- page += vmem_altmap_offset(altmap);
-
__remove_pages(page_zone(page), start_pfn, nr_pages, altmap);
/* Remove htab bolted mappings for this section of memory */
start = (unsigned long)__va(start);
- flush_inval_dcache_range(start, start + size);
+ flush_dcache_range(start, start + size);
ret = remove_section_mapping(start, start + size);
WARN_ON_ONCE(ret);
@@ -159,7 +150,6 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
pr_warn("Hash collision while resizing HPT\n");
}
#endif
-#endif /* CONFIG_MEMORY_HOTPLUG */
#ifndef CONFIG_NEED_MULTIPLE_NODES
void __init mem_topology_setup(void)
@@ -249,7 +239,7 @@ void __init paging_init(void)
#ifdef CONFIG_ZONE_DMA
max_zone_pfns[ZONE_DMA] = min(max_low_pfn,
- ((1UL << ARCH_ZONE_DMA_BITS) - 1) >> PAGE_SHIFT);
+ 1UL << (ARCH_ZONE_DMA_BITS - PAGE_SHIFT));
#endif
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
#ifdef CONFIG_HIGHMEM
@@ -312,12 +302,9 @@ void __init mem_init(void)
pr_info(" * 0x%08lx..0x%08lx : highmem PTEs\n",
PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP));
#endif /* CONFIG_HIGHMEM */
-#ifdef CONFIG_NOT_COHERENT_CACHE
- pr_info(" * 0x%08lx..0x%08lx : consistent mem\n",
- IOREMAP_TOP, IOREMAP_TOP + CONFIG_CONSISTENT_SIZE);
-#endif /* CONFIG_NOT_COHERENT_CACHE */
- pr_info(" * 0x%08lx..0x%08lx : early ioremap\n",
- ioremap_bot, IOREMAP_TOP);
+ if (ioremap_bot != IOREMAP_TOP)
+ pr_info(" * 0x%08lx..0x%08lx : early ioremap\n",
+ ioremap_bot, IOREMAP_TOP);
pr_info(" * 0x%08lx..0x%08lx : vmalloc & ioremap\n",
VMALLOC_START, VMALLOC_END);
#endif /* CONFIG_PPC32 */
@@ -417,63 +404,6 @@ void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
EXPORT_SYMBOL(flush_icache_user_range);
/*
- * This is called at the end of handling a user page fault, when the
- * fault has been handled by updating a PTE in the linux page tables.
- * We use it to preload an HPTE into the hash table corresponding to
- * the updated linux PTE.
- *
- * This must always be called with the pte lock held.
- */
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
- pte_t *ptep)
-{
-#ifdef CONFIG_PPC_BOOK3S
- /*
- * We don't need to worry about _PAGE_PRESENT here because we are
- * called with either mm->page_table_lock held or ptl lock held
- */
- unsigned long trap;
- bool is_exec;
-
- if (radix_enabled()) {
- prefetch((void *)address);
- return;
- }
-
- /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
- if (!pte_young(*ptep) || address >= TASK_SIZE)
- return;
-
- /* We try to figure out if we are coming from an instruction
- * access fault and pass that down to __hash_page so we avoid
- * double-faulting on execution of fresh text. We have to test
- * for regs NULL since init will get here first thing at boot
- *
- * We also avoid filling the hash if not coming from a fault
- */
-
- trap = current->thread.regs ? TRAP(current->thread.regs) : 0UL;
- switch (trap) {
- case 0x300:
- is_exec = false;
- break;
- case 0x400:
- is_exec = true;
- break;
- default:
- return;
- }
-
- hash_preload(vma->vm_mm, address, is_exec, trap);
-#endif /* CONFIG_PPC_BOOK3S */
-#if (defined(CONFIG_PPC_BOOK3E_64) || defined(CONFIG_PPC_FSL_BOOK3E)) \
- && defined(CONFIG_HUGETLB_PAGE)
- if (is_vm_hugetlb_page(vma))
- book3e_hugetlb_preload(vma, address, *ptep);
-#endif
-}
-
-/*
* System memory should not be in /proc/iomem but various tools expect it
* (eg kdump).
*/
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 32c1a191c28a..c750ac9ec713 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -82,10 +82,6 @@ static inline void print_system_hash_info(void) {}
#else /* CONFIG_PPC_MMU_NOHASH */
-extern void hash_preload(struct mm_struct *mm, unsigned long ea,
- bool is_exec, unsigned long trap);
-
-
extern void _tlbie(unsigned long address);
extern void _tlbia(void);
@@ -95,6 +91,8 @@ void print_system_hash_info(void);
#ifdef CONFIG_PPC32
+void hash_preload(struct mm_struct *mm, unsigned long ea);
+
extern void mapin_ram(void);
extern void setbat(int index, unsigned long virt, phys_addr_t phys,
unsigned int size, pgprot_t prot);
@@ -108,7 +106,6 @@ extern u8 early_hash[];
#endif /* CONFIG_PPC32 */
-extern unsigned long ioremap_bot;
extern unsigned long __max_low_memory;
extern phys_addr_t __initial_memory_limit_addr;
extern phys_addr_t total_memory;
diff --git a/arch/powerpc/mm/nohash/book3e_hugetlbpage.c b/arch/powerpc/mm/nohash/book3e_hugetlbpage.c
index 61915f4d3c7f..8b88be91b622 100644
--- a/arch/powerpc/mm/nohash/book3e_hugetlbpage.c
+++ b/arch/powerpc/mm/nohash/book3e_hugetlbpage.c
@@ -122,8 +122,8 @@ static inline int book3e_tlb_exists(unsigned long ea, unsigned long pid)
return found;
}
-void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
- pte_t pte)
+static void
+book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, pte_t pte)
{
unsigned long mas1, mas2;
u64 mas7_3;
@@ -183,6 +183,18 @@ void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
local_irq_restore(flags);
}
+/*
+ * This is called at the end of handling a user page fault, when the
+ * fault has been handled by updating a PTE in the linux page tables.
+ *
+ * This must always be called with the pte lock held.
+ */
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
+{
+ if (is_vm_hugetlb_page(vma))
+ book3e_hugetlb_preload(vma, address, *ptep);
+}
+
void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
struct hstate *hstate = hstate_file(vma->vm_file);
diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c
index d4acf6fa0596..696f568253a0 100644
--- a/arch/powerpc/mm/nohash/tlb.c
+++ b/arch/powerpc/mm/nohash/tlb.c
@@ -630,7 +630,6 @@ static void early_init_this_mmu(void)
#ifdef CONFIG_PPC_FSL_BOOK3E
if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
unsigned int num_cams;
- int __maybe_unused cpu = smp_processor_id();
bool map = true;
/* use a quarter of the TLBCAM for bolted linear map */
@@ -704,6 +703,8 @@ static void __init early_init_mmu_global(void)
* for use by the TLB miss code
*/
linear_map_top = memblock_end_of_DRAM();
+
+ ioremap_bot = IOREMAP_BASE;
}
static void __init early_mmu_set_memory_limit(void)
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 917904d2fe97..50d68d21ddcc 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -163,6 +163,22 @@ static void unmap_cpu_from_node(unsigned long cpu)
}
#endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */
+int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
+{
+ int dist = 0;
+
+ int i, index;
+
+ for (i = 0; i < distance_ref_points_depth; i++) {
+ index = be32_to_cpu(distance_ref_points[i]);
+ if (cpu1_assoc[index] == cpu2_assoc[index])
+ break;
+ dist++;
+ }
+
+ return dist;
+}
+
/* must hold reference to node during call */
static const __be32 *of_get_associativity(struct device_node *dev)
{
@@ -212,7 +228,7 @@ static int associativity_to_nid(const __be32 *associativity)
{
int nid = NUMA_NO_NODE;
- if (min_common_depth == -1)
+ if (!numa_enabled)
goto out;
if (of_read_number(associativity, 1) >= min_common_depth)
@@ -416,17 +432,19 @@ static int of_get_assoc_arrays(struct assoc_arrays *aa)
static int of_drconf_to_nid_single(struct drmem_lmb *lmb)
{
struct assoc_arrays aa = { .arrays = NULL };
- int default_nid = 0;
+ int default_nid = NUMA_NO_NODE;
int nid = default_nid;
int rc, index;
+ if ((min_common_depth < 0) || !numa_enabled)
+ return default_nid;
+
rc = of_get_assoc_arrays(&aa);
if (rc)
return default_nid;
- if (min_common_depth > 0 && min_common_depth <= aa.array_sz &&
- !(lmb->flags & DRCONF_MEM_AI_INVALID) &&
- lmb->aa_index < aa.n_arrays) {
+ if (min_common_depth <= aa.array_sz &&
+ !(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < aa.n_arrays) {
index = lmb->aa_index * aa.array_sz + min_common_depth - 1;
nid = of_read_number(&aa.arrays[index], 1);
@@ -626,8 +644,14 @@ static int __init parse_numa_properties(void)
min_common_depth = find_min_common_depth();
- if (min_common_depth < 0)
+ if (min_common_depth < 0) {
+ /*
+ * if we fail to parse min_common_depth from device tree
+ * mark the numa disabled, boot with numa disabled.
+ */
+ numa_enabled = false;
return min_common_depth;
+ }
dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth);
@@ -743,7 +767,7 @@ void __init dump_numa_cpu_topology(void)
unsigned int node;
unsigned int cpu, count;
- if (min_common_depth == -1 || !numa_enabled)
+ if (!numa_enabled)
return;
for_each_online_node(node) {
@@ -808,7 +832,7 @@ static void __init find_possible_nodes(void)
struct device_node *rtas;
u32 numnodes, i;
- if (min_common_depth <= 0)
+ if (!numa_enabled)
return;
rtas = of_find_node_by_path("/rtas");
@@ -1010,7 +1034,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
struct device_node *memory = NULL;
int nid;
- if (!numa_enabled || (min_common_depth < 0))
+ if (!numa_enabled)
return first_online_node;
memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
@@ -1063,9 +1087,6 @@ u64 memory_hotplug_max(void)
/* Virtual Processor Home Node (VPHN) support */
#ifdef CONFIG_PPC_SPLPAR
-
-#include "book3s64/vphn.h"
-
struct topology_update_data {
struct topology_update_data *next;
unsigned int cpu;
@@ -1161,25 +1182,13 @@ static int update_cpu_associativity_changes_mask(void)
* Retrieve the new associativity information for a virtual processor's
* home node.
*/
-static long hcall_vphn(unsigned long cpu, __be32 *associativity)
-{
- long rc;
- long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
- u64 flags = 1;
- int hwcpu = get_hard_smp_processor_id(cpu);
-
- rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu);
- vphn_unpack_associativity(retbuf, associativity);
-
- return rc;
-}
-
static long vphn_get_associativity(unsigned long cpu,
__be32 *associativity)
{
long rc;
- rc = hcall_vphn(cpu, associativity);
+ rc = hcall_vphn(get_hard_smp_processor_id(cpu),
+ VPHN_FLAG_VCPU, associativity);
switch (rc) {
case H_FUNCTION:
diff --git a/arch/powerpc/mm/pgtable-frag.c b/arch/powerpc/mm/pgtable-frag.c
index a7b05214760c..ee4bd6d38602 100644
--- a/arch/powerpc/mm/pgtable-frag.c
+++ b/arch/powerpc/mm/pgtable-frag.c
@@ -25,7 +25,7 @@ void pte_frag_destroy(void *pte_frag)
count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
/* We allow PTE_FRAG_NR fragments from a PTE page */
if (atomic_sub_and_test(PTE_FRAG_NR - count, &page->pt_frag_refcount)) {
- pgtable_page_dtor(page);
+ pgtable_pte_page_dtor(page);
__free_page(page);
}
}
@@ -61,7 +61,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
page = alloc_page(PGALLOC_GFP | __GFP_ACCOUNT);
if (!page)
return NULL;
- if (!pgtable_page_ctor(page)) {
+ if (!pgtable_pte_page_ctor(page)) {
__free_page(page);
return NULL;
}
@@ -113,7 +113,7 @@ void pte_fragment_free(unsigned long *table, int kernel)
BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
if (atomic_dec_and_test(&page->pt_frag_refcount)) {
if (!kernel)
- pgtable_page_dtor(page);
+ pgtable_pte_page_dtor(page);
__free_page(page);
}
}
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index fc10c0c24f51..e3759b69f81b 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -336,10 +336,11 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
if (pgd_none(pgd))
return NULL;
- if (pgd_huge(pgd)) {
+ if (pgd_is_leaf(pgd)) {
ret_pte = (pte_t *)pgdp;
goto out;
}
+
if (is_hugepd(__hugepd(pgd_val(pgd)))) {
hpdp = (hugepd_t *)&pgd;
goto out_huge;
@@ -357,14 +358,16 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
if (pud_none(pud))
return NULL;
- if (pud_huge(pud)) {
+ if (pud_is_leaf(pud)) {
ret_pte = (pte_t *)pudp;
goto out;
}
+
if (is_hugepd(__hugepd(pud_val(pud)))) {
hpdp = (hugepd_t *)&pud;
goto out_huge;
}
+
pdshift = PMD_SHIFT;
pmdp = pmd_offset(&pud, ea);
pmd = READ_ONCE(*pmdp);
@@ -393,15 +396,12 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
ret_pte = (pte_t *)pmdp;
goto out;
}
- /*
- * pmd_large check below will handle the swap pmd pte
- * we need to do both the check because they are config
- * dependent.
- */
- if (pmd_huge(pmd) || pmd_large(pmd)) {
+
+ if (pmd_is_leaf(pmd)) {
ret_pte = (pte_t *)pmdp;
goto out;
}
+
if (is_hugepd(__hugepd(pmd_val(pmd)))) {
hpdp = (hugepd_t *)&pmd;
goto out_huge;
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index d53188dee18f..8ec5dfb65b2e 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -27,166 +27,13 @@
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/fixmap.h>
-#include <asm/io.h>
#include <asm/setup.h>
#include <asm/sections.h>
#include <mm/mmu_decl.h>
-unsigned long ioremap_bot;
-EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */
-
extern char etext[], _stext[], _sinittext[], _einittext[];
-void __iomem *
-ioremap(phys_addr_t addr, unsigned long size)
-{
- pgprot_t prot = pgprot_noncached(PAGE_KERNEL);
-
- return __ioremap_caller(addr, size, prot, __builtin_return_address(0));
-}
-EXPORT_SYMBOL(ioremap);
-
-void __iomem *
-ioremap_wc(phys_addr_t addr, unsigned long size)
-{
- pgprot_t prot = pgprot_noncached_wc(PAGE_KERNEL);
-
- return __ioremap_caller(addr, size, prot, __builtin_return_address(0));
-}
-EXPORT_SYMBOL(ioremap_wc);
-
-void __iomem *
-ioremap_wt(phys_addr_t addr, unsigned long size)
-{
- pgprot_t prot = pgprot_cached_wthru(PAGE_KERNEL);
-
- return __ioremap_caller(addr, size, prot, __builtin_return_address(0));
-}
-EXPORT_SYMBOL(ioremap_wt);
-
-void __iomem *
-ioremap_coherent(phys_addr_t addr, unsigned long size)
-{
- pgprot_t prot = pgprot_cached(PAGE_KERNEL);
-
- return __ioremap_caller(addr, size, prot, __builtin_return_address(0));
-}
-EXPORT_SYMBOL(ioremap_coherent);
-
-void __iomem *
-ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags)
-{
- pte_t pte = __pte(flags);
-
- /* writeable implies dirty for kernel addresses */
- if (pte_write(pte))
- pte = pte_mkdirty(pte);
-
- /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
- pte = pte_exprotect(pte);
- pte = pte_mkprivileged(pte);
-
- return __ioremap_caller(addr, size, pte_pgprot(pte), __builtin_return_address(0));
-}
-EXPORT_SYMBOL(ioremap_prot);
-
-void __iomem *
-__ioremap(phys_addr_t addr, unsigned long size, unsigned long flags)
-{
- return __ioremap_caller(addr, size, __pgprot(flags), __builtin_return_address(0));
-}
-
-void __iomem *
-__ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *caller)
-{
- unsigned long v, i;
- phys_addr_t p;
- int err;
-
- /*
- * Choose an address to map it to.
- * Once the vmalloc system is running, we use it.
- * Before then, we use space going down from IOREMAP_TOP
- * (ioremap_bot records where we're up to).
- */
- p = addr & PAGE_MASK;
- size = PAGE_ALIGN(addr + size) - p;
-
- /*
- * If the address lies within the first 16 MB, assume it's in ISA
- * memory space
- */
- if (p < 16*1024*1024)
- p += _ISA_MEM_BASE;
-
-#ifndef CONFIG_CRASH_DUMP
- /*
- * Don't allow anybody to remap normal RAM that we're using.
- * mem_init() sets high_memory so only do the check after that.
- */
- if (slab_is_available() && p <= virt_to_phys(high_memory - 1) &&
- page_is_ram(__phys_to_pfn(p))) {
- printk("__ioremap(): phys addr 0x%llx is RAM lr %ps\n",
- (unsigned long long)p, __builtin_return_address(0));
- return NULL;
- }
-#endif
-
- if (size == 0)
- return NULL;
-
- /*
- * Is it already mapped? Perhaps overlapped by a previous
- * mapping.
- */
- v = p_block_mapped(p);
- if (v)
- goto out;
-
- if (slab_is_available()) {
- struct vm_struct *area;
- area = get_vm_area_caller(size, VM_IOREMAP, caller);
- if (area == 0)
- return NULL;
- area->phys_addr = p;
- v = (unsigned long) area->addr;
- } else {
- v = (ioremap_bot -= size);
- }
-
- /*
- * Should check if it is a candidate for a BAT mapping
- */
-
- err = 0;
- for (i = 0; i < size && err == 0; i += PAGE_SIZE)
- err = map_kernel_page(v + i, p + i, prot);
- if (err) {
- if (slab_is_available())
- vunmap((void *)v);
- return NULL;
- }
-
-out:
- return (void __iomem *) (v + ((unsigned long)addr & ~PAGE_MASK));
-}
-EXPORT_SYMBOL(__ioremap);
-
-void iounmap(volatile void __iomem *addr)
-{
- /*
- * If mapped by BATs then there is nothing to do.
- * Calling vfree() generates a benign warning.
- */
- if (v_block_mapped((unsigned long)addr))
- return;
-
- if (addr > high_memory && (unsigned long) addr < ioremap_bot)
- vunmap((void *) (PAGE_MASK & (unsigned long)addr));
-}
-EXPORT_SYMBOL(iounmap);
-
static void __init *early_alloc_pgtable(unsigned long size)
{
void *ptr = memblock_alloc(size, size);
@@ -252,7 +99,7 @@ static void __init __mapin_ram_chunk(unsigned long offset, unsigned long top)
map_kernel_page(v, p, ktext ? PAGE_KERNEL_TEXT : PAGE_KERNEL);
#ifdef CONFIG_PPC_BOOK3S_32
if (ktext)
- hash_preload(&init_mm, v, false, 0x300);
+ hash_preload(&init_mm, v);
#endif
v += PAGE_SIZE;
p += PAGE_SIZE;
@@ -360,7 +207,7 @@ void mark_initmem_nx(void)
unsigned long numpages = PFN_UP((unsigned long)_einittext) -
PFN_DOWN((unsigned long)_sinittext);
- if (v_block_mapped((unsigned long)_stext) + 1)
+ if (v_block_mapped((unsigned long)_stext + 1))
mmu_mark_initmem_nx();
else
change_page_attr(page, numpages, PAGE_KERNEL);
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 12d5e083942d..e78832dce7bb 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * This file contains ioremap and related functions for 64-bit machines.
+ * This file contains pgtable related functions for 64-bit machines.
*
* Derived from arch/ppc64/mm/init.c
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
@@ -34,7 +34,6 @@
#include <asm/pgalloc.h>
#include <asm/page.h>
#include <asm/prom.h>
-#include <asm/io.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
@@ -98,209 +97,26 @@ unsigned long __pte_frag_nr;
EXPORT_SYMBOL(__pte_frag_nr);
unsigned long __pte_frag_size_shift;
EXPORT_SYMBOL(__pte_frag_size_shift);
-unsigned long ioremap_bot;
-#else /* !CONFIG_PPC_BOOK3S_64 */
-unsigned long ioremap_bot = IOREMAP_BASE;
#endif
-/**
- * __ioremap_at - Low level function to establish the page tables
- * for an IO mapping
- */
-void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_t prot)
-{
- unsigned long i;
-
- /* We don't support the 4K PFN hack with ioremap */
- if (pgprot_val(prot) & H_PAGE_4K_PFN)
- return NULL;
-
- if ((ea + size) >= (void *)IOREMAP_END) {
- pr_warn("Outside the supported range\n");
- return NULL;
- }
-
- WARN_ON(pa & ~PAGE_MASK);
- WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
- WARN_ON(size & ~PAGE_MASK);
-
- for (i = 0; i < size; i += PAGE_SIZE)
- if (map_kernel_page((unsigned long)ea + i, pa + i, prot))
- return NULL;
-
- return (void __iomem *)ea;
-}
-
-/**
- * __iounmap_from - Low level function to tear down the page tables
- * for an IO mapping. This is used for mappings that
- * are manipulated manually, like partial unmapping of
- * PCI IOs or ISA space.
- */
-void __iounmap_at(void *ea, unsigned long size)
-{
- WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
- WARN_ON(size & ~PAGE_MASK);
-
- unmap_kernel_range((unsigned long)ea, size);
-}
-
-void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size,
- pgprot_t prot, void *caller)
-{
- phys_addr_t paligned;
- void __iomem *ret;
-
- /*
- * Choose an address to map it to.
- * Once the imalloc system is running, we use it.
- * Before that, we map using addresses going
- * up from ioremap_bot. imalloc will use
- * the addresses from ioremap_bot through
- * IMALLOC_END
- *
- */
- paligned = addr & PAGE_MASK;
- size = PAGE_ALIGN(addr + size) - paligned;
-
- if ((size == 0) || (paligned == 0))
- return NULL;
-
- if (slab_is_available()) {
- struct vm_struct *area;
-
- area = __get_vm_area_caller(size, VM_IOREMAP,
- ioremap_bot, IOREMAP_END,
- caller);
- if (area == NULL)
- return NULL;
-
- area->phys_addr = paligned;
- ret = __ioremap_at(paligned, area->addr, size, prot);
- if (!ret)
- vunmap(area->addr);
- } else {
- ret = __ioremap_at(paligned, (void *)ioremap_bot, size, prot);
- if (ret)
- ioremap_bot += size;
- }
-
- if (ret)
- ret += addr & ~PAGE_MASK;
- return ret;
-}
-
-void __iomem * __ioremap(phys_addr_t addr, unsigned long size,
- unsigned long flags)
-{
- return __ioremap_caller(addr, size, __pgprot(flags), __builtin_return_address(0));
-}
-
-void __iomem * ioremap(phys_addr_t addr, unsigned long size)
-{
- pgprot_t prot = pgprot_noncached(PAGE_KERNEL);
- void *caller = __builtin_return_address(0);
-
- if (ppc_md.ioremap)
- return ppc_md.ioremap(addr, size, prot, caller);
- return __ioremap_caller(addr, size, prot, caller);
-}
-
-void __iomem * ioremap_wc(phys_addr_t addr, unsigned long size)
-{
- pgprot_t prot = pgprot_noncached_wc(PAGE_KERNEL);
- void *caller = __builtin_return_address(0);
-
- if (ppc_md.ioremap)
- return ppc_md.ioremap(addr, size, prot, caller);
- return __ioremap_caller(addr, size, prot, caller);
-}
-
-void __iomem *ioremap_coherent(phys_addr_t addr, unsigned long size)
-{
- pgprot_t prot = pgprot_cached(PAGE_KERNEL);
- void *caller = __builtin_return_address(0);
-
- if (ppc_md.ioremap)
- return ppc_md.ioremap(addr, size, prot, caller);
- return __ioremap_caller(addr, size, prot, caller);
-}
-
-void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size,
- unsigned long flags)
-{
- pte_t pte = __pte(flags);
- void *caller = __builtin_return_address(0);
-
- /* writeable implies dirty for kernel addresses */
- if (pte_write(pte))
- pte = pte_mkdirty(pte);
-
- /* we don't want to let _PAGE_EXEC leak out */
- pte = pte_exprotect(pte);
- /*
- * Force kernel mapping.
- */
- pte = pte_mkprivileged(pte);
-
- if (ppc_md.ioremap)
- return ppc_md.ioremap(addr, size, pte_pgprot(pte), caller);
- return __ioremap_caller(addr, size, pte_pgprot(pte), caller);
-}
-
-
-/*
- * Unmap an IO region and remove it from imalloc'd list.
- * Access to IO memory should be serialized by driver.
- */
-void __iounmap(volatile void __iomem *token)
-{
- void *addr;
-
- if (!slab_is_available())
- return;
-
- addr = (void *) ((unsigned long __force)
- PCI_FIX_ADDR(token) & PAGE_MASK);
- if ((unsigned long)addr < ioremap_bot) {
- printk(KERN_WARNING "Attempt to iounmap early bolted mapping"
- " at 0x%p\n", addr);
- return;
- }
- vunmap(addr);
-}
-
-void iounmap(volatile void __iomem *token)
-{
- if (ppc_md.iounmap)
- ppc_md.iounmap(token);
- else
- __iounmap(token);
-}
-
-EXPORT_SYMBOL(ioremap);
-EXPORT_SYMBOL(ioremap_wc);
-EXPORT_SYMBOL(ioremap_prot);
-EXPORT_SYMBOL(__ioremap);
-EXPORT_SYMBOL(__ioremap_at);
-EXPORT_SYMBOL(iounmap);
-EXPORT_SYMBOL(__iounmap);
-EXPORT_SYMBOL(__iounmap_at);
-
#ifndef __PAGETABLE_PUD_FOLDED
/* 4 level page table */
struct page *pgd_page(pgd_t pgd)
{
- if (pgd_huge(pgd))
+ if (pgd_is_leaf(pgd)) {
+ VM_WARN_ON(!pgd_huge(pgd));
return pte_page(pgd_pte(pgd));
+ }
return virt_to_page(pgd_page_vaddr(pgd));
}
#endif
struct page *pud_page(pud_t pud)
{
- if (pud_huge(pud))
+ if (pud_is_leaf(pud)) {
+ VM_WARN_ON(!pud_huge(pud));
return pte_page(pud_pte(pud));
+ }
return virt_to_page(pud_page_vaddr(pud));
}
@@ -310,8 +126,10 @@ struct page *pud_page(pud_t pud)
*/
struct page *pmd_page(pmd_t pmd)
{
- if (pmd_large(pmd) || pmd_huge(pmd) || pmd_devmap(pmd))
+ if (pmd_is_leaf(pmd)) {
+ VM_WARN_ON(!(pmd_large(pmd) || pmd_huge(pmd)));
return pte_page(pmd_pte(pmd));
+ }
return virt_to_page(pmd_page_vaddr(pmd));
}
diff --git a/arch/powerpc/mm/ptdump/bats.c b/arch/powerpc/mm/ptdump/bats.c
index a0d23e96e841..4154feac1da3 100644
--- a/arch/powerpc/mm/ptdump/bats.c
+++ b/arch/powerpc/mm/ptdump/bats.c
@@ -149,7 +149,7 @@ static int bats_show_603(struct seq_file *m, void *v)
static int bats_open(struct inode *inode, struct file *file)
{
- if (cpu_has_feature(CPU_FTR_601))
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_601))
return single_open(file, bats_show_601, NULL);
return single_open(file, bats_show_603, NULL);
diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c
index 72f0e4a3d839..a07278027c6f 100644
--- a/arch/powerpc/mm/ptdump/hashpagetable.c
+++ b/arch/powerpc/mm/ptdump/hashpagetable.c
@@ -237,7 +237,6 @@ static int native_find(unsigned long ea, int psize, bool primary, u64 *v, u64
return -1;
}
-#ifdef CONFIG_PPC_PSERIES
static int pseries_find(unsigned long ea, int psize, bool primary, u64 *v, u64 *r)
{
struct hash_pte ptes[4];
@@ -274,7 +273,6 @@ static int pseries_find(unsigned long ea, int psize, bool primary, u64 *v, u64 *
}
return -1;
}
-#endif
static void decode_r(int bps, unsigned long r, unsigned long *rpn, int *aps,
unsigned long *lp_bits)
@@ -316,10 +314,9 @@ static void decode_r(int bps, unsigned long r, unsigned long *rpn, int *aps,
static int base_hpte_find(unsigned long ea, int psize, bool primary, u64 *v,
u64 *r)
{
-#ifdef CONFIG_PPC_PSERIES
- if (firmware_has_feature(FW_FEATURE_LPAR))
+ if (IS_ENABLED(CONFIG_PPC_PSERIES) && firmware_has_feature(FW_FEATURE_LPAR))
return pseries_find(ea, psize, primary, v, r);
-#endif
+
return native_find(ea, psize, primary, v, r);
}
@@ -386,12 +383,13 @@ static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
psize = mmu_vmalloc_psize;
else
psize = mmu_io_psize;
-#ifdef CONFIG_PPC_64K_PAGES
+
/* check for secret 4K mappings */
- if (((pteval & H_PAGE_COMBO) == H_PAGE_COMBO) ||
- ((pteval & H_PAGE_4K_PFN) == H_PAGE_4K_PFN))
+ if (IS_ENABLED(CONFIG_PPC_64K_PAGES) &&
+ ((pteval & H_PAGE_COMBO) == H_PAGE_COMBO ||
+ (pteval & H_PAGE_4K_PFN) == H_PAGE_4K_PFN))
psize = mmu_io_psize;
-#endif
+
/* check for hashpte */
status = hpte_find(st, addr, psize);
@@ -469,9 +467,10 @@ static void walk_linearmapping(struct pg_state *st)
static void walk_vmemmap(struct pg_state *st)
{
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
struct vmemmap_backing *ptr = vmemmap_list;
+ if (!IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP))
+ return;
/*
* Traverse the vmemmaped memory and dump pages that are in the hash
* pagetable.
@@ -481,7 +480,6 @@ static void walk_vmemmap(struct pg_state *st)
ptr = ptr->list;
}
seq_puts(st->seq, "---[ vmemmap end ]---\n");
-#endif
}
static void populate_markers(void)
@@ -495,11 +493,7 @@ static void populate_markers(void)
address_markers[6].start_address = PHB_IO_END;
address_markers[7].start_address = IOREMAP_BASE;
address_markers[8].start_address = IOREMAP_END;
-#ifdef CONFIG_PPC_BOOK3S_64
address_markers[9].start_address = H_VMEMMAP_START;
-#else
- address_markers[9].start_address = VMEMMAP_BASE;
-#endif
}
static int ptdump_show(struct seq_file *m, void *v)
diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index 39bf1e2cba13..2f9ddc29c535 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -26,10 +26,6 @@
#include "ptdump.h"
-#ifdef CONFIG_PPC32
-#define KERN_VIRT_START PAGE_OFFSET
-#endif
-
/*
* To visualise what is happening,
*
@@ -88,10 +84,6 @@ static struct addr_marker address_markers[] = {
#else
{ 0, "Early I/O remap start" },
{ 0, "Early I/O remap end" },
-#ifdef CONFIG_NOT_COHERENT_CACHE
- { 0, "Consistent mem start" },
- { 0, "Consistent mem end" },
-#endif
#ifdef CONFIG_HIGHMEM
{ 0, "Highmem PTEs start" },
{ 0, "Highmem PTEs end" },
@@ -181,7 +173,7 @@ static void dump_addr(struct pg_state *st, unsigned long addr)
static void note_prot_wx(struct pg_state *st, unsigned long addr)
{
- if (!st->check_wx)
+ if (!IS_ENABLED(CONFIG_PPC_DEBUG_WX) || !st->check_wx)
return;
if (!((st->current_flags & pgprot_val(PAGE_KERNEL_X)) == pgprot_val(PAGE_KERNEL_X)))
@@ -273,7 +265,7 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
addr = start + i * PMD_SIZE;
- if (!pmd_none(*pmd) && !pmd_huge(*pmd))
+ if (!pmd_none(*pmd) && !pmd_is_leaf(*pmd))
/* pmd exists */
walk_pte(st, pmd, addr);
else
@@ -289,7 +281,7 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
addr = start + i * PUD_SIZE;
- if (!pud_none(*pud) && !pud_huge(*pud))
+ if (!pud_none(*pud) && !pud_is_leaf(*pud))
/* pud exists */
walk_pmd(st, pud, addr);
else
@@ -299,18 +291,16 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
static void walk_pagetables(struct pg_state *st)
{
- pgd_t *pgd = pgd_offset_k(0UL);
unsigned int i;
- unsigned long addr;
-
- addr = st->start_address;
+ unsigned long addr = st->start_address & PGDIR_MASK;
+ pgd_t *pgd = pgd_offset_k(addr);
/*
* Traverse the linux pagetable structure and dump pages that are in
* the hash pagetable.
*/
- for (i = 0; i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) {
- if (!pgd_none(*pgd) && !pgd_huge(*pgd))
+ for (i = pgd_index(addr); i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) {
+ if (!pgd_none(*pgd) && !pgd_is_leaf(*pgd))
/* pgd exists */
walk_pud(st, pgd, addr);
else
@@ -341,11 +331,6 @@ static void populate_markers(void)
#else /* !CONFIG_PPC64 */
address_markers[i++].start_address = ioremap_bot;
address_markers[i++].start_address = IOREMAP_TOP;
-#ifdef CONFIG_NOT_COHERENT_CACHE
- address_markers[i++].start_address = IOREMAP_TOP;
- address_markers[i++].start_address = IOREMAP_TOP +
- CONFIG_CONSISTENT_SIZE;
-#endif
#ifdef CONFIG_HIGHMEM
address_markers[i++].start_address = PKMAP_BASE;
address_markers[i++].start_address = PKMAP_ADDR(LAST_PKMAP);
@@ -364,12 +349,13 @@ static int ptdump_show(struct seq_file *m, void *v)
struct pg_state st = {
.seq = m,
.marker = address_markers,
+ .start_address = PAGE_OFFSET,
};
- if (radix_enabled())
- st.start_address = PAGE_OFFSET;
- else
+#ifdef CONFIG_PPC64
+ if (!radix_enabled())
st.start_address = KERN_VIRT_START;
+#endif
/* Traverse kernel page tables */
walk_pagetables(&st);
@@ -407,12 +393,13 @@ void ptdump_check_wx(void)
.seq = NULL,
.marker = address_markers,
.check_wx = true,
+ .start_address = PAGE_OFFSET,
};
- if (radix_enabled())
- st.start_address = PAGE_OFFSET;
- else
+#ifdef CONFIG_PPC64
+ if (!radix_enabled())
st.start_address = KERN_VIRT_START;
+#endif
walk_pagetables(&st);