aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/mm
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--arch/powerpc/mm/Makefile3
-rw-r--r--arch/powerpc/mm/book3s32/Makefile1
-rw-r--r--arch/powerpc/mm/book3s32/kuap.c5
-rw-r--r--arch/powerpc/mm/book3s32/kuep.c20
-rw-r--r--arch/powerpc/mm/book3s32/mmu.c36
-rw-r--r--arch/powerpc/mm/book3s32/mmu_context.c15
-rw-r--r--arch/powerpc/mm/book3s32/tlb.c11
-rw-r--r--arch/powerpc/mm/book3s64/Makefile28
-rw-r--r--arch/powerpc/mm/book3s64/hash_hugetlbpage.c162
-rw-r--r--arch/powerpc/mm/book3s64/hash_native.c151
-rw-r--r--arch/powerpc/mm/book3s64/hash_pgtable.c16
-rw-r--r--arch/powerpc/mm/book3s64/hash_tlb.c2
-rw-r--r--arch/powerpc/mm/book3s64/hash_utils.c252
-rw-r--r--arch/powerpc/mm/book3s64/hugetlbpage.c164
-rw-r--r--arch/powerpc/mm/book3s64/iommu_api.c68
-rw-r--r--arch/powerpc/mm/book3s64/mmu_context.c34
-rw-r--r--arch/powerpc/mm/book3s64/pgtable.c59
-rw-r--r--arch/powerpc/mm/book3s64/pkeys.c3
-rw-r--r--arch/powerpc/mm/book3s64/radix_hugetlbpage.c65
-rw-r--r--arch/powerpc/mm/book3s64/radix_pgtable.c114
-rw-r--r--arch/powerpc/mm/book3s64/radix_tlb.c16
-rw-r--r--arch/powerpc/mm/book3s64/slb.c21
-rw-r--r--arch/powerpc/mm/book3s64/slice.c (renamed from arch/powerpc/mm/slice.c)51
-rw-r--r--arch/powerpc/mm/book3s64/subpage_prot.c13
-rw-r--r--arch/powerpc/mm/book3s64/trace.c7
-rw-r--r--arch/powerpc/mm/cacheflush.c2
-rw-r--r--arch/powerpc/mm/copro_fault.c7
-rw-r--r--arch/powerpc/mm/drmem.c2
-rw-r--r--arch/powerpc/mm/fault.c71
-rw-r--r--arch/powerpc/mm/hugetlbpage.c42
-rw-r--r--arch/powerpc/mm/init-common.c21
-rw-r--r--arch/powerpc/mm/init_32.c55
-rw-r--r--arch/powerpc/mm/init_64.c70
-rw-r--r--arch/powerpc/mm/ioremap.c20
-rw-r--r--arch/powerpc/mm/kasan/Makefile4
-rw-r--r--arch/powerpc/mm/kasan/book3s_32.c58
-rw-r--r--arch/powerpc/mm/kasan/init_32.c191
-rw-r--r--arch/powerpc/mm/kasan/init_book3e_64.c133
-rw-r--r--arch/powerpc/mm/kasan/init_book3s_64.c104
-rw-r--r--arch/powerpc/mm/kasan/kasan_init_32.c192
-rw-r--r--arch/powerpc/mm/maccess.c17
-rw-r--r--arch/powerpc/mm/mem.c53
-rw-r--r--arch/powerpc/mm/mmap.c228
-rw-r--r--arch/powerpc/mm/mmu_context.c11
-rw-r--r--arch/powerpc/mm/mmu_decl.h33
-rw-r--r--arch/powerpc/mm/nohash/40x.c10
-rw-r--r--arch/powerpc/mm/nohash/44x.c20
-rw-r--r--arch/powerpc/mm/nohash/8xx.c46
-rw-r--r--arch/powerpc/mm/nohash/Makefile8
-rw-r--r--arch/powerpc/mm/nohash/book3e_hugetlbpage.c204
-rw-r--r--arch/powerpc/mm/nohash/book3e_pgtable.c21
-rw-r--r--arch/powerpc/mm/nohash/e500.c375
-rw-r--r--arch/powerpc/mm/nohash/e500_hugetlbpage.c190
-rw-r--r--arch/powerpc/mm/nohash/fsl_book3e.c379
-rw-r--r--arch/powerpc/mm/nohash/kaslr_booke.c17
-rw-r--r--arch/powerpc/mm/nohash/kup.c33
-rw-r--r--arch/powerpc/mm/nohash/mmu_context.c15
-rw-r--r--arch/powerpc/mm/nohash/tlb.c99
-rw-r--r--arch/powerpc/mm/nohash/tlb_low.S10
-rw-r--r--arch/powerpc/mm/nohash/tlb_low_64e.S183
-rw-r--r--arch/powerpc/mm/numa.c52
-rw-r--r--arch/powerpc/mm/pageattr.c72
-rw-r--r--arch/powerpc/mm/pgtable-frag.c2
-rw-r--r--arch/powerpc/mm/pgtable.c44
-rw-r--r--arch/powerpc/mm/pgtable_32.c32
-rw-r--r--arch/powerpc/mm/pgtable_64.c15
-rw-r--r--arch/powerpc/mm/ptdump/Makefile4
-rw-r--r--arch/powerpc/mm/ptdump/hashpagetable.c5
-rw-r--r--arch/powerpc/mm/ptdump/ptdump.c11
-rw-r--r--arch/powerpc/mm/ptdump/shared.c6
70 files changed, 2194 insertions, 2290 deletions
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index df8172da2301..503a6e249940 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -5,7 +5,7 @@
ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
-obj-y := fault.o mem.o pgtable.o mmap.o maccess.o pageattr.o \
+obj-y := fault.o mem.o pgtable.o maccess.o pageattr.o \
init_$(BITS).o pgtable_$(BITS).o \
pgtable-frag.o ioremap.o ioremap_$(BITS).o \
init-common.o mmu_context.o drmem.o \
@@ -14,7 +14,6 @@ obj-$(CONFIG_PPC_MMU_NOHASH) += nohash/
obj-$(CONFIG_PPC_BOOK3S_32) += book3s32/
obj-$(CONFIG_PPC_BOOK3S_64) += book3s64/
obj-$(CONFIG_NUMA) += numa.o
-obj-$(CONFIG_PPC_MM_SLICES) += slice.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o
diff --git a/arch/powerpc/mm/book3s32/Makefile b/arch/powerpc/mm/book3s32/Makefile
index 15f4773643d2..50dd8f6bdf46 100644
--- a/arch/powerpc/mm/book3s32/Makefile
+++ b/arch/powerpc/mm/book3s32/Makefile
@@ -9,5 +9,4 @@ endif
obj-y += mmu.o mmu_context.o
obj-$(CONFIG_PPC_BOOK3S_603) += nohash_low.o
obj-$(CONFIG_PPC_BOOK3S_604) += hash_low.o tlb.o
-obj-$(CONFIG_PPC_KUEP) += kuep.o
obj-$(CONFIG_PPC_KUAP) += kuap.o
diff --git a/arch/powerpc/mm/book3s32/kuap.c b/arch/powerpc/mm/book3s32/kuap.c
index 0f920f09af57..28676cabb005 100644
--- a/arch/powerpc/mm/book3s32/kuap.c
+++ b/arch/powerpc/mm/book3s32/kuap.c
@@ -20,8 +20,11 @@ EXPORT_SYMBOL(kuap_unlock_all_ool);
void setup_kuap(bool disabled)
{
- if (!disabled)
+ if (!disabled) {
kuap_lock_all_ool();
+ init_mm.context.sr0 |= SR_KS;
+ current->thread.sr0 |= SR_KS;
+ }
if (smp_processor_id() != boot_cpuid)
return;
diff --git a/arch/powerpc/mm/book3s32/kuep.c b/arch/powerpc/mm/book3s32/kuep.c
deleted file mode 100644
index c20733d6e02c..000000000000
--- a/arch/powerpc/mm/book3s32/kuep.c
+++ /dev/null
@@ -1,20 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#include <asm/kup.h>
-#include <asm/smp.h>
-
-struct static_key_false disable_kuep_key;
-
-void setup_kuep(bool disabled)
-{
- if (!disabled)
- kuep_lock();
-
- if (smp_processor_id() != boot_cpuid)
- return;
-
- if (disabled)
- static_branch_enable(&disable_kuep_key);
- else
- pr_info("Activating Kernel Userspace Execution Prevention\n");
-}
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index 27061583a010..850783cfa9c7 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -23,7 +23,6 @@
#include <linux/highmem.h>
#include <linux/memblock.h>
-#include <asm/prom.h>
#include <asm/mmu.h>
#include <asm/machdep.h>
#include <asm/code-patching.h>
@@ -76,7 +75,7 @@ unsigned long p_block_mapped(phys_addr_t pa)
return 0;
}
-static int find_free_bat(void)
+int __init find_free_bat(void)
{
int b;
int n = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
@@ -100,7 +99,7 @@ static int find_free_bat(void)
* - block size has to be a power of two. This is calculated by finding the
* highest bit set to 1.
*/
-static unsigned int block_size(unsigned long base, unsigned long top)
+unsigned int bat_block_size(unsigned long base, unsigned long top)
{
unsigned int max_size = SZ_256M;
unsigned int base_shift = (ffs(base) - 1) & 31;
@@ -145,7 +144,7 @@ static unsigned long __init __mmu_mapin_ram(unsigned long base, unsigned long to
int idx;
while ((idx = find_free_bat()) != -1 && base != top) {
- unsigned int size = block_size(base, top);
+ unsigned int size = bat_block_size(base, top);
if (size < 128 << 10)
break;
@@ -159,10 +158,13 @@ static unsigned long __init __mmu_mapin_ram(unsigned long base, unsigned long to
unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
{
unsigned long done;
- unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET;
+ unsigned long border = (unsigned long)__srwx_boundary - PAGE_OFFSET;
+ unsigned long size;
+ size = roundup_pow_of_two((unsigned long)_einittext - PAGE_OFFSET);
+ setibat(0, PAGE_OFFSET, 0, size, PAGE_KERNEL_X);
- if (debug_pagealloc_enabled_or_kfence() || __map_without_bats) {
+ if (debug_pagealloc_enabled_or_kfence()) {
pr_debug_once("Read-Write memory mapped without BATs\n");
if (base >= border)
return base;
@@ -196,18 +198,17 @@ void mmu_mark_initmem_nx(void)
int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
int i;
unsigned long base = (unsigned long)_stext - PAGE_OFFSET;
- unsigned long top = (unsigned long)_etext - PAGE_OFFSET;
+ unsigned long top = ALIGN((unsigned long)_etext - PAGE_OFFSET, SZ_128K);
unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET;
unsigned long size;
- for (i = 0; i < nb - 1 && base < top && top - base > (128 << 10);) {
- size = block_size(base, top);
+ for (i = 0; i < nb - 1 && base < top;) {
+ size = bat_block_size(base, top);
setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT);
base += size;
}
if (base < top) {
- size = block_size(base, top);
- size = max(size, 128UL << 10);
+ size = bat_block_size(base, top);
if ((top - base) > size) {
size <<= 1;
if (strict_kernel_rwx_enabled() && base + size > border)
@@ -239,7 +240,7 @@ void mmu_mark_rodata_ro(void)
for (i = 0; i < nb; i++) {
struct ppc_bat *bat = BATS[i];
- if (bat_addrs[i].start < (unsigned long)__init_begin)
+ if (bat_addrs[i].start < (unsigned long)__end_rodata)
bat[1].batl = (bat[1].batl & ~BPP_RW) | BPP_RX;
}
@@ -247,10 +248,9 @@ void mmu_mark_rodata_ro(void)
}
/*
- * Set up one of the I/D BAT (block address translation) register pairs.
+ * Set up one of the D BAT (block address translation) register pairs.
* The parameters are not checked; in particular size must be a power
* of 2 between 128k and 256M.
- * On 603+, only set IBAT when _PAGE_EXEC is set
*/
void __init setbat(int index, unsigned long virt, phys_addr_t phys,
unsigned int size, pgprot_t prot)
@@ -286,10 +286,6 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys,
/* G bit must be zero in IBATs */
flags &= ~_PAGE_EXEC;
}
- if (flags & _PAGE_EXEC)
- bat[0] = bat[1];
- else
- bat[0].batu = bat[0].batl = 0;
bat_addrs[index].start = virt;
bat_addrs[index].limit = virt + ((bl + 1) << 17) - 1;
@@ -318,11 +314,9 @@ static void hash_preload(struct mm_struct *mm, unsigned long ea)
*
* This must always be called with the pte lock held.
*/
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
+void __update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
pte_t *ptep)
{
- if (!mmu_has_feature(MMU_FTR_HPTE_TABLE))
- return;
/*
* We don't need to worry about _PAGE_PRESENT here because we are
* called with either mm->page_table_lock held or ptl lock held
diff --git a/arch/powerpc/mm/book3s32/mmu_context.c b/arch/powerpc/mm/book3s32/mmu_context.c
index e2708e387dc3..269a3eb25a73 100644
--- a/arch/powerpc/mm/book3s32/mmu_context.c
+++ b/arch/powerpc/mm/book3s32/mmu_context.c
@@ -69,6 +69,12 @@ EXPORT_SYMBOL_GPL(__init_new_context);
int init_new_context(struct task_struct *t, struct mm_struct *mm)
{
mm->context.id = __init_new_context();
+ mm->context.sr0 = CTX_TO_VSID(mm->context.id, 0);
+
+ if (!kuep_is_disabled())
+ mm->context.sr0 |= SR_NX;
+ if (!kuap_is_disabled())
+ mm->context.sr0 |= SR_KS;
return 0;
}
@@ -108,20 +114,13 @@ void __init mmu_context_init(void)
void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk)
{
long id = next->context.id;
- unsigned long val;
if (id < 0)
panic("mm_struct %p has no context ID", next);
isync();
- val = CTX_TO_VSID(id, 0);
- if (!kuep_is_disabled())
- val |= SR_NX;
- if (!kuap_is_disabled())
- val |= SR_KS;
-
- update_user_segments(val);
+ update_user_segments(next->context.sr0);
if (IS_ENABLED(CONFIG_BDI_SWITCH))
abatron_pteptrs[1] = next->pgd;
diff --git a/arch/powerpc/mm/book3s32/tlb.c b/arch/powerpc/mm/book3s32/tlb.c
index 19f0ef950d77..9ad6b56bfec9 100644
--- a/arch/powerpc/mm/book3s32/tlb.c
+++ b/arch/powerpc/mm/book3s32/tlb.c
@@ -81,14 +81,15 @@ EXPORT_SYMBOL(hash__flush_range);
void hash__flush_tlb_mm(struct mm_struct *mm)
{
struct vm_area_struct *mp;
+ VMA_ITERATOR(vmi, mm, 0);
/*
- * It is safe to go down the mm's list of vmas when called
- * from dup_mmap, holding mmap_lock. It would also be safe from
- * unmap_region or exit_mmap, but not from vmtruncate on SMP -
- * but it seems dup_mmap is the only SMP case which gets here.
+ * It is safe to iterate the vmas when called from dup_mmap,
+ * holding mmap_lock. It would also be safe from unmap_region
+ * or exit_mmap, but not from vmtruncate on SMP - but it seems
+ * dup_mmap is the only SMP case which gets here.
*/
- for (mp = mm->mmap; mp != NULL; mp = mp->vm_next)
+ for_each_vma(vmi, mp)
hash__flush_range(mp->vm_mm, mp->vm_start, mp->vm_end);
}
EXPORT_SYMBOL(hash__flush_tlb_mm);
diff --git a/arch/powerpc/mm/book3s64/Makefile b/arch/powerpc/mm/book3s64/Makefile
index 1b56d3af47d4..cad2abc1730f 100644
--- a/arch/powerpc/mm/book3s64/Makefile
+++ b/arch/powerpc/mm/book3s64/Makefile
@@ -2,22 +2,34 @@
ccflags-y := $(NO_MINIMAL_TOC)
+obj-y += mmu_context.o pgtable.o trace.o
+ifdef CONFIG_PPC_64S_HASH_MMU
CFLAGS_REMOVE_slb.o = $(CC_FLAGS_FTRACE)
-
-obj-y += hash_pgtable.o hash_utils.o slb.o \
- mmu_context.o pgtable.o hash_tlb.o
-obj-$(CONFIG_PPC_NATIVE) += hash_native.o
-obj-$(CONFIG_PPC_RADIX_MMU) += radix_pgtable.o radix_tlb.o
+obj-y += hash_pgtable.o hash_utils.o hash_tlb.o slb.o slice.o
+obj-$(CONFIG_PPC_HASH_MMU_NATIVE) += hash_native.o
obj-$(CONFIG_PPC_4K_PAGES) += hash_4k.o
obj-$(CONFIG_PPC_64K_PAGES) += hash_64k.o
-obj-$(CONFIG_HUGETLB_PAGE) += hash_hugetlbpage.o
+obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hash_hugepage.o
+obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage_prot.o
+endif
+
+obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
+
+obj-$(CONFIG_PPC_RADIX_MMU) += radix_pgtable.o radix_tlb.o
ifdef CONFIG_HUGETLB_PAGE
obj-$(CONFIG_PPC_RADIX_MMU) += radix_hugetlbpage.o
endif
-obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hash_hugepage.o
-obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage_prot.o
obj-$(CONFIG_SPAPR_TCE_IOMMU) += iommu_api.o
obj-$(CONFIG_PPC_PKEY) += pkeys.o
# Instrumenting the SLB fault path can lead to duplicate SLB entries
KCOV_INSTRUMENT_slb.o := n
+
+# Parts of these can run in real mode and therefore are
+# not safe with the current outline KASAN implementation
+KASAN_SANITIZE_mmu_context.o := n
+KASAN_SANITIZE_pgtable.o := n
+KASAN_SANITIZE_radix_pgtable.o := n
+KASAN_SANITIZE_radix_tlb.o := n
+KASAN_SANITIZE_slb.o := n
+KASAN_SANITIZE_pkeys.o := n
diff --git a/arch/powerpc/mm/book3s64/hash_hugetlbpage.c b/arch/powerpc/mm/book3s64/hash_hugetlbpage.c
deleted file mode 100644
index a688e1324ae5..000000000000
--- a/arch/powerpc/mm/book3s64/hash_hugetlbpage.c
+++ /dev/null
@@ -1,162 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * PPC64 Huge TLB Page Support for hash based MMUs (POWER4 and later)
- *
- * Copyright (C) 2003 David Gibson, IBM Corporation.
- *
- * Based on the IA-32 version:
- * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
- */
-
-#include <linux/mm.h>
-#include <linux/hugetlb.h>
-#include <asm/cacheflush.h>
-#include <asm/machdep.h>
-
-unsigned int hpage_shift;
-EXPORT_SYMBOL(hpage_shift);
-
-int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
- pte_t *ptep, unsigned long trap, unsigned long flags,
- int ssize, unsigned int shift, unsigned int mmu_psize)
-{
- real_pte_t rpte;
- unsigned long vpn;
- unsigned long old_pte, new_pte;
- unsigned long rflags, pa;
- long slot, offset;
-
- BUG_ON(shift != mmu_psize_defs[mmu_psize].shift);
-
- /* Search the Linux page table for a match with va */
- vpn = hpt_vpn(ea, vsid, ssize);
-
- /*
- * At this point, we have a pte (old_pte) which can be used to build
- * or update an HPTE. There are 2 cases:
- *
- * 1. There is a valid (present) pte with no associated HPTE (this is
- * the most common case)
- * 2. There is a valid (present) pte with an associated HPTE. The
- * current values of the pp bits in the HPTE prevent access
- * because we are doing software DIRTY bit management and the
- * page is currently not DIRTY.
- */
-
-
- do {
- old_pte = pte_val(*ptep);
- /* If PTE busy, retry the access */
- if (unlikely(old_pte & H_PAGE_BUSY))
- return 0;
- /* If PTE permissions don't match, take page fault */
- if (unlikely(!check_pte_access(access, old_pte)))
- return 1;
-
- /*
- * Try to lock the PTE, add ACCESSED and DIRTY if it was
- * a write access
- */
- new_pte = old_pte | H_PAGE_BUSY | _PAGE_ACCESSED;
- if (access & _PAGE_WRITE)
- new_pte |= _PAGE_DIRTY;
- } while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
-
- /* Make sure this is a hugetlb entry */
- if (old_pte & (H_PAGE_THP_HUGE | _PAGE_DEVMAP))
- return 0;
-
- rflags = htab_convert_pte_flags(new_pte, flags);
- if (unlikely(mmu_psize == MMU_PAGE_16G))
- offset = PTRS_PER_PUD;
- else
- offset = PTRS_PER_PMD;
- rpte = __real_pte(__pte(old_pte), ptep, offset);
-
- if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
- /*
- * No CPU has hugepages but lacks no execute, so we
- * don't need to worry about that case
- */
- rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
-
- /* Check if pte already has an hpte (case 2) */
- if (unlikely(old_pte & H_PAGE_HASHPTE)) {
- /* There MIGHT be an HPTE for this pte */
- unsigned long gslot;
-
- gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte, 0);
- if (mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn, mmu_psize,
- mmu_psize, ssize, flags) == -1)
- old_pte &= ~_PAGE_HPTEFLAGS;
- }
-
- if (likely(!(old_pte & H_PAGE_HASHPTE))) {
- unsigned long hash = hpt_hash(vpn, shift, ssize);
-
- pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
-
- /* clear HPTE slot informations in new PTE */
- new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
-
- slot = hpte_insert_repeating(hash, vpn, pa, rflags, 0,
- mmu_psize, ssize);
-
- /*
- * Hypervisor failure. Restore old pte and return -1
- * similar to __hash_page_*
- */
- if (unlikely(slot == -2)) {
- *ptep = __pte(old_pte);
- hash_failure_debug(ea, access, vsid, trap, ssize,
- mmu_psize, mmu_psize, old_pte);
- return -1;
- }
-
- new_pte |= pte_set_hidx(ptep, rpte, 0, slot, offset);
- }
-
- /*
- * No need to use ldarx/stdcx here
- */
- *ptep = __pte(new_pte & ~H_PAGE_BUSY);
- return 0;
-}
-
-pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
- unsigned long addr, pte_t *ptep)
-{
- unsigned long pte_val;
- /*
- * Clear the _PAGE_PRESENT so that no hardware parallel update is
- * possible. Also keep the pte_present true so that we don't take
- * wrong fault.
- */
- pte_val = pte_update(vma->vm_mm, addr, ptep,
- _PAGE_PRESENT, _PAGE_INVALID, 1);
-
- return __pte(pte_val);
-}
-
-void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
- pte_t *ptep, pte_t old_pte, pte_t pte)
-{
-
- if (radix_enabled())
- return radix__huge_ptep_modify_prot_commit(vma, addr, ptep,
- old_pte, pte);
- set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
-}
-
-void hugetlbpage_init_default(void)
-{
- /* Set default large page size. Currently, we pick 16M or 1M
- * depending on what is available
- */
- if (mmu_psize_defs[MMU_PAGE_16M].shift)
- hpage_shift = mmu_psize_defs[MMU_PAGE_16M].shift;
- else if (mmu_psize_defs[MMU_PAGE_1M].shift)
- hpage_shift = mmu_psize_defs[MMU_PAGE_1M].shift;
- else if (mmu_psize_defs[MMU_PAGE_2M].shift)
- hpage_shift = mmu_psize_defs[MMU_PAGE_2M].shift;
-}
diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c
index d8279bfe68ea..9342e79870df 100644
--- a/arch/powerpc/mm/book3s64/hash_native.c
+++ b/arch/powerpc/mm/book3s64/hash_native.c
@@ -43,109 +43,28 @@
static DEFINE_RAW_SPINLOCK(native_tlbie_lock);
-static inline void tlbiel_hash_set_isa206(unsigned int set, unsigned int is)
-{
- unsigned long rb;
-
- rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
-
- asm volatile("tlbiel %0" : : "r" (rb));
-}
+#ifdef CONFIG_LOCKDEP
+static struct lockdep_map hpte_lock_map =
+ STATIC_LOCKDEP_MAP_INIT("hpte_lock", &hpte_lock_map);
-/*
- * tlbiel instruction for hash, set invalidation
- * i.e., r=1 and is=01 or is=10 or is=11
- */
-static __always_inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int is,
- unsigned int pid,
- unsigned int ric, unsigned int prs)
+static void acquire_hpte_lock(void)
{
- unsigned long rb;
- unsigned long rs;
- unsigned int r = 0; /* hash format */
-
- rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
- rs = ((unsigned long)pid << PPC_BITLSHIFT(31));
-
- asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4)
- : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "i"(r)
- : "memory");
+ lock_map_acquire(&hpte_lock_map);
}
-
-static void tlbiel_all_isa206(unsigned int num_sets, unsigned int is)
+static void release_hpte_lock(void)
{
- unsigned int set;
-
- asm volatile("ptesync": : :"memory");
-
- for (set = 0; set < num_sets; set++)
- tlbiel_hash_set_isa206(set, is);
-
- ppc_after_tlbiel_barrier();
+ lock_map_release(&hpte_lock_map);
}
-
-static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
+#else
+static void acquire_hpte_lock(void)
{
- unsigned int set;
-
- asm volatile("ptesync": : :"memory");
-
- /*
- * Flush the partition table cache if this is HV mode.
- */
- if (early_cpu_has_feature(CPU_FTR_HVMODE))
- tlbiel_hash_set_isa300(0, is, 0, 2, 0);
-
- /*
- * Now invalidate the process table cache. UPRT=0 HPT modes (what
- * current hardware implements) do not use the process table, but
- * add the flushes anyway.
- *
- * From ISA v3.0B p. 1078:
- * The following forms are invalid.
- * * PRS=1, R=0, and RIC!=2 (The only process-scoped
- * HPT caching is of the Process Table.)
- */
- tlbiel_hash_set_isa300(0, is, 0, 2, 1);
-
- /*
- * Then flush the sets of the TLB proper. Hash mode uses
- * partition scoped TLB translations, which may be flushed
- * in !HV mode.
- */
- for (set = 0; set < num_sets; set++)
- tlbiel_hash_set_isa300(set, is, 0, 0, 0);
-
- ppc_after_tlbiel_barrier();
-
- asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
}
-void hash__tlbiel_all(unsigned int action)
+static void release_hpte_lock(void)
{
- unsigned int is;
-
- switch (action) {
- case TLB_INVAL_SCOPE_GLOBAL:
- is = 3;
- break;
- case TLB_INVAL_SCOPE_LPID:
- is = 2;
- break;
- default:
- BUG();
- }
-
- if (early_cpu_has_feature(CPU_FTR_ARCH_300))
- tlbiel_all_isa300(POWER9_TLB_SETS_HASH, is);
- else if (early_cpu_has_feature(CPU_FTR_ARCH_207S))
- tlbiel_all_isa206(POWER8_TLB_SETS, is);
- else if (early_cpu_has_feature(CPU_FTR_ARCH_206))
- tlbiel_all_isa206(POWER7_TLB_SETS, is);
- else
- WARN(1, "%s called on pre-POWER7 CPU\n", __func__);
}
+#endif
static inline unsigned long ___tlbie(unsigned long vpn, int psize,
int apsize, int ssize)
@@ -267,7 +186,7 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
va |= ssize << 8;
sllp = get_sllp_encoding(apsize);
va |= sllp << 5;
- asm volatile(ASM_FTR_IFSET("tlbiel %0", "tlbiel %0,0", %1)
+ asm volatile(ASM_FTR_IFSET("tlbiel %0", PPC_TLBIEL_v205(%0, 0), %1)
: : "r" (va), "i" (CPU_FTR_ARCH_206)
: "memory");
break;
@@ -286,7 +205,7 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
*/
va |= (vpn & 0xfe);
va |= 1; /* L */
- asm volatile(ASM_FTR_IFSET("tlbiel %0", "tlbiel %0,1", %1)
+ asm volatile(ASM_FTR_IFSET("tlbiel %0", PPC_TLBIEL_v205(%0, 1), %1)
: : "r" (va), "i" (CPU_FTR_ARCH_206)
: "memory");
break;
@@ -324,6 +243,7 @@ static inline void native_lock_hpte(struct hash_pte *hptep)
{
unsigned long *word = (unsigned long *)&hptep->v;
+ acquire_hpte_lock();
while (1) {
if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word))
break;
@@ -338,6 +258,7 @@ static inline void native_unlock_hpte(struct hash_pte *hptep)
{
unsigned long *word = (unsigned long *)&hptep->v;
+ release_hpte_lock();
clear_bit_unlock(HPTE_LOCK_BIT, word);
}
@@ -347,8 +268,11 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
{
struct hash_pte *hptep = htab_address + hpte_group;
unsigned long hpte_v, hpte_r;
+ unsigned long flags;
int i;
+ local_irq_save(flags);
+
if (!(vflags & HPTE_V_BOLTED)) {
DBG_LOW(" insert(group=%lx, vpn=%016lx, pa=%016lx,"
" rflags=%lx, vflags=%lx, psize=%d)\n",
@@ -367,8 +291,10 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
hptep++;
}
- if (i == HPTES_PER_GROUP)
+ if (i == HPTES_PER_GROUP) {
+ local_irq_restore(flags);
return -1;
+ }
hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
@@ -390,10 +316,13 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
* Now set the first dword including the valid bit
* NOTE: this also unlocks the hpte
*/
+ release_hpte_lock();
hptep->v = cpu_to_be64(hpte_v);
__asm__ __volatile__ ("ptesync" : : : "memory");
+ local_irq_restore(flags);
+
return i | (!!(vflags & HPTE_V_SECONDARY) << 3);
}
@@ -431,6 +360,7 @@ static long native_hpte_remove(unsigned long hpte_group)
return -1;
/* Invalidate the hpte. NOTE: this also unlocks it */
+ release_hpte_lock();
hptep->v = 0;
return i;
@@ -443,6 +373,9 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
struct hash_pte *hptep = htab_address + slot;
unsigned long hpte_v, want_v;
int ret = 0, local = 0;
+ unsigned long irqflags;
+
+ local_irq_save(irqflags);
want_v = hpte_encode_avpn(vpn, bpsize, ssize);
@@ -486,6 +419,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
if (!(flags & HPTE_NOHPTE_UPDATE))
tlbie(vpn, bpsize, apsize, ssize, local);
+ local_irq_restore(irqflags);
+
return ret;
}
@@ -549,6 +484,9 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
unsigned long vsid;
long slot;
struct hash_pte *hptep;
+ unsigned long flags;
+
+ local_irq_save(flags);
vsid = get_kernel_vsid(ea, ssize);
vpn = hpt_vpn(ea, vsid, ssize);
@@ -567,6 +505,8 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
* actual page size will be same.
*/
tlbie(vpn, psize, psize, ssize, 0);
+
+ local_irq_restore(flags);
}
/*
@@ -580,6 +520,9 @@ static int native_hpte_removebolted(unsigned long ea, int psize, int ssize)
unsigned long vsid;
long slot;
struct hash_pte *hptep;
+ unsigned long flags;
+
+ local_irq_save(flags);
vsid = get_kernel_vsid(ea, ssize);
vpn = hpt_vpn(ea, vsid, ssize);
@@ -597,6 +540,9 @@ static int native_hpte_removebolted(unsigned long ea, int psize, int ssize)
/* Invalidate the TLB */
tlbie(vpn, psize, psize, ssize, 0);
+
+ local_irq_restore(flags);
+
return 0;
}
@@ -621,10 +567,11 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
/* recheck with locks held */
hpte_v = hpte_get_old_v(hptep);
- if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
+ if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
/* Invalidate the hpte. NOTE: this also unlocks it */
+ release_hpte_lock();
hptep->v = 0;
- else
+ } else
native_unlock_hpte(hptep);
}
/*
@@ -684,10 +631,8 @@ static void native_hugepage_invalidate(unsigned long vsid,
hpte_v = hpte_get_old_v(hptep);
if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
- /*
- * Invalidate the hpte. NOTE: this also unlocks it
- */
-
+ /* Invalidate the hpte. NOTE: this also unlocks it */
+ release_hpte_lock();
hptep->v = 0;
} else
native_unlock_hpte(hptep);
@@ -869,8 +814,10 @@ static void native_flush_hash_range(unsigned long number, int local)
if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
native_unlock_hpte(hptep);
- else
+ else {
+ release_hpte_lock();
hptep->v = 0;
+ }
} pte_iterate_hashed_end();
}
diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c
index ad5eff097d31..51f48984abca 100644
--- a/arch/powerpc/mm/book3s64/hash_pgtable.c
+++ b/arch/powerpc/mm/book3s64/hash_pgtable.c
@@ -13,10 +13,10 @@
#include <asm/sections.h>
#include <asm/mmu.h>
#include <asm/tlb.h>
+#include <asm/firmware.h>
#include <mm/mmu_decl.h>
-#define CREATE_TRACE_POINTS
#include <trace/events/thp.h>
#if H_PGTABLE_RANGE > (USER_VSID_RANGE * (TASK_SIZE_USER64 / TASK_CONTEXT_SIZE))
@@ -256,7 +256,7 @@ pmd_t hash__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addres
* the __collapse_huge_page_copy can result in copying
* the old content.
*/
- flush_tlb_pmd_range(vma->vm_mm, &pmd, address);
+ flush_hash_table_pmd_range(vma->vm_mm, &pmd, address);
return pmd;
}
@@ -378,7 +378,7 @@ int hash__has_transparent_hugepage(void)
if (mmu_psize_defs[MMU_PAGE_16M].shift != PMD_SHIFT)
return 0;
/*
- * We need to make sure that we support 16MB hugepage in a segement
+ * We need to make sure that we support 16MB hugepage in a segment
* with base page size 64K or 4K. We only enable THP with a PAGE_SIZE
* of 64K.
*/
@@ -404,7 +404,8 @@ EXPORT_SYMBOL_GPL(hash__has_transparent_hugepage);
struct change_memory_parms {
unsigned long start, end, newpp;
- unsigned int step, nr_cpus, master_cpu;
+ unsigned int step, nr_cpus;
+ atomic_t master_cpu;
atomic_t cpu_counter;
};
@@ -478,7 +479,8 @@ static int change_memory_range_fn(void *data)
{
struct change_memory_parms *parms = data;
- if (parms->master_cpu != smp_processor_id())
+ // First CPU goes through, all others wait.
+ if (atomic_xchg(&parms->master_cpu, 1) == 1)
return chmem_secondary_loop(parms);
// Wait for all but one CPU (this one) to call-in
@@ -516,7 +518,7 @@ static bool hash__change_memory_range(unsigned long start, unsigned long end,
chmem_parms.end = end;
chmem_parms.step = step;
chmem_parms.newpp = newpp;
- chmem_parms.master_cpu = smp_processor_id();
+ atomic_set(&chmem_parms.master_cpu, 0);
cpus_read_lock();
@@ -541,7 +543,7 @@ void hash__mark_rodata_ro(void)
unsigned long start, end, pp;
start = (unsigned long)_stext;
- end = (unsigned long)__init_begin;
+ end = (unsigned long)__end_rodata;
pp = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL_ROX), HPTE_USE_KERNEL_KEY);
diff --git a/arch/powerpc/mm/book3s64/hash_tlb.c b/arch/powerpc/mm/book3s64/hash_tlb.c
index eb0bccaf221e..a64ea0a7ef96 100644
--- a/arch/powerpc/mm/book3s64/hash_tlb.c
+++ b/arch/powerpc/mm/book3s64/hash_tlb.c
@@ -221,7 +221,7 @@ void __flush_hash_table_range(unsigned long start, unsigned long end)
local_irq_restore(flags);
}
-void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr)
+void flush_hash_table_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr)
{
pte_t *pte;
pte_t *start_pte;
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index cfd45245d009..6df4c6d38b66 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -37,6 +37,9 @@
#include <linux/cpu.h>
#include <linux/pgtable.h>
#include <linux/debugfs.h>
+#include <linux/random.h>
+#include <linux/elf-randomize.h>
+#include <linux/of_fdt.h>
#include <asm/interrupt.h>
#include <asm/processor.h>
@@ -46,7 +49,6 @@
#include <asm/types.h>
#include <linux/uaccess.h>
#include <asm/machdep.h>
-#include <asm/prom.h>
#include <asm/io.h>
#include <asm/eeh.h>
#include <asm/tlb.h>
@@ -99,8 +101,6 @@
*/
static unsigned long _SDR1;
-struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
-EXPORT_SYMBOL_GPL(mmu_psize_defs);
u8 hpte_page_sizes[1 << LP_BITS];
EXPORT_SYMBOL_GPL(hpte_page_sizes);
@@ -114,9 +114,6 @@ EXPORT_SYMBOL_GPL(mmu_linear_psize);
int mmu_virtual_psize = MMU_PAGE_4K;
int mmu_vmalloc_psize = MMU_PAGE_4K;
EXPORT_SYMBOL_GPL(mmu_vmalloc_psize);
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
-int mmu_vmemmap_psize = MMU_PAGE_4K;
-#endif
int mmu_io_psize = MMU_PAGE_4K;
int mmu_kernel_ssize = MMU_SEGSIZE_256M;
EXPORT_SYMBOL_GPL(mmu_kernel_ssize);
@@ -126,11 +123,8 @@ EXPORT_SYMBOL_GPL(mmu_slb_size);
#ifdef CONFIG_PPC_64K_PAGES
int mmu_ci_restrictions;
#endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
static u8 *linear_map_hash_slots;
static unsigned long linear_map_hash_count;
-static DEFINE_SPINLOCK(linear_map_hash_lock);
-#endif /* CONFIG_DEBUG_PAGEALLOC */
struct mmu_hash_ops mmu_hash_ops;
EXPORT_SYMBOL(mmu_hash_ops);
@@ -175,6 +169,110 @@ static struct mmu_psize_def mmu_psize_defaults_gp[] = {
},
};
+static inline void tlbiel_hash_set_isa206(unsigned int set, unsigned int is)
+{
+ unsigned long rb;
+
+ rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
+
+ asm volatile("tlbiel %0" : : "r" (rb));
+}
+
+/*
+ * tlbiel instruction for hash, set invalidation
+ * i.e., r=1 and is=01 or is=10 or is=11
+ */
+static __always_inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int is,
+ unsigned int pid,
+ unsigned int ric, unsigned int prs)
+{
+ unsigned long rb;
+ unsigned long rs;
+ unsigned int r = 0; /* hash format */
+
+ rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
+ rs = ((unsigned long)pid << PPC_BITLSHIFT(31));
+
+ asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4)
+ : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "i"(r)
+ : "memory");
+}
+
+
+static void tlbiel_all_isa206(unsigned int num_sets, unsigned int is)
+{
+ unsigned int set;
+
+ asm volatile("ptesync": : :"memory");
+
+ for (set = 0; set < num_sets; set++)
+ tlbiel_hash_set_isa206(set, is);
+
+ ppc_after_tlbiel_barrier();
+}
+
+static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
+{
+ unsigned int set;
+
+ asm volatile("ptesync": : :"memory");
+
+ /*
+ * Flush the partition table cache if this is HV mode.
+ */
+ if (early_cpu_has_feature(CPU_FTR_HVMODE))
+ tlbiel_hash_set_isa300(0, is, 0, 2, 0);
+
+ /*
+ * Now invalidate the process table cache. UPRT=0 HPT modes (what
+ * current hardware implements) do not use the process table, but
+ * add the flushes anyway.
+ *
+ * From ISA v3.0B p. 1078:
+ * The following forms are invalid.
+ * * PRS=1, R=0, and RIC!=2 (The only process-scoped
+ * HPT caching is of the Process Table.)
+ */
+ tlbiel_hash_set_isa300(0, is, 0, 2, 1);
+
+ /*
+ * Then flush the sets of the TLB proper. Hash mode uses
+ * partition scoped TLB translations, which may be flushed
+ * in !HV mode.
+ */
+ for (set = 0; set < num_sets; set++)
+ tlbiel_hash_set_isa300(set, is, 0, 0, 0);
+
+ ppc_after_tlbiel_barrier();
+
+ asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
+}
+
+void hash__tlbiel_all(unsigned int action)
+{
+ unsigned int is;
+
+ switch (action) {
+ case TLB_INVAL_SCOPE_GLOBAL:
+ is = 3;
+ break;
+ case TLB_INVAL_SCOPE_LPID:
+ is = 2;
+ break;
+ default:
+ BUG();
+ }
+
+ if (early_cpu_has_feature(CPU_FTR_ARCH_300))
+ tlbiel_all_isa300(POWER9_TLB_SETS_HASH, is);
+ else if (early_cpu_has_feature(CPU_FTR_ARCH_207S))
+ tlbiel_all_isa206(POWER8_TLB_SETS, is);
+ else if (early_cpu_has_feature(CPU_FTR_ARCH_206))
+ tlbiel_all_isa206(POWER7_TLB_SETS, is);
+ else
+ WARN(1, "%s called on pre-POWER7 CPU\n", __func__);
+}
+
/*
* 'R' and 'C' update notes:
* - Under pHyp or KVM, the updatepp path will not set C, thus it *will*
@@ -307,7 +405,7 @@ repeat:
ssize);
if (ret == -1) {
/*
- * Try to to keep bolted entries in primary.
+ * Try to keep bolted entries in primary.
* Remove non bolted entries and try insert again
*/
ret = mmu_hash_ops.hpte_remove(hpteg);
@@ -326,11 +424,9 @@ repeat:
break;
cond_resched();
-#ifdef CONFIG_DEBUG_PAGEALLOC
- if (debug_pagealloc_enabled() &&
+ if (debug_pagealloc_enabled_or_kfence() &&
(paddr >> PAGE_SHIFT) < linear_map_hash_count)
linear_map_hash_slots[paddr >> PAGE_SHIFT] = ret | 0x80;
-#endif /* CONFIG_DEBUG_PAGEALLOC */
}
return ret < 0 ? ret : 0;
}
@@ -563,7 +659,7 @@ static int __init htab_dt_scan_hugepage_blocks(unsigned long node,
}
#endif /* CONFIG_HUGETLB_PAGE */
-static void mmu_psize_set_default_penc(void)
+static void __init mmu_psize_set_default_penc(void)
{
int bpsize, apsize;
for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++)
@@ -573,7 +669,7 @@ static void mmu_psize_set_default_penc(void)
#ifdef CONFIG_PPC_64K_PAGES
-static bool might_have_hea(void)
+static bool __init might_have_hea(void)
{
/*
* The HEA ethernet adapter requires awareness of the
@@ -644,7 +740,7 @@ static void __init htab_scan_page_sizes(void)
* low-order N bits as the encoding for the 2^(12+N) byte page size
* (if it exists).
*/
-static void init_hpte_page_sizes(void)
+static void __init init_hpte_page_sizes(void)
{
long int ap, bp;
long int shift, penc;
@@ -677,7 +773,7 @@ static void __init htab_init_page_sizes(void)
bool aligned = true;
init_hpte_page_sizes();
- if (!debug_pagealloc_enabled()) {
+ if (!debug_pagealloc_enabled_or_kfence()) {
/*
* Pick a size for the linear mapping. Currently, we only
* support 16M, 1M and 4K which is the default
@@ -965,8 +1061,7 @@ static void __init htab_initialize(void)
prot = pgprot_val(PAGE_KERNEL);
-#ifdef CONFIG_DEBUG_PAGEALLOC
- if (debug_pagealloc_enabled()) {
+ if (debug_pagealloc_enabled_or_kfence()) {
linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT;
linear_map_hash_slots = memblock_alloc_try_nid(
linear_map_hash_count, 1, MEMBLOCK_LOW_LIMIT,
@@ -975,7 +1070,6 @@ static void __init htab_initialize(void)
panic("%s: Failed to allocate %lu bytes max_addr=%pa\n",
__func__, linear_map_hash_count, &ppc64_rma_size);
}
-#endif /* CONFIG_DEBUG_PAGEALLOC */
/* create bolted the linear mapping in the hash table */
for_each_mem_range(i, &base, &end) {
@@ -1091,7 +1185,7 @@ void __init hash__early_init_mmu(void)
ps3_early_mm_init();
else if (firmware_has_feature(FW_FEATURE_LPAR))
hpte_init_pseries();
- else if (IS_ENABLED(CONFIG_PPC_NATIVE))
+ else if (IS_ENABLED(CONFIG_PPC_HASH_MMU_NATIVE))
hpte_init_native();
if (!mmu_hash_ops.hpte_insert)
@@ -1165,7 +1259,6 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
return pp;
}
-#ifdef CONFIG_PPC_MM_SLICES
static unsigned int get_paca_psize(unsigned long addr)
{
unsigned char *psizes;
@@ -1182,12 +1275,6 @@ static unsigned int get_paca_psize(unsigned long addr)
return (psizes[index >> 1] >> (mask_index * 4)) & 0xF;
}
-#else
-unsigned int get_paca_psize(unsigned long addr)
-{
- return get_paca()->mm_ctx_user_psize;
-}
-#endif
/*
* Demote a segment to using 4k pages.
@@ -1244,7 +1331,7 @@ static int subpage_protection(struct mm_struct *mm, unsigned long ea)
spp >>= 30 - 2 * ((ea >> 12) & 0xf);
/*
- * 0 -> full premission
+ * 0 -> full permission
* 1 -> Read only
* 2 -> no access.
* We return the flag that need to be cleared.
@@ -1522,8 +1609,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap,
}
EXPORT_SYMBOL_GPL(hash_page);
-DECLARE_INTERRUPT_HANDLER(__do_hash_fault);
-DEFINE_INTERRUPT_HANDLER(__do_hash_fault)
+DEFINE_INTERRUPT_HANDLER(do_hash_fault)
{
unsigned long ea = regs->dar;
unsigned long dsisr = regs->dsisr;
@@ -1566,7 +1652,7 @@ DEFINE_INTERRUPT_HANDLER(__do_hash_fault)
err = hash_page_mm(mm, ea, access, TRAP(regs), flags);
if (unlikely(err < 0)) {
- // failed to instert a hash PTE due to an hypervisor error
+ // failed to insert a hash PTE due to an hypervisor error
if (user_mode(regs)) {
if (IS_ENABLED(CONFIG_PPC_SUBPAGE_PROT) && err == -2)
_exception(SIGSEGV, regs, SEGV_ACCERR, ea);
@@ -1582,36 +1668,6 @@ DEFINE_INTERRUPT_HANDLER(__do_hash_fault)
}
}
-/*
- * The _RAW interrupt entry checks for the in_nmi() case before
- * running the full handler.
- */
-DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault)
-{
- /*
- * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then
- * don't call hash_page, just fail the fault. This is required to
- * prevent re-entrancy problems in the hash code, namely perf
- * interrupts hitting while something holds H_PAGE_BUSY, and taking a
- * hash fault. See the comment in hash_preload().
- *
- * We come here as a result of a DSI at a point where we don't want
- * to call hash_page, such as when we are accessing memory (possibly
- * user memory) inside a PMU interrupt that occurred while interrupts
- * were soft-disabled. We want to invoke the exception handler for
- * the access, or panic if there isn't a handler.
- */
- if (unlikely(in_nmi())) {
- do_bad_page_fault_segv(regs);
- return 0;
- }
-
- __do_hash_fault(regs);
-
- return 0;
-}
-
-#ifdef CONFIG_PPC_MM_SLICES
static bool should_hash_preload(struct mm_struct *mm, unsigned long ea)
{
int psize = get_slice_psize(mm, ea);
@@ -1628,12 +1684,6 @@ static bool should_hash_preload(struct mm_struct *mm, unsigned long ea)
return true;
}
-#else
-static bool should_hash_preload(struct mm_struct *mm, unsigned long ea)
-{
- return true;
-}
-#endif
static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea,
bool is_exec, unsigned long trap)
@@ -1677,26 +1727,18 @@ static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea,
#endif /* CONFIG_PPC_64K_PAGES */
/*
- * __hash_page_* must run with interrupts off, as it sets the
- * H_PAGE_BUSY bit. It's possible for perf interrupts to hit at any
- * time and may take a hash fault reading the user stack, see
- * read_user_stack_slow() in the powerpc/perf code.
- *
- * If that takes a hash fault on the same page as we lock here, it
- * will bail out when seeing H_PAGE_BUSY set, and retry the access
- * leading to an infinite loop.
+ * __hash_page_* must run with interrupts off, including PMI interrupts
+ * off, as it sets the H_PAGE_BUSY bit.
*
- * Disabling interrupts here does not prevent perf interrupts, but it
- * will prevent them taking hash faults (see the NMI test in
- * do_hash_page), then read_user_stack's copy_from_user_nofault will
- * fail and perf will fall back to read_user_stack_slow(), which
- * walks the Linux page tables.
+ * It's otherwise possible for perf interrupts to hit at any time and
+ * may take a hash fault reading the user stack, which could take a
+ * hash miss and deadlock on the same H_PAGE_BUSY bit.
*
* Interrupts must also be off for the duration of the
* mm_is_thread_local test and update, to prevent preempt running the
* mm on another CPU (XXX: this may be racy vs kthread_use_mm).
*/
- local_irq_save(flags);
+ powerpc_local_irq_pmu_save(flags);
/* Is that local to this CPU ? */
if (mm_is_thread_local(mm))
@@ -1721,7 +1763,7 @@ static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea,
mm_ctx_user_psize(&mm->context),
pte_val(*ptep));
- local_irq_restore(flags);
+ powerpc_local_irq_pmu_restore(flags);
}
/*
@@ -1732,7 +1774,7 @@ static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea,
*
* This must always be called with the pte lock held.
*/
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
+void __update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
pte_t *ptep)
{
/*
@@ -1742,9 +1784,6 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
unsigned long trap;
bool is_exec;
- if (radix_enabled())
- return;
-
/* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
if (!pte_young(*ptep) || address >= TASK_SIZE)
return;
@@ -1941,7 +1980,9 @@ repeat:
return slot;
}
-#ifdef CONFIG_DEBUG_PAGEALLOC
+#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
+static DEFINE_RAW_SPINLOCK(linear_map_hash_lock);
+
static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
{
unsigned long hash;
@@ -1956,15 +1997,18 @@ static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
if (!vsid)
return;
+ if (linear_map_hash_slots[lmi] & 0x80)
+ return;
+
ret = hpte_insert_repeating(hash, vpn, __pa(vaddr), mode,
HPTE_V_BOLTED,
mmu_linear_psize, mmu_kernel_ssize);
BUG_ON (ret < 0);
- spin_lock(&linear_map_hash_lock);
+ raw_spin_lock(&linear_map_hash_lock);
BUG_ON(linear_map_hash_slots[lmi] & 0x80);
linear_map_hash_slots[lmi] = ret | 0x80;
- spin_unlock(&linear_map_hash_lock);
+ raw_spin_unlock(&linear_map_hash_lock);
}
static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
@@ -1974,11 +2018,14 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize);
- spin_lock(&linear_map_hash_lock);
- BUG_ON(!(linear_map_hash_slots[lmi] & 0x80));
+ raw_spin_lock(&linear_map_hash_lock);
+ if (!(linear_map_hash_slots[lmi] & 0x80)) {
+ raw_spin_unlock(&linear_map_hash_lock);
+ return;
+ }
hidx = linear_map_hash_slots[lmi] & 0x7f;
linear_map_hash_slots[lmi] = 0;
- spin_unlock(&linear_map_hash_lock);
+ raw_spin_unlock(&linear_map_hash_lock);
if (hidx & _PTEIDX_SECONDARY)
hash = ~hash;
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
@@ -2006,7 +2053,7 @@ void hash__kernel_map_pages(struct page *page, int numpages, int enable)
}
local_irq_restore(flags);
}
-#endif /* CONFIG_DEBUG_PAGEALLOC */
+#endif /* CONFIG_DEBUG_PAGEALLOC || CONFIG_KFENCE */
void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base,
phys_addr_t first_memblock_size)
@@ -2086,3 +2133,20 @@ void __init print_system_hash_info(void)
if (htab_hash_mask)
pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask);
}
+
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+ /*
+ * If we are using 1TB segments and we are allowed to randomise
+ * the heap, we can put it above 1TB so it is backed by a 1TB
+ * segment. Otherwise the heap will be in the bottom 1TB
+ * which always uses 256MB segments and this may result in a
+ * performance penalty.
+ */
+ if (is_32bit_task())
+ return randomize_page(mm->brk, SZ_32M);
+ else if (!radix_enabled() && mmu_highuser_ssize == MMU_SEGSIZE_1T)
+ return randomize_page(max_t(unsigned long, mm->brk, SZ_1T), SZ_1G);
+ else
+ return randomize_page(mm->brk, SZ_1G);
+}
diff --git a/arch/powerpc/mm/book3s64/hugetlbpage.c b/arch/powerpc/mm/book3s64/hugetlbpage.c
new file mode 100644
index 000000000000..3bc0eb21b2a0
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/hugetlbpage.c
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PPC64 Huge TLB Page Support for hash based MMUs (POWER4 and later)
+ *
+ * Copyright (C) 2003 David Gibson, IBM Corporation.
+ *
+ * Based on the IA-32 version:
+ * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
+ */
+
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <asm/cacheflush.h>
+#include <asm/machdep.h>
+
+unsigned int hpage_shift;
+EXPORT_SYMBOL(hpage_shift);
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
+ pte_t *ptep, unsigned long trap, unsigned long flags,
+ int ssize, unsigned int shift, unsigned int mmu_psize)
+{
+ real_pte_t rpte;
+ unsigned long vpn;
+ unsigned long old_pte, new_pte;
+ unsigned long rflags, pa;
+ long slot, offset;
+
+ BUG_ON(shift != mmu_psize_defs[mmu_psize].shift);
+
+ /* Search the Linux page table for a match with va */
+ vpn = hpt_vpn(ea, vsid, ssize);
+
+ /*
+ * At this point, we have a pte (old_pte) which can be used to build
+ * or update an HPTE. There are 2 cases:
+ *
+ * 1. There is a valid (present) pte with no associated HPTE (this is
+ * the most common case)
+ * 2. There is a valid (present) pte with an associated HPTE. The
+ * current values of the pp bits in the HPTE prevent access
+ * because we are doing software DIRTY bit management and the
+ * page is currently not DIRTY.
+ */
+
+
+ do {
+ old_pte = pte_val(*ptep);
+ /* If PTE busy, retry the access */
+ if (unlikely(old_pte & H_PAGE_BUSY))
+ return 0;
+ /* If PTE permissions don't match, take page fault */
+ if (unlikely(!check_pte_access(access, old_pte)))
+ return 1;
+
+ /*
+ * Try to lock the PTE, add ACCESSED and DIRTY if it was
+ * a write access
+ */
+ new_pte = old_pte | H_PAGE_BUSY | _PAGE_ACCESSED;
+ if (access & _PAGE_WRITE)
+ new_pte |= _PAGE_DIRTY;
+ } while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
+
+ /* Make sure this is a hugetlb entry */
+ if (old_pte & (H_PAGE_THP_HUGE | _PAGE_DEVMAP))
+ return 0;
+
+ rflags = htab_convert_pte_flags(new_pte, flags);
+ if (unlikely(mmu_psize == MMU_PAGE_16G))
+ offset = PTRS_PER_PUD;
+ else
+ offset = PTRS_PER_PMD;
+ rpte = __real_pte(__pte(old_pte), ptep, offset);
+
+ if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
+ /*
+ * No CPU has hugepages but lacks no execute, so we
+ * don't need to worry about that case
+ */
+ rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
+
+ /* Check if pte already has an hpte (case 2) */
+ if (unlikely(old_pte & H_PAGE_HASHPTE)) {
+ /* There MIGHT be an HPTE for this pte */
+ unsigned long gslot;
+
+ gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte, 0);
+ if (mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn, mmu_psize,
+ mmu_psize, ssize, flags) == -1)
+ old_pte &= ~_PAGE_HPTEFLAGS;
+ }
+
+ if (likely(!(old_pte & H_PAGE_HASHPTE))) {
+ unsigned long hash = hpt_hash(vpn, shift, ssize);
+
+ pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
+
+ /* clear HPTE slot informations in new PTE */
+ new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
+
+ slot = hpte_insert_repeating(hash, vpn, pa, rflags, 0,
+ mmu_psize, ssize);
+
+ /*
+ * Hypervisor failure. Restore old pte and return -1
+ * similar to __hash_page_*
+ */
+ if (unlikely(slot == -2)) {
+ *ptep = __pte(old_pte);
+ hash_failure_debug(ea, access, vsid, trap, ssize,
+ mmu_psize, mmu_psize, old_pte);
+ return -1;
+ }
+
+ new_pte |= pte_set_hidx(ptep, rpte, 0, slot, offset);
+ }
+
+ /*
+ * No need to use ldarx/stdcx here
+ */
+ *ptep = __pte(new_pte & ~H_PAGE_BUSY);
+ return 0;
+}
+#endif
+
+pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
+{
+ unsigned long pte_val;
+ /*
+ * Clear the _PAGE_PRESENT so that no hardware parallel update is
+ * possible. Also keep the pte_present true so that we don't take
+ * wrong fault.
+ */
+ pte_val = pte_update(vma->vm_mm, addr, ptep,
+ _PAGE_PRESENT, _PAGE_INVALID, 1);
+
+ return __pte(pte_val);
+}
+
+void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
+ pte_t *ptep, pte_t old_pte, pte_t pte)
+{
+
+ if (radix_enabled())
+ return radix__huge_ptep_modify_prot_commit(vma, addr, ptep,
+ old_pte, pte);
+ set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
+}
+
+void __init hugetlbpage_init_defaultsize(void)
+{
+ /* Set default large page size. Currently, we pick 16M or 1M
+ * depending on what is available
+ */
+ if (mmu_psize_defs[MMU_PAGE_16M].shift)
+ hpage_shift = mmu_psize_defs[MMU_PAGE_16M].shift;
+ else if (mmu_psize_defs[MMU_PAGE_1M].shift)
+ hpage_shift = mmu_psize_defs[MMU_PAGE_1M].shift;
+ else if (mmu_psize_defs[MMU_PAGE_2M].shift)
+ hpage_shift = mmu_psize_defs[MMU_PAGE_2M].shift;
+}
diff --git a/arch/powerpc/mm/book3s64/iommu_api.c b/arch/powerpc/mm/book3s64/iommu_api.c
index cd18e94d0843..7fcfba162e0d 100644
--- a/arch/powerpc/mm/book3s64/iommu_api.c
+++ b/arch/powerpc/mm/book3s64/iommu_api.c
@@ -305,24 +305,6 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
}
EXPORT_SYMBOL_GPL(mm_iommu_lookup);
-struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm,
- unsigned long ua, unsigned long size)
-{
- struct mm_iommu_table_group_mem_t *mem, *ret = NULL;
-
- list_for_each_entry_lockless(mem, &mm->context.iommu_group_mem_list,
- next) {
- if ((mem->ua <= ua) &&
- (ua + size <= mem->ua +
- (mem->entries << PAGE_SHIFT))) {
- ret = mem;
- break;
- }
- }
-
- return ret;
-}
-
struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm,
unsigned long ua, unsigned long entries)
{
@@ -369,56 +351,6 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
}
EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa);
-long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
- unsigned long ua, unsigned int pageshift, unsigned long *hpa)
-{
- const long entry = (ua - mem->ua) >> PAGE_SHIFT;
- unsigned long *pa;
-
- if (entry >= mem->entries)
- return -EFAULT;
-
- if (pageshift > mem->pageshift)
- return -EFAULT;
-
- if (!mem->hpas) {
- *hpa = mem->dev_hpa + (ua - mem->ua);
- return 0;
- }
-
- pa = (void *) vmalloc_to_phys(&mem->hpas[entry]);
- if (!pa)
- return -EFAULT;
-
- *hpa = (*pa & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);
-
- return 0;
-}
-
-extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua)
-{
- struct mm_iommu_table_group_mem_t *mem;
- long entry;
- void *va;
- unsigned long *pa;
-
- mem = mm_iommu_lookup_rm(mm, ua, PAGE_SIZE);
- if (!mem)
- return;
-
- if (mem->dev_hpa != MM_IOMMU_TABLE_INVALID_HPA)
- return;
-
- entry = (ua - mem->ua) >> PAGE_SHIFT;
- va = &mem->hpas[entry];
-
- pa = (void *) vmalloc_to_phys(va);
- if (!pa)
- return;
-
- *pa |= MM_IOMMU_TABLE_GROUP_PAGE_DIRTY;
-}
-
bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
unsigned int pageshift, unsigned long *size)
{
diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c
index c10fc8a72fb3..c766e4c26e42 100644
--- a/arch/powerpc/mm/book3s64/mmu_context.c
+++ b/arch/powerpc/mm/book3s64/mmu_context.c
@@ -31,7 +31,8 @@ static int alloc_context_id(int min_id, int max_id)
return ida_alloc_range(&mmu_context_ida, min_id, max_id, GFP_KERNEL);
}
-void hash__reserve_context_id(int id)
+#ifdef CONFIG_PPC_64S_HASH_MMU
+void __init hash__reserve_context_id(int id)
{
int result = ida_alloc_range(&mmu_context_ida, id, id, GFP_KERNEL);
@@ -50,7 +51,9 @@ int hash__alloc_context_id(void)
return alloc_context_id(MIN_USER_CONTEXT, max);
}
EXPORT_SYMBOL_GPL(hash__alloc_context_id);
+#endif
+#ifdef CONFIG_PPC_64S_HASH_MMU
static int realloc_context_ids(mm_context_t *ctx)
{
int i, id;
@@ -150,6 +153,13 @@ void hash__setup_new_exec(void)
slb_setup_new_exec();
}
+#else
+static inline int hash__init_new_context(struct mm_struct *mm)
+{
+ BUILD_BUG();
+ return 0;
+}
+#endif
static int radix__init_new_context(struct mm_struct *mm)
{
@@ -175,7 +185,9 @@ static int radix__init_new_context(struct mm_struct *mm)
*/
asm volatile("ptesync;isync" : : : "memory");
+#ifdef CONFIG_PPC_64S_HASH_MMU
mm->context.hash_context = NULL;
+#endif
return index;
}
@@ -213,14 +225,22 @@ EXPORT_SYMBOL_GPL(__destroy_context);
static void destroy_contexts(mm_context_t *ctx)
{
- int index, context_id;
+ if (radix_enabled()) {
+ ida_free(&mmu_context_ida, ctx->id);
+ } else {
+#ifdef CONFIG_PPC_64S_HASH_MMU
+ int index, context_id;
- for (index = 0; index < ARRAY_SIZE(ctx->extended_id); index++) {
- context_id = ctx->extended_id[index];
- if (context_id)
- ida_free(&mmu_context_ida, context_id);
+ for (index = 0; index < ARRAY_SIZE(ctx->extended_id); index++) {
+ context_id = ctx->extended_id[index];
+ if (context_id)
+ ida_free(&mmu_context_ida, context_id);
+ }
+ kfree(ctx->hash_context);
+#else
+ BUILD_BUG(); // radix_enabled() should be constant true
+#endif
}
- kfree(ctx->hash_context);
}
static void pmd_frag_destroy(void *pmd_frag)
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index 9e16c7b1a6c5..f6151a589298 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -6,6 +6,8 @@
#include <linux/sched.h>
#include <linux/mm_types.h>
#include <linux/memblock.h>
+#include <linux/memremap.h>
+#include <linux/pkeys.h>
#include <linux/debugfs.h>
#include <misc/cxl-base.h>
@@ -22,6 +24,13 @@
#include "internal.h"
+struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
+EXPORT_SYMBOL_GPL(mmu_psize_defs);
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+int mmu_vmemmap_psize = MMU_PAGE_4K;
+#endif
+
unsigned long __pmd_frag_nr;
EXPORT_SYMBOL(__pmd_frag_nr);
unsigned long __pmd_frag_size_shift;
@@ -207,17 +216,12 @@ void __init mmu_partition_table_init(void)
unsigned long patb_size = 1UL << PATB_SIZE_SHIFT;
unsigned long ptcr;
- BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 36), "Partition table size too large.");
/* Initialize the Partition Table with no entries */
partition_tb = memblock_alloc(patb_size, patb_size);
if (!partition_tb)
panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
__func__, patb_size, patb_size);
- /*
- * update partition table control register,
- * 64 K size.
- */
ptcr = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12);
set_ptcr_when_no_uv(ptcr);
powernv_set_nmmu_ptcr(ptcr);
@@ -328,7 +332,7 @@ static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm)
spin_lock(&mm->page_table_lock);
/*
* If we find pgtable_page set, we return
- * the allocated page with single fragement
+ * the allocated page with single fragment
* count.
*/
if (likely(!mm->context.pmd_frag)) {
@@ -526,3 +530,46 @@ static int __init pgtable_debugfs_setup(void)
return 0;
}
arch_initcall(pgtable_debugfs_setup);
+
+#if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_ARCH_HAS_MEMREMAP_COMPAT_ALIGN)
+/*
+ * Override the generic version in mm/memremap.c.
+ *
+ * With hash translation, the direct-map range is mapped with just one
+ * page size selected by htab_init_page_sizes(). Consult
+ * mmu_psize_defs[] to determine the minimum page size alignment.
+*/
+unsigned long memremap_compat_align(void)
+{
+ if (!radix_enabled()) {
+ unsigned int shift = mmu_psize_defs[mmu_linear_psize].shift;
+ return max(SUBSECTION_SIZE, 1UL << shift);
+ }
+
+ return SUBSECTION_SIZE;
+}
+EXPORT_SYMBOL_GPL(memremap_compat_align);
+#endif
+
+pgprot_t vm_get_page_prot(unsigned long vm_flags)
+{
+ unsigned long prot;
+
+ /* Radix supports execute-only, but protection_map maps X -> RX */
+ if (radix_enabled() && ((vm_flags & VM_ACCESS_FLAGS) == VM_EXEC)) {
+ prot = pgprot_val(PAGE_EXECONLY);
+ } else {
+ prot = pgprot_val(protection_map[vm_flags &
+ (VM_ACCESS_FLAGS | VM_SHARED)]);
+ }
+
+ if (vm_flags & VM_SAO)
+ prot |= _PAGE_SAO;
+
+#ifdef CONFIG_PPC_MEM_KEYS
+ prot |= vmflag_to_pte_pkey_bits(vm_flags);
+#endif
+
+ return __pgprot(prot);
+}
+EXPORT_SYMBOL(vm_get_page_prot);
diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c
index a2d9ad138709..1d2675ab6711 100644
--- a/arch/powerpc/mm/book3s64/pkeys.c
+++ b/arch/powerpc/mm/book3s64/pkeys.c
@@ -10,6 +10,7 @@
#include <asm/mmu.h>
#include <asm/setup.h>
#include <asm/smp.h>
+#include <asm/firmware.h>
#include <linux/pkeys.h>
#include <linux/of_fdt.h>
@@ -66,7 +67,7 @@ static int __init dt_scan_storage_keys(unsigned long node,
return 1;
}
-static int scan_pkey_feature(void)
+static int __init scan_pkey_feature(void)
{
int ret;
int pkeys_total = 0;
diff --git a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
index 23d3e08911d3..5e3195568525 100644
--- a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
+++ b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
@@ -41,61 +41,6 @@ void radix__flush_hugetlb_tlb_range(struct vm_area_struct *vma, unsigned long st
radix__flush_tlb_range_psize(vma->vm_mm, start, end, psize);
}
-/*
- * A vairant of hugetlb_get_unmapped_area doing topdown search
- * FIXME!! should we do as x86 does or non hugetlb area does ?
- * ie, use topdown or not based on mmap_is_legacy check ?
- */
-unsigned long
-radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
- unsigned long len, unsigned long pgoff,
- unsigned long flags)
-{
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
- struct hstate *h = hstate_file(file);
- int fixed = (flags & MAP_FIXED);
- unsigned long high_limit;
- struct vm_unmapped_area_info info;
-
- high_limit = DEFAULT_MAP_WINDOW;
- if (addr >= high_limit || (fixed && (addr + len > high_limit)))
- high_limit = TASK_SIZE;
-
- if (len & ~huge_page_mask(h))
- return -EINVAL;
- if (len > high_limit)
- return -ENOMEM;
-
- if (fixed) {
- if (addr > high_limit - len)
- return -ENOMEM;
- if (prepare_hugepage_range(file, addr, len))
- return -EINVAL;
- return addr;
- }
-
- if (addr) {
- addr = ALIGN(addr, huge_page_size(h));
- vma = find_vma(mm, addr);
- if (high_limit - len >= addr && addr >= mmap_min_addr &&
- (!vma || addr + len <= vm_start_gap(vma)))
- return addr;
- }
- /*
- * We are always doing an topdown search here. Slice code
- * does that too.
- */
- info.flags = VM_UNMAPPED_AREA_TOPDOWN;
- info.length = len;
- info.low_limit = max(PAGE_SIZE, mmap_min_addr);
- info.high_limit = mm->mmap_base + (high_limit - DEFAULT_MAP_WINDOW);
- info.align_mask = PAGE_MASK & ~huge_page_mask(h);
- info.align_offset = 0;
-
- return vm_unmapped_area(&info);
-}
-
void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
pte_t old_pte, pte_t pte)
@@ -103,11 +48,13 @@ void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
struct mm_struct *mm = vma->vm_mm;
/*
- * To avoid NMMU hang while relaxing access we need to flush the tlb before
- * we set the new value.
+ * POWER9 NMMU must flush the TLB after clearing the PTE before
+ * installing a PTE with more relaxed access permissions, see
+ * radix__ptep_set_access_flags.
*/
- if (is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) &&
- (atomic_read(&mm->context.copros) > 0))
+ if (!cpu_has_feature(CPU_FTR_ARCH_31) &&
+ is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) &&
+ atomic_read(&mm->context.copros) > 0)
radix__flush_hugetlb_page(vma, addr);
set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 3a600bd7fbc6..cac727b01799 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -30,10 +30,12 @@
#include <asm/trace.h>
#include <asm/uaccess.h>
#include <asm/ultravisor.h>
+#include <asm/set_memory.h>
#include <trace/events/thp.h>
-unsigned int mmu_pid_bits;
+#include <mm/mmu_decl.h>
+
unsigned int mmu_base_pid;
unsigned long radix_mem_block_size __ro_after_init;
@@ -229,7 +231,7 @@ void radix__mark_rodata_ro(void)
unsigned long start, end;
start = (unsigned long)_stext;
- end = (unsigned long)__init_begin;
+ end = (unsigned long)__end_rodata;
radix__change_memory_range(start, end, _PAGE_WRITE);
}
@@ -260,21 +262,24 @@ print_mapping(unsigned long start, unsigned long end, unsigned long size, bool e
static unsigned long next_boundary(unsigned long addr, unsigned long end)
{
#ifdef CONFIG_STRICT_KERNEL_RWX
- if (addr < __pa_symbol(__init_begin))
- return __pa_symbol(__init_begin);
+ if (addr < __pa_symbol(__srwx_boundary))
+ return __pa_symbol(__srwx_boundary);
#endif
return end;
}
static int __meminit create_physical_mapping(unsigned long start,
unsigned long end,
- unsigned long max_mapping_size,
int nid, pgprot_t _prot)
{
unsigned long vaddr, addr, mapping_size = 0;
bool prev_exec, exec = false;
pgprot_t prot;
int psize;
+ unsigned long max_mapping_size = radix_mem_block_size;
+
+ if (debug_pagealloc_enabled_or_kfence())
+ max_mapping_size = PAGE_SIZE;
start = ALIGN(start, PAGE_SIZE);
end = ALIGN_DOWN(end, PAGE_SIZE);
@@ -335,7 +340,7 @@ static void __init radix_init_pgtable(void)
u64 i;
/* We don't support slb for radix */
- mmu_slb_size = 0;
+ slb_set_size(0);
/*
* Create the linear mapping
@@ -353,22 +358,16 @@ static void __init radix_init_pgtable(void)
}
WARN_ON(create_physical_mapping(start, end,
- radix_mem_block_size,
-1, PAGE_KERNEL));
}
- /* Find out how many PID bits are supported */
if (!cpu_has_feature(CPU_FTR_HVMODE) &&
cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
/*
- * Older versions of KVM on these machines perfer if the
+ * Older versions of KVM on these machines prefer if the
* guest only uses the low 19 PID bits.
*/
- if (!mmu_pid_bits)
- mmu_pid_bits = 19;
- } else {
- if (!mmu_pid_bits)
- mmu_pid_bits = 20;
+ mmu_pid_bits = 19;
}
mmu_base_pid = 1;
@@ -449,11 +448,6 @@ static int __init radix_dt_scan_page_sizes(unsigned long node,
if (type == NULL || strcmp(type, "cpu") != 0)
return 0;
- /* Find MMU PID size */
- prop = of_get_flat_dt_prop(node, "ibm,mmu-pid-bits", &size);
- if (prop && size == 4)
- mmu_pid_bits = be32_to_cpup(prop);
-
/* Grab page size encodings */
prop = of_get_flat_dt_prop(node, "ibm,processor-radix-AP-encodings", &size);
if (!prop)
@@ -510,7 +504,7 @@ static int __init probe_memory_block_size(unsigned long node, const char *uname,
return 1;
}
-static unsigned long radix_memory_block_size(void)
+static unsigned long __init radix_memory_block_size(void)
{
unsigned long mem_block_size = MIN_MEMORY_BLOCK_SIZE;
@@ -528,7 +522,7 @@ static unsigned long radix_memory_block_size(void)
#else /* CONFIG_MEMORY_HOTPLUG */
-static unsigned long radix_memory_block_size(void)
+static unsigned long __init radix_memory_block_size(void)
{
return 1UL * 1024 * 1024 * 1024;
}
@@ -572,22 +566,11 @@ void __init radix__early_init_devtree(void)
return;
}
-static void radix_init_amor(void)
-{
- /*
- * In HV mode, we init AMOR (Authority Mask Override Register) so that
- * the hypervisor and guest can setup IAMR (Instruction Authority Mask
- * Register), enable key 0 and set it to 1.
- *
- * AMOR = 0b1100 .... 0000 (Mask for key 0 is 11)
- */
- mtspr(SPRN_AMOR, (3ul << 62));
-}
-
void __init radix__early_init_mmu(void)
{
unsigned long lpcr;
+#ifdef CONFIG_PPC_64S_HASH_MMU
#ifdef CONFIG_PPC_64K_PAGES
/* PAGE_SIZE mappings */
mmu_virtual_psize = MMU_PAGE_64K;
@@ -605,6 +588,7 @@ void __init radix__early_init_mmu(void)
} else
mmu_vmemmap_psize = mmu_virtual_psize;
#endif
+#endif
/*
* initialize page table size
*/
@@ -644,7 +628,6 @@ void __init radix__early_init_mmu(void)
lpcr = mfspr(SPRN_LPCR);
mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
radix_init_partition_table();
- radix_init_amor();
} else {
radix_init_pseries();
}
@@ -668,8 +651,6 @@ void radix__early_init_mmu_secondary(void)
set_ptcr_when_no_uv(__pa(partition_tb) |
(PATB_SIZE_SHIFT - 12));
-
- radix_init_amor();
}
radix__switch_mmu_context(NULL, &init_mm);
@@ -874,7 +855,7 @@ int __meminit radix__create_section_mapping(unsigned long start,
}
return create_physical_mapping(__pa(start), __pa(end),
- radix_mem_block_size, nid, prot);
+ nid, prot);
}
int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end)
@@ -920,10 +901,17 @@ void __meminit radix__vmemmap_remove_mapping(unsigned long start, unsigned long
#endif
#endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
+#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
void radix__kernel_map_pages(struct page *page, int numpages, int enable)
{
- pr_warn_once("DEBUG_PAGEALLOC not supported in radix mode\n");
+ unsigned long addr;
+
+ addr = (unsigned long)page_address(page);
+
+ if (enable)
+ set_memory_p(addr, numpages);
+ else
+ set_memory_np(addr, numpages);
}
#endif
@@ -961,15 +949,6 @@ pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addre
pmd = *pmdp;
pmd_clear(pmdp);
- /*
- * pmdp collapse_flush need to ensure that there are no parallel gup
- * walk after this call. This is needed so that we can have stable
- * page ref count when collapsing a page. We don't allow a collapse page
- * if we have gup taken on the page. We can ensure that by sending IPI
- * because gup walk happens with IRQ disabled.
- */
- serialize_against_pte_lookup(vma->vm_mm);
-
radix__flush_tlb_collapsed_pmd(vma->vm_mm, address);
return pmd;
@@ -1042,16 +1021,21 @@ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
unsigned long change = pte_val(entry) ^ pte_val(*ptep);
/*
- * To avoid NMMU hang while relaxing access, we need mark
- * the pte invalid in between.
+ * On POWER9, the NMMU is not able to relax PTE access permissions
+ * for a translation with a TLB. The PTE must be invalidated, TLB
+ * flushed before the new PTE is installed.
+ *
+ * This only needs to be done for radix, because hash translation does
+ * flush when updating the linux pte (and we don't support NMMU
+ * accelerators on HPT on POWER9 anyway XXX: do we?).
+ *
+ * POWER10 (and P9P) NMMU does behave as per ISA.
*/
- if ((change & _PAGE_RW) && atomic_read(&mm->context.copros) > 0) {
+ if (!cpu_has_feature(CPU_FTR_ARCH_31) && (change & _PAGE_RW) &&
+ atomic_read(&mm->context.copros) > 0) {
unsigned long old_pte, new_pte;
old_pte = __radix_pte_update(ptep, _PAGE_PRESENT, _PAGE_INVALID);
- /*
- * new value of pte
- */
new_pte = old_pte | set;
radix__flush_tlb_page_psize(mm, address, psize);
__radix_pte_update(ptep, _PAGE_INVALID, new_pte);
@@ -1059,9 +1043,12 @@ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
__radix_pte_update(ptep, 0, set);
/*
* Book3S does not require a TLB flush when relaxing access
- * restrictions when the address space is not attached to a
- * NMMU, because the core MMU will reload the pte after taking
- * an access fault, which is defined by the architecture.
+ * restrictions when the address space (modulo the POWER9 nest
+ * MMU issue above) because the MMU will reload the PTE after
+ * taking an access fault, as defined by the architecture. See
+ * "Setting a Reference or Change Bit or Upgrading Access
+ * Authority (PTE Subject to Atomic Hardware Updates)" in
+ * Power ISA Version 3.1B.
*/
}
/* See ptesync comment in radix__set_pte_at */
@@ -1074,11 +1061,12 @@ void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
struct mm_struct *mm = vma->vm_mm;
/*
- * To avoid NMMU hang while relaxing access we need to flush the tlb before
- * we set the new value. We need to do this only for radix, because hash
- * translation does flush when updating the linux pte.
+ * POWER9 NMMU must flush the TLB after clearing the PTE before
+ * installing a PTE with more relaxed access permissions, see
+ * radix__ptep_set_access_flags.
*/
- if (is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) &&
+ if (!cpu_has_feature(CPU_FTR_ARCH_31) &&
+ is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) &&
(atomic_read(&mm->context.copros) > 0))
radix__flush_tlb_page(vma, addr);
@@ -1100,7 +1088,7 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
int pud_clear_huge(pud_t *pud)
{
- if (pud_huge(*pud)) {
+ if (pud_is_leaf(*pud)) {
pud_clear(pud);
return 1;
}
@@ -1147,7 +1135,7 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
int pmd_clear_huge(pmd_t *pmd)
{
- if (pmd_huge(*pmd)) {
+ if (pmd_is_leaf(*pmd)) {
pmd_clear(pmd);
return 1;
}
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 7724af19ed7e..4e29b619578c 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -397,7 +397,7 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
/*
* Workaround the fact that the "ric" argument to __tlbie_pid
- * must be a compile-time contraint to match the "i" constraint
+ * must be a compile-time constraint to match the "i" constraint
* in the asm statement.
*/
switch (ric) {
@@ -755,10 +755,18 @@ EXPORT_SYMBOL(radix__local_flush_tlb_page);
static bool mm_needs_flush_escalation(struct mm_struct *mm)
{
/*
- * P9 nest MMU has issues with the page walk cache
- * caching PTEs and not flushing them properly when
- * RIC = 0 for a PID/LPID invalidate
+ * The P9 nest MMU has issues with the page walk cache caching PTEs
+ * and not flushing them when RIC = 0 for a PID/LPID invalidate.
+ *
+ * This may have been fixed in shipping firmware (by disabling PWC
+ * or preventing it from caching PTEs), but until that is confirmed,
+ * this workaround is required - escalate all RIC=0 IS=1/2/3 flushes
+ * to RIC=2.
+ *
+ * POWER10 (and P9P) does not have this problem.
*/
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ return false;
if (atomic_read(&mm->context.copros) > 0)
return true;
return false;
diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c
index f0037bcc47a0..6956f637a38c 100644
--- a/arch/powerpc/mm/book3s64/slb.c
+++ b/arch/powerpc/mm/book3s64/slb.c
@@ -9,7 +9,6 @@
* Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
*/
-#include <asm/asm-prototypes.h>
#include <asm/interrupt.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
@@ -348,7 +347,7 @@ void slb_setup_new_exec(void)
/*
* We have no good place to clear the slb preload cache on exec,
* flush_thread is about the earliest arch hook but that happens
- * after we switch to the mm and have aleady preloaded the SLBEs.
+ * after we switch to the mm and have already preloaded the SLBEs.
*
* For the most part that's probably okay to use entries from the
* previous exec, they will age out if unused. It may turn out to
@@ -616,7 +615,7 @@ static void slb_cache_update(unsigned long esid_data)
} else {
/*
* Our cache is full and the current cache content strictly
- * doesn't indicate the active SLB conents. Bump the ptr
+ * doesn't indicate the active SLB contents. Bump the ptr
* so that switch_slb() will ignore the cache.
*/
local_paca->slb_cache_ptr = SLB_CACHE_ENTRIES + 1;
@@ -868,19 +867,3 @@ DEFINE_INTERRUPT_HANDLER_RAW(do_slb_fault)
return err;
}
}
-
-DEFINE_INTERRUPT_HANDLER(do_bad_slb_fault)
-{
- int err = regs->result;
-
- if (err == -EFAULT) {
- if (user_mode(regs))
- _exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar);
- else
- bad_page_fault(regs, SIGSEGV);
- } else if (err == -EINVAL) {
- unrecoverable_exception(regs);
- } else {
- BUG();
- }
-}
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/book3s64/slice.c
index 82b45b1cb973..c0b58afb9a47 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/book3s64/slice.c
@@ -276,20 +276,18 @@ static bool slice_scan_available(unsigned long addr,
}
static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
- unsigned long len,
+ unsigned long addr, unsigned long len,
const struct slice_mask *available,
int psize, unsigned long high_limit)
{
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
- unsigned long addr, found, next_end;
+ unsigned long found, next_end;
struct vm_unmapped_area_info info;
info.flags = 0;
info.length = len;
info.align_mask = PAGE_MASK & ((1ul << pshift) - 1);
info.align_offset = 0;
-
- addr = TASK_UNMAPPED_BASE;
/*
* Check till the allow max value for this mmap request
*/
@@ -322,12 +320,12 @@ static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
}
static unsigned long slice_find_area_topdown(struct mm_struct *mm,
- unsigned long len,
+ unsigned long addr, unsigned long len,
const struct slice_mask *available,
int psize, unsigned long high_limit)
{
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
- unsigned long addr, found, prev;
+ unsigned long found, prev;
struct vm_unmapped_area_info info;
unsigned long min_addr = max(PAGE_SIZE, mmap_min_addr);
@@ -335,8 +333,6 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
info.length = len;
info.align_mask = PAGE_MASK & ((1ul << pshift) - 1);
info.align_offset = 0;
-
- addr = mm->mmap_base;
/*
* If we are trying to allocate above DEFAULT_MAP_WINDOW
* Add the different to the mmap_base.
@@ -377,7 +373,7 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
* can happen with large stack limits and large mmap()
* allocations.
*/
- return slice_find_area_bottomup(mm, len, available, psize, high_limit);
+ return slice_find_area_bottomup(mm, TASK_UNMAPPED_BASE, len, available, psize, high_limit);
}
@@ -386,9 +382,9 @@ static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
int topdown, unsigned long high_limit)
{
if (topdown)
- return slice_find_area_topdown(mm, len, mask, psize, high_limit);
+ return slice_find_area_topdown(mm, mm->mmap_base, len, mask, psize, high_limit);
else
- return slice_find_area_bottomup(mm, len, mask, psize, high_limit);
+ return slice_find_area_bottomup(mm, mm->mmap_base, len, mask, psize, high_limit);
}
static inline void slice_copy_mask(struct slice_mask *dst,
@@ -645,6 +641,9 @@ unsigned long arch_get_unmapped_area(struct file *filp,
unsigned long pgoff,
unsigned long flags)
{
+ if (radix_enabled())
+ return generic_get_unmapped_area(filp, addr, len, pgoff, flags);
+
return slice_get_unmapped_area(addr, len, flags,
mm_ctx_user_psize(&current->mm->context), 0);
}
@@ -655,6 +654,9 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp,
const unsigned long pgoff,
const unsigned long flags)
{
+ if (radix_enabled())
+ return generic_get_unmapped_area_topdown(filp, addr0, len, pgoff, flags);
+
return slice_get_unmapped_area(addr0, len, flags,
mm_ctx_user_psize(&current->mm->context), 1);
}
@@ -712,7 +714,6 @@ void slice_init_new_context_exec(struct mm_struct *mm)
bitmap_fill(mask->high_slices, SLICE_NUM_HIGH);
}
-#ifdef CONFIG_PPC_BOOK3S_64
void slice_setup_new_exec(void)
{
struct mm_struct *mm = current->mm;
@@ -724,7 +725,6 @@ void slice_setup_new_exec(void)
mm_ctx_set_slb_addr_limit(&mm->context, DEFAULT_MAP_WINDOW);
}
-#endif
void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
unsigned long len, unsigned int psize)
@@ -779,4 +779,29 @@ int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
return !slice_check_range_fits(mm, maskp, addr, len);
}
+
+unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
+{
+ /* With radix we don't use slice, so derive it from vma*/
+ if (radix_enabled())
+ return vma_kernel_pagesize(vma);
+
+ return 1UL << mmu_psize_to_shift(get_slice_psize(vma->vm_mm, vma->vm_start));
+}
+
+static int file_to_psize(struct file *file)
+{
+ struct hstate *hstate = hstate_file(file);
+ return shift_to_mmu_psize(huge_page_shift(hstate));
+}
+
+unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags)
+{
+ if (radix_enabled())
+ return generic_hugetlb_get_unmapped_area(file, addr, len, pgoff, flags);
+
+ return slice_get_unmapped_area(addr, len, flags, file_to_psize(file), 1);
+}
#endif
diff --git a/arch/powerpc/mm/book3s64/subpage_prot.c b/arch/powerpc/mm/book3s64/subpage_prot.c
index 60c6ea16a972..d73b3b4176e8 100644
--- a/arch/powerpc/mm/book3s64/subpage_prot.c
+++ b/arch/powerpc/mm/book3s64/subpage_prot.c
@@ -149,24 +149,15 @@ static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
unsigned long len)
{
struct vm_area_struct *vma;
+ VMA_ITERATOR(vmi, mm, addr);
/*
* We don't try too hard, we just mark all the vma in that range
* VM_NOHUGEPAGE and split them.
*/
- vma = find_vma(mm, addr);
- /*
- * If the range is in unmapped range, just return
- */
- if (vma && ((addr + len) <= vma->vm_start))
- return;
-
- while (vma) {
- if (vma->vm_start >= (addr + len))
- break;
+ for_each_vma_range(vmi, vma, addr + len) {
vma->vm_flags |= VM_NOHUGEPAGE;
walk_page_vma(vma, &subpage_walk_ops, NULL);
- vma = vma->vm_next;
}
}
#else
diff --git a/arch/powerpc/mm/book3s64/trace.c b/arch/powerpc/mm/book3s64/trace.c
new file mode 100644
index 000000000000..ccd64b5e6cac
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/trace.c
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * This file is for defining trace points and trace related helpers.
+ */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#include <trace/events/thp.h>
+#endif
diff --git a/arch/powerpc/mm/cacheflush.c b/arch/powerpc/mm/cacheflush.c
index 63363787e000..0e9b4879c0f9 100644
--- a/arch/powerpc/mm/cacheflush.c
+++ b/arch/powerpc/mm/cacheflush.c
@@ -12,7 +12,7 @@ static inline bool flush_coherent_icache(void)
/*
* For a snooping icache, we still need a dummy icbi to purge all the
* prefetched instructions from the ifetch buffers. We also need a sync
- * before the icbi to order the the actual stores to memory that might
+ * before the icbi to order the actual stores to memory that might
* have modified instructions with the icbi.
*/
if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) {
diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c
index 8acd00178956..7c507fb48182 100644
--- a/arch/powerpc/mm/copro_fault.c
+++ b/arch/powerpc/mm/copro_fault.c
@@ -65,6 +65,11 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
ret = 0;
*flt = handle_mm_fault(vma, ea, is_write ? FAULT_FLAG_WRITE : 0, NULL);
+
+ /* The fault is fully completed (including releasing mmap lock) */
+ if (*flt & VM_FAULT_COMPLETED)
+ return 0;
+
if (unlikely(*flt & VM_FAULT_ERROR)) {
if (*flt & VM_FAULT_OOM) {
ret = -ENOMEM;
@@ -82,6 +87,7 @@ out_unlock:
}
EXPORT_SYMBOL_GPL(copro_handle_mm_fault);
+#ifdef CONFIG_PPC_64S_HASH_MMU
int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
{
u64 vsid, vsidkey;
@@ -146,3 +152,4 @@ void copro_flush_all_slbs(struct mm_struct *mm)
cxl_slbia(mm);
}
EXPORT_SYMBOL_GPL(copro_flush_all_slbs);
+#endif
diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
index 22197b18d85e..2369d1bf2411 100644
--- a/arch/powerpc/mm/drmem.c
+++ b/arch/powerpc/mm/drmem.c
@@ -11,7 +11,7 @@
#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/memblock.h>
-#include <asm/prom.h>
+#include <linux/slab.h>
#include <asm/drmem.h>
static int n_root_addr_cells, n_root_size_cells;
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index a8d0ce85d39a..2bef19cc1b98 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -270,7 +270,11 @@ static bool access_error(bool is_write, bool is_exec, struct vm_area_struct *vma
return false;
}
- if (unlikely(!vma_is_accessible(vma)))
+ /*
+ * Check for a read fault. This could be caused by a read on an
+ * inaccessible page (i.e. PROT_NONE), or a Radix MMU execute-only page.
+ */
+ if (unlikely(!(vma->vm_flags & VM_READ)))
return true;
/*
* We should ideally do the vma pkey access check here. But in the
@@ -367,7 +371,22 @@ static void sanity_check_fault(bool is_write, bool is_user,
#elif defined(CONFIG_PPC_8xx)
#define page_fault_is_bad(__err) ((__err) & DSISR_NOEXEC_OR_G)
#elif defined(CONFIG_PPC64)
-#define page_fault_is_bad(__err) ((__err) & DSISR_BAD_FAULT_64S)
+static int page_fault_is_bad(unsigned long err)
+{
+ unsigned long flag = DSISR_BAD_FAULT_64S;
+
+ /*
+ * PAPR+ v2.11 § 14.15.3.4.1 (unreleased)
+ * If byte 0, bit 3 of pi-attribute-specifier-type in
+ * ibm,pi-features property is defined, ignore the DSI error
+ * which is caused by the paste instruction on the
+ * suspended NX window.
+ */
+ if (mmu_has_feature(MMU_FTR_NX_DSI))
+ flag &= ~DSISR_BAD_COPYPASTE;
+
+ return err & flag;
+}
#else
#define page_fault_is_bad(__err) ((__err) & DSISR_BAD_FAULT_32S)
#endif
@@ -511,15 +530,17 @@ retry:
if (fault_signal_pending(fault, regs))
return user_mode(regs) ? 0 : SIGBUS;
+ /* The fault is fully completed (including releasing mmap lock) */
+ if (fault & VM_FAULT_COMPLETED)
+ goto out;
+
/*
* Handle the retry right now, the mmap_lock has been released in that
* case.
*/
if (unlikely(fault & VM_FAULT_RETRY)) {
- if (flags & FAULT_FLAG_ALLOW_RETRY) {
- flags |= FAULT_FLAG_TRIED;
- goto retry;
- }
+ flags |= FAULT_FLAG_TRIED;
+ goto retry;
}
mmap_read_unlock(current->mm);
@@ -527,6 +548,7 @@ retry:
if (unlikely(fault & VM_FAULT_ERROR))
return mm_fault_error(regs, address, fault);
+out:
/*
* Major/minor page fault accounting.
*/
@@ -568,18 +590,24 @@ NOKPROBE_SYMBOL(hash__do_page_fault);
static void __bad_page_fault(struct pt_regs *regs, int sig)
{
int is_write = page_fault_is_write(regs->dsisr);
+ const char *msg;
/* kernel has accessed a bad area */
+ if (regs->dar < PAGE_SIZE)
+ msg = "Kernel NULL pointer dereference";
+ else
+ msg = "Unable to handle kernel data access";
+
switch (TRAP(regs)) {
case INTERRUPT_DATA_STORAGE:
- case INTERRUPT_DATA_SEGMENT:
case INTERRUPT_H_DATA_STORAGE:
- pr_alert("BUG: %s on %s at 0x%08lx\n",
- regs->dar < PAGE_SIZE ? "Kernel NULL pointer dereference" :
- "Unable to handle kernel data access",
+ pr_alert("BUG: %s on %s at 0x%08lx\n", msg,
is_write ? "write" : "read", regs->dar);
break;
+ case INTERRUPT_DATA_SEGMENT:
+ pr_alert("BUG: %s at 0x%08lx\n", msg, regs->dar);
+ break;
case INTERRUPT_INST_STORAGE:
case INTERRUPT_INST_SEGMENT:
pr_alert("BUG: Unable to handle kernel instruction fetch%s",
@@ -620,4 +648,27 @@ DEFINE_INTERRUPT_HANDLER(do_bad_page_fault_segv)
{
bad_page_fault(regs, SIGSEGV);
}
+
+/*
+ * In radix, segment interrupts indicate the EA is not addressable by the
+ * page table geometry, so they are always sent here.
+ *
+ * In hash, this is called if do_slb_fault returns error. Typically it is
+ * because the EA was outside the region allowed by software.
+ */
+DEFINE_INTERRUPT_HANDLER(do_bad_segment_interrupt)
+{
+ int err = regs->result;
+
+ if (err == -EFAULT) {
+ if (user_mode(regs))
+ _exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar);
+ else
+ bad_page_fault(regs, SIGSEGV);
+ } else if (err == -EINVAL) {
+ unrecoverable_exception(regs);
+ } else {
+ BUG();
+ }
+}
#endif
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 82d8b368ca6d..5852a86d990d 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -24,6 +24,7 @@
#include <asm/setup.h>
#include <asm/hugetlb.h>
#include <asm/pte-walk.h>
+#include <asm/firmware.h>
bool hugetlb_disabled = false;
@@ -391,7 +392,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
* single hugepage, but all of them point to
* the same kmem cache that holds the hugepte.
*/
- more = addr + (1 << hugepd_shift(*(hugepd_t *)pmd));
+ more = addr + (1UL << hugepd_shift(*(hugepd_t *)pmd));
if (more > next)
next = more;
@@ -433,7 +434,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, p4d_t *p4d,
* single hugepage, but all of them point to
* the same kmem cache that holds the hugepte.
*/
- more = addr + (1 << hugepd_shift(*(hugepd_t *)pud));
+ more = addr + (1UL << hugepd_shift(*(hugepd_t *)pud));
if (more > next)
next = more;
@@ -495,7 +496,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
* for a single hugepage, but all of them point to the
* same kmem cache that holds the hugepte.
*/
- more = addr + (1 << hugepd_shift(*(hugepd_t *)pgd));
+ more = addr + (1UL << hugepd_shift(*(hugepd_t *)pgd));
if (more > next)
next = more;
@@ -542,34 +543,6 @@ retry:
return page;
}
-#ifdef CONFIG_PPC_MM_SLICES
-unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
- unsigned long len, unsigned long pgoff,
- unsigned long flags)
-{
- struct hstate *hstate = hstate_file(file);
- int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));
-
-#ifdef CONFIG_PPC_RADIX_MMU
- if (radix_enabled())
- return radix__hugetlb_get_unmapped_area(file, addr, len,
- pgoff, flags);
-#endif
- return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1);
-}
-#endif
-
-unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
-{
- /* With radix we don't use slice, so derive it from vma*/
- if (IS_ENABLED(CONFIG_PPC_MM_SLICES) && !radix_enabled()) {
- unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);
-
- return 1UL << mmu_psize_to_shift(psize);
- }
- return vma_kernel_pagesize(vma);
-}
-
bool __init arch_hugetlb_valid_size(unsigned long size)
{
int shift = __ffs(size);
@@ -650,7 +623,7 @@ static int __init hugetlbpage_init(void)
if (pdshift > shift) {
if (!IS_ENABLED(CONFIG_PPC_8xx))
pgtable_cache_add(pdshift - shift);
- } else if (IS_ENABLED(CONFIG_PPC_FSL_BOOK3E) ||
+ } else if (IS_ENABLED(CONFIG_PPC_E500) ||
IS_ENABLED(CONFIG_PPC_8xx)) {
pgtable_cache_add(PTE_T_ORDER);
}
@@ -658,10 +631,7 @@ static int __init hugetlbpage_init(void)
configured = true;
}
- if (configured) {
- if (IS_ENABLED(CONFIG_HUGETLB_PAGE_SIZE_VARIABLE))
- hugetlbpage_init_default();
- } else
+ if (!configured)
pr_info("Failed to initialize. Disabling HugeTLB");
return 0;
diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c
index 3a82f89827a5..119ef491f797 100644
--- a/arch/powerpc/mm/init-common.c
+++ b/arch/powerpc/mm/init-common.c
@@ -20,6 +20,7 @@
#include <linux/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/kup.h>
+#include <asm/smp.h>
phys_addr_t memstart_addr __ro_after_init = (phys_addr_t)~0ull;
EXPORT_SYMBOL_GPL(memstart_addr);
@@ -33,6 +34,9 @@ bool disable_kuap = !IS_ENABLED(CONFIG_PPC_KUAP);
static int __init parse_nosmep(char *p)
{
+ if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64))
+ return 0;
+
disable_kuep = true;
pr_warn("Disabling Kernel Userspace Execution Prevention\n");
return 0;
@@ -47,6 +51,23 @@ static int __init parse_nosmap(char *p)
}
early_param("nosmap", parse_nosmap);
+void __weak setup_kuep(bool disabled)
+{
+ if (!IS_ENABLED(CONFIG_PPC_KUEP) || disabled)
+ return;
+
+ if (smp_processor_id() != boot_cpuid)
+ return;
+
+ pr_info("Activating Kernel Userspace Execution Prevention\n");
+}
+
+void setup_kup(void)
+{
+ setup_kuap(disable_kuap);
+ setup_kuep(disable_kuep);
+}
+
#define CTOR(shift) static void ctor_##shift(void *addr) \
{ \
memset(addr, 0, sizeof(void *) << (shift)); \
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 3d690be48e84..d4cc3749e621 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -29,7 +29,6 @@
#include <linux/slab.h>
#include <linux/hugetlb.h>
-#include <asm/prom.h>
#include <asm/io.h>
#include <asm/mmu.h>
#include <asm/smp.h>
@@ -70,44 +69,10 @@ EXPORT_SYMBOL(agp_special_page);
void MMU_init(void);
-/*
- * this tells the system to map all of ram with the segregs
- * (i.e. page tables) instead of the bats.
- * -- Cort
- */
-int __map_without_bats;
-int __map_without_ltlbs;
-
/* max amount of low RAM to map in */
unsigned long __max_low_memory = MAX_LOW_MEM;
/*
- * Check for command-line options that affect what MMU_init will do.
- */
-static void __init MMU_setup(void)
-{
- /* Check for nobats option (used in mapin_ram). */
- if (strstr(boot_command_line, "nobats")) {
- __map_without_bats = 1;
- }
-
- if (strstr(boot_command_line, "noltlbs")) {
- __map_without_ltlbs = 1;
- }
- if (IS_ENABLED(CONFIG_PPC_8xx))
- return;
-
- if (IS_ENABLED(CONFIG_KFENCE))
- __map_without_ltlbs = 1;
-
- if (debug_pagealloc_enabled())
- __map_without_ltlbs = 1;
-
- if (strict_kernel_rwx_enabled())
- __map_without_ltlbs = 1;
-}
-
-/*
* MMU_init sets up the basic memory mappings for the kernel,
* including both RAM and possibly some I/O regions,
* and sets up the page tables and the MMU hardware ready to go.
@@ -117,31 +82,15 @@ void __init MMU_init(void)
if (ppc_md.progress)
ppc_md.progress("MMU:enter", 0x111);
- /* parse args from command line */
- MMU_setup();
-
- /*
- * Reserve gigantic pages for hugetlb. This MUST occur before
- * lowmem_end_addr is initialized below.
- */
- if (memblock.memory.cnt > 1) {
-#ifndef CONFIG_WII
- memblock_enforce_memory_limit(memblock.memory.regions[0].size);
- pr_warn("Only using first contiguous memory region\n");
-#else
- wii_memory_fixups();
-#endif
- }
-
total_lowmem = total_memory = memblock_end_of_DRAM() - memstart_addr;
lowmem_end_addr = memstart_addr + total_lowmem;
-#ifdef CONFIG_FSL_BOOKE
+#ifdef CONFIG_PPC_85xx
/* Freescale Book-E parts expect lowmem to be mapped by fixed TLB
* entries, so we need to adjust lowmem to match the amount we can map
* in the fixed entries */
adjust_total_lowmem();
-#endif /* CONFIG_FSL_BOOKE */
+#endif /* CONFIG_PPC_85xx */
if (total_lowmem > __max_low_memory) {
total_lowmem = __max_low_memory;
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 386be136026e..05b0d584e50b 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -59,6 +59,7 @@
#include <asm/sections.h>
#include <asm/iommu.h>
#include <asm/vdso.h>
+#include <asm/hugetlb.h>
#include <mm/mmu_decl.h>
@@ -110,7 +111,7 @@ static int __meminit vmemmap_populated(unsigned long vmemmap_addr, int vmemmap_m
}
/*
- * vmemmap virtual address space management does not have a traditonal page
+ * vmemmap virtual address space management does not have a traditional page
* table to track which virtual struct pages are backed by physical mapping.
* The virtual to physical mappings are tracked in a simple linked list
* format. 'vmemmap_list' maintains the entire vmemmap physical mapping at
@@ -127,7 +128,7 @@ static struct vmemmap_backing *next;
/*
* The same pointer 'next' tracks individual chunks inside the allocated
- * full page during the boot time and again tracks the freeed nodes during
+ * full page during the boot time and again tracks the freed nodes during
* runtime. It is racy but it does not happen as they are separated by the
* boot process. Will create problem if some how we have memory hotplug
* operation during boot !!
@@ -370,6 +371,12 @@ void register_page_bootmem_memmap(unsigned long section_nr,
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
#ifdef CONFIG_PPC_BOOK3S_64
+unsigned int mmu_lpid_bits;
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+EXPORT_SYMBOL_GPL(mmu_lpid_bits);
+#endif
+unsigned int mmu_pid_bits;
+
static bool disable_radix = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT);
static int __init parse_disable_radix(char *p)
@@ -437,11 +444,56 @@ static void __init early_check_vec5(void)
}
}
+static int __init dt_scan_mmu_pid_width(unsigned long node,
+ const char *uname, int depth,
+ void *data)
+{
+ int size = 0;
+ const __be32 *prop;
+ const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+
+ /* We are scanning "cpu" nodes only */
+ if (type == NULL || strcmp(type, "cpu") != 0)
+ return 0;
+
+ /* Find MMU LPID, PID register size */
+ prop = of_get_flat_dt_prop(node, "ibm,mmu-lpid-bits", &size);
+ if (prop && size == 4)
+ mmu_lpid_bits = be32_to_cpup(prop);
+
+ prop = of_get_flat_dt_prop(node, "ibm,mmu-pid-bits", &size);
+ if (prop && size == 4)
+ mmu_pid_bits = be32_to_cpup(prop);
+
+ if (!mmu_pid_bits && !mmu_lpid_bits)
+ return 0;
+
+ return 1;
+}
+
void __init mmu_early_init_devtree(void)
{
+ bool hvmode = !!(mfmsr() & MSR_HV);
+
/* Disable radix mode based on kernel command line. */
- if (disable_radix)
- cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+ if (disable_radix) {
+ if (IS_ENABLED(CONFIG_PPC_64S_HASH_MMU))
+ cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+ else
+ pr_warn("WARNING: Ignoring cmdline option disable_radix\n");
+ }
+
+ of_scan_flat_dt(dt_scan_mmu_pid_width, NULL);
+ if (hvmode && !mmu_lpid_bits) {
+ if (early_cpu_has_feature(CPU_FTR_ARCH_207S))
+ mmu_lpid_bits = 12; /* POWER8-10 */
+ else
+ mmu_lpid_bits = 10; /* POWER7 */
+ }
+ if (!mmu_pid_bits) {
+ if (early_cpu_has_feature(CPU_FTR_ARCH_300))
+ mmu_pid_bits = 20; /* POWER9-10 */
+ }
/*
* Check /chosen/ibm,architecture-vec-5 if running as a guest.
@@ -449,11 +501,12 @@ void __init mmu_early_init_devtree(void)
* even though the ibm,architecture-vec-5 property created by
* skiboot doesn't have the necessary bits set.
*/
- if (!(mfmsr() & MSR_HV))
+ if (!hvmode)
early_check_vec5();
if (early_radix_enabled()) {
radix__early_init_devtree();
+
/*
* We have finalized the translation we are going to use by now.
* Radix mode is not limited by RMA / VRMA addressing.
@@ -463,5 +516,12 @@ void __init mmu_early_init_devtree(void)
memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
} else
hash__early_init_devtree();
+
+ if (IS_ENABLED(CONFIG_HUGETLB_PAGE_SIZE_VARIABLE))
+ hugetlbpage_init_defaultsize();
+
+ if (!(cur_cpu_spec->mmu_features & MMU_FTR_HPTE_TABLE) &&
+ !(cur_cpu_spec->mmu_features & MMU_FTR_TYPE_RADIX))
+ panic("kernel does not support any MMU type offered by platform");
}
#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/mm/ioremap.c b/arch/powerpc/mm/ioremap.c
index 57342154d2b0..4f12504fb405 100644
--- a/arch/powerpc/mm/ioremap.c
+++ b/arch/powerpc/mm/ioremap.c
@@ -98,23 +98,3 @@ void __iomem *do_ioremap(phys_addr_t pa, phys_addr_t offset, unsigned long size,
return NULL;
}
-
-#ifdef CONFIG_ZONE_DEVICE
-/*
- * Override the generic version in mm/memremap.c.
- *
- * With hash translation, the direct-map range is mapped with just one
- * page size selected by htab_init_page_sizes(). Consult
- * mmu_psize_defs[] to determine the minimum page size alignment.
-*/
-unsigned long memremap_compat_align(void)
-{
- unsigned int shift = mmu_psize_defs[mmu_linear_psize].shift;
-
- if (radix_enabled())
- return SUBSECTION_SIZE;
- return max(SUBSECTION_SIZE, 1UL << shift);
-
-}
-EXPORT_SYMBOL_GPL(memremap_compat_align);
-#endif
diff --git a/arch/powerpc/mm/kasan/Makefile b/arch/powerpc/mm/kasan/Makefile
index bb1a5408b86b..699eeffd9f55 100644
--- a/arch/powerpc/mm/kasan/Makefile
+++ b/arch/powerpc/mm/kasan/Makefile
@@ -2,6 +2,8 @@
KASAN_SANITIZE := n
-obj-$(CONFIG_PPC32) += kasan_init_32.o
+obj-$(CONFIG_PPC32) += init_32.o
obj-$(CONFIG_PPC_8xx) += 8xx.o
obj-$(CONFIG_PPC_BOOK3S_32) += book3s_32.o
+obj-$(CONFIG_PPC_BOOK3S_64) += init_book3s_64.o
+obj-$(CONFIG_PPC_BOOK3E_64) += init_book3e_64.o
diff --git a/arch/powerpc/mm/kasan/book3s_32.c b/arch/powerpc/mm/kasan/book3s_32.c
index 202bd260a009..450a67ef0bbe 100644
--- a/arch/powerpc/mm/kasan/book3s_32.c
+++ b/arch/powerpc/mm/kasan/book3s_32.c
@@ -10,47 +10,51 @@ int __init kasan_init_region(void *start, size_t size)
{
unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start);
unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size);
- unsigned long k_cur = k_start;
- int k_size = k_end - k_start;
- int k_size_base = 1 << (ffs(k_size) - 1);
+ unsigned long k_nobat = k_start;
+ unsigned long k_cur;
+ phys_addr_t phys;
int ret;
- void *block;
- block = memblock_alloc(k_size, k_size_base);
-
- if (block && k_size_base >= SZ_128K && k_start == ALIGN(k_start, k_size_base)) {
- int k_size_more = 1 << (ffs(k_size - k_size_base) - 1);
-
- setbat(-1, k_start, __pa(block), k_size_base, PAGE_KERNEL);
- if (k_size_more >= SZ_128K)
- setbat(-1, k_start + k_size_base, __pa(block) + k_size_base,
- k_size_more, PAGE_KERNEL);
- if (v_block_mapped(k_start))
- k_cur = k_start + k_size_base;
- if (v_block_mapped(k_start + k_size_base))
- k_cur = k_start + k_size_base + k_size_more;
-
- update_bats();
+ while (k_nobat < k_end) {
+ unsigned int k_size = bat_block_size(k_nobat, k_end);
+ int idx = find_free_bat();
+
+ if (idx == -1)
+ break;
+ if (k_size < SZ_128K)
+ break;
+ phys = memblock_phys_alloc_range(k_size, k_size, 0,
+ MEMBLOCK_ALLOC_ANYWHERE);
+ if (!phys)
+ break;
+
+ setbat(idx, k_nobat, phys, k_size, PAGE_KERNEL);
+ k_nobat += k_size;
}
+ if (k_nobat != k_start)
+ update_bats();
- if (!block)
- block = memblock_alloc(k_size, PAGE_SIZE);
- if (!block)
- return -ENOMEM;
+ if (k_nobat < k_end) {
+ phys = memblock_phys_alloc_range(k_end - k_nobat, PAGE_SIZE, 0,
+ MEMBLOCK_ALLOC_ANYWHERE);
+ if (!phys)
+ return -ENOMEM;
+ }
ret = kasan_init_shadow_page_tables(k_start, k_end);
if (ret)
return ret;
- kasan_update_early_region(k_start, k_cur, __pte(0));
+ kasan_update_early_region(k_start, k_nobat, __pte(0));
- for (; k_cur < k_end; k_cur += PAGE_SIZE) {
+ for (k_cur = k_nobat; k_cur < k_end; k_cur += PAGE_SIZE) {
pmd_t *pmd = pmd_off_k(k_cur);
- void *va = block + k_cur - k_start;
- pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL);
+ pte_t pte = pfn_pte(PHYS_PFN(phys + k_cur - k_nobat), PAGE_KERNEL);
__set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0);
}
flush_tlb_kernel_range(k_start, k_end);
+ memset(kasan_mem_to_shadow(start), 0, k_end - k_start);
+
return 0;
}
diff --git a/arch/powerpc/mm/kasan/init_32.c b/arch/powerpc/mm/kasan/init_32.c
new file mode 100644
index 000000000000..a70828a6d935
--- /dev/null
+++ b/arch/powerpc/mm/kasan/init_32.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define DISABLE_BRANCH_PROFILING
+
+#include <linux/kasan.h>
+#include <linux/printk.h>
+#include <linux/memblock.h>
+#include <linux/sched/task.h>
+#include <asm/pgalloc.h>
+#include <asm/code-patching.h>
+#include <mm/mmu_decl.h>
+
+static pgprot_t __init kasan_prot_ro(void)
+{
+ if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ return PAGE_READONLY;
+
+ return PAGE_KERNEL_RO;
+}
+
+static void __init kasan_populate_pte(pte_t *ptep, pgprot_t prot)
+{
+ unsigned long va = (unsigned long)kasan_early_shadow_page;
+ phys_addr_t pa = __pa(kasan_early_shadow_page);
+ int i;
+
+ for (i = 0; i < PTRS_PER_PTE; i++, ptep++)
+ __set_pte_at(&init_mm, va, ptep, pfn_pte(PHYS_PFN(pa), prot), 1);
+}
+
+int __init kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end)
+{
+ pmd_t *pmd;
+ unsigned long k_cur, k_next;
+
+ pmd = pmd_off_k(k_start);
+
+ for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pmd++) {
+ pte_t *new;
+
+ k_next = pgd_addr_end(k_cur, k_end);
+ if ((void *)pmd_page_vaddr(*pmd) != kasan_early_shadow_pte)
+ continue;
+
+ new = memblock_alloc(PTE_FRAG_SIZE, PTE_FRAG_SIZE);
+
+ if (!new)
+ return -ENOMEM;
+ kasan_populate_pte(new, PAGE_KERNEL);
+ pmd_populate_kernel(&init_mm, pmd, new);
+ }
+ return 0;
+}
+
+int __init __weak kasan_init_region(void *start, size_t size)
+{
+ unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start);
+ unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size);
+ unsigned long k_cur;
+ int ret;
+ void *block;
+
+ ret = kasan_init_shadow_page_tables(k_start, k_end);
+ if (ret)
+ return ret;
+
+ block = memblock_alloc(k_end - k_start, PAGE_SIZE);
+ if (!block)
+ return -ENOMEM;
+
+ for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) {
+ pmd_t *pmd = pmd_off_k(k_cur);
+ void *va = block + k_cur - k_start;
+ pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL);
+
+ __set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0);
+ }
+ flush_tlb_kernel_range(k_start, k_end);
+ return 0;
+}
+
+void __init
+kasan_update_early_region(unsigned long k_start, unsigned long k_end, pte_t pte)
+{
+ unsigned long k_cur;
+
+ for (k_cur = k_start; k_cur != k_end; k_cur += PAGE_SIZE) {
+ pmd_t *pmd = pmd_off_k(k_cur);
+ pte_t *ptep = pte_offset_kernel(pmd, k_cur);
+
+ if (pte_page(*ptep) != virt_to_page(lm_alias(kasan_early_shadow_page)))
+ continue;
+
+ __set_pte_at(&init_mm, k_cur, ptep, pte, 0);
+ }
+
+ flush_tlb_kernel_range(k_start, k_end);
+}
+
+static void __init kasan_remap_early_shadow_ro(void)
+{
+ pgprot_t prot = kasan_prot_ro();
+ phys_addr_t pa = __pa(kasan_early_shadow_page);
+
+ kasan_populate_pte(kasan_early_shadow_pte, prot);
+
+ kasan_update_early_region(KASAN_SHADOW_START, KASAN_SHADOW_END,
+ pfn_pte(PHYS_PFN(pa), prot));
+}
+
+static void __init kasan_unmap_early_shadow_vmalloc(void)
+{
+ unsigned long k_start = (unsigned long)kasan_mem_to_shadow((void *)VMALLOC_START);
+ unsigned long k_end = (unsigned long)kasan_mem_to_shadow((void *)VMALLOC_END);
+
+ kasan_update_early_region(k_start, k_end, __pte(0));
+
+#ifdef MODULES_VADDR
+ k_start = (unsigned long)kasan_mem_to_shadow((void *)MODULES_VADDR);
+ k_end = (unsigned long)kasan_mem_to_shadow((void *)MODULES_END);
+ kasan_update_early_region(k_start, k_end, __pte(0));
+#endif
+}
+
+void __init kasan_mmu_init(void)
+{
+ int ret;
+
+ if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) {
+ ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END);
+
+ if (ret)
+ panic("kasan: kasan_init_shadow_page_tables() failed");
+ }
+}
+
+void __init kasan_init(void)
+{
+ phys_addr_t base, end;
+ u64 i;
+ int ret;
+
+ for_each_mem_range(i, &base, &end) {
+ phys_addr_t top = min(end, total_lowmem);
+
+ if (base >= top)
+ continue;
+
+ ret = kasan_init_region(__va(base), top - base);
+ if (ret)
+ panic("kasan: kasan_init_region() failed");
+ }
+
+ if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
+ ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END);
+
+ if (ret)
+ panic("kasan: kasan_init_shadow_page_tables() failed");
+ }
+
+ kasan_remap_early_shadow_ro();
+
+ clear_page(kasan_early_shadow_page);
+
+ /* At this point kasan is fully initialized. Enable error messages */
+ init_task.kasan_depth = 0;
+ pr_info("KASAN init done\n");
+}
+
+void __init kasan_late_init(void)
+{
+ if (IS_ENABLED(CONFIG_KASAN_VMALLOC))
+ kasan_unmap_early_shadow_vmalloc();
+}
+
+void __init kasan_early_init(void)
+{
+ unsigned long addr = KASAN_SHADOW_START;
+ unsigned long end = KASAN_SHADOW_END;
+ unsigned long next;
+ pmd_t *pmd = pmd_off_k(addr);
+
+ BUILD_BUG_ON(KASAN_SHADOW_START & ~PGDIR_MASK);
+
+ kasan_populate_pte(kasan_early_shadow_pte, PAGE_KERNEL);
+
+ do {
+ next = pgd_addr_end(addr, end);
+ pmd_populate_kernel(&init_mm, pmd, kasan_early_shadow_pte);
+ } while (pmd++, addr = next, addr != end);
+}
diff --git a/arch/powerpc/mm/kasan/init_book3e_64.c b/arch/powerpc/mm/kasan/init_book3e_64.c
new file mode 100644
index 000000000000..11519e88dc6b
--- /dev/null
+++ b/arch/powerpc/mm/kasan/init_book3e_64.c
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KASAN for 64-bit Book3e powerpc
+ *
+ * Copyright 2022, Christophe Leroy, CS GROUP France
+ */
+
+#define DISABLE_BRANCH_PROFILING
+
+#include <linux/kasan.h>
+#include <linux/printk.h>
+#include <linux/memblock.h>
+#include <linux/set_memory.h>
+
+#include <asm/pgalloc.h>
+
+static inline bool kasan_pud_table(p4d_t p4d)
+{
+ return p4d_page(p4d) == virt_to_page(lm_alias(kasan_early_shadow_pud));
+}
+
+static inline bool kasan_pmd_table(pud_t pud)
+{
+ return pud_page(pud) == virt_to_page(lm_alias(kasan_early_shadow_pmd));
+}
+
+static inline bool kasan_pte_table(pmd_t pmd)
+{
+ return pmd_page(pmd) == virt_to_page(lm_alias(kasan_early_shadow_pte));
+}
+
+static int __init kasan_map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
+{
+ pgd_t *pgdp;
+ p4d_t *p4dp;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep;
+
+ pgdp = pgd_offset_k(ea);
+ p4dp = p4d_offset(pgdp, ea);
+ if (kasan_pud_table(*p4dp)) {
+ pudp = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE);
+ memcpy(pudp, kasan_early_shadow_pud, PUD_TABLE_SIZE);
+ p4d_populate(&init_mm, p4dp, pudp);
+ }
+ pudp = pud_offset(p4dp, ea);
+ if (kasan_pmd_table(*pudp)) {
+ pmdp = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE);
+ memcpy(pmdp, kasan_early_shadow_pmd, PMD_TABLE_SIZE);
+ pud_populate(&init_mm, pudp, pmdp);
+ }
+ pmdp = pmd_offset(pudp, ea);
+ if (kasan_pte_table(*pmdp)) {
+ ptep = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE);
+ memcpy(ptep, kasan_early_shadow_pte, PTE_TABLE_SIZE);
+ pmd_populate_kernel(&init_mm, pmdp, ptep);
+ }
+ ptep = pte_offset_kernel(pmdp, ea);
+
+ __set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, prot), 0);
+
+ return 0;
+}
+
+static void __init kasan_init_phys_region(void *start, void *end)
+{
+ unsigned long k_start, k_end, k_cur;
+ void *va;
+
+ if (start >= end)
+ return;
+
+ k_start = ALIGN_DOWN((unsigned long)kasan_mem_to_shadow(start), PAGE_SIZE);
+ k_end = ALIGN((unsigned long)kasan_mem_to_shadow(end), PAGE_SIZE);
+
+ va = memblock_alloc(k_end - k_start, PAGE_SIZE);
+ for (k_cur = k_start; k_cur < k_end; k_cur += PAGE_SIZE, va += PAGE_SIZE)
+ kasan_map_kernel_page(k_cur, __pa(va), PAGE_KERNEL);
+}
+
+void __init kasan_early_init(void)
+{
+ int i;
+ unsigned long addr;
+ pgd_t *pgd = pgd_offset_k(KASAN_SHADOW_START);
+ pte_t zero_pte = pfn_pte(virt_to_pfn(kasan_early_shadow_page), PAGE_KERNEL);
+
+ BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
+ BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
+
+ for (i = 0; i < PTRS_PER_PTE; i++)
+ __set_pte_at(&init_mm, (unsigned long)kasan_early_shadow_page,
+ &kasan_early_shadow_pte[i], zero_pte, 0);
+
+ for (i = 0; i < PTRS_PER_PMD; i++)
+ pmd_populate_kernel(&init_mm, &kasan_early_shadow_pmd[i],
+ kasan_early_shadow_pte);
+
+ for (i = 0; i < PTRS_PER_PUD; i++)
+ pud_populate(&init_mm, &kasan_early_shadow_pud[i],
+ kasan_early_shadow_pmd);
+
+ for (addr = KASAN_SHADOW_START; addr != KASAN_SHADOW_END; addr += PGDIR_SIZE)
+ p4d_populate(&init_mm, p4d_offset(pgd++, addr), kasan_early_shadow_pud);
+}
+
+void __init kasan_init(void)
+{
+ phys_addr_t start, end;
+ u64 i;
+ pte_t zero_pte = pfn_pte(virt_to_pfn(kasan_early_shadow_page), PAGE_KERNEL_RO);
+
+ for_each_mem_range(i, &start, &end)
+ kasan_init_phys_region((void *)start, (void *)end);
+
+ if (IS_ENABLED(CONFIG_KASAN_VMALLOC))
+ kasan_remove_zero_shadow((void *)VMALLOC_START, VMALLOC_SIZE);
+
+ for (i = 0; i < PTRS_PER_PTE; i++)
+ __set_pte_at(&init_mm, (unsigned long)kasan_early_shadow_page,
+ &kasan_early_shadow_pte[i], zero_pte, 0);
+
+ flush_tlb_kernel_range(KASAN_SHADOW_START, KASAN_SHADOW_END);
+
+ memset(kasan_early_shadow_page, 0, PAGE_SIZE);
+
+ /* Enable error messages */
+ init_task.kasan_depth = 0;
+ pr_info("KASAN init done\n");
+}
+
+void __init kasan_late_init(void) { }
diff --git a/arch/powerpc/mm/kasan/init_book3s_64.c b/arch/powerpc/mm/kasan/init_book3s_64.c
new file mode 100644
index 000000000000..9300d641cf9a
--- /dev/null
+++ b/arch/powerpc/mm/kasan/init_book3s_64.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KASAN for 64-bit Book3S powerpc
+ *
+ * Copyright 2019-2022, Daniel Axtens, IBM Corporation.
+ */
+
+/*
+ * ppc64 turns on virtual memory late in boot, after calling into generic code
+ * like the device-tree parser, so it uses this in conjunction with a hook in
+ * outline mode to avoid invalid access early in boot.
+ */
+
+#define DISABLE_BRANCH_PROFILING
+
+#include <linux/kasan.h>
+#include <linux/printk.h>
+#include <linux/sched/task.h>
+#include <linux/memblock.h>
+#include <asm/pgalloc.h>
+
+DEFINE_STATIC_KEY_FALSE(powerpc_kasan_enabled_key);
+
+static void __init kasan_init_phys_region(void *start, void *end)
+{
+ unsigned long k_start, k_end, k_cur;
+ void *va;
+
+ if (start >= end)
+ return;
+
+ k_start = ALIGN_DOWN((unsigned long)kasan_mem_to_shadow(start), PAGE_SIZE);
+ k_end = ALIGN((unsigned long)kasan_mem_to_shadow(end), PAGE_SIZE);
+
+ va = memblock_alloc(k_end - k_start, PAGE_SIZE);
+ for (k_cur = k_start; k_cur < k_end; k_cur += PAGE_SIZE, va += PAGE_SIZE)
+ map_kernel_page(k_cur, __pa(va), PAGE_KERNEL);
+}
+
+void __init kasan_init(void)
+{
+ /*
+ * We want to do the following things:
+ * 1) Map real memory into the shadow for all physical memblocks
+ * This takes us from c000... to c008...
+ * 2) Leave a hole over the shadow of vmalloc space. KASAN_VMALLOC
+ * will manage this for us.
+ * This takes us from c008... to c00a...
+ * 3) Map the 'early shadow'/zero page over iomap and vmemmap space.
+ * This takes us up to where we start at c00e...
+ */
+
+ void *k_start = kasan_mem_to_shadow((void *)RADIX_VMALLOC_END);
+ void *k_end = kasan_mem_to_shadow((void *)RADIX_VMEMMAP_END);
+ phys_addr_t start, end;
+ u64 i;
+ pte_t zero_pte = pfn_pte(virt_to_pfn(kasan_early_shadow_page), PAGE_KERNEL);
+
+ if (!early_radix_enabled()) {
+ pr_warn("KASAN not enabled as it requires radix!");
+ return;
+ }
+
+ for_each_mem_range(i, &start, &end)
+ kasan_init_phys_region((void *)start, (void *)end);
+
+ for (i = 0; i < PTRS_PER_PTE; i++)
+ __set_pte_at(&init_mm, (unsigned long)kasan_early_shadow_page,
+ &kasan_early_shadow_pte[i], zero_pte, 0);
+
+ for (i = 0; i < PTRS_PER_PMD; i++)
+ pmd_populate_kernel(&init_mm, &kasan_early_shadow_pmd[i],
+ kasan_early_shadow_pte);
+
+ for (i = 0; i < PTRS_PER_PUD; i++)
+ pud_populate(&init_mm, &kasan_early_shadow_pud[i],
+ kasan_early_shadow_pmd);
+
+ /* map the early shadow over the iomap and vmemmap space */
+ kasan_populate_early_shadow(k_start, k_end);
+
+ /* mark early shadow region as RO and wipe it */
+ zero_pte = pfn_pte(virt_to_pfn(kasan_early_shadow_page), PAGE_KERNEL_RO);
+ for (i = 0; i < PTRS_PER_PTE; i++)
+ __set_pte_at(&init_mm, (unsigned long)kasan_early_shadow_page,
+ &kasan_early_shadow_pte[i], zero_pte, 0);
+
+ /*
+ * clear_page relies on some cache info that hasn't been set up yet.
+ * It ends up looping ~forever and blows up other data.
+ * Use memset instead.
+ */
+ memset(kasan_early_shadow_page, 0, PAGE_SIZE);
+
+ static_branch_inc(&powerpc_kasan_enabled_key);
+
+ /* Enable error messages */
+ init_task.kasan_depth = 0;
+ pr_info("KASAN init done\n");
+}
+
+void __init kasan_early_init(void) { }
+
+void __init kasan_late_init(void) { }
diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c
deleted file mode 100644
index cf8770b1a692..000000000000
--- a/arch/powerpc/mm/kasan/kasan_init_32.c
+++ /dev/null
@@ -1,192 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-#define DISABLE_BRANCH_PROFILING
-
-#include <linux/kasan.h>
-#include <linux/printk.h>
-#include <linux/memblock.h>
-#include <linux/sched/task.h>
-#include <asm/pgalloc.h>
-#include <asm/code-patching.h>
-#include <mm/mmu_decl.h>
-
-static pgprot_t __init kasan_prot_ro(void)
-{
- if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
- return PAGE_READONLY;
-
- return PAGE_KERNEL_RO;
-}
-
-static void __init kasan_populate_pte(pte_t *ptep, pgprot_t prot)
-{
- unsigned long va = (unsigned long)kasan_early_shadow_page;
- phys_addr_t pa = __pa(kasan_early_shadow_page);
- int i;
-
- for (i = 0; i < PTRS_PER_PTE; i++, ptep++)
- __set_pte_at(&init_mm, va, ptep, pfn_pte(PHYS_PFN(pa), prot), 0);
-}
-
-int __init kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end)
-{
- pmd_t *pmd;
- unsigned long k_cur, k_next;
-
- pmd = pmd_off_k(k_start);
-
- for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pmd++) {
- pte_t *new;
-
- k_next = pgd_addr_end(k_cur, k_end);
- if ((void *)pmd_page_vaddr(*pmd) != kasan_early_shadow_pte)
- continue;
-
- new = memblock_alloc(PTE_FRAG_SIZE, PTE_FRAG_SIZE);
-
- if (!new)
- return -ENOMEM;
- kasan_populate_pte(new, PAGE_KERNEL);
- pmd_populate_kernel(&init_mm, pmd, new);
- }
- return 0;
-}
-
-int __init __weak kasan_init_region(void *start, size_t size)
-{
- unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start);
- unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size);
- unsigned long k_cur;
- int ret;
- void *block;
-
- ret = kasan_init_shadow_page_tables(k_start, k_end);
- if (ret)
- return ret;
-
- block = memblock_alloc(k_end - k_start, PAGE_SIZE);
- if (!block)
- return -ENOMEM;
-
- for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) {
- pmd_t *pmd = pmd_off_k(k_cur);
- void *va = block + k_cur - k_start;
- pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL);
-
- __set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0);
- }
- flush_tlb_kernel_range(k_start, k_end);
- return 0;
-}
-
-void __init
-kasan_update_early_region(unsigned long k_start, unsigned long k_end, pte_t pte)
-{
- unsigned long k_cur;
- phys_addr_t pa = __pa(kasan_early_shadow_page);
-
- for (k_cur = k_start; k_cur != k_end; k_cur += PAGE_SIZE) {
- pmd_t *pmd = pmd_off_k(k_cur);
- pte_t *ptep = pte_offset_kernel(pmd, k_cur);
-
- if ((pte_val(*ptep) & PTE_RPN_MASK) != pa)
- continue;
-
- __set_pte_at(&init_mm, k_cur, ptep, pte, 0);
- }
-
- flush_tlb_kernel_range(k_start, k_end);
-}
-
-static void __init kasan_remap_early_shadow_ro(void)
-{
- pgprot_t prot = kasan_prot_ro();
- phys_addr_t pa = __pa(kasan_early_shadow_page);
-
- kasan_populate_pte(kasan_early_shadow_pte, prot);
-
- kasan_update_early_region(KASAN_SHADOW_START, KASAN_SHADOW_END,
- pfn_pte(PHYS_PFN(pa), prot));
-}
-
-static void __init kasan_unmap_early_shadow_vmalloc(void)
-{
- unsigned long k_start = (unsigned long)kasan_mem_to_shadow((void *)VMALLOC_START);
- unsigned long k_end = (unsigned long)kasan_mem_to_shadow((void *)VMALLOC_END);
-
- kasan_update_early_region(k_start, k_end, __pte(0));
-
-#ifdef MODULES_VADDR
- k_start = (unsigned long)kasan_mem_to_shadow((void *)MODULES_VADDR);
- k_end = (unsigned long)kasan_mem_to_shadow((void *)MODULES_END);
- kasan_update_early_region(k_start, k_end, __pte(0));
-#endif
-}
-
-void __init kasan_mmu_init(void)
-{
- int ret;
-
- if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) {
- ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END);
-
- if (ret)
- panic("kasan: kasan_init_shadow_page_tables() failed");
- }
-}
-
-void __init kasan_init(void)
-{
- phys_addr_t base, end;
- u64 i;
- int ret;
-
- for_each_mem_range(i, &base, &end) {
- phys_addr_t top = min(end, total_lowmem);
-
- if (base >= top)
- continue;
-
- ret = kasan_init_region(__va(base), top - base);
- if (ret)
- panic("kasan: kasan_init_region() failed");
- }
-
- if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
- ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END);
-
- if (ret)
- panic("kasan: kasan_init_shadow_page_tables() failed");
- }
-
- kasan_remap_early_shadow_ro();
-
- clear_page(kasan_early_shadow_page);
-
- /* At this point kasan is fully initialized. Enable error messages */
- init_task.kasan_depth = 0;
- pr_info("KASAN init done\n");
-}
-
-void __init kasan_late_init(void)
-{
- if (IS_ENABLED(CONFIG_KASAN_VMALLOC))
- kasan_unmap_early_shadow_vmalloc();
-}
-
-void __init kasan_early_init(void)
-{
- unsigned long addr = KASAN_SHADOW_START;
- unsigned long end = KASAN_SHADOW_END;
- unsigned long next;
- pmd_t *pmd = pmd_off_k(addr);
-
- BUILD_BUG_ON(KASAN_SHADOW_START & ~PGDIR_MASK);
-
- kasan_populate_pte(kasan_early_shadow_pte, PAGE_KERNEL);
-
- do {
- next = pgd_addr_end(addr, end);
- pmd_populate_kernel(&init_mm, pmd, kasan_early_shadow_pte);
- } while (pmd++, addr = next, addr != end);
-}
diff --git a/arch/powerpc/mm/maccess.c b/arch/powerpc/mm/maccess.c
index aad7c47e0030..ea821d0ffe16 100644
--- a/arch/powerpc/mm/maccess.c
+++ b/arch/powerpc/mm/maccess.c
@@ -11,20 +11,3 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
{
return is_kernel_addr((unsigned long)unsafe_src);
}
-
-int copy_inst_from_kernel_nofault(struct ppc_inst *inst, u32 *src)
-{
- unsigned int val, suffix;
- int err;
-
- err = copy_from_kernel_nofault(&val, src, sizeof(val));
- if (err)
- return err;
- if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) {
- err = copy_from_kernel_nofault(&suffix, src + 1, sizeof(suffix));
- *inst = ppc_inst_prefix(val, suffix);
- } else {
- *inst = ppc_inst(val);
- }
- return err;
-}
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index bd5d91a31183..84d171953ba4 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -17,16 +17,19 @@
#include <linux/suspend.h>
#include <linux/dma-direct.h>
+#include <asm/swiotlb.h>
#include <asm/machdep.h>
#include <asm/rtas.h>
#include <asm/kasan.h>
#include <asm/svm.h>
#include <asm/mmzone.h>
+#include <asm/ftrace.h>
+#include <asm/code-patching.h>
+#include <asm/setup.h>
#include <mm/mmu_decl.h>
unsigned long long memory_limit;
-bool init_mem_is_free;
unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
EXPORT_SYMBOL(empty_zero_page);
@@ -52,6 +55,7 @@ int memory_add_physaddr_to_nid(u64 start)
{
return hot_add_scn_to_nid(start);
}
+EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif
int __weak create_section_mapping(unsigned long start, unsigned long end,
@@ -103,6 +107,37 @@ void __ref arch_remove_linear_mapping(u64 start, u64 size)
vm_unmap_aliases();
}
+/*
+ * After memory hotplug the variables max_pfn, max_low_pfn and high_memory need
+ * updating.
+ */
+static void update_end_of_memory_vars(u64 start, u64 size)
+{
+ unsigned long end_pfn = PFN_UP(start + size);
+
+ if (end_pfn > max_pfn) {
+ max_pfn = end_pfn;
+ max_low_pfn = end_pfn;
+ high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
+ }
+}
+
+int __ref add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
+ struct mhp_params *params)
+{
+ int ret;
+
+ ret = __add_pages(nid, start_pfn, nr_pages, params);
+ if (ret)
+ return ret;
+
+ /* update max_pfn, max_low_pfn and high_memory */
+ update_end_of_memory_vars(start_pfn << PAGE_SHIFT,
+ nr_pages << PAGE_SHIFT);
+
+ return ret;
+}
+
int __ref arch_add_memory(int nid, u64 start, u64 size,
struct mhp_params *params)
{
@@ -113,7 +148,7 @@ int __ref arch_add_memory(int nid, u64 start, u64 size,
rc = arch_create_linear_mapping(nid, start, size, params);
if (rc)
return rc;
- rc = __add_pages(nid, start_pfn, nr_pages, params);
+ rc = add_pages(nid, start_pfn, nr_pages, params);
if (rc)
arch_remove_linear_mapping(start, size);
return rc;
@@ -249,14 +284,11 @@ void __init mem_init(void)
* back to to-down.
*/
memblock_set_bottom_up(true);
- if (is_secure_guest())
- svm_swiotlb_init();
- else
- swiotlb_init(0);
+ swiotlb_init(ppc_swiotlb_enable, ppc_swiotlb_flags);
#endif
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
- set_max_mapnr(max_low_pfn);
+ set_max_mapnr(max_pfn);
kasan_late_init();
@@ -270,13 +302,13 @@ void __init mem_init(void)
for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) {
phys_addr_t paddr = (phys_addr_t)pfn << PAGE_SHIFT;
struct page *page = pfn_to_page(pfn);
- if (!memblock_is_reserved(paddr))
+ if (memblock_is_memory(paddr) && !memblock_is_reserved(paddr))
free_highmem_page(page);
}
}
#endif /* CONFIG_HIGHMEM */
-#if defined(CONFIG_PPC_FSL_BOOK3E) && !defined(CONFIG_SMP)
+#if defined(CONFIG_PPC_E500) && !defined(CONFIG_SMP)
/*
* If smp is enabled, next_tlbcam_idx is initialized in the cpu up
* functions.... do it here for the non-smp case.
@@ -312,8 +344,9 @@ void free_initmem(void)
{
ppc_md.progress = ppc_printk_progress;
mark_initmem_nx();
- init_mem_is_free = true;
+ static_branch_enable(&init_mem_is_free);
free_initmem_default(POISON_FREE_INITMEM);
+ ftrace_free_init_tramp();
}
/*
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
deleted file mode 100644
index ae683fdc716c..000000000000
--- a/arch/powerpc/mm/mmap.c
+++ /dev/null
@@ -1,228 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * flexible mmap layout support
- *
- * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
- * All Rights Reserved.
- *
- * Started by Ingo Molnar <mingo@elte.hu>
- */
-
-#include <linux/personality.h>
-#include <linux/mm.h>
-#include <linux/random.h>
-#include <linux/sched/signal.h>
-#include <linux/sched/mm.h>
-#include <linux/elf-randomize.h>
-#include <linux/security.h>
-#include <linux/mman.h>
-
-/*
- * Top of mmap area (just below the process stack).
- *
- * Leave at least a ~128 MB hole.
- */
-#define MIN_GAP (128*1024*1024)
-#define MAX_GAP (TASK_SIZE/6*5)
-
-static inline int mmap_is_legacy(struct rlimit *rlim_stack)
-{
- if (current->personality & ADDR_COMPAT_LAYOUT)
- return 1;
-
- if (rlim_stack->rlim_cur == RLIM_INFINITY)
- return 1;
-
- return sysctl_legacy_va_layout;
-}
-
-unsigned long arch_mmap_rnd(void)
-{
- unsigned long shift, rnd;
-
- shift = mmap_rnd_bits;
-#ifdef CONFIG_COMPAT
- if (is_32bit_task())
- shift = mmap_rnd_compat_bits;
-#endif
- rnd = get_random_long() % (1ul << shift);
-
- return rnd << PAGE_SHIFT;
-}
-
-static inline unsigned long stack_maxrandom_size(void)
-{
- if (!(current->flags & PF_RANDOMIZE))
- return 0;
-
- /* 8MB for 32bit, 1GB for 64bit */
- if (is_32bit_task())
- return (1<<23);
- else
- return (1<<30);
-}
-
-static inline unsigned long mmap_base(unsigned long rnd,
- struct rlimit *rlim_stack)
-{
- unsigned long gap = rlim_stack->rlim_cur;
- unsigned long pad = stack_maxrandom_size() + stack_guard_gap;
-
- /* Values close to RLIM_INFINITY can overflow. */
- if (gap + pad > gap)
- gap += pad;
-
- if (gap < MIN_GAP)
- gap = MIN_GAP;
- else if (gap > MAX_GAP)
- gap = MAX_GAP;
-
- return PAGE_ALIGN(DEFAULT_MAP_WINDOW - gap - rnd);
-}
-
-#ifdef CONFIG_PPC_RADIX_MMU
-/*
- * Same function as generic code used only for radix, because we don't need to overload
- * the generic one. But we will have to duplicate, because hash select
- * HAVE_ARCH_UNMAPPED_AREA
- */
-static unsigned long
-radix__arch_get_unmapped_area(struct file *filp, unsigned long addr,
- unsigned long len, unsigned long pgoff,
- unsigned long flags)
-{
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
- int fixed = (flags & MAP_FIXED);
- unsigned long high_limit;
- struct vm_unmapped_area_info info;
-
- high_limit = DEFAULT_MAP_WINDOW;
- if (addr >= high_limit || (fixed && (addr + len > high_limit)))
- high_limit = TASK_SIZE;
-
- if (len > high_limit)
- return -ENOMEM;
-
- if (fixed) {
- if (addr > high_limit - len)
- return -ENOMEM;
- return addr;
- }
-
- if (addr) {
- addr = PAGE_ALIGN(addr);
- vma = find_vma(mm, addr);
- if (high_limit - len >= addr && addr >= mmap_min_addr &&
- (!vma || addr + len <= vm_start_gap(vma)))
- return addr;
- }
-
- info.flags = 0;
- info.length = len;
- info.low_limit = mm->mmap_base;
- info.high_limit = high_limit;
- info.align_mask = 0;
-
- return vm_unmapped_area(&info);
-}
-
-static unsigned long
-radix__arch_get_unmapped_area_topdown(struct file *filp,
- const unsigned long addr0,
- const unsigned long len,
- const unsigned long pgoff,
- const unsigned long flags)
-{
- struct vm_area_struct *vma;
- struct mm_struct *mm = current->mm;
- unsigned long addr = addr0;
- int fixed = (flags & MAP_FIXED);
- unsigned long high_limit;
- struct vm_unmapped_area_info info;
-
- high_limit = DEFAULT_MAP_WINDOW;
- if (addr >= high_limit || (fixed && (addr + len > high_limit)))
- high_limit = TASK_SIZE;
-
- if (len > high_limit)
- return -ENOMEM;
-
- if (fixed) {
- if (addr > high_limit - len)
- return -ENOMEM;
- return addr;
- }
-
- if (addr) {
- addr = PAGE_ALIGN(addr);
- vma = find_vma(mm, addr);
- if (high_limit - len >= addr && addr >= mmap_min_addr &&
- (!vma || addr + len <= vm_start_gap(vma)))
- return addr;
- }
-
- info.flags = VM_UNMAPPED_AREA_TOPDOWN;
- info.length = len;
- info.low_limit = max(PAGE_SIZE, mmap_min_addr);
- info.high_limit = mm->mmap_base + (high_limit - DEFAULT_MAP_WINDOW);
- info.align_mask = 0;
-
- addr = vm_unmapped_area(&info);
- if (!(addr & ~PAGE_MASK))
- return addr;
- VM_BUG_ON(addr != -ENOMEM);
-
- /*
- * A failed mmap() very likely causes application failure,
- * so fall back to the bottom-up function here. This scenario
- * can happen with large stack limits and large mmap()
- * allocations.
- */
- return radix__arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
-}
-
-static void radix__arch_pick_mmap_layout(struct mm_struct *mm,
- unsigned long random_factor,
- struct rlimit *rlim_stack)
-{
- if (mmap_is_legacy(rlim_stack)) {
- mm->mmap_base = TASK_UNMAPPED_BASE;
- mm->get_unmapped_area = radix__arch_get_unmapped_area;
- } else {
- mm->mmap_base = mmap_base(random_factor, rlim_stack);
- mm->get_unmapped_area = radix__arch_get_unmapped_area_topdown;
- }
-}
-#else
-/* dummy */
-extern void radix__arch_pick_mmap_layout(struct mm_struct *mm,
- unsigned long random_factor,
- struct rlimit *rlim_stack);
-#endif
-/*
- * This function, called very early during the creation of a new
- * process VM image, sets up which VM layout function to use:
- */
-void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
-{
- unsigned long random_factor = 0UL;
-
- if (current->flags & PF_RANDOMIZE)
- random_factor = arch_mmap_rnd();
-
- if (radix_enabled())
- return radix__arch_pick_mmap_layout(mm, random_factor,
- rlim_stack);
- /*
- * Fall back to the standard layout if the personality
- * bit is set, or if the expected stack growth is unlimited:
- */
- if (mmap_is_legacy(rlim_stack)) {
- mm->mmap_base = TASK_UNMAPPED_BASE;
- mm->get_unmapped_area = arch_get_unmapped_area;
- } else {
- mm->mmap_base = mmap_base(random_factor, rlim_stack);
- mm->get_unmapped_area = arch_get_unmapped_area_topdown;
- }
-}
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index 74246536b832..1fb9c99f8679 100644
--- a/arch/powerpc/mm/mmu_context.c
+++ b/arch/powerpc/mm/mmu_context.c
@@ -18,6 +18,12 @@ static inline void switch_mm_pgdir(struct task_struct *tsk,
{
/* 32-bit keeps track of the current PGDIR in the thread struct */
tsk->thread.pgdir = mm->pgd;
+#ifdef CONFIG_PPC_BOOK3S_32
+ tsk->thread.sr0 = mm->context.sr0;
+#endif
+#if defined(CONFIG_BOOKE_OR_40x) && defined(CONFIG_PPC_KUAP)
+ tsk->thread.pid = mm->context.id;
+#endif
}
#elif defined(CONFIG_PPC_BOOK3E_64)
static inline void switch_mm_pgdir(struct task_struct *tsk,
@@ -25,6 +31,9 @@ static inline void switch_mm_pgdir(struct task_struct *tsk,
{
/* 64-bit Book3E keeps track of current PGD in the PACA */
get_paca()->pgd = mm->pgd;
+#ifdef CONFIG_PPC_KUAP
+ tsk->thread.pid = mm->context.id;
+#endif
}
#else
static inline void switch_mm_pgdir(struct task_struct *tsk,
@@ -81,7 +90,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
* context
*/
if (cpu_has_feature(CPU_FTR_ALTIVEC))
- asm volatile ("dssall");
+ asm volatile (PPC_DSSALL);
if (!new_on_cpu)
membarrier_arch_switch_mm(prev, next, tsk);
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 0dd4c18f8363..bd9784f77f2e 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -38,7 +38,7 @@ static inline void _tlbil_pid(unsigned int pid)
#else /* CONFIG_40x || CONFIG_PPC_8xx */
extern void _tlbil_all(void);
extern void _tlbil_pid(unsigned int pid);
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
extern void _tlbil_pid_noind(unsigned int pid);
#else
#define _tlbil_pid_noind(pid) _tlbil_pid(pid)
@@ -55,7 +55,7 @@ static inline void _tlbil_va(unsigned long address, unsigned int pid,
asm volatile ("tlbie %0; sync" : : "r" (address) : "memory");
trace_tlbie(0, 0, address, pid, 0, 0, 0);
}
-#elif defined(CONFIG_PPC_BOOK3E)
+#elif defined(CONFIG_PPC_BOOK3E_64)
extern void _tlbil_va(unsigned long address, unsigned int pid,
unsigned int tsize, unsigned int ind);
#else
@@ -67,7 +67,7 @@ static inline void _tlbil_va(unsigned long address, unsigned int pid,
}
#endif /* CONFIG_PPC_8xx */
-#if defined(CONFIG_PPC_BOOK3E) || defined(CONFIG_PPC_47x)
+#if defined(CONFIG_PPC_BOOK3E_64) || defined(CONFIG_PPC_47x)
extern void _tlbivax_bcast(unsigned long address, unsigned int pid,
unsigned int tsize, unsigned int ind);
#else
@@ -92,29 +92,16 @@ extern void mapin_ram(void);
extern void setbat(int index, unsigned long virt, phys_addr_t phys,
unsigned int size, pgprot_t prot);
-extern int __map_without_bats;
-extern unsigned int rtas_data, rtas_size;
-
-struct hash_pte;
extern u8 early_hash[];
#endif /* CONFIG_PPC32 */
extern unsigned long __max_low_memory;
-extern phys_addr_t __initial_memory_limit_addr;
extern phys_addr_t total_memory;
extern phys_addr_t total_lowmem;
extern phys_addr_t memstart_addr;
extern phys_addr_t lowmem_end_addr;
-#ifdef CONFIG_WII
-extern unsigned long wii_hole_start;
-extern unsigned long wii_hole_size;
-
-extern unsigned long wii_mmu_mapin_mem2(unsigned long top);
-extern void wii_memory_fixups(void);
-#endif
-
/* ...and now those things that may be slightly different between processor
* architectures. -- Dan
*/
@@ -124,11 +111,9 @@ void MMU_init_hw_patch(void);
unsigned long mmu_mapin_ram(unsigned long base, unsigned long top);
#endif
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
extern unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx,
bool dryrun, bool init);
-extern unsigned long calc_cam_sz(unsigned long ram, unsigned long virt,
- phys_addr_t phys);
#ifdef CONFIG_PPC32
extern void adjust_total_lowmem(void);
extern int switch_to_as1(void);
@@ -155,11 +140,15 @@ struct tlbcam {
u32 MAS3;
u32 MAS7;
};
+
+#define NUM_TLBCAMS 64
+
+extern struct tlbcam TLBCAM[NUM_TLBCAMS];
#endif
-#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_FSL_BOOKE) || defined(CONFIG_PPC_8xx)
+#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_85xx) || defined(CONFIG_PPC_8xx)
/* 6xx have BATS */
-/* FSL_BOOKE have TLBCAM */
+/* PPC_85xx have TLBCAM */
/* 8xx have LTLB */
phys_addr_t v_block_mapped(unsigned long va);
unsigned long p_block_mapped(phys_addr_t pa);
@@ -168,7 +157,7 @@ static inline phys_addr_t v_block_mapped(unsigned long va) { return 0; }
static inline unsigned long p_block_mapped(phys_addr_t pa) { return 0; }
#endif
-#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC_FSL_BOOK3E)
+#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC_E500)
void mmu_mark_initmem_nx(void);
void mmu_mark_rodata_ro(void);
#else
diff --git a/arch/powerpc/mm/nohash/40x.c b/arch/powerpc/mm/nohash/40x.c
index 95751c322f6c..3684d6e570fb 100644
--- a/arch/powerpc/mm/nohash/40x.c
+++ b/arch/powerpc/mm/nohash/40x.c
@@ -32,7 +32,6 @@
#include <linux/highmem.h>
#include <linux/memblock.h>
-#include <asm/prom.h>
#include <asm/io.h>
#include <asm/mmu_context.h>
#include <asm/mmu.h>
@@ -44,7 +43,6 @@
#include <mm/mmu_decl.h>
-extern int __map_without_ltlbs;
/*
* MMU_init_hw does the chip-specific initialization of the MMU hardware.
*/
@@ -95,7 +93,13 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
p = 0;
s = total_lowmem;
- if (__map_without_ltlbs)
+ if (IS_ENABLED(CONFIG_KFENCE))
+ return 0;
+
+ if (debug_pagealloc_enabled())
+ return 0;
+
+ if (strict_kernel_rwx_enabled())
return 0;
while (s >= LARGE_PAGE_SIZE_16M) {
diff --git a/arch/powerpc/mm/nohash/44x.c b/arch/powerpc/mm/nohash/44x.c
index e079f26b267e..1beae802bb1c 100644
--- a/arch/powerpc/mm/nohash/44x.c
+++ b/arch/powerpc/mm/nohash/44x.c
@@ -38,7 +38,7 @@ int icache_44x_need_flush;
unsigned long tlb_47x_boltmap[1024/8];
-static void ppc44x_update_tlb_hwater(void)
+static void __init ppc44x_update_tlb_hwater(void)
{
/* The TLB miss handlers hard codes the watermark in a cmpli
* instruction to improve performances rather than loading it
@@ -122,7 +122,7 @@ static void __init ppc47x_update_boltmap(void)
/*
* "Pins" a 256MB TLB entry in AS0 for kernel lowmem for 47x type MMU
*/
-static void ppc47x_pin_tlb(unsigned int virt, unsigned int phys)
+static void __init ppc47x_pin_tlb(unsigned int virt, unsigned int phys)
{
unsigned int rA;
int bolted;
@@ -240,19 +240,3 @@ void __init mmu_init_secondary(int cpu)
}
}
#endif /* CONFIG_SMP */
-
-#ifdef CONFIG_PPC_KUEP
-void setup_kuep(bool disabled)
-{
- if (smp_processor_id() != boot_cpuid)
- return;
-
- if (disabled)
- patch_instruction_site(&patch__tlb_44x_kuep, ppc_inst(PPC_RAW_NOP()));
- else
- pr_info("Activating Kernel Userspace Execution Prevention\n");
-
- if (IS_ENABLED(CONFIG_PPC_47x) && disabled)
- patch_instruction_site(&patch__tlb_47x_kuep, ppc_inst(PPC_RAW_NOP()));
-}
-#endif
diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
index 0df9fe29dd56..dbbfe897455d 100644
--- a/arch/powerpc/mm/nohash/8xx.c
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -8,18 +8,12 @@
*/
#include <linux/memblock.h>
-#include <linux/mmu_context.h>
#include <linux/hugetlb.h>
-#include <asm/fixmap.h>
-#include <asm/code-patching.h>
-#include <asm/inst.h>
#include <mm/mmu_decl.h>
#define IMMR_SIZE (FIX_IMMR_SIZE << PAGE_SHIFT)
-extern int __map_without_ltlbs;
-
static unsigned long block_mapped_ram;
/*
@@ -32,8 +26,6 @@ phys_addr_t v_block_mapped(unsigned long va)
if (va >= VIRT_IMMR_BASE && va < VIRT_IMMR_BASE + IMMR_SIZE)
return p + va - VIRT_IMMR_BASE;
- if (__map_without_ltlbs)
- return 0;
if (va >= PAGE_OFFSET && va < PAGE_OFFSET + block_mapped_ram)
return __pa(va);
return 0;
@@ -49,8 +41,6 @@ unsigned long p_block_mapped(phys_addr_t pa)
if (pa >= p && pa < p + IMMR_SIZE)
return VIRT_IMMR_BASE + pa - p;
- if (__map_without_ltlbs)
- return 0;
if (pa < block_mapped_ram)
return (unsigned long)__va(pa);
return 0;
@@ -157,9 +147,6 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
mmu_mapin_immr();
- if (__map_without_ltlbs)
- return 0;
-
mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, true);
if (debug_pagealloc_enabled_or_kfence()) {
top = boundary;
@@ -183,8 +170,8 @@ void mmu_mark_initmem_nx(void)
unsigned long boundary = strict_kernel_rwx_enabled() ? sinittext : etext8;
unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M);
- mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, false);
- mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL, false);
+ if (!debug_pagealloc_enabled_or_kfence())
+ mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL, false);
mmu_pin_tlb(block_mapped_ram, false);
}
@@ -212,35 +199,6 @@ void __init setup_initial_memory_limit(phys_addr_t first_memblock_base,
memblock_set_current_limit(min_t(u64, first_memblock_size, SZ_32M));
}
-#ifdef CONFIG_PPC_KUEP
-void __init setup_kuep(bool disabled)
-{
- if (disabled)
- return;
-
- pr_info("Activating Kernel Userspace Execution Prevention\n");
-
- mtspr(SPRN_MI_AP, MI_APG_KUEP);
-}
-#endif
-
-#ifdef CONFIG_PPC_KUAP
-struct static_key_false disable_kuap_key;
-EXPORT_SYMBOL(disable_kuap_key);
-
-void __init setup_kuap(bool disabled)
-{
- if (disabled) {
- static_branch_enable(&disable_kuap_key);
- return;
- }
-
- pr_info("Activating Kernel Userspace Access Protection\n");
-
- mtspr(SPRN_MD_AP, MD_APG_KUAP);
-}
-#endif
-
int pud_clear_huge(pud_t *pud)
{
return 0;
diff --git a/arch/powerpc/mm/nohash/Makefile b/arch/powerpc/mm/nohash/Makefile
index b1f630d423d8..f3894e79d5f7 100644
--- a/arch/powerpc/mm/nohash/Makefile
+++ b/arch/powerpc/mm/nohash/Makefile
@@ -2,18 +2,18 @@
ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
-obj-y += mmu_context.o tlb.o tlb_low.o
+obj-y += mmu_context.o tlb.o tlb_low.o kup.o
obj-$(CONFIG_PPC_BOOK3E_64) += tlb_low_64e.o book3e_pgtable.o
obj-$(CONFIG_40x) += 40x.o
obj-$(CONFIG_44x) += 44x.o
obj-$(CONFIG_PPC_8xx) += 8xx.o
-obj-$(CONFIG_PPC_FSL_BOOK3E) += fsl_book3e.o
+obj-$(CONFIG_PPC_E500) += e500.o
obj-$(CONFIG_RANDOMIZE_BASE) += kaslr_booke.o
ifdef CONFIG_HUGETLB_PAGE
-obj-$(CONFIG_PPC_FSL_BOOK3E) += book3e_hugetlbpage.o
+obj-$(CONFIG_PPC_E500) += e500_hugetlbpage.o
endif
# Disable kcov instrumentation on sensitive code
# This is necessary for booting with kcov enabled on book3e machines
KCOV_INSTRUMENT_tlb.o := n
-KCOV_INSTRUMENT_fsl_book3e.o := n
+KCOV_INSTRUMENT_e500.o := n
diff --git a/arch/powerpc/mm/nohash/book3e_hugetlbpage.c b/arch/powerpc/mm/nohash/book3e_hugetlbpage.c
deleted file mode 100644
index 8b88be91b622..000000000000
--- a/arch/powerpc/mm/nohash/book3e_hugetlbpage.c
+++ /dev/null
@@ -1,204 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * PPC Huge TLB Page Support for Book3E MMU
- *
- * Copyright (C) 2009 David Gibson, IBM Corporation.
- * Copyright (C) 2011 Becky Bruce, Freescale Semiconductor
- *
- */
-#include <linux/mm.h>
-#include <linux/hugetlb.h>
-
-#include <asm/mmu.h>
-
-#ifdef CONFIG_PPC64
-#include <asm/paca.h>
-
-static inline int tlb1_next(void)
-{
- struct paca_struct *paca = get_paca();
- struct tlb_core_data *tcd;
- int this, next;
-
- tcd = paca->tcd_ptr;
- this = tcd->esel_next;
-
- next = this + 1;
- if (next >= tcd->esel_max)
- next = tcd->esel_first;
-
- tcd->esel_next = next;
- return this;
-}
-
-static inline void book3e_tlb_lock(void)
-{
- struct paca_struct *paca = get_paca();
- unsigned long tmp;
- int token = smp_processor_id() + 1;
-
- /*
- * Besides being unnecessary in the absence of SMT, this
- * check prevents trying to do lbarx/stbcx. on e5500 which
- * doesn't implement either feature.
- */
- if (!cpu_has_feature(CPU_FTR_SMT))
- return;
-
- asm volatile("1: lbarx %0, 0, %1;"
- "cmpwi %0, 0;"
- "bne 2f;"
- "stbcx. %2, 0, %1;"
- "bne 1b;"
- "b 3f;"
- "2: lbzx %0, 0, %1;"
- "cmpwi %0, 0;"
- "bne 2b;"
- "b 1b;"
- "3:"
- : "=&r" (tmp)
- : "r" (&paca->tcd_ptr->lock), "r" (token)
- : "memory");
-}
-
-static inline void book3e_tlb_unlock(void)
-{
- struct paca_struct *paca = get_paca();
-
- if (!cpu_has_feature(CPU_FTR_SMT))
- return;
-
- isync();
- paca->tcd_ptr->lock = 0;
-}
-#else
-static inline int tlb1_next(void)
-{
- int index, ncams;
-
- ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;
-
- index = this_cpu_read(next_tlbcam_idx);
-
- /* Just round-robin the entries and wrap when we hit the end */
- if (unlikely(index == ncams - 1))
- __this_cpu_write(next_tlbcam_idx, tlbcam_index);
- else
- __this_cpu_inc(next_tlbcam_idx);
-
- return index;
-}
-
-static inline void book3e_tlb_lock(void)
-{
-}
-
-static inline void book3e_tlb_unlock(void)
-{
-}
-#endif
-
-static inline int book3e_tlb_exists(unsigned long ea, unsigned long pid)
-{
- int found = 0;
-
- mtspr(SPRN_MAS6, pid << 16);
- if (mmu_has_feature(MMU_FTR_USE_TLBRSRV)) {
- asm volatile(
- "li %0,0\n"
- "tlbsx. 0,%1\n"
- "bne 1f\n"
- "li %0,1\n"
- "1:\n"
- : "=&r"(found) : "r"(ea));
- } else {
- asm volatile(
- "tlbsx 0,%1\n"
- "mfspr %0,0x271\n"
- "srwi %0,%0,31\n"
- : "=&r"(found) : "r"(ea));
- }
-
- return found;
-}
-
-static void
-book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, pte_t pte)
-{
- unsigned long mas1, mas2;
- u64 mas7_3;
- unsigned long psize, tsize, shift;
- unsigned long flags;
- struct mm_struct *mm;
- int index;
-
- if (unlikely(is_kernel_addr(ea)))
- return;
-
- mm = vma->vm_mm;
-
- psize = vma_mmu_pagesize(vma);
- shift = __ilog2(psize);
- tsize = shift - 10;
- /*
- * We can't be interrupted while we're setting up the MAS
- * regusters or after we've confirmed that no tlb exists.
- */
- local_irq_save(flags);
-
- book3e_tlb_lock();
-
- if (unlikely(book3e_tlb_exists(ea, mm->context.id))) {
- book3e_tlb_unlock();
- local_irq_restore(flags);
- return;
- }
-
- /* We have to use the CAM(TLB1) on FSL parts for hugepages */
- index = tlb1_next();
- mtspr(SPRN_MAS0, MAS0_ESEL(index) | MAS0_TLBSEL(1));
-
- mas1 = MAS1_VALID | MAS1_TID(mm->context.id) | MAS1_TSIZE(tsize);
- mas2 = ea & ~((1UL << shift) - 1);
- mas2 |= (pte_val(pte) >> PTE_WIMGE_SHIFT) & MAS2_WIMGE_MASK;
- mas7_3 = (u64)pte_pfn(pte) << PAGE_SHIFT;
- mas7_3 |= (pte_val(pte) >> PTE_BAP_SHIFT) & MAS3_BAP_MASK;
- if (!pte_dirty(pte))
- mas7_3 &= ~(MAS3_SW|MAS3_UW);
-
- mtspr(SPRN_MAS1, mas1);
- mtspr(SPRN_MAS2, mas2);
-
- if (mmu_has_feature(MMU_FTR_USE_PAIRED_MAS)) {
- mtspr(SPRN_MAS7_MAS3, mas7_3);
- } else {
- if (mmu_has_feature(MMU_FTR_BIG_PHYS))
- mtspr(SPRN_MAS7, upper_32_bits(mas7_3));
- mtspr(SPRN_MAS3, lower_32_bits(mas7_3));
- }
-
- asm volatile ("tlbwe");
-
- book3e_tlb_unlock();
- local_irq_restore(flags);
-}
-
-/*
- * This is called at the end of handling a user page fault, when the
- * fault has been handled by updating a PTE in the linux page tables.
- *
- * This must always be called with the pte lock held.
- */
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
-{
- if (is_vm_hugetlb_page(vma))
- book3e_hugetlb_preload(vma, address, *ptep);
-}
-
-void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
-{
- struct hstate *hstate = hstate_file(vma->vm_file);
- unsigned long tsize = huge_page_shift(hstate) - 10;
-
- __flush_tlb_page(vma->vm_mm, vmaddr, tsize, 0);
-}
diff --git a/arch/powerpc/mm/nohash/book3e_pgtable.c b/arch/powerpc/mm/nohash/book3e_pgtable.c
index 77884e24281d..b80fc4a91a53 100644
--- a/arch/powerpc/mm/nohash/book3e_pgtable.c
+++ b/arch/powerpc/mm/nohash/book3e_pgtable.c
@@ -10,6 +10,7 @@
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/dma.h>
+#include <asm/code-patching.h>
#include <mm/mmu_decl.h>
@@ -95,8 +96,8 @@ int __ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
pgdp = pgd_offset_k(ea);
p4dp = p4d_offset(pgdp, ea);
if (p4d_none(*p4dp)) {
- pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
- p4d_populate(&init_mm, p4dp, pmdp);
+ pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
+ p4d_populate(&init_mm, p4dp, pudp);
}
pudp = pud_offset(p4dp, ea);
if (pud_none(*pudp)) {
@@ -105,7 +106,7 @@ int __ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
}
pmdp = pmd_offset(pudp, ea);
if (!pmd_present(*pmdp)) {
- ptep = early_alloc_pgtable(PAGE_SIZE);
+ ptep = early_alloc_pgtable(PTE_TABLE_SIZE);
pmd_populate_kernel(&init_mm, pmdp, ptep);
}
ptep = pte_offset_kernel(pmdp, ea);
@@ -115,3 +116,17 @@ int __ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
smp_wmb();
return 0;
}
+
+void __patch_exception(int exc, unsigned long addr)
+{
+ unsigned int *ibase = &interrupt_base_book3e;
+
+ /*
+ * Our exceptions vectors start with a NOP and -then- a branch
+ * to deal with single stepping from userspace which stops on
+ * the second instruction. Thus we need to patch the second
+ * instruction of the exception, not the first one.
+ */
+
+ patch_branch(ibase + (exc / 4) + 1, addr, 0);
+}
diff --git a/arch/powerpc/mm/nohash/e500.c b/arch/powerpc/mm/nohash/e500.c
new file mode 100644
index 000000000000..40a4e69ae1a9
--- /dev/null
+++ b/arch/powerpc/mm/nohash/e500.c
@@ -0,0 +1,375 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Modifications by Kumar Gala (galak@kernel.crashing.org) to support
+ * E500 Book E processors.
+ *
+ * Copyright 2004,2010 Freescale Semiconductor, Inc.
+ *
+ * This file contains the routines for initializing the MMU
+ * on the 4xx series of chips.
+ * -- paulus
+ *
+ * Derived from arch/ppc/mm/init.c:
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ * and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ * Copyright (C) 1996 Paul Mackerras
+ *
+ * Derived from "arch/i386/mm/init.c"
+ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
+ */
+
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/stddef.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/highmem.h>
+#include <linux/memblock.h>
+#include <linux/of_fdt.h>
+
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/mmu.h>
+#include <linux/uaccess.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/setup.h>
+#include <asm/paca.h>
+
+#include <mm/mmu_decl.h>
+
+unsigned int tlbcam_index;
+
+struct tlbcam TLBCAM[NUM_TLBCAMS];
+
+static struct {
+ unsigned long start;
+ unsigned long limit;
+ phys_addr_t phys;
+} tlbcam_addrs[NUM_TLBCAMS];
+
+#ifdef CONFIG_PPC_85xx
+/*
+ * Return PA for this VA if it is mapped by a CAM, or 0
+ */
+phys_addr_t v_block_mapped(unsigned long va)
+{
+ int b;
+ for (b = 0; b < tlbcam_index; ++b)
+ if (va >= tlbcam_addrs[b].start && va < tlbcam_addrs[b].limit)
+ return tlbcam_addrs[b].phys + (va - tlbcam_addrs[b].start);
+ return 0;
+}
+
+/*
+ * Return VA for a given PA or 0 if not mapped
+ */
+unsigned long p_block_mapped(phys_addr_t pa)
+{
+ int b;
+ for (b = 0; b < tlbcam_index; ++b)
+ if (pa >= tlbcam_addrs[b].phys
+ && pa < (tlbcam_addrs[b].limit-tlbcam_addrs[b].start)
+ +tlbcam_addrs[b].phys)
+ return tlbcam_addrs[b].start+(pa-tlbcam_addrs[b].phys);
+ return 0;
+}
+#endif
+
+/*
+ * Set up a variable-size TLB entry (tlbcam). The parameters are not checked;
+ * in particular size must be a power of 4 between 4k and the max supported by
+ * an implementation; max may further be limited by what can be represented in
+ * an unsigned long (for example, 32-bit implementations cannot support a 4GB
+ * size).
+ */
+static void settlbcam(int index, unsigned long virt, phys_addr_t phys,
+ unsigned long size, unsigned long flags, unsigned int pid)
+{
+ unsigned int tsize;
+
+ tsize = __ilog2(size) - 10;
+
+#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC)
+ if ((flags & _PAGE_NO_CACHE) == 0)
+ flags |= _PAGE_COHERENT;
+#endif
+
+ TLBCAM[index].MAS0 = MAS0_TLBSEL(1) | MAS0_ESEL(index) | MAS0_NV(index+1);
+ TLBCAM[index].MAS1 = MAS1_VALID | MAS1_IPROT | MAS1_TSIZE(tsize) | MAS1_TID(pid);
+ TLBCAM[index].MAS2 = virt & PAGE_MASK;
+
+ TLBCAM[index].MAS2 |= (flags & _PAGE_WRITETHRU) ? MAS2_W : 0;
+ TLBCAM[index].MAS2 |= (flags & _PAGE_NO_CACHE) ? MAS2_I : 0;
+ TLBCAM[index].MAS2 |= (flags & _PAGE_COHERENT) ? MAS2_M : 0;
+ TLBCAM[index].MAS2 |= (flags & _PAGE_GUARDED) ? MAS2_G : 0;
+ TLBCAM[index].MAS2 |= (flags & _PAGE_ENDIAN) ? MAS2_E : 0;
+
+ TLBCAM[index].MAS3 = (phys & MAS3_RPN) | MAS3_SR;
+ TLBCAM[index].MAS3 |= (flags & _PAGE_RW) ? MAS3_SW : 0;
+ if (mmu_has_feature(MMU_FTR_BIG_PHYS))
+ TLBCAM[index].MAS7 = (u64)phys >> 32;
+
+ /* Below is unlikely -- only for large user pages or similar */
+ if (pte_user(__pte(flags))) {
+ TLBCAM[index].MAS3 |= MAS3_UR;
+ TLBCAM[index].MAS3 |= (flags & _PAGE_EXEC) ? MAS3_UX : 0;
+ TLBCAM[index].MAS3 |= (flags & _PAGE_RW) ? MAS3_UW : 0;
+ } else {
+ TLBCAM[index].MAS3 |= (flags & _PAGE_EXEC) ? MAS3_SX : 0;
+ }
+
+ tlbcam_addrs[index].start = virt;
+ tlbcam_addrs[index].limit = virt + size - 1;
+ tlbcam_addrs[index].phys = phys;
+}
+
+static unsigned long calc_cam_sz(unsigned long ram, unsigned long virt,
+ phys_addr_t phys)
+{
+ unsigned int camsize = __ilog2(ram);
+ unsigned int align = __ffs(virt | phys);
+ unsigned long max_cam;
+
+ if ((mfspr(SPRN_MMUCFG) & MMUCFG_MAVN) == MMUCFG_MAVN_V1) {
+ /* Convert (4^max) kB to (2^max) bytes */
+ max_cam = ((mfspr(SPRN_TLB1CFG) >> 16) & 0xf) * 2 + 10;
+ camsize &= ~1U;
+ align &= ~1U;
+ } else {
+ /* Convert (2^max) kB to (2^max) bytes */
+ max_cam = __ilog2(mfspr(SPRN_TLB1PS)) + 10;
+ }
+
+ if (camsize > align)
+ camsize = align;
+ if (camsize > max_cam)
+ camsize = max_cam;
+
+ return 1UL << camsize;
+}
+
+static unsigned long map_mem_in_cams_addr(phys_addr_t phys, unsigned long virt,
+ unsigned long ram, int max_cam_idx,
+ bool dryrun, bool init)
+{
+ int i;
+ unsigned long amount_mapped = 0;
+ unsigned long boundary;
+
+ if (strict_kernel_rwx_enabled())
+ boundary = (unsigned long)(_sinittext - _stext);
+ else
+ boundary = ram;
+
+ /* Calculate CAM values */
+ for (i = 0; boundary && i < max_cam_idx; i++) {
+ unsigned long cam_sz;
+ pgprot_t prot = init ? PAGE_KERNEL_X : PAGE_KERNEL_ROX;
+
+ cam_sz = calc_cam_sz(boundary, virt, phys);
+ if (!dryrun)
+ settlbcam(i, virt, phys, cam_sz, pgprot_val(prot), 0);
+
+ boundary -= cam_sz;
+ amount_mapped += cam_sz;
+ virt += cam_sz;
+ phys += cam_sz;
+ }
+ for (ram -= amount_mapped; ram && i < max_cam_idx; i++) {
+ unsigned long cam_sz;
+ pgprot_t prot = init ? PAGE_KERNEL_X : PAGE_KERNEL;
+
+ cam_sz = calc_cam_sz(ram, virt, phys);
+ if (!dryrun)
+ settlbcam(i, virt, phys, cam_sz, pgprot_val(prot), 0);
+
+ ram -= cam_sz;
+ amount_mapped += cam_sz;
+ virt += cam_sz;
+ phys += cam_sz;
+ }
+
+ if (dryrun)
+ return amount_mapped;
+
+ if (init) {
+ loadcam_multi(0, i, max_cam_idx);
+ tlbcam_index = i;
+ } else {
+ loadcam_multi(0, i, 0);
+ WARN_ON(i > tlbcam_index);
+ }
+
+#ifdef CONFIG_PPC64
+ get_paca()->tcd.esel_next = i;
+ get_paca()->tcd.esel_max = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;
+ get_paca()->tcd.esel_first = i;
+#endif
+
+ return amount_mapped;
+}
+
+unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx, bool dryrun, bool init)
+{
+ unsigned long virt = PAGE_OFFSET;
+ phys_addr_t phys = memstart_addr;
+
+ return map_mem_in_cams_addr(phys, virt, ram, max_cam_idx, dryrun, init);
+}
+
+#ifdef CONFIG_PPC32
+
+#if defined(CONFIG_LOWMEM_CAM_NUM_BOOL) && (CONFIG_LOWMEM_CAM_NUM >= NUM_TLBCAMS)
+#error "LOWMEM_CAM_NUM must be less than NUM_TLBCAMS"
+#endif
+
+unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
+{
+ return tlbcam_addrs[tlbcam_index - 1].limit - PAGE_OFFSET + 1;
+}
+
+void flush_instruction_cache(void)
+{
+ unsigned long tmp;
+
+ tmp = mfspr(SPRN_L1CSR1);
+ tmp |= L1CSR1_ICFI | L1CSR1_ICLFR;
+ mtspr(SPRN_L1CSR1, tmp);
+ isync();
+}
+
+/*
+ * MMU_init_hw does the chip-specific initialization of the MMU hardware.
+ */
+void __init MMU_init_hw(void)
+{
+ flush_instruction_cache();
+}
+
+static unsigned long __init tlbcam_sz(int idx)
+{
+ return tlbcam_addrs[idx].limit - tlbcam_addrs[idx].start + 1;
+}
+
+void __init adjust_total_lowmem(void)
+{
+ unsigned long ram;
+ int i;
+
+ /* adjust lowmem size to __max_low_memory */
+ ram = min((phys_addr_t)__max_low_memory, (phys_addr_t)total_lowmem);
+
+ i = switch_to_as1();
+ __max_low_memory = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM, false, true);
+ restore_to_as0(i, 0, NULL, 1);
+
+ pr_info("Memory CAM mapping: ");
+ for (i = 0; i < tlbcam_index - 1; i++)
+ pr_cont("%lu/", tlbcam_sz(i) >> 20);
+ pr_cont("%lu Mb, residual: %dMb\n", tlbcam_sz(tlbcam_index - 1) >> 20,
+ (unsigned int)((total_lowmem - __max_low_memory) >> 20));
+
+ memblock_set_current_limit(memstart_addr + __max_low_memory);
+}
+
+#ifdef CONFIG_STRICT_KERNEL_RWX
+void mmu_mark_rodata_ro(void)
+{
+ unsigned long remapped;
+
+ remapped = map_mem_in_cams(__max_low_memory, CONFIG_LOWMEM_CAM_NUM, false, false);
+
+ WARN_ON(__max_low_memory != remapped);
+}
+#endif
+
+void mmu_mark_initmem_nx(void)
+{
+ /* Everything is done in mmu_mark_rodata_ro() */
+}
+
+void setup_initial_memory_limit(phys_addr_t first_memblock_base,
+ phys_addr_t first_memblock_size)
+{
+ phys_addr_t limit = first_memblock_base + first_memblock_size;
+
+ /* 64M mapped initially according to head_fsl_booke.S */
+ memblock_set_current_limit(min_t(u64, limit, 0x04000000));
+}
+
+#ifdef CONFIG_RELOCATABLE
+int __initdata is_second_reloc;
+notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start)
+{
+ unsigned long base = kernstart_virt_addr;
+ phys_addr_t size;
+
+ kernstart_addr = start;
+ if (is_second_reloc) {
+ virt_phys_offset = PAGE_OFFSET - memstart_addr;
+ kaslr_late_init();
+ return;
+ }
+
+ /*
+ * Relocatable kernel support based on processing of dynamic
+ * relocation entries. Before we get the real memstart_addr,
+ * We will compute the virt_phys_offset like this:
+ * virt_phys_offset = stext.run - kernstart_addr
+ *
+ * stext.run = (KERNELBASE & ~0x3ffffff) +
+ * (kernstart_addr & 0x3ffffff)
+ * When we relocate, we have :
+ *
+ * (kernstart_addr & 0x3ffffff) = (stext.run & 0x3ffffff)
+ *
+ * hence:
+ * virt_phys_offset = (KERNELBASE & ~0x3ffffff) -
+ * (kernstart_addr & ~0x3ffffff)
+ *
+ */
+ start &= ~0x3ffffff;
+ base &= ~0x3ffffff;
+ virt_phys_offset = base - start;
+ early_get_first_memblock_info(__va(dt_ptr), &size);
+ /*
+ * We now get the memstart_addr, then we should check if this
+ * address is the same as what the PAGE_OFFSET map to now. If
+ * not we have to change the map of PAGE_OFFSET to memstart_addr
+ * and do a second relocation.
+ */
+ if (start != memstart_addr) {
+ int n;
+ long offset = start - memstart_addr;
+
+ is_second_reloc = 1;
+ n = switch_to_as1();
+ /* map a 64M area for the second relocation */
+ if (memstart_addr > start)
+ map_mem_in_cams(0x4000000, CONFIG_LOWMEM_CAM_NUM,
+ false, true);
+ else
+ map_mem_in_cams_addr(start, PAGE_OFFSET + offset,
+ 0x4000000, CONFIG_LOWMEM_CAM_NUM,
+ false, true);
+ restore_to_as0(n, offset, __va(dt_ptr), 1);
+ /* We should never reach here */
+ panic("Relocation error");
+ }
+
+ kaslr_early_init(__va(dt_ptr), size);
+}
+#endif
+#endif
diff --git a/arch/powerpc/mm/nohash/e500_hugetlbpage.c b/arch/powerpc/mm/nohash/e500_hugetlbpage.c
new file mode 100644
index 000000000000..c7d4b317a823
--- /dev/null
+++ b/arch/powerpc/mm/nohash/e500_hugetlbpage.c
@@ -0,0 +1,190 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PPC Huge TLB Page Support for Book3E MMU
+ *
+ * Copyright (C) 2009 David Gibson, IBM Corporation.
+ * Copyright (C) 2011 Becky Bruce, Freescale Semiconductor
+ *
+ */
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+
+#include <asm/mmu.h>
+
+#ifdef CONFIG_PPC64
+#include <asm/paca.h>
+
+static inline int tlb1_next(void)
+{
+ struct paca_struct *paca = get_paca();
+ struct tlb_core_data *tcd;
+ int this, next;
+
+ tcd = paca->tcd_ptr;
+ this = tcd->esel_next;
+
+ next = this + 1;
+ if (next >= tcd->esel_max)
+ next = tcd->esel_first;
+
+ tcd->esel_next = next;
+ return this;
+}
+
+static inline void book3e_tlb_lock(void)
+{
+ struct paca_struct *paca = get_paca();
+ unsigned long tmp;
+ int token = smp_processor_id() + 1;
+
+ /*
+ * Besides being unnecessary in the absence of SMT, this
+ * check prevents trying to do lbarx/stbcx. on e5500 which
+ * doesn't implement either feature.
+ */
+ if (!cpu_has_feature(CPU_FTR_SMT))
+ return;
+
+ asm volatile("1: lbarx %0, 0, %1;"
+ "cmpwi %0, 0;"
+ "bne 2f;"
+ "stbcx. %2, 0, %1;"
+ "bne 1b;"
+ "b 3f;"
+ "2: lbzx %0, 0, %1;"
+ "cmpwi %0, 0;"
+ "bne 2b;"
+ "b 1b;"
+ "3:"
+ : "=&r" (tmp)
+ : "r" (&paca->tcd_ptr->lock), "r" (token)
+ : "memory");
+}
+
+static inline void book3e_tlb_unlock(void)
+{
+ struct paca_struct *paca = get_paca();
+
+ if (!cpu_has_feature(CPU_FTR_SMT))
+ return;
+
+ isync();
+ paca->tcd_ptr->lock = 0;
+}
+#else
+static inline int tlb1_next(void)
+{
+ int index, ncams;
+
+ ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;
+
+ index = this_cpu_read(next_tlbcam_idx);
+
+ /* Just round-robin the entries and wrap when we hit the end */
+ if (unlikely(index == ncams - 1))
+ __this_cpu_write(next_tlbcam_idx, tlbcam_index);
+ else
+ __this_cpu_inc(next_tlbcam_idx);
+
+ return index;
+}
+
+static inline void book3e_tlb_lock(void)
+{
+}
+
+static inline void book3e_tlb_unlock(void)
+{
+}
+#endif
+
+static inline int book3e_tlb_exists(unsigned long ea, unsigned long pid)
+{
+ int found = 0;
+
+ mtspr(SPRN_MAS6, pid << 16);
+ asm volatile(
+ "tlbsx 0,%1\n"
+ "mfspr %0,0x271\n"
+ "srwi %0,%0,31\n"
+ : "=&r"(found) : "r"(ea));
+
+ return found;
+}
+
+static void
+book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, pte_t pte)
+{
+ unsigned long mas1, mas2;
+ u64 mas7_3;
+ unsigned long psize, tsize, shift;
+ unsigned long flags;
+ struct mm_struct *mm;
+ int index;
+
+ if (unlikely(is_kernel_addr(ea)))
+ return;
+
+ mm = vma->vm_mm;
+
+ psize = vma_mmu_pagesize(vma);
+ shift = __ilog2(psize);
+ tsize = shift - 10;
+ /*
+ * We can't be interrupted while we're setting up the MAS
+ * registers or after we've confirmed that no tlb exists.
+ */
+ local_irq_save(flags);
+
+ book3e_tlb_lock();
+
+ if (unlikely(book3e_tlb_exists(ea, mm->context.id))) {
+ book3e_tlb_unlock();
+ local_irq_restore(flags);
+ return;
+ }
+
+ /* We have to use the CAM(TLB1) on FSL parts for hugepages */
+ index = tlb1_next();
+ mtspr(SPRN_MAS0, MAS0_ESEL(index) | MAS0_TLBSEL(1));
+
+ mas1 = MAS1_VALID | MAS1_TID(mm->context.id) | MAS1_TSIZE(tsize);
+ mas2 = ea & ~((1UL << shift) - 1);
+ mas2 |= (pte_val(pte) >> PTE_WIMGE_SHIFT) & MAS2_WIMGE_MASK;
+ mas7_3 = (u64)pte_pfn(pte) << PAGE_SHIFT;
+ mas7_3 |= (pte_val(pte) >> PTE_BAP_SHIFT) & MAS3_BAP_MASK;
+ if (!pte_dirty(pte))
+ mas7_3 &= ~(MAS3_SW|MAS3_UW);
+
+ mtspr(SPRN_MAS1, mas1);
+ mtspr(SPRN_MAS2, mas2);
+
+ if (mmu_has_feature(MMU_FTR_BIG_PHYS))
+ mtspr(SPRN_MAS7, upper_32_bits(mas7_3));
+ mtspr(SPRN_MAS3, lower_32_bits(mas7_3));
+
+ asm volatile ("tlbwe");
+
+ book3e_tlb_unlock();
+ local_irq_restore(flags);
+}
+
+/*
+ * This is called at the end of handling a user page fault, when the
+ * fault has been handled by updating a PTE in the linux page tables.
+ *
+ * This must always be called with the pte lock held.
+ */
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
+{
+ if (is_vm_hugetlb_page(vma))
+ book3e_hugetlb_preload(vma, address, *ptep);
+}
+
+void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+ struct hstate *hstate = hstate_file(vma->vm_file);
+ unsigned long tsize = huge_page_shift(hstate) - 10;
+
+ __flush_tlb_page(vma->vm_mm, vmaddr, tsize, 0);
+}
diff --git a/arch/powerpc/mm/nohash/fsl_book3e.c b/arch/powerpc/mm/nohash/fsl_book3e.c
deleted file mode 100644
index b231a54f540c..000000000000
--- a/arch/powerpc/mm/nohash/fsl_book3e.c
+++ /dev/null
@@ -1,379 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Modifications by Kumar Gala (galak@kernel.crashing.org) to support
- * E500 Book E processors.
- *
- * Copyright 2004,2010 Freescale Semiconductor, Inc.
- *
- * This file contains the routines for initializing the MMU
- * on the 4xx series of chips.
- * -- paulus
- *
- * Derived from arch/ppc/mm/init.c:
- * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
- *
- * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
- * and Cort Dougan (PReP) (cort@cs.nmt.edu)
- * Copyright (C) 1996 Paul Mackerras
- *
- * Derived from "arch/i386/mm/init.c"
- * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
- */
-
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/ptrace.h>
-#include <linux/mman.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/stddef.h>
-#include <linux/vmalloc.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/highmem.h>
-#include <linux/memblock.h>
-
-#include <asm/prom.h>
-#include <asm/io.h>
-#include <asm/mmu_context.h>
-#include <asm/mmu.h>
-#include <linux/uaccess.h>
-#include <asm/smp.h>
-#include <asm/machdep.h>
-#include <asm/setup.h>
-#include <asm/paca.h>
-
-#include <mm/mmu_decl.h>
-
-unsigned int tlbcam_index;
-
-#define NUM_TLBCAMS (64)
-struct tlbcam TLBCAM[NUM_TLBCAMS];
-
-struct tlbcamrange {
- unsigned long start;
- unsigned long limit;
- phys_addr_t phys;
-} tlbcam_addrs[NUM_TLBCAMS];
-
-unsigned long tlbcam_sz(int idx)
-{
- return tlbcam_addrs[idx].limit - tlbcam_addrs[idx].start + 1;
-}
-
-#ifdef CONFIG_FSL_BOOKE
-/*
- * Return PA for this VA if it is mapped by a CAM, or 0
- */
-phys_addr_t v_block_mapped(unsigned long va)
-{
- int b;
- for (b = 0; b < tlbcam_index; ++b)
- if (va >= tlbcam_addrs[b].start && va < tlbcam_addrs[b].limit)
- return tlbcam_addrs[b].phys + (va - tlbcam_addrs[b].start);
- return 0;
-}
-
-/*
- * Return VA for a given PA or 0 if not mapped
- */
-unsigned long p_block_mapped(phys_addr_t pa)
-{
- int b;
- for (b = 0; b < tlbcam_index; ++b)
- if (pa >= tlbcam_addrs[b].phys
- && pa < (tlbcam_addrs[b].limit-tlbcam_addrs[b].start)
- +tlbcam_addrs[b].phys)
- return tlbcam_addrs[b].start+(pa-tlbcam_addrs[b].phys);
- return 0;
-}
-#endif
-
-/*
- * Set up a variable-size TLB entry (tlbcam). The parameters are not checked;
- * in particular size must be a power of 4 between 4k and the max supported by
- * an implementation; max may further be limited by what can be represented in
- * an unsigned long (for example, 32-bit implementations cannot support a 4GB
- * size).
- */
-static void settlbcam(int index, unsigned long virt, phys_addr_t phys,
- unsigned long size, unsigned long flags, unsigned int pid)
-{
- unsigned int tsize;
-
- tsize = __ilog2(size) - 10;
-
-#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC)
- if ((flags & _PAGE_NO_CACHE) == 0)
- flags |= _PAGE_COHERENT;
-#endif
-
- TLBCAM[index].MAS0 = MAS0_TLBSEL(1) | MAS0_ESEL(index) | MAS0_NV(index+1);
- TLBCAM[index].MAS1 = MAS1_VALID | MAS1_IPROT | MAS1_TSIZE(tsize) | MAS1_TID(pid);
- TLBCAM[index].MAS2 = virt & PAGE_MASK;
-
- TLBCAM[index].MAS2 |= (flags & _PAGE_WRITETHRU) ? MAS2_W : 0;
- TLBCAM[index].MAS2 |= (flags & _PAGE_NO_CACHE) ? MAS2_I : 0;
- TLBCAM[index].MAS2 |= (flags & _PAGE_COHERENT) ? MAS2_M : 0;
- TLBCAM[index].MAS2 |= (flags & _PAGE_GUARDED) ? MAS2_G : 0;
- TLBCAM[index].MAS2 |= (flags & _PAGE_ENDIAN) ? MAS2_E : 0;
-
- TLBCAM[index].MAS3 = (phys & MAS3_RPN) | MAS3_SR;
- TLBCAM[index].MAS3 |= (flags & _PAGE_RW) ? MAS3_SW : 0;
- if (mmu_has_feature(MMU_FTR_BIG_PHYS))
- TLBCAM[index].MAS7 = (u64)phys >> 32;
-
- /* Below is unlikely -- only for large user pages or similar */
- if (pte_user(__pte(flags))) {
- TLBCAM[index].MAS3 |= MAS3_UR;
- TLBCAM[index].MAS3 |= (flags & _PAGE_EXEC) ? MAS3_UX : 0;
- TLBCAM[index].MAS3 |= (flags & _PAGE_RW) ? MAS3_UW : 0;
- } else {
- TLBCAM[index].MAS3 |= (flags & _PAGE_EXEC) ? MAS3_SX : 0;
- }
-
- tlbcam_addrs[index].start = virt;
- tlbcam_addrs[index].limit = virt + size - 1;
- tlbcam_addrs[index].phys = phys;
-}
-
-unsigned long calc_cam_sz(unsigned long ram, unsigned long virt,
- phys_addr_t phys)
-{
- unsigned int camsize = __ilog2(ram);
- unsigned int align = __ffs(virt | phys);
- unsigned long max_cam;
-
- if ((mfspr(SPRN_MMUCFG) & MMUCFG_MAVN) == MMUCFG_MAVN_V1) {
- /* Convert (4^max) kB to (2^max) bytes */
- max_cam = ((mfspr(SPRN_TLB1CFG) >> 16) & 0xf) * 2 + 10;
- camsize &= ~1U;
- align &= ~1U;
- } else {
- /* Convert (2^max) kB to (2^max) bytes */
- max_cam = __ilog2(mfspr(SPRN_TLB1PS)) + 10;
- }
-
- if (camsize > align)
- camsize = align;
- if (camsize > max_cam)
- camsize = max_cam;
-
- return 1UL << camsize;
-}
-
-static unsigned long map_mem_in_cams_addr(phys_addr_t phys, unsigned long virt,
- unsigned long ram, int max_cam_idx,
- bool dryrun, bool init)
-{
- int i;
- unsigned long amount_mapped = 0;
- unsigned long boundary;
-
- if (strict_kernel_rwx_enabled())
- boundary = (unsigned long)(_sinittext - _stext);
- else
- boundary = ram;
-
- /* Calculate CAM values */
- for (i = 0; boundary && i < max_cam_idx; i++) {
- unsigned long cam_sz;
- pgprot_t prot = init ? PAGE_KERNEL_X : PAGE_KERNEL_ROX;
-
- cam_sz = calc_cam_sz(boundary, virt, phys);
- if (!dryrun)
- settlbcam(i, virt, phys, cam_sz, pgprot_val(prot), 0);
-
- boundary -= cam_sz;
- amount_mapped += cam_sz;
- virt += cam_sz;
- phys += cam_sz;
- }
- for (ram -= amount_mapped; ram && i < max_cam_idx; i++) {
- unsigned long cam_sz;
- pgprot_t prot = init ? PAGE_KERNEL_X : PAGE_KERNEL;
-
- cam_sz = calc_cam_sz(ram, virt, phys);
- if (!dryrun)
- settlbcam(i, virt, phys, cam_sz, pgprot_val(prot), 0);
-
- ram -= cam_sz;
- amount_mapped += cam_sz;
- virt += cam_sz;
- phys += cam_sz;
- }
-
- if (dryrun)
- return amount_mapped;
-
- if (init) {
- loadcam_multi(0, i, max_cam_idx);
- tlbcam_index = i;
- } else {
- loadcam_multi(0, i, 0);
- WARN_ON(i > tlbcam_index);
- }
-
-#ifdef CONFIG_PPC64
- get_paca()->tcd.esel_next = i;
- get_paca()->tcd.esel_max = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;
- get_paca()->tcd.esel_first = i;
-#endif
-
- return amount_mapped;
-}
-
-unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx, bool dryrun, bool init)
-{
- unsigned long virt = PAGE_OFFSET;
- phys_addr_t phys = memstart_addr;
-
- return map_mem_in_cams_addr(phys, virt, ram, max_cam_idx, dryrun, init);
-}
-
-#ifdef CONFIG_PPC32
-
-#if defined(CONFIG_LOWMEM_CAM_NUM_BOOL) && (CONFIG_LOWMEM_CAM_NUM >= NUM_TLBCAMS)
-#error "LOWMEM_CAM_NUM must be less than NUM_TLBCAMS"
-#endif
-
-unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
-{
- return tlbcam_addrs[tlbcam_index - 1].limit - PAGE_OFFSET + 1;
-}
-
-void flush_instruction_cache(void)
-{
- unsigned long tmp;
-
- tmp = mfspr(SPRN_L1CSR1);
- tmp |= L1CSR1_ICFI | L1CSR1_ICLFR;
- mtspr(SPRN_L1CSR1, tmp);
- isync();
-}
-
-/*
- * MMU_init_hw does the chip-specific initialization of the MMU hardware.
- */
-void __init MMU_init_hw(void)
-{
- flush_instruction_cache();
-}
-
-void __init adjust_total_lowmem(void)
-{
- unsigned long ram;
- int i;
-
- /* adjust lowmem size to __max_low_memory */
- ram = min((phys_addr_t)__max_low_memory, (phys_addr_t)total_lowmem);
-
- i = switch_to_as1();
- __max_low_memory = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM, false, true);
- restore_to_as0(i, 0, 0, 1);
-
- pr_info("Memory CAM mapping: ");
- for (i = 0; i < tlbcam_index - 1; i++)
- pr_cont("%lu/", tlbcam_sz(i) >> 20);
- pr_cont("%lu Mb, residual: %dMb\n", tlbcam_sz(tlbcam_index - 1) >> 20,
- (unsigned int)((total_lowmem - __max_low_memory) >> 20));
-
- memblock_set_current_limit(memstart_addr + __max_low_memory);
-}
-
-#ifdef CONFIG_STRICT_KERNEL_RWX
-void mmu_mark_rodata_ro(void)
-{
- /* Everything is done in mmu_mark_initmem_nx() */
-}
-#endif
-
-void mmu_mark_initmem_nx(void)
-{
- unsigned long remapped;
-
- if (!strict_kernel_rwx_enabled())
- return;
-
- remapped = map_mem_in_cams(__max_low_memory, CONFIG_LOWMEM_CAM_NUM, false, false);
-
- WARN_ON(__max_low_memory != remapped);
-}
-
-void setup_initial_memory_limit(phys_addr_t first_memblock_base,
- phys_addr_t first_memblock_size)
-{
- phys_addr_t limit = first_memblock_base + first_memblock_size;
-
- /* 64M mapped initially according to head_fsl_booke.S */
- memblock_set_current_limit(min_t(u64, limit, 0x04000000));
-}
-
-#ifdef CONFIG_RELOCATABLE
-int __initdata is_second_reloc;
-notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start)
-{
- unsigned long base = kernstart_virt_addr;
- phys_addr_t size;
-
- kernstart_addr = start;
- if (is_second_reloc) {
- virt_phys_offset = PAGE_OFFSET - memstart_addr;
- kaslr_late_init();
- return;
- }
-
- /*
- * Relocatable kernel support based on processing of dynamic
- * relocation entries. Before we get the real memstart_addr,
- * We will compute the virt_phys_offset like this:
- * virt_phys_offset = stext.run - kernstart_addr
- *
- * stext.run = (KERNELBASE & ~0x3ffffff) +
- * (kernstart_addr & 0x3ffffff)
- * When we relocate, we have :
- *
- * (kernstart_addr & 0x3ffffff) = (stext.run & 0x3ffffff)
- *
- * hence:
- * virt_phys_offset = (KERNELBASE & ~0x3ffffff) -
- * (kernstart_addr & ~0x3ffffff)
- *
- */
- start &= ~0x3ffffff;
- base &= ~0x3ffffff;
- virt_phys_offset = base - start;
- early_get_first_memblock_info(__va(dt_ptr), &size);
- /*
- * We now get the memstart_addr, then we should check if this
- * address is the same as what the PAGE_OFFSET map to now. If
- * not we have to change the map of PAGE_OFFSET to memstart_addr
- * and do a second relocation.
- */
- if (start != memstart_addr) {
- int n;
- long offset = start - memstart_addr;
-
- is_second_reloc = 1;
- n = switch_to_as1();
- /* map a 64M area for the second relocation */
- if (memstart_addr > start)
- map_mem_in_cams(0x4000000, CONFIG_LOWMEM_CAM_NUM,
- false, true);
- else
- map_mem_in_cams_addr(start, PAGE_OFFSET + offset,
- 0x4000000, CONFIG_LOWMEM_CAM_NUM,
- false, true);
- restore_to_as0(n, offset, __va(dt_ptr), 1);
- /* We should never reach here */
- panic("Relocation error");
- }
-
- kaslr_early_init(__va(dt_ptr), size);
-}
-#endif
-#endif
diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c
index 6ec978967da0..0d04f9d5da8d 100644
--- a/arch/powerpc/mm/nohash/kaslr_booke.c
+++ b/arch/powerpc/mm/nohash/kaslr_booke.c
@@ -14,11 +14,11 @@
#include <linux/memblock.h>
#include <linux/libfdt.h>
#include <linux/crash_core.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
#include <asm/cacheflush.h>
-#include <asm/prom.h>
#include <asm/kdump.h>
#include <mm/mmu_decl.h>
-#include <generated/compile.h>
#include <generated/utsrelease.h>
struct regions {
@@ -36,17 +36,11 @@ struct regions {
int reserved_mem_size_cells;
};
-/* Simplified build-specific string for starting entropy. */
-static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
- LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
-
struct regions __initdata regions;
static __init void kaslr_get_cmdline(void *fdt)
{
- int node = fdt_path_offset(fdt, "/chosen");
-
- early_init_dt_scan_chosen(node, "chosen", 1, boot_command_line);
+ early_init_dt_scan_chosen(boot_command_line);
}
static unsigned long __init rotate_xor(unsigned long hash, const void *area,
@@ -72,7 +66,8 @@ static unsigned long __init get_boot_seed(void *fdt)
{
unsigned long hash = 0;
- hash = rotate_xor(hash, build_str, sizeof(build_str));
+ /* build-specific string for starting entropy. */
+ hash = rotate_xor(hash, linux_banner, strlen(linux_banner));
hash = rotate_xor(hash, fdt, fdt_totalsize(fdt));
return hash;
@@ -317,7 +312,7 @@ static unsigned long __init kaslr_choose_location(void *dt_ptr, phys_addr_t size
ram = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM, true, true);
linear_sz = min_t(unsigned long, ram, SZ_512M);
- /* If the linear size is smaller than 64M, do not randmize */
+ /* If the linear size is smaller than 64M, do not randomize */
if (linear_sz < SZ_64M)
return 0;
diff --git a/arch/powerpc/mm/nohash/kup.c b/arch/powerpc/mm/nohash/kup.c
new file mode 100644
index 000000000000..552becf90e97
--- /dev/null
+++ b/arch/powerpc/mm/nohash/kup.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * This file contains the routines for initializing kernel userspace protection
+ */
+
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/jump_label.h>
+#include <linux/printk.h>
+#include <linux/smp.h>
+
+#include <asm/kup.h>
+#include <asm/smp.h>
+
+#ifdef CONFIG_PPC_KUAP
+struct static_key_false disable_kuap_key;
+EXPORT_SYMBOL(disable_kuap_key);
+
+void setup_kuap(bool disabled)
+{
+ if (disabled) {
+ if (IS_ENABLED(CONFIG_40x))
+ disable_kuep = true;
+ if (smp_processor_id() == boot_cpuid)
+ static_branch_enable(&disable_kuap_key);
+ return;
+ }
+
+ pr_info("Activating Kernel Userspace Access Protection\n");
+
+ __prevent_user_access(KUAP_READ_WRITE);
+}
+#endif
diff --git a/arch/powerpc/mm/nohash/mmu_context.c b/arch/powerpc/mm/nohash/mmu_context.c
index 44b2b5e7cabe..ccd5819b1bd9 100644
--- a/arch/powerpc/mm/nohash/mmu_context.c
+++ b/arch/powerpc/mm/nohash/mmu_context.c
@@ -33,6 +33,7 @@
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
#include <asm/smp.h>
+#include <asm/kup.h>
#include <mm/mmu_decl.h>
@@ -217,7 +218,7 @@ static void set_context(unsigned long id, pgd_t *pgd)
/* sync */
mb();
- } else {
+ } else if (kuap_is_disabled()) {
if (IS_ENABLED(CONFIG_40x))
mb(); /* sync */
@@ -305,6 +306,9 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
if (IS_ENABLED(CONFIG_BDI_SWITCH))
abatron_pteptrs[1] = next->pgd;
set_context(id, next->pgd);
+#if defined(CONFIG_BOOKE_OR_40x) && defined(CONFIG_PPC_KUAP)
+ tsk->thread.pid = id;
+#endif
raw_spin_unlock(&context_lock);
}
@@ -313,15 +317,6 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
*/
int init_new_context(struct task_struct *t, struct mm_struct *mm)
{
- /*
- * We have MMU_NO_CONTEXT set to be ~0. Hence check
- * explicitly against context.id == 0. This ensures that we properly
- * initialize context slice details for newly allocated mm's (which will
- * have id == 0) and don't alter context slice inherited via fork (which
- * will have id != 0).
- */
- if (mm->context.id == 0)
- slice_init_new_context_exec(mm);
mm->context.id = MMU_NO_CONTEXT;
mm->context.active = 0;
pte_frag_set(&mm->context, NULL);
diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c
index 647bf454a0fa..2c15c86c7015 100644
--- a/arch/powerpc/mm/nohash/tlb.c
+++ b/arch/powerpc/mm/nohash/tlb.c
@@ -49,8 +49,7 @@
* other sizes not listed here. The .ind field is only used on MMUs that have
* indirect page table entries.
*/
-#if defined(CONFIG_PPC_BOOK3E_MMU) || defined(CONFIG_PPC_8xx)
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
[MMU_PAGE_4K] = {
.shift = 12,
@@ -81,7 +80,20 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
.enc = BOOK3E_PAGESZ_1GB,
},
};
-#elif defined(CONFIG_PPC_8xx)
+
+static inline int mmu_get_tsize(int psize)
+{
+ return mmu_psize_defs[psize].enc;
+}
+#else
+static inline int mmu_get_tsize(int psize)
+{
+ /* This isn't used on !Book3E for now */
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_PPC_8xx
struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
[MMU_PAGE_4K] = {
.shift = 12,
@@ -96,53 +108,7 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
.shift = 23,
},
};
-#else
-struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
- [MMU_PAGE_4K] = {
- .shift = 12,
- .ind = 20,
- .enc = BOOK3E_PAGESZ_4K,
- },
- [MMU_PAGE_16K] = {
- .shift = 14,
- .enc = BOOK3E_PAGESZ_16K,
- },
- [MMU_PAGE_64K] = {
- .shift = 16,
- .ind = 28,
- .enc = BOOK3E_PAGESZ_64K,
- },
- [MMU_PAGE_1M] = {
- .shift = 20,
- .enc = BOOK3E_PAGESZ_1M,
- },
- [MMU_PAGE_16M] = {
- .shift = 24,
- .ind = 36,
- .enc = BOOK3E_PAGESZ_16M,
- },
- [MMU_PAGE_256M] = {
- .shift = 28,
- .enc = BOOK3E_PAGESZ_256M,
- },
- [MMU_PAGE_1G] = {
- .shift = 30,
- .enc = BOOK3E_PAGESZ_1GB,
- },
-};
-#endif /* CONFIG_FSL_BOOKE */
-
-static inline int mmu_get_tsize(int psize)
-{
- return mmu_psize_defs[psize].enc;
-}
-#else
-static inline int mmu_get_tsize(int psize)
-{
- /* This isn't used on !Book3E for now */
- return 0;
-}
-#endif /* CONFIG_PPC_BOOK3E_MMU */
+#endif
/* The variables below are currently only used on 64-bit Book3E
* though this will probably be made common with other nohash
@@ -150,7 +116,6 @@ static inline int mmu_get_tsize(int psize)
*/
#ifdef CONFIG_PPC64
-int mmu_linear_psize; /* Page size used for the linear mapping */
int mmu_pte_psize; /* Page size used for PTE pages */
int mmu_vmemmap_psize; /* Page size used for the virtual mem map */
int book3e_htw_mode; /* HW tablewalk? Value is PPC_HTW_* */
@@ -167,7 +132,7 @@ int extlb_level_exc;
#endif /* CONFIG_PPC64 */
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
/* next_tlbcam_idx is used to round-robin tlbcam entry assignment */
DEFINE_PER_CPU(int, next_tlbcam_idx);
EXPORT_PER_CPU_SYMBOL(next_tlbcam_idx);
@@ -359,6 +324,7 @@ void __init early_init_mmu_47x(void)
/*
* Flush kernel TLB entries in the given range
*/
+#ifndef CONFIG_PPC_8xx
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
#ifdef CONFIG_SMP
@@ -371,6 +337,7 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
#endif
}
EXPORT_SYMBOL(flush_tlb_kernel_range);
+#endif
/*
* Currently, for range flushing, we just do a full mm flush. This should
@@ -433,14 +400,14 @@ void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address)
}
}
-static void setup_page_sizes(void)
+static void __init setup_page_sizes(void)
{
unsigned int tlb0cfg;
unsigned int tlb0ps;
unsigned int eptcfg;
int i, psize;
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
unsigned int mmucfg = mfspr(SPRN_MMUCFG);
int fsl_mmu = mmu_has_feature(MMU_FTR_TYPE_FSL_E);
@@ -571,7 +538,7 @@ out:
}
}
-static void setup_mmu_htw(void)
+static void __init setup_mmu_htw(void)
{
/*
* If we want to use HW tablewalk, enable it by patching the TLB miss
@@ -583,7 +550,7 @@ static void setup_mmu_htw(void)
patch_exception(0x1c0, exc_data_tlb_miss_htw_book3e);
patch_exception(0x1e0, exc_instruction_tlb_miss_htw_book3e);
break;
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
case PPC_HTW_E6500:
extlb_level_exc = EX_TLB_SIZE;
patch_exception(0x1c0, exc_data_tlb_miss_e6500_book3e);
@@ -626,7 +593,7 @@ static void early_init_this_mmu(void)
}
mtspr(SPRN_MAS4, mas4);
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
unsigned int num_cams;
bool map = true;
@@ -657,14 +624,6 @@ static void early_init_this_mmu(void)
static void __init early_init_mmu_global(void)
{
- /* XXX This will have to be decided at runtime, but right
- * now our boot and TLB miss code hard wires it. Ideally
- * we should find out a suitable page size and patch the
- * TLB miss code (either that or use the PACA to store
- * the value we want)
- */
- mmu_linear_psize = MMU_PAGE_1G;
-
/* XXX This should be decided at runtime based on supported
* page sizes in the TLB, but for now let's assume 16M is
* always there and a good fit (which it probably is)
@@ -687,7 +646,7 @@ static void __init early_init_mmu_global(void)
/* Look for HW tablewalk support */
setup_mmu_htw();
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
if (book3e_htw_mode == PPC_HTW_NONE) {
extlb_level_exc = EX_TLB_SIZE;
@@ -708,7 +667,7 @@ static void __init early_init_mmu_global(void)
static void __init early_mmu_set_memory_limit(void)
{
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
/*
* Limit memory so we dont have linear faults.
@@ -757,7 +716,7 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base,
* We crop it to the size of the first MEMBLOCK to
* avoid going over total available memory just in case...
*/
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
if (early_mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
unsigned long linear_sz;
unsigned int num_cams;
@@ -782,9 +741,5 @@ void __init early_init_mmu(void)
#ifdef CONFIG_PPC_47x
early_init_mmu_47x();
#endif
-
-#ifdef CONFIG_PPC_MM_SLICES
- mm_ctx_set_slb_addr_limit(&init_mm.context, SLB_ADDR_LIMIT_DEFAULT);
-#endif
}
#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/mm/nohash/tlb_low.S b/arch/powerpc/mm/nohash/tlb_low.S
index dd39074de9af..e1199608ff4d 100644
--- a/arch/powerpc/mm/nohash/tlb_low.S
+++ b/arch/powerpc/mm/nohash/tlb_low.S
@@ -186,7 +186,7 @@ _GLOBAL(_tlbivax_bcast)
isync
PPC_TLBIVAX(0, R3)
isync
- eieio
+ mbar
tlbsync
BEGIN_FTR_SECTION
b 1f
@@ -221,7 +221,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_476_DD2)
blr
#endif /* CONFIG_PPC_47x */
-#elif defined(CONFIG_FSL_BOOKE)
+#elif defined(CONFIG_PPC_85xx)
/*
* FSL BookE implementations.
*
@@ -294,7 +294,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_TLBILX)
isync
1: wrtee r10
blr
-#elif defined(CONFIG_PPC_BOOK3E)
+#elif defined(CONFIG_PPC_BOOK3E_64)
/*
* New Book3E (>= 2.06) implementation
*
@@ -355,7 +355,7 @@ _GLOBAL(_tlbivax_bcast)
rlwimi r4,r6,MAS6_SIND_SHIFT,MAS6_SIND
1: mtspr SPRN_MAS6,r4 /* assume AS=0 for now */
PPC_TLBIVAX(0,R3)
- eieio
+ mbar
tlbsync
sync
wrtee r10
@@ -364,7 +364,7 @@ _GLOBAL(_tlbivax_bcast)
#error Unsupported processor type !
#endif
-#if defined(CONFIG_PPC_FSL_BOOK3E)
+#if defined(CONFIG_PPC_E500)
/*
* extern void loadcam_entry(unsigned int index)
*
diff --git a/arch/powerpc/mm/nohash/tlb_low_64e.S b/arch/powerpc/mm/nohash/tlb_low_64e.S
index 9235e720e357..76cf456d7976 100644
--- a/arch/powerpc/mm/nohash/tlb_low_64e.S
+++ b/arch/powerpc/mm/nohash/tlb_low_64e.S
@@ -61,7 +61,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
ld r14,PACAPGD(r13)
std r15,EX_TLB_R15(r12)
std r10,EX_TLB_CR(r12)
-#ifdef CONFIG_PPC_FSL_BOOK3E
START_BTB_FLUSH_SECTION
mfspr r11, SPRN_SRR1
andi. r10,r11,MSR_PR
@@ -70,14 +69,11 @@ START_BTB_FLUSH_SECTION
1:
END_BTB_FLUSH_SECTION
std r7,EX_TLB_R7(r12)
-#endif
.endm
.macro tlb_epilog_bolted
ld r14,EX_TLB_CR(r12)
-#ifdef CONFIG_PPC_FSL_BOOK3E
ld r7,EX_TLB_R7(r12)
-#endif
ld r10,EX_TLB_R10(r12)
ld r11,EX_TLB_R11(r12)
ld r13,EX_TLB_R13(r12)
@@ -128,6 +124,13 @@ END_BTB_FLUSH_SECTION
bne tlb_miss_kernel_bolted
+tlb_miss_user_bolted:
+#ifdef CONFIG_PPC_KUAP
+ mfspr r10,SPRN_MAS1
+ rlwinm. r10,r10,0,0x3fff0000
+ beq- tlb_miss_fault_bolted /* KUAP fault */
+#endif
+
tlb_miss_common_bolted:
/*
* This is the guts of the TLB miss handler for bolted-linear.
@@ -145,16 +148,7 @@ tlb_miss_common_bolted:
clrrdi r15,r15,3
beq tlb_miss_fault_bolted /* No PGDIR, bail */
-BEGIN_MMU_FTR_SECTION
- /* Set the TLB reservation and search for existing entry. Then load
- * the entry.
- */
- PPC_TLBSRX_DOT(0,R16)
- ldx r14,r14,r15 /* grab pgd entry */
- beq tlb_miss_done_bolted /* tlb exists already, bail */
-MMU_FTR_SECTION_ELSE
ldx r14,r14,r15 /* grab pgd entry */
-ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV)
rldicl r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3
clrrdi r15,r15,3
@@ -215,10 +209,11 @@ itlb_miss_kernel_bolted:
tlb_miss_kernel_bolted:
mfspr r10,SPRN_MAS1
ld r14,PACA_KERNELPGD(r13)
- cmpldi cr0,r15,8 /* Check for vmalloc region */
+ srdi r15,r16,44 /* get kernel region */
+ andi. r15,r15,1 /* Check for vmalloc region */
rlwinm r10,r10,0,16,1 /* Clear TID */
mtspr SPRN_MAS1,r10
- beq+ tlb_miss_common_bolted
+ bne+ tlb_miss_common_bolted
tlb_miss_fault_bolted:
/* We need to check if it was an instruction miss */
@@ -246,10 +241,9 @@ itlb_miss_fault_bolted:
cmpldi cr0,r15,0 /* Check for user region */
oris r11,r11,_PAGE_ACCESSED@h
- beq tlb_miss_common_bolted
+ beq tlb_miss_user_bolted
b itlb_miss_kernel_bolted
-#ifdef CONFIG_PPC_FSL_BOOK3E
/*
* TLB miss handling for e6500 and derivatives, using hardware tablewalk.
*
@@ -500,7 +494,9 @@ tlb_miss_huge_e6500:
tlb_miss_kernel_e6500:
ld r14,PACA_KERNELPGD(r13)
- cmpldi cr1,r15,8 /* Check for vmalloc region */
+ srdi r15,r16,44 /* get kernel region */
+ xoris r15,r15,0xc /* Check for vmalloc region */
+ cmplwi cr1,r15,1
beq+ cr1,tlb_miss_common_e6500
tlb_miss_fault_e6500:
@@ -514,7 +510,6 @@ dtlb_miss_fault_e6500:
itlb_miss_fault_e6500:
tlb_epilog_bolted
b exc_instruction_storage_book3e
-#endif /* CONFIG_PPC_FSL_BOOK3E */
/**********************************************************************
* *
@@ -534,16 +529,18 @@ itlb_miss_fault_e6500:
*/
mfspr r14,SPRN_ESR
mfspr r16,SPRN_DEAR /* get faulting address */
- srdi r15,r16,60 /* get region */
- cmpldi cr0,r15,0xc /* linear mapping ? */
+ srdi r15,r16,44 /* get region */
+ xoris r15,r15,0xc
+ cmpldi cr0,r15,0 /* linear mapping ? */
beq tlb_load_linear /* yes -> go to linear map load */
+ cmpldi cr1,r15,1 /* vmalloc mapping ? */
/* The page tables are mapped virtually linear. At this point, though,
* we don't know whether we are trying to fault in a first level
* virtual address or a virtual page table address. We can get that
* from bit 0x1 of the region ID which we have set for a page table
*/
- andi. r10,r15,0x1
+ andis. r10,r15,0x1
bne- virt_page_table_tlb_miss
std r14,EX_TLB_ESR(r12); /* save ESR */
@@ -555,7 +552,7 @@ itlb_miss_fault_e6500:
/* We do the user/kernel test for the PID here along with the RW test
*/
- cmpldi cr0,r15,0 /* Check for user region */
+ srdi. r15,r16,60 /* Check for user region */
/* We pre-test some combination of permissions to avoid double
* faults:
@@ -576,13 +573,12 @@ itlb_miss_fault_e6500:
*/
rlwimi r11,r14,32-19,27,27
rlwimi r11,r14,32-16,19,19
- beq normal_tlb_miss
+ beq normal_tlb_miss_user
/* XXX replace the RMW cycles with immediate loads + writes */
1: mfspr r10,SPRN_MAS1
- cmpldi cr0,r15,8 /* Check for vmalloc region */
rlwinm r10,r10,0,16,1 /* Clear TID */
mtspr SPRN_MAS1,r10
- beq+ normal_tlb_miss
+ beq+ cr1,normal_tlb_miss
/* We got a crappy address, just fault with whatever DEAR and ESR
* are here
@@ -608,27 +604,28 @@ itlb_miss_fault_e6500:
*
* Faulting address is SRR0 which is already in r16
*/
- srdi r15,r16,60 /* get region */
- cmpldi cr0,r15,0xc /* linear mapping ? */
+ srdi r15,r16,44 /* get region */
+ xoris r15,r15,0xc
+ cmpldi cr0,r15,0 /* linear mapping ? */
beq tlb_load_linear /* yes -> go to linear map load */
+ cmpldi cr1,r15,1 /* vmalloc mapping ? */
/* We do the user/kernel test for the PID here along with the RW test
*/
li r11,_PAGE_PRESENT|_PAGE_BAP_UX /* Base perm */
oris r11,r11,_PAGE_ACCESSED@h
- cmpldi cr0,r15,0 /* Check for user region */
+ srdi. r15,r16,60 /* Check for user region */
std r14,EX_TLB_ESR(r12) /* write crazy -1 to frame */
- beq normal_tlb_miss
+ beq normal_tlb_miss_user
li r11,_PAGE_PRESENT|_PAGE_BAP_SX /* Base perm */
oris r11,r11,_PAGE_ACCESSED@h
/* XXX replace the RMW cycles with immediate loads + writes */
mfspr r10,SPRN_MAS1
- cmpldi cr0,r15,8 /* Check for vmalloc region */
rlwinm r10,r10,0,16,1 /* Clear TID */
mtspr SPRN_MAS1,r10
- beq+ normal_tlb_miss
+ beq+ cr1,normal_tlb_miss
/* We got a crappy address, just fault */
TLB_MISS_EPILOG_ERROR
@@ -646,6 +643,12 @@ itlb_miss_fault_e6500:
* r11 = PTE permission mask
* r10 = crap (free to use)
*/
+normal_tlb_miss_user:
+#ifdef CONFIG_PPC_KUAP
+ mfspr r14,SPRN_MAS1
+ rlwinm. r14,r14,0,0x3fff0000
+ beq- normal_tlb_miss_access_fault /* KUAP fault */
+#endif
normal_tlb_miss:
/* So we first construct the page table address. We do that by
* shifting the bottom of the address (not the region ID) by
@@ -655,22 +658,14 @@ normal_tlb_miss:
* NOTE: For 64K pages, we do things slightly differently in
* order to handle the weird page table format used by linux
*/
- ori r10,r15,0x1
+ srdi r15,r16,44
+ oris r10,r15,0x1
rldicl r14,r16,64-(PAGE_SHIFT-3),PAGE_SHIFT-3+4
- sldi r15,r10,60
- clrrdi r14,r14,3
+ sldi r15,r10,44
+ clrrdi r14,r14,19
or r10,r15,r14
-BEGIN_MMU_FTR_SECTION
- /* Set the TLB reservation and search for existing entry. Then load
- * the entry.
- */
- PPC_TLBSRX_DOT(0,R16)
- ld r14,0(r10)
- beq normal_tlb_miss_done
-MMU_FTR_SECTION_ELSE
ld r14,0(r10)
-ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV)
finish_normal_tlb_miss:
/* Check if required permissions are met */
@@ -689,13 +684,13 @@ finish_normal_tlb_miss:
*
* TODO: mix up code below for better scheduling
*/
- clrrdi r11,r16,12 /* Clear low crap in EA */
- rlwimi r11,r14,32-19,27,31 /* Insert WIMGE */
- mtspr SPRN_MAS2,r11
+ clrrdi r10,r16,12 /* Clear low crap in EA */
+ rlwimi r10,r14,32-19,27,31 /* Insert WIMGE */
+ mtspr SPRN_MAS2,r10
/* Check page size, if not standard, update MAS1 */
- rldicl r11,r14,64-8,64-8
- cmpldi cr0,r11,BOOK3E_PAGESZ_4K
+ rldicl r10,r14,64-8,64-8
+ cmpldi cr0,r10,BOOK3E_PAGESZ_4K
beq- 1f
mfspr r11,SPRN_MAS1
rlwimi r11,r14,31,21,24
@@ -714,13 +709,9 @@ finish_normal_tlb_miss:
li r11,MAS3_SW|MAS3_UW
andc r15,r15,r11
1:
-BEGIN_MMU_FTR_SECTION
srdi r16,r15,32
mtspr SPRN_MAS3,r15
mtspr SPRN_MAS7,r16
-MMU_FTR_SECTION_ELSE
- mtspr SPRN_MAS7_MAS3,r15
-ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS)
tlbwe
@@ -772,6 +763,7 @@ normal_tlb_miss_access_fault:
*/
virt_page_table_tlb_miss:
/* Are we hitting a kernel page table ? */
+ srdi r15,r16,60
andi. r10,r15,0x8
/* The cool thing now is that r10 contains 0 for user and 8 for kernel,
@@ -786,19 +778,22 @@ virt_page_table_tlb_miss:
mfspr r10,SPRN_MAS1
rlwinm r10,r10,0,16,1 /* Clear TID */
mtspr SPRN_MAS1,r10
+#ifdef CONFIG_PPC_KUAP
+ b 2f
1:
-BEGIN_MMU_FTR_SECTION
- /* Search if we already have a TLB entry for that virtual address, and
- * if we do, bail out.
- */
- PPC_TLBSRX_DOT(0,R16)
- beq virt_page_table_tlb_miss_done
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV)
+ mfspr r10,SPRN_MAS1
+ rlwinm. r10,r10,0,0x3fff0000
+ beq- virt_page_table_tlb_miss_fault /* KUAP fault */
+2:
+#else
+1:
+#endif
/* Now, we need to walk the page tables. First check if we are in
* range.
*/
- rldicl. r10,r16,64-(VPTE_INDEX_SIZE+3),VPTE_INDEX_SIZE+3+4
+ rldicl r10,r16,64-(VPTE_INDEX_SIZE+3),VPTE_INDEX_SIZE+3+4
+ cmpldi r10,0x80
bne- virt_page_table_tlb_miss_fault
/* Get the PGD pointer */
@@ -844,41 +839,12 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV)
clrldi r11,r15,4 /* remove region ID from RPN */
ori r10,r11,1 /* Or-in SR */
-BEGIN_MMU_FTR_SECTION
srdi r16,r10,32
mtspr SPRN_MAS3,r10
mtspr SPRN_MAS7,r16
-MMU_FTR_SECTION_ELSE
- mtspr SPRN_MAS7_MAS3,r10
-ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS)
tlbwe
-BEGIN_MMU_FTR_SECTION
-virt_page_table_tlb_miss_done:
-
- /* We have overridden MAS2:EPN but currently our primary TLB miss
- * handler will always restore it so that should not be an issue,
- * if we ever optimize the primary handler to not write MAS2 on
- * some cases, we'll have to restore MAS2:EPN here based on the
- * original fault's DEAR. If we do that we have to modify the
- * ITLB miss handler to also store SRR0 in the exception frame
- * as DEAR.
- *
- * However, one nasty thing we did is we cleared the reservation
- * (well, potentially we did). We do a trick here thus if we
- * are not a level 0 exception (we interrupted the TLB miss) we
- * offset the return address by -4 in order to replay the tlbsrx
- * instruction there
- */
- subf r10,r13,r12
- cmpldi cr0,r10,PACA_EXTLB+EX_TLB_SIZE
- bne- 1f
- ld r11,PACA_EXTLB+EX_TLB_SIZE+EX_TLB_SRR0(r13)
- addi r10,r11,-4
- std r10,PACA_EXTLB+EX_TLB_SIZE+EX_TLB_SRR0(r13)
-1:
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV)
/* Return to caller, normal case */
TLB_MISS_EPILOG_SUCCESS
rfi
@@ -946,23 +912,24 @@ virt_page_table_tlb_miss_whacko_fault:
*/
mfspr r14,SPRN_ESR
mfspr r16,SPRN_DEAR /* get faulting address */
- srdi r11,r16,60 /* get region */
- cmpldi cr0,r11,0xc /* linear mapping ? */
+ srdi r11,r16,44 /* get region */
+ xoris r11,r11,0xc
+ cmpldi cr0,r11,0 /* linear mapping ? */
beq tlb_load_linear /* yes -> go to linear map load */
+ cmpldi cr1,r11,1 /* vmalloc mapping ? */
/* We do the user/kernel test for the PID here along with the RW test
*/
- cmpldi cr0,r11,0 /* Check for user region */
+ srdi. r11,r16,60 /* Check for user region */
ld r15,PACAPGD(r13) /* Load user pgdir */
beq htw_tlb_miss
/* XXX replace the RMW cycles with immediate loads + writes */
1: mfspr r10,SPRN_MAS1
- cmpldi cr0,r11,8 /* Check for vmalloc region */
rlwinm r10,r10,0,16,1 /* Clear TID */
mtspr SPRN_MAS1,r10
ld r15,PACA_KERNELPGD(r13) /* Load kernel pgdir */
- beq+ htw_tlb_miss
+ beq+ cr1,htw_tlb_miss
/* We got a crappy address, just fault with whatever DEAR and ESR
* are here
@@ -988,19 +955,20 @@ virt_page_table_tlb_miss_whacko_fault:
*
* Faulting address is SRR0 which is already in r16
*/
- srdi r11,r16,60 /* get region */
- cmpldi cr0,r11,0xc /* linear mapping ? */
+ srdi r11,r16,44 /* get region */
+ xoris r11,r11,0xc
+ cmpldi cr0,r11,0 /* linear mapping ? */
beq tlb_load_linear /* yes -> go to linear map load */
+ cmpldi cr1,r11,1 /* vmalloc mapping ? */
/* We do the user/kernel test for the PID here along with the RW test
*/
- cmpldi cr0,r11,0 /* Check for user region */
+ srdi. r11,r16,60 /* Check for user region */
ld r15,PACAPGD(r13) /* Load user pgdir */
beq htw_tlb_miss
/* XXX replace the RMW cycles with immediate loads + writes */
1: mfspr r10,SPRN_MAS1
- cmpldi cr0,r11,8 /* Check for vmalloc region */
rlwinm r10,r10,0,16,1 /* Clear TID */
mtspr SPRN_MAS1,r10
ld r15,PACA_KERNELPGD(r13) /* Load kernel pgdir */
@@ -1027,6 +995,11 @@ virt_page_table_tlb_miss_whacko_fault:
* avoid too much complication, it will save/restore things for us
*/
htw_tlb_miss:
+#ifdef CONFIG_PPC_KUAP
+ mfspr r10,SPRN_MAS1
+ rlwinm. r10,r10,0,0x3fff0000
+ beq- htw_tlb_miss_fault /* KUAP fault */
+#endif
/* Search if we already have a TLB entry for that virtual address, and
* if we do, bail out.
*
@@ -1088,13 +1061,9 @@ htw_tlb_miss:
*/
ori r10,r15,(BOOK3E_PAGESZ_4K << MAS3_SPSIZE_SHIFT)
-BEGIN_MMU_FTR_SECTION
srdi r16,r10,32
mtspr SPRN_MAS3,r10
mtspr SPRN_MAS7,r16
-MMU_FTR_SECTION_ELSE
- mtspr SPRN_MAS7_MAS3,r10
-ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS)
tlbwe
@@ -1149,8 +1118,8 @@ tlb_load_linear:
* we only use 1G pages for now. That might have to be changed in a
* final implementation, especially when dealing with hypervisors
*/
- ld r11,PACATOC(r13)
- ld r11,linear_map_top@got(r11)
+ __LOAD_PACA_TOC(r11)
+ LOAD_REG_ADDR_ALTTOC(r11, r11, linear_map_top)
ld r10,0(r11)
tovirt(10,10)
cmpld cr0,r16,r10
@@ -1175,13 +1144,9 @@ tlb_load_linear:
clrldi r10,r10,4 /* clear region bits */
ori r10,r10,MAS3_SR|MAS3_SW|MAS3_SX
-BEGIN_MMU_FTR_SECTION
srdi r16,r10,32
mtspr SPRN_MAS3,r10
mtspr SPRN_MAS7,r16
-MMU_FTR_SECTION_ELSE
- mtspr SPRN_MAS7_MAS3,r10
-ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS)
tlbwe
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 59d3cfcd7887..b44ce71917d7 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -26,7 +26,6 @@
#include <linux/slab.h>
#include <asm/cputhreads.h>
#include <asm/sparsemem.h>
-#include <asm/prom.h>
#include <asm/smp.h>
#include <asm/topology.h>
#include <asm/firmware.h>
@@ -134,7 +133,7 @@ static int __init fake_numa_create_new_node(unsigned long end_pfn,
return 0;
}
-static void reset_numa_cpu_lookup_table(void)
+static void __init reset_numa_cpu_lookup_table(void)
{
unsigned int cpu;
@@ -372,7 +371,7 @@ void update_numa_distance(struct device_node *node)
* ibm,numa-lookup-index-table= {N, domainid1, domainid2, ..... domainidN}
* ibm,numa-distance-table = { N, 1, 2, 4, 5, 1, 6, .... N elements}
*/
-static void initialize_form2_numa_distance_lookup_table(void)
+static void __init initialize_form2_numa_distance_lookup_table(void)
{
int i, j;
struct device_node *root;
@@ -581,7 +580,7 @@ static int of_get_assoc_arrays(struct assoc_arrays *aa)
return 0;
}
-static int get_nid_and_numa_distance(struct drmem_lmb *lmb)
+static int __init get_nid_and_numa_distance(struct drmem_lmb *lmb)
{
struct assoc_arrays aa = { .arrays = NULL };
int default_nid = NUMA_NO_NODE;
@@ -956,7 +955,9 @@ static int __init parse_numa_properties(void)
of_node_put(cpu);
}
- node_set_online(nid);
+ /* node_set_online() is an UB if 'nid' is negative */
+ if (likely(nid >= 0))
+ node_set_online(nid);
}
get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells);
@@ -1159,6 +1160,9 @@ void __init mem_topology_setup(void)
{
int cpu;
+ max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
+ min_low_pfn = MEMORY_START >> PAGE_SHIFT;
+
/*
* Linux/mm assumes node 0 to be online at boot. However this is not
* true on PowerPC, where node 0 is similar to any other node, it
@@ -1203,9 +1207,6 @@ void __init initmem_init(void)
{
int nid;
- max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
- max_pfn = max_low_pfn;
-
memblock_dump_all();
for_each_online_node(nid) {
@@ -1421,43 +1422,26 @@ out:
return rc;
}
-int find_and_online_cpu_nid(int cpu)
+void find_and_update_cpu_nid(int cpu)
{
__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
int new_nid;
/* Use associativity from first thread for all siblings */
if (vphn_get_associativity(cpu, associativity))
- return cpu_to_node(cpu);
+ return;
+ /* Do not have previous associativity, so find it now. */
new_nid = associativity_to_nid(associativity);
- if (new_nid < 0 || !node_possible(new_nid))
- new_nid = first_online_node;
- if (NODE_DATA(new_nid) == NULL) {
-#ifdef CONFIG_MEMORY_HOTPLUG
- /*
- * Need to ensure that NODE_DATA is initialized for a node from
- * available memory (see memblock_alloc_try_nid). If unable to
- * init the node, then default to nearest node that has memory
- * installed. Skip onlining a node if the subsystems are not
- * yet initialized.
- */
- if (!topology_inited || try_online_node(new_nid))
- new_nid = first_online_node;
-#else
- /*
- * Default to using the nearest node that has memory installed.
- * Otherwise, it would be necessary to patch the kernel MM code
- * to deal with more memoryless-node error conditions.
- */
+ if (new_nid < 0 || !node_possible(new_nid))
new_nid = first_online_node;
-#endif
- }
+ else
+ // Associate node <-> cpu, so cpu_up() calls
+ // try_online_node() on the right node.
+ set_cpu_numa_node(cpu, new_nid);
- pr_debug("%s:%d cpu %d nid %d\n", __FUNCTION__, __LINE__,
- cpu, new_nid);
- return new_nid;
+ pr_debug("%s:%d cpu %d nid %d\n", __func__, __LINE__, cpu, new_nid);
}
int cpu_to_coregroup_id(int cpu)
diff --git a/arch/powerpc/mm/pageattr.c b/arch/powerpc/mm/pageattr.c
index edea388e9d3f..6163e484bc6d 100644
--- a/arch/powerpc/mm/pageattr.c
+++ b/arch/powerpc/mm/pageattr.c
@@ -15,12 +15,14 @@
#include <asm/pgtable.h>
+static pte_basic_t pte_update_delta(pte_t *ptep, unsigned long addr,
+ unsigned long old, unsigned long new)
+{
+ return pte_update(&init_mm, addr, ptep, old & ~new, new & ~old, 0);
+}
+
/*
- * Updates the attributes of a page in three steps:
- *
- * 1. take the page_table_lock
- * 2. install the new entry with the updated attributes
- * 3. flush the TLB
+ * Updates the attributes of a page atomically.
*
* This sequence is safe against concurrent updates, and also allows updating the
* attributes of a page currently being executed or accessed.
@@ -28,41 +30,40 @@
static int change_page_attr(pte_t *ptep, unsigned long addr, void *data)
{
long action = (long)data;
- pte_t pte;
- spin_lock(&init_mm.page_table_lock);
-
- pte = ptep_get(ptep);
-
- /* modify the PTE bits as desired, then apply */
+ addr &= PAGE_MASK;
+ /* modify the PTE bits as desired */
switch (action) {
case SET_MEMORY_RO:
- pte = pte_wrprotect(pte);
+ /* Don't clear DIRTY bit */
+ pte_update_delta(ptep, addr, _PAGE_KERNEL_RW & ~_PAGE_DIRTY, _PAGE_KERNEL_RO);
break;
case SET_MEMORY_RW:
- pte = pte_mkwrite(pte_mkdirty(pte));
+ pte_update_delta(ptep, addr, _PAGE_KERNEL_RO, _PAGE_KERNEL_RW);
break;
case SET_MEMORY_NX:
- pte = pte_exprotect(pte);
+ pte_update_delta(ptep, addr, _PAGE_KERNEL_ROX, _PAGE_KERNEL_RO);
break;
case SET_MEMORY_X:
- pte = pte_mkexec(pte);
+ pte_update_delta(ptep, addr, _PAGE_KERNEL_RO, _PAGE_KERNEL_ROX);
+ break;
+ case SET_MEMORY_NP:
+ pte_update(&init_mm, addr, ptep, _PAGE_PRESENT, 0, 0);
+ break;
+ case SET_MEMORY_P:
+ pte_update(&init_mm, addr, ptep, 0, _PAGE_PRESENT, 0);
break;
default:
WARN_ON_ONCE(1);
break;
}
- pte_update(&init_mm, addr, ptep, ~0UL, pte_val(pte), 0);
-
/* See ptesync comment in radix__set_pte_at() */
if (radix_enabled())
asm volatile("ptesync": : :"memory");
flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
- spin_unlock(&init_mm.page_table_lock);
-
return 0;
}
@@ -96,36 +97,3 @@ int change_memory_attr(unsigned long addr, int numpages, long action)
return apply_to_existing_page_range(&init_mm, start, size,
change_page_attr, (void *)action);
}
-
-/*
- * Set the attributes of a page:
- *
- * This function is used by PPC32 at the end of init to set final kernel memory
- * protection. It includes changing the maping of the page it is executing from
- * and data pages it is using.
- */
-static int set_page_attr(pte_t *ptep, unsigned long addr, void *data)
-{
- pgprot_t prot = __pgprot((unsigned long)data);
-
- spin_lock(&init_mm.page_table_lock);
-
- set_pte_at(&init_mm, addr, ptep, pte_modify(*ptep, prot));
- flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
-
- spin_unlock(&init_mm.page_table_lock);
-
- return 0;
-}
-
-int set_memory_attr(unsigned long addr, int numpages, pgprot_t prot)
-{
- unsigned long start = ALIGN_DOWN(addr, PAGE_SIZE);
- unsigned long sz = numpages * PAGE_SIZE;
-
- if (numpages <= 0)
- return 0;
-
- return apply_to_existing_page_range(&init_mm, start, sz, set_page_attr,
- (void *)pgprot_val(prot));
-}
diff --git a/arch/powerpc/mm/pgtable-frag.c b/arch/powerpc/mm/pgtable-frag.c
index 97ae4935da79..20652daa1d7e 100644
--- a/arch/powerpc/mm/pgtable-frag.c
+++ b/arch/powerpc/mm/pgtable-frag.c
@@ -83,7 +83,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
spin_lock(&mm->page_table_lock);
/*
* If we find pgtable_page set, we return
- * the allocated page with single fragement
+ * the allocated page with single fragment
* count.
*/
if (likely(!pte_frag_get(&mm->context))) {
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index ce9482383144..cb2dcdb18f8e 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -81,9 +81,6 @@ static struct page *maybe_pte_to_page(pte_t pte)
static pte_t set_pte_filter_hash(pte_t pte)
{
- if (radix_enabled())
- return pte;
-
pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
if (pte_looks_normal(pte) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) ||
cpu_has_feature(CPU_FTR_NOEXECUTE))) {
@@ -112,6 +109,9 @@ static inline pte_t set_pte_filter(pte_t pte)
{
struct page *pg;
+ if (radix_enabled())
+ return pte;
+
if (mmu_has_feature(MMU_FTR_HPTE_TABLE))
return set_pte_filter_hash(pte);
@@ -144,6 +144,9 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
{
struct page *pg;
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_64))
+ return pte;
+
if (mmu_has_feature(MMU_FTR_HPTE_TABLE))
return pte;
@@ -203,6 +206,15 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
__set_pte_at(mm, addr, ptep, pte, 0);
}
+void unmap_kernel_page(unsigned long va)
+{
+ pmd_t *pmdp = pmd_off_k(va);
+ pte_t *ptep = pte_offset_kernel(pmdp, va);
+
+ pte_clear(&init_mm, va, ptep);
+ flush_tlb_kernel_range(va, va + PAGE_SIZE);
+}
+
/*
* This is called when relaxing access to a PTE. It's also called in the page
* fault path when we don't hit any of the major fault cases, ie, a minor
@@ -339,7 +351,7 @@ EXPORT_SYMBOL_GPL(vmalloc_to_phys);
* (4) hugepd pointer, _PAGE_PTE = 0 and bits [2..6] indicate size of table
*
* So long as we atomically load page table pointers we are safe against teardown,
- * we can follow the address down to the the page and take a ref on it.
+ * we can follow the address down to the page and take a ref on it.
* This function need to be called with interrupts disabled. We use this variant
* when we have MSR[EE] = 0 but the paca->irq_soft_mask = IRQS_ENABLED
*/
@@ -460,3 +472,27 @@ out:
return ret_pte;
}
EXPORT_SYMBOL_GPL(__find_linux_pte);
+
+/* Note due to the way vm flags are laid out, the bits are XWR */
+const pgprot_t protection_map[16] = {
+ [VM_NONE] = PAGE_NONE,
+ [VM_READ] = PAGE_READONLY,
+ [VM_WRITE] = PAGE_COPY,
+ [VM_WRITE | VM_READ] = PAGE_COPY,
+ [VM_EXEC] = PAGE_READONLY_X,
+ [VM_EXEC | VM_READ] = PAGE_READONLY_X,
+ [VM_EXEC | VM_WRITE] = PAGE_COPY_X,
+ [VM_EXEC | VM_WRITE | VM_READ] = PAGE_COPY_X,
+ [VM_SHARED] = PAGE_NONE,
+ [VM_SHARED | VM_READ] = PAGE_READONLY,
+ [VM_SHARED | VM_WRITE] = PAGE_SHARED,
+ [VM_SHARED | VM_WRITE | VM_READ] = PAGE_SHARED,
+ [VM_SHARED | VM_EXEC] = PAGE_READONLY_X,
+ [VM_SHARED | VM_EXEC | VM_READ] = PAGE_READONLY_X,
+ [VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_SHARED_X,
+ [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_SHARED_X
+};
+
+#ifndef CONFIG_PPC_BOOK3S_64
+DECLARE_VM_GET_PAGE_PROT
+#endif
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 906e4e4328b2..5c02fd08d61e 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -135,10 +135,12 @@ void mark_initmem_nx(void)
unsigned long numpages = PFN_UP((unsigned long)_einittext) -
PFN_DOWN((unsigned long)_sinittext);
- if (v_block_mapped((unsigned long)_sinittext))
- mmu_mark_initmem_nx();
- else
- set_memory_attr((unsigned long)_sinittext, numpages, PAGE_KERNEL);
+ mmu_mark_initmem_nx();
+
+ if (!v_block_mapped((unsigned long)_sinittext)) {
+ set_memory_nx((unsigned long)_sinittext, numpages);
+ set_memory_rw((unsigned long)_sinittext, numpages);
+ }
}
#ifdef CONFIG_STRICT_KERNEL_RWX
@@ -146,24 +148,24 @@ void mark_rodata_ro(void)
{
unsigned long numpages;
+ if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX) && mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ pr_warn("This platform has HASH MMU, STRICT_MODULE_RWX won't work\n");
+
if (v_block_mapped((unsigned long)_stext + 1)) {
mmu_mark_rodata_ro();
ptdump_check_wx();
return;
}
- numpages = PFN_UP((unsigned long)_etext) -
- PFN_DOWN((unsigned long)_stext);
-
- set_memory_attr((unsigned long)_stext, numpages, PAGE_KERNEL_ROX);
/*
- * mark .rodata as read only. Use __init_begin rather than __end_rodata
- * to cover NOTES and EXCEPTION_TABLE.
+ * mark text and rodata as read only. __end_rodata is set by
+ * powerpc's linker script and includes tables and data
+ * requiring relocation which are not put in RO_DATA.
*/
- numpages = PFN_UP((unsigned long)__init_begin) -
- PFN_DOWN((unsigned long)__start_rodata);
+ numpages = PFN_UP((unsigned long)__end_rodata) -
+ PFN_DOWN((unsigned long)_stext);
- set_memory_attr((unsigned long)__start_rodata, numpages, PAGE_KERNEL_RO);
+ set_memory_ro((unsigned long)_stext, numpages);
// mark_initmem_nx() should have already run by now
ptdump_check_wx();
@@ -179,8 +181,8 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
return;
if (enable)
- set_memory_attr(addr, numpages, PAGE_KERNEL);
+ set_memory_p(addr, numpages);
else
- set_memory_attr(addr, numpages, __pgprot(0));
+ set_memory_np(addr, numpages);
}
#endif /* CONFIG_DEBUG_PAGEALLOC */
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 78c8cf01db5f..5ac1fd30341b 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -32,7 +32,6 @@
#include <linux/hugetlb.h>
#include <asm/page.h>
-#include <asm/prom.h>
#include <asm/mmu_context.h>
#include <asm/mmu.h>
#include <asm/smp.h>
@@ -102,7 +101,8 @@ EXPORT_SYMBOL(__pte_frag_size_shift);
struct page *p4d_page(p4d_t p4d)
{
if (p4d_is_leaf(p4d)) {
- VM_WARN_ON(!p4d_huge(p4d));
+ if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP))
+ VM_WARN_ON(!p4d_huge(p4d));
return pte_page(p4d_pte(p4d));
}
return virt_to_page(p4d_pgtable(p4d));
@@ -112,7 +112,8 @@ struct page *p4d_page(p4d_t p4d)
struct page *pud_page(pud_t pud)
{
if (pud_is_leaf(pud)) {
- VM_WARN_ON(!pud_huge(pud));
+ if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP))
+ VM_WARN_ON(!pud_huge(pud));
return pte_page(pud_pte(pud));
}
return virt_to_page(pud_pgtable(pud));
@@ -125,7 +126,13 @@ struct page *pud_page(pud_t pud)
struct page *pmd_page(pmd_t pmd)
{
if (pmd_is_leaf(pmd)) {
- VM_WARN_ON(!(pmd_large(pmd) || pmd_huge(pmd)));
+ /*
+ * vmalloc_to_page may be called on any vmap address (not only
+ * vmalloc), and it uses pmd_page() etc., when huge vmap is
+ * enabled so these checks can't be used.
+ */
+ if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP))
+ VM_WARN_ON(!(pmd_large(pmd) || pmd_huge(pmd)));
return pte_page(pmd_pte(pmd));
}
return virt_to_page(pmd_page_vaddr(pmd));
diff --git a/arch/powerpc/mm/ptdump/Makefile b/arch/powerpc/mm/ptdump/Makefile
index 4050cbb55acf..dc896d2874f3 100644
--- a/arch/powerpc/mm/ptdump/Makefile
+++ b/arch/powerpc/mm/ptdump/Makefile
@@ -4,11 +4,11 @@ obj-y += ptdump.o
obj-$(CONFIG_4xx) += shared.o
obj-$(CONFIG_PPC_8xx) += 8xx.o
-obj-$(CONFIG_PPC_BOOK3E_MMU) += shared.o
+obj-$(CONFIG_PPC_E500) += shared.o
obj-$(CONFIG_PPC_BOOK3S_32) += shared.o
obj-$(CONFIG_PPC_BOOK3S_64) += book3s64.o
ifdef CONFIG_PTDUMP_DEBUGFS
obj-$(CONFIG_PPC_BOOK3S_32) += bats.o segment_regs.o
-obj-$(CONFIG_PPC_BOOK3S_64) += hashpagetable.o
+obj-$(CONFIG_PPC_64S_HASH_MMU) += hashpagetable.o
endif
diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c
index c7f824d294b2..9a601587836b 100644
--- a/arch/powerpc/mm/ptdump/hashpagetable.c
+++ b/arch/powerpc/mm/ptdump/hashpagetable.c
@@ -238,7 +238,10 @@ static int native_find(unsigned long ea, int psize, bool primary, u64 *v, u64
static int pseries_find(unsigned long ea, int psize, bool primary, u64 *v, u64 *r)
{
- struct hash_pte ptes[4];
+ struct {
+ unsigned long v;
+ unsigned long r;
+ } ptes[4];
unsigned long vsid, vpn, hash, hpte_group, want_v;
int i, j, ssize = mmu_kernel_ssize;
long lpar_rc = 0;
diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index bf251191e78d..2313053fe679 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -21,6 +21,7 @@
#include <linux/seq_file.h>
#include <asm/fixmap.h>
#include <linux/const.h>
+#include <linux/kasan.h>
#include <asm/page.h>
#include <asm/hugetlb.h>
@@ -123,7 +124,7 @@ static struct ptdump_range ptdump_range[] __ro_after_init = {
void pt_dump_size(struct seq_file *m, unsigned long size)
{
- static const char units[] = "KMGTPE";
+ static const char units[] = " KMGTPE";
const char *unit = units;
/* Work out what appropriate unit to use */
@@ -176,14 +177,14 @@ static void dump_addr(struct pg_state *st, unsigned long addr)
pt_dump_seq_printf(st->seq, REG "-" REG " ", st->start_address, addr - 1);
pt_dump_seq_printf(st->seq, " " REG " ", st->start_pa);
- pt_dump_size(st->seq, (addr - st->start_address) >> 10);
+ pt_dump_size(st->seq, addr - st->start_address);
}
static void note_prot_wx(struct pg_state *st, unsigned long addr)
{
pte_t pte = __pte(st->current_flags);
- if (!IS_ENABLED(CONFIG_PPC_DEBUG_WX) || !st->check_wx)
+ if (!IS_ENABLED(CONFIG_DEBUG_WX) || !st->check_wx)
return;
if (!pte_write(pte) || !pte_exec(pte))
@@ -289,11 +290,11 @@ static void populate_markers(void)
#endif
address_markers[i++].start_address = FIXADDR_START;
address_markers[i++].start_address = FIXADDR_TOP;
+#endif /* CONFIG_PPC64 */
#ifdef CONFIG_KASAN
address_markers[i++].start_address = KASAN_SHADOW_START;
address_markers[i++].start_address = KASAN_SHADOW_END;
#endif
-#endif /* CONFIG_PPC64 */
}
static int ptdump_show(struct seq_file *m, void *v)
@@ -315,7 +316,7 @@ static int ptdump_show(struct seq_file *m, void *v)
DEFINE_SHOW_ATTRIBUTE(ptdump);
-static void build_pgtable_complete_mask(void)
+static void __init build_pgtable_complete_mask(void)
{
unsigned int i, j;
diff --git a/arch/powerpc/mm/ptdump/shared.c b/arch/powerpc/mm/ptdump/shared.c
index 03607ab90c66..f884760ca5cf 100644
--- a/arch/powerpc/mm/ptdump/shared.c
+++ b/arch/powerpc/mm/ptdump/shared.c
@@ -17,9 +17,9 @@ static const struct flag_info flag_array[] = {
.clear = " ",
}, {
.mask = _PAGE_RW,
- .val = _PAGE_RW,
- .set = "rw",
- .clear = "r ",
+ .val = 0,
+ .set = "r ",
+ .clear = "rw",
}, {
.mask = _PAGE_EXEC,
.val = _PAGE_EXEC,