aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--arch/powerpc/mm/Makefile4
-rw-r--r--arch/powerpc/mm/book3s32/Makefile1
-rw-r--r--arch/powerpc/mm/book3s32/hash_low.S6
-rw-r--r--arch/powerpc/mm/book3s32/kuap.c33
-rw-r--r--arch/powerpc/mm/book3s32/kuep.c42
-rw-r--r--arch/powerpc/mm/book3s32/mmu.c20
-rw-r--r--arch/powerpc/mm/book3s32/mmu_context.c48
-rw-r--r--arch/powerpc/mm/book3s64/hash_utils.c24
-rw-r--r--arch/powerpc/mm/book3s64/radix_hugetlbpage.c8
-rw-r--r--arch/powerpc/mm/book3s64/radix_pgtable.c39
-rw-r--r--arch/powerpc/mm/book3s64/radix_tlb.c272
-rw-r--r--arch/powerpc/mm/fault.c4
-rw-r--r--arch/powerpc/mm/ioremap_32.c4
-rw-r--r--arch/powerpc/mm/ioremap_64.c2
-rw-r--r--arch/powerpc/mm/maccess.c4
-rw-r--r--arch/powerpc/mm/mem.c11
-rw-r--r--arch/powerpc/mm/mmu_context.c4
-rw-r--r--arch/powerpc/mm/nohash/44x.c17
-rw-r--r--arch/powerpc/mm/nohash/8xx.c42
-rw-r--r--arch/powerpc/mm/nohash/mmu_context.c173
-rw-r--r--arch/powerpc/mm/nohash/tlb_low.S13
-rw-r--r--arch/powerpc/mm/pageattr.c134
-rw-r--r--arch/powerpc/mm/pgtable.c8
-rw-r--r--arch/powerpc/mm/pgtable_32.c60
-rw-r--r--arch/powerpc/mm/pgtable_64.c4
-rw-r--r--arch/powerpc/mm/ptdump/ptdump.c22
26 files changed, 581 insertions, 418 deletions
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index c3df3a8501d4..eae4ec2988fc 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -5,7 +5,7 @@
ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
-obj-y := fault.o mem.o pgtable.o mmap.o maccess.o \
+obj-y := fault.o mem.o pgtable.o mmap.o maccess.o pageattr.o \
init_$(BITS).o pgtable_$(BITS).o \
pgtable-frag.o ioremap.o ioremap_$(BITS).o \
init-common.o mmu_context.o drmem.o \
@@ -13,7 +13,7 @@ obj-y := fault.o mem.o pgtable.o mmap.o maccess.o \
obj-$(CONFIG_PPC_MMU_NOHASH) += nohash/
obj-$(CONFIG_PPC_BOOK3S_32) += book3s32/
obj-$(CONFIG_PPC_BOOK3S_64) += book3s64/
-obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o
+obj-$(CONFIG_NUMA) += numa.o
obj-$(CONFIG_PPC_MM_SLICES) += slice.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
diff --git a/arch/powerpc/mm/book3s32/Makefile b/arch/powerpc/mm/book3s32/Makefile
index 7f0c8a78ba0c..15f4773643d2 100644
--- a/arch/powerpc/mm/book3s32/Makefile
+++ b/arch/powerpc/mm/book3s32/Makefile
@@ -10,3 +10,4 @@ obj-y += mmu.o mmu_context.o
obj-$(CONFIG_PPC_BOOK3S_603) += nohash_low.o
obj-$(CONFIG_PPC_BOOK3S_604) += hash_low.o tlb.o
obj-$(CONFIG_PPC_KUEP) += kuep.o
+obj-$(CONFIG_PPC_KUAP) += kuap.o
diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S
index fb4233a5bdf7..6925ce998557 100644
--- a/arch/powerpc/mm/book3s32/hash_low.S
+++ b/arch/powerpc/mm/book3s32/hash_low.S
@@ -27,8 +27,10 @@
#include <asm/code-patching-asm.h>
#ifdef CONFIG_PTE_64BIT
+#define PTE_T_SIZE 8
#define PTE_FLAGS_OFFSET 4 /* offset of PTE flags, in bytes */
#else
+#define PTE_T_SIZE 4
#define PTE_FLAGS_OFFSET 0
#endif
@@ -488,7 +490,7 @@ _GLOBAL(flush_hash_pages)
bne 2f
ble cr1,19f
addi r4,r4,0x1000
- addi r5,r5,PTE_SIZE
+ addi r5,r5,PTE_T_SIZE
addi r6,r6,-1
b 1b
@@ -573,7 +575,7 @@ _GLOBAL(flush_hash_pages)
8: ble cr1,9f /* if all ptes checked */
81: addi r6,r6,-1
- addi r5,r5,PTE_SIZE
+ addi r5,r5,PTE_T_SIZE
addi r4,r4,0x1000
lwz r0,0(r5) /* check next pte */
cmpwi cr1,r6,1
diff --git a/arch/powerpc/mm/book3s32/kuap.c b/arch/powerpc/mm/book3s32/kuap.c
new file mode 100644
index 000000000000..0f920f09af57
--- /dev/null
+++ b/arch/powerpc/mm/book3s32/kuap.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <asm/kup.h>
+#include <asm/smp.h>
+
+struct static_key_false disable_kuap_key;
+EXPORT_SYMBOL(disable_kuap_key);
+
+void kuap_lock_all_ool(void)
+{
+ kuap_lock_all();
+}
+EXPORT_SYMBOL(kuap_lock_all_ool);
+
+void kuap_unlock_all_ool(void)
+{
+ kuap_unlock_all();
+}
+EXPORT_SYMBOL(kuap_unlock_all_ool);
+
+void setup_kuap(bool disabled)
+{
+ if (!disabled)
+ kuap_lock_all_ool();
+
+ if (smp_processor_id() != boot_cpuid)
+ return;
+
+ if (disabled)
+ static_branch_enable(&disable_kuap_key);
+ else
+ pr_info("Activating Kernel Userspace Access Protection\n");
+}
diff --git a/arch/powerpc/mm/book3s32/kuep.c b/arch/powerpc/mm/book3s32/kuep.c
index 8ed1b8634839..c20733d6e02c 100644
--- a/arch/powerpc/mm/book3s32/kuep.c
+++ b/arch/powerpc/mm/book3s32/kuep.c
@@ -1,40 +1,20 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <asm/kup.h>
-#include <asm/reg.h>
-#include <asm/task_size_32.h>
-#include <asm/mmu.h>
+#include <asm/smp.h>
-#define KUEP_UPDATE_TWO_USER_SEGMENTS(n) do { \
- if (TASK_SIZE > ((n) << 28)) \
- mtsr(val1, (n) << 28); \
- if (TASK_SIZE > (((n) + 1) << 28)) \
- mtsr(val2, ((n) + 1) << 28); \
- val1 = (val1 + 0x222) & 0xf0ffffff; \
- val2 = (val2 + 0x222) & 0xf0ffffff; \
-} while (0)
+struct static_key_false disable_kuep_key;
-static __always_inline void kuep_update(u32 val)
+void setup_kuep(bool disabled)
{
- int val1 = val;
- int val2 = (val + 0x111) & 0xf0ffffff;
+ if (!disabled)
+ kuep_lock();
- KUEP_UPDATE_TWO_USER_SEGMENTS(0);
- KUEP_UPDATE_TWO_USER_SEGMENTS(2);
- KUEP_UPDATE_TWO_USER_SEGMENTS(4);
- KUEP_UPDATE_TWO_USER_SEGMENTS(6);
- KUEP_UPDATE_TWO_USER_SEGMENTS(8);
- KUEP_UPDATE_TWO_USER_SEGMENTS(10);
- KUEP_UPDATE_TWO_USER_SEGMENTS(12);
- KUEP_UPDATE_TWO_USER_SEGMENTS(14);
-}
+ if (smp_processor_id() != boot_cpuid)
+ return;
-void kuep_lock(void)
-{
- kuep_update(mfsr(0) | SR_NX);
-}
-
-void kuep_unlock(void)
-{
- kuep_update(mfsr(0) & ~SR_NX);
+ if (disabled)
+ static_branch_enable(&disable_kuep_key);
+ else
+ pr_info("Activating Kernel Userspace Execution Prevention\n");
}
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index 159930351d9f..27061583a010 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -445,26 +445,6 @@ void __init print_system_hash_info(void)
pr_info("Hash_mask = 0x%lx\n", Hash_mask);
}
-#ifdef CONFIG_PPC_KUEP
-void __init setup_kuep(bool disabled)
-{
- pr_info("Activating Kernel Userspace Execution Prevention\n");
-
- if (disabled)
- pr_warn("KUEP cannot be disabled yet on 6xx when compiled in\n");
-}
-#endif
-
-#ifdef CONFIG_PPC_KUAP
-void __init setup_kuap(bool disabled)
-{
- pr_info("Activating Kernel Userspace Access Protection\n");
-
- if (disabled)
- pr_warn("KUAP cannot be disabled yet on 6xx when compiled in\n");
-}
-#endif
-
void __init early_init_mmu(void)
{
}
diff --git a/arch/powerpc/mm/book3s32/mmu_context.c b/arch/powerpc/mm/book3s32/mmu_context.c
index 218996e40a8e..e2708e387dc3 100644
--- a/arch/powerpc/mm/book3s32/mmu_context.c
+++ b/arch/powerpc/mm/book3s32/mmu_context.c
@@ -24,6 +24,12 @@
#include <asm/mmu_context.h>
/*
+ * Room for two PTE pointers, usually the kernel and current user pointers
+ * to their respective root page table.
+ */
+void *abatron_pteptrs[2];
+
+/*
* On 32-bit PowerPC 6xx/7xx/7xxx CPUs, we use a set of 16 VSIDs
* (virtual segment identifiers) for each context. Although the
* hardware supports 24-bit VSIDs, and thus >1 million contexts,
@@ -39,19 +45,6 @@
#define LAST_CONTEXT 32767
#define FIRST_CONTEXT 1
-/*
- * This function defines the mapping from contexts to VSIDs (virtual
- * segment IDs). We use a skew on both the context and the high 4 bits
- * of the 32-bit virtual address (the "effective segment ID") in order
- * to spread out the entries in the MMU hash table. Note, if this
- * function is changed then arch/ppc/mm/hashtable.S will have to be
- * changed to correspond.
- *
- *
- * CTX_TO_VSID(ctx, va) (((ctx) * (897 * 16) + ((va) >> 28) * 0x111) \
- * & 0xffffff)
- */
-
static unsigned long next_mmu_context;
static unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1];
@@ -111,3 +104,32 @@ void __init mmu_context_init(void)
context_map[0] = (1 << FIRST_CONTEXT) - 1;
next_mmu_context = FIRST_CONTEXT;
}
+
+void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk)
+{
+ long id = next->context.id;
+ unsigned long val;
+
+ if (id < 0)
+ panic("mm_struct %p has no context ID", next);
+
+ isync();
+
+ val = CTX_TO_VSID(id, 0);
+ if (!kuep_is_disabled())
+ val |= SR_NX;
+ if (!kuap_is_disabled())
+ val |= SR_KS;
+
+ update_user_segments(val);
+
+ if (IS_ENABLED(CONFIG_BDI_SWITCH))
+ abatron_pteptrs[1] = next->pgd;
+
+ if (!mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ mtspr(SPRN_SDR1, rol32(__pa(next->pgd), 4) & 0xffff01ff);
+
+ mb(); /* sync */
+ isync();
+}
+EXPORT_SYMBOL(switch_mmu_context);
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 96d9aa164007..ac5720371c0d 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -1522,8 +1522,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap,
}
EXPORT_SYMBOL_GPL(hash_page);
-DECLARE_INTERRUPT_HANDLER_RET(__do_hash_fault);
-DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
+DECLARE_INTERRUPT_HANDLER(__do_hash_fault);
+DEFINE_INTERRUPT_HANDLER(__do_hash_fault)
{
unsigned long ea = regs->dar;
unsigned long dsisr = regs->dsisr;
@@ -1533,6 +1533,11 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
unsigned int region_id;
long err;
+ if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) {
+ hash__do_page_fault(regs);
+ return;
+ }
+
region_id = get_region_id(ea);
if ((region_id == VMALLOC_REGION_ID) || (region_id == IO_REGION_ID))
mm = &init_mm;
@@ -1571,9 +1576,10 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
bad_page_fault(regs, SIGBUS);
}
err = 0;
- }
- return err;
+ } else if (err) {
+ hash__do_page_fault(regs);
+ }
}
/*
@@ -1582,13 +1588,6 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
*/
DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault)
{
- unsigned long dsisr = regs->dsisr;
-
- if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) {
- hash__do_page_fault(regs);
- return 0;
- }
-
/*
* If we are in an "NMI" (e.g., an interrupt when soft-disabled), then
* don't call hash_page, just fail the fault. This is required to
@@ -1607,8 +1606,7 @@ DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault)
return 0;
}
- if (__do_hash_fault(regs))
- hash__do_page_fault(regs);
+ __do_hash_fault(regs);
return 0;
}
diff --git a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
index cb91071eef52..23d3e08911d3 100644
--- a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
+++ b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
@@ -32,7 +32,13 @@ void radix__flush_hugetlb_tlb_range(struct vm_area_struct *vma, unsigned long st
struct hstate *hstate = hstate_file(vma->vm_file);
psize = hstate_get_psize(hstate);
- radix__flush_tlb_range_psize(vma->vm_mm, start, end, psize);
+ /*
+ * Flush PWC even if we get PUD_SIZE hugetlb invalidate to keep this simpler.
+ */
+ if (end - start >= PUD_SIZE)
+ radix__flush_tlb_pwc_range_psize(vma->vm_mm, start, end, psize);
+ else
+ radix__flush_tlb_range_psize(vma->vm_mm, start, end, psize);
}
/*
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 5fef8db3b463..e50ddf129c15 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -11,6 +11,7 @@
#include <linux/kernel.h>
#include <linux/sched/mm.h>
#include <linux/memblock.h>
+#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
@@ -357,30 +358,19 @@ static void __init radix_init_pgtable(void)
}
/* Find out how many PID bits are supported */
- if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
- if (!mmu_pid_bits)
- mmu_pid_bits = 20;
- mmu_base_pid = 1;
- } else if (cpu_has_feature(CPU_FTR_HVMODE)) {
- if (!mmu_pid_bits)
- mmu_pid_bits = 20;
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ if (!cpu_has_feature(CPU_FTR_HVMODE) &&
+ cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
/*
- * When KVM is possible, we only use the top half of the
- * PID space to avoid collisions between host and guest PIDs
- * which can cause problems due to prefetch when exiting the
- * guest with AIL=3
+ * Older versions of KVM on these machines perfer if the
+ * guest only uses the low 19 PID bits.
*/
- mmu_base_pid = 1 << (mmu_pid_bits - 1);
-#else
- mmu_base_pid = 1;
-#endif
- } else {
- /* The guest uses the bottom half of the PID space */
if (!mmu_pid_bits)
mmu_pid_bits = 19;
- mmu_base_pid = 1;
+ } else {
+ if (!mmu_pid_bits)
+ mmu_pid_bits = 20;
}
+ mmu_base_pid = 1;
/*
* Allocate Partition table and process table for the
@@ -486,6 +476,7 @@ static int __init radix_dt_scan_page_sizes(unsigned long node,
def = &mmu_psize_defs[idx];
def->shift = shift;
def->ap = ap;
+ def->h_rpt_pgsize = psize_to_rpti_pgsize(idx);
}
/* needed ? */
@@ -560,9 +551,13 @@ void __init radix__early_init_devtree(void)
*/
mmu_psize_defs[MMU_PAGE_4K].shift = 12;
mmu_psize_defs[MMU_PAGE_4K].ap = 0x0;
+ mmu_psize_defs[MMU_PAGE_4K].h_rpt_pgsize =
+ psize_to_rpti_pgsize(MMU_PAGE_4K);
mmu_psize_defs[MMU_PAGE_64K].shift = 16;
mmu_psize_defs[MMU_PAGE_64K].ap = 0x5;
+ mmu_psize_defs[MMU_PAGE_64K].h_rpt_pgsize =
+ psize_to_rpti_pgsize(MMU_PAGE_64K);
}
/*
@@ -825,7 +820,7 @@ static void __meminit remove_pud_table(pud_t *pud_start, unsigned long addr,
continue;
}
- pmd_base = (pmd_t *)pud_page_vaddr(*pud);
+ pmd_base = pud_pgtable(*pud);
remove_pmd_table(pmd_base, addr, next);
free_pmd_table(pmd_base, pud);
}
@@ -859,7 +854,7 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end)
continue;
}
- pud_base = (pud_t *)p4d_page_vaddr(*p4d);
+ pud_base = p4d_pgtable(*p4d);
remove_pud_table(pud_base, addr, next);
free_pud_table(pud_base, p4d);
}
@@ -1110,7 +1105,7 @@ int pud_free_pmd_page(pud_t *pud, unsigned long addr)
pmd_t *pmd;
int i;
- pmd = (pmd_t *)pud_page_vaddr(*pud);
+ pmd = pud_pgtable(*pud);
pud_clear(pud);
flush_tlb_kernel_range(addr, addr + PUD_SIZE);
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 409e61210789..aefc100d79a7 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -20,10 +20,6 @@
#include "internal.h"
-#define RIC_FLUSH_TLB 0
-#define RIC_FLUSH_PWC 1
-#define RIC_FLUSH_ALL 2
-
/*
* tlbiel instruction for radix, set invalidation
* i.e., r=1 and is=01 or is=10 or is=11
@@ -130,6 +126,21 @@ static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
+static __always_inline void __tlbie_pid_lpid(unsigned long pid,
+ unsigned long lpid,
+ unsigned long ric)
+{
+ unsigned long rb, rs, prs, r;
+
+ rb = PPC_BIT(53); /* IS = 1 */
+ rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
+ prs = 1; /* process scoped */
+ r = 1; /* radix format */
+
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ trace_tlbie(0, 0, rb, rs, ric, prs, r);
+}
static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
{
unsigned long rb,rs,prs,r;
@@ -190,6 +201,23 @@ static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
+static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid,
+ unsigned long lpid,
+ unsigned long ap, unsigned long ric)
+{
+ unsigned long rb, rs, prs, r;
+
+ rb = va & ~(PPC_BITMASK(52, 63));
+ rb |= ap << PPC_BITLSHIFT(58);
+ rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
+ prs = 1; /* process scoped */
+ r = 1; /* radix format */
+
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ trace_tlbie(0, 0, rb, rs, ric, prs, r);
+}
+
static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
unsigned long ap, unsigned long ric)
{
@@ -235,6 +263,22 @@ static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
}
}
+static inline void fixup_tlbie_va_range_lpid(unsigned long va,
+ unsigned long pid,
+ unsigned long lpid,
+ unsigned long ap)
+{
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ asm volatile("ptesync" : : : "memory");
+ __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ asm volatile("ptesync" : : : "memory");
+ __tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB);
+ }
+}
+
static inline void fixup_tlbie_pid(unsigned long pid)
{
/*
@@ -254,6 +298,25 @@ static inline void fixup_tlbie_pid(unsigned long pid)
}
}
+static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid)
+{
+ /*
+ * We can use any address for the invalidation, pick one which is
+ * probably unused as an optimisation.
+ */
+ unsigned long va = ((1UL << 52) - 1);
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ asm volatile("ptesync" : : : "memory");
+ __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ asm volatile("ptesync" : : : "memory");
+ __tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K),
+ RIC_FLUSH_TLB);
+ }
+}
static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
unsigned long ap)
@@ -291,22 +354,30 @@ static inline void fixup_tlbie_lpid(unsigned long lpid)
/*
* We use 128 set in radix mode and 256 set in hpt mode.
*/
-static __always_inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
+static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
{
int set;
asm volatile("ptesync": : :"memory");
- /*
- * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
- * also flush the entire Page Walk Cache.
- */
- __tlbiel_pid(pid, 0, ric);
+ switch (ric) {
+ case RIC_FLUSH_PWC:
- /* For PWC, only one flush is needed */
- if (ric == RIC_FLUSH_PWC) {
+ /* For PWC, only one flush is needed */
+ __tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
ppc_after_tlbiel_barrier();
return;
+ case RIC_FLUSH_TLB:
+ __tlbiel_pid(pid, 0, RIC_FLUSH_TLB);
+ break;
+ case RIC_FLUSH_ALL:
+ default:
+ /*
+ * Flush the first set of the TLB, and if
+ * we're doing a RIC_FLUSH_ALL, also flush
+ * the entire Page Walk Cache.
+ */
+ __tlbiel_pid(pid, 0, RIC_FLUSH_ALL);
}
if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
@@ -344,6 +415,31 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
+static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid,
+ unsigned long ric)
+{
+ asm volatile("ptesync" : : : "memory");
+
+ /*
+ * Workaround the fact that the "ric" argument to __tlbie_pid
+ * must be a compile-time contraint to match the "i" constraint
+ * in the asm statement.
+ */
+ switch (ric) {
+ case RIC_FLUSH_TLB:
+ __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
+ fixup_tlbie_pid_lpid(pid, lpid);
+ break;
+ case RIC_FLUSH_PWC:
+ __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
+ break;
+ case RIC_FLUSH_ALL:
+ default:
+ __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
+ fixup_tlbie_pid_lpid(pid, lpid);
+ }
+ asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+}
struct tlbiel_pid {
unsigned long pid;
unsigned long ric;
@@ -469,6 +565,20 @@ static inline void __tlbie_va_range(unsigned long start, unsigned long end,
fixup_tlbie_va_range(addr - page_size, pid, ap);
}
+static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long lpid,
+ unsigned long page_size,
+ unsigned long psize)
+{
+ unsigned long addr;
+ unsigned long ap = mmu_get_ap(psize);
+
+ for (addr = start; addr < end; addr += page_size)
+ __tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB);
+
+ fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap);
+}
+
static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
unsigned long psize, unsigned long ric)
{
@@ -549,6 +659,18 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end,
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
+static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long lpid,
+ unsigned long page_size,
+ unsigned long psize, bool also_pwc)
+{
+ asm volatile("ptesync" : : : "memory");
+ if (also_pwc)
+ __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
+ __tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize);
+ asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+}
+
static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
unsigned long start, unsigned long end,
unsigned long pid, unsigned long page_size,
@@ -989,14 +1111,13 @@ static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_
static inline void __radix__flush_tlb_range(struct mm_struct *mm,
unsigned long start, unsigned long end)
-
{
unsigned long pid;
unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
unsigned long page_size = 1UL << page_shift;
unsigned long nr_pages = (end - start) >> page_shift;
bool fullmm = (end == TLB_FLUSH_ALL);
- bool flush_pid;
+ bool flush_pid, flush_pwc = false;
enum tlb_flush_type type;
pid = mm->context.id;
@@ -1015,8 +1136,16 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm,
flush_pid = nr_pages > tlb_single_page_flush_ceiling;
else
flush_pid = nr_pages > tlb_local_single_page_flush_ceiling;
+ /*
+ * full pid flush already does the PWC flush. if it is not full pid
+ * flush check the range is more than PMD and force a pwc flush
+ * mremap() depends on this behaviour.
+ */
+ if (!flush_pid && (end - start) >= PMD_SIZE)
+ flush_pwc = true;
if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) {
+ unsigned long type = H_RPTI_TYPE_TLB;
unsigned long tgt = H_RPTI_TARGET_CMMU;
unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);
@@ -1024,19 +1153,20 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm,
pg_sizes |= psize_to_rpti_pgsize(MMU_PAGE_2M);
if (atomic_read(&mm->context.copros) > 0)
tgt |= H_RPTI_TARGET_NMMU;
- pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, pg_sizes,
- start, end);
+ if (flush_pwc)
+ type |= H_RPTI_TYPE_PWC;
+ pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end);
} else if (flush_pid) {
+ /*
+ * We are now flushing a range larger than PMD size force a RIC_FLUSH_ALL
+ */
if (type == FLUSH_TYPE_LOCAL) {
- _tlbiel_pid(pid, RIC_FLUSH_TLB);
+ _tlbiel_pid(pid, RIC_FLUSH_ALL);
} else {
if (cputlb_use_tlbie()) {
- if (mm_needs_flush_escalation(mm))
- _tlbie_pid(pid, RIC_FLUSH_ALL);
- else
- _tlbie_pid(pid, RIC_FLUSH_TLB);
+ _tlbie_pid(pid, RIC_FLUSH_ALL);
} else {
- _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
+ _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
}
}
} else {
@@ -1052,6 +1182,9 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm,
if (type == FLUSH_TYPE_LOCAL) {
asm volatile("ptesync": : :"memory");
+ if (flush_pwc)
+ /* For PWC, only one flush is needed */
+ __tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
if (hflush)
__tlbiel_va_range(hstart, hend, pid,
@@ -1059,6 +1192,8 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm,
ppc_after_tlbiel_barrier();
} else if (cputlb_use_tlbie()) {
asm volatile("ptesync": : :"memory");
+ if (flush_pwc)
+ __tlbie_pid(pid, RIC_FLUSH_PWC);
__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
if (hflush)
__tlbie_va_range(hstart, hend, pid,
@@ -1066,10 +1201,10 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm,
asm volatile("eieio; tlbsync; ptesync": : :"memory");
} else {
_tlbiel_va_range_multicast(mm,
- start, end, pid, page_size, mmu_virtual_psize, false);
+ start, end, pid, page_size, mmu_virtual_psize, flush_pwc);
if (hflush)
_tlbiel_va_range_multicast(mm,
- hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, false);
+ hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, flush_pwc);
}
}
out:
@@ -1143,9 +1278,6 @@ void radix__flush_all_lpid_guest(unsigned int lpid)
_tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
}
-static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
- unsigned long end, int psize);
-
void radix__tlb_flush(struct mmu_gather *tlb)
{
int psize = 0;
@@ -1176,7 +1308,7 @@ void radix__tlb_flush(struct mmu_gather *tlb)
}
}
-static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
+static void __radix__flush_tlb_range_psize(struct mm_struct *mm,
unsigned long start, unsigned long end,
int psize, bool also_pwc)
{
@@ -1252,8 +1384,8 @@ void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
return __radix__flush_tlb_range_psize(mm, start, end, psize, false);
}
-static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
- unsigned long end, int psize)
+void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
+ unsigned long end, int psize)
{
__radix__flush_tlb_range_psize(mm, start, end, psize, true);
}
@@ -1338,47 +1470,57 @@ void radix__flush_tlb_all(void)
}
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
+/*
+ * Performs process-scoped invalidations for a given LPID
+ * as part of H_RPT_INVALIDATE hcall.
+ */
+void do_h_rpt_invalidate_prt(unsigned long pid, unsigned long lpid,
+ unsigned long type, unsigned long pg_sizes,
+ unsigned long start, unsigned long end)
{
- unsigned long pid = mm->context.id;
+ unsigned long psize, nr_pages;
+ struct mmu_psize_def *def;
+ bool flush_pid;
- if (unlikely(pid == MMU_NO_CONTEXT))
+ /*
+ * A H_RPTI_TYPE_ALL request implies RIC=3, hence
+ * do a single IS=1 based flush.
+ */
+ if ((type & H_RPTI_TYPE_ALL) == H_RPTI_TYPE_ALL) {
+ _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
return;
+ }
- if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
- return;
+ if (type & H_RPTI_TYPE_PWC)
+ _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
- /*
- * If this context hasn't run on that CPU before and KVM is
- * around, there's a slim chance that the guest on another
- * CPU just brought in obsolete translation into the TLB of
- * this CPU due to a bad prefetch using the guest PID on
- * the way into the hypervisor.
- *
- * We work around this here. If KVM is possible, we check if
- * any sibling thread is in KVM. If it is, the window may exist
- * and thus we flush that PID from the core.
- *
- * A potential future improvement would be to mark which PIDs
- * have never been used on the system and avoid it if the PID
- * is new and the process has no other cpumask bit set.
- */
- if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) {
- int cpu = smp_processor_id();
- int sib = cpu_first_thread_sibling(cpu);
- bool flush = false;
-
- for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
- if (sib == cpu)
- continue;
- if (!cpu_possible(sib))
- continue;
- if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
- flush = true;
+ /* Full PID flush */
+ if (start == 0 && end == -1)
+ return _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
+
+ /* Do range invalidation for all the valid page sizes */
+ for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
+ def = &mmu_psize_defs[psize];
+ if (!(pg_sizes & def->h_rpt_pgsize))
+ continue;
+
+ nr_pages = (end - start) >> def->shift;
+ flush_pid = nr_pages > tlb_single_page_flush_ceiling;
+
+ /*
+ * If the number of pages spanning the range is above
+ * the ceiling, convert the request into a full PID flush.
+ * And since PID flush takes out all the page sizes, there
+ * is no need to consider remaining page sizes.
+ */
+ if (flush_pid) {
+ _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
+ return;
}
- if (flush)
- _tlbiel_pid(pid, RIC_FLUSH_ALL);
+ _tlbie_va_range_lpid(start, end, pid, lpid,
+ (1UL << def->shift), psize, false);
}
}
-EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
+EXPORT_SYMBOL_GPL(do_h_rpt_invalidate_prt);
+
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 34f641d4a2fe..a8d0ce85d39a 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -199,9 +199,7 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
{
int is_exec = TRAP(regs) == INTERRUPT_INST_STORAGE;
- /* NX faults set DSISR_PROTFAULT on the 8xx, DSISR_NOEXEC_OR_G on others */
- if (is_exec && (error_code & (DSISR_NOEXEC_OR_G | DSISR_KEYFAULT |
- DSISR_PROTFAULT))) {
+ if (is_exec) {
pr_crit_ratelimited("kernel tried to execute %s page (%lx) - exploit attempt? (uid: %d)\n",
address >= TASK_SIZE ? "exec-protected" : "user",
address,
diff --git a/arch/powerpc/mm/ioremap_32.c b/arch/powerpc/mm/ioremap_32.c
index 743e11384dea..9d13143b8be4 100644
--- a/arch/powerpc/mm/ioremap_32.c
+++ b/arch/powerpc/mm/ioremap_32.c
@@ -70,10 +70,10 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *call
*/
pr_warn("ioremap() called early from %pS. Use early_ioremap() instead\n", caller);
- err = early_ioremap_range(ioremap_bot - size, p, size, prot);
+ err = early_ioremap_range(ioremap_bot - size - PAGE_SIZE, p, size, prot);
if (err)
return NULL;
- ioremap_bot -= size;
+ ioremap_bot -= size + PAGE_SIZE;
return (void __iomem *)ioremap_bot + offset;
}
diff --git a/arch/powerpc/mm/ioremap_64.c b/arch/powerpc/mm/ioremap_64.c
index ba5cbb0d66bd..3acece00b33e 100644
--- a/arch/powerpc/mm/ioremap_64.c
+++ b/arch/powerpc/mm/ioremap_64.c
@@ -38,7 +38,7 @@ void __iomem *__ioremap_caller(phys_addr_t addr, unsigned long size,
return NULL;
ret = (void __iomem *)ioremap_bot + offset;
- ioremap_bot += size;
+ ioremap_bot += size + PAGE_SIZE;
return ret;
}
diff --git a/arch/powerpc/mm/maccess.c b/arch/powerpc/mm/maccess.c
index a3c30a884076..aad7c47e0030 100644
--- a/arch/powerpc/mm/maccess.c
+++ b/arch/powerpc/mm/maccess.c
@@ -12,7 +12,7 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
return is_kernel_addr((unsigned long)unsafe_src);
}
-int copy_inst_from_kernel_nofault(struct ppc_inst *inst, struct ppc_inst *src)
+int copy_inst_from_kernel_nofault(struct ppc_inst *inst, u32 *src)
{
unsigned int val, suffix;
int err;
@@ -21,7 +21,7 @@ int copy_inst_from_kernel_nofault(struct ppc_inst *inst, struct ppc_inst *src)
if (err)
return err;
if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) {
- err = copy_from_kernel_nofault(&suffix, (void *)src + 4, 4);
+ err = copy_from_kernel_nofault(&suffix, src + 1, sizeof(suffix));
*inst = ppc_inst_prefix(val, suffix);
} else {
*inst = ppc_inst(val);
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index a6b36a40897a..ad198b439222 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -28,6 +28,9 @@
unsigned long long memory_limit;
bool init_mem_is_free;
+unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
+EXPORT_SYMBOL(empty_zero_page);
+
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t vma_prot)
{
@@ -127,7 +130,7 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
}
#endif
-#ifndef CONFIG_NEED_MULTIPLE_NODES
+#ifndef CONFIG_NUMA
void __init mem_topology_setup(void)
{
max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
@@ -162,7 +165,7 @@ static int __init mark_nonram_nosave(void)
return 0;
}
-#else /* CONFIG_NEED_MULTIPLE_NODES */
+#else /* CONFIG_NUMA */
static int __init mark_nonram_nosave(void)
{
return 0;
@@ -299,6 +302,10 @@ void __init mem_init(void)
ioremap_bot, IOREMAP_TOP);
pr_info(" * 0x%08lx..0x%08lx : vmalloc & ioremap\n",
VMALLOC_START, VMALLOC_END);
+#ifdef MODULES_VADDR
+ pr_info(" * 0x%08lx..0x%08lx : modules\n",
+ MODULES_VADDR, MODULES_END);
+#endif
#endif /* CONFIG_PPC32 */
}
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index a857af401738..74246536b832 100644
--- a/arch/powerpc/mm/mmu_context.c
+++ b/arch/powerpc/mm/mmu_context.c
@@ -83,9 +83,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
if (cpu_has_feature(CPU_FTR_ALTIVEC))
asm volatile ("dssall");
- if (new_on_cpu)
- radix_kvm_prefetch_workaround(next);
- else
+ if (!new_on_cpu)
membarrier_arch_switch_mm(prev, next, tsk);
/*
diff --git a/arch/powerpc/mm/nohash/44x.c b/arch/powerpc/mm/nohash/44x.c
index 3d6ae7c72412..e079f26b267e 100644
--- a/arch/powerpc/mm/nohash/44x.c
+++ b/arch/powerpc/mm/nohash/44x.c
@@ -25,6 +25,7 @@
#include <asm/page.h>
#include <asm/cacheflush.h>
#include <asm/code-patching.h>
+#include <asm/smp.h>
#include <mm/mmu_decl.h>
@@ -239,3 +240,19 @@ void __init mmu_init_secondary(int cpu)
}
}
#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_PPC_KUEP
+void setup_kuep(bool disabled)
+{
+ if (smp_processor_id() != boot_cpuid)
+ return;
+
+ if (disabled)
+ patch_instruction_site(&patch__tlb_44x_kuep, ppc_inst(PPC_RAW_NOP()));
+ else
+ pr_info("Activating Kernel Userspace Execution Prevention\n");
+
+ if (IS_ENABLED(CONFIG_PPC_47x) && disabled)
+ patch_instruction_site(&patch__tlb_47x_kuep, ppc_inst(PPC_RAW_NOP()));
+}
+#endif
diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
index 71bfdbedacee..60780e089118 100644
--- a/arch/powerpc/mm/nohash/8xx.c
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -212,37 +212,6 @@ void __init setup_initial_memory_limit(phys_addr_t first_memblock_base,
memblock_set_current_limit(min_t(u64, first_memblock_size, SZ_32M));
}
-/*
- * Set up to use a given MMU context.
- * id is context number, pgd is PGD pointer.
- *
- * We place the physical address of the new task page directory loaded
- * into the MMU base register, and set the ASID compare register with
- * the new "context."
- */
-void set_context(unsigned long id, pgd_t *pgd)
-{
- s16 offset = (s16)(__pa(swapper_pg_dir));
-
- /* Context switch the PTE pointer for the Abatron BDI2000.
- * The PGDIR is passed as second argument.
- */
- if (IS_ENABLED(CONFIG_BDI_SWITCH))
- abatron_pteptrs[1] = pgd;
-
- /* Register M_TWB will contain base address of level 1 table minus the
- * lower part of the kernel PGDIR base address, so that all accesses to
- * level 1 table are done relative to lower part of kernel PGDIR base
- * address.
- */
- mtspr(SPRN_M_TWB, __pa(pgd) - offset);
-
- /* Update context */
- mtspr(SPRN_M_CASID, id - 1);
- /* sync */
- mb();
-}
-
#ifdef CONFIG_PPC_KUEP
void __init setup_kuep(bool disabled)
{
@@ -256,12 +225,17 @@ void __init setup_kuep(bool disabled)
#endif
#ifdef CONFIG_PPC_KUAP
+struct static_key_false disable_kuap_key;
+EXPORT_SYMBOL(disable_kuap_key);
+
void __init setup_kuap(bool disabled)
{
- pr_info("Activating Kernel Userspace Access Protection\n");
+ if (disabled) {
+ static_branch_enable(&disable_kuap_key);
+ return;
+ }
- if (disabled)
- pr_warn("KUAP cannot be disabled yet on 8xx when compiled in\n");
+ pr_info("Activating Kernel Userspace Access Protection\n");
mtspr(SPRN_MD_AP, MD_APG_KUAP);
}
diff --git a/arch/powerpc/mm/nohash/mmu_context.c b/arch/powerpc/mm/nohash/mmu_context.c
index aac81c9f84a5..44b2b5e7cabe 100644
--- a/arch/powerpc/mm/nohash/mmu_context.c
+++ b/arch/powerpc/mm/nohash/mmu_context.c
@@ -21,21 +21,6 @@
* also clear mm->cpu_vm_mask bits when processes are migrated
*/
-//#define DEBUG_MAP_CONSISTENCY
-//#define DEBUG_CLAMP_LAST_CONTEXT 31
-//#define DEBUG_HARDER
-
-/* We don't use DEBUG because it tends to be compiled in always nowadays
- * and this would generate way too much output
- */
-#ifdef DEBUG_HARDER
-#define pr_hard(args...) printk(KERN_DEBUG args)
-#define pr_hardcont(args...) printk(KERN_CONT args)
-#else
-#define pr_hard(args...) do { } while(0)
-#define pr_hardcont(args...) do { } while(0)
-#endif
-
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/init.h>
@@ -47,10 +32,17 @@
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
+#include <asm/smp.h>
#include <mm/mmu_decl.h>
/*
+ * Room for two PTE table pointers, usually the kernel and current user
+ * pointer to their respective root page table (pgdir).
+ */
+void *abatron_pteptrs[2];
+
+/*
* The MPC8xx has only 16 contexts. We rotate through them on each task switch.
* A better way would be to keep track of tasks that own contexts, and implement
* an LRU usage. That way very active tasks don't always have to pay the TLB
@@ -68,9 +60,7 @@
* -- BenH
*/
#define FIRST_CONTEXT 1
-#ifdef DEBUG_CLAMP_LAST_CONTEXT
-#define LAST_CONTEXT DEBUG_CLAMP_LAST_CONTEXT
-#elif defined(CONFIG_PPC_8xx)
+#if defined(CONFIG_PPC_8xx)
#define LAST_CONTEXT 16
#elif defined(CONFIG_PPC_47x)
#define LAST_CONTEXT 65535
@@ -80,9 +70,7 @@
static unsigned int next_context, nr_free_contexts;
static unsigned long *context_map;
-#ifdef CONFIG_SMP
static unsigned long *stale_map[NR_CPUS];
-#endif
static struct mm_struct **context_mm;
static DEFINE_RAW_SPINLOCK(context_lock);
@@ -105,7 +93,6 @@ static DEFINE_RAW_SPINLOCK(context_lock);
* the stale map as we can just flush the local CPU
* -- benh
*/
-#ifdef CONFIG_SMP
static unsigned int steal_context_smp(unsigned int id)
{
struct mm_struct *mm;
@@ -127,7 +114,6 @@ static unsigned int steal_context_smp(unsigned int id)
id = FIRST_CONTEXT;
continue;
}
- pr_hardcont(" | steal %d from 0x%p", id, mm);
/* Mark this mm has having no context anymore */
mm->context.id = MMU_NO_CONTEXT;
@@ -158,34 +144,25 @@ static unsigned int steal_context_smp(unsigned int id)
/* This will cause the caller to try again */
return MMU_NO_CONTEXT;
}
-#endif /* CONFIG_SMP */
static unsigned int steal_all_contexts(void)
{
struct mm_struct *mm;
-#ifdef CONFIG_SMP
int cpu = smp_processor_id();
-#endif
unsigned int id;
for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) {
/* Pick up the victim mm */
mm = context_mm[id];
- pr_hardcont(" | steal %d from 0x%p", id, mm);
-
/* Mark this mm as having no context anymore */
mm->context.id = MMU_NO_CONTEXT;
if (id != FIRST_CONTEXT) {
context_mm[id] = NULL;
__clear_bit(id, context_map);
-#ifdef DEBUG_MAP_CONSISTENCY
- mm->context.active = 0;
-#endif
}
-#ifdef CONFIG_SMP
- __clear_bit(id, stale_map[cpu]);
-#endif
+ if (IS_ENABLED(CONFIG_SMP))
+ __clear_bit(id, stale_map[cpu]);
}
/* Flush the TLB for all contexts (not to be used on SMP) */
@@ -204,15 +181,11 @@ static unsigned int steal_all_contexts(void)
static unsigned int steal_context_up(unsigned int id)
{
struct mm_struct *mm;
-#ifdef CONFIG_SMP
int cpu = smp_processor_id();
-#endif
/* Pick up the victim mm */
mm = context_mm[id];
- pr_hardcont(" | steal %d from 0x%p", id, mm);
-
/* Flush the TLB for that context */
local_flush_tlb_mm(mm);
@@ -220,81 +193,64 @@ static unsigned int steal_context_up(unsigned int id)
mm->context.id = MMU_NO_CONTEXT;
/* XXX This clear should ultimately be part of local_flush_tlb_mm */
-#ifdef CONFIG_SMP
- __clear_bit(id, stale_map[cpu]);
-#endif
+ if (IS_ENABLED(CONFIG_SMP))
+ __clear_bit(id, stale_map[cpu]);
return id;
}
-#ifdef DEBUG_MAP_CONSISTENCY
-static void context_check_map(void)
+static void set_context(unsigned long id, pgd_t *pgd)
{
- unsigned int id, nrf, nact;
+ if (IS_ENABLED(CONFIG_PPC_8xx)) {
+ s16 offset = (s16)(__pa(swapper_pg_dir));
+
+ /*
+ * Register M_TWB will contain base address of level 1 table minus the
+ * lower part of the kernel PGDIR base address, so that all accesses to
+ * level 1 table are done relative to lower part of kernel PGDIR base
+ * address.
+ */
+ mtspr(SPRN_M_TWB, __pa(pgd) - offset);
- nrf = nact = 0;
- for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) {
- int used = test_bit(id, context_map);
- if (!used)
- nrf++;
- if (used != (context_mm[id] != NULL))
- pr_err("MMU: Context %d is %s and MM is %p !\n",
- id, used ? "used" : "free", context_mm[id]);
- if (context_mm[id] != NULL)
- nact += context_mm[id]->context.active;
- }
- if (nrf != nr_free_contexts) {
- pr_err("MMU: Free context count out of sync ! (%d vs %d)\n",
- nr_free_contexts, nrf);
- nr_free_contexts = nrf;
+ /* Update context */
+ mtspr(SPRN_M_CASID, id - 1);
+
+ /* sync */
+ mb();
+ } else {
+ if (IS_ENABLED(CONFIG_40x))
+ mb(); /* sync */
+
+ mtspr(SPRN_PID, id);
+ isync();
}
- if (nact > num_online_cpus())
- pr_err("MMU: More active contexts than CPUs ! (%d vs %d)\n",
- nact, num_online_cpus());
- if (FIRST_CONTEXT > 0 && !test_bit(0, context_map))
- pr_err("MMU: Context 0 has been freed !!!\n");
}
-#else
-static void context_check_map(void) { }
-#endif
void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
unsigned int id;
-#ifdef CONFIG_SMP
unsigned int i, cpu = smp_processor_id();
-#endif
unsigned long *map;
/* No lockless fast path .. yet */
raw_spin_lock(&context_lock);
- pr_hard("[%d] activating context for mm @%p, active=%d, id=%d",
- cpu, next, next->context.active, next->context.id);
-
-#ifdef CONFIG_SMP
- /* Mark us active and the previous one not anymore */
- next->context.active++;
- if (prev) {
- pr_hardcont(" (old=0x%p a=%d)", prev, prev->context.active);
- WARN_ON(prev->context.active < 1);
- prev->context.active--;
+ if (IS_ENABLED(CONFIG_SMP)) {
+ /* Mark us active and the previous one not anymore */
+ next->context.active++;
+ if (prev) {
+ WARN_ON(prev->context.active < 1);
+ prev->context.active--;
+ }
}
again:
-#endif /* CONFIG_SMP */
/* If we already have a valid assigned context, skip all that */
id = next->context.id;
- if (likely(id != MMU_NO_CONTEXT)) {
-#ifdef DEBUG_MAP_CONSISTENCY
- if (context_mm[id] != next)
- pr_err("MMU: mm 0x%p has id %d but context_mm[%d] says 0x%p\n",
- next, id, id, context_mm[id]);
-#endif
+ if (likely(id != MMU_NO_CONTEXT))
goto ctxt_ok;
- }
/* We really don't have a context, let's try to acquire one */
id = next_context;
@@ -304,14 +260,12 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
/* No more free contexts, let's try to steal one */
if (nr_free_contexts == 0) {
-#ifdef CONFIG_SMP
if (num_online_cpus() > 1) {
id = steal_context_smp(id);
if (id == MMU_NO_CONTEXT)
goto again;
goto stolen;
}
-#endif /* CONFIG_SMP */
if (IS_ENABLED(CONFIG_PPC_8xx))
id = steal_all_contexts();
else
@@ -330,20 +284,13 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
next_context = id + 1;
context_mm[id] = next;
next->context.id = id;
- pr_hardcont(" | new id=%d,nrf=%d", id, nr_free_contexts);
- context_check_map();
ctxt_ok:
/* If that context got marked stale on this CPU, then flush the
* local TLB for it and unmark it before we use it
*/
-#ifdef CONFIG_SMP
- if (test_bit(id, stale_map[cpu])) {
- pr_hardcont(" | stale flush %d [%d..%d]",
- id, cpu_first_thread_sibling(cpu),
- cpu_last_thread_sibling(cpu));
-
+ if (IS_ENABLED(CONFIG_SMP) && test_bit(id, stale_map[cpu])) {
local_flush_tlb_mm(next);
/* XXX This clear should ultimately be part of local_flush_tlb_mm */
@@ -353,10 +300,10 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
__clear_bit(id, stale_map[i]);
}
}
-#endif
/* Flick the MMU and release lock */
- pr_hardcont(" -> %d\n", id);
+ if (IS_ENABLED(CONFIG_BDI_SWITCH))
+ abatron_pteptrs[1] = next->pgd;
set_context(id, next->pgd);
raw_spin_unlock(&context_lock);
}
@@ -366,8 +313,6 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
*/
int init_new_context(struct task_struct *t, struct mm_struct *mm)
{
- pr_hard("initing context for mm @%p\n", mm);
-
/*
* We have MMU_NO_CONTEXT set to be ~0. Hence check
* explicitly against context.id == 0. This ensures that we properly
@@ -401,16 +346,12 @@ void destroy_context(struct mm_struct *mm)
if (id != MMU_NO_CONTEXT) {
__clear_bit(id, context_map);
mm->context.id = MMU_NO_CONTEXT;
-#ifdef DEBUG_MAP_CONSISTENCY
- mm->context.active = 0;
-#endif
context_mm[id] = NULL;
nr_free_contexts++;
}
raw_spin_unlock_irqrestore(&context_lock, flags);
}
-#ifdef CONFIG_SMP
static int mmu_ctx_cpu_prepare(unsigned int cpu)
{
/* We don't touch CPU 0 map, it's allocated at aboot and kept
@@ -419,7 +360,6 @@ static int mmu_ctx_cpu_prepare(unsigned int cpu)
if (cpu == boot_cpuid)
return 0;
- pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu);
stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL);
return 0;
}
@@ -430,7 +370,6 @@ static int mmu_ctx_cpu_dead(unsigned int cpu)
if (cpu == boot_cpuid)
return 0;
- pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu);
kfree(stale_map[cpu]);
stale_map[cpu] = NULL;
@@ -440,8 +379,6 @@ static int mmu_ctx_cpu_dead(unsigned int cpu)
return 0;
}
-#endif /* CONFIG_SMP */
-
/*
* Initialize the context management stuff.
*/
@@ -465,16 +402,16 @@ void __init mmu_context_init(void)
if (!context_mm)
panic("%s: Failed to allocate %zu bytes\n", __func__,
sizeof(void *) * (LAST_CONTEXT + 1));
-#ifdef CONFIG_SMP
- stale_map[boot_cpuid] = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES);
- if (!stale_map[boot_cpuid])
- panic("%s: Failed to allocate %zu bytes\n", __func__,
- CTX_MAP_SIZE);
-
- cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE,
- "powerpc/mmu/ctx:prepare",
- mmu_ctx_cpu_prepare, mmu_ctx_cpu_dead);
-#endif
+ if (IS_ENABLED(CONFIG_SMP)) {
+ stale_map[boot_cpuid] = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES);
+ if (!stale_map[boot_cpuid])
+ panic("%s: Failed to allocate %zu bytes\n", __func__,
+ CTX_MAP_SIZE);
+
+ cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE,
+ "powerpc/mmu/ctx:prepare",
+ mmu_ctx_cpu_prepare, mmu_ctx_cpu_dead);
+ }
printk(KERN_INFO
"MMU: Allocated %zu bytes of context maps for %d contexts\n",
diff --git a/arch/powerpc/mm/nohash/tlb_low.S b/arch/powerpc/mm/nohash/tlb_low.S
index 68797e072f55..4613bf8e9aae 100644
--- a/arch/powerpc/mm/nohash/tlb_low.S
+++ b/arch/powerpc/mm/nohash/tlb_low.S
@@ -360,19 +360,6 @@ _GLOBAL(_tlbivax_bcast)
sync
wrtee r10
blr
-
-_GLOBAL(set_context)
-#ifdef CONFIG_BDI_SWITCH
- /* Context switch the PTE pointer for the Abatron BDI2000.
- * The PGDIR is the second parameter.
- */
- lis r5, abatron_pteptrs@h
- ori r5, r5, abatron_pteptrs@l
- stw r4, 0x4(r5)
-#endif
- mtspr SPRN_PID,r3
- isync /* Force context change */
- blr
#else
#error Unsupported processor type !
#endif
diff --git a/arch/powerpc/mm/pageattr.c b/arch/powerpc/mm/pageattr.c
new file mode 100644
index 000000000000..0876216ceee6
--- /dev/null
+++ b/arch/powerpc/mm/pageattr.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * MMU-generic set_memory implementation for powerpc
+ *
+ * Copyright 2019-2021, IBM Corporation.
+ */
+
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/set_memory.h>
+
+#include <asm/mmu.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+
+
+/*
+ * Updates the attributes of a page in three steps:
+ *
+ * 1. invalidate the page table entry
+ * 2. flush the TLB
+ * 3. install the new entry with the updated attributes
+ *
+ * Invalidating the pte means there are situations where this will not work
+ * when in theory it should.
+ * For example:
+ * - removing write from page whilst it is being executed
+ * - setting a page read-only whilst it is being read by another CPU
+ *
+ */
+static int change_page_attr(pte_t *ptep, unsigned long addr, void *data)
+{
+ long action = (long)data;
+ pte_t pte;
+
+ spin_lock(&init_mm.page_table_lock);
+
+ /* invalidate the PTE so it's safe to modify */
+ pte = ptep_get_and_clear(&init_mm, addr, ptep);
+ flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+
+ /* modify the PTE bits as desired, then apply */
+ switch (action) {
+ case SET_MEMORY_RO:
+ pte = pte_wrprotect(pte);
+ break;
+ case SET_MEMORY_RW:
+ pte = pte_mkwrite(pte_mkdirty(pte));
+ break;
+ case SET_MEMORY_NX:
+ pte = pte_exprotect(pte);
+ break;
+ case SET_MEMORY_X:
+ pte = pte_mkexec(pte);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+ set_pte_at(&init_mm, addr, ptep, pte);
+
+ /* See ptesync comment in radix__set_pte_at() */
+ if (radix_enabled())
+ asm volatile("ptesync": : :"memory");
+ spin_unlock(&init_mm.page_table_lock);
+
+ return 0;
+}
+
+int change_memory_attr(unsigned long addr, int numpages, long action)
+{
+ unsigned long start = ALIGN_DOWN(addr, PAGE_SIZE);
+ unsigned long size = numpages * PAGE_SIZE;
+
+ if (!numpages)
+ return 0;
+
+ if (WARN_ON_ONCE(is_vmalloc_or_module_addr((void *)addr) &&
+ is_vm_area_hugepages((void *)addr)))
+ return -EINVAL;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ /*
+ * On hash, the linear mapping is not in the Linux page table so
+ * apply_to_existing_page_range() will have no effect. If in the future
+ * the set_memory_* functions are used on the linear map this will need
+ * to be updated.
+ */
+ if (!radix_enabled()) {
+ int region = get_region_id(addr);
+
+ if (WARN_ON_ONCE(region != VMALLOC_REGION_ID && region != IO_REGION_ID))
+ return -EINVAL;
+ }
+#endif
+
+ return apply_to_existing_page_range(&init_mm, start, size,
+ change_page_attr, (void *)action);
+}
+
+/*
+ * Set the attributes of a page:
+ *
+ * This function is used by PPC32 at the end of init to set final kernel memory
+ * protection. It includes changing the maping of the page it is executing from
+ * and data pages it is using.
+ */
+static int set_page_attr(pte_t *ptep, unsigned long addr, void *data)
+{
+ pgprot_t prot = __pgprot((unsigned long)data);
+
+ spin_lock(&init_mm.page_table_lock);
+
+ set_pte_at(&init_mm, addr, ptep, pte_modify(*ptep, prot));
+ flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+
+ spin_unlock(&init_mm.page_table_lock);
+
+ return 0;
+}
+
+int set_memory_attr(unsigned long addr, int numpages, pgprot_t prot)
+{
+ unsigned long start = ALIGN_DOWN(addr, PAGE_SIZE);
+ unsigned long sz = numpages * PAGE_SIZE;
+
+ if (numpages <= 0)
+ return 0;
+
+ return apply_to_existing_page_range(&init_mm, start, sz, set_page_attr,
+ (void *)pgprot_val(prot));
+}
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 354611940118..cd16b407f47e 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -28,6 +28,14 @@
#include <asm/hugetlb.h>
#include <asm/pte-walk.h>
+#ifdef CONFIG_PPC64
+#define PGD_ALIGN (sizeof(pgd_t) * MAX_PTRS_PER_PGD)
+#else
+#define PGD_ALIGN PAGE_SIZE
+#endif
+
+pgd_t swapper_pg_dir[MAX_PTRS_PER_PGD] __section(".bss..page_aligned") __aligned(PGD_ALIGN);
+
static inline int is_exec_fault(void)
{
return current->thread.regs && TRAP(current->thread.regs) == 0x400;
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index e0ec67a16887..dcf5ecca19d9 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -23,6 +23,7 @@
#include <linux/highmem.h>
#include <linux/memblock.h>
#include <linux/slab.h>
+#include <linux/set_memory.h>
#include <asm/pgalloc.h>
#include <asm/fixmap.h>
@@ -132,64 +133,20 @@ void __init mapin_ram(void)
}
}
-static int __change_page_attr_noflush(struct page *page, pgprot_t prot)
-{
- pte_t *kpte;
- unsigned long address;
-
- BUG_ON(PageHighMem(page));
- address = (unsigned long)page_address(page);
-
- if (v_block_mapped(address))
- return 0;
- kpte = virt_to_kpte(address);
- if (!kpte)
- return -EINVAL;
- __set_pte_at(&init_mm, address, kpte, mk_pte(page, prot), 0);
-
- return 0;
-}
-
-/*
- * Change the page attributes of an page in the linear mapping.
- *
- * THIS DOES NOTHING WITH BAT MAPPINGS, DEBUG USE ONLY
- */
-static int change_page_attr(struct page *page, int numpages, pgprot_t prot)
-{
- int i, err = 0;
- unsigned long flags;
- struct page *start = page;
-
- local_irq_save(flags);
- for (i = 0; i < numpages; i++, page++) {
- err = __change_page_attr_noflush(page, prot);
- if (err)
- break;
- }
- wmb();
- local_irq_restore(flags);
- flush_tlb_kernel_range((unsigned long)page_address(start),
- (unsigned long)page_address(page));
- return err;
-}
-
void mark_initmem_nx(void)
{
- struct page *page = virt_to_page(_sinittext);
unsigned long numpages = PFN_UP((unsigned long)_einittext) -
PFN_DOWN((unsigned long)_sinittext);
if (v_block_mapped((unsigned long)_sinittext))
mmu_mark_initmem_nx();
else
- change_page_attr(page, numpages, PAGE_KERNEL);
+ set_memory_attr((unsigned long)_sinittext, numpages, PAGE_KERNEL);
}
#ifdef CONFIG_STRICT_KERNEL_RWX
void mark_rodata_ro(void)
{
- struct page *page;
unsigned long numpages;
if (v_block_mapped((unsigned long)_stext + 1)) {
@@ -198,20 +155,18 @@ void mark_rodata_ro(void)
return;
}
- page = virt_to_page(_stext);
numpages = PFN_UP((unsigned long)_etext) -
PFN_DOWN((unsigned long)_stext);
- change_page_attr(page, numpages, PAGE_KERNEL_ROX);
+ set_memory_attr((unsigned long)_stext, numpages, PAGE_KERNEL_ROX);
/*
* mark .rodata as read only. Use __init_begin rather than __end_rodata
* to cover NOTES and EXCEPTION_TABLE.
*/
- page = virt_to_page(__start_rodata);
numpages = PFN_UP((unsigned long)__init_begin) -
PFN_DOWN((unsigned long)__start_rodata);
- change_page_attr(page, numpages, PAGE_KERNEL_RO);
+ set_memory_attr((unsigned long)__start_rodata, numpages, PAGE_KERNEL_RO);
// mark_initmem_nx() should have already run by now
ptdump_check_wx();
@@ -221,9 +176,14 @@ void mark_rodata_ro(void)
#ifdef CONFIG_DEBUG_PAGEALLOC
void __kernel_map_pages(struct page *page, int numpages, int enable)
{
+ unsigned long addr = (unsigned long)page_address(page);
+
if (PageHighMem(page))
return;
- change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
+ if (enable)
+ set_memory_attr(addr, numpages, PAGE_KERNEL);
+ else
+ set_memory_attr(addr, numpages, __pgprot(0));
}
#endif /* CONFIG_DEBUG_PAGEALLOC */
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index cc6e2f94517f..78c8cf01db5f 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -105,7 +105,7 @@ struct page *p4d_page(p4d_t p4d)
VM_WARN_ON(!p4d_huge(p4d));
return pte_page(p4d_pte(p4d));
}
- return virt_to_page(p4d_page_vaddr(p4d));
+ return virt_to_page(p4d_pgtable(p4d));
}
#endif
@@ -115,7 +115,7 @@ struct page *pud_page(pud_t pud)
VM_WARN_ON(!pud_huge(pud));
return pte_page(pud_pte(pud));
}
- return virt_to_page(pud_page_vaddr(pud));
+ return virt_to_page(pud_pgtable(pud));
}
/*
diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index aca354fb670b..5062c58b1e5b 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -58,8 +58,6 @@ struct pg_state {
const struct addr_marker *marker;
unsigned long start_address;
unsigned long start_pa;
- unsigned long last_pa;
- unsigned long page_size;
unsigned int level;
u64 current_flags;
bool check_wx;
@@ -163,8 +161,6 @@ static void dump_flag_info(struct pg_state *st, const struct flag_info
static void dump_addr(struct pg_state *st, unsigned long addr)
{
- unsigned long delta;
-
#ifdef CONFIG_PPC64
#define REG "0x%016lx"
#else
@@ -172,14 +168,8 @@ static void dump_addr(struct pg_state *st, unsigned long addr)
#endif
pt_dump_seq_printf(st->seq, REG "-" REG " ", st->start_address, addr - 1);
- if (st->start_pa == st->last_pa && st->start_address + st->page_size != addr) {
- pt_dump_seq_printf(st->seq, "[" REG "]", st->start_pa);
- delta = st->page_size >> 10;
- } else {
- pt_dump_seq_printf(st->seq, " " REG " ", st->start_pa);
- delta = (addr - st->start_address) >> 10;
- }
- pt_dump_size(st->seq, delta);
+ pt_dump_seq_printf(st->seq, " " REG " ", st->start_pa);
+ pt_dump_size(st->seq, (addr - st->start_address) >> 10);
}
static void note_prot_wx(struct pg_state *st, unsigned long addr)
@@ -208,7 +198,6 @@ static void note_page_update_state(struct pg_state *st, unsigned long addr,
st->current_flags = flag;
st->start_address = addr;
st->start_pa = pa;
- st->page_size = page_size;
while (addr >= st->marker[1].start_address) {
st->marker++;
@@ -220,7 +209,6 @@ static void note_page(struct pg_state *st, unsigned long addr,
unsigned int level, u64 val, unsigned long page_size)
{
u64 flag = val & pg_level[level].mask;
- u64 pa = val & PTE_RPN_MASK;
/* At first no level is set */
if (!st->level) {
@@ -232,12 +220,9 @@ static void note_page(struct pg_state *st, unsigned long addr,
* - we change levels in the tree.
* - the address is in a different section of memory and is thus
* used for a different purpose, regardless of the flags.
- * - the pa of this page is not adjacent to the last inspected page
*/
} else if (flag != st->current_flags || level != st->level ||
- addr >= st->marker[1].start_address ||
- (pa != st->last_pa + st->page_size &&
- (pa != st->start_pa || st->start_pa != st->last_pa))) {
+ addr >= st->marker[1].start_address) {
/* Check the PTE flags */
if (st->current_flags) {
@@ -259,7 +244,6 @@ static void note_page(struct pg_state *st, unsigned long addr,
*/
note_page_update_state(st, addr, level, val, page_size);
}
- st->last_pa = pa;
}
static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)