about | summary | refs | log | tree | commit | diff | stats | homepage
path: root/arch/arm64/mm/mmu.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/mm/mmu.c')
-rw-r--r-- arch/arm64/mm/mmu.c 262
1 files changed, 172 insertions, 90 deletions
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 495b732d5af3..00ab1d648db6 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -25,6 +25,7 @@
#include <linux/vmalloc.h>
#include <linux/set_memory.h>
#include <linux/kfence.h>
+#include <linux/pkeys.h>
#include <asm/barrier.h>
#include <asm/cputype.h>
@@ -45,6 +46,13 @@
#define NO_CONT_MAPPINGS BIT(1)
#define NO_EXEC_MAPPINGS BIT(2) /* assumes FEAT_HPDS is not used */
+enum pgtable_type {
+ TABLE_PTE,
+ TABLE_PMD,
+ TABLE_PUD,
+ TABLE_P4D,
+};
+
u64 kimage_voffset __ro_after_init;
EXPORT_SYMBOL(kimage_voffset);
@@ -106,41 +114,26 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
}
EXPORT_SYMBOL(phys_mem_access_prot);
-static phys_addr_t __init early_pgtable_alloc(int shift)
+static phys_addr_t __init early_pgtable_alloc(enum pgtable_type pgtable_type)
{
phys_addr_t phys;
- void *ptr;
phys = memblock_phys_alloc_range(PAGE_SIZE, PAGE_SIZE, 0,
MEMBLOCK_ALLOC_NOLEAKTRACE);
if (!phys)
panic("Failed to allocate page table page\n");
- /*
- * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE
- * slot will be free, so we can (ab)use the FIX_PTE slot to initialise
- * any level of table.
- */
- ptr = pte_set_fixmap(phys);
-
- memset(ptr, 0, PAGE_SIZE);
-
- /*
- * Implicit barriers also ensure the zeroed page is visible to the page
- * table walker
- */
- pte_clear_fixmap();
-
return phys;
}
-bool pgattr_change_is_safe(u64 old, u64 new)
+bool pgattr_change_is_safe(pteval_t old, pteval_t new)
{
/*
* The following mapping attributes may be updated in live
* kernel mappings without the need for break-before-make.
*/
- pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG;
+ pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG |
+ PTE_SWBITS_MASK;
/* creating or taking down mappings is always safe */
if (!pte_valid(__pte(old)) || !pte_valid(__pte(new)))
@@ -172,16 +165,25 @@ bool pgattr_change_is_safe(u64 old, u64 new)
return ((old ^ new) & ~mask) == 0;
}
-static void init_pte(pmd_t *pmdp, unsigned long addr, unsigned long end,
- phys_addr_t phys, pgprot_t prot)
+static void init_clear_pgtable(void *table)
{
- pte_t *ptep;
+ clear_page(table);
- ptep = pte_set_fixmap_offset(pmdp, addr);
+ /* Ensure the zeroing is observed by page table walks. */
+ dsb(ishst);
+}
+
+static void init_pte(pte_t *ptep, unsigned long addr, unsigned long end,
+ phys_addr_t phys, pgprot_t prot)
+{
do {
pte_t old_pte = __ptep_get(ptep);
- __set_pte(ptep, pfn_pte(__phys_to_pfn(phys), prot));
+ /*
+ * Required barriers to make this visible to the table walker
+ * are deferred to the end of alloc_init_cont_pte().
+ */
+ __set_pte_nosync(ptep, pfn_pte(__phys_to_pfn(phys), prot));
/*
* After the PTE entry has been populated once, we
@@ -192,32 +194,35 @@ static void init_pte(pmd_t *pmdp, unsigned long addr, unsigned long end,
phys += PAGE_SIZE;
} while (ptep++, addr += PAGE_SIZE, addr != end);
-
- pte_clear_fixmap();
}
static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
unsigned long end, phys_addr_t phys,
pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(int),
+ phys_addr_t (*pgtable_alloc)(enum pgtable_type),
int flags)
{
unsigned long next;
pmd_t pmd = READ_ONCE(*pmdp);
+ pte_t *ptep;
BUG_ON(pmd_sect(pmd));
if (pmd_none(pmd)) {
- pmdval_t pmdval = PMD_TYPE_TABLE | PMD_TABLE_UXN;
+ pmdval_t pmdval = PMD_TYPE_TABLE | PMD_TABLE_UXN | PMD_TABLE_AF;
phys_addr_t pte_phys;
if (flags & NO_EXEC_MAPPINGS)
pmdval |= PMD_TABLE_PXN;
BUG_ON(!pgtable_alloc);
- pte_phys = pgtable_alloc(PAGE_SHIFT);
+ pte_phys = pgtable_alloc(TABLE_PTE);
+ ptep = pte_set_fixmap(pte_phys);
+ init_clear_pgtable(ptep);
+ ptep += pte_index(addr);
__pmd_populate(pmdp, pte_phys, pmdval);
- pmd = READ_ONCE(*pmdp);
+ } else {
+ BUG_ON(pmd_bad(pmd));
+ ptep = pte_set_fixmap_offset(pmdp, addr);
}
- BUG_ON(pmd_bad(pmd));
do {
pgprot_t __prot = prot;
@@ -229,20 +234,26 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
(flags & NO_CONT_MAPPINGS) == 0)
__prot = __pgprot(pgprot_val(prot) | PTE_CONT);
- init_pte(pmdp, addr, next, phys, __prot);
+ init_pte(ptep, addr, next, phys, __prot);
+ ptep += pte_index(next) - pte_index(addr);
phys += next - addr;
} while (addr = next, addr != end);
+
+ /*
+ * Note: barriers and maintenance necessary to clear the fixmap slot
+ * ensure that all previous pgtable writes are visible to the table
+ * walker.
+ */
+ pte_clear_fixmap();
}
-static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end,
+static void init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end,
phys_addr_t phys, pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(int), int flags)
+ phys_addr_t (*pgtable_alloc)(enum pgtable_type), int flags)
{
unsigned long next;
- pmd_t *pmdp;
- pmdp = pmd_set_fixmap_offset(pudp, addr);
do {
pmd_t old_pmd = READ_ONCE(*pmdp);
@@ -268,34 +279,38 @@ static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end,
}
phys += next - addr;
} while (pmdp++, addr = next, addr != end);
-
- pmd_clear_fixmap();
}
static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
unsigned long end, phys_addr_t phys,
pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(int), int flags)
+ phys_addr_t (*pgtable_alloc)(enum pgtable_type),
+ int flags)
{
unsigned long next;
pud_t pud = READ_ONCE(*pudp);
+ pmd_t *pmdp;
/*
* Check for initial section mappings in the pgd/pud.
*/
BUG_ON(pud_sect(pud));
if (pud_none(pud)) {
- pudval_t pudval = PUD_TYPE_TABLE | PUD_TABLE_UXN;
+ pudval_t pudval = PUD_TYPE_TABLE | PUD_TABLE_UXN | PUD_TABLE_AF;
phys_addr_t pmd_phys;
if (flags & NO_EXEC_MAPPINGS)
pudval |= PUD_TABLE_PXN;
BUG_ON(!pgtable_alloc);
- pmd_phys = pgtable_alloc(PMD_SHIFT);
+ pmd_phys = pgtable_alloc(TABLE_PMD);
+ pmdp = pmd_set_fixmap(pmd_phys);
+ init_clear_pgtable(pmdp);
+ pmdp += pmd_index(addr);
__pud_populate(pudp, pmd_phys, pudval);
- pud = READ_ONCE(*pudp);
+ } else {
+ BUG_ON(pud_bad(pud));
+ pmdp = pmd_set_fixmap_offset(pudp, addr);
}
- BUG_ON(pud_bad(pud));
do {
pgprot_t __prot = prot;
@@ -307,15 +322,18 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
(flags & NO_CONT_MAPPINGS) == 0)
__prot = __pgprot(pgprot_val(prot) | PTE_CONT);
- init_pmd(pudp, addr, next, phys, __prot, pgtable_alloc, flags);
+ init_pmd(pmdp, addr, next, phys, __prot, pgtable_alloc, flags);
+ pmdp += pmd_index(next) - pmd_index(addr);
phys += next - addr;
} while (addr = next, addr != end);
+
+ pmd_clear_fixmap();
}
static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end,
phys_addr_t phys, pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(int),
+ phys_addr_t (*pgtable_alloc)(enum pgtable_type),
int flags)
{
unsigned long next;
@@ -323,19 +341,22 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end,
pud_t *pudp;
if (p4d_none(p4d)) {
- p4dval_t p4dval = P4D_TYPE_TABLE | P4D_TABLE_UXN;
+ p4dval_t p4dval = P4D_TYPE_TABLE | P4D_TABLE_UXN | P4D_TABLE_AF;
phys_addr_t pud_phys;
if (flags & NO_EXEC_MAPPINGS)
p4dval |= P4D_TABLE_PXN;
BUG_ON(!pgtable_alloc);
- pud_phys = pgtable_alloc(PUD_SHIFT);
+ pud_phys = pgtable_alloc(TABLE_PUD);
+ pudp = pud_set_fixmap(pud_phys);
+ init_clear_pgtable(pudp);
+ pudp += pud_index(addr);
__p4d_populate(p4dp, pud_phys, p4dval);
- p4d = READ_ONCE(*p4dp);
+ } else {
+ BUG_ON(p4d_bad(p4d));
+ pudp = pud_set_fixmap_offset(p4dp, addr);
}
- BUG_ON(p4d_bad(p4d));
- pudp = pud_set_fixmap_offset(p4dp, addr);
do {
pud_t old_pud = READ_ONCE(*pudp);
@@ -370,7 +391,7 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end,
static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end,
phys_addr_t phys, pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(int),
+ phys_addr_t (*pgtable_alloc)(enum pgtable_type),
int flags)
{
unsigned long next;
@@ -378,19 +399,22 @@ static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end,
p4d_t *p4dp;
if (pgd_none(pgd)) {
- pgdval_t pgdval = PGD_TYPE_TABLE | PGD_TABLE_UXN;
+ pgdval_t pgdval = PGD_TYPE_TABLE | PGD_TABLE_UXN | PGD_TABLE_AF;
phys_addr_t p4d_phys;
if (flags & NO_EXEC_MAPPINGS)
pgdval |= PGD_TABLE_PXN;
BUG_ON(!pgtable_alloc);
- p4d_phys = pgtable_alloc(P4D_SHIFT);
+ p4d_phys = pgtable_alloc(TABLE_P4D);
+ p4dp = p4d_set_fixmap(p4d_phys);
+ init_clear_pgtable(p4dp);
+ p4dp += p4d_index(addr);
__pgd_populate(pgdp, p4d_phys, pgdval);
- pgd = READ_ONCE(*pgdp);
+ } else {
+ BUG_ON(pgd_bad(pgd));
+ p4dp = p4d_set_fixmap_offset(pgdp, addr);
}
- BUG_ON(pgd_bad(pgd));
- p4dp = p4d_set_fixmap_offset(pgdp, addr);
do {
p4d_t old_p4d = READ_ONCE(*p4dp);
@@ -411,7 +435,7 @@ static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end,
static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys,
unsigned long virt, phys_addr_t size,
pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(int),
+ phys_addr_t (*pgtable_alloc)(enum pgtable_type),
int flags)
{
unsigned long addr, end, next;
@@ -439,7 +463,7 @@ static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys,
static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
unsigned long virt, phys_addr_t size,
pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(int),
+ phys_addr_t (*pgtable_alloc)(enum pgtable_type),
int flags)
{
mutex_lock(&fixmap_lock);
@@ -452,38 +476,48 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
extern __alias(__create_pgd_mapping_locked)
void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt,
phys_addr_t size, pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(int), int flags);
+ phys_addr_t (*pgtable_alloc)(enum pgtable_type),
+ int flags);
#endif
-static phys_addr_t __pgd_pgtable_alloc(int shift)
-{
- void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL);
- BUG_ON(!ptr);
+static phys_addr_t __pgd_pgtable_alloc(struct mm_struct *mm,
+ enum pgtable_type pgtable_type)
+{
+ /* Page is zeroed by init_clear_pgtable() so don't duplicate effort. */
+ struct ptdesc *ptdesc = pagetable_alloc(GFP_PGTABLE_KERNEL & ~__GFP_ZERO, 0);
+ phys_addr_t pa;
+
+ BUG_ON(!ptdesc);
+ pa = page_to_phys(ptdesc_page(ptdesc));
+
+ switch (pgtable_type) {
+ case TABLE_PTE:
+ BUG_ON(!pagetable_pte_ctor(mm, ptdesc));
+ break;
+ case TABLE_PMD:
+ BUG_ON(!pagetable_pmd_ctor(mm, ptdesc));
+ break;
+ case TABLE_PUD:
+ pagetable_pud_ctor(ptdesc);
+ break;
+ case TABLE_P4D:
+ pagetable_p4d_ctor(ptdesc);
+ break;
+ }
- /* Ensure the zeroed page is visible to the page table walker */
- dsb(ishst);
- return __pa(ptr);
+ return pa;
}
-static phys_addr_t pgd_pgtable_alloc(int shift)
+static phys_addr_t __maybe_unused
+pgd_pgtable_alloc_init_mm(enum pgtable_type pgtable_type)
{
- phys_addr_t pa = __pgd_pgtable_alloc(shift);
- struct ptdesc *ptdesc = page_ptdesc(phys_to_page(pa));
-
- /*
- * Call proper page table ctor in case later we need to
- * call core mm functions like apply_to_page_range() on
- * this pre-allocated page table.
- *
- * We don't select ARCH_ENABLE_SPLIT_PMD_PTLOCK if pmd is
- * folded, and if so pagetable_pte_ctor() becomes nop.
- */
- if (shift == PAGE_SHIFT)
- BUG_ON(!pagetable_pte_ctor(ptdesc));
- else if (shift == PMD_SHIFT)
- BUG_ON(!pagetable_pmd_ctor(ptdesc));
+ return __pgd_pgtable_alloc(&init_mm, pgtable_type);
+}
- return pa;
+static phys_addr_t
+pgd_pgtable_alloc_special_mm(enum pgtable_type pgtable_type)
+{
+ return __pgd_pgtable_alloc(NULL, pgtable_type);
}
/*
@@ -515,7 +549,7 @@ void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
__create_pgd_mapping(mm->pgd, phys, virt, size, prot,
- pgd_pgtable_alloc, flags);
+ pgd_pgtable_alloc_special_mm, flags);
}
static void update_mapping_prot(phys_addr_t phys, unsigned long virt,
@@ -729,7 +763,7 @@ static int __init map_entry_trampoline(void)
memset(tramp_pg_dir, 0, PGD_SIZE);
__create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS,
entry_tramp_text_size(), prot,
- __pgd_pgtable_alloc, NO_BLOCK_MAPPINGS);
+ pgd_pgtable_alloc_init_mm, NO_BLOCK_MAPPINGS);
/* Map both the text and data into the kernel page table */
for (i = 0; i < DIV_ROUND_UP(entry_tramp_text_size(), PAGE_SIZE); i++)
@@ -1154,15 +1188,19 @@ int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node,
unsigned long addr, unsigned long next)
{
vmemmap_verify((pte_t *)pmdp, node, addr, next);
- return 1;
+
+ return pmd_sect(READ_ONCE(*pmdp));
}
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap)
{
WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END));
+ /* [start, end] should be within one section */
+ WARN_ON_ONCE(end - start > PAGES_PER_SECTION * sizeof(struct page));
- if (!IS_ENABLED(CONFIG_ARM64_4K_PAGES))
+ if (!IS_ENABLED(CONFIG_ARM64_4K_PAGES) ||
+ (end - start < PAGES_PER_SECTION * sizeof(struct page)))
return vmemmap_populate_basepages(start, end, node, altmap);
else
return vmemmap_populate_hugepages(start, end, node, altmap);
@@ -1267,7 +1305,8 @@ int pud_free_pmd_page(pud_t *pudp, unsigned long addr)
next = addr;
end = addr + PUD_SIZE;
do {
- pmd_free_pte_page(pmdp, next);
+ if (pmd_present(pmdp_get(pmdp)))
+ pmd_free_pte_page(pmdp, next);
} while (pmdp++, next += PMD_SIZE, next != end);
pud_clear(pudp);
@@ -1331,7 +1370,7 @@ int arch_add_memory(int nid, u64 start, u64 size,
flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
__create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start),
- size, params->pgprot, __pgd_pgtable_alloc,
+ size, params->pgprot, pgd_pgtable_alloc_init_mm,
flags);
memblock_clear_nomap(start, size);
@@ -1342,7 +1381,8 @@ int arch_add_memory(int nid, u64 start, u64 size,
__remove_pgd_mapping(swapper_pg_dir,
__phys_to_virt(start), size);
else {
- max_pfn = PFN_UP(start + size);
+ /* Address of hotplugged memory can be smaller */
+ max_pfn = max(max_pfn, PFN_UP(start + size));
max_low_pfn = max_pfn;
}
@@ -1535,3 +1575,45 @@ void __cpu_replace_ttbr1(pgd_t *pgdp, bool cnp)
cpu_uninstall_idmap();
}
+
+#ifdef CONFIG_ARCH_HAS_PKEYS
+int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long init_val)
+{
+ u64 new_por;
+ u64 old_por;
+
+ if (!system_supports_poe())
+ return -ENOSPC;
+
+ /*
+ * This code should only be called with valid 'pkey'
+ * values originating from in-kernel users. Complain
+ * if a bad value is observed.
+ */
+ if (WARN_ON_ONCE(pkey >= arch_max_pkey()))
+ return -EINVAL;
+
+ /* Set the bits we need in POR: */
+ new_por = POE_RWX;
+ if (init_val & PKEY_DISABLE_WRITE)
+ new_por &= ~POE_W;
+ if (init_val & PKEY_DISABLE_ACCESS)
+ new_por &= ~POE_RW;
+ if (init_val & PKEY_DISABLE_READ)
+ new_por &= ~POE_R;
+ if (init_val & PKEY_DISABLE_EXECUTE)
+ new_por &= ~POE_X;
+
+ /* Shift the bits in to the correct place in POR for pkey: */
+ new_por = POR_ELx_PERM_PREP(pkey, new_por);
+
+ /* Get old POR and mask off any old bits in place: */
+ old_por = read_sysreg_s(SYS_POR_EL0);
+ old_por &= ~(POE_MASK << POR_ELx_PERM_SHIFT(pkey));
+
+ /* Write old part along with new part: */
+ write_sysreg_s(old_por | new_por, SYS_POR_EL0);
+
+ return 0;
+}
+#endif