aboutsummaryrefslogtreecommitdiffstats
path: root/arch/arm64/mm/hugetlbpage.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-09-05 09:53:37 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-09-05 09:53:37 -0700
commit04759194dc447ff0b9ef35bc641ce3bb076c2930 (patch)
tree92eca3b7aa1e0d5013db254ae9f5bc130bd7e735 /arch/arm64/mm/hugetlbpage.c
parentMerge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux (diff)
parentarm64: cleanup {COMPAT_,}SET_PERSONALITY() macro (diff)
downloadlinux-dev-04759194dc447ff0b9ef35bc641ce3bb076c2930.tar.xz
linux-dev-04759194dc447ff0b9ef35bc641ce3bb076c2930.zip
Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 updates from Catalin Marinas: - VMAP_STACK support, allowing the kernel stacks to be allocated in the vmalloc space with a guard page for trapping stack overflows. One of the patches introduces THREAD_ALIGN and changes the generic alloc_thread_stack_node() to use this instead of THREAD_SIZE (no functional change for other architectures) - Contiguous PTE hugetlb support re-enabled (after being reverted a couple of times). We now have the semantics agreed in the generic mm layer together with API improvements so that the architecture code can detect between contiguous and non-contiguous huge PTEs - Initial support for persistent memory on ARM: DC CVAP instruction exposed to user space (HWCAP) and the in-kernel pmem API implemented - raid6 improvements for arm64: faster algorithm for the delta syndrome and implementation of the recovery routines using Neon - FP/SIMD refactoring and removal of support for Neon in interrupt context. This is in preparation for full SVE support - PTE accessors converted from inline asm to cmpxchg so that we can use LSE atomics if available (ARMv8.1) - Perf support for Cortex-A35 and A73 - Non-urgent fixes and cleanups * tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (75 commits) arm64: cleanup {COMPAT_,}SET_PERSONALITY() macro arm64: introduce separated bits for mm_context_t flags arm64: hugetlb: Cleanup setup_hugepagesz arm64: Re-enable support for contiguous hugepages arm64: hugetlb: Override set_huge_swap_pte_at() to support contiguous hugepages arm64: hugetlb: Override huge_pte_clear() to support contiguous hugepages arm64: hugetlb: Handle swap entries in huge_pte_offset() for contiguous hugepages arm64: hugetlb: Add break-before-make logic for contiguous entries arm64: hugetlb: Spring clean huge pte accessors arm64: hugetlb: Introduce pte_pgprot helper arm64: hugetlb: set_huge_pte_at Add WARN_ON on !pte_present arm64: kexec: have own crash_smp_send_stop() for crash dump for nonpanic cores arm64: dma-mapping: Mark atomic_pool as __ro_after_init arm64: dma-mapping: Do not pass data to gen_pool_set_algo() arm64: Remove the !CONFIG_ARM64_HW_AFDBM alternative code paths arm64: Ignore hardware dirty bit updates in ptep_set_wrprotect() arm64: Move PTE_RDONLY bit handling out of set_pte_at() kvm: arm64: Convert kvm_set_s2pte_readonly() from inline asm to cmpxchg() arm64: Convert pte handling from inline asm to using (cmp)xchg arm64: neon/efi: Make EFI fpsimd save/restore variables static ...
Diffstat (limited to 'arch/arm64/mm/hugetlbpage.c')
-rw-r--r--arch/arm64/mm/hugetlbpage.c314
1 files changed, 230 insertions, 84 deletions
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 656e0ece2289..6cb0fa92a651 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -41,6 +41,16 @@ int pud_huge(pud_t pud)
#endif
}
+/*
+ * Select all bits except the pfn
+ */
+static inline pgprot_t pte_pgprot(pte_t pte)
+{
+ unsigned long pfn = pte_pfn(pte);
+
+ return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
+}
+
static int find_num_contig(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, size_t *pgsize)
{
@@ -58,15 +68,107 @@ static int find_num_contig(struct mm_struct *mm, unsigned long addr,
return CONT_PTES;
}
+static inline int num_contig_ptes(unsigned long size, size_t *pgsize)
+{
+ int contig_ptes = 0;
+
+ *pgsize = size;
+
+ switch (size) {
+#ifdef CONFIG_ARM64_4K_PAGES
+ case PUD_SIZE:
+#endif
+ case PMD_SIZE:
+ contig_ptes = 1;
+ break;
+ case CONT_PMD_SIZE:
+ *pgsize = PMD_SIZE;
+ contig_ptes = CONT_PMDS;
+ break;
+ case CONT_PTE_SIZE:
+ *pgsize = PAGE_SIZE;
+ contig_ptes = CONT_PTES;
+ break;
+ }
+
+ return contig_ptes;
+}
+
+/*
+ * Changing some bits of contiguous entries requires us to follow a
+ * Break-Before-Make approach, breaking the whole contiguous set
+ * before we can change any entries. See ARM DDI 0487A.k_iss10775,
+ * "Misprogramming of the Contiguous bit", page D4-1762.
+ *
+ * This helper performs the break step.
+ */
+static pte_t get_clear_flush(struct mm_struct *mm,
+ unsigned long addr,
+ pte_t *ptep,
+ unsigned long pgsize,
+ unsigned long ncontig)
+{
+ struct vm_area_struct vma = { .vm_mm = mm };
+ pte_t orig_pte = huge_ptep_get(ptep);
+ bool valid = pte_valid(orig_pte);
+ unsigned long i, saddr = addr;
+
+ for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) {
+ pte_t pte = ptep_get_and_clear(mm, addr, ptep);
+
+ /*
+ * If HW_AFDBM is enabled, then the HW could turn on
+ * the dirty bit for any page in the set, so check
+ * them all. All hugetlb entries are already young.
+ */
+ if (pte_dirty(pte))
+ orig_pte = pte_mkdirty(orig_pte);
+ }
+
+ if (valid)
+ flush_tlb_range(&vma, saddr, addr);
+ return orig_pte;
+}
+
+/*
+ * Changing some bits of contiguous entries requires us to follow a
+ * Break-Before-Make approach, breaking the whole contiguous set
+ * before we can change any entries. See ARM DDI 0487A.k_iss10775,
+ * "Misprogramming of the Contiguous bit", page D4-1762.
+ *
+ * This helper performs the break step for use cases where the
+ * original pte is not needed.
+ */
+static void clear_flush(struct mm_struct *mm,
+ unsigned long addr,
+ pte_t *ptep,
+ unsigned long pgsize,
+ unsigned long ncontig)
+{
+ struct vm_area_struct vma = { .vm_mm = mm };
+ unsigned long i, saddr = addr;
+
+ for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
+ pte_clear(mm, addr, ptep);
+
+ flush_tlb_range(&vma, saddr, addr);
+}
+
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
size_t pgsize;
int i;
int ncontig;
- unsigned long pfn;
+ unsigned long pfn, dpfn;
pgprot_t hugeprot;
+ /*
+ * Code needs to be expanded to handle huge swap and migration
+ * entries. Needed for HUGETLB and MEMORY_FAILURE.
+ */
+ WARN_ON(!pte_present(pte));
+
if (!pte_cont(pte)) {
set_pte_at(mm, addr, ptep, pte);
return;
@@ -74,17 +176,30 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
ncontig = find_num_contig(mm, addr, ptep, &pgsize);
pfn = pte_pfn(pte);
- hugeprot = __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
- for (i = 0; i < ncontig; i++) {
+ dpfn = pgsize >> PAGE_SHIFT;
+ hugeprot = pte_pgprot(pte);
+
+ clear_flush(mm, addr, ptep, pgsize, ncontig);
+
+ for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) {
pr_debug("%s: set pte %p to 0x%llx\n", __func__, ptep,
pte_val(pfn_pte(pfn, hugeprot)));
set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
- ptep++;
- pfn += pgsize >> PAGE_SHIFT;
- addr += pgsize;
}
}
+void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte, unsigned long sz)
+{
+ int i, ncontig;
+ size_t pgsize;
+
+ ncontig = num_contig_ptes(sz, &pgsize);
+
+ for (i = 0; i < ncontig; i++, ptep++)
+ set_pte(ptep, pte);
+}
+
pte_t *huge_pte_alloc(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
@@ -144,19 +259,28 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
return NULL;
pud = pud_offset(pgd, addr);
- if (pud_none(*pud))
+ if (sz != PUD_SIZE && pud_none(*pud))
return NULL;
- /* swap or huge page */
- if (!pud_present(*pud) || pud_huge(*pud))
+ /* hugepage or swap? */
+ if (pud_huge(*pud) || !pud_present(*pud))
return (pte_t *)pud;
/* table; check the next level */
+ if (sz == CONT_PMD_SIZE)
+ addr &= CONT_PMD_MASK;
+
pmd = pmd_offset(pud, addr);
- if (pmd_none(*pmd))
+ if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) &&
+ pmd_none(*pmd))
return NULL;
- if (!pmd_present(*pmd) || pmd_huge(*pmd))
+ if (pmd_huge(*pmd) || !pmd_present(*pmd))
return (pte_t *)pmd;
+ if (sz == CONT_PTE_SIZE) {
+ pte_t *pte = pte_offset_kernel(pmd, (addr & CONT_PTE_MASK));
+ return pte;
+ }
+
return NULL;
}
@@ -176,111 +300,133 @@ pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
return entry;
}
+void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned long sz)
+{
+ int i, ncontig;
+ size_t pgsize;
+
+ ncontig = num_contig_ptes(sz, &pgsize);
+
+ for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
+ pte_clear(mm, addr, ptep);
+}
+
pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
- pte_t pte;
+ int ncontig;
+ size_t pgsize;
+ pte_t orig_pte = huge_ptep_get(ptep);
- if (pte_cont(*ptep)) {
- int ncontig, i;
- size_t pgsize;
- bool is_dirty = false;
-
- ncontig = find_num_contig(mm, addr, ptep, &pgsize);
- /* save the 1st pte to return */
- pte = ptep_get_and_clear(mm, addr, ptep);
- for (i = 1, addr += pgsize; i < ncontig; ++i, addr += pgsize) {
- /*
- * If HW_AFDBM is enabled, then the HW could
- * turn on the dirty bit for any of the page
- * in the set, so check them all.
- */
- ++ptep;
- if (pte_dirty(ptep_get_and_clear(mm, addr, ptep)))
- is_dirty = true;
- }
- if (is_dirty)
- return pte_mkdirty(pte);
- else
- return pte;
- } else {
+ if (!pte_cont(orig_pte))
return ptep_get_and_clear(mm, addr, ptep);
- }
+
+ ncontig = find_num_contig(mm, addr, ptep, &pgsize);
+
+ return get_clear_flush(mm, addr, ptep, pgsize, ncontig);
}
int huge_ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
pte_t pte, int dirty)
{
- if (pte_cont(pte)) {
- int ncontig, i, changed = 0;
- size_t pgsize = 0;
- unsigned long pfn = pte_pfn(pte);
- /* Select all bits except the pfn */
- pgprot_t hugeprot =
- __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^
- pte_val(pte));
-
- pfn = pte_pfn(pte);
- ncontig = find_num_contig(vma->vm_mm, addr, ptep,
- &pgsize);
- for (i = 0; i < ncontig; ++i, ++ptep, addr += pgsize) {
- changed |= ptep_set_access_flags(vma, addr, ptep,
- pfn_pte(pfn,
- hugeprot),
- dirty);
- pfn += pgsize >> PAGE_SHIFT;
- }
- return changed;
- } else {
+ int ncontig, i, changed = 0;
+ size_t pgsize = 0;
+ unsigned long pfn = pte_pfn(pte), dpfn;
+ pgprot_t hugeprot;
+ pte_t orig_pte;
+
+ if (!pte_cont(pte))
return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
- }
+
+ ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize);
+ dpfn = pgsize >> PAGE_SHIFT;
+
+ orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);
+ if (!pte_same(orig_pte, pte))
+ changed = 1;
+
+ /* Make sure we don't lose the dirty state */
+ if (pte_dirty(orig_pte))
+ pte = pte_mkdirty(pte);
+
+ hugeprot = pte_pgprot(pte);
+ for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
+ set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot));
+
+ return changed;
}
void huge_ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
- if (pte_cont(*ptep)) {
- int ncontig, i;
- size_t pgsize = 0;
-
- ncontig = find_num_contig(mm, addr, ptep, &pgsize);
- for (i = 0; i < ncontig; ++i, ++ptep, addr += pgsize)
- ptep_set_wrprotect(mm, addr, ptep);
- } else {
+ unsigned long pfn, dpfn;
+ pgprot_t hugeprot;
+ int ncontig, i;
+ size_t pgsize;
+ pte_t pte;
+
+ if (!pte_cont(*ptep)) {
ptep_set_wrprotect(mm, addr, ptep);
+ return;
}
+
+ ncontig = find_num_contig(mm, addr, ptep, &pgsize);
+ dpfn = pgsize >> PAGE_SHIFT;
+
+ pte = get_clear_flush(mm, addr, ptep, pgsize, ncontig);
+ pte = pte_wrprotect(pte);
+
+ hugeprot = pte_pgprot(pte);
+ pfn = pte_pfn(pte);
+
+ for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
+ set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
}
void huge_ptep_clear_flush(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
- if (pte_cont(*ptep)) {
- int ncontig, i;
- size_t pgsize = 0;
-
- ncontig = find_num_contig(vma->vm_mm, addr, ptep,
- &pgsize);
- for (i = 0; i < ncontig; ++i, ++ptep, addr += pgsize)
- ptep_clear_flush(vma, addr, ptep);
- } else {
+ size_t pgsize;
+ int ncontig;
+
+ if (!pte_cont(*ptep)) {
ptep_clear_flush(vma, addr, ptep);
+ return;
}
+
+ ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize);
+ clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);
}
static __init int setup_hugepagesz(char *opt)
{
unsigned long ps = memparse(opt, &opt);
- if (ps == PMD_SIZE) {
- hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
- } else if (ps == PUD_SIZE) {
- hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
- } else {
- hugetlb_bad_size();
- pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10);
- return 0;
+ switch (ps) {
+#ifdef CONFIG_ARM64_4K_PAGES
+ case PUD_SIZE:
+#endif
+ case PMD_SIZE * CONT_PMDS:
+ case PMD_SIZE:
+ case PAGE_SIZE * CONT_PTES:
+ hugetlb_add_hstate(ilog2(ps) - PAGE_SHIFT);
+ return 1;
}
- return 1;
+
+ hugetlb_bad_size();
+ pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10);
+ return 0;
}
__setup("hugepagesz=", setup_hugepagesz);
+
+#ifdef CONFIG_ARM64_64K_PAGES
+static __init int add_default_hugepagesz(void)
+{
+ if (size_to_hstate(CONT_PTES * PAGE_SIZE) == NULL)
+ hugetlb_add_hstate(CONT_PTE_SHIFT);
+ return 0;
+}
+arch_initcall(add_default_hugepagesz);
+#endif