diff options
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r-- | mm/huge_memory.c | 109 |
1 files changed, 91 insertions, 18 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 24ad53b4dfc0..6ecd1045113b 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -326,7 +326,7 @@ static struct attribute *hugepage_attr[] = { &defrag_attr.attr, &use_zero_page_attr.attr, &hpage_pmd_size_attr.attr, -#if defined(CONFIG_SHMEM) && defined(CONFIG_TRANSPARENT_HUGE_PAGECACHE) +#ifdef CONFIG_SHMEM &shmem_enabled_attr.attr, #endif #ifdef CONFIG_DEBUG_VM @@ -597,6 +597,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf, if (mem_cgroup_try_charge_delay(page, vma->vm_mm, gfp, &memcg, true)) { put_page(page); count_vm_event(THP_FAULT_FALLBACK); + count_vm_event(THP_FAULT_FALLBACK_CHARGE); return VM_FAULT_FALLBACK; } @@ -824,11 +825,24 @@ out_unlock: pte_free(mm, pgtable); } -vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write) +/** + * vmf_insert_pfn_pmd_prot - insert a pmd size pfn + * @vmf: Structure describing the fault + * @pfn: pfn to insert + * @pgprot: page protection to use + * @write: whether it's a write fault + * + * Insert a pmd size pfn. See vmf_insert_pfn() for additional info and + * also consult the vmf_insert_mixed_prot() documentation when + * @pgprot != @vmf->vma->vm_page_prot. + * + * Return: vm_fault_t value. + */ +vm_fault_t vmf_insert_pfn_pmd_prot(struct vm_fault *vmf, pfn_t pfn, + pgprot_t pgprot, bool write) { unsigned long addr = vmf->address & PMD_MASK; struct vm_area_struct *vma = vmf->vma; - pgprot_t pgprot = vma->vm_page_prot; pgtable_t pgtable = NULL; /* @@ -856,7 +870,7 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write) insert_pfn_pmd(vma, addr, vmf->pmd, pfn, pgprot, write, pgtable); return VM_FAULT_NOPAGE; } -EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd); +EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd_prot); #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD static pud_t maybe_pud_mkwrite(pud_t pud, struct vm_area_struct *vma) @@ -902,11 +916,24 @@ out_unlock: spin_unlock(ptl); } -vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write) +/** + * vmf_insert_pfn_pud_prot - insert a pud size pfn + * @vmf: Structure describing the fault + * @pfn: pfn to insert + * @pgprot: page protection to use + * @write: whether it's a write fault + * + * Insert a pud size pfn. See vmf_insert_pfn() for additional info and + * also consult the vmf_insert_mixed_prot() documentation when + * @pgprot != @vmf->vma->vm_page_prot. + * + * Return: vm_fault_t value. + */ +vm_fault_t vmf_insert_pfn_pud_prot(struct vm_fault *vmf, pfn_t pfn, + pgprot_t pgprot, bool write) { unsigned long addr = vmf->address & PUD_MASK; struct vm_area_struct *vma = vmf->vma; - pgprot_t pgprot = vma->vm_page_prot; /* * If we had pud_special, we could avoid all these restrictions, @@ -927,7 +954,7 @@ vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write) insert_pfn_pud(vma, addr, vmf->pud, pfn, pgprot, write); return VM_FAULT_NOPAGE; } -EXPORT_SYMBOL_GPL(vmf_insert_pfn_pud); +EXPORT_SYMBOL_GPL(vmf_insert_pfn_pud_prot); #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ static void touch_pmd(struct vm_area_struct *vma, unsigned long addr, @@ -958,6 +985,11 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, */ WARN_ONCE(flags & FOLL_COW, "mm: In follow_devmap_pmd with FOLL_COW set"); + /* FOLL_GET and FOLL_PIN are mutually exclusive. */ + if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) == + (FOLL_PIN | FOLL_GET))) + return NULL; + if (flags & FOLL_WRITE && !pmd_write(*pmd)) return NULL; @@ -973,7 +1005,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, * device mapped pages can only be returned if the * caller will manage the page reference count. */ - if (!(flags & FOLL_GET)) + if (!(flags & (FOLL_GET | FOLL_PIN))) return ERR_PTR(-EEXIST); pfn += (addr & ~PMD_MASK) >> PAGE_SHIFT; @@ -981,7 +1013,8 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, if (!*pgmap) return ERR_PTR(-EFAULT); page = pfn_to_page(pfn); - get_page(page); + if (!try_grab_page(page, flags)) + page = ERR_PTR(-ENOMEM); return page; } @@ -1011,6 +1044,14 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, ret = -EAGAIN; pmd = *src_pmd; + /* + * Make sure the _PAGE_UFFD_WP bit is cleared if the new VMA + * does not have the VM_UFFD_WP, which means that the uffd + * fork event is not enabled. + */ + if (!(vma->vm_flags & VM_UFFD_WP)) + pmd = pmd_clear_uffd_wp(pmd); + #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION if (unlikely(is_swap_pmd(pmd))) { swp_entry_t entry = pmd_to_swp_entry(pmd); @@ -1101,6 +1142,11 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, if (flags & FOLL_WRITE && !pud_write(*pud)) return NULL; + /* FOLL_GET and FOLL_PIN are mutually exclusive. */ + if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) == + (FOLL_PIN | FOLL_GET))) + return NULL; + if (pud_present(*pud) && pud_devmap(*pud)) /* pass */; else @@ -1112,8 +1158,10 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, /* * device mapped pages can only be returned if the * caller will manage the page reference count. + * + * At least one of FOLL_GET | FOLL_PIN must be set, so assert that here: */ - if (!(flags & FOLL_GET)) + if (!(flags & (FOLL_GET | FOLL_PIN))) return ERR_PTR(-EEXIST); pfn += (addr & ~PUD_MASK) >> PAGE_SHIFT; @@ -1121,7 +1169,8 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, if (!*pgmap) return ERR_PTR(-EFAULT); page = pfn_to_page(pfn); - get_page(page); + if (!try_grab_page(page, flags)) + page = ERR_PTR(-ENOMEM); return page; } @@ -1406,6 +1455,7 @@ alloc: put_page(page); ret |= VM_FAULT_FALLBACK; count_vm_event(THP_FAULT_FALLBACK); + count_vm_event(THP_FAULT_FALLBACK_CHARGE); goto out; } @@ -1497,8 +1547,13 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, page = pmd_page(*pmd); VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page); + + if (!try_grab_page(page, flags)) + return ERR_PTR(-ENOMEM); + if (flags & FOLL_TOUCH) touch_pmd(vma, addr, pmd, flags); + if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) { /* * We don't mlock() pte-mapped THPs. This way we can avoid @@ -1535,8 +1590,6 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, skip_mlock: page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT; VM_BUG_ON_PAGE(!PageCompound(page) && !is_zone_device_page(page), page); - if (flags & FOLL_GET) - get_page(page); out: return page; @@ -1802,7 +1855,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, orig_pmd = pmdp_huge_get_and_clear_full(tlb->mm, addr, pmd, tlb->fullmm); tlb_remove_pmd_tlb_entry(tlb, pmd, addr); - if (vma_is_dax(vma)) { + if (vma_is_special_huge(vma)) { if (arch_needs_pgtable_deposit()) zap_deposited_table(tlb->mm, pmd); spin_unlock(ptl); @@ -1934,13 +1987,16 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, * - HPAGE_PMD_NR is protections changed and TLB flush necessary */ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, - unsigned long addr, pgprot_t newprot, int prot_numa) + unsigned long addr, pgprot_t newprot, unsigned long cp_flags) { struct mm_struct *mm = vma->vm_mm; spinlock_t *ptl; pmd_t entry; bool preserve_write; int ret; + bool prot_numa = cp_flags & MM_CP_PROT_NUMA; + bool uffd_wp = cp_flags & MM_CP_UFFD_WP; + bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE; ptl = __pmd_trans_huge_lock(pmd, vma); if (!ptl) @@ -2007,6 +2063,17 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, entry = pmd_modify(entry, newprot); if (preserve_write) entry = pmd_mk_savedwrite(entry); + if (uffd_wp) { + entry = pmd_wrprotect(entry); + entry = pmd_mkuffd_wp(entry); + } else if (uffd_wp_resolve) { + /* + * Leave the write bit to be handled by PF interrupt + * handler, then things like COW could be properly + * handled. + */ + entry = pmd_clear_uffd_wp(entry); + } ret = HPAGE_PMD_NR; set_pmd_at(mm, addr, pmd, entry); BUG_ON(vma_is_anonymous(vma) && !preserve_write && pmd_write(entry)); @@ -2066,7 +2133,7 @@ int zap_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, */ pudp_huge_get_and_clear_full(tlb->mm, addr, pud, tlb->fullmm); tlb_remove_pud_tlb_entry(tlb, pud, addr); - if (vma_is_dax(vma)) { + if (vma_is_special_huge(vma)) { spin_unlock(ptl); /* No zero page support yet */ } else { @@ -2155,7 +2222,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, struct page *page; pgtable_t pgtable; pmd_t old_pmd, _pmd; - bool young, write, soft_dirty, pmd_migration = false; + bool young, write, soft_dirty, pmd_migration = false, uffd_wp = false; unsigned long addr; int i; @@ -2175,7 +2242,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, */ if (arch_needs_pgtable_deposit()) zap_deposited_table(mm, pmd); - if (vma_is_dax(vma)) + if (vma_is_special_huge(vma)) return; page = pmd_page(_pmd); if (!PageDirty(page) && pmd_dirty(_pmd)) @@ -2230,6 +2297,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, write = is_write_migration_entry(entry); young = false; soft_dirty = pmd_swp_soft_dirty(old_pmd); + uffd_wp = pmd_swp_uffd_wp(old_pmd); } else { page = pmd_page(old_pmd); if (pmd_dirty(old_pmd)) @@ -2237,6 +2305,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, write = pmd_write(old_pmd); young = pmd_young(old_pmd); soft_dirty = pmd_soft_dirty(old_pmd); + uffd_wp = pmd_uffd_wp(old_pmd); } VM_BUG_ON_PAGE(!page_count(page), page); page_ref_add(page, HPAGE_PMD_NR - 1); @@ -2261,6 +2330,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, entry = swp_entry_to_pte(swp_entry); if (soft_dirty) entry = pte_swp_mksoft_dirty(entry); + if (uffd_wp) + entry = pte_swp_mkuffd_wp(entry); } else { entry = mk_pte(page + i, READ_ONCE(vma->vm_page_prot)); entry = maybe_mkwrite(entry, vma); @@ -2270,6 +2341,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, entry = pte_mkold(entry); if (soft_dirty) entry = pte_mksoft_dirty(entry); + if (uffd_wp) + entry = pte_mkuffd_wp(entry); } pte = pte_offset_map(&_pmd, addr); BUG_ON(!pte_none(*pte)); |