aboutsummaryrefslogtreecommitdiffstats
path: root/mm/memory.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memory.c')
-rw-r--r--mm/memory.c204
1 files changed, 130 insertions, 74 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 098f00d05461..15322b73636b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -63,6 +63,7 @@
#include <linux/dma-debug.h>
#include <linux/debugfs.h>
#include <linux/userfaultfd_k.h>
+#include <linux/dax.h>
#include <asm/io.h>
#include <asm/mmu_context.h>
@@ -789,6 +790,46 @@ out:
return pfn_to_page(pfn);
}
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
+ pmd_t pmd)
+{
+ unsigned long pfn = pmd_pfn(pmd);
+
+ /*
+ * There is no pmd_special() but there may be special pmds, e.g.
+ * in a direct-access (dax) mapping, so let's just replicate the
+ * !HAVE_PTE_SPECIAL case from vm_normal_page() here.
+ */
+ if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
+ if (vma->vm_flags & VM_MIXEDMAP) {
+ if (!pfn_valid(pfn))
+ return NULL;
+ goto out;
+ } else {
+ unsigned long off;
+ off = (addr - vma->vm_start) >> PAGE_SHIFT;
+ if (pfn == vma->vm_pgoff + off)
+ return NULL;
+ if (!is_cow_mapping(vma->vm_flags))
+ return NULL;
+ }
+ }
+
+ if (is_zero_pfn(pfn))
+ return NULL;
+ if (unlikely(pfn > highest_memmap_pfn))
+ return NULL;
+
+ /*
+ * NOTE! We still have PageReserved() pages in the page tables.
+ * eg. VDSO mappings can cause them to exist.
+ */
+out:
+ return pfn_to_page(pfn);
+}
+#endif
+
/*
* copy one vm_area from one task to the other. Assumes the page tables
* already present in the new task to be cleared in the whole range
@@ -1182,15 +1223,8 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
next = pmd_addr_end(addr, end);
if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
if (next - addr != HPAGE_PMD_SIZE) {
-#ifdef CONFIG_DEBUG_VM
- if (!rwsem_is_locked(&tlb->mm->mmap_sem)) {
- pr_err("%s: mmap_sem is unlocked! addr=0x%lx end=0x%lx vma->vm_start=0x%lx vma->vm_end=0x%lx\n",
- __func__, addr, end,
- vma->vm_start,
- vma->vm_end);
- BUG();
- }
-#endif
+ VM_BUG_ON_VMA(vma_is_anonymous(vma) &&
+ !rwsem_is_locked(&tlb->mm->mmap_sem), vma);
split_huge_pmd(vma, pmd, addr);
} else if (zap_huge_pmd(tlb, vma, pmd, addr))
goto next;
@@ -1711,6 +1745,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
unsigned long next;
unsigned long end = addr + PAGE_ALIGN(size);
struct mm_struct *mm = vma->vm_mm;
+ unsigned long remap_pfn = pfn;
int err;
/*
@@ -1737,7 +1772,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
vma->vm_pgoff = pfn;
}
- err = track_pfn_remap(vma, &prot, pfn, addr, PAGE_ALIGN(size));
+ err = track_pfn_remap(vma, &prot, remap_pfn, addr, PAGE_ALIGN(size));
if (err)
return -EINVAL;
@@ -1756,7 +1791,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
} while (pgd++, addr = next, addr != end);
if (err)
- untrack_pfn(vma, pfn, PAGE_ALIGN(size));
+ untrack_pfn(vma, remap_pfn, PAGE_ALIGN(size));
return err;
}
@@ -2054,7 +2089,7 @@ static inline int wp_page_reuse(struct mm_struct *mm,
VM_BUG_ON_PAGE(PageAnon(page), page);
mapping = page->mapping;
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
if ((dirtied || page_mkwrite) && mapping) {
/*
@@ -2188,7 +2223,7 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
}
if (new_page)
- page_cache_release(new_page);
+ put_page(new_page);
pte_unmap_unlock(page_table, ptl);
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
@@ -2203,14 +2238,14 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
munlock_vma_page(old_page);
unlock_page(old_page);
}
- page_cache_release(old_page);
+ put_page(old_page);
}
return page_copied ? VM_FAULT_WRITE : 0;
oom_free_new:
- page_cache_release(new_page);
+ put_page(new_page);
oom:
if (old_page)
- page_cache_release(old_page);
+ put_page(old_page);
return VM_FAULT_OOM;
}
@@ -2258,7 +2293,7 @@ static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma,
{
int page_mkwrite = 0;
- page_cache_get(old_page);
+ get_page(old_page);
if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
int tmp;
@@ -2267,7 +2302,7 @@ static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma,
tmp = do_page_mkwrite(vma, old_page, address);
if (unlikely(!tmp || (tmp &
(VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) {
- page_cache_release(old_page);
+ put_page(old_page);
return tmp;
}
/*
@@ -2281,7 +2316,7 @@ static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma,
if (!pte_same(*page_table, orig_pte)) {
unlock_page(old_page);
pte_unmap_unlock(page_table, ptl);
- page_cache_release(old_page);
+ put_page(old_page);
return 0;
}
page_mkwrite = 1;
@@ -2340,8 +2375,9 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
* not dirty accountable.
*/
if (PageAnon(old_page) && !PageKsm(old_page)) {
+ int total_mapcount;
if (!trylock_page(old_page)) {
- page_cache_get(old_page);
+ get_page(old_page);
pte_unmap_unlock(page_table, ptl);
lock_page(old_page);
page_table = pte_offset_map_lock(mm, pmd, address,
@@ -2349,18 +2385,23 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (!pte_same(*page_table, orig_pte)) {
unlock_page(old_page);
pte_unmap_unlock(page_table, ptl);
- page_cache_release(old_page);
+ put_page(old_page);
return 0;
}
- page_cache_release(old_page);
+ put_page(old_page);
}
- if (reuse_swap_page(old_page)) {
- /*
- * The page is all ours. Move it to our anon_vma so
- * the rmap code will not search our parent or siblings.
- * Protected against the rmap code by the page lock.
- */
- page_move_anon_rmap(old_page, vma, address);
+ if (reuse_swap_page(old_page, &total_mapcount)) {
+ if (total_mapcount == 1) {
+ /*
+ * The page is all ours. Move it to
+ * our anon_vma so the rmap code will
+ * not search our parent or siblings.
+ * Protected against the rmap code by
+ * the page lock.
+ */
+ page_move_anon_rmap(compound_head(old_page),
+ vma, address);
+ }
unlock_page(old_page);
return wp_page_reuse(mm, vma, address, page_table, ptl,
orig_pte, old_page, 0, 0);
@@ -2375,7 +2416,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
/*
* Ok, we need to copy. Oh, well..
*/
- page_cache_get(old_page);
+ get_page(old_page);
pte_unmap_unlock(page_table, ptl);
return wp_page_copy(mm, vma, address, page_table, pmd,
@@ -2400,7 +2441,6 @@ static inline void unmap_mapping_range_tree(struct rb_root *root,
vba = vma->vm_pgoff;
vea = vba + vma_pages(vma) - 1;
- /* Assume for now that PAGE_CACHE_SHIFT == PAGE_SHIFT */
zba = details->first_index;
if (zba < vba)
zba = vba;
@@ -2453,8 +2493,6 @@ void unmap_mapping_range(struct address_space *mapping,
if (details.last_index < details.first_index)
details.last_index = ULONG_MAX;
-
- /* DAX uses i_mmap_lock to serialise file truncate vs page fault */
i_mmap_lock_write(mapping);
if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap)))
unmap_mapping_range_tree(&mapping->i_mmap, &details);
@@ -2585,7 +2623,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
inc_mm_counter_fast(mm, MM_ANONPAGES);
dec_mm_counter_fast(mm, MM_SWAPENTS);
pte = mk_pte(page, vma->vm_page_prot);
- if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) {
+ if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
pte = maybe_mkwrite(pte_mkdirty(pte), vma);
flags &= ~FAULT_FLAG_WRITE;
ret |= VM_FAULT_WRITE;
@@ -2619,7 +2657,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
* parallel locked swapcache.
*/
unlock_page(swapcache);
- page_cache_release(swapcache);
+ put_page(swapcache);
}
if (flags & FAULT_FLAG_WRITE) {
@@ -2641,10 +2679,10 @@ out_nomap:
out_page:
unlock_page(page);
out_release:
- page_cache_release(page);
+ put_page(page);
if (page != swapcache) {
unlock_page(swapcache);
- page_cache_release(swapcache);
+ put_page(swapcache);
}
return ret;
}
@@ -2752,7 +2790,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (userfaultfd_missing(vma)) {
pte_unmap_unlock(page_table, ptl);
mem_cgroup_cancel_charge(page, memcg, false);
- page_cache_release(page);
+ put_page(page);
return handle_userfault(vma, address, flags,
VM_UFFD_MISSING);
}
@@ -2771,10 +2809,10 @@ unlock:
return 0;
release:
mem_cgroup_cancel_charge(page, memcg, false);
- page_cache_release(page);
+ put_page(page);
goto unlock;
oom_free_page:
- page_cache_release(page);
+ put_page(page);
oom:
return VM_FAULT_OOM;
}
@@ -2786,7 +2824,8 @@ oom:
*/
static int __do_fault(struct vm_area_struct *vma, unsigned long address,
pgoff_t pgoff, unsigned int flags,
- struct page *cow_page, struct page **page)
+ struct page *cow_page, struct page **page,
+ void **entry)
{
struct vm_fault vmf;
int ret;
@@ -2801,13 +2840,15 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address,
ret = vma->vm_ops->fault(vma, &vmf);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
return ret;
- if (!vmf.page)
- goto out;
+ if (ret & VM_FAULT_DAX_LOCKED) {
+ *entry = vmf.entry;
+ return ret;
+ }
if (unlikely(PageHWPoison(vmf.page))) {
if (ret & VM_FAULT_LOCKED)
unlock_page(vmf.page);
- page_cache_release(vmf.page);
+ put_page(vmf.page);
return VM_FAULT_HWPOISON;
}
@@ -2816,7 +2857,6 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address,
else
VM_BUG_ON_PAGE(!PageLocked(vmf.page), vmf.page);
- out:
*page = vmf.page;
return ret;
}
@@ -2837,7 +2877,7 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address,
* vm_ops->map_pages.
*/
void do_set_pte(struct vm_area_struct *vma, unsigned long address,
- struct page *page, pte_t *pte, bool write, bool anon)
+ struct page *page, pte_t *pte, bool write, bool anon, bool old)
{
pte_t entry;
@@ -2845,6 +2885,8 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
entry = mk_pte(page, vma->vm_page_prot);
if (write)
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+ if (old)
+ entry = pte_mkold(entry);
if (anon) {
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
page_add_new_anon_rmap(page, vma, address, false);
@@ -2858,8 +2900,16 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
update_mmu_cache(vma, address, pte);
}
+/*
+ * If architecture emulates "accessed" or "young" bit without HW support,
+ * there is no much gain with fault_around.
+ */
static unsigned long fault_around_bytes __read_mostly =
+#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+ PAGE_SIZE;
+#else
rounddown_pow_of_two(65536);
+#endif
#ifdef CONFIG_DEBUG_FS
static int fault_around_bytes_get(void *data, u64 *val)
@@ -2982,13 +3032,24 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
*/
if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) {
pte = pte_offset_map_lock(mm, pmd, address, &ptl);
- do_fault_around(vma, address, pte, pgoff, flags);
if (!pte_same(*pte, orig_pte))
goto unlock_out;
+ do_fault_around(vma, address, pte, pgoff, flags);
+ /* Check if the fault is handled by faultaround */
+ if (!pte_same(*pte, orig_pte)) {
+ /*
+ * Faultaround produce old pte, but the pte we've
+ * handler fault for should be young.
+ */
+ pte_t entry = pte_mkyoung(*pte);
+ if (ptep_set_access_flags(vma, address, pte, entry, 0))
+ update_mmu_cache(vma, address, pte);
+ goto unlock_out;
+ }
pte_unmap_unlock(pte, ptl);
}
- ret = __do_fault(vma, address, pgoff, flags, NULL, &fault_page);
+ ret = __do_fault(vma, address, pgoff, flags, NULL, &fault_page, NULL);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
return ret;
@@ -2996,10 +3057,10 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
if (unlikely(!pte_same(*pte, orig_pte))) {
pte_unmap_unlock(pte, ptl);
unlock_page(fault_page);
- page_cache_release(fault_page);
+ put_page(fault_page);
return ret;
}
- do_set_pte(vma, address, fault_page, pte, false, false);
+ do_set_pte(vma, address, fault_page, pte, false, false, false);
unlock_page(fault_page);
unlock_out:
pte_unmap_unlock(pte, ptl);
@@ -3011,6 +3072,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
{
struct page *fault_page, *new_page;
+ void *fault_entry;
struct mem_cgroup *memcg;
spinlock_t *ptl;
pte_t *pte;
@@ -3024,51 +3086,45 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
return VM_FAULT_OOM;
if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg, false)) {
- page_cache_release(new_page);
+ put_page(new_page);
return VM_FAULT_OOM;
}
- ret = __do_fault(vma, address, pgoff, flags, new_page, &fault_page);
+ ret = __do_fault(vma, address, pgoff, flags, new_page, &fault_page,
+ &fault_entry);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
goto uncharge_out;
- if (fault_page)
+ if (!(ret & VM_FAULT_DAX_LOCKED))
copy_user_highpage(new_page, fault_page, address, vma);
__SetPageUptodate(new_page);
pte = pte_offset_map_lock(mm, pmd, address, &ptl);
if (unlikely(!pte_same(*pte, orig_pte))) {
pte_unmap_unlock(pte, ptl);
- if (fault_page) {
+ if (!(ret & VM_FAULT_DAX_LOCKED)) {
unlock_page(fault_page);
- page_cache_release(fault_page);
+ put_page(fault_page);
} else {
- /*
- * The fault handler has no page to lock, so it holds
- * i_mmap_lock for read to protect against truncate.
- */
- i_mmap_unlock_read(vma->vm_file->f_mapping);
+ dax_unlock_mapping_entry(vma->vm_file->f_mapping,
+ pgoff);
}
goto uncharge_out;
}
- do_set_pte(vma, address, new_page, pte, true, true);
+ do_set_pte(vma, address, new_page, pte, true, true, false);
mem_cgroup_commit_charge(new_page, memcg, false, false);
lru_cache_add_active_or_unevictable(new_page, vma);
pte_unmap_unlock(pte, ptl);
- if (fault_page) {
+ if (!(ret & VM_FAULT_DAX_LOCKED)) {
unlock_page(fault_page);
- page_cache_release(fault_page);
+ put_page(fault_page);
} else {
- /*
- * The fault handler has no page to lock, so it holds
- * i_mmap_lock for read to protect against truncate.
- */
- i_mmap_unlock_read(vma->vm_file->f_mapping);
+ dax_unlock_mapping_entry(vma->vm_file->f_mapping, pgoff);
}
return ret;
uncharge_out:
mem_cgroup_cancel_charge(new_page, memcg, false);
- page_cache_release(new_page);
+ put_page(new_page);
return ret;
}
@@ -3083,7 +3139,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
int dirtied = 0;
int ret, tmp;
- ret = __do_fault(vma, address, pgoff, flags, NULL, &fault_page);
+ ret = __do_fault(vma, address, pgoff, flags, NULL, &fault_page, NULL);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
return ret;
@@ -3096,7 +3152,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
tmp = do_page_mkwrite(vma, fault_page, address);
if (unlikely(!tmp ||
(tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) {
- page_cache_release(fault_page);
+ put_page(fault_page);
return tmp;
}
}
@@ -3105,10 +3161,10 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
if (unlikely(!pte_same(*pte, orig_pte))) {
pte_unmap_unlock(pte, ptl);
unlock_page(fault_page);
- page_cache_release(fault_page);
+ put_page(fault_page);
return ret;
}
- do_set_pte(vma, address, fault_page, pte, true, false);
+ do_set_pte(vma, address, fault_page, pte, true, false, false);
pte_unmap_unlock(pte, ptl);
if (set_page_dirty(fault_page))
@@ -3736,7 +3792,7 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
buf, maddr + offset, bytes);
}
kunmap(page);
- page_cache_release(page);
+ put_page(page);
}
len -= bytes;
buf += bytes;