aboutsummaryrefslogtreecommitdiffstats
path: root/mm/gup.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/gup.c')
-rw-r--r--mm/gup.c674
1 files changed, 435 insertions, 239 deletions
diff --git a/mm/gup.c b/mm/gup.c
index ddde097cf9e4..43b7d875de37 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -134,6 +134,7 @@ void put_user_pages(struct page **pages, unsigned long npages)
}
EXPORT_SYMBOL(put_user_pages);
+#ifdef CONFIG_MMU
static struct page *no_page_table(struct vm_area_struct *vma,
unsigned int flags)
{
@@ -515,7 +516,7 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma,
* an error pointer if there is a mapping to something not represented
* by a page descriptor (see also vm_normal_page()).
*/
-struct page *follow_page_mask(struct vm_area_struct *vma,
+static struct page *follow_page_mask(struct vm_area_struct *vma,
unsigned long address, unsigned int flags,
struct follow_page_context *ctx)
{
@@ -585,11 +586,14 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
pgd = pgd_offset_k(address);
else
pgd = pgd_offset_gate(mm, address);
- BUG_ON(pgd_none(*pgd));
+ if (pgd_none(*pgd))
+ return -EFAULT;
p4d = p4d_offset(pgd, address);
- BUG_ON(p4d_none(*p4d));
+ if (p4d_none(*p4d))
+ return -EFAULT;
pud = pud_offset(p4d, address);
- BUG_ON(pud_none(*pud));
+ if (pud_none(*pud))
+ return -EFAULT;
pmd = pmd_offset(pud, address);
if (!pmd_present(*pmd))
return -EFAULT;
@@ -1101,86 +1105,6 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
}
/*
- * We can leverage the VM_FAULT_RETRY functionality in the page fault
- * paths better by using either get_user_pages_locked() or
- * get_user_pages_unlocked().
- *
- * get_user_pages_locked() is suitable to replace the form:
- *
- * down_read(&mm->mmap_sem);
- * do_something()
- * get_user_pages(tsk, mm, ..., pages, NULL);
- * up_read(&mm->mmap_sem);
- *
- * to:
- *
- * int locked = 1;
- * down_read(&mm->mmap_sem);
- * do_something()
- * get_user_pages_locked(tsk, mm, ..., pages, &locked);
- * if (locked)
- * up_read(&mm->mmap_sem);
- */
-long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
- unsigned int gup_flags, struct page **pages,
- int *locked)
-{
- /*
- * FIXME: Current FOLL_LONGTERM behavior is incompatible with
- * FAULT_FLAG_ALLOW_RETRY because of the FS DAX check requirement on
- * vmas. As there are no users of this flag in this call we simply
- * disallow this option for now.
- */
- if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
- return -EINVAL;
-
- return __get_user_pages_locked(current, current->mm, start, nr_pages,
- pages, NULL, locked,
- gup_flags | FOLL_TOUCH);
-}
-EXPORT_SYMBOL(get_user_pages_locked);
-
-/*
- * get_user_pages_unlocked() is suitable to replace the form:
- *
- * down_read(&mm->mmap_sem);
- * get_user_pages(tsk, mm, ..., pages, NULL);
- * up_read(&mm->mmap_sem);
- *
- * with:
- *
- * get_user_pages_unlocked(tsk, mm, ..., pages);
- *
- * It is functionally equivalent to get_user_pages_fast so
- * get_user_pages_fast should be used instead if specific gup_flags
- * (e.g. FOLL_FORCE) are not required.
- */
-long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
- struct page **pages, unsigned int gup_flags)
-{
- struct mm_struct *mm = current->mm;
- int locked = 1;
- long ret;
-
- /*
- * FIXME: Current FOLL_LONGTERM behavior is incompatible with
- * FAULT_FLAG_ALLOW_RETRY because of the FS DAX check requirement on
- * vmas. As there are no users of this flag in this call we simply
- * disallow this option for now.
- */
- if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
- return -EINVAL;
-
- down_read(&mm->mmap_sem);
- ret = __get_user_pages_locked(current, mm, start, nr_pages, pages, NULL,
- &locked, gup_flags | FOLL_TOUCH);
- if (locked)
- up_read(&mm->mmap_sem);
- return ret;
-}
-EXPORT_SYMBOL(get_user_pages_unlocked);
-
-/*
* get_user_pages_remote() - pin user pages in memory
* @tsk: the task_struct to use for page fault accounting, or
* NULL if faults are not to be recorded.
@@ -1256,6 +1180,198 @@ long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
}
EXPORT_SYMBOL(get_user_pages_remote);
+/**
+ * populate_vma_page_range() - populate a range of pages in the vma.
+ * @vma: target vma
+ * @start: start address
+ * @end: end address
+ * @nonblocking:
+ *
+ * This takes care of mlocking the pages too if VM_LOCKED is set.
+ *
+ * return 0 on success, negative error code on error.
+ *
+ * vma->vm_mm->mmap_sem must be held.
+ *
+ * If @nonblocking is NULL, it may be held for read or write and will
+ * be unperturbed.
+ *
+ * If @nonblocking is non-NULL, it must held for read only and may be
+ * released. If it's released, *@nonblocking will be set to 0.
+ */
+long populate_vma_page_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end, int *nonblocking)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ unsigned long nr_pages = (end - start) / PAGE_SIZE;
+ int gup_flags;
+
+ VM_BUG_ON(start & ~PAGE_MASK);
+ VM_BUG_ON(end & ~PAGE_MASK);
+ VM_BUG_ON_VMA(start < vma->vm_start, vma);
+ VM_BUG_ON_VMA(end > vma->vm_end, vma);
+ VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm);
+
+ gup_flags = FOLL_TOUCH | FOLL_POPULATE | FOLL_MLOCK;
+ if (vma->vm_flags & VM_LOCKONFAULT)
+ gup_flags &= ~FOLL_POPULATE;
+ /*
+ * We want to touch writable mappings with a write fault in order
+ * to break COW, except for shared mappings because these don't COW
+ * and we would not want to dirty them for nothing.
+ */
+ if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
+ gup_flags |= FOLL_WRITE;
+
+ /*
+ * We want mlock to succeed for regions that have any permissions
+ * other than PROT_NONE.
+ */
+ if (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))
+ gup_flags |= FOLL_FORCE;
+
+ /*
+ * We made sure addr is within a VMA, so the following will
+ * not result in a stack expansion that recurses back here.
+ */
+ return __get_user_pages(current, mm, start, nr_pages, gup_flags,
+ NULL, NULL, nonblocking);
+}
+
+/*
+ * __mm_populate - populate and/or mlock pages within a range of address space.
+ *
+ * This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap
+ * flags. VMAs must be already marked with the desired vm_flags, and
+ * mmap_sem must not be held.
+ */
+int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
+{
+ struct mm_struct *mm = current->mm;
+ unsigned long end, nstart, nend;
+ struct vm_area_struct *vma = NULL;
+ int locked = 0;
+ long ret = 0;
+
+ end = start + len;
+
+ for (nstart = start; nstart < end; nstart = nend) {
+ /*
+ * We want to fault in pages for [nstart; end) address range.
+ * Find first corresponding VMA.
+ */
+ if (!locked) {
+ locked = 1;
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, nstart);
+ } else if (nstart >= vma->vm_end)
+ vma = vma->vm_next;
+ if (!vma || vma->vm_start >= end)
+ break;
+ /*
+ * Set [nstart; nend) to intersection of desired address
+ * range with the first VMA. Also, skip undesirable VMA types.
+ */
+ nend = min(end, vma->vm_end);
+ if (vma->vm_flags & (VM_IO | VM_PFNMAP))
+ continue;
+ if (nstart < vma->vm_start)
+ nstart = vma->vm_start;
+ /*
+ * Now fault in a range of pages. populate_vma_page_range()
+ * double checks the vma flags, so that it won't mlock pages
+ * if the vma was already munlocked.
+ */
+ ret = populate_vma_page_range(vma, nstart, nend, &locked);
+ if (ret < 0) {
+ if (ignore_errors) {
+ ret = 0;
+ continue; /* continue at next VMA */
+ }
+ break;
+ }
+ nend = nstart + ret * PAGE_SIZE;
+ ret = 0;
+ }
+ if (locked)
+ up_read(&mm->mmap_sem);
+ return ret; /* 0 or negative error code */
+}
+
+/**
+ * get_dump_page() - pin user page in memory while writing it to core dump
+ * @addr: user address
+ *
+ * Returns struct page pointer of user page pinned for dump,
+ * to be freed afterwards by put_page().
+ *
+ * Returns NULL on any kind of failure - a hole must then be inserted into
+ * the corefile, to preserve alignment with its headers; and also returns
+ * NULL wherever the ZERO_PAGE, or an anonymous pte_none, has been found -
+ * allowing a hole to be left in the corefile to save diskspace.
+ *
+ * Called without mmap_sem, but after all other threads have been killed.
+ */
+#ifdef CONFIG_ELF_CORE
+struct page *get_dump_page(unsigned long addr)
+{
+ struct vm_area_struct *vma;
+ struct page *page;
+
+ if (__get_user_pages(current, current->mm, addr, 1,
+ FOLL_FORCE | FOLL_DUMP | FOLL_GET, &page, &vma,
+ NULL) < 1)
+ return NULL;
+ flush_cache_page(vma, addr, page_to_pfn(page));
+ return page;
+}
+#endif /* CONFIG_ELF_CORE */
+#else /* CONFIG_MMU */
+static long __get_user_pages_locked(struct task_struct *tsk,
+ struct mm_struct *mm, unsigned long start,
+ unsigned long nr_pages, struct page **pages,
+ struct vm_area_struct **vmas, int *locked,
+ unsigned int foll_flags)
+{
+ struct vm_area_struct *vma;
+ unsigned long vm_flags;
+ int i;
+
+ /* calculate required read or write permissions.
+ * If FOLL_FORCE is set, we only require the "MAY" flags.
+ */
+ vm_flags = (foll_flags & FOLL_WRITE) ?
+ (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
+ vm_flags &= (foll_flags & FOLL_FORCE) ?
+ (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
+
+ for (i = 0; i < nr_pages; i++) {
+ vma = find_vma(mm, start);
+ if (!vma)
+ goto finish_or_fault;
+
+ /* protect what we can, including chardevs */
+ if ((vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
+ !(vm_flags & vma->vm_flags))
+ goto finish_or_fault;
+
+ if (pages) {
+ pages[i] = virt_to_page(start);
+ if (pages[i])
+ get_page(pages[i]);
+ }
+ if (vmas)
+ vmas[i] = vma;
+ start = (start + PAGE_SIZE) & PAGE_MASK;
+ }
+
+ return i;
+
+finish_or_fault:
+ return i ? : -EFAULT;
+}
+#endif /* !CONFIG_MMU */
+
#if defined(CONFIG_FS_DAX) || defined (CONFIG_CMA)
static bool check_dax_vmas(struct vm_area_struct **vmas, long nr_pages)
{
@@ -1336,25 +1452,31 @@ static long check_and_migrate_cma_pages(struct task_struct *tsk,
struct vm_area_struct **vmas,
unsigned int gup_flags)
{
- long i;
+ unsigned long i;
+ unsigned long step;
bool drain_allow = true;
bool migrate_allow = true;
LIST_HEAD(cma_page_list);
check_again:
- for (i = 0; i < nr_pages; i++) {
+ for (i = 0; i < nr_pages;) {
+
+ struct page *head = compound_head(pages[i]);
+
+ /*
+ * gup may start from a tail page. Advance step by the left
+ * part.
+ */
+ step = (1 << compound_order(head)) - (pages[i] - head);
/*
* If we get a page from the CMA zone, since we are going to
* be pinning these entries, we might as well move them out
* of the CMA zone if possible.
*/
- if (is_migrate_cma_page(pages[i])) {
-
- struct page *head = compound_head(pages[i]);
-
- if (PageHuge(head)) {
+ if (is_migrate_cma_page(head)) {
+ if (PageHuge(head))
isolate_huge_page(head, &cma_page_list);
- } else {
+ else {
if (!PageLRU(head) && drain_allow) {
lru_add_drain_all();
drain_allow = false;
@@ -1369,6 +1491,8 @@ check_again:
}
}
}
+
+ i += step;
}
if (!list_empty(&cma_page_list)) {
@@ -1417,7 +1541,7 @@ static long check_and_migrate_cma_pages(struct task_struct *tsk,
{
return nr_pages;
}
-#endif
+#endif /* CONFIG_CMA */
/*
* __gup_longterm_locked() is a wrapper for __get_user_pages_locked which
@@ -1503,155 +1627,88 @@ long get_user_pages(unsigned long start, unsigned long nr_pages,
}
EXPORT_SYMBOL(get_user_pages);
-/**
- * populate_vma_page_range() - populate a range of pages in the vma.
- * @vma: target vma
- * @start: start address
- * @end: end address
- * @nonblocking:
- *
- * This takes care of mlocking the pages too if VM_LOCKED is set.
+/*
+ * We can leverage the VM_FAULT_RETRY functionality in the page fault
+ * paths better by using either get_user_pages_locked() or
+ * get_user_pages_unlocked().
*
- * return 0 on success, negative error code on error.
+ * get_user_pages_locked() is suitable to replace the form:
*
- * vma->vm_mm->mmap_sem must be held.
+ * down_read(&mm->mmap_sem);
+ * do_something()
+ * get_user_pages(tsk, mm, ..., pages, NULL);
+ * up_read(&mm->mmap_sem);
*
- * If @nonblocking is NULL, it may be held for read or write and will
- * be unperturbed.
+ * to:
*
- * If @nonblocking is non-NULL, it must held for read only and may be
- * released. If it's released, *@nonblocking will be set to 0.
+ * int locked = 1;
+ * down_read(&mm->mmap_sem);
+ * do_something()
+ * get_user_pages_locked(tsk, mm, ..., pages, &locked);
+ * if (locked)
+ * up_read(&mm->mmap_sem);
*/
-long populate_vma_page_range(struct vm_area_struct *vma,
- unsigned long start, unsigned long end, int *nonblocking)
+long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
+ unsigned int gup_flags, struct page **pages,
+ int *locked)
{
- struct mm_struct *mm = vma->vm_mm;
- unsigned long nr_pages = (end - start) / PAGE_SIZE;
- int gup_flags;
-
- VM_BUG_ON(start & ~PAGE_MASK);
- VM_BUG_ON(end & ~PAGE_MASK);
- VM_BUG_ON_VMA(start < vma->vm_start, vma);
- VM_BUG_ON_VMA(end > vma->vm_end, vma);
- VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm);
-
- gup_flags = FOLL_TOUCH | FOLL_POPULATE | FOLL_MLOCK;
- if (vma->vm_flags & VM_LOCKONFAULT)
- gup_flags &= ~FOLL_POPULATE;
- /*
- * We want to touch writable mappings with a write fault in order
- * to break COW, except for shared mappings because these don't COW
- * and we would not want to dirty them for nothing.
- */
- if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
- gup_flags |= FOLL_WRITE;
-
/*
- * We want mlock to succeed for regions that have any permissions
- * other than PROT_NONE.
+ * FIXME: Current FOLL_LONGTERM behavior is incompatible with
+ * FAULT_FLAG_ALLOW_RETRY because of the FS DAX check requirement on
+ * vmas. As there are no users of this flag in this call we simply
+ * disallow this option for now.
*/
- if (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))
- gup_flags |= FOLL_FORCE;
+ if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
+ return -EINVAL;
- /*
- * We made sure addr is within a VMA, so the following will
- * not result in a stack expansion that recurses back here.
- */
- return __get_user_pages(current, mm, start, nr_pages, gup_flags,
- NULL, NULL, nonblocking);
+ return __get_user_pages_locked(current, current->mm, start, nr_pages,
+ pages, NULL, locked,
+ gup_flags | FOLL_TOUCH);
}
+EXPORT_SYMBOL(get_user_pages_locked);
/*
- * __mm_populate - populate and/or mlock pages within a range of address space.
+ * get_user_pages_unlocked() is suitable to replace the form:
*
- * This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap
- * flags. VMAs must be already marked with the desired vm_flags, and
- * mmap_sem must not be held.
+ * down_read(&mm->mmap_sem);
+ * get_user_pages(tsk, mm, ..., pages, NULL);
+ * up_read(&mm->mmap_sem);
+ *
+ * with:
+ *
+ * get_user_pages_unlocked(tsk, mm, ..., pages);
+ *
+ * It is functionally equivalent to get_user_pages_fast so
+ * get_user_pages_fast should be used instead if specific gup_flags
+ * (e.g. FOLL_FORCE) are not required.
*/
-int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
+long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
+ struct page **pages, unsigned int gup_flags)
{
struct mm_struct *mm = current->mm;
- unsigned long end, nstart, nend;
- struct vm_area_struct *vma = NULL;
- int locked = 0;
- long ret = 0;
+ int locked = 1;
+ long ret;
- end = start + len;
+ /*
+ * FIXME: Current FOLL_LONGTERM behavior is incompatible with
+ * FAULT_FLAG_ALLOW_RETRY because of the FS DAX check requirement on
+ * vmas. As there are no users of this flag in this call we simply
+ * disallow this option for now.
+ */
+ if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
+ return -EINVAL;
- for (nstart = start; nstart < end; nstart = nend) {
- /*
- * We want to fault in pages for [nstart; end) address range.
- * Find first corresponding VMA.
- */
- if (!locked) {
- locked = 1;
- down_read(&mm->mmap_sem);
- vma = find_vma(mm, nstart);
- } else if (nstart >= vma->vm_end)
- vma = vma->vm_next;
- if (!vma || vma->vm_start >= end)
- break;
- /*
- * Set [nstart; nend) to intersection of desired address
- * range with the first VMA. Also, skip undesirable VMA types.
- */
- nend = min(end, vma->vm_end);
- if (vma->vm_flags & (VM_IO | VM_PFNMAP))
- continue;
- if (nstart < vma->vm_start)
- nstart = vma->vm_start;
- /*
- * Now fault in a range of pages. populate_vma_page_range()
- * double checks the vma flags, so that it won't mlock pages
- * if the vma was already munlocked.
- */
- ret = populate_vma_page_range(vma, nstart, nend, &locked);
- if (ret < 0) {
- if (ignore_errors) {
- ret = 0;
- continue; /* continue at next VMA */
- }
- break;
- }
- nend = nstart + ret * PAGE_SIZE;
- ret = 0;
- }
+ down_read(&mm->mmap_sem);
+ ret = __get_user_pages_locked(current, mm, start, nr_pages, pages, NULL,
+ &locked, gup_flags | FOLL_TOUCH);
if (locked)
up_read(&mm->mmap_sem);
- return ret; /* 0 or negative error code */
-}
-
-/**
- * get_dump_page() - pin user page in memory while writing it to core dump
- * @addr: user address
- *
- * Returns struct page pointer of user page pinned for dump,
- * to be freed afterwards by put_page().
- *
- * Returns NULL on any kind of failure - a hole must then be inserted into
- * the corefile, to preserve alignment with its headers; and also returns
- * NULL wherever the ZERO_PAGE, or an anonymous pte_none, has been found -
- * allowing a hole to be left in the corefile to save diskspace.
- *
- * Called without mmap_sem, but after all other threads have been killed.
- */
-#ifdef CONFIG_ELF_CORE
-struct page *get_dump_page(unsigned long addr)
-{
- struct vm_area_struct *vma;
- struct page *page;
-
- if (__get_user_pages(current, current->mm, addr, 1,
- FOLL_FORCE | FOLL_DUMP | FOLL_GET, &page, &vma,
- NULL) < 1)
- return NULL;
- flush_cache_page(vma, addr, page_to_pfn(page));
- return page;
+ return ret;
}
-#endif /* CONFIG_ELF_CORE */
+EXPORT_SYMBOL(get_user_pages_unlocked);
/*
- * Generic Fast GUP
+ * Fast GUP
*
* get_user_pages_fast attempts to pin user pages by walking the page
* tables directly and avoids taking locks. Thus the walker needs to be
@@ -1683,20 +1740,64 @@ struct page *get_dump_page(unsigned long addr)
*
* This code is based heavily on the PowerPC implementation by Nick Piggin.
*/
-#ifdef CONFIG_HAVE_GENERIC_GUP
+#ifdef CONFIG_HAVE_FAST_GUP
+#ifdef CONFIG_GUP_GET_PTE_LOW_HIGH
+/*
+ * WARNING: only to be used in the get_user_pages_fast() implementation.
+ *
+ * With get_user_pages_fast(), we walk down the pagetables without taking any
+ * locks. For this we would like to load the pointers atomically, but sometimes
+ * that is not possible (e.g. without expensive cmpxchg8b on x86_32 PAE). What
+ * we do have is the guarantee that a PTE will only either go from not present
+ * to present, or present to not present or both -- it will not switch to a
+ * completely different present page without a TLB flush in between; something
+ * that we are blocking by holding interrupts off.
+ *
+ * Setting ptes from not present to present goes:
+ *
+ * ptep->pte_high = h;
+ * smp_wmb();
+ * ptep->pte_low = l;
+ *
+ * And present to not present goes:
+ *
+ * ptep->pte_low = 0;
+ * smp_wmb();
+ * ptep->pte_high = 0;
+ *
+ * We must ensure here that the load of pte_low sees 'l' IFF pte_high sees 'h'.
+ * We load pte_high *after* loading pte_low, which ensures we don't see an older
+ * value of pte_high. *Then* we recheck pte_low, which ensures that we haven't
+ * picked up a changed pte high. We might have gotten rubbish values from
+ * pte_low and pte_high, but we are guaranteed that pte_low will not have the
+ * present bit set *unless* it is 'l'. Because get_user_pages_fast() only
+ * operates on present ptes we're safe.
+ */
+static inline pte_t gup_get_pte(pte_t *ptep)
+{
+ pte_t pte;
-#ifndef gup_get_pte
+ do {
+ pte.pte_low = ptep->pte_low;
+ smp_rmb();
+ pte.pte_high = ptep->pte_high;
+ smp_rmb();
+ } while (unlikely(pte.pte_low != ptep->pte_low));
+
+ return pte;
+}
+#else /* CONFIG_GUP_GET_PTE_LOW_HIGH */
/*
- * We assume that the PTE can be read atomically. If this is not the case for
- * your architecture, please provide the helper.
+ * We require that the PTE can be read atomically.
*/
static inline pte_t gup_get_pte(pte_t *ptep)
{
return READ_ONCE(*ptep);
}
-#endif
+#endif /* CONFIG_GUP_GET_PTE_LOW_HIGH */
-static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages)
+static void __maybe_unused undo_dev_pagemap(int *nr, int nr_start,
+ struct page **pages)
{
while ((*nr) - nr_start) {
struct page *page = pages[--(*nr)];
@@ -1877,6 +1978,90 @@ static int __gup_device_huge_pud(pud_t pud, pud_t *pudp, unsigned long addr,
}
#endif
+#ifdef CONFIG_ARCH_HAS_HUGEPD
+static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
+ unsigned long sz)
+{
+ unsigned long __boundary = (addr + sz) & ~(sz-1);
+ return (__boundary - 1 < end - 1) ? __boundary : end;
+}
+
+static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
+ unsigned long end, int write, struct page **pages, int *nr)
+{
+ unsigned long pte_end;
+ struct page *head, *page;
+ pte_t pte;
+ int refs;
+
+ pte_end = (addr + sz) & ~(sz-1);
+ if (pte_end < end)
+ end = pte_end;
+
+ pte = READ_ONCE(*ptep);
+
+ if (!pte_access_permitted(pte, write))
+ return 0;
+
+ /* hugepages are never "special" */
+ VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+
+ refs = 0;
+ head = pte_page(pte);
+
+ page = head + ((addr & (sz-1)) >> PAGE_SHIFT);
+ do {
+ VM_BUG_ON(compound_head(page) != head);
+ pages[*nr] = page;
+ (*nr)++;
+ page++;
+ refs++;
+ } while (addr += PAGE_SIZE, addr != end);
+
+ head = try_get_compound_head(head, refs);
+ if (!head) {
+ *nr -= refs;
+ return 0;
+ }
+
+ if (unlikely(pte_val(pte) != pte_val(*ptep))) {
+ /* Could be optimized better */
+ *nr -= refs;
+ while (refs--)
+ put_page(head);
+ return 0;
+ }
+
+ SetPageReferenced(head);
+ return 1;
+}
+
+static int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
+ unsigned int pdshift, unsigned long end, int write,
+ struct page **pages, int *nr)
+{
+ pte_t *ptep;
+ unsigned long sz = 1UL << hugepd_shift(hugepd);
+ unsigned long next;
+
+ ptep = hugepte_offset(hugepd, addr, pdshift);
+ do {
+ next = hugepte_addr_end(addr, end, sz);
+ if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr))
+ return 0;
+ } while (ptep++, addr = next, addr != end);
+
+ return 1;
+}
+#else
+static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
+ unsigned pdshift, unsigned long end, int write,
+ struct page **pages, int *nr)
+{
+ return 0;
+}
+#endif /* CONFIG_ARCH_HAS_HUGEPD */
+
static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
unsigned long end, unsigned int flags, struct page **pages, int *nr)
{
@@ -2117,19 +2302,21 @@ static void gup_pgd_range(unsigned long addr, unsigned long end,
return;
} while (pgdp++, addr = next, addr != end);
}
+#else
+static inline void gup_pgd_range(unsigned long addr, unsigned long end,
+ unsigned int flags, struct page **pages, int *nr)
+{
+}
+#endif /* CONFIG_HAVE_FAST_GUP */
#ifndef gup_fast_permitted
/*
* Check if it's allowed to use __get_user_pages_fast() for the range, or
* we need to fall back to the slow version:
*/
-bool gup_fast_permitted(unsigned long start, int nr_pages)
+static bool gup_fast_permitted(unsigned long start, unsigned long end)
{
- unsigned long len, end;
-
- len = (unsigned long) nr_pages << PAGE_SHIFT;
- end = start + len;
- return end >= start;
+ return true;
}
#endif
@@ -2138,6 +2325,9 @@ bool gup_fast_permitted(unsigned long start, int nr_pages)
* the regular GUP.
* Note a difference with get_user_pages_fast: this always returns the
* number of pages pinned, 0 if no pages were pinned.
+ *
+ * If the architecture does not support this function, simply return with no
+ * pages pinned.
*/
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages)
@@ -2146,10 +2336,12 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
unsigned long flags;
int nr = 0;
- start &= PAGE_MASK;
+ start = untagged_addr(start) & PAGE_MASK;
len = (unsigned long) nr_pages << PAGE_SHIFT;
end = start + len;
+ if (end <= start)
+ return 0;
if (unlikely(!access_ok((void __user *)start, len)))
return 0;
@@ -2165,7 +2357,8 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
* block IPIs that come from THPs splitting.
*/
- if (gup_fast_permitted(start, nr_pages)) {
+ if (IS_ENABLED(CONFIG_HAVE_FAST_GUP) &&
+ gup_fast_permitted(start, end)) {
local_irq_save(flags);
gup_pgd_range(start, end, write ? FOLL_WRITE : 0, pages, &nr);
local_irq_restore(flags);
@@ -2173,6 +2366,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
return nr;
}
+EXPORT_SYMBOL_GPL(__get_user_pages_fast);
static int __gup_longterm_unlocked(unsigned long start, int nr_pages,
unsigned int gup_flags, struct page **pages)
@@ -2219,18 +2413,21 @@ int get_user_pages_fast(unsigned long start, int nr_pages,
unsigned long addr, len, end;
int nr = 0, ret = 0;
- start &= PAGE_MASK;
+ if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM)))
+ return -EINVAL;
+
+ start = untagged_addr(start) & PAGE_MASK;
addr = start;
len = (unsigned long) nr_pages << PAGE_SHIFT;
end = start + len;
- if (nr_pages <= 0)
+ if (end <= start)
return 0;
-
if (unlikely(!access_ok((void __user *)start, len)))
return -EFAULT;
- if (gup_fast_permitted(start, nr_pages)) {
+ if (IS_ENABLED(CONFIG_HAVE_FAST_GUP) &&
+ gup_fast_permitted(start, end)) {
local_irq_disable();
gup_pgd_range(addr, end, gup_flags, pages, &nr);
local_irq_enable();
@@ -2256,5 +2453,4 @@ int get_user_pages_fast(unsigned long start, int nr_pages,
return ret;
}
-
-#endif /* CONFIG_HAVE_GENERIC_GUP */
+EXPORT_SYMBOL_GPL(get_user_pages_fast);