aboutsummaryrefslogtreecommitdiffstats
path: root/mm/memory.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memory.c')
-rw-r--r--mm/memory.c336
1 files changed, 194 insertions, 142 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 7206a634270b..c467102a5cbc 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -238,23 +238,13 @@ void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
__tlb_reset_range(tlb);
}
-static void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
+static void tlb_flush_mmu_free(struct mmu_gather *tlb)
{
- if (!tlb->end)
- return;
+ struct mmu_gather_batch *batch;
- tlb_flush(tlb);
- mmu_notifier_invalidate_range(tlb->mm, tlb->start, tlb->end);
#ifdef CONFIG_HAVE_RCU_TABLE_FREE
tlb_table_flush(tlb);
#endif
- __tlb_reset_range(tlb);
-}
-
-static void tlb_flush_mmu_free(struct mmu_gather *tlb)
-{
- struct mmu_gather_batch *batch;
-
for (batch = &tlb->local; batch && batch->nr; batch = batch->next) {
free_pages_and_swap_cache(batch->pages, batch->nr);
batch->nr = 0;
@@ -330,6 +320,21 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_
* See the comment near struct mmu_table_batch.
*/
+/*
+ * If we want tlb_remove_table() to imply TLB invalidates.
+ */
+static inline void tlb_table_invalidate(struct mmu_gather *tlb)
+{
+#ifdef CONFIG_HAVE_RCU_TABLE_INVALIDATE
+ /*
+ * Invalidate page-table caches used by hardware walkers. Then we still
+ * need to RCU-sched wait while freeing the pages because software
+ * walkers can still be in-flight.
+ */
+ tlb_flush_mmu_tlbonly(tlb);
+#endif
+}
+
static void tlb_remove_table_smp_sync(void *arg)
{
/* Simply deliver the interrupt */
@@ -366,6 +371,7 @@ void tlb_table_flush(struct mmu_gather *tlb)
struct mmu_table_batch **batch = &tlb->batch;
if (*batch) {
+ tlb_table_invalidate(tlb);
call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
*batch = NULL;
}
@@ -375,23 +381,16 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
{
struct mmu_table_batch **batch = &tlb->batch;
- /*
- * When there's less then two users of this mm there cannot be a
- * concurrent page-table walk.
- */
- if (atomic_read(&tlb->mm->mm_users) < 2) {
- __tlb_remove_table(table);
- return;
- }
-
if (*batch == NULL) {
*batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
if (*batch == NULL) {
+ tlb_table_invalidate(tlb);
tlb_remove_table_one(table);
return;
}
(*batch)->nr = 0;
}
+
(*batch)->tables[(*batch)->nr++] = table;
if ((*batch)->nr == MAX_TABLE_BATCH)
tlb_table_flush(tlb);
@@ -853,6 +852,10 @@ struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
return NULL;
}
}
+
+ if (pte_devmap(pte))
+ return NULL;
+
print_bad_pte(vma, addr, pte, NULL);
return NULL;
}
@@ -917,6 +920,8 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
}
}
+ if (pmd_devmap(pmd))
+ return NULL;
if (is_zero_pfn(pfn))
return NULL;
if (unlikely(pfn > highest_memmap_pfn))
@@ -1017,7 +1022,7 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
* If it's a COW mapping, write protect it both
* in the parent and the child
*/
- if (is_cow_mapping(vm_flags)) {
+ if (is_cow_mapping(vm_flags) && pte_write(pte)) {
ptep_set_wrprotect(src_mm, addr, src_pte);
pte = pte_wrprotect(pte);
}
@@ -1417,11 +1422,9 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
do {
next = pmd_addr_end(addr, end);
if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
- if (next - addr != HPAGE_PMD_SIZE) {
- VM_BUG_ON_VMA(vma_is_anonymous(vma) &&
- !rwsem_is_locked(&tlb->mm->mmap_sem), vma);
+ if (next - addr != HPAGE_PMD_SIZE)
__split_huge_pmd(vma, pmd, addr, false, NULL);
- } else if (zap_huge_pmd(tlb, vma, pmd, addr))
+ else if (zap_huge_pmd(tlb, vma, pmd, addr))
goto next;
/* fall through */
}
@@ -1603,20 +1606,8 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start,
tlb_gather_mmu(&tlb, mm, start, end);
update_hiwater_rss(mm);
mmu_notifier_invalidate_range_start(mm, start, end);
- for ( ; vma && vma->vm_start < end; vma = vma->vm_next) {
+ for ( ; vma && vma->vm_start < end; vma = vma->vm_next)
unmap_single_vma(&tlb, vma, start, end, NULL);
-
- /*
- * zap_page_range does not specify whether mmap_sem should be
- * held for read or write. That allows parallel zap_page_range
- * operations to unmap a PTE and defer a flush meaning that
- * this call observes pte_none and fails to flush the TLB.
- * Rather than adding a complex API, ensure that no stale
- * TLB entries exist when this call returns.
- */
- flush_tlb_range(vma, start, end);
- }
-
mmu_notifier_invalidate_range_end(mm, start, end);
tlb_finish_mmu(&tlb, start, end);
}
@@ -1886,6 +1877,9 @@ int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
if (addr < vma->vm_start || addr >= vma->vm_end)
return -EFAULT;
+ if (!pfn_modify_allowed(pfn, pgprot))
+ return -EACCES;
+
track_pfn_insert(vma, &pgprot, __pfn_to_pfn_t(pfn, PFN_DEV));
ret = insert_pfn(vma, addr, __pfn_to_pfn_t(pfn, PFN_DEV), pgprot,
@@ -1921,6 +1915,9 @@ static int __vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
track_pfn_insert(vma, &pgprot, pfn);
+ if (!pfn_modify_allowed(pfn_t_to_pfn(pfn), pgprot))
+ return -EACCES;
+
/*
* If we don't have pte special, then we have to use the pfn_valid()
* based VM_MIXEDMAP scheme (see vm_normal_page), and thus we *must*
@@ -1982,6 +1979,7 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
{
pte_t *pte;
spinlock_t *ptl;
+ int err = 0;
pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
if (!pte)
@@ -1989,12 +1987,16 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
arch_enter_lazy_mmu_mode();
do {
BUG_ON(!pte_none(*pte));
+ if (!pfn_modify_allowed(pfn, prot)) {
+ err = -EACCES;
+ break;
+ }
set_pte_at(mm, addr, pte, pte_mkspecial(pfn_pte(pfn, prot)));
pfn++;
} while (pte++, addr += PAGE_SIZE, addr != end);
arch_leave_lazy_mmu_mode();
pte_unmap_unlock(pte - 1, ptl);
- return 0;
+ return err;
}
static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
@@ -2003,6 +2005,7 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
{
pmd_t *pmd;
unsigned long next;
+ int err;
pfn -= addr >> PAGE_SHIFT;
pmd = pmd_alloc(mm, pud, addr);
@@ -2011,9 +2014,10 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
VM_BUG_ON(pmd_trans_huge(*pmd));
do {
next = pmd_addr_end(addr, end);
- if (remap_pte_range(mm, pmd, addr, next,
- pfn + (addr >> PAGE_SHIFT), prot))
- return -ENOMEM;
+ err = remap_pte_range(mm, pmd, addr, next,
+ pfn + (addr >> PAGE_SHIFT), prot);
+ if (err)
+ return err;
} while (pmd++, addr = next, addr != end);
return 0;
}
@@ -2024,6 +2028,7 @@ static inline int remap_pud_range(struct mm_struct *mm, p4d_t *p4d,
{
pud_t *pud;
unsigned long next;
+ int err;
pfn -= addr >> PAGE_SHIFT;
pud = pud_alloc(mm, p4d, addr);
@@ -2031,9 +2036,10 @@ static inline int remap_pud_range(struct mm_struct *mm, p4d_t *p4d,
return -ENOMEM;
do {
next = pud_addr_end(addr, end);
- if (remap_pmd_range(mm, pud, addr, next,
- pfn + (addr >> PAGE_SHIFT), prot))
- return -ENOMEM;
+ err = remap_pmd_range(mm, pud, addr, next,
+ pfn + (addr >> PAGE_SHIFT), prot);
+ if (err)
+ return err;
} while (pud++, addr = next, addr != end);
return 0;
}
@@ -2044,6 +2050,7 @@ static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd,
{
p4d_t *p4d;
unsigned long next;
+ int err;
pfn -= addr >> PAGE_SHIFT;
p4d = p4d_alloc(mm, pgd, addr);
@@ -2051,9 +2058,10 @@ static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd,
return -ENOMEM;
do {
next = p4d_addr_end(addr, end);
- if (remap_pud_range(mm, p4d, addr, next,
- pfn + (addr >> PAGE_SHIFT), prot))
- return -ENOMEM;
+ err = remap_pud_range(mm, p4d, addr, next,
+ pfn + (addr >> PAGE_SHIFT), prot);
+ if (err)
+ return err;
} while (p4d++, addr = next, addr != end);
return 0;
}
@@ -2369,9 +2377,9 @@ static gfp_t __get_fault_gfp_mask(struct vm_area_struct *vma)
*
* We do this without the lock held, so that it can sleep if it needs to.
*/
-static int do_page_mkwrite(struct vm_fault *vmf)
+static vm_fault_t do_page_mkwrite(struct vm_fault *vmf)
{
- int ret;
+ vm_fault_t ret;
struct page *page = vmf->page;
unsigned int old_flags = vmf->flags;
@@ -2475,7 +2483,7 @@ static inline void wp_page_reuse(struct vm_fault *vmf)
* held to the old page, as well as updating the rmap.
* - In any case, unlock the PTL and drop the reference we took to the old page.
*/
-static int wp_page_copy(struct vm_fault *vmf)
+static vm_fault_t wp_page_copy(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
struct mm_struct *mm = vma->vm_mm;
@@ -2503,7 +2511,7 @@ static int wp_page_copy(struct vm_fault *vmf)
cow_user_page(new_page, old_page, vmf->address, vma);
}
- if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg, false))
+ if (mem_cgroup_try_charge_delay(new_page, mm, GFP_KERNEL, &memcg, false))
goto oom_free_new;
__SetPageUptodate(new_page);
@@ -2623,7 +2631,7 @@ oom:
* The function expects the page to be locked or other protection against
* concurrent faults / writeback (such as DAX radix tree locks).
*/
-int finish_mkwrite_fault(struct vm_fault *vmf)
+vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf)
{
WARN_ON_ONCE(!(vmf->vma->vm_flags & VM_SHARED));
vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, vmf->address,
@@ -2644,12 +2652,12 @@ int finish_mkwrite_fault(struct vm_fault *vmf)
* Handle write page faults for VM_MIXEDMAP or VM_PFNMAP for a VM_SHARED
* mapping
*/
-static int wp_pfn_shared(struct vm_fault *vmf)
+static vm_fault_t wp_pfn_shared(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
if (vma->vm_ops && vma->vm_ops->pfn_mkwrite) {
- int ret;
+ vm_fault_t ret;
pte_unmap_unlock(vmf->pte, vmf->ptl);
vmf->flags |= FAULT_FLAG_MKWRITE;
@@ -2662,7 +2670,7 @@ static int wp_pfn_shared(struct vm_fault *vmf)
return VM_FAULT_WRITE;
}
-static int wp_page_shared(struct vm_fault *vmf)
+static vm_fault_t wp_page_shared(struct vm_fault *vmf)
__releases(vmf->ptl)
{
struct vm_area_struct *vma = vmf->vma;
@@ -2670,7 +2678,7 @@ static int wp_page_shared(struct vm_fault *vmf)
get_page(vmf->page);
if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
- int tmp;
+ vm_fault_t tmp;
pte_unmap_unlock(vmf->pte, vmf->ptl);
tmp = do_page_mkwrite(vmf);
@@ -2713,7 +2721,7 @@ static int wp_page_shared(struct vm_fault *vmf)
* but allow concurrent faults), with pte both mapped and locked.
* We return with mmap_sem still held, but pte unmapped and unlocked.
*/
-static int do_wp_page(struct vm_fault *vmf)
+static vm_fault_t do_wp_page(struct vm_fault *vmf)
__releases(vmf->ptl)
{
struct vm_area_struct *vma = vmf->vma;
@@ -2889,7 +2897,7 @@ EXPORT_SYMBOL(unmap_mapping_range);
* We return with the mmap_sem locked or unlocked in the same cases
* as does filemap_fault().
*/
-int do_swap_page(struct vm_fault *vmf)
+vm_fault_t do_swap_page(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
struct page *page = NULL, *swapcache;
@@ -2898,7 +2906,7 @@ int do_swap_page(struct vm_fault *vmf)
pte_t pte;
int locked;
int exclusive = 0;
- int ret = 0;
+ vm_fault_t ret = 0;
if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte))
goto out;
@@ -3003,8 +3011,8 @@ int do_swap_page(struct vm_fault *vmf)
goto out_page;
}
- if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL,
- &memcg, false)) {
+ if (mem_cgroup_try_charge_delay(page, vma->vm_mm, GFP_KERNEL,
+ &memcg, false)) {
ret = VM_FAULT_OOM;
goto out_page;
}
@@ -3109,12 +3117,12 @@ out_release:
* but allow concurrent faults), and pte mapped but not yet locked.
* We return with mmap_sem still held, but pte unmapped and unlocked.
*/
-static int do_anonymous_page(struct vm_fault *vmf)
+static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
struct mem_cgroup *memcg;
struct page *page;
- int ret = 0;
+ vm_fault_t ret = 0;
pte_t entry;
/* File mapping without ->vm_ops ? */
@@ -3165,7 +3173,8 @@ static int do_anonymous_page(struct vm_fault *vmf)
if (!page)
goto oom;
- if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL, &memcg, false))
+ if (mem_cgroup_try_charge_delay(page, vma->vm_mm, GFP_KERNEL, &memcg,
+ false))
goto oom_free_page;
/*
@@ -3223,10 +3232,10 @@ oom:
* released depending on flags and vma->vm_ops->fault() return value.
* See filemap_fault() and __lock_page_retry().
*/
-static int __do_fault(struct vm_fault *vmf)
+static vm_fault_t __do_fault(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
- int ret;
+ vm_fault_t ret;
ret = vma->vm_ops->fault(vmf);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY |
@@ -3260,7 +3269,7 @@ static int pmd_devmap_trans_unstable(pmd_t *pmd)
return pmd_devmap(*pmd) || pmd_trans_unstable(pmd);
}
-static int pte_alloc_one_map(struct vm_fault *vmf)
+static vm_fault_t pte_alloc_one_map(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
@@ -3336,13 +3345,14 @@ static void deposit_prealloc_pte(struct vm_fault *vmf)
vmf->prealloc_pte = NULL;
}
-static int do_set_pmd(struct vm_fault *vmf, struct page *page)
+static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
{
struct vm_area_struct *vma = vmf->vma;
bool write = vmf->flags & FAULT_FLAG_WRITE;
unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
pmd_t entry;
- int i, ret;
+ int i;
+ vm_fault_t ret;
if (!transhuge_vma_suitable(vma, haddr))
return VM_FAULT_FALLBACK;
@@ -3372,7 +3382,7 @@ static int do_set_pmd(struct vm_fault *vmf, struct page *page)
if (write)
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
- add_mm_counter(vma->vm_mm, MM_FILEPAGES, HPAGE_PMD_NR);
+ add_mm_counter(vma->vm_mm, mm_counter_file(page), HPAGE_PMD_NR);
page_add_file_rmap(page, true);
/*
* deposit and withdraw with pmd lock held
@@ -3392,7 +3402,7 @@ out:
return ret;
}
#else
-static int do_set_pmd(struct vm_fault *vmf, struct page *page)
+static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
{
BUILD_BUG();
return 0;
@@ -3413,13 +3423,13 @@ static int do_set_pmd(struct vm_fault *vmf, struct page *page)
* Target users are page handler itself and implementations of
* vm_ops->map_pages.
*/
-int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
+vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
struct page *page)
{
struct vm_area_struct *vma = vmf->vma;
bool write = vmf->flags & FAULT_FLAG_WRITE;
pte_t entry;
- int ret;
+ vm_fault_t ret;
if (pmd_none(*vmf->pmd) && PageTransCompound(page) &&
IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE)) {
@@ -3478,10 +3488,10 @@ int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
* The function expects the page to be locked and on success it consumes a
* reference of a page being mapped (for the PTE which maps it).
*/
-int finish_fault(struct vm_fault *vmf)
+vm_fault_t finish_fault(struct vm_fault *vmf)
{
struct page *page;
- int ret = 0;
+ vm_fault_t ret = 0;
/* Did we COW the page? */
if ((vmf->flags & FAULT_FLAG_WRITE) &&
@@ -3567,12 +3577,13 @@ late_initcall(fault_around_debugfs);
* (and therefore to page order). This way it's easier to guarantee
* that we don't cross page table boundaries.
*/
-static int do_fault_around(struct vm_fault *vmf)
+static vm_fault_t do_fault_around(struct vm_fault *vmf)
{
unsigned long address = vmf->address, nr_pages, mask;
pgoff_t start_pgoff = vmf->pgoff;
pgoff_t end_pgoff;
- int off, ret = 0;
+ int off;
+ vm_fault_t ret = 0;
nr_pages = READ_ONCE(fault_around_bytes) >> PAGE_SHIFT;
mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK;
@@ -3622,10 +3633,10 @@ out:
return ret;
}
-static int do_read_fault(struct vm_fault *vmf)
+static vm_fault_t do_read_fault(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
- int ret = 0;
+ vm_fault_t ret = 0;
/*
* Let's call ->map_pages() first and use ->fault() as fallback
@@ -3649,10 +3660,10 @@ static int do_read_fault(struct vm_fault *vmf)
return ret;
}
-static int do_cow_fault(struct vm_fault *vmf)
+static vm_fault_t do_cow_fault(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
- int ret;
+ vm_fault_t ret;
if (unlikely(anon_vma_prepare(vma)))
return VM_FAULT_OOM;
@@ -3661,7 +3672,7 @@ static int do_cow_fault(struct vm_fault *vmf)
if (!vmf->cow_page)
return VM_FAULT_OOM;
- if (mem_cgroup_try_charge(vmf->cow_page, vma->vm_mm, GFP_KERNEL,
+ if (mem_cgroup_try_charge_delay(vmf->cow_page, vma->vm_mm, GFP_KERNEL,
&vmf->memcg, false)) {
put_page(vmf->cow_page);
return VM_FAULT_OOM;
@@ -3688,10 +3699,10 @@ uncharge_out:
return ret;
}
-static int do_shared_fault(struct vm_fault *vmf)
+static vm_fault_t do_shared_fault(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
- int ret, tmp;
+ vm_fault_t ret, tmp;
ret = __do_fault(vmf);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
@@ -3729,10 +3740,10 @@ static int do_shared_fault(struct vm_fault *vmf)
* The mmap_sem may have been released depending on flags and our
* return value. See filemap_fault() and __lock_page_or_retry().
*/
-static int do_fault(struct vm_fault *vmf)
+static vm_fault_t do_fault(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
- int ret;
+ vm_fault_t ret;
/* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */
if (!vma->vm_ops->fault)
@@ -3767,7 +3778,7 @@ static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
return mpol_misplaced(page, vma, addr);
}
-static int do_numa_page(struct vm_fault *vmf)
+static vm_fault_t do_numa_page(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
struct page *page = NULL;
@@ -3857,7 +3868,7 @@ out:
return 0;
}
-static inline int create_huge_pmd(struct vm_fault *vmf)
+static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf)
{
if (vma_is_anonymous(vmf->vma))
return do_huge_pmd_anonymous_page(vmf);
@@ -3867,7 +3878,7 @@ static inline int create_huge_pmd(struct vm_fault *vmf)
}
/* `inline' is required to avoid gcc 4.1.2 build error */
-static inline int wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd)
+static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd)
{
if (vma_is_anonymous(vmf->vma))
return do_huge_pmd_wp_page(vmf, orig_pmd);
@@ -3886,7 +3897,7 @@ static inline bool vma_is_accessible(struct vm_area_struct *vma)
return vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE);
}
-static int create_huge_pud(struct vm_fault *vmf)
+static vm_fault_t create_huge_pud(struct vm_fault *vmf)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/* No support for anonymous transparent PUD pages yet */
@@ -3898,7 +3909,7 @@ static int create_huge_pud(struct vm_fault *vmf)
return VM_FAULT_FALLBACK;
}
-static int wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
+static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/* No support for anonymous transparent PUD pages yet */
@@ -3925,7 +3936,7 @@ static int wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
* The mmap_sem may have been released depending on flags and our return value.
* See filemap_fault() and __lock_page_or_retry().
*/
-static int handle_pte_fault(struct vm_fault *vmf)
+static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
{
pte_t entry;
@@ -4013,8 +4024,8 @@ unlock:
* The mmap_sem may have been released depending on flags and our
* return value. See filemap_fault() and __lock_page_or_retry().
*/
-static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
- unsigned int flags)
+static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
+ unsigned long address, unsigned int flags)
{
struct vm_fault vmf = {
.vma = vma,
@@ -4027,7 +4038,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
struct mm_struct *mm = vma->vm_mm;
pgd_t *pgd;
p4d_t *p4d;
- int ret;
+ vm_fault_t ret;
pgd = pgd_offset(mm, address);
p4d = p4d_alloc(mm, pgd, address);
@@ -4102,10 +4113,10 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
* The mmap_sem may have been released depending on flags and our
* return value. See filemap_fault() and __lock_page_or_retry().
*/
-int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
+vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
unsigned int flags)
{
- int ret;
+ vm_fault_t ret;
__set_current_state(TASK_RUNNING);
@@ -4125,7 +4136,7 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
* space. Kernel faults are handled more gracefully.
*/
if (flags & FAULT_FLAG_USER)
- mem_cgroup_oom_enable();
+ mem_cgroup_enter_user_fault();
if (unlikely(is_vm_hugetlb_page(vma)))
ret = hugetlb_fault(vma->vm_mm, vma, address, flags);
@@ -4133,7 +4144,7 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
ret = __handle_mm_fault(vma, address, flags);
if (flags & FAULT_FLAG_USER) {
- mem_cgroup_oom_disable();
+ mem_cgroup_exit_user_fault();
/*
* The task may have entered a memcg OOM situation but
* if the allocation error was handled gracefully (no
@@ -4397,6 +4408,9 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
return -EINVAL;
maddr = ioremap_prot(phys_addr, PAGE_ALIGN(len + offset), prot);
+ if (!maddr)
+ return -ENOMEM;
+
if (write)
memcpy_toio(maddr + offset, buf, len);
else
@@ -4568,71 +4582,93 @@ EXPORT_SYMBOL(__might_fault);
#endif
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS)
-static void clear_gigantic_page(struct page *page,
- unsigned long addr,
- unsigned int pages_per_huge_page)
-{
- int i;
- struct page *p = page;
-
- might_sleep();
- for (i = 0; i < pages_per_huge_page;
- i++, p = mem_map_next(p, page, i)) {
- cond_resched();
- clear_user_highpage(p, addr + i * PAGE_SIZE);
- }
-}
-void clear_huge_page(struct page *page,
- unsigned long addr_hint, unsigned int pages_per_huge_page)
+/*
+ * Process all subpages of the specified huge page with the specified
+ * operation. The target subpage will be processed last to keep its
+ * cache lines hot.
+ */
+static inline void process_huge_page(
+ unsigned long addr_hint, unsigned int pages_per_huge_page,
+ void (*process_subpage)(unsigned long addr, int idx, void *arg),
+ void *arg)
{
int i, n, base, l;
unsigned long addr = addr_hint &
~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1);
- if (unlikely(pages_per_huge_page > MAX_ORDER_NR_PAGES)) {
- clear_gigantic_page(page, addr, pages_per_huge_page);
- return;
- }
-
- /* Clear sub-page to access last to keep its cache lines hot */
+ /* Process target subpage last to keep its cache lines hot */
might_sleep();
n = (addr_hint - addr) / PAGE_SIZE;
if (2 * n <= pages_per_huge_page) {
- /* If sub-page to access in first half of huge page */
+ /* If target subpage in first half of huge page */
base = 0;
l = n;
- /* Clear sub-pages at the end of huge page */
+ /* Process subpages at the end of huge page */
for (i = pages_per_huge_page - 1; i >= 2 * n; i--) {
cond_resched();
- clear_user_highpage(page + i, addr + i * PAGE_SIZE);
+ process_subpage(addr + i * PAGE_SIZE, i, arg);
}
} else {
- /* If sub-page to access in second half of huge page */
+ /* If target subpage in second half of huge page */
base = pages_per_huge_page - 2 * (pages_per_huge_page - n);
l = pages_per_huge_page - n;
- /* Clear sub-pages at the begin of huge page */
+ /* Process subpages at the begin of huge page */
for (i = 0; i < base; i++) {
cond_resched();
- clear_user_highpage(page + i, addr + i * PAGE_SIZE);
+ process_subpage(addr + i * PAGE_SIZE, i, arg);
}
}
/*
- * Clear remaining sub-pages in left-right-left-right pattern
- * towards the sub-page to access
+ * Process remaining subpages in left-right-left-right pattern
+ * towards the target subpage
*/
for (i = 0; i < l; i++) {
int left_idx = base + i;
int right_idx = base + 2 * l - 1 - i;
cond_resched();
- clear_user_highpage(page + left_idx,
- addr + left_idx * PAGE_SIZE);
+ process_subpage(addr + left_idx * PAGE_SIZE, left_idx, arg);
+ cond_resched();
+ process_subpage(addr + right_idx * PAGE_SIZE, right_idx, arg);
+ }
+}
+
+static void clear_gigantic_page(struct page *page,
+ unsigned long addr,
+ unsigned int pages_per_huge_page)
+{
+ int i;
+ struct page *p = page;
+
+ might_sleep();
+ for (i = 0; i < pages_per_huge_page;
+ i++, p = mem_map_next(p, page, i)) {
cond_resched();
- clear_user_highpage(page + right_idx,
- addr + right_idx * PAGE_SIZE);
+ clear_user_highpage(p, addr + i * PAGE_SIZE);
}
}
+static void clear_subpage(unsigned long addr, int idx, void *arg)
+{
+ struct page *page = arg;
+
+ clear_user_highpage(page + idx, addr);
+}
+
+void clear_huge_page(struct page *page,
+ unsigned long addr_hint, unsigned int pages_per_huge_page)
+{
+ unsigned long addr = addr_hint &
+ ~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1);
+
+ if (unlikely(pages_per_huge_page > MAX_ORDER_NR_PAGES)) {
+ clear_gigantic_page(page, addr, pages_per_huge_page);
+ return;
+ }
+
+ process_huge_page(addr_hint, pages_per_huge_page, clear_subpage, page);
+}
+
static void copy_user_gigantic_page(struct page *dst, struct page *src,
unsigned long addr,
struct vm_area_struct *vma,
@@ -4652,11 +4688,31 @@ static void copy_user_gigantic_page(struct page *dst, struct page *src,
}
}
+struct copy_subpage_arg {
+ struct page *dst;
+ struct page *src;
+ struct vm_area_struct *vma;
+};
+
+static void copy_subpage(unsigned long addr, int idx, void *arg)
+{
+ struct copy_subpage_arg *copy_arg = arg;
+
+ copy_user_highpage(copy_arg->dst + idx, copy_arg->src + idx,
+ addr, copy_arg->vma);
+}
+
void copy_user_huge_page(struct page *dst, struct page *src,
- unsigned long addr, struct vm_area_struct *vma,
+ unsigned long addr_hint, struct vm_area_struct *vma,
unsigned int pages_per_huge_page)
{
- int i;
+ unsigned long addr = addr_hint &
+ ~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1);
+ struct copy_subpage_arg arg = {
+ .dst = dst,
+ .src = src,
+ .vma = vma,
+ };
if (unlikely(pages_per_huge_page > MAX_ORDER_NR_PAGES)) {
copy_user_gigantic_page(dst, src, addr, vma,
@@ -4664,11 +4720,7 @@ void copy_user_huge_page(struct page *dst, struct page *src,
return;
}
- might_sleep();
- for (i = 0; i < pages_per_huge_page; i++) {
- cond_resched();
- copy_user_highpage(dst + i, src + i, addr + i*PAGE_SIZE, vma);
- }
+ process_huge_page(addr_hint, pages_per_huge_page, copy_subpage, &arg);
}
long copy_huge_page_from_user(struct page *dst_page,