Diffstat (limited to 'mm/memory.c')
-rw-r--r--  mm/memory.c   92
1 file changed, 64 insertions, 28 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 33f45edf8272..32e9b7aec366 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -300,15 +300,14 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_
struct mmu_gather_batch *batch;
VM_BUG_ON(!tlb->end);
-
- if (!tlb->page_size)
- tlb->page_size = page_size;
- else {
- if (page_size != tlb->page_size)
- return true;
- }
+ VM_WARN_ON(tlb->page_size != page_size);
batch = tlb->active;
+ /*
+ * Add the page and check if we are full. If so
+ * force a flush.
+ */
+ batch->pages[batch->nr++] = page;
if (batch->nr == batch->max) {
if (!tlb_next_batch(tlb))
return true;
@@ -316,7 +315,6 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_
}
VM_BUG_ON_PAGE(batch->nr > batch->max, page);
- batch->pages[batch->nr++] = page;
return false;
}
@@ -528,7 +526,11 @@ void free_pgd_range(struct mmu_gather *tlb,
end -= PMD_SIZE;
if (addr > end - 1)
return;
-
+ /*
+ * We add page table cache pages with PAGE_SIZE
+ * (see pte_free_tlb()); flush the TLB if we need to.
+ */
+ tlb_remove_check_page_size_change(tlb, PAGE_SIZE);
pgd = pgd_offset(tlb->mm, addr);
do {
next = pgd_addr_end(addr, end);
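The size check removed from __tlb_remove_page_size() above becomes the callers' job: they flush via tlb_remove_check_page_size_change() before queueing pages of a different size, as free_pgd_range() does here and zap_pte_range() does below. A minimal sketch of that helper, assuming the generic mmu_gather batching from asm-generic/tlb.h (the in-tree version may differ in detail):

        static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
                                                              unsigned int page_size)
        {
                /*
                 * Pages already queued in the batch were gathered with the
                 * old size; flush them before recording the new size.
                 */
                if (tlb->page_size && tlb->page_size != page_size)
                        tlb_flush_mmu(tlb);
                tlb->page_size = page_size;
        }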
@@ -1118,8 +1120,8 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
pte_t *start_pte;
pte_t *pte;
swp_entry_t entry;
- struct page *pending_page = NULL;
+ tlb_remove_check_page_size_change(tlb, PAGE_SIZE);
again:
init_rss_vec(rss);
start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
@@ -1172,7 +1174,6 @@ again:
print_bad_pte(vma, addr, ptent, page);
if (unlikely(__tlb_remove_page(tlb, page))) {
force_flush = 1;
- pending_page = page;
addr += PAGE_SIZE;
break;
}
@@ -1213,11 +1214,6 @@ again:
if (force_flush) {
force_flush = 0;
tlb_flush_mmu_free(tlb);
- if (pending_page) {
- /* remove the page with new size */
- __tlb_remove_pte_page(tlb, pending_page);
- pending_page = NULL;
- }
if (addr != end)
goto again;
}
@@ -1240,7 +1236,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
if (next - addr != HPAGE_PMD_SIZE) {
VM_BUG_ON_VMA(vma_is_anonymous(vma) &&
!rwsem_is_locked(&tlb->mm->mmap_sem), vma);
- split_huge_pmd(vma, pmd, addr);
+ __split_huge_pmd(vma, pmd, addr, false, NULL);
} else if (zap_huge_pmd(tlb, vma, pmd, addr))
goto next;
/* fall through */
@@ -2939,6 +2935,19 @@ static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
return true;
}
+static void deposit_prealloc_pte(struct fault_env *fe)
+{
+ struct vm_area_struct *vma = fe->vma;
+
+ pgtable_trans_huge_deposit(vma->vm_mm, fe->pmd, fe->prealloc_pte);
+ /*
+ * We are going to consume the prealloc table,
+ * count that as nr_ptes.
+ */
+ atomic_long_inc(&vma->vm_mm->nr_ptes);
+ fe->prealloc_pte = 0;
+}
+
static int do_set_pmd(struct fault_env *fe, struct page *page)
{
struct vm_area_struct *vma = fe->vma;
@@ -2953,6 +2962,17 @@ static int do_set_pmd(struct fault_env *fe, struct page *page)
ret = VM_FAULT_FALLBACK;
page = compound_head(page);
+ /*
+ * Archs like ppc64 need additional space to store information
+ * related to pte entry. Use the preallocated table for that.
+ */
+ if (arch_needs_pgtable_deposit() && !fe->prealloc_pte) {
+ fe->prealloc_pte = pte_alloc_one(vma->vm_mm, fe->address);
+ if (!fe->prealloc_pte)
+ return VM_FAULT_OOM;
+ smp_wmb(); /* See comment in __pte_alloc() */
+ }
+
fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
if (unlikely(!pmd_none(*fe->pmd)))
goto out;
@@ -2966,6 +2986,11 @@ static int do_set_pmd(struct fault_env *fe, struct page *page)
add_mm_counter(vma->vm_mm, MM_FILEPAGES, HPAGE_PMD_NR);
page_add_file_rmap(page, true);
+ /*
+ * Deposit and withdraw with the pmd lock held.
+ */
+ if (arch_needs_pgtable_deposit())
+ deposit_prealloc_pte(fe);
set_pmd_at(vma->vm_mm, haddr, fe->pmd, entry);
@@ -2975,6 +3000,13 @@ static int do_set_pmd(struct fault_env *fe, struct page *page)
ret = 0;
count_vm_event(THP_FILE_MAPPED);
out:
+ /*
+ * If we are going to fall back to pte mapping, do a
+ * withdraw with the pmd lock held.
+ */
+ if (arch_needs_pgtable_deposit() && ret == VM_FAULT_FALLBACK)
+ fe->prealloc_pte = pgtable_trans_huge_withdraw(vma->vm_mm,
+ fe->pmd);
spin_unlock(fe->ptl);
return ret;
}
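The deposit/withdraw pair added above is only exercised on architectures that ask for it; elsewhere arch_needs_pgtable_deposit() is expected to be a compile-time false so both branches vanish. A sketch under that assumption, with a hypothetical ppc64 book3s variant where only the hash MMU needs the deposited table:

        /* Generic fallback: no page table needs to be deposited for file THP. */
        #ifndef arch_needs_pgtable_deposit
        #define arch_needs_pgtable_deposit() (false)
        #endif

        /*
         * ppc64 book3s sketch: the hash MMU keeps extra per-PTE state in the
         * deposited table, the radix MMU does not.
         */
        static inline bool arch_needs_pgtable_deposit(void)
        {
                return !radix_enabled();
        }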
@@ -3014,18 +3046,20 @@ int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg,
ret = do_set_pmd(fe, page);
if (ret != VM_FAULT_FALLBACK)
- return ret;
+ goto fault_handled;
}
if (!fe->pte) {
ret = pte_alloc_one_map(fe);
if (ret)
- return ret;
+ goto fault_handled;
}
/* Re-check under ptl */
- if (unlikely(!pte_none(*fe->pte)))
- return VM_FAULT_NOPAGE;
+ if (unlikely(!pte_none(*fe->pte))) {
+ ret = VM_FAULT_NOPAGE;
+ goto fault_handled;
+ }
flush_icache_page(vma, page);
entry = mk_pte(page, vma->vm_page_prot);
@@ -3045,8 +3079,15 @@ int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg,
/* no need to invalidate: a not-present page won't be cached */
update_mmu_cache(vma, fe->address, fe->pte);
+ ret = 0;
- return 0;
+fault_handled:
+ /* preallocated pagetable is unused: free it */
+ if (fe->prealloc_pte) {
+ pte_free(fe->vma->vm_mm, fe->prealloc_pte);
+ fe->prealloc_pte = 0;
+ }
+ return ret;
}
static unsigned long fault_around_bytes __read_mostly =
@@ -3145,11 +3186,6 @@ static int do_fault_around(struct fault_env *fe, pgoff_t start_pgoff)
fe->vma->vm_ops->map_pages(fe, start_pgoff, end_pgoff);
- /* preallocated pagetable is unused: free it */
- if (fe->prealloc_pte) {
- pte_free(fe->vma->vm_mm, fe->prealloc_pte);
- fe->prealloc_pte = 0;
- }
/* Huge page is mapped? Page fault is solved */
if (pmd_trans_huge(*fe->pmd)) {
ret = VM_FAULT_NOPAGE;
@@ -3454,7 +3490,7 @@ static int wp_huge_pmd(struct fault_env *fe, pmd_t orig_pmd)
/* COW handled on pte level: split pmd */
VM_BUG_ON_VMA(fe->vma->vm_flags & VM_SHARED, fe->vma);
- split_huge_pmd(fe->vma, fe->pmd, fe->address);
+ __split_huge_pmd(fe->vma, fe->pmd, fe->address, false, NULL);
return VM_FAULT_FALLBACK;
}
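For context, split_huge_pmd() in this kernel is a wrapper that only splits when the pmd is actually huge, so the direct __split_huge_pmd(vma, pmd, address, false, NULL) calls above do the same work with the pre-check skipped; the last two arguments mean "do not freeze" and "no specific page". Roughly, from include/linux/huge_mm.h (exact form may differ):

        #define split_huge_pmd(__vma, __pmd, __address)                         \
                do {                                                            \
                        pmd_t *____pmd = (__pmd);                               \
                        if (pmd_trans_huge(*____pmd) || pmd_devmap(*____pmd))   \
                                __split_huge_pmd(__vma, __pmd, __address,       \
                                                 false, NULL);                  \
                } while (0)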