Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r--  mm/hugetlb.c | 39
1 file changed, 23 insertions(+), 16 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 9fd722769927..85032de5e20f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -582,7 +582,7 @@ retry_cpuset:
for_each_zone_zonelist_nodemask(zone, z, zonelist,
MAX_NR_ZONES - 1, nodemask) {
- if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask(h))) {
+ if (cpuset_zone_allowed(zone, htlb_alloc_mask(h))) {
page = dequeue_huge_page_node(h, zone_to_nid(zone));
if (page) {
if (avoid_reserve)
@@ -1457,7 +1457,7 @@ int __weak alloc_bootmem_huge_page(struct hstate *h)
return 0;
found:
- BUG_ON((unsigned long)virt_to_phys(m) & (huge_page_size(h) - 1));
+ BUG_ON(!IS_ALIGNED(virt_to_phys(m), huge_page_size(h)));
/* Put them into a private list first because mem_map is not up yet */
list_add(&m->list, &huge_boot_pages);
m->hstate = h;
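
The new BUG_ON is the same power-of-two alignment test as the old open-coded mask, just spelled readably with IS_ALIGNED(). A minimal user-space sketch of the equivalence; the macro body below mirrors the usual kernel definition (assumed here, verify against your tree):

#include <assert.h>
#include <stdio.h>

/* Same shape as the kernel macro (assumed from include/linux/kernel.h). */
#define IS_ALIGNED(x, a)	(((x) & ((typeof(x))(a) - 1)) == 0)

int main(void)
{
	unsigned long huge_page_size = 2UL << 20;	/* 2 MB, a power of two */
	unsigned long addrs[] = { 0, 2UL << 20, (2UL << 20) + 4096, 3UL << 20 };

	for (unsigned int i = 0; i < sizeof(addrs) / sizeof(addrs[0]); i++) {
		unsigned long a = addrs[i];
		/* Old check: any low bit set means misaligned. */
		int old_misaligned = (a & (huge_page_size - 1)) != 0;
		/* New check: IS_ALIGNED() performs the identical test. */
		int new_misaligned = !IS_ALIGNED(a, huge_page_size);

		assert(old_misaligned == new_misaligned);
		printf("%#lx: %s\n", a, new_misaligned ? "misaligned" : "aligned");
	}
	return 0;
}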
@@ -2083,7 +2083,7 @@ static void hugetlb_register_node(struct node *node)
* devices of nodes that have memory. All on-line nodes should have
* registered their associated device by this time.
*/
-static void hugetlb_register_all_nodes(void)
+static void __init hugetlb_register_all_nodes(void)
{
int nid;
@@ -2598,8 +2598,11 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
}
set_huge_pte_at(dst, addr, dst_pte, entry);
} else {
- if (cow)
+ if (cow) {
huge_ptep_set_wrprotect(src, addr, src_pte);
+ mmu_notifier_invalidate_range(src, mmun_start,
+ mmun_end);
+ }
entry = huge_ptep_get(src_pte);
ptepage = pte_page(entry);
get_page(ptepage);
@@ -2638,8 +2641,9 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
tlb_start_vma(tlb, vma);
mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+ address = start;
again:
- for (address = start; address < end; address += sz) {
+ for (; address < end; address += sz) {
ptep = huge_pte_offset(mm, address);
if (!ptep)
continue;
@@ -2686,6 +2690,7 @@ again:
page_remove_rmap(page);
force_flush = !__tlb_remove_page(tlb, page);
if (force_flush) {
+ address += sz;
spin_unlock(ptl);
break;
}
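
These two unmap hunks work together: the loop cursor now lives outside the for statement so a restart at the again: label resumes where the previous pass stopped, and the added address += sz steps past the PTE that was already torn down before breaking out for the flush. A rough user-space sketch of that restart pattern (illustrative only; none of these names are kernel APIs):

#include <stdbool.h>
#include <stdio.h>

#define NR_ENTRIES	10
#define BATCH		3	/* pretend the gather buffer fills after 3 pages */

static bool add_to_batch(int *batched, int entry)
{
	printf("tear down entry %d\n", entry);
	return ++*batched == BATCH;	/* true means a flush is needed now */
}

static void flush_batch(int *batched)
{
	printf("-- flush --\n");
	*batched = 0;
}

int main(void)
{
	int batched = 0;
	int address = 0;	/* cursor survives across restarts */

again:
	for (; address < NR_ENTRIES; address++) {
		bool force_flush = add_to_batch(&batched, address);

		if (force_flush) {
			/* Step past the entry we just handled; otherwise the
			 * restart below would tear it down a second time. */
			address++;
			break;
		}
	}
	if (batched)
		flush_batch(&batched);
	if (address < NR_ENTRIES)
		goto again;
	return 0;
}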
@@ -2724,9 +2729,9 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb,
* on its way out. We're lucky that the flag has such an appropriate
* name, and can in fact be safely cleared here. We could clear it
* before the __unmap_hugepage_range above, but all that's necessary
- * is to clear it before releasing the i_mmap_mutex. This works
+ * is to clear it before releasing the i_mmap_rwsem. This works
* because in the context this is called, the VMA is about to be
- * destroyed and the i_mmap_mutex is held.
+ * destroyed and the i_mmap_rwsem is held.
*/
vma->vm_flags &= ~VM_MAYSHARE;
}
@@ -2772,7 +2777,7 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
* this mapping should be shared between all the VMAs,
* __unmap_hugepage_range() is called as the lock is already held
*/
- mutex_lock(&mapping->i_mmap_mutex);
+ i_mmap_lock_write(mapping);
vma_interval_tree_foreach(iter_vma, &mapping->i_mmap, pgoff, pgoff) {
/* Do not unmap the current VMA */
if (iter_vma == vma)
@@ -2789,7 +2794,7 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
unmap_hugepage_range(iter_vma, address,
address + huge_page_size(h), page);
}
- mutex_unlock(&mapping->i_mmap_mutex);
+ i_mmap_unlock_write(mapping);
}
/*
@@ -2899,6 +2904,7 @@ retry_avoidcopy:
/* Break COW */
huge_ptep_clear_flush(vma, address, ptep);
+ mmu_notifier_invalidate_range(mm, mmun_start, mmun_end);
set_huge_pte_at(mm, address, ptep,
make_huge_pte(vma, new_page, 1));
page_remove_rmap(old_page);
@@ -3346,7 +3352,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
flush_cache_range(vma, address, end);
mmu_notifier_invalidate_range_start(mm, start, end);
- mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex);
+ i_mmap_lock_write(vma->vm_file->f_mapping);
for (; address < end; address += huge_page_size(h)) {
spinlock_t *ptl;
ptep = huge_pte_offset(mm, address);
@@ -3368,13 +3374,14 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
spin_unlock(ptl);
}
/*
- * Must flush TLB before releasing i_mmap_mutex: x86's huge_pmd_unshare
+ * Must flush TLB before releasing i_mmap_rwsem: x86's huge_pmd_unshare
* may have cleared our pud entry and done put_page on the page table:
- * once we release i_mmap_mutex, another task can do the final put_page
+ * once we release i_mmap_rwsem, another task can do the final put_page
* and that page table be reused and filled with junk.
*/
flush_tlb_range(vma, start, end);
- mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
+ mmu_notifier_invalidate_range(mm, start, end);
+ i_mmap_unlock_write(vma->vm_file->f_mapping);
mmu_notifier_invalidate_range_end(mm, start, end);
return pages << h->order;
@@ -3523,7 +3530,7 @@ static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
* and returns the corresponding pte. While this is not necessary for the
* !shared pmd case because we can allocate the pmd later as well, it makes the
* code much cleaner. pmd allocation is essential for the shared case because
- * pud has to be populated inside the same i_mmap_mutex section - otherwise
+ * pud has to be populated inside the same i_mmap_rwsem section - otherwise
* racing tasks could either miss the sharing (see huge_pte_offset) or select a
* bad pmd for sharing.
*/
@@ -3542,7 +3549,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
if (!vma_shareable(vma, addr))
return (pte_t *)pmd_alloc(mm, pud, addr);
- mutex_lock(&mapping->i_mmap_mutex);
+ i_mmap_lock_write(mapping);
vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
if (svma == vma)
continue;
@@ -3570,7 +3577,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
spin_unlock(ptl);
out:
pte = (pte_t *)pmd_alloc(mm, pud, addr);
- mutex_unlock(&mapping->i_mmap_mutex);
+ i_mmap_unlock_write(mapping);
return pte;
}
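
The remaining hunks are the mechanical half of switching the address_space interval-tree lock from a mutex to a rw_semaphore: every mutex_lock()/mutex_unlock() pair on i_mmap_mutex becomes i_mmap_lock_write()/i_mmap_unlock_write(). Those helpers are expected to be thin wrappers along the following lines; this is a sketch of the presumed include/linux/fs.h counterpart from the same series (not part of this diff), and it assumes struct address_space now carries an i_mmap_rwsem field in place of i_mmap_mutex:

/* Presumed shape of the helpers introduced alongside this conversion. */
static inline void i_mmap_lock_write(struct address_space *mapping)
{
	down_write(&mapping->i_mmap_rwsem);
}

static inline void i_mmap_unlock_write(struct address_space *mapping)
{
	up_write(&mapping->i_mmap_rwsem);
}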