aboutsummaryrefslogtreecommitdiffstats
path: root/mm/hugetlb.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r--mm/hugetlb.c137
1 files changed, 79 insertions, 58 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 0bdfc7e1c933..2ca4e8c3163e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -456,14 +456,12 @@ static int allocate_file_region_entries(struct resv_map *resv,
int regions_needed)
__must_hold(&resv->lock)
{
- struct list_head allocated_regions;
+ LIST_HEAD(allocated_regions);
int to_allocate = 0, i = 0;
struct file_region *trg = NULL, *rg = NULL;
VM_BUG_ON(regions_needed < 0);
- INIT_LIST_HEAD(&allocated_regions);
-
/*
* Check for sufficient descriptors in the cache to accommodate
* the number of in progress add operations plus regions_needed.
@@ -1506,6 +1504,10 @@ static void add_hugetlb_page(struct hstate *h, struct page *page,
set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
set_page_private(page, 0);
+ /*
+ * We have to set HPageVmemmapOptimized again as above
+ * set_page_private(page, 0) cleared it.
+ */
SetHPageVmemmapOptimized(page);
/*
@@ -2336,7 +2338,7 @@ struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
static int gather_surplus_pages(struct hstate *h, long delta)
__must_hold(&hugetlb_lock)
{
- struct list_head surplus_list;
+ LIST_HEAD(surplus_list);
struct page *page, *tmp;
int ret;
long i;
@@ -2351,7 +2353,6 @@ static int gather_surplus_pages(struct hstate *h, long delta)
}
allocated = 0;
- INIT_LIST_HEAD(&surplus_list);
ret = -ENOMEM;
retry:
@@ -3474,7 +3475,8 @@ static int demote_free_huge_page(struct hstate *h, struct page *page)
* based on pool changes for the demoted page.
*/
h->max_huge_pages--;
- target_hstate->max_huge_pages += pages_per_huge_page(h);
+ target_hstate->max_huge_pages +=
+ pages_per_huge_page(h) / pages_per_huge_page(target_hstate);
return rc;
}
@@ -3767,8 +3769,7 @@ HSTATE_ATTR_WO(demote);
static ssize_t demote_size_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
- int nid;
- struct hstate *h = kobj_to_hstate(kobj, &nid);
+ struct hstate *h = kobj_to_hstate(kobj, NULL);
unsigned long demote_size = (PAGE_SIZE << h->demote_order) / SZ_1K;
return sysfs_emit(buf, "%lukB\n", demote_size);
@@ -3781,7 +3782,6 @@ static ssize_t demote_size_store(struct kobject *kobj,
struct hstate *h, *demote_hstate;
unsigned long demote_size;
unsigned int demote_order;
- int nid;
demote_size = (unsigned long)memparse(buf, NULL);
@@ -3793,7 +3793,7 @@ static ssize_t demote_size_store(struct kobject *kobj,
return -EINVAL;
/* demote order must be smaller than hstate order */
- h = kobj_to_hstate(kobj, &nid);
+ h = kobj_to_hstate(kobj, NULL);
if (demote_order >= h->order)
return -EINVAL;
@@ -3847,15 +3847,22 @@ static int hugetlb_sysfs_add_hstate(struct hstate *h, struct kobject *parent,
if (retval) {
kobject_put(hstate_kobjs[hi]);
hstate_kobjs[hi] = NULL;
+ return retval;
}
if (h->demote_order) {
- if (sysfs_create_group(hstate_kobjs[hi],
- &hstate_demote_attr_group))
+ retval = sysfs_create_group(hstate_kobjs[hi],
+ &hstate_demote_attr_group);
+ if (retval) {
pr_warn("HugeTLB unable to create demote interfaces for %s\n", h->name);
+ sysfs_remove_group(hstate_kobjs[hi], hstate_attr_group);
+ kobject_put(hstate_kobjs[hi]);
+ hstate_kobjs[hi] = NULL;
+ return retval;
+ }
}
- return retval;
+ return 0;
}
static void __init hugetlb_sysfs_init(void)
@@ -3941,10 +3948,15 @@ static void hugetlb_unregister_node(struct node *node)
for_each_hstate(h) {
int idx = hstate_index(h);
- if (nhs->hstate_kobjs[idx]) {
- kobject_put(nhs->hstate_kobjs[idx]);
- nhs->hstate_kobjs[idx] = NULL;
- }
+ struct kobject *hstate_kobj = nhs->hstate_kobjs[idx];
+
+ if (!hstate_kobj)
+ continue;
+ if (h->demote_order)
+ sysfs_remove_group(hstate_kobj, &hstate_demote_attr_group);
+ sysfs_remove_group(hstate_kobj, &per_node_hstate_attr_group);
+ kobject_put(hstate_kobj);
+ nhs->hstate_kobjs[idx] = NULL;
}
kobject_put(nhs->hugepages_kobj);
@@ -4019,6 +4031,14 @@ static void hugetlb_register_all_nodes(void) { }
#endif
+#ifdef CONFIG_CMA
+static void __init hugetlb_cma_check(void);
+#else
+static inline __init void hugetlb_cma_check(void)
+{
+}
+#endif
+
static int __init hugetlb_init(void)
{
int i;
@@ -4118,7 +4138,7 @@ void __init hugetlb_add_hstate(unsigned int order)
h->next_nid_to_alloc = first_memory_node;
h->next_nid_to_free = first_memory_node;
snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB",
- huge_page_size(h)/1024);
+ huge_page_size(h)/SZ_1K);
parsed_hstate = h;
}
@@ -4133,11 +4153,11 @@ static void __init hugepages_clear_pages_in_node(void)
if (!hugetlb_max_hstate) {
default_hstate_max_huge_pages = 0;
memset(default_hugepages_in_node, 0,
- MAX_NUMNODES * sizeof(unsigned int));
+ sizeof(default_hugepages_in_node));
} else {
parsed_hstate->max_huge_pages = 0;
memset(parsed_hstate->max_huge_pages_node, 0,
- MAX_NUMNODES * sizeof(unsigned int));
+ sizeof(parsed_hstate->max_huge_pages_node));
}
}
@@ -4332,18 +4352,34 @@ static int __init default_hugepagesz_setup(char *s)
}
__setup("default_hugepagesz=", default_hugepagesz_setup);
+static nodemask_t *policy_mbind_nodemask(gfp_t gfp)
+{
+#ifdef CONFIG_NUMA
+ struct mempolicy *mpol = get_task_policy(current);
+
+ /*
+ * Only enforce MPOL_BIND policy which overlaps with cpuset policy
+ * (from policy_nodemask) specifically for hugetlb case
+ */
+ if (mpol->mode == MPOL_BIND &&
+ (apply_policy_zone(mpol, gfp_zone(gfp)) &&
+ cpuset_nodemask_valid_mems_allowed(&mpol->nodes)))
+ return &mpol->nodes;
+#endif
+ return NULL;
+}
+
static unsigned int allowed_mems_nr(struct hstate *h)
{
int node;
unsigned int nr = 0;
- nodemask_t *mpol_allowed;
+ nodemask_t *mbind_nodemask;
unsigned int *array = h->free_huge_pages_node;
gfp_t gfp_mask = htlb_alloc_mask(h);
- mpol_allowed = policy_nodemask_current(gfp_mask);
-
+ mbind_nodemask = policy_mbind_nodemask(gfp_mask);
for_each_node_mask(node, cpuset_current_mems_allowed) {
- if (!mpol_allowed || node_isset(node, *mpol_allowed))
+ if (!mbind_nodemask || node_isset(node, *mbind_nodemask))
nr += array[node];
}
@@ -4723,7 +4759,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
struct vm_area_struct *dst_vma,
struct vm_area_struct *src_vma)
{
- pte_t *src_pte, *dst_pte, entry, dst_entry;
+ pte_t *src_pte, *dst_pte, entry;
struct page *ptepage;
unsigned long addr;
bool cow = is_cow_mapping(src_vma->vm_flags);
@@ -4768,15 +4804,13 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
/*
* If the pagetables are shared don't copy or take references.
- * dst_pte == src_pte is the common case of src/dest sharing.
*
+ * dst_pte == src_pte is the common case of src/dest sharing.
* However, src could have 'unshared' and dst shares with
- * another vma. If dst_pte !none, this implies sharing.
- * Check here before taking page table lock, and once again
- * after taking the lock below.
+ * another vma. So page_count of ptep page is checked instead
+ * to reliably determine whether pte is shared.
*/
- dst_entry = huge_ptep_get(dst_pte);
- if ((dst_pte == src_pte) || !huge_pte_none(dst_entry)) {
+ if (page_count(virt_to_page(dst_pte)) > 1) {
addr |= last_addr_mask;
continue;
}
@@ -4785,13 +4819,10 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
src_ptl = huge_pte_lockptr(h, src, src_pte);
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
entry = huge_ptep_get(src_pte);
- dst_entry = huge_ptep_get(dst_pte);
again:
- if (huge_pte_none(entry) || !huge_pte_none(dst_entry)) {
+ if (huge_pte_none(entry)) {
/*
- * Skip if src entry none. Also, skip in the
- * unlikely case dst entry !none as this implies
- * sharing with another vma.
+ * Skip if src entry none.
*/
;
} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) {
@@ -4870,7 +4901,7 @@ again:
restore_reserve_on_error(h, dst_vma, addr,
new);
put_page(new);
- /* dst_entry won't change as in child */
+ /* huge_ptep of dst_pte won't change as in child */
goto again;
}
hugetlb_install_page(dst_vma, dst_pte, addr, new);
@@ -5316,7 +5347,6 @@ retry_avoidcopy:
u32 hash;
put_page(old_page);
- BUG_ON(huge_pte_none(pte));
/*
* Drop hugetlb_fault_mutex and i_mmap_rwsem before
* unmapping. unmapping needs to hold i_mmap_rwsem
@@ -5408,19 +5438,6 @@ out_release_old:
return ret;
}
-/* Return the pagecache page at a given address within a VMA */
-static struct page *hugetlbfs_pagecache_page(struct hstate *h,
- struct vm_area_struct *vma, unsigned long address)
-{
- struct address_space *mapping;
- pgoff_t idx;
-
- mapping = vma->vm_file->f_mapping;
- idx = vma_hugecache_offset(h, vma, address);
-
- return find_lock_page(mapping, idx);
-}
-
/*
* Return whether there is a pagecache page to back given address within VMA.
* Caller follow_hugetlb_page() holds page_table_lock so we cannot lock_page.
@@ -5547,7 +5564,6 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
if (idx >= size)
goto out;
-retry:
new_page = false;
page = find_lock_page(mapping, idx);
if (!page) {
@@ -5587,9 +5603,15 @@ retry:
if (vma->vm_flags & VM_MAYSHARE) {
int err = huge_add_to_page_cache(page, mapping, idx);
if (err) {
+ /*
+ * err can't be -EEXIST which implies someone
+ * else consumed the reservation since hugetlb
+ * fault mutex is held when add a hugetlb page
+ * to the page cache. So it's safe to call
+ * restore_reserve_on_error() here.
+ */
+ restore_reserve_on_error(h, vma, haddr, page);
put_page(page);
- if (err == -EEXIST)
- goto retry;
goto out;
}
new_pagecache_page = true;
@@ -5810,7 +5832,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
/* Just decrements count, does not deallocate */
vma_end_reservation(h, vma, haddr);
- pagecache_page = hugetlbfs_pagecache_page(h, vma, haddr);
+ pagecache_page = find_lock_page(mapping, idx);
}
ptl = huge_pte_lock(h, mm, ptep);
@@ -6017,8 +6039,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
page_in_pagecache = true;
}
- ptl = huge_pte_lockptr(h, dst_mm, dst_pte);
- spin_lock(ptl);
+ ptl = huge_pte_lock(h, dst_mm, dst_pte);
/*
* Recheck the i_size after holding PT lock to make sure not
@@ -7334,7 +7355,7 @@ void __init hugetlb_cma_reserve(int order)
hugetlb_cma_size = 0;
}
-void __init hugetlb_cma_check(void)
+static void __init hugetlb_cma_check(void)
{
if (!hugetlb_cma_size || cma_reserve_called)
return;