 mm/hugetlb.c | 201 ++++++++++++++++++++-------------------------
 1 file changed, 103 insertions(+), 98 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 226910cb7c9b..eeceeeb09019 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -35,7 +35,6 @@
#include <linux/node.h>
#include "internal.h"
-const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
unsigned long hugepages_treat_as_movable;
int hugetlb_max_hstate __read_mostly;
@@ -856,7 +855,7 @@ struct hstate *size_to_hstate(unsigned long size)
return NULL;
}
-static void free_huge_page(struct page *page)
+void free_huge_page(struct page *page)
{
/*
* Can't pass hstate in here because it is called from the
@@ -1089,6 +1088,9 @@ void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
unsigned long pfn;
struct hstate *h;
+ if (!hugepages_supported())
+ return;
+
/* Set scan step to minimum hugepage size */
for_each_hstate(h)
if (order > huge_page_order(h))
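The guard added above makes dissolve_free_huge_pages() a no-op on systems without huge page support, before the existing scan-step computation (a min-reduction over all registered hstate orders) runs. A minimal userspace sketch of that shape; the sizes and stub names below are invented for illustration, not the kernel's:

    #include <stdio.h>

    /* Invented orders standing in for registered hstate sizes. */
    static const unsigned int orders[] = { 9, 5, 20 };

    static int hugepages_supported_stub(void) { return 1; }

    static void dissolve_scan(void)
    {
            unsigned int i, order = ~0U;

            if (!hugepages_supported_stub())
                    return;                 /* the new early-out */

            /* Set scan step to minimum hugepage size, as in the hunk. */
            for (i = 0; i < sizeof(orders) / sizeof(orders[0]); i++)
                    if (order > orders[i])
                            order = orders[i];

            printf("scan step: order-%u pages\n", order);
    }

    int main(void)
    {
            dissolve_scan();
            return 0;
    }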
@@ -1734,21 +1736,13 @@ static ssize_t nr_hugepages_show_common(struct kobject *kobj,
return sprintf(buf, "%lu\n", nr_huge_pages);
}
-static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
- struct kobject *kobj, struct kobj_attribute *attr,
- const char *buf, size_t len)
+static ssize_t __nr_hugepages_store_common(bool obey_mempolicy,
+ struct hstate *h, int nid,
+ unsigned long count, size_t len)
{
int err;
- int nid;
- unsigned long count;
- struct hstate *h;
NODEMASK_ALLOC(nodemask_t, nodes_allowed, GFP_KERNEL | __GFP_NORETRY);
- err = kstrtoul(buf, 10, &count);
- if (err)
- goto out;
-
- h = kobj_to_hstate(kobj, &nid);
if (hstate_is_gigantic(h) && !gigantic_page_supported()) {
err = -EINVAL;
goto out;
@@ -1784,6 +1778,23 @@ out:
return err;
}
+static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
+ struct kobject *kobj, const char *buf,
+ size_t len)
+{
+ struct hstate *h;
+ unsigned long count;
+ int nid;
+ int err;
+
+ err = kstrtoul(buf, 10, &count);
+ if (err)
+ return err;
+
+ h = kobj_to_hstate(kobj, &nid);
+ return __nr_hugepages_store_common(obey_mempolicy, h, nid, count, len);
+}
+
static ssize_t nr_hugepages_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
@@ -1793,7 +1804,7 @@ static ssize_t nr_hugepages_show(struct kobject *kobj,
static ssize_t nr_hugepages_store(struct kobject *kobj,
struct kobj_attribute *attr, const char *buf, size_t len)
{
- return nr_hugepages_store_common(false, kobj, attr, buf, len);
+ return nr_hugepages_store_common(false, kobj, buf, len);
}
HSTATE_ATTR(nr_hugepages);
@@ -1812,7 +1823,7 @@ static ssize_t nr_hugepages_mempolicy_show(struct kobject *kobj,
static ssize_t nr_hugepages_mempolicy_store(struct kobject *kobj,
struct kobj_attribute *attr, const char *buf, size_t len)
{
- return nr_hugepages_store_common(true, kobj, attr, buf, len);
+ return nr_hugepages_store_common(true, kobj, buf, len);
}
HSTATE_ATTR(nr_hugepages_mempolicy);
#endif
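The refactor above splits nr_hugepages_store_common() in two: a thin sysfs wrapper that only parses the user string, and __nr_hugepages_store_common(), which resizes the pool from an already-parsed count so other entry points can reuse it. A minimal userspace sketch of the same parse/worker split; every name below is illustrative, and kstrtoul() is approximated with strtoul():

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Worker on an already-parsed count (shape of __nr_hugepages_store_common). */
    static long set_pool_size(int nid, unsigned long count)
    {
            printf("resize pool on node %d to %lu pages\n", nid, count);
            return 0;
    }

    /* String-facing wrapper (shape of nr_hugepages_store_common):
     * parse first, then delegate. */
    static long store_pool_size(int nid, const char *buf)
    {
            char *end;
            unsigned long count;

            errno = 0;
            count = strtoul(buf, &end, 10);
            if (errno || end == buf)
                    return -EINVAL;
            return set_pool_size(nid, count);
    }

    int main(void)
    {
            return (int)store_pool_size(0, "512");
    }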
@@ -2248,36 +2259,21 @@ static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
void __user *buffer, size_t *length, loff_t *ppos)
{
struct hstate *h = &default_hstate;
- unsigned long tmp;
+ unsigned long tmp = h->max_huge_pages;
int ret;
if (!hugepages_supported())
return -ENOTSUPP;
- tmp = h->max_huge_pages;
-
- if (write && hstate_is_gigantic(h) && !gigantic_page_supported())
- return -EINVAL;
-
table->data = &tmp;
table->maxlen = sizeof(unsigned long);
ret = proc_doulongvec_minmax(table, write, buffer, length, ppos);
if (ret)
goto out;
- if (write) {
- NODEMASK_ALLOC(nodemask_t, nodes_allowed,
- GFP_KERNEL | __GFP_NORETRY);
- if (!(obey_mempolicy &&
- init_nodemask_of_mempolicy(nodes_allowed))) {
- NODEMASK_FREE(nodes_allowed);
- nodes_allowed = &node_states[N_MEMORY];
- }
- h->max_huge_pages = set_max_huge_pages(h, tmp, nodes_allowed);
-
- if (nodes_allowed != &node_states[N_MEMORY])
- NODEMASK_FREE(nodes_allowed);
- }
+ if (write)
+ ret = __nr_hugepages_store_common(obey_mempolicy, h,
+ NUMA_NO_NODE, tmp, *length);
out:
return ret;
}
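With that split in place, the sysctl handler above can delegate to the same worker, passing NUMA_NO_NODE (the kernel's -1 sentinel) where the sysfs path passes a real node id. A toy sketch of the sentinel-node convention; resize_pool() is invented for illustration:

    #include <stdio.h>

    #define NUMA_NO_NODE (-1)       /* same sentinel value the kernel uses */

    /* Invented worker: a sentinel nid means "apply across all nodes". */
    static void resize_pool(int nid, unsigned long count)
    {
            if (nid == NUMA_NO_NODE)
                    printf("resize global pool to %lu\n", count);
            else
                    printf("resize node %d pool to %lu\n", nid, count);
    }

    int main(void)
    {
            resize_pool(NUMA_NO_NODE, 256);  /* sysctl-style caller */
            resize_pool(1, 128);             /* per-node sysfs-style caller */
            return 0;
    }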
@@ -2520,6 +2516,31 @@ static void set_huge_ptep_writable(struct vm_area_struct *vma,
update_mmu_cache(vma, address, ptep);
}
+static int is_hugetlb_entry_migration(pte_t pte)
+{
+ swp_entry_t swp;
+
+ if (huge_pte_none(pte) || pte_present(pte))
+ return 0;
+ swp = pte_to_swp_entry(pte);
+ if (non_swap_entry(swp) && is_migration_entry(swp))
+ return 1;
+ else
+ return 0;
+}
+
+static int is_hugetlb_entry_hwpoisoned(pte_t pte)
+{
+ swp_entry_t swp;
+
+ if (huge_pte_none(pte) || pte_present(pte))
+ return 0;
+ swp = pte_to_swp_entry(pte);
+ if (non_swap_entry(swp) && is_hwpoison_entry(swp))
+ return 1;
+ else
+ return 0;
+}
int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
struct vm_area_struct *vma)
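is_hugetlb_entry_migration() and is_hugetlb_entry_hwpoisoned() are only moved, not changed: they must now be defined before copy_hugetlb_page_range() uses them. Each rules out none/present PTEs first, then decodes the remaining software entry and classifies it. A toy userspace sketch of that decode-and-classify pattern; the entry states below are invented, the real swp_entry_t encoding is per-architecture:

    #include <stdbool.h>
    #include <stdio.h>

    /* Invented entry states standing in for pte/swp_entry_t. */
    enum entry_type { ENT_NONE, ENT_PRESENT, ENT_MIGRATION, ENT_HWPOISON };

    static bool entry_is(enum entry_type e, enum entry_type what)
    {
            if (e == ENT_NONE || e == ENT_PRESENT)
                    return false;           /* rule these out first */
            return e == what;               /* then classify the entry */
    }

    int main(void)
    {
            enum entry_type e = ENT_MIGRATION;

            printf("migration=%d hwpoison=%d\n",
                   entry_is(e, ENT_MIGRATION), entry_is(e, ENT_HWPOISON));
            return 0;
    }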
@@ -2559,7 +2580,24 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
dst_ptl = huge_pte_lock(h, dst, dst_pte);
src_ptl = huge_pte_lockptr(h, src, src_pte);
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
- if (!huge_pte_none(huge_ptep_get(src_pte))) {
+ entry = huge_ptep_get(src_pte);
+ if (huge_pte_none(entry)) { /* skip none entry */
+ ;
+ } else if (unlikely(is_hugetlb_entry_migration(entry) ||
+ is_hugetlb_entry_hwpoisoned(entry))) {
+ swp_entry_t swp_entry = pte_to_swp_entry(entry);
+
+ if (is_write_migration_entry(swp_entry) && cow) {
+ /*
+ * COW mappings require pages in both
+ * parent and child to be set to read.
+ */
+ make_migration_entry_read(&swp_entry);
+ entry = swp_entry_to_pte(swp_entry);
+ set_huge_pte_at(src, addr, src_pte, entry);
+ }
+ set_huge_pte_at(dst, addr, dst_pte, entry);
+ } else {
if (cow)
huge_ptep_set_wrprotect(src, addr, src_pte);
entry = huge_ptep_get(src_pte);
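The fork-path hunk above replaces the old two-way test with a three-way dispatch: empty slots are skipped, migration/hwpoison entries are copied into the child as-is (with writable migration entries demoted to read in both parent and child when the mapping is COW), and only present PTEs take the original wrprotect-and-copy path. A compressed userspace sketch of that dispatch, using an invented entry enum:

    #include <stdio.h>

    /* Invented PTE states for a fork-time copy loop. */
    enum pte { P_NONE, P_MIG_WRITE, P_MIG_READ, P_PRESENT };

    /* Three-way dispatch as in the hunk: skip empty slots, copy
     * software entries (demoting writable migration entries when the
     * mapping is COW so both sides fault read-only), wrprotect the rest. */
    static enum pte copy_one(enum pte src, int cow)
    {
            if (src == P_NONE)
                    return P_NONE;                  /* skip none entry */
            if (src == P_MIG_WRITE || src == P_MIG_READ) {
                    if (src == P_MIG_WRITE && cow)
                            src = P_MIG_READ;       /* read in both sides */
                    return src;                     /* child gets the entry */
            }
            return P_PRESENT;                       /* present: normal copy */
    }

    int main(void)
    {
            printf("child sees state %d\n", copy_one(P_MIG_WRITE, 1));
            return 0;
    }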
@@ -2578,32 +2616,6 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
return ret;
}
-static int is_hugetlb_entry_migration(pte_t pte)
-{
- swp_entry_t swp;
-
- if (huge_pte_none(pte) || pte_present(pte))
- return 0;
- swp = pte_to_swp_entry(pte);
- if (non_swap_entry(swp) && is_migration_entry(swp))
- return 1;
- else
- return 0;
-}
-
-static int is_hugetlb_entry_hwpoisoned(pte_t pte)
-{
- swp_entry_t swp;
-
- if (huge_pte_none(pte) || pte_present(pte))
- return 0;
- swp = pte_to_swp_entry(pte);
- if (non_swap_entry(swp) && is_hwpoison_entry(swp))
- return 1;
- else
- return 0;
-}
-
void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
unsigned long start, unsigned long end,
struct page *ref_page)
@@ -2738,8 +2750,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
* from other VMAs and let the children be SIGKILLed if they are faulting the
* same region.
*/
-static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
- struct page *page, unsigned long address)
+static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
+ struct page *page, unsigned long address)
{
struct hstate *h = hstate_vma(vma);
struct vm_area_struct *iter_vma;
@@ -2778,8 +2790,6 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
address + huge_page_size(h), page);
}
mutex_unlock(&mapping->i_mmap_mutex);
-
- return 1;
}
/*
@@ -2794,7 +2804,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
{
struct hstate *h = hstate_vma(vma);
struct page *old_page, *new_page;
- int outside_reserve = 0;
+ int ret = 0, outside_reserve = 0;
unsigned long mmun_start; /* For mmu_notifiers */
unsigned long mmun_end; /* For mmu_notifiers */
@@ -2824,14 +2834,14 @@ retry_avoidcopy:
page_cache_get(old_page);
- /* Drop page table lock as buddy allocator may be called */
+ /*
+ * Drop page table lock as buddy allocator may be called. It will
+ * be acquired again before returning to the caller, as expected.
+ */
spin_unlock(ptl);
new_page = alloc_huge_page(vma, address, outside_reserve);
if (IS_ERR(new_page)) {
- long err = PTR_ERR(new_page);
- page_cache_release(old_page);
-
/*
* If a process owning a MAP_PRIVATE mapping fails to COW,
* it is due to references held by a child and an insufficient
@@ -2840,29 +2850,25 @@ retry_avoidcopy:
* may get SIGKILLed if it later faults.
*/
if (outside_reserve) {
+ page_cache_release(old_page);
BUG_ON(huge_pte_none(pte));
- if (unmap_ref_private(mm, vma, old_page, address)) {
- BUG_ON(huge_pte_none(pte));
- spin_lock(ptl);
- ptep = huge_pte_offset(mm, address & huge_page_mask(h));
- if (likely(ptep &&
- pte_same(huge_ptep_get(ptep), pte)))
- goto retry_avoidcopy;
- /*
- * race occurs while re-acquiring page table
- * lock, and our job is done.
- */
- return 0;
- }
- WARN_ON_ONCE(1);
+ unmap_ref_private(mm, vma, old_page, address);
+ BUG_ON(huge_pte_none(pte));
+ spin_lock(ptl);
+ ptep = huge_pte_offset(mm, address & huge_page_mask(h));
+ if (likely(ptep &&
+ pte_same(huge_ptep_get(ptep), pte)))
+ goto retry_avoidcopy;
+ /*
+ * A race occurred while re-acquiring the page
+ * table lock; our job is done.
+ */
+ return 0;
}
- /* Caller expects lock to be held */
- spin_lock(ptl);
- if (err == -ENOMEM)
- return VM_FAULT_OOM;
- else
- return VM_FAULT_SIGBUS;
+ ret = (PTR_ERR(new_page) == -ENOMEM) ?
+ VM_FAULT_OOM : VM_FAULT_SIGBUS;
+ goto out_release_old;
}
/*
@@ -2870,11 +2876,8 @@ retry_avoidcopy:
* anon_vma prepared.
*/
if (unlikely(anon_vma_prepare(vma))) {
- page_cache_release(new_page);
- page_cache_release(old_page);
- /* Caller expects lock to be held */
- spin_lock(ptl);
- return VM_FAULT_OOM;
+ ret = VM_FAULT_OOM;
+ goto out_release_all;
}
copy_user_huge_page(new_page, old_page, address, vma,
@@ -2884,6 +2887,7 @@ retry_avoidcopy:
mmun_start = address & huge_page_mask(h);
mmun_end = mmun_start + huge_page_size(h);
mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+
/*
* Retake the page table lock to check for racing updates
* before the page tables are altered
@@ -2904,12 +2908,13 @@ retry_avoidcopy:
}
spin_unlock(ptl);
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+out_release_all:
page_cache_release(new_page);
+out_release_old:
page_cache_release(old_page);
- /* Caller expects lock to be held */
- spin_lock(ptl);
- return 0;
+ spin_lock(ptl); /* Caller expects lock to be held */
+ return ret;
}
/* Return the pagecache page at a given address within a VMA */
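The hugetlb_cow() rewrite above is the standard goto-unwind idiom: each failure jumps to a label that releases exactly what is held at that point, and the success path falls through the same labels, eliminating the duplicated release-and-relock sequences of the old code. A self-contained userspace sketch of the idiom; malloc/free stand in for the page references, and setup_fails() is an invented failure point:

    #include <errno.h>
    #include <stdlib.h>

    static int setup_fails(void) { return 1; }  /* invented failure point */

    static int do_cow(void)
    {
            int ret = 0;
            char *old_ref, *new_ref;

            old_ref = malloc(64);           /* page_cache_get(old_page) */
            if (!old_ref)
                    return -ENOMEM;

            new_ref = malloc(64);           /* alloc_huge_page() */
            if (!new_ref) {
                    ret = -ENOMEM;
                    goto out_release_old;   /* only old_ref held here */
            }

            if (setup_fails()) {
                    ret = -EIO;
                    goto out_release_all;   /* both refs held here */
            }

            /* success path falls through the same labels */
    out_release_all:
            free(new_ref);
    out_release_old:
            free(old_ref);
            return ret;
    }

    int main(void)
    {
            return do_cow() ? 1 : 0;
    }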