Diffstat (limited to 'mm/mempolicy.c')
-rw-r--r--  mm/mempolicy.c  117
1 file changed, 102 insertions, 15 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 977c641f78cf..48ba9729062e 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -127,6 +127,32 @@ static struct mempolicy default_policy = {
static struct mempolicy preferred_node_policy[MAX_NUMNODES];
+/**
+ * numa_map_to_online_node - Find closest online node
+ * @node: Node id to start the search
+ *
+ * Look up the next closest node by distance if @node is not online.
+ */
+int numa_map_to_online_node(int node)
+{
+ int min_dist = INT_MAX, dist, n, min_node;
+
+ if (node == NUMA_NO_NODE || node_online(node))
+ return node;
+
+ min_node = node;
+ for_each_online_node(n) {
+ dist = node_distance(node, n);
+ if (dist < min_dist) {
+ min_dist = dist;
+ min_node = n;
+ }
+ }
+
+ return min_node;
+}
+EXPORT_SYMBOL_GPL(numa_map_to_online_node);
+
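/*
 * Illustrative sketch, not part of the patch: a minimal example of how a
 * caller might use the newly exported helper above. The wrapper
 * example_pick_node() and its target_nid parameter are hypothetical;
 * only int numa_map_to_online_node(int node) is taken from the patch,
 * and its declaration is assumed to be visible via <linux/mempolicy.h>.
 */
static int example_pick_node(int target_nid)
{
	/* Use the hint as-is if it is online, else the nearest online node. */
	return numa_map_to_online_node(target_nid);
}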
struct mempolicy *get_task_policy(struct task_struct *p)
{
struct mempolicy *pol = p->mempolicy;
@@ -442,6 +468,7 @@ static inline bool queue_pages_required(struct page *page,
*/
static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
unsigned long end, struct mm_walk *walk)
+ __releases(ptl)
{
int ret = 0;
struct page *page;
@@ -557,9 +584,10 @@ static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
unsigned long addr, unsigned long end,
struct mm_walk *walk)
{
+ int ret = 0;
#ifdef CONFIG_HUGETLB_PAGE
struct queue_pages *qp = walk->private;
- unsigned long flags = qp->flags;
+ unsigned long flags = (qp->flags & MPOL_MF_VALID);
struct page *page;
spinlock_t *ptl;
pte_t entry;
@@ -571,16 +599,44 @@ static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
page = pte_page(entry);
if (!queue_pages_required(page, qp))
goto unlock;
+
+ if (flags == MPOL_MF_STRICT) {
+ /*
+ * STRICT alone means only detecting misplaced pages; there is
+ * no need to check the remaining VMAs.
+ */
+ ret = -EIO;
+ goto unlock;
+ }
+
+ if (!vma_migratable(walk->vma)) {
+ /*
+ * Must be STRICT with MOVE*, otherwise .test_walk() would
+ * have stopped walking the current vma.
+ * A misplaced page was detected, but allow migrating the
+ * pages which have already been queued.
+ */
+ ret = 1;
+ goto unlock;
+ }
+
/* With MPOL_MF_MOVE, we migrate only unshared hugepage. */
if (flags & (MPOL_MF_MOVE_ALL) ||
- (flags & MPOL_MF_MOVE && page_mapcount(page) == 1))
- isolate_huge_page(page, qp->pagelist);
+ (flags & MPOL_MF_MOVE && page_mapcount(page) == 1)) {
+ if (!isolate_huge_page(page, qp->pagelist) &&
+ (flags & MPOL_MF_STRICT))
+ /*
+ * Failed to isolate the page, but allow migrating
+ * pages which have already been queued.
+ */
+ ret = 1;
+ }
unlock:
spin_unlock(ptl);
#else
BUG();
#endif
- return 0;
+ return ret;
}
#ifdef CONFIG_NUMA_BALANCING
@@ -598,7 +654,7 @@ unsigned long change_prot_numa(struct vm_area_struct *vma,
{
int nr_updated;
- nr_updated = change_protection(vma, addr, end, PAGE_NONE, 0, 1);
+ nr_updated = change_protection(vma, addr, end, PAGE_NONE, MM_CP_PROT_NUMA);
if (nr_updated)
count_vm_numa_events(NUMA_PTE_UPDATES, nr_updated);
@@ -621,7 +677,7 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
unsigned long flags = qp->flags;
/* range check first */
- VM_BUG_ON((vma->vm_start > start) || (vma->vm_end < end));
+ VM_BUG_ON_VMA((vma->vm_start > start) || (vma->vm_end < end), vma);
if (!qp->first) {
qp->first = vma;
@@ -649,8 +705,7 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
if (flags & MPOL_MF_LAZY) {
/* Similar to task_numa_work, skip inaccessible VMAs */
- if (!is_vm_hugetlb_page(vma) &&
- (vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)) &&
+ if (!is_vm_hugetlb_page(vma) && vma_is_accessible(vma) &&
!(vma->vm_flags & VM_MIXEDMAP))
change_prot_numa(vma, start, endvma);
return 1;
@@ -852,7 +907,6 @@ static void get_policy_nodemask(struct mempolicy *p, nodemask_t *nodes)
switch (p->mode) {
case MPOL_BIND:
- /* Fall through */
case MPOL_INTERLEAVE:
*nodes = p->v.nodes;
break;
@@ -868,12 +922,15 @@ static void get_policy_nodemask(struct mempolicy *p, nodemask_t *nodes)
static int lookup_node(struct mm_struct *mm, unsigned long addr)
{
- struct page *p;
+ struct page *p = NULL;
int err;
int locked = 1;
err = get_user_pages_locked(addr & PAGE_MASK, 1, 0, &p, &locked);
- if (err >= 0) {
+ if (err == 0) {
+ /* E.g. GUP interrupted by fatal signal */
+ err = -EFAULT;
+ } else if (err > 0) {
err = page_to_nid(p);
put_page(p);
}
@@ -994,7 +1051,7 @@ static int migrate_page_add(struct page *page, struct list_head *pagelist,
if (!isolate_lru_page(head)) {
list_add_tail(&head->lru, pagelist);
mod_node_page_state(page_pgdat(head),
- NR_ISOLATED_ANON + page_is_file_cache(head),
+ NR_ISOLATED_ANON + page_is_file_lru(head),
hpage_nr_pages(head));
} else if (flags & MPOL_MF_STRICT) {
/*
@@ -1714,6 +1771,34 @@ COMPAT_SYSCALL_DEFINE4(migrate_pages, compat_pid_t, pid,
#endif /* CONFIG_COMPAT */
+bool vma_migratable(struct vm_area_struct *vma)
+{
+ if (vma->vm_flags & (VM_IO | VM_PFNMAP))
+ return false;
+
+ /*
+ * DAX device mappings require predictable access latency, so avoid
+ * incurring periodic faults.
+ */
+ if (vma_is_dax(vma))
+ return false;
+
+ if (is_vm_hugetlb_page(vma) &&
+ !hugepage_migration_supported(hstate_vma(vma)))
+ return false;
+
+ /*
+ * Migration allocates pages in the highest zone. If we cannot
+ * do so then migration (at least from node to node) is not
+ * possible.
+ */
+ if (vma->vm_file &&
+ gfp_zone(mapping_gfp_mask(vma->vm_file->f_mapping))
+ < policy_zone)
+ return false;
+ return true;
+}
+
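/*
 * Illustrative sketch, not part of the patch: with vma_migratable() now
 * defined out of line above, a page-walk helper can gate queueing on it,
 * roughly as the queue_pages_hugetlb() hunk earlier does. The function
 * example_should_queue() is hypothetical; only vma_migratable() and the
 * MPOL_MF_MOVE* flags are taken from existing code.
 */
static bool example_should_queue(struct vm_area_struct *vma,
				 unsigned long flags)
{
	/* Nothing can be queued if the VMA's pages are not migratable. */
	if (!vma_migratable(vma))
		return false;

	/* Queue only when the caller actually asked to move pages. */
	return flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL);
}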
struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
unsigned long addr)
{
@@ -2009,7 +2094,6 @@ bool init_nodemask_of_mempolicy(nodemask_t *mask)
break;
case MPOL_BIND:
- /* Fall through */
case MPOL_INTERLEAVE:
*mask = mempolicy->v.nodes;
break;
@@ -2276,7 +2360,6 @@ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b)
switch (a->mode) {
case MPOL_BIND:
- /* Fall through */
case MPOL_INTERLEAVE:
return !!nodes_equal(a->v.nodes, b->v.nodes);
case MPOL_PREFERRED:
@@ -2841,7 +2924,9 @@ int mpol_parse_str(char *str, struct mempolicy **mpol)
switch (mode) {
case MPOL_PREFERRED:
/*
- * Insist on a nodelist of one node only
+ * Insist on a nodelist of one node only; later we use
+ * first_node(nodes) to grab a single node, so the
+ * nodelist (i.e. nodes) must not be empty.
*/
if (nodelist) {
char *rest = nodelist;
@@ -2849,6 +2934,8 @@ int mpol_parse_str(char *str, struct mempolicy **mpol)
rest++;
if (*rest)
goto out;
+ if (nodes_empty(nodes))
+ goto out;
}
break;
case MPOL_INTERLEAVE: