aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/huge_memory.c38
-rw-r--r--mm/mempolicy.c63
-rw-r--r--mm/shmem.c2
3 files changed, 34 insertions, 69 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 4e4ef8fa479d..55478ab3c83b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -629,21 +629,40 @@ release:
* available
* never: never stall for any thp allocation
*/
-static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma)
+static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, unsigned long addr)
{
const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
+ gfp_t this_node = 0;
+
+#ifdef CONFIG_NUMA
+ struct mempolicy *pol;
+ /*
+ * __GFP_THISNODE is used only when __GFP_DIRECT_RECLAIM is not
+ * specified, to express a general desire to stay on the current
+ * node for optimistic allocation attempts. If the defrag mode
+ * and/or madvise hint requires the direct reclaim then we prefer
+ * to fallback to other node rather than node reclaim because that
+ * can lead to excessive reclaim even though there is free memory
+ * on other nodes. We expect that NUMA preferences are specified
+ * by memory policies.
+ */
+ pol = get_vma_policy(vma, addr);
+ if (pol->mode != MPOL_BIND)
+ this_node = __GFP_THISNODE;
+ mpol_cond_put(pol);
+#endif
if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);
if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
- return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM;
+ return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM | this_node;
if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags))
return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
- __GFP_KSWAPD_RECLAIM);
+ __GFP_KSWAPD_RECLAIM | this_node);
if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
- 0);
- return GFP_TRANSHUGE_LIGHT;
+ this_node);
+ return GFP_TRANSHUGE_LIGHT | this_node;
}
/* Caller must hold page table lock. */
@@ -715,8 +734,8 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
pte_free(vma->vm_mm, pgtable);
return ret;
}
- gfp = alloc_hugepage_direct_gfpmask(vma);
- page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
+ gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
+ page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, vma, haddr, numa_node_id());
if (unlikely(!page)) {
count_vm_event(THP_FAULT_FALLBACK);
return VM_FAULT_FALLBACK;
@@ -1286,8 +1305,9 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
alloc:
if (transparent_hugepage_enabled(vma) &&
!transparent_hugepage_debug_cow()) {
- huge_gfp = alloc_hugepage_direct_gfpmask(vma);
- new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER);
+ huge_gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
+ new_page = alloc_pages_vma(huge_gfp, HPAGE_PMD_ORDER, vma,
+ haddr, numa_node_id());
} else
new_page = NULL;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 58fb833fce0c..5837a067124d 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1116,8 +1116,8 @@ static struct page *new_page(struct page *page, unsigned long start)
} else if (PageTransHuge(page)) {
struct page *thp;
- thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address,
- HPAGE_PMD_ORDER);
+ thp = alloc_pages_vma(GFP_TRANSHUGE, HPAGE_PMD_ORDER, vma,
+ address, numa_node_id());
if (!thp)
return NULL;
prep_transhuge_page(thp);
@@ -1662,7 +1662,7 @@ struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
* freeing by another task. It is the caller's responsibility to free the
* extra reference for shared policies.
*/
-static struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
+struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
unsigned long addr)
{
struct mempolicy *pol = __get_vma_policy(vma, addr);
@@ -2011,7 +2011,6 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
* @vma: Pointer to VMA or NULL if not available.
* @addr: Virtual Address of the allocation. Must be inside the VMA.
* @node: Which node to prefer for allocation (modulo policy).
- * @hugepage: for hugepages try only the preferred node if possible
*
* This function allocates a page from the kernel page pool and applies
* a NUMA policy associated with the VMA or the current process.
@@ -2022,7 +2021,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
*/
struct page *
alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
- unsigned long addr, int node, bool hugepage)
+ unsigned long addr, int node)
{
struct mempolicy *pol;
struct page *page;
@@ -2040,60 +2039,6 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
goto out;
}
- if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) {
- int hpage_node = node;
-
- /*
- * For hugepage allocation and non-interleave policy which
- * allows the current node (or other explicitly preferred
- * node) we only try to allocate from the current/preferred
- * node and don't fall back to other nodes, as the cost of
- * remote accesses would likely offset THP benefits.
- *
- * If the policy is interleave, or does not allow the current
- * node in its nodemask, we allocate the standard way.
- */
- if (pol->mode == MPOL_PREFERRED &&
- !(pol->flags & MPOL_F_LOCAL))
- hpage_node = pol->v.preferred_node;
-
- nmask = policy_nodemask(gfp, pol);
- if (!nmask || node_isset(hpage_node, *nmask)) {
- mpol_cond_put(pol);
- /*
- * We cannot invoke reclaim if __GFP_THISNODE
- * is set. Invoking reclaim with
- * __GFP_THISNODE set, would cause THP
- * allocations to trigger heavy swapping
- * despite there may be tons of free memory
- * (including potentially plenty of THP
- * already available in the buddy) on all the
- * other NUMA nodes.
- *
- * At most we could invoke compaction when
- * __GFP_THISNODE is set (but we would need to
- * refrain from invoking reclaim even if
- * compaction returned COMPACT_SKIPPED because
- * there wasn't not enough memory to succeed
- * compaction). For now just avoid
- * __GFP_THISNODE instead of limiting the
- * allocation path to a strict and single
- * compaction invocation.
- *
- * Supposedly if direct reclaim was enabled by
- * the caller, the app prefers THP regardless
- * of the node it comes from so this would be
- * more desiderable behavior than only
- * providing THP originated from the local
- * node in such case.
- */
- if (!(gfp & __GFP_DIRECT_RECLAIM))
- gfp |= __GFP_THISNODE;
- page = __alloc_pages_node(hpage_node, gfp, order);
- goto out;
- }
- }
-
nmask = policy_nodemask(gfp, pol);
preferred_nid = policy_node(gfp, pol, node);
page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask);
diff --git a/mm/shmem.c b/mm/shmem.c
index 56bf122e0bb4..ea26d7a0342d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1435,7 +1435,7 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
shmem_pseudo_vma_init(&pvma, info, hindex);
page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
- HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true);
+ HPAGE_PMD_ORDER, &pvma, 0, numa_node_id());
shmem_pseudo_vma_destroy(&pvma);
if (page)
prep_transhuge_page(page);