aboutsummaryrefslogtreecommitdiffstats
path: root/mm/shmem.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/shmem.c')
-rw-r--r--mm/shmem.c359
1 files changed, 298 insertions, 61 deletions
diff --git a/mm/shmem.c b/mm/shmem.c
index 831b52dfd56e..2faa9daaf54b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -131,6 +131,13 @@ struct shmem_options {
#define SHMEM_SEEN_QUOTA 32
};
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static unsigned long huge_shmem_orders_always __read_mostly;
+static unsigned long huge_shmem_orders_madvise __read_mostly;
+static unsigned long huge_shmem_orders_inherit __read_mostly;
+static unsigned long huge_shmem_orders_within_size __read_mostly;
+#endif
+
#ifdef CONFIG_TMPFS
static unsigned long shmem_default_max_blocks(void)
{
@@ -1614,73 +1621,174 @@ static gfp_t limit_gfp_mask(gfp_t huge_gfp, gfp_t limit_gfp)
return result;
}
-static struct folio *shmem_alloc_hugefolio(gfp_t gfp,
- struct shmem_inode_info *info, pgoff_t index)
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+unsigned long shmem_allowable_huge_orders(struct inode *inode,
+ struct vm_area_struct *vma, pgoff_t index,
+ bool global_huge)
{
- struct mempolicy *mpol;
- pgoff_t ilx;
- struct page *page;
+ unsigned long mask = READ_ONCE(huge_shmem_orders_always);
+ unsigned long within_size_orders = READ_ONCE(huge_shmem_orders_within_size);
+ unsigned long vm_flags = vma->vm_flags;
+ /*
+ * Check all the (large) orders below HPAGE_PMD_ORDER + 1 that
+ * are enabled for this vma.
+ */
+ unsigned long orders = BIT(PMD_ORDER + 1) - 1;
+ loff_t i_size;
+ int order;
- mpol = shmem_get_pgoff_policy(info, index, HPAGE_PMD_ORDER, &ilx);
- page = alloc_pages_mpol(gfp, HPAGE_PMD_ORDER, mpol, ilx, numa_node_id());
- mpol_cond_put(mpol);
+ if ((vm_flags & VM_NOHUGEPAGE) ||
+ test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
+ return 0;
- return page_rmappable_folio(page);
+ /* If the hardware/firmware marked hugepage support disabled. */
+ if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_UNSUPPORTED))
+ return 0;
+
+ /*
+ * Following the 'deny' semantics of the top level, force the huge
+ * option off from all mounts.
+ */
+ if (shmem_huge == SHMEM_HUGE_DENY)
+ return 0;
+
+ /*
+ * Only allow inherit orders if the top-level value is 'force', which
+ * means non-PMD sized THP can not override 'huge' mount option now.
+ */
+ if (shmem_huge == SHMEM_HUGE_FORCE)
+ return READ_ONCE(huge_shmem_orders_inherit);
+
+ /* Allow mTHP that will be fully within i_size. */
+ order = highest_order(within_size_orders);
+ while (within_size_orders) {
+ index = round_up(index + 1, order);
+ i_size = round_up(i_size_read(inode), PAGE_SIZE);
+ if (i_size >> PAGE_SHIFT >= index) {
+ mask |= within_size_orders;
+ break;
+ }
+
+ order = next_order(&within_size_orders, order);
+ }
+
+ if (vm_flags & VM_HUGEPAGE)
+ mask |= READ_ONCE(huge_shmem_orders_madvise);
+
+ if (global_huge)
+ mask |= READ_ONCE(huge_shmem_orders_inherit);
+
+ return orders & mask;
+}
+
+static unsigned long shmem_suitable_orders(struct inode *inode, struct vm_fault *vmf,
+ struct address_space *mapping, pgoff_t index,
+ unsigned long orders)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ unsigned long pages;
+ int order;
+
+ orders = thp_vma_suitable_orders(vma, vmf->address, orders);
+ if (!orders)
+ return 0;
+
+ /* Find the highest order that can add into the page cache */
+ order = highest_order(orders);
+ while (orders) {
+ pages = 1UL << order;
+ index = round_down(index, pages);
+ if (!xa_find(&mapping->i_pages, &index,
+ index + pages - 1, XA_PRESENT))
+ break;
+ order = next_order(&orders, order);
+ }
+
+ return orders;
+}
+#else
+static unsigned long shmem_suitable_orders(struct inode *inode, struct vm_fault *vmf,
+ struct address_space *mapping, pgoff_t index,
+ unsigned long orders)
+{
+ return 0;
}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-static struct folio *shmem_alloc_folio(gfp_t gfp,
+static struct folio *shmem_alloc_folio(gfp_t gfp, int order,
struct shmem_inode_info *info, pgoff_t index)
{
struct mempolicy *mpol;
pgoff_t ilx;
- struct page *page;
+ struct folio *folio;
- mpol = shmem_get_pgoff_policy(info, index, 0, &ilx);
- page = alloc_pages_mpol(gfp, 0, mpol, ilx, numa_node_id());
+ mpol = shmem_get_pgoff_policy(info, index, order, &ilx);
+ folio = folio_alloc_mpol(gfp, order, mpol, ilx, numa_node_id());
mpol_cond_put(mpol);
- return (struct folio *)page;
+ return folio;
}
-static struct folio *shmem_alloc_and_add_folio(gfp_t gfp,
- struct inode *inode, pgoff_t index,
- struct mm_struct *fault_mm, bool huge)
+static struct folio *shmem_alloc_and_add_folio(struct vm_fault *vmf,
+ gfp_t gfp, struct inode *inode, pgoff_t index,
+ struct mm_struct *fault_mm, unsigned long orders)
{
struct address_space *mapping = inode->i_mapping;
struct shmem_inode_info *info = SHMEM_I(inode);
- struct folio *folio;
+ struct vm_area_struct *vma = vmf ? vmf->vma : NULL;
+ unsigned long suitable_orders = 0;
+ struct folio *folio = NULL;
long pages;
- int error;
+ int error, order;
if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
- huge = false;
+ orders = 0;
- if (huge) {
- pages = HPAGE_PMD_NR;
- index = round_down(index, HPAGE_PMD_NR);
+ if (orders > 0) {
+ if (vma && vma_is_anon_shmem(vma)) {
+ suitable_orders = shmem_suitable_orders(inode, vmf,
+ mapping, index, orders);
+ } else if (orders & BIT(HPAGE_PMD_ORDER)) {
+ pages = HPAGE_PMD_NR;
+ suitable_orders = BIT(HPAGE_PMD_ORDER);
+ index = round_down(index, HPAGE_PMD_NR);
- /*
- * Check for conflict before waiting on a huge allocation.
- * Conflict might be that a huge page has just been allocated
- * and added to page cache by a racing thread, or that there
- * is already at least one small page in the huge extent.
- * Be careful to retry when appropriate, but not forever!
- * Elsewhere -EEXIST would be the right code, but not here.
- */
- if (xa_find(&mapping->i_pages, &index,
- index + HPAGE_PMD_NR - 1, XA_PRESENT))
- return ERR_PTR(-E2BIG);
+ /*
+ * Check for conflict before waiting on a huge allocation.
+ * Conflict might be that a huge page has just been allocated
+ * and added to page cache by a racing thread, or that there
+ * is already at least one small page in the huge extent.
+ * Be careful to retry when appropriate, but not forever!
+ * Elsewhere -EEXIST would be the right code, but not here.
+ */
+ if (xa_find(&mapping->i_pages, &index,
+ index + HPAGE_PMD_NR - 1, XA_PRESENT))
+ return ERR_PTR(-E2BIG);
+ }
- folio = shmem_alloc_hugefolio(gfp, info, index);
- if (!folio)
- count_vm_event(THP_FILE_FALLBACK);
+ order = highest_order(suitable_orders);
+ while (suitable_orders) {
+ pages = 1UL << order;
+ index = round_down(index, pages);
+ folio = shmem_alloc_folio(gfp, order, info, index);
+ if (folio)
+ goto allocated;
+
+ if (pages == HPAGE_PMD_NR)
+ count_vm_event(THP_FILE_FALLBACK);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ count_mthp_stat(order, MTHP_STAT_SHMEM_FALLBACK);
+#endif
+ order = next_order(&suitable_orders, order);
+ }
} else {
pages = 1;
- folio = shmem_alloc_folio(gfp, info, index);
+ folio = shmem_alloc_folio(gfp, 0, info, index);
}
if (!folio)
return ERR_PTR(-ENOMEM);
+allocated:
__folio_set_locked(folio);
__folio_set_swapbacked(folio);
@@ -1690,9 +1798,15 @@ static struct folio *shmem_alloc_and_add_folio(gfp_t gfp,
if (xa_find(&mapping->i_pages, &index,
index + pages - 1, XA_PRESENT)) {
error = -EEXIST;
- } else if (huge) {
- count_vm_event(THP_FILE_FALLBACK);
- count_vm_event(THP_FILE_FALLBACK_CHARGE);
+ } else if (pages > 1) {
+ if (pages == HPAGE_PMD_NR) {
+ count_vm_event(THP_FILE_FALLBACK);
+ count_vm_event(THP_FILE_FALLBACK_CHARGE);
+ }
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ count_mthp_stat(folio_order(folio), MTHP_STAT_SHMEM_FALLBACK);
+ count_mthp_stat(folio_order(folio), MTHP_STAT_SHMEM_FALLBACK_CHARGE);
+#endif
}
goto unlock;
}
@@ -1767,7 +1881,7 @@ static int shmem_replace_folio(struct folio **foliop, gfp_t gfp,
old = *foliop;
entry = old->swap;
- swap_index = swp_offset(entry);
+ swap_index = swap_cache_index(entry);
swap_mapping = swap_address_space(entry);
/*
@@ -1776,7 +1890,7 @@ static int shmem_replace_folio(struct folio **foliop, gfp_t gfp,
*/
gfp &= ~GFP_CONSTRAINT_MASK;
VM_BUG_ON_FOLIO(folio_test_large(old), old);
- new = shmem_alloc_folio(gfp, info, index);
+ new = shmem_alloc_folio(gfp, 0, info, index);
if (!new)
return -ENOMEM;
@@ -1975,7 +2089,8 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
struct mm_struct *fault_mm;
struct folio *folio;
int error;
- bool alloced;
+ bool alloced, huge;
+ unsigned long orders = 0;
if (WARN_ON_ONCE(!shmem_mapping(inode->i_mapping)))
return -EINVAL;
@@ -2047,23 +2162,34 @@ repeat:
return 0;
}
- if (shmem_is_huge(inode, index, false, fault_mm,
- vma ? vma->vm_flags : 0)) {
+ huge = shmem_is_huge(inode, index, false, fault_mm,
+ vma ? vma->vm_flags : 0);
+ /* Find hugepage orders that are allowed for anonymous shmem. */
+ if (vma && vma_is_anon_shmem(vma))
+ orders = shmem_allowable_huge_orders(inode, vma, index, huge);
+ else if (huge)
+ orders = BIT(HPAGE_PMD_ORDER);
+
+ if (orders > 0) {
gfp_t huge_gfp;
huge_gfp = vma_thp_gfp_mask(vma);
huge_gfp = limit_gfp_mask(huge_gfp, gfp);
- folio = shmem_alloc_and_add_folio(huge_gfp,
- inode, index, fault_mm, true);
+ folio = shmem_alloc_and_add_folio(vmf, huge_gfp,
+ inode, index, fault_mm, orders);
if (!IS_ERR(folio)) {
- count_vm_event(THP_FILE_ALLOC);
+ if (folio_test_pmd_mappable(folio))
+ count_vm_event(THP_FILE_ALLOC);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ count_mthp_stat(folio_order(folio), MTHP_STAT_SHMEM_ALLOC);
+#endif
goto alloced;
}
if (PTR_ERR(folio) == -EEXIST)
goto repeat;
}
- folio = shmem_alloc_and_add_folio(gfp, inode, index, fault_mm, false);
+ folio = shmem_alloc_and_add_folio(vmf, gfp, inode, index, fault_mm, 0);
if (IS_ERR(folio)) {
error = PTR_ERR(folio);
if (error == -EEXIST)
@@ -2074,7 +2200,7 @@ repeat:
alloced:
alloced = true;
- if (folio_test_pmd_mappable(folio) &&
+ if (folio_test_large(folio) &&
DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE) <
folio_next_index(folio) - 1) {
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
@@ -2283,6 +2409,7 @@ unsigned long shmem_get_unmapped_area(struct file *file,
unsigned long inflated_len;
unsigned long inflated_addr;
unsigned long inflated_offset;
+ unsigned long hpage_size;
if (len > TASK_SIZE)
return -ENOMEM;
@@ -2301,8 +2428,6 @@ unsigned long shmem_get_unmapped_area(struct file *file,
if (shmem_huge == SHMEM_HUGE_DENY)
return addr;
- if (len < HPAGE_PMD_SIZE)
- return addr;
if (flags & MAP_FIXED)
return addr;
/*
@@ -2314,8 +2439,11 @@ unsigned long shmem_get_unmapped_area(struct file *file,
if (uaddr == addr)
return addr;
+ hpage_size = HPAGE_PMD_SIZE;
if (shmem_huge != SHMEM_HUGE_FORCE) {
struct super_block *sb;
+ unsigned long __maybe_unused hpage_orders;
+ int order = 0;
if (file) {
VM_BUG_ON(file->f_op != &shmem_file_operations);
@@ -2328,18 +2456,38 @@ unsigned long shmem_get_unmapped_area(struct file *file,
if (IS_ERR(shm_mnt))
return addr;
sb = shm_mnt->mnt_sb;
+
+ /*
+ * Find the highest mTHP order used for anonymous shmem to
+ * provide a suitable alignment address.
+ */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ hpage_orders = READ_ONCE(huge_shmem_orders_always);
+ hpage_orders |= READ_ONCE(huge_shmem_orders_within_size);
+ hpage_orders |= READ_ONCE(huge_shmem_orders_madvise);
+ if (SHMEM_SB(sb)->huge != SHMEM_HUGE_NEVER)
+ hpage_orders |= READ_ONCE(huge_shmem_orders_inherit);
+
+ if (hpage_orders > 0) {
+ order = highest_order(hpage_orders);
+ hpage_size = PAGE_SIZE << order;
+ }
+#endif
}
- if (SHMEM_SB(sb)->huge == SHMEM_HUGE_NEVER)
+ if (SHMEM_SB(sb)->huge == SHMEM_HUGE_NEVER && !order)
return addr;
}
- offset = (pgoff << PAGE_SHIFT) & (HPAGE_PMD_SIZE-1);
- if (offset && offset + len < 2 * HPAGE_PMD_SIZE)
+ if (len < hpage_size)
return addr;
- if ((addr & (HPAGE_PMD_SIZE-1)) == offset)
+
+ offset = (pgoff << PAGE_SHIFT) & (hpage_size - 1);
+ if (offset && offset + len < 2 * hpage_size)
+ return addr;
+ if ((addr & (hpage_size - 1)) == offset)
return addr;
- inflated_len = len + HPAGE_PMD_SIZE - PAGE_SIZE;
+ inflated_len = len + hpage_size - PAGE_SIZE;
if (inflated_len > TASK_SIZE)
return addr;
if (inflated_len < len)
@@ -2352,10 +2500,10 @@ unsigned long shmem_get_unmapped_area(struct file *file,
if (inflated_addr & ~PAGE_MASK)
return addr;
- inflated_offset = inflated_addr & (HPAGE_PMD_SIZE-1);
+ inflated_offset = inflated_addr & (hpage_size - 1);
inflated_addr += offset - inflated_offset;
if (inflated_offset > offset)
- inflated_addr += HPAGE_PMD_SIZE;
+ inflated_addr += hpage_size;
if (inflated_addr > TASK_SIZE - len)
return addr;
@@ -2644,7 +2792,7 @@ int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
if (!*foliop) {
ret = -ENOMEM;
- folio = shmem_alloc_folio(gfp, info, pgoff);
+ folio = shmem_alloc_folio(gfp, 0, info, pgoff);
if (!folio)
goto out_unacct_blocks;
@@ -4695,6 +4843,12 @@ void __init shmem_init(void)
SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge;
else
shmem_huge = SHMEM_HUGE_NEVER; /* just in case it was patched */
+
+ /*
+ * Default to setting PMD-sized THP to inherit the global setting and
+ * disable all other multi-size THPs.
+ */
+ huge_shmem_orders_inherit = BIT(HPAGE_PMD_ORDER);
#endif
return;
@@ -4754,6 +4908,11 @@ static ssize_t shmem_enabled_store(struct kobject *kobj,
huge != SHMEM_HUGE_NEVER && huge != SHMEM_HUGE_DENY)
return -EINVAL;
+ /* Do not override huge allocation policy with non-PMD sized mTHP */
+ if (huge == SHMEM_HUGE_FORCE &&
+ huge_shmem_orders_inherit != BIT(HPAGE_PMD_ORDER))
+ return -EINVAL;
+
shmem_huge = huge;
if (shmem_huge > SHMEM_HUGE_DENY)
SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge;
@@ -4761,6 +4920,84 @@ static ssize_t shmem_enabled_store(struct kobject *kobj,
}
struct kobj_attribute shmem_enabled_attr = __ATTR_RW(shmem_enabled);
+static DEFINE_SPINLOCK(huge_shmem_orders_lock);
+
+static ssize_t thpsize_shmem_enabled_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ int order = to_thpsize(kobj)->order;
+ const char *output;
+
+ if (test_bit(order, &huge_shmem_orders_always))
+ output = "[always] inherit within_size advise never";
+ else if (test_bit(order, &huge_shmem_orders_inherit))
+ output = "always [inherit] within_size advise never";
+ else if (test_bit(order, &huge_shmem_orders_within_size))
+ output = "always inherit [within_size] advise never";
+ else if (test_bit(order, &huge_shmem_orders_madvise))
+ output = "always inherit within_size [advise] never";
+ else
+ output = "always inherit within_size advise [never]";
+
+ return sysfs_emit(buf, "%s\n", output);
+}
+
+static ssize_t thpsize_shmem_enabled_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ int order = to_thpsize(kobj)->order;
+ ssize_t ret = count;
+
+ if (sysfs_streq(buf, "always")) {
+ spin_lock(&huge_shmem_orders_lock);
+ clear_bit(order, &huge_shmem_orders_inherit);
+ clear_bit(order, &huge_shmem_orders_madvise);
+ clear_bit(order, &huge_shmem_orders_within_size);
+ set_bit(order, &huge_shmem_orders_always);
+ spin_unlock(&huge_shmem_orders_lock);
+ } else if (sysfs_streq(buf, "inherit")) {
+ /* Do not override huge allocation policy with non-PMD sized mTHP */
+ if (shmem_huge == SHMEM_HUGE_FORCE &&
+ order != HPAGE_PMD_ORDER)
+ return -EINVAL;
+
+ spin_lock(&huge_shmem_orders_lock);
+ clear_bit(order, &huge_shmem_orders_always);
+ clear_bit(order, &huge_shmem_orders_madvise);
+ clear_bit(order, &huge_shmem_orders_within_size);
+ set_bit(order, &huge_shmem_orders_inherit);
+ spin_unlock(&huge_shmem_orders_lock);
+ } else if (sysfs_streq(buf, "within_size")) {
+ spin_lock(&huge_shmem_orders_lock);
+ clear_bit(order, &huge_shmem_orders_always);
+ clear_bit(order, &huge_shmem_orders_inherit);
+ clear_bit(order, &huge_shmem_orders_madvise);
+ set_bit(order, &huge_shmem_orders_within_size);
+ spin_unlock(&huge_shmem_orders_lock);
+ } else if (sysfs_streq(buf, "advise")) {
+ spin_lock(&huge_shmem_orders_lock);
+ clear_bit(order, &huge_shmem_orders_always);
+ clear_bit(order, &huge_shmem_orders_inherit);
+ clear_bit(order, &huge_shmem_orders_within_size);
+ set_bit(order, &huge_shmem_orders_madvise);
+ spin_unlock(&huge_shmem_orders_lock);
+ } else if (sysfs_streq(buf, "never")) {
+ spin_lock(&huge_shmem_orders_lock);
+ clear_bit(order, &huge_shmem_orders_always);
+ clear_bit(order, &huge_shmem_orders_inherit);
+ clear_bit(order, &huge_shmem_orders_within_size);
+ clear_bit(order, &huge_shmem_orders_madvise);
+ spin_unlock(&huge_shmem_orders_lock);
+ } else {
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+struct kobj_attribute thpsize_shmem_enabled_attr =
+ __ATTR(shmem_enabled, 0644, thpsize_shmem_enabled_show, thpsize_shmem_enabled_store);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_SYSFS */
#else /* !CONFIG_SHMEM */