aboutsummaryrefslogtreecommitdiffstats
path: root/mm/vmalloc.c
diff options
context:
space:
mode:
authorDmitry Torokhov <dmitry.torokhov@gmail.com>2022-01-15 12:09:44 -0800
committerDmitry Torokhov <dmitry.torokhov@gmail.com>2022-01-15 12:09:44 -0800
commit762f99f4f3cb41a775b5157dd761217beba65873 (patch)
tree2e93410a577dfff1202ea5b0d109460b97650e58 /mm/vmalloc.c
parentInput: zinitix - make sure the IRQ is allocated before it gets enabled (diff)
parentInput: ti_am335x_tsc - fix a typo in a comment (diff)
downloadlinux-dev-762f99f4f3cb41a775b5157dd761217beba65873.tar.xz
linux-dev-762f99f4f3cb41a775b5157dd761217beba65873.zip
Merge branch 'next' into for-linus
Prepare input updates for 5.17 merge window.
Diffstat (limited to 'mm/vmalloc.c')
-rw-r--r--mm/vmalloc.c246
1 files changed, 178 insertions, 68 deletions
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index d0a7d89be091..e8a807c78110 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -25,6 +25,7 @@
#include <linux/notifier.h>
#include <linux/rbtree.h>
#include <linux/xarray.h>
+#include <linux/io.h>
#include <linux/rcupdate.h>
#include <linux/pfn.h>
#include <linux/kmemleak.h>
@@ -36,12 +37,26 @@
#include <linux/overflow.h>
#include <linux/pgtable.h>
#include <linux/uaccess.h>
+#include <linux/hugetlb.h>
#include <asm/tlbflush.h>
#include <asm/shmparam.h>
#include "internal.h"
#include "pgalloc-track.h"
+#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
+static unsigned int __ro_after_init ioremap_max_page_shift = BITS_PER_LONG - 1;
+
+static int __init set_nohugeiomap(char *str)
+{
+ ioremap_max_page_shift = PAGE_SHIFT;
+ return 0;
+}
+early_param("nohugeiomap", set_nohugeiomap);
+#else /* CONFIG_HAVE_ARCH_HUGE_VMAP */
+static const unsigned int ioremap_max_page_shift = PAGE_SHIFT;
+#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
+
#ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
static bool __ro_after_init vmap_allow_huge = true;
@@ -83,10 +98,11 @@ static void free_work(struct work_struct *w)
/*** Page table manipulation functions ***/
static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
phys_addr_t phys_addr, pgprot_t prot,
- pgtbl_mod_mask *mask)
+ unsigned int max_page_shift, pgtbl_mod_mask *mask)
{
pte_t *pte;
u64 pfn;
+ unsigned long size = PAGE_SIZE;
pfn = phys_addr >> PAGE_SHIFT;
pte = pte_alloc_kernel_track(pmd, addr, mask);
@@ -94,9 +110,22 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
return -ENOMEM;
do {
BUG_ON(!pte_none(*pte));
+
+#ifdef CONFIG_HUGETLB_PAGE
+ size = arch_vmap_pte_range_map_size(addr, end, pfn, max_page_shift);
+ if (size != PAGE_SIZE) {
+ pte_t entry = pfn_pte(pfn, prot);
+
+ entry = pte_mkhuge(entry);
+ entry = arch_make_huge_pte(entry, ilog2(size), 0);
+ set_huge_pte_at(&init_mm, addr, pte, entry);
+ pfn += PFN_DOWN(size);
+ continue;
+ }
+#endif
set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot));
pfn++;
- } while (pte++, addr += PAGE_SIZE, addr != end);
+ } while (pte += PFN_DOWN(size), addr += size, addr != end);
*mask |= PGTBL_PTE_MODIFIED;
return 0;
}
@@ -145,7 +174,7 @@ static int vmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
continue;
}
- if (vmap_pte_range(pmd, addr, next, phys_addr, prot, mask))
+ if (vmap_pte_range(pmd, addr, next, phys_addr, prot, max_page_shift, mask))
return -ENOMEM;
} while (pmd++, phys_addr += (next - addr), addr = next, addr != end);
return 0;
@@ -282,15 +311,14 @@ static int vmap_range_noflush(unsigned long addr, unsigned long end,
return err;
}
-int vmap_range(unsigned long addr, unsigned long end,
- phys_addr_t phys_addr, pgprot_t prot,
- unsigned int max_page_shift)
+int ioremap_page_range(unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot)
{
int err;
- err = vmap_range_noflush(addr, end, phys_addr, prot, max_page_shift);
+ err = vmap_range_noflush(addr, end, phys_addr, pgprot_nx(prot),
+ ioremap_max_page_shift);
flush_cache_vmap(addr, end);
-
return err;
}
@@ -771,6 +799,28 @@ unsigned long vmalloc_nr_pages(void)
return atomic_long_read(&nr_vmalloc_pages);
}
+static struct vmap_area *find_vmap_area_exceed_addr(unsigned long addr)
+{
+ struct vmap_area *va = NULL;
+ struct rb_node *n = vmap_area_root.rb_node;
+
+ while (n) {
+ struct vmap_area *tmp;
+
+ tmp = rb_entry(n, struct vmap_area, rb_node);
+ if (tmp->va_end > addr) {
+ va = tmp;
+ if (tmp->va_start <= addr)
+ break;
+
+ n = n->rb_left;
+ } else
+ n = n->rb_right;
+ }
+
+ return va;
+}
+
static struct vmap_area *__find_vmap_area(unsigned long addr)
{
struct rb_node *n = vmap_area_root.rb_node;
@@ -1463,6 +1513,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
int node, gfp_t gfp_mask)
{
struct vmap_area *va;
+ unsigned long freed;
unsigned long addr;
int purged = 0;
int ret;
@@ -1526,13 +1577,12 @@ overflow:
goto retry;
}
- if (gfpflags_allow_blocking(gfp_mask)) {
- unsigned long freed = 0;
- blocking_notifier_call_chain(&vmap_notify_list, 0, &freed);
- if (freed > 0) {
- purged = 0;
- goto retry;
- }
+ freed = 0;
+ blocking_notifier_call_chain(&vmap_notify_list, 0, &freed);
+
+ if (freed > 0) {
+ purged = 0;
+ goto retry;
}
if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit())
@@ -1592,6 +1642,7 @@ static DEFINE_MUTEX(vmap_purge_lock);
/* for per-CPU blocks */
static void purge_fragmented_blocks_allcpus(void);
+#ifdef CONFIG_X86_64
/*
* called before a call to iounmap() if the caller wants vm_area_struct's
* immediately freed.
@@ -1600,6 +1651,7 @@ void set_iounmap_nonlazy(void)
{
atomic_long_set(&vmap_lazy_nr, lazy_max_pages()+1);
}
+#endif /* CONFIG_X86_64 */
/*
* Purges all lazily-freed vmap areas.
@@ -2567,6 +2619,7 @@ static void __vunmap(const void *addr, int deallocate_pages)
BUG_ON(!page);
__free_pages(page, page_order);
+ cond_resched();
}
atomic_long_sub(area->nr_pages, &nr_vmalloc_pages);
@@ -2758,6 +2811,77 @@ void *vmap_pfn(unsigned long *pfns, unsigned int count, pgprot_t prot)
EXPORT_SYMBOL_GPL(vmap_pfn);
#endif /* CONFIG_VMAP_PFN */
+static inline unsigned int
+vm_area_alloc_pages(gfp_t gfp, int nid,
+ unsigned int order, unsigned int nr_pages, struct page **pages)
+{
+ unsigned int nr_allocated = 0;
+ struct page *page;
+ int i;
+
+ /*
+ * For order-0 pages we make use of bulk allocator, if
+ * the page array is partly or not at all populated due
+ * to fails, fallback to a single page allocator that is
+ * more permissive.
+ */
+ if (!order && nid != NUMA_NO_NODE) {
+ while (nr_allocated < nr_pages) {
+ unsigned int nr, nr_pages_request;
+
+ /*
+ * A maximum allowed request is hard-coded and is 100
+ * pages per call. That is done in order to prevent a
+ * long preemption off scenario in the bulk-allocator
+ * so the range is [1:100].
+ */
+ nr_pages_request = min(100U, nr_pages - nr_allocated);
+
+ nr = alloc_pages_bulk_array_node(gfp, nid,
+ nr_pages_request, pages + nr_allocated);
+
+ nr_allocated += nr;
+ cond_resched();
+
+ /*
+ * If zero or pages were obtained partly,
+ * fallback to a single page allocator.
+ */
+ if (nr != nr_pages_request)
+ break;
+ }
+ } else if (order)
+ /*
+ * Compound pages required for remap_vmalloc_page if
+ * high-order pages.
+ */
+ gfp |= __GFP_COMP;
+
+ /* High-order pages or fallback path if "bulk" fails. */
+
+ while (nr_allocated < nr_pages) {
+ if (nid == NUMA_NO_NODE)
+ page = alloc_pages(gfp, order);
+ else
+ page = alloc_pages_node(nid, gfp, order);
+ if (unlikely(!page))
+ break;
+
+ /*
+ * Careful, we allocate and map page-order pages, but
+ * tracking is done per PAGE_SIZE page so as to keep the
+ * vm_struct APIs independent of the physical/mapped size.
+ */
+ for (i = 0; i < (1U << order); i++)
+ pages[nr_allocated + i] = page + i;
+
+ cond_resched();
+ nr_allocated += 1U << order;
+ }
+
+ return nr_allocated;
+}
+
static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
pgprot_t prot, unsigned int page_shift,
int node)
@@ -2768,8 +2892,6 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
unsigned long array_size;
unsigned int nr_small_pages = size >> PAGE_SHIFT;
unsigned int page_order;
- struct page **pages;
- unsigned int i;
array_size = (unsigned long)nr_small_pages * sizeof(struct page *);
gfp_mask |= __GFP_NOWARN;
@@ -2778,62 +2900,44 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
/* Please note that the recursion is strictly bounded. */
if (array_size > PAGE_SIZE) {
- pages = __vmalloc_node(array_size, 1, nested_gfp, node,
+ area->pages = __vmalloc_node(array_size, 1, nested_gfp, node,
area->caller);
} else {
- pages = kmalloc_node(array_size, nested_gfp, node);
+ area->pages = kmalloc_node(array_size, nested_gfp, node);
}
- if (!pages) {
- free_vm_area(area);
+ if (!area->pages) {
warn_alloc(gfp_mask, NULL,
- "vmalloc size %lu allocation failure: "
- "page array size %lu allocation failed",
- nr_small_pages * PAGE_SIZE, array_size);
+ "vmalloc error: size %lu, failed to allocated page array size %lu",
+ nr_small_pages * PAGE_SIZE, array_size);
+ free_vm_area(area);
return NULL;
}
- area->pages = pages;
- area->nr_pages = nr_small_pages;
set_vm_area_page_order(area, page_shift - PAGE_SHIFT);
-
page_order = vm_area_page_order(area);
- /*
- * Careful, we allocate and map page_order pages, but tracking is done
- * per PAGE_SIZE page so as to keep the vm_struct APIs independent of
- * the physical/mapped size.
- */
- for (i = 0; i < area->nr_pages; i += 1U << page_order) {
- struct page *page;
- int p;
-
- /* Compound pages required for remap_vmalloc_page */
- page = alloc_pages_node(node, gfp_mask | __GFP_COMP, page_order);
- if (unlikely(!page)) {
- /* Successfully allocated i pages, free them in __vfree() */
- area->nr_pages = i;
- atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
- warn_alloc(gfp_mask, NULL,
- "vmalloc size %lu allocation failure: "
- "page order %u allocation failed",
- area->nr_pages * PAGE_SIZE, page_order);
- goto fail;
- }
+ area->nr_pages = vm_area_alloc_pages(gfp_mask, node,
+ page_order, nr_small_pages, area->pages);
- for (p = 0; p < (1U << page_order); p++)
- area->pages[i + p] = page + p;
+ atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
- if (gfpflags_allow_blocking(gfp_mask))
- cond_resched();
+ /*
+ * If not enough pages were obtained to accomplish an
+ * allocation request, free them via __vfree() if any.
+ */
+ if (area->nr_pages != nr_small_pages) {
+ warn_alloc(gfp_mask, NULL,
+ "vmalloc error: size %lu, page order %u, failed to allocate pages",
+ area->nr_pages * PAGE_SIZE, page_order);
+ goto fail;
}
- atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
- if (vmap_pages_range(addr, addr + size, prot, pages, page_shift) < 0) {
+ if (vmap_pages_range(addr, addr + size, prot, area->pages,
+ page_shift) < 0) {
warn_alloc(gfp_mask, NULL,
- "vmalloc size %lu allocation failure: "
- "failed to map pages",
- area->nr_pages * PAGE_SIZE);
+ "vmalloc error: size %lu, failed to map pages",
+ area->nr_pages * PAGE_SIZE);
goto fail;
}
@@ -2878,13 +2982,12 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
if ((size >> PAGE_SHIFT) > totalram_pages()) {
warn_alloc(gfp_mask, NULL,
- "vmalloc size %lu allocation failure: "
- "exceeds total pages", real_size);
+ "vmalloc error: size %lu, exceeds total pages",
+ real_size);
return NULL;
}
- if (vmap_allow_huge && !(vm_flags & VM_NO_HUGE_VMAP) &&
- arch_vmap_pmd_supported(prot)) {
+ if (vmap_allow_huge && !(vm_flags & VM_NO_HUGE_VMAP)) {
unsigned long size_per_node;
/*
@@ -2897,11 +3000,13 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
size_per_node = size;
if (node == NUMA_NO_NODE)
size_per_node /= num_online_nodes();
- if (size_per_node >= PMD_SIZE) {
+ if (arch_vmap_pmd_supported(prot) && size_per_node >= PMD_SIZE)
shift = PMD_SHIFT;
- align = max(real_align, 1UL << shift);
- size = ALIGN(real_size, 1UL << shift);
- }
+ else
+ shift = arch_vmap_pte_supported_shift(size_per_node);
+
+ align = max(real_align, 1UL << shift);
+ size = ALIGN(real_size, 1UL << shift);
}
again:
@@ -2910,8 +3015,8 @@ again:
gfp_mask, caller);
if (!area) {
warn_alloc(gfp_mask, NULL,
- "vmalloc size %lu allocation failure: "
- "vm_struct allocation failed", real_size);
+ "vmalloc error: size %lu, vm_struct allocation failed",
+ real_size);
goto fail;
}
@@ -3219,9 +3324,14 @@ long vread(char *buf, char *addr, unsigned long count)
count = -(unsigned long) addr;
spin_lock(&vmap_area_lock);
- va = __find_vmap_area((unsigned long)addr);
+ va = find_vmap_area_exceed_addr((unsigned long)addr);
if (!va)
goto finished;
+
+ /* no intersects with alive vmap_area */
+ if ((unsigned long)addr + count <= va->va_start)
+ goto finished;
+
list_for_each_entry_from(va, &vmap_area_list, list) {
if (!count)
break;