aboutsummaryrefslogtreecommitdiffstats
path: root/mm/vmalloc.c
diff options
context:
space:
mode:
authorDaniel Lezcano <daniel.lezcano@linaro.org>2021-12-09 13:57:57 +0100
committerDaniel Lezcano <daniel.lezcano@linaro.org>2021-12-09 13:57:57 +0100
commit8632987380765dee716d460640aa58d58d52998e (patch)
treef8f4a55fc95c1060f97d64ee4d6db5c9693bb794 /mm/vmalloc.c
parentclocksource/drivers/exynos_mct: Fix silly typo resulting in checkpatch warning (diff)
parentreset: Add of_reset_control_get_optional_exclusive() (diff)
downloadlinux-dev-8632987380765dee716d460640aa58d58d52998e.tar.xz
linux-dev-8632987380765dee716d460640aa58d58d52998e.zip
Merge branch 'reset/of-get-optional-exclusive' of git://git.pengutronix.de/pza/linux into timers/drivers/next
"Add optional variant of of_reset_control_get_exclusive(). If the requested reset is not specified in the device tree, this function returns NULL instead of an error." This dependency is needed for the Generic Timer Module (a.k.a OSTM) support for RZ/G2L. Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Diffstat (limited to 'mm/vmalloc.c')
-rw-r--r--mm/vmalloc.c209
1 files changed, 157 insertions, 52 deletions
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index d5cd52805149..d2a00ad4e1dd 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -44,6 +44,19 @@
#include "internal.h"
#include "pgalloc-track.h"
+#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
+static unsigned int __ro_after_init ioremap_max_page_shift = BITS_PER_LONG - 1;
+
+static int __init set_nohugeiomap(char *str)
+{
+ ioremap_max_page_shift = PAGE_SHIFT;
+ return 0;
+}
+early_param("nohugeiomap", set_nohugeiomap);
+#else /* CONFIG_HAVE_ARCH_HUGE_VMAP */
+static const unsigned int ioremap_max_page_shift = PAGE_SHIFT;
+#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
+
#ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
static bool __ro_after_init vmap_allow_huge = true;
@@ -298,15 +311,14 @@ static int vmap_range_noflush(unsigned long addr, unsigned long end,
return err;
}
-int vmap_range(unsigned long addr, unsigned long end,
- phys_addr_t phys_addr, pgprot_t prot,
- unsigned int max_page_shift)
+int ioremap_page_range(unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot)
{
int err;
- err = vmap_range_noflush(addr, end, phys_addr, prot, max_page_shift);
+ err = vmap_range_noflush(addr, end, phys_addr, pgprot_nx(prot),
+ ioremap_max_page_shift);
flush_cache_vmap(addr, end);
-
return err;
}
@@ -787,6 +799,28 @@ unsigned long vmalloc_nr_pages(void)
return atomic_long_read(&nr_vmalloc_pages);
}
+static struct vmap_area *find_vmap_area_exceed_addr(unsigned long addr)
+{
+ struct vmap_area *va = NULL;
+ struct rb_node *n = vmap_area_root.rb_node;
+
+ while (n) {
+ struct vmap_area *tmp;
+
+ tmp = rb_entry(n, struct vmap_area, rb_node);
+ if (tmp->va_end > addr) {
+ va = tmp;
+ if (tmp->va_start <= addr)
+ break;
+
+ n = n->rb_left;
+ } else
+ n = n->rb_right;
+ }
+
+ return va;
+}
+
static struct vmap_area *__find_vmap_area(unsigned long addr)
{
struct rb_node *n = vmap_area_root.rb_node;
@@ -1161,18 +1195,14 @@ find_vmap_lowest_match(unsigned long size,
{
struct vmap_area *va;
struct rb_node *node;
- unsigned long length;
/* Start from the root. */
node = free_vmap_area_root.rb_node;
- /* Adjust the search size for alignment overhead. */
- length = size + align - 1;
-
while (node) {
va = rb_entry(node, struct vmap_area, rb_node);
- if (get_subtree_max_size(node->rb_left) >= length &&
+ if (get_subtree_max_size(node->rb_left) >= size &&
vstart < va->va_start) {
node = node->rb_left;
} else {
@@ -1182,9 +1212,9 @@ find_vmap_lowest_match(unsigned long size,
/*
* Does not make sense to go deeper towards the right
* sub-tree if it does not have a free block that is
- * equal or bigger to the requested search length.
+ * equal or bigger to the requested search size.
*/
- if (get_subtree_max_size(node->rb_right) >= length) {
+ if (get_subtree_max_size(node->rb_right) >= size) {
node = node->rb_right;
continue;
}
@@ -1192,15 +1222,23 @@ find_vmap_lowest_match(unsigned long size,
/*
* OK. We roll back and find the first right sub-tree,
* that will satisfy the search criteria. It can happen
- * only once due to "vstart" restriction.
+ * due to "vstart" restriction or an alignment overhead
+ * that is bigger then PAGE_SIZE.
*/
while ((node = rb_parent(node))) {
va = rb_entry(node, struct vmap_area, rb_node);
if (is_within_this_va(va, size, align, vstart))
return va;
- if (get_subtree_max_size(node->rb_right) >= length &&
+ if (get_subtree_max_size(node->rb_right) >= size &&
vstart <= va->va_start) {
+ /*
+ * Shift the vstart forward. Please note, we update it with
+ * parent's start address adding "1" because we do not want
+ * to enter same sub-tree after it has already been checked
+ * and no suitable free block found there.
+ */
+ vstart = va->va_start + 1;
node = node->rb_right;
break;
}
@@ -1231,7 +1269,7 @@ find_vmap_lowest_linear_match(unsigned long size,
}
static void
-find_vmap_lowest_match_check(unsigned long size)
+find_vmap_lowest_match_check(unsigned long size, unsigned long align)
{
struct vmap_area *va_1, *va_2;
unsigned long vstart;
@@ -1240,8 +1278,8 @@ find_vmap_lowest_match_check(unsigned long size)
get_random_bytes(&rnd, sizeof(rnd));
vstart = VMALLOC_START + rnd;
- va_1 = find_vmap_lowest_match(size, 1, vstart);
- va_2 = find_vmap_lowest_linear_match(size, 1, vstart);
+ va_1 = find_vmap_lowest_match(size, align, vstart);
+ va_2 = find_vmap_lowest_linear_match(size, align, vstart);
if (va_1 != va_2)
pr_emerg("not lowest: t: 0x%p, l: 0x%p, v: 0x%lx\n",
@@ -1420,7 +1458,7 @@ __alloc_vmap_area(unsigned long size, unsigned long align,
return vend;
#if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
- find_vmap_lowest_match_check(size);
+ find_vmap_lowest_match_check(size, align);
#endif
return nva_start_addr;
@@ -1479,6 +1517,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
int node, gfp_t gfp_mask)
{
struct vmap_area *va;
+ unsigned long freed;
unsigned long addr;
int purged = 0;
int ret;
@@ -1542,13 +1581,12 @@ overflow:
goto retry;
}
- if (gfpflags_allow_blocking(gfp_mask)) {
- unsigned long freed = 0;
- blocking_notifier_call_chain(&vmap_notify_list, 0, &freed);
- if (freed > 0) {
- purged = 0;
- goto retry;
- }
+ freed = 0;
+ blocking_notifier_call_chain(&vmap_notify_list, 0, &freed);
+
+ if (freed > 0) {
+ purged = 0;
+ goto retry;
}
if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit())
@@ -2238,15 +2276,22 @@ void __init vm_area_add_early(struct vm_struct *vm)
*/
void __init vm_area_register_early(struct vm_struct *vm, size_t align)
{
- static size_t vm_init_off __initdata;
- unsigned long addr;
+ unsigned long addr = ALIGN(VMALLOC_START, align);
+ struct vm_struct *cur, **p;
- addr = ALIGN(VMALLOC_START + vm_init_off, align);
- vm_init_off = PFN_ALIGN(addr + vm->size) - VMALLOC_START;
+ BUG_ON(vmap_initialized);
- vm->addr = (void *)addr;
+ for (p = &vmlist; (cur = *p) != NULL; p = &cur->next) {
+ if ((unsigned long)cur->addr - addr >= vm->size)
+ break;
+ addr = ALIGN((unsigned long)cur->addr + cur->size, align);
+ }
- vm_area_add_early(vm);
+ BUG_ON(addr > VMALLOC_END - vm->size);
+ vm->addr = (void *)addr;
+ vm->next = *p;
+ *p = vm;
+ kasan_populate_early_vm_area_shadow(vm->addr, vm->size);
}
static void vmap_init_free_space(void)
@@ -2709,6 +2754,13 @@ void *vmap(struct page **pages, unsigned int count,
might_sleep();
+ /*
+ * Your top guard is someone else's bottom guard. Not having a top
+ * guard compromises someone else's mappings too.
+ */
+ if (WARN_ON_ONCE(flags & VM_NO_GUARD))
+ flags &= ~VM_NO_GUARD;
+
if (count > totalram_pages())
return NULL;
@@ -2779,9 +2831,11 @@ EXPORT_SYMBOL_GPL(vmap_pfn);
static inline unsigned int
vm_area_alloc_pages(gfp_t gfp, int nid,
- unsigned int order, unsigned long nr_pages, struct page **pages)
+ unsigned int order, unsigned int nr_pages, struct page **pages)
{
unsigned int nr_allocated = 0;
+ struct page *page;
+ int i;
/*
* For order-0 pages we make use of bulk allocator, if
@@ -2789,10 +2843,44 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
* to fails, fallback to a single page allocator that is
* more permissive.
*/
- if (!order)
- nr_allocated = alloc_pages_bulk_array_node(
- gfp, nid, nr_pages, pages);
- else
+ if (!order) {
+ while (nr_allocated < nr_pages) {
+ unsigned int nr, nr_pages_request;
+
+ /*
+ * A maximum allowed request is hard-coded and is 100
+ * pages per call. That is done in order to prevent a
+ * long preemption off scenario in the bulk-allocator
+ * so the range is [1:100].
+ */
+ nr_pages_request = min(100U, nr_pages - nr_allocated);
+
+ /* memory allocation should consider mempolicy, we can't
+ * wrongly use nearest node when nid == NUMA_NO_NODE,
+ * otherwise memory may be allocated in only one node,
+ * but mempolcy want to alloc memory by interleaving.
+ */
+ if (IS_ENABLED(CONFIG_NUMA) && nid == NUMA_NO_NODE)
+ nr = alloc_pages_bulk_array_mempolicy(gfp,
+ nr_pages_request,
+ pages + nr_allocated);
+
+ else
+ nr = alloc_pages_bulk_array_node(gfp, nid,
+ nr_pages_request,
+ pages + nr_allocated);
+
+ nr_allocated += nr;
+ cond_resched();
+
+ /*
+ * If zero or pages were obtained partly,
+ * fallback to a single page allocator.
+ */
+ if (nr != nr_pages_request)
+ break;
+ }
+ } else
/*
* Compound pages required for remap_vmalloc_page if
* high-order pages.
@@ -2800,11 +2888,15 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
gfp |= __GFP_COMP;
/* High-order pages or fallback path if "bulk" fails. */
+
while (nr_allocated < nr_pages) {
- struct page *page;
- int i;
+ if (fatal_signal_pending(current))
+ break;
- page = alloc_pages_node(nid, gfp, order);
+ if (nid == NUMA_NO_NODE)
+ page = alloc_pages(gfp, order);
+ else
+ page = alloc_pages_node(nid, gfp, order);
if (unlikely(!page))
break;
@@ -2816,9 +2908,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
for (i = 0; i < (1U << order); i++)
pages[nr_allocated + i] = page + i;
- if (gfpflags_allow_blocking(gfp))
- cond_resched();
-
+ cond_resched();
nr_allocated += 1U << order;
}
@@ -2830,6 +2920,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
int node)
{
const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
+ const gfp_t orig_gfp_mask = gfp_mask;
unsigned long addr = (unsigned long)area->addr;
unsigned long size = get_vm_area_size(area);
unsigned long array_size;
@@ -2850,7 +2941,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
}
if (!area->pages) {
- warn_alloc(gfp_mask, NULL,
+ warn_alloc(orig_gfp_mask, NULL,
"vmalloc error: size %lu, failed to allocated page array size %lu",
nr_small_pages * PAGE_SIZE, array_size);
free_vm_area(area);
@@ -2870,7 +2961,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
* allocation request, free them via __vfree() if any.
*/
if (area->nr_pages != nr_small_pages) {
- warn_alloc(gfp_mask, NULL,
+ warn_alloc(orig_gfp_mask, NULL,
"vmalloc error: size %lu, page order %u, failed to allocate pages",
area->nr_pages * PAGE_SIZE, page_order);
goto fail;
@@ -2878,7 +2969,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
if (vmap_pages_range(addr, addr + size, prot, area->pages,
page_shift) < 0) {
- warn_alloc(gfp_mask, NULL,
+ warn_alloc(orig_gfp_mask, NULL,
"vmalloc error: size %lu, failed to map pages",
area->nr_pages * PAGE_SIZE);
goto fail;
@@ -2904,8 +2995,16 @@ fail:
* @caller: caller's return address
*
* Allocate enough pages to cover @size from the page level
- * allocator with @gfp_mask flags. Map them into contiguous
- * kernel virtual space, using a pagetable protection of @prot.
+ * allocator with @gfp_mask flags. Please note that the full set of gfp
+ * flags are not supported. GFP_KERNEL would be a preferred allocation mode
+ * but GFP_NOFS and GFP_NOIO are supported as well. Zone modifiers are not
+ * supported. From the reclaim modifiers__GFP_DIRECT_RECLAIM is required (aka
+ * GFP_NOWAIT is not supported) and only __GFP_NOFAIL is supported (aka
+ * __GFP_NORETRY and __GFP_RETRY_MAYFAIL are not supported).
+ * __GFP_NOWARN can be used to suppress error messages about failures.
+ *
+ * Map them into contiguous kernel virtual space, using a pagetable
+ * protection of @prot.
*
* Return: the address of the area or %NULL on failure
*/
@@ -3267,9 +3366,14 @@ long vread(char *buf, char *addr, unsigned long count)
count = -(unsigned long) addr;
spin_lock(&vmap_area_lock);
- va = __find_vmap_area((unsigned long)addr);
+ va = find_vmap_area_exceed_addr((unsigned long)addr);
if (!va)
goto finished;
+
+ /* no intersects with alive vmap_area */
+ if ((unsigned long)addr + count <= va->va_start)
+ goto finished;
+
list_for_each_entry_from(va, &vmap_area_list, list) {
if (!count)
break;
@@ -3794,6 +3898,7 @@ static void show_numa_info(struct seq_file *m, struct vm_struct *v)
{
if (IS_ENABLED(CONFIG_NUMA)) {
unsigned int nr, *counters = m->private;
+ unsigned int step = 1U << vm_area_page_order(v);
if (!counters)
return;
@@ -3805,9 +3910,8 @@ static void show_numa_info(struct seq_file *m, struct vm_struct *v)
memset(counters, 0, nr_node_ids * sizeof(unsigned int));
- for (nr = 0; nr < v->nr_pages; nr++)
- counters[page_to_nid(v->pages[nr])]++;
-
+ for (nr = 0; nr < v->nr_pages; nr += step)
+ counters[page_to_nid(v->pages[nr])] += step;
for_each_node_state(nr, N_HIGH_MEMORY)
if (counters[nr])
seq_printf(m, " N%u=%u", nr, counters[nr]);
@@ -3843,7 +3947,7 @@ static int s_show(struct seq_file *m, void *p)
(void *)va->va_start, (void *)va->va_end,
va->va_end - va->va_start);
- return 0;
+ goto final;
}
v = va->vm;
@@ -3884,6 +3988,7 @@ static int s_show(struct seq_file *m, void *p)
/*
* As a final step, dump "unpurged" areas.
*/
+final:
if (list_is_last(&va->list, &vmap_area_list))
show_purge_info(m);