aboutsummaryrefslogtreecommitdiffstats
path: root/mm/memblock.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--mm/memblock.c495
1 files changed, 298 insertions, 197 deletions
diff --git a/mm/memblock.c b/mm/memblock.c
index eba94ee3de0b..511d4783dcf1 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -29,6 +29,10 @@
# define INIT_MEMBLOCK_RESERVED_REGIONS INIT_MEMBLOCK_REGIONS
#endif
+#ifndef INIT_MEMBLOCK_MEMORY_REGIONS
+#define INIT_MEMBLOCK_MEMORY_REGIONS INIT_MEMBLOCK_REGIONS
+#endif
+
/**
* DOC: memblock overview
*
@@ -44,19 +48,20 @@
* in the system, for instance when the memory is restricted with
* ``mem=`` command line parameter
* * ``reserved`` - describes the regions that were allocated
- * * ``physmap`` - describes the actual physical memory regardless of
- * the possible restrictions; the ``physmap`` type is only available
- * on some architectures.
+ * * ``physmem`` - describes the actual physical memory available during
+ * boot regardless of the possible restrictions and memory hot(un)plug;
+ * the ``physmem`` type is only available on some architectures.
*
- * Each region is represented by :c:type:`struct memblock_region` that
+ * Each region is represented by struct memblock_region that
* defines the region extents, its attributes and NUMA node id on NUMA
- * systems. Every memory type is described by the :c:type:`struct
- * memblock_type` which contains an array of memory regions along with
- * the allocator metadata. The memory types are nicely wrapped with
- * :c:type:`struct memblock`. This structure is statically initialzed
- * at build time. The region arrays for the "memory" and "reserved"
- * types are initially sized to %INIT_MEMBLOCK_REGIONS and for the
- * "physmap" type to %INIT_PHYSMEM_REGIONS.
+ * systems. Every memory type is described by the struct memblock_type
+ * which contains an array of memory regions along with
+ * the allocator metadata. The "memory" and "reserved" types are nicely
+ * wrapped with struct memblock. This structure is statically
+ * initialized at build time. The region arrays are initially sized to
+ * %INIT_MEMBLOCK_MEMORY_REGIONS for "memory" and
+ * %INIT_MEMBLOCK_RESERVED_REGIONS for "reserved". The region array
+ * for "physmem" is initially sized to %INIT_PHYSMEM_REGIONS.
* The memblock_allow_resize() enables automatic resizing of the region
* arrays during addition of new regions. This feature should be used
* with care so that memory allocated for the region array will not
@@ -78,7 +83,7 @@
* * memblock_alloc*() - these functions return the **virtual** address
* of the allocated memory.
*
- * Note, that both API variants use implict assumptions about allowed
+ * Note, that both API variants use implicit assumptions about allowed
* memory ranges and the fallback methods. Consult the documentation
* of memblock_alloc_internal() and memblock_alloc_range_nid()
* functions for more elaborate description.
@@ -87,11 +92,11 @@
* function frees all the memory to the buddy page allocator.
*
* Unless an architecture enables %CONFIG_ARCH_KEEP_MEMBLOCK, the
- * memblock data structures will be discarded after the system
- * initialization completes.
+ * memblock data structures (except "physmem") will be discarded after the
+ * system initialization completes.
*/
-#ifndef CONFIG_NEED_MULTIPLE_NODES
+#ifndef CONFIG_NUMA
struct pglist_data __refdata contig_page_data;
EXPORT_SYMBOL(contig_page_data);
#endif
@@ -101,16 +106,16 @@ unsigned long min_low_pfn;
unsigned long max_pfn;
unsigned long long max_possible_pfn;
-static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
+static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_MEMORY_REGIONS] __initdata_memblock;
static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_RESERVED_REGIONS] __initdata_memblock;
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
-static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS] __initdata_memblock;
+static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS];
#endif
struct memblock memblock __initdata_memblock = {
.memory.regions = memblock_memory_init_regions,
.memory.cnt = 1, /* empty dummy entry */
- .memory.max = INIT_MEMBLOCK_REGIONS,
+ .memory.max = INIT_MEMBLOCK_MEMORY_REGIONS,
.memory.name = "memory",
.reserved.regions = memblock_reserved_init_regions,
@@ -118,18 +123,39 @@ struct memblock memblock __initdata_memblock = {
.reserved.max = INIT_MEMBLOCK_RESERVED_REGIONS,
.reserved.name = "reserved",
-#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
- .physmem.regions = memblock_physmem_init_regions,
- .physmem.cnt = 1, /* empty dummy entry */
- .physmem.max = INIT_PHYSMEM_REGIONS,
- .physmem.name = "physmem",
-#endif
-
.bottom_up = false,
.current_limit = MEMBLOCK_ALLOC_ANYWHERE,
};
-int memblock_debug __initdata_memblock;
+#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
+struct memblock_type physmem = {
+ .regions = memblock_physmem_init_regions,
+ .cnt = 1, /* empty dummy entry */
+ .max = INIT_PHYSMEM_REGIONS,
+ .name = "physmem",
+};
+#endif
+
+/*
+ * keep a pointer to &memblock.memory in the text section to use it in
+ * __next_mem_range() and its helpers.
+ * For architectures that do not keep memblock data after init, this
+ * pointer will be reset to NULL at memblock_discard()
+ */
+static __refdata struct memblock_type *memblock_memory = &memblock.memory;
+
+#define for_each_memblock_type(i, memblock_type, rgn) \
+ for (i = 0, rgn = &memblock_type->regions[0]; \
+ i < memblock_type->cnt; \
+ i++, rgn = &memblock_type->regions[i])
+
+#define memblock_dbg(fmt, ...) \
+ do { \
+ if (memblock_debug) \
+ pr_info(fmt, ##__VA_ARGS__); \
+ } while (0)
+
+static int memblock_debug __initdata_memblock;
static bool system_has_some_mirror __initdata_memblock = false;
static int memblock_can_resize __initdata_memblock;
static int memblock_memory_in_slab __initdata_memblock = 0;
@@ -160,6 +186,8 @@ bool __init_memblock memblock_overlaps_region(struct memblock_type *type,
{
unsigned long i;
+ memblock_cap_size(base, &size);
+
for (i = 0; i < type->cnt; i++)
if (memblock_addrs_overlap(base, size, type->regions[i].base,
type->regions[i].size))
@@ -253,14 +281,6 @@ __memblock_find_range_top_down(phys_addr_t start, phys_addr_t end,
*
* Find @size free area aligned to @align in the specified range and node.
*
- * When allocation direction is bottom-up, the @start should be greater
- * than the end of the kernel image. Otherwise, it will be trimmed. The
- * reason is that we want the bottom-up allocation just near the kernel
- * image so it is highly likely that the allocated memory and the kernel
- * will reside in the same node.
- *
- * If bottom-up allocation failed, will try to allocate memory top-down.
- *
* Return:
* Found address on success, 0 on failure.
*/
@@ -269,50 +289,21 @@ static phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
phys_addr_t end, int nid,
enum memblock_flags flags)
{
- phys_addr_t kernel_end, ret;
-
/* pump up @end */
if (end == MEMBLOCK_ALLOC_ACCESSIBLE ||
- end == MEMBLOCK_ALLOC_KASAN)
+ end == MEMBLOCK_ALLOC_NOLEAKTRACE)
end = memblock.current_limit;
/* avoid allocating the first page */
start = max_t(phys_addr_t, start, PAGE_SIZE);
end = max(start, end);
- kernel_end = __pa_symbol(_end);
- /*
- * try bottom-up allocation only when bottom-up mode
- * is set and @end is above the kernel image.
- */
- if (memblock_bottom_up() && end > kernel_end) {
- phys_addr_t bottom_up_start;
-
- /* make sure we will allocate above the kernel */
- bottom_up_start = max(start, kernel_end);
-
- /* ok, try bottom-up allocation first */
- ret = __memblock_find_range_bottom_up(bottom_up_start, end,
- size, align, nid, flags);
- if (ret)
- return ret;
-
- /*
- * we always limit bottom-up allocation above the kernel,
- * but top-down allocation doesn't have the limit, so
- * retrying top-down allocation may succeed when bottom-up
- * allocation failed.
- *
- * bottom-up allocation is expected to be fail very rarely,
- * so we use WARN_ONCE() here to see the stack trace if
- * fail happens.
- */
- WARN_ONCE(IS_ENABLED(CONFIG_MEMORY_HOTREMOVE),
- "memblock: bottom-up allocation failed, memory hotremove may be affected\n");
- }
-
- return __memblock_find_range_top_down(start, end, size, align, nid,
- flags);
+ if (memblock_bottom_up())
+ return __memblock_find_range_bottom_up(start, end, size, align,
+ nid, flags);
+ else
+ return __memblock_find_range_top_down(start, end, size, align,
+ nid, flags);
}
/**
@@ -328,7 +319,7 @@ static phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
* Return:
* Found address on success, 0 on failure.
*/
-phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start,
+static phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start,
phys_addr_t end, phys_addr_t size,
phys_addr_t align)
{
@@ -340,7 +331,7 @@ again:
NUMA_NO_NODE, flags);
if (!ret && (flags & MEMBLOCK_MIRROR)) {
- pr_warn("Could not allocate %pap bytes of mirrored memory\n",
+ pr_warn_ratelimited("Could not allocate %pap bytes of mirrored memory\n",
&size);
flags &= ~MEMBLOCK_MIRROR;
goto again;
@@ -379,15 +370,23 @@ void __init memblock_discard(void)
addr = __pa(memblock.reserved.regions);
size = PAGE_ALIGN(sizeof(struct memblock_region) *
memblock.reserved.max);
- __memblock_free_late(addr, size);
+ if (memblock_reserved_in_slab)
+ kfree(memblock.reserved.regions);
+ else
+ memblock_free_late(addr, size);
}
if (memblock.memory.regions != memblock_memory_init_regions) {
addr = __pa(memblock.memory.regions);
size = PAGE_ALIGN(sizeof(struct memblock_region) *
memblock.memory.max);
- __memblock_free_late(addr, size);
+ if (memblock_memory_in_slab)
+ kfree(memblock.memory.regions);
+ else
+ memblock_free_late(addr, size);
}
+
+ memblock_memory = NULL;
}
#endif
@@ -483,7 +482,7 @@ static int __init_memblock memblock_double_array(struct memblock_type *type,
kfree(old_array);
else if (old_array != memblock_memory_init_regions &&
old_array != memblock_reserved_init_regions)
- memblock_free(__pa(old_array), old_alloc_size);
+ memblock_free(old_array, old_alloc_size);
/*
* Reserve the new array if that comes from the memblock. Otherwise, we
@@ -598,6 +597,17 @@ static int __init_memblock memblock_add_range(struct memblock_type *type,
type->total_size = size;
return 0;
}
+
+ /*
+ * The worst case is when new range overlaps all existing regions,
+ * then we'll need type->cnt + 1 empty regions in @type. So if
+ * type->cnt * 2 + 1 is less than type->max, we know
+ * that there is enough empty regions in @type, and we can insert
+ * regions directly.
+ */
+ if (type->cnt * 2 + 1 < type->max)
+ insert = true;
+
repeat:
/*
* The following is executed twice. Once with %false @insert and
@@ -620,7 +630,7 @@ repeat:
* area, insert that portion.
*/
if (rbase > base) {
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+#ifdef CONFIG_NUMA
WARN_ON(nid != memblock_get_region_node(rgn));
#endif
WARN_ON(flags != rgn->flags);
@@ -666,6 +676,7 @@ repeat:
* @base: base address of the new region
* @size: size of the new region
* @nid: nid of the new region
+ * @flags: flags of the new region
*
* Add new memblock region [@base, @base + @size) to the "memory"
* type. See memblock_add_range() description for mode details
@@ -674,9 +685,14 @@ repeat:
* 0 on success, -errno on failure.
*/
int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size,
- int nid)
+ int nid, enum memblock_flags flags)
{
- return memblock_add_range(&memblock.memory, base, size, nid, 0);
+ phys_addr_t end = base + size - 1;
+
+ memblock_dbg("%s: [%pa-%pa] nid=%d flags=%x %pS\n", __func__,
+ &base, &end, nid, flags, (void *)_RET_IP_);
+
+ return memblock_add_range(&memblock.memory, base, size, nid, flags);
}
/**
@@ -802,14 +818,28 @@ int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size)
}
/**
- * memblock_free - free boot memory block
+ * memblock_free - free boot memory allocation
+ * @ptr: starting address of the boot memory allocation
+ * @size: size of the boot memory block in bytes
+ *
+ * Free boot memory block previously allocated by memblock_alloc_xx() API.
+ * The freeing memory will not be released to the buddy allocator.
+ */
+void __init_memblock memblock_free(void *ptr, size_t size)
+{
+ if (ptr)
+ memblock_phys_free(__pa(ptr), size);
+}
+
+/**
+ * memblock_phys_free - free boot memory block
* @base: phys starting address of the boot memory block
* @size: size of the boot memory block in bytes
*
* Free boot memory block previously allocated by memblock_alloc_xx() API.
* The freeing memory will not be released to the buddy allocator.
*/
-int __init_memblock memblock_free(phys_addr_t base, phys_addr_t size)
+int __init_memblock memblock_phys_free(phys_addr_t base, phys_addr_t size)
{
phys_addr_t end = base + size - 1;
@@ -838,7 +868,7 @@ int __init_memblock memblock_physmem_add(phys_addr_t base, phys_addr_t size)
memblock_dbg("%s: [%pa-%pa] %pS\n", __func__,
&base, &end, (void *)_RET_IP_);
- return memblock_add_range(&memblock.physmem, base, size, MAX_NUMNODES, 0);
+ return memblock_add_range(&physmem, base, size, MAX_NUMNODES, 0);
}
#endif
@@ -847,7 +877,7 @@ int __init_memblock memblock_physmem_add(phys_addr_t base, phys_addr_t size)
* @base: base address of the region
* @size: size of the region
* @set: set or clear the flag
- * @flag: the flag to udpate
+ * @flag: the flag to update
*
* This function isolates region [@base, @base + @size), and sets/clears flag
*
@@ -909,6 +939,9 @@ int __init_memblock memblock_clear_hotplug(phys_addr_t base, phys_addr_t size)
*/
int __init_memblock memblock_mark_mirror(phys_addr_t base, phys_addr_t size)
{
+ if (!mirrored_kernelcore)
+ return 0;
+
system_has_some_mirror = true;
return memblock_setclr_flag(base, size, 1, MEMBLOCK_MIRROR);
@@ -919,6 +952,14 @@ int __init_memblock memblock_mark_mirror(phys_addr_t base, phys_addr_t size)
* @base: the base phys addr of the region
* @size: the size of the region
*
+ * The memory regions marked with %MEMBLOCK_NOMAP will not be added to the
+ * direct mapping of the physical memory. These regions will still be
+ * covered by the memory map. The struct page representing NOMAP memory
+ * frames in the memory map will be PageReserved()
+ *
+ * Note: if the memory being marked %MEMBLOCK_NOMAP was allocated from
+ * memblock, the caller must inform kmemleak to ignore that memory
+ *
* Return: 0 on success, -errno on failure.
*/
int __init_memblock memblock_mark_nomap(phys_addr_t base, phys_addr_t size)
@@ -938,48 +979,23 @@ int __init_memblock memblock_clear_nomap(phys_addr_t base, phys_addr_t size)
return memblock_setclr_flag(base, size, 0, MEMBLOCK_NOMAP);
}
-/**
- * __next_reserved_mem_region - next function for for_each_reserved_region()
- * @idx: pointer to u64 loop variable
- * @out_start: ptr to phys_addr_t for start address of the region, can be %NULL
- * @out_end: ptr to phys_addr_t for end address of the region, can be %NULL
- *
- * Iterate over all reserved memory regions.
- */
-void __init_memblock __next_reserved_mem_region(u64 *idx,
- phys_addr_t *out_start,
- phys_addr_t *out_end)
-{
- struct memblock_type *type = &memblock.reserved;
-
- if (*idx < type->cnt) {
- struct memblock_region *r = &type->regions[*idx];
- phys_addr_t base = r->base;
- phys_addr_t size = r->size;
-
- if (out_start)
- *out_start = base;
- if (out_end)
- *out_end = base + size - 1;
-
- *idx += 1;
- return;
- }
-
- /* signal end of iteration */
- *idx = ULLONG_MAX;
-}
-
-static bool should_skip_region(struct memblock_region *m, int nid, int flags)
+static bool should_skip_region(struct memblock_type *type,
+ struct memblock_region *m,
+ int nid, int flags)
{
int m_nid = memblock_get_region_node(m);
+ /* we never skip regions when iterating memblock.reserved or physmem */
+ if (type != memblock_memory)
+ return false;
+
/* only memory regions are associated with nodes, check it */
if (nid != NUMA_NO_NODE && nid != m_nid)
return true;
/* skip hotpluggable memory regions if needed */
- if (movable_node_is_enabled() && memblock_is_hotpluggable(m))
+ if (movable_node_is_enabled() && memblock_is_hotpluggable(m) &&
+ !(flags & MEMBLOCK_HOTPLUG))
return true;
/* if we want mirror memory skip non-mirror memory regions */
@@ -990,6 +1006,10 @@ static bool should_skip_region(struct memblock_region *m, int nid, int flags)
if (!(flags & MEMBLOCK_NOMAP) && memblock_is_nomap(m))
return true;
+ /* skip driver-managed memory unless we were asked for it explicitly */
+ if (!(flags & MEMBLOCK_DRIVER_MANAGED) && memblock_is_driver_managed(m))
+ return true;
+
return false;
}
@@ -1019,12 +1039,10 @@ static bool should_skip_region(struct memblock_region *m, int nid, int flags)
* As both region arrays are sorted, the function advances the two indices
* in lockstep and returns each intersection.
*/
-void __init_memblock __next_mem_range(u64 *idx, int nid,
- enum memblock_flags flags,
- struct memblock_type *type_a,
- struct memblock_type *type_b,
- phys_addr_t *out_start,
- phys_addr_t *out_end, int *out_nid)
+void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags,
+ struct memblock_type *type_a,
+ struct memblock_type *type_b, phys_addr_t *out_start,
+ phys_addr_t *out_end, int *out_nid)
{
int idx_a = *idx & 0xffffffff;
int idx_b = *idx >> 32;
@@ -1040,7 +1058,7 @@ void __init_memblock __next_mem_range(u64 *idx, int nid,
phys_addr_t m_end = m->base + m->size;
int m_nid = memblock_get_region_node(m);
- if (should_skip_region(m, nid, flags))
+ if (should_skip_region(type_a, m, nid, flags))
continue;
if (!type_b) {
@@ -1144,7 +1162,7 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid,
phys_addr_t m_end = m->base + m->size;
int m_nid = memblock_get_region_node(m);
- if (should_skip_region(m, nid, flags))
+ if (should_skip_region(type_a, m, nid, flags))
continue;
if (!type_b) {
@@ -1197,7 +1215,6 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid,
*idx = ULLONG_MAX;
}
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
/*
* Common iterator interface used to define for_each_mem_pfn_range().
*/
@@ -1207,13 +1224,15 @@ void __init_memblock __next_mem_pfn_range(int *idx, int nid,
{
struct memblock_type *type = &memblock.memory;
struct memblock_region *r;
+ int r_nid;
while (++*idx < type->cnt) {
r = &type->regions[*idx];
+ r_nid = memblock_get_region_node(r);
if (PFN_UP(r->base) >= PFN_DOWN(r->base + r->size))
continue;
- if (nid == MAX_NUMNODES || nid == r->nid)
+ if (nid == MAX_NUMNODES || nid == r_nid)
break;
}
if (*idx >= type->cnt) {
@@ -1226,7 +1245,7 @@ void __init_memblock __next_mem_pfn_range(int *idx, int nid,
if (out_end_pfn)
*out_end_pfn = PFN_DOWN(r->base + r->size);
if (out_nid)
- *out_nid = r->nid;
+ *out_nid = r_nid;
}
/**
@@ -1245,6 +1264,7 @@ void __init_memblock __next_mem_pfn_range(int *idx, int nid,
int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
struct memblock_type *type, int nid)
{
+#ifdef CONFIG_NUMA
int start_rgn, end_rgn;
int i, ret;
@@ -1256,9 +1276,10 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
memblock_set_region_node(&type->regions[i], nid);
memblock_merge_regions(type);
+#endif
return 0;
}
-#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
+
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
/**
* __next_mem_pfn_range_in_zone - iterator for for_each_*_range_in_zone()
@@ -1281,11 +1302,10 @@ __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
{
int zone_nid = zone_to_nid(zone);
phys_addr_t spa, epa;
- int nid;
__next_mem_range(idx, zone_nid, MEMBLOCK_NONE,
&memblock.memory, &memblock.reserved,
- &spa, &epa, &nid);
+ &spa, &epa, NULL);
while (*idx != U64_MAX) {
unsigned long epfn = PFN_DOWN(epa);
@@ -1312,7 +1332,7 @@ __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
__next_mem_range(idx, zone_nid, MEMBLOCK_NONE,
&memblock.memory, &memblock.reserved,
- &spa, &epa, &nid);
+ &spa, &epa, NULL);
}
/* signal end of iteration */
@@ -1343,13 +1363,13 @@ __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
* from the regions with mirroring enabled and then retried from any
* memory region.
*
- * In addition, function sets the min_count to 0 using kmemleak_alloc_phys for
- * allocated boot memory block, so that it is never reported as leaks.
+ * In addition, function using kmemleak_alloc_phys for allocated boot
+ * memory block, it is never reported as leaks.
*
* Return:
* Physical address of allocated memory block on success, %0 on failure.
*/
-static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
+phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
phys_addr_t align, phys_addr_t start,
phys_addr_t end, int nid,
bool exact_nid)
@@ -1382,7 +1402,7 @@ again:
if (flags & MEMBLOCK_MIRROR) {
flags &= ~MEMBLOCK_MIRROR;
- pr_warn("Could not allocate %pap bytes of mirrored memory\n",
+ pr_warn_ratelimited("Could not allocate %pap bytes of mirrored memory\n",
&size);
goto again;
}
@@ -1390,15 +1410,18 @@ again:
return 0;
done:
- /* Skip kmemleak for kasan_init() due to high volume. */
- if (end != MEMBLOCK_ALLOC_KASAN)
+ /*
+ * Skip kmemleak for those places like kasan_init() and
+ * early_pgtable_alloc() due to high volume.
+ */
+ if (end != MEMBLOCK_ALLOC_NOLEAKTRACE)
/*
- * The min_count is set to 0 so that memblock allocated
- * blocks are never reported as leaks. This is because many
- * of these blocks are only referred via the physical
- * address which is not looked up by kmemleak.
+ * Memblock allocated blocks are never reported as
+ * leaks. This is because many of these blocks are
+ * only referred via the physical address which is
+ * not looked up by kmemleak.
*/
- kmemleak_alloc_phys(found, size, 0, 0);
+ kmemleak_alloc_phys(found, size, 0);
return found;
}
@@ -1420,12 +1443,15 @@ phys_addr_t __init memblock_phys_alloc_range(phys_addr_t size,
phys_addr_t start,
phys_addr_t end)
{
+ memblock_dbg("%s: %llu bytes align=0x%llx from=%pa max_addr=%pa %pS\n",
+ __func__, (u64)size, (u64)align, &start, &end,
+ (void *)_RET_IP_);
return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE,
false);
}
/**
- * memblock_phys_alloc_try_nid - allocate a memory block from specified MUMA node
+ * memblock_phys_alloc_try_nid - allocate a memory block from specified NUMA node
* @size: size of memory block to be allocated in bytes
* @align: alignment of the region and block's size
* @nid: nid of the free area to find, %NUMA_NO_NODE for any node
@@ -1518,18 +1544,12 @@ void * __init memblock_alloc_exact_nid_raw(
phys_addr_t min_addr, phys_addr_t max_addr,
int nid)
{
- void *ptr;
-
memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n",
__func__, (u64)size, (u64)align, nid, &min_addr,
&max_addr, (void *)_RET_IP_);
- ptr = memblock_alloc_internal(size, align,
- min_addr, max_addr, nid, true);
- if (ptr && size > 0)
- page_init_poison(ptr, size);
-
- return ptr;
+ return memblock_alloc_internal(size, align, min_addr, max_addr, nid,
+ true);
}
/**
@@ -1556,18 +1576,12 @@ void * __init memblock_alloc_try_nid_raw(
phys_addr_t min_addr, phys_addr_t max_addr,
int nid)
{
- void *ptr;
-
memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n",
__func__, (u64)size, (u64)align, nid, &min_addr,
&max_addr, (void *)_RET_IP_);
- ptr = memblock_alloc_internal(size, align,
- min_addr, max_addr, nid, false);
- if (ptr && size > 0)
- page_init_poison(ptr, size);
-
- return ptr;
+ return memblock_alloc_internal(size, align, min_addr, max_addr, nid,
+ false);
}
/**
@@ -1606,7 +1620,7 @@ void * __init memblock_alloc_try_nid(
}
/**
- * __memblock_free_late - free pages directly to buddy allocator
+ * memblock_free_late - free pages directly to buddy allocator
* @base: phys starting address of the boot memory block
* @size: size of the boot memory block in bytes
*
@@ -1614,7 +1628,7 @@ void * __init memblock_alloc_try_nid(
* down, but we are still initializing the system. Pages are released directly
* to the buddy allocator.
*/
-void __init __memblock_free_late(phys_addr_t base, phys_addr_t size)
+void __init memblock_free_late(phys_addr_t base, phys_addr_t size)
{
phys_addr_t cursor, end;
@@ -1645,23 +1659,6 @@ phys_addr_t __init_memblock memblock_reserved_size(void)
return memblock.reserved.total_size;
}
-phys_addr_t __init memblock_mem_size(unsigned long limit_pfn)
-{
- unsigned long pages = 0;
- struct memblock_region *r;
- unsigned long start_pfn, end_pfn;
-
- for_each_memblock(memory, r) {
- start_pfn = memblock_region_memory_base_pfn(r);
- end_pfn = memblock_region_memory_end_pfn(r);
- start_pfn = min_t(unsigned long, start_pfn, limit_pfn);
- end_pfn = min_t(unsigned long, end_pfn, limit_pfn);
- pages += end_pfn - start_pfn;
- }
-
- return PFN_PHYS(pages);
-}
-
/* lowest address */
phys_addr_t __init_memblock memblock_start_of_DRAM(void)
{
@@ -1685,7 +1682,7 @@ static phys_addr_t __init_memblock __find_max_addr(phys_addr_t limit)
* the memory memblock regions, if the @limit exceeds the total size
* of those regions, max_addr will keep original value PHYS_ADDR_MAX
*/
- for_each_memblock(memory, r) {
+ for_each_mem_region(r) {
if (limit <= r->size) {
max_addr = r->base + limit;
break;
@@ -1698,7 +1695,7 @@ static phys_addr_t __init_memblock __find_max_addr(phys_addr_t limit)
void __init memblock_enforce_memory_limit(phys_addr_t limit)
{
- phys_addr_t max_addr = PHYS_ADDR_MAX;
+ phys_addr_t max_addr;
if (!limit)
return;
@@ -1724,6 +1721,11 @@ void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size)
if (!size)
return;
+ if (!memblock_memory->total_size) {
+ pr_warn("%s: No memory registered yet\n", __func__);
+ return;
+ }
+
ret = memblock_isolate_range(&memblock.memory, base, size,
&start_rgn, &end_rgn);
if (ret)
@@ -1797,7 +1799,6 @@ bool __init_memblock memblock_is_map_memory(phys_addr_t addr)
return !memblock_is_nomap(&memblock.memory.regions[i]);
}
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
int __init_memblock memblock_search_pfn_nid(unsigned long pfn,
unsigned long *start_pfn, unsigned long *end_pfn)
{
@@ -1810,9 +1811,8 @@ int __init_memblock memblock_search_pfn_nid(unsigned long pfn,
*start_pfn = PFN_DOWN(type->regions[mid].base);
*end_pfn = PFN_DOWN(type->regions[mid].base + type->regions[mid].size);
- return type->regions[mid].nid;
+ return memblock_get_region_node(&type->regions[mid]);
}
-#endif
/**
* memblock_is_region_memory - check if a region is a subset of memory
@@ -1848,7 +1848,6 @@ bool __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t siz
*/
bool __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size)
{
- memblock_cap_size(base, &size);
return memblock_overlaps_region(&memblock.reserved, base, size);
}
@@ -1857,7 +1856,7 @@ void __init_memblock memblock_trim_memory(phys_addr_t align)
phys_addr_t start, end, orig_start, orig_end;
struct memblock_region *r;
- for_each_memblock(memory, r) {
+ for_each_mem_region(r) {
orig_start = r->base;
orig_end = r->base + r->size;
start = round_up(orig_start, align);
@@ -1903,7 +1902,7 @@ static void __init_memblock memblock_dump(struct memblock_type *type)
size = rgn->size;
end = base + size - 1;
flags = rgn->flags;
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+#ifdef CONFIG_NUMA
if (memblock_get_region_node(rgn) != MAX_NUMNODES)
snprintf(nid_buf, sizeof(nid_buf), " on node %d",
memblock_get_region_node(rgn));
@@ -1913,7 +1912,7 @@ static void __init_memblock memblock_dump(struct memblock_type *type)
}
}
-void __init_memblock __memblock_dump_all(void)
+static void __init_memblock __memblock_dump_all(void)
{
pr_info("MEMBLOCK configuration:\n");
pr_info(" memory size = %pa reserved size = %pa\n",
@@ -1923,10 +1922,16 @@ void __init_memblock __memblock_dump_all(void)
memblock_dump(&memblock.memory);
memblock_dump(&memblock.reserved);
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
- memblock_dump(&memblock.physmem);
+ memblock_dump(&physmem);
#endif
}
+void __init_memblock memblock_dump_all(void)
+{
+ if (memblock_debug)
+ __memblock_dump_all();
+}
+
void __init memblock_allow_resize(void)
{
memblock_can_resize = 1;
@@ -1940,6 +1945,86 @@ static int __init early_memblock(char *p)
}
early_param("memblock", early_memblock);
+static void __init free_memmap(unsigned long start_pfn, unsigned long end_pfn)
+{
+ struct page *start_pg, *end_pg;
+ phys_addr_t pg, pgend;
+
+ /*
+ * Convert start_pfn/end_pfn to a struct page pointer.
+ */
+ start_pg = pfn_to_page(start_pfn - 1) + 1;
+ end_pg = pfn_to_page(end_pfn - 1) + 1;
+
+ /*
+ * Convert to physical addresses, and round start upwards and end
+ * downwards.
+ */
+ pg = PAGE_ALIGN(__pa(start_pg));
+ pgend = __pa(end_pg) & PAGE_MASK;
+
+ /*
+ * If there are free pages between these, free the section of the
+ * memmap array.
+ */
+ if (pg < pgend)
+ memblock_phys_free(pg, pgend - pg);
+}
+
+/*
+ * The mem_map array can get very big. Free the unused area of the memory map.
+ */
+static void __init free_unused_memmap(void)
+{
+ unsigned long start, end, prev_end = 0;
+ int i;
+
+ if (!IS_ENABLED(CONFIG_HAVE_ARCH_PFN_VALID) ||
+ IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP))
+ return;
+
+ /*
+ * This relies on each bank being in address order.
+ * The banks are sorted previously in bootmem_init().
+ */
+ for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) {
+#ifdef CONFIG_SPARSEMEM
+ /*
+ * Take care not to free memmap entries that don't exist
+ * due to SPARSEMEM sections which aren't present.
+ */
+ start = min(start, ALIGN(prev_end, PAGES_PER_SECTION));
+#endif
+ /*
+ * Align down here since many operations in VM subsystem
+ * presume that there are no holes in the memory map inside
+ * a pageblock
+ */
+ start = pageblock_start_pfn(start);
+
+ /*
+ * If we had a previous bank, and there is a space
+ * between the current bank and the previous, free it.
+ */
+ if (prev_end && prev_end < start)
+ free_memmap(prev_end, start);
+
+ /*
+ * Align up here since many operations in VM subsystem
+ * presume that there are no holes in the memory map inside
+ * a pageblock
+ */
+ prev_end = pageblock_align(end);
+ }
+
+#ifdef CONFIG_SPARSEMEM
+ if (!IS_ALIGNED(prev_end, PAGES_PER_SECTION)) {
+ prev_end = pageblock_align(end);
+ free_memmap(prev_end, ALIGN(prev_end, PAGES_PER_SECTION));
+ }
+#endif
+}
+
static void __init __free_pages_memory(unsigned long start, unsigned long end)
{
int order;
@@ -1971,6 +2056,26 @@ static unsigned long __init __free_memory_core(phys_addr_t start,
return end_pfn - start_pfn;
}
+static void __init memmap_init_reserved_pages(void)
+{
+ struct memblock_region *region;
+ phys_addr_t start, end;
+ u64 i;
+
+ /* initialize struct pages for the reserved regions */
+ for_each_reserved_mem_range(i, &start, &end)
+ reserve_bootmem_region(start, end);
+
+ /* and also treat struct pages for the NOMAP regions as PageReserved */
+ for_each_mem_region(region) {
+ if (memblock_is_nomap(region)) {
+ start = region->base;
+ end = start + region->size;
+ reserve_bootmem_region(start, end);
+ }
+ }
+}
+
static unsigned long __init free_low_memory_core_early(void)
{
unsigned long count = 0;
@@ -1979,8 +2084,7 @@ static unsigned long __init free_low_memory_core_early(void)
memblock_clear_hotplug(0, -1);
- for_each_reserved_mem_region(i, &start, &end)
- reserve_bootmem_region(start, end);
+ memmap_init_reserved_pages();
/*
* We need to use NUMA_NO_NODE instead of NODE_DATA(0)->node_id
@@ -2019,19 +2123,16 @@ void __init reset_all_zones_managed_pages(void)
/**
* memblock_free_all - release free pages to the buddy allocator
- *
- * Return: the number of pages actually released.
*/
-unsigned long __init memblock_free_all(void)
+void __init memblock_free_all(void)
{
unsigned long pages;
+ free_unused_memmap();
reset_all_zones_managed_pages();
pages = free_low_memory_core_early();
totalram_pages_add(pages);
-
- return pages;
}
#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_ARCH_KEEP_MEMBLOCK)
@@ -2063,8 +2164,8 @@ static int __init memblock_init_debugfs(void)
debugfs_create_file("reserved", 0444, root,
&memblock.reserved, &memblock_debug_fops);
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
- debugfs_create_file("physmem", 0444, root,
- &memblock.physmem, &memblock_debug_fops);
+ debugfs_create_file("physmem", 0444, root, &physmem,
+ &memblock_debug_fops);
#endif
return 0;