aboutsummaryrefslogtreecommitdiffstats
path: root/mm/page_alloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--mm/page_alloc.c318
1 files changed, 186 insertions, 132 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fb975cec3518..ca423cc20b59 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -607,6 +607,9 @@ static bool need_debug_guardpage(void)
if (!debug_pagealloc_enabled())
return false;
+ if (!debug_guardpage_minorder())
+ return false;
+
return true;
}
@@ -615,6 +618,9 @@ static void init_debug_guardpage(void)
if (!debug_pagealloc_enabled())
return;
+ if (!debug_guardpage_minorder())
+ return;
+
_debug_guardpage_enabled = true;
}
@@ -635,19 +641,22 @@ static int __init debug_guardpage_minorder_setup(char *buf)
pr_info("Setting debug_guardpage_minorder to %lu\n", res);
return 0;
}
-__setup("debug_guardpage_minorder=", debug_guardpage_minorder_setup);
+early_param("debug_guardpage_minorder", debug_guardpage_minorder_setup);
-static inline void set_page_guard(struct zone *zone, struct page *page,
+static inline bool set_page_guard(struct zone *zone, struct page *page,
unsigned int order, int migratetype)
{
struct page_ext *page_ext;
if (!debug_guardpage_enabled())
- return;
+ return false;
+
+ if (order >= debug_guardpage_minorder())
+ return false;
page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
- return;
+ return false;
__set_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
@@ -655,6 +664,8 @@ static inline void set_page_guard(struct zone *zone, struct page *page,
set_page_private(page, order);
/* Guard pages are not available for any usage */
__mod_zone_freepage_state(zone, -(1 << order), migratetype);
+
+ return true;
}
static inline void clear_page_guard(struct zone *zone, struct page *page,
@@ -676,9 +687,9 @@ static inline void clear_page_guard(struct zone *zone, struct page *page,
__mod_zone_freepage_state(zone, (1 << order), migratetype);
}
#else
-struct page_ext_operations debug_guardpage_ops = { NULL, };
-static inline void set_page_guard(struct zone *zone, struct page *page,
- unsigned int order, int migratetype) {}
+struct page_ext_operations debug_guardpage_ops;
+static inline bool set_page_guard(struct zone *zone, struct page *page,
+ unsigned int order, int migratetype) { return false; }
static inline void clear_page_guard(struct zone *zone, struct page *page,
unsigned int order, int migratetype) {}
#endif
@@ -1008,10 +1019,8 @@ static __always_inline bool free_pages_prepare(struct page *page,
}
if (PageMappingFlags(page))
page->mapping = NULL;
- if (memcg_kmem_enabled() && PageKmemcg(page)) {
+ if (memcg_kmem_enabled() && PageKmemcg(page))
memcg_kmem_uncharge(page, order);
- __ClearPageKmemcg(page);
- }
if (check_free)
bad += free_pages_check(page);
if (bad)
@@ -1395,15 +1404,18 @@ static void __init deferred_free_range(struct page *page,
return;
/* Free a large naturally-aligned chunk if possible */
- if (nr_pages == MAX_ORDER_NR_PAGES &&
- (pfn & (MAX_ORDER_NR_PAGES-1)) == 0) {
+ if (nr_pages == pageblock_nr_pages &&
+ (pfn & (pageblock_nr_pages - 1)) == 0) {
set_pageblock_migratetype(page, MIGRATE_MOVABLE);
- __free_pages_boot_core(page, MAX_ORDER-1);
+ __free_pages_boot_core(page, pageblock_order);
return;
}
- for (i = 0; i < nr_pages; i++, page++)
+ for (i = 0; i < nr_pages; i++, page++, pfn++) {
+ if ((pfn & (pageblock_nr_pages - 1)) == 0)
+ set_pageblock_migratetype(page, MIGRATE_MOVABLE);
__free_pages_boot_core(page, 0);
+ }
}
/* Completion tracking for deferred_init_memmap() threads */
@@ -1471,9 +1483,9 @@ static int __init deferred_init_memmap(void *data)
/*
* Ensure pfn_valid is checked every
- * MAX_ORDER_NR_PAGES for memory holes
+ * pageblock_nr_pages for memory holes
*/
- if ((pfn & (MAX_ORDER_NR_PAGES - 1)) == 0) {
+ if ((pfn & (pageblock_nr_pages - 1)) == 0) {
if (!pfn_valid(pfn)) {
page = NULL;
goto free_range;
@@ -1486,7 +1498,7 @@ static int __init deferred_init_memmap(void *data)
}
/* Minimise pfn page lookups and scheduler checks */
- if (page && (pfn & (MAX_ORDER_NR_PAGES - 1)) != 0) {
+ if (page && (pfn & (pageblock_nr_pages - 1)) != 0) {
page++;
} else {
nr_pages += nr_to_free;
@@ -1522,6 +1534,9 @@ free_range:
free_base_page = NULL;
free_base_pfn = nr_to_free = 0;
}
+ /* Free the last block of pages to allocator */
+ nr_pages += nr_to_free;
+ deferred_free_range(free_base_page, free_base_pfn, nr_to_free);
first_init_pfn = max(end_pfn, first_init_pfn);
}
@@ -1618,18 +1633,15 @@ static inline void expand(struct zone *zone, struct page *page,
size >>= 1;
VM_BUG_ON_PAGE(bad_range(zone, &page[size]), &page[size]);
- if (IS_ENABLED(CONFIG_DEBUG_PAGEALLOC) &&
- debug_guardpage_enabled() &&
- high < debug_guardpage_minorder()) {
- /*
- * Mark as guard pages (or page), that will allow to
- * merge back to allocator when buddy will be freed.
- * Corresponding page table entries will not be touched,
- * pages will stay not present in virtual address space
- */
- set_page_guard(zone, &page[size], high, migratetype);
+ /*
+ * Mark as guard pages (or page), that will allow to
+ * merge back to allocator when buddy will be freed.
+ * Corresponding page table entries will not be touched,
+ * pages will stay not present in virtual address space
+ */
+ if (set_page_guard(zone, &page[size], high, migratetype))
continue;
- }
+
list_add(&page[size].lru, &area->free_list[migratetype]);
area->nr_free++;
set_page_order(&page[size], high);
@@ -2491,9 +2503,14 @@ int __isolate_free_page(struct page *page, unsigned int order)
mt = get_pageblock_migratetype(page);
if (!is_migrate_isolate(mt)) {
- /* Obey watermarks as if the page was being allocated */
- watermark = low_wmark_pages(zone) + (1 << order);
- if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
+ /*
+ * Obey watermarks as if the page was being allocated. We can
+ * emulate a high-order watermark check with a raised order-0
+ * watermark, because we already know our high-order page
+ * exists.
+ */
+ watermark = min_wmark_pages(zone) + (1UL << order);
+ if (!zone_watermark_ok(zone, 0, watermark, 0, ALLOC_CMA))
return 0;
__mod_zone_freepage_state(zone, -(1UL << order), mt);
@@ -2962,9 +2979,11 @@ static DEFINE_RATELIMIT_STATE(nopage_rs,
DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
-void warn_alloc_failed(gfp_t gfp_mask, unsigned int order, const char *fmt, ...)
+void warn_alloc(gfp_t gfp_mask, const char *fmt, ...)
{
unsigned int filter = SHOW_MEM_FILTER_NODES;
+ struct va_format vaf;
+ va_list args;
if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs) ||
debug_guardpage_minorder() > 0)
@@ -2982,22 +3001,16 @@ void warn_alloc_failed(gfp_t gfp_mask, unsigned int order, const char *fmt, ...)
if (in_interrupt() || !(gfp_mask & __GFP_DIRECT_RECLAIM))
filter &= ~SHOW_MEM_FILTER_NODES;
- if (fmt) {
- struct va_format vaf;
- va_list args;
+ pr_warn("%s: ", current->comm);
- va_start(args, fmt);
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ pr_cont("%pV", &vaf);
+ va_end(args);
- vaf.fmt = fmt;
- vaf.va = &args;
+ pr_cont(", mode:%#x(%pGg)\n", gfp_mask, &gfp_mask);
- pr_warn("%pV", &vaf);
-
- va_end(args);
- }
-
- pr_warn("%s: page allocation failure: order:%u, mode:%#x(%pGg)\n",
- current->comm, order, gfp_mask, &gfp_mask);
dump_stack();
if (!should_suppress_show_mem())
show_mem(filter);
@@ -3143,25 +3156,24 @@ static inline bool
should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
enum compact_result compact_result,
enum compact_priority *compact_priority,
- int compaction_retries)
+ int *compaction_retries)
{
int max_retries = MAX_COMPACT_RETRIES;
+ int min_priority;
if (!order)
return false;
+ if (compaction_made_progress(compact_result))
+ (*compaction_retries)++;
+
/*
* compaction considers all the zone as desperately out of memory
* so it doesn't really make much sense to retry except when the
* failure could be caused by insufficient priority
*/
- if (compaction_failed(compact_result)) {
- if (*compact_priority > MIN_COMPACT_PRIORITY) {
- (*compact_priority)--;
- return true;
- }
- return false;
- }
+ if (compaction_failed(compact_result))
+ goto check_priority;
/*
* make sure the compaction wasn't deferred or didn't bail out early
@@ -3182,9 +3194,21 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
*/
if (order > PAGE_ALLOC_COSTLY_ORDER)
max_retries /= 4;
- if (compaction_retries <= max_retries)
+ if (*compaction_retries <= max_retries)
return true;
+ /*
+ * Make sure there are attempts at the highest priority if we exhausted
+ * all retries or failed at the lower priorities.
+ */
+check_priority:
+ min_priority = (order > PAGE_ALLOC_COSTLY_ORDER) ?
+ MIN_COMPACT_COSTLY_PRIORITY : MIN_COMPACT_PRIORITY;
+ if (*compact_priority > min_priority) {
+ (*compact_priority)--;
+ *compaction_retries = 0;
+ return true;
+ }
return false;
}
#else
@@ -3201,7 +3225,7 @@ static inline bool
should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_flags,
enum compact_result compact_result,
enum compact_priority *compact_priority,
- int compaction_retries)
+ int *compaction_retries)
{
struct zone *zone;
struct zoneref *z;
@@ -3374,16 +3398,26 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask)
static inline bool
should_reclaim_retry(gfp_t gfp_mask, unsigned order,
struct alloc_context *ac, int alloc_flags,
- bool did_some_progress, int no_progress_loops)
+ bool did_some_progress, int *no_progress_loops)
{
struct zone *zone;
struct zoneref *z;
/*
+ * Costly allocations might have made a progress but this doesn't mean
+ * their order will become available due to high fragmentation so
+ * always increment the no progress counter for them
+ */
+ if (did_some_progress && order <= PAGE_ALLOC_COSTLY_ORDER)
+ *no_progress_loops = 0;
+ else
+ (*no_progress_loops)++;
+
+ /*
* Make sure we converge to OOM if we cannot make any progress
* several times in the row.
*/
- if (no_progress_loops > MAX_RECLAIM_RETRIES)
+ if (*no_progress_loops > MAX_RECLAIM_RETRIES)
return false;
/*
@@ -3398,7 +3432,7 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
unsigned long reclaimable;
available = reclaimable = zone_reclaimable_pages(zone);
- available -= DIV_ROUND_UP(no_progress_loops * available,
+ available -= DIV_ROUND_UP((*no_progress_loops) * available,
MAX_RECLAIM_RETRIES);
available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
@@ -3459,6 +3493,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
enum compact_result compact_result;
int compaction_retries = 0;
int no_progress_loops = 0;
+ unsigned long alloc_start = jiffies;
+ unsigned int stall_timeout = 10 * HZ;
/*
* In the slowpath, we sanity check order to avoid ever trying to
@@ -3603,9 +3639,6 @@ retry:
if (page)
goto got_pg;
- if (order && compaction_made_progress(compact_result))
- compaction_retries++;
-
/* Do not loop if specifically requested */
if (gfp_mask & __GFP_NORETRY)
goto nopage;
@@ -3617,18 +3650,16 @@ retry:
if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_REPEAT))
goto nopage;
- /*
- * Costly allocations might have made a progress but this doesn't mean
- * their order will become available due to high fragmentation so
- * always increment the no progress counter for them
- */
- if (did_some_progress && order <= PAGE_ALLOC_COSTLY_ORDER)
- no_progress_loops = 0;
- else
- no_progress_loops++;
+ /* Make sure we know about allocations which stall for too long */
+ if (time_after(jiffies, alloc_start + stall_timeout)) {
+ warn_alloc(gfp_mask,
+ "page alloction stalls for %ums, order:%u\n",
+ jiffies_to_msecs(jiffies-alloc_start), order);
+ stall_timeout += 10 * HZ;
+ }
if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
- did_some_progress > 0, no_progress_loops))
+ did_some_progress > 0, &no_progress_loops))
goto retry;
/*
@@ -3640,7 +3671,7 @@ retry:
if (did_some_progress > 0 &&
should_compact_retry(ac, order, alloc_flags,
compact_result, &compact_priority,
- compaction_retries))
+ &compaction_retries))
goto retry;
/* Reclaim has failed us, start killing things */
@@ -3655,7 +3686,8 @@ retry:
}
nopage:
- warn_alloc_failed(gfp_mask, order, NULL);
+ warn_alloc(gfp_mask,
+ "page allocation failure: order:%u", order);
got_pg:
return page;
}
@@ -3756,12 +3788,10 @@ no_zone:
}
out:
- if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page) {
- if (unlikely(memcg_kmem_charge(page, gfp_mask, order))) {
- __free_pages(page, order);
- page = NULL;
- } else
- __SetPageKmemcg(page);
+ if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page &&
+ unlikely(memcg_kmem_charge(page, gfp_mask, order) != 0)) {
+ __free_pages(page, order);
+ page = NULL;
}
if (kmemcheck_enabled && page)
@@ -4064,7 +4094,7 @@ long si_mem_available(void)
int lru;
for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
- pages[lru] = global_page_state(NR_LRU_BASE + lru);
+ pages[lru] = global_node_page_state(NR_LRU_BASE + lru);
for_each_zone(zone)
wmark_low += zone->watermark[WMARK_LOW];
@@ -4411,7 +4441,7 @@ static int build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist,
do {
zone_type--;
zone = pgdat->node_zones + zone_type;
- if (populated_zone(zone)) {
+ if (managed_zone(zone)) {
zoneref_set_zone(zone,
&zonelist->_zonerefs[nr_zones++]);
check_highest_zone(zone_type);
@@ -4606,7 +4636,7 @@ static void build_zonelists_in_node_order(pg_data_t *pgdat, int node)
int j;
struct zonelist *zonelist;
- zonelist = &pgdat->node_zonelists[0];
+ zonelist = &pgdat->node_zonelists[ZONELIST_FALLBACK];
for (j = 0; zonelist->_zonerefs[j].zone != NULL; j++)
;
j = build_zonelists_node(NODE_DATA(node), zonelist, j);
@@ -4622,7 +4652,7 @@ static void build_thisnode_zonelists(pg_data_t *pgdat)
int j;
struct zonelist *zonelist;
- zonelist = &pgdat->node_zonelists[1];
+ zonelist = &pgdat->node_zonelists[ZONELIST_NOFALLBACK];
j = build_zonelists_node(pgdat, zonelist, 0);
zonelist->_zonerefs[j].zone = NULL;
zonelist->_zonerefs[j].zone_idx = 0;
@@ -4643,13 +4673,13 @@ static void build_zonelists_in_zone_order(pg_data_t *pgdat, int nr_nodes)
struct zone *z;
struct zonelist *zonelist;
- zonelist = &pgdat->node_zonelists[0];
+ zonelist = &pgdat->node_zonelists[ZONELIST_FALLBACK];
pos = 0;
for (zone_type = MAX_NR_ZONES - 1; zone_type >= 0; zone_type--) {
for (j = 0; j < nr_nodes; j++) {
node = node_order[j];
z = &NODE_DATA(node)->node_zones[zone_type];
- if (populated_zone(z)) {
+ if (managed_zone(z)) {
zoneref_set_zone(z,
&zonelist->_zonerefs[pos++]);
check_highest_zone(zone_type);
@@ -4761,6 +4791,8 @@ int local_memory_node(int node)
}
#endif
+static void setup_min_unmapped_ratio(void);
+static void setup_min_slab_ratio(void);
#else /* CONFIG_NUMA */
static void set_zonelist_order(void)
@@ -4776,7 +4808,7 @@ static void build_zonelists(pg_data_t *pgdat)
local_node = pgdat->node_id;
- zonelist = &pgdat->node_zonelists[0];
+ zonelist = &pgdat->node_zonelists[ZONELIST_FALLBACK];
j = build_zonelists_node(pgdat, zonelist, 0);
/*
@@ -5049,15 +5081,6 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
/*
- * If not mirrored_kernelcore and ZONE_MOVABLE exists, range
- * from zone_movable_pfn[nid] to end of each node should be
- * ZONE_MOVABLE not ZONE_NORMAL. skip it.
- */
- if (!mirrored_kernelcore && zone_movable_pfn[nid])
- if (zone == ZONE_NORMAL && pfn >= zone_movable_pfn[nid])
- continue;
-
- /*
* Check given memblock attribute by firmware which can affect
* kernel memory layout. If zone==ZONE_MOVABLE but memory is
* mirrored, it's an overlapped memmap init. skip it.
@@ -5500,6 +5523,12 @@ static void __meminit adjust_zone_range_for_zone_movable(int nid,
*zone_end_pfn = min(node_end_pfn,
arch_zone_highest_possible_pfn[movable_zone]);
+ /* Adjust for ZONE_MOVABLE starting within this range */
+ } else if (!mirrored_kernelcore &&
+ *zone_start_pfn < zone_movable_pfn[nid] &&
+ *zone_end_pfn > zone_movable_pfn[nid]) {
+ *zone_end_pfn = zone_movable_pfn[nid];
+
/* Check if this whole range is within ZONE_MOVABLE */
} else if (*zone_start_pfn >= zone_movable_pfn[nid])
*zone_start_pfn = *zone_end_pfn;
@@ -5603,28 +5632,23 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
* Treat pages to be ZONE_MOVABLE in ZONE_NORMAL as absent pages
* and vice versa.
*/
- if (zone_movable_pfn[nid]) {
- if (mirrored_kernelcore) {
- unsigned long start_pfn, end_pfn;
- struct memblock_region *r;
-
- for_each_memblock(memory, r) {
- start_pfn = clamp(memblock_region_memory_base_pfn(r),
- zone_start_pfn, zone_end_pfn);
- end_pfn = clamp(memblock_region_memory_end_pfn(r),
- zone_start_pfn, zone_end_pfn);
-
- if (zone_type == ZONE_MOVABLE &&
- memblock_is_mirror(r))
- nr_absent += end_pfn - start_pfn;
-
- if (zone_type == ZONE_NORMAL &&
- !memblock_is_mirror(r))
- nr_absent += end_pfn - start_pfn;
- }
- } else {
- if (zone_type == ZONE_NORMAL)
- nr_absent += node_end_pfn - zone_movable_pfn[nid];
+ if (mirrored_kernelcore && zone_movable_pfn[nid]) {
+ unsigned long start_pfn, end_pfn;
+ struct memblock_region *r;
+
+ for_each_memblock(memory, r) {
+ start_pfn = clamp(memblock_region_memory_base_pfn(r),
+ zone_start_pfn, zone_end_pfn);
+ end_pfn = clamp(memblock_region_memory_end_pfn(r),
+ zone_start_pfn, zone_end_pfn);
+
+ if (zone_type == ZONE_MOVABLE &&
+ memblock_is_mirror(r))
+ nr_absent += end_pfn - start_pfn;
+
+ if (zone_type == ZONE_NORMAL &&
+ !memblock_is_mirror(r))
+ nr_absent += end_pfn - start_pfn;
}
}
@@ -5882,9 +5906,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
zone->managed_pages = is_highmem_idx(j) ? realsize : freesize;
#ifdef CONFIG_NUMA
zone->node = nid;
- pgdat->min_unmapped_pages += (freesize*sysctl_min_unmapped_ratio)
- / 100;
- pgdat->min_slab_pages += (freesize * sysctl_min_slab_ratio) / 100;
#endif
zone->name = zone_names[j];
zone->zone_pgdat = pgdat;
@@ -6805,6 +6826,12 @@ int __meminit init_per_zone_wmark_min(void)
setup_per_zone_wmarks();
refresh_zone_stat_thresholds();
setup_per_zone_lowmem_reserve();
+
+#ifdef CONFIG_NUMA
+ setup_min_unmapped_ratio();
+ setup_min_slab_ratio();
+#endif
+
return 0;
}
core_initcall(init_per_zone_wmark_min)
@@ -6846,43 +6873,58 @@ int watermark_scale_factor_sysctl_handler(struct ctl_table *table, int write,
}
#ifdef CONFIG_NUMA
+static void setup_min_unmapped_ratio(void)
+{
+ pg_data_t *pgdat;
+ struct zone *zone;
+
+ for_each_online_pgdat(pgdat)
+ pgdat->min_unmapped_pages = 0;
+
+ for_each_zone(zone)
+ zone->zone_pgdat->min_unmapped_pages += (zone->managed_pages *
+ sysctl_min_unmapped_ratio) / 100;
+}
+
+
int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
- struct pglist_data *pgdat;
- struct zone *zone;
int rc;
rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
if (rc)
return rc;
+ setup_min_unmapped_ratio();
+
+ return 0;
+}
+
+static void setup_min_slab_ratio(void)
+{
+ pg_data_t *pgdat;
+ struct zone *zone;
+
for_each_online_pgdat(pgdat)
pgdat->min_slab_pages = 0;
for_each_zone(zone)
- zone->zone_pgdat->min_unmapped_pages += (zone->managed_pages *
- sysctl_min_unmapped_ratio) / 100;
- return 0;
+ zone->zone_pgdat->min_slab_pages += (zone->managed_pages *
+ sysctl_min_slab_ratio) / 100;
}
int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
- struct pglist_data *pgdat;
- struct zone *zone;
int rc;
rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
if (rc)
return rc;
- for_each_online_pgdat(pgdat)
- pgdat->min_slab_pages = 0;
+ setup_min_slab_ratio();
- for_each_zone(zone)
- zone->zone_pgdat->min_slab_pages += (zone->managed_pages *
- sysctl_min_slab_ratio) / 100;
return 0;
}
#endif
@@ -6960,6 +7002,17 @@ static int __init set_hashdist(char *str)
__setup("hashdist=", set_hashdist);
#endif
+#ifndef __HAVE_ARCH_RESERVED_KERNEL_PAGES
+/*
+ * Returns the number of pages that arch has reserved but
+ * is not known to alloc_large_system_hash().
+ */
+static unsigned long __init arch_reserved_kernel_pages(void)
+{
+ return 0;
+}
+#endif
+
/*
* allocate a large system hash table from bootmem
* - it is assumed that the hash table must contain an exact power-of-2
@@ -6984,6 +7037,7 @@ void *__init alloc_large_system_hash(const char *tablename,
if (!numentries) {
/* round applicable memory size up to nearest megabyte */
numentries = nr_kernel_pages;
+ numentries -= arch_reserved_kernel_pages();
/* It isn't necessary when PAGE_SIZE >= 1MB */
if (PAGE_SHIFT < 20)