aboutsummaryrefslogtreecommitdiffstats
path: root/mm/page_alloc.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-01-12 20:42:54 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2012-01-12 20:42:54 -0800
commit099469502f62fbe0d7e4f0b83a2f22538367f734 (patch)
tree5229c3818b2e6e09d35026d49314047121130536 /mm/page_alloc.c
parentMerge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net (diff)
parentunlzo: fix input buffer free (diff)
downloadlinux-dev-099469502f62fbe0d7e4f0b83a2f22538367f734.tar.xz
linux-dev-099469502f62fbe0d7e4f0b83a2f22538367f734.zip
Merge branch 'akpm' (aka "Andrew's patch-bomb, take two")
Andrew explains: - various misc stuff - Most of the rest of MM: memcg, threaded hugepages, others. - cpumask - kexec - kdump - some direct-io performance tweaking - radix-tree optimisations - new selftests code A note on this: often people will develop a new userspace-visible feature and will develop userspace code to exercise/test that feature. Then they merge the patch and the selftest code dies. Sometimes we paste it into the changelog. Sometimes the code gets thrown into Documentation/(!). This saddens me. So this patch creates a bare-bones framework which will henceforth allow me to ask people to include their test apps in the kernel tree so we can keep them alive. Then when people enhance or fix the feature, I can ask them to update the test app too. The infrastruture is terribly trivial at present - let's see how it evolves. - checkpoint/restart feature work. A note on this: this is a project by various mad Russians to perform c/r mainly from userspace, with various oddball helper code added into the kernel where the need is demonstrated. So rather than some large central lump of code, what we have is little bits and pieces popping up in various places which either expose something new or which permit something which is normally kernel-private to be modified. The overall project is an ongoing thing. I've judged that the size and scope of the thing means that we're more likely to be successful with it if we integrate the support into mainline piecemeal rather than allowing it all to develop out-of-tree. However I'm less confident than the developers that it will all eventually work! So what I'm asking them to do is to wrap each piece of new code inside CONFIG_CHECKPOINT_RESTORE. So if it all eventually comes to tears and the project as a whole fails, it should be a simple matter to go through and delete all trace of it. This lot pretty much wraps up the -rc1 merge for me. * akpm: (96 commits) unlzo: fix input buffer free ramoops: update parameters only after successful init ramoops: fix use of rounddown_pow_of_two() c/r: prctl: add PR_SET_MM codes to set up mm_struct entries c/r: procfs: add start_data, end_data, start_brk members to /proc/$pid/stat v4 c/r: introduce CHECKPOINT_RESTORE symbol selftests: new x86 breakpoints selftest selftests: new very basic kernel selftests directory radix_tree: take radix_tree_path off stack radix_tree: remove radix_tree_indirect_to_ptr() dio: optimize cache misses in the submission path vfs: cache request_queue in struct block_device fs/direct-io.c: calculate fs_count correctly in get_more_blocks() drivers/parport/parport_pc.c: fix warnings panic: don't print redundant backtraces on oops sysctl: add the kernel.ns_last_pid control kdump: add udev events for memory online/offline include/linux/crash_dump.h needs elf.h kdump: fix crash_kexec()/smp_send_stop() race in panic() kdump: crashk_res init check for /sys/kernel/kexec_crash_size ...
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--mm/page_alloc.c55
1 files changed, 41 insertions, 14 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 794e6715c226..0027d8f4a1bb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1981,14 +1981,20 @@ static struct page *
__alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
struct zonelist *zonelist, enum zone_type high_zoneidx,
nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
- int migratetype, unsigned long *did_some_progress,
- bool sync_migration)
+ int migratetype, bool sync_migration,
+ bool *deferred_compaction,
+ unsigned long *did_some_progress)
{
struct page *page;
- if (!order || compaction_deferred(preferred_zone))
+ if (!order)
return NULL;
+ if (compaction_deferred(preferred_zone)) {
+ *deferred_compaction = true;
+ return NULL;
+ }
+
current->flags |= PF_MEMALLOC;
*did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
nodemask, sync_migration);
@@ -2016,7 +2022,13 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
* but not enough to satisfy watermarks.
*/
count_vm_event(COMPACTFAIL);
- defer_compaction(preferred_zone);
+
+ /*
+ * As async compaction considers a subset of pageblocks, only
+ * defer if the failure was a sync compaction failure.
+ */
+ if (sync_migration)
+ defer_compaction(preferred_zone);
cond_resched();
}
@@ -2028,8 +2040,9 @@ static inline struct page *
__alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
struct zonelist *zonelist, enum zone_type high_zoneidx,
nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
- int migratetype, unsigned long *did_some_progress,
- bool sync_migration)
+ int migratetype, bool sync_migration,
+ bool *deferred_compaction,
+ unsigned long *did_some_progress)
{
return NULL;
}
@@ -2179,6 +2192,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
unsigned long pages_reclaimed = 0;
unsigned long did_some_progress;
bool sync_migration = false;
+ bool deferred_compaction = false;
/*
* In the slowpath, we sanity check order to avoid ever trying to
@@ -2259,12 +2273,22 @@ rebalance:
zonelist, high_zoneidx,
nodemask,
alloc_flags, preferred_zone,
- migratetype, &did_some_progress,
- sync_migration);
+ migratetype, sync_migration,
+ &deferred_compaction,
+ &did_some_progress);
if (page)
goto got_pg;
sync_migration = true;
+ /*
+ * If compaction is deferred for high-order allocations, it is because
+ * sync compaction recently failed. In this is the case and the caller
+ * has requested the system not be heavily disrupted, fail the
+ * allocation now instead of entering direct reclaim
+ */
+ if (deferred_compaction && (gfp_mask & __GFP_NO_KSWAPD))
+ goto nopage;
+
/* Try direct reclaim and then allocating */
page = __alloc_pages_direct_reclaim(gfp_mask, order,
zonelist, high_zoneidx,
@@ -2328,8 +2352,9 @@ rebalance:
zonelist, high_zoneidx,
nodemask,
alloc_flags, preferred_zone,
- migratetype, &did_some_progress,
- sync_migration);
+ migratetype, sync_migration,
+ &deferred_compaction,
+ &did_some_progress);
if (page)
goto got_pg;
}
@@ -4237,7 +4262,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
for (j = 0; j < MAX_NR_ZONES; j++) {
struct zone *zone = pgdat->node_zones + j;
unsigned long size, realsize, memmap_pages;
- enum lru_list l;
+ enum lru_list lru;
size = zone_spanned_pages_in_node(nid, j, zones_size);
realsize = size - zone_absent_pages_in_node(nid, j,
@@ -4287,8 +4312,8 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
zone->zone_pgdat = pgdat;
zone_pcp_init(zone);
- for_each_lru(l)
- INIT_LIST_HEAD(&zone->lru[l].list);
+ for_each_lru(lru)
+ INIT_LIST_HEAD(&zone->lruvec.lists[lru]);
zone->reclaim_stat.recent_rotated[0] = 0;
zone->reclaim_stat.recent_rotated[1] = 0;
zone->reclaim_stat.recent_scanned[0] = 0;
@@ -4642,8 +4667,10 @@ static void check_for_regular_memory(pg_data_t *pgdat)
for (zone_type = 0; zone_type <= ZONE_NORMAL; zone_type++) {
struct zone *zone = &pgdat->node_zones[zone_type];
- if (zone->present_pages)
+ if (zone->present_pages) {
node_set_state(zone_to_nid(zone), N_NORMAL_MEMORY);
+ break;
+ }
}
#endif
}