From 7be62de99adcab4449d416977b4274985c5fe023 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Wed, 21 Mar 2012 16:33:52 -0700
Subject: vmscan: kswapd carefully call compaction

With CONFIG_COMPACTION enabled, kswapd does not try to free contiguous
free pages, even when it is woken for a higher order request.

This could be bad for eg.  jumbo frame network allocations, which are done
from interrupt context and cannot compact memory themselves.  Higher than
before allocation failure rates in the network receive path have been
observed in kernels with compaction enabled.

Teach kswapd to defragment the memory zones in a node, but only if
required and compaction is not deferred in a zone.

[akpm@linux-foundation.org: reduce scope of zones_need_compaction]
Signed-off-by: Rik van Riel <riel@redhat.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Hillf Danton <dhillf@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compaction.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux/compaction.h')

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index bb2bbdbe5464..7a9323aef4a3 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -23,6 +23,7 @@ extern int fragmentation_index(struct zone *zone, unsigned int order);
 extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *mask,
 			bool sync);
+extern int compact_pgdat(pg_data_t *pgdat, int order);
 extern unsigned long compaction_suitable(struct zone *zone, int order);
 
 /* Do not skip compaction more than 64 times */
@@ -62,6 +63,11 @@ static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
 	return COMPACT_CONTINUE;
 }
 
+static inline int compact_pgdat(pg_data_t *pgdat, int order)
+{
+	return COMPACT_CONTINUE;
+}
+
 static inline unsigned long compaction_suitable(struct zone *zone, int order)
 {
 	return COMPACT_SKIPPED;
-- 
cgit v1.2.3-59-g8ed1b


From aff622495c9a0b56148192e53bdec539f5e147f2 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Wed, 21 Mar 2012 16:33:52 -0700
Subject: vmscan: only defer compaction for failed order and higher

Currently a failed order-9 (transparent hugepage) compaction can lead to
memory compaction being temporarily disabled for a memory zone.  Even if
we only need compaction for an order 2 allocation, eg.  for jumbo frames
networking.

The fix is relatively straightforward: keep track of the highest order at
which compaction is succeeding, and only defer compaction for orders at
which compaction is failing.

Signed-off-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Hillf Danton <dhillf@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compaction.h | 14 ++++++++++----
 include/linux/mmzone.h     |  1 +
 mm/compaction.c            | 12 +++++++++++-
 mm/page_alloc.c            |  6 ++++--
 mm/vmscan.c                |  2 +-
 5 files changed, 27 insertions(+), 8 deletions(-)

(limited to 'include/linux/compaction.h')

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 7a9323aef4a3..51a90b7f2d60 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -34,20 +34,26 @@ extern unsigned long compaction_suitable(struct zone *zone, int order);
  * allocation success. 1 << compact_defer_limit compactions are skipped up
  * to a limit of 1 << COMPACT_MAX_DEFER_SHIFT
  */
-static inline void defer_compaction(struct zone *zone)
+static inline void defer_compaction(struct zone *zone, int order)
 {
 	zone->compact_considered = 0;
 	zone->compact_defer_shift++;
 
+	if (order < zone->compact_order_failed)
+		zone->compact_order_failed = order;
+
 	if (zone->compact_defer_shift > COMPACT_MAX_DEFER_SHIFT)
 		zone->compact_defer_shift = COMPACT_MAX_DEFER_SHIFT;
 }
 
 /* Returns true if compaction should be skipped this time */
-static inline bool compaction_deferred(struct zone *zone)
+static inline bool compaction_deferred(struct zone *zone, int order)
 {
 	unsigned long defer_limit = 1UL << zone->compact_defer_shift;
 
+	if (order < zone->compact_order_failed)
+		return false;
+
 	/* Avoid possible overflow */
 	if (++zone->compact_considered > defer_limit)
 		zone->compact_considered = defer_limit;
@@ -73,11 +79,11 @@ static inline unsigned long compaction_suitable(struct zone *zone, int order)
 	return COMPACT_SKIPPED;
 }
 
-static inline void defer_compaction(struct zone *zone)
+static inline void defer_compaction(struct zone *zone, int order)
 {
 }
 
-static inline bool compaction_deferred(struct zone *zone)
+static inline bool compaction_deferred(struct zone *zone, int order)
 {
 	return 1;
 }
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 650ba2fb3301..dff711509661 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -365,6 +365,7 @@ struct zone {
 	 */
 	unsigned int		compact_considered;
 	unsigned int		compact_defer_shift;
+	int			compact_order_failed;
 #endif
 
 	ZONE_PADDING(_pad1_)
diff --git a/mm/compaction.c b/mm/compaction.c
index 36f0f61f4a24..c4b344a95032 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -695,9 +695,19 @@ static int __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
 		INIT_LIST_HEAD(&cc->freepages);
 		INIT_LIST_HEAD(&cc->migratepages);
 
-		if (cc->order < 0 || !compaction_deferred(zone))
+		if (cc->order < 0 || !compaction_deferred(zone, cc->order))
 			compact_zone(zone, cc);
 
+		if (cc->order > 0) {
+			int ok = zone_watermark_ok(zone, cc->order,
+						low_wmark_pages(zone), 0, 0);
+			if (ok && cc->order > zone->compact_order_failed)
+				zone->compact_order_failed = cc->order + 1;
+			/* Currently async compaction is never deferred. */
+			else if (!ok && cc->sync)
+				defer_compaction(zone, cc->order);
+		}
+
 		VM_BUG_ON(!list_empty(&cc->freepages));
 		VM_BUG_ON(!list_empty(&cc->migratepages));
 	}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a13ded1938f0..572b93ea475c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1990,7 +1990,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	if (!order)
 		return NULL;
 
-	if (compaction_deferred(preferred_zone)) {
+	if (compaction_deferred(preferred_zone, order)) {
 		*deferred_compaction = true;
 		return NULL;
 	}
@@ -2012,6 +2012,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 		if (page) {
 			preferred_zone->compact_considered = 0;
 			preferred_zone->compact_defer_shift = 0;
+			if (order >= preferred_zone->compact_order_failed)
+				preferred_zone->compact_order_failed = order + 1;
 			count_vm_event(COMPACTSUCCESS);
 			return page;
 		}
@@ -2028,7 +2030,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 		 * defer if the failure was a sync compaction failure.
 		 */
 		if (sync_migration)
-			defer_compaction(preferred_zone);
+			defer_compaction(preferred_zone, order);
 
 		cond_resched();
 	}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b2b4c4a0ada2..87e4d6a6dc11 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2198,7 +2198,7 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
 	 * If compaction is deferred, reclaim up to a point where
 	 * compaction will have a chance of success when re-enabled
 	 */
-	if (compaction_deferred(zone))
+	if (compaction_deferred(zone, sc->order))
 		return watermark_ok;
 
 	/* If compaction is not ready to start, keep reclaiming */
-- 
cgit v1.2.3-59-g8ed1b