From bbefa0fc04bab21e85f6b2ee7984c59694366f6a Mon Sep 17 00:00:00 2001
From: Kemeng Shi <shikemeng@huaweicloud.com>
Date: Fri, 1 Sep 2023 23:51:36 +0800
Subject: mm/compaction: use correct list in move_freelist_{head}/{tail}

Patch series "Fixes and cleanups to compaction", v3.

This is a series of fixes and cleanups for compaction. Patches 1-2 fix
and clean up the freepage list operations. Patches 3-4 fix and clean up
the isolation of freepages. Patch 6 factors out the code that checks
whether compaction is needed for a target allocation order. More
details can be found in the respective patches.

This patch (of 6):

Freepages are chained through buddy_list, not lru, when they sit on a
freelist. Use buddy_list instead of lru so the list operations refer to
the correct member.

Link: https://lkml.kernel.org/r/20230901155141.249860-1-shikemeng@huaweicloud.com
Link: https://lkml.kernel.org/r/20230901155141.249860-2-shikemeng@huaweicloud.com
Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com>
Reviewed-by: Baolin Wang
Acked-by: Mel Gorman
Cc: David Hildenbrand
Cc: Matthew Wilcox (Oracle)
Signed-off-by: Andrew Morton
---
 mm/compaction.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/mm/compaction.c b/mm/compaction.c
index 38c8d216c6a3..e3ee1bc1c0ad 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1395,8 +1395,8 @@ move_freelist_head(struct list_head *freelist, struct page *freepage)
 {
 	LIST_HEAD(sublist);
 
-	if (!list_is_last(freelist, &freepage->lru)) {
-		list_cut_before(&sublist, freelist, &freepage->lru);
+	if (!list_is_last(freelist, &freepage->buddy_list)) {
+		list_cut_before(&sublist, freelist, &freepage->buddy_list);
 		list_splice_tail(&sublist, freelist);
 	}
 }
@@ -1412,8 +1412,8 @@ move_freelist_tail(struct list_head *freelist, struct page *freepage)
 {
 	LIST_HEAD(sublist);
 
-	if (!list_is_first(freelist, &freepage->lru)) {
-		list_cut_position(&sublist, freelist, &freepage->lru);
+	if (!list_is_first(freelist, &freepage->buddy_list)) {
+		list_cut_position(&sublist, freelist, &freepage->buddy_list);
 		list_splice_tail(&sublist, freelist);
 	}
 }
--
cgit v1.2.3-59-g8ed1b
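
A note on why the old code appeared to work at all: in struct page, lru
and buddy_list are members of the same union, so both names alias the
same storage, and list operations through either touched the same links.
The change is about naming the list the page is actually on. The
following minimal userspace sketch shows that aliasing; the struct below
is an invented stand-in, not the real struct page:

#include <assert.h>
#include <stdio.h>

struct list_head {
        struct list_head *next, *prev;
};

/* illustrative stand-in for the relevant part of struct page */
struct fake_page {
        union {
                struct list_head lru;           /* LRU list linkage */
                struct list_head buddy_list;    /* buddy freelist linkage */
        };
};

int main(void)
{
        struct fake_page page;

        /* Both names refer to the same storage, so the old code using
         * ->lru still manipulated the buddy freelist links; using
         * buddy_list makes the intent match the actual list. */
        assert((void *)&page.lru == (void *)&page.buddy_list);
        printf("lru and buddy_list alias the same bytes\n");
        return 0;
}
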
From 4c17989116cb0a6a91f4184077c342a9097b748e Mon Sep 17 00:00:00 2001
From: Kemeng Shi <shikemeng@huaweicloud.com>
Date: Fri, 1 Sep 2023 23:51:37 +0800
Subject: mm/compaction: call list_is_{first}/{last} more intuitively in
 move_freelist_{head}/{tail}

We use move_freelist_head() after list_for_each_entry_reverse() to skip
recently scanned pages, and no actual move is needed when all freepages
have already been searched, i.e. when freepage points to the first page
in the freelist. To check whether a list entry is the first one, it is
more intuitive to call list_is_first() with the entry as the first
argument and the list head as the second, rather than calling
list_is_last() with the entry and head passed in reverse. Similarly,
calling list_is_last() in move_freelist_tail() is more intuitive.

Link: https://lkml.kernel.org/r/20230901155141.249860-3-shikemeng@huaweicloud.com
Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com>
Reviewed-by: Baolin Wang
Acked-by: Mel Gorman
Cc: David Hildenbrand
Cc: Matthew Wilcox (Oracle)
Signed-off-by: Andrew Morton
---
 mm/compaction.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/compaction.c b/mm/compaction.c
index e3ee1bc1c0ad..a40550a33aee 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1395,7 +1395,7 @@ move_freelist_head(struct list_head *freelist, struct page *freepage)
 {
 	LIST_HEAD(sublist);
 
-	if (!list_is_last(freelist, &freepage->buddy_list)) {
+	if (!list_is_first(&freepage->buddy_list, freelist)) {
 		list_cut_before(&sublist, freelist, &freepage->buddy_list);
 		list_splice_tail(&sublist, freelist);
 	}
@@ -1412,7 +1412,7 @@ move_freelist_tail(struct list_head *freelist, struct page *freepage)
 {
 	LIST_HEAD(sublist);
 
-	if (!list_is_first(freelist, &freepage->buddy_list)) {
+	if (!list_is_last(&freepage->buddy_list, freelist)) {
 		list_cut_position(&sublist, freelist, &freepage->buddy_list);
 		list_splice_tail(&sublist, freelist);
 	}
--
cgit v1.2.3-59-g8ed1b

From 3da0272a4c7d0d37b47b28e87014f421296fc2be Mon Sep 17 00:00:00 2001
From: Kemeng Shi <shikemeng@huaweicloud.com>
Date: Fri, 1 Sep 2023 23:51:38 +0800
Subject: mm/compaction: correctly return failure with bogus compound_order in
 strict mode

In strict mode, we should return 0 if there is any hole in the
pageblock. If we successfully isolate pages at the beginning of the
pageblock and then read a bogus compound_order on a later page that
reaches outside the pageblock, we abort the search loop with
blockpfn > end_pfn. Although blockpfn is then clamped to end_pfn, strict
mode still treats this as a successful isolation, because blockpfn is no
longer < end_pfn, and the partially isolated pages are returned.
isolate_freepages_range() may then succeed unexpectedly even though
there is a hole in the isolated range.

Link: https://lkml.kernel.org/r/20230901155141.249860-4-shikemeng@huaweicloud.com
Fixes: 9fcd6d2e052e ("mm, compaction: skip compound pages by order in free scanner")
Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com>
Reviewed-by: Baolin Wang
Acked-by: Mel Gorman
Cc: David Hildenbrand
Cc: Matthew Wilcox (Oracle)
Signed-off-by: Andrew Morton
---
 mm/compaction.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mm/compaction.c b/mm/compaction.c
index a40550a33aee..9ecbfbc695e5 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -626,11 +626,12 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
 		if (PageCompound(page)) {
 			const unsigned int order = compound_order(page);
 
-			if (likely(order <= MAX_ORDER)) {
+			if (blockpfn + (1UL << order) <= end_pfn) {
 				blockpfn += (1UL << order) - 1;
 				page += (1UL << order) - 1;
 				nr_scanned += (1UL << order) - 1;
 			}
+
 			goto isolate_fail;
 		}
 
@@ -678,8 +679,7 @@ isolate_fail:
 	spin_unlock_irqrestore(&cc->zone->lock, flags);
 
 	/*
-	 * There is a tiny chance that we have read bogus compound_order(),
-	 * so be careful to not go outside of the pageblock.
+	 * Be careful to not go outside of the pageblock.
 	 */
 	if (unlikely(blockpfn > end_pfn))
 		blockpfn = end_pfn;
--
cgit v1.2.3-59-g8ed1b
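
To make the new bounds check concrete, here is a small standalone sketch
with invented pfn values; MAX_ORDER below is a stand-in for the kernel
constant, and the prints only illustrate the two behaviors:

#include <stdio.h>

#define MAX_ORDER 10    /* stand-in for the kernel constant */

int main(void)
{
        unsigned long blockpfn = 1000;  /* current scan position */
        unsigned long end_pfn  = 1024;  /* end of the pageblock */
        unsigned int order     = 6;     /* compound_order() read, maybe bogus */

        /* Old check: the order looks plausible, so the scanner skips the
         * whole compound page even though it extends past the pageblock. */
        if (order <= MAX_ORDER)
                printf("old: skip to pfn %lu, past end_pfn %lu\n",
                       blockpfn + (1UL << order), end_pfn);

        /* New check: skip only when the compound page provably ends
         * inside the pageblock, so blockpfn can never overshoot end_pfn. */
        if (blockpfn + (1UL << order) <= end_pfn)
                printf("new: safe to skip the compound page\n");
        else
                printf("new: do not skip; fall through to isolate_fail\n");
        return 0;
}

With the old check, a plausible-looking but bogus order lets the scanner
step past end_pfn; with the new one, the scan falls through to
isolate_fail instead, so strict mode can detect the hole and fail.
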
From 8df4e28c64188911fba33789bf2cb882b3ae524e Mon Sep 17 00:00:00 2001
From: Kemeng Shi <shikemeng@huaweicloud.com>
Date: Fri, 1 Sep 2023 23:51:39 +0800
Subject: mm/compaction: remove repeat compact_blockskip_flush check in
 reset_isolation_suitable

__reset_isolation_suitable() already checks compact_blockskip_flush, so
remove the repeated check before calling it from
reset_isolation_suitable().

Link: https://lkml.kernel.org/r/20230901155141.249860-5-shikemeng@huaweicloud.com
Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com>
Reviewed-by: Baolin Wang
Acked-by: Mel Gorman
Cc: David Hildenbrand
Cc: Matthew Wilcox (Oracle)
Signed-off-by: Andrew Morton
---
 mm/compaction.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/mm/compaction.c b/mm/compaction.c
index 9ecbfbc695e5..c377d78e0f15 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -382,6 +382,7 @@ static void __reset_isolation_suitable(struct zone *zone)
 	bool source_set = false;
 	bool free_set = false;
 
+	/* Only flush if a full compaction finished recently */
 	if (!zone->compact_blockskip_flush)
 		return;
 
@@ -434,9 +435,7 @@ void reset_isolation_suitable(pg_data_t *pgdat)
 		if (!populated_zone(zone))
 			continue;
 
-		/* Only flush if a full compaction finished recently */
-		if (zone->compact_blockskip_flush)
-			__reset_isolation_suitable(zone);
+		__reset_isolation_suitable(zone);
 	}
 }
--
cgit v1.2.3-59-g8ed1b

From 9cc17ede5125933ab47f8f359c2cce3aca8ee757 Mon Sep 17 00:00:00 2001
From: Kemeng Shi <shikemeng@huaweicloud.com>
Date: Fri, 1 Sep 2023 23:51:40 +0800
Subject: mm/compaction: improve comment of is_via_compact_memory

We do compaction with order == -1 via
1. /proc/sys/vm/compact_memory
2. /sys/devices/system/node/nodex/compact
3. /proc/sys/vm/compaction_proactiveness
Add the situations missing from the comment in which order == -1.

Link: https://lkml.kernel.org/r/20230901155141.249860-6-shikemeng@huaweicloud.com
Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com>
Reviewed-by: Baolin Wang
Acked-by: Mel Gorman
Cc: David Hildenbrand
Cc: Matthew Wilcox (Oracle)
Signed-off-by: Andrew Morton
---
 mm/compaction.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/mm/compaction.c b/mm/compaction.c
index c377d78e0f15..ff3426a0d9c5 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2065,8 +2065,10 @@ static isolate_migrate_t isolate_migratepages(struct compact_control *cc)
 }
 
 /*
- * order == -1 is expected when compacting via
- * /proc/sys/vm/compact_memory
+ * order == -1 is expected when compacting proactively via
+ * 1. /proc/sys/vm/compact_memory
+ * 2. /sys/devices/system/node/nodex/compact
+ * 3. /proc/sys/vm/compaction_proactiveness
 */
 static inline bool is_via_compact_memory(int order)
 {
--
cgit v1.2.3-59-g8ed1b
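
The reset_isolation_suitable() change above is an instance of a common
cleanup: keep the guard in the callee so no caller has to repeat it. A
minimal standalone sketch of the pattern, with invented stub types and
bodies standing in for the kernel's (not kernel code):

#include <stdbool.h>
#include <stdio.h>

struct zone_stub {
        bool compact_blockskip_flush;
};

static void __reset_isolation_suitable(struct zone_stub *zone)
{
        /* The callee guards itself: only flush if a full compaction
         * finished recently. */
        if (!zone->compact_blockskip_flush)
                return;
        printf("resetting pageblock skip hints\n");
        zone->compact_blockskip_flush = false;
}

static void reset_isolation_suitable(struct zone_stub *zone)
{
        /* The caller can now call unconditionally; the duplicate
         * "if (zone->compact_blockskip_flush)" test is gone. */
        __reset_isolation_suitable(zone);
}

int main(void)
{
        struct zone_stub zone = { .compact_blockskip_flush = true };

        reset_isolation_suitable(&zone);        /* flushes */
        reset_isolation_suitable(&zone);        /* no-op */
        return 0;
}
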
From e19a3f595ae47bd8c034b98eb0b28a3877413387 Mon Sep 17 00:00:00 2001
From: Kemeng Shi <shikemeng@huaweicloud.com>
Date: Fri, 1 Sep 2023 23:51:41 +0800
Subject: mm/compaction: factor out code to test if we should run compaction
 for target order

We always do the zone_watermark_ok() check and the compaction_suitable()
check together to test whether compaction for the target order should be
run. Factor this code out to remove the repetition.

Link: https://lkml.kernel.org/r/20230901155141.249860-7-shikemeng@huaweicloud.com
Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com>
Reviewed-by: Baolin Wang
Cc: David Hildenbrand
Cc: Matthew Wilcox (Oracle)
Cc: Mel Gorman
Signed-off-by: Andrew Morton
---
 mm/compaction.c | 66 ++++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 39 insertions(+), 27 deletions(-)

diff --git a/mm/compaction.c b/mm/compaction.c
index ff3426a0d9c5..01ba298739dd 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2378,6 +2378,30 @@ bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
 	return false;
 }
 
+/*
+ * Should we do compaction for target allocation order.
+ * Return COMPACT_SUCCESS if allocation for target order can be already
+ * satisfied
+ * Return COMPACT_SKIPPED if compaction for target order is likely to fail
+ * Return COMPACT_CONTINUE if compaction for target order should be ran
+ */
+static enum compact_result
+compaction_suit_allocation_order(struct zone *zone, unsigned int order,
+				 int highest_zoneidx, unsigned int alloc_flags)
+{
+	unsigned long watermark;
+
+	watermark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK);
+	if (zone_watermark_ok(zone, order, watermark, highest_zoneidx,
+			      alloc_flags))
+		return COMPACT_SUCCESS;
+
+	if (!compaction_suitable(zone, order, highest_zoneidx))
+		return COMPACT_SKIPPED;
+
+	return COMPACT_CONTINUE;
+}
+
 static enum compact_result
 compact_zone(struct compact_control *cc, struct capture_control *capc)
 {
@@ -2403,19 +2427,11 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
 	cc->migratetype = gfp_migratetype(cc->gfp_mask);
 
 	if (!is_via_compact_memory(cc->order)) {
-		unsigned long watermark;
-
-		/* Allocation can already succeed, nothing to do */
-		watermark = wmark_pages(cc->zone,
-					cc->alloc_flags & ALLOC_WMARK_MASK);
-		if (zone_watermark_ok(cc->zone, cc->order, watermark,
-				      cc->highest_zoneidx, cc->alloc_flags))
-			return COMPACT_SUCCESS;
-
-		/* Compaction is likely to fail */
-		if (!compaction_suitable(cc->zone, cc->order,
-					 cc->highest_zoneidx))
-			return COMPACT_SKIPPED;
+		ret = compaction_suit_allocation_order(cc->zone, cc->order,
+						       cc->highest_zoneidx,
+						       cc->alloc_flags);
+		if (ret != COMPACT_CONTINUE)
+			return ret;
 	}
 
 	/*
@@ -2914,6 +2930,7 @@ static bool kcompactd_node_suitable(pg_data_t *pgdat)
 	int zoneid;
 	struct zone *zone;
 	enum zone_type highest_zoneidx = pgdat->kcompactd_highest_zoneidx;
+	enum compact_result ret;
 
 	for (zoneid = 0; zoneid <= highest_zoneidx; zoneid++) {
 		zone = &pgdat->node_zones[zoneid];
@@ -2921,14 +2938,10 @@ static bool kcompactd_node_suitable(pg_data_t *pgdat)
 		if (!populated_zone(zone))
 			continue;
 
-		/* Allocation can already succeed, check other zones */
-		if (zone_watermark_ok(zone, pgdat->kcompactd_max_order,
-				      min_wmark_pages(zone),
-				      highest_zoneidx, 0))
-			continue;
-
-		if (compaction_suitable(zone, pgdat->kcompactd_max_order,
-					highest_zoneidx))
+		ret = compaction_suit_allocation_order(zone,
+				pgdat->kcompactd_max_order,
+				highest_zoneidx, ALLOC_WMARK_MIN);
+		if (ret == COMPACT_CONTINUE)
 			return true;
 	}
 
@@ -2951,6 +2964,8 @@ static void kcompactd_do_work(pg_data_t *pgdat)
 		.ignore_skip_hint = false,
 		.gfp_mask = GFP_KERNEL,
 	};
+	enum compact_result ret;
+
 	trace_mm_compaction_kcompactd_wake(pgdat->node_id, cc.order,
 							cc.highest_zoneidx);
 	count_compact_event(KCOMPACTD_WAKE);
@@ -2965,12 +2980,9 @@ static void kcompactd_do_work(pg_data_t *pgdat)
 		if (compaction_deferred(zone, cc.order))
 			continue;
 
-		/* Allocation can already succeed, nothing to do */
-		if (zone_watermark_ok(zone, cc.order,
-				      min_wmark_pages(zone), zoneid, 0))
-			continue;
-
-		if (!compaction_suitable(zone, cc.order, zoneid))
+		ret = compaction_suit_allocation_order(zone,
+				cc.order, zoneid, ALLOC_WMARK_MIN);
+		if (ret != COMPACT_CONTINUE)
 			continue;
 
 		if (kthread_should_stop())
--
cgit v1.2.3-59-g8ed1b
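
To illustrate how the call sites consume the new helper, here is a
minimal userspace sketch of the tri-state pattern. The stub predicates
and the enum values below are invented stand-ins for zone_watermark_ok(),
compaction_suitable() and the kernel's enum compact_result; only the
control flow mirrors the patch:

#include <stdbool.h>
#include <stdio.h>

enum compact_result { COMPACT_SKIPPED, COMPACT_CONTINUE, COMPACT_SUCCESS };

/* stand-ins for zone_watermark_ok() / compaction_suitable() */
static bool watermark_ok(void) { return false; }
static bool suitable(void)     { return true; }

static enum compact_result suit_allocation_order(void)
{
        if (watermark_ok())
                return COMPACT_SUCCESS;  /* allocation would already succeed */
        if (!suitable())
                return COMPACT_SKIPPED;  /* compaction is likely to fail */
        return COMPACT_CONTINUE;         /* worth running compaction */
}

int main(void)
{
        /* compact_zone()-style caller: propagate anything not CONTINUE. */
        enum compact_result ret = suit_allocation_order();
        if (ret != COMPACT_CONTINUE)
                printf("compact_zone: early return %d\n", ret);

        /* kcompactd-style caller: only CONTINUE means "work to do". */
        if (suit_allocation_order() == COMPACT_CONTINUE)
                printf("kcompactd: zone needs compaction\n");
        return 0;
}

Note the asymmetry the patch preserves: compact_zone() propagates
COMPACT_SUCCESS/COMPACT_SKIPPED to its caller, while the kcompactd paths
only care whether the answer is COMPACT_CONTINUE.
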