From bbefa0fc04bab21e85f6b2ee7984c59694366f6a Mon Sep 17 00:00:00 2001
From: Kemeng Shi <shikemeng@huaweicloud.com>
Date: Fri, 1 Sep 2023 23:51:36 +0800
Subject: mm/compaction: use correct list in move_freelist_{head}/{tail}

Patch series "Fixes and cleanups to compaction", v3.

This is a series of fixes and cleanups for compaction. Patches 1-2 fix
and clean up the freepage list operations. Patches 3-4 fix and clean up
the isolation of freepages. Patch 6 factors out the code that checks
whether compaction is needed for a target allocation order. More
details can be found in the respective patches.

This patch (of 6):

Freepages are chained through buddy_list, not lru, when they sit on a
freelist. Use buddy_list instead of lru so the list operations refer to
the correct member.

Link: https://lkml.kernel.org/r/20230901155141.249860-1-shikemeng@huaweicloud.com
Link: https://lkml.kernel.org/r/20230901155141.249860-2-shikemeng@huaweicloud.com
Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com>
Reviewed-by: Baolin Wang
Acked-by: Mel Gorman
Cc: David Hildenbrand
Cc: Matthew Wilcox (Oracle)
Signed-off-by: Andrew Morton
---
 mm/compaction.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/mm/compaction.c b/mm/compaction.c
index 38c8d216c6a3..e3ee1bc1c0ad 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1395,8 +1395,8 @@ move_freelist_head(struct list_head *freelist, struct page *freepage)
 {
 	LIST_HEAD(sublist);
 
-	if (!list_is_last(freelist, &freepage->lru)) {
-		list_cut_before(&sublist, freelist, &freepage->lru);
+	if (!list_is_last(freelist, &freepage->buddy_list)) {
+		list_cut_before(&sublist, freelist, &freepage->buddy_list);
 		list_splice_tail(&sublist, freelist);
 	}
 }
@@ -1412,8 +1412,8 @@ move_freelist_tail(struct list_head *freelist, struct page *freepage)
 {
 	LIST_HEAD(sublist);
 
-	if (!list_is_first(freelist, &freepage->lru)) {
-		list_cut_position(&sublist, freelist, &freepage->lru);
+	if (!list_is_first(freelist, &freepage->buddy_list)) {
+		list_cut_position(&sublist, freelist, &freepage->buddy_list);
 		list_splice_tail(&sublist, freelist);
 	}
 }
--
cgit v1.2.3-59-g8ed1b
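
A note on why the old code appeared to work at all: in struct page, lru
and buddy_list are members of the same union, so both names alias the
same storage, and list operations through either touched the same links.
The change is about naming the list the page is actually on. The
following minimal userspace sketch shows that aliasing; the struct below
is an invented stand-in, not the real struct page:

#include <assert.h>
#include <stdio.h>

struct list_head {
        struct list_head *next, *prev;
};

/* illustrative stand-in for the relevant part of struct page */
struct fake_page {
        union {
                struct list_head lru;           /* LRU list linkage */
                struct list_head buddy_list;    /* buddy freelist linkage */
        };
};

int main(void)
{
        struct fake_page page;

        /* Both names refer to the same storage, so the old code using
         * ->lru still manipulated the buddy freelist links; using
         * buddy_list makes the intent match the actual list. */
        assert((void *)&page.lru == (void *)&page.buddy_list);
        printf("lru and buddy_list alias the same bytes\n");
        return 0;
}
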
From 4c17989116cb0a6a91f4184077c342a9097b748e Mon Sep 17 00:00:00 2001
From: Kemeng Shi <shikemeng@huaweicloud.com>
Date: Fri, 1 Sep 2023 23:51:37 +0800
Subject: mm/compaction: call list_is_{first}/{last} more intuitively in
 move_freelist_{head}/{tail}

We use move_freelist_head() after list_for_each_entry_reverse() to skip
recently scanned pages, and no actual move is needed when all freepages
have already been searched, i.e. when freepage points to the first page
in the freelist. To check whether a list entry is the first one, it is
more intuitive to call list_is_first() with the entry as the first
argument and the list head as the second, rather than calling
list_is_last() with the entry and head passed in reverse. Similarly,
calling list_is_last() in move_freelist_tail() is more intuitive.

Link: https://lkml.kernel.org/r/20230901155141.249860-3-shikemeng@huaweicloud.com
Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com>
Reviewed-by: Baolin Wang
Acked-by: Mel Gorman
Cc: David Hildenbrand
Cc: Matthew Wilcox (Oracle)
Signed-off-by: Andrew Morton
---
 mm/compaction.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/compaction.c b/mm/compaction.c
index e3ee1bc1c0ad..a40550a33aee 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1395,7 +1395,7 @@ move_freelist_head(struct list_head *freelist, struct page *freepage)
 {
 	LIST_HEAD(sublist);
 
-	if (!list_is_last(freelist, &freepage->buddy_list)) {
+	if (!list_is_first(&freepage->buddy_list, freelist)) {
 		list_cut_before(&sublist, freelist, &freepage->buddy_list);
 		list_splice_tail(&sublist, freelist);
 	}
@@ -1412,7 +1412,7 @@ move_freelist_tail(struct list_head *freelist, struct page *freepage)
 {
 	LIST_HEAD(sublist);
 
-	if (!list_is_first(freelist, &freepage->buddy_list)) {
+	if (!list_is_last(&freepage->buddy_list, freelist)) {
 		list_cut_position(&sublist, freelist, &freepage->buddy_list);
 		list_splice_tail(&sublist, freelist);
 	}
--
cgit v1.2.3-59-g8ed1b

From 3da0272a4c7d0d37b47b28e87014f421296fc2be Mon Sep 17 00:00:00 2001
From: Kemeng Shi <shikemeng@huaweicloud.com>
Date: Fri, 1 Sep 2023 23:51:38 +0800
Subject: mm/compaction: correctly return failure with bogus compound_order in
 strict mode

In strict mode, we should return 0 if there is any hole in the
pageblock. If we successfully isolate pages at the beginning of the
pageblock and then read a bogus compound_order on a later page that
reaches outside the pageblock, we abort the search loop with
blockpfn > end_pfn. Although blockpfn is then clamped to end_pfn, strict
mode still treats this as a successful isolation, because blockpfn is no
longer < end_pfn, and the partially isolated pages are returned.
isolate_freepages_range() may then succeed unexpectedly even though
there is a hole in the isolated range.

Link: https://lkml.kernel.org/r/20230901155141.249860-4-shikemeng@huaweicloud.com
Fixes: 9fcd6d2e052e ("mm, compaction: skip compound pages by order in free scanner")
Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com>
Reviewed-by: Baolin Wang
Acked-by: Mel Gorman
Cc: David Hildenbrand
Cc: Matthew Wilcox (Oracle)
Signed-off-by: Andrew Morton
---
 mm/compaction.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mm/compaction.c b/mm/compaction.c
index a40550a33aee..9ecbfbc695e5 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -626,11 +626,12 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
 		if (PageCompound(page)) {
 			const unsigned int order = compound_order(page);
 
-			if (likely(order <= MAX_ORDER)) {
+			if (blockpfn + (1UL << order) <= end_pfn) {
 				blockpfn += (1UL << order) - 1;
 				page += (1UL << order) - 1;
 				nr_scanned += (1UL << order) - 1;
 			}
+
 			goto isolate_fail;
 		}
 
@@ -678,8 +679,7 @@ isolate_fail:
 	spin_unlock_irqrestore(&cc->zone->lock, flags);
 
 	/*
-	 * There is a tiny chance that we have read bogus compound_order(),
-	 * so be careful to not go outside of the pageblock.
+	 * Be careful to not go outside of the pageblock.
 	 */
 	if (unlikely(blockpfn > end_pfn))
 		blockpfn = end_pfn;
--
cgit v1.2.3-59-g8ed1b
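
To make the new bounds check concrete, here is a small standalone sketch
with invented pfn values; MAX_ORDER below is a stand-in for the kernel
constant, and the prints only illustrate the two behaviors:

#include <stdio.h>

#define MAX_ORDER 10    /* stand-in for the kernel constant */

int main(void)
{
        unsigned long blockpfn = 1000;  /* current scan position */
        unsigned long end_pfn  = 1024;  /* end of the pageblock */
        unsigned int order     = 6;     /* compound_order() read, maybe bogus */

        /* Old check: the order looks plausible, so the scanner skips the
         * whole compound page even though it extends past the pageblock. */
        if (order <= MAX_ORDER)
                printf("old: skip to pfn %lu, past end_pfn %lu\n",
                       blockpfn + (1UL << order), end_pfn);

        /* New check: skip only when the compound page provably ends
         * inside the pageblock, so blockpfn can never overshoot end_pfn. */
        if (blockpfn + (1UL << order) <= end_pfn)
                printf("new: safe to skip the compound page\n");
        else
                printf("new: do not skip; fall through to isolate_fail\n");
        return 0;
}

With the old check, a plausible-looking but bogus order lets the scanner
step past end_pfn; with the new one, the scan falls through to
isolate_fail instead, so strict mode can detect the hole and fail.
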
From 8df4e28c64188911fba33789bf2cb882b3ae524e Mon Sep 17 00:00:00 2001
From: Kemeng Shi <shikemeng@huaweicloud.com>
Date: Fri, 1 Sep 2023 23:51:39 +0800
Subject: mm/compaction: remove repeat compact_blockskip_flush check in
 reset_isolation_suitable

__reset_isolation_suitable() already checks compact_blockskip_flush, so
remove the repeated check before calling it from
reset_isolation_suitable().

Link: https://lkml.kernel.org/r/20230901155141.249860-5-shikemeng@huaweicloud.com
Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com>
Reviewed-by: Baolin Wang
Acked-by: Mel Gorman
Cc: David Hildenbrand
Cc: Matthew Wilcox (Oracle)
Signed-off-by: Andrew Morton
---
 mm/compaction.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/mm/compaction.c b/mm/compaction.c
index 9ecbfbc695e5..c377d78e0f15 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -382,6 +382,7 @@ static void __reset_isolation_suitable(struct zone *zone)
 	bool source_set = false;
 	bool free_set = false;
 
+	/* Only flush if a full compaction finished recently */
 	if (!zone->compact_blockskip_flush)
 		return;
 
@@ -434,9 +435,7 @@ void reset_isolation_suitable(pg_data_t *pgdat)
 		if (!populated_zone(zone))
 			continue;
 
-		/* Only flush if a full compaction finished recently */
-		if (zone->compact_blockskip_flush)
-			__reset_isolation_suitable(zone);
+		__reset_isolation_suitable(zone);
 	}
 }
--
cgit v1.2.3-59-g8ed1b

From 9cc17ede5125933ab47f8f359c2cce3aca8ee757 Mon Sep 17 00:00:00 2001
From: Kemeng Shi <shikemeng@huaweicloud.com>
Date: Fri, 1 Sep 2023 23:51:40 +0800
Subject: mm/compaction: improve comment of is_via_compact_memory

We do compaction with order == -1 via
1. /proc/sys/vm/compact_memory
2. /sys/devices/system/node/nodex/compact
3. /proc/sys/vm/compaction_proactiveness
Add the situations missing from the comment in which order == -1.

Link: https://lkml.kernel.org/r/20230901155141.249860-6-shikemeng@huaweicloud.com
Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com>
Reviewed-by: Baolin Wang
Acked-by: Mel Gorman
Cc: David Hildenbrand
Cc: Matthew Wilcox (Oracle)
Signed-off-by: Andrew Morton
---
 mm/compaction.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/mm/compaction.c b/mm/compaction.c
index c377d78e0f15..ff3426a0d9c5 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2065,8 +2065,10 @@ static isolate_migrate_t isolate_migratepages(struct compact_control *cc)
 }
 
 /*
- * order == -1 is expected when compacting via
- * /proc/sys/vm/compact_memory
+ * order == -1 is expected when compacting proactively via
+ * 1. /proc/sys/vm/compact_memory
+ * 2. /sys/devices/system/node/nodex/compact
+ * 3. /proc/sys/vm/compaction_proactiveness
 */
 static inline bool is_via_compact_memory(int order)
 {
--
cgit v1.2.3-59-g8ed1b
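
The reset_isolation_suitable() change above is an instance of a common
cleanup: keep the guard in the callee so no caller has to repeat it. A
minimal standalone sketch of the pattern, with invented stub types and
bodies standing in for the kernel's (not kernel code):

#include <stdbool.h>
#include <stdio.h>

struct zone_stub {
        bool compact_blockskip_flush;
};

static void __reset_isolation_suitable(struct zone_stub *zone)
{
        /* The callee guards itself: only flush if a full compaction
         * finished recently. */
        if (!zone->compact_blockskip_flush)
                return;
        printf("resetting pageblock skip hints\n");
        zone->compact_blockskip_flush = false;
}

static void reset_isolation_suitable(struct zone_stub *zone)
{
        /* The caller can now call unconditionally; the duplicate
         * "if (zone->compact_blockskip_flush)" test is gone. */
        __reset_isolation_suitable(zone);
}

int main(void)
{
        struct zone_stub zone = { .compact_blockskip_flush = true };

        reset_isolation_suitable(&zone);        /* flushes */
        reset_isolation_suitable(&zone);        /* no-op */
        return 0;
}
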
From e19a3f595ae47bd8c034b98eb0b28a3877413387 Mon Sep 17 00:00:00 2001
From: Kemeng Shi <shikemeng@huaweicloud.com>
Date: Fri, 1 Sep 2023 23:51:41 +0800
Subject: mm/compaction: factor out code to test if we should run compaction
 for target order

We always do the zone_watermark_ok() check and the compaction_suitable()
check together to test whether compaction for the target order should be
run. Factor this code out to remove the repetition.

Link: https://lkml.kernel.org/r/20230901155141.249860-7-shikemeng@huaweicloud.com
Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com>
Reviewed-by: Baolin Wang
Cc: David Hildenbrand
Cc: Matthew Wilcox (Oracle)
Cc: Mel Gorman
Signed-off-by: Andrew Morton
---
 mm/compaction.c | 66 ++++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 39 insertions(+), 27 deletions(-)

diff --git a/mm/compaction.c b/mm/compaction.c
index ff3426a0d9c5..01ba298739dd 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2378,6 +2378,30 @@ bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
 	return false;
 }
 
+/*
+ * Should we do compaction for target allocation order.
+ * Return COMPACT_SUCCESS if allocation for target order can be already
+ * satisfied
+ * Return COMPACT_SKIPPED if compaction for target order is likely to fail
+ * Return COMPACT_CONTINUE if compaction for target order should be ran
+ */
+static enum compact_result
+compaction_suit_allocation_order(struct zone *zone, unsigned int order,
+				 int highest_zoneidx, unsigned int alloc_flags)
+{
+	unsigned long watermark;
+
+	watermark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK);
+	if (zone_watermark_ok(zone, order, watermark, highest_zoneidx,
+			      alloc_flags))
+		return COMPACT_SUCCESS;
+
+	if (!compaction_suitable(zone, order, highest_zoneidx))
+		return COMPACT_SKIPPED;
+
+	return COMPACT_CONTINUE;
+}
+
 static enum compact_result
 compact_zone(struct compact_control *cc, struct capture_control *capc)
 {
@@ -2403,19 +2427,11 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
 	cc->migratetype = gfp_migratetype(cc->gfp_mask);
 
 	if (!is_via_compact_memory(cc->order)) {
-		unsigned long watermark;
-
-		/* Allocation can already succeed, nothing to do */
-		watermark = wmark_pages(cc->zone,
-					cc->alloc_flags & ALLOC_WMARK_MASK);
-		if (zone_watermark_ok(cc->zone, cc->order, watermark,
-				      cc->highest_zoneidx, cc->alloc_flags))
-			return COMPACT_SUCCESS;
-
-		/* Compaction is likely to fail */
-		if (!compaction_suitable(cc->zone, cc->order,
-					 cc->highest_zoneidx))
-			return COMPACT_SKIPPED;
+		ret = compaction_suit_allocation_order(cc->zone, cc->order,
+						       cc->highest_zoneidx,
+						       cc->alloc_flags);
+		if (ret != COMPACT_CONTINUE)
+			return ret;
 	}
 
 	/*
@@ -2914,6 +2930,7 @@ static bool kcompactd_node_suitable(pg_data_t *pgdat)
 	int zoneid;
 	struct zone *zone;
 	enum zone_type highest_zoneidx = pgdat->kcompactd_highest_zoneidx;
+	enum compact_result ret;
 
 	for (zoneid = 0; zoneid <= highest_zoneidx; zoneid++) {
 		zone = &pgdat->node_zones[zoneid];
@@ -2921,14 +2938,10 @@ static bool kcompactd_node_suitable(pg_data_t *pgdat)
 		if (!populated_zone(zone))
 			continue;
 
-		/* Allocation can already succeed, check other zones */
-		if (zone_watermark_ok(zone, pgdat->kcompactd_max_order,
-				      min_wmark_pages(zone),
-				      highest_zoneidx, 0))
-			continue;
-
-		if (compaction_suitable(zone, pgdat->kcompactd_max_order,
-					highest_zoneidx))
+		ret = compaction_suit_allocation_order(zone,
+				pgdat->kcompactd_max_order,
+				highest_zoneidx, ALLOC_WMARK_MIN);
+		if (ret == COMPACT_CONTINUE)
 			return true;
 	}
 
@@ -2951,6 +2964,8 @@ static void kcompactd_do_work(pg_data_t *pgdat)
 		.ignore_skip_hint = false,
 		.gfp_mask = GFP_KERNEL,
 	};
+	enum compact_result ret;
+
 	trace_mm_compaction_kcompactd_wake(pgdat->node_id, cc.order,
 							cc.highest_zoneidx);
 	count_compact_event(KCOMPACTD_WAKE);
@@ -2965,12 +2980,9 @@ static void kcompactd_do_work(pg_data_t *pgdat)
 		if (compaction_deferred(zone, cc.order))
 			continue;
 
-		/* Allocation can already succeed, nothing to do */
-		if (zone_watermark_ok(zone, cc.order,
-				      min_wmark_pages(zone), zoneid, 0))
-			continue;
-
-		if (!compaction_suitable(zone, cc.order, zoneid))
+		ret = compaction_suit_allocation_order(zone,
+				cc.order, zoneid, ALLOC_WMARK_MIN);
+		if (ret != COMPACT_CONTINUE)
 			continue;
 
 		if (kthread_should_stop())
--
cgit v1.2.3-59-g8ed1b
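
To illustrate how the call sites consume the new helper, here is a
minimal userspace sketch of the tri-state pattern. The stub predicates
and the enum values below are invented stand-ins for zone_watermark_ok(),
compaction_suitable() and the kernel's enum compact_result; only the
control flow mirrors the patch:

#include <stdbool.h>
#include <stdio.h>

enum compact_result { COMPACT_SKIPPED, COMPACT_CONTINUE, COMPACT_SUCCESS };

/* stand-ins for zone_watermark_ok() / compaction_suitable() */
static bool watermark_ok(void) { return false; }
static bool suitable(void)     { return true; }

static enum compact_result suit_allocation_order(void)
{
        if (watermark_ok())
                return COMPACT_SUCCESS;  /* allocation would already succeed */
        if (!suitable())
                return COMPACT_SKIPPED;  /* compaction is likely to fail */
        return COMPACT_CONTINUE;         /* worth running compaction */
}

int main(void)
{
        /* compact_zone()-style caller: propagate anything not CONTINUE. */
        enum compact_result ret = suit_allocation_order();
        if (ret != COMPACT_CONTINUE)
                printf("compact_zone: early return %d\n", ret);

        /* kcompactd-style caller: only CONTINUE means "work to do". */
        if (suit_allocation_order() == COMPACT_CONTINUE)
                printf("kcompactd: zone needs compaction\n");
        return 0;
}

Note the asymmetry the patch preserves: compact_zone() propagates
COMPACT_SUCCESS/COMPACT_SKIPPED to its caller, while the kcompactd paths
only care whether the answer is COMPACT_CONTINUE.
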