From f238b8c33c6738f146bbfbb09b78870ea157c2b7 Mon Sep 17 00:00:00 2001
From: Barry Song
Date: Sat, 23 Mar 2024 00:41:36 +1300
Subject: arm64: mm: swap: support THP_SWAP on hardware with MTE

Commit d0637c505f8a1 ("arm64: enable THP_SWAP for arm64") brings up
THP_SWAP on ARM64, but it doesn't enable THP_SWAP on hardware with MTE,
as the MTE code works with the assumption that tag save/restore always
handles a folio with only one page.  The limitation should be removed
as more and more ARM64 SoCs have this feature; the co-existence of MTE
and THP_SWAP is becoming increasingly important.

This patch makes MTE tag saving support large folios, so we no longer
need to split large folios into base pages for swapping out on ARM64
SoCs with MTE.

arch_prepare_to_swap() should take a folio rather than a page as its
parameter because we support THP swap-out as a whole.  It saves tags
for all pages in a large folio.

As we now restore tags based on the folio, arch_swap_restore() may
incur some extra loops and early exits while refaulting a large folio
which is still in the swapcache in do_swap_page().  If a large folio
has nr pages, do_swap_page() only sets the PTE of the particular page
which caused the page fault.  Thus do_swap_page() runs nr times, and
each time arch_swap_restore() loops nr times over the subpages in the
folio, so the algorithmic complexity is currently O(nr^2).

Once we support mapping large folios in do_swap_page(), the extra loops
and early exits will decrease, though they cannot be removed entirely,
as a large folio might be only partially tagged in corner cases such
as:

1. a large folio in the swapcache can be partially unmapped, in which
   case MTE tags for the unmapped pages are invalidated;

2. users might use mprotect() to set MTE on only part of a large folio.

arch_thp_swp_supported() is dropped since ARM64 MTE was the only user
that needed it.

Link: https://lkml.kernel.org/r/20240322114136.61386-2-21cnbao@gmail.com
Signed-off-by: Barry Song
Reviewed-by: Steven Price
Acked-by: Chris Li
Reviewed-by: Ryan Roberts
Cc: Catalin Marinas
Cc: Will Deacon
Cc: Mark Rutland
Cc: David Hildenbrand
Cc: Kemeng Shi
Cc: "Matthew Wilcox (Oracle)"
Cc: Anshuman Khandual
Cc: Peter Collingbourne
Cc: Yosry Ahmed
Cc: Peter Xu
Cc: Lorenzo Stoakes
Cc: "Mike Rapoport (IBM)"
Cc: Hugh Dickins
Cc: "Aneesh Kumar K.V"
Cc: Rick Edgecombe
Signed-off-by: Andrew Morton
---
 mm/page_io.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'mm/page_io.c')

diff --git a/mm/page_io.c b/mm/page_io.c
index ae2b49055e43..a9a7c236aecc 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -189,7 +189,7 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
 	 * Arch code may have to preserve more data than just the page
 	 * contents, e.g. memory tags.
 	 */
-	ret = arch_prepare_to_swap(&folio->page);
+	ret = arch_prepare_to_swap(folio);
 	if (ret) {
 		folio_mark_dirty(folio);
 		folio_unlock(folio);
--
cgit v1.2.3-59-g8ed1b
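Note: the listing above is limited to 'mm/page_io.c', so the arm64 side
of this change (the MTE tag save/restore in arch/arm64/mm/mteswap.c) is
not shown.  As a rough sketch of the approach the message describes,
looping over every page of the folio and unwinding on failure, a
folio-wide arch_prepare_to_swap() might look like the following; the
helpers (system_supports_mte(), mte_save_tags(),
__mte_invalidate_tags()) exist in the arm64 MTE code, but the body here
is illustrative rather than a copy of the patch:

	int arch_prepare_to_swap(struct folio *folio)
	{
		long i, nr;
		int err;

		if (!system_supports_mte())
			return 0;

		nr = folio_nr_pages(folio);

		/* Save tags for every page in the folio. */
		for (i = 0; i < nr; i++) {
			err = mte_save_tags(folio_page(folio, i));
			if (err) {
				/* Unwind: drop tags already saved for this folio. */
				while (i--)
					__mte_invalidate_tags(folio_page(folio, i));
				return err;
			}
		}
		return 0;
	}

arch_swap_restore() similarly iterates over all pages of the folio,
which is where the O(nr^2) refault cost described above comes from
until do_swap_page() can map a large folio in one go.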
From d0f048ac39f6a71566d3f49a5922dfd7fa0d585b Mon Sep 17 00:00:00 2001
From: Barry Song
Date: Fri, 12 Apr 2024 23:48:56 +1200
Subject: mm: add per-order mTHP anon_swpout and anon_swpout_fallback counters

This helps to display the fragmentation situation of the swapfile: it
shows what proportion of large folios we managed to swap out without
splitting them.  So far, we only support non-split swapout for anon
memory, with the possibility of expanding to shmem in the future, so we
add the "anon" prefix to the counter names.

Link: https://lkml.kernel.org/r/20240412114858.407208-3-21cnbao@gmail.com
Signed-off-by: Barry Song
Reviewed-by: Ryan Roberts
Acked-by: David Hildenbrand
Cc: Chris Li
Cc: Domenico Cerasuolo
Cc: Kairui Song
Cc: Matthew Wilcox (Oracle)
Cc: Peter Xu
Cc: Ryan Roberts
Cc: Suren Baghdasaryan
Cc: Yosry Ahmed
Cc: Yu Zhao
Cc: Jonathan Corbet
Signed-off-by: Andrew Morton
---
 include/linux/huge_mm.h | 2 ++
 mm/huge_memory.c        | 4 ++++
 mm/page_io.c            | 1 +
 mm/vmscan.c             | 3 +++
 4 files changed, 10 insertions(+)

(limited to 'mm/page_io.c')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index d4fdb2641070..7cd07b83a3d0 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -268,6 +268,8 @@ enum mthp_stat_item {
 	MTHP_STAT_ANON_FAULT_ALLOC,
 	MTHP_STAT_ANON_FAULT_FALLBACK,
 	MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
+	MTHP_STAT_ANON_SWPOUT,
+	MTHP_STAT_ANON_SWPOUT_FALLBACK,
 	__MTHP_STAT_COUNT
 };
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 01f01d6a50a1..264e09043f09 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -555,11 +555,15 @@ static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
 DEFINE_MTHP_STAT_ATTR(anon_fault_alloc, MTHP_STAT_ANON_FAULT_ALLOC);
 DEFINE_MTHP_STAT_ATTR(anon_fault_fallback, MTHP_STAT_ANON_FAULT_FALLBACK);
 DEFINE_MTHP_STAT_ATTR(anon_fault_fallback_charge, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
+DEFINE_MTHP_STAT_ATTR(anon_swpout, MTHP_STAT_ANON_SWPOUT);
+DEFINE_MTHP_STAT_ATTR(anon_swpout_fallback, MTHP_STAT_ANON_SWPOUT_FALLBACK);

 static struct attribute *stats_attrs[] = {
 	&anon_fault_alloc_attr.attr,
 	&anon_fault_fallback_attr.attr,
 	&anon_fault_fallback_charge_attr.attr,
+	&anon_swpout_attr.attr,
+	&anon_swpout_fallback_attr.attr,
 	NULL,
 };
diff --git a/mm/page_io.c b/mm/page_io.c
index a9a7c236aecc..46c603dddf04 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -217,6 +217,7 @@ static inline void count_swpout_vm_event(struct folio *folio)
 		count_memcg_folio_events(folio, THP_SWPOUT, 1);
 		count_vm_event(THP_SWPOUT);
 	}
+	count_mthp_stat(folio_order(folio), MTHP_STAT_ANON_SWPOUT);
 #endif
 	count_vm_events(PSWPOUT, folio_nr_pages(folio));
 }
diff --git a/mm/vmscan.c b/mm/vmscan.c
index da93213af981..d194e7240df4 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1214,6 +1214,8 @@ retry:
 					goto activate_locked;
 				}
 				if (!add_to_swap(folio)) {
+					int __maybe_unused order = folio_order(folio);
+
 					if (!folio_test_large(folio))
 						goto activate_locked_split;
 					/* Fallback to swap normal pages */
@@ -1225,6 +1227,7 @@ retry:
 							THP_SWPOUT_FALLBACK, 1);
 						count_vm_event(THP_SWPOUT_FALLBACK);
 					}
+					count_mthp_stat(order, MTHP_STAT_ANON_SWPOUT_FALLBACK);
 #endif
 					if (!add_to_swap(folio))
 						goto activate_locked_split;
--
cgit v1.2.3-59-g8ed1b
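For reference, the count_mthp_stat() used in the hunks above is
introduced by an earlier patch in this series, not by this one.
Conceptually it is a per-CPU, per-order counter increment, roughly
along these lines (a sketch from the series context, not code from this
patch):

	/* One row of counters per folio order, per CPU. */
	struct mthp_stat {
		unsigned long stats[ilog2(MAX_PTRS_PER_PTE) + 1][__MTHP_STAT_COUNT];
	};

	DECLARE_PER_CPU(struct mthp_stat, mthp_stats);

	static inline void count_mthp_stat(int order, enum mthp_stat_item item)
	{
		/* Only large-folio (mTHP) orders are tracked, not order-0 pages. */
		if (order <= 0 || order > PMD_ORDER)
			return;

		this_cpu_inc(mthp_stats.stats[order][item]);
	}

The per-CPU sums are exposed through the sysfs attributes defined in
mm/huge_memory.c above, so the new counters become readable from
userspace at
/sys/kernel/mm/transparent_hugepage/hugepages-<size>kB/stats/anon_swpout
and .../anon_swpout_fallback, one directory per supported folio size.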