From a4864671ca0bf51c8e78242951741df52c06766f Mon Sep 17 00:00:00 2001
From: Kairui Song
Date: Tue, 16 Apr 2024 01:18:55 +0800
Subject: lib/xarray: introduce a new helper xas_get_order

It can be used after xas_load to check the order of loaded entries.
Compared to xa_get_order, it saves an XA_STATE and avoids a rewalk.

Add a new test for xas_get_order.  To make the test work, we have to
export xas_get_order with EXPORT_SYMBOL_GPL.

Also fix a sparse warning by checking the slot value with xa_entry
instead of accessing it directly, as suggested by Matthew Wilcox.

[kasong@tencent.com: simplify comment, sparse warning fix, per Matthew Wilcox]
Link: https://lkml.kernel.org/r/20240416071722.45997-4-ryncsn@gmail.com
Link: https://lkml.kernel.org/r/20240415171857.19244-4-ryncsn@gmail.com
Signed-off-by: Kairui Song
Cc: Matthew Wilcox (Oracle)
Signed-off-by: Andrew Morton
---
 lib/test_xarray.c | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

(limited to 'lib/test_xarray.c')

diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index ebe2af2e072d..0efde8f93490 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -1984,6 +1984,39 @@ static noinline void check_get_order(struct xarray *xa)
     }
 }
 
+static noinline void check_xas_get_order(struct xarray *xa)
+{
+    XA_STATE(xas, xa, 0);
+
+    unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1;
+    unsigned int order;
+    unsigned long i, j;
+
+    for (order = 0; order < max_order; order++) {
+        for (i = 0; i < 10; i++) {
+            xas_set_order(&xas, i << order, order);
+            do {
+                xas_lock(&xas);
+                xas_store(&xas, xa_mk_value(i));
+                xas_unlock(&xas);
+            } while (xas_nomem(&xas, GFP_KERNEL));
+
+            for (j = i << order; j < (i + 1) << order; j++) {
+                xas_set_order(&xas, j, 0);
+                rcu_read_lock();
+                xas_load(&xas);
+                XA_BUG_ON(xa, xas_get_order(&xas) != order);
+                rcu_read_unlock();
+            }
+
+            xas_lock(&xas);
+            xas_set_order(&xas, i << order, order);
+            xas_store(&xas, NULL);
+            xas_unlock(&xas);
+        }
+    }
+}
+
 static noinline void check_destroy(struct xarray *xa)
 {
     unsigned long index;
@@ -2035,6 +2068,7 @@ static int xarray_checks(void)
     check_multi_store(&array);
     check_multi_store_advanced(&array);
     check_get_order(&array);
+    check_xas_get_order(&array);
     check_xa_alloc();
     check_find(&array);
     check_find_entry(&array);
--
cgit v1.2.3-59-g8ed1b
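The pattern the new helper enables, as exercised by the test above, is to
load an entry once and then query its order from the same walk state.  A
minimal sketch (hypothetical caller, not code from this series):

    #include <linux/xarray.h>

    /* Load an entry and learn its order in a single tree walk. */
    static int lookup_order(struct xarray *xa, unsigned long index)
    {
        XA_STATE(xas, xa, index);
        int order = -1;

        rcu_read_lock();
        if (xas_load(&xas))
            /* Reuses the state xas_load() left behind; no rewalk. */
            order = xas_get_order(&xas);
        rcu_read_unlock();

        return order;
    }

Contrast with xa_get_order(), which declares its own XA_STATE and walks
the tree again from the root.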
From 6758c1128ceb45d1a35298912b974eb4895b7dd9 Mon Sep 17 00:00:00 2001
From: Kairui Song
Date: Tue, 16 Apr 2024 01:18:56 +0800
Subject: mm/filemap: optimize filemap folio adding
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of doing multiple tree walks, do one optimistic range check
with the lock held, and exit if raced with another insertion.  If a
shadow exists, check it with the new xas_get_order helper before
releasing the lock to avoid redundant tree walks for getting its order.

Drop the lock and do the allocation only if a split is needed.

In the best case, it only needs to walk the tree once.  If it needs to
alloc and split, 3 walks are issued (one for the first ranged conflict
check and order retrieval, one for the second check after allocation,
one for the insert after the split).

Testing with 4K pages, in an 8G cgroup, with 16G brd as block device:

  echo 3 > /proc/sys/vm/drop_caches

  fio -name=cached --numjobs=16 --filename=/mnt/test.img \
    --buffered=1 --ioengine=mmap --rw=randread --time_based \
    --ramp_time=30s --runtime=5m --group_reporting

Before:
  bw (  MiB/s): min= 1027, max= 3520, per=100.00%, avg=2445.02, stdev=18.90, samples=8691
  iops        : min=263001, max=901288, avg=625924.36, stdev=4837.28, samples=8691

After (+7.3%):
  bw (  MiB/s): min=  493, max= 3947, per=100.00%, avg=2625.56, stdev=25.74, samples=8651
  iops        : min=126454, max=1010681, avg=672142.61, stdev=6590.48, samples=8651

Test result with THP (do a THP randread then switch to 4K pages, in the
hope that it issues a lot of splitting):

  echo 3 > /proc/sys/vm/drop_caches

  fio -name=cached --numjobs=16 --filename=/mnt/test.img \
    --buffered=1 --ioengine=mmap -thp=1 --readonly \
    --rw=randread --time_based --ramp_time=30s --runtime=10m \
    --group_reporting

  fio -name=cached --numjobs=16 --filename=/mnt/test.img \
    --buffered=1 --ioengine=mmap \
    --rw=randread --time_based --runtime=5s --group_reporting

Before:
  bw (  KiB/s): min= 4141, max=14202, per=100.00%, avg=7935.51, stdev=96.85, samples=18976
  iops        : min= 1029, max= 3548, avg=1979.52, stdev=24.23, samples=18976

  READ: bw=4545B/s (4545B/s), 4545B/s-4545B/s (4545B/s-4545B/s), io=64.0KiB (65.5kB), run=14419-14419msec

After (+12.5%):
  bw (  KiB/s): min= 4611, max=15370, per=100.00%, avg=8928.74, stdev=105.17, samples=19146
  iops        : min= 1151, max= 3842, avg=2231.27, stdev=26.29, samples=19146

  READ: bw=4635B/s (4635B/s), 4635B/s-4635B/s (4635B/s-4635B/s), io=64.0KiB (65.5kB), run=14137-14137msec

The performance is better for both 4K (+7.3%) and THP (+12.5%) cached
read.

Link: https://lkml.kernel.org/r/20240415171857.19244-5-ryncsn@gmail.com
Signed-off-by: Kairui Song
Cc: Matthew Wilcox (Oracle)
Signed-off-by: Andrew Morton
---
 lib/test_xarray.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++
 mm/filemap.c      | 56 ++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 100 insertions(+), 15 deletions(-)

(limited to 'lib/test_xarray.c')
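Condensed from the mm/filemap.c hunks below, the new loop has roughly this
shape (a simplified sketch: statistics, hugetlb handling, and the
re-validation of the allocated shadow are elided):

    for (;;) {
        int order = -1, split_order = 0;
        void *entry, *old = NULL;

        xas_lock_irq(&xas);
        /* Walk 1: conflict check; sample the shadow's order in place. */
        xas_for_each_conflict(&xas, entry) {
            old = entry;
            if (order == -1)
                order = xas_get_order(&xas);
        }
        if (old && order > folio_order(folio)) {
            if (!alloced_order) {
                split_order = order;        /* must allocate unlocked */
                goto unlock;
            }
            xas_split(&xas, old, order);    /* walk 3: split, then store */
            xas_reset(&xas);
        }
        xas_store(&xas, folio);
    unlock:
        xas_unlock_irq(&xas);

        if (split_order) {
            xas_split_alloc(&xas, old, split_order, gfp);
            alloced_order = split_order;    /* walk 2 re-checks this */
            xas_reset(&xas);
            continue;
        }
        if (!xas_nomem(&xas, gfp))
            break;
    }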
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index 0efde8f93490..8732a311f613 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -2017,6 +2017,64 @@ static noinline void check_xas_get_order(struct xarray *xa)
     }
 }
 
+static noinline void check_xas_conflict_get_order(struct xarray *xa)
+{
+    XA_STATE(xas, xa, 0);
+
+    void *entry;
+    int only_once;
+    unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1;
+    unsigned int order;
+    unsigned long i, j, k;
+
+    for (order = 0; order < max_order; order++) {
+        for (i = 0; i < 10; i++) {
+            xas_set_order(&xas, i << order, order);
+            do {
+                xas_lock(&xas);
+                xas_store(&xas, xa_mk_value(i));
+                xas_unlock(&xas);
+            } while (xas_nomem(&xas, GFP_KERNEL));
+
+            /*
+             * Ensure xas_get_order works with xas_for_each_conflict.
+             */
+            j = i << order;
+            for (k = 0; k < order; k++) {
+                only_once = 0;
+                xas_set_order(&xas, j + (1 << k), k);
+                xas_lock(&xas);
+                xas_for_each_conflict(&xas, entry) {
+                    XA_BUG_ON(xa, entry != xa_mk_value(i));
+                    XA_BUG_ON(xa, xas_get_order(&xas) != order);
+                    only_once++;
+                }
+                XA_BUG_ON(xa, only_once != 1);
+                xas_unlock(&xas);
+            }
+
+            if (order < max_order - 1) {
+                only_once = 0;
+                xas_set_order(&xas, (i & ~1UL) << order, order + 1);
+                xas_lock(&xas);
+                xas_for_each_conflict(&xas, entry) {
+                    XA_BUG_ON(xa, entry != xa_mk_value(i));
+                    XA_BUG_ON(xa, xas_get_order(&xas) != order);
+                    only_once++;
+                }
+                XA_BUG_ON(xa, only_once != 1);
+                xas_unlock(&xas);
+            }
+
+            xas_set_order(&xas, i << order, order);
+            xas_lock(&xas);
+            xas_store(&xas, NULL);
+            xas_unlock(&xas);
+        }
+    }
+}
+
+
 static noinline void check_destroy(struct xarray *xa)
 {
     unsigned long index;
@@ -2069,6 +2127,7 @@ static int xarray_checks(void)
     check_multi_store_advanced(&array);
     check_get_order(&array);
     check_xas_get_order(&array);
+    check_xas_conflict_get_order(&array);
     check_xa_alloc();
     check_find(&array);
     check_find_entry(&array);
diff --git a/mm/filemap.c b/mm/filemap.c
index 17a66ea544e7..7b0b2229d4ed 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -852,7 +852,9 @@ noinline int __filemap_add_folio(struct address_space *mapping,
         struct folio *folio, pgoff_t index, gfp_t gfp, void **shadowp)
 {
     XA_STATE(xas, &mapping->i_pages, index);
-    bool huge = folio_test_hugetlb(folio);
+    void *alloced_shadow = NULL;
+    int alloced_order = 0;
+    bool huge;
     long nr;
 
     VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
@@ -861,6 +863,7 @@ noinline int __filemap_add_folio(struct address_space *mapping,
     VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
 
     xas_set_order(&xas, index, folio_order(folio));
+    huge = folio_test_hugetlb(folio);
     nr = folio_nr_pages(folio);
 
     gfp &= GFP_RECLAIM_MASK;
@@ -868,16 +871,10 @@ noinline int __filemap_add_folio(struct address_space *mapping,
     folio->mapping = mapping;
     folio->index = xas.xa_index;
 
-    do {
-        unsigned int order = xa_get_order(xas.xa, xas.xa_index);
+    for (;;) {
+        int order = -1, split_order = 0;
         void *entry, *old = NULL;
 
-        if (order > folio_order(folio)) {
-            xas_split_alloc(&xas, xa_load(xas.xa, xas.xa_index),
-                    order, gfp);
-            if (xas_error(&xas))
-                goto error;
-        }
         xas_lock_irq(&xas);
         xas_for_each_conflict(&xas, entry) {
             old = entry;
@@ -885,19 +882,33 @@ noinline int __filemap_add_folio(struct address_space *mapping,
                 xas_set_err(&xas, -EEXIST);
                 goto unlock;
             }
+            /*
+             * If a larger entry exists,
+             * it will be the first and only entry iterated.
+             */
+            if (order == -1)
+                order = xas_get_order(&xas);
+        }
+
+        /* entry may have changed before we re-acquire the lock */
+        if (alloced_order && (old != alloced_shadow || order != alloced_order)) {
+            xas_destroy(&xas);
+            alloced_order = 0;
         }
 
         if (old) {
-            if (shadowp)
-                *shadowp = old;
-            /* entry may have been split before we acquired lock */
-            order = xa_get_order(xas.xa, xas.xa_index);
-            if (order > folio_order(folio)) {
+            if (order > 0 && order > folio_order(folio)) {
                 /* How to handle large swap entries? */
                 BUG_ON(shmem_mapping(mapping));
+                if (!alloced_order) {
+                    split_order = order;
+                    goto unlock;
+                }
                 xas_split(&xas, old, order);
                 xas_reset(&xas);
             }
+            if (shadowp)
+                *shadowp = old;
         }
 
         xas_store(&xas, folio);
@@ -913,9 +924,24 @@ noinline int __filemap_add_folio(struct address_space *mapping,
             __lruvec_stat_mod_folio(folio, NR_FILE_THPS, nr);
         }
 
+unlock:
         xas_unlock_irq(&xas);
-    } while (xas_nomem(&xas, gfp));
+
+        /* split needed, alloc here and retry. */
+        if (split_order) {
+            xas_split_alloc(&xas, old, split_order, gfp);
+            if (xas_error(&xas))
+                goto error;
+            alloced_shadow = old;
+            alloced_order = split_order;
+            xas_reset(&xas);
+            continue;
+        }
+
+        if (!xas_nomem(&xas, gfp))
+            break;
+    }
 
     if (xas_error(&xas))
         goto error;
--
cgit v1.2.3-59-g8ed1b
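The property the new test pins down is worth stating: when a conflicting
entry of a larger order exists, xas_for_each_conflict() iterates it exactly
once, as its first and only result, so its order can be sampled on the first
iteration.  A sketch of that idiom (hypothetical helper, not part of the
series):

    /*
     * Return the order of an entry conflicting with the range
     * [index, index + (1 << order)), or -1 if there is none.
     */
    static int probe_conflict_order(struct xarray *xa, unsigned long index,
                    unsigned int order)
    {
        XA_STATE_ORDER(xas, xa, index, order);
        void *entry;
        int found = -1;

        xas_lock(&xas);
        xas_for_each_conflict(&xas, entry) {
            if (found == -1)
                found = xas_get_order(&xas);
        }
        xas_unlock(&xas);

        return found;
    }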
From 2aaba39e783a10fd1368626bce6f618a6a2a78b0 Mon Sep 17 00:00:00 2001
From: Luis Chamberlain
Date: Tue, 23 Apr 2024 12:22:21 -0700
Subject: lib/test_xarray.c: fix error assumptions on check_xa_multi_store_adv_add()

While testing lib/test_xarray in userspace I've noticed we can fail with:

  make -C tools/testing/radix-tree
  ./tools/testing/radix-tree/xarray
  BUG at check_xa_multi_store_adv_add:749
  xarray: 0x55905fb21a00x head 0x55905fa1d8e0x flags 0 marks 0 0 0
  0: 0x55905fa1d8e0x
  xarray: ../../../lib/test_xarray.c:749: check_xa_multi_store_adv_add: Assertion `0' failed.
  Aborted

We get a failure with a BUG_ON(), and that is because we actually can
fail due to -ENOMEM; the check in xas_nomem() will fix this for us, so
it makes no sense to expect no failure inside the loop.  So modify the
check, and since this is also useful for instructional purposes, clarify
the situation.

The check for XA_BUG_ON(xa, xa_load(xa, index) != p) is already done at
the end of the loop, so just remove the bogus one inside the loop.

With this we now pass the test in both kernel and userspace:

In userspace:

  ./tools/testing/radix-tree/xarray
  XArray: 149092856 of 149092856 tests passed

In kernel space:

  XArray: 148257077 of 148257077 tests passed

Link: https://lkml.kernel.org/r/20240423192221.301095-3-mcgrof@kernel.org
Fixes: a60cc288a1a2 ("test_xarray: add tests for advanced multi-index use")
Signed-off-by: Luis Chamberlain
Cc: Daniel Gomez
Cc: Darrick J. Wong
Cc: Dave Chinner
Cc: "Liam R. Howlett"
Cc: Matthew Wilcox (Oracle)
Cc: Pankaj Raghav
Signed-off-by: Andrew Morton
---
 lib/test_xarray.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'lib/test_xarray.c')

diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index ebe2af2e072d..5ab35190aae3 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -744,15 +744,20 @@ static noinline void check_xa_multi_store_adv_add(struct xarray *xa,
 
     do {
         xas_lock_irq(&xas);
-
         xas_store(&xas, p);
-        XA_BUG_ON(xa, xas_error(&xas));
-        XA_BUG_ON(xa, xa_load(xa, index) != p);
-
         xas_unlock_irq(&xas);
+        /*
+         * In our selftest case the only failure we can expect is for
+         * there not to be enough memory, as we're not mimicking the
+         * entire page cache, so verify that's the only error we can
+         * run into here.  The xas_nomem() which follows will fix that
+         * condition for us, so we can chug on with the loop.
+         */
+        XA_BUG_ON(xa, xas_error(&xas) && xas_error(&xas) != -ENOMEM);
     } while (xas_nomem(&xas, GFP_KERNEL));
 
     XA_BUG_ON(xa, xas_error(&xas));
+    XA_BUG_ON(xa, xa_load(xa, index) != p);
 }
 
 /* mimics page_cache_delete() */
--
cgit v1.2.3-59-g8ed1b
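The idiom being corrected is the standard xas_nomem() retry loop: a store
under the lock may legitimately fail with -ENOMEM, and xas_nomem() performs
the allocation unlocked and requests another pass.  In sketch form (minimal,
mirroring the fixed test; the helper name is hypothetical):

    static void store_retrying(struct xarray *xa, unsigned long index,
                   unsigned int order, void *p)
    {
        XA_STATE_ORDER(xas, xa, index, order);

        do {
            xas_lock_irq(&xas);
            xas_store(&xas, p);
            /* -ENOMEM here is expected; xas_nomem() resolves it. */
            xas_unlock_irq(&xas);
        } while (xas_nomem(&xas, GFP_KERNEL));

        /* Only after the loop is any remaining error a real failure. */
        BUG_ON(xas_error(&xas));
    }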
From 2a0774c2886d25f4d2987cd3e3813d16bf96f34f Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Wed, 1 May 2024 16:31:18 +0100
Subject: XArray: set the marks correctly when splitting an entry

If we created a new node to replace an entry which had search marks set,
we were setting the search mark on every entry in that node.  That works
fine when we're splitting to order 0, but when splitting to a larger
order, we must not set the search marks on the sibling entries.

Link: https://lkml.kernel.org/r/20240501153120.4094530-1-willy@infradead.org
Fixes: c010d47f107f ("mm: thp: split huge page to any lower order pages")
Signed-off-by: Matthew Wilcox (Oracle)
Reported-by: Luis Chamberlain
Link: https://lore.kernel.org/r/ZjFGCOYk3FK_zVy3@bombadil.infradead.org
Tested-by: Luis Chamberlain
Cc: Zi Yan
Signed-off-by: Andrew Morton
---
 lib/test_xarray.c | 14 +++++++++++++-
 lib/xarray.c      | 23 +++++++++++++++++++----
 2 files changed, 32 insertions(+), 5 deletions(-)

(limited to 'lib/test_xarray.c')

diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index 5ab35190aae3..928fc20337e6 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -1788,9 +1788,11 @@ static void check_split_1(struct xarray *xa, unsigned long index,
         unsigned int order, unsigned int new_order)
 {
     XA_STATE_ORDER(xas, xa, index, new_order);
-    unsigned int i;
+    unsigned int i, found;
+    void *entry;
 
     xa_store_order(xa, index, order, xa, GFP_KERNEL);
+    xa_set_mark(xa, index, XA_MARK_1);
 
     xas_split_alloc(&xas, xa, order, GFP_KERNEL);
     xas_lock(&xas);
@@ -1807,6 +1809,16 @@ static void check_split_1(struct xarray *xa, unsigned long index,
     xa_set_mark(xa, index, XA_MARK_0);
     XA_BUG_ON(xa, !xa_get_mark(xa, index, XA_MARK_0));
 
+    xas_set_order(&xas, index, 0);
+    found = 0;
+    rcu_read_lock();
+    xas_for_each_marked(&xas, entry, ULONG_MAX, XA_MARK_1) {
+        found++;
+        XA_BUG_ON(xa, xa_is_internal(entry));
+    }
+    rcu_read_unlock();
+    XA_BUG_ON(xa, found != 1 << (order - new_order));
+
     xa_destroy(xa);
 }
 
diff --git a/lib/xarray.c b/lib/xarray.c
index 39f07bfc4dcc..5e7d6334d70d 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -969,8 +969,22 @@ static unsigned int node_get_marks(struct xa_node *node, unsigned int offset)
     return marks;
 }
 
+static inline void node_mark_slots(struct xa_node *node, unsigned int sibs,
+        xa_mark_t mark)
+{
+    int i;
+
+    if (sibs == 0)
+        node_mark_all(node, mark);
+    else {
+        for (i = 0; i < XA_CHUNK_SIZE; i += sibs + 1)
+            node_set_mark(node, i, mark);
+    }
+}
+
 static void node_set_marks(struct xa_node *node, unsigned int offset,
-            struct xa_node *child, unsigned int marks)
+        struct xa_node *child, unsigned int sibs,
+        unsigned int marks)
 {
     xa_mark_t mark = XA_MARK_0;
 
@@ -978,7 +992,7 @@ static void node_set_marks(struct xa_node *node, unsigned int offset,
         if (marks & (1 << (__force unsigned int)mark)) {
             node_set_mark(node, offset, mark);
             if (child)
-                node_mark_all(child, mark);
+                node_mark_slots(child, sibs, mark);
         }
         if (mark == XA_MARK_MAX)
             break;
@@ -1077,7 +1091,8 @@ void xas_split(struct xa_state *xas, void *entry, unsigned int order)
             child->nr_values = xa_is_value(entry) ?
                     XA_CHUNK_SIZE : 0;
             RCU_INIT_POINTER(child->parent, node);
-            node_set_marks(node, offset, child, marks);
+            node_set_marks(node, offset, child, xas->xa_sibs,
+                    marks);
             rcu_assign_pointer(node->slots[offset],
                     xa_mk_node(child));
             if (xa_is_value(curr))
@@ -1086,7 +1101,7 @@ void xas_split(struct xa_state *xas, void *entry, unsigned int order)
         } else {
             unsigned int canon = offset - xas->xa_sibs;
 
-            node_set_marks(node, canon, NULL, marks);
+            node_set_marks(node, canon, NULL, 0, marks);
             rcu_assign_pointer(node->slots[canon], entry);
             while (offset > canon)
                 rcu_assign_pointer(node->slots[offset--],
--
cgit v1.2.3-59-g8ed1b
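To make the fixed semantics concrete: splitting a marked order-4 entry down
to order-2 entries must leave 1 << (4 - 2) = 4 marked canonical entries,
with the sibling slots unmarked.  A sketch modeled on the updated
check_split_1() test (assumes CONFIG_XARRAY_MULTI and an empty xarray xa):

    XA_STATE_ORDER(xas, xa, 0, 2);        /* split target: order 2 */
    void *entry;
    unsigned int found = 0;

    xa_store_order(xa, 0, 4, xa_mk_value(0), GFP_KERNEL);
    xa_set_mark(xa, 0, XA_MARK_1);        /* mark the order-4 entry */

    xas_split_alloc(&xas, xa_mk_value(0), 4, GFP_KERNEL);
    xas_lock(&xas);
    xas_split(&xas, xa_mk_value(0), 4);
    xas_unlock(&xas);

    xas_set_order(&xas, 0, 0);
    rcu_read_lock();
    xas_for_each_marked(&xas, entry, ULONG_MAX, XA_MARK_1)
        found++;
    rcu_read_unlock();

    /*
     * With the fix: found == 4. Before it, the sibling slots were
     * also marked, so marked iteration saw spurious entries.
     */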