aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
authorSuren Baghdasaryan <surenb@google.com>2025-02-13 14:46:38 -0800
committerAndrew Morton <akpm@linux-foundation.org>2025-03-16 22:06:17 -0700
commitb2ae5fccb8c0ec2167e6e6c5c5a5eb8d656f70ef (patch)
tree326c76d7c696b0f7d8dccb692d563a18760bd916
parentmm: avoid splitting pmd for lazyfree pmd-mapped THP in try_to_unmap (diff)
downloadwireguard-linux-b2ae5fccb8c0ec2167e6e6c5c5a5eb8d656f70ef.tar.xz
wireguard-linux-b2ae5fccb8c0ec2167e6e6c5c5a5eb8d656f70ef.zip
mm: introduce vma_start_read_locked{_nested} helpers
Patch series "reimplement per-vma lock as a refcount", v10. Back when per-vma locks were introduces, vm_lock was moved out of vm_area_struct in [1] because of the performance regression caused by false cacheline sharing. Recent investigation [2] revealed that the regressions is limited to a rather old Broadwell microarchitecture and even there it can be mitigated by disabling adjacent cacheline prefetching, see [3]. Splitting single logical structure into multiple ones leads to more complicated management, extra pointer dereferences and overall less maintainable code. When that split-away part is a lock, it complicates things even further. With no performance benefits, there are no reasons for this split. Merging the vm_lock back into vm_area_struct also allows vm_area_struct to use SLAB_TYPESAFE_BY_RCU later in this patchset. This patchset: 1. moves vm_lock back into vm_area_struct, aligning it at the cacheline boundary and changing the cache to be cacheline-aligned to minimize cacheline sharing; 2. changes vm_area_struct initialization to mark new vma as detached until it is inserted into vma tree; 3. replaces vm_lock and vma->detached flag with a reference counter; 4. regroups vm_area_struct members to fit them into 3 cachelines; 5. changes vm_area_struct cache to SLAB_TYPESAFE_BY_RCU to allow for their reuse and to minimize call_rcu() calls. Pagefault microbenchmarks show performance improvement: Hmean faults/cpu-1 507926.5547 ( 0.00%) 506519.3692 * -0.28%* Hmean faults/cpu-4 479119.7051 ( 0.00%) 481333.6802 * 0.46%* Hmean faults/cpu-7 452880.2961 ( 0.00%) 455845.6211 * 0.65%* Hmean faults/cpu-12 347639.1021 ( 0.00%) 352004.2254 * 1.26%* Hmean faults/cpu-21 200061.2238 ( 0.00%) 229597.0317 * 14.76%* Hmean faults/cpu-30 145251.2001 ( 0.00%) 164202.5067 * 13.05%* Hmean faults/cpu-48 106848.4434 ( 0.00%) 120641.5504 * 12.91%* Hmean faults/cpu-56 92472.3835 ( 0.00%) 103464.7916 * 11.89%* Hmean faults/sec-1 507566.1468 ( 0.00%) 506139.0811 * -0.28%* Hmean faults/sec-4 1880478.2402 ( 0.00%) 1886795.6329 * 0.34%* Hmean faults/sec-7 3106394.3438 ( 0.00%) 3140550.7485 * 1.10%* Hmean faults/sec-12 4061358.4795 ( 0.00%) 4112477.0206 * 1.26%* Hmean faults/sec-21 3988619.1169 ( 0.00%) 4577747.1436 * 14.77%* Hmean faults/sec-30 3909839.5449 ( 0.00%) 4311052.2787 * 10.26%* Hmean faults/sec-48 4761108.4691 ( 0.00%) 5283790.5026 * 10.98%* Hmean faults/sec-56 4885561.4590 ( 0.00%) 5415839.4045 * 10.85%* This patch (of 18): Introduce helper functions which can be used to read-lock a VMA when holding mmap_lock for read. Replace direct accesses to vma->vm_lock with these new helpers. Link: https://lkml.kernel.org/r/20250213224655.1680278-1-surenb@google.com Link: https://lkml.kernel.org/r/20250213224655.1680278-2-surenb@google.com Signed-off-by: Suren Baghdasaryan <surenb@google.com> Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> Reviewed-by: Davidlohr Bueso <dave@stgolabs.net> Reviewed-by: Shakeel Butt <shakeel.butt@linux.dev> Reviewed-by: Vlastimil Babka <vbabka@suse.cz> Reviewed-by: Liam R. Howlett <Liam.Howlett@Oracle.com> Tested-by: Shivank Garg <shivankg@amd.com> Link: https://lkml.kernel.org/r/5e19ec93-8307-47c2-bb13-3ddf7150624e@amd.com Cc: Christian Brauner <brauner@kernel.org> Cc: David Hildenbrand <david@redhat.com> Cc: David Howells <dhowells@redhat.com> Cc: Hugh Dickins <hughd@google.com> Cc: Jann Horn <jannh@google.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Klara Modin <klarasmodin@gmail.com> Cc: Lokesh Gidra <lokeshgidra@google.com> Cc: Mateusz Guzik <mjguzik@gmail.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Mel Gorman <mgorman@techsingularity.net> Cc: Michal Hocko <mhocko@suse.com> Cc: Minchan Kim <minchan@google.com> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Pasha Tatashin <pasha.tatashin@soleen.com> Cc: "Paul E . McKenney" <paulmck@kernel.org> Cc: Peter Xu <peterx@redhat.com> Cc: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Sourav Panda <souravpanda@google.com> Cc: Wei Yang <richard.weiyang@gmail.com> Cc: Will Deacon <will@kernel.org> Cc: Heiko Carstens <hca@linux.ibm.com> Cc: Stephen Rothwell <sfr@canb.auug.org.au> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
-rw-r--r--include/linux/mm.h24
-rw-r--r--mm/userfaultfd.c22
2 files changed, 29 insertions, 17 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index fd1e85b4b48a..6a4914bc1a38 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -735,6 +735,30 @@ static inline bool vma_start_read(struct vm_area_struct *vma)
return true;
}
+/*
+ * Use only while holding mmap read lock which guarantees that locking will not
+ * fail (nobody can concurrently write-lock the vma). vma_start_read() should
+ * not be used in such cases because it might fail due to mm_lock_seq overflow.
+ * This functionality is used to obtain vma read lock and drop the mmap read lock.
+ */
+static inline void vma_start_read_locked_nested(struct vm_area_struct *vma, int subclass)
+{
+ mmap_assert_locked(vma->vm_mm);
+ down_read_nested(&vma->vm_lock->lock, subclass);
+}
+
+/*
+ * Use only while holding mmap read lock which guarantees that locking will not
+ * fail (nobody can concurrently write-lock the vma). vma_start_read() should
+ * not be used in such cases because it might fail due to mm_lock_seq overflow.
+ * This functionality is used to obtain vma read lock and drop the mmap read lock.
+ */
+static inline void vma_start_read_locked(struct vm_area_struct *vma)
+{
+ mmap_assert_locked(vma->vm_mm);
+ down_read(&vma->vm_lock->lock);
+}
+
static inline void vma_end_read(struct vm_area_struct *vma)
{
rcu_read_lock(); /* keeps vma alive till the end of up_read */
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index d06453fa8aba..48ac81bbfee6 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -85,16 +85,8 @@ static struct vm_area_struct *uffd_lock_vma(struct mm_struct *mm,
mmap_read_lock(mm);
vma = find_vma_and_prepare_anon(mm, address);
- if (!IS_ERR(vma)) {
- /*
- * We cannot use vma_start_read() as it may fail due to
- * false locked (see comment in vma_start_read()). We
- * can avoid that by directly locking vm_lock under
- * mmap_lock, which guarantees that nobody can lock the
- * vma for write (vma_start_write()) under us.
- */
- down_read(&vma->vm_lock->lock);
- }
+ if (!IS_ERR(vma))
+ vma_start_read_locked(vma);
mmap_read_unlock(mm);
return vma;
@@ -1564,14 +1556,10 @@ static int uffd_move_lock(struct mm_struct *mm,
mmap_read_lock(mm);
err = find_vmas_mm_locked(mm, dst_start, src_start, dst_vmap, src_vmap);
if (!err) {
- /*
- * See comment in uffd_lock_vma() as to why not using
- * vma_start_read() here.
- */
- down_read(&(*dst_vmap)->vm_lock->lock);
+ vma_start_read_locked(*dst_vmap);
if (*dst_vmap != *src_vmap)
- down_read_nested(&(*src_vmap)->vm_lock->lock,
- SINGLE_DEPTH_NESTING);
+ vma_start_read_locked_nested(*src_vmap,
+ SINGLE_DEPTH_NESTING);
}
mmap_read_unlock(mm);
return err;