aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-08-06 04:41:21 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2025-08-06 04:41:21 +0300
commitadf12a394c8eb4b857b8f70cc6594a9ab25e3fc6 (patch)
tree28f42ac3595174de2897928fc1771844bcc6a94f
parentnet: usbnet: Fix the wrong netif_carrier_on() call (diff)
parentselftests/perf_events: Add a mmap() correctness test (diff)
downloadwireguard-linux-adf12a394c8eb4b857b8f70cc6594a9ab25e3fc6.tar.xz
wireguard-linux-adf12a394c8eb4b857b8f70cc6594a9ab25e3fc6.zip
Merge tag 'perf-fixes-27504' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git
Pull perf fixes from Thomas Gleixner: "Perf fixes for perf_mmap() reference counting to prevent potential reference count leaks which are caused by: - VMA splits, which change the offset or size of a mapping, which causes perf_mmap_close() to ignore the unmap or unmap the wrong buffer. - Several internal issues of perf_mmap(), which can cause reference count leaks in the perf mmap, corrupt accounting or cause leaks in perf drivers. The main fix is to prevent VMA splits by implementing the [may_]split() callback for vm operations. The other issues are addressed by rearranging code, early returns on failure and invocation of cleanups. Also provide a selftest to validate the fixes. The reference counting should be converted to refcount_t, but that requires larger refactoring of the code and will be done once these fixes are upstream" * tag 'perf-fixes-27504' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git: selftests/perf_events: Add a mmap() correctness test perf/core: Prevent VMA split of buffer mappings perf/core: Handle buffer mapping fail correctly in perf_mmap() perf/core: Exit early on perf_mmap() fail perf/core: Don't leak AUX buffer refcount on allocation failure perf/core: Preserve AUX buffer allocation failure result
-rw-r--r--kernel/events/core.c36
-rw-r--r--tools/testing/selftests/perf_events/.gitignore1
-rw-r--r--tools/testing/selftests/perf_events/Makefile2
-rw-r--r--tools/testing/selftests/perf_events/mmap.c236
4 files changed, 266 insertions, 9 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 22fdf0c187cd..8060c2857bb2 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6842,10 +6842,20 @@ static vm_fault_t perf_mmap_pfn_mkwrite(struct vm_fault *vmf)
return vmf->pgoff == 0 ? 0 : VM_FAULT_SIGBUS;
}
+static int perf_mmap_may_split(struct vm_area_struct *vma, unsigned long addr)
+{
+ /*
+ * Forbid splitting perf mappings to prevent refcount leaks due to
+ * the resulting non-matching offsets and sizes. See open()/close().
+ */
+ return -EINVAL;
+}
+
static const struct vm_operations_struct perf_mmap_vmops = {
.open = perf_mmap_open,
.close = perf_mmap_close, /* non mergeable */
.pfn_mkwrite = perf_mmap_pfn_mkwrite,
+ .may_split = perf_mmap_may_split,
};
static int map_range(struct perf_buffer *rb, struct vm_area_struct *vma)
@@ -7051,8 +7061,6 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
ret = 0;
goto unlock;
}
-
- atomic_set(&rb->aux_mmap_count, 1);
}
user_lock_limit = sysctl_perf_event_mlock >> (PAGE_SHIFT - 10);
@@ -7115,15 +7123,16 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
perf_event_update_time(event);
perf_event_init_userpage(event);
perf_event_update_userpage(event);
+ ret = 0;
} else {
ret = rb_alloc_aux(rb, event, vma->vm_pgoff, nr_pages,
event->attr.aux_watermark, flags);
- if (!ret)
+ if (!ret) {
+ atomic_set(&rb->aux_mmap_count, 1);
rb->aux_mmap_locked = extra;
+ }
}
- ret = 0;
-
unlock:
if (!ret) {
atomic_long_add(user_extra, &user->locked_vm);
@@ -7131,6 +7140,7 @@ unlock:
atomic_inc(&event->mmap_count);
} else if (rb) {
+ /* AUX allocation failed */
atomic_dec(&rb->mmap_count);
}
aux_unlock:
@@ -7138,6 +7148,9 @@ aux_unlock:
mutex_unlock(aux_mutex);
mutex_unlock(&event->mmap_mutex);
+ if (ret)
+ return ret;
+
/*
* Since pinned accounting is per vm we cannot allow fork() to copy our
* vma.
@@ -7145,13 +7158,20 @@ aux_unlock:
vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP);
vma->vm_ops = &perf_mmap_vmops;
- if (!ret)
- ret = map_range(rb, vma);
-
mapped = get_mapped(event, event_mapped);
if (mapped)
mapped(event, vma->vm_mm);
+ /*
+ * Try to map it into the page table. On fail, invoke
+ * perf_mmap_close() to undo the above, as the callsite expects
+ * full cleanup in this case and therefore does not invoke
+ * vmops::close().
+ */
+ ret = map_range(rb, vma);
+ if (ret)
+ perf_mmap_close(vma);
+
return ret;
}
diff --git a/tools/testing/selftests/perf_events/.gitignore b/tools/testing/selftests/perf_events/.gitignore
index ee93dc4969b8..4931b3b6bbd3 100644
--- a/tools/testing/selftests/perf_events/.gitignore
+++ b/tools/testing/selftests/perf_events/.gitignore
@@ -2,3 +2,4 @@
sigtrap_threads
remove_on_exec
watermark_signal
+mmap
diff --git a/tools/testing/selftests/perf_events/Makefile b/tools/testing/selftests/perf_events/Makefile
index 70e3ff211278..2e5d85770dfe 100644
--- a/tools/testing/selftests/perf_events/Makefile
+++ b/tools/testing/selftests/perf_events/Makefile
@@ -2,5 +2,5 @@
CFLAGS += -Wl,-no-as-needed -Wall $(KHDR_INCLUDES)
LDFLAGS += -lpthread
-TEST_GEN_PROGS := sigtrap_threads remove_on_exec watermark_signal
+TEST_GEN_PROGS := sigtrap_threads remove_on_exec watermark_signal mmap
include ../lib.mk
diff --git a/tools/testing/selftests/perf_events/mmap.c b/tools/testing/selftests/perf_events/mmap.c
new file mode 100644
index 000000000000..ea0427aac1f9
--- /dev/null
+++ b/tools/testing/selftests/perf_events/mmap.c
@@ -0,0 +1,236 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE
+
+#include <dirent.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+
+#include <linux/perf_event.h>
+
+#include "../kselftest_harness.h"
+
+#define RB_SIZE 0x3000
+#define AUX_SIZE 0x10000
+#define AUX_OFFS 0x4000
+
+#define HOLE_SIZE 0x1000
+
+/* Reserve space for rb, aux with space for shrink-beyond-vma testing. */
+#define REGION_SIZE (2 * RB_SIZE + 2 * AUX_SIZE)
+#define REGION_AUX_OFFS (2 * RB_SIZE)
+
+#define MAP_BASE 1
+#define MAP_AUX 2
+
+#define EVENT_SRC_DIR "/sys/bus/event_source/devices"
+
+FIXTURE(perf_mmap)
+{
+ int fd;
+ void *ptr;
+ void *region;
+};
+
+FIXTURE_VARIANT(perf_mmap)
+{
+ bool aux;
+ unsigned long ptr_size;
+};
+
+FIXTURE_VARIANT_ADD(perf_mmap, rb)
+{
+ .aux = false,
+ .ptr_size = RB_SIZE,
+};
+
+FIXTURE_VARIANT_ADD(perf_mmap, aux)
+{
+ .aux = true,
+ .ptr_size = AUX_SIZE,
+};
+
+static bool read_event_type(struct dirent *dent, __u32 *type)
+{
+ char typefn[512];
+ FILE *fp;
+ int res;
+
+ snprintf(typefn, sizeof(typefn), "%s/%s/type", EVENT_SRC_DIR, dent->d_name);
+ fp = fopen(typefn, "r");
+ if (!fp)
+ return false;
+
+ res = fscanf(fp, "%u", type);
+ fclose(fp);
+ return res > 0;
+}
+
+FIXTURE_SETUP(perf_mmap)
+{
+ struct perf_event_attr attr = {
+ .size = sizeof(attr),
+ .disabled = 1,
+ .exclude_kernel = 1,
+ .exclude_hv = 1,
+ };
+ struct perf_event_attr attr_ok = {};
+ unsigned int eacces = 0, map = 0;
+ struct perf_event_mmap_page *rb;
+ struct dirent *dent;
+ void *aux, *region;
+ DIR *dir;
+
+ self->ptr = NULL;
+
+ dir = opendir(EVENT_SRC_DIR);
+ if (!dir)
+ SKIP(return, "perf not available.");
+
+ region = mmap(NULL, REGION_SIZE, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(region, MAP_FAILED);
+ self->region = region;
+
+ // Try to find a suitable event on this system
+ while ((dent = readdir(dir))) {
+ int fd;
+
+ if (!read_event_type(dent, &attr.type))
+ continue;
+
+ fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
+ if (fd < 0) {
+ if (errno == EACCES)
+ eacces++;
+ continue;
+ }
+
+ // Check whether the event supports mmap()
+ rb = mmap(region, RB_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fd, 0);
+ if (rb == MAP_FAILED) {
+ close(fd);
+ continue;
+ }
+
+ if (!map) {
+ // Save the event in case that no AUX capable event is found
+ attr_ok = attr;
+ map = MAP_BASE;
+ }
+
+ if (!variant->aux)
+ continue;
+
+ rb->aux_offset = AUX_OFFS;
+ rb->aux_size = AUX_SIZE;
+
+ // Check whether it supports a AUX buffer
+ aux = mmap(region + REGION_AUX_OFFS, AUX_SIZE, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_FIXED, fd, AUX_OFFS);
+ if (aux == MAP_FAILED) {
+ munmap(rb, RB_SIZE);
+ close(fd);
+ continue;
+ }
+
+ attr_ok = attr;
+ map = MAP_AUX;
+ munmap(aux, AUX_SIZE);
+ munmap(rb, RB_SIZE);
+ close(fd);
+ break;
+ }
+ closedir(dir);
+
+ if (!map) {
+ if (!eacces)
+ SKIP(return, "No mappable perf event found.");
+ else
+ SKIP(return, "No permissions for perf_event_open()");
+ }
+
+ self->fd = syscall(SYS_perf_event_open, &attr_ok, 0, -1, -1, 0);
+ ASSERT_NE(self->fd, -1);
+
+ rb = mmap(region, RB_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, self->fd, 0);
+ ASSERT_NE(rb, MAP_FAILED);
+
+ if (!variant->aux) {
+ self->ptr = rb;
+ return;
+ }
+
+ if (map != MAP_AUX)
+ SKIP(return, "No AUX event found.");
+
+ rb->aux_offset = AUX_OFFS;
+ rb->aux_size = AUX_SIZE;
+ aux = mmap(region + REGION_AUX_OFFS, AUX_SIZE, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_FIXED, self->fd, AUX_OFFS);
+ ASSERT_NE(aux, MAP_FAILED);
+ self->ptr = aux;
+}
+
+FIXTURE_TEARDOWN(perf_mmap)
+{
+ ASSERT_EQ(munmap(self->region, REGION_SIZE), 0);
+ if (self->fd != -1)
+ ASSERT_EQ(close(self->fd), 0);
+}
+
+TEST_F(perf_mmap, remap)
+{
+ void *tmp, *ptr = self->ptr;
+ unsigned long size = variant->ptr_size;
+
+ // Test the invalid remaps
+ ASSERT_EQ(mremap(ptr, size, HOLE_SIZE, MREMAP_MAYMOVE), MAP_FAILED);
+ ASSERT_EQ(mremap(ptr + HOLE_SIZE, size, HOLE_SIZE, MREMAP_MAYMOVE), MAP_FAILED);
+ ASSERT_EQ(mremap(ptr + size - HOLE_SIZE, HOLE_SIZE, size, MREMAP_MAYMOVE), MAP_FAILED);
+ // Shrink the end of the mapping such that we only unmap past end of the VMA,
+ // which should succeed and poke a hole into the PROT_NONE region
+ ASSERT_NE(mremap(ptr + size - HOLE_SIZE, size, HOLE_SIZE, MREMAP_MAYMOVE), MAP_FAILED);
+
+ // Remap the whole buffer to a new address
+ tmp = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(tmp, MAP_FAILED);
+
+ // Try splitting offset 1 hole size into VMA, this should fail
+ ASSERT_EQ(mremap(ptr + HOLE_SIZE, size - HOLE_SIZE, size - HOLE_SIZE,
+ MREMAP_MAYMOVE | MREMAP_FIXED, tmp), MAP_FAILED);
+ // Remapping the whole thing should succeed fine
+ ptr = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, tmp);
+ ASSERT_EQ(ptr, tmp);
+ ASSERT_EQ(munmap(tmp, size), 0);
+}
+
+TEST_F(perf_mmap, unmap)
+{
+ unsigned long size = variant->ptr_size;
+
+ // Try to poke holes into the mappings
+ ASSERT_NE(munmap(self->ptr, HOLE_SIZE), 0);
+ ASSERT_NE(munmap(self->ptr + HOLE_SIZE, HOLE_SIZE), 0);
+ ASSERT_NE(munmap(self->ptr + size - HOLE_SIZE, HOLE_SIZE), 0);
+}
+
+TEST_F(perf_mmap, map)
+{
+ unsigned long size = variant->ptr_size;
+
+ // Try to poke holes into the mappings by mapping anonymous memory over it
+ ASSERT_EQ(mmap(self->ptr, HOLE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0), MAP_FAILED);
+ ASSERT_EQ(mmap(self->ptr + HOLE_SIZE, HOLE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0), MAP_FAILED);
+ ASSERT_EQ(mmap(self->ptr + size - HOLE_SIZE, HOLE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0), MAP_FAILED);
+}
+
+TEST_HARNESS_MAIN