aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/trace
diff options
context:
space:
mode:
authorBeau Belgrave <beaub@linux.microsoft.com>2022-07-28 16:33:08 -0700
committerSteven Rostedt (Google) <rostedt@goodmis.org>2022-09-29 10:17:37 -0400
commit39d6d08b2edf99c4b39a689a70bf0adee065b357 (patch)
tree2dfb47d9ea7e51415af2cd79815f2c9597786655 /kernel/trace
parenttracing/user_events: Use refcount instead of atomic for ref tracking (diff)
downloadlinux-dev-39d6d08b2edf99c4b39a689a70bf0adee065b357.tar.xz
linux-dev-39d6d08b2edf99c4b39a689a70bf0adee065b357.zip
tracing/user_events: Use bits vs bytes for enabled status page data
User processes may require many events and when they do the cache performance of a byte index status check is less ideal than a bit index. The previous event limit per-page was 4096, the new limit is 32,768. This change adds a bitwise index to the user_reg struct. Programs check that the bit at status_bit has a bit set within the status page(s). Link: https://lkml.kernel.org/r/20220728233309.1896-6-beaub@linux.microsoft.com Link: https://lore.kernel.org/all/2059213643.196683.1648499088753.JavaMail.zimbra@efficios.com/ Suggested-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> Signed-off-by: Beau Belgrave <beaub@linux.microsoft.com> Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Diffstat (limited to 'kernel/trace')
-rw-r--r--kernel/trace/trace_events_user.c75
1 files changed, 67 insertions, 8 deletions
diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
index 2bcae7abfa81..2c0a6ec75548 100644
--- a/kernel/trace/trace_events_user.c
+++ b/kernel/trace/trace_events_user.c
@@ -40,17 +40,44 @@
*/
#define MAX_PAGE_ORDER 0
#define MAX_PAGES (1 << MAX_PAGE_ORDER)
-#define MAX_EVENTS (MAX_PAGES * PAGE_SIZE)
+#define MAX_BYTES (MAX_PAGES * PAGE_SIZE)
+#define MAX_EVENTS (MAX_BYTES * 8)
/* Limit how long of an event name plus args within the subsystem. */
#define MAX_EVENT_DESC 512
#define EVENT_NAME(user_event) ((user_event)->tracepoint.name)
#define MAX_FIELD_ARRAY_SIZE 1024
+/*
+ * The MAP_STATUS_* macros are used for taking a index and determining the
+ * appropriate byte and the bit in the byte to set/reset for an event.
+ *
+ * The lower 3 bits of the index decide which bit to set.
+ * The remaining upper bits of the index decide which byte to use for the bit.
+ *
+ * This is used when an event has a probe attached/removed to reflect live
+ * status of the event wanting tracing or not to user-programs via shared
+ * memory maps.
+ */
+#define MAP_STATUS_BYTE(index) ((index) >> 3)
+#define MAP_STATUS_MASK(index) BIT((index) & 7)
+
+/*
+ * Internal bits (kernel side only) to keep track of connected probes:
+ * These are used when status is requested in text form about an event. These
+ * bits are compared against an internal byte on the event to determine which
+ * probes to print out to the user.
+ *
+ * These do not reflect the mapped bytes between the user and kernel space.
+ */
+#define EVENT_STATUS_FTRACE BIT(0)
+#define EVENT_STATUS_PERF BIT(1)
+#define EVENT_STATUS_OTHER BIT(7)
+
static char *register_page_data;
static DEFINE_MUTEX(reg_mutex);
-static DEFINE_HASHTABLE(register_table, 4);
+static DEFINE_HASHTABLE(register_table, 8);
static DECLARE_BITMAP(page_bitmap, MAX_EVENTS);
/*
@@ -72,6 +99,7 @@ struct user_event {
int index;
int flags;
int min_size;
+ char status;
};
/*
@@ -106,6 +134,22 @@ static u32 user_event_key(char *name)
return jhash(name, strlen(name), 0);
}
+static __always_inline
+void user_event_register_set(struct user_event *user)
+{
+ int i = user->index;
+
+ register_page_data[MAP_STATUS_BYTE(i)] |= MAP_STATUS_MASK(i);
+}
+
+static __always_inline
+void user_event_register_clear(struct user_event *user)
+{
+ int i = user->index;
+
+ register_page_data[MAP_STATUS_BYTE(i)] &= ~MAP_STATUS_MASK(i);
+}
+
static __always_inline __must_check
bool user_event_last_ref(struct user_event *user)
{
@@ -648,7 +692,7 @@ static int destroy_user_event(struct user_event *user)
dyn_event_remove(&user->devent);
- register_page_data[user->index] = 0;
+ user_event_register_clear(user);
clear_bit(user->index, page_bitmap);
hash_del(&user->node);
@@ -827,7 +871,12 @@ static void update_reg_page_for(struct user_event *user)
rcu_read_unlock_sched();
}
- register_page_data[user->index] = status;
+ if (status)
+ user_event_register_set(user);
+ else
+ user_event_register_clear(user);
+
+ user->status = status;
}
/*
@@ -1332,7 +1381,17 @@ static long user_reg_get(struct user_reg __user *ureg, struct user_reg *kreg)
if (size > PAGE_SIZE)
return -E2BIG;
- return copy_struct_from_user(kreg, sizeof(*kreg), ureg, size);
+ if (size < offsetofend(struct user_reg, write_index))
+ return -EINVAL;
+
+ ret = copy_struct_from_user(kreg, sizeof(*kreg), ureg, size);
+
+ if (ret)
+ return ret;
+
+ kreg->size = size;
+
+ return 0;
}
/*
@@ -1376,7 +1435,7 @@ static long user_events_ioctl_reg(struct file *file, unsigned long uarg)
return ret;
put_user((u32)ret, &ureg->write_index);
- put_user(user->index, &ureg->status_index);
+ put_user(user->index, &ureg->status_bit);
return 0;
}
@@ -1485,7 +1544,7 @@ static int user_status_mmap(struct file *file, struct vm_area_struct *vma)
{
unsigned long size = vma->vm_end - vma->vm_start;
- if (size != MAX_EVENTS)
+ if (size != MAX_BYTES)
return -EINVAL;
return remap_pfn_range(vma, vma->vm_start,
@@ -1520,7 +1579,7 @@ static int user_seq_show(struct seq_file *m, void *p)
mutex_lock(&reg_mutex);
hash_for_each(register_table, i, user, node) {
- status = register_page_data[user->index];
+ status = user->status;
flags = user->flags;
seq_printf(m, "%d:%s", user->index, EVENT_NAME(user));