aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
-rw-r--r--Documentation/trace/events.rst14
-rw-r--r--fs/tracefs/Makefile1
-rw-r--r--fs/tracefs/event_inode.c807
-rw-r--r--fs/tracefs/inode.c157
-rw-r--r--fs/tracefs/internal.h29
-rw-r--r--include/linux/ftrace.h5
-rw-r--r--include/linux/trace_events.h2
-rw-r--r--include/linux/tracefs.h23
-rw-r--r--kernel/trace/ring_buffer.c20
-rw-r--r--kernel/trace/trace.c99
-rw-r--r--kernel/trace/trace.h14
-rw-r--r--kernel/trace/trace_entries.h2
-rw-r--r--kernel/trace/trace_events.c76
-rw-r--r--kernel/trace/trace_events_filter.c302
-rw-r--r--kernel/trace/trace_events_user.c15
-rw-r--r--kernel/trace/trace_export.c9
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc9
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc9
18 files changed, 1424 insertions, 169 deletions
diff --git a/Documentation/trace/events.rst b/Documentation/trace/events.rst
index 15f78e772384..759907c20e75 100644
--- a/Documentation/trace/events.rst
+++ b/Documentation/trace/events.rst
@@ -219,6 +219,20 @@ the function "security_prepare_creds" and less than the end of that function.
The ".function" postfix can only be attached to values of size long, and can only
be compared with "==" or "!=".
+Cpumask fields or scalar fields that encode a CPU number can be filtered using
+a user-provided cpumask in cpulist format. The format is as follows::
+
+ CPUS{$cpulist}
+
+Operators available to cpumask filtering are:
+
+& (intersection), ==, !=
+
+For example, this will filter events that have their .target_cpu field present
+in the given cpumask::
+
+ target_cpu & CPUS{17-42}
+
5.2 Setting filters
-------------------
diff --git a/fs/tracefs/Makefile b/fs/tracefs/Makefile
index 7c35a282b484..73c56da8e284 100644
--- a/fs/tracefs/Makefile
+++ b/fs/tracefs/Makefile
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
tracefs-objs := inode.o
+tracefs-objs += event_inode.o
obj-$(CONFIG_TRACING) += tracefs.o
diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c
new file mode 100644
index 000000000000..237c6f370ad9
--- /dev/null
+++ b/fs/tracefs/event_inode.c
@@ -0,0 +1,807 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * event_inode.c - part of tracefs, a pseudo file system for activating tracing
+ *
+ * Copyright (C) 2020-23 VMware Inc, author: Steven Rostedt (VMware) <rostedt@goodmis.org>
+ * Copyright (C) 2020-23 VMware Inc, author: Ajay Kaher <akaher@vmware.com>
+ *
+ * eventfs is used to dynamically create inodes and dentries based on the
+ * meta data provided by the tracing system.
+ *
+ * eventfs stores the meta-data of files/dirs and holds off on creating
+ * inodes/dentries of the files. When accessed, the eventfs will create the
+ * inodes/dentries in a just-in-time (JIT) manner. The eventfs will clean up
+ * and delete the inodes/dentries when they are no longer referenced.
+ */
+#include <linux/fsnotify.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/workqueue.h>
+#include <linux/security.h>
+#include <linux/tracefs.h>
+#include <linux/kref.h>
+#include <linux/delay.h>
+#include "internal.h"
+
+struct eventfs_inode {
+ struct list_head e_top_files;
+};
+
+/*
+ * struct eventfs_file - hold the properties of the eventfs files and
+ * directories.
+ * @name: the name of the file or directory to create
+ * @d_parent: holds parent's dentry
+ * @dentry: once accessed holds dentry
+ * @list: file or directory to be added to parent directory
+ * @ei: list of files and directories within directory
+ * @fop: file_operations for file or directory
+ * @iop: inode_operations for file or directory
+ * @data: something that the caller will want to get to later on
+ * @mode: the permission that the file or directory should have
+ */
+struct eventfs_file {
+ const char *name;
+ struct dentry *d_parent;
+ struct dentry *dentry;
+ struct list_head list;
+ struct eventfs_inode *ei;
+ const struct file_operations *fop;
+ const struct inode_operations *iop;
+ /*
+ * Union - used for deletion
+ * @del_list: list of eventfs_file to delete
+ * @rcu: eventfs_file to delete in RCU
+ * @is_freed: node is freed if one of the above is set
+ */
+ union {
+ struct list_head del_list;
+ struct rcu_head rcu;
+ unsigned long is_freed;
+ };
+ void *data;
+ umode_t mode;
+};
+
+static DEFINE_MUTEX(eventfs_mutex);
+DEFINE_STATIC_SRCU(eventfs_srcu);
+
+static struct dentry *eventfs_root_lookup(struct inode *dir,
+ struct dentry *dentry,
+ unsigned int flags);
+static int dcache_dir_open_wrapper(struct inode *inode, struct file *file);
+static int eventfs_release(struct inode *inode, struct file *file);
+
+static const struct inode_operations eventfs_root_dir_inode_operations = {
+ .lookup = eventfs_root_lookup,
+};
+
+static const struct file_operations eventfs_file_operations = {
+ .open = dcache_dir_open_wrapper,
+ .read = generic_read_dir,
+ .iterate_shared = dcache_readdir,
+ .llseek = generic_file_llseek,
+ .release = eventfs_release,
+};
+
+/**
+ * create_file - create a file in the tracefs filesystem
+ * @name: the name of the file to create.
+ * @mode: the permission that the file should have.
+ * @parent: parent dentry for this file.
+ * @data: something that the caller will want to get to later on.
+ * @fop: struct file_operations that should be used for this file.
+ *
+ * This is the basic "create a file" function for tracefs. It allows for a
+ * wide range of flexibility in creating a file.
+ *
+ * This function will return a pointer to a dentry if it succeeds. This
+ * pointer must be passed to the tracefs_remove() function when the file is
+ * to be removed (no automatic cleanup happens if your module is unloaded,
+ * you are responsible here.) If an error occurs, %NULL will be returned.
+ *
+ * If tracefs is not enabled in the kernel, the value -%ENODEV will be
+ * returned.
+ */
+static struct dentry *create_file(const char *name, umode_t mode,
+ struct dentry *parent, void *data,
+ const struct file_operations *fop)
+{
+ struct tracefs_inode *ti;
+ struct dentry *dentry;
+ struct inode *inode;
+
+ if (!(mode & S_IFMT))
+ mode |= S_IFREG;
+
+ if (WARN_ON_ONCE(!S_ISREG(mode)))
+ return NULL;
+
+ dentry = eventfs_start_creating(name, parent);
+
+ if (IS_ERR(dentry))
+ return dentry;
+
+ inode = tracefs_get_inode(dentry->d_sb);
+ if (unlikely(!inode))
+ return eventfs_failed_creating(dentry);
+
+ inode->i_mode = mode;
+ inode->i_fop = fop;
+ inode->i_private = data;
+
+ ti = get_tracefs(inode);
+ ti->flags |= TRACEFS_EVENT_INODE;
+ d_instantiate(dentry, inode);
+ fsnotify_create(dentry->d_parent->d_inode, dentry);
+ return eventfs_end_creating(dentry);
+};
+
+/**
+ * create_dir - create a dir in the tracefs filesystem
+ * @name: the name of the file to create.
+ * @parent: parent dentry for this file.
+ * @data: something that the caller will want to get to later on.
+ *
+ * This is the basic "create a dir" function for eventfs. It allows for a
+ * wide range of flexibility in creating a dir.
+ *
+ * This function will return a pointer to a dentry if it succeeds. This
+ * pointer must be passed to the tracefs_remove() function when the file is
+ * to be removed (no automatic cleanup happens if your module is unloaded,
+ * you are responsible here.) If an error occurs, %NULL will be returned.
+ *
+ * If tracefs is not enabled in the kernel, the value -%ENODEV will be
+ * returned.
+ */
+static struct dentry *create_dir(const char *name, struct dentry *parent, void *data)
+{
+ struct tracefs_inode *ti;
+ struct dentry *dentry;
+ struct inode *inode;
+
+ dentry = eventfs_start_creating(name, parent);
+ if (IS_ERR(dentry))
+ return dentry;
+
+ inode = tracefs_get_inode(dentry->d_sb);
+ if (unlikely(!inode))
+ return eventfs_failed_creating(dentry);
+
+ inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
+ inode->i_op = &eventfs_root_dir_inode_operations;
+ inode->i_fop = &eventfs_file_operations;
+ inode->i_private = data;
+
+ ti = get_tracefs(inode);
+ ti->flags |= TRACEFS_EVENT_INODE;
+
+ inc_nlink(inode);
+ d_instantiate(dentry, inode);
+ inc_nlink(dentry->d_parent->d_inode);
+ fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
+ return eventfs_end_creating(dentry);
+}
+
+/**
+ * eventfs_set_ef_status_free - set the ef->status to free
+ * @dentry: dentry who's status to be freed
+ *
+ * eventfs_set_ef_status_free will be called if no more
+ * references remain
+ */
+void eventfs_set_ef_status_free(struct dentry *dentry)
+{
+ struct tracefs_inode *ti_parent;
+ struct eventfs_file *ef;
+
+ mutex_lock(&eventfs_mutex);
+ ti_parent = get_tracefs(dentry->d_parent->d_inode);
+ if (!ti_parent || !(ti_parent->flags & TRACEFS_EVENT_INODE))
+ goto out;
+
+ ef = dentry->d_fsdata;
+ if (!ef)
+ goto out;
+
+ /*
+ * If ef was freed, then the LSB bit is set for d_fsdata.
+ * But this should not happen, as it should still have a
+ * ref count that prevents it. Warn in case it does.
+ */
+ if (WARN_ON_ONCE((unsigned long)ef & 1))
+ goto out;
+
+ dentry->d_fsdata = NULL;
+ ef->dentry = NULL;
+out:
+ mutex_unlock(&eventfs_mutex);
+}
+
+/**
+ * eventfs_post_create_dir - post create dir routine
+ * @ef: eventfs_file of recently created dir
+ *
+ * Map the meta-data of files within an eventfs dir to their parent dentry
+ */
+static void eventfs_post_create_dir(struct eventfs_file *ef)
+{
+ struct eventfs_file *ef_child;
+ struct tracefs_inode *ti;
+
+ /* srcu lock already held */
+ /* fill parent-child relation */
+ list_for_each_entry_srcu(ef_child, &ef->ei->e_top_files, list,
+ srcu_read_lock_held(&eventfs_srcu)) {
+ ef_child->d_parent = ef->dentry;
+ }
+
+ ti = get_tracefs(ef->dentry->d_inode);
+ ti->private = ef->ei;
+}
+
+/**
+ * create_dentry - helper function to create dentry
+ * @ef: eventfs_file of file or directory to create
+ * @parent: parent dentry
+ * @lookup: true if called from lookup routine
+ *
+ * Used to create a dentry for file/dir, executes post dentry creation routine
+ */
+static struct dentry *
+create_dentry(struct eventfs_file *ef, struct dentry *parent, bool lookup)
+{
+ bool invalidate = false;
+ struct dentry *dentry;
+
+ mutex_lock(&eventfs_mutex);
+ if (ef->is_freed) {
+ mutex_unlock(&eventfs_mutex);
+ return NULL;
+ }
+ if (ef->dentry) {
+ dentry = ef->dentry;
+ /* On dir open, up the ref count */
+ if (!lookup)
+ dget(dentry);
+ mutex_unlock(&eventfs_mutex);
+ return dentry;
+ }
+ mutex_unlock(&eventfs_mutex);
+
+ if (!lookup)
+ inode_lock(parent->d_inode);
+
+ if (ef->ei)
+ dentry = create_dir(ef->name, parent, ef->data);
+ else
+ dentry = create_file(ef->name, ef->mode, parent,
+ ef->data, ef->fop);
+
+ if (!lookup)
+ inode_unlock(parent->d_inode);
+
+ mutex_lock(&eventfs_mutex);
+ if (IS_ERR_OR_NULL(dentry)) {
+ /* If the ef was already updated get it */
+ dentry = ef->dentry;
+ if (dentry && !lookup)
+ dget(dentry);
+ mutex_unlock(&eventfs_mutex);
+ return dentry;
+ }
+
+ if (!ef->dentry && !ef->is_freed) {
+ ef->dentry = dentry;
+ if (ef->ei)
+ eventfs_post_create_dir(ef);
+ dentry->d_fsdata = ef;
+ } else {
+ /* A race here, should try again (unless freed) */
+ invalidate = true;
+
+ /*
+ * Should never happen unless we get here due to being freed.
+ * Otherwise it means two dentries exist with the same name.
+ */
+ WARN_ON_ONCE(!ef->is_freed);
+ }
+ mutex_unlock(&eventfs_mutex);
+ if (invalidate)
+ d_invalidate(dentry);
+
+ if (lookup || invalidate)
+ dput(dentry);
+
+ return invalidate ? NULL : dentry;
+}
+
+static bool match_event_file(struct eventfs_file *ef, const char *name)
+{
+ bool ret;
+
+ mutex_lock(&eventfs_mutex);
+ ret = !ef->is_freed && strcmp(ef->name, name) == 0;
+ mutex_unlock(&eventfs_mutex);
+
+ return ret;
+}
+
+/**
+ * eventfs_root_lookup - lookup routine to create file/dir
+ * @dir: in which a lookup is being done
+ * @dentry: file/dir dentry
+ * @flags: to pass as flags parameter to simple lookup
+ *
+ * Used to create a dynamic file/dir within @dir. Use the eventfs_inode
+ * list of meta data to find the information needed to create the file/dir.
+ */
+static struct dentry *eventfs_root_lookup(struct inode *dir,
+ struct dentry *dentry,
+ unsigned int flags)
+{
+ struct tracefs_inode *ti;
+ struct eventfs_inode *ei;
+ struct eventfs_file *ef;
+ struct dentry *ret = NULL;
+ int idx;
+
+ ti = get_tracefs(dir);
+ if (!(ti->flags & TRACEFS_EVENT_INODE))
+ return NULL;
+
+ ei = ti->private;
+ idx = srcu_read_lock(&eventfs_srcu);
+ list_for_each_entry_srcu(ef, &ei->e_top_files, list,
+ srcu_read_lock_held(&eventfs_srcu)) {
+ if (!match_event_file(ef, dentry->d_name.name))
+ continue;
+ ret = simple_lookup(dir, dentry, flags);
+ create_dentry(ef, ef->d_parent, true);
+ break;
+ }
+ srcu_read_unlock(&eventfs_srcu, idx);
+ return ret;
+}
+
+/**
+ * eventfs_release - called to release eventfs file/dir
+ * @inode: inode to be released
+ * @file: file to be released (not used)
+ */
+static int eventfs_release(struct inode *inode, struct file *file)
+{
+ struct tracefs_inode *ti;
+ struct eventfs_inode *ei;
+ struct eventfs_file *ef;
+ struct dentry *dentry;
+ int idx;
+
+ ti = get_tracefs(inode);
+ if (!(ti->flags & TRACEFS_EVENT_INODE))
+ return -EINVAL;
+
+ ei = ti->private;
+ idx = srcu_read_lock(&eventfs_srcu);
+ list_for_each_entry_srcu(ef, &ei->e_top_files, list,
+ srcu_read_lock_held(&eventfs_srcu)) {
+ mutex_lock(&eventfs_mutex);
+ dentry = ef->dentry;
+ mutex_unlock(&eventfs_mutex);
+ if (dentry)
+ dput(dentry);
+ }
+ srcu_read_unlock(&eventfs_srcu, idx);
+ return dcache_dir_close(inode, file);
+}
+
+/**
+ * dcache_dir_open_wrapper - eventfs open wrapper
+ * @inode: not used
+ * @file: dir to be opened (to create its child)
+ *
+ * Used to dynamically create the file/dir within @file. @file is really a
+ * directory and all the files/dirs of the children within @file will be
+ * created. If any of the files/dirs have already been created, their
+ * reference count will be incremented.
+ */
+static int dcache_dir_open_wrapper(struct inode *inode, struct file *file)
+{
+ struct tracefs_inode *ti;
+ struct eventfs_inode *ei;
+ struct eventfs_file *ef;
+ struct dentry *dentry = file_dentry(file);
+ struct inode *f_inode = file_inode(file);
+ int idx;
+
+ ti = get_tracefs(f_inode);
+ if (!(ti->flags & TRACEFS_EVENT_INODE))
+ return -EINVAL;
+
+ ei = ti->private;
+ idx = srcu_read_lock(&eventfs_srcu);
+ list_for_each_entry_rcu(ef, &ei->e_top_files, list) {
+ create_dentry(ef, dentry, false);
+ }
+ srcu_read_unlock(&eventfs_srcu, idx);
+ return dcache_dir_open(inode, file);
+}
+
+/**
+ * eventfs_prepare_ef - helper function to prepare eventfs_file
+ * @name: the name of the file/directory to create.
+ * @mode: the permission that the file should have.
+ * @fop: struct file_operations that should be used for this file/directory.
+ * @iop: struct inode_operations that should be used for this file/directory.
+ * @data: something that the caller will want to get to later on. The
+ * inode.i_private pointer will point to this value on the open() call.
+ *
+ * This function allocates and fills the eventfs_file structure.
+ */
+static struct eventfs_file *eventfs_prepare_ef(const char *name, umode_t mode,
+ const struct file_operations *fop,
+ const struct inode_operations *iop,
+ void *data)
+{
+ struct eventfs_file *ef;
+
+ ef = kzalloc(sizeof(*ef), GFP_KERNEL);
+ if (!ef)
+ return ERR_PTR(-ENOMEM);
+
+ ef->name = kstrdup(name, GFP_KERNEL);
+ if (!ef->name) {
+ kfree(ef);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ if (S_ISDIR(mode)) {
+ ef->ei = kzalloc(sizeof(*ef->ei), GFP_KERNEL);
+ if (!ef->ei) {
+ kfree(ef->name);
+ kfree(ef);
+ return ERR_PTR(-ENOMEM);
+ }
+ INIT_LIST_HEAD(&ef->ei->e_top_files);
+ } else {
+ ef->ei = NULL;
+ }
+
+ ef->iop = iop;
+ ef->fop = fop;
+ ef->mode = mode;
+ ef->data = data;
+ return ef;
+}
+
+/**
+ * eventfs_create_events_dir - create the trace event structure
+ * @name: the name of the directory to create.
+ * @parent: parent dentry for this file. This should be a directory dentry
+ * if set. If this parameter is NULL, then the directory will be
+ * created in the root of the tracefs filesystem.
+ *
+ * This function creates the top of the trace event directory.
+ */
+struct dentry *eventfs_create_events_dir(const char *name,
+ struct dentry *parent)
+{
+ struct dentry *dentry = tracefs_start_creating(name, parent);
+ struct eventfs_inode *ei;
+ struct tracefs_inode *ti;
+ struct inode *inode;
+
+ if (IS_ERR(dentry))
+ return dentry;
+
+ ei = kzalloc(sizeof(*ei), GFP_KERNEL);
+ if (!ei)
+ return ERR_PTR(-ENOMEM);
+ inode = tracefs_get_inode(dentry->d_sb);
+ if (unlikely(!inode)) {
+ kfree(ei);
+ tracefs_failed_creating(dentry);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ INIT_LIST_HEAD(&ei->e_top_files);
+
+ ti = get_tracefs(inode);
+ ti->flags |= TRACEFS_EVENT_INODE;
+ ti->private = ei;
+
+ inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
+ inode->i_op = &eventfs_root_dir_inode_operations;
+ inode->i_fop = &eventfs_file_operations;
+
+ /* directory inodes start off with i_nlink == 2 (for "." entry) */
+ inc_nlink(inode);
+ d_instantiate(dentry, inode);
+ inc_nlink(dentry->d_parent->d_inode);
+ fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
+ return tracefs_end_creating(dentry);
+}
+
+/**
+ * eventfs_add_subsystem_dir - add eventfs subsystem_dir to list to create later
+ * @name: the name of the file to create.
+ * @parent: parent dentry for this dir.
+ *
+ * This function adds eventfs subsystem dir to list.
+ * And all these dirs are created on the fly when they are looked up,
+ * and the dentry and inodes will be removed when they are done.
+ */
+struct eventfs_file *eventfs_add_subsystem_dir(const char *name,
+ struct dentry *parent)
+{
+ struct tracefs_inode *ti_parent;
+ struct eventfs_inode *ei_parent;
+ struct eventfs_file *ef;
+
+ if (!parent)
+ return ERR_PTR(-EINVAL);
+
+ ti_parent = get_tracefs(parent->d_inode);
+ ei_parent = ti_parent->private;
+
+ ef = eventfs_prepare_ef(name, S_IFDIR, NULL, NULL, NULL);
+ if (IS_ERR(ef))
+ return ef;
+
+ mutex_lock(&eventfs_mutex);
+ list_add_tail(&ef->list, &ei_parent->e_top_files);
+ ef->d_parent = parent;
+ mutex_unlock(&eventfs_mutex);
+ return ef;
+}
+
+/**
+ * eventfs_add_dir - add eventfs dir to list to create later
+ * @name: the name of the file to create.
+ * @ef_parent: parent eventfs_file for this dir.
+ *
+ * This function adds eventfs dir to list.
+ * And all these dirs are created on the fly when they are looked up,
+ * and the dentry and inodes will be removed when they are done.
+ */
+struct eventfs_file *eventfs_add_dir(const char *name,
+ struct eventfs_file *ef_parent)
+{
+ struct eventfs_file *ef;
+
+ if (!ef_parent)
+ return ERR_PTR(-EINVAL);
+
+ ef = eventfs_prepare_ef(name, S_IFDIR, NULL, NULL, NULL);
+ if (IS_ERR(ef))
+ return ef;
+
+ mutex_lock(&eventfs_mutex);
+ list_add_tail(&ef->list, &ef_parent->ei->e_top_files);
+ ef->d_parent = ef_parent->dentry;
+ mutex_unlock(&eventfs_mutex);
+ return ef;
+}
+
+/**
+ * eventfs_add_events_file - add the data needed to create a file for later reference
+ * @name: the name of the file to create.
+ * @mode: the permission that the file should have.
+ * @parent: parent dentry for this file.
+ * @data: something that the caller will want to get to later on.
+ * @fop: struct file_operations that should be used for this file.
+ *
+ * This function is used to add the information needed to create a
+ * dentry/inode within the top level events directory. The file created
+ * will have the @mode permissions. The @data will be used to fill the
+ * inode.i_private when the open() call is done. The dentry and inodes are
+ * all created when they are referenced, and removed when they are no
+ * longer referenced.
+ */
+int eventfs_add_events_file(const char *name, umode_t mode,
+ struct dentry *parent, void *data,
+ const struct file_operations *fop)
+{
+ struct tracefs_inode *ti;
+ struct eventfs_inode *ei;
+ struct eventfs_file *ef;
+
+ if (!parent)
+ return -EINVAL;
+
+ if (!(mode & S_IFMT))
+ mode |= S_IFREG;
+
+ if (!parent->d_inode)
+ return -EINVAL;
+
+ ti = get_tracefs(parent->d_inode);
+ if (!(ti->flags & TRACEFS_EVENT_INODE))
+ return -EINVAL;
+
+ ei = ti->private;
+ ef = eventfs_prepare_ef(name, mode, fop, NULL, data);
+
+ if (IS_ERR(ef))
+ return -ENOMEM;
+
+ mutex_lock(&eventfs_mutex);
+ list_add_tail(&ef->list, &ei->e_top_files);
+ ef->d_parent = parent;
+ mutex_unlock(&eventfs_mutex);
+ return 0;
+}
+
+/**
+ * eventfs_add_file - add eventfs file to list to create later
+ * @name: the name of the file to create.
+ * @mode: the permission that the file should have.
+ * @ef_parent: parent eventfs_file for this file.
+ * @data: something that the caller will want to get to later on.
+ * @fop: struct file_operations that should be used for this file.
+ *
+ * This function is used to add the information needed to create a
+ * file within a subdirectory of the events directory. The file created
+ * will have the @mode permissions. The @data will be used to fill the
+ * inode.i_private when the open() call is done. The dentry and inodes are
+ * all created when they are referenced, and removed when they are no
+ * longer referenced.
+ */
+int eventfs_add_file(const char *name, umode_t mode,
+ struct eventfs_file *ef_parent,
+ void *data,
+ const struct file_operations *fop)
+{
+ struct eventfs_file *ef;
+
+ if (!ef_parent)
+ return -EINVAL;
+
+ if (!(mode & S_IFMT))
+ mode |= S_IFREG;
+
+ ef = eventfs_prepare_ef(name, mode, fop, NULL, data);
+ if (IS_ERR(ef))
+ return -ENOMEM;
+
+ mutex_lock(&eventfs_mutex);
+ list_add_tail(&ef->list, &ef_parent->ei->e_top_files);
+ ef->d_parent = ef_parent->dentry;
+ mutex_unlock(&eventfs_mutex);
+ return 0;
+}
+
+static void free_ef(struct rcu_head *head)
+{
+ struct eventfs_file *ef = container_of(head, struct eventfs_file, rcu);
+
+ kfree(ef->name);
+ kfree(ef->ei);
+ kfree(ef);
+}
+
+/**
+ * eventfs_remove_rec - remove eventfs dir or file from list
+ * @ef: eventfs_file to be removed.
+ * @head: to create list of eventfs_file to be deleted
+ * @level: to check recursion depth
+ *
+ * The helper function eventfs_remove_rec() is used to clean up and free the
+ * associated data from eventfs for both of the added functions.
+ */
+static void eventfs_remove_rec(struct eventfs_file *ef, struct list_head *head, int level)
+{
+ struct eventfs_file *ef_child;
+
+ if (!ef)
+ return;
+ /*
+ * Check recursion depth. It should never be greater than 3:
+ * 0 - events/
+ * 1 - events/group/
+ * 2 - events/group/event/
+ * 3 - events/group/event/file
+ */
+ if (WARN_ON_ONCE(level > 3))
+ return;
+
+ if (ef->ei) {
+ /* search for nested folders or files */
+ list_for_each_entry_srcu(ef_child, &ef->ei->e_top_files, list,
+ lockdep_is_held(&eventfs_mutex)) {
+ eventfs_remove_rec(ef_child, head, level + 1);
+ }
+ }
+
+ list_del_rcu(&ef->list);
+ list_add_tail(&ef->del_list, head);
+}
+
+/**
+ * eventfs_remove - remove eventfs dir or file from list
+ * @ef: eventfs_file to be removed.
+ *
+ * This function acquire the eventfs_mutex lock and call eventfs_remove_rec()
+ */
+void eventfs_remove(struct eventfs_file *ef)
+{
+ struct eventfs_file *tmp;
+ LIST_HEAD(ef_del_list);
+ struct dentry *dentry_list = NULL;
+ struct dentry *dentry;
+
+ if (!ef)
+ return;
+
+ mutex_lock(&eventfs_mutex);
+ eventfs_remove_rec(ef, &ef_del_list, 0);
+ list_for_each_entry_safe(ef, tmp, &ef_del_list, del_list) {
+ if (ef->dentry) {
+ unsigned long ptr = (unsigned long)dentry_list;
+
+ /* Keep the dentry from being freed yet */
+ dget(ef->dentry);
+
+ /*
+ * Paranoid: The dget() above should prevent the dentry
+ * from being freed and calling eventfs_set_ef_status_free().
+ * But just in case, set the link list LSB pointer to 1
+ * and have eventfs_set_ef_status_free() check that to
+ * make sure that if it does happen, it will not think
+ * the d_fsdata is an event_file.
+ *
+ * For this to work, no event_file should be allocated
+ * on a odd space, as the ef should always be allocated
+ * to be at least word aligned. Check for that too.
+ */
+ WARN_ON_ONCE(ptr & 1);
+
+ ef->dentry->d_fsdata = (void *)(ptr | 1);
+ dentry_list = ef->dentry;
+ ef->dentry = NULL;
+ }
+ call_srcu(&eventfs_srcu, &ef->rcu, free_ef);
+ }
+ mutex_unlock(&eventfs_mutex);
+
+ while (dentry_list) {
+ unsigned long ptr;
+
+ dentry = dentry_list;
+ ptr = (unsigned long)dentry->d_fsdata & ~1UL;
+ dentry_list = (struct dentry *)ptr;
+ dentry->d_fsdata = NULL;
+ d_invalidate(dentry);
+ mutex_lock(&eventfs_mutex);
+ /* dentry should now have at least a single reference */
+ WARN_ONCE((int)d_count(dentry) < 1,
+ "dentry %p less than one reference (%d) after invalidate\n",
+ dentry, d_count(dentry));
+ mutex_unlock(&eventfs_mutex);
+ dput(dentry);
+ }
+}
+
+/**
+ * eventfs_remove_events_dir - remove eventfs dir or file from list
+ * @dentry: events's dentry to be removed.
+ *
+ * This function remove events main directory
+ */
+void eventfs_remove_events_dir(struct dentry *dentry)
+{
+ struct tracefs_inode *ti;
+ struct eventfs_inode *ei;
+
+ if (!dentry || !dentry->d_inode)
+ return;
+
+ ti = get_tracefs(dentry->d_inode);
+ if (!ti || !(ti->flags & TRACEFS_EVENT_INODE))
+ return;
+
+ ei = ti->private;
+ d_invalidate(dentry);
+ dput(dentry);
+ kfree(ei);
+}
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index 2feb6c58648c..de5b72216b1a 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -21,13 +21,33 @@
#include <linux/parser.h>
#include <linux/magic.h>
#include <linux/slab.h>
+#include "internal.h"
#define TRACEFS_DEFAULT_MODE 0700
+static struct kmem_cache *tracefs_inode_cachep __ro_after_init;
static struct vfsmount *tracefs_mount;
static int tracefs_mount_count;
static bool tracefs_registered;
+static struct inode *tracefs_alloc_inode(struct super_block *sb)
+{
+ struct tracefs_inode *ti;
+
+ ti = kmem_cache_alloc(tracefs_inode_cachep, GFP_KERNEL);
+ if (!ti)
+ return NULL;
+
+ ti->flags = 0;
+
+ return &ti->vfs_inode;
+}
+
+static void tracefs_free_inode(struct inode *inode)
+{
+ kmem_cache_free(tracefs_inode_cachep, get_tracefs(inode));
+}
+
static ssize_t default_read_file(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
@@ -127,7 +147,7 @@ static const struct inode_operations tracefs_dir_inode_operations = {
.rmdir = tracefs_syscall_rmdir,
};
-static struct inode *tracefs_get_inode(struct super_block *sb)
+struct inode *tracefs_get_inode(struct super_block *sb)
{
struct inode *inode = new_inode(sb);
if (inode) {
@@ -290,6 +310,7 @@ static int tracefs_apply_options(struct super_block *sb, bool remount)
struct tracefs_fs_info *fsi = sb->s_fs_info;
struct inode *inode = d_inode(sb->s_root);
struct tracefs_mount_opts *opts = &fsi->mount_opts;
+ umode_t tmp_mode;
/*
* On remount, only reset mode/uid/gid if they were provided as mount
@@ -297,8 +318,9 @@ static int tracefs_apply_options(struct super_block *sb, bool remount)
*/
if (!remount || opts->opts & BIT(Opt_mode)) {
- inode->i_mode &= ~S_IALLUGO;
- inode->i_mode |= opts->mode;
+ tmp_mode = READ_ONCE(inode->i_mode) & ~S_IALLUGO;
+ tmp_mode |= opts->mode;
+ WRITE_ONCE(inode->i_mode, tmp_mode);
}
if (!remount || opts->opts & BIT(Opt_uid))
@@ -346,11 +368,31 @@ static int tracefs_show_options(struct seq_file *m, struct dentry *root)
}
static const struct super_operations tracefs_super_operations = {
+ .alloc_inode = tracefs_alloc_inode,
+ .free_inode = tracefs_free_inode,
+ .drop_inode = generic_delete_inode,
.statfs = simple_statfs,
.remount_fs = tracefs_remount,
.show_options = tracefs_show_options,
};
+static void tracefs_dentry_iput(struct dentry *dentry, struct inode *inode)
+{
+ struct tracefs_inode *ti;
+
+ if (!dentry || !inode)
+ return;
+
+ ti = get_tracefs(inode);
+ if (ti && ti->flags & TRACEFS_EVENT_INODE)
+ eventfs_set_ef_status_free(dentry);
+ iput(inode);
+}
+
+static const struct dentry_operations tracefs_dentry_operations = {
+ .d_iput = tracefs_dentry_iput,
+};
+
static int trace_fill_super(struct super_block *sb, void *data, int silent)
{
static const struct tree_descr trace_files[] = {{""}};
@@ -373,6 +415,7 @@ static int trace_fill_super(struct super_block *sb, void *data, int silent)
goto fail;
sb->s_op = &tracefs_super_operations;
+ sb->s_d_op = &tracefs_dentry_operations;
tracefs_apply_options(sb, false);
@@ -399,7 +442,7 @@ static struct file_system_type trace_fs_type = {
};
MODULE_ALIAS_FS("tracefs");
-static struct dentry *start_creating(const char *name, struct dentry *parent)
+struct dentry *tracefs_start_creating(const char *name, struct dentry *parent)
{
struct dentry *dentry;
int error;
@@ -437,7 +480,7 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
return dentry;
}
-static struct dentry *failed_creating(struct dentry *dentry)
+struct dentry *tracefs_failed_creating(struct dentry *dentry)
{
inode_unlock(d_inode(dentry->d_parent));
dput(dentry);
@@ -445,13 +488,87 @@ static struct dentry *failed_creating(struct dentry *dentry)
return NULL;
}
-static struct dentry *end_creating(struct dentry *dentry)
+struct dentry *tracefs_end_creating(struct dentry *dentry)
{
inode_unlock(d_inode(dentry->d_parent));
return dentry;
}
/**
+ * eventfs_start_creating - start the process of creating a dentry
+ * @name: Name of the file created for the dentry
+ * @parent: The parent dentry where this dentry will be created
+ *
+ * This is a simple helper function for the dynamically created eventfs
+ * files. When the directory of the eventfs files are accessed, their
+ * dentries are created on the fly. This function is used to start that
+ * process.
+ */
+struct dentry *eventfs_start_creating(const char *name, struct dentry *parent)
+{
+ struct dentry *dentry;
+ int error;
+
+ error = simple_pin_fs(&trace_fs_type, &tracefs_mount,
+ &tracefs_mount_count);
+ if (error)
+ return ERR_PTR(error);
+
+ /*
+ * If the parent is not specified, we create it in the root.
+ * We need the root dentry to do this, which is in the super
+ * block. A pointer to that is in the struct vfsmount that we
+ * have around.
+ */
+ if (!parent)
+ parent = tracefs_mount->mnt_root;
+
+ if (unlikely(IS_DEADDIR(parent->d_inode)))
+ dentry = ERR_PTR(-ENOENT);
+ else
+ dentry = lookup_one_len(name, parent, strlen(name));
+
+ if (!IS_ERR(dentry) && dentry->d_inode) {
+ dput(dentry);
+ dentry = ERR_PTR(-EEXIST);
+ }
+
+ if (IS_ERR(dentry))
+ simple_release_fs(&tracefs_mount, &tracefs_mount_count);
+
+ return dentry;
+}
+
+/**
+ * eventfs_failed_creating - clean up a failed eventfs dentry creation
+ * @dentry: The dentry to clean up
+ *
+ * If after calling eventfs_start_creating(), a failure is detected, the
+ * resources created by eventfs_start_creating() needs to be cleaned up. In
+ * that case, this function should be called to perform that clean up.
+ */
+struct dentry *eventfs_failed_creating(struct dentry *dentry)
+{
+ dput(dentry);
+ simple_release_fs(&tracefs_mount, &tracefs_mount_count);
+ return NULL;
+}
+
+/**
+ * eventfs_end_creating - Finish the process of creating a eventfs dentry
+ * @dentry: The dentry that has successfully been created.
+ *
+ * This function is currently just a place holder to match
+ * eventfs_start_creating(). In case any synchronization needs to be added,
+ * this function will be used to implement that without having to modify
+ * the callers of eventfs_start_creating().
+ */
+struct dentry *eventfs_end_creating(struct dentry *dentry)
+{
+ return dentry;
+}
+
+/**
* tracefs_create_file - create a file in the tracefs filesystem
* @name: a pointer to a string containing the name of the file to create.
* @mode: the permission that the file should have.
@@ -490,14 +607,14 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode,
if (!(mode & S_IFMT))
mode |= S_IFREG;
BUG_ON(!S_ISREG(mode));
- dentry = start_creating(name, parent);
+ dentry = tracefs_start_creating(name, parent);
if (IS_ERR(dentry))
return NULL;
inode = tracefs_get_inode(dentry->d_sb);
if (unlikely(!inode))
- return failed_creating(dentry);
+ return tracefs_failed_creating(dentry);
inode->i_mode = mode;
inode->i_fop = fops ? fops : &tracefs_file_operations;
@@ -506,13 +623,13 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode,
inode->i_gid = d_inode(dentry->d_parent)->i_gid;
d_instantiate(dentry, inode);
fsnotify_create(d_inode(dentry->d_parent), dentry);
- return end_creating(dentry);
+ return tracefs_end_creating(dentry);
}
static struct dentry *__create_dir(const char *name, struct dentry *parent,
const struct inode_operations *ops)
{
- struct dentry *dentry = start_creating(name, parent);
+ struct dentry *dentry = tracefs_start_creating(name, parent);
struct inode *inode;
if (IS_ERR(dentry))
@@ -520,7 +637,7 @@ static struct dentry *__create_dir(const char *name, struct dentry *parent,
inode = tracefs_get_inode(dentry->d_sb);
if (unlikely(!inode))
- return failed_creating(dentry);
+ return tracefs_failed_creating(dentry);
/* Do not set bits for OTH */
inode->i_mode = S_IFDIR | S_IRWXU | S_IRUSR| S_IRGRP | S_IXUSR | S_IXGRP;
@@ -534,7 +651,7 @@ static struct dentry *__create_dir(const char *name, struct dentry *parent,
d_instantiate(dentry, inode);
inc_nlink(d_inode(dentry->d_parent));
fsnotify_mkdir(d_inode(dentry->d_parent), dentry);
- return end_creating(dentry);
+ return tracefs_end_creating(dentry);
}
/**
@@ -628,10 +745,26 @@ bool tracefs_initialized(void)
return tracefs_registered;
}
+static void init_once(void *foo)
+{
+ struct tracefs_inode *ti = (struct tracefs_inode *) foo;
+
+ inode_init_once(&ti->vfs_inode);
+}
+
static int __init tracefs_init(void)
{
int retval;
+ tracefs_inode_cachep = kmem_cache_create("tracefs_inode_cache",
+ sizeof(struct tracefs_inode),
+ 0, (SLAB_RECLAIM_ACCOUNT|
+ SLAB_MEM_SPREAD|
+ SLAB_ACCOUNT),
+ init_once);
+ if (!tracefs_inode_cachep)
+ return -ENOMEM;
+
retval = sysfs_create_mount_point(kernel_kobj, "tracing");
if (retval)
return -EINVAL;
diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h
new file mode 100644
index 000000000000..69c2b1d87c46
--- /dev/null
+++ b/fs/tracefs/internal.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TRACEFS_INTERNAL_H
+#define _TRACEFS_INTERNAL_H
+
+enum {
+ TRACEFS_EVENT_INODE = BIT(1),
+};
+
+struct tracefs_inode {
+ unsigned long flags;
+ void *private;
+ struct inode vfs_inode;
+};
+
+static inline struct tracefs_inode *get_tracefs(const struct inode *inode)
+{
+ return container_of(inode, struct tracefs_inode, vfs_inode);
+}
+
+struct dentry *tracefs_start_creating(const char *name, struct dentry *parent);
+struct dentry *tracefs_end_creating(struct dentry *dentry);
+struct dentry *tracefs_failed_creating(struct dentry *dentry);
+struct inode *tracefs_get_inode(struct super_block *sb);
+struct dentry *eventfs_start_creating(const char *name, struct dentry *parent);
+struct dentry *eventfs_failed_creating(struct dentry *dentry);
+struct dentry *eventfs_end_creating(struct dentry *dentry);
+void eventfs_set_ef_status_free(struct dentry *dentry);
+
+#endif /* _TRACEFS_INTERNAL_H */
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index aad9cf8876b5..e8921871ef9a 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -862,13 +862,8 @@ extern int skip_trace(unsigned long ip);
extern void ftrace_module_init(struct module *mod);
extern void ftrace_module_enable(struct module *mod);
extern void ftrace_release_mod(struct module *mod);
-
-extern void ftrace_disable_daemon(void);
-extern void ftrace_enable_daemon(void);
#else /* CONFIG_DYNAMIC_FTRACE */
static inline int skip_trace(unsigned long ip) { return 0; }
-static inline void ftrace_disable_daemon(void) { }
-static inline void ftrace_enable_daemon(void) { }
static inline void ftrace_module_init(struct module *mod) { }
static inline void ftrace_module_enable(struct module *mod) { }
static inline void ftrace_release_mod(struct module *mod) { }
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index c1a0a19d80fb..eb5c3add939b 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -649,6 +649,7 @@ struct trace_event_file {
struct list_head list;
struct trace_event_call *event_call;
struct event_filter __rcu *filter;
+ struct eventfs_file *ef;
struct dentry *dir;
struct trace_array *tr;
struct trace_subsystem_dir *system;
@@ -824,6 +825,7 @@ enum {
FILTER_RDYN_STRING,
FILTER_PTR_STRING,
FILTER_TRACE_FN,
+ FILTER_CPUMASK,
FILTER_COMM,
FILTER_CPU,
FILTER_STACKTRACE,
diff --git a/include/linux/tracefs.h b/include/linux/tracefs.h
index 99912445974c..009072792fa3 100644
--- a/include/linux/tracefs.h
+++ b/include/linux/tracefs.h
@@ -21,6 +21,29 @@ struct file_operations;
#ifdef CONFIG_TRACING
+struct eventfs_file;
+
+struct dentry *eventfs_create_events_dir(const char *name,
+ struct dentry *parent);
+
+struct eventfs_file *eventfs_add_subsystem_dir(const char *name,
+ struct dentry *parent);
+
+struct eventfs_file *eventfs_add_dir(const char *name,
+ struct eventfs_file *ef_parent);
+
+int eventfs_add_file(const char *name, umode_t mode,
+ struct eventfs_file *ef_parent, void *data,
+ const struct file_operations *fops);
+
+int eventfs_add_events_file(const char *name, umode_t mode,
+ struct dentry *parent, void *data,
+ const struct file_operations *fops);
+
+void eventfs_remove(struct eventfs_file *ef);
+
+void eventfs_remove_events_dir(struct dentry *dentry);
+
struct dentry *tracefs_create_file(const char *name, umode_t mode,
struct dentry *parent, void *data,
const struct file_operations *fops);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 52dea5dd5362..78502d4c7214 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -692,10 +692,7 @@ static void rb_time_set(rb_time_t *t, u64 val)
static inline bool
rb_time_read_cmpxchg(local_t *l, unsigned long expect, unsigned long set)
{
- unsigned long ret;
-
- ret = local_cmpxchg(l, expect, set);
- return ret == expect;
+ return local_try_cmpxchg(l, &expect, set);
}
static bool rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set)
@@ -752,9 +749,7 @@ static void rb_time_set(rb_time_t *t, u64 val)
static bool rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set)
{
- u64 val;
- val = local64_cmpxchg(&t->time, expect, set);
- return val == expect;
+ return local64_try_cmpxchg(&t->time, &expect, set);
}
#endif
@@ -1494,14 +1489,11 @@ static bool rb_head_page_replace(struct buffer_page *old,
{
unsigned long *ptr = (unsigned long *)&old->list.prev->next;
unsigned long val;
- unsigned long ret;
val = *ptr & ~RB_FLAG_MASK;
val |= RB_PAGE_HEAD;
- ret = cmpxchg(ptr, val, (unsigned long)&new->list);
-
- return ret == val;
+ return try_cmpxchg(ptr, &val, (unsigned long)&new->list);
}
/*
@@ -3003,7 +2995,6 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
{
unsigned long new_index, old_index;
struct buffer_page *bpage;
- unsigned long index;
unsigned long addr;
u64 write_stamp;
u64 delta;
@@ -3060,8 +3051,9 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
*/
old_index += write_mask;
new_index += write_mask;
- index = local_cmpxchg(&bpage->write, old_index, new_index);
- if (index == old_index) {
+
+ /* caution: old_index gets updated on cmpxchg failure */
+ if (local_try_cmpxchg(&bpage->write, &old_index, new_index)) {
/* update counters */
local_sub(event_length, &cpu_buffer->entries_bytes);
return true;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8e64aaad5361..3e55375f47e0 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3119,7 +3119,6 @@ static void __ftrace_trace_stack(struct trace_buffer *buffer,
struct ftrace_stack *fstack;
struct stack_entry *entry;
int stackidx;
- void *ptr;
/*
* Add one, for this function and the call to save_stack_trace()
@@ -3157,32 +3156,16 @@ static void __ftrace_trace_stack(struct trace_buffer *buffer,
nr_entries = stack_trace_save(fstack->calls, size, skip);
}
- size = nr_entries * sizeof(unsigned long);
event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
- (sizeof(*entry) - sizeof(entry->caller)) + size,
+ struct_size(entry, caller, nr_entries),
trace_ctx);
if (!event)
goto out;
- ptr = ring_buffer_event_data(event);
- entry = ptr;
-
- /*
- * For backward compatibility reasons, the entry->caller is an
- * array of 8 slots to store the stack. This is also exported
- * to user space. The amount allocated on the ring buffer actually
- * holds enough for the stack specified by nr_entries. This will
- * go into the location of entry->caller. Due to string fortifiers
- * checking the size of the destination of memcpy() it triggers
- * when it detects that size is greater than 8. To hide this from
- * the fortifiers, we use "ptr" and pointer arithmetic to assign caller.
- *
- * The below is really just:
- * memcpy(&entry->caller, fstack->calls, size);
- */
- ptr += offsetof(typeof(*entry), caller);
- memcpy(ptr, fstack->calls, size);
+ entry = ring_buffer_event_data(event);
entry->size = nr_entries;
+ memcpy(&entry->caller, fstack->calls,
+ flex_array_size(entry, caller, nr_entries));
if (!call_filter_check_discard(call, entry, buffer, event))
__buffer_unlock_commit(buffer, event);
@@ -4206,18 +4189,12 @@ static void *s_start(struct seq_file *m, loff_t *pos)
loff_t l = 0;
int cpu;
- /*
- * copy the tracer to avoid using a global lock all around.
- * iter->trace is a copy of current_trace, the pointer to the
- * name may be used instead of a strcmp(), as iter->trace->name
- * will point to the same string as current_trace->name.
- */
mutex_lock(&trace_types_lock);
- if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name)) {
+ if (unlikely(tr->current_trace != iter->trace)) {
/* Close iter->trace before switching to the new current tracer */
if (iter->trace->close)
iter->trace->close(iter);
- *iter->trace = *tr->current_trace;
+ iter->trace = tr->current_trace;
/* Reopen the new current tracer */
if (iter->trace->open)
iter->trace->open(iter);
@@ -4829,6 +4806,25 @@ static const struct seq_operations tracer_seq_ops = {
.show = s_show,
};
+/*
+ * Note, as iter itself can be allocated and freed in different
+ * ways, this function is only used to free its content, and not
+ * the iterator itself. The only requirement to all the allocations
+ * is that it must zero all fields (kzalloc), as freeing works with
+ * ethier allocated content or NULL.
+ */
+static void free_trace_iter_content(struct trace_iterator *iter)
+{
+ /* The fmt is either NULL, allocated or points to static_fmt_buf */
+ if (iter->fmt != static_fmt_buf)
+ kfree(iter->fmt);
+
+ kfree(iter->temp);
+ kfree(iter->buffer_iter);
+ mutex_destroy(&iter->mutex);
+ free_cpumask_var(iter->started);
+}
+
static struct trace_iterator *
__tracing_open(struct inode *inode, struct file *file, bool snapshot)
{
@@ -4870,16 +4866,8 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot)
iter->fmt = NULL;
iter->fmt_size = 0;
- /*
- * We make a copy of the current tracer to avoid concurrent
- * changes on it while we are reading.
- */
mutex_lock(&trace_types_lock);
- iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
- if (!iter->trace)
- goto fail;
-
- *iter->trace = *tr->current_trace;
+ iter->trace = tr->current_trace;
if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
goto fail;
@@ -4944,9 +4932,7 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot)
fail:
mutex_unlock(&trace_types_lock);
- kfree(iter->trace);
- kfree(iter->temp);
- kfree(iter->buffer_iter);
+ free_trace_iter_content(iter);
release:
seq_release_private(inode, file);
return ERR_PTR(-ENOMEM);
@@ -5025,12 +5011,7 @@ static int tracing_release(struct inode *inode, struct file *file)
mutex_unlock(&trace_types_lock);
- mutex_destroy(&iter->mutex);
- free_cpumask_var(iter->started);
- kfree(iter->fmt);
- kfree(iter->temp);
- kfree(iter->trace);
- kfree(iter->buffer_iter);
+ free_trace_iter_content(iter);
seq_release_private(inode, file);
return 0;
@@ -6318,6 +6299,15 @@ static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
per_cpu_ptr(buf->data, cpu)->entries = val;
}
+static void update_buffer_entries(struct array_buffer *buf, int cpu)
+{
+ if (cpu == RING_BUFFER_ALL_CPUS) {
+ set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
+ } else {
+ per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
+ }
+}
+
#ifdef CONFIG_TRACER_MAX_TRACE
/* resize @tr's buffer to the size of @size_tr's entries */
static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
@@ -6396,18 +6386,12 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr,
return ret;
}
- if (cpu == RING_BUFFER_ALL_CPUS)
- set_buffer_entries(&tr->max_buffer, size);
- else
- per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
+ update_buffer_entries(&tr->max_buffer, cpu);
out:
#endif /* CONFIG_TRACER_MAX_TRACE */
- if (cpu == RING_BUFFER_ALL_CPUS)
- set_buffer_entries(&tr->array_buffer, size);
- else
- per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
+ update_buffer_entries(&tr->array_buffer, cpu);
return ret;
}
@@ -6825,10 +6809,7 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
close_pipe_on_cpu(tr, iter->cpu_file);
mutex_unlock(&trace_types_lock);
- free_cpumask_var(iter->started);
- kfree(iter->fmt);
- kfree(iter->temp);
- mutex_destroy(&iter->mutex);
+ free_trace_iter_content(iter);
kfree(iter);
trace_array_put(tr);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 73eaec158473..5669dd1f90d9 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -77,6 +77,16 @@ enum trace_type {
#undef __array
#define __array(type, item, size) type item[size];
+/*
+ * For backward compatibility, older user space expects to see the
+ * kernel_stack event with a fixed size caller field. But today the fix
+ * size is ignored by the kernel, and the real structure is dynamic.
+ * Expose to user space: "unsigned long caller[8];" but the real structure
+ * will be "unsigned long caller[] __counted_by(size)"
+ */
+#undef __stack_array
+#define __stack_array(type, item, size, field) type item[] __counted_by(field);
+
#undef __array_desc
#define __array_desc(type, container, item, size)
@@ -596,7 +606,6 @@ trace_buffer_iter(struct trace_iterator *iter, int cpu)
int tracer_init(struct tracer *t, struct trace_array *tr);
int tracing_is_enabled(void);
void tracing_reset_online_cpus(struct array_buffer *buf);
-void tracing_reset_current(int cpu);
void tracing_reset_all_online_cpus(void);
void tracing_reset_all_online_cpus_unlocked(void);
int tracing_open_generic(struct inode *inode, struct file *filp);
@@ -697,7 +706,6 @@ void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos);
void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos);
int trace_pid_show(struct seq_file *m, void *v);
-void trace_free_pid_list(struct trace_pid_list *pid_list);
int trace_pid_write(struct trace_pid_list *filtered_pids,
struct trace_pid_list **new_pid_list,
const char __user *ubuf, size_t cnt);
@@ -1334,7 +1342,7 @@ struct trace_subsystem_dir {
struct list_head list;
struct event_subsystem *subsystem;
struct trace_array *tr;
- struct dentry *entry;
+ struct eventfs_file *ef;
int ref_count;
int nr_events;
};
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index 340b2fa98218..c47422b20908 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -190,7 +190,7 @@ FTRACE_ENTRY(kernel_stack, stack_entry,
F_STRUCT(
__field( int, size )
- __array( unsigned long, caller, FTRACE_STACK_ENTRIES )
+ __stack_array( unsigned long, caller, FTRACE_STACK_ENTRIES, size)
),
F_printk("\t=> %ps\n\t=> %ps\n\t=> %ps\n"
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 578f1f7d49a6..ed367d713be0 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -984,7 +984,7 @@ static void remove_subsystem(struct trace_subsystem_dir *dir)
return;
if (!--dir->nr_events) {
- tracefs_remove(dir->entry);
+ eventfs_remove(dir->ef);
list_del(&dir->list);
__put_system_dir(dir);
}
@@ -1005,7 +1005,7 @@ static void remove_event_file_dir(struct trace_event_file *file)
tracefs_remove(dir);
}
-
+ eventfs_remove(file->ef);
list_del(&file->list);
remove_subsystem(file->system);
free_event_filter(file->filter);
@@ -2291,13 +2291,13 @@ create_new_subsystem(const char *name)
return NULL;
}
-static struct dentry *
+static struct eventfs_file *
event_subsystem_dir(struct trace_array *tr, const char *name,
struct trace_event_file *file, struct dentry *parent)
{
struct event_subsystem *system, *iter;
struct trace_subsystem_dir *dir;
- struct dentry *entry;
+ int res;
/* First see if we did not already create this dir */
list_for_each_entry(dir, &tr->systems, list) {
@@ -2305,7 +2305,7 @@ event_subsystem_dir(struct trace_array *tr, const char *name,
if (strcmp(system->name, name) == 0) {
dir->nr_events++;
file->system = dir;
- return dir->entry;
+ return dir->ef;
}
}
@@ -2329,8 +2329,8 @@ event_subsystem_dir(struct trace_array *tr, const char *name,
} else
__get_system(system);
- dir->entry = tracefs_create_dir(name, parent);
- if (!dir->entry) {
+ dir->ef = eventfs_add_subsystem_dir(name, parent);
+ if (IS_ERR(dir->ef)) {
pr_warn("Failed to create system directory %s\n", name);
__put_system(system);
goto out_free;
@@ -2345,22 +2345,22 @@ event_subsystem_dir(struct trace_array *tr, const char *name,
/* the ftrace system is special, do not create enable or filter files */
if (strcmp(name, "ftrace") != 0) {
- entry = tracefs_create_file("filter", TRACE_MODE_WRITE,
- dir->entry, dir,
+ res = eventfs_add_file("filter", TRACE_MODE_WRITE,
+ dir->ef, dir,
&ftrace_subsystem_filter_fops);
- if (!entry) {
+ if (res) {
kfree(system->filter);
system->filter = NULL;
pr_warn("Could not create tracefs '%s/filter' entry\n", name);
}
- trace_create_file("enable", TRACE_MODE_WRITE, dir->entry, dir,
+ eventfs_add_file("enable", TRACE_MODE_WRITE, dir->ef, dir,
&ftrace_system_enable_fops);
}
list_add(&dir->list, &tr->systems);
- return dir->entry;
+ return dir->ef;
out_free:
kfree(dir);
@@ -2413,36 +2413,37 @@ static int
event_create_dir(struct dentry *parent, struct trace_event_file *file)
{
struct trace_event_call *call = file->event_call;
+ struct eventfs_file *ef_subsystem = NULL;
struct trace_array *tr = file->tr;
- struct dentry *d_events;
const char *name;
int ret;
/*
* If the trace point header did not define TRACE_SYSTEM
- * then the system would be called "TRACE_SYSTEM".
+ * then the system would be called "TRACE_SYSTEM". This should
+ * never happen.
*/
- if (strcmp(call->class->system, TRACE_SYSTEM) != 0) {
- d_events = event_subsystem_dir(tr, call->class->system, file, parent);
- if (!d_events)
- return -ENOMEM;
- } else
- d_events = parent;
+ if (WARN_ON_ONCE(strcmp(call->class->system, TRACE_SYSTEM) == 0))
+ return -ENODEV;
+
+ ef_subsystem = event_subsystem_dir(tr, call->class->system, file, parent);
+ if (!ef_subsystem)
+ return -ENOMEM;
name = trace_event_name(call);
- file->dir = tracefs_create_dir(name, d_events);
- if (!file->dir) {
+ file->ef = eventfs_add_dir(name, ef_subsystem);
+ if (IS_ERR(file->ef)) {
pr_warn("Could not create tracefs '%s' directory\n", name);
return -1;
}
if (call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
- trace_create_file("enable", TRACE_MODE_WRITE, file->dir, file,
+ eventfs_add_file("enable", TRACE_MODE_WRITE, file->ef, file,
&ftrace_enable_fops);
#ifdef CONFIG_PERF_EVENTS
if (call->event.type && call->class->reg)
- trace_create_file("id", TRACE_MODE_READ, file->dir,
+ eventfs_add_file("id", TRACE_MODE_READ, file->ef,
(void *)(long)call->event.type,
&ftrace_event_id_fops);
#endif
@@ -2458,27 +2459,27 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file)
* triggers or filters.
*/
if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) {
- trace_create_file("filter", TRACE_MODE_WRITE, file->dir,
+ eventfs_add_file("filter", TRACE_MODE_WRITE, file->ef,
file, &ftrace_event_filter_fops);
- trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
+ eventfs_add_file("trigger", TRACE_MODE_WRITE, file->ef,
file, &event_trigger_fops);
}
#ifdef CONFIG_HIST_TRIGGERS
- trace_create_file("hist", TRACE_MODE_READ, file->dir, file,
+ eventfs_add_file("hist", TRACE_MODE_READ, file->ef, file,
&event_hist_fops);
#endif
#ifdef CONFIG_HIST_TRIGGERS_DEBUG
- trace_create_file("hist_debug", TRACE_MODE_READ, file->dir, file,
+ eventfs_add_file("hist_debug", TRACE_MODE_READ, file->ef, file,
&event_hist_debug_fops);
#endif
- trace_create_file("format", TRACE_MODE_READ, file->dir, call,
+ eventfs_add_file("format", TRACE_MODE_READ, file->ef, call,
&ftrace_event_format_fops);
#ifdef CONFIG_TRACE_EVENT_INJECT
if (call->event.type && call->class->reg)
- trace_create_file("inject", 0200, file->dir, file,
+ eventfs_add_file("inject", 0200, file->ef, file,
&event_inject_fops);
#endif
@@ -3631,21 +3632,22 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
{
struct dentry *d_events;
struct dentry *entry;
+ int error = 0;
entry = trace_create_file("set_event", TRACE_MODE_WRITE, parent,
tr, &ftrace_set_event_fops);
if (!entry)
return -ENOMEM;
- d_events = tracefs_create_dir("events", parent);
- if (!d_events) {
+ d_events = eventfs_create_events_dir("events", parent);
+ if (IS_ERR(d_events)) {
pr_warn("Could not create tracefs 'events' directory\n");
return -ENOMEM;
}
- entry = trace_create_file("enable", TRACE_MODE_WRITE, d_events,
+ error = eventfs_add_events_file("enable", TRACE_MODE_WRITE, d_events,
tr, &ftrace_tr_enable_fops);
- if (!entry)
+ if (error)
return -ENOMEM;
/* There are not as crucial, just warn if they are not created */
@@ -3658,11 +3660,11 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
&ftrace_set_event_notrace_pid_fops);
/* ring buffer internal formats */
- trace_create_file("header_page", TRACE_MODE_READ, d_events,
+ eventfs_add_events_file("header_page", TRACE_MODE_READ, d_events,
ring_buffer_print_page_header,
&ftrace_show_header_fops);
- trace_create_file("header_event", TRACE_MODE_READ, d_events,
+ eventfs_add_events_file("header_event", TRACE_MODE_READ, d_events,
ring_buffer_print_entry_header,
&ftrace_show_header_fops);
@@ -3750,7 +3752,7 @@ int event_trace_del_tracer(struct trace_array *tr)
down_write(&trace_event_sem);
__trace_remove_event_dirs(tr);
- tracefs_remove(tr->event_dir);
+ eventfs_remove_events_dir(tr->event_dir);
up_write(&trace_event_sem);
tr->event_dir = NULL;
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 1dad64267878..3a529214a21b 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -46,15 +46,19 @@ static const char * ops[] = { OPS };
enum filter_pred_fn {
FILTER_PRED_FN_NOP,
FILTER_PRED_FN_64,
+ FILTER_PRED_FN_64_CPUMASK,
FILTER_PRED_FN_S64,
FILTER_PRED_FN_U64,
FILTER_PRED_FN_32,
+ FILTER_PRED_FN_32_CPUMASK,
FILTER_PRED_FN_S32,
FILTER_PRED_FN_U32,
FILTER_PRED_FN_16,
+ FILTER_PRED_FN_16_CPUMASK,
FILTER_PRED_FN_S16,
FILTER_PRED_FN_U16,
FILTER_PRED_FN_8,
+ FILTER_PRED_FN_8_CPUMASK,
FILTER_PRED_FN_S8,
FILTER_PRED_FN_U8,
FILTER_PRED_FN_COMM,
@@ -64,21 +68,25 @@ enum filter_pred_fn {
FILTER_PRED_FN_PCHAR_USER,
FILTER_PRED_FN_PCHAR,
FILTER_PRED_FN_CPU,
+ FILTER_PRED_FN_CPU_CPUMASK,
+ FILTER_PRED_FN_CPUMASK,
+ FILTER_PRED_FN_CPUMASK_CPU,
FILTER_PRED_FN_FUNCTION,
FILTER_PRED_FN_,
FILTER_PRED_TEST_VISITED,
};
struct filter_pred {
- enum filter_pred_fn fn_num;
- u64 val;
- u64 val2;
- struct regex regex;
+ struct regex *regex;
+ struct cpumask *mask;
unsigned short *ops;
struct ftrace_event_field *field;
- int offset;
+ u64 val;
+ u64 val2;
+ enum filter_pred_fn fn_num;
+ int offset;
int not;
- int op;
+ int op;
};
/*
@@ -94,6 +102,8 @@ struct filter_pred {
C(TOO_MANY_OPEN, "Too many '('"), \
C(TOO_MANY_CLOSE, "Too few '('"), \
C(MISSING_QUOTE, "Missing matching quote"), \
+ C(MISSING_BRACE_OPEN, "Missing '{'"), \
+ C(MISSING_BRACE_CLOSE, "Missing '}'"), \
C(OPERAND_TOO_LONG, "Operand too long"), \
C(EXPECT_STRING, "Expecting string field"), \
C(EXPECT_DIGIT, "Expecting numeric field"), \
@@ -103,6 +113,7 @@ struct filter_pred {
C(BAD_SUBSYS_FILTER, "Couldn't find or set field in one of a subsystem's events"), \
C(TOO_MANY_PREDS, "Too many terms in predicate expression"), \
C(INVALID_FILTER, "Meaningless filter expression"), \
+ C(INVALID_CPULIST, "Invalid cpulist"), \
C(IP_FIELD_ONLY, "Only 'ip' field is supported for function trace"), \
C(INVALID_VALUE, "Invalid value (did you forget quotes)?"), \
C(NO_FUNCTION, "Function not found"), \
@@ -186,6 +197,15 @@ enum {
PROCESS_OR = 4,
};
+static void free_predicate(struct filter_pred *pred)
+{
+ if (pred) {
+ kfree(pred->regex);
+ kfree(pred->mask);
+ kfree(pred);
+ }
+}
+
/*
* Without going into a formal proof, this explains the method that is used in
* parsing the logical expressions.
@@ -623,12 +643,64 @@ out_free:
kfree(inverts);
if (prog_stack) {
for (i = 0; prog_stack[i].pred; i++)
- kfree(prog_stack[i].pred);
+ free_predicate(prog_stack[i].pred);
kfree(prog_stack);
}
return ERR_PTR(ret);
}
+static inline int
+do_filter_cpumask(int op, const struct cpumask *mask, const struct cpumask *cmp)
+{
+ switch (op) {
+ case OP_EQ:
+ return cpumask_equal(mask, cmp);
+ case OP_NE:
+ return !cpumask_equal(mask, cmp);
+ case OP_BAND:
+ return cpumask_intersects(mask, cmp);
+ default:
+ return 0;
+ }
+}
+
+/* Optimisation of do_filter_cpumask() for scalar fields */
+static inline int
+do_filter_scalar_cpumask(int op, unsigned int cpu, const struct cpumask *mask)
+{
+ /*
+ * Per the weight-of-one cpumask optimisations, the mask passed in this
+ * function has a weight >= 2, so it is never equal to a single scalar.
+ */
+ switch (op) {
+ case OP_EQ:
+ return false;
+ case OP_NE:
+ return true;
+ case OP_BAND:
+ return cpumask_test_cpu(cpu, mask);
+ default:
+ return 0;
+ }
+}
+
+static inline int
+do_filter_cpumask_scalar(int op, const struct cpumask *mask, unsigned int cpu)
+{
+ switch (op) {
+ case OP_EQ:
+ return cpumask_test_cpu(cpu, mask) &&
+ cpumask_nth(1, mask) >= nr_cpu_ids;
+ case OP_NE:
+ return !cpumask_test_cpu(cpu, mask) ||
+ cpumask_nth(1, mask) < nr_cpu_ids;
+ case OP_BAND:
+ return cpumask_test_cpu(cpu, mask);
+ default:
+ return 0;
+ }
+}
+
enum pred_cmp_types {
PRED_CMP_TYPE_NOP,
PRED_CMP_TYPE_LT,
@@ -672,6 +744,18 @@ static int filter_pred_##type(struct filter_pred *pred, void *event) \
} \
}
+#define DEFINE_CPUMASK_COMPARISON_PRED(size) \
+static int filter_pred_##size##_cpumask(struct filter_pred *pred, void *event) \
+{ \
+ u##size *addr = (u##size *)(event + pred->offset); \
+ unsigned int cpu = *addr; \
+ \
+ if (cpu >= nr_cpu_ids) \
+ return 0; \
+ \
+ return do_filter_scalar_cpumask(pred->op, cpu, pred->mask); \
+}
+
#define DEFINE_EQUALITY_PRED(size) \
static int filter_pred_##size(struct filter_pred *pred, void *event) \
{ \
@@ -693,6 +777,11 @@ DEFINE_COMPARISON_PRED(u16);
DEFINE_COMPARISON_PRED(s8);
DEFINE_COMPARISON_PRED(u8);
+DEFINE_CPUMASK_COMPARISON_PRED(64);
+DEFINE_CPUMASK_COMPARISON_PRED(32);
+DEFINE_CPUMASK_COMPARISON_PRED(16);
+DEFINE_CPUMASK_COMPARISON_PRED(8);
+
DEFINE_EQUALITY_PRED(64);
DEFINE_EQUALITY_PRED(32);
DEFINE_EQUALITY_PRED(16);
@@ -750,7 +839,7 @@ static int filter_pred_string(struct filter_pred *pred, void *event)
char *addr = (char *)(event + pred->offset);
int cmp, match;
- cmp = pred->regex.match(addr, &pred->regex, pred->regex.field_len);
+ cmp = pred->regex->match(addr, pred->regex, pred->regex->field_len);
match = cmp ^ pred->not;
@@ -763,7 +852,7 @@ static __always_inline int filter_pchar(struct filter_pred *pred, char *str)
int len;
len = strlen(str) + 1; /* including tailing '\0' */
- cmp = pred->regex.match(str, &pred->regex, len);
+ cmp = pred->regex->match(str, pred->regex, len);
match = cmp ^ pred->not;
@@ -813,7 +902,7 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event)
char *addr = (char *)(event + str_loc);
int cmp, match;
- cmp = pred->regex.match(addr, &pred->regex, str_len);
+ cmp = pred->regex->match(addr, pred->regex, str_len);
match = cmp ^ pred->not;
@@ -836,7 +925,7 @@ static int filter_pred_strrelloc(struct filter_pred *pred, void *event)
char *addr = (char *)(&item[1]) + str_loc;
int cmp, match;
- cmp = pred->regex.match(addr, &pred->regex, str_len);
+ cmp = pred->regex->match(addr, pred->regex, str_len);
match = cmp ^ pred->not;
@@ -869,12 +958,42 @@ static int filter_pred_cpu(struct filter_pred *pred, void *event)
}
}
+/* Filter predicate for current CPU vs user-provided cpumask */
+static int filter_pred_cpu_cpumask(struct filter_pred *pred, void *event)
+{
+ int cpu = raw_smp_processor_id();
+
+ return do_filter_scalar_cpumask(pred->op, cpu, pred->mask);
+}
+
+/* Filter predicate for cpumask field vs user-provided cpumask */
+static int filter_pred_cpumask(struct filter_pred *pred, void *event)
+{
+ u32 item = *(u32 *)(event + pred->offset);
+ int loc = item & 0xffff;
+ const struct cpumask *mask = (event + loc);
+ const struct cpumask *cmp = pred->mask;
+
+ return do_filter_cpumask(pred->op, mask, cmp);
+}
+
+/* Filter predicate for cpumask field vs user-provided scalar */
+static int filter_pred_cpumask_cpu(struct filter_pred *pred, void *event)
+{
+ u32 item = *(u32 *)(event + pred->offset);
+ int loc = item & 0xffff;
+ const struct cpumask *mask = (event + loc);
+ unsigned int cpu = pred->val;
+
+ return do_filter_cpumask_scalar(pred->op, mask, cpu);
+}
+
/* Filter predicate for COMM. */
static int filter_pred_comm(struct filter_pred *pred, void *event)
{
int cmp;
- cmp = pred->regex.match(current->comm, &pred->regex,
+ cmp = pred->regex->match(current->comm, pred->regex,
TASK_COMM_LEN);
return cmp ^ pred->not;
}
@@ -1004,7 +1123,7 @@ enum regex_type filter_parse_regex(char *buff, int len, char **search, int *not)
static void filter_build_regex(struct filter_pred *pred)
{
- struct regex *r = &pred->regex;
+ struct regex *r = pred->regex;
char *search;
enum regex_type type = MATCH_FULL;
@@ -1169,7 +1288,7 @@ static void free_prog(struct event_filter *filter)
return;
for (i = 0; prog[i].pred; i++)
- kfree(prog[i].pred);
+ free_predicate(prog[i].pred);
kfree(prog);
}
@@ -1236,8 +1355,12 @@ static void filter_free_subsystem_filters(struct trace_subsystem_dir *dir,
int filter_assign_type(const char *type)
{
- if (strstr(type, "__data_loc") && strstr(type, "char"))
- return FILTER_DYN_STRING;
+ if (strstr(type, "__data_loc")) {
+ if (strstr(type, "char"))
+ return FILTER_DYN_STRING;
+ if (strstr(type, "cpumask_t"))
+ return FILTER_CPUMASK;
+ }
if (strstr(type, "__rel_loc") && strstr(type, "char"))
return FILTER_RDYN_STRING;
@@ -1313,24 +1436,32 @@ static int filter_pred_fn_call(struct filter_pred *pred, void *event)
switch (pred->fn_num) {
case FILTER_PRED_FN_64:
return filter_pred_64(pred, event);
+ case FILTER_PRED_FN_64_CPUMASK:
+ return filter_pred_64_cpumask(pred, event);
case FILTER_PRED_FN_S64:
return filter_pred_s64(pred, event);
case FILTER_PRED_FN_U64:
return filter_pred_u64(pred, event);
case FILTER_PRED_FN_32:
return filter_pred_32(pred, event);
+ case FILTER_PRED_FN_32_CPUMASK:
+ return filter_pred_32_cpumask(pred, event);
case FILTER_PRED_FN_S32:
return filter_pred_s32(pred, event);
case FILTER_PRED_FN_U32:
return filter_pred_u32(pred, event);
case FILTER_PRED_FN_16:
return filter_pred_16(pred, event);
+ case FILTER_PRED_FN_16_CPUMASK:
+ return filter_pred_16_cpumask(pred, event);
case FILTER_PRED_FN_S16:
return filter_pred_s16(pred, event);
case FILTER_PRED_FN_U16:
return filter_pred_u16(pred, event);
case FILTER_PRED_FN_8:
return filter_pred_8(pred, event);
+ case FILTER_PRED_FN_8_CPUMASK:
+ return filter_pred_8_cpumask(pred, event);
case FILTER_PRED_FN_S8:
return filter_pred_s8(pred, event);
case FILTER_PRED_FN_U8:
@@ -1349,6 +1480,12 @@ static int filter_pred_fn_call(struct filter_pred *pred, void *event)
return filter_pred_pchar(pred, event);
case FILTER_PRED_FN_CPU:
return filter_pred_cpu(pred, event);
+ case FILTER_PRED_FN_CPU_CPUMASK:
+ return filter_pred_cpu_cpumask(pred, event);
+ case FILTER_PRED_FN_CPUMASK:
+ return filter_pred_cpumask(pred, event);
+ case FILTER_PRED_FN_CPUMASK_CPU:
+ return filter_pred_cpumask_cpu(pred, event);
case FILTER_PRED_FN_FUNCTION:
return filter_pred_function(pred, event);
case FILTER_PRED_TEST_VISITED:
@@ -1553,9 +1690,117 @@ static int parse_pred(const char *str, void *data,
goto err_free;
}
- pred->regex.len = len;
- strncpy(pred->regex.pattern, str + s, len);
- pred->regex.pattern[len] = 0;
+ pred->regex = kzalloc(sizeof(*pred->regex), GFP_KERNEL);
+ if (!pred->regex)
+ goto err_mem;
+ pred->regex->len = len;
+ strncpy(pred->regex->pattern, str + s, len);
+ pred->regex->pattern[len] = 0;
+
+ } else if (!strncmp(str + i, "CPUS", 4)) {
+ unsigned int maskstart;
+ bool single;
+ char *tmp;
+
+ switch (field->filter_type) {
+ case FILTER_CPUMASK:
+ case FILTER_CPU:
+ case FILTER_OTHER:
+ break;
+ default:
+ parse_error(pe, FILT_ERR_ILLEGAL_FIELD_OP, pos + i);
+ goto err_free;
+ }
+
+ switch (op) {
+ case OP_EQ:
+ case OP_NE:
+ case OP_BAND:
+ break;
+ default:
+ parse_error(pe, FILT_ERR_ILLEGAL_FIELD_OP, pos + i);
+ goto err_free;
+ }
+
+ /* Skip CPUS */
+ i += 4;
+ if (str[i++] != '{') {
+ parse_error(pe, FILT_ERR_MISSING_BRACE_OPEN, pos + i);
+ goto err_free;
+ }
+ maskstart = i;
+
+ /* Walk the cpulist until closing } */
+ for (; str[i] && str[i] != '}'; i++);
+ if (str[i] != '}') {
+ parse_error(pe, FILT_ERR_MISSING_BRACE_CLOSE, pos + i);
+ goto err_free;
+ }
+
+ if (maskstart == i) {
+ parse_error(pe, FILT_ERR_INVALID_CPULIST, pos + i);
+ goto err_free;
+ }
+
+ /* Copy the cpulist between { and } */
+ tmp = kmalloc((i - maskstart) + 1, GFP_KERNEL);
+ strscpy(tmp, str + maskstart, (i - maskstart) + 1);
+
+ pred->mask = kzalloc(cpumask_size(), GFP_KERNEL);
+ if (!pred->mask)
+ goto err_mem;
+
+ /* Now parse it */
+ if (cpulist_parse(tmp, pred->mask)) {
+ parse_error(pe, FILT_ERR_INVALID_CPULIST, pos + i);
+ goto err_free;
+ }
+
+ /* Move along */
+ i++;
+
+ /*
+ * Optimisation: if the user-provided mask has a weight of one
+ * then we can treat it as a scalar input.
+ */
+ single = cpumask_weight(pred->mask) == 1;
+ if (single) {
+ pred->val = cpumask_first(pred->mask);
+ kfree(pred->mask);
+ }
+
+ if (field->filter_type == FILTER_CPUMASK) {
+ pred->fn_num = single ?
+ FILTER_PRED_FN_CPUMASK_CPU :
+ FILTER_PRED_FN_CPUMASK;
+ } else if (field->filter_type == FILTER_CPU) {
+ if (single) {
+ pred->op = pred->op == OP_BAND ? OP_EQ : pred->op;
+ pred->fn_num = FILTER_PRED_FN_CPU;
+ } else {
+ pred->fn_num = FILTER_PRED_FN_CPU_CPUMASK;
+ }
+ } else if (single) {
+ pred->op = pred->op == OP_BAND ? OP_EQ : pred->op;
+ pred->fn_num = select_comparison_fn(pred->op, field->size, false);
+ if (pred->op == OP_NE)
+ pred->not = 1;
+ } else {
+ switch (field->size) {
+ case 8:
+ pred->fn_num = FILTER_PRED_FN_64_CPUMASK;
+ break;
+ case 4:
+ pred->fn_num = FILTER_PRED_FN_32_CPUMASK;
+ break;
+ case 2:
+ pred->fn_num = FILTER_PRED_FN_16_CPUMASK;
+ break;
+ case 1:
+ pred->fn_num = FILTER_PRED_FN_8_CPUMASK;
+ break;
+ }
+ }
/* This is either a string, or an integer */
} else if (str[i] == '\'' || str[i] == '"') {
@@ -1597,9 +1842,12 @@ static int parse_pred(const char *str, void *data,
goto err_free;
}
- pred->regex.len = len;
- strncpy(pred->regex.pattern, str + s, len);
- pred->regex.pattern[len] = 0;
+ pred->regex = kzalloc(sizeof(*pred->regex), GFP_KERNEL);
+ if (!pred->regex)
+ goto err_mem;
+ pred->regex->len = len;
+ strncpy(pred->regex->pattern, str + s, len);
+ pred->regex->pattern[len] = 0;
filter_build_regex(pred);
@@ -1608,7 +1856,7 @@ static int parse_pred(const char *str, void *data,
} else if (field->filter_type == FILTER_STATIC_STRING) {
pred->fn_num = FILTER_PRED_FN_STRING;
- pred->regex.field_len = field->size;
+ pred->regex->field_len = field->size;
} else if (field->filter_type == FILTER_DYN_STRING) {
pred->fn_num = FILTER_PRED_FN_STRLOC;
@@ -1691,10 +1939,10 @@ static int parse_pred(const char *str, void *data,
return i;
err_free:
- kfree(pred);
+ free_predicate(pred);
return -EINVAL;
err_mem:
- kfree(pred);
+ free_predicate(pred);
return -ENOMEM;
}
@@ -2287,8 +2535,8 @@ static int ftrace_function_set_filter_pred(struct filter_pred *pred,
return ret;
return __ftrace_function_set_filter(pred->op == OP_EQ,
- pred->regex.pattern,
- pred->regex.len,
+ pred->regex->pattern,
+ pred->regex->len,
data);
}
diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
index 33cb6af31f39..6f046650e527 100644
--- a/kernel/trace/trace_events_user.c
+++ b/kernel/trace/trace_events_user.c
@@ -1328,14 +1328,14 @@ static int user_field_set_string(struct ftrace_event_field *field,
static int user_event_set_print_fmt(struct user_event *user, char *buf, int len)
{
- struct ftrace_event_field *field, *next;
+ struct ftrace_event_field *field;
struct list_head *head = &user->fields;
int pos = 0, depth = 0;
const char *str_func;
pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
- list_for_each_entry_safe_reverse(field, next, head, link) {
+ list_for_each_entry_reverse(field, head, link) {
if (depth != 0)
pos += snprintf(buf + pos, LEN_OR_ZERO, " ");
@@ -1347,7 +1347,7 @@ static int user_event_set_print_fmt(struct user_event *user, char *buf, int len)
pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
- list_for_each_entry_safe_reverse(field, next, head, link) {
+ list_for_each_entry_reverse(field, head, link) {
if (user_field_is_dyn_string(field->type, &str_func))
pos += snprintf(buf + pos, LEN_OR_ZERO,
", %s(%s)", str_func, field->name);
@@ -1732,7 +1732,7 @@ static int user_event_create(const char *raw_command)
static int user_event_show(struct seq_file *m, struct dyn_event *ev)
{
struct user_event *user = container_of(ev, struct user_event, devent);
- struct ftrace_event_field *field, *next;
+ struct ftrace_event_field *field;
struct list_head *head;
int depth = 0;
@@ -1740,7 +1740,7 @@ static int user_event_show(struct seq_file *m, struct dyn_event *ev)
head = trace_get_fields(&user->call);
- list_for_each_entry_safe_reverse(field, next, head, link) {
+ list_for_each_entry_reverse(field, head, link) {
if (depth == 0)
seq_puts(m, " ");
else
@@ -1816,13 +1816,14 @@ out:
static bool user_fields_match(struct user_event *user, int argc,
const char **argv)
{
- struct ftrace_event_field *field, *next;
+ struct ftrace_event_field *field;
struct list_head *head = &user->fields;
int i = 0;
- list_for_each_entry_safe_reverse(field, next, head, link)
+ list_for_each_entry_reverse(field, head, link) {
if (!user_field_match(field, argc, argv, &i))
return false;
+ }
if (i != argc)
return false;
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 58f3946081e2..1698fc22afa0 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -51,6 +51,9 @@ static int ftrace_event_register(struct trace_event_call *call,
#undef __array
#define __array(type, item, size) type item[size];
+#undef __stack_array
+#define __stack_array(type, item, size, field) __array(type, item, size)
+
#undef __array_desc
#define __array_desc(type, container, item, size) type item[size];
@@ -114,6 +117,9 @@ static void __always_unused ____ftrace_check_##name(void) \
is_signed_type(_type), .filter_type = FILTER_OTHER, \
.len = _len },
+#undef __stack_array
+#define __stack_array(_type, _item, _len, _field) __array(_type, _item, _len)
+
#undef __array_desc
#define __array_desc(_type, _container, _item, _len) __array(_type, _item, _len)
@@ -149,6 +155,9 @@ static struct trace_event_fields ftrace_event_fields_##name[] = { \
#undef __array
#define __array(type, item, len)
+#undef __stack_array
+#define __stack_array(type, item, len, field)
+
#undef __array_desc
#define __array_desc(type, container, item, len)
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc
index 285b4770efad..ff7499eb98d6 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc
@@ -34,14 +34,19 @@ mips*)
esac
: "Test get argument (1)"
-echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):char" > kprobe_events
+if grep -q eventfs_add_dir available_filter_functions; then
+ DIR_NAME="eventfs_add_dir"
+else
+ DIR_NAME="tracefs_create_dir"
+fi
+echo "p:testprobe ${DIR_NAME} arg1=+0(${ARG1}):char" > kprobe_events
echo 1 > events/kprobes/testprobe/enable
echo "p:test $FUNCTION_FORK" >> kprobe_events
grep -qe "testprobe.* arg1='t'" trace
echo 0 > events/kprobes/testprobe/enable
: "Test get argument (2)"
-echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):char arg2=+0(${ARG1}):char[4]" > kprobe_events
+echo "p:testprobe ${DIR_NAME} arg1=+0(${ARG1}):char arg2=+0(${ARG1}):char[4]" > kprobe_events
echo 1 > events/kprobes/testprobe/enable
echo "p:test $FUNCTION_FORK" >> kprobe_events
grep -qe "testprobe.* arg1='t' arg2={'t','e','s','t'}" trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
index a4f8e7c53c1f..a202b2ea4baf 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
@@ -37,14 +37,19 @@ loongarch*)
esac
: "Test get argument (1)"
-echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string" > kprobe_events
+if grep -q eventfs_add_dir available_filter_functions; then
+ DIR_NAME="eventfs_add_dir"
+else
+ DIR_NAME="tracefs_create_dir"
+fi
+echo "p:testprobe ${DIR_NAME} arg1=+0(${ARG1}):string" > kprobe_events
echo 1 > events/kprobes/testprobe/enable
echo "p:test $FUNCTION_FORK" >> kprobe_events
grep -qe "testprobe.* arg1=\"test\"" trace
echo 0 > events/kprobes/testprobe/enable
: "Test get argument (2)"
-echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string arg2=+0(${ARG1}):string" > kprobe_events
+echo "p:testprobe ${DIR_NAME} arg1=+0(${ARG1}):string arg2=+0(${ARG1}):string" > kprobe_events
echo 1 > events/kprobes/testprobe/enable
echo "p:test $FUNCTION_FORK" >> kprobe_events
grep -qe "testprobe.* arg1=\"test\" arg2=\"test\"" trace