aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/trace
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace')
-rw-r--r--kernel/trace/Kconfig2
-rw-r--r--kernel/trace/Makefile1
-rw-r--r--kernel/trace/ftrace.c89
-rw-r--r--kernel/trace/rv/Kconfig78
-rw-r--r--kernel/trace/rv/Makefile8
-rw-r--r--kernel/trace/rv/monitors/wip/wip.c88
-rw-r--r--kernel/trace/rv/monitors/wip/wip.h46
-rw-r--r--kernel/trace/rv/monitors/wwnr/wwnr.c87
-rw-r--r--kernel/trace/rv/monitors/wwnr/wwnr.h46
-rw-r--r--kernel/trace/rv/reactor_panic.c43
-rw-r--r--kernel/trace/rv/reactor_printk.c42
-rw-r--r--kernel/trace/rv/rv.c799
-rw-r--r--kernel/trace/rv/rv.h68
-rw-r--r--kernel/trace/rv/rv_reactors.c510
-rw-r--r--kernel/trace/trace.c35
-rw-r--r--kernel/trace/trace.h9
-rw-r--r--kernel/trace/trace_dynevent.c2
-rw-r--r--kernel/trace/trace_eprobe.c128
-rw-r--r--kernel/trace/trace_event_perf.c7
-rw-r--r--kernel/trace/trace_events.c1
-rw-r--r--kernel/trace/trace_events_hist.c5
-rw-r--r--kernel/trace/trace_events_user.c2
-rw-r--r--kernel/trace/trace_kprobe.c16
-rw-r--r--kernel/trace/trace_probe.c33
-rw-r--r--kernel/trace/trace_probe.h5
-rw-r--r--kernel/trace/trace_uprobe.c12
26 files changed, 2047 insertions, 115 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index ccd6a5ade3e9..1052126bdca2 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -1106,4 +1106,6 @@ config HIST_TRIGGERS_DEBUG
If unsure, say N.
+source "kernel/trace/rv/Kconfig"
+
endif # FTRACE
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 0d261774d6f3..c6651e16b557 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -106,5 +106,6 @@ obj-$(CONFIG_FPROBE) += fprobe.o
obj-$(CONFIG_RETHOOK) += rethook.o
obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o
+obj-$(CONFIG_RV) += rv/
libftrace-y := ftrace.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index bc921a3f7ea8..439e2ab6905e 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1861,8 +1861,6 @@ static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops,
ftrace_hash_rec_update_modify(ops, filter_hash, 1);
}
-static bool ops_references_ip(struct ftrace_ops *ops, unsigned long ip);
-
/*
* Try to update IPMODIFY flag on each ftrace_rec. Return 0 if it is OK
* or no-needed to update, -EBUSY if it detects a conflict of the flag
@@ -2974,6 +2972,16 @@ int ftrace_startup(struct ftrace_ops *ops, int command)
ftrace_startup_enable(command);
+ /*
+ * If ftrace is in an undefined state, we just remove ops from list
+ * to prevent the NULL pointer, instead of totally rolling it back and
+ * free trampoline, because those actions could cause further damage.
+ */
+ if (unlikely(ftrace_disabled)) {
+ __unregister_ftrace_function(ops);
+ return -ENODEV;
+ }
+
ops->flags &= ~FTRACE_OPS_FL_ADDING;
return 0;
@@ -3108,49 +3116,6 @@ static inline int ops_traces_mod(struct ftrace_ops *ops)
ftrace_hash_empty(ops->func_hash->notrace_hash);
}
-/*
- * Check if the current ops references the given ip.
- *
- * If the ops traces all functions, then it was already accounted for.
- * If the ops does not trace the current record function, skip it.
- * If the ops ignores the function via notrace filter, skip it.
- */
-static bool
-ops_references_ip(struct ftrace_ops *ops, unsigned long ip)
-{
- /* If ops isn't enabled, ignore it */
- if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
- return false;
-
- /* If ops traces all then it includes this function */
- if (ops_traces_mod(ops))
- return true;
-
- /* The function must be in the filter */
- if (!ftrace_hash_empty(ops->func_hash->filter_hash) &&
- !__ftrace_lookup_ip(ops->func_hash->filter_hash, ip))
- return false;
-
- /* If in notrace hash, we ignore it too */
- if (ftrace_lookup_ip(ops->func_hash->notrace_hash, ip))
- return false;
-
- return true;
-}
-
-/*
- * Check if the current ops references the record.
- *
- * If the ops traces all functions, then it was already accounted for.
- * If the ops does not trace the current record function, skip it.
- * If the ops ignores the function via notrace filter, skip it.
- */
-static bool
-ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec)
-{
- return ops_references_ip(ops, rec->ip);
-}
-
static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)
{
bool init_nop = ftrace_need_init_nop();
@@ -6812,6 +6777,38 @@ static int ftrace_get_trampoline_kallsym(unsigned int symnum,
return -ERANGE;
}
+#if defined(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS) || defined(CONFIG_MODULES)
+/*
+ * Check if the current ops references the given ip.
+ *
+ * If the ops traces all functions, then it was already accounted for.
+ * If the ops does not trace the current record function, skip it.
+ * If the ops ignores the function via notrace filter, skip it.
+ */
+static bool
+ops_references_ip(struct ftrace_ops *ops, unsigned long ip)
+{
+ /* If ops isn't enabled, ignore it */
+ if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
+ return false;
+
+ /* If ops traces all then it includes this function */
+ if (ops_traces_mod(ops))
+ return true;
+
+ /* The function must be in the filter */
+ if (!ftrace_hash_empty(ops->func_hash->filter_hash) &&
+ !__ftrace_lookup_ip(ops->func_hash->filter_hash, ip))
+ return false;
+
+ /* If in notrace hash, we ignore it too */
+ if (ftrace_lookup_ip(ops->func_hash->notrace_hash, ip))
+ return false;
+
+ return true;
+}
+#endif
+
#ifdef CONFIG_MODULES
#define next_to_ftrace_page(p) container_of(p, struct ftrace_page, next)
@@ -6824,7 +6821,7 @@ static int referenced_filters(struct dyn_ftrace *rec)
int cnt = 0;
for (ops = ftrace_ops_list; ops != &ftrace_list_end; ops = ops->next) {
- if (ops_references_rec(ops, rec)) {
+ if (ops_references_ip(ops, rec->ip)) {
if (WARN_ON_ONCE(ops->flags & FTRACE_OPS_FL_DIRECT))
continue;
if (WARN_ON_ONCE(ops->flags & FTRACE_OPS_FL_IPMODIFY))
diff --git a/kernel/trace/rv/Kconfig b/kernel/trace/rv/Kconfig
new file mode 100644
index 000000000000..831779607e84
--- /dev/null
+++ b/kernel/trace/rv/Kconfig
@@ -0,0 +1,78 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+config DA_MON_EVENTS
+ bool
+
+config DA_MON_EVENTS_IMPLICIT
+ select DA_MON_EVENTS
+ bool
+
+config DA_MON_EVENTS_ID
+ select DA_MON_EVENTS
+ bool
+
+menuconfig RV
+ bool "Runtime Verification"
+ depends on TRACING
+ help
+ Enable the kernel runtime verification infrastructure. RV is a
+ lightweight (yet rigorous) method that complements classical
+ exhaustive verification techniques (such as model checking and
+ theorem proving). RV works by analyzing the trace of the system's
+ actual execution, comparing it against a formal specification of
+ the system behavior.
+
+ For further information, see:
+ Documentation/trace/rv/runtime-verification.rst
+
+config RV_MON_WIP
+ depends on RV
+ depends on PREEMPT_TRACER
+ select DA_MON_EVENTS_IMPLICIT
+ bool "wip monitor"
+ help
+ Enable wip (wakeup in preemptive) sample monitor that illustrates
+ the usage of per-cpu monitors, and one limitation of the
+ preempt_disable/enable events.
+
+ For further information, see:
+ Documentation/trace/rv/monitor_wip.rst
+
+config RV_MON_WWNR
+ depends on RV
+ select DA_MON_EVENTS_ID
+ bool "wwnr monitor"
+ help
+ Enable wwnr (wakeup while not running) sample monitor, this is a
+ sample monitor that illustrates the usage of per-task monitor.
+ The model is borken on purpose: it serves to test reactors.
+
+ For further information, see:
+ Documentation/trace/rv/monitor_wwnr.rst
+
+config RV_REACTORS
+ bool "Runtime verification reactors"
+ default y
+ depends on RV
+ help
+ Enables the online runtime verification reactors. A runtime
+ monitor can cause a reaction to the detection of an exception
+ on the model's execution. By default, the monitors have
+ tracing reactions, printing the monitor output via tracepoints,
+ but other reactions can be added (on-demand) via this interface.
+
+config RV_REACT_PRINTK
+ bool "Printk reactor"
+ depends on RV_REACTORS
+ default y
+ help
+ Enables the printk reactor. The printk reactor emits a printk()
+ message if an exception is found.
+
+config RV_REACT_PANIC
+ bool "Panic reactor"
+ depends on RV_REACTORS
+ default y
+ help
+ Enables the panic reactor. The panic reactor emits a printk()
+ message if an exception is found and panic()s the system.
diff --git a/kernel/trace/rv/Makefile b/kernel/trace/rv/Makefile
new file mode 100644
index 000000000000..963d14875b45
--- /dev/null
+++ b/kernel/trace/rv/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_RV) += rv.o
+obj-$(CONFIG_RV_MON_WIP) += monitors/wip/wip.o
+obj-$(CONFIG_RV_MON_WWNR) += monitors/wwnr/wwnr.o
+obj-$(CONFIG_RV_REACTORS) += rv_reactors.o
+obj-$(CONFIG_RV_REACT_PRINTK) += reactor_printk.o
+obj-$(CONFIG_RV_REACT_PANIC) += reactor_panic.o
diff --git a/kernel/trace/rv/monitors/wip/wip.c b/kernel/trace/rv/monitors/wip/wip.c
new file mode 100644
index 000000000000..83cace53b9fa
--- /dev/null
+++ b/kernel/trace/rv/monitors/wip/wip.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/ftrace.h>
+#include <linux/tracepoint.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/rv.h>
+#include <rv/instrumentation.h>
+#include <rv/da_monitor.h>
+
+#define MODULE_NAME "wip"
+
+#include <trace/events/rv.h>
+#include <trace/events/sched.h>
+#include <trace/events/preemptirq.h>
+
+#include "wip.h"
+
+struct rv_monitor rv_wip;
+DECLARE_DA_MON_PER_CPU(wip, unsigned char);
+
+static void handle_preempt_disable(void *data, unsigned long ip, unsigned long parent_ip)
+{
+ da_handle_event_wip(preempt_disable_wip);
+}
+
+static void handle_preempt_enable(void *data, unsigned long ip, unsigned long parent_ip)
+{
+ da_handle_start_event_wip(preempt_enable_wip);
+}
+
+static void handle_sched_waking(void *data, struct task_struct *task)
+{
+ da_handle_event_wip(sched_waking_wip);
+}
+
+static int enable_wip(void)
+{
+ int retval;
+
+ retval = da_monitor_init_wip();
+ if (retval)
+ return retval;
+
+ rv_attach_trace_probe("wip", preempt_enable, handle_preempt_enable);
+ rv_attach_trace_probe("wip", sched_waking, handle_sched_waking);
+ rv_attach_trace_probe("wip", preempt_disable, handle_preempt_disable);
+
+ return 0;
+}
+
+static void disable_wip(void)
+{
+ rv_wip.enabled = 0;
+
+ rv_detach_trace_probe("wip", preempt_disable, handle_preempt_disable);
+ rv_detach_trace_probe("wip", preempt_enable, handle_preempt_enable);
+ rv_detach_trace_probe("wip", sched_waking, handle_sched_waking);
+
+ da_monitor_destroy_wip();
+}
+
+struct rv_monitor rv_wip = {
+ .name = "wip",
+ .description = "wakeup in preemptive per-cpu testing monitor.",
+ .enable = enable_wip,
+ .disable = disable_wip,
+ .reset = da_monitor_reset_all_wip,
+ .enabled = 0,
+};
+
+static int register_wip(void)
+{
+ rv_register_monitor(&rv_wip);
+ return 0;
+}
+
+static void unregister_wip(void)
+{
+ rv_unregister_monitor(&rv_wip);
+}
+
+module_init(register_wip);
+module_exit(unregister_wip);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Daniel Bristot de Oliveira <bristot@kernel.org>");
+MODULE_DESCRIPTION("wip: wakeup in preemptive - per-cpu sample monitor.");
diff --git a/kernel/trace/rv/monitors/wip/wip.h b/kernel/trace/rv/monitors/wip/wip.h
new file mode 100644
index 000000000000..c1c47e2305ef
--- /dev/null
+++ b/kernel/trace/rv/monitors/wip/wip.h
@@ -0,0 +1,46 @@
+/*
+ * Automatically generated C representation of wip automaton
+ * For further information about this format, see kernel documentation:
+ * Documentation/trace/rv/deterministic_automata.rst
+ */
+
+enum states_wip {
+ preemptive_wip = 0,
+ non_preemptive_wip,
+ state_max_wip
+};
+
+#define INVALID_STATE state_max_wip
+
+enum events_wip {
+ preempt_disable_wip = 0,
+ preempt_enable_wip,
+ sched_waking_wip,
+ event_max_wip
+};
+
+struct automaton_wip {
+ char *state_names[state_max_wip];
+ char *event_names[event_max_wip];
+ unsigned char function[state_max_wip][event_max_wip];
+ unsigned char initial_state;
+ bool final_states[state_max_wip];
+};
+
+struct automaton_wip automaton_wip = {
+ .state_names = {
+ "preemptive",
+ "non_preemptive"
+ },
+ .event_names = {
+ "preempt_disable",
+ "preempt_enable",
+ "sched_waking"
+ },
+ .function = {
+ { non_preemptive_wip, INVALID_STATE, INVALID_STATE },
+ { INVALID_STATE, preemptive_wip, non_preemptive_wip },
+ },
+ .initial_state = preemptive_wip,
+ .final_states = { 1, 0 },
+};
diff --git a/kernel/trace/rv/monitors/wwnr/wwnr.c b/kernel/trace/rv/monitors/wwnr/wwnr.c
new file mode 100644
index 000000000000..599225d9cf38
--- /dev/null
+++ b/kernel/trace/rv/monitors/wwnr/wwnr.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/ftrace.h>
+#include <linux/tracepoint.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/rv.h>
+#include <rv/instrumentation.h>
+#include <rv/da_monitor.h>
+
+#define MODULE_NAME "wwnr"
+
+#include <trace/events/rv.h>
+#include <trace/events/sched.h>
+
+#include "wwnr.h"
+
+struct rv_monitor rv_wwnr;
+DECLARE_DA_MON_PER_TASK(wwnr, unsigned char);
+
+static void handle_switch(void *data, bool preempt, struct task_struct *p,
+ struct task_struct *n, unsigned int prev_state)
+{
+ /* start monitoring only after the first suspension */
+ if (prev_state == TASK_INTERRUPTIBLE)
+ da_handle_start_event_wwnr(p, switch_out_wwnr);
+ else
+ da_handle_event_wwnr(p, switch_out_wwnr);
+
+ da_handle_event_wwnr(n, switch_in_wwnr);
+}
+
+static void handle_wakeup(void *data, struct task_struct *p)
+{
+ da_handle_event_wwnr(p, wakeup_wwnr);
+}
+
+static int enable_wwnr(void)
+{
+ int retval;
+
+ retval = da_monitor_init_wwnr();
+ if (retval)
+ return retval;
+
+ rv_attach_trace_probe("wwnr", sched_switch, handle_switch);
+ rv_attach_trace_probe("wwnr", sched_wakeup, handle_wakeup);
+
+ return 0;
+}
+
+static void disable_wwnr(void)
+{
+ rv_wwnr.enabled = 0;
+
+ rv_detach_trace_probe("wwnr", sched_switch, handle_switch);
+ rv_detach_trace_probe("wwnr", sched_wakeup, handle_wakeup);
+
+ da_monitor_destroy_wwnr();
+}
+
+struct rv_monitor rv_wwnr = {
+ .name = "wwnr",
+ .description = "wakeup while not running per-task testing model.",
+ .enable = enable_wwnr,
+ .disable = disable_wwnr,
+ .reset = da_monitor_reset_all_wwnr,
+ .enabled = 0,
+};
+
+static int register_wwnr(void)
+{
+ rv_register_monitor(&rv_wwnr);
+ return 0;
+}
+
+static void unregister_wwnr(void)
+{
+ rv_unregister_monitor(&rv_wwnr);
+}
+
+module_init(register_wwnr);
+module_exit(unregister_wwnr);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Daniel Bristot de Oliveira <bristot@kernel.org>");
+MODULE_DESCRIPTION("wwnr: wakeup while not running monitor");
diff --git a/kernel/trace/rv/monitors/wwnr/wwnr.h b/kernel/trace/rv/monitors/wwnr/wwnr.h
new file mode 100644
index 000000000000..d1afe55cdd4c
--- /dev/null
+++ b/kernel/trace/rv/monitors/wwnr/wwnr.h
@@ -0,0 +1,46 @@
+/*
+ * Automatically generated C representation of wwnr automaton
+ * For further information about this format, see kernel documentation:
+ * Documentation/trace/rv/deterministic_automata.rst
+ */
+
+enum states_wwnr {
+ not_running_wwnr = 0,
+ running_wwnr,
+ state_max_wwnr
+};
+
+#define INVALID_STATE state_max_wwnr
+
+enum events_wwnr {
+ switch_in_wwnr = 0,
+ switch_out_wwnr,
+ wakeup_wwnr,
+ event_max_wwnr
+};
+
+struct automaton_wwnr {
+ char *state_names[state_max_wwnr];
+ char *event_names[event_max_wwnr];
+ unsigned char function[state_max_wwnr][event_max_wwnr];
+ unsigned char initial_state;
+ bool final_states[state_max_wwnr];
+};
+
+struct automaton_wwnr automaton_wwnr = {
+ .state_names = {
+ "not_running",
+ "running"
+ },
+ .event_names = {
+ "switch_in",
+ "switch_out",
+ "wakeup"
+ },
+ .function = {
+ { running_wwnr, INVALID_STATE, not_running_wwnr },
+ { INVALID_STATE, not_running_wwnr, INVALID_STATE },
+ },
+ .initial_state = not_running_wwnr,
+ .final_states = { 1, 0 },
+};
diff --git a/kernel/trace/rv/reactor_panic.c b/kernel/trace/rv/reactor_panic.c
new file mode 100644
index 000000000000..b698d05dd069
--- /dev/null
+++ b/kernel/trace/rv/reactor_panic.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira <bristot@kernel.org>
+ *
+ * Panic RV reactor:
+ * Prints the exception msg to the kernel message log and panic().
+ */
+
+#include <linux/ftrace.h>
+#include <linux/tracepoint.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/rv.h>
+
+static void rv_panic_reaction(char *msg)
+{
+ panic(msg);
+}
+
+static struct rv_reactor rv_panic = {
+ .name = "panic",
+ .description = "panic the system if an exception is found.",
+ .react = rv_panic_reaction
+};
+
+static int register_react_panic(void)
+{
+ rv_register_reactor(&rv_panic);
+ return 0;
+}
+
+static void unregister_react_panic(void)
+{
+ rv_unregister_reactor(&rv_panic);
+}
+
+module_init(register_react_panic);
+module_exit(unregister_react_panic);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Daniel Bristot de Oliveira");
+MODULE_DESCRIPTION("panic rv reactor: panic if an exception is found.");
diff --git a/kernel/trace/rv/reactor_printk.c b/kernel/trace/rv/reactor_printk.c
new file mode 100644
index 000000000000..31899f953af4
--- /dev/null
+++ b/kernel/trace/rv/reactor_printk.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira <bristot@kernel.org>
+ *
+ * Printk RV reactor:
+ * Prints the exception msg to the kernel message log.
+ */
+#include <linux/ftrace.h>
+#include <linux/tracepoint.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/rv.h>
+
+static void rv_printk_reaction(char *msg)
+{
+ printk_deferred(msg);
+}
+
+static struct rv_reactor rv_printk = {
+ .name = "printk",
+ .description = "prints the exception msg to the kernel message log.",
+ .react = rv_printk_reaction
+};
+
+static int register_react_printk(void)
+{
+ rv_register_reactor(&rv_printk);
+ return 0;
+}
+
+static void unregister_react_printk(void)
+{
+ rv_unregister_reactor(&rv_printk);
+}
+
+module_init(register_react_printk);
+module_exit(unregister_react_printk);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Daniel Bristot de Oliveira");
+MODULE_DESCRIPTION("printk rv reactor: printk if an exception is hit.");
diff --git a/kernel/trace/rv/rv.c b/kernel/trace/rv/rv.c
new file mode 100644
index 000000000000..6c97cc2d754a
--- /dev/null
+++ b/kernel/trace/rv/rv.c
@@ -0,0 +1,799 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira <bristot@kernel.org>
+ *
+ * This is the online Runtime Verification (RV) interface.
+ *
+ * RV is a lightweight (yet rigorous) method that complements classical
+ * exhaustive verification techniques (such as model checking and
+ * theorem proving) with a more practical approach to complex systems.
+ *
+ * RV works by analyzing the trace of the system's actual execution,
+ * comparing it against a formal specification of the system behavior.
+ * RV can give precise information on the runtime behavior of the
+ * monitored system while enabling the reaction for unexpected
+ * events, avoiding, for example, the propagation of a failure on
+ * safety-critical systems.
+ *
+ * The development of this interface roots in the development of the
+ * paper:
+ *
+ * De Oliveira, Daniel Bristot; Cucinotta, Tommaso; De Oliveira, Romulo
+ * Silva. Efficient formal verification for the Linux kernel. In:
+ * International Conference on Software Engineering and Formal Methods.
+ * Springer, Cham, 2019. p. 315-332.
+ *
+ * And:
+ *
+ * De Oliveira, Daniel Bristot, et al. Automata-based formal analysis
+ * and verification of the real-time Linux kernel. PhD Thesis, 2020.
+ *
+ * == Runtime monitor interface ==
+ *
+ * A monitor is the central part of the runtime verification of a system.
+ *
+ * The monitor stands in between the formal specification of the desired
+ * (or undesired) behavior, and the trace of the actual system.
+ *
+ * In Linux terms, the runtime verification monitors are encapsulated
+ * inside the "RV monitor" abstraction. A RV monitor includes a reference
+ * model of the system, a set of instances of the monitor (per-cpu monitor,
+ * per-task monitor, and so on), and the helper functions that glue the
+ * monitor to the system via trace. Generally, a monitor includes some form
+ * of trace output as a reaction for event parsing and exceptions,
+ * as depicted bellow:
+ *
+ * Linux +----- RV Monitor ----------------------------------+ Formal
+ * Realm | | Realm
+ * +-------------------+ +----------------+ +-----------------+
+ * | Linux kernel | | Monitor | | Reference |
+ * | Tracing | -> | Instance(s) | <- | Model |
+ * | (instrumentation) | | (verification) | | (specification) |
+ * +-------------------+ +----------------+ +-----------------+
+ * | | |
+ * | V |
+ * | +----------+ |
+ * | | Reaction | |
+ * | +--+--+--+-+ |
+ * | | | | |
+ * | | | +-> trace output ? |
+ * +------------------------|--|----------------------+
+ * | +----> panic ?
+ * +-------> <user-specified>
+ *
+ * This file implements the interface for loading RV monitors, and
+ * to control the verification session.
+ *
+ * == Registering monitors ==
+ *
+ * The struct rv_monitor defines a set of callback functions to control
+ * a verification session. For instance, when a given monitor is enabled,
+ * the "enable" callback function is called to hook the instrumentation
+ * functions to the kernel trace events. The "disable" function is called
+ * when disabling the verification session.
+ *
+ * A RV monitor is registered via:
+ * int rv_register_monitor(struct rv_monitor *monitor);
+ * And unregistered via:
+ * int rv_unregister_monitor(struct rv_monitor *monitor);
+ *
+ * == User interface ==
+ *
+ * The user interface resembles kernel tracing interface. It presents
+ * these files:
+ *
+ * "available_monitors"
+ * - List the available monitors, one per line.
+ *
+ * For example:
+ * # cat available_monitors
+ * wip
+ * wwnr
+ *
+ * "enabled_monitors"
+ * - Lists the enabled monitors, one per line;
+ * - Writing to it enables a given monitor;
+ * - Writing a monitor name with a '!' prefix disables it;
+ * - Truncating the file disables all enabled monitors.
+ *
+ * For example:
+ * # cat enabled_monitors
+ * # echo wip > enabled_monitors
+ * # echo wwnr >> enabled_monitors
+ * # cat enabled_monitors
+ * wip
+ * wwnr
+ * # echo '!wip' >> enabled_monitors
+ * # cat enabled_monitors
+ * wwnr
+ * # echo > enabled_monitors
+ * # cat enabled_monitors
+ * #
+ *
+ * Note that more than one monitor can be enabled concurrently.
+ *
+ * "monitoring_on"
+ * - It is an on/off general switcher for monitoring. Note
+ * that it does not disable enabled monitors or detach events,
+ * but stops the per-entity monitors from monitoring the events
+ * received from the instrumentation. It resembles the "tracing_on"
+ * switcher.
+ *
+ * "monitors/"
+ * Each monitor will have its own directory inside "monitors/". There
+ * the monitor specific files will be presented.
+ * The "monitors/" directory resembles the "events" directory on
+ * tracefs.
+ *
+ * For example:
+ * # cd monitors/wip/
+ * # ls
+ * desc enable
+ * # cat desc
+ * auto-generated wakeup in preemptive monitor.
+ * # cat enable
+ * 0
+ *
+ * For further information, see:
+ * Documentation/trace/rv/runtime-verification.rst
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+
+#ifdef CONFIG_DA_MON_EVENTS
+#define CREATE_TRACE_POINTS
+#include <trace/events/rv.h>
+#endif
+
+#include "rv.h"
+
+DEFINE_MUTEX(rv_interface_lock);
+
+static struct rv_interface rv_root;
+
+struct dentry *get_monitors_root(void)
+{
+ return rv_root.monitors_dir;
+}
+
+/*
+ * Interface for the monitor register.
+ */
+static LIST_HEAD(rv_monitors_list);
+
+static int task_monitor_count;
+static bool task_monitor_slots[RV_PER_TASK_MONITORS];
+
+int rv_get_task_monitor_slot(void)
+{
+ int i;
+
+ lockdep_assert_held(&rv_interface_lock);
+
+ if (task_monitor_count == RV_PER_TASK_MONITORS)
+ return -EBUSY;
+
+ task_monitor_count++;
+
+ for (i = 0; i < RV_PER_TASK_MONITORS; i++) {
+ if (task_monitor_slots[i] == false) {
+ task_monitor_slots[i] = true;
+ return i;
+ }
+ }
+
+ WARN_ONCE(1, "RV task_monitor_count and slots are out of sync\n");
+
+ return -EINVAL;
+}
+
+void rv_put_task_monitor_slot(int slot)
+{
+ lockdep_assert_held(&rv_interface_lock);
+
+ if (slot < 0 || slot >= RV_PER_TASK_MONITORS) {
+ WARN_ONCE(1, "RV releasing an invalid slot!: %d\n", slot);
+ return;
+ }
+
+ WARN_ONCE(!task_monitor_slots[slot], "RV releasing unused task_monitor_slots: %d\n",
+ slot);
+
+ task_monitor_count--;
+ task_monitor_slots[slot] = false;
+}
+
+/*
+ * This section collects the monitor/ files and folders.
+ */
+static ssize_t monitor_enable_read_data(struct file *filp, char __user *user_buf, size_t count,
+ loff_t *ppos)
+{
+ struct rv_monitor_def *mdef = filp->private_data;
+ const char *buff;
+
+ buff = mdef->monitor->enabled ? "1\n" : "0\n";
+
+ return simple_read_from_buffer(user_buf, count, ppos, buff, strlen(buff)+1);
+}
+
+/*
+ * __rv_disable_monitor - disabled an enabled monitor
+ */
+static int __rv_disable_monitor(struct rv_monitor_def *mdef, bool sync)
+{
+ lockdep_assert_held(&rv_interface_lock);
+
+ if (mdef->monitor->enabled) {
+ mdef->monitor->enabled = 0;
+ mdef->monitor->disable();
+
+ /*
+ * Wait for the execution of all events to finish.
+ * Otherwise, the data used by the monitor could
+ * be inconsistent. i.e., if the monitor is re-enabled.
+ */
+ if (sync)
+ tracepoint_synchronize_unregister();
+ return 1;
+ }
+ return 0;
+}
+
+/**
+ * rv_disable_monitor - disable a given runtime monitor
+ *
+ * Returns 0 on success.
+ */
+int rv_disable_monitor(struct rv_monitor_def *mdef)
+{
+ __rv_disable_monitor(mdef, true);
+ return 0;
+}
+
+/**
+ * rv_enable_monitor - enable a given runtime monitor
+ *
+ * Returns 0 on success, error otherwise.
+ */
+int rv_enable_monitor(struct rv_monitor_def *mdef)
+{
+ int retval;
+
+ lockdep_assert_held(&rv_interface_lock);
+
+ if (mdef->monitor->enabled)
+ return 0;
+
+ retval = mdef->monitor->enable();
+
+ if (!retval)
+ mdef->monitor->enabled = 1;
+
+ return retval;
+}
+
+/*
+ * interface for enabling/disabling a monitor.
+ */
+static ssize_t monitor_enable_write_data(struct file *filp, const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct rv_monitor_def *mdef = filp->private_data;
+ int retval;
+ bool val;
+
+ retval = kstrtobool_from_user(user_buf, count, &val);
+ if (retval)
+ return retval;
+
+ retval = count;
+
+ mutex_lock(&rv_interface_lock);
+
+ if (val)
+ retval = rv_enable_monitor(mdef);
+ else
+ retval = rv_disable_monitor(mdef);
+
+ mutex_unlock(&rv_interface_lock);
+
+ return retval ? : count;
+}
+
+static const struct file_operations interface_enable_fops = {
+ .open = simple_open,
+ .llseek = no_llseek,
+ .write = monitor_enable_write_data,
+ .read = monitor_enable_read_data,
+};
+
+/*
+ * Interface to read monitors description.
+ */
+static ssize_t monitor_desc_read_data(struct file *filp, char __user *user_buf, size_t count,
+ loff_t *ppos)
+{
+ struct rv_monitor_def *mdef = filp->private_data;
+ char buff[256];
+
+ memset(buff, 0, sizeof(buff));
+
+ snprintf(buff, sizeof(buff), "%s\n", mdef->monitor->description);
+
+ return simple_read_from_buffer(user_buf, count, ppos, buff, strlen(buff) + 1);
+}
+
+static const struct file_operations interface_desc_fops = {
+ .open = simple_open,
+ .llseek = no_llseek,
+ .read = monitor_desc_read_data,
+};
+
+/*
+ * During the registration of a monitor, this function creates
+ * the monitor dir, where the specific options of the monitor
+ * are exposed.
+ */
+static int create_monitor_dir(struct rv_monitor_def *mdef)
+{
+ struct dentry *root = get_monitors_root();
+ const char *name = mdef->monitor->name;
+ struct dentry *tmp;
+ int retval;
+
+ mdef->root_d = rv_create_dir(name, root);
+ if (!mdef->root_d)
+ return -ENOMEM;
+
+ tmp = rv_create_file("enable", RV_MODE_WRITE, mdef->root_d, mdef, &interface_enable_fops);
+ if (!tmp) {
+ retval = -ENOMEM;
+ goto out_remove_root;
+ }
+
+ tmp = rv_create_file("desc", RV_MODE_READ, mdef->root_d, mdef, &interface_desc_fops);
+ if (!tmp) {
+ retval = -ENOMEM;
+ goto out_remove_root;
+ }
+
+ retval = reactor_populate_monitor(mdef);
+ if (retval)
+ goto out_remove_root;
+
+ return 0;
+
+out_remove_root:
+ rv_remove(mdef->root_d);
+ return retval;
+}
+
+/*
+ * Available/Enable monitor shared seq functions.
+ */
+static int monitors_show(struct seq_file *m, void *p)
+{
+ struct rv_monitor_def *mon_def = p;
+
+ seq_printf(m, "%s\n", mon_def->monitor->name);
+ return 0;
+}
+
+/*
+ * Used by the seq file operations at the end of a read
+ * operation.
+ */
+static void monitors_stop(struct seq_file *m, void *p)
+{
+ mutex_unlock(&rv_interface_lock);
+}
+
+/*
+ * Available monitor seq functions.
+ */
+static void *available_monitors_start(struct seq_file *m, loff_t *pos)
+{
+ mutex_lock(&rv_interface_lock);
+ return seq_list_start(&rv_monitors_list, *pos);
+}
+
+static void *available_monitors_next(struct seq_file *m, void *p, loff_t *pos)
+{
+ return seq_list_next(p, &rv_monitors_list, pos);
+}
+
+/*
+ * Enable monitor seq functions.
+ */
+static void *enabled_monitors_next(struct seq_file *m, void *p, loff_t *pos)
+{
+ struct rv_monitor_def *m_def = p;
+
+ (*pos)++;
+
+ list_for_each_entry_continue(m_def, &rv_monitors_list, list) {
+ if (m_def->monitor->enabled)
+ return m_def;
+ }
+
+ return NULL;
+}
+
+static void *enabled_monitors_start(struct seq_file *m, loff_t *pos)
+{
+ struct rv_monitor_def *m_def;
+ loff_t l;
+
+ mutex_lock(&rv_interface_lock);
+
+ if (list_empty(&rv_monitors_list))
+ return NULL;
+
+ m_def = list_entry(&rv_monitors_list, struct rv_monitor_def, list);
+
+ for (l = 0; l <= *pos; ) {
+ m_def = enabled_monitors_next(m, m_def, &l);
+ if (!m_def)
+ break;
+ }
+
+ return m_def;
+}
+
+/*
+ * available/enabled monitors seq definition.
+ */
+static const struct seq_operations available_monitors_seq_ops = {
+ .start = available_monitors_start,
+ .next = available_monitors_next,
+ .stop = monitors_stop,
+ .show = monitors_show
+};
+
+static const struct seq_operations enabled_monitors_seq_ops = {
+ .start = enabled_monitors_start,
+ .next = enabled_monitors_next,
+ .stop = monitors_stop,
+ .show = monitors_show
+};
+
+/*
+ * available_monitors interface.
+ */
+static int available_monitors_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &available_monitors_seq_ops);
+};
+
+static const struct file_operations available_monitors_ops = {
+ .open = available_monitors_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release
+};
+
+/*
+ * enabled_monitors interface.
+ */
+static void disable_all_monitors(void)
+{
+ struct rv_monitor_def *mdef;
+ int enabled = 0;
+
+ mutex_lock(&rv_interface_lock);
+
+ list_for_each_entry(mdef, &rv_monitors_list, list)
+ enabled += __rv_disable_monitor(mdef, false);
+
+ if (enabled) {
+ /*
+ * Wait for the execution of all events to finish.
+ * Otherwise, the data used by the monitor could
+ * be inconsistent. i.e., if the monitor is re-enabled.
+ */
+ tracepoint_synchronize_unregister();
+ }
+
+ mutex_unlock(&rv_interface_lock);
+}
+
+static int enabled_monitors_open(struct inode *inode, struct file *file)
+{
+ if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
+ disable_all_monitors();
+
+ return seq_open(file, &enabled_monitors_seq_ops);
+};
+
+static ssize_t enabled_monitors_write(struct file *filp, const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ char buff[MAX_RV_MONITOR_NAME_SIZE + 2];
+ struct rv_monitor_def *mdef;
+ int retval = -EINVAL;
+ bool enable = true;
+ char *ptr = buff;
+ int len;
+
+ if (count < 1 || count > MAX_RV_MONITOR_NAME_SIZE + 1)
+ return -EINVAL;
+
+ memset(buff, 0, sizeof(buff));
+
+ retval = simple_write_to_buffer(buff, sizeof(buff) - 1, ppos, user_buf, count);
+ if (retval < 0)
+ return -EFAULT;
+
+ ptr = strim(buff);
+
+ if (ptr[0] == '!') {
+ enable = false;
+ ptr++;
+ }
+
+ len = strlen(ptr);
+ if (!len)
+ return count;
+
+ mutex_lock(&rv_interface_lock);
+
+ retval = -EINVAL;
+
+ list_for_each_entry(mdef, &rv_monitors_list, list) {
+ if (strcmp(ptr, mdef->monitor->name) != 0)
+ continue;
+
+ /*
+ * Monitor found!
+ */
+ if (enable)
+ retval = rv_enable_monitor(mdef);
+ else
+ retval = rv_disable_monitor(mdef);
+
+ if (!retval)
+ retval = count;
+
+ break;
+ }
+
+ mutex_unlock(&rv_interface_lock);
+ return retval;
+}
+
+static const struct file_operations enabled_monitors_ops = {
+ .open = enabled_monitors_open,
+ .read = seq_read,
+ .write = enabled_monitors_write,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+/*
+ * Monitoring on global switcher!
+ */
+static bool __read_mostly monitoring_on;
+
+/**
+ * rv_monitoring_on - checks if monitoring is on
+ *
+ * Returns 1 if on, 0 otherwise.
+ */
+bool rv_monitoring_on(void)
+{
+ /* Ensures that concurrent monitors read consistent monitoring_on */
+ smp_rmb();
+ return READ_ONCE(monitoring_on);
+}
+
+/*
+ * monitoring_on general switcher.
+ */
+static ssize_t monitoring_on_read_data(struct file *filp, char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ const char *buff;
+
+ buff = rv_monitoring_on() ? "1\n" : "0\n";
+
+ return simple_read_from_buffer(user_buf, count, ppos, buff, strlen(buff) + 1);
+}
+
+static void turn_monitoring_off(void)
+{
+ WRITE_ONCE(monitoring_on, false);
+ /* Ensures that concurrent monitors read consistent monitoring_on */
+ smp_wmb();
+}
+
+static void reset_all_monitors(void)
+{
+ struct rv_monitor_def *mdef;
+
+ list_for_each_entry(mdef, &rv_monitors_list, list) {
+ if (mdef->monitor->enabled)
+ mdef->monitor->reset();
+ }
+}
+
+static void turn_monitoring_on(void)
+{
+ WRITE_ONCE(monitoring_on, true);
+ /* Ensures that concurrent monitors read consistent monitoring_on */
+ smp_wmb();
+}
+
+static void turn_monitoring_on_with_reset(void)
+{
+ lockdep_assert_held(&rv_interface_lock);
+
+ if (rv_monitoring_on())
+ return;
+
+ /*
+ * Monitors might be out of sync with the system if events were not
+ * processed because of !rv_monitoring_on().
+ *
+ * Reset all monitors, forcing a re-sync.
+ */
+ reset_all_monitors();
+ turn_monitoring_on();
+}
+
+static ssize_t monitoring_on_write_data(struct file *filp, const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ int retval;
+ bool val;
+
+ retval = kstrtobool_from_user(user_buf, count, &val);
+ if (retval)
+ return retval;
+
+ mutex_lock(&rv_interface_lock);
+
+ if (val)
+ turn_monitoring_on_with_reset();
+ else
+ turn_monitoring_off();
+
+ /*
+ * Wait for the execution of all events to finish
+ * before returning to user-space.
+ */
+ tracepoint_synchronize_unregister();
+
+ mutex_unlock(&rv_interface_lock);
+
+ return count;
+}
+
+static const struct file_operations monitoring_on_fops = {
+ .open = simple_open,
+ .llseek = no_llseek,
+ .write = monitoring_on_write_data,
+ .read = monitoring_on_read_data,
+};
+
+static void destroy_monitor_dir(struct rv_monitor_def *mdef)
+{
+ reactor_cleanup_monitor(mdef);
+ rv_remove(mdef->root_d);
+}
+
+/**
+ * rv_register_monitor - register a rv monitor.
+ * @monitor: The rv_monitor to be registered.
+ *
+ * Returns 0 if successful, error otherwise.
+ */
+int rv_register_monitor(struct rv_monitor *monitor)
+{
+ struct rv_monitor_def *r;
+ int retval = 0;
+
+ if (strlen(monitor->name) >= MAX_RV_MONITOR_NAME_SIZE) {
+ pr_info("Monitor %s has a name longer than %d\n", monitor->name,
+ MAX_RV_MONITOR_NAME_SIZE);
+ return -1;
+ }
+
+ mutex_lock(&rv_interface_lock);
+
+ list_for_each_entry(r, &rv_monitors_list, list) {
+ if (strcmp(monitor->name, r->monitor->name) == 0) {
+ pr_info("Monitor %s is already registered\n", monitor->name);
+ retval = -1;
+ goto out_unlock;
+ }
+ }
+
+ r = kzalloc(sizeof(struct rv_monitor_def), GFP_KERNEL);
+ if (!r) {
+ retval = -ENOMEM;
+ goto out_unlock;
+ }
+
+ r->monitor = monitor;
+
+ retval = create_monitor_dir(r);
+ if (retval) {
+ kfree(r);
+ goto out_unlock;
+ }
+
+ list_add_tail(&r->list, &rv_monitors_list);
+
+out_unlock:
+ mutex_unlock(&rv_interface_lock);
+ return retval;
+}
+
+/**
+ * rv_unregister_monitor - unregister a rv monitor.
+ * @monitor: The rv_monitor to be unregistered.
+ *
+ * Returns 0 if successful, error otherwise.
+ */
+int rv_unregister_monitor(struct rv_monitor *monitor)
+{
+ struct rv_monitor_def *ptr, *next;
+
+ mutex_lock(&rv_interface_lock);
+
+ list_for_each_entry_safe(ptr, next, &rv_monitors_list, list) {
+ if (strcmp(monitor->name, ptr->monitor->name) == 0) {
+ rv_disable_monitor(ptr);
+ list_del(&ptr->list);
+ destroy_monitor_dir(ptr);
+ }
+ }
+
+ mutex_unlock(&rv_interface_lock);
+ return 0;
+}
+
+int __init rv_init_interface(void)
+{
+ struct dentry *tmp;
+ int retval;
+
+ rv_root.root_dir = rv_create_dir("rv", NULL);
+ if (!rv_root.root_dir)
+ goto out_err;
+
+ rv_root.monitors_dir = rv_create_dir("monitors", rv_root.root_dir);
+ if (!rv_root.monitors_dir)
+ goto out_err;
+
+ tmp = rv_create_file("available_monitors", RV_MODE_READ, rv_root.root_dir, NULL,
+ &available_monitors_ops);
+ if (!tmp)
+ goto out_err;
+
+ tmp = rv_create_file("enabled_monitors", RV_MODE_WRITE, rv_root.root_dir, NULL,
+ &enabled_monitors_ops);
+ if (!tmp)
+ goto out_err;
+
+ tmp = rv_create_file("monitoring_on", RV_MODE_WRITE, rv_root.root_dir, NULL,
+ &monitoring_on_fops);
+ if (!tmp)
+ goto out_err;
+ retval = init_rv_reactors(rv_root.root_dir);
+ if (retval)
+ goto out_err;
+
+ turn_monitoring_on();
+
+ return 0;
+
+out_err:
+ rv_remove(rv_root.root_dir);
+ printk(KERN_ERR "RV: Error while creating the RV interface\n");
+ return 1;
+}
diff --git a/kernel/trace/rv/rv.h b/kernel/trace/rv/rv.h
new file mode 100644
index 000000000000..db6cb0913dbd
--- /dev/null
+++ b/kernel/trace/rv/rv.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/mutex.h>
+
+struct rv_interface {
+ struct dentry *root_dir;
+ struct dentry *monitors_dir;
+};
+
+#include "../trace.h"
+#include <linux/tracefs.h>
+#include <linux/rv.h>
+
+#define RV_MODE_WRITE TRACE_MODE_WRITE
+#define RV_MODE_READ TRACE_MODE_READ
+
+#define rv_create_dir tracefs_create_dir
+#define rv_create_file tracefs_create_file
+#define rv_remove tracefs_remove
+
+#define MAX_RV_MONITOR_NAME_SIZE 32
+#define MAX_RV_REACTOR_NAME_SIZE 32
+
+extern struct mutex rv_interface_lock;
+
+#ifdef CONFIG_RV_REACTORS
+struct rv_reactor_def {
+ struct list_head list;
+ struct rv_reactor *reactor;
+ /* protected by the monitor interface lock */
+ int counter;
+};
+#endif
+
+struct rv_monitor_def {
+ struct list_head list;
+ struct rv_monitor *monitor;
+ struct dentry *root_d;
+#ifdef CONFIG_RV_REACTORS
+ struct rv_reactor_def *rdef;
+ bool reacting;
+#endif
+ bool task_monitor;
+};
+
+struct dentry *get_monitors_root(void);
+int rv_disable_monitor(struct rv_monitor_def *mdef);
+int rv_enable_monitor(struct rv_monitor_def *mdef);
+
+#ifdef CONFIG_RV_REACTORS
+int reactor_populate_monitor(struct rv_monitor_def *mdef);
+void reactor_cleanup_monitor(struct rv_monitor_def *mdef);
+int init_rv_reactors(struct dentry *root_dir);
+#else
+static inline int reactor_populate_monitor(struct rv_monitor_def *mdef)
+{
+ return 0;
+}
+
+static inline void reactor_cleanup_monitor(struct rv_monitor_def *mdef)
+{
+ return;
+}
+
+static inline int init_rv_reactors(struct dentry *root_dir)
+{
+ return 0;
+}
+#endif
diff --git a/kernel/trace/rv/rv_reactors.c b/kernel/trace/rv/rv_reactors.c
new file mode 100644
index 000000000000..6aae106695b6
--- /dev/null
+++ b/kernel/trace/rv/rv_reactors.c
@@ -0,0 +1,510 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira <bristot@kernel.org>
+ *
+ * Runtime reactor interface.
+ *
+ * A runtime monitor can cause a reaction to the detection of an
+ * exception on the model's execution. By default, the monitors have
+ * tracing reactions, printing the monitor output via tracepoints.
+ * But other reactions can be added (on-demand) via this interface.
+ *
+ * == Registering reactors ==
+ *
+ * The struct rv_reactor defines a callback function to be executed
+ * in case of a model exception happens. The callback function
+ * receives a message to be (optionally) printed before executing
+ * the reaction.
+ *
+ * A RV reactor is registered via:
+ * int rv_register_reactor(struct rv_reactor *reactor)
+ * And unregistered via:
+ * int rv_unregister_reactor(struct rv_reactor *reactor)
+ *
+ * These functions are exported to modules, enabling reactors to be
+ * dynamically loaded.
+ *
+ * == User interface ==
+ *
+ * The user interface resembles the kernel tracing interface and
+ * presents these files:
+ *
+ * "available_reactors"
+ * - List the available reactors, one per line.
+ *
+ * For example:
+ * # cat available_reactors
+ * nop
+ * panic
+ * printk
+ *
+ * "reacting_on"
+ * - It is an on/off general switch for reactors, disabling
+ * all reactions.
+ *
+ * "monitors/MONITOR/reactors"
+ * - List available reactors, with the select reaction for the given
+ * MONITOR inside []. The default one is the nop (no operation)
+ * reactor.
+ * - Writing the name of an reactor enables it to the given
+ * MONITOR.
+ *
+ * For example:
+ * # cat monitors/wip/reactors
+ * [nop]
+ * panic
+ * printk
+ * # echo panic > monitors/wip/reactors
+ * # cat monitors/wip/reactors
+ * nop
+ * [panic]
+ * printk
+ */
+
+#include <linux/slab.h>
+
+#include "rv.h"
+
+/*
+ * Interface for the reactor register.
+ */
+static LIST_HEAD(rv_reactors_list);
+
+static struct rv_reactor_def *get_reactor_rdef_by_name(char *name)
+{
+ struct rv_reactor_def *r;
+
+ list_for_each_entry(r, &rv_reactors_list, list) {
+ if (strcmp(name, r->reactor->name) == 0)
+ return r;
+ }
+ return NULL;
+}
+
+/*
+ * Available reactors seq functions.
+ */
+static int reactors_show(struct seq_file *m, void *p)
+{
+ struct rv_reactor_def *rea_def = p;
+
+ seq_printf(m, "%s\n", rea_def->reactor->name);
+ return 0;
+}
+
+static void reactors_stop(struct seq_file *m, void *p)
+{
+ mutex_unlock(&rv_interface_lock);
+}
+
+static void *reactors_start(struct seq_file *m, loff_t *pos)
+{
+ mutex_lock(&rv_interface_lock);
+ return seq_list_start(&rv_reactors_list, *pos);
+}
+
+static void *reactors_next(struct seq_file *m, void *p, loff_t *pos)
+{
+ return seq_list_next(p, &rv_reactors_list, pos);
+}
+
+/*
+ * available_reactors seq definition.
+ */
+static const struct seq_operations available_reactors_seq_ops = {
+ .start = reactors_start,
+ .next = reactors_next,
+ .stop = reactors_stop,
+ .show = reactors_show
+};
+
+/*
+ * available_reactors interface.
+ */
+static int available_reactors_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &available_reactors_seq_ops);
+};
+
+static const struct file_operations available_reactors_ops = {
+ .open = available_reactors_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release
+};
+
+/*
+ * Monitor's reactor file.
+ */
+static int monitor_reactor_show(struct seq_file *m, void *p)
+{
+ struct rv_monitor_def *mdef = m->private;
+ struct rv_reactor_def *rdef = p;
+
+ if (mdef->rdef == rdef)
+ seq_printf(m, "[%s]\n", rdef->reactor->name);
+ else
+ seq_printf(m, "%s\n", rdef->reactor->name);
+ return 0;
+}
+
+/*
+ * available_reactors seq definition.
+ */
+static const struct seq_operations monitor_reactors_seq_ops = {
+ .start = reactors_start,
+ .next = reactors_next,
+ .stop = reactors_stop,
+ .show = monitor_reactor_show
+};
+
+static void monitor_swap_reactors(struct rv_monitor_def *mdef, struct rv_reactor_def *rdef,
+ bool reacting)
+{
+ bool monitor_enabled;
+
+ /* nothing to do */
+ if (mdef->rdef == rdef)
+ return;
+
+ monitor_enabled = mdef->monitor->enabled;
+ if (monitor_enabled)
+ rv_disable_monitor(mdef);
+
+ /* swap reactor's usage */
+ mdef->rdef->counter--;
+ rdef->counter++;
+
+ mdef->rdef = rdef;
+ mdef->reacting = reacting;
+ mdef->monitor->react = rdef->reactor->react;
+
+ if (monitor_enabled)
+ rv_enable_monitor(mdef);
+}
+
+static ssize_t
+monitor_reactors_write(struct file *file, const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ char buff[MAX_RV_REACTOR_NAME_SIZE + 2];
+ struct rv_monitor_def *mdef;
+ struct rv_reactor_def *rdef;
+ struct seq_file *seq_f;
+ int retval = -EINVAL;
+ bool enable;
+ char *ptr;
+ int len;
+
+ if (count < 1 || count > MAX_RV_REACTOR_NAME_SIZE + 1)
+ return -EINVAL;
+
+ memset(buff, 0, sizeof(buff));
+
+ retval = simple_write_to_buffer(buff, sizeof(buff) - 1, ppos, user_buf, count);
+ if (retval < 0)
+ return -EFAULT;
+
+ ptr = strim(buff);
+
+ len = strlen(ptr);
+ if (!len)
+ return count;
+
+ /*
+ * See monitor_reactors_open()
+ */
+ seq_f = file->private_data;
+ mdef = seq_f->private;
+
+ mutex_lock(&rv_interface_lock);
+
+ retval = -EINVAL;
+
+ list_for_each_entry(rdef, &rv_reactors_list, list) {
+ if (strcmp(ptr, rdef->reactor->name) != 0)
+ continue;
+
+ if (rdef == get_reactor_rdef_by_name("nop"))
+ enable = false;
+ else
+ enable = true;
+
+ monitor_swap_reactors(mdef, rdef, enable);
+
+ retval = count;
+ break;
+ }
+
+ mutex_unlock(&rv_interface_lock);
+
+ return retval;
+}
+
+/*
+ * available_reactors interface.
+ */
+static int monitor_reactors_open(struct inode *inode, struct file *file)
+{
+ struct rv_monitor_def *mdef = inode->i_private;
+ struct seq_file *seq_f;
+ int ret;
+
+ ret = seq_open(file, &monitor_reactors_seq_ops);
+ if (ret < 0)
+ return ret;
+
+ /*
+ * seq_open stores the seq_file on the file->private data.
+ */
+ seq_f = file->private_data;
+
+ /*
+ * Copy the create file "private" data to the seq_file private data.
+ */
+ seq_f->private = mdef;
+
+ return 0;
+};
+
+static const struct file_operations monitor_reactors_ops = {
+ .open = monitor_reactors_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+ .write = monitor_reactors_write
+};
+
+static int __rv_register_reactor(struct rv_reactor *reactor)
+{
+ struct rv_reactor_def *r;
+
+ list_for_each_entry(r, &rv_reactors_list, list) {
+ if (strcmp(reactor->name, r->reactor->name) == 0) {
+ pr_info("Reactor %s is already registered\n", reactor->name);
+ return -EINVAL;
+ }
+ }
+
+ r = kzalloc(sizeof(struct rv_reactor_def), GFP_KERNEL);
+ if (!r)
+ return -ENOMEM;
+
+ r->reactor = reactor;
+ r->counter = 0;
+
+ list_add_tail(&r->list, &rv_reactors_list);
+
+ return 0;
+}
+
+/**
+ * rv_register_reactor - register a rv reactor.
+ * @reactor: The rv_reactor to be registered.
+ *
+ * Returns 0 if successful, error otherwise.
+ */
+int rv_register_reactor(struct rv_reactor *reactor)
+{
+ int retval = 0;
+
+ if (strlen(reactor->name) >= MAX_RV_REACTOR_NAME_SIZE) {
+ pr_info("Reactor %s has a name longer than %d\n",
+ reactor->name, MAX_RV_MONITOR_NAME_SIZE);
+ return -EINVAL;
+ }
+
+ mutex_lock(&rv_interface_lock);
+ retval = __rv_register_reactor(reactor);
+ mutex_unlock(&rv_interface_lock);
+ return retval;
+}
+
+/**
+ * rv_unregister_reactor - unregister a rv reactor.
+ * @reactor: The rv_reactor to be unregistered.
+ *
+ * Returns 0 if successful, error otherwise.
+ */
+int rv_unregister_reactor(struct rv_reactor *reactor)
+{
+ struct rv_reactor_def *ptr, *next;
+ int ret = 0;
+
+ mutex_lock(&rv_interface_lock);
+
+ list_for_each_entry_safe(ptr, next, &rv_reactors_list, list) {
+ if (strcmp(reactor->name, ptr->reactor->name) == 0) {
+
+ if (!ptr->counter) {
+ list_del(&ptr->list);
+ } else {
+ printk(KERN_WARNING
+ "rv: the rv_reactor %s is in use by %d monitor(s)\n",
+ ptr->reactor->name, ptr->counter);
+ printk(KERN_WARNING "rv: the rv_reactor %s cannot be removed\n",
+ ptr->reactor->name);
+ ret = -EBUSY;
+ break;
+ }
+ }
+ }
+
+ mutex_unlock(&rv_interface_lock);
+ return ret;
+}
+
+/*
+ * reacting_on interface.
+ */
+static bool __read_mostly reacting_on;
+
+/**
+ * rv_reacting_on - checks if reacting is on
+ *
+ * Returns 1 if on, 0 otherwise.
+ */
+bool rv_reacting_on(void)
+{
+ /* Ensures that concurrent monitors read consistent reacting_on */
+ smp_rmb();
+ return READ_ONCE(reacting_on);
+}
+
+static ssize_t reacting_on_read_data(struct file *filp,
+ char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ char *buff;
+
+ buff = rv_reacting_on() ? "1\n" : "0\n";
+
+ return simple_read_from_buffer(user_buf, count, ppos, buff, strlen(buff)+1);
+}
+
+static void turn_reacting_off(void)
+{
+ WRITE_ONCE(reacting_on, false);
+ /* Ensures that concurrent monitors read consistent reacting_on */
+ smp_wmb();
+}
+
+static void turn_reacting_on(void)
+{
+ WRITE_ONCE(reacting_on, true);
+ /* Ensures that concurrent monitors read consistent reacting_on */
+ smp_wmb();
+}
+
+static ssize_t reacting_on_write_data(struct file *filp, const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ int retval;
+ bool val;
+
+ retval = kstrtobool_from_user(user_buf, count, &val);
+ if (retval)
+ return retval;
+
+ mutex_lock(&rv_interface_lock);
+
+ if (val)
+ turn_reacting_on();
+ else
+ turn_reacting_off();
+
+ /*
+ * Wait for the execution of all events to finish
+ * before returning to user-space.
+ */
+ tracepoint_synchronize_unregister();
+
+ mutex_unlock(&rv_interface_lock);
+
+ return count;
+}
+
+static const struct file_operations reacting_on_fops = {
+ .open = simple_open,
+ .llseek = no_llseek,
+ .write = reacting_on_write_data,
+ .read = reacting_on_read_data,
+};
+
+/**
+ * reactor_populate_monitor - creates per monitor reactors file
+ * @mdef: monitor's definition.
+ *
+ * Returns 0 if successful, error otherwise.
+ */
+int reactor_populate_monitor(struct rv_monitor_def *mdef)
+{
+ struct dentry *tmp;
+
+ tmp = rv_create_file("reactors", RV_MODE_WRITE, mdef->root_d, mdef, &monitor_reactors_ops);
+ if (!tmp)
+ return -ENOMEM;
+
+ /*
+ * Configure as the rv_nop reactor.
+ */
+ mdef->rdef = get_reactor_rdef_by_name("nop");
+ mdef->rdef->counter++;
+ mdef->reacting = false;
+
+ return 0;
+}
+
+/**
+ * reactor_cleanup_monitor - cleanup a monitor reference
+ * @mdef: monitor's definition.
+ */
+void reactor_cleanup_monitor(struct rv_monitor_def *mdef)
+{
+ lockdep_assert_held(&rv_interface_lock);
+ mdef->rdef->counter--;
+ WARN_ON_ONCE(mdef->rdef->counter < 0);
+}
+
+/*
+ * Nop reactor register
+ */
+static void rv_nop_reaction(char *msg)
+{
+}
+
+static struct rv_reactor rv_nop = {
+ .name = "nop",
+ .description = "no-operation reactor: do nothing.",
+ .react = rv_nop_reaction
+};
+
+int init_rv_reactors(struct dentry *root_dir)
+{
+ struct dentry *available, *reacting;
+ int retval;
+
+ available = rv_create_file("available_reactors", RV_MODE_READ, root_dir, NULL,
+ &available_reactors_ops);
+ if (!available)
+ goto out_err;
+
+ reacting = rv_create_file("reacting_on", RV_MODE_WRITE, root_dir, NULL, &reacting_on_fops);
+ if (!reacting)
+ goto rm_available;
+
+ retval = __rv_register_reactor(&rv_nop);
+ if (retval)
+ goto rm_reacting;
+
+ turn_reacting_on();
+
+ return 0;
+
+rm_reacting:
+ rv_remove(reacting);
+rm_available:
+ rv_remove(available);
+out_err:
+ return -ENOMEM;
+}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 0c517c8c8999..d3005279165d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5569,13 +5569,13 @@ static const char readme_msg[] =
#endif
#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
"\t accepts: event-definitions (one definition per line)\n"
- "\t Format: p[:[<group>/]<event>] <place> [<args>]\n"
- "\t r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
+ "\t Format: p[:[<group>/][<event>]] <place> [<args>]\n"
+ "\t r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
#ifdef CONFIG_HIST_TRIGGERS
"\t s:[synthetic/]<event> <field> [<field>]\n"
#endif
- "\t e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]\n"
- "\t -:[<group>/]<event>\n"
+ "\t e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>]\n"
+ "\t -:[<group>/][<event>]\n"
#ifdef CONFIG_KPROBE_EVENTS
"\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
"place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
@@ -9101,6 +9101,16 @@ allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size
return 0;
}
+static void free_trace_buffer(struct array_buffer *buf)
+{
+ if (buf->buffer) {
+ ring_buffer_free(buf->buffer);
+ buf->buffer = NULL;
+ free_percpu(buf->data);
+ buf->data = NULL;
+ }
+}
+
static int allocate_trace_buffers(struct trace_array *tr, int size)
{
int ret;
@@ -9113,10 +9123,7 @@ static int allocate_trace_buffers(struct trace_array *tr, int size)
ret = allocate_trace_buffer(tr, &tr->max_buffer,
allocate_snapshot ? size : 1);
if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
- ring_buffer_free(tr->array_buffer.buffer);
- tr->array_buffer.buffer = NULL;
- free_percpu(tr->array_buffer.data);
- tr->array_buffer.data = NULL;
+ free_trace_buffer(&tr->array_buffer);
return -ENOMEM;
}
tr->allocated_snapshot = allocate_snapshot;
@@ -9131,16 +9138,6 @@ static int allocate_trace_buffers(struct trace_array *tr, int size)
return 0;
}
-static void free_trace_buffer(struct array_buffer *buf)
-{
- if (buf->buffer) {
- ring_buffer_free(buf->buffer);
- buf->buffer = NULL;
- free_percpu(buf->data);
- buf->data = NULL;
- }
-}
-
static void free_trace_buffers(struct trace_array *tr)
{
if (!tr)
@@ -9772,6 +9769,8 @@ static __init int tracer_init_tracefs(void)
tracer_init_tracefs_work_func(NULL);
}
+ rv_init_interface();
+
return 0;
}
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index ff816fb41e48..900e75d96c84 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -2005,4 +2005,13 @@ struct trace_min_max_param {
extern const struct file_operations trace_min_max_fops;
+#ifdef CONFIG_RV
+extern int rv_init_interface(void);
+#else
+static inline int rv_init_interface(void)
+{
+ return 0;
+}
+#endif
+
#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c
index 076b447a1b88..154996684fb5 100644
--- a/kernel/trace/trace_dynevent.c
+++ b/kernel/trace/trace_dynevent.c
@@ -101,7 +101,7 @@ int dyn_event_release(const char *raw_command, struct dyn_event_operations *type
event = p + 1;
*p = '\0';
}
- if (event[0] == '\0') {
+ if (!system && event[0] == '\0') {
ret = -EINVAL;
goto out;
}
diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c
index 7d4478525c66..1783e3478912 100644
--- a/kernel/trace/trace_eprobe.c
+++ b/kernel/trace/trace_eprobe.c
@@ -125,6 +125,7 @@ static bool eprobe_dyn_event_match(const char *system, const char *event,
* We match the following:
* event only - match all eprobes with event name
* system and event only - match all system/event probes
+ * system only - match all system probes
*
* The below has the above satisfied with more arguments:
*
@@ -143,7 +144,7 @@ static bool eprobe_dyn_event_match(const char *system, const char *event,
return false;
/* Must match the event name */
- if (strcmp(trace_probe_name(&ep->tp), event) != 0)
+ if (event[0] != '\0' && strcmp(trace_probe_name(&ep->tp), event) != 0)
return false;
/* No arguments match all */
@@ -226,6 +227,7 @@ static int trace_eprobe_tp_arg_update(struct trace_eprobe *ep, int i)
struct probe_arg *parg = &ep->tp.args[i];
struct ftrace_event_field *field;
struct list_head *head;
+ int ret = -ENOENT;
head = trace_get_fields(ep->event);
list_for_each_entry(field, head, link) {
@@ -235,9 +237,20 @@ static int trace_eprobe_tp_arg_update(struct trace_eprobe *ep, int i)
return 0;
}
}
+
+ /*
+ * Argument not found on event. But allow for comm and COMM
+ * to be used to get the current->comm.
+ */
+ if (strcmp(parg->code->data, "COMM") == 0 ||
+ strcmp(parg->code->data, "comm") == 0) {
+ parg->code->op = FETCH_OP_COMM;
+ ret = 0;
+ }
+
kfree(parg->code->data);
parg->code->data = NULL;
- return -ENOENT;
+ return ret;
}
static int eprobe_event_define_fields(struct trace_event_call *event_call)
@@ -310,6 +323,27 @@ static unsigned long get_event_field(struct fetch_insn *code, void *rec)
addr = rec + field->offset;
+ if (is_string_field(field)) {
+ switch (field->filter_type) {
+ case FILTER_DYN_STRING:
+ val = (unsigned long)(rec + (*(unsigned int *)addr & 0xffff));
+ break;
+ case FILTER_RDYN_STRING:
+ val = (unsigned long)(addr + (*(unsigned int *)addr & 0xffff));
+ break;
+ case FILTER_STATIC_STRING:
+ val = (unsigned long)addr;
+ break;
+ case FILTER_PTR_STRING:
+ val = (unsigned long)(*(char *)addr);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return 0;
+ }
+ return val;
+ }
+
switch (field->size) {
case 1:
if (field->is_signed)
@@ -341,16 +375,38 @@ static unsigned long get_event_field(struct fetch_insn *code, void *rec)
static int get_eprobe_size(struct trace_probe *tp, void *rec)
{
+ struct fetch_insn *code;
struct probe_arg *arg;
int i, len, ret = 0;
for (i = 0; i < tp->nr_args; i++) {
arg = tp->args + i;
- if (unlikely(arg->dynamic)) {
+ if (arg->dynamic) {
unsigned long val;
- val = get_event_field(arg->code, rec);
- len = process_fetch_insn_bottom(arg->code + 1, val, NULL, NULL);
+ code = arg->code;
+ retry:
+ switch (code->op) {
+ case FETCH_OP_TP_ARG:
+ val = get_event_field(code, rec);
+ break;
+ case FETCH_OP_IMM:
+ val = code->immediate;
+ break;
+ case FETCH_OP_COMM:
+ val = (unsigned long)current->comm;
+ break;
+ case FETCH_OP_DATA:
+ val = (unsigned long)code->data;
+ break;
+ case FETCH_NOP_SYMBOL: /* Ignore a place holder */
+ code++;
+ goto retry;
+ default:
+ continue;
+ }
+ code++;
+ len = process_fetch_insn_bottom(code, val, NULL, NULL);
if (len > 0)
ret += len;
}
@@ -368,8 +424,28 @@ process_fetch_insn(struct fetch_insn *code, void *rec, void *dest,
{
unsigned long val;
- val = get_event_field(code, rec);
- return process_fetch_insn_bottom(code + 1, val, dest, base);
+ retry:
+ switch (code->op) {
+ case FETCH_OP_TP_ARG:
+ val = get_event_field(code, rec);
+ break;
+ case FETCH_OP_IMM:
+ val = code->immediate;
+ break;
+ case FETCH_OP_COMM:
+ val = (unsigned long)current->comm;
+ break;
+ case FETCH_OP_DATA:
+ val = (unsigned long)code->data;
+ break;
+ case FETCH_NOP_SYMBOL: /* Ignore a place holder */
+ code++;
+ goto retry;
+ default:
+ return -EILSEQ;
+ }
+ code++;
+ return process_fetch_insn_bottom(code, val, dest, base);
}
NOKPROBE_SYMBOL(process_fetch_insn)
@@ -838,8 +914,15 @@ static int trace_eprobe_tp_update_arg(struct trace_eprobe *ep, const char *argv[
if (ret)
return ret;
- if (ep->tp.args[i].code->op == FETCH_OP_TP_ARG)
+ if (ep->tp.args[i].code->op == FETCH_OP_TP_ARG) {
ret = trace_eprobe_tp_arg_update(ep, i);
+ if (ret)
+ trace_probe_log_err(0, BAD_ATTACH_ARG);
+ }
+
+ /* Handle symbols "@" */
+ if (!ret)
+ ret = traceprobe_update_arg(&ep->tp.args[i]);
return ret;
}
@@ -848,7 +931,7 @@ static int __trace_eprobe_create(int argc, const char *argv[])
{
/*
* Argument syntax:
- * e[:[GRP/]ENAME] SYSTEM.EVENT [FETCHARGS]
+ * e[:[GRP/][ENAME]] SYSTEM.EVENT [FETCHARGS]
* Fetch args:
* <name>=$<field>[:TYPE]
*/
@@ -858,6 +941,7 @@ static int __trace_eprobe_create(int argc, const char *argv[])
struct trace_eprobe *ep = NULL;
char buf1[MAX_EVENT_NAME_LEN];
char buf2[MAX_EVENT_NAME_LEN];
+ char gbuf[MAX_EVENT_NAME_LEN];
int ret = 0;
int i;
@@ -869,25 +953,25 @@ static int __trace_eprobe_create(int argc, const char *argv[])
event = strchr(&argv[0][1], ':');
if (event) {
event++;
- ret = traceprobe_parse_event_name(&event, &group, buf1,
+ ret = traceprobe_parse_event_name(&event, &group, gbuf,
event - argv[0]);
if (ret)
goto parse_error;
- } else {
- strscpy(buf1, argv[1], MAX_EVENT_NAME_LEN);
- sanitize_event_name(buf1);
- event = buf1;
}
- if (!is_good_name(event) || !is_good_name(group))
- goto parse_error;
+ trace_probe_log_set_index(1);
sys_event = argv[1];
- ret = traceprobe_parse_event_name(&sys_event, &sys_name, buf2,
- sys_event - argv[1]);
- if (ret || !sys_name)
- goto parse_error;
- if (!is_good_name(sys_event) || !is_good_name(sys_name))
+ ret = traceprobe_parse_event_name(&sys_event, &sys_name, buf2, 0);
+ if (ret || !sys_event || !sys_name) {
+ trace_probe_log_err(0, NO_EVENT_INFO);
goto parse_error;
+ }
+
+ if (!event) {
+ strscpy(buf1, argv[1], MAX_EVENT_NAME_LEN);
+ sanitize_event_name(buf1);
+ event = buf1;
+ }
mutex_lock(&event_mutex);
event_call = find_and_get_event(sys_name, sys_event);
@@ -896,6 +980,8 @@ static int __trace_eprobe_create(int argc, const char *argv[])
if (IS_ERR(ep)) {
ret = PTR_ERR(ep);
+ if (ret == -ENODEV)
+ trace_probe_log_err(0, BAD_ATTACH_EVENT);
/* This must return -ENOMEM or missing event, else there is a bug */
WARN_ON_ONCE(ret != -ENOMEM && ret != -ENODEV);
ep = NULL;
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index a114549720d6..61e3a2620fa3 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -157,7 +157,7 @@ static void perf_trace_event_unreg(struct perf_event *p_event)
int i;
if (--tp_event->perf_refcount > 0)
- goto out;
+ return;
tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER, NULL);
@@ -176,8 +176,6 @@ static void perf_trace_event_unreg(struct perf_event *p_event)
perf_trace_buf[i] = NULL;
}
}
-out:
- trace_event_put_ref(tp_event);
}
static int perf_trace_event_open(struct perf_event *p_event)
@@ -241,6 +239,7 @@ void perf_trace_destroy(struct perf_event *p_event)
mutex_lock(&event_mutex);
perf_trace_event_close(p_event);
perf_trace_event_unreg(p_event);
+ trace_event_put_ref(p_event->tp_event);
mutex_unlock(&event_mutex);
}
@@ -292,6 +291,7 @@ void perf_kprobe_destroy(struct perf_event *p_event)
mutex_lock(&event_mutex);
perf_trace_event_close(p_event);
perf_trace_event_unreg(p_event);
+ trace_event_put_ref(p_event->tp_event);
mutex_unlock(&event_mutex);
destroy_local_trace_kprobe(p_event->tp_event);
@@ -347,6 +347,7 @@ void perf_uprobe_destroy(struct perf_event *p_event)
mutex_lock(&event_mutex);
perf_trace_event_close(p_event);
perf_trace_event_unreg(p_event);
+ trace_event_put_ref(p_event->tp_event);
mutex_unlock(&event_mutex);
destroy_local_trace_uprobe(p_event->tp_event);
}
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 181f08186d32..0356cae0cf74 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -176,6 +176,7 @@ static int trace_define_generic_fields(void)
__generic_field(int, CPU, FILTER_CPU);
__generic_field(int, cpu, FILTER_CPU);
+ __generic_field(int, common_cpu, FILTER_CPU);
__generic_field(char *, COMM, FILTER_COMM);
__generic_field(char *, comm, FILTER_COMM);
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index e87a46794079..fdf784620c28 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -4455,7 +4455,7 @@ static int create_hist_fields(struct hist_trigger_data *hist_data,
ret = parse_var_defs(hist_data);
if (ret)
- goto out;
+ return ret;
ret = create_val_fields(hist_data, file);
if (ret)
@@ -4466,8 +4466,7 @@ static int create_hist_fields(struct hist_trigger_data *hist_data,
goto out;
ret = create_key_fields(hist_data, file);
- if (ret)
- goto out;
+
out:
free_var_defs(hist_data);
diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
index 706e1686b5eb..a6621c52ce45 100644
--- a/kernel/trace/trace_events_user.c
+++ b/kernel/trace/trace_events_user.c
@@ -567,7 +567,7 @@ static int user_event_set_call_visible(struct user_event *user, bool visible)
* to allow user_event files to be less locked down. The extreme case
* being "other" has read/write access to user_events_data/status.
*
- * When not locked down, processes may not have have permissions to
+ * When not locked down, processes may not have permissions to
* add/remove calls themselves to tracefs. We need to temporarily
* switch to root file permission to allow for this scenario.
*/
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index a245ea673715..23f7f0ec4f4c 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -163,7 +163,8 @@ static bool trace_kprobe_match(const char *system, const char *event,
{
struct trace_kprobe *tk = to_trace_kprobe(ev);
- return strcmp(trace_probe_name(&tk->tp), event) == 0 &&
+ return (event[0] == '\0' ||
+ strcmp(trace_probe_name(&tk->tp), event) == 0) &&
(!system || strcmp(trace_probe_group_name(&tk->tp), system) == 0) &&
trace_kprobe_match_command_head(tk, argc, argv);
}
@@ -708,11 +709,11 @@ static int __trace_kprobe_create(int argc, const char *argv[])
/*
* Argument syntax:
* - Add kprobe:
- * p[:[GRP/]EVENT] [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS]
+ * p[:[GRP/][EVENT]] [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS]
* - Add kretprobe:
- * r[MAXACTIVE][:[GRP/]EVENT] [MOD:]KSYM[+0] [FETCHARGS]
+ * r[MAXACTIVE][:[GRP/][EVENT]] [MOD:]KSYM[+0] [FETCHARGS]
* Or
- * p:[GRP/]EVENT] [MOD:]KSYM[+0]%return [FETCHARGS]
+ * p[:[GRP/][EVENT]] [MOD:]KSYM[+0]%return [FETCHARGS]
*
* Fetch args:
* $retval : fetch return value
@@ -739,6 +740,7 @@ static int __trace_kprobe_create(int argc, const char *argv[])
long offset = 0;
void *addr = NULL;
char buf[MAX_EVENT_NAME_LEN];
+ char gbuf[MAX_EVENT_NAME_LEN];
unsigned int flags = TPARG_FL_KERNEL;
switch (argv[0][0]) {
@@ -833,11 +835,13 @@ static int __trace_kprobe_create(int argc, const char *argv[])
trace_probe_log_set_index(0);
if (event) {
- ret = traceprobe_parse_event_name(&event, &group, buf,
+ ret = traceprobe_parse_event_name(&event, &group, gbuf,
event - argv[0]);
if (ret)
goto parse_error;
- } else {
+ }
+
+ if (!event) {
/* Make a new event name */
if (symbol)
snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 80863c6508e5..36dff277de46 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -257,6 +257,10 @@ int traceprobe_parse_event_name(const char **pevent, const char **pgroup,
}
len = strlen(event);
if (len == 0) {
+ if (slash) {
+ *pevent = NULL;
+ return 0;
+ }
trace_probe_log_err(offset, NO_EVENT_NAME);
return -EINVAL;
} else if (len > MAX_EVENT_NAME_LEN) {
@@ -279,7 +283,14 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t,
int ret = 0;
int len;
- if (strcmp(arg, "retval") == 0) {
+ if (flags & TPARG_FL_TPOINT) {
+ if (code->data)
+ return -EFAULT;
+ code->data = kstrdup(arg, GFP_KERNEL);
+ if (!code->data)
+ return -ENOMEM;
+ code->op = FETCH_OP_TP_ARG;
+ } else if (strcmp(arg, "retval") == 0) {
if (flags & TPARG_FL_RETURN) {
code->op = FETCH_OP_RETVAL;
} else {
@@ -303,7 +314,7 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t,
}
} else
goto inval_var;
- } else if (strcmp(arg, "comm") == 0) {
+ } else if (strcmp(arg, "comm") == 0 || strcmp(arg, "COMM") == 0) {
code->op = FETCH_OP_COMM;
#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
} else if (((flags & TPARG_FL_MASK) ==
@@ -319,13 +330,6 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t,
code->op = FETCH_OP_ARG;
code->param = (unsigned int)param - 1;
#endif
- } else if (flags & TPARG_FL_TPOINT) {
- if (code->data)
- return -EFAULT;
- code->data = kstrdup(arg, GFP_KERNEL);
- if (!code->data)
- return -ENOMEM;
- code->op = FETCH_OP_TP_ARG;
} else
goto inval_var;
@@ -380,6 +384,11 @@ parse_probe_arg(char *arg, const struct fetch_type *type,
break;
case '%': /* named register */
+ if (flags & TPARG_FL_TPOINT) {
+ /* eprobes do not handle registers */
+ trace_probe_log_err(offs, BAD_VAR);
+ break;
+ }
ret = regs_query_register_offset(arg + 1);
if (ret >= 0) {
code->op = FETCH_OP_REG;
@@ -613,9 +622,11 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
/*
* Since $comm and immediate string can not be dereferenced,
- * we can find those by strcmp.
+ * we can find those by strcmp. But ignore for eprobes.
*/
- if (strcmp(arg, "$comm") == 0 || strncmp(arg, "\\\"", 2) == 0) {
+ if (!(flags & TPARG_FL_TPOINT) &&
+ (strcmp(arg, "$comm") == 0 || strcmp(arg, "$COMM") == 0 ||
+ strncmp(arg, "\\\"", 2) == 0)) {
/* The type of $comm must be "string", and not an array. */
if (parg->count || (t && strcmp(t, "string")))
goto out;
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 92cc149af0fd..3b3869ae8cfd 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -442,7 +442,10 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call,
C(FAIL_REG_PROBE, "Failed to register probe event"),\
C(DIFF_PROBE_TYPE, "Probe type is different from existing probe"),\
C(DIFF_ARG_TYPE, "Argument type or name is different from existing probe"),\
- C(SAME_PROBE, "There is already the exact same probe event"),
+ C(SAME_PROBE, "There is already the exact same probe event"),\
+ C(NO_EVENT_INFO, "This requires both group and event name to attach"),\
+ C(BAD_ATTACH_EVENT, "Attached event does not exist"),\
+ C(BAD_ATTACH_ARG, "Attached event does not have this field"),
#undef C
#define C(a, b) TP_ERR_##a
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 88ba5b4bd0c5..fb58e86dd117 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -313,7 +313,8 @@ static bool trace_uprobe_match(const char *system, const char *event,
{
struct trace_uprobe *tu = to_trace_uprobe(ev);
- return strcmp(trace_probe_name(&tu->tp), event) == 0 &&
+ return (event[0] == '\0' ||
+ strcmp(trace_probe_name(&tu->tp), event) == 0) &&
(!system || strcmp(trace_probe_group_name(&tu->tp), system) == 0) &&
trace_uprobe_match_command_head(tu, argc, argv);
}
@@ -533,7 +534,7 @@ end:
/*
* Argument syntax:
- * - Add uprobe: p|r[:[GRP/]EVENT] PATH:OFFSET[%return][(REF)] [FETCHARGS]
+ * - Add uprobe: p|r[:[GRP/][EVENT]] PATH:OFFSET[%return][(REF)] [FETCHARGS]
*/
static int __trace_uprobe_create(int argc, const char **argv)
{
@@ -541,6 +542,7 @@ static int __trace_uprobe_create(int argc, const char **argv)
const char *event = NULL, *group = UPROBE_EVENT_SYSTEM;
char *arg, *filename, *rctr, *rctr_end, *tmp;
char buf[MAX_EVENT_NAME_LEN];
+ char gbuf[MAX_EVENT_NAME_LEN];
enum probe_print_type ptype;
struct path path;
unsigned long offset, ref_ctr_offset;
@@ -645,11 +647,13 @@ static int __trace_uprobe_create(int argc, const char **argv)
/* setup a probe */
trace_probe_log_set_index(0);
if (event) {
- ret = traceprobe_parse_event_name(&event, &group, buf,
+ ret = traceprobe_parse_event_name(&event, &group, gbuf,
event - argv[0]);
if (ret)
goto fail_address_parse;
- } else {
+ }
+
+ if (!event) {
char *tail;
char *ptr;