diff options
author | Daniel Borkmann <daniel@iogearbox.net> | 2018-03-28 22:55:20 +0200 |
---|---|---|
committer | Daniel Borkmann <daniel@iogearbox.net> | 2018-03-28 22:55:21 +0200 |
commit | f6ef56589374670b7c1939720dfa00212bd80a5b (patch) | |
tree | f8f5e66c8fba220b34783b0d38518192bc53bf39 | |
parent | bpf: Add sock_ops R/W access to ipv4 tos (diff) | |
parent | selftests/bpf: test for bpf_get_stackid() from raw tracepoints (diff) | |
download | linux-dev-f6ef56589374670b7c1939720dfa00212bd80a5b.tar.xz linux-dev-f6ef56589374670b7c1939720dfa00212bd80a5b.zip |
Merge branch 'bpf-raw-tracepoints'
Alexei Starovoitov says:
====================
v7->v8:
- moved 'u32 num_args' from 'struct tracepoint' into 'struct bpf_raw_event_map'
that increases memory overhead, but can be optimized/compressed later.
Now it's zero changes in tracepoint.[ch]
v6->v7:
- adopted Steven's bpf_raw_tp_map section approach to find tracepoint
and corresponding bpf probe function instead of kallsyms approach.
dropped kernel_tracepoint_find_by_name() patch
v5->v6:
- avoid changing semantics of for_each_kernel_tracepoint() function, instead
introduce kernel_tracepoint_find_by_name() helper
v4->v5:
- adopted Daniel's fancy REPEAT macro in bpf_trace.c in patch 6
v3->v4:
- adopted Linus's CAST_TO_U64 macro to cast any integer, pointer, or small
struct to u64. That nicely reduced the size of patch 1
v2->v3:
- with Linus's suggestion introduced generic COUNT_ARGS and CONCATENATE macros
(or rather moved them from apparmor)
that cleaned up patch 6
- added patch 4 to refactor trace_iwlwifi_dev_ucode_error() from 17 args to 4
Now any tracepoint with >12 args will have build error
v1->v2:
- simplified api by combing bpf_raw_tp_open(name) + bpf_attach(prog_fd) into
bpf_raw_tp_open(name, prog_fd) as suggested by Daniel.
That simplifies bpf_detach as well which is now simple close() of fd.
- fixed memory leak in error path which was spotted by Daniel.
- fixed bpf_get_stackid(), bpf_perf_event_output() called from raw tracepoints
- added more tests
- fixed allyesconfig build caught by buildbot
v1:
This patch set is a different way to address the pressing need to access
task_struct pointers in sched tracepoints from bpf programs.
The first approach simply added these pointers to sched tracepoints:
https://lkml.org/lkml/2017/12/14/753
which Peter nacked.
Few options were discussed and eventually the discussion converged on
doing bpf specific tracepoint_probe_register() probe functions.
Details here:
https://lkml.org/lkml/2017/12/20/929
Patch 1 is kernel wide cleanup of pass-struct-by-value into
pass-struct-by-reference into tracepoints.
Patches 2 and 3 are minor cleanups to address allyesconfig build
Patch 4 refactor trace_iwlwifi_dev_ucode_error from 17 to 4 args
Patch 5 introduces COUNT_ARGS macro
Patch 6 introduces BPF_RAW_TRACEPOINT api.
the auto-cleanup and multiple concurrent users are must have
features of tracing api. For bpf raw tracepoints it looks like:
// load bpf prog with BPF_PROG_TYPE_RAW_TRACEPOINT type
prog_fd = bpf_prog_load(...);
// receive anon_inode fd for given bpf_raw_tracepoint
// and attach bpf program to it
raw_tp_fd = bpf_raw_tracepoint_open("xdp_exception", prog_fd);
Ctrl-C of tracing daemon or cmdline tool will automatically
detach bpf program, unload it and unregister tracepoint probe.
More details in patch 6.
Patch 7 - trivial support in libbpf
Patches 8, 9 - user space tests
samples/bpf/test_overhead performance on 1 cpu:
tracepoint base kprobe+bpf tracepoint+bpf raw_tracepoint+bpf
task_rename 1.1M 769K 947K 1.0M
urandom_read 789K 697K 750K 755K
====================
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
30 files changed, 607 insertions, 77 deletions
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 41fafebe3b0d..da4aa1a95b11 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -1153,7 +1153,7 @@ static int get_ctxt_info(struct hfi1_filedata *fd, unsigned long arg, u32 len) cinfo.sdma_ring_size = fd->cq->nentries; cinfo.rcvegr_size = uctxt->egrbufs.rcvtid_size; - trace_hfi1_ctxt_info(uctxt->dd, uctxt->ctxt, fd->subctxt, cinfo); + trace_hfi1_ctxt_info(uctxt->dd, uctxt->ctxt, fd->subctxt, &cinfo); if (copy_to_user((void __user *)arg, &cinfo, len)) return -EFAULT; diff --git a/drivers/infiniband/hw/hfi1/trace_ctxts.h b/drivers/infiniband/hw/hfi1/trace_ctxts.h index 4eb4cc798035..e00c8a7d559c 100644 --- a/drivers/infiniband/hw/hfi1/trace_ctxts.h +++ b/drivers/infiniband/hw/hfi1/trace_ctxts.h @@ -106,7 +106,7 @@ TRACE_EVENT(hfi1_uctxtdata, TRACE_EVENT(hfi1_ctxt_info, TP_PROTO(struct hfi1_devdata *dd, unsigned int ctxt, unsigned int subctxt, - struct hfi1_ctxt_info cinfo), + struct hfi1_ctxt_info *cinfo), TP_ARGS(dd, ctxt, subctxt, cinfo), TP_STRUCT__entry(DD_DEV_ENTRY(dd) __field(unsigned int, ctxt) @@ -120,11 +120,11 @@ TRACE_EVENT(hfi1_ctxt_info, TP_fast_assign(DD_DEV_ASSIGN(dd); __entry->ctxt = ctxt; __entry->subctxt = subctxt; - __entry->egrtids = cinfo.egrtids; - __entry->rcvhdrq_cnt = cinfo.rcvhdrq_cnt; - __entry->rcvhdrq_size = cinfo.rcvhdrq_entsize; - __entry->sdma_ring_size = cinfo.sdma_ring_size; - __entry->rcvegr_size = cinfo.rcvegr_size; + __entry->egrtids = cinfo->egrtids; + __entry->rcvhdrq_cnt = cinfo->rcvhdrq_cnt; + __entry->rcvhdrq_size = cinfo->rcvhdrq_entsize; + __entry->sdma_ring_size = cinfo->sdma_ring_size; + __entry->rcvegr_size = cinfo->rcvegr_size; ), TP_printk("[%s] ctxt %u:%u " CINFO_FMT, __get_str(dev), diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/main.c b/drivers/net/wireless/intel/iwlwifi/dvm/main.c index d11d72615de2..e68254e12764 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/main.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/main.c @@ -1651,12 +1651,7 @@ static void iwl_dump_nic_error_log(struct iwl_priv *priv) priv->status, table.valid); } - trace_iwlwifi_dev_ucode_error(trans->dev, table.error_id, table.tsf_low, - table.data1, table.data2, table.line, - table.blink2, table.ilink1, table.ilink2, - table.bcon_time, table.gp1, table.gp2, - table.gp3, table.ucode_ver, table.hw_ver, - 0, table.brd_ver); + trace_iwlwifi_dev_ucode_error(trans->dev, &table, 0, table.brd_ver); IWL_ERR(priv, "0x%08X | %-28s\n", table.error_id, desc_lookup(table.error_id)); IWL_ERR(priv, "0x%08X | uPc\n", table.pc); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-devtrace-iwlwifi.h b/drivers/net/wireless/intel/iwlwifi/iwl-devtrace-iwlwifi.h index 9518a82f44c2..27e3e4e96aa2 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-devtrace-iwlwifi.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-devtrace-iwlwifi.h @@ -126,14 +126,11 @@ TRACE_EVENT(iwlwifi_dev_tx, __entry->framelen, __entry->skbaddr) ); +struct iwl_error_event_table; TRACE_EVENT(iwlwifi_dev_ucode_error, - TP_PROTO(const struct device *dev, u32 desc, u32 tsf_low, - u32 data1, u32 data2, u32 line, u32 blink2, u32 ilink1, - u32 ilink2, u32 bcon_time, u32 gp1, u32 gp2, u32 rev_type, - u32 major, u32 minor, u32 hw_ver, u32 brd_ver), - TP_ARGS(dev, desc, tsf_low, data1, data2, line, - blink2, ilink1, ilink2, bcon_time, gp1, gp2, - rev_type, major, minor, hw_ver, brd_ver), + TP_PROTO(const struct device *dev, const struct iwl_error_event_table *table, + u32 hw_ver, u32 brd_ver), + TP_ARGS(dev, table, hw_ver, brd_ver), TP_STRUCT__entry( DEV_ENTRY __field(u32, desc) @@ -155,20 +152,20 @@ TRACE_EVENT(iwlwifi_dev_ucode_error, ), TP_fast_assign( DEV_ASSIGN; - __entry->desc = desc; - __entry->tsf_low = tsf_low; - __entry->data1 = data1; - __entry->data2 = data2; - __entry->line = line; - __entry->blink2 = blink2; - __entry->ilink1 = ilink1; - __entry->ilink2 = ilink2; - __entry->bcon_time = bcon_time; - __entry->gp1 = gp1; - __entry->gp2 = gp2; - __entry->rev_type = rev_type; - __entry->major = major; - __entry->minor = minor; + __entry->desc = table->error_id; + __entry->tsf_low = table->tsf_low; + __entry->data1 = table->data1; + __entry->data2 = table->data2; + __entry->line = table->line; + __entry->blink2 = table->blink2; + __entry->ilink1 = table->ilink1; + __entry->ilink2 = table->ilink2; + __entry->bcon_time = table->bcon_time; + __entry->gp1 = table->gp1; + __entry->gp2 = table->gp2; + __entry->rev_type = table->gp3; + __entry->major = table->ucode_ver; + __entry->minor = table->hw_ver; __entry->hw_ver = hw_ver; __entry->brd_ver = brd_ver; ), diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c b/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c index 50510fb6ab8c..6aa719865a58 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c @@ -30,6 +30,7 @@ #ifndef __CHECKER__ #include "iwl-trans.h" +#include "dvm/commands.h" #define CREATE_TRACE_POINTS #include "iwl-devtrace.h" diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c index d65e1db7c097..5442ead876eb 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c @@ -549,12 +549,7 @@ static void iwl_mvm_dump_lmac_error_log(struct iwl_mvm *mvm, u32 base) IWL_ERR(mvm, "Loaded firmware version: %s\n", mvm->fw->fw_version); - trace_iwlwifi_dev_ucode_error(trans->dev, table.error_id, table.tsf_low, - table.data1, table.data2, table.data3, - table.blink2, table.ilink1, - table.ilink2, table.bcon_time, table.gp1, - table.gp2, table.fw_rev_type, table.major, - table.minor, table.hw_ver, table.brd_ver); + trace_iwlwifi_dev_ucode_error(trans->dev, &table, table.hw_ver, table.brd_ver); IWL_ERR(mvm, "0x%08X | %-28s\n", table.error_id, desc_lookup(table.error_id)); IWL_ERR(mvm, "0x%08X | trm_hw_status0\n", table.trm_hw_status0); diff --git a/drivers/net/wireless/mediatek/mt7601u/trace.h b/drivers/net/wireless/mediatek/mt7601u/trace.h index 289897300ef0..82c8898b9076 100644 --- a/drivers/net/wireless/mediatek/mt7601u/trace.h +++ b/drivers/net/wireless/mediatek/mt7601u/trace.h @@ -34,7 +34,7 @@ #define REG_PR_FMT "%04x=%08x" #define REG_PR_ARG __entry->reg, __entry->val -DECLARE_EVENT_CLASS(dev_reg_evt, +DECLARE_EVENT_CLASS(dev_reg_evtu, TP_PROTO(struct mt7601u_dev *dev, u32 reg, u32 val), TP_ARGS(dev, reg, val), TP_STRUCT__entry( @@ -51,12 +51,12 @@ DECLARE_EVENT_CLASS(dev_reg_evt, ) ); -DEFINE_EVENT(dev_reg_evt, reg_read, +DEFINE_EVENT(dev_reg_evtu, reg_read, TP_PROTO(struct mt7601u_dev *dev, u32 reg, u32 val), TP_ARGS(dev, reg, val) ); -DEFINE_EVENT(dev_reg_evt, reg_write, +DEFINE_EVENT(dev_reg_evtu, reg_write, TP_PROTO(struct mt7601u_dev *dev, u32 reg, u32 val), TP_ARGS(dev, reg, val) ); diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 1ab0e520d6fc..8add3493a202 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -178,6 +178,15 @@ #define TRACE_SYSCALLS() #endif +#ifdef CONFIG_BPF_EVENTS +#define BPF_RAW_TP() STRUCT_ALIGN(); \ + VMLINUX_SYMBOL(__start__bpf_raw_tp) = .; \ + KEEP(*(__bpf_raw_tp_map)) \ + VMLINUX_SYMBOL(__stop__bpf_raw_tp) = .; +#else +#define BPF_RAW_TP() +#endif + #ifdef CONFIG_SERIAL_EARLYCON #define EARLYCON_TABLE() STRUCT_ALIGN(); \ VMLINUX_SYMBOL(__earlycon_table) = .; \ @@ -249,6 +258,7 @@ LIKELY_PROFILE() \ BRANCH_PROFILE() \ TRACE_PRINTKS() \ + BPF_RAW_TP() \ TRACEPOINT_STR() /* diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 5e2e8a49fb21..6d7243bfb0ff 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -19,6 +19,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg) BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe) BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint) BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event) +BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint) #endif #ifdef CONFIG_CGROUP_BPF BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 3fd291503576..293fa0677fba 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -919,6 +919,13 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } #define swap(a, b) \ do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) +/* This counts to 12. Any more, it will return 13th argument. */ +#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n +#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) + +#define __CONCAT(a, b) a ## b +#define CONCATENATE(a, b) __CONCAT(a, b) + /** * container_of - cast a member of a structure out to the containing structure * @ptr: the pointer to the member. diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 8a1442c4e513..b0357cd198b0 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -468,6 +468,9 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx); int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog); void perf_event_detach_bpf_prog(struct perf_event *event); int perf_event_query_prog_array(struct perf_event *event, void __user *info); +int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog); +int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog); +struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name); #else static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) { @@ -487,6 +490,18 @@ perf_event_query_prog_array(struct perf_event *event, void __user *info) { return -EOPNOTSUPP; } +static inline int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *p) +{ + return -EOPNOTSUPP; +} +static inline int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *p) +{ + return -EOPNOTSUPP; +} +static inline struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name) +{ + return NULL; +} #endif enum { @@ -546,6 +561,33 @@ extern void ftrace_profile_free_filter(struct perf_event *event); void perf_trace_buf_update(void *record, u16 type); void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp); +void bpf_trace_run1(struct bpf_prog *prog, u64 arg1); +void bpf_trace_run2(struct bpf_prog *prog, u64 arg1, u64 arg2); +void bpf_trace_run3(struct bpf_prog *prog, u64 arg1, u64 arg2, + u64 arg3); +void bpf_trace_run4(struct bpf_prog *prog, u64 arg1, u64 arg2, + u64 arg3, u64 arg4); +void bpf_trace_run5(struct bpf_prog *prog, u64 arg1, u64 arg2, + u64 arg3, u64 arg4, u64 arg5); +void bpf_trace_run6(struct bpf_prog *prog, u64 arg1, u64 arg2, + u64 arg3, u64 arg4, u64 arg5, u64 arg6); +void bpf_trace_run7(struct bpf_prog *prog, u64 arg1, u64 arg2, + u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7); +void bpf_trace_run8(struct bpf_prog *prog, u64 arg1, u64 arg2, + u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7, + u64 arg8); +void bpf_trace_run9(struct bpf_prog *prog, u64 arg1, u64 arg2, + u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7, + u64 arg8, u64 arg9); +void bpf_trace_run10(struct bpf_prog *prog, u64 arg1, u64 arg2, + u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7, + u64 arg8, u64 arg9, u64 arg10); +void bpf_trace_run11(struct bpf_prog *prog, u64 arg1, u64 arg2, + u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7, + u64 arg8, u64 arg9, u64 arg10, u64 arg11); +void bpf_trace_run12(struct bpf_prog *prog, u64 arg1, u64 arg2, + u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7, + u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12); void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx, struct trace_event_call *call, u64 count, struct pt_regs *regs, struct hlist_head *head, diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h index 64ed7064f1fa..22c5a46e9693 100644 --- a/include/linux/tracepoint-defs.h +++ b/include/linux/tracepoint-defs.h @@ -35,4 +35,10 @@ struct tracepoint { struct tracepoint_func __rcu *funcs; }; +struct bpf_raw_event_map { + struct tracepoint *tp; + void *bpf_func; + u32 num_args; +} __aligned(32); + #endif diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h new file mode 100644 index 000000000000..505dae0bed80 --- /dev/null +++ b/include/trace/bpf_probe.h @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#undef TRACE_SYSTEM_VAR + +#ifdef CONFIG_BPF_EVENTS + +#undef __entry +#define __entry entry + +#undef __get_dynamic_array +#define __get_dynamic_array(field) \ + ((void *)__entry + (__entry->__data_loc_##field & 0xffff)) + +#undef __get_dynamic_array_len +#define __get_dynamic_array_len(field) \ + ((__entry->__data_loc_##field >> 16) & 0xffff) + +#undef __get_str +#define __get_str(field) ((char *)__get_dynamic_array(field)) + +#undef __get_bitmask +#define __get_bitmask(field) (char *)__get_dynamic_array(field) + +#undef __perf_count +#define __perf_count(c) (c) + +#undef __perf_task +#define __perf_task(t) (t) + +/* cast any integer, pointer, or small struct to u64 */ +#define UINTTYPE(size) \ + __typeof__(__builtin_choose_expr(size == 1, (u8)1, \ + __builtin_choose_expr(size == 2, (u16)2, \ + __builtin_choose_expr(size == 4, (u32)3, \ + __builtin_choose_expr(size == 8, (u64)4, \ + (void)5))))) +#define __CAST_TO_U64(x) ({ \ + typeof(x) __src = (x); \ + UINTTYPE(sizeof(x)) __dst; \ + memcpy(&__dst, &__src, sizeof(__dst)); \ + (u64)__dst; }) + +#define __CAST1(a,...) __CAST_TO_U64(a) +#define __CAST2(a,...) __CAST_TO_U64(a), __CAST1(__VA_ARGS__) +#define __CAST3(a,...) __CAST_TO_U64(a), __CAST2(__VA_ARGS__) +#define __CAST4(a,...) __CAST_TO_U64(a), __CAST3(__VA_ARGS__) +#define __CAST5(a,...) __CAST_TO_U64(a), __CAST4(__VA_ARGS__) +#define __CAST6(a,...) __CAST_TO_U64(a), __CAST5(__VA_ARGS__) +#define __CAST7(a,...) __CAST_TO_U64(a), __CAST6(__VA_ARGS__) +#define __CAST8(a,...) __CAST_TO_U64(a), __CAST7(__VA_ARGS__) +#define __CAST9(a,...) __CAST_TO_U64(a), __CAST8(__VA_ARGS__) +#define __CAST10(a,...) __CAST_TO_U64(a), __CAST9(__VA_ARGS__) +#define __CAST11(a,...) __CAST_TO_U64(a), __CAST10(__VA_ARGS__) +#define __CAST12(a,...) __CAST_TO_U64(a), __CAST11(__VA_ARGS__) +/* tracepoints with more than 12 arguments will hit build error */ +#define CAST_TO_U64(...) CONCATENATE(__CAST, COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__) + +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ +static notrace void \ +__bpf_trace_##call(void *__data, proto) \ +{ \ + struct bpf_prog *prog = __data; \ + CONCATENATE(bpf_trace_run, COUNT_ARGS(args))(prog, CAST_TO_U64(args)); \ +} + +/* + * This part is compiled out, it is only here as a build time check + * to make sure that if the tracepoint handling changes, the + * bpf probe will fail to compile unless it too is updated. + */ +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, call, proto, args) \ +static inline void bpf_test_probe_##call(void) \ +{ \ + check_trace_callback_type_##call(__bpf_trace_##template); \ +} \ +static struct bpf_raw_event_map __used \ + __attribute__((section("__bpf_raw_tp_map"))) \ +__bpf_trace_tp_map_##call = { \ + .tp = &__tracepoint_##call, \ + .bpf_func = (void *)__bpf_trace_##template, \ + .num_args = COUNT_ARGS(args), \ +}; + + +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) +#endif /* CONFIG_BPF_EVENTS */ diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index d9e3d4aa3f6e..cb30c5532144 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -95,6 +95,7 @@ #ifdef TRACEPOINTS_ENABLED #include <trace/trace_events.h> #include <trace/perf.h> +#include <trace/bpf_probe.h> #endif #undef TRACE_EVENT diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 06c87f9f720c..795698925d20 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -491,7 +491,7 @@ DEFINE_EVENT(f2fs__truncate_node, f2fs_truncate_node, TRACE_EVENT(f2fs_truncate_partial_nodes, - TP_PROTO(struct inode *inode, nid_t nid[], int depth, int err), + TP_PROTO(struct inode *inode, nid_t *nid, int depth, int err), TP_ARGS(inode, nid, depth, err), diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 18b7c510c511..1878201c2d77 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -94,6 +94,7 @@ enum bpf_cmd { BPF_MAP_GET_FD_BY_ID, BPF_OBJ_GET_INFO_BY_FD, BPF_PROG_QUERY, + BPF_RAW_TRACEPOINT_OPEN, }; enum bpf_map_type { @@ -134,6 +135,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SK_SKB, BPF_PROG_TYPE_CGROUP_DEVICE, BPF_PROG_TYPE_SK_MSG, + BPF_PROG_TYPE_RAW_TRACEPOINT, }; enum bpf_attach_type { @@ -344,6 +346,11 @@ union bpf_attr { __aligned_u64 prog_ids; __u32 prog_cnt; } query; + + struct { + __u64 name; + __u32 prog_fd; + } raw_tracepoint; } __attribute__((aligned(8))); /* BPF helper function descriptions: @@ -1152,4 +1159,8 @@ struct bpf_cgroup_dev_ctx { __u32 minor; }; +struct bpf_raw_tracepoint_args { + __u64 args[0]; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 77d45bd9f507..95ca2523fa6e 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1315,6 +1315,81 @@ static int bpf_obj_get(const union bpf_attr *attr) attr->file_flags); } +struct bpf_raw_tracepoint { + struct bpf_raw_event_map *btp; + struct bpf_prog *prog; +}; + +static int bpf_raw_tracepoint_release(struct inode *inode, struct file *filp) +{ + struct bpf_raw_tracepoint *raw_tp = filp->private_data; + + if (raw_tp->prog) { + bpf_probe_unregister(raw_tp->btp, raw_tp->prog); + bpf_prog_put(raw_tp->prog); + } + kfree(raw_tp); + return 0; +} + +static const struct file_operations bpf_raw_tp_fops = { + .release = bpf_raw_tracepoint_release, + .read = bpf_dummy_read, + .write = bpf_dummy_write, +}; + +#define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd + +static int bpf_raw_tracepoint_open(const union bpf_attr *attr) +{ + struct bpf_raw_tracepoint *raw_tp; + struct bpf_raw_event_map *btp; + struct bpf_prog *prog; + char tp_name[128]; + int tp_fd, err; + + if (strncpy_from_user(tp_name, u64_to_user_ptr(attr->raw_tracepoint.name), + sizeof(tp_name) - 1) < 0) + return -EFAULT; + tp_name[sizeof(tp_name) - 1] = 0; + + btp = bpf_find_raw_tracepoint(tp_name); + if (!btp) + return -ENOENT; + + raw_tp = kzalloc(sizeof(*raw_tp), GFP_USER); + if (!raw_tp) + return -ENOMEM; + raw_tp->btp = btp; + + prog = bpf_prog_get_type(attr->raw_tracepoint.prog_fd, + BPF_PROG_TYPE_RAW_TRACEPOINT); + if (IS_ERR(prog)) { + err = PTR_ERR(prog); + goto out_free_tp; + } + + err = bpf_probe_register(raw_tp->btp, prog); + if (err) + goto out_put_prog; + + raw_tp->prog = prog; + tp_fd = anon_inode_getfd("bpf-raw-tracepoint", &bpf_raw_tp_fops, raw_tp, + O_CLOEXEC); + if (tp_fd < 0) { + bpf_probe_unregister(raw_tp->btp, prog); + err = tp_fd; + goto out_put_prog; + } + return tp_fd; + +out_put_prog: + bpf_prog_put(prog); +out_free_tp: + kfree(raw_tp); + return err; +} + #ifdef CONFIG_CGROUP_BPF #define BPF_PROG_ATTACH_LAST_FIELD attach_flags @@ -1925,6 +2000,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz case BPF_OBJ_GET_INFO_BY_FD: err = bpf_obj_get_info_by_fd(&attr, uattr); break; + case BPF_RAW_TRACEPOINT_OPEN: + err = bpf_raw_tracepoint_open(&attr); + break; default: err = -EINVAL; break; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 7f9691c86b6e..463e72d18c4c 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -735,6 +735,86 @@ static const struct bpf_func_proto *pe_prog_func_proto(enum bpf_func_id func_id) } } +/* + * bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp + * to avoid potential recursive reuse issue when/if tracepoints are added + * inside bpf_*_event_output and/or bpf_get_stack_id + */ +static DEFINE_PER_CPU(struct pt_regs, bpf_raw_tp_regs); +BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args, + struct bpf_map *, map, u64, flags, void *, data, u64, size) +{ + struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs); + + perf_fetch_caller_regs(regs); + return ____bpf_perf_event_output(regs, map, flags, data, size); +} + +static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = { + .func = bpf_perf_event_output_raw_tp, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_MEM, + .arg5_type = ARG_CONST_SIZE_OR_ZERO, +}; + +BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args, + struct bpf_map *, map, u64, flags) +{ + struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs); + + perf_fetch_caller_regs(regs); + /* similar to bpf_perf_event_output_tp, but pt_regs fetched differently */ + return bpf_get_stackid((unsigned long) regs, (unsigned long) map, + flags, 0, 0); +} + +static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = { + .func = bpf_get_stackid_raw_tp, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, +}; + +static const struct bpf_func_proto *raw_tp_prog_func_proto(enum bpf_func_id func_id) +{ + switch (func_id) { + case BPF_FUNC_perf_event_output: + return &bpf_perf_event_output_proto_raw_tp; + case BPF_FUNC_get_stackid: + return &bpf_get_stackid_proto_raw_tp; + default: + return tracing_func_proto(func_id); + } +} + +static bool raw_tp_prog_is_valid_access(int off, int size, + enum bpf_access_type type, + struct bpf_insn_access_aux *info) +{ + /* largest tracepoint in the kernel has 12 args */ + if (off < 0 || off >= sizeof(__u64) * 12) + return false; + if (type != BPF_READ) + return false; + if (off % size != 0) + return false; + return true; +} + +const struct bpf_verifier_ops raw_tracepoint_verifier_ops = { + .get_func_proto = raw_tp_prog_func_proto, + .is_valid_access = raw_tp_prog_is_valid_access, +}; + +const struct bpf_prog_ops raw_tracepoint_prog_ops = { +}; + static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) { @@ -908,3 +988,106 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info) return ret; } + +extern struct bpf_raw_event_map __start__bpf_raw_tp[]; +extern struct bpf_raw_event_map __stop__bpf_raw_tp[]; + +struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name) +{ + struct bpf_raw_event_map *btp = __start__bpf_raw_tp; + + for (; btp < __stop__bpf_raw_tp; btp++) { + if (!strcmp(btp->tp->name, name)) + return btp; + } + return NULL; +} + +static __always_inline +void __bpf_trace_run(struct bpf_prog *prog, u64 *args) +{ + rcu_read_lock(); + preempt_disable(); + (void) BPF_PROG_RUN(prog, args); + preempt_enable(); + rcu_read_unlock(); +} + +#define UNPACK(...) __VA_ARGS__ +#define REPEAT_1(FN, DL, X, ...) FN(X) +#define REPEAT_2(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_1(FN, DL, __VA_ARGS__) +#define REPEAT_3(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_2(FN, DL, __VA_ARGS__) +#define REPEAT_4(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_3(FN, DL, __VA_ARGS__) +#define REPEAT_5(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_4(FN, DL, __VA_ARGS__) +#define REPEAT_6(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_5(FN, DL, __VA_ARGS__) +#define REPEAT_7(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_6(FN, DL, __VA_ARGS__) +#define REPEAT_8(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_7(FN, DL, __VA_ARGS__) +#define REPEAT_9(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_8(FN, DL, __VA_ARGS__) +#define REPEAT_10(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_9(FN, DL, __VA_ARGS__) +#define REPEAT_11(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_10(FN, DL, __VA_ARGS__) +#define REPEAT_12(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_11(FN, DL, __VA_ARGS__) +#define REPEAT(X, FN, DL, ...) REPEAT_##X(FN, DL, __VA_ARGS__) + +#define SARG(X) u64 arg##X +#define COPY(X) args[X] = arg##X + +#define __DL_COM (,) +#define __DL_SEM (;) + +#define __SEQ_0_11 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 + +#define BPF_TRACE_DEFN_x(x) \ + void bpf_trace_run##x(struct bpf_prog *prog, \ + REPEAT(x, SARG, __DL_COM, __SEQ_0_11)) \ + { \ + u64 args[x]; \ + REPEAT(x, COPY, __DL_SEM, __SEQ_0_11); \ + __bpf_trace_run(prog, args); \ + } \ + EXPORT_SYMBOL_GPL(bpf_trace_run##x) +BPF_TRACE_DEFN_x(1); +BPF_TRACE_DEFN_x(2); +BPF_TRACE_DEFN_x(3); +BPF_TRACE_DEFN_x(4); +BPF_TRACE_DEFN_x(5); +BPF_TRACE_DEFN_x(6); +BPF_TRACE_DEFN_x(7); +BPF_TRACE_DEFN_x(8); +BPF_TRACE_DEFN_x(9); +BPF_TRACE_DEFN_x(10); +BPF_TRACE_DEFN_x(11); +BPF_TRACE_DEFN_x(12); + +static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog) +{ + struct tracepoint *tp = btp->tp; + + /* + * check that program doesn't access arguments beyond what's + * available in this tracepoint + */ + if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64)) + return -EINVAL; + + return tracepoint_probe_register(tp, (void *)btp->bpf_func, prog); +} + +int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog) +{ + int err; + + mutex_lock(&bpf_event_mutex); + err = __bpf_probe_register(btp, prog); + mutex_unlock(&bpf_event_mutex); + return err; +} + +int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog) +{ + int err; + + mutex_lock(&bpf_event_mutex); + err = tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog); + mutex_unlock(&bpf_event_mutex); + return err; +} diff --git a/net/mac802154/trace.h b/net/mac802154/trace.h index 2c8a43d3607f..df855c33daf2 100644 --- a/net/mac802154/trace.h +++ b/net/mac802154/trace.h @@ -33,7 +33,7 @@ /* Tracing for driver callbacks */ -DECLARE_EVENT_CLASS(local_only_evt, +DECLARE_EVENT_CLASS(local_only_evt4, TP_PROTO(struct ieee802154_local *local), TP_ARGS(local), TP_STRUCT__entry( @@ -45,7 +45,7 @@ DECLARE_EVENT_CLASS(local_only_evt, TP_printk(LOCAL_PR_FMT, LOCAL_PR_ARG) ); -DEFINE_EVENT(local_only_evt, 802154_drv_return_void, +DEFINE_EVENT(local_only_evt4, 802154_drv_return_void, TP_PROTO(struct ieee802154_local *local), TP_ARGS(local) ); @@ -65,12 +65,12 @@ TRACE_EVENT(802154_drv_return_int, __entry->ret) ); -DEFINE_EVENT(local_only_evt, 802154_drv_start, +DEFINE_EVENT(local_only_evt4, 802154_drv_start, TP_PROTO(struct ieee802154_local *local), TP_ARGS(local) ); -DEFINE_EVENT(local_only_evt, 802154_drv_stop, +DEFINE_EVENT(local_only_evt4, 802154_drv_stop, TP_PROTO(struct ieee802154_local *local), TP_ARGS(local) ); diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 5152938b358d..018c81fa72fb 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -3137,7 +3137,7 @@ TRACE_EVENT(rdev_start_radar_detection, TRACE_EVENT(rdev_set_mcast_rate, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, - int mcast_rate[NUM_NL80211_BANDS]), + int *mcast_rate), TP_ARGS(wiphy, netdev, mcast_rate), TP_STRUCT__entry( WIPHY_ENTRY diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 2c2a587e0942..4d6a6edd4bf6 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -119,6 +119,7 @@ always += offwaketime_kern.o always += spintest_kern.o always += map_perf_test_kern.o always += test_overhead_tp_kern.o +always += test_overhead_raw_tp_kern.o always += test_overhead_kprobe_kern.o always += parse_varlen.o parse_simple.o parse_ldabs.o always += test_cgrp2_tc_kern.o diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index b1a310c3ae89..bebe4188b4b3 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -61,6 +61,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) bool is_kprobe = strncmp(event, "kprobe/", 7) == 0; bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0; bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0; + bool is_raw_tracepoint = strncmp(event, "raw_tracepoint/", 15) == 0; bool is_xdp = strncmp(event, "xdp", 3) == 0; bool is_perf_event = strncmp(event, "perf_event", 10) == 0; bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0; @@ -85,6 +86,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) prog_type = BPF_PROG_TYPE_KPROBE; } else if (is_tracepoint) { prog_type = BPF_PROG_TYPE_TRACEPOINT; + } else if (is_raw_tracepoint) { + prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT; } else if (is_xdp) { prog_type = BPF_PROG_TYPE_XDP; } else if (is_perf_event) { @@ -131,6 +134,16 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) return populate_prog_array(event, fd); } + if (is_raw_tracepoint) { + efd = bpf_raw_tracepoint_open(event + 15, fd); + if (efd < 0) { + printf("tracepoint %s %s\n", event + 15, strerror(errno)); + return -1; + } + event_fd[prog_cnt - 1] = efd; + return 0; + } + if (is_kprobe || is_kretprobe) { if (is_kprobe) event += 7; @@ -587,6 +600,7 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map) if (memcmp(shname, "kprobe/", 7) == 0 || memcmp(shname, "kretprobe/", 10) == 0 || memcmp(shname, "tracepoint/", 11) == 0 || + memcmp(shname, "raw_tracepoint/", 15) == 0 || memcmp(shname, "xdp", 3) == 0 || memcmp(shname, "perf_event", 10) == 0 || memcmp(shname, "socket", 6) == 0 || diff --git a/samples/bpf/test_overhead_raw_tp_kern.c b/samples/bpf/test_overhead_raw_tp_kern.c new file mode 100644 index 000000000000..d2af8bc1c805 --- /dev/null +++ b/samples/bpf/test_overhead_raw_tp_kern.c @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018 Facebook */ +#include <uapi/linux/bpf.h> +#include "bpf_helpers.h" + +SEC("raw_tracepoint/task_rename") +int prog(struct bpf_raw_tracepoint_args *ctx) +{ + return 0; +} + +SEC("raw_tracepoint/urandom_read") +int prog2(struct bpf_raw_tracepoint_args *ctx) +{ + return 0; +} +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/test_overhead_user.c b/samples/bpf/test_overhead_user.c index d291167fd3c7..e1d35e07a10e 100644 --- a/samples/bpf/test_overhead_user.c +++ b/samples/bpf/test_overhead_user.c @@ -158,5 +158,17 @@ int main(int argc, char **argv) unload_progs(); } + if (test_flags & 0xC0) { + snprintf(filename, sizeof(filename), + "%s_raw_tp_kern.o", argv[0]); + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + printf("w/RAW_TRACEPOINT\n"); + run_perf_test(num_cpu, test_flags >> 6); + unload_progs(); + } + return 0; } diff --git a/security/apparmor/include/path.h b/security/apparmor/include/path.h index 05fb3305671e..e042b994f2b8 100644 --- a/security/apparmor/include/path.h +++ b/security/apparmor/include/path.h @@ -43,15 +43,10 @@ struct aa_buffers { DECLARE_PER_CPU(struct aa_buffers, aa_buffers); -#define COUNT_ARGS(X...) COUNT_ARGS_HELPER(, ##X, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) -#define COUNT_ARGS_HELPER(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, n, X...) n -#define CONCAT(X, Y) X ## Y -#define CONCAT_AFTER(X, Y) CONCAT(X, Y) - #define ASSIGN(FN, X, N) ((X) = FN(N)) #define EVAL1(FN, X) ASSIGN(FN, X, 0) /*X = FN(0)*/ #define EVAL2(FN, X, Y...) do { ASSIGN(FN, X, 1); EVAL1(FN, Y); } while (0) -#define EVAL(FN, X...) CONCAT_AFTER(EVAL, COUNT_ARGS(X))(FN, X) +#define EVAL(FN, X...) CONCATENATE(EVAL, COUNT_ARGS(X))(FN, X) #define for_each_cpu_buffer(I) for ((I) = 0; (I) < MAX_PATH_BUFFERS; (I)++) diff --git a/sound/firewire/amdtp-stream-trace.h b/sound/firewire/amdtp-stream-trace.h index ea0d486652c8..54cdd4ffa9ce 100644 --- a/sound/firewire/amdtp-stream-trace.h +++ b/sound/firewire/amdtp-stream-trace.h @@ -14,7 +14,7 @@ #include <linux/tracepoint.h> TRACE_EVENT(in_packet, - TP_PROTO(const struct amdtp_stream *s, u32 cycles, u32 cip_header[2], unsigned int payload_length, unsigned int index), + TP_PROTO(const struct amdtp_stream *s, u32 cycles, u32 *cip_header, unsigned int payload_length, unsigned int index), TP_ARGS(s, cycles, cip_header, payload_length, index), TP_STRUCT__entry( __field(unsigned int, second) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index d245c41213ac..58060bec999d 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -94,6 +94,7 @@ enum bpf_cmd { BPF_MAP_GET_FD_BY_ID, BPF_OBJ_GET_INFO_BY_FD, BPF_PROG_QUERY, + BPF_RAW_TRACEPOINT_OPEN, }; enum bpf_map_type { @@ -134,6 +135,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SK_SKB, BPF_PROG_TYPE_CGROUP_DEVICE, BPF_PROG_TYPE_SK_MSG, + BPF_PROG_TYPE_RAW_TRACEPOINT, }; enum bpf_attach_type { @@ -344,6 +346,11 @@ union bpf_attr { __aligned_u64 prog_ids; __u32 prog_cnt; } query; + + struct { + __u64 name; + __u32 prog_fd; + } raw_tracepoint; } __attribute__((aligned(8))); /* BPF helper function descriptions: @@ -1151,4 +1158,8 @@ struct bpf_cgroup_dev_ctx { __u32 minor; }; +struct bpf_raw_tracepoint_args { + __u64 args[0]; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 592a58a2b681..e0500055f1a6 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -428,6 +428,17 @@ int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len) return err; } +int bpf_raw_tracepoint_open(const char *name, int prog_fd) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + attr.raw_tracepoint.name = ptr_to_u64(name); + attr.raw_tracepoint.prog_fd = prog_fd; + + return sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr)); +} + int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags) { struct sockaddr_nl sa; diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 8d18fb73d7fb..ee59342c6f42 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -79,4 +79,5 @@ int bpf_map_get_fd_by_id(__u32 id); int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len); int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt); +int bpf_raw_tracepoint_open(const char *name, int prog_fd); #endif diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index e9df48b306df..faadbe233966 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -877,7 +877,7 @@ static void test_stacktrace_map() err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno)) - goto out; + return; /* Get the ID for the sched/sched_switch tracepoint */ snprintf(buf, sizeof(buf), @@ -888,8 +888,7 @@ static void test_stacktrace_map() bytes = read(efd, buf, sizeof(buf)); close(efd); - if (CHECK(bytes <= 0 || bytes >= sizeof(buf), - "read", "bytes %d errno %d\n", bytes, errno)) + if (bytes <= 0 || bytes >= sizeof(buf)) goto close_prog; /* Open the perf event and attach bpf progrram */ @@ -906,29 +905,24 @@ static void test_stacktrace_map() goto close_prog; err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0); - if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", - err, errno)) - goto close_pmu; + if (err) + goto disable_pmu; err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd); - if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", - err, errno)) + if (err) goto disable_pmu; /* find map fds */ control_map_fd = bpf_find_map(__func__, obj, "control_map"); - if (CHECK(control_map_fd < 0, "bpf_find_map control_map", - "err %d errno %d\n", err, errno)) + if (control_map_fd < 0) goto disable_pmu; stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap"); - if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap", - "err %d errno %d\n", err, errno)) + if (stackid_hmap_fd < 0) goto disable_pmu; stackmap_fd = bpf_find_map(__func__, obj, "stackmap"); - if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n", - err, errno)) + if (stackmap_fd < 0) goto disable_pmu; /* give some time for bpf program run */ @@ -945,24 +939,78 @@ static void test_stacktrace_map() err = compare_map_keys(stackid_hmap_fd, stackmap_fd); if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap", "err %d errno %d\n", err, errno)) - goto disable_pmu; + goto disable_pmu_noerr; err = compare_map_keys(stackmap_fd, stackid_hmap_fd); if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap", "err %d errno %d\n", err, errno)) - ; /* fall through */ + goto disable_pmu_noerr; + goto disable_pmu_noerr; disable_pmu: + error_cnt++; +disable_pmu_noerr: ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE); - -close_pmu: close(pmu_fd); - close_prog: bpf_object__close(obj); +} -out: - return; +static void test_stacktrace_map_raw_tp() +{ + int control_map_fd, stackid_hmap_fd, stackmap_fd; + const char *file = "./test_stacktrace_map.o"; + int efd, err, prog_fd; + __u32 key, val, duration = 0; + struct bpf_object *obj; + + err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd); + if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno)) + return; + + efd = bpf_raw_tracepoint_open("sched_switch", prog_fd); + if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno)) + goto close_prog; + + /* find map fds */ + control_map_fd = bpf_find_map(__func__, obj, "control_map"); + if (control_map_fd < 0) + goto close_prog; + + stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap"); + if (stackid_hmap_fd < 0) + goto close_prog; + + stackmap_fd = bpf_find_map(__func__, obj, "stackmap"); + if (stackmap_fd < 0) + goto close_prog; + + /* give some time for bpf program run */ + sleep(1); + + /* disable stack trace collection */ + key = 0; + val = 1; + bpf_map_update_elem(control_map_fd, &key, &val, 0); + + /* for every element in stackid_hmap, we can find a corresponding one + * in stackmap, and vise versa. + */ + err = compare_map_keys(stackid_hmap_fd, stackmap_fd); + if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap", + "err %d errno %d\n", err, errno)) + goto close_prog; + + err = compare_map_keys(stackmap_fd, stackid_hmap_fd); + if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap", + "err %d errno %d\n", err, errno)) + goto close_prog; + + goto close_prog_noerr; +close_prog: + error_cnt++; +close_prog_noerr: + bpf_object__close(obj); } static int extract_build_id(char *build_id, size_t size) @@ -1138,6 +1186,7 @@ int main(void) test_tp_attach_query(); test_stacktrace_map(); test_stacktrace_build_id(); + test_stacktrace_map_raw_tp(); printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS; |