aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/tools/lib
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--tools/lib/api/Makefile68
-rw-r--r--tools/lib/api/fd/array.c17
-rw-r--r--tools/lib/api/fd/array.h6
-rw-r--r--tools/lib/api/fs/cgroup.c100
-rw-r--r--tools/lib/api/fs/fs.c306
-rw-r--r--tools/lib/api/fs/tracing_path.c35
-rw-r--r--tools/lib/api/fs/tracing_path.h1
-rw-r--r--tools/lib/api/io.h80
-rw-r--r--tools/lib/bitmap.c34
-rw-r--r--tools/lib/bpf/.gitignore2
-rw-r--r--tools/lib/bpf/Build5
-rw-r--r--tools/lib/bpf/Makefile156
-rw-r--r--tools/lib/bpf/README.rst168
-rw-r--r--tools/lib/bpf/bpf.c1122
-rw-r--r--tools/lib/bpf/bpf.h587
-rw-r--r--tools/lib/bpf/bpf_core_read.h323
-rw-r--r--tools/lib/bpf/bpf_gen_internal.h74
-rw-r--r--tools/lib/bpf/bpf_helpers.h335
-rw-r--r--tools/lib/bpf/bpf_prog_linfo.c18
-rw-r--r--tools/lib/bpf/bpf_tracing.h998
-rw-r--r--tools/lib/bpf/btf.c2954
-rw-r--r--tools/lib/bpf/btf.h287
-rw-r--r--tools/lib/bpf/btf_dump.c1316
-rw-r--r--tools/lib/bpf/elf.c558
-rw-r--r--tools/lib/bpf/features.c583
-rw-r--r--tools/lib/bpf/gen_loader.c1123
-rw-r--r--tools/lib/bpf/hashmap.c21
-rw-r--r--tools/lib/bpf/hashmap.h116
-rw-r--r--tools/lib/bpf/libbpf.c12089
-rw-r--r--tools/lib/bpf/libbpf.h1807
-rw-r--r--tools/lib/bpf/libbpf.map279
-rw-r--r--tools/lib/bpf/libbpf_common.h52
-rw-r--r--tools/lib/bpf/libbpf_errno.c23
-rw-r--r--tools/lib/bpf/libbpf_internal.h533
-rw-r--r--tools/lib/bpf/libbpf_legacy.h140
-rw-r--r--tools/lib/bpf/libbpf_probes.c394
-rw-r--r--tools/lib/bpf/libbpf_util.h47
-rw-r--r--tools/lib/bpf/libbpf_version.h9
-rw-r--r--tools/lib/bpf/linker.c2924
-rw-r--r--tools/lib/bpf/netlink.c818
-rw-r--r--tools/lib/bpf/nlattr.c6
-rw-r--r--tools/lib/bpf/nlattr.h72
-rw-r--r--tools/lib/bpf/relo_core.c1687
-rw-r--r--tools/lib/bpf/relo_core.h99
-rw-r--r--tools/lib/bpf/ringbuf.c438
-rw-r--r--tools/lib/bpf/skel_internal.h374
-rw-r--r--tools/lib/bpf/str_error.h3
-rw-r--r--tools/lib/bpf/strset.c177
-rw-r--r--tools/lib/bpf/strset.h21
-rw-r--r--tools/lib/bpf/usdt.bpf.h250
-rw-r--r--tools/lib/bpf/usdt.c1600
-rw-r--r--tools/lib/bpf/xsk.c927
-rw-r--r--tools/lib/bpf/xsk.h255
-rw-r--r--tools/lib/bpf/zip.c333
-rw-r--r--tools/lib/bpf/zip.h47
-rw-r--r--tools/lib/find_bit.c143
-rw-r--r--tools/lib/list_sort.c252
-rw-r--r--tools/lib/lockdep/.gitignore2
-rw-r--r--tools/lib/lockdep/Build1
-rw-r--r--tools/lib/lockdep/Makefile162
-rw-r--r--tools/lib/lockdep/common.c29
-rw-r--r--tools/lib/lockdep/include/liblockdep/common.h54
-rw-r--r--tools/lib/lockdep/include/liblockdep/mutex.h73
-rw-r--r--tools/lib/lockdep/include/liblockdep/rwlock.h87
-rwxr-xr-xtools/lib/lockdep/lockdep3
-rw-r--r--tools/lib/lockdep/lockdep.c33
-rw-r--r--tools/lib/lockdep/lockdep_internals.h1
-rw-r--r--tools/lib/lockdep/lockdep_states.h1
-rw-r--r--tools/lib/lockdep/preload.c443
-rw-r--r--tools/lib/lockdep/rbtree.c1
-rwxr-xr-xtools/lib/lockdep/run_tests.sh47
-rw-r--r--tools/lib/lockdep/tests/AA.c14
-rw-r--r--tools/lib/lockdep/tests/AA.sh2
-rw-r--r--tools/lib/lockdep/tests/ABA.c14
-rw-r--r--tools/lib/lockdep/tests/ABA.sh2
-rw-r--r--tools/lib/lockdep/tests/ABBA.c26
-rw-r--r--tools/lib/lockdep/tests/ABBA.sh2
-rw-r--r--tools/lib/lockdep/tests/ABBA_2threads.c47
-rw-r--r--tools/lib/lockdep/tests/ABBA_2threads.sh2
-rw-r--r--tools/lib/lockdep/tests/ABBCCA.c20
-rw-r--r--tools/lib/lockdep/tests/ABBCCA.sh2
-rw-r--r--tools/lib/lockdep/tests/ABBCCDDA.c23
-rw-r--r--tools/lib/lockdep/tests/ABBCCDDA.sh2
-rw-r--r--tools/lib/lockdep/tests/ABCABC.c20
-rw-r--r--tools/lib/lockdep/tests/ABCABC.sh2
-rw-r--r--tools/lib/lockdep/tests/ABCDBCDA.c23
-rw-r--r--tools/lib/lockdep/tests/ABCDBCDA.sh2
-rw-r--r--tools/lib/lockdep/tests/ABCDBDDA.c23
-rw-r--r--tools/lib/lockdep/tests/ABCDBDDA.sh2
-rw-r--r--tools/lib/lockdep/tests/WW.c14
-rw-r--r--tools/lib/lockdep/tests/WW.sh2
-rw-r--r--tools/lib/lockdep/tests/common.h13
-rw-r--r--tools/lib/lockdep/tests/unlock_balance.c15
-rw-r--r--tools/lib/lockdep/tests/unlock_balance.sh2
-rw-r--r--tools/lib/perf/Build2
-rw-r--r--tools/lib/perf/Documentation/examples/sampling.c2
-rw-r--r--tools/lib/perf/Documentation/libperf-sampling.txt2
-rw-r--r--tools/lib/perf/Documentation/libperf.txt23
-rw-r--r--tools/lib/perf/Makefile67
-rw-r--r--tools/lib/perf/cpumap.c349
-rw-r--r--tools/lib/perf/evlist.c240
-rw-r--r--tools/lib/perf/evsel.c338
-rw-r--r--tools/lib/perf/include/internal/cpumap.h25
-rw-r--r--tools/lib/perf/include/internal/evlist.h23
-rw-r--r--tools/lib/perf/include/internal/evsel.h34
-rw-r--r--tools/lib/perf/include/internal/lib.h2
-rw-r--r--tools/lib/perf/include/internal/mmap.h11
-rw-r--r--tools/lib/perf/include/internal/rc_check.h113
-rw-r--r--tools/lib/perf/include/internal/tests.h36
-rw-r--r--tools/lib/perf/include/internal/xyarray.h9
-rw-r--r--tools/lib/perf/include/perf/bpf_perf.h31
-rw-r--r--tools/lib/perf/include/perf/cpumap.h67
-rw-r--r--tools/lib/perf/include/perf/event.h134
-rw-r--r--tools/lib/perf/include/perf/evlist.h2
-rw-r--r--tools/lib/perf/include/perf/evsel.h20
-rw-r--r--tools/lib/perf/include/perf/threadmap.h7
-rw-r--r--tools/lib/perf/lib.c20
-rw-r--r--tools/lib/perf/libperf.map12
-rw-r--r--tools/lib/perf/mmap.c272
-rw-r--r--tools/lib/perf/tests/Build5
-rw-r--r--tools/lib/perf/tests/Makefile38
-rw-r--r--tools/lib/perf/tests/main.c15
-rw-r--r--tools/lib/perf/tests/test-cpumap.c18
-rw-r--r--tools/lib/perf/tests/test-evlist.c218
-rw-r--r--tools/lib/perf/tests/test-evsel.c244
-rw-r--r--tools/lib/perf/tests/test-threadmap.c46
-rw-r--r--tools/lib/perf/tests/tests.h10
-rw-r--r--tools/lib/perf/threadmap.c36
-rw-r--r--tools/lib/slab.c38
-rw-r--r--tools/lib/string.c58
-rw-r--r--tools/lib/subcmd/Makefile54
-rw-r--r--tools/lib/subcmd/exec-cmd.c38
-rw-r--r--tools/lib/subcmd/help.c40
-rw-r--r--tools/lib/subcmd/parse-options.c17
-rw-r--r--tools/lib/subcmd/parse-options.h9
-rw-r--r--tools/lib/subcmd/run-command.c2
-rw-r--r--tools/lib/subcmd/run-command.h2
-rw-r--r--tools/lib/subcmd/subcmd-util.h16
-rw-r--r--tools/lib/symbol/Build1
-rw-r--r--tools/lib/symbol/Makefile122
-rw-r--r--tools/lib/symbol/kallsyms.h2
-rw-r--r--tools/lib/thermal/.gitignore2
-rw-r--r--tools/lib/thermal/Build5
-rw-r--r--tools/lib/thermal/Makefile165
-rw-r--r--tools/lib/thermal/commands.c349
-rw-r--r--tools/lib/thermal/events.c164
-rw-r--r--tools/lib/thermal/include/thermal.h142
-rw-r--r--tools/lib/thermal/libthermal.map25
-rw-r--r--tools/lib/thermal/libthermal.pc.template12
-rw-r--r--tools/lib/thermal/sampling.c75
-rw-r--r--tools/lib/thermal/thermal.c135
-rw-r--r--tools/lib/thermal/thermal_nl.c215
-rw-r--r--tools/lib/thermal/thermal_nl.h46
-rw-r--r--tools/lib/traceevent/.gitignore4
-rw-r--r--tools/lib/traceevent/Build8
-rw-r--r--tools/lib/traceevent/Documentation/Makefile207
-rw-r--r--tools/lib/traceevent/Documentation/asciidoc.conf120
-rw-r--r--tools/lib/traceevent/Documentation/libtraceevent-commands.txt153
-rw-r--r--tools/lib/traceevent/Documentation/libtraceevent-cpus.txt77
-rw-r--r--tools/lib/traceevent/Documentation/libtraceevent-endian_read.txt78
-rw-r--r--tools/lib/traceevent/Documentation/libtraceevent-event_find.txt103
-rw-r--r--tools/lib/traceevent/Documentation/libtraceevent-event_get.txt99
-rw-r--r--tools/lib/traceevent/Documentation/libtraceevent-event_list.txt122
-rw-r--r--tools/lib/traceevent/Documentation/libtraceevent-event_print.txt130
-rw-r--r--tools/lib/traceevent/Documentation/libtraceevent-field_find.txt118
-rw-r--r--tools/lib/traceevent/Documentation/libtraceevent-field_get_val.txt122
-rw-r--r--tools/lib/traceevent/Documentation/libtraceevent-field_print.txt126
-rw-r--r--tools/lib/traceevent/Documentation/libtraceevent-field_read.txt81
-rw-r--r--tools/lib/traceevent/Documentation/libtraceevent-fields.txt105
-rw-r--r--tools/lib/traceevent/Documentation/libtraceevent-file_endian.txt91
-rw-r--r--tools/lib/traceevent/Documentation/libtraceevent-filter.txt209
-rw-r--r--tools/lib/traceevent/Documentation/libtraceevent-func_apis.txt183
-rw-r--r--tools/lib/traceevent/Documentation/libtraceevent-func_find.txt88
-rw-r--r--tools/lib/traceevent/Documentation/libtraceevent-handle.txt101
-rw-r--r--tools/lib/traceevent/Documentation/libtraceevent-header_page.txt102
-rw-r--r--tools/lib/traceevent/Documentation/libtraceevent-host_endian.txt104
-rw-r--r--tools/lib/traceevent/Documentation/libtraceevent-long_size.txt78
-rw-r--r--tools/lib/traceevent/Documentation/libtraceevent-page_size.txt82
-rw-r--r--tools/lib/traceevent/Documentation/libtraceevent-parse_event.txt90
-rw-r--r--tools/lib/traceevent/Documentation/libtraceevent-parse_head.txt82
-rw-r--r--tools/lib/traceevent/Documentation/libtraceevent-plugins.txt122
-rw-r--r--tools/lib/traceevent/Documentation/libtraceevent-record_parse.txt137
-rw-r--r--tools/lib/traceevent/Documentation/libtraceevent-reg_event_handler.txt156
-rw-r--r--tools/lib/traceevent/Documentation/libtraceevent-reg_print_func.txt155
-rw-r--r--tools/lib/traceevent/Documentation/libtraceevent-set_flag.txt104
-rw-r--r--tools/lib/traceevent/Documentation/libtraceevent-strerror.txt85
-rw-r--r--tools/lib/traceevent/Documentation/libtraceevent-tseq.txt158
-rw-r--r--tools/lib/traceevent/Documentation/libtraceevent.txt192
-rw-r--r--tools/lib/traceevent/Documentation/manpage-1.72.xsl14
-rw-r--r--tools/lib/traceevent/Documentation/manpage-base.xsl35
-rw-r--r--tools/lib/traceevent/Documentation/manpage-bold-literal.xsl17
-rw-r--r--tools/lib/traceevent/Documentation/manpage-normal.xsl13
-rw-r--r--tools/lib/traceevent/Documentation/manpage-suppress-sp.xsl21
-rw-r--r--tools/lib/traceevent/Makefile300
-rw-r--r--tools/lib/traceevent/event-parse-api.c333
-rw-r--r--tools/lib/traceevent/event-parse-local.h123
-rw-r--r--tools/lib/traceevent/event-parse.c7603
-rw-r--r--tools/lib/traceevent/event-parse.h749
-rw-r--r--tools/lib/traceevent/event-plugin.c711
-rw-r--r--tools/lib/traceevent/event-utils.h67
-rw-r--r--tools/lib/traceevent/kbuffer-parse.c809
-rw-r--r--tools/lib/traceevent/kbuffer.h68
-rw-r--r--tools/lib/traceevent/libtraceevent.pc.template10
-rw-r--r--tools/lib/traceevent/parse-filter.c2278
-rw-r--r--tools/lib/traceevent/parse-utils.c71
-rw-r--r--tools/lib/traceevent/plugins/Build12
-rw-r--r--tools/lib/traceevent/plugins/Makefile225
-rw-r--r--tools/lib/traceevent/plugins/plugin_cfg80211.c43
-rw-r--r--tools/lib/traceevent/plugins/plugin_function.c282
-rw-r--r--tools/lib/traceevent/plugins/plugin_futex.c123
-rw-r--r--tools/lib/traceevent/plugins/plugin_hrtimer.c74
-rw-r--r--tools/lib/traceevent/plugins/plugin_jbd2.c61
-rw-r--r--tools/lib/traceevent/plugins/plugin_kmem.c80
-rw-r--r--tools/lib/traceevent/plugins/plugin_kvm.c527
-rw-r--r--tools/lib/traceevent/plugins/plugin_mac80211.c88
-rw-r--r--tools/lib/traceevent/plugins/plugin_sched_switch.c146
-rw-r--r--tools/lib/traceevent/plugins/plugin_scsi.c434
-rw-r--r--tools/lib/traceevent/plugins/plugin_tlb.c66
-rw-r--r--tools/lib/traceevent/plugins/plugin_xen.c138
-rw-r--r--tools/lib/traceevent/tep_strerror.c53
-rw-r--r--tools/lib/traceevent/trace-seq.c249
-rw-r--r--tools/lib/traceevent/trace-seq.h55
222 files changed, 31613 insertions, 30845 deletions
diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile
index a13e9c7f1fc5..044860ac1ed1 100644
--- a/tools/lib/api/Makefile
+++ b/tools/lib/api/Makefile
@@ -15,6 +15,16 @@ LD ?= $(CROSS_COMPILE)ld
MAKEFLAGS += --no-print-directory
+INSTALL = install
+
+
+# Use DESTDIR for installing into a different root directory.
+# This is useful for building a package. The program will be
+# installed in this directory as if it was the root directory.
+# Then the build tool can move it later.
+DESTDIR ?=
+DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
+
LIBFILE = $(OUTPUT)libapi.a
CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
@@ -45,10 +55,23 @@ RM = rm -f
API_IN := $(OUTPUT)libapi-in.o
+ifeq ($(LP64), 1)
+ libdir_relative = lib64
+else
+ libdir_relative = lib
+endif
+
+prefix ?=
+libdir = $(prefix)/$(libdir_relative)
+
+# Shell quotes
+libdir_SQ = $(subst ','\'',$(libdir))
+
all:
export srctree OUTPUT CC LD CFLAGS V
include $(srctree)/tools/build/Makefile.include
+include $(srctree)/tools/scripts/Makefile.include
all: fixdep $(LIBFILE)
@@ -58,9 +81,52 @@ $(API_IN): FORCE
$(LIBFILE): $(API_IN)
$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(API_IN)
+define do_install_mkdir
+ if [ ! -d '$(DESTDIR_SQ)$1' ]; then \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \
+ fi
+endef
+
+define do_install
+ if [ ! -d '$2' ]; then \
+ $(INSTALL) -d -m 755 '$2'; \
+ fi; \
+ $(INSTALL) $1 $(if $3,-m $3,) '$2'
+endef
+
+install_lib: $(LIBFILE)
+ $(call QUIET_INSTALL, $(LIBFILE)) \
+ $(call do_install_mkdir,$(libdir_SQ)); \
+ cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ)
+
+HDRS := cpu.h debug.h io.h
+FD_HDRS := fd/array.h
+FS_HDRS := fs/fs.h fs/tracing_path.h
+INSTALL_HDRS_PFX := $(DESTDIR)$(prefix)/include/api
+INSTALL_HDRS := $(addprefix $(INSTALL_HDRS_PFX)/, $(HDRS))
+INSTALL_FD_HDRS := $(addprefix $(INSTALL_HDRS_PFX)/, $(FD_HDRS))
+INSTALL_FS_HDRS := $(addprefix $(INSTALL_HDRS_PFX)/, $(FS_HDRS))
+
+$(INSTALL_HDRS): $(INSTALL_HDRS_PFX)/%.h: %.h
+ $(call QUIET_INSTALL, $@) \
+ $(call do_install,$<,$(INSTALL_HDRS_PFX)/,644)
+
+$(INSTALL_FD_HDRS): $(INSTALL_HDRS_PFX)/fd/%.h: fd/%.h
+ $(call QUIET_INSTALL, $@) \
+ $(call do_install,$<,$(INSTALL_HDRS_PFX)/fd/,644)
+
+$(INSTALL_FS_HDRS): $(INSTALL_HDRS_PFX)/fs/%.h: fs/%.h
+ $(call QUIET_INSTALL, $@) \
+ $(call do_install,$<,$(INSTALL_HDRS_PFX)/fs/,644)
+
+install_headers: $(INSTALL_HDRS) $(INSTALL_FD_HDRS) $(INSTALL_FS_HDRS)
+ $(call QUIET_INSTALL, libapi_headers)
+
+install: install_lib install_headers
+
clean:
$(call QUIET_CLEAN, libapi) $(RM) $(LIBFILE); \
- find $(if $(OUTPUT),$(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM)
+ find $(or $(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM)
FORCE:
diff --git a/tools/lib/api/fd/array.c b/tools/lib/api/fd/array.c
index 5e6cb9debe37..f0f195207fca 100644
--- a/tools/lib/api/fd/array.c
+++ b/tools/lib/api/fd/array.c
@@ -88,6 +88,23 @@ int fdarray__add(struct fdarray *fda, int fd, short revents, enum fdarray_flags
return pos;
}
+int fdarray__dup_entry_from(struct fdarray *fda, int pos, struct fdarray *from)
+{
+ struct pollfd *entry;
+ int npos;
+
+ if (pos >= from->nr)
+ return -EINVAL;
+
+ entry = &from->entries[pos];
+
+ npos = fdarray__add(fda, entry->fd, entry->events, from->priv[pos].flags);
+ if (npos >= 0)
+ fda->priv[npos] = from->priv[pos];
+
+ return npos;
+}
+
int fdarray__filter(struct fdarray *fda, short revents,
void (*entry_destructor)(struct fdarray *fda, int fd, void *arg),
void *arg)
diff --git a/tools/lib/api/fd/array.h b/tools/lib/api/fd/array.h
index 7fcf21a33c0c..5c01f7b05dfb 100644
--- a/tools/lib/api/fd/array.h
+++ b/tools/lib/api/fd/array.h
@@ -31,8 +31,9 @@ struct fdarray {
};
enum fdarray_flags {
- fdarray_flag__default = 0x00000000,
- fdarray_flag__nonfilterable = 0x00000001
+ fdarray_flag__default = 0x00000000,
+ fdarray_flag__nonfilterable = 0x00000001,
+ fdarray_flag__non_perf_event = 0x00000002,
};
void fdarray__init(struct fdarray *fda, int nr_autogrow);
@@ -42,6 +43,7 @@ struct fdarray *fdarray__new(int nr_alloc, int nr_autogrow);
void fdarray__delete(struct fdarray *fda);
int fdarray__add(struct fdarray *fda, int fd, short revents, enum fdarray_flags flags);
+int fdarray__dup_entry_from(struct fdarray *fda, int pos, struct fdarray *from);
int fdarray__poll(struct fdarray *fda, int timeout);
int fdarray__filter(struct fdarray *fda, short revents,
void (*entry_destructor)(struct fdarray *fda, int fd, void *arg),
diff --git a/tools/lib/api/fs/cgroup.c b/tools/lib/api/fs/cgroup.c
index 889a6eb4aaca..250629a09423 100644
--- a/tools/lib/api/fs/cgroup.c
+++ b/tools/lib/api/fs/cgroup.c
@@ -8,12 +8,29 @@
#include <string.h>
#include "fs.h"
+struct cgroupfs_cache_entry {
+ char subsys[32];
+ char mountpoint[PATH_MAX];
+};
+
+/* just cache last used one */
+static struct cgroupfs_cache_entry *cached;
+
int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys)
{
FILE *fp;
- char mountpoint[PATH_MAX + 1], tokens[PATH_MAX + 1], type[PATH_MAX + 1];
- char path_v1[PATH_MAX + 1], path_v2[PATH_MAX + 2], *path;
- char *token, *saved_ptr = NULL;
+ char *line = NULL;
+ size_t len = 0;
+ char *p, *path;
+ char mountpoint[PATH_MAX];
+
+ if (cached && !strcmp(cached->subsys, subsys)) {
+ if (strlen(cached->mountpoint) < maxlen) {
+ strcpy(buf, cached->mountpoint);
+ return 0;
+ }
+ return -1;
+ }
fp = fopen("/proc/mounts", "r");
if (!fp)
@@ -22,45 +39,68 @@ int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys)
/*
* in order to handle split hierarchy, we need to scan /proc/mounts
* and inspect every cgroupfs mount point to find one that has
- * perf_event subsystem
+ * the given subsystem. If we found v1, just use it. If not we can
+ * use v2 path as a fallback.
+ */
+ mountpoint[0] = '\0';
+
+ /*
+ * The /proc/mounts has the follow format:
+ *
+ * <devname> <mount point> <fs type> <options> ...
+ *
*/
- path_v1[0] = '\0';
- path_v2[0] = '\0';
+ while (getline(&line, &len, fp) != -1) {
+ /* skip devname */
+ p = strchr(line, ' ');
+ if (p == NULL)
+ continue;
- while (fscanf(fp, "%*s %"__stringify(PATH_MAX)"s %"__stringify(PATH_MAX)"s %"
- __stringify(PATH_MAX)"s %*d %*d\n",
- mountpoint, type, tokens) == 3) {
+ /* save the mount point */
+ path = ++p;
+ p = strchr(p, ' ');
+ if (p == NULL)
+ continue;
- if (!path_v1[0] && !strcmp(type, "cgroup")) {
+ *p++ = '\0';
- token = strtok_r(tokens, ",", &saved_ptr);
+ /* check filesystem type */
+ if (strncmp(p, "cgroup", 6))
+ continue;
- while (token != NULL) {
- if (subsys && !strcmp(token, subsys)) {
- strcpy(path_v1, mountpoint);
- break;
- }
- token = strtok_r(NULL, ",", &saved_ptr);
- }
+ if (p[6] == '2') {
+ /* save cgroup v2 path */
+ strcpy(mountpoint, path);
+ continue;
}
- if (!path_v2[0] && !strcmp(type, "cgroup2"))
- strcpy(path_v2, mountpoint);
+ /* now we have cgroup v1, check the options for subsystem */
+ p += 7;
+
+ p = strstr(p, subsys);
+ if (p == NULL)
+ continue;
+
+ /* sanity check: it should be separated by a space or a comma */
+ if (!strchr(" ,", p[-1]) || !strchr(" ,", p[strlen(subsys)]))
+ continue;
- if (path_v1[0] && path_v2[0])
- break;
+ strcpy(mountpoint, path);
+ break;
}
+ free(line);
fclose(fp);
- if (path_v1[0])
- path = path_v1;
- else if (path_v2[0])
- path = path_v2;
- else
- return -1;
+ if (!cached)
+ cached = calloc(1, sizeof(*cached));
+
+ if (cached) {
+ strncpy(cached->subsys, subsys, sizeof(cached->subsys) - 1);
+ strcpy(cached->mountpoint, mountpoint);
+ }
- if (strlen(path) < maxlen) {
- strcpy(buf, path);
+ if (mountpoint[0] && strlen(mountpoint) < maxlen) {
+ strcpy(buf, mountpoint);
return 0;
}
return -1;
diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
index 82f53d81a7a7..337fde770e45 100644
--- a/tools/lib/api/fs/fs.c
+++ b/tools/lib/api/fs/fs.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
@@ -10,10 +11,12 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
+#include <pthread.h>
#include <unistd.h>
#include <sys/mount.h>
#include "fs.h"
+#include "../io.h"
#include "debug-internal.h"
#define _STR(x) #x
@@ -43,7 +46,7 @@
#define BPF_FS_MAGIC 0xcafe4a11
#endif
-static const char * const sysfs__fs_known_mountpoints[] = {
+static const char * const sysfs__known_mountpoints[] = {
"/sys",
0,
};
@@ -86,87 +89,89 @@ static const char * const bpf_fs__known_mountpoints[] = {
};
struct fs {
- const char *name;
- const char * const *mounts;
- char path[PATH_MAX];
- bool found;
- bool checked;
- long magic;
-};
-
-enum {
- FS__SYSFS = 0,
- FS__PROCFS = 1,
- FS__DEBUGFS = 2,
- FS__TRACEFS = 3,
- FS__HUGETLBFS = 4,
- FS__BPF_FS = 5,
+ const char * const name;
+ const char * const * const mounts;
+ char *path;
+ pthread_mutex_t mount_mutex;
+ const long magic;
};
#ifndef TRACEFS_MAGIC
#define TRACEFS_MAGIC 0x74726163
#endif
-static struct fs fs__entries[] = {
- [FS__SYSFS] = {
- .name = "sysfs",
- .mounts = sysfs__fs_known_mountpoints,
- .magic = SYSFS_MAGIC,
- .checked = false,
- },
- [FS__PROCFS] = {
- .name = "proc",
- .mounts = procfs__known_mountpoints,
- .magic = PROC_SUPER_MAGIC,
- .checked = false,
- },
- [FS__DEBUGFS] = {
- .name = "debugfs",
- .mounts = debugfs__known_mountpoints,
- .magic = DEBUGFS_MAGIC,
- .checked = false,
- },
- [FS__TRACEFS] = {
- .name = "tracefs",
- .mounts = tracefs__known_mountpoints,
- .magic = TRACEFS_MAGIC,
- .checked = false,
- },
- [FS__HUGETLBFS] = {
- .name = "hugetlbfs",
- .mounts = hugetlbfs__known_mountpoints,
- .magic = HUGETLBFS_MAGIC,
- .checked = false,
- },
- [FS__BPF_FS] = {
- .name = "bpf",
- .mounts = bpf_fs__known_mountpoints,
- .magic = BPF_FS_MAGIC,
- .checked = false,
- },
-};
+static void fs__init_once(struct fs *fs);
+static const char *fs__mountpoint(const struct fs *fs);
+static const char *fs__mount(struct fs *fs);
+
+#define FS(lower_name, fs_name, upper_name) \
+static struct fs fs__##lower_name = { \
+ .name = #fs_name, \
+ .mounts = lower_name##__known_mountpoints, \
+ .magic = upper_name##_MAGIC, \
+ .mount_mutex = PTHREAD_MUTEX_INITIALIZER, \
+}; \
+ \
+static void lower_name##_init_once(void) \
+{ \
+ struct fs *fs = &fs__##lower_name; \
+ \
+ fs__init_once(fs); \
+} \
+ \
+const char *lower_name##__mountpoint(void) \
+{ \
+ static pthread_once_t init_once = PTHREAD_ONCE_INIT; \
+ struct fs *fs = &fs__##lower_name; \
+ \
+ pthread_once(&init_once, lower_name##_init_once); \
+ return fs__mountpoint(fs); \
+} \
+ \
+const char *lower_name##__mount(void) \
+{ \
+ const char *mountpoint = lower_name##__mountpoint(); \
+ struct fs *fs = &fs__##lower_name; \
+ \
+ if (mountpoint) \
+ return mountpoint; \
+ \
+ return fs__mount(fs); \
+} \
+ \
+bool lower_name##__configured(void) \
+{ \
+ return lower_name##__mountpoint() != NULL; \
+}
+
+FS(sysfs, sysfs, SYSFS);
+FS(procfs, procfs, PROC_SUPER);
+FS(debugfs, debugfs, DEBUGFS);
+FS(tracefs, tracefs, TRACEFS);
+FS(hugetlbfs, hugetlbfs, HUGETLBFS);
+FS(bpf_fs, bpf, BPF_FS);
static bool fs__read_mounts(struct fs *fs)
{
- bool found = false;
char type[100];
FILE *fp;
+ char path[PATH_MAX + 1];
fp = fopen("/proc/mounts", "r");
if (fp == NULL)
- return NULL;
+ return false;
- while (!found &&
- fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n",
- fs->path, type) == 2) {
+ while (fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n",
+ path, type) == 2) {
- if (strcmp(type, fs->name) == 0)
- found = true;
+ if (strcmp(type, fs->name) == 0) {
+ fs->path = strdup(path);
+ fclose(fp);
+ return fs->path != NULL;
+ }
}
-
fclose(fp);
- fs->checked = true;
- return fs->found = found;
+ return false;
}
static int fs__valid_mount(const char *fs, long magic)
@@ -188,8 +193,9 @@ static bool fs__check_mounts(struct fs *fs)
ptr = fs->mounts;
while (*ptr) {
if (fs__valid_mount(*ptr, fs->magic) == 0) {
- fs->found = true;
- strcpy(fs->path, *ptr);
+ fs->path = strdup(*ptr);
+ if (!fs->path)
+ return false;
return true;
}
ptr++;
@@ -227,43 +233,26 @@ static bool fs__env_override(struct fs *fs)
if (!override_path)
return false;
- fs->found = true;
- fs->checked = true;
- strncpy(fs->path, override_path, sizeof(fs->path) - 1);
- fs->path[sizeof(fs->path) - 1] = '\0';
+ fs->path = strdup(override_path);
+ if (!fs->path)
+ return false;
return true;
}
-static const char *fs__get_mountpoint(struct fs *fs)
+static void fs__init_once(struct fs *fs)
{
- if (fs__env_override(fs))
- return fs->path;
-
- if (fs__check_mounts(fs))
- return fs->path;
-
- if (fs__read_mounts(fs))
- return fs->path;
-
- return NULL;
+ if (!fs__env_override(fs) &&
+ !fs__check_mounts(fs) &&
+ !fs__read_mounts(fs)) {
+ assert(!fs->path);
+ } else {
+ assert(fs->path);
+ }
}
-static const char *fs__mountpoint(int idx)
+static const char *fs__mountpoint(const struct fs *fs)
{
- struct fs *fs = &fs__entries[idx];
-
- if (fs->found)
- return (const char *)fs->path;
-
- /* the mount point was already checked for the mount point
- * but and did not exist, so return NULL to avoid scanning again.
- * This makes the found and not found paths cost equivalent
- * in case of multiple calls.
- */
- if (fs->checked)
- return NULL;
-
- return fs__get_mountpoint(fs);
+ return fs->path;
}
static const char *mount_overload(struct fs *fs)
@@ -278,45 +267,29 @@ static const char *mount_overload(struct fs *fs)
return getenv(upper_name) ?: *fs->mounts;
}
-static const char *fs__mount(int idx)
+static const char *fs__mount(struct fs *fs)
{
- struct fs *fs = &fs__entries[idx];
const char *mountpoint;
- if (fs__mountpoint(idx))
- return (const char *)fs->path;
+ pthread_mutex_lock(&fs->mount_mutex);
- mountpoint = mount_overload(fs);
-
- if (mount(NULL, mountpoint, fs->name, 0, NULL) < 0)
- return NULL;
+ /* Check if path found inside the mutex to avoid races with other callers of mount. */
+ mountpoint = fs__mountpoint(fs);
+ if (mountpoint)
+ goto out;
- return fs__check_mounts(fs) ? fs->path : NULL;
-}
+ mountpoint = mount_overload(fs);
-#define FS(name, idx) \
-const char *name##__mountpoint(void) \
-{ \
- return fs__mountpoint(idx); \
-} \
- \
-const char *name##__mount(void) \
-{ \
- return fs__mount(idx); \
-} \
- \
-bool name##__configured(void) \
-{ \
- return name##__mountpoint() != NULL; \
+ if (mount(NULL, mountpoint, fs->name, 0, NULL) == 0 &&
+ fs__valid_mount(mountpoint, fs->magic) == 0) {
+ fs->path = strdup(mountpoint);
+ mountpoint = fs->path;
+ }
+out:
+ pthread_mutex_unlock(&fs->mount_mutex);
+ return mountpoint;
}
-FS(sysfs, FS__SYSFS);
-FS(procfs, FS__PROCFS);
-FS(debugfs, FS__DEBUGFS);
-FS(tracefs, FS__TRACEFS);
-FS(hugetlbfs, FS__HUGETLBFS);
-FS(bpf_fs, FS__BPF_FS);
-
int filename__read_int(const char *filename, int *value)
{
char line[64];
@@ -372,53 +345,24 @@ int filename__read_ull(const char *filename, unsigned long long *value)
return filename__read_ull_base(filename, value, 0);
}
-#define STRERR_BUFSIZE 128 /* For the buffer size of strerror_r */
-
int filename__read_str(const char *filename, char **buf, size_t *sizep)
{
- size_t size = 0, alloc_size = 0;
- void *bf = NULL, *nbf;
- int fd, n, err = 0;
- char sbuf[STRERR_BUFSIZE];
+ struct io io;
+ char bf[128];
+ int err;
- fd = open(filename, O_RDONLY);
- if (fd < 0)
+ io.fd = open(filename, O_RDONLY);
+ if (io.fd < 0)
return -errno;
-
- do {
- if (size == alloc_size) {
- alloc_size += BUFSIZ;
- nbf = realloc(bf, alloc_size);
- if (!nbf) {
- err = -ENOMEM;
- break;
- }
-
- bf = nbf;
- }
-
- n = read(fd, bf + size, alloc_size - size);
- if (n < 0) {
- if (size) {
- pr_warn("read failed %d: %s\n", errno,
- strerror_r(errno, sbuf, sizeof(sbuf)));
- err = 0;
- } else
- err = -errno;
-
- break;
- }
-
- size += n;
- } while (n > 0);
-
- if (!err) {
- *sizep = size;
- *buf = bf;
+ io__init(&io, io.fd, bf, sizeof(bf));
+ *buf = NULL;
+ err = io__getdelim(&io, buf, sizep, /*delim=*/-1);
+ if (err < 0) {
+ free(*buf);
+ *buf = NULL;
} else
- free(bf);
-
- close(fd);
+ err = 0;
+ close(io.fd);
return err;
}
@@ -503,15 +447,22 @@ int sysfs__read_str(const char *entry, char **buf, size_t *sizep)
int sysfs__read_bool(const char *entry, bool *value)
{
- char *buf;
- size_t size;
- int ret;
+ struct io io;
+ char bf[16];
+ int ret = 0;
+ char path[PATH_MAX];
+ const char *sysfs = sysfs__mountpoint();
- ret = sysfs__read_str(entry, &buf, &size);
- if (ret < 0)
- return ret;
+ if (!sysfs)
+ return -1;
- switch (buf[0]) {
+ snprintf(path, sizeof(path), "%s/%s", sysfs, entry);
+ io.fd = open(path, O_RDONLY);
+ if (io.fd < 0)
+ return -errno;
+
+ io__init(&io, io.fd, bf, sizeof(bf));
+ switch (io__get_char(&io)) {
case '1':
case 'y':
case 'Y':
@@ -525,8 +476,7 @@ int sysfs__read_bool(const char *entry, bool *value)
default:
ret = -1;
}
-
- free(buf);
+ close(io.fd);
return ret;
}
diff --git a/tools/lib/api/fs/tracing_path.c b/tools/lib/api/fs/tracing_path.c
index 5afb11b30fca..30745f35d0d2 100644
--- a/tools/lib/api/fs/tracing_path.c
+++ b/tools/lib/api/fs/tracing_path.c
@@ -13,17 +13,12 @@
#include "tracing_path.h"
-static char tracing_mnt[PATH_MAX] = "/sys/kernel/debug";
-static char tracing_path[PATH_MAX] = "/sys/kernel/debug/tracing";
-static char tracing_events_path[PATH_MAX] = "/sys/kernel/debug/tracing/events";
+static char tracing_path[PATH_MAX] = "/sys/kernel/tracing";
static void __tracing_path_set(const char *tracing, const char *mountpoint)
{
- snprintf(tracing_mnt, sizeof(tracing_mnt), "%s", mountpoint);
snprintf(tracing_path, sizeof(tracing_path), "%s/%s",
mountpoint, tracing);
- snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s%s",
- mountpoint, tracing, "events");
}
static const char *tracing_path_tracefs_mount(void)
@@ -113,6 +108,22 @@ DIR *tracing_events__opendir(void)
return dir;
}
+int tracing_events__scandir_alphasort(struct dirent ***namelist)
+{
+ char *path = get_tracing_file("events");
+ int ret;
+
+ if (!path) {
+ *namelist = NULL;
+ return 0;
+ }
+
+ ret = scandir(path, namelist, NULL, alphasort);
+ put_events_file(path);
+
+ return ret;
+}
+
int tracing_path__strerror_open_tp(int err, char *buf, size_t size,
const char *sys, const char *name)
{
@@ -133,15 +144,15 @@ int tracing_path__strerror_open_tp(int err, char *buf, size_t size,
/* sdt markers */
if (!strncmp(filename, "sdt_", 4)) {
snprintf(buf, size,
- "Error:\tFile %s/%s not found.\n"
+ "Error:\tFile %s/events/%s not found.\n"
"Hint:\tSDT event cannot be directly recorded on.\n"
"\tPlease first use 'perf probe %s:%s' before recording it.\n",
- tracing_events_path, filename, sys, name);
+ tracing_path, filename, sys, name);
} else {
snprintf(buf, size,
- "Error:\tFile %s/%s not found.\n"
+ "Error:\tFile %s/events/%s not found.\n"
"Hint:\tPerhaps this kernel misses some CONFIG_ setting to enable this feature?.\n",
- tracing_events_path, filename);
+ tracing_path, filename);
}
break;
}
@@ -153,9 +164,9 @@ int tracing_path__strerror_open_tp(int err, char *buf, size_t size,
break;
case EACCES: {
snprintf(buf, size,
- "Error:\tNo permissions to read %s/%s\n"
+ "Error:\tNo permissions to read %s/events/%s\n"
"Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n",
- tracing_events_path, filename, tracing_path_mount());
+ tracing_path, filename, tracing_path_mount());
}
break;
default:
diff --git a/tools/lib/api/fs/tracing_path.h b/tools/lib/api/fs/tracing_path.h
index a19136b086dc..fc6347c11deb 100644
--- a/tools/lib/api/fs/tracing_path.h
+++ b/tools/lib/api/fs/tracing_path.h
@@ -6,6 +6,7 @@
#include <dirent.h>
DIR *tracing_events__opendir(void);
+int tracing_events__scandir_alphasort(struct dirent ***namelist);
void tracing_path_set(const char *mountpoint);
const char *tracing_path_mount(void);
diff --git a/tools/lib/api/io.h b/tools/lib/api/io.h
index 777c20f6b604..84adf8102018 100644
--- a/tools/lib/api/io.h
+++ b/tools/lib/api/io.h
@@ -7,8 +7,12 @@
#ifndef __API_IO__
#define __API_IO__
+#include <errno.h>
+#include <poll.h>
#include <stdlib.h>
+#include <string.h>
#include <unistd.h>
+#include <linux/types.h>
struct io {
/* File descriptor being read/ */
@@ -21,6 +25,8 @@ struct io {
char *end;
/* Currently accessed data pointer. */
char *data;
+ /* Read timeout, 0 implies no timeout. */
+ int timeout_ms;
/* Set true on when the end of file on read error. */
bool eof;
};
@@ -33,6 +39,7 @@ static inline void io__init(struct io *io, int fd,
io->buf = buf;
io->end = buf;
io->data = buf;
+ io->timeout_ms = 0;
io->eof = false;
}
@@ -45,7 +52,29 @@ static inline int io__get_char(struct io *io)
return -1;
if (ptr == io->end) {
- ssize_t n = read(io->fd, io->buf, io->buf_len);
+ ssize_t n;
+
+ if (io->timeout_ms != 0) {
+ struct pollfd pfds[] = {
+ {
+ .fd = io->fd,
+ .events = POLLIN,
+ },
+ };
+
+ n = poll(pfds, 1, io->timeout_ms);
+ if (n == 0)
+ errno = ETIMEDOUT;
+ if (n > 0 && !(pfds[0].revents & POLLIN)) {
+ errno = EIO;
+ n = -1;
+ }
+ if (n <= 0) {
+ io->eof = true;
+ return -1;
+ }
+ }
+ n = read(io->fd, io->buf, io->buf_len);
if (n <= 0) {
io->eof = true;
@@ -112,4 +141,53 @@ static inline int io__get_dec(struct io *io, __u64 *dec)
}
}
+/* Read up to and including the first delim. */
+static inline ssize_t io__getdelim(struct io *io, char **line_out, size_t *line_len_out, int delim)
+{
+ char buf[128];
+ int buf_pos = 0;
+ char *line = NULL, *temp;
+ size_t line_len = 0;
+ int ch = 0;
+
+ /* TODO: reuse previously allocated memory. */
+ free(*line_out);
+ while (ch != delim) {
+ ch = io__get_char(io);
+
+ if (ch < 0)
+ break;
+
+ if (buf_pos == sizeof(buf)) {
+ temp = realloc(line, line_len + sizeof(buf));
+ if (!temp)
+ goto err_out;
+ line = temp;
+ memcpy(&line[line_len], buf, sizeof(buf));
+ line_len += sizeof(buf);
+ buf_pos = 0;
+ }
+ buf[buf_pos++] = (char)ch;
+ }
+ temp = realloc(line, line_len + buf_pos + 1);
+ if (!temp)
+ goto err_out;
+ line = temp;
+ memcpy(&line[line_len], buf, buf_pos);
+ line[line_len + buf_pos] = '\0';
+ line_len += buf_pos;
+ *line_out = line;
+ *line_len_out = line_len;
+ return line_len;
+err_out:
+ free(line);
+ *line_out = NULL;
+ return -ENOMEM;
+}
+
+static inline ssize_t io__getline(struct io *io, char **line_out, size_t *line_len_out)
+{
+ return io__getdelim(io, line_out, line_len_out, /*delim=*/'\n');
+}
+
#endif /* __API_IO__ */
diff --git a/tools/lib/bitmap.c b/tools/lib/bitmap.c
index 5043747ef6c5..c3e4871967bc 100644
--- a/tools/lib/bitmap.c
+++ b/tools/lib/bitmap.c
@@ -5,9 +5,9 @@
*/
#include <linux/bitmap.h>
-int __bitmap_weight(const unsigned long *bitmap, int bits)
+unsigned int __bitmap_weight(const unsigned long *bitmap, int bits)
{
- int k, w = 0, lim = bits/BITS_PER_LONG;
+ unsigned int k, w = 0, lim = bits/BITS_PER_LONG;
for (k = 0; k < lim; k++)
w += hweight_long(bitmap[k]);
@@ -28,11 +28,11 @@ void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
dst[k] = bitmap1[k] | bitmap2[k];
}
-size_t bitmap_scnprintf(unsigned long *bitmap, int nbits,
+size_t bitmap_scnprintf(unsigned long *bitmap, unsigned int nbits,
char *buf, size_t size)
{
/* current bit is 'cur', most recently seen range is [rbot, rtop] */
- int cur, rbot, rtop;
+ unsigned int cur, rbot, rtop;
bool first = true;
size_t ret = 0;
@@ -57,7 +57,7 @@ size_t bitmap_scnprintf(unsigned long *bitmap, int nbits,
return ret;
}
-int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
+bool __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int bits)
{
unsigned int k;
@@ -72,17 +72,31 @@ int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
return result != 0;
}
-int __bitmap_equal(const unsigned long *bitmap1,
- const unsigned long *bitmap2, unsigned int bits)
+bool __bitmap_equal(const unsigned long *bitmap1,
+ const unsigned long *bitmap2, unsigned int bits)
{
unsigned int k, lim = bits/BITS_PER_LONG;
for (k = 0; k < lim; ++k)
if (bitmap1[k] != bitmap2[k])
- return 0;
+ return false;
if (bits % BITS_PER_LONG)
if ((bitmap1[k] ^ bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits))
- return 0;
+ return false;
- return 1;
+ return true;
+}
+
+bool __bitmap_intersects(const unsigned long *bitmap1,
+ const unsigned long *bitmap2, unsigned int bits)
+{
+ unsigned int k, lim = bits/BITS_PER_LONG;
+ for (k = 0; k < lim; ++k)
+ if (bitmap1[k] & bitmap2[k])
+ return true;
+
+ if (bits % BITS_PER_LONG)
+ if ((bitmap1[k] & bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits))
+ return true;
+ return false;
}
diff --git a/tools/lib/bpf/.gitignore b/tools/lib/bpf/.gitignore
index 8a81b3679d2b..0da84cb9e66d 100644
--- a/tools/lib/bpf/.gitignore
+++ b/tools/lib/bpf/.gitignore
@@ -1,7 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-libbpf_version.h
libbpf.pc
-FEATURE-DUMP.libbpf
libbpf.so.*
TAGS
tags
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index 190366d05588..b6619199a706 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1,3 +1,4 @@
libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
- netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \
- btf_dump.o ringbuf.o
+ netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \
+ btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \
+ usdt.o zip.o elf.o features.o
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 5f9abed3e226..2cf892774346 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -2,12 +2,14 @@
# Most of this file is copied from tools/lib/traceevent/Makefile
RM ?= rm
-srctree = $(abs_srctree)
+srctree := $(realpath $(srctree))
+VERSION_SCRIPT := libbpf.map
LIBBPF_VERSION := $(shell \
- grep -oE '^LIBBPF_([0-9.]+)' libbpf.map | \
+ grep -oE '^LIBBPF_([0-9.]+)' $(VERSION_SCRIPT) | \
sort -rV | head -n1 | cut -d'_' -f2)
-LIBBPF_MAJOR_VERSION := $(firstword $(subst ., ,$(LIBBPF_VERSION)))
+LIBBPF_MAJOR_VERSION := $(word 1,$(subst ., ,$(LIBBPF_VERSION)))
+LIBBPF_MINOR_VERSION := $(word 2,$(subst ., ,$(LIBBPF_VERSION)))
MAKEFLAGS += --no-print-directory
@@ -58,28 +60,8 @@ ifndef VERBOSE
VERBOSE = 0
endif
-FEATURE_USER = .libbpf
-FEATURE_TESTS = libelf zlib bpf
-FEATURE_DISPLAY = libelf zlib bpf
-
-INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi
-FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES)
-
-check_feat := 1
-NON_CHECK_FEAT_TARGETS := clean TAGS tags cscope help
-ifdef MAKECMDGOALS
-ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),)
- check_feat := 0
-endif
-endif
-
-ifeq ($(check_feat),1)
-ifeq ($(FEATURES_DUMP),)
-include $(srctree)/tools/build/Makefile.feature
-else
-include $(FEATURES_DUMP)
-endif
-endif
+INCLUDES = -I$(or $(OUTPUT),.) \
+ -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi
export prefix libdir src obj
@@ -102,11 +84,13 @@ else
endif
# Append required CFLAGS
+override CFLAGS += -std=gnu89
override CFLAGS += $(EXTRA_WARNINGS) -Wno-switch-enum
override CFLAGS += -Werror -Wall
override CFLAGS += $(INCLUDES)
override CFLAGS += -fvisibility=hidden
override CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
+override CFLAGS += $(CLANG_CROSS_FLAGS)
# flags specific for shared library
SHLIB_FLAGS := -DSHARED -fPIC
@@ -131,8 +115,8 @@ SHARED_OBJDIR := $(OUTPUT)sharedobjs/
STATIC_OBJDIR := $(OUTPUT)staticobjs/
BPF_IN_SHARED := $(SHARED_OBJDIR)libbpf-in.o
BPF_IN_STATIC := $(STATIC_OBJDIR)libbpf-in.o
-VERSION_SCRIPT := libbpf.map
BPF_HELPER_DEFS := $(OUTPUT)bpf_helper_defs.h
+BPF_GENERATED := $(BPF_HELPER_DEFS)
LIB_TARGET := $(addprefix $(OUTPUT),$(LIB_TARGET))
LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE))
@@ -143,10 +127,11 @@ TAGS_PROG := $(if $(shell which etags 2>/dev/null),etags,ctags)
GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \
cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \
sed 's/\[.*\]//' | \
- awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \
+ awk '/GLOBAL/ && /DEFAULT/ && !/UND|ABS/ {print $$NF}' | \
sort -u | wc -l)
VERSIONED_SYM_COUNT = $(shell readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \
- awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \
+ sed 's/\[.*\]//' | \
+ awk '/GLOBAL/ && /DEFAULT/ && !/UND|ABS/ {print $$NF}' | \
grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l)
CMD_TARGETS = $(LIB_TARGET) $(PC_FILE)
@@ -156,37 +141,31 @@ all: fixdep
all_cmd: $(CMD_TARGETS) check
-$(BPF_IN_SHARED): force elfdep zdep bpfdep $(BPF_HELPER_DEFS)
+$(BPF_IN_SHARED): force $(BPF_GENERATED)
@(test -f ../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \
(diff -B ../../include/uapi/linux/bpf.h ../../../include/uapi/linux/bpf.h >/dev/null) || \
echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h'" >&2 )) || true
@(test -f ../../include/uapi/linux/bpf_common.h -a -f ../../../include/uapi/linux/bpf_common.h && ( \
(diff -B ../../include/uapi/linux/bpf_common.h ../../../include/uapi/linux/bpf_common.h >/dev/null) || \
echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf_common.h' differs from latest version at 'include/uapi/linux/bpf_common.h'" >&2 )) || true
- @(test -f ../../include/uapi/linux/netlink.h -a -f ../../../include/uapi/linux/netlink.h && ( \
- (diff -B ../../include/uapi/linux/netlink.h ../../../include/uapi/linux/netlink.h >/dev/null) || \
- echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/netlink.h' differs from latest version at 'include/uapi/linux/netlink.h'" >&2 )) || true
- @(test -f ../../include/uapi/linux/if_link.h -a -f ../../../include/uapi/linux/if_link.h && ( \
- (diff -B ../../include/uapi/linux/if_link.h ../../../include/uapi/linux/if_link.h >/dev/null) || \
- echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_link.h' differs from latest version at 'include/uapi/linux/if_link.h'" >&2 )) || true
@(test -f ../../include/uapi/linux/if_xdp.h -a -f ../../../include/uapi/linux/if_xdp.h && ( \
(diff -B ../../include/uapi/linux/if_xdp.h ../../../include/uapi/linux/if_xdp.h >/dev/null) || \
echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true
$(Q)$(MAKE) $(build)=libbpf OUTPUT=$(SHARED_OBJDIR) CFLAGS="$(CFLAGS) $(SHLIB_FLAGS)"
-$(BPF_IN_STATIC): force elfdep zdep bpfdep $(BPF_HELPER_DEFS)
+$(BPF_IN_STATIC): force $(BPF_GENERATED)
$(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR)
$(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h
- $(QUIET_GEN)$(srctree)/scripts/bpf_helpers_doc.py --header \
+ $(QUIET_GEN)$(srctree)/scripts/bpf_doc.py --header \
--file $(srctree)/tools/include/uapi/linux/bpf.h > $(BPF_HELPER_DEFS)
$(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION)
-$(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED)
- $(QUIET_LINK)$(CC) $(LDFLAGS) \
+$(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED) $(VERSION_SCRIPT)
+ $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) \
--shared -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \
- -Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -lz -o $@
+ -Wl,--version-script=$(VERSION_SCRIPT) $< -lelf -lz -o $@
@ln -sf $(@F) $(OUTPUT)libbpf.so
@ln -sf $(@F) $(OUTPUT)libbpf.so.$(LIBBPF_MAJOR_VERSION)
@@ -199,9 +178,9 @@ $(OUTPUT)libbpf.pc:
-e "s|@VERSION@|$(LIBBPF_VERSION)|" \
< libbpf.pc.template > $@
-check: check_abi
+check: check_abi check_version
-check_abi: $(OUTPUT)libbpf.so
+check_abi: $(OUTPUT)libbpf.so $(VERSION_SCRIPT)
@if [ "$(GLOBAL_SYM_COUNT)" != "$(VERSIONED_SYM_COUNT)" ]; then \
echo "Warning: Num of global symbols in $(BPF_IN_SHARED)" \
"($(GLOBAL_SYM_COUNT)) does NOT match with num of" \
@@ -214,7 +193,8 @@ check_abi: $(OUTPUT)libbpf.so
awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \
sort -u > $(OUTPUT)libbpf_global_syms.tmp; \
readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \
- awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \
+ sed 's/\[.*\]//' | \
+ awk '/GLOBAL/ && /DEFAULT/ && !/UND|ABS/ {print $$NF}'| \
grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | \
sort -u > $(OUTPUT)libbpf_versioned_syms.tmp; \
diff -u $(OUTPUT)libbpf_global_syms.tmp \
@@ -224,6 +204,21 @@ check_abi: $(OUTPUT)libbpf.so
exit 1; \
fi
+HDR_MAJ_VERSION := $(shell grep -oE '^$(pound)define LIBBPF_MAJOR_VERSION ([0-9]+)$$' libbpf_version.h | cut -d' ' -f3)
+HDR_MIN_VERSION := $(shell grep -oE '^$(pound)define LIBBPF_MINOR_VERSION ([0-9]+)$$' libbpf_version.h | cut -d' ' -f3)
+
+check_version: $(VERSION_SCRIPT) libbpf_version.h
+ @if [ "$(HDR_MAJ_VERSION)" != "$(LIBBPF_MAJOR_VERSION)" ]; then \
+ echo "Error: libbpf major version mismatch detected: " \
+ "'$(HDR_MAJ_VERSION)' != '$(LIBBPF_MAJOR_VERSION)'" >&2; \
+ exit 1; \
+ fi
+ @if [ "$(HDR_MIN_VERSION)" != "$(LIBBPF_MINOR_VERSION)" ]; then \
+ echo "Error: libbpf minor version mismatch detected: " \
+ "'$(HDR_MIN_VERSION)' != '$(LIBBPF_MINOR_VERSION)'" >&2; \
+ exit 1; \
+ fi
+
define do_install_mkdir
if [ ! -d '$(DESTDIR_SQ)$1' ]; then \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \
@@ -234,7 +229,7 @@ define do_install
if [ ! -d '$(DESTDIR_SQ)$2' ]; then \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
fi; \
- $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2'
+ $(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2'
endef
install_lib: all_cmd
@@ -242,19 +237,25 @@ install_lib: all_cmd
$(call do_install_mkdir,$(libdir_SQ)); \
cp -fpR $(LIB_FILE) $(DESTDIR)$(libdir_SQ)
-install_headers: $(BPF_HELPER_DEFS)
- $(call QUIET_INSTALL, headers) \
- $(call do_install,bpf.h,$(prefix)/include/bpf,644); \
- $(call do_install,libbpf.h,$(prefix)/include/bpf,644); \
- $(call do_install,btf.h,$(prefix)/include/bpf,644); \
- $(call do_install,libbpf_util.h,$(prefix)/include/bpf,644); \
- $(call do_install,libbpf_common.h,$(prefix)/include/bpf,644); \
- $(call do_install,xsk.h,$(prefix)/include/bpf,644); \
- $(call do_install,bpf_helpers.h,$(prefix)/include/bpf,644); \
- $(call do_install,$(BPF_HELPER_DEFS),$(prefix)/include/bpf,644); \
- $(call do_install,bpf_tracing.h,$(prefix)/include/bpf,644); \
- $(call do_install,bpf_endian.h,$(prefix)/include/bpf,644); \
- $(call do_install,bpf_core_read.h,$(prefix)/include/bpf,644);
+SRC_HDRS := bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h \
+ bpf_helpers.h bpf_tracing.h bpf_endian.h bpf_core_read.h \
+ skel_internal.h libbpf_version.h usdt.bpf.h
+GEN_HDRS := $(BPF_GENERATED)
+
+INSTALL_PFX := $(DESTDIR)$(prefix)/include/bpf
+INSTALL_SRC_HDRS := $(addprefix $(INSTALL_PFX)/, $(SRC_HDRS))
+INSTALL_GEN_HDRS := $(addprefix $(INSTALL_PFX)/, $(notdir $(GEN_HDRS)))
+
+$(INSTALL_SRC_HDRS): $(INSTALL_PFX)/%.h: %.h
+ $(call QUIET_INSTALL, $@) \
+ $(call do_install,$<,$(prefix)/include/bpf,644)
+
+$(INSTALL_GEN_HDRS): $(INSTALL_PFX)/%.h: $(OUTPUT)%.h
+ $(call QUIET_INSTALL, $@) \
+ $(call do_install,$<,$(prefix)/include/bpf,644)
+
+install_headers: $(BPF_GENERATED) $(INSTALL_SRC_HDRS) $(INSTALL_GEN_HDRS)
+ $(call QUIET_INSTALL, libbpf_headers)
install_pkgconfig: $(PC_FILE)
$(call QUIET_INSTALL, $(PC_FILE)) \
@@ -262,34 +263,16 @@ install_pkgconfig: $(PC_FILE)
install: install_lib install_pkgconfig install_headers
-### Cleaning rules
-
-config-clean:
- $(call QUIET_CLEAN, feature-detect)
- $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null
-
-clean: config-clean
+clean:
$(call QUIET_CLEAN, libbpf) $(RM) -rf $(CMD_TARGETS) \
- *~ .*.d .*.cmd LIBBPF-CFLAGS $(BPF_HELPER_DEFS) \
+ *~ .*.d .*.cmd LIBBPF-CFLAGS $(BPF_GENERATED) \
$(SHARED_OBJDIR) $(STATIC_OBJDIR) \
$(addprefix $(OUTPUT), \
*.o *.a *.so *.so.$(LIBBPF_MAJOR_VERSION) *.pc)
- $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf
-
-
-PHONY += force elfdep zdep bpfdep cscope tags
+PHONY += force cscope tags check check_abi check_version
force:
-elfdep:
- @if [ "$(feature-libelf)" != "1" ]; then echo "No libelf found"; exit 1 ; fi
-
-zdep:
- @if [ "$(feature-zlib)" != "1" ]; then echo "No zlib found"; exit 1 ; fi
-
-bpfdep:
- @if [ "$(feature-bpf)" != "1" ]; then echo "BPF API too old"; exit 1 ; fi
-
cscope:
ls *.c *.h > cscope.files
cscope -b -q -I $(srctree)/include -f cscope.out
@@ -304,3 +287,20 @@ tags:
# Delete partially updated (corrupted) files on error
.DELETE_ON_ERROR:
+
+help:
+ @echo 'libbpf common targets:'
+ @echo ' HINT: use "V=1" to enable verbose build'
+ @echo ' all - build libraries and pkgconfig'
+ @echo ' clean - remove all generated files'
+ @echo ' check - check ABI and version info'
+ @echo ''
+ @echo 'libbpf install targets:'
+ @echo ' HINT: use "prefix"(defaults to "/usr/local") or "DESTDIR" (defaults to "/")'
+ @echo ' to adjust target destination, e.g. "make prefix=/usr/local install"'
+ @echo ' install - build and install all headers, libraries and pkgconfig'
+ @echo ' install_headers - install only headers to include/bpf'
+ @echo ''
+ @echo 'libbpf make targets:'
+ @echo ' tags - use ctags to make tag information for source code browsing'
+ @echo ' cscope - use cscope to make interactive source code browsing database'
diff --git a/tools/lib/bpf/README.rst b/tools/lib/bpf/README.rst
deleted file mode 100644
index 8928f7787f2d..000000000000
--- a/tools/lib/bpf/README.rst
+++ /dev/null
@@ -1,168 +0,0 @@
-.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
-
-libbpf API naming convention
-============================
-
-libbpf API provides access to a few logically separated groups of
-functions and types. Every group has its own naming convention
-described here. It's recommended to follow these conventions whenever a
-new function or type is added to keep libbpf API clean and consistent.
-
-All types and functions provided by libbpf API should have one of the
-following prefixes: ``bpf_``, ``btf_``, ``libbpf_``, ``xsk_``,
-``perf_buffer_``.
-
-System call wrappers
---------------------
-
-System call wrappers are simple wrappers for commands supported by
-sys_bpf system call. These wrappers should go to ``bpf.h`` header file
-and map one-on-one to corresponding commands.
-
-For example ``bpf_map_lookup_elem`` wraps ``BPF_MAP_LOOKUP_ELEM``
-command of sys_bpf, ``bpf_prog_attach`` wraps ``BPF_PROG_ATTACH``, etc.
-
-Objects
--------
-
-Another class of types and functions provided by libbpf API is "objects"
-and functions to work with them. Objects are high-level abstractions
-such as BPF program or BPF map. They're represented by corresponding
-structures such as ``struct bpf_object``, ``struct bpf_program``,
-``struct bpf_map``, etc.
-
-Structures are forward declared and access to their fields should be
-provided via corresponding getters and setters rather than directly.
-
-These objects are associated with corresponding parts of ELF object that
-contains compiled BPF programs.
-
-For example ``struct bpf_object`` represents ELF object itself created
-from an ELF file or from a buffer, ``struct bpf_program`` represents a
-program in ELF object and ``struct bpf_map`` is a map.
-
-Functions that work with an object have names built from object name,
-double underscore and part that describes function purpose.
-
-For example ``bpf_object__open`` consists of the name of corresponding
-object, ``bpf_object``, double underscore and ``open`` that defines the
-purpose of the function to open ELF file and create ``bpf_object`` from
-it.
-
-Another example: ``bpf_program__load`` is named for corresponding
-object, ``bpf_program``, that is separated from other part of the name
-by double underscore.
-
-All objects and corresponding functions other than BTF related should go
-to ``libbpf.h``. BTF types and functions should go to ``btf.h``.
-
-Auxiliary functions
--------------------
-
-Auxiliary functions and types that don't fit well in any of categories
-described above should have ``libbpf_`` prefix, e.g.
-``libbpf_get_error`` or ``libbpf_prog_type_by_name``.
-
-AF_XDP functions
--------------------
-
-AF_XDP functions should have an ``xsk_`` prefix, e.g.
-``xsk_umem__get_data`` or ``xsk_umem__create``. The interface consists
-of both low-level ring access functions and high-level configuration
-functions. These can be mixed and matched. Note that these functions
-are not reentrant for performance reasons.
-
-Please take a look at Documentation/networking/af_xdp.rst in the Linux
-kernel source tree on how to use XDP sockets and for some common
-mistakes in case you do not get any traffic up to user space.
-
-libbpf ABI
-==========
-
-libbpf can be both linked statically or used as DSO. To avoid possible
-conflicts with other libraries an application is linked with, all
-non-static libbpf symbols should have one of the prefixes mentioned in
-API documentation above. See API naming convention to choose the right
-name for a new symbol.
-
-Symbol visibility
------------------
-
-libbpf follow the model when all global symbols have visibility "hidden"
-by default and to make a symbol visible it has to be explicitly
-attributed with ``LIBBPF_API`` macro. For example:
-
-.. code-block:: c
-
- LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id);
-
-This prevents from accidentally exporting a symbol, that is not supposed
-to be a part of ABI what, in turn, improves both libbpf developer- and
-user-experiences.
-
-ABI versionning
----------------
-
-To make future ABI extensions possible libbpf ABI is versioned.
-Versioning is implemented by ``libbpf.map`` version script that is
-passed to linker.
-
-Version name is ``LIBBPF_`` prefix + three-component numeric version,
-starting from ``0.0.1``.
-
-Every time ABI is being changed, e.g. because a new symbol is added or
-semantic of existing symbol is changed, ABI version should be bumped.
-This bump in ABI version is at most once per kernel development cycle.
-
-For example, if current state of ``libbpf.map`` is:
-
-.. code-block::
- LIBBPF_0.0.1 {
- global:
- bpf_func_a;
- bpf_func_b;
- local:
- \*;
- };
-
-, and a new symbol ``bpf_func_c`` is being introduced, then
-``libbpf.map`` should be changed like this:
-
-.. code-block::
- LIBBPF_0.0.1 {
- global:
- bpf_func_a;
- bpf_func_b;
- local:
- \*;
- };
- LIBBPF_0.0.2 {
- global:
- bpf_func_c;
- } LIBBPF_0.0.1;
-
-, where new version ``LIBBPF_0.0.2`` depends on the previous
-``LIBBPF_0.0.1``.
-
-Format of version script and ways to handle ABI changes, including
-incompatible ones, described in details in [1].
-
-Stand-alone build
-=================
-
-Under https://github.com/libbpf/libbpf there is a (semi-)automated
-mirror of the mainline's version of libbpf for a stand-alone build.
-
-However, all changes to libbpf's code base must be upstreamed through
-the mainline kernel tree.
-
-License
-=======
-
-libbpf is dual-licensed under LGPL 2.1 and BSD 2-Clause.
-
-Links
-=====
-
-[1] https://www.akkadia.org/drepper/dsohowto.pdf
- (Chapter 3. Maintaining APIs and ABIs).
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index d27e34133973..97ec005c3c47 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -28,6 +28,10 @@
#include <asm/unistd.h>
#include <errno.h>
#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/kernel.h>
+#include <limits.h>
+#include <sys/resource.h>
#include "bpf.h"
#include "libbpf.h"
#include "libbpf_internal.h"
@@ -49,6 +53,12 @@
# define __NR_bpf 351
# elif defined(__arc__)
# define __NR_bpf 280
+# elif defined(__mips__) && defined(_ABIO32)
+# define __NR_bpf 4355
+# elif defined(__mips__) && defined(_ABIN32)
+# define __NR_bpf 6319
+# elif defined(__mips__) && defined(_ABI64)
+# define __NR_bpf 5315
# else
# error __NR_bpf not defined. libbpf does not support your arch.
# endif
@@ -65,128 +75,137 @@ static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
return syscall(__NR_bpf, cmd, attr, size);
}
-static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size)
+static inline int sys_bpf_fd(enum bpf_cmd cmd, union bpf_attr *attr,
+ unsigned int size)
+{
+ int fd;
+
+ fd = sys_bpf(cmd, attr, size);
+ return ensure_good_fd(fd);
+}
+
+int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts)
{
int fd;
do {
- fd = sys_bpf(BPF_PROG_LOAD, attr, size);
- } while (fd < 0 && errno == EAGAIN);
+ fd = sys_bpf_fd(BPF_PROG_LOAD, attr, size);
+ } while (fd < 0 && errno == EAGAIN && --attempts > 0);
return fd;
}
-int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
+/* Probe whether kernel switched from memlock-based (RLIMIT_MEMLOCK) to
+ * memcg-based memory accounting for BPF maps and progs. This was done in [0].
+ * We use the support for bpf_ktime_get_coarse_ns() helper, which was added in
+ * the same 5.11 Linux release ([1]), to detect memcg-based accounting for BPF.
+ *
+ * [0] https://lore.kernel.org/bpf/20201201215900.3569844-1-guro@fb.com/
+ * [1] d05512618056 ("bpf: Add bpf_ktime_get_coarse_ns helper")
+ */
+int probe_memcg_account(int token_fd)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd);
+ struct bpf_insn insns[] = {
+ BPF_EMIT_CALL(BPF_FUNC_ktime_get_coarse_ns),
+ BPF_EXIT_INSN(),
+ };
+ size_t insn_cnt = ARRAY_SIZE(insns);
union bpf_attr attr;
+ int prog_fd;
- memset(&attr, '\0', sizeof(attr));
-
- attr.map_type = create_attr->map_type;
- attr.key_size = create_attr->key_size;
- attr.value_size = create_attr->value_size;
- attr.max_entries = create_attr->max_entries;
- attr.map_flags = create_attr->map_flags;
- if (create_attr->name)
- memcpy(attr.map_name, create_attr->name,
- min(strlen(create_attr->name), BPF_OBJ_NAME_LEN - 1));
- attr.numa_node = create_attr->numa_node;
- attr.btf_fd = create_attr->btf_fd;
- attr.btf_key_type_id = create_attr->btf_key_type_id;
- attr.btf_value_type_id = create_attr->btf_value_type_id;
- attr.map_ifindex = create_attr->map_ifindex;
- if (attr.map_type == BPF_MAP_TYPE_STRUCT_OPS)
- attr.btf_vmlinux_value_type_id =
- create_attr->btf_vmlinux_value_type_id;
- else
- attr.inner_map_fd = create_attr->inner_map_fd;
-
- return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+ /* attempt loading freplace trying to use custom BTF */
+ memset(&attr, 0, attr_sz);
+ attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ attr.insns = ptr_to_u64(insns);
+ attr.insn_cnt = insn_cnt;
+ attr.license = ptr_to_u64("GPL");
+ attr.prog_token_fd = token_fd;
+ if (token_fd)
+ attr.prog_flags |= BPF_F_TOKEN_FD;
+
+ prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, attr_sz);
+ if (prog_fd >= 0) {
+ close(prog_fd);
+ return 1;
+ }
+ return 0;
}
-int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
- int key_size, int value_size, int max_entries,
- __u32 map_flags, int node)
-{
- struct bpf_create_map_attr map_attr = {};
+static bool memlock_bumped;
+static rlim_t memlock_rlim = RLIM_INFINITY;
- map_attr.name = name;
- map_attr.map_type = map_type;
- map_attr.map_flags = map_flags;
- map_attr.key_size = key_size;
- map_attr.value_size = value_size;
- map_attr.max_entries = max_entries;
- if (node >= 0) {
- map_attr.numa_node = node;
- map_attr.map_flags |= BPF_F_NUMA_NODE;
- }
+int libbpf_set_memlock_rlim(size_t memlock_bytes)
+{
+ if (memlock_bumped)
+ return libbpf_err(-EBUSY);
- return bpf_create_map_xattr(&map_attr);
+ memlock_rlim = memlock_bytes;
+ return 0;
}
-int bpf_create_map(enum bpf_map_type map_type, int key_size,
- int value_size, int max_entries, __u32 map_flags)
+int bump_rlimit_memlock(void)
{
- struct bpf_create_map_attr map_attr = {};
+ struct rlimit rlim;
- map_attr.map_type = map_type;
- map_attr.map_flags = map_flags;
- map_attr.key_size = key_size;
- map_attr.value_size = value_size;
- map_attr.max_entries = max_entries;
+ /* if kernel supports memcg-based accounting, skip bumping RLIMIT_MEMLOCK */
+ if (memlock_bumped || feat_supported(NULL, FEAT_MEMCG_ACCOUNT))
+ return 0;
- return bpf_create_map_xattr(&map_attr);
-}
+ memlock_bumped = true;
-int bpf_create_map_name(enum bpf_map_type map_type, const char *name,
- int key_size, int value_size, int max_entries,
- __u32 map_flags)
-{
- struct bpf_create_map_attr map_attr = {};
+ /* zero memlock_rlim_max disables auto-bumping RLIMIT_MEMLOCK */
+ if (memlock_rlim == 0)
+ return 0;
- map_attr.name = name;
- map_attr.map_type = map_type;
- map_attr.map_flags = map_flags;
- map_attr.key_size = key_size;
- map_attr.value_size = value_size;
- map_attr.max_entries = max_entries;
+ rlim.rlim_cur = rlim.rlim_max = memlock_rlim;
+ if (setrlimit(RLIMIT_MEMLOCK, &rlim))
+ return -errno;
- return bpf_create_map_xattr(&map_attr);
+ return 0;
}
-int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name,
- int key_size, int inner_map_fd, int max_entries,
- __u32 map_flags, int node)
+int bpf_map_create(enum bpf_map_type map_type,
+ const char *map_name,
+ __u32 key_size,
+ __u32 value_size,
+ __u32 max_entries,
+ const struct bpf_map_create_opts *opts)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, map_token_fd);
union bpf_attr attr;
+ int fd;
- memset(&attr, '\0', sizeof(attr));
+ bump_rlimit_memlock();
+
+ memset(&attr, 0, attr_sz);
+
+ if (!OPTS_VALID(opts, bpf_map_create_opts))
+ return libbpf_err(-EINVAL);
attr.map_type = map_type;
+ if (map_name && feat_supported(NULL, FEAT_PROG_NAME))
+ libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name));
attr.key_size = key_size;
- attr.value_size = 4;
- attr.inner_map_fd = inner_map_fd;
+ attr.value_size = value_size;
attr.max_entries = max_entries;
- attr.map_flags = map_flags;
- if (name)
- memcpy(attr.map_name, name,
- min(strlen(name), BPF_OBJ_NAME_LEN - 1));
-
- if (node >= 0) {
- attr.map_flags |= BPF_F_NUMA_NODE;
- attr.numa_node = node;
- }
- return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
-}
+ attr.btf_fd = OPTS_GET(opts, btf_fd, 0);
+ attr.btf_key_type_id = OPTS_GET(opts, btf_key_type_id, 0);
+ attr.btf_value_type_id = OPTS_GET(opts, btf_value_type_id, 0);
+ attr.btf_vmlinux_value_type_id = OPTS_GET(opts, btf_vmlinux_value_type_id, 0);
+ attr.value_type_btf_obj_fd = OPTS_GET(opts, value_type_btf_obj_fd, 0);
-int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name,
- int key_size, int inner_map_fd, int max_entries,
- __u32 map_flags)
-{
- return bpf_create_map_in_map_node(map_type, name, key_size,
- inner_map_fd, max_entries, map_flags,
- -1);
+ attr.inner_map_fd = OPTS_GET(opts, inner_map_fd, 0);
+ attr.map_flags = OPTS_GET(opts, map_flags, 0);
+ attr.map_extra = OPTS_GET(opts, map_extra, 0);
+ attr.numa_node = OPTS_GET(opts, numa_node, 0);
+ attr.map_ifindex = OPTS_GET(opts, map_ifindex, 0);
+
+ attr.map_token_fd = OPTS_GET(opts, token_fd, 0);
+
+ fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz);
+ return libbpf_err_errno(fd);
}
static void *
@@ -214,61 +233,93 @@ alloc_zero_tailing_info(const void *orecord, __u32 cnt,
return info;
}
-int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
- char *log_buf, size_t log_buf_sz)
+int bpf_prog_load(enum bpf_prog_type prog_type,
+ const char *prog_name, const char *license,
+ const struct bpf_insn *insns, size_t insn_cnt,
+ struct bpf_prog_load_opts *opts)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, prog_token_fd);
void *finfo = NULL, *linfo = NULL;
+ const char *func_info, *line_info;
+ __u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd;
+ __u32 func_info_rec_size, line_info_rec_size;
+ int fd, attempts;
union bpf_attr attr;
- __u32 log_level;
- int fd;
+ char *log_buf;
- if (!load_attr || !log_buf != !log_buf_sz)
- return -EINVAL;
-
- log_level = load_attr->log_level;
- if (log_level > (4 | 2 | 1) || (log_level && !log_buf))
- return -EINVAL;
-
- memset(&attr, 0, sizeof(attr));
- attr.prog_type = load_attr->prog_type;
- attr.expected_attach_type = load_attr->expected_attach_type;
- if (attr.prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
- attr.prog_type == BPF_PROG_TYPE_LSM) {
- attr.attach_btf_id = load_attr->attach_btf_id;
- } else if (attr.prog_type == BPF_PROG_TYPE_TRACING ||
- attr.prog_type == BPF_PROG_TYPE_EXT) {
- attr.attach_btf_id = load_attr->attach_btf_id;
- attr.attach_prog_fd = load_attr->attach_prog_fd;
- } else {
- attr.prog_ifindex = load_attr->prog_ifindex;
- attr.kern_version = load_attr->kern_version;
- }
- attr.insn_cnt = (__u32)load_attr->insns_cnt;
- attr.insns = ptr_to_u64(load_attr->insns);
- attr.license = ptr_to_u64(load_attr->license);
+ bump_rlimit_memlock();
+
+ if (!OPTS_VALID(opts, bpf_prog_load_opts))
+ return libbpf_err(-EINVAL);
+
+ attempts = OPTS_GET(opts, attempts, 0);
+ if (attempts < 0)
+ return libbpf_err(-EINVAL);
+ if (attempts == 0)
+ attempts = PROG_LOAD_ATTEMPTS;
+
+ memset(&attr, 0, attr_sz);
+
+ attr.prog_type = prog_type;
+ attr.expected_attach_type = OPTS_GET(opts, expected_attach_type, 0);
+
+ attr.prog_btf_fd = OPTS_GET(opts, prog_btf_fd, 0);
+ attr.prog_flags = OPTS_GET(opts, prog_flags, 0);
+ attr.prog_ifindex = OPTS_GET(opts, prog_ifindex, 0);
+ attr.kern_version = OPTS_GET(opts, kern_version, 0);
+ attr.prog_token_fd = OPTS_GET(opts, token_fd, 0);
+
+ if (prog_name && feat_supported(NULL, FEAT_PROG_NAME))
+ libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name));
+ attr.license = ptr_to_u64(license);
+
+ if (insn_cnt > UINT_MAX)
+ return libbpf_err(-E2BIG);
+
+ attr.insns = ptr_to_u64(insns);
+ attr.insn_cnt = (__u32)insn_cnt;
+
+ attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0);
+ attach_btf_obj_fd = OPTS_GET(opts, attach_btf_obj_fd, 0);
+
+ if (attach_prog_fd && attach_btf_obj_fd)
+ return libbpf_err(-EINVAL);
+
+ attr.attach_btf_id = OPTS_GET(opts, attach_btf_id, 0);
+ if (attach_prog_fd)
+ attr.attach_prog_fd = attach_prog_fd;
+ else
+ attr.attach_btf_obj_fd = attach_btf_obj_fd;
+
+ log_buf = OPTS_GET(opts, log_buf, NULL);
+ log_size = OPTS_GET(opts, log_size, 0);
+ log_level = OPTS_GET(opts, log_level, 0);
+
+ if (!!log_buf != !!log_size)
+ return libbpf_err(-EINVAL);
+
+ func_info_rec_size = OPTS_GET(opts, func_info_rec_size, 0);
+ func_info = OPTS_GET(opts, func_info, NULL);
+ attr.func_info_rec_size = func_info_rec_size;
+ attr.func_info = ptr_to_u64(func_info);
+ attr.func_info_cnt = OPTS_GET(opts, func_info_cnt, 0);
+
+ line_info_rec_size = OPTS_GET(opts, line_info_rec_size, 0);
+ line_info = OPTS_GET(opts, line_info, NULL);
+ attr.line_info_rec_size = line_info_rec_size;
+ attr.line_info = ptr_to_u64(line_info);
+ attr.line_info_cnt = OPTS_GET(opts, line_info_cnt, 0);
+
+ attr.fd_array = ptr_to_u64(OPTS_GET(opts, fd_array, NULL));
- attr.log_level = log_level;
if (log_level) {
attr.log_buf = ptr_to_u64(log_buf);
- attr.log_size = log_buf_sz;
- } else {
- attr.log_buf = ptr_to_u64(NULL);
- attr.log_size = 0;
+ attr.log_size = log_size;
+ attr.log_level = log_level;
}
- attr.prog_btf_fd = load_attr->prog_btf_fd;
- attr.func_info_rec_size = load_attr->func_info_rec_size;
- attr.func_info_cnt = load_attr->func_info_cnt;
- attr.func_info = ptr_to_u64(load_attr->func_info);
- attr.line_info_rec_size = load_attr->line_info_rec_size;
- attr.line_info_cnt = load_attr->line_info_cnt;
- attr.line_info = ptr_to_u64(load_attr->line_info);
- if (load_attr->name)
- memcpy(attr.prog_name, load_attr->name,
- min(strlen(load_attr->name), BPF_OBJ_NAME_LEN - 1));
- attr.prog_flags = load_attr->prog_flags;
-
- fd = sys_bpf_prog_load(&attr, sizeof(attr));
+ fd = sys_bpf_prog_load(&attr, attr_sz, attempts);
+ OPTS_SET(opts, log_true_size, attr.log_true_size);
if (fd >= 0)
return fd;
@@ -278,177 +329,195 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
*/
while (errno == E2BIG && (!finfo || !linfo)) {
if (!finfo && attr.func_info_cnt &&
- attr.func_info_rec_size < load_attr->func_info_rec_size) {
+ attr.func_info_rec_size < func_info_rec_size) {
/* try with corrected func info records */
- finfo = alloc_zero_tailing_info(load_attr->func_info,
- load_attr->func_info_cnt,
- load_attr->func_info_rec_size,
+ finfo = alloc_zero_tailing_info(func_info,
+ attr.func_info_cnt,
+ func_info_rec_size,
attr.func_info_rec_size);
- if (!finfo)
+ if (!finfo) {
+ errno = E2BIG;
goto done;
+ }
attr.func_info = ptr_to_u64(finfo);
- attr.func_info_rec_size = load_attr->func_info_rec_size;
+ attr.func_info_rec_size = func_info_rec_size;
} else if (!linfo && attr.line_info_cnt &&
- attr.line_info_rec_size <
- load_attr->line_info_rec_size) {
- linfo = alloc_zero_tailing_info(load_attr->line_info,
- load_attr->line_info_cnt,
- load_attr->line_info_rec_size,
+ attr.line_info_rec_size < line_info_rec_size) {
+ linfo = alloc_zero_tailing_info(line_info,
+ attr.line_info_cnt,
+ line_info_rec_size,
attr.line_info_rec_size);
- if (!linfo)
+ if (!linfo) {
+ errno = E2BIG;
goto done;
+ }
attr.line_info = ptr_to_u64(linfo);
- attr.line_info_rec_size = load_attr->line_info_rec_size;
+ attr.line_info_rec_size = line_info_rec_size;
} else {
break;
}
- fd = sys_bpf_prog_load(&attr, sizeof(attr));
-
+ fd = sys_bpf_prog_load(&attr, attr_sz, attempts);
+ OPTS_SET(opts, log_true_size, attr.log_true_size);
if (fd >= 0)
goto done;
}
- if (log_level || !log_buf)
- goto done;
+ if (log_level == 0 && log_buf) {
+ /* log_level == 0 with non-NULL log_buf requires retrying on error
+ * with log_level == 1 and log_buf/log_buf_size set, to get details of
+ * failure
+ */
+ attr.log_buf = ptr_to_u64(log_buf);
+ attr.log_size = log_size;
+ attr.log_level = 1;
- /* Try again with log */
- attr.log_buf = ptr_to_u64(log_buf);
- attr.log_size = log_buf_sz;
- attr.log_level = 1;
- log_buf[0] = 0;
- fd = sys_bpf_prog_load(&attr, sizeof(attr));
+ fd = sys_bpf_prog_load(&attr, attr_sz, attempts);
+ OPTS_SET(opts, log_true_size, attr.log_true_size);
+ }
done:
+ /* free() doesn't affect errno, so we don't need to restore it */
free(finfo);
free(linfo);
- return fd;
+ return libbpf_err_errno(fd);
}
-int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
- size_t insns_cnt, const char *license,
- __u32 kern_version, char *log_buf,
- size_t log_buf_sz)
+int bpf_map_update_elem(int fd, const void *key, const void *value,
+ __u64 flags)
{
- struct bpf_load_program_attr load_attr;
+ const size_t attr_sz = offsetofend(union bpf_attr, flags);
+ union bpf_attr attr;
+ int ret;
- memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
- load_attr.prog_type = type;
- load_attr.expected_attach_type = 0;
- load_attr.name = NULL;
- load_attr.insns = insns;
- load_attr.insns_cnt = insns_cnt;
- load_attr.license = license;
- load_attr.kern_version = kern_version;
+ memset(&attr, 0, attr_sz);
+ attr.map_fd = fd;
+ attr.key = ptr_to_u64(key);
+ attr.value = ptr_to_u64(value);
+ attr.flags = flags;
- return bpf_load_program_xattr(&load_attr, log_buf, log_buf_sz);
+ ret = sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, attr_sz);
+ return libbpf_err_errno(ret);
}
-int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
- size_t insns_cnt, __u32 prog_flags, const char *license,
- __u32 kern_version, char *log_buf, size_t log_buf_sz,
- int log_level)
+int bpf_map_lookup_elem(int fd, const void *key, void *value)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, flags);
union bpf_attr attr;
+ int ret;
- memset(&attr, 0, sizeof(attr));
- attr.prog_type = type;
- attr.insn_cnt = (__u32)insns_cnt;
- attr.insns = ptr_to_u64(insns);
- attr.license = ptr_to_u64(license);
- attr.log_buf = ptr_to_u64(log_buf);
- attr.log_size = log_buf_sz;
- attr.log_level = log_level;
- log_buf[0] = 0;
- attr.kern_version = kern_version;
- attr.prog_flags = prog_flags;
+ memset(&attr, 0, attr_sz);
+ attr.map_fd = fd;
+ attr.key = ptr_to_u64(key);
+ attr.value = ptr_to_u64(value);
- return sys_bpf_prog_load(&attr, sizeof(attr));
+ ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, attr_sz);
+ return libbpf_err_errno(ret);
}
-int bpf_map_update_elem(int fd, const void *key, const void *value,
- __u64 flags)
+int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, flags);
union bpf_attr attr;
+ int ret;
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.map_fd = fd;
attr.key = ptr_to_u64(key);
attr.value = ptr_to_u64(value);
attr.flags = flags;
- return sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, attr_sz);
+ return libbpf_err_errno(ret);
}
-int bpf_map_lookup_elem(int fd, const void *key, void *value)
+int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, flags);
union bpf_attr attr;
+ int ret;
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.map_fd = fd;
attr.key = ptr_to_u64(key);
attr.value = ptr_to_u64(value);
- return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, attr_sz);
+ return libbpf_err_errno(ret);
}
-int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags)
+int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key, void *value, __u64 flags)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, flags);
union bpf_attr attr;
+ int ret;
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.map_fd = fd;
attr.key = ptr_to_u64(key);
attr.value = ptr_to_u64(value);
attr.flags = flags;
- return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, attr_sz);
+ return libbpf_err_errno(ret);
}
-int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
+int bpf_map_delete_elem(int fd, const void *key)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, flags);
union bpf_attr attr;
+ int ret;
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.map_fd = fd;
attr.key = ptr_to_u64(key);
- attr.value = ptr_to_u64(value);
- return sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, attr_sz);
+ return libbpf_err_errno(ret);
}
-int bpf_map_delete_elem(int fd, const void *key)
+int bpf_map_delete_elem_flags(int fd, const void *key, __u64 flags)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, flags);
union bpf_attr attr;
+ int ret;
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.map_fd = fd;
attr.key = ptr_to_u64(key);
+ attr.flags = flags;
- return sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, attr_sz);
+ return libbpf_err_errno(ret);
}
int bpf_map_get_next_key(int fd, const void *key, void *next_key)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, next_key);
union bpf_attr attr;
+ int ret;
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.map_fd = fd;
attr.key = ptr_to_u64(key);
attr.next_key = ptr_to_u64(next_key);
- return sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, attr_sz);
+ return libbpf_err_errno(ret);
}
int bpf_map_freeze(int fd)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, map_fd);
union bpf_attr attr;
+ int ret;
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.map_fd = fd;
- return sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_MAP_FREEZE, &attr, attr_sz);
+ return libbpf_err_errno(ret);
}
static int bpf_map_batch_common(int cmd, int fd, void *in_batch,
@@ -456,13 +525,14 @@ static int bpf_map_batch_common(int cmd, int fd, void *in_batch,
__u32 *count,
const struct bpf_map_batch_opts *opts)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, batch);
union bpf_attr attr;
int ret;
if (!OPTS_VALID(opts, bpf_map_batch_opts))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.batch.map_fd = fd;
attr.batch.in_batch = ptr_to_u64(in_batch);
attr.batch.out_batch = ptr_to_u64(out_batch);
@@ -472,17 +542,17 @@ static int bpf_map_batch_common(int cmd, int fd, void *in_batch,
attr.batch.elem_flags = OPTS_GET(opts, elem_flags, 0);
attr.batch.flags = OPTS_GET(opts, flags, 0);
- ret = sys_bpf(cmd, &attr, sizeof(attr));
+ ret = sys_bpf(cmd, &attr, attr_sz);
*count = attr.batch.count;
- return ret;
+ return libbpf_err_errno(ret);
}
-int bpf_map_delete_batch(int fd, void *keys, __u32 *count,
+int bpf_map_delete_batch(int fd, const void *keys, __u32 *count,
const struct bpf_map_batch_opts *opts)
{
return bpf_map_batch_common(BPF_MAP_DELETE_BATCH, fd, NULL,
- NULL, keys, NULL, count, opts);
+ NULL, (void *)keys, NULL, count, opts);
}
int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, void *keys,
@@ -502,32 +572,58 @@ int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, void *out_batch,
count, opts);
}
-int bpf_map_update_batch(int fd, void *keys, void *values, __u32 *count,
+int bpf_map_update_batch(int fd, const void *keys, const void *values, __u32 *count,
const struct bpf_map_batch_opts *opts)
{
return bpf_map_batch_common(BPF_MAP_UPDATE_BATCH, fd, NULL, NULL,
- keys, values, count, opts);
+ (void *)keys, (void *)values, count, opts);
}
-int bpf_obj_pin(int fd, const char *pathname)
+int bpf_obj_pin_opts(int fd, const char *pathname, const struct bpf_obj_pin_opts *opts)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, path_fd);
union bpf_attr attr;
+ int ret;
+
+ if (!OPTS_VALID(opts, bpf_obj_pin_opts))
+ return libbpf_err(-EINVAL);
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
+ attr.path_fd = OPTS_GET(opts, path_fd, 0);
attr.pathname = ptr_to_u64((void *)pathname);
+ attr.file_flags = OPTS_GET(opts, file_flags, 0);
attr.bpf_fd = fd;
- return sys_bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_OBJ_PIN, &attr, attr_sz);
+ return libbpf_err_errno(ret);
+}
+
+int bpf_obj_pin(int fd, const char *pathname)
+{
+ return bpf_obj_pin_opts(fd, pathname, NULL);
}
int bpf_obj_get(const char *pathname)
{
+ return bpf_obj_get_opts(pathname, NULL);
+}
+
+int bpf_obj_get_opts(const char *pathname, const struct bpf_obj_get_opts *opts)
+{
+ const size_t attr_sz = offsetofend(union bpf_attr, path_fd);
union bpf_attr attr;
+ int fd;
+
+ if (!OPTS_VALID(opts, bpf_obj_get_opts))
+ return libbpf_err(-EINVAL);
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
+ attr.path_fd = OPTS_GET(opts, path_fd, 0);
attr.pathname = ptr_to_u64((void *)pathname);
+ attr.file_flags = OPTS_GET(opts, file_flags, 0);
- return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr));
+ fd = sys_bpf_fd(BPF_OBJ_GET, &attr, attr_sz);
+ return libbpf_err_errno(fd);
}
int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
@@ -537,203 +633,358 @@ int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
.flags = flags,
);
- return bpf_prog_attach_xattr(prog_fd, target_fd, type, &opts);
+ return bpf_prog_attach_opts(prog_fd, target_fd, type, &opts);
}
-int bpf_prog_attach_xattr(int prog_fd, int target_fd,
- enum bpf_attach_type type,
- const struct bpf_prog_attach_opts *opts)
+int bpf_prog_attach_opts(int prog_fd, int target, enum bpf_attach_type type,
+ const struct bpf_prog_attach_opts *opts)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, expected_revision);
+ __u32 relative_id, flags;
+ int ret, relative_fd;
union bpf_attr attr;
if (!OPTS_VALID(opts, bpf_prog_attach_opts))
- return -EINVAL;
-
- memset(&attr, 0, sizeof(attr));
- attr.target_fd = target_fd;
- attr.attach_bpf_fd = prog_fd;
- attr.attach_type = type;
- attr.attach_flags = OPTS_GET(opts, flags, 0);
- attr.replace_bpf_fd = OPTS_GET(opts, replace_prog_fd, 0);
+ return libbpf_err(-EINVAL);
+
+ relative_id = OPTS_GET(opts, relative_id, 0);
+ relative_fd = OPTS_GET(opts, relative_fd, 0);
+ flags = OPTS_GET(opts, flags, 0);
+
+ /* validate we don't have unexpected combinations of non-zero fields */
+ if (relative_fd && relative_id)
+ return libbpf_err(-EINVAL);
+
+ memset(&attr, 0, attr_sz);
+ attr.target_fd = target;
+ attr.attach_bpf_fd = prog_fd;
+ attr.attach_type = type;
+ attr.replace_bpf_fd = OPTS_GET(opts, replace_fd, 0);
+ attr.expected_revision = OPTS_GET(opts, expected_revision, 0);
+
+ if (relative_id) {
+ attr.attach_flags = flags | BPF_F_ID;
+ attr.relative_id = relative_id;
+ } else {
+ attr.attach_flags = flags;
+ attr.relative_fd = relative_fd;
+ }
- return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_PROG_ATTACH, &attr, attr_sz);
+ return libbpf_err_errno(ret);
}
-int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
+int bpf_prog_detach_opts(int prog_fd, int target, enum bpf_attach_type type,
+ const struct bpf_prog_detach_opts *opts)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, expected_revision);
+ __u32 relative_id, flags;
+ int ret, relative_fd;
union bpf_attr attr;
- memset(&attr, 0, sizeof(attr));
- attr.target_fd = target_fd;
- attr.attach_type = type;
+ if (!OPTS_VALID(opts, bpf_prog_detach_opts))
+ return libbpf_err(-EINVAL);
+
+ relative_id = OPTS_GET(opts, relative_id, 0);
+ relative_fd = OPTS_GET(opts, relative_fd, 0);
+ flags = OPTS_GET(opts, flags, 0);
- return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
+ /* validate we don't have unexpected combinations of non-zero fields */
+ if (relative_fd && relative_id)
+ return libbpf_err(-EINVAL);
+
+ memset(&attr, 0, attr_sz);
+ attr.target_fd = target;
+ attr.attach_bpf_fd = prog_fd;
+ attr.attach_type = type;
+ attr.expected_revision = OPTS_GET(opts, expected_revision, 0);
+
+ if (relative_id) {
+ attr.attach_flags = flags | BPF_F_ID;
+ attr.relative_id = relative_id;
+ } else {
+ attr.attach_flags = flags;
+ attr.relative_fd = relative_fd;
+ }
+
+ ret = sys_bpf(BPF_PROG_DETACH, &attr, attr_sz);
+ return libbpf_err_errno(ret);
}
-int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type)
+int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
{
- union bpf_attr attr;
-
- memset(&attr, 0, sizeof(attr));
- attr.target_fd = target_fd;
- attr.attach_bpf_fd = prog_fd;
- attr.attach_type = type;
+ return bpf_prog_detach_opts(0, target_fd, type, NULL);
+}
- return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
+int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type)
+{
+ return bpf_prog_detach_opts(prog_fd, target_fd, type, NULL);
}
int bpf_link_create(int prog_fd, int target_fd,
enum bpf_attach_type attach_type,
const struct bpf_link_create_opts *opts)
{
- __u32 target_btf_id, iter_info_len;
+ const size_t attr_sz = offsetofend(union bpf_attr, link_create);
+ __u32 target_btf_id, iter_info_len, relative_id;
+ int fd, err, relative_fd;
union bpf_attr attr;
if (!OPTS_VALID(opts, bpf_link_create_opts))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
iter_info_len = OPTS_GET(opts, iter_info_len, 0);
target_btf_id = OPTS_GET(opts, target_btf_id, 0);
- if (iter_info_len && target_btf_id)
- return -EINVAL;
+ /* validate we don't have unexpected combinations of non-zero fields */
+ if (iter_info_len || target_btf_id) {
+ if (iter_info_len && target_btf_id)
+ return libbpf_err(-EINVAL);
+ if (!OPTS_ZEROED(opts, target_btf_id))
+ return libbpf_err(-EINVAL);
+ }
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.link_create.prog_fd = prog_fd;
attr.link_create.target_fd = target_fd;
attr.link_create.attach_type = attach_type;
attr.link_create.flags = OPTS_GET(opts, flags, 0);
- if (iter_info_len) {
- attr.link_create.iter_info =
- ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0));
- attr.link_create.iter_info_len = iter_info_len;
- } else if (target_btf_id) {
+ if (target_btf_id) {
attr.link_create.target_btf_id = target_btf_id;
+ goto proceed;
}
- return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
+ switch (attach_type) {
+ case BPF_TRACE_ITER:
+ attr.link_create.iter_info = ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0));
+ attr.link_create.iter_info_len = iter_info_len;
+ break;
+ case BPF_PERF_EVENT:
+ attr.link_create.perf_event.bpf_cookie = OPTS_GET(opts, perf_event.bpf_cookie, 0);
+ if (!OPTS_ZEROED(opts, perf_event))
+ return libbpf_err(-EINVAL);
+ break;
+ case BPF_TRACE_KPROBE_MULTI:
+ attr.link_create.kprobe_multi.flags = OPTS_GET(opts, kprobe_multi.flags, 0);
+ attr.link_create.kprobe_multi.cnt = OPTS_GET(opts, kprobe_multi.cnt, 0);
+ attr.link_create.kprobe_multi.syms = ptr_to_u64(OPTS_GET(opts, kprobe_multi.syms, 0));
+ attr.link_create.kprobe_multi.addrs = ptr_to_u64(OPTS_GET(opts, kprobe_multi.addrs, 0));
+ attr.link_create.kprobe_multi.cookies = ptr_to_u64(OPTS_GET(opts, kprobe_multi.cookies, 0));
+ if (!OPTS_ZEROED(opts, kprobe_multi))
+ return libbpf_err(-EINVAL);
+ break;
+ case BPF_TRACE_UPROBE_MULTI:
+ attr.link_create.uprobe_multi.flags = OPTS_GET(opts, uprobe_multi.flags, 0);
+ attr.link_create.uprobe_multi.cnt = OPTS_GET(opts, uprobe_multi.cnt, 0);
+ attr.link_create.uprobe_multi.path = ptr_to_u64(OPTS_GET(opts, uprobe_multi.path, 0));
+ attr.link_create.uprobe_multi.offsets = ptr_to_u64(OPTS_GET(opts, uprobe_multi.offsets, 0));
+ attr.link_create.uprobe_multi.ref_ctr_offsets = ptr_to_u64(OPTS_GET(opts, uprobe_multi.ref_ctr_offsets, 0));
+ attr.link_create.uprobe_multi.cookies = ptr_to_u64(OPTS_GET(opts, uprobe_multi.cookies, 0));
+ attr.link_create.uprobe_multi.pid = OPTS_GET(opts, uprobe_multi.pid, 0);
+ if (!OPTS_ZEROED(opts, uprobe_multi))
+ return libbpf_err(-EINVAL);
+ break;
+ case BPF_TRACE_FENTRY:
+ case BPF_TRACE_FEXIT:
+ case BPF_MODIFY_RETURN:
+ case BPF_LSM_MAC:
+ attr.link_create.tracing.cookie = OPTS_GET(opts, tracing.cookie, 0);
+ if (!OPTS_ZEROED(opts, tracing))
+ return libbpf_err(-EINVAL);
+ break;
+ case BPF_NETFILTER:
+ attr.link_create.netfilter.pf = OPTS_GET(opts, netfilter.pf, 0);
+ attr.link_create.netfilter.hooknum = OPTS_GET(opts, netfilter.hooknum, 0);
+ attr.link_create.netfilter.priority = OPTS_GET(opts, netfilter.priority, 0);
+ attr.link_create.netfilter.flags = OPTS_GET(opts, netfilter.flags, 0);
+ if (!OPTS_ZEROED(opts, netfilter))
+ return libbpf_err(-EINVAL);
+ break;
+ case BPF_TCX_INGRESS:
+ case BPF_TCX_EGRESS:
+ relative_fd = OPTS_GET(opts, tcx.relative_fd, 0);
+ relative_id = OPTS_GET(opts, tcx.relative_id, 0);
+ if (relative_fd && relative_id)
+ return libbpf_err(-EINVAL);
+ if (relative_id) {
+ attr.link_create.tcx.relative_id = relative_id;
+ attr.link_create.flags |= BPF_F_ID;
+ } else {
+ attr.link_create.tcx.relative_fd = relative_fd;
+ }
+ attr.link_create.tcx.expected_revision = OPTS_GET(opts, tcx.expected_revision, 0);
+ if (!OPTS_ZEROED(opts, tcx))
+ return libbpf_err(-EINVAL);
+ break;
+ case BPF_NETKIT_PRIMARY:
+ case BPF_NETKIT_PEER:
+ relative_fd = OPTS_GET(opts, netkit.relative_fd, 0);
+ relative_id = OPTS_GET(opts, netkit.relative_id, 0);
+ if (relative_fd && relative_id)
+ return libbpf_err(-EINVAL);
+ if (relative_id) {
+ attr.link_create.netkit.relative_id = relative_id;
+ attr.link_create.flags |= BPF_F_ID;
+ } else {
+ attr.link_create.netkit.relative_fd = relative_fd;
+ }
+ attr.link_create.netkit.expected_revision = OPTS_GET(opts, netkit.expected_revision, 0);
+ if (!OPTS_ZEROED(opts, netkit))
+ return libbpf_err(-EINVAL);
+ break;
+ default:
+ if (!OPTS_ZEROED(opts, flags))
+ return libbpf_err(-EINVAL);
+ break;
+ }
+proceed:
+ fd = sys_bpf_fd(BPF_LINK_CREATE, &attr, attr_sz);
+ if (fd >= 0)
+ return fd;
+ /* we'll get EINVAL if LINK_CREATE doesn't support attaching fentry
+ * and other similar programs
+ */
+ err = -errno;
+ if (err != -EINVAL)
+ return libbpf_err(err);
+
+ /* if user used features not supported by
+ * BPF_RAW_TRACEPOINT_OPEN command, then just give up immediately
+ */
+ if (attr.link_create.target_fd || attr.link_create.target_btf_id)
+ return libbpf_err(err);
+ if (!OPTS_ZEROED(opts, sz))
+ return libbpf_err(err);
+
+ /* otherwise, for few select kinds of programs that can be
+ * attached using BPF_RAW_TRACEPOINT_OPEN command, try that as
+ * a fallback for older kernels
+ */
+ switch (attach_type) {
+ case BPF_TRACE_RAW_TP:
+ case BPF_LSM_MAC:
+ case BPF_TRACE_FENTRY:
+ case BPF_TRACE_FEXIT:
+ case BPF_MODIFY_RETURN:
+ return bpf_raw_tracepoint_open(NULL, prog_fd);
+ default:
+ return libbpf_err(err);
+ }
}
int bpf_link_detach(int link_fd)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, link_detach);
union bpf_attr attr;
+ int ret;
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.link_detach.link_fd = link_fd;
- return sys_bpf(BPF_LINK_DETACH, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_LINK_DETACH, &attr, attr_sz);
+ return libbpf_err_errno(ret);
}
int bpf_link_update(int link_fd, int new_prog_fd,
const struct bpf_link_update_opts *opts)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, link_update);
union bpf_attr attr;
+ int ret;
if (!OPTS_VALID(opts, bpf_link_update_opts))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
+
+ if (OPTS_GET(opts, old_prog_fd, 0) && OPTS_GET(opts, old_map_fd, 0))
+ return libbpf_err(-EINVAL);
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.link_update.link_fd = link_fd;
attr.link_update.new_prog_fd = new_prog_fd;
attr.link_update.flags = OPTS_GET(opts, flags, 0);
- attr.link_update.old_prog_fd = OPTS_GET(opts, old_prog_fd, 0);
+ if (OPTS_GET(opts, old_prog_fd, 0))
+ attr.link_update.old_prog_fd = OPTS_GET(opts, old_prog_fd, 0);
+ else if (OPTS_GET(opts, old_map_fd, 0))
+ attr.link_update.old_map_fd = OPTS_GET(opts, old_map_fd, 0);
- return sys_bpf(BPF_LINK_UPDATE, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_LINK_UPDATE, &attr, attr_sz);
+ return libbpf_err_errno(ret);
}
int bpf_iter_create(int link_fd)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, iter_create);
union bpf_attr attr;
+ int fd;
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.iter_create.link_fd = link_fd;
- return sys_bpf(BPF_ITER_CREATE, &attr, sizeof(attr));
+ fd = sys_bpf_fd(BPF_ITER_CREATE, &attr, attr_sz);
+ return libbpf_err_errno(fd);
}
-int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
- __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt)
+int bpf_prog_query_opts(int target, enum bpf_attach_type type,
+ struct bpf_prog_query_opts *opts)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, query);
union bpf_attr attr;
int ret;
- memset(&attr, 0, sizeof(attr));
- attr.query.target_fd = target_fd;
- attr.query.attach_type = type;
- attr.query.query_flags = query_flags;
- attr.query.prog_cnt = *prog_cnt;
- attr.query.prog_ids = ptr_to_u64(prog_ids);
+ if (!OPTS_VALID(opts, bpf_prog_query_opts))
+ return libbpf_err(-EINVAL);
- ret = sys_bpf(BPF_PROG_QUERY, &attr, sizeof(attr));
- if (attach_flags)
- *attach_flags = attr.query.attach_flags;
- *prog_cnt = attr.query.prog_cnt;
- return ret;
-}
+ memset(&attr, 0, attr_sz);
+ attr.query.target_fd = target;
+ attr.query.attach_type = type;
+ attr.query.query_flags = OPTS_GET(opts, query_flags, 0);
+ attr.query.count = OPTS_GET(opts, count, 0);
+ attr.query.prog_ids = ptr_to_u64(OPTS_GET(opts, prog_ids, NULL));
+ attr.query.link_ids = ptr_to_u64(OPTS_GET(opts, link_ids, NULL));
+ attr.query.prog_attach_flags = ptr_to_u64(OPTS_GET(opts, prog_attach_flags, NULL));
+ attr.query.link_attach_flags = ptr_to_u64(OPTS_GET(opts, link_attach_flags, NULL));
-int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
- void *data_out, __u32 *size_out, __u32 *retval,
- __u32 *duration)
-{
- union bpf_attr attr;
- int ret;
+ ret = sys_bpf(BPF_PROG_QUERY, &attr, attr_sz);
- memset(&attr, 0, sizeof(attr));
- attr.test.prog_fd = prog_fd;
- attr.test.data_in = ptr_to_u64(data);
- attr.test.data_out = ptr_to_u64(data_out);
- attr.test.data_size_in = size;
- attr.test.repeat = repeat;
+ OPTS_SET(opts, attach_flags, attr.query.attach_flags);
+ OPTS_SET(opts, revision, attr.query.revision);
+ OPTS_SET(opts, count, attr.query.count);
- ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
- if (size_out)
- *size_out = attr.test.data_size_out;
- if (retval)
- *retval = attr.test.retval;
- if (duration)
- *duration = attr.test.duration;
- return ret;
+ return libbpf_err_errno(ret);
}
-int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
+int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
+ __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt)
{
- union bpf_attr attr;
+ LIBBPF_OPTS(bpf_prog_query_opts, opts);
int ret;
- if (!test_attr->data_out && test_attr->data_size_out > 0)
- return -EINVAL;
-
- memset(&attr, 0, sizeof(attr));
- attr.test.prog_fd = test_attr->prog_fd;
- attr.test.data_in = ptr_to_u64(test_attr->data_in);
- attr.test.data_out = ptr_to_u64(test_attr->data_out);
- attr.test.data_size_in = test_attr->data_size_in;
- attr.test.data_size_out = test_attr->data_size_out;
- attr.test.ctx_in = ptr_to_u64(test_attr->ctx_in);
- attr.test.ctx_out = ptr_to_u64(test_attr->ctx_out);
- attr.test.ctx_size_in = test_attr->ctx_size_in;
- attr.test.ctx_size_out = test_attr->ctx_size_out;
- attr.test.repeat = test_attr->repeat;
-
- ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
- test_attr->data_size_out = attr.test.data_size_out;
- test_attr->ctx_size_out = attr.test.ctx_size_out;
- test_attr->retval = attr.test.retval;
- test_attr->duration = attr.test.duration;
- return ret;
+ opts.query_flags = query_flags;
+ opts.prog_ids = prog_ids;
+ opts.prog_cnt = *prog_cnt;
+
+ ret = bpf_prog_query_opts(target_fd, type, &opts);
+
+ if (attach_flags)
+ *attach_flags = opts.attach_flags;
+ *prog_cnt = opts.prog_cnt;
+
+ return libbpf_err_errno(ret);
}
int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, test);
union bpf_attr attr;
int ret;
if (!OPTS_VALID(opts, bpf_test_run_opts))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.test.prog_fd = prog_fd;
+ attr.test.batch_size = OPTS_GET(opts, batch_size, 0);
attr.test.cpu = OPTS_GET(opts, cpu, 0);
attr.test.flags = OPTS_GET(opts, flags, 0);
attr.test.repeat = OPTS_GET(opts, repeat, 0);
@@ -747,27 +998,30 @@ int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts)
attr.test.data_in = ptr_to_u64(OPTS_GET(opts, data_in, NULL));
attr.test.data_out = ptr_to_u64(OPTS_GET(opts, data_out, NULL));
- ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, attr_sz);
+
OPTS_SET(opts, data_size_out, attr.test.data_size_out);
OPTS_SET(opts, ctx_size_out, attr.test.ctx_size_out);
OPTS_SET(opts, duration, attr.test.duration);
OPTS_SET(opts, retval, attr.test.retval);
- return ret;
+
+ return libbpf_err_errno(ret);
}
static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, open_flags);
union bpf_attr attr;
int err;
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.start_id = start_id;
- err = sys_bpf(cmd, &attr, sizeof(attr));
+ err = sys_bpf(cmd, &attr, attr_sz);
if (!err)
*next_id = attr.next_id;
- return err;
+ return libbpf_err_errno(err);
}
int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id)
@@ -790,144 +1044,274 @@ int bpf_link_get_next_id(__u32 start_id, __u32 *next_id)
return bpf_obj_get_next_id(start_id, next_id, BPF_LINK_GET_NEXT_ID);
}
-int bpf_prog_get_fd_by_id(__u32 id)
+int bpf_prog_get_fd_by_id_opts(__u32 id,
+ const struct bpf_get_fd_by_id_opts *opts)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, open_flags);
union bpf_attr attr;
+ int fd;
+
+ if (!OPTS_VALID(opts, bpf_get_fd_by_id_opts))
+ return libbpf_err(-EINVAL);
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.prog_id = id;
+ attr.open_flags = OPTS_GET(opts, open_flags, 0);
- return sys_bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
+ fd = sys_bpf_fd(BPF_PROG_GET_FD_BY_ID, &attr, attr_sz);
+ return libbpf_err_errno(fd);
}
-int bpf_map_get_fd_by_id(__u32 id)
+int bpf_prog_get_fd_by_id(__u32 id)
{
+ return bpf_prog_get_fd_by_id_opts(id, NULL);
+}
+
+int bpf_map_get_fd_by_id_opts(__u32 id,
+ const struct bpf_get_fd_by_id_opts *opts)
+{
+ const size_t attr_sz = offsetofend(union bpf_attr, open_flags);
union bpf_attr attr;
+ int fd;
- memset(&attr, 0, sizeof(attr));
+ if (!OPTS_VALID(opts, bpf_get_fd_by_id_opts))
+ return libbpf_err(-EINVAL);
+
+ memset(&attr, 0, attr_sz);
attr.map_id = id;
+ attr.open_flags = OPTS_GET(opts, open_flags, 0);
- return sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr));
+ fd = sys_bpf_fd(BPF_MAP_GET_FD_BY_ID, &attr, attr_sz);
+ return libbpf_err_errno(fd);
}
-int bpf_btf_get_fd_by_id(__u32 id)
+int bpf_map_get_fd_by_id(__u32 id)
{
+ return bpf_map_get_fd_by_id_opts(id, NULL);
+}
+
+int bpf_btf_get_fd_by_id_opts(__u32 id,
+ const struct bpf_get_fd_by_id_opts *opts)
+{
+ const size_t attr_sz = offsetofend(union bpf_attr, open_flags);
union bpf_attr attr;
+ int fd;
- memset(&attr, 0, sizeof(attr));
+ if (!OPTS_VALID(opts, bpf_get_fd_by_id_opts))
+ return libbpf_err(-EINVAL);
+
+ memset(&attr, 0, attr_sz);
attr.btf_id = id;
+ attr.open_flags = OPTS_GET(opts, open_flags, 0);
- return sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
+ fd = sys_bpf_fd(BPF_BTF_GET_FD_BY_ID, &attr, attr_sz);
+ return libbpf_err_errno(fd);
}
-int bpf_link_get_fd_by_id(__u32 id)
+int bpf_btf_get_fd_by_id(__u32 id)
{
+ return bpf_btf_get_fd_by_id_opts(id, NULL);
+}
+
+int bpf_link_get_fd_by_id_opts(__u32 id,
+ const struct bpf_get_fd_by_id_opts *opts)
+{
+ const size_t attr_sz = offsetofend(union bpf_attr, open_flags);
union bpf_attr attr;
+ int fd;
- memset(&attr, 0, sizeof(attr));
+ if (!OPTS_VALID(opts, bpf_get_fd_by_id_opts))
+ return libbpf_err(-EINVAL);
+
+ memset(&attr, 0, attr_sz);
attr.link_id = id;
+ attr.open_flags = OPTS_GET(opts, open_flags, 0);
- return sys_bpf(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr));
+ fd = sys_bpf_fd(BPF_LINK_GET_FD_BY_ID, &attr, attr_sz);
+ return libbpf_err_errno(fd);
+}
+
+int bpf_link_get_fd_by_id(__u32 id)
+{
+ return bpf_link_get_fd_by_id_opts(id, NULL);
}
int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, info);
union bpf_attr attr;
int err;
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.info.bpf_fd = bpf_fd;
attr.info.info_len = *info_len;
attr.info.info = ptr_to_u64(info);
- err = sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
+ err = sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, attr_sz);
if (!err)
*info_len = attr.info.info_len;
+ return libbpf_err_errno(err);
+}
- return err;
+int bpf_prog_get_info_by_fd(int prog_fd, struct bpf_prog_info *info, __u32 *info_len)
+{
+ return bpf_obj_get_info_by_fd(prog_fd, info, info_len);
+}
+
+int bpf_map_get_info_by_fd(int map_fd, struct bpf_map_info *info, __u32 *info_len)
+{
+ return bpf_obj_get_info_by_fd(map_fd, info, info_len);
+}
+
+int bpf_btf_get_info_by_fd(int btf_fd, struct bpf_btf_info *info, __u32 *info_len)
+{
+ return bpf_obj_get_info_by_fd(btf_fd, info, info_len);
+}
+
+int bpf_link_get_info_by_fd(int link_fd, struct bpf_link_info *info, __u32 *info_len)
+{
+ return bpf_obj_get_info_by_fd(link_fd, info, info_len);
}
int bpf_raw_tracepoint_open(const char *name, int prog_fd)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, raw_tracepoint);
union bpf_attr attr;
+ int fd;
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.raw_tracepoint.name = ptr_to_u64(name);
attr.raw_tracepoint.prog_fd = prog_fd;
- return sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
+ fd = sys_bpf_fd(BPF_RAW_TRACEPOINT_OPEN, &attr, attr_sz);
+ return libbpf_err_errno(fd);
}
-int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
- bool do_log)
+int bpf_btf_load(const void *btf_data, size_t btf_size, struct bpf_btf_load_opts *opts)
{
- union bpf_attr attr = {};
+ const size_t attr_sz = offsetofend(union bpf_attr, btf_token_fd);
+ union bpf_attr attr;
+ char *log_buf;
+ size_t log_size;
+ __u32 log_level;
int fd;
- attr.btf = ptr_to_u64(btf);
+ bump_rlimit_memlock();
+
+ memset(&attr, 0, attr_sz);
+
+ if (!OPTS_VALID(opts, bpf_btf_load_opts))
+ return libbpf_err(-EINVAL);
+
+ log_buf = OPTS_GET(opts, log_buf, NULL);
+ log_size = OPTS_GET(opts, log_size, 0);
+ log_level = OPTS_GET(opts, log_level, 0);
+
+ if (log_size > UINT_MAX)
+ return libbpf_err(-EINVAL);
+ if (log_size && !log_buf)
+ return libbpf_err(-EINVAL);
+
+ attr.btf = ptr_to_u64(btf_data);
attr.btf_size = btf_size;
-retry:
- if (do_log && log_buf && log_buf_size) {
- attr.btf_log_level = 1;
- attr.btf_log_size = log_buf_size;
+ attr.btf_flags = OPTS_GET(opts, btf_flags, 0);
+ attr.btf_token_fd = OPTS_GET(opts, token_fd, 0);
+
+ /* log_level == 0 and log_buf != NULL means "try loading without
+ * log_buf, but retry with log_buf and log_level=1 on error", which is
+ * consistent across low-level and high-level BTF and program loading
+ * APIs within libbpf and provides a sensible behavior in practice
+ */
+ if (log_level) {
attr.btf_log_buf = ptr_to_u64(log_buf);
+ attr.btf_log_size = (__u32)log_size;
+ attr.btf_log_level = log_level;
}
- fd = sys_bpf(BPF_BTF_LOAD, &attr, sizeof(attr));
- if (fd == -1 && !do_log && log_buf && log_buf_size) {
- do_log = true;
- goto retry;
+ fd = sys_bpf_fd(BPF_BTF_LOAD, &attr, attr_sz);
+ if (fd < 0 && log_buf && log_level == 0) {
+ attr.btf_log_buf = ptr_to_u64(log_buf);
+ attr.btf_log_size = (__u32)log_size;
+ attr.btf_log_level = 1;
+ fd = sys_bpf_fd(BPF_BTF_LOAD, &attr, attr_sz);
}
- return fd;
+ OPTS_SET(opts, log_true_size, attr.btf_log_true_size);
+ return libbpf_err_errno(fd);
}
int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
__u32 *prog_id, __u32 *fd_type, __u64 *probe_offset,
__u64 *probe_addr)
{
- union bpf_attr attr = {};
+ const size_t attr_sz = offsetofend(union bpf_attr, task_fd_query);
+ union bpf_attr attr;
int err;
+ memset(&attr, 0, attr_sz);
attr.task_fd_query.pid = pid;
attr.task_fd_query.fd = fd;
attr.task_fd_query.flags = flags;
attr.task_fd_query.buf = ptr_to_u64(buf);
attr.task_fd_query.buf_len = *buf_len;
- err = sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr));
+ err = sys_bpf(BPF_TASK_FD_QUERY, &attr, attr_sz);
+
*buf_len = attr.task_fd_query.buf_len;
*prog_id = attr.task_fd_query.prog_id;
*fd_type = attr.task_fd_query.fd_type;
*probe_offset = attr.task_fd_query.probe_offset;
*probe_addr = attr.task_fd_query.probe_addr;
- return err;
+ return libbpf_err_errno(err);
}
int bpf_enable_stats(enum bpf_stats_type type)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, enable_stats);
union bpf_attr attr;
+ int fd;
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.enable_stats.type = type;
- return sys_bpf(BPF_ENABLE_STATS, &attr, sizeof(attr));
+ fd = sys_bpf_fd(BPF_ENABLE_STATS, &attr, attr_sz);
+ return libbpf_err_errno(fd);
}
int bpf_prog_bind_map(int prog_fd, int map_fd,
const struct bpf_prog_bind_opts *opts)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, prog_bind_map);
union bpf_attr attr;
+ int ret;
if (!OPTS_VALID(opts, bpf_prog_bind_opts))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.prog_bind_map.prog_fd = prog_fd;
attr.prog_bind_map.map_fd = map_fd;
attr.prog_bind_map.flags = OPTS_GET(opts, flags, 0);
- return sys_bpf(BPF_PROG_BIND_MAP, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_PROG_BIND_MAP, &attr, attr_sz);
+ return libbpf_err_errno(ret);
+}
+
+int bpf_token_create(int bpffs_fd, struct bpf_token_create_opts *opts)
+{
+ const size_t attr_sz = offsetofend(union bpf_attr, token_create);
+ union bpf_attr attr;
+ int fd;
+
+ if (!OPTS_VALID(opts, bpf_token_create_opts))
+ return libbpf_err(-EINVAL);
+
+ memset(&attr, 0, attr_sz);
+ attr.token_create.bpffs_fd = bpffs_fd;
+ attr.token_create.flags = OPTS_GET(opts, flags, 0);
+
+ fd = sys_bpf_fd(BPF_TOKEN_CREATE, &attr, attr_sz);
+ return libbpf_err_errno(fd);
}
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 875dde20d56e..df0db2f0cdb7 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
/*
- * common eBPF ELF operations.
+ * Common BPF ELF operations.
*
* Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
* Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
@@ -29,92 +29,121 @@
#include <stdint.h>
#include "libbpf_common.h"
+#include "libbpf_legacy.h"
#ifdef __cplusplus
extern "C" {
#endif
-struct bpf_create_map_attr {
- const char *name;
- enum bpf_map_type map_type;
- __u32 map_flags;
- __u32 key_size;
- __u32 value_size;
- __u32 max_entries;
- __u32 numa_node;
+LIBBPF_API int libbpf_set_memlock_rlim(size_t memlock_bytes);
+
+struct bpf_map_create_opts {
+ size_t sz; /* size of this struct for forward/backward compatibility */
+
__u32 btf_fd;
__u32 btf_key_type_id;
__u32 btf_value_type_id;
+ __u32 btf_vmlinux_value_type_id;
+
+ __u32 inner_map_fd;
+ __u32 map_flags;
+ __u64 map_extra;
+
+ __u32 numa_node;
__u32 map_ifindex;
- union {
- __u32 inner_map_fd;
- __u32 btf_vmlinux_value_type_id;
- };
+ __s32 value_type_btf_obj_fd;
+
+ __u32 token_fd;
+ size_t :0;
};
+#define bpf_map_create_opts__last_field token_fd
+
+LIBBPF_API int bpf_map_create(enum bpf_map_type map_type,
+ const char *map_name,
+ __u32 key_size,
+ __u32 value_size,
+ __u32 max_entries,
+ const struct bpf_map_create_opts *opts);
+
+struct bpf_prog_load_opts {
+ size_t sz; /* size of this struct for forward/backward compatibility */
+
+ /* libbpf can retry BPF_PROG_LOAD command if bpf() syscall returns
+ * -EAGAIN. This field determines how many attempts libbpf has to
+ * make. If not specified, libbpf will use default value of 5.
+ */
+ int attempts;
-LIBBPF_API int
-bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr);
-LIBBPF_API int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
- int key_size, int value_size,
- int max_entries, __u32 map_flags, int node);
-LIBBPF_API int bpf_create_map_name(enum bpf_map_type map_type, const char *name,
- int key_size, int value_size,
- int max_entries, __u32 map_flags);
-LIBBPF_API int bpf_create_map(enum bpf_map_type map_type, int key_size,
- int value_size, int max_entries, __u32 map_flags);
-LIBBPF_API int bpf_create_map_in_map_node(enum bpf_map_type map_type,
- const char *name, int key_size,
- int inner_map_fd, int max_entries,
- __u32 map_flags, int node);
-LIBBPF_API int bpf_create_map_in_map(enum bpf_map_type map_type,
- const char *name, int key_size,
- int inner_map_fd, int max_entries,
- __u32 map_flags);
-
-struct bpf_load_program_attr {
- enum bpf_prog_type prog_type;
enum bpf_attach_type expected_attach_type;
- const char *name;
- const struct bpf_insn *insns;
- size_t insns_cnt;
- const char *license;
- union {
- __u32 kern_version;
- __u32 attach_prog_fd;
- };
- union {
- __u32 prog_ifindex;
- __u32 attach_btf_id;
- };
__u32 prog_btf_fd;
- __u32 func_info_rec_size;
+ __u32 prog_flags;
+ __u32 prog_ifindex;
+ __u32 kern_version;
+
+ __u32 attach_btf_id;
+ __u32 attach_prog_fd;
+ __u32 attach_btf_obj_fd;
+
+ const int *fd_array;
+
+ /* .BTF.ext func info data */
const void *func_info;
__u32 func_info_cnt;
- __u32 line_info_rec_size;
+ __u32 func_info_rec_size;
+
+ /* .BTF.ext line info data */
const void *line_info;
__u32 line_info_cnt;
+ __u32 line_info_rec_size;
+
+ /* verifier log options */
__u32 log_level;
- __u32 prog_flags;
+ __u32 log_size;
+ char *log_buf;
+ /* output: actual total log contents size (including termintaing zero).
+ * It could be both larger than original log_size (if log was
+ * truncated), or smaller (if log buffer wasn't filled completely).
+ * If kernel doesn't support this feature, log_size is left unchanged.
+ */
+ __u32 log_true_size;
+ __u32 token_fd;
+ size_t :0;
};
+#define bpf_prog_load_opts__last_field token_fd
+
+LIBBPF_API int bpf_prog_load(enum bpf_prog_type prog_type,
+ const char *prog_name, const char *license,
+ const struct bpf_insn *insns, size_t insn_cnt,
+ struct bpf_prog_load_opts *opts);
/* Flags to direct loading requirements */
#define MAPS_RELAX_COMPAT 0x01
-/* Recommend log buffer size */
+/* Recommended log buffer size */
#define BPF_LOG_BUF_SIZE (UINT32_MAX >> 8) /* verifier maximum in kernels <= 5.1 */
-LIBBPF_API int
-bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
- char *log_buf, size_t log_buf_sz);
-LIBBPF_API int bpf_load_program(enum bpf_prog_type type,
- const struct bpf_insn *insns, size_t insns_cnt,
- const char *license, __u32 kern_version,
- char *log_buf, size_t log_buf_sz);
-LIBBPF_API int bpf_verify_program(enum bpf_prog_type type,
- const struct bpf_insn *insns,
- size_t insns_cnt, __u32 prog_flags,
- const char *license, __u32 kern_version,
- char *log_buf, size_t log_buf_sz,
- int log_level);
+
+struct bpf_btf_load_opts {
+ size_t sz; /* size of this struct for forward/backward compatibility */
+
+ /* kernel log options */
+ char *log_buf;
+ __u32 log_level;
+ __u32 log_size;
+ /* output: actual total log contents size (including termintaing zero).
+ * It could be both larger than original log_size (if log was
+ * truncated), or smaller (if log buffer wasn't filled completely).
+ * If kernel doesn't support this feature, log_size is left unchanged.
+ */
+ __u32 log_true_size;
+
+ __u32 btf_flags;
+ __u32 token_fd;
+ size_t :0;
+};
+#define bpf_btf_load_opts__last_field token_fd
+
+LIBBPF_API int bpf_btf_load(const void *btf_data, size_t btf_size,
+ struct bpf_btf_load_opts *opts);
LIBBPF_API int bpf_map_update_elem(int fd, const void *key, const void *value,
__u64 flags);
@@ -124,7 +153,10 @@ LIBBPF_API int bpf_map_lookup_elem_flags(int fd, const void *key, void *value,
__u64 flags);
LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key,
void *value);
+LIBBPF_API int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key,
+ void *value, __u64 flags);
LIBBPF_API int bpf_map_delete_elem(int fd, const void *key);
+LIBBPF_API int bpf_map_delete_elem_flags(int fd, const void *key, __u64 flags);
LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key);
LIBBPF_API int bpf_map_freeze(int fd);
@@ -135,39 +167,228 @@ struct bpf_map_batch_opts {
};
#define bpf_map_batch_opts__last_field flags
-LIBBPF_API int bpf_map_delete_batch(int fd, void *keys,
+
+/**
+ * @brief **bpf_map_delete_batch()** allows for batch deletion of multiple
+ * elements in a BPF map.
+ *
+ * @param fd BPF map file descriptor
+ * @param keys pointer to an array of *count* keys
+ * @param count input and output parameter; on input **count** represents the
+ * number of elements in the map to delete in batch;
+ * on output if a non-EFAULT error is returned, **count** represents the number of deleted
+ * elements if the output **count** value is not equal to the input **count** value
+ * If EFAULT is returned, **count** should not be trusted to be correct.
+ * @param opts options for configuring the way the batch deletion works
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
+LIBBPF_API int bpf_map_delete_batch(int fd, const void *keys,
__u32 *count,
const struct bpf_map_batch_opts *opts);
+
+/**
+ * @brief **bpf_map_lookup_batch()** allows for batch lookup of BPF map elements.
+ *
+ * The parameter *in_batch* is the address of the first element in the batch to
+ * read. *out_batch* is an output parameter that should be passed as *in_batch*
+ * to subsequent calls to **bpf_map_lookup_batch()**. NULL can be passed for
+ * *in_batch* to indicate that the batched lookup starts from the beginning of
+ * the map. Both *in_batch* and *out_batch* must point to memory large enough to
+ * hold a single key, except for maps of type **BPF_MAP_TYPE_{HASH, PERCPU_HASH,
+ * LRU_HASH, LRU_PERCPU_HASH}**, for which the memory size must be at
+ * least 4 bytes wide regardless of key size.
+ *
+ * The *keys* and *values* are output parameters which must point to memory large enough to
+ * hold *count* items based on the key and value size of the map *map_fd*. The *keys*
+ * buffer must be of *key_size* * *count*. The *values* buffer must be of
+ * *value_size* * *count*.
+ *
+ * @param fd BPF map file descriptor
+ * @param in_batch address of the first element in batch to read, can pass NULL to
+ * indicate that the batched lookup starts from the beginning of the map.
+ * @param out_batch output parameter that should be passed to next call as *in_batch*
+ * @param keys pointer to an array large enough for *count* keys
+ * @param values pointer to an array large enough for *count* values
+ * @param count input and output parameter; on input it's the number of elements
+ * in the map to read in batch; on output it's the number of elements that were
+ * successfully read.
+ * If a non-EFAULT error is returned, count will be set as the number of elements
+ * that were read before the error occurred.
+ * If EFAULT is returned, **count** should not be trusted to be correct.
+ * @param opts options for configuring the way the batch lookup works
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
LIBBPF_API int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch,
void *keys, void *values, __u32 *count,
const struct bpf_map_batch_opts *opts);
+
+/**
+ * @brief **bpf_map_lookup_and_delete_batch()** allows for batch lookup and deletion
+ * of BPF map elements where each element is deleted after being retrieved.
+ *
+ * @param fd BPF map file descriptor
+ * @param in_batch address of the first element in batch to read, can pass NULL to
+ * get address of the first element in *out_batch*. If not NULL, must be large
+ * enough to hold a key. For **BPF_MAP_TYPE_{HASH, PERCPU_HASH, LRU_HASH,
+ * LRU_PERCPU_HASH}**, the memory size must be at least 4 bytes wide regardless
+ * of key size.
+ * @param out_batch output parameter that should be passed to next call as *in_batch*
+ * @param keys pointer to an array of *count* keys
+ * @param values pointer to an array large enough for *count* values
+ * @param count input and output parameter; on input it's the number of elements
+ * in the map to read and delete in batch; on output it represents the number of
+ * elements that were successfully read and deleted
+ * If a non-**EFAULT** error code is returned and if the output **count** value
+ * is not equal to the input **count** value, up to **count** elements may
+ * have been deleted.
+ * if **EFAULT** is returned up to *count* elements may have been deleted without
+ * being returned via the *keys* and *values* output parameters.
+ * @param opts options for configuring the way the batch lookup and delete works
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
LIBBPF_API int bpf_map_lookup_and_delete_batch(int fd, void *in_batch,
void *out_batch, void *keys,
void *values, __u32 *count,
const struct bpf_map_batch_opts *opts);
-LIBBPF_API int bpf_map_update_batch(int fd, void *keys, void *values,
+
+/**
+ * @brief **bpf_map_update_batch()** updates multiple elements in a map
+ * by specifying keys and their corresponding values.
+ *
+ * The *keys* and *values* parameters must point to memory large enough
+ * to hold *count* items based on the key and value size of the map.
+ *
+ * The *opts* parameter can be used to control how *bpf_map_update_batch()*
+ * should handle keys that either do or do not already exist in the map.
+ * In particular the *flags* parameter of *bpf_map_batch_opts* can be
+ * one of the following:
+ *
+ * Note that *count* is an input and output parameter, where on output it
+ * represents how many elements were successfully updated. Also note that if
+ * **EFAULT** then *count* should not be trusted to be correct.
+ *
+ * **BPF_ANY**
+ * Create new elements or update existing.
+ *
+ * **BPF_NOEXIST**
+ * Create new elements only if they do not exist.
+ *
+ * **BPF_EXIST**
+ * Update existing elements.
+ *
+ * **BPF_F_LOCK**
+ * Update spin_lock-ed map elements. This must be
+ * specified if the map value contains a spinlock.
+ *
+ * @param fd BPF map file descriptor
+ * @param keys pointer to an array of *count* keys
+ * @param values pointer to an array of *count* values
+ * @param count input and output parameter; on input it's the number of elements
+ * in the map to update in batch; on output if a non-EFAULT error is returned,
+ * **count** represents the number of updated elements if the output **count**
+ * value is not equal to the input **count** value.
+ * If EFAULT is returned, **count** should not be trusted to be correct.
+ * @param opts options for configuring the way the batch update works
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
+LIBBPF_API int bpf_map_update_batch(int fd, const void *keys, const void *values,
__u32 *count,
const struct bpf_map_batch_opts *opts);
+struct bpf_obj_pin_opts {
+ size_t sz; /* size of this struct for forward/backward compatibility */
+
+ __u32 file_flags;
+ int path_fd;
+
+ size_t :0;
+};
+#define bpf_obj_pin_opts__last_field path_fd
+
LIBBPF_API int bpf_obj_pin(int fd, const char *pathname);
-LIBBPF_API int bpf_obj_get(const char *pathname);
+LIBBPF_API int bpf_obj_pin_opts(int fd, const char *pathname,
+ const struct bpf_obj_pin_opts *opts);
-struct bpf_prog_attach_opts {
+struct bpf_obj_get_opts {
size_t sz; /* size of this struct for forward/backward compatibility */
- unsigned int flags;
- int replace_prog_fd;
+
+ __u32 file_flags;
+ int path_fd;
+
+ size_t :0;
};
-#define bpf_prog_attach_opts__last_field replace_prog_fd
+#define bpf_obj_get_opts__last_field path_fd
+
+LIBBPF_API int bpf_obj_get(const char *pathname);
+LIBBPF_API int bpf_obj_get_opts(const char *pathname,
+ const struct bpf_obj_get_opts *opts);
LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd,
enum bpf_attach_type type, unsigned int flags);
-LIBBPF_API int bpf_prog_attach_xattr(int prog_fd, int attachable_fd,
- enum bpf_attach_type type,
- const struct bpf_prog_attach_opts *opts);
LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd,
enum bpf_attach_type type);
+struct bpf_prog_attach_opts {
+ size_t sz; /* size of this struct for forward/backward compatibility */
+ __u32 flags;
+ union {
+ int replace_prog_fd;
+ int replace_fd;
+ };
+ int relative_fd;
+ __u32 relative_id;
+ __u64 expected_revision;
+ size_t :0;
+};
+#define bpf_prog_attach_opts__last_field expected_revision
+
+struct bpf_prog_detach_opts {
+ size_t sz; /* size of this struct for forward/backward compatibility */
+ __u32 flags;
+ int relative_fd;
+ __u32 relative_id;
+ __u64 expected_revision;
+ size_t :0;
+};
+#define bpf_prog_detach_opts__last_field expected_revision
+
+/**
+ * @brief **bpf_prog_attach_opts()** attaches the BPF program corresponding to
+ * *prog_fd* to a *target* which can represent a file descriptor or netdevice
+ * ifindex.
+ *
+ * @param prog_fd BPF program file descriptor
+ * @param target attach location file descriptor or ifindex
+ * @param type attach type for the BPF program
+ * @param opts options for configuring the attachment
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
+LIBBPF_API int bpf_prog_attach_opts(int prog_fd, int target,
+ enum bpf_attach_type type,
+ const struct bpf_prog_attach_opts *opts);
+
+/**
+ * @brief **bpf_prog_detach_opts()** detaches the BPF program corresponding to
+ * *prog_fd* from a *target* which can represent a file descriptor or netdevice
+ * ifindex.
+ *
+ * @param prog_fd BPF program file descriptor
+ * @param target detach location file descriptor or ifindex
+ * @param type detach type for the BPF program
+ * @param opts options for configuring the detachment
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
+LIBBPF_API int bpf_prog_detach_opts(int prog_fd, int target,
+ enum bpf_attach_type type,
+ const struct bpf_prog_detach_opts *opts);
+
union bpf_iter_link_info; /* defined in up-to-date linux/bpf.h */
struct bpf_link_create_opts {
size_t sz; /* size of this struct for forward/backward compatibility */
@@ -175,8 +396,49 @@ struct bpf_link_create_opts {
union bpf_iter_link_info *iter_info;
__u32 iter_info_len;
__u32 target_btf_id;
+ union {
+ struct {
+ __u64 bpf_cookie;
+ } perf_event;
+ struct {
+ __u32 flags;
+ __u32 cnt;
+ const char **syms;
+ const unsigned long *addrs;
+ const __u64 *cookies;
+ } kprobe_multi;
+ struct {
+ __u32 flags;
+ __u32 cnt;
+ const char *path;
+ const unsigned long *offsets;
+ const unsigned long *ref_ctr_offsets;
+ const __u64 *cookies;
+ __u32 pid;
+ } uprobe_multi;
+ struct {
+ __u64 cookie;
+ } tracing;
+ struct {
+ __u32 pf;
+ __u32 hooknum;
+ __s32 priority;
+ __u32 flags;
+ } netfilter;
+ struct {
+ __u32 relative_fd;
+ __u32 relative_id;
+ __u64 expected_revision;
+ } tcx;
+ struct {
+ __u32 relative_fd;
+ __u32 relative_id;
+ __u64 expected_revision;
+ } netkit;
+ };
+ size_t :0;
};
-#define bpf_link_create_opts__last_field target_btf_id
+#define bpf_link_create_opts__last_field uprobe_multi.pid
LIBBPF_API int bpf_link_create(int prog_fd, int target_fd,
enum bpf_attach_type attach_type,
@@ -188,8 +450,9 @@ struct bpf_link_update_opts {
size_t sz; /* size of this struct for forward/backward compatibility */
__u32 flags; /* extra flags */
__u32 old_prog_fd; /* expected old program FD */
+ __u32 old_map_fd; /* expected old map FD */
};
-#define bpf_link_update_opts__last_field old_prog_fd
+#define bpf_link_update_opts__last_field old_map_fd
LIBBPF_API int bpf_link_update(int link_fd, int new_prog_fd,
const struct bpf_link_update_opts *opts);
@@ -213,36 +476,161 @@ struct bpf_prog_test_run_attr {
* out: length of cxt_out */
};
-LIBBPF_API int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr);
-
-/*
- * bpf_prog_test_run does not check that data_out is large enough. Consider
- * using bpf_prog_test_run_xattr instead.
- */
-LIBBPF_API int bpf_prog_test_run(int prog_fd, int repeat, void *data,
- __u32 size, void *data_out, __u32 *size_out,
- __u32 *retval, __u32 *duration);
LIBBPF_API int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id);
LIBBPF_API int bpf_map_get_next_id(__u32 start_id, __u32 *next_id);
LIBBPF_API int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id);
LIBBPF_API int bpf_link_get_next_id(__u32 start_id, __u32 *next_id);
+
+struct bpf_get_fd_by_id_opts {
+ size_t sz; /* size of this struct for forward/backward compatibility */
+ __u32 open_flags; /* permissions requested for the operation on fd */
+ size_t :0;
+};
+#define bpf_get_fd_by_id_opts__last_field open_flags
+
LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id);
+LIBBPF_API int bpf_prog_get_fd_by_id_opts(__u32 id,
+ const struct bpf_get_fd_by_id_opts *opts);
LIBBPF_API int bpf_map_get_fd_by_id(__u32 id);
+LIBBPF_API int bpf_map_get_fd_by_id_opts(__u32 id,
+ const struct bpf_get_fd_by_id_opts *opts);
LIBBPF_API int bpf_btf_get_fd_by_id(__u32 id);
+LIBBPF_API int bpf_btf_get_fd_by_id_opts(__u32 id,
+ const struct bpf_get_fd_by_id_opts *opts);
LIBBPF_API int bpf_link_get_fd_by_id(__u32 id);
+LIBBPF_API int bpf_link_get_fd_by_id_opts(__u32 id,
+ const struct bpf_get_fd_by_id_opts *opts);
LIBBPF_API int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len);
+
+/**
+ * @brief **bpf_prog_get_info_by_fd()** obtains information about the BPF
+ * program corresponding to *prog_fd*.
+ *
+ * Populates up to *info_len* bytes of *info* and updates *info_len* with the
+ * actual number of bytes written to *info*. Note that *info* should be
+ * zero-initialized or initialized as expected by the requested *info*
+ * type. Failing to (zero-)initialize *info* under certain circumstances can
+ * result in this helper returning an error.
+ *
+ * @param prog_fd BPF program file descriptor
+ * @param info pointer to **struct bpf_prog_info** that will be populated with
+ * BPF program information
+ * @param info_len pointer to the size of *info*; on success updated with the
+ * number of bytes written to *info*
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
+LIBBPF_API int bpf_prog_get_info_by_fd(int prog_fd, struct bpf_prog_info *info, __u32 *info_len);
+
+/**
+ * @brief **bpf_map_get_info_by_fd()** obtains information about the BPF
+ * map corresponding to *map_fd*.
+ *
+ * Populates up to *info_len* bytes of *info* and updates *info_len* with the
+ * actual number of bytes written to *info*. Note that *info* should be
+ * zero-initialized or initialized as expected by the requested *info*
+ * type. Failing to (zero-)initialize *info* under certain circumstances can
+ * result in this helper returning an error.
+ *
+ * @param map_fd BPF map file descriptor
+ * @param info pointer to **struct bpf_map_info** that will be populated with
+ * BPF map information
+ * @param info_len pointer to the size of *info*; on success updated with the
+ * number of bytes written to *info*
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
+LIBBPF_API int bpf_map_get_info_by_fd(int map_fd, struct bpf_map_info *info, __u32 *info_len);
+
+/**
+ * @brief **bpf_btf_get_info_by_fd()** obtains information about the
+ * BTF object corresponding to *btf_fd*.
+ *
+ * Populates up to *info_len* bytes of *info* and updates *info_len* with the
+ * actual number of bytes written to *info*. Note that *info* should be
+ * zero-initialized or initialized as expected by the requested *info*
+ * type. Failing to (zero-)initialize *info* under certain circumstances can
+ * result in this helper returning an error.
+ *
+ * @param btf_fd BTF object file descriptor
+ * @param info pointer to **struct bpf_btf_info** that will be populated with
+ * BTF object information
+ * @param info_len pointer to the size of *info*; on success updated with the
+ * number of bytes written to *info*
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
+LIBBPF_API int bpf_btf_get_info_by_fd(int btf_fd, struct bpf_btf_info *info, __u32 *info_len);
+
+/**
+ * @brief **bpf_btf_get_info_by_fd()** obtains information about the BPF
+ * link corresponding to *link_fd*.
+ *
+ * Populates up to *info_len* bytes of *info* and updates *info_len* with the
+ * actual number of bytes written to *info*. Note that *info* should be
+ * zero-initialized or initialized as expected by the requested *info*
+ * type. Failing to (zero-)initialize *info* under certain circumstances can
+ * result in this helper returning an error.
+ *
+ * @param link_fd BPF link file descriptor
+ * @param info pointer to **struct bpf_link_info** that will be populated with
+ * BPF link information
+ * @param info_len pointer to the size of *info*; on success updated with the
+ * number of bytes written to *info*
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
+LIBBPF_API int bpf_link_get_info_by_fd(int link_fd, struct bpf_link_info *info, __u32 *info_len);
+
+struct bpf_prog_query_opts {
+ size_t sz; /* size of this struct for forward/backward compatibility */
+ __u32 query_flags;
+ __u32 attach_flags; /* output argument */
+ __u32 *prog_ids;
+ union {
+ /* input+output argument */
+ __u32 prog_cnt;
+ __u32 count;
+ };
+ __u32 *prog_attach_flags;
+ __u32 *link_ids;
+ __u32 *link_attach_flags;
+ __u64 revision;
+ size_t :0;
+};
+#define bpf_prog_query_opts__last_field revision
+
+/**
+ * @brief **bpf_prog_query_opts()** queries the BPF programs and BPF links
+ * which are attached to *target* which can represent a file descriptor or
+ * netdevice ifindex.
+ *
+ * @param target query location file descriptor or ifindex
+ * @param type attach type for the BPF program
+ * @param opts options for configuring the query
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
+LIBBPF_API int bpf_prog_query_opts(int target, enum bpf_attach_type type,
+ struct bpf_prog_query_opts *opts);
LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type,
__u32 query_flags, __u32 *attach_flags,
__u32 *prog_ids, __u32 *prog_cnt);
+
LIBBPF_API int bpf_raw_tracepoint_open(const char *name, int prog_fd);
-LIBBPF_API int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf,
- __u32 log_buf_size, bool do_log);
LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,
__u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
__u64 *probe_offset, __u64 *probe_addr);
+#ifdef __cplusplus
+/* forward-declaring enums in C++ isn't compatible with pure C enums, so
+ * instead define bpf_enable_stats() as accepting int as an input
+ */
+LIBBPF_API int bpf_enable_stats(int type);
+#else
enum bpf_stats_type; /* defined in up-to-date linux/bpf.h */
LIBBPF_API int bpf_enable_stats(enum bpf_stats_type type);
+#endif
struct bpf_prog_bind_opts {
size_t sz; /* size of this struct for forward/backward compatibility */
@@ -272,12 +660,37 @@ struct bpf_test_run_opts {
__u32 duration; /* out: average per repetition in ns */
__u32 flags;
__u32 cpu;
+ __u32 batch_size;
};
-#define bpf_test_run_opts__last_field cpu
+#define bpf_test_run_opts__last_field batch_size
LIBBPF_API int bpf_prog_test_run_opts(int prog_fd,
struct bpf_test_run_opts *opts);
+struct bpf_token_create_opts {
+ size_t sz; /* size of this struct for forward/backward compatibility */
+ __u32 flags;
+ size_t :0;
+};
+#define bpf_token_create_opts__last_field flags
+
+/**
+ * @brief **bpf_token_create()** creates a new instance of BPF token derived
+ * from specified BPF FS mount point.
+ *
+ * BPF token created with this API can be passed to bpf() syscall for
+ * commands like BPF_PROG_LOAD, BPF_MAP_CREATE, etc.
+ *
+ * @param bpffs_fd FD for BPF FS instance from which to derive a BPF token
+ * instance.
+ * @param opts optional BPF token creation options, can be NULL
+ *
+ * @return BPF token FD > 0, on success; negative error code, otherwise (errno
+ * is also set to the error code)
+ */
+LIBBPF_API int bpf_token_create(int bpffs_fd,
+ struct bpf_token_create_opts *opts);
+
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h
index bbcefb3ff5a5..1ce738d91685 100644
--- a/tools/lib/bpf/bpf_core_read.h
+++ b/tools/lib/bpf/bpf_core_read.h
@@ -2,6 +2,8 @@
#ifndef __BPF_CORE_READ_H__
#define __BPF_CORE_READ_H__
+#include <bpf/bpf_helpers.h>
+
/*
* enum bpf_field_info_kind is passed as a second argument into
* __builtin_preserve_field_info() built-in to get a specific aspect of
@@ -29,6 +31,7 @@ enum bpf_type_id_kind {
enum bpf_type_info_kind {
BPF_TYPE_EXISTS = 0, /* type existence in target kernel */
BPF_TYPE_SIZE = 1, /* type size in target kernel */
+ BPF_TYPE_MATCHES = 2, /* type match in target kernel */
};
/* second argument to __builtin_preserve_enum_value() built-in */
@@ -40,10 +43,10 @@ enum bpf_enum_value_kind {
#define __CORE_RELO(src, field, info) \
__builtin_preserve_field_info((src)->field, BPF_FIELD_##info)
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \
bpf_probe_read_kernel( \
- (void *)dst, \
+ (void *)dst, \
__CORE_RELO(src, fld, BYTE_SIZE), \
(const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET))
#else
@@ -88,11 +91,19 @@ enum bpf_enum_value_kind {
const void *p = (const void *)s + __CORE_RELO(s, field, BYTE_OFFSET); \
unsigned long long val; \
\
+ /* This is a so-called barrier_var() operation that makes specified \
+ * variable "a black box" for optimizing compiler. \
+ * It forces compiler to perform BYTE_OFFSET relocation on p and use \
+ * its calculated value in the switch below, instead of applying \
+ * the same relocation 4 times for each individual memory load. \
+ */ \
+ asm volatile("" : "=r"(p) : "0"(p)); \
+ \
switch (__CORE_RELO(s, field, BYTE_SIZE)) { \
- case 1: val = *(const unsigned char *)p; \
- case 2: val = *(const unsigned short *)p; \
- case 4: val = *(const unsigned int *)p; \
- case 8: val = *(const unsigned long long *)p; \
+ case 1: val = *(const unsigned char *)p; break; \
+ case 2: val = *(const unsigned short *)p; break; \
+ case 4: val = *(const unsigned int *)p; break; \
+ case 8: val = *(const unsigned long long *)p; break; \
} \
val <<= __CORE_RELO(s, field, LSHIFT_U64); \
if (__CORE_RELO(s, field, SIGNED)) \
@@ -103,20 +114,102 @@ enum bpf_enum_value_kind {
})
/*
+ * Write to a bitfield, identified by s->field.
+ * This is the inverse of BPF_CORE_WRITE_BITFIELD().
+ */
+#define BPF_CORE_WRITE_BITFIELD(s, field, new_val) ({ \
+ void *p = (void *)s + __CORE_RELO(s, field, BYTE_OFFSET); \
+ unsigned int byte_size = __CORE_RELO(s, field, BYTE_SIZE); \
+ unsigned int lshift = __CORE_RELO(s, field, LSHIFT_U64); \
+ unsigned int rshift = __CORE_RELO(s, field, RSHIFT_U64); \
+ unsigned long long mask, val, nval = new_val; \
+ unsigned int rpad = rshift - lshift; \
+ \
+ asm volatile("" : "+r"(p)); \
+ \
+ switch (byte_size) { \
+ case 1: val = *(unsigned char *)p; break; \
+ case 2: val = *(unsigned short *)p; break; \
+ case 4: val = *(unsigned int *)p; break; \
+ case 8: val = *(unsigned long long *)p; break; \
+ } \
+ \
+ mask = (~0ULL << rshift) >> lshift; \
+ val = (val & ~mask) | ((nval << rpad) & mask); \
+ \
+ switch (byte_size) { \
+ case 1: *(unsigned char *)p = val; break; \
+ case 2: *(unsigned short *)p = val; break; \
+ case 4: *(unsigned int *)p = val; break; \
+ case 8: *(unsigned long long *)p = val; break; \
+ } \
+})
+
+/* Differentiator between compilers builtin implementations. This is a
+ * requirement due to the compiler parsing differences where GCC optimizes
+ * early in parsing those constructs of type pointers to the builtin specific
+ * type, resulting in not being possible to collect the required type
+ * information in the builtin expansion.
+ */
+#ifdef __clang__
+#define ___bpf_typeof(type) ((typeof(type) *) 0)
+#else
+#define ___bpf_typeof1(type, NR) ({ \
+ extern typeof(type) *___concat(bpf_type_tmp_, NR); \
+ ___concat(bpf_type_tmp_, NR); \
+})
+#define ___bpf_typeof(type) ___bpf_typeof1(type, __COUNTER__)
+#endif
+
+#ifdef __clang__
+#define ___bpf_field_ref1(field) (field)
+#define ___bpf_field_ref2(type, field) (___bpf_typeof(type)->field)
+#else
+#define ___bpf_field_ref1(field) (&(field))
+#define ___bpf_field_ref2(type, field) (&(___bpf_typeof(type)->field))
+#endif
+#define ___bpf_field_ref(args...) \
+ ___bpf_apply(___bpf_field_ref, ___bpf_narg(args))(args)
+
+/*
* Convenience macro to check that field actually exists in target kernel's.
* Returns:
* 1, if matching field is present in target kernel;
* 0, if no matching field found.
+ *
+ * Supports two forms:
+ * - field reference through variable access:
+ * bpf_core_field_exists(p->my_field);
+ * - field reference through type and field names:
+ * bpf_core_field_exists(struct my_type, my_field).
*/
-#define bpf_core_field_exists(field) \
- __builtin_preserve_field_info(field, BPF_FIELD_EXISTS)
+#define bpf_core_field_exists(field...) \
+ __builtin_preserve_field_info(___bpf_field_ref(field), BPF_FIELD_EXISTS)
/*
* Convenience macro to get the byte size of a field. Works for integers,
* struct/unions, pointers, arrays, and enums.
+ *
+ * Supports two forms:
+ * - field reference through variable access:
+ * bpf_core_field_size(p->my_field);
+ * - field reference through type and field names:
+ * bpf_core_field_size(struct my_type, my_field).
*/
-#define bpf_core_field_size(field) \
- __builtin_preserve_field_info(field, BPF_FIELD_BYTE_SIZE)
+#define bpf_core_field_size(field...) \
+ __builtin_preserve_field_info(___bpf_field_ref(field), BPF_FIELD_BYTE_SIZE)
+
+/*
+ * Convenience macro to get field's byte offset.
+ *
+ * Supports two forms:
+ * - field reference through variable access:
+ * bpf_core_field_offset(p->my_field);
+ * - field reference through type and field names:
+ * bpf_core_field_offset(struct my_type, my_field).
+ */
+#define bpf_core_field_offset(field...) \
+ __builtin_preserve_field_info(___bpf_field_ref(field), BPF_FIELD_BYTE_OFFSET)
/*
* Convenience macro to get BTF type ID of a specified type, using a local BTF
@@ -124,7 +217,7 @@ enum bpf_enum_value_kind {
* BTF. Always succeeds.
*/
#define bpf_core_type_id_local(type) \
- __builtin_btf_type_id(*(typeof(type) *)0, BPF_TYPE_ID_LOCAL)
+ __builtin_btf_type_id(*___bpf_typeof(type), BPF_TYPE_ID_LOCAL)
/*
* Convenience macro to get BTF type ID of a target kernel's type that matches
@@ -134,7 +227,7 @@ enum bpf_enum_value_kind {
* - 0, if no matching type was found in a target kernel BTF.
*/
#define bpf_core_type_id_kernel(type) \
- __builtin_btf_type_id(*(typeof(type) *)0, BPF_TYPE_ID_TARGET)
+ __builtin_btf_type_id(*___bpf_typeof(type), BPF_TYPE_ID_TARGET)
/*
* Convenience macro to check that provided named type
@@ -144,7 +237,17 @@ enum bpf_enum_value_kind {
* 0, if no matching type is found.
*/
#define bpf_core_type_exists(type) \
- __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_EXISTS)
+ __builtin_preserve_type_info(*___bpf_typeof(type), BPF_TYPE_EXISTS)
+
+/*
+ * Convenience macro to check that provided named type
+ * (struct/union/enum/typedef) "matches" that in a target kernel.
+ * Returns:
+ * 1, if the type matches in the target kernel's BTF;
+ * 0, if the type does not match any in the target kernel
+ */
+#define bpf_core_type_matches(type) \
+ __builtin_preserve_type_info(*___bpf_typeof(type), BPF_TYPE_MATCHES)
/*
* Convenience macro to get the byte size of a provided named type
@@ -154,7 +257,7 @@ enum bpf_enum_value_kind {
* 0, if no matching type is found.
*/
#define bpf_core_type_size(type) \
- __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_SIZE)
+ __builtin_preserve_type_info(*___bpf_typeof(type), BPF_TYPE_SIZE)
/*
* Convenience macro to check that provided enumerator value is defined in
@@ -164,8 +267,13 @@ enum bpf_enum_value_kind {
* kernel's BTF;
* 0, if no matching enum and/or enum value within that enum is found.
*/
+#ifdef __clang__
#define bpf_core_enum_value_exists(enum_type, enum_value) \
__builtin_preserve_enum_value(*(typeof(enum_type) *)enum_value, BPF_ENUMVAL_EXISTS)
+#else
+#define bpf_core_enum_value_exists(enum_type, enum_value) \
+ __builtin_preserve_enum_value(___bpf_typeof(enum_type), enum_value, BPF_ENUMVAL_EXISTS)
+#endif
/*
* Convenience macro to get the integer value of an enumerator value in
@@ -175,8 +283,13 @@ enum bpf_enum_value_kind {
* present in target kernel's BTF;
* 0, if no matching enum and/or enum value within that enum is found.
*/
+#ifdef __clang__
#define bpf_core_enum_value(enum_type, enum_value) \
__builtin_preserve_enum_value(*(typeof(enum_type) *)enum_value, BPF_ENUMVAL_VALUE)
+#else
+#define bpf_core_enum_value(enum_type, enum_value) \
+ __builtin_preserve_enum_value(___bpf_typeof(enum_type), enum_value, BPF_ENUMVAL_VALUE)
+#endif
/*
* bpf_core_read() abstracts away bpf_probe_read_kernel() call and captures
@@ -188,24 +301,40 @@ enum bpf_enum_value_kind {
* a relocation, which records BTF type ID describing root struct/union and an
* accessor string which describes exact embedded field that was used to take
* an address. See detailed description of this relocation format and
- * semantics in comments to struct bpf_field_reloc in libbpf_internal.h.
+ * semantics in comments to struct bpf_core_relo in include/uapi/linux/bpf.h.
*
* This relocation allows libbpf to adjust BPF instruction to use correct
* actual field offset, based on target kernel BTF type that matches original
* (local) BTF, used to record relocation.
*/
#define bpf_core_read(dst, sz, src) \
- bpf_probe_read_kernel(dst, sz, \
- (const void *)__builtin_preserve_access_index(src))
+ bpf_probe_read_kernel(dst, sz, (const void *)__builtin_preserve_access_index(src))
+/* NOTE: see comments for BPF_CORE_READ_USER() about the proper types use. */
+#define bpf_core_read_user(dst, sz, src) \
+ bpf_probe_read_user(dst, sz, (const void *)__builtin_preserve_access_index(src))
/*
* bpf_core_read_str() is a thin wrapper around bpf_probe_read_str()
* additionally emitting BPF CO-RE field relocation for specified source
* argument.
*/
#define bpf_core_read_str(dst, sz, src) \
- bpf_probe_read_kernel_str(dst, sz, \
- (const void *)__builtin_preserve_access_index(src))
+ bpf_probe_read_kernel_str(dst, sz, (const void *)__builtin_preserve_access_index(src))
+
+/* NOTE: see comments for BPF_CORE_READ_USER() about the proper types use. */
+#define bpf_core_read_user_str(dst, sz, src) \
+ bpf_probe_read_user_str(dst, sz, (const void *)__builtin_preserve_access_index(src))
+
+extern void *bpf_rdonly_cast(const void *obj, __u32 btf_id) __ksym __weak;
+
+/*
+ * Cast provided pointer *ptr* into a pointer to a specified *type* in such
+ * a way that BPF verifier will become aware of associated kernel-side BTF
+ * type. This allows to access members of kernel types directly without the
+ * need to use BPF_CORE_READ() macros.
+ */
+#define bpf_core_cast(ptr, type) \
+ ((typeof(type) *)bpf_rdonly_cast((ptr), bpf_core_type_id_kernel(type)))
#define ___concat(a, b) a ## b
#define ___apply(fn, n) ___concat(fn, n)
@@ -264,30 +393,29 @@ enum bpf_enum_value_kind {
read_fn((void *)(dst), sizeof(*(dst)), &((src_type)(src))->accessor)
/* "recursively" read a sequence of inner pointers using local __t var */
-#define ___rd_first(src, a) ___read(bpf_core_read, &__t, ___type(src), src, a);
-#define ___rd_last(...) \
- ___read(bpf_core_read, &__t, \
- ___type(___nolast(__VA_ARGS__)), __t, ___last(__VA_ARGS__));
-#define ___rd_p1(...) const void *__t; ___rd_first(__VA_ARGS__)
-#define ___rd_p2(...) ___rd_p1(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
-#define ___rd_p3(...) ___rd_p2(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
-#define ___rd_p4(...) ___rd_p3(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
-#define ___rd_p5(...) ___rd_p4(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
-#define ___rd_p6(...) ___rd_p5(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
-#define ___rd_p7(...) ___rd_p6(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
-#define ___rd_p8(...) ___rd_p7(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
-#define ___rd_p9(...) ___rd_p8(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
-#define ___read_ptrs(src, ...) \
- ___apply(___rd_p, ___narg(__VA_ARGS__))(src, __VA_ARGS__)
-
-#define ___core_read0(fn, dst, src, a) \
+#define ___rd_first(fn, src, a) ___read(fn, &__t, ___type(src), src, a);
+#define ___rd_last(fn, ...) \
+ ___read(fn, &__t, ___type(___nolast(__VA_ARGS__)), __t, ___last(__VA_ARGS__));
+#define ___rd_p1(fn, ...) const void *__t; ___rd_first(fn, __VA_ARGS__)
+#define ___rd_p2(fn, ...) ___rd_p1(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__)
+#define ___rd_p3(fn, ...) ___rd_p2(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__)
+#define ___rd_p4(fn, ...) ___rd_p3(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__)
+#define ___rd_p5(fn, ...) ___rd_p4(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__)
+#define ___rd_p6(fn, ...) ___rd_p5(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__)
+#define ___rd_p7(fn, ...) ___rd_p6(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__)
+#define ___rd_p8(fn, ...) ___rd_p7(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__)
+#define ___rd_p9(fn, ...) ___rd_p8(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__)
+#define ___read_ptrs(fn, src, ...) \
+ ___apply(___rd_p, ___narg(__VA_ARGS__))(fn, src, __VA_ARGS__)
+
+#define ___core_read0(fn, fn_ptr, dst, src, a) \
___read(fn, dst, ___type(src), src, a);
-#define ___core_readN(fn, dst, src, ...) \
- ___read_ptrs(src, ___nolast(__VA_ARGS__)) \
+#define ___core_readN(fn, fn_ptr, dst, src, ...) \
+ ___read_ptrs(fn_ptr, src, ___nolast(__VA_ARGS__)) \
___read(fn, dst, ___type(src, ___nolast(__VA_ARGS__)), __t, \
___last(__VA_ARGS__));
-#define ___core_read(fn, dst, src, a, ...) \
- ___apply(___core_read, ___empty(__VA_ARGS__))(fn, dst, \
+#define ___core_read(fn, fn_ptr, dst, src, a, ...) \
+ ___apply(___core_read, ___empty(__VA_ARGS__))(fn, fn_ptr, dst, \
src, a, ##__VA_ARGS__)
/*
@@ -295,20 +423,73 @@ enum bpf_enum_value_kind {
* BPF_CORE_READ(), in which final field is read into user-provided storage.
* See BPF_CORE_READ() below for more details on general usage.
*/
-#define BPF_CORE_READ_INTO(dst, src, a, ...) \
- ({ \
- ___core_read(bpf_core_read, dst, (src), a, ##__VA_ARGS__) \
- })
+#define BPF_CORE_READ_INTO(dst, src, a, ...) ({ \
+ ___core_read(bpf_core_read, bpf_core_read, \
+ dst, (src), a, ##__VA_ARGS__) \
+})
+
+/*
+ * Variant of BPF_CORE_READ_INTO() for reading from user-space memory.
+ *
+ * NOTE: see comments for BPF_CORE_READ_USER() about the proper types use.
+ */
+#define BPF_CORE_READ_USER_INTO(dst, src, a, ...) ({ \
+ ___core_read(bpf_core_read_user, bpf_core_read_user, \
+ dst, (src), a, ##__VA_ARGS__) \
+})
+
+/* Non-CO-RE variant of BPF_CORE_READ_INTO() */
+#define BPF_PROBE_READ_INTO(dst, src, a, ...) ({ \
+ ___core_read(bpf_probe_read_kernel, bpf_probe_read_kernel, \
+ dst, (src), a, ##__VA_ARGS__) \
+})
+
+/* Non-CO-RE variant of BPF_CORE_READ_USER_INTO().
+ *
+ * As no CO-RE relocations are emitted, source types can be arbitrary and are
+ * not restricted to kernel types only.
+ */
+#define BPF_PROBE_READ_USER_INTO(dst, src, a, ...) ({ \
+ ___core_read(bpf_probe_read_user, bpf_probe_read_user, \
+ dst, (src), a, ##__VA_ARGS__) \
+})
/*
* BPF_CORE_READ_STR_INTO() does same "pointer chasing" as
* BPF_CORE_READ() for intermediate pointers, but then executes (and returns
* corresponding error code) bpf_core_read_str() for final string read.
*/
-#define BPF_CORE_READ_STR_INTO(dst, src, a, ...) \
- ({ \
- ___core_read(bpf_core_read_str, dst, (src), a, ##__VA_ARGS__)\
- })
+#define BPF_CORE_READ_STR_INTO(dst, src, a, ...) ({ \
+ ___core_read(bpf_core_read_str, bpf_core_read, \
+ dst, (src), a, ##__VA_ARGS__) \
+})
+
+/*
+ * Variant of BPF_CORE_READ_STR_INTO() for reading from user-space memory.
+ *
+ * NOTE: see comments for BPF_CORE_READ_USER() about the proper types use.
+ */
+#define BPF_CORE_READ_USER_STR_INTO(dst, src, a, ...) ({ \
+ ___core_read(bpf_core_read_user_str, bpf_core_read_user, \
+ dst, (src), a, ##__VA_ARGS__) \
+})
+
+/* Non-CO-RE variant of BPF_CORE_READ_STR_INTO() */
+#define BPF_PROBE_READ_STR_INTO(dst, src, a, ...) ({ \
+ ___core_read(bpf_probe_read_kernel_str, bpf_probe_read_kernel, \
+ dst, (src), a, ##__VA_ARGS__) \
+})
+
+/*
+ * Non-CO-RE variant of BPF_CORE_READ_USER_STR_INTO().
+ *
+ * As no CO-RE relocations are emitted, source types can be arbitrary and are
+ * not restricted to kernel types only.
+ */
+#define BPF_PROBE_READ_USER_STR_INTO(dst, src, a, ...) ({ \
+ ___core_read(bpf_probe_read_user_str, bpf_probe_read_user, \
+ dst, (src), a, ##__VA_ARGS__) \
+})
/*
* BPF_CORE_READ() is used to simplify BPF CO-RE relocatable read, especially
@@ -334,12 +515,46 @@ enum bpf_enum_value_kind {
* N.B. Only up to 9 "field accessors" are supported, which should be more
* than enough for any practical purpose.
*/
-#define BPF_CORE_READ(src, a, ...) \
- ({ \
- ___type((src), a, ##__VA_ARGS__) __r; \
- BPF_CORE_READ_INTO(&__r, (src), a, ##__VA_ARGS__); \
- __r; \
- })
+#define BPF_CORE_READ(src, a, ...) ({ \
+ ___type((src), a, ##__VA_ARGS__) __r; \
+ BPF_CORE_READ_INTO(&__r, (src), a, ##__VA_ARGS__); \
+ __r; \
+})
+
+/*
+ * Variant of BPF_CORE_READ() for reading from user-space memory.
+ *
+ * NOTE: all the source types involved are still *kernel types* and need to
+ * exist in kernel (or kernel module) BTF, otherwise CO-RE relocation will
+ * fail. Custom user types are not relocatable with CO-RE.
+ * The typical situation in which BPF_CORE_READ_USER() might be used is to
+ * read kernel UAPI types from the user-space memory passed in as a syscall
+ * input argument.
+ */
+#define BPF_CORE_READ_USER(src, a, ...) ({ \
+ ___type((src), a, ##__VA_ARGS__) __r; \
+ BPF_CORE_READ_USER_INTO(&__r, (src), a, ##__VA_ARGS__); \
+ __r; \
+})
+
+/* Non-CO-RE variant of BPF_CORE_READ() */
+#define BPF_PROBE_READ(src, a, ...) ({ \
+ ___type((src), a, ##__VA_ARGS__) __r; \
+ BPF_PROBE_READ_INTO(&__r, (src), a, ##__VA_ARGS__); \
+ __r; \
+})
+
+/*
+ * Non-CO-RE variant of BPF_CORE_READ_USER().
+ *
+ * As no CO-RE relocations are emitted, source types can be arbitrary and are
+ * not restricted to kernel types only.
+ */
+#define BPF_PROBE_READ_USER(src, a, ...) ({ \
+ ___type((src), a, ##__VA_ARGS__) __r; \
+ BPF_PROBE_READ_USER_INTO(&__r, (src), a, ##__VA_ARGS__); \
+ __r; \
+})
#endif
diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h
new file mode 100644
index 000000000000..fdf44403ff36
--- /dev/null
+++ b/tools/lib/bpf/bpf_gen_internal.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (c) 2021 Facebook */
+#ifndef __BPF_GEN_INTERNAL_H
+#define __BPF_GEN_INTERNAL_H
+
+#include "bpf.h"
+
+struct ksym_relo_desc {
+ const char *name;
+ int kind;
+ int insn_idx;
+ bool is_weak;
+ bool is_typeless;
+ bool is_ld64;
+};
+
+struct ksym_desc {
+ const char *name;
+ int ref;
+ int kind;
+ union {
+ /* used for kfunc */
+ int off;
+ /* used for typeless ksym */
+ bool typeless;
+ };
+ int insn;
+ bool is_ld64;
+};
+
+struct bpf_gen {
+ struct gen_loader_opts *opts;
+ void *data_start;
+ void *data_cur;
+ void *insn_start;
+ void *insn_cur;
+ ssize_t cleanup_label;
+ __u32 nr_progs;
+ __u32 nr_maps;
+ int log_level;
+ int error;
+ struct ksym_relo_desc *relos;
+ int relo_cnt;
+ struct bpf_core_relo *core_relos;
+ int core_relo_cnt;
+ char attach_target[128];
+ int attach_kind;
+ struct ksym_desc *ksyms;
+ __u32 nr_ksyms;
+ int fd_array;
+ int nr_fd_array;
+};
+
+void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps);
+int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps);
+void bpf_gen__free(struct bpf_gen *gen);
+void bpf_gen__load_btf(struct bpf_gen *gen, const void *raw_data, __u32 raw_size);
+void bpf_gen__map_create(struct bpf_gen *gen,
+ enum bpf_map_type map_type, const char *map_name,
+ __u32 key_size, __u32 value_size, __u32 max_entries,
+ struct bpf_map_create_opts *map_attr, int map_idx);
+void bpf_gen__prog_load(struct bpf_gen *gen,
+ enum bpf_prog_type prog_type, const char *prog_name,
+ const char *license, struct bpf_insn *insns, size_t insn_cnt,
+ struct bpf_prog_load_opts *load_attr, int prog_idx);
+void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *value, __u32 value_size);
+void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx);
+void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *name, enum bpf_attach_type type);
+void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak,
+ bool is_typeless, bool is_ld64, int kind, int insn_idx);
+void bpf_gen__record_relo_core(struct bpf_gen *gen, const struct bpf_core_relo *core_relo);
+void bpf_gen__populate_outer_map(struct bpf_gen *gen, int outer_map_idx, int key, int inner_map_idx);
+
+#endif
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index 72b251110c4d..cd17f6d0791f 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -13,25 +13,39 @@
#define __uint(name, val) int (*name)[val]
#define __type(name, val) typeof(val) *name
#define __array(name, val) typeof(val) *name[]
-
-/* Helper macro to print out debug messages */
-#define bpf_printk(fmt, ...) \
-({ \
- char ____fmt[] = fmt; \
- bpf_trace_printk(____fmt, sizeof(____fmt), \
- ##__VA_ARGS__); \
-})
+#define __ulong(name, val) enum { ___bpf_concat(__unique_value, __COUNTER__) = val } name
/*
* Helper macro to place programs, maps, license in
* different sections in elf_bpf file. Section names
- * are interpreted by elf_bpf loader
+ * are interpreted by libbpf depending on the context (BPF programs, BPF maps,
+ * extern variables, etc).
+ * To allow use of SEC() with externs (e.g., for extern .maps declarations),
+ * make sure __attribute__((unused)) doesn't trigger compilation warning.
*/
-#define SEC(NAME) __attribute__((section(NAME), used))
+#if __GNUC__ && !__clang__
+
+/*
+ * Pragma macros are broken on GCC
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=55578
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90400
+ */
+#define SEC(name) __attribute__((section(name), used))
+
+#else
+
+#define SEC(name) \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wignored-attributes\"") \
+ __attribute__((section(name), used)) \
+ _Pragma("GCC diagnostic pop") \
-#ifndef __always_inline
-#define __always_inline __attribute__((always_inline))
#endif
+
+/* Avoid 'linux/stddef.h' definition of '__always_inline'. */
+#undef __always_inline
+#define __always_inline inline __attribute__((always_inline))
+
#ifndef __noinline
#define __noinline __attribute__((noinline))
#endif
@@ -40,17 +54,68 @@
#endif
/*
- * Helper macro to manipulate data structures
+ * Use __hidden attribute to mark a non-static BPF subprogram effectively
+ * static for BPF verifier's verification algorithm purposes, allowing more
+ * extensive and permissive BPF verification process, taking into account
+ * subprogram's caller context.
*/
-#ifndef offsetof
-#define offsetof(TYPE, MEMBER) ((unsigned long)&((TYPE *)0)->MEMBER)
+#define __hidden __attribute__((visibility("hidden")))
+
+/* When utilizing vmlinux.h with BPF CO-RE, user BPF programs can't include
+ * any system-level headers (such as stddef.h, linux/version.h, etc), and
+ * commonly-used macros like NULL and KERNEL_VERSION aren't available through
+ * vmlinux.h. This just adds unnecessary hurdles and forces users to re-define
+ * them on their own. So as a convenience, provide such definitions here.
+ */
+#ifndef NULL
+#define NULL ((void *)0)
+#endif
+
+#ifndef KERNEL_VERSION
+#define KERNEL_VERSION(a, b, c) (((a) << 16) + ((b) << 8) + ((c) > 255 ? 255 : (c)))
#endif
-#ifndef container_of
+
+/*
+ * Helper macros to manipulate data structures
+ */
+
+/* offsetof() definition that uses __builtin_offset() might not preserve field
+ * offset CO-RE relocation properly, so force-redefine offsetof() using
+ * old-school approach which works with CO-RE correctly
+ */
+#undef offsetof
+#define offsetof(type, member) ((unsigned long)&((type *)0)->member)
+
+/* redefined container_of() to ensure we use the above offsetof() macro */
+#undef container_of
#define container_of(ptr, type, member) \
({ \
void *__mptr = (void *)(ptr); \
((type *)(__mptr - offsetof(type, member))); \
})
+
+/*
+ * Compiler (optimization) barrier.
+ */
+#ifndef barrier
+#define barrier() asm volatile("" ::: "memory")
+#endif
+
+/* Variable-specific compiler (optimization) barrier. It's a no-op which makes
+ * compiler believe that there is some black box modification of a given
+ * variable and thus prevents compiler from making extra assumption about its
+ * value and potential simplifications and optimizations on this variable.
+ *
+ * E.g., compiler might often delay or even omit 32-bit to 64-bit casting of
+ * a variable, making some code patterns unverifiable. Putting barrier_var()
+ * in place will ensure that cast is performed before the barrier_var()
+ * invocation, because compiler has to pessimistically assume that embedded
+ * asm section might perform some extra operations on that variable.
+ *
+ * This is a variable-specific variant of more global barrier().
+ */
+#ifndef barrier_var
+#define barrier_var(var) asm volatile("" : "+r"(var))
#endif
/*
@@ -101,18 +166,6 @@ bpf_tail_call_static(void *ctx, const void *map, const __u32 slot)
}
#endif
-/*
- * Helper structure used by eBPF C program
- * to describe BPF map attributes to libbpf loader
- */
-struct bpf_map_def {
- unsigned int type;
- unsigned int key_size;
- unsigned int value_size;
- unsigned int max_entries;
- unsigned int map_flags;
-};
-
enum libbpf_pin_type {
LIBBPF_PIN_NONE,
/* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
@@ -127,5 +180,231 @@ enum libbpf_tristate {
#define __kconfig __attribute__((section(".kconfig")))
#define __ksym __attribute__((section(".ksyms")))
+#define __kptr_untrusted __attribute__((btf_type_tag("kptr_untrusted")))
+#define __kptr __attribute__((btf_type_tag("kptr")))
+#define __percpu_kptr __attribute__((btf_type_tag("percpu_kptr")))
+
+#define bpf_ksym_exists(sym) ({ \
+ _Static_assert(!__builtin_constant_p(!!sym), #sym " should be marked as __weak"); \
+ !!sym; \
+})
+
+#define __arg_ctx __attribute__((btf_decl_tag("arg:ctx")))
+#define __arg_nonnull __attribute((btf_decl_tag("arg:nonnull")))
+#define __arg_nullable __attribute((btf_decl_tag("arg:nullable")))
+#define __arg_trusted __attribute((btf_decl_tag("arg:trusted")))
+#define __arg_arena __attribute((btf_decl_tag("arg:arena")))
+
+#ifndef ___bpf_concat
+#define ___bpf_concat(a, b) a ## b
+#endif
+#ifndef ___bpf_apply
+#define ___bpf_apply(fn, n) ___bpf_concat(fn, n)
+#endif
+#ifndef ___bpf_nth
+#define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N
+#endif
+#ifndef ___bpf_narg
+#define ___bpf_narg(...) \
+ ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+#endif
+
+#define ___bpf_fill0(arr, p, x) do {} while (0)
+#define ___bpf_fill1(arr, p, x) arr[p] = x
+#define ___bpf_fill2(arr, p, x, args...) arr[p] = x; ___bpf_fill1(arr, p + 1, args)
+#define ___bpf_fill3(arr, p, x, args...) arr[p] = x; ___bpf_fill2(arr, p + 1, args)
+#define ___bpf_fill4(arr, p, x, args...) arr[p] = x; ___bpf_fill3(arr, p + 1, args)
+#define ___bpf_fill5(arr, p, x, args...) arr[p] = x; ___bpf_fill4(arr, p + 1, args)
+#define ___bpf_fill6(arr, p, x, args...) arr[p] = x; ___bpf_fill5(arr, p + 1, args)
+#define ___bpf_fill7(arr, p, x, args...) arr[p] = x; ___bpf_fill6(arr, p + 1, args)
+#define ___bpf_fill8(arr, p, x, args...) arr[p] = x; ___bpf_fill7(arr, p + 1, args)
+#define ___bpf_fill9(arr, p, x, args...) arr[p] = x; ___bpf_fill8(arr, p + 1, args)
+#define ___bpf_fill10(arr, p, x, args...) arr[p] = x; ___bpf_fill9(arr, p + 1, args)
+#define ___bpf_fill11(arr, p, x, args...) arr[p] = x; ___bpf_fill10(arr, p + 1, args)
+#define ___bpf_fill12(arr, p, x, args...) arr[p] = x; ___bpf_fill11(arr, p + 1, args)
+#define ___bpf_fill(arr, args...) \
+ ___bpf_apply(___bpf_fill, ___bpf_narg(args))(arr, 0, args)
+
+/*
+ * BPF_SEQ_PRINTF to wrap bpf_seq_printf to-be-printed values
+ * in a structure.
+ */
+#define BPF_SEQ_PRINTF(seq, fmt, args...) \
+({ \
+ static const char ___fmt[] = fmt; \
+ unsigned long long ___param[___bpf_narg(args)]; \
+ \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
+ ___bpf_fill(___param, args); \
+ _Pragma("GCC diagnostic pop") \
+ \
+ bpf_seq_printf(seq, ___fmt, sizeof(___fmt), \
+ ___param, sizeof(___param)); \
+})
+
+/*
+ * BPF_SNPRINTF wraps the bpf_snprintf helper with variadic arguments instead of
+ * an array of u64.
+ */
+#define BPF_SNPRINTF(out, out_size, fmt, args...) \
+({ \
+ static const char ___fmt[] = fmt; \
+ unsigned long long ___param[___bpf_narg(args)]; \
+ \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
+ ___bpf_fill(___param, args); \
+ _Pragma("GCC diagnostic pop") \
+ \
+ bpf_snprintf(out, out_size, ___fmt, \
+ ___param, sizeof(___param)); \
+})
+
+#ifdef BPF_NO_GLOBAL_DATA
+#define BPF_PRINTK_FMT_MOD
+#else
+#define BPF_PRINTK_FMT_MOD static const
+#endif
+
+#define __bpf_printk(fmt, ...) \
+({ \
+ BPF_PRINTK_FMT_MOD char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+/*
+ * __bpf_vprintk wraps the bpf_trace_vprintk helper with variadic arguments
+ * instead of an array of u64.
+ */
+#define __bpf_vprintk(fmt, args...) \
+({ \
+ static const char ___fmt[] = fmt; \
+ unsigned long long ___param[___bpf_narg(args)]; \
+ \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
+ ___bpf_fill(___param, args); \
+ _Pragma("GCC diagnostic pop") \
+ \
+ bpf_trace_vprintk(___fmt, sizeof(___fmt), \
+ ___param, sizeof(___param)); \
+})
+
+/* Use __bpf_printk when bpf_printk call has 3 or fewer fmt args
+ * Otherwise use __bpf_vprintk
+ */
+#define ___bpf_pick_printk(...) \
+ ___bpf_nth(_, ##__VA_ARGS__, __bpf_vprintk, __bpf_vprintk, __bpf_vprintk, \
+ __bpf_vprintk, __bpf_vprintk, __bpf_vprintk, __bpf_vprintk, \
+ __bpf_vprintk, __bpf_vprintk, __bpf_printk /*3*/, __bpf_printk /*2*/,\
+ __bpf_printk /*1*/, __bpf_printk /*0*/)
+
+/* Helper macro to print out debug messages */
+#define bpf_printk(fmt, args...) ___bpf_pick_printk(args)(fmt, ##args)
+
+struct bpf_iter_num;
+
+extern int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end) __weak __ksym;
+extern int *bpf_iter_num_next(struct bpf_iter_num *it) __weak __ksym;
+extern void bpf_iter_num_destroy(struct bpf_iter_num *it) __weak __ksym;
+
+#ifndef bpf_for_each
+/* bpf_for_each(iter_type, cur_elem, args...) provides generic construct for
+ * using BPF open-coded iterators without having to write mundane explicit
+ * low-level loop logic. Instead, it provides for()-like generic construct
+ * that can be used pretty naturally. E.g., for some hypothetical cgroup
+ * iterator, you'd write:
+ *
+ * struct cgroup *cg, *parent_cg = <...>;
+ *
+ * bpf_for_each(cgroup, cg, parent_cg, CG_ITER_CHILDREN) {
+ * bpf_printk("Child cgroup id = %d", cg->cgroup_id);
+ * if (cg->cgroup_id == 123)
+ * break;
+ * }
+ *
+ * I.e., it looks almost like high-level for each loop in other languages,
+ * supports continue/break, and is verifiable by BPF verifier.
+ *
+ * For iterating integers, the difference betwen bpf_for_each(num, i, N, M)
+ * and bpf_for(i, N, M) is in that bpf_for() provides additional proof to
+ * verifier that i is in [N, M) range, and in bpf_for_each() case i is `int
+ * *`, not just `int`. So for integers bpf_for() is more convenient.
+ *
+ * Note: this macro relies on C99 feature of allowing to declare variables
+ * inside for() loop, bound to for() loop lifetime. It also utilizes GCC
+ * extension: __attribute__((cleanup(<func>))), supported by both GCC and
+ * Clang.
+ */
+#define bpf_for_each(type, cur, args...) for ( \
+ /* initialize and define destructor */ \
+ struct bpf_iter_##type ___it __attribute__((aligned(8), /* enforce, just in case */, \
+ cleanup(bpf_iter_##type##_destroy))), \
+ /* ___p pointer is just to call bpf_iter_##type##_new() *once* to init ___it */ \
+ *___p __attribute__((unused)) = ( \
+ bpf_iter_##type##_new(&___it, ##args), \
+ /* this is a workaround for Clang bug: it currently doesn't emit BTF */ \
+ /* for bpf_iter_##type##_destroy() when used from cleanup() attribute */ \
+ (void)bpf_iter_##type##_destroy, (void *)0); \
+ /* iteration and termination check */ \
+ (((cur) = bpf_iter_##type##_next(&___it))); \
+)
+#endif /* bpf_for_each */
+
+#ifndef bpf_for
+/* bpf_for(i, start, end) implements a for()-like looping construct that sets
+ * provided integer variable *i* to values starting from *start* through,
+ * but not including, *end*. It also proves to BPF verifier that *i* belongs
+ * to range [start, end), so this can be used for accessing arrays without
+ * extra checks.
+ *
+ * Note: *start* and *end* are assumed to be expressions with no side effects
+ * and whose values do not change throughout bpf_for() loop execution. They do
+ * not have to be statically known or constant, though.
+ *
+ * Note: similarly to bpf_for_each(), it relies on C99 feature of declaring for()
+ * loop bound variables and cleanup attribute, supported by GCC and Clang.
+ */
+#define bpf_for(i, start, end) for ( \
+ /* initialize and define destructor */ \
+ struct bpf_iter_num ___it __attribute__((aligned(8), /* enforce, just in case */ \
+ cleanup(bpf_iter_num_destroy))), \
+ /* ___p pointer is necessary to call bpf_iter_num_new() *once* to init ___it */ \
+ *___p __attribute__((unused)) = ( \
+ bpf_iter_num_new(&___it, (start), (end)), \
+ /* this is a workaround for Clang bug: it currently doesn't emit BTF */ \
+ /* for bpf_iter_num_destroy() when used from cleanup() attribute */ \
+ (void)bpf_iter_num_destroy, (void *)0); \
+ ({ \
+ /* iteration step */ \
+ int *___t = bpf_iter_num_next(&___it); \
+ /* termination and bounds check */ \
+ (___t && ((i) = *___t, (i) >= (start) && (i) < (end))); \
+ }); \
+)
+#endif /* bpf_for */
+
+#ifndef bpf_repeat
+/* bpf_repeat(N) performs N iterations without exposing iteration number
+ *
+ * Note: similarly to bpf_for_each(), it relies on C99 feature of declaring for()
+ * loop bound variables and cleanup attribute, supported by GCC and Clang.
+ */
+#define bpf_repeat(N) for ( \
+ /* initialize and define destructor */ \
+ struct bpf_iter_num ___it __attribute__((aligned(8), /* enforce, just in case */ \
+ cleanup(bpf_iter_num_destroy))), \
+ /* ___p pointer is necessary to call bpf_iter_num_new() *once* to init ___it */ \
+ *___p __attribute__((unused)) = ( \
+ bpf_iter_num_new(&___it, 0, (N)), \
+ /* this is a workaround for Clang bug: it currently doesn't emit BTF */ \
+ /* for bpf_iter_num_destroy() when used from cleanup() attribute */ \
+ (void)bpf_iter_num_destroy, (void *)0); \
+ bpf_iter_num_next(&___it); \
+ /* nothing here */ \
+)
+#endif /* bpf_repeat */
#endif
diff --git a/tools/lib/bpf/bpf_prog_linfo.c b/tools/lib/bpf/bpf_prog_linfo.c
index 3ed1a27b5f7c..5c503096ef43 100644
--- a/tools/lib/bpf/bpf_prog_linfo.c
+++ b/tools/lib/bpf/bpf_prog_linfo.c
@@ -106,7 +106,7 @@ struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info)
nr_linfo = info->nr_line_info;
if (!nr_linfo)
- return NULL;
+ return errno = EINVAL, NULL;
/*
* The min size that bpf_prog_linfo has to access for
@@ -114,11 +114,11 @@ struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info)
*/
if (info->line_info_rec_size <
offsetof(struct bpf_line_info, file_name_off))
- return NULL;
+ return errno = EINVAL, NULL;
prog_linfo = calloc(1, sizeof(*prog_linfo));
if (!prog_linfo)
- return NULL;
+ return errno = ENOMEM, NULL;
/* Copy xlated line_info */
prog_linfo->nr_linfo = nr_linfo;
@@ -174,7 +174,7 @@ struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info)
err_free:
bpf_prog_linfo__free(prog_linfo);
- return NULL;
+ return errno = EINVAL, NULL;
}
const struct bpf_line_info *
@@ -186,11 +186,11 @@ bpf_prog_linfo__lfind_addr_func(const struct bpf_prog_linfo *prog_linfo,
const __u64 *jited_linfo;
if (func_idx >= prog_linfo->nr_jited_func)
- return NULL;
+ return errno = ENOENT, NULL;
nr_linfo = prog_linfo->nr_jited_linfo_per_func[func_idx];
if (nr_skip >= nr_linfo)
- return NULL;
+ return errno = ENOENT, NULL;
start = prog_linfo->jited_linfo_func_idx[func_idx] + nr_skip;
jited_rec_size = prog_linfo->jited_rec_size;
@@ -198,7 +198,7 @@ bpf_prog_linfo__lfind_addr_func(const struct bpf_prog_linfo *prog_linfo,
(start * jited_rec_size);
jited_linfo = raw_jited_linfo;
if (addr < *jited_linfo)
- return NULL;
+ return errno = ENOENT, NULL;
nr_linfo -= nr_skip;
rec_size = prog_linfo->rec_size;
@@ -225,13 +225,13 @@ bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo,
nr_linfo = prog_linfo->nr_linfo;
if (nr_skip >= nr_linfo)
- return NULL;
+ return errno = ENOENT, NULL;
rec_size = prog_linfo->rec_size;
raw_linfo = prog_linfo->raw_linfo + (nr_skip * rec_size);
linfo = raw_linfo;
if (insn_off < linfo->insn_off)
- return NULL;
+ return errno = ENOENT, NULL;
nr_linfo -= nr_skip;
for (i = 0; i < nr_linfo; i++) {
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index f9ef37707888..1c13f8e88833 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -2,6 +2,8 @@
#ifndef __BPF_TRACING_H__
#define __BPF_TRACING_H__
+#include "bpf_helpers.h"
+
/* Scan the ARCH passed in from ARCH env variable (see Makefile) */
#if defined(__TARGET_ARCH_x86)
#define bpf_target_x86
@@ -24,300 +26,626 @@
#elif defined(__TARGET_ARCH_sparc)
#define bpf_target_sparc
#define bpf_target_defined
+#elif defined(__TARGET_ARCH_riscv)
+ #define bpf_target_riscv
+ #define bpf_target_defined
+#elif defined(__TARGET_ARCH_arc)
+ #define bpf_target_arc
+ #define bpf_target_defined
+#elif defined(__TARGET_ARCH_loongarch)
+ #define bpf_target_loongarch
+ #define bpf_target_defined
#else
- #undef bpf_target_defined
-#endif
/* Fall back to what the compiler says */
-#ifndef bpf_target_defined
#if defined(__x86_64__)
#define bpf_target_x86
+ #define bpf_target_defined
#elif defined(__s390__)
#define bpf_target_s390
+ #define bpf_target_defined
#elif defined(__arm__)
#define bpf_target_arm
+ #define bpf_target_defined
#elif defined(__aarch64__)
#define bpf_target_arm64
+ #define bpf_target_defined
#elif defined(__mips__)
#define bpf_target_mips
+ #define bpf_target_defined
#elif defined(__powerpc__)
#define bpf_target_powerpc
+ #define bpf_target_defined
#elif defined(__sparc__)
#define bpf_target_sparc
+ #define bpf_target_defined
+#elif defined(__riscv) && __riscv_xlen == 64
+ #define bpf_target_riscv
+ #define bpf_target_defined
+#elif defined(__arc__)
+ #define bpf_target_arc
+ #define bpf_target_defined
+#elif defined(__loongarch__)
+ #define bpf_target_loongarch
+ #define bpf_target_defined
+#endif /* no compiler target */
+
#endif
+
+#ifndef __BPF_TARGET_MISSING
+#define __BPF_TARGET_MISSING "GCC error \"Must specify a BPF target arch via __TARGET_ARCH_xxx\""
#endif
#if defined(bpf_target_x86)
+/*
+ * https://en.wikipedia.org/wiki/X86_calling_conventions#System_V_AMD64_ABI
+ */
+
#if defined(__KERNEL__) || defined(__VMLINUX_H__)
-#define PT_REGS_PARM1(x) ((x)->di)
-#define PT_REGS_PARM2(x) ((x)->si)
-#define PT_REGS_PARM3(x) ((x)->dx)
-#define PT_REGS_PARM4(x) ((x)->cx)
-#define PT_REGS_PARM5(x) ((x)->r8)
-#define PT_REGS_RET(x) ((x)->sp)
-#define PT_REGS_FP(x) ((x)->bp)
-#define PT_REGS_RC(x) ((x)->ax)
-#define PT_REGS_SP(x) ((x)->sp)
-#define PT_REGS_IP(x) ((x)->ip)
-
-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), di)
-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), si)
-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), dx)
-#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), cx)
-#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), r8)
-#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), sp)
-#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), bp)
-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), ax)
-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), sp)
-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), ip)
+#define __PT_PARM1_REG di
+#define __PT_PARM2_REG si
+#define __PT_PARM3_REG dx
+#define __PT_PARM4_REG cx
+#define __PT_PARM5_REG r8
+#define __PT_PARM6_REG r9
+/*
+ * Syscall uses r10 for PARM4. See arch/x86/entry/entry_64.S:entry_SYSCALL_64
+ * comments in Linux sources. And refer to syscall(2) manpage.
+ */
+#define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG
+#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
+#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
+#define __PT_PARM4_SYSCALL_REG r10
+#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG
+#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG
+
+#define __PT_RET_REG sp
+#define __PT_FP_REG bp
+#define __PT_RC_REG ax
+#define __PT_SP_REG sp
+#define __PT_IP_REG ip
#else
#ifdef __i386__
-/* i386 kernel is built with -mregparm=3 */
-#define PT_REGS_PARM1(x) ((x)->eax)
-#define PT_REGS_PARM2(x) ((x)->edx)
-#define PT_REGS_PARM3(x) ((x)->ecx)
-#define PT_REGS_PARM4(x) 0
-#define PT_REGS_PARM5(x) 0
-#define PT_REGS_RET(x) ((x)->esp)
-#define PT_REGS_FP(x) ((x)->ebp)
-#define PT_REGS_RC(x) ((x)->eax)
-#define PT_REGS_SP(x) ((x)->esp)
-#define PT_REGS_IP(x) ((x)->eip)
-
-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), eax)
-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), edx)
-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), ecx)
-#define PT_REGS_PARM4_CORE(x) 0
-#define PT_REGS_PARM5_CORE(x) 0
-#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), esp)
-#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), ebp)
-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), eax)
-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), esp)
-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), eip)
-#else
+/* i386 kernel is built with -mregparm=3 */
+#define __PT_PARM1_REG eax
+#define __PT_PARM2_REG edx
+#define __PT_PARM3_REG ecx
+/* i386 syscall ABI is very different, refer to syscall(2) manpage */
+#define __PT_PARM1_SYSCALL_REG ebx
+#define __PT_PARM2_SYSCALL_REG ecx
+#define __PT_PARM3_SYSCALL_REG edx
+#define __PT_PARM4_SYSCALL_REG esi
+#define __PT_PARM5_SYSCALL_REG edi
+#define __PT_PARM6_SYSCALL_REG ebp
+
+#define __PT_RET_REG esp
+#define __PT_FP_REG ebp
+#define __PT_RC_REG eax
+#define __PT_SP_REG esp
+#define __PT_IP_REG eip
+
+#else /* __i386__ */
+
+#define __PT_PARM1_REG rdi
+#define __PT_PARM2_REG rsi
+#define __PT_PARM3_REG rdx
+#define __PT_PARM4_REG rcx
+#define __PT_PARM5_REG r8
+#define __PT_PARM6_REG r9
+
+#define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG
+#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
+#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
+#define __PT_PARM4_SYSCALL_REG r10
+#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG
+#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG
+
+#define __PT_RET_REG rsp
+#define __PT_FP_REG rbp
+#define __PT_RC_REG rax
+#define __PT_SP_REG rsp
+#define __PT_IP_REG rip
+
+#endif /* __i386__ */
+
+#endif /* __KERNEL__ || __VMLINUX_H__ */
-#define PT_REGS_PARM1(x) ((x)->rdi)
-#define PT_REGS_PARM2(x) ((x)->rsi)
-#define PT_REGS_PARM3(x) ((x)->rdx)
-#define PT_REGS_PARM4(x) ((x)->rcx)
-#define PT_REGS_PARM5(x) ((x)->r8)
-#define PT_REGS_RET(x) ((x)->rsp)
-#define PT_REGS_FP(x) ((x)->rbp)
-#define PT_REGS_RC(x) ((x)->rax)
-#define PT_REGS_SP(x) ((x)->rsp)
-#define PT_REGS_IP(x) ((x)->rip)
-
-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), rdi)
-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), rsi)
-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), rdx)
-#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), rcx)
-#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), r8)
-#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), rsp)
-#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), rbp)
-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), rax)
-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), rsp)
-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), rip)
+#elif defined(bpf_target_s390)
-#endif
-#endif
+/*
+ * https://github.com/IBM/s390x-abi/releases/download/v1.6/lzsabi_s390x.pdf
+ */
-#elif defined(bpf_target_s390)
+struct pt_regs___s390 {
+ unsigned long orig_gpr2;
+};
/* s390 provides user_pt_regs instead of struct pt_regs to userspace */
-struct pt_regs;
-#define PT_REGS_S390 const volatile user_pt_regs
-#define PT_REGS_PARM1(x) (((PT_REGS_S390 *)(x))->gprs[2])
-#define PT_REGS_PARM2(x) (((PT_REGS_S390 *)(x))->gprs[3])
-#define PT_REGS_PARM3(x) (((PT_REGS_S390 *)(x))->gprs[4])
-#define PT_REGS_PARM4(x) (((PT_REGS_S390 *)(x))->gprs[5])
-#define PT_REGS_PARM5(x) (((PT_REGS_S390 *)(x))->gprs[6])
-#define PT_REGS_RET(x) (((PT_REGS_S390 *)(x))->gprs[14])
-/* Works only with CONFIG_FRAME_POINTER */
-#define PT_REGS_FP(x) (((PT_REGS_S390 *)(x))->gprs[11])
-#define PT_REGS_RC(x) (((PT_REGS_S390 *)(x))->gprs[2])
-#define PT_REGS_SP(x) (((PT_REGS_S390 *)(x))->gprs[15])
-#define PT_REGS_IP(x) (((PT_REGS_S390 *)(x))->psw.addr)
-
-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2])
-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[3])
-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[4])
-#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[5])
-#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[6])
-#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[14])
-#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[11])
-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2])
-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[15])
-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), psw.addr)
+#define __PT_REGS_CAST(x) ((const user_pt_regs *)(x))
+#define __PT_PARM1_REG gprs[2]
+#define __PT_PARM2_REG gprs[3]
+#define __PT_PARM3_REG gprs[4]
+#define __PT_PARM4_REG gprs[5]
+#define __PT_PARM5_REG gprs[6]
+
+#define __PT_PARM1_SYSCALL_REG orig_gpr2
+#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
+#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
+#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG
+#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG
+#define __PT_PARM6_SYSCALL_REG gprs[7]
+#define PT_REGS_PARM1_SYSCALL(x) PT_REGS_PARM1_CORE_SYSCALL(x)
+#define PT_REGS_PARM1_CORE_SYSCALL(x) \
+ BPF_CORE_READ((const struct pt_regs___s390 *)(x), __PT_PARM1_SYSCALL_REG)
+
+#define __PT_RET_REG gprs[14]
+#define __PT_FP_REG gprs[11] /* Works only with CONFIG_FRAME_POINTER */
+#define __PT_RC_REG gprs[2]
+#define __PT_SP_REG gprs[15]
+#define __PT_IP_REG psw.addr
#elif defined(bpf_target_arm)
-#define PT_REGS_PARM1(x) ((x)->uregs[0])
-#define PT_REGS_PARM2(x) ((x)->uregs[1])
-#define PT_REGS_PARM3(x) ((x)->uregs[2])
-#define PT_REGS_PARM4(x) ((x)->uregs[3])
-#define PT_REGS_PARM5(x) ((x)->uregs[4])
-#define PT_REGS_RET(x) ((x)->uregs[14])
-#define PT_REGS_FP(x) ((x)->uregs[11]) /* Works only with CONFIG_FRAME_POINTER */
-#define PT_REGS_RC(x) ((x)->uregs[0])
-#define PT_REGS_SP(x) ((x)->uregs[13])
-#define PT_REGS_IP(x) ((x)->uregs[12])
-
-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), uregs[0])
-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), uregs[1])
-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), uregs[2])
-#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), uregs[3])
-#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), uregs[4])
-#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), uregs[14])
-#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), uregs[11])
-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), uregs[0])
-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), uregs[13])
-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), uregs[12])
+/*
+ * https://github.com/ARM-software/abi-aa/blob/main/aapcs32/aapcs32.rst#machine-registers
+ */
+
+#define __PT_PARM1_REG uregs[0]
+#define __PT_PARM2_REG uregs[1]
+#define __PT_PARM3_REG uregs[2]
+#define __PT_PARM4_REG uregs[3]
+
+#define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG
+#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
+#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
+#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG
+#define __PT_PARM5_SYSCALL_REG uregs[4]
+#define __PT_PARM6_SYSCALL_REG uregs[5]
+#define __PT_PARM7_SYSCALL_REG uregs[6]
+
+#define __PT_RET_REG uregs[14]
+#define __PT_FP_REG uregs[11] /* Works only with CONFIG_FRAME_POINTER */
+#define __PT_RC_REG uregs[0]
+#define __PT_SP_REG uregs[13]
+#define __PT_IP_REG uregs[12]
#elif defined(bpf_target_arm64)
+/*
+ * https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#machine-registers
+ */
+
+struct pt_regs___arm64 {
+ unsigned long orig_x0;
+};
+
/* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */
-struct pt_regs;
-#define PT_REGS_ARM64 const volatile struct user_pt_regs
-#define PT_REGS_PARM1(x) (((PT_REGS_ARM64 *)(x))->regs[0])
-#define PT_REGS_PARM2(x) (((PT_REGS_ARM64 *)(x))->regs[1])
-#define PT_REGS_PARM3(x) (((PT_REGS_ARM64 *)(x))->regs[2])
-#define PT_REGS_PARM4(x) (((PT_REGS_ARM64 *)(x))->regs[3])
-#define PT_REGS_PARM5(x) (((PT_REGS_ARM64 *)(x))->regs[4])
-#define PT_REGS_RET(x) (((PT_REGS_ARM64 *)(x))->regs[30])
-/* Works only with CONFIG_FRAME_POINTER */
-#define PT_REGS_FP(x) (((PT_REGS_ARM64 *)(x))->regs[29])
-#define PT_REGS_RC(x) (((PT_REGS_ARM64 *)(x))->regs[0])
-#define PT_REGS_SP(x) (((PT_REGS_ARM64 *)(x))->sp)
-#define PT_REGS_IP(x) (((PT_REGS_ARM64 *)(x))->pc)
-
-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[0])
-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[1])
-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[2])
-#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[3])
-#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[4])
-#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[30])
-#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[29])
-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[0])
-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), sp)
-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), pc)
+#define __PT_REGS_CAST(x) ((const struct user_pt_regs *)(x))
+#define __PT_PARM1_REG regs[0]
+#define __PT_PARM2_REG regs[1]
+#define __PT_PARM3_REG regs[2]
+#define __PT_PARM4_REG regs[3]
+#define __PT_PARM5_REG regs[4]
+#define __PT_PARM6_REG regs[5]
+#define __PT_PARM7_REG regs[6]
+#define __PT_PARM8_REG regs[7]
+
+#define __PT_PARM1_SYSCALL_REG orig_x0
+#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
+#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
+#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG
+#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG
+#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG
+#define PT_REGS_PARM1_SYSCALL(x) PT_REGS_PARM1_CORE_SYSCALL(x)
+#define PT_REGS_PARM1_CORE_SYSCALL(x) \
+ BPF_CORE_READ((const struct pt_regs___arm64 *)(x), __PT_PARM1_SYSCALL_REG)
+
+#define __PT_RET_REG regs[30]
+#define __PT_FP_REG regs[29] /* Works only with CONFIG_FRAME_POINTER */
+#define __PT_RC_REG regs[0]
+#define __PT_SP_REG sp
+#define __PT_IP_REG pc
#elif defined(bpf_target_mips)
-#define PT_REGS_PARM1(x) ((x)->regs[4])
-#define PT_REGS_PARM2(x) ((x)->regs[5])
-#define PT_REGS_PARM3(x) ((x)->regs[6])
-#define PT_REGS_PARM4(x) ((x)->regs[7])
-#define PT_REGS_PARM5(x) ((x)->regs[8])
-#define PT_REGS_RET(x) ((x)->regs[31])
-#define PT_REGS_FP(x) ((x)->regs[30]) /* Works only with CONFIG_FRAME_POINTER */
-#define PT_REGS_RC(x) ((x)->regs[2])
-#define PT_REGS_SP(x) ((x)->regs[29])
-#define PT_REGS_IP(x) ((x)->cp0_epc)
-
-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), regs[4])
-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), regs[5])
-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), regs[6])
-#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), regs[7])
-#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), regs[8])
-#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), regs[31])
-#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), regs[30])
-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), regs[2])
-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), regs[29])
-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), cp0_epc)
+/*
+ * N64 ABI is assumed right now.
+ * https://en.wikipedia.org/wiki/MIPS_architecture#Calling_conventions
+ */
+
+#define __PT_PARM1_REG regs[4]
+#define __PT_PARM2_REG regs[5]
+#define __PT_PARM3_REG regs[6]
+#define __PT_PARM4_REG regs[7]
+#define __PT_PARM5_REG regs[8]
+#define __PT_PARM6_REG regs[9]
+#define __PT_PARM7_REG regs[10]
+#define __PT_PARM8_REG regs[11]
+
+#define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG
+#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
+#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
+#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG
+#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG /* only N32/N64 */
+#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG /* only N32/N64 */
+
+#define __PT_RET_REG regs[31]
+#define __PT_FP_REG regs[30] /* Works only with CONFIG_FRAME_POINTER */
+#define __PT_RC_REG regs[2]
+#define __PT_SP_REG regs[29]
+#define __PT_IP_REG cp0_epc
#elif defined(bpf_target_powerpc)
-#define PT_REGS_PARM1(x) ((x)->gpr[3])
-#define PT_REGS_PARM2(x) ((x)->gpr[4])
-#define PT_REGS_PARM3(x) ((x)->gpr[5])
-#define PT_REGS_PARM4(x) ((x)->gpr[6])
-#define PT_REGS_PARM5(x) ((x)->gpr[7])
-#define PT_REGS_RC(x) ((x)->gpr[3])
-#define PT_REGS_SP(x) ((x)->sp)
-#define PT_REGS_IP(x) ((x)->nip)
-
-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), gpr[3])
-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), gpr[4])
-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), gpr[5])
-#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), gpr[6])
-#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), gpr[7])
-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), gpr[3])
-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), sp)
-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), nip)
+/*
+ * http://refspecs.linux-foundation.org/elf/elfspec_ppc.pdf (page 3-14,
+ * section "Function Calling Sequence")
+ */
+
+#define __PT_PARM1_REG gpr[3]
+#define __PT_PARM2_REG gpr[4]
+#define __PT_PARM3_REG gpr[5]
+#define __PT_PARM4_REG gpr[6]
+#define __PT_PARM5_REG gpr[7]
+#define __PT_PARM6_REG gpr[8]
+#define __PT_PARM7_REG gpr[9]
+#define __PT_PARM8_REG gpr[10]
+
+/* powerpc does not select ARCH_HAS_SYSCALL_WRAPPER. */
+#define PT_REGS_SYSCALL_REGS(ctx) ctx
+#define __PT_PARM1_SYSCALL_REG orig_gpr3
+#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
+#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
+#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG
+#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG
+#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG
+#if !defined(__arch64__)
+#define __PT_PARM7_SYSCALL_REG __PT_PARM7_REG /* only powerpc (not powerpc64) */
+#endif
+
+#define __PT_RET_REG regs[31]
+#define __PT_FP_REG __unsupported__
+#define __PT_RC_REG gpr[3]
+#define __PT_SP_REG sp
+#define __PT_IP_REG nip
#elif defined(bpf_target_sparc)
-#define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0])
-#define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1])
-#define PT_REGS_PARM3(x) ((x)->u_regs[UREG_I2])
-#define PT_REGS_PARM4(x) ((x)->u_regs[UREG_I3])
-#define PT_REGS_PARM5(x) ((x)->u_regs[UREG_I4])
-#define PT_REGS_RET(x) ((x)->u_regs[UREG_I7])
-#define PT_REGS_RC(x) ((x)->u_regs[UREG_I0])
-#define PT_REGS_SP(x) ((x)->u_regs[UREG_FP])
-
-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I0])
-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I1])
-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I2])
-#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I3])
-#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I4])
-#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I7])
-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I0])
-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), u_regs[UREG_FP])
+/*
+ * https://en.wikipedia.org/wiki/Calling_convention#SPARC
+ */
+#define __PT_PARM1_REG u_regs[UREG_I0]
+#define __PT_PARM2_REG u_regs[UREG_I1]
+#define __PT_PARM3_REG u_regs[UREG_I2]
+#define __PT_PARM4_REG u_regs[UREG_I3]
+#define __PT_PARM5_REG u_regs[UREG_I4]
+#define __PT_PARM6_REG u_regs[UREG_I5]
+
+#define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG
+#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
+#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
+#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG
+#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG
+#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG
+
+#define __PT_RET_REG u_regs[UREG_I7]
+#define __PT_FP_REG __unsupported__
+#define __PT_RC_REG u_regs[UREG_I0]
+#define __PT_SP_REG u_regs[UREG_FP]
/* Should this also be a bpf_target check for the sparc case? */
#if defined(__arch64__)
-#define PT_REGS_IP(x) ((x)->tpc)
-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), tpc)
+#define __PT_IP_REG tpc
#else
-#define PT_REGS_IP(x) ((x)->pc)
-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), pc)
+#define __PT_IP_REG pc
+#endif
+
+#elif defined(bpf_target_riscv)
+
+/*
+ * https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc#risc-v-calling-conventions
+ */
+
+/* riscv provides struct user_regs_struct instead of struct pt_regs to userspace */
+#define __PT_REGS_CAST(x) ((const struct user_regs_struct *)(x))
+#define __PT_PARM1_REG a0
+#define __PT_PARM2_REG a1
+#define __PT_PARM3_REG a2
+#define __PT_PARM4_REG a3
+#define __PT_PARM5_REG a4
+#define __PT_PARM6_REG a5
+#define __PT_PARM7_REG a6
+#define __PT_PARM8_REG a7
+
+#define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG
+#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
+#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
+#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG
+#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG
+#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG
+
+#define __PT_RET_REG ra
+#define __PT_FP_REG s0
+#define __PT_RC_REG a0
+#define __PT_SP_REG sp
+#define __PT_IP_REG pc
+
+#elif defined(bpf_target_arc)
+
+/*
+ * Section "Function Calling Sequence" (page 24):
+ * https://raw.githubusercontent.com/wiki/foss-for-synopsys-dwc-arc-processors/toolchain/files/ARCv2_ABI.pdf
+ */
+
+/* arc provides struct user_regs_struct instead of struct pt_regs to userspace */
+#define __PT_REGS_CAST(x) ((const struct user_regs_struct *)(x))
+#define __PT_PARM1_REG scratch.r0
+#define __PT_PARM2_REG scratch.r1
+#define __PT_PARM3_REG scratch.r2
+#define __PT_PARM4_REG scratch.r3
+#define __PT_PARM5_REG scratch.r4
+#define __PT_PARM6_REG scratch.r5
+#define __PT_PARM7_REG scratch.r6
+#define __PT_PARM8_REG scratch.r7
+
+/* arc does not select ARCH_HAS_SYSCALL_WRAPPER. */
+#define PT_REGS_SYSCALL_REGS(ctx) ctx
+#define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG
+#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
+#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
+#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG
+#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG
+#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG
+
+#define __PT_RET_REG scratch.blink
+#define __PT_FP_REG scratch.fp
+#define __PT_RC_REG scratch.r0
+#define __PT_SP_REG scratch.sp
+#define __PT_IP_REG scratch.ret
+
+#elif defined(bpf_target_loongarch)
+
+/*
+ * https://docs.kernel.org/loongarch/introduction.html
+ * https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html
+ */
+
+/* loongarch provides struct user_pt_regs instead of struct pt_regs to userspace */
+#define __PT_REGS_CAST(x) ((const struct user_pt_regs *)(x))
+#define __PT_PARM1_REG regs[4]
+#define __PT_PARM2_REG regs[5]
+#define __PT_PARM3_REG regs[6]
+#define __PT_PARM4_REG regs[7]
+#define __PT_PARM5_REG regs[8]
+#define __PT_PARM6_REG regs[9]
+#define __PT_PARM7_REG regs[10]
+#define __PT_PARM8_REG regs[11]
+
+/* loongarch does not select ARCH_HAS_SYSCALL_WRAPPER. */
+#define PT_REGS_SYSCALL_REGS(ctx) ctx
+#define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG
+#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
+#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
+#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG
+#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG
+#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG
+
+#define __PT_RET_REG regs[1]
+#define __PT_FP_REG regs[22]
+#define __PT_RC_REG regs[4]
+#define __PT_SP_REG regs[3]
+#define __PT_IP_REG csr_era
+
#endif
+#if defined(bpf_target_defined)
+
+struct pt_regs;
+
+/* allow some architectures to override `struct pt_regs` */
+#ifndef __PT_REGS_CAST
+#define __PT_REGS_CAST(x) (x)
#endif
+/*
+ * Different architectures support different number of arguments passed
+ * through registers. i386 supports just 3, some arches support up to 8.
+ */
+#ifndef __PT_PARM4_REG
+#define __PT_PARM4_REG __unsupported__
+#endif
+#ifndef __PT_PARM5_REG
+#define __PT_PARM5_REG __unsupported__
+#endif
+#ifndef __PT_PARM6_REG
+#define __PT_PARM6_REG __unsupported__
+#endif
+#ifndef __PT_PARM7_REG
+#define __PT_PARM7_REG __unsupported__
+#endif
+#ifndef __PT_PARM8_REG
+#define __PT_PARM8_REG __unsupported__
+#endif
+/*
+ * Similarly, syscall-specific conventions might differ between function call
+ * conventions within each architecutre. All supported architectures pass
+ * either 6 or 7 syscall arguments in registers.
+ *
+ * See syscall(2) manpage for succinct table with information on each arch.
+ */
+#ifndef __PT_PARM7_SYSCALL_REG
+#define __PT_PARM7_SYSCALL_REG __unsupported__
+#endif
+
+#define PT_REGS_PARM1(x) (__PT_REGS_CAST(x)->__PT_PARM1_REG)
+#define PT_REGS_PARM2(x) (__PT_REGS_CAST(x)->__PT_PARM2_REG)
+#define PT_REGS_PARM3(x) (__PT_REGS_CAST(x)->__PT_PARM3_REG)
+#define PT_REGS_PARM4(x) (__PT_REGS_CAST(x)->__PT_PARM4_REG)
+#define PT_REGS_PARM5(x) (__PT_REGS_CAST(x)->__PT_PARM5_REG)
+#define PT_REGS_PARM6(x) (__PT_REGS_CAST(x)->__PT_PARM6_REG)
+#define PT_REGS_PARM7(x) (__PT_REGS_CAST(x)->__PT_PARM7_REG)
+#define PT_REGS_PARM8(x) (__PT_REGS_CAST(x)->__PT_PARM8_REG)
+#define PT_REGS_RET(x) (__PT_REGS_CAST(x)->__PT_RET_REG)
+#define PT_REGS_FP(x) (__PT_REGS_CAST(x)->__PT_FP_REG)
+#define PT_REGS_RC(x) (__PT_REGS_CAST(x)->__PT_RC_REG)
+#define PT_REGS_SP(x) (__PT_REGS_CAST(x)->__PT_SP_REG)
+#define PT_REGS_IP(x) (__PT_REGS_CAST(x)->__PT_IP_REG)
+
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM1_REG)
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM2_REG)
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM3_REG)
+#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM4_REG)
+#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM5_REG)
+#define PT_REGS_PARM6_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM6_REG)
+#define PT_REGS_PARM7_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM7_REG)
+#define PT_REGS_PARM8_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM8_REG)
+#define PT_REGS_RET_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_RET_REG)
+#define PT_REGS_FP_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_FP_REG)
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_RC_REG)
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_SP_REG)
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_IP_REG)
+
#if defined(bpf_target_powerpc)
+
#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = (ctx)->link; })
#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP
+
#elif defined(bpf_target_sparc)
+
#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); })
#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP
+
#else
+
#define BPF_KPROBE_READ_RET_IP(ip, ctx) \
({ bpf_probe_read_kernel(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })
#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) \
- ({ bpf_probe_read_kernel(&(ip), sizeof(ip), \
- (void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
+ ({ bpf_probe_read_kernel(&(ip), sizeof(ip), (void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
+
+#endif
+
+#ifndef PT_REGS_PARM1_SYSCALL
+#define PT_REGS_PARM1_SYSCALL(x) (__PT_REGS_CAST(x)->__PT_PARM1_SYSCALL_REG)
+#define PT_REGS_PARM1_CORE_SYSCALL(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM1_SYSCALL_REG)
+#endif
+#ifndef PT_REGS_PARM2_SYSCALL
+#define PT_REGS_PARM2_SYSCALL(x) (__PT_REGS_CAST(x)->__PT_PARM2_SYSCALL_REG)
+#define PT_REGS_PARM2_CORE_SYSCALL(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM2_SYSCALL_REG)
+#endif
+#ifndef PT_REGS_PARM3_SYSCALL
+#define PT_REGS_PARM3_SYSCALL(x) (__PT_REGS_CAST(x)->__PT_PARM3_SYSCALL_REG)
+#define PT_REGS_PARM3_CORE_SYSCALL(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM3_SYSCALL_REG)
+#endif
+#ifndef PT_REGS_PARM4_SYSCALL
+#define PT_REGS_PARM4_SYSCALL(x) (__PT_REGS_CAST(x)->__PT_PARM4_SYSCALL_REG)
+#define PT_REGS_PARM4_CORE_SYSCALL(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM4_SYSCALL_REG)
+#endif
+#ifndef PT_REGS_PARM5_SYSCALL
+#define PT_REGS_PARM5_SYSCALL(x) (__PT_REGS_CAST(x)->__PT_PARM5_SYSCALL_REG)
+#define PT_REGS_PARM5_CORE_SYSCALL(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM5_SYSCALL_REG)
+#endif
+#ifndef PT_REGS_PARM6_SYSCALL
+#define PT_REGS_PARM6_SYSCALL(x) (__PT_REGS_CAST(x)->__PT_PARM6_SYSCALL_REG)
+#define PT_REGS_PARM6_CORE_SYSCALL(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM6_SYSCALL_REG)
+#endif
+#ifndef PT_REGS_PARM7_SYSCALL
+#define PT_REGS_PARM7_SYSCALL(x) (__PT_REGS_CAST(x)->__PT_PARM7_SYSCALL_REG)
+#define PT_REGS_PARM7_CORE_SYSCALL(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM7_SYSCALL_REG)
#endif
+#else /* defined(bpf_target_defined) */
+
+#define PT_REGS_PARM1(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM2(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM3(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM4(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM5(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM6(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM7(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM8(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_RET(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_FP(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_RC(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_SP(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_IP(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+
+#define PT_REGS_PARM1_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM2_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM3_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM4_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM5_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM6_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM7_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM8_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_RET_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_FP_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_RC_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_SP_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_IP_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+
+#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+
+#define PT_REGS_PARM1_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM2_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM3_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM4_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM5_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM6_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM7_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+
+#define PT_REGS_PARM1_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM2_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM3_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM4_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM5_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM6_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM7_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+
+#endif /* defined(bpf_target_defined) */
+
+/*
+ * When invoked from a syscall handler kprobe, returns a pointer to a
+ * struct pt_regs containing syscall arguments and suitable for passing to
+ * PT_REGS_PARMn_SYSCALL() and PT_REGS_PARMn_CORE_SYSCALL().
+ */
+#ifndef PT_REGS_SYSCALL_REGS
+/* By default, assume that the arch selects ARCH_HAS_SYSCALL_WRAPPER. */
+#define PT_REGS_SYSCALL_REGS(ctx) ((struct pt_regs *)PT_REGS_PARM1(ctx))
+#endif
+
+#ifndef ___bpf_concat
#define ___bpf_concat(a, b) a ## b
+#endif
+#ifndef ___bpf_apply
#define ___bpf_apply(fn, n) ___bpf_concat(fn, n)
+#endif
+#ifndef ___bpf_nth
#define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N
-#define ___bpf_narg(...) \
- ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
-#define ___bpf_empty(...) \
- ___bpf_nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0)
-
-#define ___bpf_ctx_cast0() ctx
-#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0]
-#define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1]
-#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2]
-#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3]
-#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4]
-#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5]
-#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6]
-#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7]
-#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8]
+#endif
+#ifndef ___bpf_narg
+#define ___bpf_narg(...) ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+#endif
+
+#define ___bpf_ctx_cast0() ctx
+#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0]
+#define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1]
+#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2]
+#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3]
+#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4]
+#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5]
+#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6]
+#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7]
+#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8]
#define ___bpf_ctx_cast10(x, args...) ___bpf_ctx_cast9(args), (void *)ctx[9]
#define ___bpf_ctx_cast11(x, args...) ___bpf_ctx_cast10(args), (void *)ctx[10]
#define ___bpf_ctx_cast12(x, args...) ___bpf_ctx_cast11(args), (void *)ctx[11]
-#define ___bpf_ctx_cast(args...) \
- ___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args)
+#define ___bpf_ctx_cast(args...) ___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args)
/*
* BPF_PROG is a convenience wrapper for generic tp_btf/fentry/fexit and
@@ -336,7 +664,7 @@ struct pt_regs;
*/
#define BPF_PROG(name, args...) \
name(unsigned long long *ctx); \
-static __attribute__((always_inline)) typeof(name(0)) \
+static __always_inline typeof(name(0)) \
____##name(unsigned long long *ctx, ##args); \
typeof(name(0)) name(unsigned long long *ctx) \
{ \
@@ -345,24 +673,128 @@ typeof(name(0)) name(unsigned long long *ctx) \
return ____##name(___bpf_ctx_cast(args)); \
_Pragma("GCC diagnostic pop") \
} \
-static __attribute__((always_inline)) typeof(name(0)) \
+static __always_inline typeof(name(0)) \
____##name(unsigned long long *ctx, ##args)
+#ifndef ___bpf_nth2
+#define ___bpf_nth2(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, \
+ _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, N, ...) N
+#endif
+#ifndef ___bpf_narg2
+#define ___bpf_narg2(...) \
+ ___bpf_nth2(_, ##__VA_ARGS__, 12, 12, 11, 11, 10, 10, 9, 9, 8, 8, 7, 7, \
+ 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0)
+#endif
+
+#define ___bpf_treg_cnt(t) \
+ __builtin_choose_expr(sizeof(t) == 1, 1, \
+ __builtin_choose_expr(sizeof(t) == 2, 1, \
+ __builtin_choose_expr(sizeof(t) == 4, 1, \
+ __builtin_choose_expr(sizeof(t) == 8, 1, \
+ __builtin_choose_expr(sizeof(t) == 16, 2, \
+ (void)0)))))
+
+#define ___bpf_reg_cnt0() (0)
+#define ___bpf_reg_cnt1(t, x) (___bpf_reg_cnt0() + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt2(t, x, args...) (___bpf_reg_cnt1(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt3(t, x, args...) (___bpf_reg_cnt2(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt4(t, x, args...) (___bpf_reg_cnt3(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt5(t, x, args...) (___bpf_reg_cnt4(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt6(t, x, args...) (___bpf_reg_cnt5(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt7(t, x, args...) (___bpf_reg_cnt6(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt8(t, x, args...) (___bpf_reg_cnt7(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt9(t, x, args...) (___bpf_reg_cnt8(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt10(t, x, args...) (___bpf_reg_cnt9(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt11(t, x, args...) (___bpf_reg_cnt10(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt12(t, x, args...) (___bpf_reg_cnt11(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt(args...) ___bpf_apply(___bpf_reg_cnt, ___bpf_narg2(args))(args)
+
+#define ___bpf_union_arg(t, x, n) \
+ __builtin_choose_expr(sizeof(t) == 1, ({ union { __u8 z[1]; t x; } ___t = { .z = {ctx[n]}}; ___t.x; }), \
+ __builtin_choose_expr(sizeof(t) == 2, ({ union { __u16 z[1]; t x; } ___t = { .z = {ctx[n]} }; ___t.x; }), \
+ __builtin_choose_expr(sizeof(t) == 4, ({ union { __u32 z[1]; t x; } ___t = { .z = {ctx[n]} }; ___t.x; }), \
+ __builtin_choose_expr(sizeof(t) == 8, ({ union { __u64 z[1]; t x; } ___t = {.z = {ctx[n]} }; ___t.x; }), \
+ __builtin_choose_expr(sizeof(t) == 16, ({ union { __u64 z[2]; t x; } ___t = {.z = {ctx[n], ctx[n + 1]} }; ___t.x; }), \
+ (void)0)))))
+
+#define ___bpf_ctx_arg0(n, args...)
+#define ___bpf_ctx_arg1(n, t, x) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt1(t, x))
+#define ___bpf_ctx_arg2(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt2(t, x, args)) ___bpf_ctx_arg1(n, args)
+#define ___bpf_ctx_arg3(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt3(t, x, args)) ___bpf_ctx_arg2(n, args)
+#define ___bpf_ctx_arg4(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt4(t, x, args)) ___bpf_ctx_arg3(n, args)
+#define ___bpf_ctx_arg5(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt5(t, x, args)) ___bpf_ctx_arg4(n, args)
+#define ___bpf_ctx_arg6(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt6(t, x, args)) ___bpf_ctx_arg5(n, args)
+#define ___bpf_ctx_arg7(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt7(t, x, args)) ___bpf_ctx_arg6(n, args)
+#define ___bpf_ctx_arg8(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt8(t, x, args)) ___bpf_ctx_arg7(n, args)
+#define ___bpf_ctx_arg9(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt9(t, x, args)) ___bpf_ctx_arg8(n, args)
+#define ___bpf_ctx_arg10(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt10(t, x, args)) ___bpf_ctx_arg9(n, args)
+#define ___bpf_ctx_arg11(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt11(t, x, args)) ___bpf_ctx_arg10(n, args)
+#define ___bpf_ctx_arg12(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt12(t, x, args)) ___bpf_ctx_arg11(n, args)
+#define ___bpf_ctx_arg(args...) ___bpf_apply(___bpf_ctx_arg, ___bpf_narg2(args))(___bpf_reg_cnt(args), args)
+
+#define ___bpf_ctx_decl0()
+#define ___bpf_ctx_decl1(t, x) , t x
+#define ___bpf_ctx_decl2(t, x, args...) , t x ___bpf_ctx_decl1(args)
+#define ___bpf_ctx_decl3(t, x, args...) , t x ___bpf_ctx_decl2(args)
+#define ___bpf_ctx_decl4(t, x, args...) , t x ___bpf_ctx_decl3(args)
+#define ___bpf_ctx_decl5(t, x, args...) , t x ___bpf_ctx_decl4(args)
+#define ___bpf_ctx_decl6(t, x, args...) , t x ___bpf_ctx_decl5(args)
+#define ___bpf_ctx_decl7(t, x, args...) , t x ___bpf_ctx_decl6(args)
+#define ___bpf_ctx_decl8(t, x, args...) , t x ___bpf_ctx_decl7(args)
+#define ___bpf_ctx_decl9(t, x, args...) , t x ___bpf_ctx_decl8(args)
+#define ___bpf_ctx_decl10(t, x, args...) , t x ___bpf_ctx_decl9(args)
+#define ___bpf_ctx_decl11(t, x, args...) , t x ___bpf_ctx_decl10(args)
+#define ___bpf_ctx_decl12(t, x, args...) , t x ___bpf_ctx_decl11(args)
+#define ___bpf_ctx_decl(args...) ___bpf_apply(___bpf_ctx_decl, ___bpf_narg2(args))(args)
+
+/*
+ * BPF_PROG2 is an enhanced version of BPF_PROG in order to handle struct
+ * arguments. Since each struct argument might take one or two u64 values
+ * in the trampoline stack, argument type size is needed to place proper number
+ * of u64 values for each argument. Therefore, BPF_PROG2 has different
+ * syntax from BPF_PROG. For example, for the following BPF_PROG syntax:
+ *
+ * int BPF_PROG(test2, int a, int b) { ... }
+ *
+ * the corresponding BPF_PROG2 syntax is:
+ *
+ * int BPF_PROG2(test2, int, a, int, b) { ... }
+ *
+ * where type and the corresponding argument name are separated by comma.
+ *
+ * Use BPF_PROG2 macro if one of the arguments might be a struct/union larger
+ * than 8 bytes:
+ *
+ * int BPF_PROG2(test_struct_arg, struct bpf_testmod_struct_arg_1, a, int, b,
+ * int, c, int, d, struct bpf_testmod_struct_arg_2, e, int, ret)
+ * {
+ * // access a, b, c, d, e, and ret directly
+ * ...
+ * }
+ */
+#define BPF_PROG2(name, args...) \
+name(unsigned long long *ctx); \
+static __always_inline typeof(name(0)) \
+____##name(unsigned long long *ctx ___bpf_ctx_decl(args)); \
+typeof(name(0)) name(unsigned long long *ctx) \
+{ \
+ return ____##name(ctx ___bpf_ctx_arg(args)); \
+} \
+static __always_inline typeof(name(0)) \
+____##name(unsigned long long *ctx ___bpf_ctx_decl(args))
+
struct pt_regs;
-#define ___bpf_kprobe_args0() ctx
-#define ___bpf_kprobe_args1(x) \
- ___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx)
-#define ___bpf_kprobe_args2(x, args...) \
- ___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx)
-#define ___bpf_kprobe_args3(x, args...) \
- ___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx)
-#define ___bpf_kprobe_args4(x, args...) \
- ___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx)
-#define ___bpf_kprobe_args5(x, args...) \
- ___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx)
-#define ___bpf_kprobe_args(args...) \
- ___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args)
+#define ___bpf_kprobe_args0() ctx
+#define ___bpf_kprobe_args1(x) ___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx)
+#define ___bpf_kprobe_args2(x, args...) ___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx)
+#define ___bpf_kprobe_args3(x, args...) ___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx)
+#define ___bpf_kprobe_args4(x, args...) ___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx)
+#define ___bpf_kprobe_args5(x, args...) ___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx)
+#define ___bpf_kprobe_args6(x, args...) ___bpf_kprobe_args5(args), (void *)PT_REGS_PARM6(ctx)
+#define ___bpf_kprobe_args7(x, args...) ___bpf_kprobe_args6(args), (void *)PT_REGS_PARM7(ctx)
+#define ___bpf_kprobe_args8(x, args...) ___bpf_kprobe_args7(args), (void *)PT_REGS_PARM8(ctx)
+#define ___bpf_kprobe_args(args...) ___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args)
/*
* BPF_KPROBE serves the same purpose for kprobes as BPF_PROG for
@@ -376,7 +808,7 @@ struct pt_regs;
*/
#define BPF_KPROBE(name, args...) \
name(struct pt_regs *ctx); \
-static __attribute__((always_inline)) typeof(name(0)) \
+static __always_inline typeof(name(0)) \
____##name(struct pt_regs *ctx, ##args); \
typeof(name(0)) name(struct pt_regs *ctx) \
{ \
@@ -385,14 +817,12 @@ typeof(name(0)) name(struct pt_regs *ctx) \
return ____##name(___bpf_kprobe_args(args)); \
_Pragma("GCC diagnostic pop") \
} \
-static __attribute__((always_inline)) typeof(name(0)) \
+static __always_inline typeof(name(0)) \
____##name(struct pt_regs *ctx, ##args)
-#define ___bpf_kretprobe_args0() ctx
-#define ___bpf_kretprobe_args1(x) \
- ___bpf_kretprobe_args0(), (void *)PT_REGS_RC(ctx)
-#define ___bpf_kretprobe_args(args...) \
- ___bpf_apply(___bpf_kretprobe_args, ___bpf_narg(args))(args)
+#define ___bpf_kretprobe_args0() ctx
+#define ___bpf_kretprobe_args1(x) ___bpf_kretprobe_args0(), (void *)PT_REGS_RC(ctx)
+#define ___bpf_kretprobe_args(args...) ___bpf_apply(___bpf_kretprobe_args, ___bpf_narg(args))(args)
/*
* BPF_KRETPROBE is similar to BPF_KPROBE, except, it only provides optional
@@ -402,7 +832,7 @@ ____##name(struct pt_regs *ctx, ##args)
*/
#define BPF_KRETPROBE(name, args...) \
name(struct pt_regs *ctx); \
-static __attribute__((always_inline)) typeof(name(0)) \
+static __always_inline typeof(name(0)) \
____##name(struct pt_regs *ctx, ##args); \
typeof(name(0)) name(struct pt_regs *ctx) \
{ \
@@ -413,20 +843,80 @@ typeof(name(0)) name(struct pt_regs *ctx) \
} \
static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
+/* If kernel has CONFIG_ARCH_HAS_SYSCALL_WRAPPER, read pt_regs directly */
+#define ___bpf_syscall_args0() ctx
+#define ___bpf_syscall_args1(x) ___bpf_syscall_args0(), (void *)PT_REGS_PARM1_SYSCALL(regs)
+#define ___bpf_syscall_args2(x, args...) ___bpf_syscall_args1(args), (void *)PT_REGS_PARM2_SYSCALL(regs)
+#define ___bpf_syscall_args3(x, args...) ___bpf_syscall_args2(args), (void *)PT_REGS_PARM3_SYSCALL(regs)
+#define ___bpf_syscall_args4(x, args...) ___bpf_syscall_args3(args), (void *)PT_REGS_PARM4_SYSCALL(regs)
+#define ___bpf_syscall_args5(x, args...) ___bpf_syscall_args4(args), (void *)PT_REGS_PARM5_SYSCALL(regs)
+#define ___bpf_syscall_args6(x, args...) ___bpf_syscall_args5(args), (void *)PT_REGS_PARM6_SYSCALL(regs)
+#define ___bpf_syscall_args7(x, args...) ___bpf_syscall_args6(args), (void *)PT_REGS_PARM7_SYSCALL(regs)
+#define ___bpf_syscall_args(args...) ___bpf_apply(___bpf_syscall_args, ___bpf_narg(args))(args)
+
+/* If kernel doesn't have CONFIG_ARCH_HAS_SYSCALL_WRAPPER, we have to BPF_CORE_READ from pt_regs */
+#define ___bpf_syswrap_args0() ctx
+#define ___bpf_syswrap_args1(x) ___bpf_syswrap_args0(), (void *)PT_REGS_PARM1_CORE_SYSCALL(regs)
+#define ___bpf_syswrap_args2(x, args...) ___bpf_syswrap_args1(args), (void *)PT_REGS_PARM2_CORE_SYSCALL(regs)
+#define ___bpf_syswrap_args3(x, args...) ___bpf_syswrap_args2(args), (void *)PT_REGS_PARM3_CORE_SYSCALL(regs)
+#define ___bpf_syswrap_args4(x, args...) ___bpf_syswrap_args3(args), (void *)PT_REGS_PARM4_CORE_SYSCALL(regs)
+#define ___bpf_syswrap_args5(x, args...) ___bpf_syswrap_args4(args), (void *)PT_REGS_PARM5_CORE_SYSCALL(regs)
+#define ___bpf_syswrap_args6(x, args...) ___bpf_syswrap_args5(args), (void *)PT_REGS_PARM6_CORE_SYSCALL(regs)
+#define ___bpf_syswrap_args7(x, args...) ___bpf_syswrap_args6(args), (void *)PT_REGS_PARM7_CORE_SYSCALL(regs)
+#define ___bpf_syswrap_args(args...) ___bpf_apply(___bpf_syswrap_args, ___bpf_narg(args))(args)
+
/*
- * BPF_SEQ_PRINTF to wrap bpf_seq_printf to-be-printed values
- * in a structure.
+ * BPF_KSYSCALL is a variant of BPF_KPROBE, which is intended for
+ * tracing syscall functions, like __x64_sys_close. It hides the underlying
+ * platform-specific low-level way of getting syscall input arguments from
+ * struct pt_regs, and provides a familiar typed and named function arguments
+ * syntax and semantics of accessing syscall input parameters.
+ *
+ * Original struct pt_regs * context is preserved as 'ctx' argument. This might
+ * be necessary when using BPF helpers like bpf_perf_event_output().
+ *
+ * At the moment BPF_KSYSCALL does not transparently handle all the calling
+ * convention quirks for the following syscalls:
+ *
+ * - mmap(): __ARCH_WANT_SYS_OLD_MMAP.
+ * - clone(): CONFIG_CLONE_BACKWARDS, CONFIG_CLONE_BACKWARDS2 and
+ * CONFIG_CLONE_BACKWARDS3.
+ * - socket-related syscalls: __ARCH_WANT_SYS_SOCKETCALL.
+ * - compat syscalls.
+ *
+ * This may or may not change in the future. User needs to take extra measures
+ * to handle such quirks explicitly, if necessary.
+ *
+ * This macro relies on BPF CO-RE support and virtual __kconfig externs.
+ */
+#define BPF_KSYSCALL(name, args...) \
+name(struct pt_regs *ctx); \
+extern _Bool LINUX_HAS_SYSCALL_WRAPPER __kconfig; \
+static __always_inline typeof(name(0)) \
+____##name(struct pt_regs *ctx, ##args); \
+typeof(name(0)) name(struct pt_regs *ctx) \
+{ \
+ struct pt_regs *regs = LINUX_HAS_SYSCALL_WRAPPER \
+ ? (struct pt_regs *)PT_REGS_PARM1(ctx) \
+ : ctx; \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
+ if (LINUX_HAS_SYSCALL_WRAPPER) \
+ return ____##name(___bpf_syswrap_args(args)); \
+ else \
+ return ____##name(___bpf_syscall_args(args)); \
+ _Pragma("GCC diagnostic pop") \
+} \
+static __always_inline typeof(name(0)) \
+____##name(struct pt_regs *ctx, ##args)
+
+#define BPF_KPROBE_SYSCALL BPF_KSYSCALL
+
+/* BPF_UPROBE and BPF_URETPROBE are identical to BPF_KPROBE and BPF_KRETPROBE,
+ * but are named way less confusingly for SEC("uprobe") and SEC("uretprobe")
+ * use cases.
*/
-#define BPF_SEQ_PRINTF(seq, fmt, args...) \
- ({ \
- _Pragma("GCC diagnostic push") \
- _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
- static const char ___fmt[] = fmt; \
- unsigned long long ___param[] = { args }; \
- _Pragma("GCC diagnostic pop") \
- int ___ret = bpf_seq_printf(seq, ___fmt, sizeof(___fmt), \
- ___param, sizeof(___param)); \
- ___ret; \
- })
+#define BPF_UPROBE(name, args...) BPF_KPROBE(name, ##args)
+#define BPF_URETPROBE(name, args...) BPF_KRETPROBE(name, ##args)
#endif
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 231b07203e3d..2d0840ef599a 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -21,6 +21,7 @@
#include "libbpf.h"
#include "libbpf_internal.h"
#include "hashmap.h"
+#include "strset.h"
#define BTF_MAX_NR_TYPES 0x7fffffffU
#define BTF_MAX_STR_OFFSET 0x7fffffffU
@@ -56,7 +57,7 @@ struct btf {
* representation is broken up into three independently allocated
* memory regions to be able to modify them independently.
* raw_data is nulled out at that point, but can be later allocated
- * and cached again if user calls btf__get_raw_data(), at which point
+ * and cached again if user calls btf__raw_data(), at which point
* raw_data will contain a contiguous copy of header, types, and
* strings:
*
@@ -67,7 +68,7 @@ struct btf {
* | | |
* hdr | |
* types_data----+ |
- * strs_data------------------+
+ * strset__data(strs_set)-----+
*
* +----------+---------+-----------+
* | Header | Types | Strings |
@@ -78,18 +79,43 @@ struct btf {
void *types_data;
size_t types_data_cap; /* used size stored in hdr->type_len */
- /* type ID to `struct btf_type *` lookup index */
+ /* type ID to `struct btf_type *` lookup index
+ * type_offs[0] corresponds to the first non-VOID type:
+ * - for base BTF it's type [1];
+ * - for split BTF it's the first non-base BTF type.
+ */
__u32 *type_offs;
size_t type_offs_cap;
+ /* number of types in this BTF instance:
+ * - doesn't include special [0] void type;
+ * - for split BTF counts number of types added on top of base BTF.
+ */
__u32 nr_types;
+ /* if not NULL, points to the base BTF on top of which the current
+ * split BTF is based
+ */
+ struct btf *base_btf;
+ /* BTF type ID of the first type in this BTF instance:
+ * - for base BTF it's equal to 1;
+ * - for split BTF it's equal to biggest type ID of base BTF plus 1.
+ */
+ int start_id;
+ /* logical string offset of this BTF instance:
+ * - for base BTF it's equal to 0;
+ * - for split BTF it's equal to total size of base BTF's string section size.
+ */
+ int start_str_off;
+ /* only one of strs_data or strs_set can be non-NULL, depending on
+ * whether BTF is in a modifiable state (strs_set is used) or not
+ * (strs_data points inside raw_data)
+ */
void *strs_data;
- size_t strs_data_cap; /* used size stored in hdr->str_len */
-
- /* lookup index for each unique string in strings section */
- struct hashmap *strs_hash;
+ /* a set of unique strings */
+ struct strset *strs_set;
/* whether strings are already deduplicated */
bool strs_deduped;
+
/* BTF object FD, if loaded into kernel */
int fd;
@@ -104,7 +130,7 @@ static inline __u64 ptr_to_u64(const void *ptr)
/* Ensure given dynamically allocated memory region pointed to by *data* with
* capacity of *cap_cnt* elements each taking *elem_sz* bytes has enough
- * memory to accomodate *add_cnt* new elements, assuming *cur_cnt* elements
+ * memory to accommodate *add_cnt* new elements, assuming *cur_cnt* elements
* are already used. At most *max_cnt* elements can be ever allocated.
* If necessary, memory is reallocated and all existing data is copied over,
* new pointer to the memory region is stored at *data, new memory region
@@ -112,8 +138,8 @@ static inline __u64 ptr_to_u64(const void *ptr)
* On success, memory pointer to the beginning of unused memory is returned.
* On error, NULL is returned.
*/
-void *btf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
- size_t cur_cnt, size_t max_cnt, size_t add_cnt)
+void *libbpf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
+ size_t cur_cnt, size_t max_cnt, size_t add_cnt)
{
size_t new_cnt;
void *new_data;
@@ -149,26 +175,31 @@ void *btf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
/* Ensure given dynamically allocated memory region has enough allocated space
* to accommodate *need_cnt* elements of size *elem_sz* bytes each
*/
-int btf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt)
+int libbpf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt)
{
void *p;
if (need_cnt <= *cap_cnt)
return 0;
- p = btf_add_mem(data, cap_cnt, elem_sz, *cap_cnt, SIZE_MAX, need_cnt - *cap_cnt);
+ p = libbpf_add_mem(data, cap_cnt, elem_sz, *cap_cnt, SIZE_MAX, need_cnt - *cap_cnt);
if (!p)
return -ENOMEM;
return 0;
}
+static void *btf_add_type_offs_mem(struct btf *btf, size_t add_cnt)
+{
+ return libbpf_add_mem((void **)&btf->type_offs, &btf->type_offs_cap, sizeof(__u32),
+ btf->nr_types, BTF_MAX_NR_TYPES, add_cnt);
+}
+
static int btf_add_type_idx_entry(struct btf *btf, __u32 type_off)
{
__u32 *p;
- p = btf_add_mem((void **)&btf->type_offs, &btf->type_offs_cap, sizeof(__u32),
- btf->nr_types + 1, BTF_MAX_NR_TYPES, 1);
+ p = btf_add_type_offs_mem(btf, 1);
if (!p)
return -ENOMEM;
@@ -205,32 +236,29 @@ static int btf_parse_hdr(struct btf *btf)
}
btf_bswap_hdr(hdr);
} else if (hdr->magic != BTF_MAGIC) {
- pr_debug("Invalid BTF magic:%x\n", hdr->magic);
+ pr_debug("Invalid BTF magic: %x\n", hdr->magic);
return -EINVAL;
}
- meta_left = btf->raw_size - sizeof(*hdr);
- if (!meta_left) {
- pr_debug("BTF has no data\n");
+ if (btf->raw_size < hdr->hdr_len) {
+ pr_debug("BTF header len %u larger than data size %u\n",
+ hdr->hdr_len, btf->raw_size);
return -EINVAL;
}
- if (meta_left < hdr->type_off) {
- pr_debug("Invalid BTF type section offset:%u\n", hdr->type_off);
+ meta_left = btf->raw_size - hdr->hdr_len;
+ if (meta_left < (long long)hdr->str_off + hdr->str_len) {
+ pr_debug("Invalid BTF total size: %u\n", btf->raw_size);
return -EINVAL;
}
- if (meta_left < hdr->str_off) {
- pr_debug("Invalid BTF string section offset:%u\n", hdr->str_off);
+ if ((long long)hdr->type_off + hdr->type_len > hdr->str_off) {
+ pr_debug("Invalid BTF data sections layout: type data at %u + %u, strings data at %u + %u\n",
+ hdr->type_off, hdr->type_len, hdr->str_off, hdr->str_len);
return -EINVAL;
}
- if (hdr->type_off >= hdr->str_off) {
- pr_debug("BTF type section offset >= string section offset. No type?\n");
- return -EINVAL;
- }
-
- if (hdr->type_off & 0x02) {
+ if (hdr->type_off % 4) {
pr_debug("BTF type section is not aligned to 4 bytes\n");
return -EINVAL;
}
@@ -244,12 +272,16 @@ static int btf_parse_str_sec(struct btf *btf)
const char *start = btf->strs_data;
const char *end = start + btf->hdr->str_len;
- if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_STR_OFFSET ||
- start[0] || end[-1]) {
+ if (btf->base_btf && hdr->str_len == 0)
+ return 0;
+ if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_STR_OFFSET || end[-1]) {
+ pr_debug("Invalid BTF string section\n");
+ return -EINVAL;
+ }
+ if (!btf->base_btf && start[0]) {
pr_debug("Invalid BTF string section\n");
return -EINVAL;
}
-
return 0;
}
@@ -266,11 +298,15 @@ static int btf_type_size(const struct btf_type *t)
case BTF_KIND_PTR:
case BTF_KIND_TYPEDEF:
case BTF_KIND_FUNC:
+ case BTF_KIND_FLOAT:
+ case BTF_KIND_TYPE_TAG:
return base_size;
case BTF_KIND_INT:
return base_size + sizeof(__u32);
case BTF_KIND_ENUM:
return base_size + vlen * sizeof(struct btf_enum);
+ case BTF_KIND_ENUM64:
+ return base_size + vlen * sizeof(struct btf_enum64);
case BTF_KIND_ARRAY:
return base_size + sizeof(struct btf_array);
case BTF_KIND_STRUCT:
@@ -282,6 +318,8 @@ static int btf_type_size(const struct btf_type *t)
return base_size + sizeof(struct btf_var);
case BTF_KIND_DATASEC:
return base_size + vlen * sizeof(struct btf_var_secinfo);
+ case BTF_KIND_DECL_TAG:
+ return base_size + sizeof(struct btf_decl_tag);
default:
pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t));
return -EINVAL;
@@ -298,6 +336,7 @@ static void btf_bswap_type_base(struct btf_type *t)
static int btf_bswap_type_rest(struct btf_type *t)
{
struct btf_var_secinfo *v;
+ struct btf_enum64 *e64;
struct btf_member *m;
struct btf_array *a;
struct btf_param *p;
@@ -313,6 +352,8 @@ static int btf_bswap_type_rest(struct btf_type *t)
case BTF_KIND_PTR:
case BTF_KIND_TYPEDEF:
case BTF_KIND_FUNC:
+ case BTF_KIND_FLOAT:
+ case BTF_KIND_TYPE_TAG:
return 0;
case BTF_KIND_INT:
*(__u32 *)(t + 1) = bswap_32(*(__u32 *)(t + 1));
@@ -323,6 +364,13 @@ static int btf_bswap_type_rest(struct btf_type *t)
e->val = bswap_32(e->val);
}
return 0;
+ case BTF_KIND_ENUM64:
+ for (i = 0, e64 = btf_enum64(t); i < vlen; i++, e64++) {
+ e64->name_off = bswap_32(e64->name_off);
+ e64->val_lo32 = bswap_32(e64->val_lo32);
+ e64->val_hi32 = bswap_32(e64->val_hi32);
+ }
+ return 0;
case BTF_KIND_ARRAY:
a = btf_array(t);
a->type = bswap_32(a->type);
@@ -353,6 +401,9 @@ static int btf_bswap_type_rest(struct btf_type *t)
v->size = bswap_32(v->size);
}
return 0;
+ case BTF_KIND_DECL_TAG:
+ btf_decl_tag(t)->component_idx = bswap_32(btf_decl_tag(t)->component_idx);
+ return 0;
default:
pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t));
return -EINVAL;
@@ -364,19 +415,9 @@ static int btf_parse_type_sec(struct btf *btf)
struct btf_header *hdr = btf->hdr;
void *next_type = btf->types_data;
void *end_type = next_type + hdr->type_len;
- int err, i = 0, type_size;
-
- /* VOID (type_id == 0) is specially handled by btf__get_type_by_id(),
- * so ensure we can never properly use its offset from index by
- * setting it to a large value
- */
- err = btf_add_type_idx_entry(btf, UINT_MAX);
- if (err)
- return err;
+ int err, type_size;
while (next_type + sizeof(struct btf_type) <= end_type) {
- i++;
-
if (btf->swapped_endian)
btf_bswap_type_base(next_type);
@@ -384,7 +425,7 @@ static int btf_parse_type_sec(struct btf *btf)
if (type_size < 0)
return type_size;
if (next_type + type_size > end_type) {
- pr_warn("BTF type [%d] is malformed\n", i);
+ pr_warn("BTF type [%d] is malformed\n", btf->start_id + btf->nr_types);
return -EINVAL;
}
@@ -407,47 +448,230 @@ static int btf_parse_type_sec(struct btf *btf)
return 0;
}
-__u32 btf__get_nr_types(const struct btf *btf)
+static int btf_validate_str(const struct btf *btf, __u32 str_off, const char *what, __u32 type_id)
+{
+ const char *s;
+
+ s = btf__str_by_offset(btf, str_off);
+ if (!s) {
+ pr_warn("btf: type [%u]: invalid %s (string offset %u)\n", type_id, what, str_off);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int btf_validate_id(const struct btf *btf, __u32 id, __u32 ctx_id)
+{
+ const struct btf_type *t;
+
+ t = btf__type_by_id(btf, id);
+ if (!t) {
+ pr_warn("btf: type [%u]: invalid referenced type ID %u\n", ctx_id, id);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int btf_validate_type(const struct btf *btf, const struct btf_type *t, __u32 id)
+{
+ __u32 kind = btf_kind(t);
+ int err, i, n;
+
+ err = btf_validate_str(btf, t->name_off, "type name", id);
+ if (err)
+ return err;
+
+ switch (kind) {
+ case BTF_KIND_UNKN:
+ case BTF_KIND_INT:
+ case BTF_KIND_FWD:
+ case BTF_KIND_FLOAT:
+ break;
+ case BTF_KIND_PTR:
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_CONST:
+ case BTF_KIND_RESTRICT:
+ case BTF_KIND_VAR:
+ case BTF_KIND_DECL_TAG:
+ case BTF_KIND_TYPE_TAG:
+ err = btf_validate_id(btf, t->type, id);
+ if (err)
+ return err;
+ break;
+ case BTF_KIND_ARRAY: {
+ const struct btf_array *a = btf_array(t);
+
+ err = btf_validate_id(btf, a->type, id);
+ err = err ?: btf_validate_id(btf, a->index_type, id);
+ if (err)
+ return err;
+ break;
+ }
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION: {
+ const struct btf_member *m = btf_members(t);
+
+ n = btf_vlen(t);
+ for (i = 0; i < n; i++, m++) {
+ err = btf_validate_str(btf, m->name_off, "field name", id);
+ err = err ?: btf_validate_id(btf, m->type, id);
+ if (err)
+ return err;
+ }
+ break;
+ }
+ case BTF_KIND_ENUM: {
+ const struct btf_enum *m = btf_enum(t);
+
+ n = btf_vlen(t);
+ for (i = 0; i < n; i++, m++) {
+ err = btf_validate_str(btf, m->name_off, "enum name", id);
+ if (err)
+ return err;
+ }
+ break;
+ }
+ case BTF_KIND_ENUM64: {
+ const struct btf_enum64 *m = btf_enum64(t);
+
+ n = btf_vlen(t);
+ for (i = 0; i < n; i++, m++) {
+ err = btf_validate_str(btf, m->name_off, "enum name", id);
+ if (err)
+ return err;
+ }
+ break;
+ }
+ case BTF_KIND_FUNC: {
+ const struct btf_type *ft;
+
+ err = btf_validate_id(btf, t->type, id);
+ if (err)
+ return err;
+ ft = btf__type_by_id(btf, t->type);
+ if (btf_kind(ft) != BTF_KIND_FUNC_PROTO) {
+ pr_warn("btf: type [%u]: referenced type [%u] is not FUNC_PROTO\n", id, t->type);
+ return -EINVAL;
+ }
+ break;
+ }
+ case BTF_KIND_FUNC_PROTO: {
+ const struct btf_param *m = btf_params(t);
+
+ n = btf_vlen(t);
+ for (i = 0; i < n; i++, m++) {
+ err = btf_validate_str(btf, m->name_off, "param name", id);
+ err = err ?: btf_validate_id(btf, m->type, id);
+ if (err)
+ return err;
+ }
+ break;
+ }
+ case BTF_KIND_DATASEC: {
+ const struct btf_var_secinfo *m = btf_var_secinfos(t);
+
+ n = btf_vlen(t);
+ for (i = 0; i < n; i++, m++) {
+ err = btf_validate_id(btf, m->type, id);
+ if (err)
+ return err;
+ }
+ break;
+ }
+ default:
+ pr_warn("btf: type [%u]: unrecognized kind %u\n", id, kind);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/* Validate basic sanity of BTF. It's intentionally less thorough than
+ * kernel's validation and validates only properties of BTF that libbpf relies
+ * on to be correct (e.g., valid type IDs, valid string offsets, etc)
+ */
+static int btf_sanity_check(const struct btf *btf)
+{
+ const struct btf_type *t;
+ __u32 i, n = btf__type_cnt(btf);
+ int err;
+
+ for (i = 1; i < n; i++) {
+ t = btf_type_by_id(btf, i);
+ err = btf_validate_type(btf, t, i);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+__u32 btf__type_cnt(const struct btf *btf)
{
- return btf->nr_types;
+ return btf->start_id + btf->nr_types;
+}
+
+const struct btf *btf__base_btf(const struct btf *btf)
+{
+ return btf->base_btf;
}
/* internal helper returning non-const pointer to a type */
-static struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id)
+struct btf_type *btf_type_by_id(const struct btf *btf, __u32 type_id)
{
if (type_id == 0)
return &btf_void;
-
- return btf->types_data + btf->type_offs[type_id];
+ if (type_id < btf->start_id)
+ return btf_type_by_id(btf->base_btf, type_id);
+ return btf->types_data + btf->type_offs[type_id - btf->start_id];
}
const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id)
{
- if (type_id > btf->nr_types)
- return NULL;
+ if (type_id >= btf->start_id + btf->nr_types)
+ return errno = EINVAL, NULL;
return btf_type_by_id((struct btf *)btf, type_id);
}
static int determine_ptr_size(const struct btf *btf)
{
+ static const char * const long_aliases[] = {
+ "long",
+ "long int",
+ "int long",
+ "unsigned long",
+ "long unsigned",
+ "unsigned long int",
+ "unsigned int long",
+ "long unsigned int",
+ "long int unsigned",
+ "int unsigned long",
+ "int long unsigned",
+ };
const struct btf_type *t;
const char *name;
- int i;
+ int i, j, n;
+
+ if (btf->base_btf && btf->base_btf->ptr_sz > 0)
+ return btf->base_btf->ptr_sz;
- for (i = 1; i <= btf->nr_types; i++) {
+ n = btf__type_cnt(btf);
+ for (i = 1; i < n; i++) {
t = btf__type_by_id(btf, i);
if (!btf_is_int(t))
continue;
+ if (t->size != 4 && t->size != 8)
+ continue;
+
name = btf__name_by_offset(btf, t->name_off);
if (!name)
continue;
- if (strcmp(name, "long int") == 0 ||
- strcmp(name, "long unsigned int") == 0) {
- if (t->size != 4 && t->size != 8)
- continue;
- return t->size;
+ for (j = 0; j < ARRAY_SIZE(long_aliases); j++) {
+ if (strcmp(name, long_aliases[j]) == 0)
+ return t->size;
}
}
@@ -487,16 +711,16 @@ size_t btf__pointer_size(const struct btf *btf)
int btf__set_pointer_size(struct btf *btf, size_t ptr_sz)
{
if (ptr_sz != 4 && ptr_sz != 8)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
btf->ptr_sz = ptr_sz;
return 0;
}
static bool is_host_big_endian(void)
{
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
return false;
-#elif __BYTE_ORDER == __BIG_ENDIAN
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
return true;
#else
# error "Unrecognized __BYTE_ORDER__"
@@ -514,7 +738,7 @@ enum btf_endianness btf__endianness(const struct btf *btf)
int btf__set_endianness(struct btf *btf, enum btf_endianness endian)
{
if (endian != BTF_LITTLE_ENDIAN && endian != BTF_BIG_ENDIAN)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
btf->swapped_endian = is_host_big_endian() != (endian == BTF_BIG_ENDIAN);
if (!btf->swapped_endian) {
@@ -545,14 +769,15 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
int i;
t = btf__type_by_id(btf, type_id);
- for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t);
- i++) {
+ for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t); i++) {
switch (btf_kind(t)) {
case BTF_KIND_INT:
case BTF_KIND_STRUCT:
case BTF_KIND_UNION:
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
case BTF_KIND_DATASEC:
+ case BTF_KIND_FLOAT:
size = t->size;
goto done;
case BTF_KIND_PTR:
@@ -563,17 +788,19 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
case BTF_KIND_CONST:
case BTF_KIND_RESTRICT:
case BTF_KIND_VAR:
+ case BTF_KIND_DECL_TAG:
+ case BTF_KIND_TYPE_TAG:
type_id = t->type;
break;
case BTF_KIND_ARRAY:
array = btf_array(t);
if (nelems && array->nelems > UINT32_MAX / nelems)
- return -E2BIG;
+ return libbpf_err(-E2BIG);
nelems *= array->nelems;
type_id = array->type;
break;
default:
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
t = btf__type_by_id(btf, type_id);
@@ -581,9 +808,9 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
done:
if (size < 0)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (nelems && size > UINT32_MAX / nelems)
- return -E2BIG;
+ return libbpf_err(-E2BIG);
return nelems * size;
}
@@ -596,6 +823,8 @@ int btf__align_of(const struct btf *btf, __u32 id)
switch (kind) {
case BTF_KIND_INT:
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
+ case BTF_KIND_FLOAT:
return min(btf_ptr_sz(btf), (size_t)t->size);
case BTF_KIND_PTR:
return btf_ptr_sz(btf);
@@ -603,6 +832,7 @@ int btf__align_of(const struct btf *btf, __u32 id)
case BTF_KIND_VOLATILE:
case BTF_KIND_CONST:
case BTF_KIND_RESTRICT:
+ case BTF_KIND_TYPE_TAG:
return btf__align_of(btf, t->type);
case BTF_KIND_ARRAY:
return btf__align_of(btf, btf_array(t)->type);
@@ -615,15 +845,28 @@ int btf__align_of(const struct btf *btf, __u32 id)
for (i = 0; i < vlen; i++, m++) {
align = btf__align_of(btf, m->type);
if (align <= 0)
- return align;
+ return libbpf_err(align);
max_align = max(max_align, align);
+
+ /* if field offset isn't aligned according to field
+ * type's alignment, then struct must be packed
+ */
+ if (btf_member_bitfield_size(t, i) == 0 &&
+ (m->offset % (8 * align)) != 0)
+ return 1;
}
+ /* if struct/union size isn't a multiple of its alignment,
+ * then struct must be packed
+ */
+ if ((t->size % max_align) != 0)
+ return 1;
+
return max_align;
}
default:
pr_warn("unsupported BTF_KIND:%u\n", btf_kind(t));
- return 0;
+ return errno = EINVAL, 0;
}
}
@@ -642,19 +885,19 @@ int btf__resolve_type(const struct btf *btf, __u32 type_id)
}
if (depth == MAX_RESOLVE_DEPTH || btf_type_is_void_or_null(t))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
return type_id;
}
__s32 btf__find_by_name(const struct btf *btf, const char *type_name)
{
- __u32 i;
+ __u32 i, nr_types = btf__type_cnt(btf);
if (!strcmp(type_name, "void"))
return 0;
- for (i = 1; i <= btf->nr_types; i++) {
+ for (i = 1; i < nr_types; i++) {
const struct btf_type *t = btf__type_by_id(btf, i);
const char *name = btf__name_by_offset(btf, t->name_off);
@@ -662,18 +905,18 @@ __s32 btf__find_by_name(const struct btf *btf, const char *type_name)
return i;
}
- return -ENOENT;
+ return libbpf_err(-ENOENT);
}
-__s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
- __u32 kind)
+static __s32 btf_find_by_name_kind(const struct btf *btf, int start_id,
+ const char *type_name, __u32 kind)
{
- __u32 i;
+ __u32 i, nr_types = btf__type_cnt(btf);
if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void"))
return 0;
- for (i = 1; i <= btf->nr_types; i++) {
+ for (i = start_id; i < nr_types; i++) {
const struct btf_type *t = btf__type_by_id(btf, i);
const char *name;
@@ -684,7 +927,19 @@ __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
return i;
}
- return -ENOENT;
+ return libbpf_err(-ENOENT);
+}
+
+__s32 btf__find_by_name_kind_own(const struct btf *btf, const char *type_name,
+ __u32 kind)
+{
+ return btf_find_by_name_kind(btf, btf->start_id, type_name, kind);
+}
+
+__s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
+ __u32 kind)
+{
+ return btf_find_by_name_kind(btf, 1, type_name, kind);
}
static bool btf_is_modifiable(const struct btf *btf)
@@ -709,7 +964,7 @@ void btf__free(struct btf *btf)
*/
free(btf->hdr);
free(btf->types_data);
- free(btf->strs_data);
+ strset__free(btf->strs_set);
}
free(btf->raw_data);
free(btf->raw_data_swapped);
@@ -717,7 +972,7 @@ void btf__free(struct btf *btf)
free(btf);
}
-struct btf *btf__new_empty(void)
+static struct btf *btf_new_empty(struct btf *base_btf)
{
struct btf *btf;
@@ -725,12 +980,21 @@ struct btf *btf__new_empty(void)
if (!btf)
return ERR_PTR(-ENOMEM);
+ btf->nr_types = 0;
+ btf->start_id = 1;
+ btf->start_str_off = 0;
btf->fd = -1;
btf->ptr_sz = sizeof(void *);
btf->swapped_endian = false;
+ if (base_btf) {
+ btf->base_btf = base_btf;
+ btf->start_id = btf__type_cnt(base_btf);
+ btf->start_str_off = base_btf->hdr->str_len;
+ }
+
/* +1 for empty string at offset 0 */
- btf->raw_size = sizeof(struct btf_header) + 1;
+ btf->raw_size = sizeof(struct btf_header) + (base_btf ? 0 : 1);
btf->raw_data = calloc(1, btf->raw_size);
if (!btf->raw_data) {
free(btf);
@@ -744,12 +1008,22 @@ struct btf *btf__new_empty(void)
btf->types_data = btf->raw_data + btf->hdr->hdr_len;
btf->strs_data = btf->raw_data + btf->hdr->hdr_len;
- btf->hdr->str_len = 1; /* empty string at offset 0 */
+ btf->hdr->str_len = base_btf ? 0 : 1; /* empty string at offset 0 */
return btf;
}
-struct btf *btf__new(const void *data, __u32 size)
+struct btf *btf__new_empty(void)
+{
+ return libbpf_ptr(btf_new_empty(NULL));
+}
+
+struct btf *btf__new_empty_split(struct btf *base_btf)
+{
+ return libbpf_ptr(btf_new_empty(base_btf));
+}
+
+static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf)
{
struct btf *btf;
int err;
@@ -758,6 +1032,17 @@ struct btf *btf__new(const void *data, __u32 size)
if (!btf)
return ERR_PTR(-ENOMEM);
+ btf->nr_types = 0;
+ btf->start_id = 1;
+ btf->start_str_off = 0;
+ btf->fd = -1;
+
+ if (base_btf) {
+ btf->base_btf = base_btf;
+ btf->start_id = btf__type_cnt(base_btf);
+ btf->start_str_off = base_btf->hdr->str_len;
+ }
+
btf->raw_data = malloc(size);
if (!btf->raw_data) {
err = -ENOMEM;
@@ -776,11 +1061,10 @@ struct btf *btf__new(const void *data, __u32 size)
err = btf_parse_str_sec(btf);
err = err ?: btf_parse_type_sec(btf);
+ err = err ?: btf_sanity_check(btf);
if (err)
goto done;
- btf->fd = -1;
-
done:
if (err) {
btf__free(btf);
@@ -790,7 +1074,18 @@ done:
return btf;
}
-struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
+struct btf *btf__new(const void *data, __u32 size)
+{
+ return libbpf_ptr(btf_new(data, size, NULL));
+}
+
+struct btf *btf__new_split(const void *data, __u32 size, struct btf *base_btf)
+{
+ return libbpf_ptr(btf_new(data, size, base_btf));
+}
+
+static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
+ struct btf_ext **btf_ext)
{
Elf_Data *btf_data = NULL, *btf_ext_data = NULL;
int err = 0, fd = -1, idx = 0;
@@ -798,13 +1093,14 @@ struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
Elf_Scn *scn = NULL;
Elf *elf = NULL;
GElf_Ehdr ehdr;
+ size_t shstrndx;
if (elf_version(EV_CURRENT) == EV_NONE) {
pr_warn("failed to init libelf for %s\n", path);
return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
}
- fd = open(path, O_RDONLY);
+ fd = open(path, O_RDONLY | O_CLOEXEC);
if (fd < 0) {
err = -errno;
pr_warn("failed to open %s: %s\n", path, strerror(errno));
@@ -822,7 +1118,14 @@ struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
pr_warn("failed to get EHDR from %s\n", path);
goto done;
}
- if (!elf_rawdata(elf_getscn(elf, ehdr.e_shstrndx), NULL)) {
+
+ if (elf_getshdrstrndx(elf, &shstrndx)) {
+ pr_warn("failed to get section names section index for %s\n",
+ path);
+ goto done;
+ }
+
+ if (!elf_rawdata(elf_getscn(elf, shstrndx), NULL)) {
pr_warn("failed to get e_shstrndx from %s\n", path);
goto done;
}
@@ -837,7 +1140,7 @@ struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
idx, path);
goto done;
}
- name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name);
+ name = elf_strptr(elf, shstrndx, sh.sh_name);
if (!name) {
pr_warn("failed to get section(%d) name from %s\n",
idx, path);
@@ -862,14 +1165,14 @@ struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
}
}
- err = 0;
-
if (!btf_data) {
- err = -ENOENT;
+ pr_warn("failed to find '%s' ELF section in %s\n", BTF_ELF_SEC, path);
+ err = -ENODATA;
goto done;
}
- btf = btf__new(btf_data->d_buf, btf_data->d_size);
- if (IS_ERR(btf))
+ btf = btf_new(btf_data->d_buf, btf_data->d_size, base_btf);
+ err = libbpf_get_error(btf);
+ if (err)
goto done;
switch (gelf_getclass(elf)) {
@@ -885,9 +1188,9 @@ struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
}
if (btf_ext && btf_ext_data) {
- *btf_ext = btf_ext__new(btf_ext_data->d_buf,
- btf_ext_data->d_size);
- if (IS_ERR(*btf_ext))
+ *btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size);
+ err = libbpf_get_error(*btf_ext);
+ if (err)
goto done;
} else if (btf_ext) {
*btf_ext = NULL;
@@ -897,23 +1200,27 @@ done:
elf_end(elf);
close(fd);
- if (err)
- return ERR_PTR(err);
- /*
- * btf is always parsed before btf_ext, so no need to clean up
- * btf_ext, if btf loading failed
- */
- if (IS_ERR(btf))
+ if (!err)
return btf;
- if (btf_ext && IS_ERR(*btf_ext)) {
- btf__free(btf);
- err = PTR_ERR(*btf_ext);
- return ERR_PTR(err);
- }
- return btf;
+
+ if (btf_ext)
+ btf_ext__free(*btf_ext);
+ btf__free(btf);
+
+ return ERR_PTR(err);
}
-struct btf *btf__parse_raw(const char *path)
+struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
+{
+ return libbpf_ptr(btf_parse_elf(path, NULL, btf_ext));
+}
+
+struct btf *btf__parse_elf_split(const char *path, struct btf *base_btf)
+{
+ return libbpf_ptr(btf_parse_elf(path, base_btf, NULL));
+}
+
+static struct btf *btf_parse_raw(const char *path, struct btf *base_btf)
{
struct btf *btf = NULL;
void *data = NULL;
@@ -922,7 +1229,7 @@ struct btf *btf__parse_raw(const char *path)
int err = 0;
long sz;
- f = fopen(path, "rb");
+ f = fopen(path, "rbe");
if (!f) {
err = -errno;
goto err_out;
@@ -967,7 +1274,7 @@ struct btf *btf__parse_raw(const char *path)
}
/* finally parse BTF data */
- btf = btf__new(data, sz);
+ btf = btf_new(data, sz, base_btf);
err_out:
free(data);
@@ -976,162 +1283,129 @@ err_out:
return err ? ERR_PTR(err) : btf;
}
-struct btf *btf__parse(const char *path, struct btf_ext **btf_ext)
+struct btf *btf__parse_raw(const char *path)
{
- struct btf *btf;
-
- if (btf_ext)
- *btf_ext = NULL;
-
- btf = btf__parse_raw(path);
- if (!IS_ERR(btf) || PTR_ERR(btf) != -EPROTO)
- return btf;
-
- return btf__parse_elf(path, btf_ext);
+ return libbpf_ptr(btf_parse_raw(path, NULL));
}
-static int compare_vsi_off(const void *_a, const void *_b)
+struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf)
{
- const struct btf_var_secinfo *a = _a;
- const struct btf_var_secinfo *b = _b;
-
- return a->offset - b->offset;
+ return libbpf_ptr(btf_parse_raw(path, base_btf));
}
-static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
- struct btf_type *t)
+static struct btf *btf_parse(const char *path, struct btf *base_btf, struct btf_ext **btf_ext)
{
- __u32 size = 0, off = 0, i, vars = btf_vlen(t);
- const char *name = btf__name_by_offset(btf, t->name_off);
- const struct btf_type *t_var;
- struct btf_var_secinfo *vsi;
- const struct btf_var *var;
- int ret;
-
- if (!name) {
- pr_debug("No name found in string section for DATASEC kind.\n");
- return -ENOENT;
- }
-
- /* .extern datasec size and var offsets were set correctly during
- * extern collection step, so just skip straight to sorting variables
- */
- if (t->size)
- goto sort_vars;
-
- ret = bpf_object__section_size(obj, name, &size);
- if (ret || !size || (t->size && t->size != size)) {
- pr_debug("Invalid size for section %s: %u bytes\n", name, size);
- return -ENOENT;
- }
-
- t->size = size;
-
- for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) {
- t_var = btf__type_by_id(btf, vsi->type);
- var = btf_var(t_var);
-
- if (!btf_is_var(t_var)) {
- pr_debug("Non-VAR type seen in section %s\n", name);
- return -EINVAL;
- }
-
- if (var->linkage == BTF_VAR_STATIC)
- continue;
-
- name = btf__name_by_offset(btf, t_var->name_off);
- if (!name) {
- pr_debug("No name found in string section for VAR kind\n");
- return -ENOENT;
- }
-
- ret = bpf_object__variable_offset(obj, name, &off);
- if (ret) {
- pr_debug("No offset found in symbol table for VAR %s\n",
- name);
- return -ENOENT;
- }
+ struct btf *btf;
+ int err;
- vsi->offset = off;
- }
+ if (btf_ext)
+ *btf_ext = NULL;
-sort_vars:
- qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off);
- return 0;
+ btf = btf_parse_raw(path, base_btf);
+ err = libbpf_get_error(btf);
+ if (!err)
+ return btf;
+ if (err != -EPROTO)
+ return ERR_PTR(err);
+ return btf_parse_elf(path, base_btf, btf_ext);
}
-int btf__finalize_data(struct bpf_object *obj, struct btf *btf)
+struct btf *btf__parse(const char *path, struct btf_ext **btf_ext)
{
- int err = 0;
- __u32 i;
-
- for (i = 1; i <= btf->nr_types; i++) {
- struct btf_type *t = btf_type_by_id(btf, i);
-
- /* Loader needs to fix up some of the things compiler
- * couldn't get its hands on while emitting BTF. This
- * is section size and global variable offset. We use
- * the info from the ELF itself for this purpose.
- */
- if (btf_is_datasec(t)) {
- err = btf_fixup_datasec(obj, btf, t);
- if (err)
- break;
- }
- }
+ return libbpf_ptr(btf_parse(path, NULL, btf_ext));
+}
- return err;
+struct btf *btf__parse_split(const char *path, struct btf *base_btf)
+{
+ return libbpf_ptr(btf_parse(path, base_btf, NULL));
}
static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian);
-int btf__load(struct btf *btf)
+int btf_load_into_kernel(struct btf *btf,
+ char *log_buf, size_t log_sz, __u32 log_level,
+ int token_fd)
{
- __u32 log_buf_size = 0, raw_size;
- char *log_buf = NULL;
+ LIBBPF_OPTS(bpf_btf_load_opts, opts);
+ __u32 buf_sz = 0, raw_size;
+ char *buf = NULL, *tmp;
void *raw_data;
int err = 0;
if (btf->fd >= 0)
- return -EEXIST;
-
-retry_load:
- if (log_buf_size) {
- log_buf = malloc(log_buf_size);
- if (!log_buf)
- return -ENOMEM;
-
- *log_buf = 0;
- }
+ return libbpf_err(-EEXIST);
+ if (log_sz && !log_buf)
+ return libbpf_err(-EINVAL);
+ /* cache native raw data representation */
raw_data = btf_get_raw_data(btf, &raw_size, false);
if (!raw_data) {
err = -ENOMEM;
goto done;
}
- /* cache native raw data representation */
btf->raw_size = raw_size;
btf->raw_data = raw_data;
- btf->fd = bpf_load_btf(raw_data, raw_size, log_buf, log_buf_size, false);
+retry_load:
+ /* if log_level is 0, we won't provide log_buf/log_size to the kernel,
+ * initially. Only if BTF loading fails, we bump log_level to 1 and
+ * retry, using either auto-allocated or custom log_buf. This way
+ * non-NULL custom log_buf provides a buffer just in case, but hopes
+ * for successful load and no need for log_buf.
+ */
+ if (log_level) {
+ /* if caller didn't provide custom log_buf, we'll keep
+ * allocating our own progressively bigger buffers for BTF
+ * verification log
+ */
+ if (!log_buf) {
+ buf_sz = max((__u32)BPF_LOG_BUF_SIZE, buf_sz * 2);
+ tmp = realloc(buf, buf_sz);
+ if (!tmp) {
+ err = -ENOMEM;
+ goto done;
+ }
+ buf = tmp;
+ buf[0] = '\0';
+ }
+
+ opts.log_buf = log_buf ? log_buf : buf;
+ opts.log_size = log_buf ? log_sz : buf_sz;
+ opts.log_level = log_level;
+ }
+
+ opts.token_fd = token_fd;
+ if (token_fd)
+ opts.btf_flags |= BPF_F_TOKEN_FD;
+
+ btf->fd = bpf_btf_load(raw_data, raw_size, &opts);
if (btf->fd < 0) {
- if (!log_buf || errno == ENOSPC) {
- log_buf_size = max((__u32)BPF_LOG_BUF_SIZE,
- log_buf_size << 1);
- free(log_buf);
+ /* time to turn on verbose mode and try again */
+ if (log_level == 0) {
+ log_level = 1;
goto retry_load;
}
+ /* only retry if caller didn't provide custom log_buf, but
+ * make sure we can never overflow buf_sz
+ */
+ if (!log_buf && errno == ENOSPC && buf_sz <= UINT_MAX / 2)
+ goto retry_load;
err = -errno;
- pr_warn("Error loading BTF: %s(%d)\n", strerror(errno), errno);
- if (*log_buf)
- pr_warn("%s\n", log_buf);
- goto done;
+ pr_warn("BTF loading error: %d\n", err);
+ /* don't print out contents of custom log_buf */
+ if (!log_buf && buf[0])
+ pr_warn("-- BEGIN BTF LOAD LOG ---\n%s\n-- END BTF LOAD LOG --\n", buf);
}
done:
- free(log_buf);
- return err;
+ free(buf);
+ return libbpf_err(err);
+}
+
+int btf__load_into_kernel(struct btf *btf)
+{
+ return btf_load_into_kernel(btf, NULL, 0, 0, 0);
}
int btf__fd(const struct btf *btf)
@@ -1144,6 +1418,11 @@ void btf__set_fd(struct btf *btf, int fd)
btf->fd = fd;
}
+static const void *btf_strs_data(const struct btf *btf)
+{
+ return btf->strs_data ? btf->strs_data : strset__data(btf->strs_set);
+}
+
static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian)
{
struct btf_header *hdr = btf->hdr;
@@ -1171,8 +1450,8 @@ static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endi
memcpy(p, btf->types_data, hdr->type_len);
if (swap_endian) {
- for (i = 1; i <= btf->nr_types; i++) {
- t = p + btf->type_offs[i];
+ for (i = 0; i < btf->nr_types; i++) {
+ t = p + btf->type_offs[i];
/* btf_bswap_type_rest() relies on native t->info, so
* we swap base type info after we swapped all the
* additional information
@@ -1184,7 +1463,7 @@ static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endi
}
p += hdr->type_len;
- memcpy(p, btf->strs_data, hdr->str_len);
+ memcpy(p, btf_strs_data(btf), hdr->str_len);
p += hdr->str_len;
*size = data_sz;
@@ -1194,7 +1473,7 @@ err_out:
return NULL;
}
-const void *btf__get_raw_data(const struct btf *btf_ro, __u32 *size)
+const void *btf__raw_data(const struct btf *btf_ro, __u32 *size)
{
struct btf *btf = (struct btf *)btf_ro;
__u32 data_sz;
@@ -1202,7 +1481,7 @@ const void *btf__get_raw_data(const struct btf *btf_ro, __u32 *size)
data = btf_get_raw_data(btf, &data_sz, btf->swapped_endian);
if (!data)
- return NULL;
+ return errno = ENOMEM, NULL;
btf->raw_size = data_sz;
if (btf->swapped_endian)
@@ -1213,12 +1492,17 @@ const void *btf__get_raw_data(const struct btf *btf_ro, __u32 *size)
return data;
}
+__attribute__((alias("btf__raw_data")))
+const void *btf__get_raw_data(const struct btf *btf, __u32 *size);
+
const char *btf__str_by_offset(const struct btf *btf, __u32 offset)
{
- if (offset < btf->hdr->str_len)
- return btf->strs_data + offset;
+ if (offset < btf->start_str_off)
+ return btf__str_by_offset(btf->base_btf, offset);
+ else if (offset - btf->start_str_off < btf->hdr->str_len)
+ return btf_strs_data(btf) + (offset - btf->start_str_off);
else
- return NULL;
+ return errno = EINVAL, NULL;
}
const char *btf__name_by_offset(const struct btf *btf, __u32 offset)
@@ -1226,36 +1510,28 @@ const char *btf__name_by_offset(const struct btf *btf, __u32 offset)
return btf__str_by_offset(btf, offset);
}
-int btf__get_from_id(__u32 id, struct btf **btf)
+struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf)
{
- struct bpf_btf_info btf_info = { 0 };
+ struct bpf_btf_info btf_info;
__u32 len = sizeof(btf_info);
__u32 last_size;
- int btf_fd;
+ struct btf *btf;
void *ptr;
int err;
- err = 0;
- *btf = NULL;
- btf_fd = bpf_btf_get_fd_by_id(id);
- if (btf_fd < 0)
- return 0;
-
- /* we won't know btf_size until we call bpf_obj_get_info_by_fd(). so
+ /* we won't know btf_size until we call bpf_btf_get_info_by_fd(). so
* let's start with a sane default - 4KiB here - and resize it only if
- * bpf_obj_get_info_by_fd() needs a bigger buffer.
+ * bpf_btf_get_info_by_fd() needs a bigger buffer.
*/
- btf_info.btf_size = 4096;
- last_size = btf_info.btf_size;
+ last_size = 4096;
ptr = malloc(last_size);
- if (!ptr) {
- err = -ENOMEM;
- goto exit_free;
- }
+ if (!ptr)
+ return ERR_PTR(-ENOMEM);
- memset(ptr, 0, last_size);
+ memset(&btf_info, 0, sizeof(btf_info));
btf_info.btf = ptr_to_u64(ptr);
- err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
+ btf_info.btf_size = last_size;
+ err = bpf_btf_get_info_by_fd(btf_fd, &btf_info, &len);
if (!err && btf_info.btf_size > last_size) {
void *temp_ptr;
@@ -1263,119 +1539,49 @@ int btf__get_from_id(__u32 id, struct btf **btf)
last_size = btf_info.btf_size;
temp_ptr = realloc(ptr, last_size);
if (!temp_ptr) {
- err = -ENOMEM;
+ btf = ERR_PTR(-ENOMEM);
goto exit_free;
}
ptr = temp_ptr;
- memset(ptr, 0, last_size);
+
+ len = sizeof(btf_info);
+ memset(&btf_info, 0, sizeof(btf_info));
btf_info.btf = ptr_to_u64(ptr);
- err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
+ btf_info.btf_size = last_size;
+
+ err = bpf_btf_get_info_by_fd(btf_fd, &btf_info, &len);
}
if (err || btf_info.btf_size > last_size) {
- err = errno;
+ btf = err ? ERR_PTR(-errno) : ERR_PTR(-E2BIG);
goto exit_free;
}
- *btf = btf__new((__u8 *)(long)btf_info.btf, btf_info.btf_size);
- if (IS_ERR(*btf)) {
- err = PTR_ERR(*btf);
- *btf = NULL;
- }
+ btf = btf_new(ptr, btf_info.btf_size, base_btf);
exit_free:
- close(btf_fd);
free(ptr);
-
- return err;
+ return btf;
}
-int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
- __u32 expected_key_size, __u32 expected_value_size,
- __u32 *key_type_id, __u32 *value_type_id)
+struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf)
{
- const struct btf_type *container_type;
- const struct btf_member *key, *value;
- const size_t max_name = 256;
- char container_name[max_name];
- __s64 key_size, value_size;
- __s32 container_id;
-
- if (snprintf(container_name, max_name, "____btf_map_%s", map_name) ==
- max_name) {
- pr_warn("map:%s length of '____btf_map_%s' is too long\n",
- map_name, map_name);
- return -EINVAL;
- }
-
- container_id = btf__find_by_name(btf, container_name);
- if (container_id < 0) {
- pr_debug("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n",
- map_name, container_name);
- return container_id;
- }
-
- container_type = btf__type_by_id(btf, container_id);
- if (!container_type) {
- pr_warn("map:%s cannot find BTF type for container_id:%u\n",
- map_name, container_id);
- return -EINVAL;
- }
-
- if (!btf_is_struct(container_type) || btf_vlen(container_type) < 2) {
- pr_warn("map:%s container_name:%s is an invalid container struct\n",
- map_name, container_name);
- return -EINVAL;
- }
-
- key = btf_members(container_type);
- value = key + 1;
-
- key_size = btf__resolve_size(btf, key->type);
- if (key_size < 0) {
- pr_warn("map:%s invalid BTF key_type_size\n", map_name);
- return key_size;
- }
-
- if (expected_key_size != key_size) {
- pr_warn("map:%s btf_key_type_size:%u != map_def_key_size:%u\n",
- map_name, (__u32)key_size, expected_key_size);
- return -EINVAL;
- }
-
- value_size = btf__resolve_size(btf, value->type);
- if (value_size < 0) {
- pr_warn("map:%s invalid BTF value_type_size\n", map_name);
- return value_size;
- }
-
- if (expected_value_size != value_size) {
- pr_warn("map:%s btf_value_type_size:%u != map_def_value_size:%u\n",
- map_name, (__u32)value_size, expected_value_size);
- return -EINVAL;
- }
-
- *key_type_id = key->type;
- *value_type_id = value->type;
+ struct btf *btf;
+ int btf_fd;
- return 0;
-}
+ btf_fd = bpf_btf_get_fd_by_id(id);
+ if (btf_fd < 0)
+ return libbpf_err_ptr(-errno);
-static size_t strs_hash_fn(const void *key, void *ctx)
-{
- struct btf *btf = ctx;
- const char *str = btf->strs_data + (long)key;
+ btf = btf_get_from_fd(btf_fd, base_btf);
+ close(btf_fd);
- return str_hash(str);
+ return libbpf_ptr(btf);
}
-static bool strs_hash_equal_fn(const void *key1, const void *key2, void *ctx)
+struct btf *btf__load_from_kernel_by_id(__u32 id)
{
- struct btf *btf = ctx;
- const char *str1 = btf->strs_data + (long)key1;
- const char *str2 = btf->strs_data + (long)key2;
-
- return strcmp(str1, str2) == 0;
+ return btf__load_from_kernel_by_id_split(id, NULL);
}
static void btf_invalidate_raw_data(struct btf *btf)
@@ -1396,10 +1602,9 @@ static void btf_invalidate_raw_data(struct btf *btf)
*/
static int btf_ensure_modifiable(struct btf *btf)
{
- void *hdr, *types, *strs, *strs_end, *s;
- struct hashmap *hash = NULL;
- long off;
- int err;
+ void *hdr, *types;
+ struct strset *set = NULL;
+ int err = -ENOMEM;
if (btf_is_modifiable(btf)) {
/* any BTF modification invalidates raw_data */
@@ -1410,45 +1615,32 @@ static int btf_ensure_modifiable(struct btf *btf)
/* split raw data into three memory regions */
hdr = malloc(btf->hdr->hdr_len);
types = malloc(btf->hdr->type_len);
- strs = malloc(btf->hdr->str_len);
- if (!hdr || !types || !strs)
+ if (!hdr || !types)
goto err_out;
memcpy(hdr, btf->hdr, btf->hdr->hdr_len);
memcpy(types, btf->types_data, btf->hdr->type_len);
- memcpy(strs, btf->strs_data, btf->hdr->str_len);
/* build lookup index for all strings */
- hash = hashmap__new(strs_hash_fn, strs_hash_equal_fn, btf);
- if (IS_ERR(hash)) {
- err = PTR_ERR(hash);
- hash = NULL;
+ set = strset__new(BTF_MAX_STR_OFFSET, btf->strs_data, btf->hdr->str_len);
+ if (IS_ERR(set)) {
+ err = PTR_ERR(set);
goto err_out;
}
- strs_end = strs + btf->hdr->str_len;
- for (off = 0, s = strs; s < strs_end; off += strlen(s) + 1, s = strs + off) {
- /* hashmap__add() returns EEXIST if string with the same
- * content already is in the hash map
- */
- err = hashmap__add(hash, (void *)off, (void *)off);
- if (err == -EEXIST)
- continue; /* duplicate */
- if (err)
- goto err_out;
- }
-
/* only when everything was successful, update internal state */
btf->hdr = hdr;
btf->types_data = types;
btf->types_data_cap = btf->hdr->type_len;
- btf->strs_data = strs;
- btf->strs_data_cap = btf->hdr->str_len;
- btf->strs_hash = hash;
+ btf->strs_data = NULL;
+ btf->strs_set = set;
/* if BTF was created from scratch, all strings are guaranteed to be
* unique and deduplicated
*/
- btf->strs_deduped = btf->hdr->str_len <= 1;
+ if (btf->hdr->str_len == 0)
+ btf->strs_deduped = true;
+ if (!btf->base_btf && btf->hdr->str_len == 1)
+ btf->strs_deduped = true;
/* invalidate raw_data representation */
btf_invalidate_raw_data(btf);
@@ -1456,17 +1648,10 @@ static int btf_ensure_modifiable(struct btf *btf)
return 0;
err_out:
- hashmap__free(hash);
+ strset__free(set);
free(hdr);
free(types);
- free(strs);
- return -ENOMEM;
-}
-
-static void *btf_add_str_mem(struct btf *btf, size_t add_sz)
-{
- return btf_add_mem(&btf->strs_data, &btf->strs_data_cap, 1,
- btf->hdr->str_len, BTF_MAX_STR_OFFSET, add_sz);
+ return err;
}
/* Find an offset in BTF string section that corresponds to a given string *s*.
@@ -1477,26 +1662,23 @@ static void *btf_add_str_mem(struct btf *btf, size_t add_sz)
*/
int btf__find_str(struct btf *btf, const char *s)
{
- long old_off, new_off, len;
- void *p;
+ int off;
+
+ if (btf->base_btf) {
+ off = btf__find_str(btf->base_btf, s);
+ if (off != -ENOENT)
+ return off;
+ }
/* BTF needs to be in a modifiable state to build string lookup index */
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
-
- /* see btf__add_str() for why we do this */
- len = strlen(s) + 1;
- p = btf_add_str_mem(btf, len);
- if (!p)
- return -ENOMEM;
-
- new_off = btf->hdr->str_len;
- memcpy(p, s, len);
+ return libbpf_err(-ENOMEM);
- if (hashmap__find(btf->strs_hash, (void *)new_off, (void **)&old_off))
- return old_off;
+ off = strset__find_str(btf->strs_set, s);
+ if (off < 0)
+ return libbpf_err(off);
- return -ENOENT;
+ return btf->start_str_off + off;
}
/* Add a string s to the BTF string section.
@@ -1506,58 +1688,232 @@ int btf__find_str(struct btf *btf, const char *s)
*/
int btf__add_str(struct btf *btf, const char *s)
{
- long old_off, new_off, len;
- void *p;
- int err;
+ int off;
+
+ if (btf->base_btf) {
+ off = btf__find_str(btf->base_btf, s);
+ if (off != -ENOENT)
+ return off;
+ }
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
- /* Hashmap keys are always offsets within btf->strs_data, so to even
- * look up some string from the "outside", we need to first append it
- * at the end, so that it can be addressed with an offset. Luckily,
- * until btf->hdr->str_len is incremented, that string is just a piece
- * of garbage for the rest of BTF code, so no harm, no foul. On the
- * other hand, if the string is unique, it's already appended and
- * ready to be used, only a simple btf->hdr->str_len increment away.
- */
- len = strlen(s) + 1;
- p = btf_add_str_mem(btf, len);
- if (!p)
- return -ENOMEM;
+ off = strset__add_str(btf->strs_set, s);
+ if (off < 0)
+ return libbpf_err(off);
- new_off = btf->hdr->str_len;
- memcpy(p, s, len);
+ btf->hdr->str_len = strset__data_size(btf->strs_set);
- /* Now attempt to add the string, but only if the string with the same
- * contents doesn't exist already (HASHMAP_ADD strategy). If such
- * string exists, we'll get its offset in old_off (that's old_key).
- */
- err = hashmap__insert(btf->strs_hash, (void *)new_off, (void *)new_off,
- HASHMAP_ADD, (const void **)&old_off, NULL);
- if (err == -EEXIST)
- return old_off; /* duplicated string, return existing offset */
+ return btf->start_str_off + off;
+}
+
+static void *btf_add_type_mem(struct btf *btf, size_t add_sz)
+{
+ return libbpf_add_mem(&btf->types_data, &btf->types_data_cap, 1,
+ btf->hdr->type_len, UINT_MAX, add_sz);
+}
+
+static void btf_type_inc_vlen(struct btf_type *t)
+{
+ t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, btf_kflag(t));
+}
+
+static int btf_commit_type(struct btf *btf, int data_sz)
+{
+ int err;
+
+ err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
if (err)
- return err;
+ return libbpf_err(err);
- btf->hdr->str_len += len; /* new unique string, adjust data length */
- return new_off;
+ btf->hdr->type_len += data_sz;
+ btf->hdr->str_off += data_sz;
+ btf->nr_types++;
+ return btf->start_id + btf->nr_types - 1;
}
-static void *btf_add_type_mem(struct btf *btf, size_t add_sz)
+struct btf_pipe {
+ const struct btf *src;
+ struct btf *dst;
+ struct hashmap *str_off_map; /* map string offsets from src to dst */
+};
+
+static int btf_rewrite_str(__u32 *str_off, void *ctx)
{
- return btf_add_mem(&btf->types_data, &btf->types_data_cap, 1,
- btf->hdr->type_len, UINT_MAX, add_sz);
+ struct btf_pipe *p = ctx;
+ long mapped_off;
+ int off, err;
+
+ if (!*str_off) /* nothing to do for empty strings */
+ return 0;
+
+ if (p->str_off_map &&
+ hashmap__find(p->str_off_map, *str_off, &mapped_off)) {
+ *str_off = mapped_off;
+ return 0;
+ }
+
+ off = btf__add_str(p->dst, btf__str_by_offset(p->src, *str_off));
+ if (off < 0)
+ return off;
+
+ /* Remember string mapping from src to dst. It avoids
+ * performing expensive string comparisons.
+ */
+ if (p->str_off_map) {
+ err = hashmap__append(p->str_off_map, *str_off, off);
+ if (err)
+ return err;
+ }
+
+ *str_off = off;
+ return 0;
}
-static __u32 btf_type_info(int kind, int vlen, int kflag)
+int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_type *src_type)
{
- return (kflag << 31) | (kind << 24) | vlen;
+ struct btf_pipe p = { .src = src_btf, .dst = btf };
+ struct btf_type *t;
+ int sz, err;
+
+ sz = btf_type_size(src_type);
+ if (sz < 0)
+ return libbpf_err(sz);
+
+ /* deconstruct BTF, if necessary, and invalidate raw_data */
+ if (btf_ensure_modifiable(btf))
+ return libbpf_err(-ENOMEM);
+
+ t = btf_add_type_mem(btf, sz);
+ if (!t)
+ return libbpf_err(-ENOMEM);
+
+ memcpy(t, src_type, sz);
+
+ err = btf_type_visit_str_offs(t, btf_rewrite_str, &p);
+ if (err)
+ return libbpf_err(err);
+
+ return btf_commit_type(btf, sz);
}
-static void btf_type_inc_vlen(struct btf_type *t)
+static int btf_rewrite_type_ids(__u32 *type_id, void *ctx)
{
- t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, btf_kflag(t));
+ struct btf *btf = ctx;
+
+ if (!*type_id) /* nothing to do for VOID references */
+ return 0;
+
+ /* we haven't updated btf's type count yet, so
+ * btf->start_id + btf->nr_types - 1 is the type ID offset we should
+ * add to all newly added BTF types
+ */
+ *type_id += btf->start_id + btf->nr_types - 1;
+ return 0;
+}
+
+static size_t btf_dedup_identity_hash_fn(long key, void *ctx);
+static bool btf_dedup_equal_fn(long k1, long k2, void *ctx);
+
+int btf__add_btf(struct btf *btf, const struct btf *src_btf)
+{
+ struct btf_pipe p = { .src = src_btf, .dst = btf };
+ int data_sz, sz, cnt, i, err, old_strs_len;
+ __u32 *off;
+ void *t;
+
+ /* appending split BTF isn't supported yet */
+ if (src_btf->base_btf)
+ return libbpf_err(-ENOTSUP);
+
+ /* deconstruct BTF, if necessary, and invalidate raw_data */
+ if (btf_ensure_modifiable(btf))
+ return libbpf_err(-ENOMEM);
+
+ /* remember original strings section size if we have to roll back
+ * partial strings section changes
+ */
+ old_strs_len = btf->hdr->str_len;
+
+ data_sz = src_btf->hdr->type_len;
+ cnt = btf__type_cnt(src_btf) - 1;
+
+ /* pre-allocate enough memory for new types */
+ t = btf_add_type_mem(btf, data_sz);
+ if (!t)
+ return libbpf_err(-ENOMEM);
+
+ /* pre-allocate enough memory for type offset index for new types */
+ off = btf_add_type_offs_mem(btf, cnt);
+ if (!off)
+ return libbpf_err(-ENOMEM);
+
+ /* Map the string offsets from src_btf to the offsets from btf to improve performance */
+ p.str_off_map = hashmap__new(btf_dedup_identity_hash_fn, btf_dedup_equal_fn, NULL);
+ if (IS_ERR(p.str_off_map))
+ return libbpf_err(-ENOMEM);
+
+ /* bulk copy types data for all types from src_btf */
+ memcpy(t, src_btf->types_data, data_sz);
+
+ for (i = 0; i < cnt; i++) {
+ sz = btf_type_size(t);
+ if (sz < 0) {
+ /* unlikely, has to be corrupted src_btf */
+ err = sz;
+ goto err_out;
+ }
+
+ /* fill out type ID to type offset mapping for lookups by type ID */
+ *off = t - btf->types_data;
+
+ /* add, dedup, and remap strings referenced by this BTF type */
+ err = btf_type_visit_str_offs(t, btf_rewrite_str, &p);
+ if (err)
+ goto err_out;
+
+ /* remap all type IDs referenced from this BTF type */
+ err = btf_type_visit_type_ids(t, btf_rewrite_type_ids, btf);
+ if (err)
+ goto err_out;
+
+ /* go to next type data and type offset index entry */
+ t += sz;
+ off++;
+ }
+
+ /* Up until now any of the copied type data was effectively invisible,
+ * so if we exited early before this point due to error, BTF would be
+ * effectively unmodified. There would be extra internal memory
+ * pre-allocated, but it would not be available for querying. But now
+ * that we've copied and rewritten all the data successfully, we can
+ * update type count and various internal offsets and sizes to
+ * "commit" the changes and made them visible to the outside world.
+ */
+ btf->hdr->type_len += data_sz;
+ btf->hdr->str_off += data_sz;
+ btf->nr_types += cnt;
+
+ hashmap__free(p.str_off_map);
+
+ /* return type ID of the first added BTF type */
+ return btf->start_id + btf->nr_types - cnt;
+err_out:
+ /* zero out preallocated memory as if it was just allocated with
+ * libbpf_add_mem()
+ */
+ memset(btf->types_data + btf->hdr->type_len, 0, data_sz);
+ memset(btf->strs_data + old_strs_len, 0, btf->hdr->str_len - old_strs_len);
+
+ /* and now restore original strings section size; types data size
+ * wasn't modified, so doesn't need restoring, see big comment above
+ */
+ btf->hdr->str_len = old_strs_len;
+
+ hashmap__free(p.str_off_map);
+
+ return libbpf_err(err);
}
/*
@@ -1572,25 +1928,25 @@ static void btf_type_inc_vlen(struct btf_type *t)
int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding)
{
struct btf_type *t;
- int sz, err, name_off;
+ int sz, name_off;
/* non-empty name */
if (!name || !name[0])
- return -EINVAL;
+ return libbpf_err(-EINVAL);
/* byte_sz must be power of 2 */
if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 16)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (encoding & ~(BTF_INT_SIGNED | BTF_INT_CHAR | BTF_INT_BOOL))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
/* deconstruct BTF, if necessary, and invalidate raw_data */
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_type) + sizeof(int);
t = btf_add_type_mem(btf, sz);
if (!t)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
/* if something goes wrong later, we might end up with an extra string,
* but that shouldn't be a problem, because BTF can't be constructed
@@ -1606,14 +1962,48 @@ int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding
/* set INT info, we don't allow setting legacy bit offset/size */
*(__u32 *)(t + 1) = (encoding << 24) | (byte_sz * 8);
- err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
- if (err)
- return err;
+ return btf_commit_type(btf, sz);
+}
- btf->hdr->type_len += sz;
- btf->hdr->str_off += sz;
- btf->nr_types++;
- return btf->nr_types;
+/*
+ * Append new BTF_KIND_FLOAT type with:
+ * - *name* - non-empty, non-NULL type name;
+ * - *sz* - size of the type, in bytes;
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_float(struct btf *btf, const char *name, size_t byte_sz)
+{
+ struct btf_type *t;
+ int sz, name_off;
+
+ /* non-empty name */
+ if (!name || !name[0])
+ return libbpf_err(-EINVAL);
+
+ /* byte_sz must be one of the explicitly allowed values */
+ if (byte_sz != 2 && byte_sz != 4 && byte_sz != 8 && byte_sz != 12 &&
+ byte_sz != 16)
+ return libbpf_err(-EINVAL);
+
+ if (btf_ensure_modifiable(btf))
+ return libbpf_err(-ENOMEM);
+
+ sz = sizeof(struct btf_type);
+ t = btf_add_type_mem(btf, sz);
+ if (!t)
+ return libbpf_err(-ENOMEM);
+
+ name_off = btf__add_str(btf, name);
+ if (name_off < 0)
+ return name_off;
+
+ t->name_off = name_off;
+ t->info = btf_type_info(BTF_KIND_FLOAT, 0, 0);
+ t->size = byte_sz;
+
+ return btf_commit_type(btf, sz);
}
/* it's completely legal to append BTF types with type IDs pointing forward to
@@ -1631,18 +2021,18 @@ static int validate_type_id(int id)
static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref_type_id)
{
struct btf_type *t;
- int sz, name_off = 0, err;
+ int sz, name_off = 0;
if (validate_type_id(ref_type_id))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_type);
t = btf_add_type_mem(btf, sz);
if (!t)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
if (name && name[0]) {
name_off = btf__add_str(btf, name);
@@ -1654,14 +2044,7 @@ static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref
t->info = btf_type_info(kind, 0, 0);
t->type = ref_type_id;
- err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
- if (err)
- return err;
-
- btf->hdr->type_len += sz;
- btf->hdr->str_off += sz;
- btf->nr_types++;
- return btf->nr_types;
+ return btf_commit_type(btf, sz);
}
/*
@@ -1689,18 +2072,18 @@ int btf__add_array(struct btf *btf, int index_type_id, int elem_type_id, __u32 n
{
struct btf_type *t;
struct btf_array *a;
- int sz, err;
+ int sz;
if (validate_type_id(index_type_id) || validate_type_id(elem_type_id))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_type) + sizeof(struct btf_array);
t = btf_add_type_mem(btf, sz);
if (!t)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
t->name_off = 0;
t->info = btf_type_info(BTF_KIND_ARRAY, 0, 0);
@@ -1711,29 +2094,22 @@ int btf__add_array(struct btf *btf, int index_type_id, int elem_type_id, __u32 n
a->index_type = index_type_id;
a->nelems = nr_elems;
- err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
- if (err)
- return err;
-
- btf->hdr->type_len += sz;
- btf->hdr->str_off += sz;
- btf->nr_types++;
- return btf->nr_types;
+ return btf_commit_type(btf, sz);
}
/* generic STRUCT/UNION append function */
static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32 bytes_sz)
{
struct btf_type *t;
- int sz, err, name_off = 0;
+ int sz, name_off = 0;
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_type);
t = btf_add_type_mem(btf, sz);
if (!t)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
if (name && name[0]) {
name_off = btf__add_str(btf, name);
@@ -1748,14 +2124,7 @@ static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32
t->info = btf_type_info(kind, 0, 0);
t->size = bytes_sz;
- err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
- if (err)
- return err;
-
- btf->hdr->type_len += sz;
- btf->hdr->str_off += sz;
- btf->nr_types++;
- return btf->nr_types;
+ return btf_commit_type(btf, sz);
}
/*
@@ -1764,7 +2133,7 @@ static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32
* - *byte_sz* - size of the struct, in bytes;
*
* Struct initially has no fields in it. Fields can be added by
- * btf__add_field() right after btf__add_struct() succeeds.
+ * btf__add_field() right after btf__add_struct() succeeds.
*
* Returns:
* - >0, type ID of newly added BTF type;
@@ -1793,6 +2162,11 @@ int btf__add_union(struct btf *btf, const char *name, __u32 byte_sz)
return btf_add_composite(btf, BTF_KIND_UNION, name, byte_sz);
}
+static struct btf_type *btf_last_type(struct btf *btf)
+{
+ return btf_type_by_id(btf, btf__type_cnt(btf) - 1);
+}
+
/*
* Append new field for the current STRUCT/UNION type with:
* - *name* - name of the field, can be NULL or empty for anonymous field;
@@ -1813,30 +2187,30 @@ int btf__add_field(struct btf *btf, const char *name, int type_id,
/* last type should be union/struct */
if (btf->nr_types == 0)
- return -EINVAL;
- t = btf_type_by_id(btf, btf->nr_types);
+ return libbpf_err(-EINVAL);
+ t = btf_last_type(btf);
if (!btf_is_composite(t))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (validate_type_id(type_id))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
/* best-effort bit field offset/size enforcement */
is_bitfield = bit_size || (bit_offset % 8 != 0);
if (is_bitfield && (bit_size == 0 || bit_size > 255 || bit_offset > 0xffffff))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
/* only offset 0 is allowed for unions */
if (btf_is_union(t) && bit_offset)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
/* decompose and invalidate raw data */
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_member);
m = btf_add_type_mem(btf, sz);
if (!m)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
if (name && name[0]) {
name_off = btf__add_str(btf, name);
@@ -1849,7 +2223,7 @@ int btf__add_field(struct btf *btf, const char *name, int type_id,
m->offset = bit_offset | (bit_size << 24);
/* btf_add_type_mem can invalidate t pointer */
- t = btf_type_by_id(btf, btf->nr_types);
+ t = btf_last_type(btf);
/* update parent type's vlen and kflag */
t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, is_bitfield || btf_kflag(t));
@@ -1858,35 +2232,23 @@ int btf__add_field(struct btf *btf, const char *name, int type_id,
return 0;
}
-/*
- * Append new BTF_KIND_ENUM type with:
- * - *name* - name of the enum, can be NULL or empty for anonymous enums;
- * - *byte_sz* - size of the enum, in bytes.
- *
- * Enum initially has no enum values in it (and corresponds to enum forward
- * declaration). Enumerator values can be added by btf__add_enum_value()
- * immediately after btf__add_enum() succeeds.
- *
- * Returns:
- * - >0, type ID of newly added BTF type;
- * - <0, on error.
- */
-int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz)
+static int btf_add_enum_common(struct btf *btf, const char *name, __u32 byte_sz,
+ bool is_signed, __u8 kind)
{
struct btf_type *t;
- int sz, err, name_off = 0;
+ int sz, name_off = 0;
/* byte_sz must be power of 2 */
if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 8)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_type);
t = btf_add_type_mem(btf, sz);
if (!t)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
if (name && name[0]) {
name_off = btf__add_str(btf, name);
@@ -1896,17 +2258,32 @@ int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz)
/* start out with vlen=0; it will be adjusted when adding enum values */
t->name_off = name_off;
- t->info = btf_type_info(BTF_KIND_ENUM, 0, 0);
+ t->info = btf_type_info(kind, 0, is_signed);
t->size = byte_sz;
- err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
- if (err)
- return err;
+ return btf_commit_type(btf, sz);
+}
- btf->hdr->type_len += sz;
- btf->hdr->str_off += sz;
- btf->nr_types++;
- return btf->nr_types;
+/*
+ * Append new BTF_KIND_ENUM type with:
+ * - *name* - name of the enum, can be NULL or empty for anonymous enums;
+ * - *byte_sz* - size of the enum, in bytes.
+ *
+ * Enum initially has no enum values in it (and corresponds to enum forward
+ * declaration). Enumerator values can be added by btf__add_enum_value()
+ * immediately after btf__add_enum() succeeds.
+ *
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz)
+{
+ /*
+ * set the signedness to be unsigned, it will change to signed
+ * if any later enumerator is negative.
+ */
+ return btf_add_enum_common(btf, name, byte_sz, false, BTF_KIND_ENUM);
}
/*
@@ -1925,25 +2302,25 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value)
/* last type should be BTF_KIND_ENUM */
if (btf->nr_types == 0)
- return -EINVAL;
- t = btf_type_by_id(btf, btf->nr_types);
+ return libbpf_err(-EINVAL);
+ t = btf_last_type(btf);
if (!btf_is_enum(t))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
/* non-empty name */
if (!name || !name[0])
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (value < INT_MIN || value > UINT_MAX)
- return -E2BIG;
+ return libbpf_err(-E2BIG);
/* decompose and invalidate raw data */
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_enum);
v = btf_add_type_mem(btf, sz);
if (!v)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
name_off = btf__add_str(btf, name);
if (name_off < 0)
@@ -1953,7 +2330,83 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value)
v->val = value;
/* update parent type's vlen */
- t = btf_type_by_id(btf, btf->nr_types);
+ t = btf_last_type(btf);
+ btf_type_inc_vlen(t);
+
+ /* if negative value, set signedness to signed */
+ if (value < 0)
+ t->info = btf_type_info(btf_kind(t), btf_vlen(t), true);
+
+ btf->hdr->type_len += sz;
+ btf->hdr->str_off += sz;
+ return 0;
+}
+
+/*
+ * Append new BTF_KIND_ENUM64 type with:
+ * - *name* - name of the enum, can be NULL or empty for anonymous enums;
+ * - *byte_sz* - size of the enum, in bytes.
+ * - *is_signed* - whether the enum values are signed or not;
+ *
+ * Enum initially has no enum values in it (and corresponds to enum forward
+ * declaration). Enumerator values can be added by btf__add_enum64_value()
+ * immediately after btf__add_enum64() succeeds.
+ *
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_enum64(struct btf *btf, const char *name, __u32 byte_sz,
+ bool is_signed)
+{
+ return btf_add_enum_common(btf, name, byte_sz, is_signed,
+ BTF_KIND_ENUM64);
+}
+
+/*
+ * Append new enum value for the current ENUM64 type with:
+ * - *name* - name of the enumerator value, can't be NULL or empty;
+ * - *value* - integer value corresponding to enum value *name*;
+ * Returns:
+ * - 0, on success;
+ * - <0, on error.
+ */
+int btf__add_enum64_value(struct btf *btf, const char *name, __u64 value)
+{
+ struct btf_enum64 *v;
+ struct btf_type *t;
+ int sz, name_off;
+
+ /* last type should be BTF_KIND_ENUM64 */
+ if (btf->nr_types == 0)
+ return libbpf_err(-EINVAL);
+ t = btf_last_type(btf);
+ if (!btf_is_enum64(t))
+ return libbpf_err(-EINVAL);
+
+ /* non-empty name */
+ if (!name || !name[0])
+ return libbpf_err(-EINVAL);
+
+ /* decompose and invalidate raw data */
+ if (btf_ensure_modifiable(btf))
+ return libbpf_err(-ENOMEM);
+
+ sz = sizeof(struct btf_enum64);
+ v = btf_add_type_mem(btf, sz);
+ if (!v)
+ return libbpf_err(-ENOMEM);
+
+ name_off = btf__add_str(btf, name);
+ if (name_off < 0)
+ return name_off;
+
+ v->name_off = name_off;
+ v->val_lo32 = (__u32)value;
+ v->val_hi32 = value >> 32;
+
+ /* update parent type's vlen */
+ t = btf_last_type(btf);
btf_type_inc_vlen(t);
btf->hdr->type_len += sz;
@@ -1973,7 +2426,7 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value)
int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind)
{
if (!name || !name[0])
- return -EINVAL;
+ return libbpf_err(-EINVAL);
switch (fwd_kind) {
case BTF_FWD_STRUCT:
@@ -1994,7 +2447,7 @@ int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind)
*/
return btf__add_enum(btf, name, sizeof(int));
default:
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
}
@@ -2009,7 +2462,7 @@ int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind)
int btf__add_typedef(struct btf *btf, const char *name, int ref_type_id)
{
if (!name || !name[0])
- return -EINVAL;
+ return libbpf_err(-EINVAL);
return btf_add_ref_kind(btf, BTF_KIND_TYPEDEF, name, ref_type_id);
}
@@ -2051,6 +2504,22 @@ int btf__add_restrict(struct btf *btf, int ref_type_id)
}
/*
+ * Append new BTF_KIND_TYPE_TAG type with:
+ * - *value*, non-empty/non-NULL tag value;
+ * - *ref_type_id* - referenced type ID, it might not exist yet;
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id)
+{
+ if (!value || !value[0])
+ return libbpf_err(-EINVAL);
+
+ return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id);
+}
+
+/*
* Append new BTF_KIND_FUNC type with:
* - *name*, non-empty/non-NULL name;
* - *proto_type_id* - FUNC_PROTO's type ID, it might not exist yet;
@@ -2064,10 +2533,10 @@ int btf__add_func(struct btf *btf, const char *name,
int id;
if (!name || !name[0])
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (linkage != BTF_FUNC_STATIC && linkage != BTF_FUNC_GLOBAL &&
linkage != BTF_FUNC_EXTERN)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
id = btf_add_ref_kind(btf, BTF_KIND_FUNC, name, proto_type_id);
if (id > 0) {
@@ -2075,7 +2544,7 @@ int btf__add_func(struct btf *btf, const char *name,
t->info = btf_type_info(BTF_KIND_FUNC, linkage, 0);
}
- return id;
+ return libbpf_err(id);
}
/*
@@ -2093,18 +2562,18 @@ int btf__add_func(struct btf *btf, const char *name,
int btf__add_func_proto(struct btf *btf, int ret_type_id)
{
struct btf_type *t;
- int sz, err;
+ int sz;
if (validate_type_id(ret_type_id))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_type);
t = btf_add_type_mem(btf, sz);
if (!t)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
/* start out with vlen=0; this will be adjusted when adding enum
* values, if necessary
@@ -2113,14 +2582,7 @@ int btf__add_func_proto(struct btf *btf, int ret_type_id)
t->info = btf_type_info(BTF_KIND_FUNC_PROTO, 0, 0);
t->type = ret_type_id;
- err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
- if (err)
- return err;
-
- btf->hdr->type_len += sz;
- btf->hdr->str_off += sz;
- btf->nr_types++;
- return btf->nr_types;
+ return btf_commit_type(btf, sz);
}
/*
@@ -2138,23 +2600,23 @@ int btf__add_func_param(struct btf *btf, const char *name, int type_id)
int sz, name_off = 0;
if (validate_type_id(type_id))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
/* last type should be BTF_KIND_FUNC_PROTO */
if (btf->nr_types == 0)
- return -EINVAL;
- t = btf_type_by_id(btf, btf->nr_types);
+ return libbpf_err(-EINVAL);
+ t = btf_last_type(btf);
if (!btf_is_func_proto(t))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
/* decompose and invalidate raw data */
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_param);
p = btf_add_type_mem(btf, sz);
if (!p)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
if (name && name[0]) {
name_off = btf__add_str(btf, name);
@@ -2166,7 +2628,7 @@ int btf__add_func_param(struct btf *btf, const char *name, int type_id)
p->type = type_id;
/* update parent type's vlen */
- t = btf_type_by_id(btf, btf->nr_types);
+ t = btf_last_type(btf);
btf_type_inc_vlen(t);
btf->hdr->type_len += sz;
@@ -2188,25 +2650,25 @@ int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id)
{
struct btf_type *t;
struct btf_var *v;
- int sz, err, name_off;
+ int sz, name_off;
/* non-empty name */
if (!name || !name[0])
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (linkage != BTF_VAR_STATIC && linkage != BTF_VAR_GLOBAL_ALLOCATED &&
linkage != BTF_VAR_GLOBAL_EXTERN)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (validate_type_id(type_id))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
/* deconstruct BTF, if necessary, and invalidate raw_data */
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_type) + sizeof(struct btf_var);
t = btf_add_type_mem(btf, sz);
if (!t)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
name_off = btf__add_str(btf, name);
if (name_off < 0)
@@ -2219,14 +2681,7 @@ int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id)
v = btf_var(t);
v->linkage = linkage;
- err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
- if (err)
- return err;
-
- btf->hdr->type_len += sz;
- btf->hdr->str_off += sz;
- btf->nr_types++;
- return btf->nr_types;
+ return btf_commit_type(btf, sz);
}
/*
@@ -2244,19 +2699,19 @@ int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id)
int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz)
{
struct btf_type *t;
- int sz, err, name_off;
+ int sz, name_off;
/* non-empty name */
if (!name || !name[0])
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_type);
t = btf_add_type_mem(btf, sz);
if (!t)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
name_off = btf__add_str(btf, name);
if (name_off < 0)
@@ -2267,14 +2722,7 @@ int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz)
t->info = btf_type_info(BTF_KIND_DATASEC, 0, 0);
t->size = byte_sz;
- err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
- if (err)
- return err;
-
- btf->hdr->type_len += sz;
- btf->hdr->str_off += sz;
- btf->nr_types++;
- return btf->nr_types;
+ return btf_commit_type(btf, sz);
}
/*
@@ -2295,29 +2743,29 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __
/* last type should be BTF_KIND_DATASEC */
if (btf->nr_types == 0)
- return -EINVAL;
- t = btf_type_by_id(btf, btf->nr_types);
+ return libbpf_err(-EINVAL);
+ t = btf_last_type(btf);
if (!btf_is_datasec(t))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (validate_type_id(var_type_id))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
/* decompose and invalidate raw data */
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_var_secinfo);
v = btf_add_type_mem(btf, sz);
if (!v)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
v->type = var_type_id;
v->offset = offset;
v->size = byte_sz;
/* update parent type's vlen */
- t = btf_type_by_id(btf, btf->nr_types);
+ t = btf_last_type(btf);
btf_type_inc_vlen(t);
btf->hdr->type_len += sz;
@@ -2325,6 +2773,48 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __
return 0;
}
+/*
+ * Append new BTF_KIND_DECL_TAG type with:
+ * - *value* - non-empty/non-NULL string;
+ * - *ref_type_id* - referenced type ID, it might not exist yet;
+ * - *component_idx* - -1 for tagging reference type, otherwise struct/union
+ * member or function argument index;
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id,
+ int component_idx)
+{
+ struct btf_type *t;
+ int sz, value_off;
+
+ if (!value || !value[0] || component_idx < -1)
+ return libbpf_err(-EINVAL);
+
+ if (validate_type_id(ref_type_id))
+ return libbpf_err(-EINVAL);
+
+ if (btf_ensure_modifiable(btf))
+ return libbpf_err(-ENOMEM);
+
+ sz = sizeof(struct btf_type) + sizeof(struct btf_decl_tag);
+ t = btf_add_type_mem(btf, sz);
+ if (!t)
+ return libbpf_err(-ENOMEM);
+
+ value_off = btf__add_str(btf, value);
+ if (value_off < 0)
+ return value_off;
+
+ t->name_off = value_off;
+ t->info = btf_type_info(BTF_KIND_DECL_TAG, 0, false);
+ t->type = ref_type_id;
+ btf_decl_tag(t)->component_idx = component_idx;
+
+ return btf_commit_type(btf, sz);
+}
+
struct btf_ext_sec_setup_param {
__u32 off;
__u32 len;
@@ -2339,6 +2829,7 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext,
const struct btf_ext_info_sec *sinfo;
struct btf_ext_info *ext_info;
__u32 info_left, record_size;
+ size_t sec_cnt = 0;
/* The start of the info sec (including the __u32 record_size). */
void *info;
@@ -2402,8 +2893,7 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext,
return -EINVAL;
}
- total_record_size = sec_hdrlen +
- (__u64)num_records * record_size;
+ total_record_size = sec_hdrlen + (__u64)num_records * record_size;
if (info_left < total_record_size) {
pr_debug("%s section has incorrect num_records in .BTF.ext\n",
ext_sec->desc);
@@ -2412,12 +2902,14 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext,
info_left -= total_record_size;
sinfo = (void *)sinfo + total_record_size;
+ sec_cnt++;
}
ext_info = ext_sec->ext_info;
ext_info->len = ext_sec->len - sizeof(__u32);
ext_info->rec_size = record_size;
ext_info->info = info + sizeof(__u32);
+ ext_info->sec_cnt = sec_cnt;
return 0;
}
@@ -2501,22 +2993,21 @@ void btf_ext__free(struct btf_ext *btf_ext)
{
if (IS_ERR_OR_NULL(btf_ext))
return;
+ free(btf_ext->func_info.sec_idxs);
+ free(btf_ext->line_info.sec_idxs);
+ free(btf_ext->core_relo_info.sec_idxs);
free(btf_ext->data);
free(btf_ext);
}
-struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
+struct btf_ext *btf_ext__new(const __u8 *data, __u32 size)
{
struct btf_ext *btf_ext;
int err;
- err = btf_ext_parse_hdr(data, size);
- if (err)
- return ERR_PTR(err);
-
btf_ext = calloc(1, sizeof(struct btf_ext));
if (!btf_ext)
- return ERR_PTR(-ENOMEM);
+ return libbpf_err_ptr(-ENOMEM);
btf_ext->data_size = size;
btf_ext->data = malloc(size);
@@ -2526,9 +3017,15 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
}
memcpy(btf_ext->data, data, size);
- if (btf_ext->hdr->hdr_len <
- offsetofend(struct btf_ext_header, line_info_len))
+ err = btf_ext_parse_hdr(btf_ext->data, size);
+ if (err)
goto done;
+
+ if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, line_info_len)) {
+ err = -EINVAL;
+ goto done;
+ }
+
err = btf_ext_setup_func_info(btf_ext);
if (err)
goto done;
@@ -2538,7 +3035,8 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
goto done;
if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len))
- goto done;
+ goto done; /* skip core relos parsing */
+
err = btf_ext_setup_core_relos(btf_ext);
if (err)
goto done;
@@ -2546,103 +3044,32 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
done:
if (err) {
btf_ext__free(btf_ext);
- return ERR_PTR(err);
+ return libbpf_err_ptr(err);
}
return btf_ext;
}
-const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size)
+const void *btf_ext__raw_data(const struct btf_ext *btf_ext, __u32 *size)
{
*size = btf_ext->data_size;
return btf_ext->data;
}
-static int btf_ext_reloc_info(const struct btf *btf,
- const struct btf_ext_info *ext_info,
- const char *sec_name, __u32 insns_cnt,
- void **info, __u32 *cnt)
-{
- __u32 sec_hdrlen = sizeof(struct btf_ext_info_sec);
- __u32 i, record_size, existing_len, records_len;
- struct btf_ext_info_sec *sinfo;
- const char *info_sec_name;
- __u64 remain_len;
- void *data;
-
- record_size = ext_info->rec_size;
- sinfo = ext_info->info;
- remain_len = ext_info->len;
- while (remain_len > 0) {
- records_len = sinfo->num_info * record_size;
- info_sec_name = btf__name_by_offset(btf, sinfo->sec_name_off);
- if (strcmp(info_sec_name, sec_name)) {
- remain_len -= sec_hdrlen + records_len;
- sinfo = (void *)sinfo + sec_hdrlen + records_len;
- continue;
- }
+__attribute__((alias("btf_ext__raw_data")))
+const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size);
- existing_len = (*cnt) * record_size;
- data = realloc(*info, existing_len + records_len);
- if (!data)
- return -ENOMEM;
-
- memcpy(data + existing_len, sinfo->data, records_len);
- /* adjust insn_off only, the rest data will be passed
- * to the kernel.
- */
- for (i = 0; i < sinfo->num_info; i++) {
- __u32 *insn_off;
-
- insn_off = data + existing_len + (i * record_size);
- *insn_off = *insn_off / sizeof(struct bpf_insn) +
- insns_cnt;
- }
- *info = data;
- *cnt += sinfo->num_info;
- return 0;
- }
-
- return -ENOENT;
-}
-
-int btf_ext__reloc_func_info(const struct btf *btf,
- const struct btf_ext *btf_ext,
- const char *sec_name, __u32 insns_cnt,
- void **func_info, __u32 *cnt)
-{
- return btf_ext_reloc_info(btf, &btf_ext->func_info, sec_name,
- insns_cnt, func_info, cnt);
-}
-
-int btf_ext__reloc_line_info(const struct btf *btf,
- const struct btf_ext *btf_ext,
- const char *sec_name, __u32 insns_cnt,
- void **line_info, __u32 *cnt)
-{
- return btf_ext_reloc_info(btf, &btf_ext->line_info, sec_name,
- insns_cnt, line_info, cnt);
-}
-
-__u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext)
-{
- return btf_ext->func_info.rec_size;
-}
-
-__u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext)
-{
- return btf_ext->line_info.rec_size;
-}
struct btf_dedup;
-static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
- const struct btf_dedup_opts *opts);
+static struct btf_dedup *btf_dedup_new(struct btf *btf, const struct btf_dedup_opts *opts);
static void btf_dedup_free(struct btf_dedup *d);
+static int btf_dedup_prep(struct btf_dedup *d);
static int btf_dedup_strings(struct btf_dedup *d);
static int btf_dedup_prim_types(struct btf_dedup *d);
static int btf_dedup_struct_types(struct btf_dedup *d);
static int btf_dedup_ref_types(struct btf_dedup *d);
+static int btf_dedup_resolve_fwds(struct btf_dedup *d);
static int btf_dedup_compact_types(struct btf_dedup *d);
static int btf_dedup_remap_types(struct btf_dedup *d);
@@ -2750,15 +3177,16 @@ static int btf_dedup_remap_types(struct btf_dedup *d);
* Algorithm summary
* =================
*
- * Algorithm completes its work in 6 separate passes:
+ * Algorithm completes its work in 7 separate passes:
*
* 1. Strings deduplication.
* 2. Primitive types deduplication (int, enum, fwd).
* 3. Struct/union types deduplication.
- * 4. Reference types deduplication (pointers, typedefs, arrays, funcs, func
+ * 4. Resolve unambiguous forward declarations.
+ * 5. Reference types deduplication (pointers, typedefs, arrays, funcs, func
* protos, and const/volatile/restrict modifiers).
- * 5. Types compaction.
- * 6. Types remapping.
+ * 6. Types compaction.
+ * 7. Types remapping.
*
* Algorithm determines canonical type descriptor, which is a single
* representative type for each truly unique type. This canonical type is the
@@ -2783,20 +3211,30 @@ static int btf_dedup_remap_types(struct btf_dedup *d);
* deduplicating structs/unions is described in greater details in comments for
* `btf_dedup_is_equiv` function.
*/
-int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
- const struct btf_dedup_opts *opts)
+int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts)
{
- struct btf_dedup *d = btf_dedup_new(btf, btf_ext, opts);
+ struct btf_dedup *d;
int err;
+ if (!OPTS_VALID(opts, btf_dedup_opts))
+ return libbpf_err(-EINVAL);
+
+ d = btf_dedup_new(btf, opts);
if (IS_ERR(d)) {
pr_debug("btf_dedup_new failed: %ld", PTR_ERR(d));
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
- if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ if (btf_ensure_modifiable(btf)) {
+ err = -ENOMEM;
+ goto done;
+ }
+ err = btf_dedup_prep(d);
+ if (err) {
+ pr_debug("btf_dedup_prep failed:%d\n", err);
+ goto done;
+ }
err = btf_dedup_strings(d);
if (err < 0) {
pr_debug("btf_dedup_strings failed:%d\n", err);
@@ -2812,6 +3250,11 @@ int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
pr_debug("btf_dedup_struct_types failed:%d\n", err);
goto done;
}
+ err = btf_dedup_resolve_fwds(d);
+ if (err < 0) {
+ pr_debug("btf_dedup_resolve_fwds failed:%d\n", err);
+ goto done;
+ }
err = btf_dedup_ref_types(d);
if (err < 0) {
pr_debug("btf_dedup_ref_types failed:%d\n", err);
@@ -2830,7 +3273,7 @@ int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
done:
btf_dedup_free(d);
- return err;
+ return libbpf_err(err);
}
#define BTF_UNPROCESSED_ID ((__u32)-1)
@@ -2859,21 +3302,17 @@ struct btf_dedup {
__u32 *hypot_list;
size_t hypot_cnt;
size_t hypot_cap;
+ /* Whether hypothetical mapping, if successful, would need to adjust
+ * already canonicalized types (due to a new forward declaration to
+ * concrete type resolution). In such case, during split BTF dedup
+ * candidate type would still be considered as different, because base
+ * BTF is considered to be immutable.
+ */
+ bool hypot_adjust_canon;
/* Various option modifying behavior of algorithm */
struct btf_dedup_opts opts;
-};
-
-struct btf_str_ptr {
- const char *str;
- __u32 new_off;
- bool used;
-};
-
-struct btf_str_ptrs {
- struct btf_str_ptr *ptrs;
- const char *data;
- __u32 cnt;
- __u32 cap;
+ /* temporary strings deduplication state */
+ struct strset *strs_set;
};
static long hash_combine(long h, long value)
@@ -2882,12 +3321,11 @@ static long hash_combine(long h, long value)
}
#define for_each_dedup_cand(d, node, hash) \
- hashmap__for_each_key_entry(d->dedup_table, node, (void *)hash)
+ hashmap__for_each_key_entry(d->dedup_table, node, hash)
static int btf_dedup_table_add(struct btf_dedup *d, long hash, __u32 type_id)
{
- return hashmap__append(d->dedup_table,
- (void *)hash, (void *)(long)type_id);
+ return hashmap__append(d->dedup_table, hash, type_id);
}
static int btf_dedup_hypot_map_add(struct btf_dedup *d,
@@ -2914,6 +3352,7 @@ static void btf_dedup_clear_hypot_map(struct btf_dedup *d)
for (i = 0; i < d->hypot_cnt; i++)
d->hypot_map[d->hypot_list[i]] = BTF_UNPROCESSED_ID;
d->hypot_cnt = 0;
+ d->hypot_adjust_canon = false;
}
static void btf_dedup_free(struct btf_dedup *d)
@@ -2933,38 +3372,35 @@ static void btf_dedup_free(struct btf_dedup *d)
free(d);
}
-static size_t btf_dedup_identity_hash_fn(const void *key, void *ctx)
+static size_t btf_dedup_identity_hash_fn(long key, void *ctx)
{
- return (size_t)key;
+ return key;
}
-static size_t btf_dedup_collision_hash_fn(const void *key, void *ctx)
+static size_t btf_dedup_collision_hash_fn(long key, void *ctx)
{
return 0;
}
-static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx)
+static bool btf_dedup_equal_fn(long k1, long k2, void *ctx)
{
return k1 == k2;
}
-static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
- const struct btf_dedup_opts *opts)
+static struct btf_dedup *btf_dedup_new(struct btf *btf, const struct btf_dedup_opts *opts)
{
struct btf_dedup *d = calloc(1, sizeof(struct btf_dedup));
hashmap_hash_fn hash_fn = btf_dedup_identity_hash_fn;
- int i, err = 0;
+ int i, err = 0, type_cnt;
if (!d)
return ERR_PTR(-ENOMEM);
- d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds;
- /* dedup_table_size is now used only to force collisions in tests */
- if (opts && opts->dedup_table_size == 1)
+ if (OPTS_GET(opts, force_collisions, false))
hash_fn = btf_dedup_collision_hash_fn;
d->btf = btf;
- d->btf_ext = btf_ext;
+ d->btf_ext = OPTS_GET(opts, btf_ext, NULL);
d->dedup_table = hashmap__new(hash_fn, btf_dedup_equal_fn, NULL);
if (IS_ERR(d->dedup_table)) {
@@ -2973,14 +3409,15 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
goto done;
}
- d->map = malloc(sizeof(__u32) * (1 + btf->nr_types));
+ type_cnt = btf__type_cnt(btf);
+ d->map = malloc(sizeof(__u32) * type_cnt);
if (!d->map) {
err = -ENOMEM;
goto done;
}
/* special BTF "void" type is made canonical immediately */
d->map[0] = 0;
- for (i = 1; i <= btf->nr_types; i++) {
+ for (i = 1; i < type_cnt; i++) {
struct btf_type *t = btf_type_by_id(d->btf, i);
/* VAR and DATASEC are never deduped and are self-canonical */
@@ -2990,12 +3427,12 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
d->map[i] = BTF_UNPROCESSED_ID;
}
- d->hypot_map = malloc(sizeof(__u32) * (1 + btf->nr_types));
+ d->hypot_map = malloc(sizeof(__u32) * type_cnt);
if (!d->hypot_map) {
err = -ENOMEM;
goto done;
}
- for (i = 0; i <= btf->nr_types; i++)
+ for (i = 0; i < type_cnt; i++)
d->hypot_map[i] = BTF_UNPROCESSED_ID;
done:
@@ -3007,157 +3444,59 @@ done:
return d;
}
-typedef int (*str_off_fn_t)(__u32 *str_off_ptr, void *ctx);
-
/*
* Iterate over all possible places in .BTF and .BTF.ext that can reference
* string and pass pointer to it to a provided callback `fn`.
*/
-static int btf_for_each_str_off(struct btf_dedup *d, str_off_fn_t fn, void *ctx)
+static int btf_for_each_str_off(struct btf_dedup *d, str_off_visit_fn fn, void *ctx)
{
- void *line_data_cur, *line_data_end;
- int i, j, r, rec_size;
- struct btf_type *t;
+ int i, r;
+
+ for (i = 0; i < d->btf->nr_types; i++) {
+ struct btf_type *t = btf_type_by_id(d->btf, d->btf->start_id + i);
- for (i = 1; i <= d->btf->nr_types; i++) {
- t = btf_type_by_id(d->btf, i);
- r = fn(&t->name_off, ctx);
+ r = btf_type_visit_str_offs(t, fn, ctx);
if (r)
return r;
-
- switch (btf_kind(t)) {
- case BTF_KIND_STRUCT:
- case BTF_KIND_UNION: {
- struct btf_member *m = btf_members(t);
- __u16 vlen = btf_vlen(t);
-
- for (j = 0; j < vlen; j++) {
- r = fn(&m->name_off, ctx);
- if (r)
- return r;
- m++;
- }
- break;
- }
- case BTF_KIND_ENUM: {
- struct btf_enum *m = btf_enum(t);
- __u16 vlen = btf_vlen(t);
-
- for (j = 0; j < vlen; j++) {
- r = fn(&m->name_off, ctx);
- if (r)
- return r;
- m++;
- }
- break;
- }
- case BTF_KIND_FUNC_PROTO: {
- struct btf_param *m = btf_params(t);
- __u16 vlen = btf_vlen(t);
-
- for (j = 0; j < vlen; j++) {
- r = fn(&m->name_off, ctx);
- if (r)
- return r;
- m++;
- }
- break;
- }
- default:
- break;
- }
}
if (!d->btf_ext)
return 0;
- line_data_cur = d->btf_ext->line_info.info;
- line_data_end = d->btf_ext->line_info.info + d->btf_ext->line_info.len;
- rec_size = d->btf_ext->line_info.rec_size;
-
- while (line_data_cur < line_data_end) {
- struct btf_ext_info_sec *sec = line_data_cur;
- struct bpf_line_info_min *line_info;
- __u32 num_info = sec->num_info;
-
- r = fn(&sec->sec_name_off, ctx);
- if (r)
- return r;
-
- line_data_cur += sizeof(struct btf_ext_info_sec);
- for (i = 0; i < num_info; i++) {
- line_info = line_data_cur;
- r = fn(&line_info->file_name_off, ctx);
- if (r)
- return r;
- r = fn(&line_info->line_off, ctx);
- if (r)
- return r;
- line_data_cur += rec_size;
- }
- }
-
- return 0;
-}
-
-static int str_sort_by_content(const void *a1, const void *a2)
-{
- const struct btf_str_ptr *p1 = a1;
- const struct btf_str_ptr *p2 = a2;
-
- return strcmp(p1->str, p2->str);
-}
-
-static int str_sort_by_offset(const void *a1, const void *a2)
-{
- const struct btf_str_ptr *p1 = a1;
- const struct btf_str_ptr *p2 = a2;
+ r = btf_ext_visit_str_offs(d->btf_ext, fn, ctx);
+ if (r)
+ return r;
- if (p1->str != p2->str)
- return p1->str < p2->str ? -1 : 1;
return 0;
}
-static int btf_dedup_str_ptr_cmp(const void *str_ptr, const void *pelem)
+static int strs_dedup_remap_str_off(__u32 *str_off_ptr, void *ctx)
{
- const struct btf_str_ptr *p = pelem;
+ struct btf_dedup *d = ctx;
+ __u32 str_off = *str_off_ptr;
+ const char *s;
+ int off, err;
- if (str_ptr != p->str)
- return (const char *)str_ptr < p->str ? -1 : 1;
- return 0;
-}
-
-static int btf_str_mark_as_used(__u32 *str_off_ptr, void *ctx)
-{
- struct btf_str_ptrs *strs;
- struct btf_str_ptr *s;
-
- if (*str_off_ptr == 0)
+ /* don't touch empty string or string in main BTF */
+ if (str_off == 0 || str_off < d->btf->start_str_off)
return 0;
- strs = ctx;
- s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt,
- sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp);
- if (!s)
- return -EINVAL;
- s->used = true;
- return 0;
-}
-
-static int btf_str_remap_offset(__u32 *str_off_ptr, void *ctx)
-{
- struct btf_str_ptrs *strs;
- struct btf_str_ptr *s;
+ s = btf__str_by_offset(d->btf, str_off);
+ if (d->btf->base_btf) {
+ err = btf__find_str(d->btf->base_btf, s);
+ if (err >= 0) {
+ *str_off_ptr = err;
+ return 0;
+ }
+ if (err != -ENOENT)
+ return err;
+ }
- if (*str_off_ptr == 0)
- return 0;
+ off = strset__add_str(d->strs_set, s);
+ if (off < 0)
+ return off;
- strs = ctx;
- s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt,
- sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp);
- if (!s)
- return -EINVAL;
- *str_off_ptr = s->new_off;
+ *str_off_ptr = d->btf->start_str_off + off;
return 0;
}
@@ -3174,118 +3513,43 @@ static int btf_str_remap_offset(__u32 *str_off_ptr, void *ctx)
*/
static int btf_dedup_strings(struct btf_dedup *d)
{
- char *start = d->btf->strs_data;
- char *end = start + d->btf->hdr->str_len;
- char *p = start, *tmp_strs = NULL;
- struct btf_str_ptrs strs = {
- .cnt = 0,
- .cap = 0,
- .ptrs = NULL,
- .data = start,
- };
- int i, j, err = 0, grp_idx;
- bool grp_used;
+ int err;
if (d->btf->strs_deduped)
return 0;
- /* build index of all strings */
- while (p < end) {
- if (strs.cnt + 1 > strs.cap) {
- struct btf_str_ptr *new_ptrs;
-
- strs.cap += max(strs.cnt / 2, 16U);
- new_ptrs = libbpf_reallocarray(strs.ptrs, strs.cap, sizeof(strs.ptrs[0]));
- if (!new_ptrs) {
- err = -ENOMEM;
- goto done;
- }
- strs.ptrs = new_ptrs;
- }
-
- strs.ptrs[strs.cnt].str = p;
- strs.ptrs[strs.cnt].used = false;
-
- p += strlen(p) + 1;
- strs.cnt++;
- }
-
- /* temporary storage for deduplicated strings */
- tmp_strs = malloc(d->btf->hdr->str_len);
- if (!tmp_strs) {
- err = -ENOMEM;
- goto done;
+ d->strs_set = strset__new(BTF_MAX_STR_OFFSET, NULL, 0);
+ if (IS_ERR(d->strs_set)) {
+ err = PTR_ERR(d->strs_set);
+ goto err_out;
}
- /* mark all used strings */
- strs.ptrs[0].used = true;
- err = btf_for_each_str_off(d, btf_str_mark_as_used, &strs);
- if (err)
- goto done;
-
- /* sort strings by context, so that we can identify duplicates */
- qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_content);
-
- /*
- * iterate groups of equal strings and if any instance in a group was
- * referenced, emit single instance and remember new offset
- */
- p = tmp_strs;
- grp_idx = 0;
- grp_used = strs.ptrs[0].used;
- /* iterate past end to avoid code duplication after loop */
- for (i = 1; i <= strs.cnt; i++) {
- /*
- * when i == strs.cnt, we want to skip string comparison and go
- * straight to handling last group of strings (otherwise we'd
- * need to handle last group after the loop w/ duplicated code)
+ if (!d->btf->base_btf) {
+ /* insert empty string; we won't be looking it up during strings
+ * dedup, but it's good to have it for generic BTF string lookups
*/
- if (i < strs.cnt &&
- !strcmp(strs.ptrs[i].str, strs.ptrs[grp_idx].str)) {
- grp_used = grp_used || strs.ptrs[i].used;
- continue;
- }
-
- /*
- * this check would have been required after the loop to handle
- * last group of strings, but due to <= condition in a loop
- * we avoid that duplication
- */
- if (grp_used) {
- int new_off = p - tmp_strs;
- __u32 len = strlen(strs.ptrs[grp_idx].str);
-
- memmove(p, strs.ptrs[grp_idx].str, len + 1);
- for (j = grp_idx; j < i; j++)
- strs.ptrs[j].new_off = new_off;
- p += len + 1;
- }
-
- if (i < strs.cnt) {
- grp_idx = i;
- grp_used = strs.ptrs[i].used;
- }
+ err = strset__add_str(d->strs_set, "");
+ if (err < 0)
+ goto err_out;
}
- /* replace original strings with deduped ones */
- d->btf->hdr->str_len = p - tmp_strs;
- memmove(start, tmp_strs, d->btf->hdr->str_len);
- end = start + d->btf->hdr->str_len;
-
- /* restore original order for further binary search lookups */
- qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_offset);
-
/* remap string offsets */
- err = btf_for_each_str_off(d, btf_str_remap_offset, &strs);
+ err = btf_for_each_str_off(d, strs_dedup_remap_str_off, d);
if (err)
- goto done;
+ goto err_out;
- d->btf->hdr->str_len = end - start;
+ /* replace BTF string data and hash with deduped ones */
+ strset__free(d->btf->strs_set);
+ d->btf->hdr->str_len = strset__data_size(d->strs_set);
+ d->btf->strs_set = d->strs_set;
+ d->strs_set = NULL;
d->btf->strs_deduped = true;
+ return 0;
+
+err_out:
+ strset__free(d->strs_set);
+ d->strs_set = NULL;
-done:
- free(tmp_strs);
- free(strs.ptrs);
return err;
}
@@ -3306,8 +3570,8 @@ static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2)
t1->size == t2->size;
}
-/* Calculate type signature hash of INT. */
-static long btf_hash_int(struct btf_type *t)
+/* Calculate type signature hash of INT or TAG. */
+static long btf_hash_int_decl_tag(struct btf_type *t)
{
__u32 info = *(__u32 *)(t + 1);
long h;
@@ -3317,8 +3581,8 @@ static long btf_hash_int(struct btf_type *t)
return h;
}
-/* Check structural equality of two INTs. */
-static bool btf_equal_int(struct btf_type *t1, struct btf_type *t2)
+/* Check structural equality of two INTs or TAGs. */
+static bool btf_equal_int_tag(struct btf_type *t1, struct btf_type *t2)
{
__u32 info1, info2;
@@ -3329,28 +3593,22 @@ static bool btf_equal_int(struct btf_type *t1, struct btf_type *t2)
return info1 == info2;
}
-/* Calculate type signature hash of ENUM. */
+/* Calculate type signature hash of ENUM/ENUM64. */
static long btf_hash_enum(struct btf_type *t)
{
long h;
- /* don't hash vlen and enum members to support enum fwd resolving */
+ /* don't hash vlen, enum members and size to support enum fwd resolving */
h = hash_combine(0, t->name_off);
- h = hash_combine(h, t->info & ~0xffff);
- h = hash_combine(h, t->size);
return h;
}
-/* Check structural equality of two ENUMs. */
-static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2)
+static bool btf_equal_enum_members(struct btf_type *t1, struct btf_type *t2)
{
const struct btf_enum *m1, *m2;
__u16 vlen;
int i;
- if (!btf_equal_common(t1, t2))
- return false;
-
vlen = btf_vlen(t1);
m1 = btf_enum(t1);
m2 = btf_enum(t2);
@@ -3363,19 +3621,55 @@ static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2)
return true;
}
+static bool btf_equal_enum64_members(struct btf_type *t1, struct btf_type *t2)
+{
+ const struct btf_enum64 *m1, *m2;
+ __u16 vlen;
+ int i;
+
+ vlen = btf_vlen(t1);
+ m1 = btf_enum64(t1);
+ m2 = btf_enum64(t2);
+ for (i = 0; i < vlen; i++) {
+ if (m1->name_off != m2->name_off || m1->val_lo32 != m2->val_lo32 ||
+ m1->val_hi32 != m2->val_hi32)
+ return false;
+ m1++;
+ m2++;
+ }
+ return true;
+}
+
+/* Check structural equality of two ENUMs or ENUM64s. */
+static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2)
+{
+ if (!btf_equal_common(t1, t2))
+ return false;
+
+ /* t1 & t2 kinds are identical because of btf_equal_common */
+ if (btf_kind(t1) == BTF_KIND_ENUM)
+ return btf_equal_enum_members(t1, t2);
+ else
+ return btf_equal_enum64_members(t1, t2);
+}
+
static inline bool btf_is_enum_fwd(struct btf_type *t)
{
- return btf_is_enum(t) && btf_vlen(t) == 0;
+ return btf_is_any_enum(t) && btf_vlen(t) == 0;
}
static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2)
{
if (!btf_is_enum_fwd(t1) && !btf_is_enum_fwd(t2))
return btf_equal_enum(t1, t2);
- /* ignore vlen when comparing */
+ /* At this point either t1 or t2 or both are forward declarations, thus:
+ * - skip comparing vlen because it is zero for forward declarations;
+ * - skip comparing size to allow enum forward declarations
+ * to be compatible with enum64 full declarations;
+ * - skip comparing kind for the same reason.
+ */
return t1->name_off == t2->name_off &&
- (t1->info & ~0xffff) == (t2->info & ~0xffff) &&
- t1->size == t2->size;
+ btf_is_any_enum(t1) && btf_is_any_enum(t2);
}
/*
@@ -3400,8 +3694,8 @@ static long btf_hash_struct(struct btf_type *t)
}
/*
- * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type
- * IDs. This check is performed during type graph equivalence check and
+ * Check structural compatibility of two STRUCTs/UNIONs, ignoring referenced
+ * type IDs. This check is performed during type graph equivalence check and
* referenced types equivalence is checked separately.
*/
static bool btf_shallow_equal_struct(struct btf_type *t1, struct btf_type *t2)
@@ -3550,6 +3844,70 @@ static bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2)
return true;
}
+/* Prepare split BTF for deduplication by calculating hashes of base BTF's
+ * types and initializing the rest of the state (canonical type mapping) for
+ * the fixed base BTF part.
+ */
+static int btf_dedup_prep(struct btf_dedup *d)
+{
+ struct btf_type *t;
+ int type_id;
+ long h;
+
+ if (!d->btf->base_btf)
+ return 0;
+
+ for (type_id = 1; type_id < d->btf->start_id; type_id++) {
+ t = btf_type_by_id(d->btf, type_id);
+
+ /* all base BTF types are self-canonical by definition */
+ d->map[type_id] = type_id;
+
+ switch (btf_kind(t)) {
+ case BTF_KIND_VAR:
+ case BTF_KIND_DATASEC:
+ /* VAR and DATASEC are never hash/deduplicated */
+ continue;
+ case BTF_KIND_CONST:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_RESTRICT:
+ case BTF_KIND_PTR:
+ case BTF_KIND_FWD:
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_FUNC:
+ case BTF_KIND_FLOAT:
+ case BTF_KIND_TYPE_TAG:
+ h = btf_hash_common(t);
+ break;
+ case BTF_KIND_INT:
+ case BTF_KIND_DECL_TAG:
+ h = btf_hash_int_decl_tag(t);
+ break;
+ case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
+ h = btf_hash_enum(t);
+ break;
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ h = btf_hash_struct(t);
+ break;
+ case BTF_KIND_ARRAY:
+ h = btf_hash_array(t);
+ break;
+ case BTF_KIND_FUNC_PROTO:
+ h = btf_hash_fnproto(t);
+ break;
+ default:
+ pr_debug("unknown kind %d for type [%d]\n", btf_kind(t), type_id);
+ return -EINVAL;
+ }
+ if (btf_dedup_table_add(d, h, type_id))
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
/*
* Deduplicate primitive types, that can't reference other types, by calculating
* their type signature hash and comparing them with any possible canonical
@@ -3579,14 +3937,16 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
case BTF_KIND_FUNC_PROTO:
case BTF_KIND_VAR:
case BTF_KIND_DATASEC:
+ case BTF_KIND_DECL_TAG:
+ case BTF_KIND_TYPE_TAG:
return 0;
case BTF_KIND_INT:
- h = btf_hash_int(t);
+ h = btf_hash_int_decl_tag(t);
for_each_dedup_cand(d, hash_entry, h) {
- cand_id = (__u32)(long)hash_entry->value;
+ cand_id = hash_entry->value;
cand = btf_type_by_id(d->btf, cand_id);
- if (btf_equal_int(t, cand)) {
+ if (btf_equal_int_tag(t, cand)) {
new_id = cand_id;
break;
}
@@ -3594,16 +3954,15 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
break;
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
h = btf_hash_enum(t);
for_each_dedup_cand(d, hash_entry, h) {
- cand_id = (__u32)(long)hash_entry->value;
+ cand_id = hash_entry->value;
cand = btf_type_by_id(d->btf, cand_id);
if (btf_equal_enum(t, cand)) {
new_id = cand_id;
break;
}
- if (d->opts.dont_resolve_fwds)
- continue;
if (btf_compat_enum(t, cand)) {
if (btf_is_enum_fwd(t)) {
/* resolve fwd to full enum */
@@ -3617,9 +3976,10 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
break;
case BTF_KIND_FWD:
+ case BTF_KIND_FLOAT:
h = btf_hash_common(t);
for_each_dedup_cand(d, hash_entry, h) {
- cand_id = (__u32)(long)hash_entry->value;
+ cand_id = hash_entry->value;
cand = btf_type_by_id(d->btf, cand_id);
if (btf_equal_common(t, cand)) {
new_id = cand_id;
@@ -3643,8 +4003,8 @@ static int btf_dedup_prim_types(struct btf_dedup *d)
{
int i, err;
- for (i = 1; i <= d->btf->nr_types; i++) {
- err = btf_dedup_prim_type(d, i);
+ for (i = 0; i < d->btf->nr_types; i++) {
+ err = btf_dedup_prim_type(d, d->btf->start_id + i);
if (err)
return err;
}
@@ -3697,6 +4057,46 @@ static inline __u16 btf_fwd_kind(struct btf_type *t)
return btf_kflag(t) ? BTF_KIND_UNION : BTF_KIND_STRUCT;
}
+/* Check if given two types are identical ARRAY definitions */
+static bool btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2)
+{
+ struct btf_type *t1, *t2;
+
+ t1 = btf_type_by_id(d->btf, id1);
+ t2 = btf_type_by_id(d->btf, id2);
+ if (!btf_is_array(t1) || !btf_is_array(t2))
+ return false;
+
+ return btf_equal_array(t1, t2);
+}
+
+/* Check if given two types are identical STRUCT/UNION definitions */
+static bool btf_dedup_identical_structs(struct btf_dedup *d, __u32 id1, __u32 id2)
+{
+ const struct btf_member *m1, *m2;
+ struct btf_type *t1, *t2;
+ int n, i;
+
+ t1 = btf_type_by_id(d->btf, id1);
+ t2 = btf_type_by_id(d->btf, id2);
+
+ if (!btf_is_composite(t1) || btf_kind(t1) != btf_kind(t2))
+ return false;
+
+ if (!btf_shallow_equal_struct(t1, t2))
+ return false;
+
+ m1 = btf_members(t1);
+ m2 = btf_members(t2);
+ for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) {
+ if (m1->type != m2->type &&
+ !btf_dedup_identical_arrays(d, m1->type, m2->type) &&
+ !btf_dedup_identical_structs(d, m1->type, m2->type))
+ return false;
+ }
+ return true;
+}
+
/*
* Check equivalence of BTF type graph formed by candidate struct/union (we'll
* call it "candidate graph" in this description for brevity) to a type graph
@@ -3807,8 +4207,30 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
canon_id = resolve_fwd_id(d, canon_id);
hypot_type_id = d->hypot_map[canon_id];
- if (hypot_type_id <= BTF_MAX_NR_TYPES)
- return hypot_type_id == cand_id;
+ if (hypot_type_id <= BTF_MAX_NR_TYPES) {
+ if (hypot_type_id == cand_id)
+ return 1;
+ /* In some cases compiler will generate different DWARF types
+ * for *identical* array type definitions and use them for
+ * different fields within the *same* struct. This breaks type
+ * equivalence check, which makes an assumption that candidate
+ * types sub-graph has a consistent and deduped-by-compiler
+ * types within a single CU. So work around that by explicitly
+ * allowing identical array types here.
+ */
+ if (btf_dedup_identical_arrays(d, hypot_type_id, cand_id))
+ return 1;
+ /* It turns out that similar situation can happen with
+ * struct/union sometimes, sigh... Handle the case where
+ * structs/unions are exactly the same, down to the referenced
+ * type IDs. Anything more complicated (e.g., if referenced
+ * types are different, but equivalent) is *way more*
+ * complicated and requires a many-to-many equivalence mapping.
+ */
+ if (btf_dedup_identical_structs(d, hypot_type_id, cand_id))
+ return 1;
+ return 0;
+ }
if (btf_dedup_hypot_map_add(d, canon_id, cand_id))
return -ENOMEM;
@@ -3822,8 +4244,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
return 0;
/* FWD <--> STRUCT/UNION equivalence check, if enabled */
- if (!d->opts.dont_resolve_fwds
- && (cand_kind == BTF_KIND_FWD || canon_kind == BTF_KIND_FWD)
+ if ((cand_kind == BTF_KIND_FWD || canon_kind == BTF_KIND_FWD)
&& cand_kind != canon_kind) {
__u16 real_kind;
__u16 fwd_kind;
@@ -3834,6 +4255,9 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
} else {
real_kind = cand_kind;
fwd_kind = btf_fwd_kind(canon_type);
+ /* we'd need to resolve base FWD to STRUCT/UNION */
+ if (fwd_kind == real_kind && canon_id < d->btf->start_id)
+ d->hypot_adjust_canon = true;
}
return fwd_kind == real_kind;
}
@@ -3843,15 +4267,14 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
switch (cand_kind) {
case BTF_KIND_INT:
- return btf_equal_int(cand_type, canon_type);
+ return btf_equal_int_tag(cand_type, canon_type);
case BTF_KIND_ENUM:
- if (d->opts.dont_resolve_fwds)
- return btf_equal_enum(cand_type, canon_type);
- else
- return btf_compat_enum(cand_type, canon_type);
+ case BTF_KIND_ENUM64:
+ return btf_compat_enum(cand_type, canon_type);
case BTF_KIND_FWD:
+ case BTF_KIND_FLOAT:
return btf_equal_common(cand_type, canon_type);
case BTF_KIND_CONST:
@@ -3860,6 +4283,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
case BTF_KIND_PTR:
case BTF_KIND_TYPEDEF:
case BTF_KIND_FUNC:
+ case BTF_KIND_TYPE_TAG:
if (cand_type->info != canon_type->info)
return 0;
return btf_dedup_is_equiv(d, cand_type->type, canon_type->type);
@@ -3871,8 +4295,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
return 0;
cand_arr = btf_array(cand_type);
canon_arr = btf_array(canon_type);
- eq = btf_dedup_is_equiv(d,
- cand_arr->index_type, canon_arr->index_type);
+ eq = btf_dedup_is_equiv(d, cand_arr->index_type, canon_arr->index_type);
if (eq <= 0)
return eq;
return btf_dedup_is_equiv(d, cand_arr->type, canon_arr->type);
@@ -3955,16 +4378,16 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
*/
static void btf_dedup_merge_hypot_map(struct btf_dedup *d)
{
- __u32 cand_type_id, targ_type_id;
+ __u32 canon_type_id, targ_type_id;
__u16 t_kind, c_kind;
__u32 t_id, c_id;
int i;
for (i = 0; i < d->hypot_cnt; i++) {
- cand_type_id = d->hypot_list[i];
- targ_type_id = d->hypot_map[cand_type_id];
+ canon_type_id = d->hypot_list[i];
+ targ_type_id = d->hypot_map[canon_type_id];
t_id = resolve_type_id(d, targ_type_id);
- c_id = resolve_type_id(d, cand_type_id);
+ c_id = resolve_type_id(d, canon_type_id);
t_kind = btf_kind(btf__type_by_id(d->btf, t_id));
c_kind = btf_kind(btf__type_by_id(d->btf, c_id));
/*
@@ -3979,9 +4402,26 @@ static void btf_dedup_merge_hypot_map(struct btf_dedup *d)
* stability is not a requirement for STRUCT/UNION equivalence
* checks, though.
*/
+
+ /* if it's the split BTF case, we still need to point base FWD
+ * to STRUCT/UNION in a split BTF, because FWDs from split BTF
+ * will be resolved against base FWD. If we don't point base
+ * canonical FWD to the resolved STRUCT/UNION, then all the
+ * FWDs in split BTF won't be correctly resolved to a proper
+ * STRUCT/UNION.
+ */
if (t_kind != BTF_KIND_FWD && c_kind == BTF_KIND_FWD)
d->map[c_id] = t_id;
- else if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD)
+
+ /* if graph equivalence determined that we'd need to adjust
+ * base canonical types, then we need to only point base FWDs
+ * to STRUCTs/UNIONs and do no more modifications. For all
+ * other purposes the type graphs were not equivalent.
+ */
+ if (d->hypot_adjust_canon)
+ continue;
+
+ if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD)
d->map[t_id] = c_id;
if ((t_kind == BTF_KIND_STRUCT || t_kind == BTF_KIND_UNION) &&
@@ -4042,7 +4482,7 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
h = btf_hash_struct(t);
for_each_dedup_cand(d, hash_entry, h) {
- __u32 cand_id = (__u32)(long)hash_entry->value;
+ __u32 cand_id = hash_entry->value;
int eq;
/*
@@ -4065,8 +4505,10 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
return eq;
if (!eq)
continue;
- new_id = cand_id;
btf_dedup_merge_hypot_map(d);
+ if (d->hypot_adjust_canon) /* not really equivalent */
+ continue;
+ new_id = cand_id;
break;
}
@@ -4081,8 +4523,8 @@ static int btf_dedup_struct_types(struct btf_dedup *d)
{
int i, err;
- for (i = 1; i <= d->btf->nr_types; i++) {
- err = btf_dedup_struct_type(d, i);
+ for (i = 0; i < d->btf->nr_types; i++) {
+ err = btf_dedup_struct_type(d, d->btf->start_id + i);
if (err)
return err;
}
@@ -4137,6 +4579,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
case BTF_KIND_PTR:
case BTF_KIND_TYPEDEF:
case BTF_KIND_FUNC:
+ case BTF_KIND_TYPE_TAG:
ref_type_id = btf_dedup_ref_type(d, t->type);
if (ref_type_id < 0)
return ref_type_id;
@@ -4144,7 +4587,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
h = btf_hash_common(t);
for_each_dedup_cand(d, hash_entry, h) {
- cand_id = (__u32)(long)hash_entry->value;
+ cand_id = hash_entry->value;
cand = btf_type_by_id(d->btf, cand_id);
if (btf_equal_common(t, cand)) {
new_id = cand_id;
@@ -4153,6 +4596,23 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
}
break;
+ case BTF_KIND_DECL_TAG:
+ ref_type_id = btf_dedup_ref_type(d, t->type);
+ if (ref_type_id < 0)
+ return ref_type_id;
+ t->type = ref_type_id;
+
+ h = btf_hash_int_decl_tag(t);
+ for_each_dedup_cand(d, hash_entry, h) {
+ cand_id = hash_entry->value;
+ cand = btf_type_by_id(d->btf, cand_id);
+ if (btf_equal_int_tag(t, cand)) {
+ new_id = cand_id;
+ break;
+ }
+ }
+ break;
+
case BTF_KIND_ARRAY: {
struct btf_array *info = btf_array(t);
@@ -4168,7 +4628,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
h = btf_hash_array(t);
for_each_dedup_cand(d, hash_entry, h) {
- cand_id = (__u32)(long)hash_entry->value;
+ cand_id = hash_entry->value;
cand = btf_type_by_id(d->btf, cand_id);
if (btf_equal_array(t, cand)) {
new_id = cand_id;
@@ -4200,7 +4660,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
h = btf_hash_fnproto(t);
for_each_dedup_cand(d, hash_entry, h) {
- cand_id = (__u32)(long)hash_entry->value;
+ cand_id = hash_entry->value;
cand = btf_type_by_id(d->btf, cand_id);
if (btf_equal_fnproto(t, cand)) {
new_id = cand_id;
@@ -4225,8 +4685,8 @@ static int btf_dedup_ref_types(struct btf_dedup *d)
{
int i, err;
- for (i = 1; i <= d->btf->nr_types; i++) {
- err = btf_dedup_ref_type(d, i);
+ for (i = 0; i < d->btf->nr_types; i++) {
+ err = btf_dedup_ref_type(d, d->btf->start_id + i);
if (err < 0)
return err;
}
@@ -4237,6 +4697,134 @@ static int btf_dedup_ref_types(struct btf_dedup *d)
}
/*
+ * Collect a map from type names to type ids for all canonical structs
+ * and unions. If the same name is shared by several canonical types
+ * use a special value 0 to indicate this fact.
+ */
+static int btf_dedup_fill_unique_names_map(struct btf_dedup *d, struct hashmap *names_map)
+{
+ __u32 nr_types = btf__type_cnt(d->btf);
+ struct btf_type *t;
+ __u32 type_id;
+ __u16 kind;
+ int err;
+
+ /*
+ * Iterate over base and split module ids in order to get all
+ * available structs in the map.
+ */
+ for (type_id = 1; type_id < nr_types; ++type_id) {
+ t = btf_type_by_id(d->btf, type_id);
+ kind = btf_kind(t);
+
+ if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION)
+ continue;
+
+ /* Skip non-canonical types */
+ if (type_id != d->map[type_id])
+ continue;
+
+ err = hashmap__add(names_map, t->name_off, type_id);
+ if (err == -EEXIST)
+ err = hashmap__set(names_map, t->name_off, 0, NULL, NULL);
+
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int btf_dedup_resolve_fwd(struct btf_dedup *d, struct hashmap *names_map, __u32 type_id)
+{
+ struct btf_type *t = btf_type_by_id(d->btf, type_id);
+ enum btf_fwd_kind fwd_kind = btf_kflag(t);
+ __u16 cand_kind, kind = btf_kind(t);
+ struct btf_type *cand_t;
+ uintptr_t cand_id;
+
+ if (kind != BTF_KIND_FWD)
+ return 0;
+
+ /* Skip if this FWD already has a mapping */
+ if (type_id != d->map[type_id])
+ return 0;
+
+ if (!hashmap__find(names_map, t->name_off, &cand_id))
+ return 0;
+
+ /* Zero is a special value indicating that name is not unique */
+ if (!cand_id)
+ return 0;
+
+ cand_t = btf_type_by_id(d->btf, cand_id);
+ cand_kind = btf_kind(cand_t);
+ if ((cand_kind == BTF_KIND_STRUCT && fwd_kind != BTF_FWD_STRUCT) ||
+ (cand_kind == BTF_KIND_UNION && fwd_kind != BTF_FWD_UNION))
+ return 0;
+
+ d->map[type_id] = cand_id;
+
+ return 0;
+}
+
+/*
+ * Resolve unambiguous forward declarations.
+ *
+ * The lion's share of all FWD declarations is resolved during
+ * `btf_dedup_struct_types` phase when different type graphs are
+ * compared against each other. However, if in some compilation unit a
+ * FWD declaration is not a part of a type graph compared against
+ * another type graph that declaration's canonical type would not be
+ * changed. Example:
+ *
+ * CU #1:
+ *
+ * struct foo;
+ * struct foo *some_global;
+ *
+ * CU #2:
+ *
+ * struct foo { int u; };
+ * struct foo *another_global;
+ *
+ * After `btf_dedup_struct_types` the BTF looks as follows:
+ *
+ * [1] STRUCT 'foo' size=4 vlen=1 ...
+ * [2] INT 'int' size=4 ...
+ * [3] PTR '(anon)' type_id=1
+ * [4] FWD 'foo' fwd_kind=struct
+ * [5] PTR '(anon)' type_id=4
+ *
+ * This pass assumes that such FWD declarations should be mapped to
+ * structs or unions with identical name in case if the name is not
+ * ambiguous.
+ */
+static int btf_dedup_resolve_fwds(struct btf_dedup *d)
+{
+ int i, err;
+ struct hashmap *names_map;
+
+ names_map = hashmap__new(btf_dedup_identity_hash_fn, btf_dedup_equal_fn, NULL);
+ if (IS_ERR(names_map))
+ return PTR_ERR(names_map);
+
+ err = btf_dedup_fill_unique_names_map(d, names_map);
+ if (err < 0)
+ goto exit;
+
+ for (i = 0; i < d->btf->nr_types; i++) {
+ err = btf_dedup_resolve_fwd(d, names_map, d->btf->start_id + i);
+ if (err < 0)
+ break;
+ }
+
+exit:
+ hashmap__free(names_map);
+ return err;
+}
+
+/*
* Compact types.
*
* After we established for each type its corresponding canonical representative
@@ -4250,39 +4838,44 @@ static int btf_dedup_ref_types(struct btf_dedup *d)
static int btf_dedup_compact_types(struct btf_dedup *d)
{
__u32 *new_offs;
- __u32 next_type_id = 1;
+ __u32 next_type_id = d->btf->start_id;
+ const struct btf_type *t;
void *p;
- int i, len;
+ int i, id, len;
/* we are going to reuse hypot_map to store compaction remapping */
d->hypot_map[0] = 0;
- for (i = 1; i <= d->btf->nr_types; i++)
- d->hypot_map[i] = BTF_UNPROCESSED_ID;
+ /* base BTF types are not renumbered */
+ for (id = 1; id < d->btf->start_id; id++)
+ d->hypot_map[id] = id;
+ for (i = 0, id = d->btf->start_id; i < d->btf->nr_types; i++, id++)
+ d->hypot_map[id] = BTF_UNPROCESSED_ID;
p = d->btf->types_data;
- for (i = 1; i <= d->btf->nr_types; i++) {
- if (d->map[i] != i)
+ for (i = 0, id = d->btf->start_id; i < d->btf->nr_types; i++, id++) {
+ if (d->map[id] != id)
continue;
- len = btf_type_size(btf__type_by_id(d->btf, i));
+ t = btf__type_by_id(d->btf, id);
+ len = btf_type_size(t);
if (len < 0)
return len;
- memmove(p, btf__type_by_id(d->btf, i), len);
- d->hypot_map[i] = next_type_id;
- d->btf->type_offs[next_type_id] = p - d->btf->types_data;
+ memmove(p, t, len);
+ d->hypot_map[id] = next_type_id;
+ d->btf->type_offs[next_type_id - d->btf->start_id] = p - d->btf->types_data;
p += len;
next_type_id++;
}
/* shrink struct btf's internal types index and update btf_header */
- d->btf->nr_types = next_type_id - 1;
- d->btf->type_offs_cap = d->btf->nr_types + 1;
+ d->btf->nr_types = next_type_id - d->btf->start_id;
+ d->btf->type_offs_cap = d->btf->nr_types;
d->btf->hdr->type_len = p - d->btf->types_data;
new_offs = libbpf_reallocarray(d->btf->type_offs, d->btf->type_offs_cap,
sizeof(*new_offs));
- if (!new_offs)
+ if (d->btf->type_offs_cap && !new_offs)
return -ENOMEM;
d->btf->type_offs = new_offs;
d->btf->hdr->str_off = d->btf->hdr->type_len;
@@ -4296,15 +4889,18 @@ static int btf_dedup_compact_types(struct btf_dedup *d)
* then mapping it to a deduplicated type ID, stored in btf_dedup->hypot_map,
* which is populated during compaction phase.
*/
-static int btf_dedup_remap_type_id(struct btf_dedup *d, __u32 type_id)
+static int btf_dedup_remap_type_id(__u32 *type_id, void *ctx)
{
+ struct btf_dedup *d = ctx;
__u32 resolved_type_id, new_type_id;
- resolved_type_id = resolve_type_id(d, type_id);
+ resolved_type_id = resolve_type_id(d, *type_id);
new_type_id = d->hypot_map[resolved_type_id];
if (new_type_id > BTF_MAX_NR_TYPES)
return -EINVAL;
- return new_type_id;
+
+ *type_id = new_type_id;
+ return 0;
}
/*
@@ -4317,15 +4913,106 @@ static int btf_dedup_remap_type_id(struct btf_dedup *d, __u32 type_id)
* referenced from any BTF type (e.g., struct fields, func proto args, etc) to
* their final deduped type IDs.
*/
-static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id)
+static int btf_dedup_remap_types(struct btf_dedup *d)
{
- struct btf_type *t = btf_type_by_id(d->btf, type_id);
int i, r;
+ for (i = 0; i < d->btf->nr_types; i++) {
+ struct btf_type *t = btf_type_by_id(d->btf, d->btf->start_id + i);
+
+ r = btf_type_visit_type_ids(t, btf_dedup_remap_type_id, d);
+ if (r)
+ return r;
+ }
+
+ if (!d->btf_ext)
+ return 0;
+
+ r = btf_ext_visit_type_ids(d->btf_ext, btf_dedup_remap_type_id, d);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+/*
+ * Probe few well-known locations for vmlinux kernel image and try to load BTF
+ * data out of it to use for target BTF.
+ */
+struct btf *btf__load_vmlinux_btf(void)
+{
+ const char *sysfs_btf_path = "/sys/kernel/btf/vmlinux";
+ /* fall back locations, trying to find vmlinux on disk */
+ const char *locations[] = {
+ "/boot/vmlinux-%1$s",
+ "/lib/modules/%1$s/vmlinux-%1$s",
+ "/lib/modules/%1$s/build/vmlinux",
+ "/usr/lib/modules/%1$s/kernel/vmlinux",
+ "/usr/lib/debug/boot/vmlinux-%1$s",
+ "/usr/lib/debug/boot/vmlinux-%1$s.debug",
+ "/usr/lib/debug/lib/modules/%1$s/vmlinux",
+ };
+ char path[PATH_MAX + 1];
+ struct utsname buf;
+ struct btf *btf;
+ int i, err;
+
+ /* is canonical sysfs location accessible? */
+ if (faccessat(AT_FDCWD, sysfs_btf_path, F_OK, AT_EACCESS) < 0) {
+ pr_warn("kernel BTF is missing at '%s', was CONFIG_DEBUG_INFO_BTF enabled?\n",
+ sysfs_btf_path);
+ } else {
+ btf = btf__parse(sysfs_btf_path, NULL);
+ if (!btf) {
+ err = -errno;
+ pr_warn("failed to read kernel BTF from '%s': %d\n", sysfs_btf_path, err);
+ return libbpf_err_ptr(err);
+ }
+ pr_debug("loaded kernel BTF from '%s'\n", sysfs_btf_path);
+ return btf;
+ }
+
+ /* try fallback locations */
+ uname(&buf);
+ for (i = 0; i < ARRAY_SIZE(locations); i++) {
+ snprintf(path, PATH_MAX, locations[i], buf.release);
+
+ if (faccessat(AT_FDCWD, path, R_OK, AT_EACCESS))
+ continue;
+
+ btf = btf__parse(path, NULL);
+ err = libbpf_get_error(btf);
+ pr_debug("loading kernel BTF '%s': %d\n", path, err);
+ if (err)
+ continue;
+
+ return btf;
+ }
+
+ pr_warn("failed to find valid kernel BTF\n");
+ return libbpf_err_ptr(-ESRCH);
+}
+
+struct btf *libbpf_find_kernel_btf(void) __attribute__((alias("btf__load_vmlinux_btf")));
+
+struct btf *btf__load_module_btf(const char *module_name, struct btf *vmlinux_btf)
+{
+ char path[80];
+
+ snprintf(path, sizeof(path), "/sys/kernel/btf/%s", module_name);
+ return btf__parse_split(path, vmlinux_btf);
+}
+
+int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ctx)
+{
+ int i, n, err;
+
switch (btf_kind(t)) {
case BTF_KIND_INT:
+ case BTF_KIND_FLOAT:
case BTF_KIND_ENUM:
- break;
+ case BTF_KIND_ENUM64:
+ return 0;
case BTF_KIND_FWD:
case BTF_KIND_CONST:
@@ -4335,140 +5022,193 @@ static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id)
case BTF_KIND_TYPEDEF:
case BTF_KIND_FUNC:
case BTF_KIND_VAR:
- r = btf_dedup_remap_type_id(d, t->type);
- if (r < 0)
- return r;
- t->type = r;
- break;
+ case BTF_KIND_DECL_TAG:
+ case BTF_KIND_TYPE_TAG:
+ return visit(&t->type, ctx);
case BTF_KIND_ARRAY: {
- struct btf_array *arr_info = btf_array(t);
+ struct btf_array *a = btf_array(t);
- r = btf_dedup_remap_type_id(d, arr_info->type);
- if (r < 0)
- return r;
- arr_info->type = r;
- r = btf_dedup_remap_type_id(d, arr_info->index_type);
- if (r < 0)
- return r;
- arr_info->index_type = r;
- break;
+ err = visit(&a->type, ctx);
+ err = err ?: visit(&a->index_type, ctx);
+ return err;
}
case BTF_KIND_STRUCT:
case BTF_KIND_UNION: {
- struct btf_member *member = btf_members(t);
- __u16 vlen = btf_vlen(t);
+ struct btf_member *m = btf_members(t);
- for (i = 0; i < vlen; i++) {
- r = btf_dedup_remap_type_id(d, member->type);
- if (r < 0)
- return r;
- member->type = r;
- member++;
+ for (i = 0, n = btf_vlen(t); i < n; i++, m++) {
+ err = visit(&m->type, ctx);
+ if (err)
+ return err;
}
- break;
+ return 0;
}
case BTF_KIND_FUNC_PROTO: {
- struct btf_param *param = btf_params(t);
- __u16 vlen = btf_vlen(t);
+ struct btf_param *m = btf_params(t);
- r = btf_dedup_remap_type_id(d, t->type);
- if (r < 0)
- return r;
- t->type = r;
+ err = visit(&t->type, ctx);
+ if (err)
+ return err;
+ for (i = 0, n = btf_vlen(t); i < n; i++, m++) {
+ err = visit(&m->type, ctx);
+ if (err)
+ return err;
+ }
+ return 0;
+ }
- for (i = 0; i < vlen; i++) {
- r = btf_dedup_remap_type_id(d, param->type);
- if (r < 0)
- return r;
- param->type = r;
- param++;
+ case BTF_KIND_DATASEC: {
+ struct btf_var_secinfo *m = btf_var_secinfos(t);
+
+ for (i = 0, n = btf_vlen(t); i < n; i++, m++) {
+ err = visit(&m->type, ctx);
+ if (err)
+ return err;
+ }
+ return 0;
+ }
+
+ default:
+ return -EINVAL;
+ }
+}
+
+int btf_type_visit_str_offs(struct btf_type *t, str_off_visit_fn visit, void *ctx)
+{
+ int i, n, err;
+
+ err = visit(&t->name_off, ctx);
+ if (err)
+ return err;
+
+ switch (btf_kind(t)) {
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION: {
+ struct btf_member *m = btf_members(t);
+
+ for (i = 0, n = btf_vlen(t); i < n; i++, m++) {
+ err = visit(&m->name_off, ctx);
+ if (err)
+ return err;
}
break;
}
+ case BTF_KIND_ENUM: {
+ struct btf_enum *m = btf_enum(t);
- case BTF_KIND_DATASEC: {
- struct btf_var_secinfo *var = btf_var_secinfos(t);
- __u16 vlen = btf_vlen(t);
+ for (i = 0, n = btf_vlen(t); i < n; i++, m++) {
+ err = visit(&m->name_off, ctx);
+ if (err)
+ return err;
+ }
+ break;
+ }
+ case BTF_KIND_ENUM64: {
+ struct btf_enum64 *m = btf_enum64(t);
- for (i = 0; i < vlen; i++) {
- r = btf_dedup_remap_type_id(d, var->type);
- if (r < 0)
- return r;
- var->type = r;
- var++;
+ for (i = 0, n = btf_vlen(t); i < n; i++, m++) {
+ err = visit(&m->name_off, ctx);
+ if (err)
+ return err;
}
break;
}
+ case BTF_KIND_FUNC_PROTO: {
+ struct btf_param *m = btf_params(t);
+ for (i = 0, n = btf_vlen(t); i < n; i++, m++) {
+ err = visit(&m->name_off, ctx);
+ if (err)
+ return err;
+ }
+ break;
+ }
default:
- return -EINVAL;
+ break;
}
return 0;
}
-static int btf_dedup_remap_types(struct btf_dedup *d)
+int btf_ext_visit_type_ids(struct btf_ext *btf_ext, type_id_visit_fn visit, void *ctx)
{
- int i, r;
+ const struct btf_ext_info *seg;
+ struct btf_ext_info_sec *sec;
+ int i, err;
- for (i = 1; i <= d->btf->nr_types; i++) {
- r = btf_dedup_remap_type(d, i);
- if (r < 0)
- return r;
+ seg = &btf_ext->func_info;
+ for_each_btf_ext_sec(seg, sec) {
+ struct bpf_func_info_min *rec;
+
+ for_each_btf_ext_rec(seg, sec, i, rec) {
+ err = visit(&rec->type_id, ctx);
+ if (err < 0)
+ return err;
+ }
+ }
+
+ seg = &btf_ext->core_relo_info;
+ for_each_btf_ext_sec(seg, sec) {
+ struct bpf_core_relo *rec;
+
+ for_each_btf_ext_rec(seg, sec, i, rec) {
+ err = visit(&rec->type_id, ctx);
+ if (err < 0)
+ return err;
+ }
}
+
return 0;
}
-/*
- * Probe few well-known locations for vmlinux kernel image and try to load BTF
- * data out of it to use for target BTF.
- */
-struct btf *libbpf_find_kernel_btf(void)
-{
- struct {
- const char *path_fmt;
- bool raw_btf;
- } locations[] = {
- /* try canonical vmlinux BTF through sysfs first */
- { "/sys/kernel/btf/vmlinux", true /* raw BTF */ },
- /* fall back to trying to find vmlinux ELF on disk otherwise */
- { "/boot/vmlinux-%1$s" },
- { "/lib/modules/%1$s/vmlinux-%1$s" },
- { "/lib/modules/%1$s/build/vmlinux" },
- { "/usr/lib/modules/%1$s/kernel/vmlinux" },
- { "/usr/lib/debug/boot/vmlinux-%1$s" },
- { "/usr/lib/debug/boot/vmlinux-%1$s.debug" },
- { "/usr/lib/debug/lib/modules/%1$s/vmlinux" },
- };
- char path[PATH_MAX + 1];
- struct utsname buf;
- struct btf *btf;
- int i;
+int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void *ctx)
+{
+ const struct btf_ext_info *seg;
+ struct btf_ext_info_sec *sec;
+ int i, err;
- uname(&buf);
+ seg = &btf_ext->func_info;
+ for_each_btf_ext_sec(seg, sec) {
+ err = visit(&sec->sec_name_off, ctx);
+ if (err)
+ return err;
+ }
- for (i = 0; i < ARRAY_SIZE(locations); i++) {
- snprintf(path, PATH_MAX, locations[i].path_fmt, buf.release);
+ seg = &btf_ext->line_info;
+ for_each_btf_ext_sec(seg, sec) {
+ struct bpf_line_info_min *rec;
- if (access(path, R_OK))
- continue;
+ err = visit(&sec->sec_name_off, ctx);
+ if (err)
+ return err;
- if (locations[i].raw_btf)
- btf = btf__parse_raw(path);
- else
- btf = btf__parse_elf(path, NULL);
+ for_each_btf_ext_rec(seg, sec, i, rec) {
+ err = visit(&rec->file_name_off, ctx);
+ if (err)
+ return err;
+ err = visit(&rec->line_off, ctx);
+ if (err)
+ return err;
+ }
+ }
- pr_debug("loading kernel BTF '%s': %ld\n",
- path, IS_ERR(btf) ? PTR_ERR(btf) : 0);
- if (IS_ERR(btf))
- continue;
+ seg = &btf_ext->core_relo_info;
+ for_each_btf_ext_sec(seg, sec) {
+ struct bpf_core_relo *rec;
- return btf;
+ err = visit(&sec->sec_name_off, ctx);
+ if (err)
+ return err;
+
+ for_each_btf_ext_rec(seg, sec, i, rec) {
+ err = visit(&rec->access_str_off, ctx);
+ if (err)
+ return err;
+ }
}
- pr_warn("failed to find valid kernel BTF\n");
- return ERR_PTR(-ESRCH);
+ return 0;
}
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 57247240a20a..8e6880d91c84 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -1,5 +1,6 @@
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
/* Copyright (c) 2018 Facebook */
+/*! \file */
#ifndef __LIBBPF_BTF_H
#define __LIBBPF_BTF_H
@@ -30,19 +31,102 @@ enum btf_endianness {
BTF_BIG_ENDIAN = 1,
};
+/**
+ * @brief **btf__free()** frees all data of a BTF object
+ * @param btf BTF object to free
+ */
LIBBPF_API void btf__free(struct btf *btf);
+
+/**
+ * @brief **btf__new()** creates a new instance of a BTF object from the raw
+ * bytes of an ELF's BTF section
+ * @param data raw bytes
+ * @param size number of bytes passed in `data`
+ * @return new BTF object instance which has to be eventually freed with
+ * **btf__free()**
+ *
+ * On error, error-code-encoded-as-pointer is returned, not a NULL. To extract
+ * error code from such a pointer `libbpf_get_error()` should be used. If
+ * `libbpf_set_strict_mode(LIBBPF_STRICT_CLEAN_PTRS)` is enabled, NULL is
+ * returned on error instead. In both cases thread-local `errno` variable is
+ * always set to error code as well.
+ */
LIBBPF_API struct btf *btf__new(const void *data, __u32 size);
+
+/**
+ * @brief **btf__new_split()** create a new instance of a BTF object from the
+ * provided raw data bytes. It takes another BTF instance, **base_btf**, which
+ * serves as a base BTF, which is extended by types in a newly created BTF
+ * instance
+ * @param data raw bytes
+ * @param size length of raw bytes
+ * @param base_btf the base BTF object
+ * @return new BTF object instance which has to be eventually freed with
+ * **btf__free()**
+ *
+ * If *base_btf* is NULL, `btf__new_split()` is equivalent to `btf__new()` and
+ * creates non-split BTF.
+ *
+ * On error, error-code-encoded-as-pointer is returned, not a NULL. To extract
+ * error code from such a pointer `libbpf_get_error()` should be used. If
+ * `libbpf_set_strict_mode(LIBBPF_STRICT_CLEAN_PTRS)` is enabled, NULL is
+ * returned on error instead. In both cases thread-local `errno` variable is
+ * always set to error code as well.
+ */
+LIBBPF_API struct btf *btf__new_split(const void *data, __u32 size, struct btf *base_btf);
+
+/**
+ * @brief **btf__new_empty()** creates an empty BTF object. Use
+ * `btf__add_*()` to populate such BTF object.
+ * @return new BTF object instance which has to be eventually freed with
+ * **btf__free()**
+ *
+ * On error, error-code-encoded-as-pointer is returned, not a NULL. To extract
+ * error code from such a pointer `libbpf_get_error()` should be used. If
+ * `libbpf_set_strict_mode(LIBBPF_STRICT_CLEAN_PTRS)` is enabled, NULL is
+ * returned on error instead. In both cases thread-local `errno` variable is
+ * always set to error code as well.
+ */
LIBBPF_API struct btf *btf__new_empty(void);
+
+/**
+ * @brief **btf__new_empty_split()** creates an unpopulated BTF object from an
+ * ELF BTF section except with a base BTF on top of which split BTF should be
+ * based
+ * @return new BTF object instance which has to be eventually freed with
+ * **btf__free()**
+ *
+ * If *base_btf* is NULL, `btf__new_empty_split()` is equivalent to
+ * `btf__new_empty()` and creates non-split BTF.
+ *
+ * On error, error-code-encoded-as-pointer is returned, not a NULL. To extract
+ * error code from such a pointer `libbpf_get_error()` should be used. If
+ * `libbpf_set_strict_mode(LIBBPF_STRICT_CLEAN_PTRS)` is enabled, NULL is
+ * returned on error instead. In both cases thread-local `errno` variable is
+ * always set to error code as well.
+ */
+LIBBPF_API struct btf *btf__new_empty_split(struct btf *base_btf);
+
LIBBPF_API struct btf *btf__parse(const char *path, struct btf_ext **btf_ext);
+LIBBPF_API struct btf *btf__parse_split(const char *path, struct btf *base_btf);
LIBBPF_API struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext);
+LIBBPF_API struct btf *btf__parse_elf_split(const char *path, struct btf *base_btf);
LIBBPF_API struct btf *btf__parse_raw(const char *path);
-LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf);
-LIBBPF_API int btf__load(struct btf *btf);
+LIBBPF_API struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf);
+
+LIBBPF_API struct btf *btf__load_vmlinux_btf(void);
+LIBBPF_API struct btf *btf__load_module_btf(const char *module_name, struct btf *vmlinux_btf);
+
+LIBBPF_API struct btf *btf__load_from_kernel_by_id(__u32 id);
+LIBBPF_API struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf);
+
+LIBBPF_API int btf__load_into_kernel(struct btf *btf);
LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
const char *type_name);
LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf,
const char *type_name, __u32 kind);
-LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf);
+LIBBPF_API __u32 btf__type_cnt(const struct btf *btf);
+LIBBPF_API const struct btf *btf__base_btf(const struct btf *btf);
LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf,
__u32 id);
LIBBPF_API size_t btf__pointer_size(const struct btf *btf);
@@ -54,38 +138,43 @@ LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id);
LIBBPF_API int btf__align_of(const struct btf *btf, __u32 id);
LIBBPF_API int btf__fd(const struct btf *btf);
LIBBPF_API void btf__set_fd(struct btf *btf, int fd);
-LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size);
+LIBBPF_API const void *btf__raw_data(const struct btf *btf, __u32 *size);
LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset);
-LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf);
-LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
- __u32 expected_key_size,
- __u32 expected_value_size,
- __u32 *key_type_id, __u32 *value_type_id);
-LIBBPF_API struct btf_ext *btf_ext__new(__u8 *data, __u32 size);
+LIBBPF_API struct btf_ext *btf_ext__new(const __u8 *data, __u32 size);
LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext);
-LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext,
- __u32 *size);
-LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_func_info was never meant as a public API and has wrong assumptions embedded in it; it will be removed in the future libbpf versions")
-int btf_ext__reloc_func_info(const struct btf *btf,
- const struct btf_ext *btf_ext,
- const char *sec_name, __u32 insns_cnt,
- void **func_info, __u32 *cnt);
-LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_line_info was never meant as a public API and has wrong assumptions embedded in it; it will be removed in the future libbpf versions")
-int btf_ext__reloc_line_info(const struct btf *btf,
- const struct btf_ext *btf_ext,
- const char *sec_name, __u32 insns_cnt,
- void **line_info, __u32 *cnt);
-LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext);
-LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext);
-
-LIBBPF_API struct btf *libbpf_find_kernel_btf(void);
+LIBBPF_API const void *btf_ext__raw_data(const struct btf_ext *btf_ext, __u32 *size);
LIBBPF_API int btf__find_str(struct btf *btf, const char *s);
LIBBPF_API int btf__add_str(struct btf *btf, const char *s);
+LIBBPF_API int btf__add_type(struct btf *btf, const struct btf *src_btf,
+ const struct btf_type *src_type);
+/**
+ * @brief **btf__add_btf()** appends all the BTF types from *src_btf* into *btf*
+ * @param btf BTF object which all the BTF types and strings are added to
+ * @param src_btf BTF object which all BTF types and referenced strings are copied from
+ * @return BTF type ID of the first appended BTF type, or negative error code
+ *
+ * **btf__add_btf()** can be used to simply and efficiently append the entire
+ * contents of one BTF object to another one. All the BTF type data is copied
+ * over, all referenced type IDs are adjusted by adding a necessary ID offset.
+ * Only strings referenced from BTF types are copied over and deduplicated, so
+ * if there were some unused strings in *src_btf*, those won't be copied over,
+ * which is consistent with the general string deduplication semantics of BTF
+ * writing APIs.
+ *
+ * If any error is encountered during this process, the contents of *btf* is
+ * left intact, which means that **btf__add_btf()** follows the transactional
+ * semantics and the operation as a whole is all-or-nothing.
+ *
+ * *src_btf* has to be non-split BTF, as of now copying types from split BTF
+ * is not supported and will result in -ENOTSUP error code returned.
+ */
+LIBBPF_API int btf__add_btf(struct btf *btf, const struct btf *src_btf);
LIBBPF_API int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding);
+LIBBPF_API int btf__add_float(struct btf *btf, const char *name, size_t byte_sz);
LIBBPF_API int btf__add_ptr(struct btf *btf, int ref_type_id);
LIBBPF_API int btf__add_array(struct btf *btf,
int index_type_id, int elem_type_id, __u32 nr_elems);
@@ -98,6 +187,8 @@ LIBBPF_API int btf__add_field(struct btf *btf, const char *name, int field_type_
/* enum construction APIs */
LIBBPF_API int btf__add_enum(struct btf *btf, const char *name, __u32 bytes_sz);
LIBBPF_API int btf__add_enum_value(struct btf *btf, const char *name, __s64 value);
+LIBBPF_API int btf__add_enum64(struct btf *btf, const char *name, __u32 bytes_sz, bool is_signed);
+LIBBPF_API int btf__add_enum64_value(struct btf *btf, const char *name, __u64 value);
enum btf_fwd_kind {
BTF_FWD_STRUCT = 0,
@@ -110,6 +201,7 @@ LIBBPF_API int btf__add_typedef(struct btf *btf, const char *name, int ref_type_
LIBBPF_API int btf__add_volatile(struct btf *btf, int ref_type_id);
LIBBPF_API int btf__add_const(struct btf *btf, int ref_type_id);
LIBBPF_API int btf__add_restrict(struct btf *btf, int ref_type_id);
+LIBBPF_API int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id);
/* func and func_proto construction APIs */
LIBBPF_API int btf__add_func(struct btf *btf, const char *name,
@@ -123,26 +215,36 @@ LIBBPF_API int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz
LIBBPF_API int btf__add_datasec_var_info(struct btf *btf, int var_type_id,
__u32 offset, __u32 byte_sz);
+/* tag construction API */
+LIBBPF_API int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id,
+ int component_idx);
+
struct btf_dedup_opts {
- unsigned int dedup_table_size;
- bool dont_resolve_fwds;
+ size_t sz;
+ /* optional .BTF.ext info to dedup along the main BTF info */
+ struct btf_ext *btf_ext;
+ /* force hash collisions (used for testing) */
+ bool force_collisions;
+ size_t :0;
};
+#define btf_dedup_opts__last_field force_collisions
-LIBBPF_API int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
- const struct btf_dedup_opts *opts);
+LIBBPF_API int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts);
struct btf_dump;
struct btf_dump_opts {
- void *ctx;
+ size_t sz;
};
+#define btf_dump_opts__last_field sz
typedef void (*btf_dump_printf_fn_t)(void *ctx, const char *fmt, va_list args);
LIBBPF_API struct btf_dump *btf_dump__new(const struct btf *btf,
- const struct btf_ext *btf_ext,
- const struct btf_dump_opts *opts,
- btf_dump_printf_fn_t printf_fn);
+ btf_dump_printf_fn_t printf_fn,
+ void *ctx,
+ const struct btf_dump_opts *opts);
+
LIBBPF_API void btf_dump__free(struct btf_dump *d);
LIBBPF_API int btf_dump__dump_type(struct btf_dump *d, __u32 id);
@@ -164,6 +266,7 @@ struct btf_dump_emit_type_decl_opts {
int indent_level;
/* strip all the const/volatile/restrict mods */
bool strip_mods;
+ size_t :0;
};
#define btf_dump_emit_type_decl_opts__last_field strip_mods
@@ -171,9 +274,49 @@ LIBBPF_API int
btf_dump__emit_type_decl(struct btf_dump *d, __u32 id,
const struct btf_dump_emit_type_decl_opts *opts);
+
+struct btf_dump_type_data_opts {
+ /* size of this struct, for forward/backward compatibility */
+ size_t sz;
+ const char *indent_str;
+ int indent_level;
+ /* below match "show" flags for bpf_show_snprintf() */
+ bool compact; /* no newlines/indentation */
+ bool skip_names; /* skip member/type names */
+ bool emit_zeroes; /* show 0-valued fields */
+ size_t :0;
+};
+#define btf_dump_type_data_opts__last_field emit_zeroes
+
+LIBBPF_API int
+btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
+ const void *data, size_t data_sz,
+ const struct btf_dump_type_data_opts *opts);
+
/*
- * A set of helpers for easier BTF types handling
+ * A set of helpers for easier BTF types handling.
+ *
+ * The inline functions below rely on constants from the kernel headers which
+ * may not be available for applications including this header file. To avoid
+ * compilation errors, we define all the constants here that were added after
+ * the initial introduction of the BTF_KIND* constants.
*/
+#ifndef BTF_KIND_FUNC
+#define BTF_KIND_FUNC 12 /* Function */
+#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */
+#endif
+#ifndef BTF_KIND_VAR
+#define BTF_KIND_VAR 14 /* Variable */
+#define BTF_KIND_DATASEC 15 /* Section */
+#endif
+#ifndef BTF_KIND_FLOAT
+#define BTF_KIND_FLOAT 16 /* Floating point */
+#endif
+/* The kernel header switched to enums, so the following were never #defined */
+#define BTF_KIND_DECL_TAG 17 /* Decl Tag */
+#define BTF_KIND_TYPE_TAG 18 /* Type Tag */
+#define BTF_KIND_ENUM64 19 /* Enum for up-to 64bit values */
+
static inline __u16 btf_kind(const struct btf_type *t)
{
return BTF_INFO_KIND(t->info);
@@ -231,6 +374,11 @@ static inline bool btf_is_enum(const struct btf_type *t)
return btf_kind(t) == BTF_KIND_ENUM;
}
+static inline bool btf_is_enum64(const struct btf_type *t)
+{
+ return btf_kind(t) == BTF_KIND_ENUM64;
+}
+
static inline bool btf_is_fwd(const struct btf_type *t)
{
return btf_kind(t) == BTF_KIND_FWD;
@@ -262,7 +410,8 @@ static inline bool btf_is_mod(const struct btf_type *t)
return kind == BTF_KIND_VOLATILE ||
kind == BTF_KIND_CONST ||
- kind == BTF_KIND_RESTRICT;
+ kind == BTF_KIND_RESTRICT ||
+ kind == BTF_KIND_TYPE_TAG;
}
static inline bool btf_is_func(const struct btf_type *t)
@@ -285,6 +434,33 @@ static inline bool btf_is_datasec(const struct btf_type *t)
return btf_kind(t) == BTF_KIND_DATASEC;
}
+static inline bool btf_is_float(const struct btf_type *t)
+{
+ return btf_kind(t) == BTF_KIND_FLOAT;
+}
+
+static inline bool btf_is_decl_tag(const struct btf_type *t)
+{
+ return btf_kind(t) == BTF_KIND_DECL_TAG;
+}
+
+static inline bool btf_is_type_tag(const struct btf_type *t)
+{
+ return btf_kind(t) == BTF_KIND_TYPE_TAG;
+}
+
+static inline bool btf_is_any_enum(const struct btf_type *t)
+{
+ return btf_is_enum(t) || btf_is_enum64(t);
+}
+
+static inline bool btf_kind_core_compat(const struct btf_type *t1,
+ const struct btf_type *t2)
+{
+ return btf_kind(t1) == btf_kind(t2) ||
+ (btf_is_any_enum(t1) && btf_is_any_enum(t2));
+}
+
static inline __u8 btf_int_encoding(const struct btf_type *t)
{
return BTF_INT_ENCODING(*(__u32 *)(t + 1));
@@ -310,6 +486,39 @@ static inline struct btf_enum *btf_enum(const struct btf_type *t)
return (struct btf_enum *)(t + 1);
}
+struct btf_enum64;
+
+static inline struct btf_enum64 *btf_enum64(const struct btf_type *t)
+{
+ return (struct btf_enum64 *)(t + 1);
+}
+
+static inline __u64 btf_enum64_value(const struct btf_enum64 *e)
+{
+ /* struct btf_enum64 is introduced in Linux 6.0, which is very
+ * bleeding-edge. Here we are avoiding relying on struct btf_enum64
+ * definition coming from kernel UAPI headers to support wider range
+ * of system-wide kernel headers.
+ *
+ * Given this header can be also included from C++ applications, that
+ * further restricts C tricks we can use (like using compatible
+ * anonymous struct). So just treat struct btf_enum64 as
+ * a three-element array of u32 and access second (lo32) and third
+ * (hi32) elements directly.
+ *
+ * For reference, here is a struct btf_enum64 definition:
+ *
+ * const struct btf_enum64 {
+ * __u32 name_off;
+ * __u32 val_lo32;
+ * __u32 val_hi32;
+ * };
+ */
+ const __u32 *e64 = (const __u32 *)e;
+
+ return ((__u64)e64[2] << 32) | e64[1];
+}
+
static inline struct btf_member *btf_members(const struct btf_type *t)
{
return (struct btf_member *)(t + 1);
@@ -353,6 +562,12 @@ btf_var_secinfos(const struct btf_type *t)
return (struct btf_var_secinfo *)(t + 1);
}
+struct btf_decl_tag;
+static inline struct btf_decl_tag *btf_decl_tag(const struct btf_type *t)
+{
+ return (struct btf_decl_tag *)(t + 1);
+}
+
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index 2f9d685bd522..4d9f30bf7f01 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -10,7 +10,10 @@
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
+#include <ctype.h>
+#include <endian.h>
#include <errno.h>
+#include <limits.h>
#include <linux/err.h>
#include <linux/btf.h>
#include <linux/kernel.h>
@@ -53,13 +56,33 @@ struct btf_dump_type_aux_state {
__u8 referenced: 1;
};
+/* indent string length; one indent string is added for each indent level */
+#define BTF_DATA_INDENT_STR_LEN 32
+
+/*
+ * Common internal data for BTF type data dump operations.
+ */
+struct btf_dump_data {
+ const void *data_end; /* end of valid data to show */
+ bool compact;
+ bool skip_names;
+ bool emit_zeroes;
+ __u8 indent_lvl; /* base indent level */
+ char indent_str[BTF_DATA_INDENT_STR_LEN];
+ /* below are used during iteration */
+ int depth;
+ bool is_array_member;
+ bool is_array_terminated;
+ bool is_array_char;
+};
+
struct btf_dump {
const struct btf *btf;
- const struct btf_ext *btf_ext;
btf_dump_printf_fn_t printf_fn;
- struct btf_dump_opts opts;
+ void *cb_ctx;
int ptr_sz;
bool strip_mods;
+ bool skip_anon_defs;
int last_id;
/* per-type auxiliary state */
@@ -89,16 +112,20 @@ struct btf_dump {
* name occurrences
*/
struct hashmap *ident_names;
+ /*
+ * data for typed display; allocated if needed.
+ */
+ struct btf_dump_data *typed_dump;
};
-static size_t str_hash_fn(const void *key, void *ctx)
+static size_t str_hash_fn(long key, void *ctx)
{
- return str_hash(key);
+ return str_hash((void *)key);
}
-static bool str_equal_fn(const void *a, const void *b, void *ctx)
+static bool str_equal_fn(long a, long b, void *ctx)
{
- return strcmp(a, b) == 0;
+ return strcmp((void *)a, (void *)b) == 0;
}
static const char *btf_name_of(const struct btf_dump *d, __u32 name_off)
@@ -111,7 +138,7 @@ static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...)
va_list args;
va_start(args, fmt);
- d->printf_fn(d->opts.ctx, fmt, args);
+ d->printf_fn(d->cb_ctx, fmt, args);
va_end(args);
}
@@ -119,21 +146,26 @@ static int btf_dump_mark_referenced(struct btf_dump *d);
static int btf_dump_resize(struct btf_dump *d);
struct btf_dump *btf_dump__new(const struct btf *btf,
- const struct btf_ext *btf_ext,
- const struct btf_dump_opts *opts,
- btf_dump_printf_fn_t printf_fn)
+ btf_dump_printf_fn_t printf_fn,
+ void *ctx,
+ const struct btf_dump_opts *opts)
{
struct btf_dump *d;
int err;
+ if (!OPTS_VALID(opts, btf_dump_opts))
+ return libbpf_err_ptr(-EINVAL);
+
+ if (!printf_fn)
+ return libbpf_err_ptr(-EINVAL);
+
d = calloc(1, sizeof(struct btf_dump));
if (!d)
- return ERR_PTR(-ENOMEM);
+ return libbpf_err_ptr(-ENOMEM);
d->btf = btf;
- d->btf_ext = btf_ext;
d->printf_fn = printf_fn;
- d->opts.ctx = opts ? opts->ctx : NULL;
+ d->cb_ctx = ctx;
d->ptr_sz = btf__pointer_size(btf) ? : sizeof(void *);
d->type_names = hashmap__new(str_hash_fn, str_equal_fn, NULL);
@@ -156,21 +188,21 @@ struct btf_dump *btf_dump__new(const struct btf *btf,
return d;
err:
btf_dump__free(d);
- return ERR_PTR(err);
+ return libbpf_err_ptr(err);
}
static int btf_dump_resize(struct btf_dump *d)
{
- int err, last_id = btf__get_nr_types(d->btf);
+ int err, last_id = btf__type_cnt(d->btf) - 1;
if (last_id <= d->last_id)
return 0;
- if (btf_ensure_mem((void **)&d->type_states, &d->type_states_cap,
- sizeof(*d->type_states), last_id + 1))
+ if (libbpf_ensure_mem((void **)&d->type_states, &d->type_states_cap,
+ sizeof(*d->type_states), last_id + 1))
return -ENOMEM;
- if (btf_ensure_mem((void **)&d->cached_names, &d->cached_names_cap,
- sizeof(*d->cached_names), last_id + 1))
+ if (libbpf_ensure_mem((void **)&d->cached_names, &d->cached_names_cap,
+ sizeof(*d->cached_names), last_id + 1))
return -ENOMEM;
if (d->last_id == 0) {
@@ -188,6 +220,17 @@ static int btf_dump_resize(struct btf_dump *d)
return 0;
}
+static void btf_dump_free_names(struct hashmap *map)
+{
+ size_t bkt;
+ struct hashmap_entry *cur;
+
+ hashmap__for_each_entry(map, cur, bkt)
+ free((void *)cur->pkey);
+
+ hashmap__free(map);
+}
+
void btf_dump__free(struct btf_dump *d)
{
int i;
@@ -206,8 +249,8 @@ void btf_dump__free(struct btf_dump *d)
free(d->cached_names);
free(d->emit_queue);
free(d->decl_stack);
- hashmap__free(d->type_names);
- hashmap__free(d->ident_names);
+ btf_dump_free_names(d->type_names);
+ btf_dump_free_names(d->ident_names);
free(d);
}
@@ -235,17 +278,17 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id)
{
int err, i;
- if (id > btf__get_nr_types(d->btf))
- return -EINVAL;
+ if (id >= btf__type_cnt(d->btf))
+ return libbpf_err(-EINVAL);
err = btf_dump_resize(d);
if (err)
- return err;
+ return libbpf_err(err);
d->emit_queue_cnt = 0;
err = btf_dump_order_type(d, id, false);
if (err < 0)
- return err;
+ return libbpf_err(err);
for (i = 0; i < d->emit_queue_cnt; i++)
btf_dump_emit_type(d, d->emit_queue[i], 0 /*top-level*/);
@@ -267,18 +310,20 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id)
*/
static int btf_dump_mark_referenced(struct btf_dump *d)
{
- int i, j, n = btf__get_nr_types(d->btf);
+ int i, j, n = btf__type_cnt(d->btf);
const struct btf_type *t;
__u16 vlen;
- for (i = d->last_id + 1; i <= n; i++) {
+ for (i = d->last_id + 1; i < n; i++) {
t = btf__type_by_id(d->btf, i);
vlen = btf_vlen(t);
switch (btf_kind(t)) {
case BTF_KIND_INT:
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
case BTF_KIND_FWD:
+ case BTF_KIND_FLOAT:
break;
case BTF_KIND_VOLATILE:
@@ -288,6 +333,8 @@ static int btf_dump_mark_referenced(struct btf_dump *d)
case BTF_KIND_TYPEDEF:
case BTF_KIND_FUNC:
case BTF_KIND_VAR:
+ case BTF_KIND_DECL_TAG:
+ case BTF_KIND_TYPE_TAG:
d->type_states[t->type].referenced = 1;
break;
@@ -453,6 +500,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
switch (btf_kind(t)) {
case BTF_KIND_INT:
+ case BTF_KIND_FLOAT:
tstate->order_state = ORDERED;
return 0;
@@ -462,7 +510,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
return err;
case BTF_KIND_ARRAY:
- return btf_dump_order_type(d, btf_array(t)->type, through_ptr);
+ return btf_dump_order_type(d, btf_array(t)->type, false);
case BTF_KIND_STRUCT:
case BTF_KIND_UNION: {
@@ -494,6 +542,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
return 1;
}
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
case BTF_KIND_FWD:
/*
* non-anonymous or non-referenced enums are top-level
@@ -530,6 +579,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
case BTF_KIND_VOLATILE:
case BTF_KIND_CONST:
case BTF_KIND_RESTRICT:
+ case BTF_KIND_TYPE_TAG:
return btf_dump_order_type(d, t->type, through_ptr);
case BTF_KIND_FUNC_PROTO: {
@@ -554,6 +604,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
case BTF_KIND_FUNC:
case BTF_KIND_VAR:
case BTF_KIND_DATASEC:
+ case BTF_KIND_DECL_TAG:
d->type_states[id].order_state = ORDERED;
return 0;
@@ -693,6 +744,7 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
tstate->emit_state = EMITTED;
break;
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
if (top_level_def) {
btf_dump_emit_enum_def(d, id, t, 0);
btf_dump_printf(d, ";\n\n");
@@ -703,6 +755,7 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
case BTF_KIND_VOLATILE:
case BTF_KIND_CONST:
case BTF_KIND_RESTRICT:
+ case BTF_KIND_TYPE_TAG:
btf_dump_emit_type(d, t->type, cont_id);
break;
case BTF_KIND_ARRAY:
@@ -763,11 +816,11 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
break;
case BTF_KIND_FUNC_PROTO: {
const struct btf_param *p = btf_params(t);
- __u16 vlen = btf_vlen(t);
+ __u16 n = btf_vlen(t);
int i;
btf_dump_emit_type(d, t->type, cont_id);
- for (i = 0; i < vlen; i++, p++)
+ for (i = 0; i < n; i++, p++)
btf_dump_emit_type(d, p->type, cont_id);
break;
@@ -781,14 +834,9 @@ static bool btf_is_struct_packed(const struct btf *btf, __u32 id,
const struct btf_type *t)
{
const struct btf_member *m;
- int align, i, bit_sz;
+ int max_align = 1, align, i, bit_sz;
__u16 vlen;
- align = btf__align_of(btf, id);
- /* size of a non-packed struct has to be a multiple of its alignment*/
- if (align && t->size % align)
- return true;
-
m = btf_members(t);
vlen = btf_vlen(t);
/* all non-bitfield fields have to be naturally aligned */
@@ -797,8 +845,11 @@ static bool btf_is_struct_packed(const struct btf *btf, __u32 id,
bit_sz = btf_member_bitfield_size(t, i);
if (align && bit_sz == 0 && m->offset % (8 * align) != 0)
return true;
+ max_align = max(align, max_align);
}
-
+ /* size of a non-packed struct has to be a multiple of its alignment */
+ if (t->size % max_align != 0)
+ return true;
/*
* if original struct was marked as packed, but its layout is
* naturally aligned, we'll detect that it's not packed
@@ -806,52 +857,106 @@ static bool btf_is_struct_packed(const struct btf *btf, __u32 id,
return false;
}
-static int chip_away_bits(int total, int at_most)
-{
- return total % at_most ? : at_most;
-}
-
static void btf_dump_emit_bit_padding(const struct btf_dump *d,
- int cur_off, int m_off, int m_bit_sz,
- int align, int lvl)
+ int cur_off, int next_off, int next_align,
+ bool in_bitfield, int lvl)
{
- int off_diff = m_off - cur_off;
- int ptr_bits = d->ptr_sz * 8;
+ const struct {
+ const char *name;
+ int bits;
+ } pads[] = {
+ {"long", d->ptr_sz * 8}, {"int", 32}, {"short", 16}, {"char", 8}
+ };
+ int new_off, pad_bits, bits, i;
+ const char *pad_type;
+
+ if (cur_off >= next_off)
+ return; /* no gap */
+
+ /* For filling out padding we want to take advantage of
+ * natural alignment rules to minimize unnecessary explicit
+ * padding. First, we find the largest type (among long, int,
+ * short, or char) that can be used to force naturally aligned
+ * boundary. Once determined, we'll use such type to fill in
+ * the remaining padding gap. In some cases we can rely on
+ * compiler filling some gaps, but sometimes we need to force
+ * alignment to close natural alignment with markers like
+ * `long: 0` (this is always the case for bitfields). Note
+ * that even if struct itself has, let's say 4-byte alignment
+ * (i.e., it only uses up to int-aligned types), using `long:
+ * X;` explicit padding doesn't actually change struct's
+ * overall alignment requirements, but compiler does take into
+ * account that type's (long, in this example) natural
+ * alignment requirements when adding implicit padding. We use
+ * this fact heavily and don't worry about ruining correct
+ * struct alignment requirement.
+ */
+ for (i = 0; i < ARRAY_SIZE(pads); i++) {
+ pad_bits = pads[i].bits;
+ pad_type = pads[i].name;
- if (off_diff <= 0)
- /* no gap */
- return;
- if (m_bit_sz == 0 && off_diff < align * 8)
- /* natural padding will take care of a gap */
- return;
+ new_off = roundup(cur_off, pad_bits);
+ if (new_off <= next_off)
+ break;
+ }
- while (off_diff > 0) {
- const char *pad_type;
- int pad_bits;
-
- if (ptr_bits > 32 && off_diff > 32) {
- pad_type = "long";
- pad_bits = chip_away_bits(off_diff, ptr_bits);
- } else if (off_diff > 16) {
- pad_type = "int";
- pad_bits = chip_away_bits(off_diff, 32);
- } else if (off_diff > 8) {
- pad_type = "short";
- pad_bits = chip_away_bits(off_diff, 16);
- } else {
- pad_type = "char";
- pad_bits = chip_away_bits(off_diff, 8);
+ if (new_off > cur_off && new_off <= next_off) {
+ /* We need explicit `<type>: 0` aligning mark if next
+ * field is right on alignment offset and its
+ * alignment requirement is less strict than <type>'s
+ * alignment (so compiler won't naturally align to the
+ * offset we expect), or if subsequent `<type>: X`,
+ * will actually completely fit in the remaining hole,
+ * making compiler basically ignore `<type>: X`
+ * completely.
+ */
+ if (in_bitfield ||
+ (new_off == next_off && roundup(cur_off, next_align * 8) != new_off) ||
+ (new_off != next_off && next_off - new_off <= new_off - cur_off))
+ /* but for bitfields we'll emit explicit bit count */
+ btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type,
+ in_bitfield ? new_off - cur_off : 0);
+ cur_off = new_off;
+ }
+
+ /* Now we know we start at naturally aligned offset for a chosen
+ * padding type (long, int, short, or char), and so the rest is just
+ * a straightforward filling of remaining padding gap with full
+ * `<type>: sizeof(<type>);` markers, except for the last one, which
+ * might need smaller than sizeof(<type>) padding.
+ */
+ while (cur_off != next_off) {
+ bits = min(next_off - cur_off, pad_bits);
+ if (bits == pad_bits) {
+ btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, pad_bits);
+ cur_off += bits;
+ continue;
+ }
+ /* For the remainder padding that doesn't cover entire
+ * pad_type bit length, we pick the smallest necessary type.
+ * This is pure aesthetics, we could have just used `long`,
+ * but having smallest necessary one communicates better the
+ * scale of the padding gap.
+ */
+ for (i = ARRAY_SIZE(pads) - 1; i >= 0; i--) {
+ pad_type = pads[i].name;
+ pad_bits = pads[i].bits;
+ if (pad_bits < bits)
+ continue;
+
+ btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, bits);
+ cur_off += bits;
+ break;
}
- btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, pad_bits);
- off_diff -= pad_bits;
}
}
static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id,
const struct btf_type *t)
{
- btf_dump_printf(d, "%s %s",
+ btf_dump_printf(d, "%s%s%s",
btf_is_struct(t) ? "struct" : "union",
+ t->name_off ? " " : "",
btf_dump_type_name(d, id));
}
@@ -862,9 +967,11 @@ static void btf_dump_emit_struct_def(struct btf_dump *d,
{
const struct btf_member *m = btf_members(t);
bool is_struct = btf_is_struct(t);
- int align, i, packed, off = 0;
+ bool packed, prev_bitfield = false;
+ int align, i, off = 0;
__u16 vlen = btf_vlen(t);
+ align = btf__align_of(d->btf, id);
packed = is_struct ? btf_is_struct_packed(d->btf, id, t) : 0;
btf_dump_printf(d, "%s%s%s {",
@@ -874,37 +981,47 @@ static void btf_dump_emit_struct_def(struct btf_dump *d,
for (i = 0; i < vlen; i++, m++) {
const char *fname;
- int m_off, m_sz;
+ int m_off, m_sz, m_align;
+ bool in_bitfield;
fname = btf_name_of(d, m->name_off);
m_sz = btf_member_bitfield_size(t, i);
m_off = btf_member_bit_offset(t, i);
- align = packed ? 1 : btf__align_of(d->btf, m->type);
+ m_align = packed ? 1 : btf__align_of(d->btf, m->type);
+
+ in_bitfield = prev_bitfield && m_sz != 0;
- btf_dump_emit_bit_padding(d, off, m_off, m_sz, align, lvl + 1);
+ btf_dump_emit_bit_padding(d, off, m_off, m_align, in_bitfield, lvl + 1);
btf_dump_printf(d, "\n%s", pfx(lvl + 1));
btf_dump_emit_type_decl(d, m->type, fname, lvl + 1);
if (m_sz) {
btf_dump_printf(d, ": %d", m_sz);
off = m_off + m_sz;
+ prev_bitfield = true;
} else {
m_sz = max((__s64)0, btf__resolve_size(d->btf, m->type));
off = m_off + m_sz * 8;
+ prev_bitfield = false;
}
+
btf_dump_printf(d, ";");
}
/* pad at the end, if necessary */
- if (is_struct) {
- align = packed ? 1 : btf__align_of(d->btf, id);
- btf_dump_emit_bit_padding(d, off, t->size * 8, 0, align,
- lvl + 1);
- }
+ if (is_struct)
+ btf_dump_emit_bit_padding(d, off, t->size * 8, align, false, lvl + 1);
- if (vlen)
+ /*
+ * Keep `struct empty {}` on a single line,
+ * only print newline when there are regular or padding fields.
+ */
+ if (vlen || t->size) {
btf_dump_printf(d, "\n");
- btf_dump_printf(d, "%s}", pfx(lvl));
+ btf_dump_printf(d, "%s}", pfx(lvl));
+ } else {
+ btf_dump_printf(d, "}");
+ }
if (packed)
btf_dump_printf(d, " __attribute__((packed))");
}
@@ -941,38 +1058,118 @@ static void btf_dump_emit_enum_fwd(struct btf_dump *d, __u32 id,
btf_dump_printf(d, "enum %s", btf_dump_type_name(d, id));
}
-static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id,
- const struct btf_type *t,
- int lvl)
+static void btf_dump_emit_enum32_val(struct btf_dump *d,
+ const struct btf_type *t,
+ int lvl, __u16 vlen)
{
const struct btf_enum *v = btf_enum(t);
- __u16 vlen = btf_vlen(t);
+ bool is_signed = btf_kflag(t);
+ const char *fmt_str;
const char *name;
size_t dup_cnt;
int i;
+ for (i = 0; i < vlen; i++, v++) {
+ name = btf_name_of(d, v->name_off);
+ /* enumerators share namespace with typedef idents */
+ dup_cnt = btf_dump_name_dups(d, d->ident_names, name);
+ if (dup_cnt > 1) {
+ fmt_str = is_signed ? "\n%s%s___%zd = %d," : "\n%s%s___%zd = %u,";
+ btf_dump_printf(d, fmt_str, pfx(lvl + 1), name, dup_cnt, v->val);
+ } else {
+ fmt_str = is_signed ? "\n%s%s = %d," : "\n%s%s = %u,";
+ btf_dump_printf(d, fmt_str, pfx(lvl + 1), name, v->val);
+ }
+ }
+}
+
+static void btf_dump_emit_enum64_val(struct btf_dump *d,
+ const struct btf_type *t,
+ int lvl, __u16 vlen)
+{
+ const struct btf_enum64 *v = btf_enum64(t);
+ bool is_signed = btf_kflag(t);
+ const char *fmt_str;
+ const char *name;
+ size_t dup_cnt;
+ __u64 val;
+ int i;
+
+ for (i = 0; i < vlen; i++, v++) {
+ name = btf_name_of(d, v->name_off);
+ dup_cnt = btf_dump_name_dups(d, d->ident_names, name);
+ val = btf_enum64_value(v);
+ if (dup_cnt > 1) {
+ fmt_str = is_signed ? "\n%s%s___%zd = %lldLL,"
+ : "\n%s%s___%zd = %lluULL,";
+ btf_dump_printf(d, fmt_str,
+ pfx(lvl + 1), name, dup_cnt,
+ (unsigned long long)val);
+ } else {
+ fmt_str = is_signed ? "\n%s%s = %lldLL,"
+ : "\n%s%s = %lluULL,";
+ btf_dump_printf(d, fmt_str,
+ pfx(lvl + 1), name,
+ (unsigned long long)val);
+ }
+ }
+}
+static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id,
+ const struct btf_type *t,
+ int lvl)
+{
+ __u16 vlen = btf_vlen(t);
+
btf_dump_printf(d, "enum%s%s",
t->name_off ? " " : "",
btf_dump_type_name(d, id));
- if (vlen) {
- btf_dump_printf(d, " {");
- for (i = 0; i < vlen; i++, v++) {
- name = btf_name_of(d, v->name_off);
- /* enumerators share namespace with typedef idents */
- dup_cnt = btf_dump_name_dups(d, d->ident_names, name);
- if (dup_cnt > 1) {
- btf_dump_printf(d, "\n%s%s___%zu = %u,",
- pfx(lvl + 1), name, dup_cnt,
- (__u32)v->val);
- } else {
- btf_dump_printf(d, "\n%s%s = %u,",
- pfx(lvl + 1), name,
- (__u32)v->val);
+ if (!vlen)
+ return;
+
+ btf_dump_printf(d, " {");
+ if (btf_is_enum(t))
+ btf_dump_emit_enum32_val(d, t, lvl, vlen);
+ else
+ btf_dump_emit_enum64_val(d, t, lvl, vlen);
+ btf_dump_printf(d, "\n%s}", pfx(lvl));
+
+ /* special case enums with special sizes */
+ if (t->size == 1) {
+ /* one-byte enums can be forced with mode(byte) attribute */
+ btf_dump_printf(d, " __attribute__((mode(byte)))");
+ } else if (t->size == 8 && d->ptr_sz == 8) {
+ /* enum can be 8-byte sized if one of the enumerator values
+ * doesn't fit in 32-bit integer, or by adding mode(word)
+ * attribute (but probably only on 64-bit architectures); do
+ * our best here to try to satisfy the contract without adding
+ * unnecessary attributes
+ */
+ bool needs_word_mode;
+
+ if (btf_is_enum(t)) {
+ /* enum can't represent 64-bit values, so we need word mode */
+ needs_word_mode = true;
+ } else {
+ /* enum64 needs mode(word) if none of its values has
+ * non-zero upper 32-bits (which means that all values
+ * fit in 32-bit integers and won't cause compiler to
+ * bump enum to be 64-bit naturally
+ */
+ int i;
+
+ needs_word_mode = true;
+ for (i = 0; i < vlen; i++) {
+ if (btf_enum64(t)[i].val_hi32 != 0) {
+ needs_word_mode = false;
+ break;
+ }
}
}
- btf_dump_printf(d, "\n%s}", pfx(lvl));
+ if (needs_word_mode)
+ btf_dump_printf(d, " __attribute__((mode(word)))");
}
+
}
static void btf_dump_emit_fwd_def(struct btf_dump *d, __u32 id,
@@ -1073,11 +1270,11 @@ int btf_dump__emit_type_decl(struct btf_dump *d, __u32 id,
int lvl, err;
if (!OPTS_VALID(opts, btf_dump_emit_type_decl_opts))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
err = btf_dump_resize(d);
if (err)
- return -EINVAL;
+ return libbpf_err(err);
fname = OPTS_GET(opts, field_name, "");
lvl = OPTS_GET(opts, indent_level, 0);
@@ -1122,6 +1319,7 @@ skip_mod:
case BTF_KIND_CONST:
case BTF_KIND_RESTRICT:
case BTF_KIND_FUNC_PROTO:
+ case BTF_KIND_TYPE_TAG:
id = t->type;
break;
case BTF_KIND_ARRAY:
@@ -1129,10 +1327,12 @@ skip_mod:
break;
case BTF_KIND_INT:
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
case BTF_KIND_FWD:
case BTF_KIND_STRUCT:
case BTF_KIND_UNION:
case BTF_KIND_TYPEDEF:
+ case BTF_KIND_FLOAT:
goto done;
default:
pr_warn("unexpected type in decl chain, kind:%u, id:[%u]\n",
@@ -1247,6 +1447,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
switch (kind) {
case BTF_KIND_INT:
+ case BTF_KIND_FLOAT:
btf_dump_emit_mods(d, decls);
name = btf_name_of(d, t->name_off);
btf_dump_printf(d, "%s", name);
@@ -1255,15 +1456,16 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
case BTF_KIND_UNION:
btf_dump_emit_mods(d, decls);
/* inline anonymous struct/union */
- if (t->name_off == 0)
+ if (t->name_off == 0 && !d->skip_anon_defs)
btf_dump_emit_struct_def(d, id, t, lvl);
else
btf_dump_emit_struct_fwd(d, id, t);
break;
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
btf_dump_emit_mods(d, decls);
/* inline anonymous enum */
- if (t->name_off == 0)
+ if (t->name_off == 0 && !d->skip_anon_defs)
btf_dump_emit_enum_def(d, id, t, lvl);
else
btf_dump_emit_enum_fwd(d, id, t);
@@ -1288,6 +1490,11 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
case BTF_KIND_RESTRICT:
btf_dump_printf(d, " restrict");
break;
+ case BTF_KIND_TYPE_TAG:
+ btf_dump_emit_mods(d, decls);
+ name = btf_name_of(d, t->name_off);
+ btf_dump_printf(d, " __attribute__((btf_type_tag(\"%s\")))", name);
+ break;
case BTF_KIND_ARRAY: {
const struct btf_array *a = btf_array(t);
const struct btf_type *next_t;
@@ -1388,15 +1595,59 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
btf_dump_emit_name(d, fname, last_was_ptr);
}
+/* show type name as (type_name) */
+static void btf_dump_emit_type_cast(struct btf_dump *d, __u32 id,
+ bool top_level)
+{
+ const struct btf_type *t;
+
+ /* for array members, we don't bother emitting type name for each
+ * member to avoid the redundancy of
+ * .name = (char[4])[(char)'f',(char)'o',(char)'o',]
+ */
+ if (d->typed_dump->is_array_member)
+ return;
+
+ /* avoid type name specification for variable/section; it will be done
+ * for the associated variable value(s).
+ */
+ t = btf__type_by_id(d->btf, id);
+ if (btf_is_var(t) || btf_is_datasec(t))
+ return;
+
+ if (top_level)
+ btf_dump_printf(d, "(");
+
+ d->skip_anon_defs = true;
+ d->strip_mods = true;
+ btf_dump_emit_type_decl(d, id, "", 0);
+ d->strip_mods = false;
+ d->skip_anon_defs = false;
+
+ if (top_level)
+ btf_dump_printf(d, ")");
+}
+
/* return number of duplicates (occurrences) of a given name */
static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map,
const char *orig_name)
{
+ char *old_name, *new_name;
size_t dup_cnt = 0;
+ int err;
- hashmap__find(name_map, orig_name, (void **)&dup_cnt);
+ new_name = strdup(orig_name);
+ if (!new_name)
+ return 1;
+
+ (void)hashmap__find(name_map, orig_name, &dup_cnt);
dup_cnt++;
- hashmap__set(name_map, orig_name, (void *)dup_cnt, NULL, NULL);
+
+ err = hashmap__set(name_map, new_name, dup_cnt, &old_name, NULL);
+ if (err)
+ free(new_name);
+
+ free(old_name);
return dup_cnt;
}
@@ -1416,6 +1667,11 @@ static const char *btf_dump_resolve_name(struct btf_dump *d, __u32 id,
if (s->name_resolved)
return *cached_name ? *cached_name : orig_name;
+ if (btf_is_fwd(t) || (btf_is_enum(t) && btf_vlen(t) == 0)) {
+ s->name_resolved = 1;
+ return orig_name;
+ }
+
dup_cnt = btf_dump_name_dups(d, name_map, orig_name);
if (dup_cnt > 1) {
const size_t max_len = 256;
@@ -1438,3 +1694,849 @@ static const char *btf_dump_ident_name(struct btf_dump *d, __u32 id)
{
return btf_dump_resolve_name(d, id, d->ident_names);
}
+
+static int btf_dump_dump_type_data(struct btf_dump *d,
+ const char *fname,
+ const struct btf_type *t,
+ __u32 id,
+ const void *data,
+ __u8 bits_offset,
+ __u8 bit_sz);
+
+static const char *btf_dump_data_newline(struct btf_dump *d)
+{
+ return d->typed_dump->compact || d->typed_dump->depth == 0 ? "" : "\n";
+}
+
+static const char *btf_dump_data_delim(struct btf_dump *d)
+{
+ return d->typed_dump->depth == 0 ? "" : ",";
+}
+
+static void btf_dump_data_pfx(struct btf_dump *d)
+{
+ int i, lvl = d->typed_dump->indent_lvl + d->typed_dump->depth;
+
+ if (d->typed_dump->compact)
+ return;
+
+ for (i = 0; i < lvl; i++)
+ btf_dump_printf(d, "%s", d->typed_dump->indent_str);
+}
+
+/* A macro is used here as btf_type_value[s]() appends format specifiers
+ * to the format specifier passed in; these do the work of appending
+ * delimiters etc while the caller simply has to specify the type values
+ * in the format specifier + value(s).
+ */
+#define btf_dump_type_values(d, fmt, ...) \
+ btf_dump_printf(d, fmt "%s%s", \
+ ##__VA_ARGS__, \
+ btf_dump_data_delim(d), \
+ btf_dump_data_newline(d))
+
+static int btf_dump_unsupported_data(struct btf_dump *d,
+ const struct btf_type *t,
+ __u32 id)
+{
+ btf_dump_printf(d, "<unsupported kind:%u>", btf_kind(t));
+ return -ENOTSUP;
+}
+
+static int btf_dump_get_bitfield_value(struct btf_dump *d,
+ const struct btf_type *t,
+ const void *data,
+ __u8 bits_offset,
+ __u8 bit_sz,
+ __u64 *value)
+{
+ __u16 left_shift_bits, right_shift_bits;
+ const __u8 *bytes = data;
+ __u8 nr_copy_bits;
+ __u64 num = 0;
+ int i;
+
+ /* Maximum supported bitfield size is 64 bits */
+ if (t->size > 8) {
+ pr_warn("unexpected bitfield size %d\n", t->size);
+ return -EINVAL;
+ }
+
+ /* Bitfield value retrieval is done in two steps; first relevant bytes are
+ * stored in num, then we left/right shift num to eliminate irrelevant bits.
+ */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ for (i = t->size - 1; i >= 0; i--)
+ num = num * 256 + bytes[i];
+ nr_copy_bits = bit_sz + bits_offset;
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ for (i = 0; i < t->size; i++)
+ num = num * 256 + bytes[i];
+ nr_copy_bits = t->size * 8 - bits_offset;
+#else
+# error "Unrecognized __BYTE_ORDER__"
+#endif
+ left_shift_bits = 64 - nr_copy_bits;
+ right_shift_bits = 64 - bit_sz;
+
+ *value = (num << left_shift_bits) >> right_shift_bits;
+
+ return 0;
+}
+
+static int btf_dump_bitfield_check_zero(struct btf_dump *d,
+ const struct btf_type *t,
+ const void *data,
+ __u8 bits_offset,
+ __u8 bit_sz)
+{
+ __u64 check_num;
+ int err;
+
+ err = btf_dump_get_bitfield_value(d, t, data, bits_offset, bit_sz, &check_num);
+ if (err)
+ return err;
+ if (check_num == 0)
+ return -ENODATA;
+ return 0;
+}
+
+static int btf_dump_bitfield_data(struct btf_dump *d,
+ const struct btf_type *t,
+ const void *data,
+ __u8 bits_offset,
+ __u8 bit_sz)
+{
+ __u64 print_num;
+ int err;
+
+ err = btf_dump_get_bitfield_value(d, t, data, bits_offset, bit_sz, &print_num);
+ if (err)
+ return err;
+
+ btf_dump_type_values(d, "0x%llx", (unsigned long long)print_num);
+
+ return 0;
+}
+
+/* ints, floats and ptrs */
+static int btf_dump_base_type_check_zero(struct btf_dump *d,
+ const struct btf_type *t,
+ __u32 id,
+ const void *data)
+{
+ static __u8 bytecmp[16] = {};
+ int nr_bytes;
+
+ /* For pointer types, pointer size is not defined on a per-type basis.
+ * On dump creation however, we store the pointer size.
+ */
+ if (btf_kind(t) == BTF_KIND_PTR)
+ nr_bytes = d->ptr_sz;
+ else
+ nr_bytes = t->size;
+
+ if (nr_bytes < 1 || nr_bytes > 16) {
+ pr_warn("unexpected size %d for id [%u]\n", nr_bytes, id);
+ return -EINVAL;
+ }
+
+ if (memcmp(data, bytecmp, nr_bytes) == 0)
+ return -ENODATA;
+ return 0;
+}
+
+static bool ptr_is_aligned(const struct btf *btf, __u32 type_id,
+ const void *data)
+{
+ int alignment = btf__align_of(btf, type_id);
+
+ if (alignment == 0)
+ return false;
+
+ return ((uintptr_t)data) % alignment == 0;
+}
+
+static int btf_dump_int_data(struct btf_dump *d,
+ const struct btf_type *t,
+ __u32 type_id,
+ const void *data,
+ __u8 bits_offset)
+{
+ __u8 encoding = btf_int_encoding(t);
+ bool sign = encoding & BTF_INT_SIGNED;
+ char buf[16] __attribute__((aligned(16)));
+ int sz = t->size;
+
+ if (sz == 0 || sz > sizeof(buf)) {
+ pr_warn("unexpected size %d for id [%u]\n", sz, type_id);
+ return -EINVAL;
+ }
+
+ /* handle packed int data - accesses of integers not aligned on
+ * int boundaries can cause problems on some platforms.
+ */
+ if (!ptr_is_aligned(d->btf, type_id, data)) {
+ memcpy(buf, data, sz);
+ data = buf;
+ }
+
+ switch (sz) {
+ case 16: {
+ const __u64 *ints = data;
+ __u64 lsi, msi;
+
+ /* avoid use of __int128 as some 32-bit platforms do not
+ * support it.
+ */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ lsi = ints[0];
+ msi = ints[1];
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ lsi = ints[1];
+ msi = ints[0];
+#else
+# error "Unrecognized __BYTE_ORDER__"
+#endif
+ if (msi == 0)
+ btf_dump_type_values(d, "0x%llx", (unsigned long long)lsi);
+ else
+ btf_dump_type_values(d, "0x%llx%016llx", (unsigned long long)msi,
+ (unsigned long long)lsi);
+ break;
+ }
+ case 8:
+ if (sign)
+ btf_dump_type_values(d, "%lld", *(long long *)data);
+ else
+ btf_dump_type_values(d, "%llu", *(unsigned long long *)data);
+ break;
+ case 4:
+ if (sign)
+ btf_dump_type_values(d, "%d", *(__s32 *)data);
+ else
+ btf_dump_type_values(d, "%u", *(__u32 *)data);
+ break;
+ case 2:
+ if (sign)
+ btf_dump_type_values(d, "%d", *(__s16 *)data);
+ else
+ btf_dump_type_values(d, "%u", *(__u16 *)data);
+ break;
+ case 1:
+ if (d->typed_dump->is_array_char) {
+ /* check for null terminator */
+ if (d->typed_dump->is_array_terminated)
+ break;
+ if (*(char *)data == '\0') {
+ d->typed_dump->is_array_terminated = true;
+ break;
+ }
+ if (isprint(*(char *)data)) {
+ btf_dump_type_values(d, "'%c'", *(char *)data);
+ break;
+ }
+ }
+ if (sign)
+ btf_dump_type_values(d, "%d", *(__s8 *)data);
+ else
+ btf_dump_type_values(d, "%u", *(__u8 *)data);
+ break;
+ default:
+ pr_warn("unexpected sz %d for id [%u]\n", sz, type_id);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+union float_data {
+ long double ld;
+ double d;
+ float f;
+};
+
+static int btf_dump_float_data(struct btf_dump *d,
+ const struct btf_type *t,
+ __u32 type_id,
+ const void *data)
+{
+ const union float_data *flp = data;
+ union float_data fl;
+ int sz = t->size;
+
+ /* handle unaligned data; copy to local union */
+ if (!ptr_is_aligned(d->btf, type_id, data)) {
+ memcpy(&fl, data, sz);
+ flp = &fl;
+ }
+
+ switch (sz) {
+ case 16:
+ btf_dump_type_values(d, "%Lf", flp->ld);
+ break;
+ case 8:
+ btf_dump_type_values(d, "%lf", flp->d);
+ break;
+ case 4:
+ btf_dump_type_values(d, "%f", flp->f);
+ break;
+ default:
+ pr_warn("unexpected size %d for id [%u]\n", sz, type_id);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int btf_dump_var_data(struct btf_dump *d,
+ const struct btf_type *v,
+ __u32 id,
+ const void *data)
+{
+ enum btf_func_linkage linkage = btf_var(v)->linkage;
+ const struct btf_type *t;
+ const char *l;
+ __u32 type_id;
+
+ switch (linkage) {
+ case BTF_FUNC_STATIC:
+ l = "static ";
+ break;
+ case BTF_FUNC_EXTERN:
+ l = "extern ";
+ break;
+ case BTF_FUNC_GLOBAL:
+ default:
+ l = "";
+ break;
+ }
+
+ /* format of output here is [linkage] [type] [varname] = (type)value,
+ * for example "static int cpu_profile_flip = (int)1"
+ */
+ btf_dump_printf(d, "%s", l);
+ type_id = v->type;
+ t = btf__type_by_id(d->btf, type_id);
+ btf_dump_emit_type_cast(d, type_id, false);
+ btf_dump_printf(d, " %s = ", btf_name_of(d, v->name_off));
+ return btf_dump_dump_type_data(d, NULL, t, type_id, data, 0, 0);
+}
+
+static int btf_dump_array_data(struct btf_dump *d,
+ const struct btf_type *t,
+ __u32 id,
+ const void *data)
+{
+ const struct btf_array *array = btf_array(t);
+ const struct btf_type *elem_type;
+ __u32 i, elem_type_id;
+ __s64 elem_size;
+ bool is_array_member;
+
+ elem_type_id = array->type;
+ elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, NULL);
+ elem_size = btf__resolve_size(d->btf, elem_type_id);
+ if (elem_size <= 0) {
+ pr_warn("unexpected elem size %zd for array type [%u]\n",
+ (ssize_t)elem_size, id);
+ return -EINVAL;
+ }
+
+ if (btf_is_int(elem_type)) {
+ /*
+ * BTF_INT_CHAR encoding never seems to be set for
+ * char arrays, so if size is 1 and element is
+ * printable as a char, we'll do that.
+ */
+ if (elem_size == 1)
+ d->typed_dump->is_array_char = true;
+ }
+
+ /* note that we increment depth before calling btf_dump_print() below;
+ * this is intentional. btf_dump_data_newline() will not print a
+ * newline for depth 0 (since this leaves us with trailing newlines
+ * at the end of typed display), so depth is incremented first.
+ * For similar reasons, we decrement depth before showing the closing
+ * parenthesis.
+ */
+ d->typed_dump->depth++;
+ btf_dump_printf(d, "[%s", btf_dump_data_newline(d));
+
+ /* may be a multidimensional array, so store current "is array member"
+ * status so we can restore it correctly later.
+ */
+ is_array_member = d->typed_dump->is_array_member;
+ d->typed_dump->is_array_member = true;
+ for (i = 0; i < array->nelems; i++, data += elem_size) {
+ if (d->typed_dump->is_array_terminated)
+ break;
+ btf_dump_dump_type_data(d, NULL, elem_type, elem_type_id, data, 0, 0);
+ }
+ d->typed_dump->is_array_member = is_array_member;
+ d->typed_dump->depth--;
+ btf_dump_data_pfx(d);
+ btf_dump_type_values(d, "]");
+
+ return 0;
+}
+
+static int btf_dump_struct_data(struct btf_dump *d,
+ const struct btf_type *t,
+ __u32 id,
+ const void *data)
+{
+ const struct btf_member *m = btf_members(t);
+ __u16 n = btf_vlen(t);
+ int i, err = 0;
+
+ /* note that we increment depth before calling btf_dump_print() below;
+ * this is intentional. btf_dump_data_newline() will not print a
+ * newline for depth 0 (since this leaves us with trailing newlines
+ * at the end of typed display), so depth is incremented first.
+ * For similar reasons, we decrement depth before showing the closing
+ * parenthesis.
+ */
+ d->typed_dump->depth++;
+ btf_dump_printf(d, "{%s", btf_dump_data_newline(d));
+
+ for (i = 0; i < n; i++, m++) {
+ const struct btf_type *mtype;
+ const char *mname;
+ __u32 moffset;
+ __u8 bit_sz;
+
+ mtype = btf__type_by_id(d->btf, m->type);
+ mname = btf_name_of(d, m->name_off);
+ moffset = btf_member_bit_offset(t, i);
+
+ bit_sz = btf_member_bitfield_size(t, i);
+ err = btf_dump_dump_type_data(d, mname, mtype, m->type, data + moffset / 8,
+ moffset % 8, bit_sz);
+ if (err < 0)
+ return err;
+ }
+ d->typed_dump->depth--;
+ btf_dump_data_pfx(d);
+ btf_dump_type_values(d, "}");
+ return err;
+}
+
+union ptr_data {
+ unsigned int p;
+ unsigned long long lp;
+};
+
+static int btf_dump_ptr_data(struct btf_dump *d,
+ const struct btf_type *t,
+ __u32 id,
+ const void *data)
+{
+ if (ptr_is_aligned(d->btf, id, data) && d->ptr_sz == sizeof(void *)) {
+ btf_dump_type_values(d, "%p", *(void **)data);
+ } else {
+ union ptr_data pt;
+
+ memcpy(&pt, data, d->ptr_sz);
+ if (d->ptr_sz == 4)
+ btf_dump_type_values(d, "0x%x", pt.p);
+ else
+ btf_dump_type_values(d, "0x%llx", pt.lp);
+ }
+ return 0;
+}
+
+static int btf_dump_get_enum_value(struct btf_dump *d,
+ const struct btf_type *t,
+ const void *data,
+ __u32 id,
+ __s64 *value)
+{
+ bool is_signed = btf_kflag(t);
+
+ if (!ptr_is_aligned(d->btf, id, data)) {
+ __u64 val;
+ int err;
+
+ err = btf_dump_get_bitfield_value(d, t, data, 0, 0, &val);
+ if (err)
+ return err;
+ *value = (__s64)val;
+ return 0;
+ }
+
+ switch (t->size) {
+ case 8:
+ *value = *(__s64 *)data;
+ return 0;
+ case 4:
+ *value = is_signed ? (__s64)*(__s32 *)data : *(__u32 *)data;
+ return 0;
+ case 2:
+ *value = is_signed ? *(__s16 *)data : *(__u16 *)data;
+ return 0;
+ case 1:
+ *value = is_signed ? *(__s8 *)data : *(__u8 *)data;
+ return 0;
+ default:
+ pr_warn("unexpected size %d for enum, id:[%u]\n", t->size, id);
+ return -EINVAL;
+ }
+}
+
+static int btf_dump_enum_data(struct btf_dump *d,
+ const struct btf_type *t,
+ __u32 id,
+ const void *data)
+{
+ bool is_signed;
+ __s64 value;
+ int i, err;
+
+ err = btf_dump_get_enum_value(d, t, data, id, &value);
+ if (err)
+ return err;
+
+ is_signed = btf_kflag(t);
+ if (btf_is_enum(t)) {
+ const struct btf_enum *e;
+
+ for (i = 0, e = btf_enum(t); i < btf_vlen(t); i++, e++) {
+ if (value != e->val)
+ continue;
+ btf_dump_type_values(d, "%s", btf_name_of(d, e->name_off));
+ return 0;
+ }
+
+ btf_dump_type_values(d, is_signed ? "%d" : "%u", value);
+ } else {
+ const struct btf_enum64 *e;
+
+ for (i = 0, e = btf_enum64(t); i < btf_vlen(t); i++, e++) {
+ if (value != btf_enum64_value(e))
+ continue;
+ btf_dump_type_values(d, "%s", btf_name_of(d, e->name_off));
+ return 0;
+ }
+
+ btf_dump_type_values(d, is_signed ? "%lldLL" : "%lluULL",
+ (unsigned long long)value);
+ }
+ return 0;
+}
+
+static int btf_dump_datasec_data(struct btf_dump *d,
+ const struct btf_type *t,
+ __u32 id,
+ const void *data)
+{
+ const struct btf_var_secinfo *vsi;
+ const struct btf_type *var;
+ __u32 i;
+ int err;
+
+ btf_dump_type_values(d, "SEC(\"%s\") ", btf_name_of(d, t->name_off));
+
+ for (i = 0, vsi = btf_var_secinfos(t); i < btf_vlen(t); i++, vsi++) {
+ var = btf__type_by_id(d->btf, vsi->type);
+ err = btf_dump_dump_type_data(d, NULL, var, vsi->type, data + vsi->offset, 0, 0);
+ if (err < 0)
+ return err;
+ btf_dump_printf(d, ";");
+ }
+ return 0;
+}
+
+/* return size of type, or if base type overflows, return -E2BIG. */
+static int btf_dump_type_data_check_overflow(struct btf_dump *d,
+ const struct btf_type *t,
+ __u32 id,
+ const void *data,
+ __u8 bits_offset,
+ __u8 bit_sz)
+{
+ __s64 size;
+
+ if (bit_sz) {
+ /* bits_offset is at most 7. bit_sz is at most 128. */
+ __u8 nr_bytes = (bits_offset + bit_sz + 7) / 8;
+
+ /* When bit_sz is non zero, it is called from
+ * btf_dump_struct_data() where it only cares about
+ * negative error value.
+ * Return nr_bytes in success case to make it
+ * consistent as the regular integer case below.
+ */
+ return data + nr_bytes > d->typed_dump->data_end ? -E2BIG : nr_bytes;
+ }
+
+ size = btf__resolve_size(d->btf, id);
+
+ if (size < 0 || size >= INT_MAX) {
+ pr_warn("unexpected size [%zu] for id [%u]\n",
+ (size_t)size, id);
+ return -EINVAL;
+ }
+
+ /* Only do overflow checking for base types; we do not want to
+ * avoid showing part of a struct, union or array, even if we
+ * do not have enough data to show the full object. By
+ * restricting overflow checking to base types we can ensure
+ * that partial display succeeds, while avoiding overflowing
+ * and using bogus data for display.
+ */
+ t = skip_mods_and_typedefs(d->btf, id, NULL);
+ if (!t) {
+ pr_warn("unexpected error skipping mods/typedefs for id [%u]\n",
+ id);
+ return -EINVAL;
+ }
+
+ switch (btf_kind(t)) {
+ case BTF_KIND_INT:
+ case BTF_KIND_FLOAT:
+ case BTF_KIND_PTR:
+ case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
+ if (data + bits_offset / 8 + size > d->typed_dump->data_end)
+ return -E2BIG;
+ break;
+ default:
+ break;
+ }
+ return (int)size;
+}
+
+static int btf_dump_type_data_check_zero(struct btf_dump *d,
+ const struct btf_type *t,
+ __u32 id,
+ const void *data,
+ __u8 bits_offset,
+ __u8 bit_sz)
+{
+ __s64 value;
+ int i, err;
+
+ /* toplevel exceptions; we show zero values if
+ * - we ask for them (emit_zeros)
+ * - if we are at top-level so we see "struct empty { }"
+ * - or if we are an array member and the array is non-empty and
+ * not a char array; we don't want to be in a situation where we
+ * have an integer array 0, 1, 0, 1 and only show non-zero values.
+ * If the array contains zeroes only, or is a char array starting
+ * with a '\0', the array-level check_zero() will prevent showing it;
+ * we are concerned with determining zero value at the array member
+ * level here.
+ */
+ if (d->typed_dump->emit_zeroes || d->typed_dump->depth == 0 ||
+ (d->typed_dump->is_array_member &&
+ !d->typed_dump->is_array_char))
+ return 0;
+
+ t = skip_mods_and_typedefs(d->btf, id, NULL);
+
+ switch (btf_kind(t)) {
+ case BTF_KIND_INT:
+ if (bit_sz)
+ return btf_dump_bitfield_check_zero(d, t, data, bits_offset, bit_sz);
+ return btf_dump_base_type_check_zero(d, t, id, data);
+ case BTF_KIND_FLOAT:
+ case BTF_KIND_PTR:
+ return btf_dump_base_type_check_zero(d, t, id, data);
+ case BTF_KIND_ARRAY: {
+ const struct btf_array *array = btf_array(t);
+ const struct btf_type *elem_type;
+ __u32 elem_type_id, elem_size;
+ bool ischar;
+
+ elem_type_id = array->type;
+ elem_size = btf__resolve_size(d->btf, elem_type_id);
+ elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, NULL);
+
+ ischar = btf_is_int(elem_type) && elem_size == 1;
+
+ /* check all elements; if _any_ element is nonzero, all
+ * of array is displayed. We make an exception however
+ * for char arrays where the first element is 0; these
+ * are considered zeroed also, even if later elements are
+ * non-zero because the string is terminated.
+ */
+ for (i = 0; i < array->nelems; i++) {
+ if (i == 0 && ischar && *(char *)data == 0)
+ return -ENODATA;
+ err = btf_dump_type_data_check_zero(d, elem_type,
+ elem_type_id,
+ data +
+ (i * elem_size),
+ bits_offset, 0);
+ if (err != -ENODATA)
+ return err;
+ }
+ return -ENODATA;
+ }
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION: {
+ const struct btf_member *m = btf_members(t);
+ __u16 n = btf_vlen(t);
+
+ /* if any struct/union member is non-zero, the struct/union
+ * is considered non-zero and dumped.
+ */
+ for (i = 0; i < n; i++, m++) {
+ const struct btf_type *mtype;
+ __u32 moffset;
+
+ mtype = btf__type_by_id(d->btf, m->type);
+ moffset = btf_member_bit_offset(t, i);
+
+ /* btf_int_bits() does not store member bitfield size;
+ * bitfield size needs to be stored here so int display
+ * of member can retrieve it.
+ */
+ bit_sz = btf_member_bitfield_size(t, i);
+ err = btf_dump_type_data_check_zero(d, mtype, m->type, data + moffset / 8,
+ moffset % 8, bit_sz);
+ if (err != ENODATA)
+ return err;
+ }
+ return -ENODATA;
+ }
+ case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
+ err = btf_dump_get_enum_value(d, t, data, id, &value);
+ if (err)
+ return err;
+ if (value == 0)
+ return -ENODATA;
+ return 0;
+ default:
+ return 0;
+ }
+}
+
+/* returns size of data dumped, or error. */
+static int btf_dump_dump_type_data(struct btf_dump *d,
+ const char *fname,
+ const struct btf_type *t,
+ __u32 id,
+ const void *data,
+ __u8 bits_offset,
+ __u8 bit_sz)
+{
+ int size, err = 0;
+
+ size = btf_dump_type_data_check_overflow(d, t, id, data, bits_offset, bit_sz);
+ if (size < 0)
+ return size;
+ err = btf_dump_type_data_check_zero(d, t, id, data, bits_offset, bit_sz);
+ if (err) {
+ /* zeroed data is expected and not an error, so simply skip
+ * dumping such data. Record other errors however.
+ */
+ if (err == -ENODATA)
+ return size;
+ return err;
+ }
+ btf_dump_data_pfx(d);
+
+ if (!d->typed_dump->skip_names) {
+ if (fname && strlen(fname) > 0)
+ btf_dump_printf(d, ".%s = ", fname);
+ btf_dump_emit_type_cast(d, id, true);
+ }
+
+ t = skip_mods_and_typedefs(d->btf, id, NULL);
+
+ switch (btf_kind(t)) {
+ case BTF_KIND_UNKN:
+ case BTF_KIND_FWD:
+ case BTF_KIND_FUNC:
+ case BTF_KIND_FUNC_PROTO:
+ case BTF_KIND_DECL_TAG:
+ err = btf_dump_unsupported_data(d, t, id);
+ break;
+ case BTF_KIND_INT:
+ if (bit_sz)
+ err = btf_dump_bitfield_data(d, t, data, bits_offset, bit_sz);
+ else
+ err = btf_dump_int_data(d, t, id, data, bits_offset);
+ break;
+ case BTF_KIND_FLOAT:
+ err = btf_dump_float_data(d, t, id, data);
+ break;
+ case BTF_KIND_PTR:
+ err = btf_dump_ptr_data(d, t, id, data);
+ break;
+ case BTF_KIND_ARRAY:
+ err = btf_dump_array_data(d, t, id, data);
+ break;
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ err = btf_dump_struct_data(d, t, id, data);
+ break;
+ case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
+ /* handle bitfield and int enum values */
+ if (bit_sz) {
+ __u64 print_num;
+ __s64 enum_val;
+
+ err = btf_dump_get_bitfield_value(d, t, data, bits_offset, bit_sz,
+ &print_num);
+ if (err)
+ break;
+ enum_val = (__s64)print_num;
+ err = btf_dump_enum_data(d, t, id, &enum_val);
+ } else
+ err = btf_dump_enum_data(d, t, id, data);
+ break;
+ case BTF_KIND_VAR:
+ err = btf_dump_var_data(d, t, id, data);
+ break;
+ case BTF_KIND_DATASEC:
+ err = btf_dump_datasec_data(d, t, id, data);
+ break;
+ default:
+ pr_warn("unexpected kind [%u] for id [%u]\n",
+ BTF_INFO_KIND(t->info), id);
+ return -EINVAL;
+ }
+ if (err < 0)
+ return err;
+ return size;
+}
+
+int btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
+ const void *data, size_t data_sz,
+ const struct btf_dump_type_data_opts *opts)
+{
+ struct btf_dump_data typed_dump = {};
+ const struct btf_type *t;
+ int ret;
+
+ if (!OPTS_VALID(opts, btf_dump_type_data_opts))
+ return libbpf_err(-EINVAL);
+
+ t = btf__type_by_id(d->btf, id);
+ if (!t)
+ return libbpf_err(-ENOENT);
+
+ d->typed_dump = &typed_dump;
+ d->typed_dump->data_end = data + data_sz;
+ d->typed_dump->indent_lvl = OPTS_GET(opts, indent_level, 0);
+
+ /* default indent string is a tab */
+ if (!OPTS_GET(opts, indent_str, NULL))
+ d->typed_dump->indent_str[0] = '\t';
+ else
+ libbpf_strlcpy(d->typed_dump->indent_str, opts->indent_str,
+ sizeof(d->typed_dump->indent_str));
+
+ d->typed_dump->compact = OPTS_GET(opts, compact, false);
+ d->typed_dump->skip_names = OPTS_GET(opts, skip_names, false);
+ d->typed_dump->emit_zeroes = OPTS_GET(opts, emit_zeroes, false);
+
+ ret = btf_dump_dump_type_data(d, NULL, t, id, data, 0, 0);
+
+ d->typed_dump = NULL;
+
+ return libbpf_err(ret);
+}
diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c
new file mode 100644
index 000000000000..c92e02394159
--- /dev/null
+++ b/tools/lib/bpf/elf.c
@@ -0,0 +1,558 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <libelf.h>
+#include <gelf.h>
+#include <fcntl.h>
+#include <linux/kernel.h>
+
+#include "libbpf_internal.h"
+#include "str_error.h"
+
+/* A SHT_GNU_versym section holds 16-bit words. This bit is set if
+ * the symbol is hidden and can only be seen when referenced using an
+ * explicit version number. This is a GNU extension.
+ */
+#define VERSYM_HIDDEN 0x8000
+
+/* This is the mask for the rest of the data in a word read from a
+ * SHT_GNU_versym section.
+ */
+#define VERSYM_VERSION 0x7fff
+
+int elf_open(const char *binary_path, struct elf_fd *elf_fd)
+{
+ char errmsg[STRERR_BUFSIZE];
+ int fd, ret;
+ Elf *elf;
+
+ if (elf_version(EV_CURRENT) == EV_NONE) {
+ pr_warn("elf: failed to init libelf for %s\n", binary_path);
+ return -LIBBPF_ERRNO__LIBELF;
+ }
+ fd = open(binary_path, O_RDONLY | O_CLOEXEC);
+ if (fd < 0) {
+ ret = -errno;
+ pr_warn("elf: failed to open %s: %s\n", binary_path,
+ libbpf_strerror_r(ret, errmsg, sizeof(errmsg)));
+ return ret;
+ }
+ elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
+ if (!elf) {
+ pr_warn("elf: could not read elf from %s: %s\n", binary_path, elf_errmsg(-1));
+ close(fd);
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+ elf_fd->fd = fd;
+ elf_fd->elf = elf;
+ return 0;
+}
+
+void elf_close(struct elf_fd *elf_fd)
+{
+ if (!elf_fd)
+ return;
+ elf_end(elf_fd->elf);
+ close(elf_fd->fd);
+}
+
+/* Return next ELF section of sh_type after scn, or first of that type if scn is NULL. */
+static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn)
+{
+ while ((scn = elf_nextscn(elf, scn)) != NULL) {
+ GElf_Shdr sh;
+
+ if (!gelf_getshdr(scn, &sh))
+ continue;
+ if (sh.sh_type == sh_type)
+ return scn;
+ }
+ return NULL;
+}
+
+struct elf_sym {
+ const char *name;
+ GElf_Sym sym;
+ GElf_Shdr sh;
+ int ver;
+ bool hidden;
+};
+
+struct elf_sym_iter {
+ Elf *elf;
+ Elf_Data *syms;
+ Elf_Data *versyms;
+ Elf_Data *verdefs;
+ size_t nr_syms;
+ size_t strtabidx;
+ size_t verdef_strtabidx;
+ size_t next_sym_idx;
+ struct elf_sym sym;
+ int st_type;
+};
+
+static int elf_sym_iter_new(struct elf_sym_iter *iter,
+ Elf *elf, const char *binary_path,
+ int sh_type, int st_type)
+{
+ Elf_Scn *scn = NULL;
+ GElf_Ehdr ehdr;
+ GElf_Shdr sh;
+
+ memset(iter, 0, sizeof(*iter));
+
+ if (!gelf_getehdr(elf, &ehdr)) {
+ pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1));
+ return -EINVAL;
+ }
+
+ scn = elf_find_next_scn_by_type(elf, sh_type, NULL);
+ if (!scn) {
+ pr_debug("elf: failed to find symbol table ELF sections in '%s'\n",
+ binary_path);
+ return -ENOENT;
+ }
+
+ if (!gelf_getshdr(scn, &sh))
+ return -EINVAL;
+
+ iter->strtabidx = sh.sh_link;
+ iter->syms = elf_getdata(scn, 0);
+ if (!iter->syms) {
+ pr_warn("elf: failed to get symbols for symtab section in '%s': %s\n",
+ binary_path, elf_errmsg(-1));
+ return -EINVAL;
+ }
+ iter->nr_syms = iter->syms->d_size / sh.sh_entsize;
+ iter->elf = elf;
+ iter->st_type = st_type;
+
+ /* Version symbol table is meaningful to dynsym only */
+ if (sh_type != SHT_DYNSYM)
+ return 0;
+
+ scn = elf_find_next_scn_by_type(elf, SHT_GNU_versym, NULL);
+ if (!scn)
+ return 0;
+ iter->versyms = elf_getdata(scn, 0);
+
+ scn = elf_find_next_scn_by_type(elf, SHT_GNU_verdef, NULL);
+ if (!scn)
+ return 0;
+
+ iter->verdefs = elf_getdata(scn, 0);
+ if (!iter->verdefs || !gelf_getshdr(scn, &sh)) {
+ pr_warn("elf: failed to get verdef ELF section in '%s'\n", binary_path);
+ return -EINVAL;
+ }
+ iter->verdef_strtabidx = sh.sh_link;
+
+ return 0;
+}
+
+static struct elf_sym *elf_sym_iter_next(struct elf_sym_iter *iter)
+{
+ struct elf_sym *ret = &iter->sym;
+ GElf_Sym *sym = &ret->sym;
+ const char *name = NULL;
+ GElf_Versym versym;
+ Elf_Scn *sym_scn;
+ size_t idx;
+
+ for (idx = iter->next_sym_idx; idx < iter->nr_syms; idx++) {
+ if (!gelf_getsym(iter->syms, idx, sym))
+ continue;
+ if (GELF_ST_TYPE(sym->st_info) != iter->st_type)
+ continue;
+ name = elf_strptr(iter->elf, iter->strtabidx, sym->st_name);
+ if (!name)
+ continue;
+ sym_scn = elf_getscn(iter->elf, sym->st_shndx);
+ if (!sym_scn)
+ continue;
+ if (!gelf_getshdr(sym_scn, &ret->sh))
+ continue;
+
+ iter->next_sym_idx = idx + 1;
+ ret->name = name;
+ ret->ver = 0;
+ ret->hidden = false;
+
+ if (iter->versyms) {
+ if (!gelf_getversym(iter->versyms, idx, &versym))
+ continue;
+ ret->ver = versym & VERSYM_VERSION;
+ ret->hidden = versym & VERSYM_HIDDEN;
+ }
+ return ret;
+ }
+
+ return NULL;
+}
+
+static const char *elf_get_vername(struct elf_sym_iter *iter, int ver)
+{
+ GElf_Verdaux verdaux;
+ GElf_Verdef verdef;
+ int offset;
+
+ if (!iter->verdefs)
+ return NULL;
+
+ offset = 0;
+ while (gelf_getverdef(iter->verdefs, offset, &verdef)) {
+ if (verdef.vd_ndx != ver) {
+ if (!verdef.vd_next)
+ break;
+
+ offset += verdef.vd_next;
+ continue;
+ }
+
+ if (!gelf_getverdaux(iter->verdefs, offset + verdef.vd_aux, &verdaux))
+ break;
+
+ return elf_strptr(iter->elf, iter->verdef_strtabidx, verdaux.vda_name);
+
+ }
+ return NULL;
+}
+
+static bool symbol_match(struct elf_sym_iter *iter, int sh_type, struct elf_sym *sym,
+ const char *name, size_t name_len, const char *lib_ver)
+{
+ const char *ver_name;
+
+ /* Symbols are in forms of func, func@LIB_VER or func@@LIB_VER
+ * make sure the func part matches the user specified name
+ */
+ if (strncmp(sym->name, name, name_len) != 0)
+ return false;
+
+ /* ...but we don't want a search for "foo" to match 'foo2" also, so any
+ * additional characters in sname should be of the form "@@LIB".
+ */
+ if (sym->name[name_len] != '\0' && sym->name[name_len] != '@')
+ return false;
+
+ /* If user does not specify symbol version, then we got a match */
+ if (!lib_ver)
+ return true;
+
+ /* If user specifies symbol version, for dynamic symbols,
+ * get version name from ELF verdef section for comparison.
+ */
+ if (sh_type == SHT_DYNSYM) {
+ ver_name = elf_get_vername(iter, sym->ver);
+ if (!ver_name)
+ return false;
+ return strcmp(ver_name, lib_ver) == 0;
+ }
+
+ /* For normal symbols, it is already in form of func@LIB_VER */
+ return strcmp(sym->name, name) == 0;
+}
+
+/* Transform symbol's virtual address (absolute for binaries and relative
+ * for shared libs) into file offset, which is what kernel is expecting
+ * for uprobe/uretprobe attachment.
+ * See Documentation/trace/uprobetracer.rst for more details. This is done
+ * by looking up symbol's containing section's header and using iter's virtual
+ * address (sh_addr) and corresponding file offset (sh_offset) to transform
+ * sym.st_value (virtual address) into desired final file offset.
+ */
+static unsigned long elf_sym_offset(struct elf_sym *sym)
+{
+ return sym->sym.st_value - sym->sh.sh_addr + sym->sh.sh_offset;
+}
+
+/* Find offset of function name in the provided ELF object. "binary_path" is
+ * the path to the ELF binary represented by "elf", and only used for error
+ * reporting matters. "name" matches symbol name or name@@LIB for library
+ * functions.
+ */
+long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name)
+{
+ int i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB };
+ const char *at_symbol, *lib_ver;
+ bool is_shared_lib;
+ long ret = -ENOENT;
+ size_t name_len;
+ GElf_Ehdr ehdr;
+
+ if (!gelf_getehdr(elf, &ehdr)) {
+ pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1));
+ ret = -LIBBPF_ERRNO__FORMAT;
+ goto out;
+ }
+ /* for shared lib case, we do not need to calculate relative offset */
+ is_shared_lib = ehdr.e_type == ET_DYN;
+
+ /* Does name specify "@@LIB_VER" or "@LIB_VER" ? */
+ at_symbol = strchr(name, '@');
+ if (at_symbol) {
+ name_len = at_symbol - name;
+ /* skip second @ if it's @@LIB_VER case */
+ if (at_symbol[1] == '@')
+ at_symbol++;
+ lib_ver = at_symbol + 1;
+ } else {
+ name_len = strlen(name);
+ lib_ver = NULL;
+ }
+
+ /* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if
+ * a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically
+ * linked binary may not have SHT_DYMSYM, so absence of a section should not be
+ * reported as a warning/error.
+ */
+ for (i = 0; i < ARRAY_SIZE(sh_types); i++) {
+ struct elf_sym_iter iter;
+ struct elf_sym *sym;
+ int last_bind = -1;
+ int cur_bind;
+
+ ret = elf_sym_iter_new(&iter, elf, binary_path, sh_types[i], STT_FUNC);
+ if (ret == -ENOENT)
+ continue;
+ if (ret)
+ goto out;
+
+ while ((sym = elf_sym_iter_next(&iter))) {
+ if (!symbol_match(&iter, sh_types[i], sym, name, name_len, lib_ver))
+ continue;
+
+ cur_bind = GELF_ST_BIND(sym->sym.st_info);
+
+ if (ret > 0) {
+ /* handle multiple matches */
+ if (elf_sym_offset(sym) == ret) {
+ /* same offset, no problem */
+ continue;
+ } else if (last_bind != STB_WEAK && cur_bind != STB_WEAK) {
+ /* Only accept one non-weak bind. */
+ pr_warn("elf: ambiguous match for '%s', '%s' in '%s'\n",
+ sym->name, name, binary_path);
+ ret = -LIBBPF_ERRNO__FORMAT;
+ goto out;
+ } else if (cur_bind == STB_WEAK) {
+ /* already have a non-weak bind, and
+ * this is a weak bind, so ignore.
+ */
+ continue;
+ }
+ }
+
+ ret = elf_sym_offset(sym);
+ last_bind = cur_bind;
+ }
+ if (ret > 0)
+ break;
+ }
+
+ if (ret > 0) {
+ pr_debug("elf: symbol address match for '%s' in '%s': 0x%lx\n", name, binary_path,
+ ret);
+ } else {
+ if (ret == 0) {
+ pr_warn("elf: '%s' is 0 in symtab for '%s': %s\n", name, binary_path,
+ is_shared_lib ? "should not be 0 in a shared library" :
+ "try using shared library path instead");
+ ret = -ENOENT;
+ } else {
+ pr_warn("elf: failed to find symbol '%s' in '%s'\n", name, binary_path);
+ }
+ }
+out:
+ return ret;
+}
+
+/* Find offset of function name in ELF object specified by path. "name" matches
+ * symbol name or name@@LIB for library functions.
+ */
+long elf_find_func_offset_from_file(const char *binary_path, const char *name)
+{
+ struct elf_fd elf_fd;
+ long ret = -ENOENT;
+
+ ret = elf_open(binary_path, &elf_fd);
+ if (ret)
+ return ret;
+ ret = elf_find_func_offset(elf_fd.elf, binary_path, name);
+ elf_close(&elf_fd);
+ return ret;
+}
+
+struct symbol {
+ const char *name;
+ int bind;
+ int idx;
+};
+
+static int symbol_cmp(const void *a, const void *b)
+{
+ const struct symbol *sym_a = a;
+ const struct symbol *sym_b = b;
+
+ return strcmp(sym_a->name, sym_b->name);
+}
+
+/*
+ * Return offsets in @poffsets for symbols specified in @syms array argument.
+ * On success returns 0 and offsets are returned in allocated array with @cnt
+ * size, that needs to be released by the caller.
+ */
+int elf_resolve_syms_offsets(const char *binary_path, int cnt,
+ const char **syms, unsigned long **poffsets,
+ int st_type)
+{
+ int sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB };
+ int err = 0, i, cnt_done = 0;
+ unsigned long *offsets;
+ struct symbol *symbols;
+ struct elf_fd elf_fd;
+
+ err = elf_open(binary_path, &elf_fd);
+ if (err)
+ return err;
+
+ offsets = calloc(cnt, sizeof(*offsets));
+ symbols = calloc(cnt, sizeof(*symbols));
+
+ if (!offsets || !symbols) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ for (i = 0; i < cnt; i++) {
+ symbols[i].name = syms[i];
+ symbols[i].idx = i;
+ }
+
+ qsort(symbols, cnt, sizeof(*symbols), symbol_cmp);
+
+ for (i = 0; i < ARRAY_SIZE(sh_types); i++) {
+ struct elf_sym_iter iter;
+ struct elf_sym *sym;
+
+ err = elf_sym_iter_new(&iter, elf_fd.elf, binary_path, sh_types[i], st_type);
+ if (err == -ENOENT)
+ continue;
+ if (err)
+ goto out;
+
+ while ((sym = elf_sym_iter_next(&iter))) {
+ unsigned long sym_offset = elf_sym_offset(sym);
+ int bind = GELF_ST_BIND(sym->sym.st_info);
+ struct symbol *found, tmp = {
+ .name = sym->name,
+ };
+ unsigned long *offset;
+
+ found = bsearch(&tmp, symbols, cnt, sizeof(*symbols), symbol_cmp);
+ if (!found)
+ continue;
+
+ offset = &offsets[found->idx];
+ if (*offset > 0) {
+ /* same offset, no problem */
+ if (*offset == sym_offset)
+ continue;
+ /* handle multiple matches */
+ if (found->bind != STB_WEAK && bind != STB_WEAK) {
+ /* Only accept one non-weak bind. */
+ pr_warn("elf: ambiguous match found '%s@%lu' in '%s' previous offset %lu\n",
+ sym->name, sym_offset, binary_path, *offset);
+ err = -ESRCH;
+ goto out;
+ } else if (bind == STB_WEAK) {
+ /* already have a non-weak bind, and
+ * this is a weak bind, so ignore.
+ */
+ continue;
+ }
+ } else {
+ cnt_done++;
+ }
+ *offset = sym_offset;
+ found->bind = bind;
+ }
+ }
+
+ if (cnt != cnt_done) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ *poffsets = offsets;
+
+out:
+ free(symbols);
+ if (err)
+ free(offsets);
+ elf_close(&elf_fd);
+ return err;
+}
+
+/*
+ * Return offsets in @poffsets for symbols specified by @pattern argument.
+ * On success returns 0 and offsets are returned in allocated @poffsets
+ * array with the @pctn size, that needs to be released by the caller.
+ */
+int elf_resolve_pattern_offsets(const char *binary_path, const char *pattern,
+ unsigned long **poffsets, size_t *pcnt)
+{
+ int sh_types[2] = { SHT_SYMTAB, SHT_DYNSYM };
+ unsigned long *offsets = NULL;
+ size_t cap = 0, cnt = 0;
+ struct elf_fd elf_fd;
+ int err = 0, i;
+
+ err = elf_open(binary_path, &elf_fd);
+ if (err)
+ return err;
+
+ for (i = 0; i < ARRAY_SIZE(sh_types); i++) {
+ struct elf_sym_iter iter;
+ struct elf_sym *sym;
+
+ err = elf_sym_iter_new(&iter, elf_fd.elf, binary_path, sh_types[i], STT_FUNC);
+ if (err == -ENOENT)
+ continue;
+ if (err)
+ goto out;
+
+ while ((sym = elf_sym_iter_next(&iter))) {
+ if (!glob_match(sym->name, pattern))
+ continue;
+
+ err = libbpf_ensure_mem((void **) &offsets, &cap, sizeof(*offsets),
+ cnt + 1);
+ if (err)
+ goto out;
+
+ offsets[cnt++] = elf_sym_offset(sym);
+ }
+
+ /* If we found anything in the first symbol section,
+ * do not search others to avoid duplicates.
+ */
+ if (cnt)
+ break;
+ }
+
+ if (cnt) {
+ *poffsets = offsets;
+ *pcnt = cnt;
+ } else {
+ err = -ENOENT;
+ }
+
+out:
+ if (err)
+ free(offsets);
+ elf_close(&elf_fd);
+ return err;
+}
diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c
new file mode 100644
index 000000000000..4e783cc7fc4b
--- /dev/null
+++ b/tools/lib/bpf/features.c
@@ -0,0 +1,583 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+#include <linux/kernel.h>
+#include <linux/filter.h>
+#include "bpf.h"
+#include "libbpf.h"
+#include "libbpf_common.h"
+#include "libbpf_internal.h"
+#include "str_error.h"
+
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+ return (__u64)(unsigned long)ptr;
+}
+
+int probe_fd(int fd)
+{
+ if (fd >= 0)
+ close(fd);
+ return fd >= 0;
+}
+
+static int probe_kern_prog_name(int token_fd)
+{
+ const size_t attr_sz = offsetofend(union bpf_attr, prog_name);
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ union bpf_attr attr;
+ int ret;
+
+ memset(&attr, 0, attr_sz);
+ attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ attr.license = ptr_to_u64("GPL");
+ attr.insns = ptr_to_u64(insns);
+ attr.insn_cnt = (__u32)ARRAY_SIZE(insns);
+ attr.prog_token_fd = token_fd;
+ if (token_fd)
+ attr.prog_flags |= BPF_F_TOKEN_FD;
+ libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name));
+
+ /* make sure loading with name works */
+ ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS);
+ return probe_fd(ret);
+}
+
+static int probe_kern_global_data(int token_fd)
+{
+ char *cp, errmsg[STRERR_BUFSIZE];
+ struct bpf_insn insns[] = {
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ LIBBPF_OPTS(bpf_map_create_opts, map_opts,
+ .token_fd = token_fd,
+ .map_flags = token_fd ? BPF_F_TOKEN_FD : 0,
+ );
+ LIBBPF_OPTS(bpf_prog_load_opts, prog_opts,
+ .token_fd = token_fd,
+ .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0,
+ );
+ int ret, map, insn_cnt = ARRAY_SIZE(insns);
+
+ map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, &map_opts);
+ if (map < 0) {
+ ret = -errno;
+ cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
+ pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
+ __func__, cp, -ret);
+ return ret;
+ }
+
+ insns[0].imm = map;
+
+ ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &prog_opts);
+ close(map);
+ return probe_fd(ret);
+}
+
+static int probe_kern_btf(int token_fd)
+{
+ static const char strs[] = "\0int";
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs), token_fd));
+}
+
+static int probe_kern_btf_func(int token_fd)
+{
+ static const char strs[] = "\0int\0x\0a";
+ /* void x(int a) {} */
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* FUNC_PROTO */ /* [2] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
+ BTF_PARAM_ENC(7, 1),
+ /* FUNC x */ /* [3] */
+ BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs), token_fd));
+}
+
+static int probe_kern_btf_func_global(int token_fd)
+{
+ static const char strs[] = "\0int\0x\0a";
+ /* static void x(int a) {} */
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* FUNC_PROTO */ /* [2] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
+ BTF_PARAM_ENC(7, 1),
+ /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */
+ BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs), token_fd));
+}
+
+static int probe_kern_btf_datasec(int token_fd)
+{
+ static const char strs[] = "\0x\0.data";
+ /* static int a; */
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* VAR x */ /* [2] */
+ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
+ BTF_VAR_STATIC,
+ /* DATASEC val */ /* [3] */
+ BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(2, 0, 4),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs), token_fd));
+}
+
+static int probe_kern_btf_qmark_datasec(int token_fd)
+{
+ static const char strs[] = "\0x\0?.data";
+ /* static int a; */
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* VAR x */ /* [2] */
+ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
+ BTF_VAR_STATIC,
+ /* DATASEC ?.data */ /* [3] */
+ BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(2, 0, 4),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs), token_fd));
+}
+
+static int probe_kern_btf_float(int token_fd)
+{
+ static const char strs[] = "\0float";
+ __u32 types[] = {
+ /* float */
+ BTF_TYPE_FLOAT_ENC(1, 4),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs), token_fd));
+}
+
+static int probe_kern_btf_decl_tag(int token_fd)
+{
+ static const char strs[] = "\0tag";
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* VAR x */ /* [2] */
+ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
+ BTF_VAR_STATIC,
+ /* attr */
+ BTF_TYPE_DECL_TAG_ENC(1, 2, -1),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs), token_fd));
+}
+
+static int probe_kern_btf_type_tag(int token_fd)
+{
+ static const char strs[] = "\0tag";
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* attr */
+ BTF_TYPE_TYPE_TAG_ENC(1, 1), /* [2] */
+ /* ptr */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* [3] */
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs), token_fd));
+}
+
+static int probe_kern_array_mmap(int token_fd)
+{
+ LIBBPF_OPTS(bpf_map_create_opts, opts,
+ .map_flags = BPF_F_MMAPABLE | (token_fd ? BPF_F_TOKEN_FD : 0),
+ .token_fd = token_fd,
+ );
+ int fd;
+
+ fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts);
+ return probe_fd(fd);
+}
+
+static int probe_kern_exp_attach_type(int token_fd)
+{
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+ .token_fd = token_fd,
+ .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0,
+ );
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ int fd, insn_cnt = ARRAY_SIZE(insns);
+
+ /* use any valid combination of program type and (optional)
+ * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS)
+ * to see if kernel supports expected_attach_type field for
+ * BPF_PROG_LOAD command
+ */
+ fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts);
+ return probe_fd(fd);
+}
+
+static int probe_kern_probe_read_kernel(int token_fd)
+{
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .token_fd = token_fd,
+ .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0,
+ );
+ struct bpf_insn insns[] = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */
+ BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */
+ BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel),
+ BPF_EXIT_INSN(),
+ };
+ int fd, insn_cnt = ARRAY_SIZE(insns);
+
+ fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts);
+ return probe_fd(fd);
+}
+
+static int probe_prog_bind_map(int token_fd)
+{
+ char *cp, errmsg[STRERR_BUFSIZE];
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ LIBBPF_OPTS(bpf_map_create_opts, map_opts,
+ .token_fd = token_fd,
+ .map_flags = token_fd ? BPF_F_TOKEN_FD : 0,
+ );
+ LIBBPF_OPTS(bpf_prog_load_opts, prog_opts,
+ .token_fd = token_fd,
+ .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0,
+ );
+ int ret, map, prog, insn_cnt = ARRAY_SIZE(insns);
+
+ map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, &map_opts);
+ if (map < 0) {
+ ret = -errno;
+ cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
+ pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
+ __func__, cp, -ret);
+ return ret;
+ }
+
+ prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &prog_opts);
+ if (prog < 0) {
+ close(map);
+ return 0;
+ }
+
+ ret = bpf_prog_bind_map(prog, map, NULL);
+
+ close(map);
+ close(prog);
+
+ return ret >= 0;
+}
+
+static int probe_module_btf(int token_fd)
+{
+ static const char strs[] = "\0int";
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
+ };
+ struct bpf_btf_info info;
+ __u32 len = sizeof(info);
+ char name[16];
+ int fd, err;
+
+ fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs), token_fd);
+ if (fd < 0)
+ return 0; /* BTF not supported at all */
+
+ memset(&info, 0, sizeof(info));
+ info.name = ptr_to_u64(name);
+ info.name_len = sizeof(name);
+
+ /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer;
+ * kernel's module BTF support coincides with support for
+ * name/name_len fields in struct bpf_btf_info.
+ */
+ err = bpf_btf_get_info_by_fd(fd, &info, &len);
+ close(fd);
+ return !err;
+}
+
+static int probe_perf_link(int token_fd)
+{
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .token_fd = token_fd,
+ .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0,
+ );
+ int prog_fd, link_fd, err;
+
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL",
+ insns, ARRAY_SIZE(insns), &opts);
+ if (prog_fd < 0)
+ return -errno;
+
+ /* use invalid perf_event FD to get EBADF, if link is supported;
+ * otherwise EINVAL should be returned
+ */
+ link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL);
+ err = -errno; /* close() can clobber errno */
+
+ if (link_fd >= 0)
+ close(link_fd);
+ close(prog_fd);
+
+ return link_fd < 0 && err == -EBADF;
+}
+
+static int probe_uprobe_multi_link(int token_fd)
+{
+ LIBBPF_OPTS(bpf_prog_load_opts, load_opts,
+ .expected_attach_type = BPF_TRACE_UPROBE_MULTI,
+ .token_fd = token_fd,
+ .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0,
+ );
+ LIBBPF_OPTS(bpf_link_create_opts, link_opts);
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ int prog_fd, link_fd, err;
+ unsigned long offset = 0;
+
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL",
+ insns, ARRAY_SIZE(insns), &load_opts);
+ if (prog_fd < 0)
+ return -errno;
+
+ /* Creating uprobe in '/' binary should fail with -EBADF. */
+ link_opts.uprobe_multi.path = "/";
+ link_opts.uprobe_multi.offsets = &offset;
+ link_opts.uprobe_multi.cnt = 1;
+
+ link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts);
+ err = -errno; /* close() can clobber errno */
+
+ if (link_fd >= 0)
+ close(link_fd);
+ close(prog_fd);
+
+ return link_fd < 0 && err == -EBADF;
+}
+
+static int probe_kern_bpf_cookie(int token_fd)
+{
+ struct bpf_insn insns[] = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie),
+ BPF_EXIT_INSN(),
+ };
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .token_fd = token_fd,
+ .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0,
+ );
+ int ret, insn_cnt = ARRAY_SIZE(insns);
+
+ ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts);
+ return probe_fd(ret);
+}
+
+static int probe_kern_btf_enum64(int token_fd)
+{
+ static const char strs[] = "\0enum64";
+ __u32 types[] = {
+ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs), token_fd));
+}
+
+static int probe_kern_arg_ctx_tag(int token_fd)
+{
+ static const char strs[] = "\0a\0b\0arg:ctx\0";
+ const __u32 types[] = {
+ /* [1] INT */
+ BTF_TYPE_INT_ENC(1 /* "a" */, BTF_INT_SIGNED, 0, 32, 4),
+ /* [2] PTR -> VOID */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0),
+ /* [3] FUNC_PROTO `int(void *a)` */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 1),
+ BTF_PARAM_ENC(1 /* "a" */, 2),
+ /* [4] FUNC 'a' -> FUNC_PROTO (main prog) */
+ BTF_TYPE_ENC(1 /* "a" */, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 3),
+ /* [5] FUNC_PROTO `int(void *b __arg_ctx)` */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 1),
+ BTF_PARAM_ENC(3 /* "b" */, 2),
+ /* [6] FUNC 'b' -> FUNC_PROTO (subprog) */
+ BTF_TYPE_ENC(3 /* "b" */, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 5),
+ /* [7] DECL_TAG 'arg:ctx' -> func 'b' arg 'b' */
+ BTF_TYPE_DECL_TAG_ENC(5 /* "arg:ctx" */, 6, 0),
+ };
+ const struct bpf_insn insns[] = {
+ /* main prog */
+ BPF_CALL_REL(+1),
+ BPF_EXIT_INSN(),
+ /* global subprog */
+ BPF_EMIT_CALL(BPF_FUNC_get_func_ip), /* needs PTR_TO_CTX */
+ BPF_EXIT_INSN(),
+ };
+ const struct bpf_func_info_min func_infos[] = {
+ { 0, 4 }, /* main prog -> FUNC 'a' */
+ { 2, 6 }, /* subprog -> FUNC 'b' */
+ };
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .token_fd = token_fd,
+ .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0,
+ );
+ int prog_fd, btf_fd, insn_cnt = ARRAY_SIZE(insns);
+
+ btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs), token_fd);
+ if (btf_fd < 0)
+ return 0;
+
+ opts.prog_btf_fd = btf_fd;
+ opts.func_info = &func_infos;
+ opts.func_info_cnt = ARRAY_SIZE(func_infos);
+ opts.func_info_rec_size = sizeof(func_infos[0]);
+
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, "det_arg_ctx",
+ "GPL", insns, insn_cnt, &opts);
+ close(btf_fd);
+
+ return probe_fd(prog_fd);
+}
+
+typedef int (*feature_probe_fn)(int /* token_fd */);
+
+static struct kern_feature_cache feature_cache;
+
+static struct kern_feature_desc {
+ const char *desc;
+ feature_probe_fn probe;
+} feature_probes[__FEAT_CNT] = {
+ [FEAT_PROG_NAME] = {
+ "BPF program name", probe_kern_prog_name,
+ },
+ [FEAT_GLOBAL_DATA] = {
+ "global variables", probe_kern_global_data,
+ },
+ [FEAT_BTF] = {
+ "minimal BTF", probe_kern_btf,
+ },
+ [FEAT_BTF_FUNC] = {
+ "BTF functions", probe_kern_btf_func,
+ },
+ [FEAT_BTF_GLOBAL_FUNC] = {
+ "BTF global function", probe_kern_btf_func_global,
+ },
+ [FEAT_BTF_DATASEC] = {
+ "BTF data section and variable", probe_kern_btf_datasec,
+ },
+ [FEAT_ARRAY_MMAP] = {
+ "ARRAY map mmap()", probe_kern_array_mmap,
+ },
+ [FEAT_EXP_ATTACH_TYPE] = {
+ "BPF_PROG_LOAD expected_attach_type attribute",
+ probe_kern_exp_attach_type,
+ },
+ [FEAT_PROBE_READ_KERN] = {
+ "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel,
+ },
+ [FEAT_PROG_BIND_MAP] = {
+ "BPF_PROG_BIND_MAP support", probe_prog_bind_map,
+ },
+ [FEAT_MODULE_BTF] = {
+ "module BTF support", probe_module_btf,
+ },
+ [FEAT_BTF_FLOAT] = {
+ "BTF_KIND_FLOAT support", probe_kern_btf_float,
+ },
+ [FEAT_PERF_LINK] = {
+ "BPF perf link support", probe_perf_link,
+ },
+ [FEAT_BTF_DECL_TAG] = {
+ "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag,
+ },
+ [FEAT_BTF_TYPE_TAG] = {
+ "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag,
+ },
+ [FEAT_MEMCG_ACCOUNT] = {
+ "memcg-based memory accounting", probe_memcg_account,
+ },
+ [FEAT_BPF_COOKIE] = {
+ "BPF cookie support", probe_kern_bpf_cookie,
+ },
+ [FEAT_BTF_ENUM64] = {
+ "BTF_KIND_ENUM64 support", probe_kern_btf_enum64,
+ },
+ [FEAT_SYSCALL_WRAPPER] = {
+ "Kernel using syscall wrapper", probe_kern_syscall_wrapper,
+ },
+ [FEAT_UPROBE_MULTI_LINK] = {
+ "BPF multi-uprobe link support", probe_uprobe_multi_link,
+ },
+ [FEAT_ARG_CTX_TAG] = {
+ "kernel-side __arg_ctx tag", probe_kern_arg_ctx_tag,
+ },
+ [FEAT_BTF_QMARK_DATASEC] = {
+ "BTF DATASEC names starting from '?'", probe_kern_btf_qmark_datasec,
+ },
+};
+
+bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id)
+{
+ struct kern_feature_desc *feat = &feature_probes[feat_id];
+ int ret;
+
+ /* assume global feature cache, unless custom one is provided */
+ if (!cache)
+ cache = &feature_cache;
+
+ if (READ_ONCE(cache->res[feat_id]) == FEAT_UNKNOWN) {
+ ret = feat->probe(cache->token_fd);
+ if (ret > 0) {
+ WRITE_ONCE(cache->res[feat_id], FEAT_SUPPORTED);
+ } else if (ret == 0) {
+ WRITE_ONCE(cache->res[feat_id], FEAT_MISSING);
+ } else {
+ pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret);
+ WRITE_ONCE(cache->res[feat_id], FEAT_MISSING);
+ }
+ }
+
+ return READ_ONCE(cache->res[feat_id]) == FEAT_SUPPORTED;
+}
diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c
new file mode 100644
index 000000000000..cf3323fd47b8
--- /dev/null
+++ b/tools/lib/bpf/gen_loader.c
@@ -0,0 +1,1123 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2021 Facebook */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <linux/filter.h>
+#include <sys/param.h>
+#include "btf.h"
+#include "bpf.h"
+#include "libbpf.h"
+#include "libbpf_internal.h"
+#include "hashmap.h"
+#include "bpf_gen_internal.h"
+#include "skel_internal.h"
+#include <asm/byteorder.h>
+
+#define MAX_USED_MAPS 64
+#define MAX_USED_PROGS 32
+#define MAX_KFUNC_DESCS 256
+#define MAX_FD_ARRAY_SZ (MAX_USED_MAPS + MAX_KFUNC_DESCS)
+
+/* The following structure describes the stack layout of the loader program.
+ * In addition R6 contains the pointer to context.
+ * R7 contains the result of the last sys_bpf command (typically error or FD).
+ * R9 contains the result of the last sys_close command.
+ *
+ * Naming convention:
+ * ctx - bpf program context
+ * stack - bpf program stack
+ * blob - bpf_attr-s, strings, insns, map data.
+ * All the bytes that loader prog will use for read/write.
+ */
+struct loader_stack {
+ __u32 btf_fd;
+ __u32 inner_map_fd;
+ __u32 prog_fd[MAX_USED_PROGS];
+};
+
+#define stack_off(field) \
+ (__s16)(-sizeof(struct loader_stack) + offsetof(struct loader_stack, field))
+
+#define attr_field(attr, field) (attr + offsetof(union bpf_attr, field))
+
+static int blob_fd_array_off(struct bpf_gen *gen, int index)
+{
+ return gen->fd_array + index * sizeof(int);
+}
+
+static int realloc_insn_buf(struct bpf_gen *gen, __u32 size)
+{
+ size_t off = gen->insn_cur - gen->insn_start;
+ void *insn_start;
+
+ if (gen->error)
+ return gen->error;
+ if (size > INT32_MAX || off + size > INT32_MAX) {
+ gen->error = -ERANGE;
+ return -ERANGE;
+ }
+ insn_start = realloc(gen->insn_start, off + size);
+ if (!insn_start) {
+ gen->error = -ENOMEM;
+ free(gen->insn_start);
+ gen->insn_start = NULL;
+ return -ENOMEM;
+ }
+ gen->insn_start = insn_start;
+ gen->insn_cur = insn_start + off;
+ return 0;
+}
+
+static int realloc_data_buf(struct bpf_gen *gen, __u32 size)
+{
+ size_t off = gen->data_cur - gen->data_start;
+ void *data_start;
+
+ if (gen->error)
+ return gen->error;
+ if (size > INT32_MAX || off + size > INT32_MAX) {
+ gen->error = -ERANGE;
+ return -ERANGE;
+ }
+ data_start = realloc(gen->data_start, off + size);
+ if (!data_start) {
+ gen->error = -ENOMEM;
+ free(gen->data_start);
+ gen->data_start = NULL;
+ return -ENOMEM;
+ }
+ gen->data_start = data_start;
+ gen->data_cur = data_start + off;
+ return 0;
+}
+
+static void emit(struct bpf_gen *gen, struct bpf_insn insn)
+{
+ if (realloc_insn_buf(gen, sizeof(insn)))
+ return;
+ memcpy(gen->insn_cur, &insn, sizeof(insn));
+ gen->insn_cur += sizeof(insn);
+}
+
+static void emit2(struct bpf_gen *gen, struct bpf_insn insn1, struct bpf_insn insn2)
+{
+ emit(gen, insn1);
+ emit(gen, insn2);
+}
+
+static int add_data(struct bpf_gen *gen, const void *data, __u32 size);
+static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off);
+
+void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps)
+{
+ size_t stack_sz = sizeof(struct loader_stack), nr_progs_sz;
+ int i;
+
+ gen->fd_array = add_data(gen, NULL, MAX_FD_ARRAY_SZ * sizeof(int));
+ gen->log_level = log_level;
+ /* save ctx pointer into R6 */
+ emit(gen, BPF_MOV64_REG(BPF_REG_6, BPF_REG_1));
+
+ /* bzero stack */
+ emit(gen, BPF_MOV64_REG(BPF_REG_1, BPF_REG_10));
+ emit(gen, BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -stack_sz));
+ emit(gen, BPF_MOV64_IMM(BPF_REG_2, stack_sz));
+ emit(gen, BPF_MOV64_IMM(BPF_REG_3, 0));
+ emit(gen, BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel));
+
+ /* amount of stack actually used, only used to calculate iterations, not stack offset */
+ nr_progs_sz = offsetof(struct loader_stack, prog_fd[nr_progs]);
+ /* jump over cleanup code */
+ emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0,
+ /* size of cleanup code below (including map fd cleanup) */
+ (nr_progs_sz / 4) * 3 + 2 +
+ /* 6 insns for emit_sys_close_blob,
+ * 6 insns for debug_regs in emit_sys_close_blob
+ */
+ nr_maps * (6 + (gen->log_level ? 6 : 0))));
+
+ /* remember the label where all error branches will jump to */
+ gen->cleanup_label = gen->insn_cur - gen->insn_start;
+ /* emit cleanup code: close all temp FDs */
+ for (i = 0; i < nr_progs_sz; i += 4) {
+ emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, -stack_sz + i));
+ emit(gen, BPF_JMP_IMM(BPF_JSLE, BPF_REG_1, 0, 1));
+ emit(gen, BPF_EMIT_CALL(BPF_FUNC_sys_close));
+ }
+ for (i = 0; i < nr_maps; i++)
+ emit_sys_close_blob(gen, blob_fd_array_off(gen, i));
+ /* R7 contains the error code from sys_bpf. Copy it into R0 and exit. */
+ emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_7));
+ emit(gen, BPF_EXIT_INSN());
+}
+
+static int add_data(struct bpf_gen *gen, const void *data, __u32 size)
+{
+ __u32 size8 = roundup(size, 8);
+ __u64 zero = 0;
+ void *prev;
+
+ if (realloc_data_buf(gen, size8))
+ return 0;
+ prev = gen->data_cur;
+ if (data) {
+ memcpy(gen->data_cur, data, size);
+ memcpy(gen->data_cur + size, &zero, size8 - size);
+ } else {
+ memset(gen->data_cur, 0, size8);
+ }
+ gen->data_cur += size8;
+ return prev - gen->data_start;
+}
+
+/* Get index for map_fd/btf_fd slot in reserved fd_array, or in data relative
+ * to start of fd_array. Caller can decide if it is usable or not.
+ */
+static int add_map_fd(struct bpf_gen *gen)
+{
+ if (gen->nr_maps == MAX_USED_MAPS) {
+ pr_warn("Total maps exceeds %d\n", MAX_USED_MAPS);
+ gen->error = -E2BIG;
+ return 0;
+ }
+ return gen->nr_maps++;
+}
+
+static int add_kfunc_btf_fd(struct bpf_gen *gen)
+{
+ int cur;
+
+ if (gen->nr_fd_array == MAX_KFUNC_DESCS) {
+ cur = add_data(gen, NULL, sizeof(int));
+ return (cur - gen->fd_array) / sizeof(int);
+ }
+ return MAX_USED_MAPS + gen->nr_fd_array++;
+}
+
+static int insn_bytes_to_bpf_size(__u32 sz)
+{
+ switch (sz) {
+ case 8: return BPF_DW;
+ case 4: return BPF_W;
+ case 2: return BPF_H;
+ case 1: return BPF_B;
+ default: return -1;
+ }
+}
+
+/* *(u64 *)(blob + off) = (u64)(void *)(blob + data) */
+static void emit_rel_store(struct bpf_gen *gen, int off, int data)
+{
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE,
+ 0, 0, 0, data));
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
+ 0, 0, 0, off));
+ emit(gen, BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0));
+}
+
+static void move_blob2blob(struct bpf_gen *gen, int off, int size, int blob_off)
+{
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_2, BPF_PSEUDO_MAP_IDX_VALUE,
+ 0, 0, 0, blob_off));
+ emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_2, 0));
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
+ 0, 0, 0, off));
+ emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_1, BPF_REG_0, 0));
+}
+
+static void move_blob2ctx(struct bpf_gen *gen, int ctx_off, int size, int blob_off)
+{
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
+ 0, 0, 0, blob_off));
+ emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_1, 0));
+ emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_6, BPF_REG_0, ctx_off));
+}
+
+static void move_ctx2blob(struct bpf_gen *gen, int off, int size, int ctx_off,
+ bool check_non_zero)
+{
+ emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_6, ctx_off));
+ if (check_non_zero)
+ /* If value in ctx is zero don't update the blob.
+ * For example: when ctx->map.max_entries == 0, keep default max_entries from bpf.c
+ */
+ emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3));
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
+ 0, 0, 0, off));
+ emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_1, BPF_REG_0, 0));
+}
+
+static void move_stack2blob(struct bpf_gen *gen, int off, int size, int stack_off)
+{
+ emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_10, stack_off));
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
+ 0, 0, 0, off));
+ emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_1, BPF_REG_0, 0));
+}
+
+static void move_stack2ctx(struct bpf_gen *gen, int ctx_off, int size, int stack_off)
+{
+ emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_10, stack_off));
+ emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_6, BPF_REG_0, ctx_off));
+}
+
+static void emit_sys_bpf(struct bpf_gen *gen, int cmd, int attr, int attr_size)
+{
+ emit(gen, BPF_MOV64_IMM(BPF_REG_1, cmd));
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_2, BPF_PSEUDO_MAP_IDX_VALUE,
+ 0, 0, 0, attr));
+ emit(gen, BPF_MOV64_IMM(BPF_REG_3, attr_size));
+ emit(gen, BPF_EMIT_CALL(BPF_FUNC_sys_bpf));
+ /* remember the result in R7 */
+ emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0));
+}
+
+static bool is_simm16(__s64 value)
+{
+ return value == (__s64)(__s16)value;
+}
+
+static void emit_check_err(struct bpf_gen *gen)
+{
+ __s64 off = -(gen->insn_cur - gen->insn_start - gen->cleanup_label) / 8 - 1;
+
+ /* R7 contains result of last sys_bpf command.
+ * if (R7 < 0) goto cleanup;
+ */
+ if (is_simm16(off)) {
+ emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, off));
+ } else {
+ gen->error = -ERANGE;
+ emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, -1));
+ }
+}
+
+/* reg1 and reg2 should not be R1 - R5. They can be R0, R6 - R10 */
+static void emit_debug(struct bpf_gen *gen, int reg1, int reg2,
+ const char *fmt, va_list args)
+{
+ char buf[1024];
+ int addr, len, ret;
+
+ if (!gen->log_level)
+ return;
+ ret = vsnprintf(buf, sizeof(buf), fmt, args);
+ if (ret < 1024 - 7 && reg1 >= 0 && reg2 < 0)
+ /* The special case to accommodate common debug_ret():
+ * to avoid specifying BPF_REG_7 and adding " r=%%d" to
+ * prints explicitly.
+ */
+ strcat(buf, " r=%d");
+ len = strlen(buf) + 1;
+ addr = add_data(gen, buf, len);
+
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
+ 0, 0, 0, addr));
+ emit(gen, BPF_MOV64_IMM(BPF_REG_2, len));
+ if (reg1 >= 0)
+ emit(gen, BPF_MOV64_REG(BPF_REG_3, reg1));
+ if (reg2 >= 0)
+ emit(gen, BPF_MOV64_REG(BPF_REG_4, reg2));
+ emit(gen, BPF_EMIT_CALL(BPF_FUNC_trace_printk));
+}
+
+static void debug_regs(struct bpf_gen *gen, int reg1, int reg2, const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ emit_debug(gen, reg1, reg2, fmt, args);
+ va_end(args);
+}
+
+static void debug_ret(struct bpf_gen *gen, const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ emit_debug(gen, BPF_REG_7, -1, fmt, args);
+ va_end(args);
+}
+
+static void __emit_sys_close(struct bpf_gen *gen)
+{
+ emit(gen, BPF_JMP_IMM(BPF_JSLE, BPF_REG_1, 0,
+ /* 2 is the number of the following insns
+ * * 6 is additional insns in debug_regs
+ */
+ 2 + (gen->log_level ? 6 : 0)));
+ emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_1));
+ emit(gen, BPF_EMIT_CALL(BPF_FUNC_sys_close));
+ debug_regs(gen, BPF_REG_9, BPF_REG_0, "close(%%d) = %%d");
+}
+
+static void emit_sys_close_stack(struct bpf_gen *gen, int stack_off)
+{
+ emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, stack_off));
+ __emit_sys_close(gen);
+}
+
+static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off)
+{
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE,
+ 0, 0, 0, blob_off));
+ emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0));
+ __emit_sys_close(gen);
+}
+
+int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps)
+{
+ int i;
+
+ if (nr_progs < gen->nr_progs || nr_maps != gen->nr_maps) {
+ pr_warn("nr_progs %d/%d nr_maps %d/%d mismatch\n",
+ nr_progs, gen->nr_progs, nr_maps, gen->nr_maps);
+ gen->error = -EFAULT;
+ return gen->error;
+ }
+ emit_sys_close_stack(gen, stack_off(btf_fd));
+ for (i = 0; i < gen->nr_progs; i++)
+ move_stack2ctx(gen,
+ sizeof(struct bpf_loader_ctx) +
+ sizeof(struct bpf_map_desc) * gen->nr_maps +
+ sizeof(struct bpf_prog_desc) * i +
+ offsetof(struct bpf_prog_desc, prog_fd), 4,
+ stack_off(prog_fd[i]));
+ for (i = 0; i < gen->nr_maps; i++)
+ move_blob2ctx(gen,
+ sizeof(struct bpf_loader_ctx) +
+ sizeof(struct bpf_map_desc) * i +
+ offsetof(struct bpf_map_desc, map_fd), 4,
+ blob_fd_array_off(gen, i));
+ emit(gen, BPF_MOV64_IMM(BPF_REG_0, 0));
+ emit(gen, BPF_EXIT_INSN());
+ pr_debug("gen: finish %d\n", gen->error);
+ if (!gen->error) {
+ struct gen_loader_opts *opts = gen->opts;
+
+ opts->insns = gen->insn_start;
+ opts->insns_sz = gen->insn_cur - gen->insn_start;
+ opts->data = gen->data_start;
+ opts->data_sz = gen->data_cur - gen->data_start;
+ }
+ return gen->error;
+}
+
+void bpf_gen__free(struct bpf_gen *gen)
+{
+ if (!gen)
+ return;
+ free(gen->data_start);
+ free(gen->insn_start);
+ free(gen);
+}
+
+void bpf_gen__load_btf(struct bpf_gen *gen, const void *btf_raw_data,
+ __u32 btf_raw_size)
+{
+ int attr_size = offsetofend(union bpf_attr, btf_log_level);
+ int btf_data, btf_load_attr;
+ union bpf_attr attr;
+
+ memset(&attr, 0, attr_size);
+ pr_debug("gen: load_btf: size %d\n", btf_raw_size);
+ btf_data = add_data(gen, btf_raw_data, btf_raw_size);
+
+ attr.btf_size = btf_raw_size;
+ btf_load_attr = add_data(gen, &attr, attr_size);
+
+ /* populate union bpf_attr with user provided log details */
+ move_ctx2blob(gen, attr_field(btf_load_attr, btf_log_level), 4,
+ offsetof(struct bpf_loader_ctx, log_level), false);
+ move_ctx2blob(gen, attr_field(btf_load_attr, btf_log_size), 4,
+ offsetof(struct bpf_loader_ctx, log_size), false);
+ move_ctx2blob(gen, attr_field(btf_load_attr, btf_log_buf), 8,
+ offsetof(struct bpf_loader_ctx, log_buf), false);
+ /* populate union bpf_attr with a pointer to the BTF data */
+ emit_rel_store(gen, attr_field(btf_load_attr, btf), btf_data);
+ /* emit BTF_LOAD command */
+ emit_sys_bpf(gen, BPF_BTF_LOAD, btf_load_attr, attr_size);
+ debug_ret(gen, "btf_load size %d", btf_raw_size);
+ emit_check_err(gen);
+ /* remember btf_fd in the stack, if successful */
+ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7, stack_off(btf_fd)));
+}
+
+void bpf_gen__map_create(struct bpf_gen *gen,
+ enum bpf_map_type map_type,
+ const char *map_name,
+ __u32 key_size, __u32 value_size, __u32 max_entries,
+ struct bpf_map_create_opts *map_attr, int map_idx)
+{
+ int attr_size = offsetofend(union bpf_attr, map_extra);
+ bool close_inner_map_fd = false;
+ int map_create_attr, idx;
+ union bpf_attr attr;
+
+ memset(&attr, 0, attr_size);
+ attr.map_type = map_type;
+ attr.key_size = key_size;
+ attr.value_size = value_size;
+ attr.map_flags = map_attr->map_flags;
+ attr.map_extra = map_attr->map_extra;
+ if (map_name)
+ libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name));
+ attr.numa_node = map_attr->numa_node;
+ attr.map_ifindex = map_attr->map_ifindex;
+ attr.max_entries = max_entries;
+ attr.btf_key_type_id = map_attr->btf_key_type_id;
+ attr.btf_value_type_id = map_attr->btf_value_type_id;
+
+ pr_debug("gen: map_create: %s idx %d type %d value_type_id %d\n",
+ attr.map_name, map_idx, map_type, attr.btf_value_type_id);
+
+ map_create_attr = add_data(gen, &attr, attr_size);
+ if (attr.btf_value_type_id)
+ /* populate union bpf_attr with btf_fd saved in the stack earlier */
+ move_stack2blob(gen, attr_field(map_create_attr, btf_fd), 4,
+ stack_off(btf_fd));
+ switch (attr.map_type) {
+ case BPF_MAP_TYPE_ARRAY_OF_MAPS:
+ case BPF_MAP_TYPE_HASH_OF_MAPS:
+ move_stack2blob(gen, attr_field(map_create_attr, inner_map_fd), 4,
+ stack_off(inner_map_fd));
+ close_inner_map_fd = true;
+ break;
+ default:
+ break;
+ }
+ /* conditionally update max_entries */
+ if (map_idx >= 0)
+ move_ctx2blob(gen, attr_field(map_create_attr, max_entries), 4,
+ sizeof(struct bpf_loader_ctx) +
+ sizeof(struct bpf_map_desc) * map_idx +
+ offsetof(struct bpf_map_desc, max_entries),
+ true /* check that max_entries != 0 */);
+ /* emit MAP_CREATE command */
+ emit_sys_bpf(gen, BPF_MAP_CREATE, map_create_attr, attr_size);
+ debug_ret(gen, "map_create %s idx %d type %d value_size %d value_btf_id %d",
+ attr.map_name, map_idx, map_type, value_size,
+ attr.btf_value_type_id);
+ emit_check_err(gen);
+ /* remember map_fd in the stack, if successful */
+ if (map_idx < 0) {
+ /* This bpf_gen__map_create() function is called with map_idx >= 0
+ * for all maps that libbpf loading logic tracks.
+ * It's called with -1 to create an inner map.
+ */
+ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7,
+ stack_off(inner_map_fd)));
+ } else if (map_idx != gen->nr_maps) {
+ gen->error = -EDOM; /* internal bug */
+ return;
+ } else {
+ /* add_map_fd does gen->nr_maps++ */
+ idx = add_map_fd(gen);
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
+ 0, 0, 0, blob_fd_array_off(gen, idx)));
+ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_7, 0));
+ }
+ if (close_inner_map_fd)
+ emit_sys_close_stack(gen, stack_off(inner_map_fd));
+}
+
+void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *attach_name,
+ enum bpf_attach_type type)
+{
+ const char *prefix;
+ int kind, ret;
+
+ btf_get_kernel_prefix_kind(type, &prefix, &kind);
+ gen->attach_kind = kind;
+ ret = snprintf(gen->attach_target, sizeof(gen->attach_target), "%s%s",
+ prefix, attach_name);
+ if (ret >= sizeof(gen->attach_target))
+ gen->error = -ENOSPC;
+}
+
+static void emit_find_attach_target(struct bpf_gen *gen)
+{
+ int name, len = strlen(gen->attach_target) + 1;
+
+ pr_debug("gen: find_attach_tgt %s %d\n", gen->attach_target, gen->attach_kind);
+ name = add_data(gen, gen->attach_target, len);
+
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
+ 0, 0, 0, name));
+ emit(gen, BPF_MOV64_IMM(BPF_REG_2, len));
+ emit(gen, BPF_MOV64_IMM(BPF_REG_3, gen->attach_kind));
+ emit(gen, BPF_MOV64_IMM(BPF_REG_4, 0));
+ emit(gen, BPF_EMIT_CALL(BPF_FUNC_btf_find_by_name_kind));
+ emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0));
+ debug_ret(gen, "find_by_name_kind(%s,%d)",
+ gen->attach_target, gen->attach_kind);
+ emit_check_err(gen);
+ /* if successful, btf_id is in lower 32-bit of R7 and
+ * btf_obj_fd is in upper 32-bit
+ */
+}
+
+void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak,
+ bool is_typeless, bool is_ld64, int kind, int insn_idx)
+{
+ struct ksym_relo_desc *relo;
+
+ relo = libbpf_reallocarray(gen->relos, gen->relo_cnt + 1, sizeof(*relo));
+ if (!relo) {
+ gen->error = -ENOMEM;
+ return;
+ }
+ gen->relos = relo;
+ relo += gen->relo_cnt;
+ relo->name = name;
+ relo->is_weak = is_weak;
+ relo->is_typeless = is_typeless;
+ relo->is_ld64 = is_ld64;
+ relo->kind = kind;
+ relo->insn_idx = insn_idx;
+ gen->relo_cnt++;
+}
+
+/* returns existing ksym_desc with ref incremented, or inserts a new one */
+static struct ksym_desc *get_ksym_desc(struct bpf_gen *gen, struct ksym_relo_desc *relo)
+{
+ struct ksym_desc *kdesc;
+ int i;
+
+ for (i = 0; i < gen->nr_ksyms; i++) {
+ kdesc = &gen->ksyms[i];
+ if (kdesc->kind == relo->kind && kdesc->is_ld64 == relo->is_ld64 &&
+ !strcmp(kdesc->name, relo->name)) {
+ kdesc->ref++;
+ return kdesc;
+ }
+ }
+ kdesc = libbpf_reallocarray(gen->ksyms, gen->nr_ksyms + 1, sizeof(*kdesc));
+ if (!kdesc) {
+ gen->error = -ENOMEM;
+ return NULL;
+ }
+ gen->ksyms = kdesc;
+ kdesc = &gen->ksyms[gen->nr_ksyms++];
+ kdesc->name = relo->name;
+ kdesc->kind = relo->kind;
+ kdesc->ref = 1;
+ kdesc->off = 0;
+ kdesc->insn = 0;
+ kdesc->is_ld64 = relo->is_ld64;
+ return kdesc;
+}
+
+/* Overwrites BPF_REG_{0, 1, 2, 3, 4, 7}
+ * Returns result in BPF_REG_7
+ */
+static void emit_bpf_find_by_name_kind(struct bpf_gen *gen, struct ksym_relo_desc *relo)
+{
+ int name_off, len = strlen(relo->name) + 1;
+
+ name_off = add_data(gen, relo->name, len);
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
+ 0, 0, 0, name_off));
+ emit(gen, BPF_MOV64_IMM(BPF_REG_2, len));
+ emit(gen, BPF_MOV64_IMM(BPF_REG_3, relo->kind));
+ emit(gen, BPF_MOV64_IMM(BPF_REG_4, 0));
+ emit(gen, BPF_EMIT_CALL(BPF_FUNC_btf_find_by_name_kind));
+ emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0));
+ debug_ret(gen, "find_by_name_kind(%s,%d)", relo->name, relo->kind);
+}
+
+/* Overwrites BPF_REG_{0, 1, 2, 3, 4, 7}
+ * Returns result in BPF_REG_7
+ * Returns u64 symbol addr in BPF_REG_9
+ */
+static void emit_bpf_kallsyms_lookup_name(struct bpf_gen *gen, struct ksym_relo_desc *relo)
+{
+ int name_off, len = strlen(relo->name) + 1, res_off;
+
+ name_off = add_data(gen, relo->name, len);
+ res_off = add_data(gen, NULL, 8); /* res is u64 */
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
+ 0, 0, 0, name_off));
+ emit(gen, BPF_MOV64_IMM(BPF_REG_2, len));
+ emit(gen, BPF_MOV64_IMM(BPF_REG_3, 0));
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_4, BPF_PSEUDO_MAP_IDX_VALUE,
+ 0, 0, 0, res_off));
+ emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_4));
+ emit(gen, BPF_EMIT_CALL(BPF_FUNC_kallsyms_lookup_name));
+ emit(gen, BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0));
+ emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0));
+ debug_ret(gen, "kallsyms_lookup_name(%s,%d)", relo->name, relo->kind);
+}
+
+/* Expects:
+ * BPF_REG_8 - pointer to instruction
+ *
+ * We need to reuse BTF fd for same symbol otherwise each relocation takes a new
+ * index, while kernel limits total kfunc BTFs to 256. For duplicate symbols,
+ * this would mean a new BTF fd index for each entry. By pairing symbol name
+ * with index, we get the insn->imm, insn->off pairing that kernel uses for
+ * kfunc_tab, which becomes the effective limit even though all of them may
+ * share same index in fd_array (such that kfunc_btf_tab has 1 element).
+ */
+static void emit_relo_kfunc_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insn)
+{
+ struct ksym_desc *kdesc;
+ int btf_fd_idx;
+
+ kdesc = get_ksym_desc(gen, relo);
+ if (!kdesc)
+ return;
+ /* try to copy from existing bpf_insn */
+ if (kdesc->ref > 1) {
+ move_blob2blob(gen, insn + offsetof(struct bpf_insn, imm), 4,
+ kdesc->insn + offsetof(struct bpf_insn, imm));
+ move_blob2blob(gen, insn + offsetof(struct bpf_insn, off), 2,
+ kdesc->insn + offsetof(struct bpf_insn, off));
+ goto log;
+ }
+ /* remember insn offset, so we can copy BTF ID and FD later */
+ kdesc->insn = insn;
+ emit_bpf_find_by_name_kind(gen, relo);
+ if (!relo->is_weak)
+ emit_check_err(gen);
+ /* get index in fd_array to store BTF FD at */
+ btf_fd_idx = add_kfunc_btf_fd(gen);
+ if (btf_fd_idx > INT16_MAX) {
+ pr_warn("BTF fd off %d for kfunc %s exceeds INT16_MAX, cannot process relocation\n",
+ btf_fd_idx, relo->name);
+ gen->error = -E2BIG;
+ return;
+ }
+ kdesc->off = btf_fd_idx;
+ /* jump to success case */
+ emit(gen, BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0, 3));
+ /* set value for imm, off as 0 */
+ emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, offsetof(struct bpf_insn, imm), 0));
+ emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0));
+ /* skip success case for ret < 0 */
+ emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 10));
+ /* store btf_id into insn[insn_idx].imm */
+ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, offsetof(struct bpf_insn, imm)));
+ /* obtain fd in BPF_REG_9 */
+ emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_7));
+ emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_9, 32));
+ /* load fd_array slot pointer */
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE,
+ 0, 0, 0, blob_fd_array_off(gen, btf_fd_idx)));
+ /* store BTF fd in slot, 0 for vmlinux */
+ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_9, 0));
+ /* jump to insn[insn_idx].off store if fd denotes module BTF */
+ emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 2));
+ /* set the default value for off */
+ emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0));
+ /* skip BTF fd store for vmlinux BTF */
+ emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 1));
+ /* store index into insn[insn_idx].off */
+ emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), btf_fd_idx));
+log:
+ if (!gen->log_level)
+ return;
+ emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_8,
+ offsetof(struct bpf_insn, imm)));
+ emit(gen, BPF_LDX_MEM(BPF_H, BPF_REG_9, BPF_REG_8,
+ offsetof(struct bpf_insn, off)));
+ debug_regs(gen, BPF_REG_7, BPF_REG_9, " func (%s:count=%d): imm: %%d, off: %%d",
+ relo->name, kdesc->ref);
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE,
+ 0, 0, 0, blob_fd_array_off(gen, kdesc->off)));
+ emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_0, 0));
+ debug_regs(gen, BPF_REG_9, -1, " func (%s:count=%d): btf_fd",
+ relo->name, kdesc->ref);
+}
+
+static void emit_ksym_relo_log(struct bpf_gen *gen, struct ksym_relo_desc *relo,
+ int ref)
+{
+ if (!gen->log_level)
+ return;
+ emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_8,
+ offsetof(struct bpf_insn, imm)));
+ emit(gen, BPF_LDX_MEM(BPF_H, BPF_REG_9, BPF_REG_8, sizeof(struct bpf_insn) +
+ offsetof(struct bpf_insn, imm)));
+ debug_regs(gen, BPF_REG_7, BPF_REG_9, " var t=%d w=%d (%s:count=%d): imm[0]: %%d, imm[1]: %%d",
+ relo->is_typeless, relo->is_weak, relo->name, ref);
+ emit(gen, BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_8, offsetofend(struct bpf_insn, code)));
+ debug_regs(gen, BPF_REG_9, -1, " var t=%d w=%d (%s:count=%d): insn.reg",
+ relo->is_typeless, relo->is_weak, relo->name, ref);
+}
+
+/* Expects:
+ * BPF_REG_8 - pointer to instruction
+ */
+static void emit_relo_ksym_typeless(struct bpf_gen *gen,
+ struct ksym_relo_desc *relo, int insn)
+{
+ struct ksym_desc *kdesc;
+
+ kdesc = get_ksym_desc(gen, relo);
+ if (!kdesc)
+ return;
+ /* try to copy from existing ldimm64 insn */
+ if (kdesc->ref > 1) {
+ move_blob2blob(gen, insn + offsetof(struct bpf_insn, imm), 4,
+ kdesc->insn + offsetof(struct bpf_insn, imm));
+ move_blob2blob(gen, insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm), 4,
+ kdesc->insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm));
+ goto log;
+ }
+ /* remember insn offset, so we can copy ksym addr later */
+ kdesc->insn = insn;
+ /* skip typeless ksym_desc in fd closing loop in cleanup_relos */
+ kdesc->typeless = true;
+ emit_bpf_kallsyms_lookup_name(gen, relo);
+ emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_7, -ENOENT, 1));
+ emit_check_err(gen);
+ /* store lower half of addr into insn[insn_idx].imm */
+ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_9, offsetof(struct bpf_insn, imm)));
+ /* store upper half of addr into insn[insn_idx + 1].imm */
+ emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_9, 32));
+ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_9,
+ sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm)));
+log:
+ emit_ksym_relo_log(gen, relo, kdesc->ref);
+}
+
+static __u32 src_reg_mask(void)
+{
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ return 0x0f; /* src_reg,dst_reg,... */
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ return 0xf0; /* dst_reg,src_reg,... */
+#else
+#error "Unsupported bit endianness, cannot proceed"
+#endif
+}
+
+/* Expects:
+ * BPF_REG_8 - pointer to instruction
+ */
+static void emit_relo_ksym_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insn)
+{
+ struct ksym_desc *kdesc;
+ __u32 reg_mask;
+
+ kdesc = get_ksym_desc(gen, relo);
+ if (!kdesc)
+ return;
+ /* try to copy from existing ldimm64 insn */
+ if (kdesc->ref > 1) {
+ move_blob2blob(gen, insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm), 4,
+ kdesc->insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm));
+ move_blob2blob(gen, insn + offsetof(struct bpf_insn, imm), 4,
+ kdesc->insn + offsetof(struct bpf_insn, imm));
+ /* jump over src_reg adjustment if imm (btf_id) is not 0, reuse BPF_REG_0 from move_blob2blob
+ * If btf_id is zero, clear BPF_PSEUDO_BTF_ID flag in src_reg of ld_imm64 insn
+ */
+ emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3));
+ goto clear_src_reg;
+ }
+ /* remember insn offset, so we can copy BTF ID and FD later */
+ kdesc->insn = insn;
+ emit_bpf_find_by_name_kind(gen, relo);
+ if (!relo->is_weak)
+ emit_check_err(gen);
+ /* jump to success case */
+ emit(gen, BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0, 3));
+ /* set values for insn[insn_idx].imm, insn[insn_idx + 1].imm as 0 */
+ emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, offsetof(struct bpf_insn, imm), 0));
+ emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm), 0));
+ /* skip success case for ret < 0 */
+ emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 4));
+ /* store btf_id into insn[insn_idx].imm */
+ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, offsetof(struct bpf_insn, imm)));
+ /* store btf_obj_fd into insn[insn_idx + 1].imm */
+ emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_7, 32));
+ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7,
+ sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm)));
+ /* skip src_reg adjustment */
+ emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 3));
+clear_src_reg:
+ /* clear bpf_object__relocate_data's src_reg assignment, otherwise we get a verifier failure */
+ reg_mask = src_reg_mask();
+ emit(gen, BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_8, offsetofend(struct bpf_insn, code)));
+ emit(gen, BPF_ALU32_IMM(BPF_AND, BPF_REG_9, reg_mask));
+ emit(gen, BPF_STX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, offsetofend(struct bpf_insn, code)));
+
+ emit_ksym_relo_log(gen, relo, kdesc->ref);
+}
+
+void bpf_gen__record_relo_core(struct bpf_gen *gen,
+ const struct bpf_core_relo *core_relo)
+{
+ struct bpf_core_relo *relos;
+
+ relos = libbpf_reallocarray(gen->core_relos, gen->core_relo_cnt + 1, sizeof(*relos));
+ if (!relos) {
+ gen->error = -ENOMEM;
+ return;
+ }
+ gen->core_relos = relos;
+ relos += gen->core_relo_cnt;
+ memcpy(relos, core_relo, sizeof(*relos));
+ gen->core_relo_cnt++;
+}
+
+static void emit_relo(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insns)
+{
+ int insn;
+
+ pr_debug("gen: emit_relo (%d): %s at %d %s\n",
+ relo->kind, relo->name, relo->insn_idx, relo->is_ld64 ? "ld64" : "call");
+ insn = insns + sizeof(struct bpf_insn) * relo->insn_idx;
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_8, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, insn));
+ if (relo->is_ld64) {
+ if (relo->is_typeless)
+ emit_relo_ksym_typeless(gen, relo, insn);
+ else
+ emit_relo_ksym_btf(gen, relo, insn);
+ } else {
+ emit_relo_kfunc_btf(gen, relo, insn);
+ }
+}
+
+static void emit_relos(struct bpf_gen *gen, int insns)
+{
+ int i;
+
+ for (i = 0; i < gen->relo_cnt; i++)
+ emit_relo(gen, gen->relos + i, insns);
+}
+
+static void cleanup_core_relo(struct bpf_gen *gen)
+{
+ if (!gen->core_relo_cnt)
+ return;
+ free(gen->core_relos);
+ gen->core_relo_cnt = 0;
+ gen->core_relos = NULL;
+}
+
+static void cleanup_relos(struct bpf_gen *gen, int insns)
+{
+ struct ksym_desc *kdesc;
+ int i, insn;
+
+ for (i = 0; i < gen->nr_ksyms; i++) {
+ kdesc = &gen->ksyms[i];
+ /* only close fds for typed ksyms and kfuncs */
+ if (kdesc->is_ld64 && !kdesc->typeless) {
+ /* close fd recorded in insn[insn_idx + 1].imm */
+ insn = kdesc->insn;
+ insn += sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm);
+ emit_sys_close_blob(gen, insn);
+ } else if (!kdesc->is_ld64) {
+ emit_sys_close_blob(gen, blob_fd_array_off(gen, kdesc->off));
+ if (kdesc->off < MAX_FD_ARRAY_SZ)
+ gen->nr_fd_array--;
+ }
+ }
+ if (gen->nr_ksyms) {
+ free(gen->ksyms);
+ gen->nr_ksyms = 0;
+ gen->ksyms = NULL;
+ }
+ if (gen->relo_cnt) {
+ free(gen->relos);
+ gen->relo_cnt = 0;
+ gen->relos = NULL;
+ }
+ cleanup_core_relo(gen);
+}
+
+void bpf_gen__prog_load(struct bpf_gen *gen,
+ enum bpf_prog_type prog_type, const char *prog_name,
+ const char *license, struct bpf_insn *insns, size_t insn_cnt,
+ struct bpf_prog_load_opts *load_attr, int prog_idx)
+{
+ int prog_load_attr, license_off, insns_off, func_info, line_info, core_relos;
+ int attr_size = offsetofend(union bpf_attr, core_relo_rec_size);
+ union bpf_attr attr;
+
+ memset(&attr, 0, attr_size);
+ pr_debug("gen: prog_load: type %d insns_cnt %zd progi_idx %d\n",
+ prog_type, insn_cnt, prog_idx);
+ /* add license string to blob of bytes */
+ license_off = add_data(gen, license, strlen(license) + 1);
+ /* add insns to blob of bytes */
+ insns_off = add_data(gen, insns, insn_cnt * sizeof(struct bpf_insn));
+
+ attr.prog_type = prog_type;
+ attr.expected_attach_type = load_attr->expected_attach_type;
+ attr.attach_btf_id = load_attr->attach_btf_id;
+ attr.prog_ifindex = load_attr->prog_ifindex;
+ attr.kern_version = 0;
+ attr.insn_cnt = (__u32)insn_cnt;
+ attr.prog_flags = load_attr->prog_flags;
+
+ attr.func_info_rec_size = load_attr->func_info_rec_size;
+ attr.func_info_cnt = load_attr->func_info_cnt;
+ func_info = add_data(gen, load_attr->func_info,
+ attr.func_info_cnt * attr.func_info_rec_size);
+
+ attr.line_info_rec_size = load_attr->line_info_rec_size;
+ attr.line_info_cnt = load_attr->line_info_cnt;
+ line_info = add_data(gen, load_attr->line_info,
+ attr.line_info_cnt * attr.line_info_rec_size);
+
+ attr.core_relo_rec_size = sizeof(struct bpf_core_relo);
+ attr.core_relo_cnt = gen->core_relo_cnt;
+ core_relos = add_data(gen, gen->core_relos,
+ attr.core_relo_cnt * attr.core_relo_rec_size);
+
+ libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name));
+ prog_load_attr = add_data(gen, &attr, attr_size);
+
+ /* populate union bpf_attr with a pointer to license */
+ emit_rel_store(gen, attr_field(prog_load_attr, license), license_off);
+
+ /* populate union bpf_attr with a pointer to instructions */
+ emit_rel_store(gen, attr_field(prog_load_attr, insns), insns_off);
+
+ /* populate union bpf_attr with a pointer to func_info */
+ emit_rel_store(gen, attr_field(prog_load_attr, func_info), func_info);
+
+ /* populate union bpf_attr with a pointer to line_info */
+ emit_rel_store(gen, attr_field(prog_load_attr, line_info), line_info);
+
+ /* populate union bpf_attr with a pointer to core_relos */
+ emit_rel_store(gen, attr_field(prog_load_attr, core_relos), core_relos);
+
+ /* populate union bpf_attr fd_array with a pointer to data where map_fds are saved */
+ emit_rel_store(gen, attr_field(prog_load_attr, fd_array), gen->fd_array);
+
+ /* populate union bpf_attr with user provided log details */
+ move_ctx2blob(gen, attr_field(prog_load_attr, log_level), 4,
+ offsetof(struct bpf_loader_ctx, log_level), false);
+ move_ctx2blob(gen, attr_field(prog_load_attr, log_size), 4,
+ offsetof(struct bpf_loader_ctx, log_size), false);
+ move_ctx2blob(gen, attr_field(prog_load_attr, log_buf), 8,
+ offsetof(struct bpf_loader_ctx, log_buf), false);
+ /* populate union bpf_attr with btf_fd saved in the stack earlier */
+ move_stack2blob(gen, attr_field(prog_load_attr, prog_btf_fd), 4,
+ stack_off(btf_fd));
+ if (gen->attach_kind) {
+ emit_find_attach_target(gen);
+ /* populate union bpf_attr with btf_id and btf_obj_fd found by helper */
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE,
+ 0, 0, 0, prog_load_attr));
+ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7,
+ offsetof(union bpf_attr, attach_btf_id)));
+ emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_7, 32));
+ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7,
+ offsetof(union bpf_attr, attach_btf_obj_fd)));
+ }
+ emit_relos(gen, insns_off);
+ /* emit PROG_LOAD command */
+ emit_sys_bpf(gen, BPF_PROG_LOAD, prog_load_attr, attr_size);
+ debug_ret(gen, "prog_load %s insn_cnt %d", attr.prog_name, attr.insn_cnt);
+ /* successful or not, close btf module FDs used in extern ksyms and attach_btf_obj_fd */
+ cleanup_relos(gen, insns_off);
+ if (gen->attach_kind) {
+ emit_sys_close_blob(gen,
+ attr_field(prog_load_attr, attach_btf_obj_fd));
+ gen->attach_kind = 0;
+ }
+ emit_check_err(gen);
+ /* remember prog_fd in the stack, if successful */
+ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7,
+ stack_off(prog_fd[gen->nr_progs])));
+ gen->nr_progs++;
+}
+
+void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *pvalue,
+ __u32 value_size)
+{
+ int attr_size = offsetofend(union bpf_attr, flags);
+ int map_update_attr, value, key;
+ union bpf_attr attr;
+ int zero = 0;
+
+ memset(&attr, 0, attr_size);
+ pr_debug("gen: map_update_elem: idx %d\n", map_idx);
+
+ value = add_data(gen, pvalue, value_size);
+ key = add_data(gen, &zero, sizeof(zero));
+
+ /* if (map_desc[map_idx].initial_value) {
+ * if (ctx->flags & BPF_SKEL_KERNEL)
+ * bpf_probe_read_kernel(value, value_size, initial_value);
+ * else
+ * bpf_copy_from_user(value, value_size, initial_value);
+ * }
+ */
+ emit(gen, BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6,
+ sizeof(struct bpf_loader_ctx) +
+ sizeof(struct bpf_map_desc) * map_idx +
+ offsetof(struct bpf_map_desc, initial_value)));
+ emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0, 8));
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
+ 0, 0, 0, value));
+ emit(gen, BPF_MOV64_IMM(BPF_REG_2, value_size));
+ emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6,
+ offsetof(struct bpf_loader_ctx, flags)));
+ emit(gen, BPF_JMP_IMM(BPF_JSET, BPF_REG_0, BPF_SKEL_KERNEL, 2));
+ emit(gen, BPF_EMIT_CALL(BPF_FUNC_copy_from_user));
+ emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 1));
+ emit(gen, BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel));
+
+ map_update_attr = add_data(gen, &attr, attr_size);
+ move_blob2blob(gen, attr_field(map_update_attr, map_fd), 4,
+ blob_fd_array_off(gen, map_idx));
+ emit_rel_store(gen, attr_field(map_update_attr, key), key);
+ emit_rel_store(gen, attr_field(map_update_attr, value), value);
+ /* emit MAP_UPDATE_ELEM command */
+ emit_sys_bpf(gen, BPF_MAP_UPDATE_ELEM, map_update_attr, attr_size);
+ debug_ret(gen, "update_elem idx %d value_size %d", map_idx, value_size);
+ emit_check_err(gen);
+}
+
+void bpf_gen__populate_outer_map(struct bpf_gen *gen, int outer_map_idx, int slot,
+ int inner_map_idx)
+{
+ int attr_size = offsetofend(union bpf_attr, flags);
+ int map_update_attr, key;
+ union bpf_attr attr;
+
+ memset(&attr, 0, attr_size);
+ pr_debug("gen: populate_outer_map: outer %d key %d inner %d\n",
+ outer_map_idx, slot, inner_map_idx);
+
+ key = add_data(gen, &slot, sizeof(slot));
+
+ map_update_attr = add_data(gen, &attr, attr_size);
+ move_blob2blob(gen, attr_field(map_update_attr, map_fd), 4,
+ blob_fd_array_off(gen, outer_map_idx));
+ emit_rel_store(gen, attr_field(map_update_attr, key), key);
+ emit_rel_store(gen, attr_field(map_update_attr, value),
+ blob_fd_array_off(gen, inner_map_idx));
+
+ /* emit MAP_UPDATE_ELEM command */
+ emit_sys_bpf(gen, BPF_MAP_UPDATE_ELEM, map_update_attr, attr_size);
+ debug_ret(gen, "populate_outer_map outer %d key %d inner %d",
+ outer_map_idx, slot, inner_map_idx);
+ emit_check_err(gen);
+}
+
+void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx)
+{
+ int attr_size = offsetofend(union bpf_attr, map_fd);
+ int map_freeze_attr;
+ union bpf_attr attr;
+
+ memset(&attr, 0, attr_size);
+ pr_debug("gen: map_freeze: idx %d\n", map_idx);
+ map_freeze_attr = add_data(gen, &attr, attr_size);
+ move_blob2blob(gen, attr_field(map_freeze_attr, map_fd), 4,
+ blob_fd_array_off(gen, map_idx));
+ /* emit MAP_FREEZE command */
+ emit_sys_bpf(gen, BPF_MAP_FREEZE, map_freeze_attr, attr_size);
+ debug_ret(gen, "map_freeze");
+ emit_check_err(gen);
+}
diff --git a/tools/lib/bpf/hashmap.c b/tools/lib/bpf/hashmap.c
index 3c20b126d60d..140ee4055676 100644
--- a/tools/lib/bpf/hashmap.c
+++ b/tools/lib/bpf/hashmap.c
@@ -75,7 +75,7 @@ void hashmap__clear(struct hashmap *map)
void hashmap__free(struct hashmap *map)
{
- if (!map)
+ if (IS_ERR_OR_NULL(map))
return;
hashmap__clear(map);
@@ -128,7 +128,7 @@ static int hashmap_grow(struct hashmap *map)
}
static bool hashmap_find_entry(const struct hashmap *map,
- const void *key, size_t hash,
+ const long key, size_t hash,
struct hashmap_entry ***pprev,
struct hashmap_entry **entry)
{
@@ -151,18 +151,18 @@ static bool hashmap_find_entry(const struct hashmap *map,
return false;
}
-int hashmap__insert(struct hashmap *map, const void *key, void *value,
- enum hashmap_insert_strategy strategy,
- const void **old_key, void **old_value)
+int hashmap_insert(struct hashmap *map, long key, long value,
+ enum hashmap_insert_strategy strategy,
+ long *old_key, long *old_value)
{
struct hashmap_entry *entry;
size_t h;
int err;
if (old_key)
- *old_key = NULL;
+ *old_key = 0;
if (old_value)
- *old_value = NULL;
+ *old_value = 0;
h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
if (strategy != HASHMAP_APPEND &&
@@ -203,7 +203,7 @@ int hashmap__insert(struct hashmap *map, const void *key, void *value,
return 0;
}
-bool hashmap__find(const struct hashmap *map, const void *key, void **value)
+bool hashmap_find(const struct hashmap *map, long key, long *value)
{
struct hashmap_entry *entry;
size_t h;
@@ -217,8 +217,8 @@ bool hashmap__find(const struct hashmap *map, const void *key, void **value)
return true;
}
-bool hashmap__delete(struct hashmap *map, const void *key,
- const void **old_key, void **old_value)
+bool hashmap_delete(struct hashmap *map, long key,
+ long *old_key, long *old_value)
{
struct hashmap_entry **pprev, *entry;
size_t h;
@@ -238,4 +238,3 @@ bool hashmap__delete(struct hashmap *map, const void *key,
return true;
}
-
diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h
index d9b385fe808c..c12f8320e668 100644
--- a/tools/lib/bpf/hashmap.h
+++ b/tools/lib/bpf/hashmap.h
@@ -15,6 +15,9 @@
static inline size_t hash_bits(size_t h, int bits)
{
/* shuffle bits and return requested number of upper bits */
+ if (bits == 0)
+ return 0;
+
#if (__SIZEOF_SIZE_T__ == __SIZEOF_LONG_LONG__)
/* LP64 case */
return (h * 11400714819323198485llu) >> (__SIZEOF_LONG_LONG__ * 8 - bits);
@@ -37,12 +40,32 @@ static inline size_t str_hash(const char *s)
return h;
}
-typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx);
-typedef bool (*hashmap_equal_fn)(const void *key1, const void *key2, void *ctx);
+typedef size_t (*hashmap_hash_fn)(long key, void *ctx);
+typedef bool (*hashmap_equal_fn)(long key1, long key2, void *ctx);
+/*
+ * Hashmap interface is polymorphic, keys and values could be either
+ * long-sized integers or pointers, this is achieved as follows:
+ * - interface functions that operate on keys and values are hidden
+ * behind auxiliary macros, e.g. hashmap_insert <-> hashmap__insert;
+ * - these auxiliary macros cast the key and value parameters as
+ * long or long *, so the user does not have to specify the casts explicitly;
+ * - for pointer parameters (e.g. old_key) the size of the pointed
+ * type is verified by hashmap_cast_ptr using _Static_assert;
+ * - when iterating using hashmap__for_each_* forms
+ * hasmap_entry->key should be used for integer keys and
+ * hasmap_entry->pkey should be used for pointer keys,
+ * same goes for values.
+ */
struct hashmap_entry {
- const void *key;
- void *value;
+ union {
+ long key;
+ const void *pkey;
+ };
+ union {
+ long value;
+ void *pvalue;
+ };
struct hashmap_entry *next;
};
@@ -57,16 +80,6 @@ struct hashmap {
size_t sz;
};
-#define HASHMAP_INIT(hash_fn, equal_fn, ctx) { \
- .hash_fn = (hash_fn), \
- .equal_fn = (equal_fn), \
- .ctx = (ctx), \
- .buckets = NULL, \
- .cap = 0, \
- .cap_bits = 0, \
- .sz = 0, \
-}
-
void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn,
hashmap_equal_fn equal_fn, void *ctx);
struct hashmap *hashmap__new(hashmap_hash_fn hash_fn,
@@ -99,6 +112,13 @@ enum hashmap_insert_strategy {
HASHMAP_APPEND,
};
+#define hashmap_cast_ptr(p) ({ \
+ _Static_assert((__builtin_constant_p((p)) ? (p) == NULL : 0) || \
+ sizeof(*(p)) == sizeof(long), \
+ #p " pointee should be a long-sized integer or a pointer"); \
+ (long *)(p); \
+})
+
/*
* hashmap__insert() adds key/value entry w/ various semantics, depending on
* provided strategy value. If a given key/value pair replaced already
@@ -106,42 +126,38 @@ enum hashmap_insert_strategy {
* through old_key and old_value to allow calling code do proper memory
* management.
*/
-int hashmap__insert(struct hashmap *map, const void *key, void *value,
- enum hashmap_insert_strategy strategy,
- const void **old_key, void **old_value);
+int hashmap_insert(struct hashmap *map, long key, long value,
+ enum hashmap_insert_strategy strategy,
+ long *old_key, long *old_value);
-static inline int hashmap__add(struct hashmap *map,
- const void *key, void *value)
-{
- return hashmap__insert(map, key, value, HASHMAP_ADD, NULL, NULL);
-}
+#define hashmap__insert(map, key, value, strategy, old_key, old_value) \
+ hashmap_insert((map), (long)(key), (long)(value), (strategy), \
+ hashmap_cast_ptr(old_key), \
+ hashmap_cast_ptr(old_value))
-static inline int hashmap__set(struct hashmap *map,
- const void *key, void *value,
- const void **old_key, void **old_value)
-{
- return hashmap__insert(map, key, value, HASHMAP_SET,
- old_key, old_value);
-}
+#define hashmap__add(map, key, value) \
+ hashmap__insert((map), (key), (value), HASHMAP_ADD, NULL, NULL)
-static inline int hashmap__update(struct hashmap *map,
- const void *key, void *value,
- const void **old_key, void **old_value)
-{
- return hashmap__insert(map, key, value, HASHMAP_UPDATE,
- old_key, old_value);
-}
+#define hashmap__set(map, key, value, old_key, old_value) \
+ hashmap__insert((map), (key), (value), HASHMAP_SET, (old_key), (old_value))
-static inline int hashmap__append(struct hashmap *map,
- const void *key, void *value)
-{
- return hashmap__insert(map, key, value, HASHMAP_APPEND, NULL, NULL);
-}
+#define hashmap__update(map, key, value, old_key, old_value) \
+ hashmap__insert((map), (key), (value), HASHMAP_UPDATE, (old_key), (old_value))
+
+#define hashmap__append(map, key, value) \
+ hashmap__insert((map), (key), (value), HASHMAP_APPEND, NULL, NULL)
+
+bool hashmap_delete(struct hashmap *map, long key, long *old_key, long *old_value);
+
+#define hashmap__delete(map, key, old_key, old_value) \
+ hashmap_delete((map), (long)(key), \
+ hashmap_cast_ptr(old_key), \
+ hashmap_cast_ptr(old_value))
-bool hashmap__delete(struct hashmap *map, const void *key,
- const void **old_key, void **old_value);
+bool hashmap_find(const struct hashmap *map, long key, long *value);
-bool hashmap__find(const struct hashmap *map, const void *key, void **value);
+#define hashmap__find(map, key, value) \
+ hashmap_find((map), (long)(key), hashmap_cast_ptr(value))
/*
* hashmap__for_each_entry - iterate over all entries in hashmap
@@ -174,17 +190,17 @@ bool hashmap__find(const struct hashmap *map, const void *key, void **value);
* @key: key to iterate entries for
*/
#define hashmap__for_each_key_entry(map, cur, _key) \
- for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\
- map->cap_bits); \
- map->buckets ? map->buckets[bkt] : NULL; }); \
+ for (cur = map->buckets \
+ ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \
+ : NULL; \
cur; \
cur = cur->next) \
if (map->equal_fn(cur->key, (_key), map->ctx))
#define hashmap__for_each_key_entry_safe(map, cur, tmp, _key) \
- for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\
- map->cap_bits); \
- cur = map->buckets ? map->buckets[bkt] : NULL; }); \
+ for (cur = map->buckets \
+ ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \
+ : NULL; \
cur && ({ tmp = cur->next; true; }); \
cur = tmp) \
if (map->equal_fn(cur->key, (_key), map->ctx))
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 313034117070..efab29b8935b 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -31,11 +31,10 @@
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/filter.h>
-#include <linux/list.h>
#include <linux/limits.h>
#include <linux/perf_event.h>
+#include <linux/bpf_perf_event.h>
#include <linux/ring_buffer.h>
-#include <linux/version.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
@@ -54,15 +53,15 @@
#include "str_error.h"
#include "libbpf_internal.h"
#include "hashmap.h"
-
-#ifndef EM_BPF
-#define EM_BPF 247
-#endif
+#include "bpf_gen_internal.h"
+#include "zip.h"
#ifndef BPF_FS_MAGIC
#define BPF_FS_MAGIC 0xcafe4a11
#endif
+#define BPF_FS_DEFAULT_PATH "/sys/fs/bpf"
+
#define BPF_INSN_SZ (sizeof(struct bpf_insn))
/* vsprintf() in __base_pr() uses nonliteral format string. It may break
@@ -73,8 +72,157 @@
#define __printf(a, b) __attribute__((format(printf, a, b)))
static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
-static const struct btf_type *
-skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id);
+static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog);
+static int map_set_def_max_entries(struct bpf_map *map);
+
+static const char * const attach_type_name[] = {
+ [BPF_CGROUP_INET_INGRESS] = "cgroup_inet_ingress",
+ [BPF_CGROUP_INET_EGRESS] = "cgroup_inet_egress",
+ [BPF_CGROUP_INET_SOCK_CREATE] = "cgroup_inet_sock_create",
+ [BPF_CGROUP_INET_SOCK_RELEASE] = "cgroup_inet_sock_release",
+ [BPF_CGROUP_SOCK_OPS] = "cgroup_sock_ops",
+ [BPF_CGROUP_DEVICE] = "cgroup_device",
+ [BPF_CGROUP_INET4_BIND] = "cgroup_inet4_bind",
+ [BPF_CGROUP_INET6_BIND] = "cgroup_inet6_bind",
+ [BPF_CGROUP_INET4_CONNECT] = "cgroup_inet4_connect",
+ [BPF_CGROUP_INET6_CONNECT] = "cgroup_inet6_connect",
+ [BPF_CGROUP_UNIX_CONNECT] = "cgroup_unix_connect",
+ [BPF_CGROUP_INET4_POST_BIND] = "cgroup_inet4_post_bind",
+ [BPF_CGROUP_INET6_POST_BIND] = "cgroup_inet6_post_bind",
+ [BPF_CGROUP_INET4_GETPEERNAME] = "cgroup_inet4_getpeername",
+ [BPF_CGROUP_INET6_GETPEERNAME] = "cgroup_inet6_getpeername",
+ [BPF_CGROUP_UNIX_GETPEERNAME] = "cgroup_unix_getpeername",
+ [BPF_CGROUP_INET4_GETSOCKNAME] = "cgroup_inet4_getsockname",
+ [BPF_CGROUP_INET6_GETSOCKNAME] = "cgroup_inet6_getsockname",
+ [BPF_CGROUP_UNIX_GETSOCKNAME] = "cgroup_unix_getsockname",
+ [BPF_CGROUP_UDP4_SENDMSG] = "cgroup_udp4_sendmsg",
+ [BPF_CGROUP_UDP6_SENDMSG] = "cgroup_udp6_sendmsg",
+ [BPF_CGROUP_UNIX_SENDMSG] = "cgroup_unix_sendmsg",
+ [BPF_CGROUP_SYSCTL] = "cgroup_sysctl",
+ [BPF_CGROUP_UDP4_RECVMSG] = "cgroup_udp4_recvmsg",
+ [BPF_CGROUP_UDP6_RECVMSG] = "cgroup_udp6_recvmsg",
+ [BPF_CGROUP_UNIX_RECVMSG] = "cgroup_unix_recvmsg",
+ [BPF_CGROUP_GETSOCKOPT] = "cgroup_getsockopt",
+ [BPF_CGROUP_SETSOCKOPT] = "cgroup_setsockopt",
+ [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser",
+ [BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict",
+ [BPF_SK_SKB_VERDICT] = "sk_skb_verdict",
+ [BPF_SK_MSG_VERDICT] = "sk_msg_verdict",
+ [BPF_LIRC_MODE2] = "lirc_mode2",
+ [BPF_FLOW_DISSECTOR] = "flow_dissector",
+ [BPF_TRACE_RAW_TP] = "trace_raw_tp",
+ [BPF_TRACE_FENTRY] = "trace_fentry",
+ [BPF_TRACE_FEXIT] = "trace_fexit",
+ [BPF_MODIFY_RETURN] = "modify_return",
+ [BPF_LSM_MAC] = "lsm_mac",
+ [BPF_LSM_CGROUP] = "lsm_cgroup",
+ [BPF_SK_LOOKUP] = "sk_lookup",
+ [BPF_TRACE_ITER] = "trace_iter",
+ [BPF_XDP_DEVMAP] = "xdp_devmap",
+ [BPF_XDP_CPUMAP] = "xdp_cpumap",
+ [BPF_XDP] = "xdp",
+ [BPF_SK_REUSEPORT_SELECT] = "sk_reuseport_select",
+ [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_reuseport_select_or_migrate",
+ [BPF_PERF_EVENT] = "perf_event",
+ [BPF_TRACE_KPROBE_MULTI] = "trace_kprobe_multi",
+ [BPF_STRUCT_OPS] = "struct_ops",
+ [BPF_NETFILTER] = "netfilter",
+ [BPF_TCX_INGRESS] = "tcx_ingress",
+ [BPF_TCX_EGRESS] = "tcx_egress",
+ [BPF_TRACE_UPROBE_MULTI] = "trace_uprobe_multi",
+ [BPF_NETKIT_PRIMARY] = "netkit_primary",
+ [BPF_NETKIT_PEER] = "netkit_peer",
+};
+
+static const char * const link_type_name[] = {
+ [BPF_LINK_TYPE_UNSPEC] = "unspec",
+ [BPF_LINK_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
+ [BPF_LINK_TYPE_TRACING] = "tracing",
+ [BPF_LINK_TYPE_CGROUP] = "cgroup",
+ [BPF_LINK_TYPE_ITER] = "iter",
+ [BPF_LINK_TYPE_NETNS] = "netns",
+ [BPF_LINK_TYPE_XDP] = "xdp",
+ [BPF_LINK_TYPE_PERF_EVENT] = "perf_event",
+ [BPF_LINK_TYPE_KPROBE_MULTI] = "kprobe_multi",
+ [BPF_LINK_TYPE_STRUCT_OPS] = "struct_ops",
+ [BPF_LINK_TYPE_NETFILTER] = "netfilter",
+ [BPF_LINK_TYPE_TCX] = "tcx",
+ [BPF_LINK_TYPE_UPROBE_MULTI] = "uprobe_multi",
+ [BPF_LINK_TYPE_NETKIT] = "netkit",
+};
+
+static const char * const map_type_name[] = {
+ [BPF_MAP_TYPE_UNSPEC] = "unspec",
+ [BPF_MAP_TYPE_HASH] = "hash",
+ [BPF_MAP_TYPE_ARRAY] = "array",
+ [BPF_MAP_TYPE_PROG_ARRAY] = "prog_array",
+ [BPF_MAP_TYPE_PERF_EVENT_ARRAY] = "perf_event_array",
+ [BPF_MAP_TYPE_PERCPU_HASH] = "percpu_hash",
+ [BPF_MAP_TYPE_PERCPU_ARRAY] = "percpu_array",
+ [BPF_MAP_TYPE_STACK_TRACE] = "stack_trace",
+ [BPF_MAP_TYPE_CGROUP_ARRAY] = "cgroup_array",
+ [BPF_MAP_TYPE_LRU_HASH] = "lru_hash",
+ [BPF_MAP_TYPE_LRU_PERCPU_HASH] = "lru_percpu_hash",
+ [BPF_MAP_TYPE_LPM_TRIE] = "lpm_trie",
+ [BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps",
+ [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps",
+ [BPF_MAP_TYPE_DEVMAP] = "devmap",
+ [BPF_MAP_TYPE_DEVMAP_HASH] = "devmap_hash",
+ [BPF_MAP_TYPE_SOCKMAP] = "sockmap",
+ [BPF_MAP_TYPE_CPUMAP] = "cpumap",
+ [BPF_MAP_TYPE_XSKMAP] = "xskmap",
+ [BPF_MAP_TYPE_SOCKHASH] = "sockhash",
+ [BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage",
+ [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray",
+ [BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE] = "percpu_cgroup_storage",
+ [BPF_MAP_TYPE_QUEUE] = "queue",
+ [BPF_MAP_TYPE_STACK] = "stack",
+ [BPF_MAP_TYPE_SK_STORAGE] = "sk_storage",
+ [BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops",
+ [BPF_MAP_TYPE_RINGBUF] = "ringbuf",
+ [BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage",
+ [BPF_MAP_TYPE_TASK_STORAGE] = "task_storage",
+ [BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter",
+ [BPF_MAP_TYPE_USER_RINGBUF] = "user_ringbuf",
+ [BPF_MAP_TYPE_CGRP_STORAGE] = "cgrp_storage",
+ [BPF_MAP_TYPE_ARENA] = "arena",
+};
+
+static const char * const prog_type_name[] = {
+ [BPF_PROG_TYPE_UNSPEC] = "unspec",
+ [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter",
+ [BPF_PROG_TYPE_KPROBE] = "kprobe",
+ [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls",
+ [BPF_PROG_TYPE_SCHED_ACT] = "sched_act",
+ [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint",
+ [BPF_PROG_TYPE_XDP] = "xdp",
+ [BPF_PROG_TYPE_PERF_EVENT] = "perf_event",
+ [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb",
+ [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock",
+ [BPF_PROG_TYPE_LWT_IN] = "lwt_in",
+ [BPF_PROG_TYPE_LWT_OUT] = "lwt_out",
+ [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit",
+ [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops",
+ [BPF_PROG_TYPE_SK_SKB] = "sk_skb",
+ [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device",
+ [BPF_PROG_TYPE_SK_MSG] = "sk_msg",
+ [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
+ [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
+ [BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local",
+ [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2",
+ [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport",
+ [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector",
+ [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl",
+ [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable",
+ [BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt",
+ [BPF_PROG_TYPE_TRACING] = "tracing",
+ [BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops",
+ [BPF_PROG_TYPE_EXT] = "ext",
+ [BPF_PROG_TYPE_LSM] = "lsm",
+ [BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup",
+ [BPF_PROG_TYPE_SYSCALL] = "syscall",
+ [BPF_PROG_TYPE_NETFILTER] = "netfilter",
+};
static int __base_pr(enum libbpf_print_level level, const char *format,
va_list args)
@@ -89,9 +237,10 @@ static libbpf_print_fn_t __libbpf_pr = __base_pr;
libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
{
- libbpf_print_fn_t old_print_fn = __libbpf_pr;
+ libbpf_print_fn_t old_print_fn;
+
+ old_print_fn = __atomic_exchange_n(&__libbpf_pr, fn, __ATOMIC_RELAXED);
- __libbpf_pr = fn;
return old_print_fn;
}
@@ -99,13 +248,20 @@ __printf(2, 3)
void libbpf_print(enum libbpf_print_level level, const char *format, ...)
{
va_list args;
+ int old_errno;
+ libbpf_print_fn_t print_fn;
- if (!__libbpf_pr)
+ print_fn = __atomic_load_n(&__libbpf_pr, __ATOMIC_RELAXED);
+ if (!print_fn)
return;
+ old_errno = errno;
+
va_start(args, format);
__libbpf_pr(level, format, args);
va_end(args);
+
+ errno = old_errno;
}
static void pr_perm_msg(int err)
@@ -155,62 +311,89 @@ static inline __u64 ptr_to_u64(const void *ptr)
return (__u64) (unsigned long) ptr;
}
-enum kern_feature_id {
- /* v4.14: kernel support for program & map names. */
- FEAT_PROG_NAME,
- /* v5.2: kernel support for global data sections. */
- FEAT_GLOBAL_DATA,
- /* BTF support */
- FEAT_BTF,
- /* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */
- FEAT_BTF_FUNC,
- /* BTF_KIND_VAR and BTF_KIND_DATASEC support */
- FEAT_BTF_DATASEC,
- /* BTF_FUNC_GLOBAL is supported */
- FEAT_BTF_GLOBAL_FUNC,
- /* BPF_F_MMAPABLE is supported for arrays */
- FEAT_ARRAY_MMAP,
- /* kernel support for expected_attach_type in BPF_PROG_LOAD */
- FEAT_EXP_ATTACH_TYPE,
- /* bpf_probe_read_{kernel,user}[_str] helpers */
- FEAT_PROBE_READ_KERN,
- /* BPF_PROG_BIND_MAP is supported */
- FEAT_PROG_BIND_MAP,
- __FEAT_CNT,
-};
+int libbpf_set_strict_mode(enum libbpf_strict_mode mode)
+{
+ /* as of v1.0 libbpf_set_strict_mode() is a no-op */
+ return 0;
+}
+
+__u32 libbpf_major_version(void)
+{
+ return LIBBPF_MAJOR_VERSION;
+}
+
+__u32 libbpf_minor_version(void)
+{
+ return LIBBPF_MINOR_VERSION;
+}
-static bool kernel_supports(enum kern_feature_id feat_id);
+const char *libbpf_version_string(void)
+{
+#define __S(X) #X
+#define _S(X) __S(X)
+ return "v" _S(LIBBPF_MAJOR_VERSION) "." _S(LIBBPF_MINOR_VERSION);
+#undef _S
+#undef __S
+}
enum reloc_type {
RELO_LD64,
RELO_CALL,
RELO_DATA,
- RELO_EXTERN,
+ RELO_EXTERN_LD64,
+ RELO_EXTERN_CALL,
+ RELO_SUBPROG_ADDR,
+ RELO_CORE,
};
struct reloc_desc {
enum reloc_type type;
int insn_idx;
- int map_idx;
- int sym_off;
- bool processed;
+ union {
+ const struct bpf_core_relo *core_relo; /* used when type == RELO_CORE */
+ struct {
+ int map_idx;
+ int sym_off;
+ int ext_idx;
+ };
+ };
};
-struct bpf_sec_def;
-
-typedef struct bpf_link *(*attach_fn_t)(const struct bpf_sec_def *sec,
- struct bpf_program *prog);
+/* stored as sec_def->cookie for all libbpf-supported SEC()s */
+enum sec_def_flags {
+ SEC_NONE = 0,
+ /* expected_attach_type is optional, if kernel doesn't support that */
+ SEC_EXP_ATTACH_OPT = 1,
+ /* legacy, only used by libbpf_get_type_names() and
+ * libbpf_attach_type_by_name(), not used by libbpf itself at all.
+ * This used to be associated with cgroup (and few other) BPF programs
+ * that were attachable through BPF_PROG_ATTACH command. Pretty
+ * meaningless nowadays, though.
+ */
+ SEC_ATTACHABLE = 2,
+ SEC_ATTACHABLE_OPT = SEC_ATTACHABLE | SEC_EXP_ATTACH_OPT,
+ /* attachment target is specified through BTF ID in either kernel or
+ * other BPF program's BTF object
+ */
+ SEC_ATTACH_BTF = 4,
+ /* BPF program type allows sleeping/blocking in kernel */
+ SEC_SLEEPABLE = 8,
+ /* BPF program support non-linear XDP buffer */
+ SEC_XDP_FRAGS = 16,
+ /* Setup proper attach type for usdt probes. */
+ SEC_USDT = 32,
+};
struct bpf_sec_def {
- const char *sec;
- size_t len;
+ char *sec;
enum bpf_prog_type prog_type;
enum bpf_attach_type expected_attach_type;
- bool is_exp_attach_type_optional;
- bool is_attachable;
- bool is_attach_btf;
- bool is_sleepable;
- attach_fn_t attach_fn;
+ long cookie;
+ int handler_id;
+
+ libbpf_prog_setup_fn_t prog_setup_fn;
+ libbpf_prog_prepare_load_fn_t prog_prepare_load_fn;
+ libbpf_prog_attach_fn_t prog_attach_fn;
};
/*
@@ -218,9 +401,10 @@ struct bpf_sec_def {
* linux/filter.h.
*/
struct bpf_program {
- const struct bpf_sec_def *sec_def;
+ char *name;
char *sec_name;
size_t sec_idx;
+ const struct bpf_sec_def *sec_def;
/* this program's instruction offset (in number of instructions)
* within its containing ELF section
*/
@@ -240,12 +424,6 @@ struct bpf_program {
*/
size_t sub_insn_off;
- char *name;
- /* sec_name with / replaced by _; makes recursive pinning
- * in bpf_object__pin_programs easier
- */
- char *pin_name;
-
/* instructions that belong to BPF program; insns[0] is located at
* sec_insn_off instruction within its ELF section in ELF file, so
* when mapping ELF file instruction index to the local instruction,
@@ -260,24 +438,28 @@ struct bpf_program {
struct reloc_desc *reloc_desc;
int nr_reloc;
- int log_level;
- struct {
- int nr;
- int *fds;
- } instances;
- bpf_program_prep_t preprocessor;
+ /* BPF verifier log settings */
+ char *log_buf;
+ size_t log_size;
+ __u32 log_level;
struct bpf_object *obj;
- void *priv;
- bpf_program_clear_priv_t clear_priv;
- bool load;
+ int fd;
+ bool autoload;
+ bool autoattach;
+ bool sym_global;
+ bool mark_btf_static;
enum bpf_prog_type type;
enum bpf_attach_type expected_attach_type;
+ int exception_cb_idx;
+
int prog_ifindex;
+ __u32 attach_btf_obj_fd;
__u32 attach_btf_id;
__u32 attach_prog_fd;
+
void *func_info;
__u32 func_info_rec_size;
__u32 func_info_cnt;
@@ -315,6 +497,8 @@ struct bpf_struct_ops {
#define KCONFIG_SEC ".kconfig"
#define KSYMS_SEC ".ksyms"
#define STRUCT_OPS_SEC ".struct_ops"
+#define STRUCT_OPS_LINK_SEC ".struct_ops.link"
+#define ARENA_SEC ".arena.1"
enum libbpf_map_type {
LIBBPF_MAP_UNSPEC,
@@ -324,15 +508,23 @@ enum libbpf_map_type {
LIBBPF_MAP_KCONFIG,
};
-static const char * const libbpf_type_to_btf_name[] = {
- [LIBBPF_MAP_DATA] = DATA_SEC,
- [LIBBPF_MAP_BSS] = BSS_SEC,
- [LIBBPF_MAP_RODATA] = RODATA_SEC,
- [LIBBPF_MAP_KCONFIG] = KCONFIG_SEC,
+struct bpf_map_def {
+ unsigned int type;
+ unsigned int key_size;
+ unsigned int value_size;
+ unsigned int max_entries;
+ unsigned int map_flags;
};
struct bpf_map {
+ struct bpf_object *obj;
char *name;
+ /* real_name is defined for special internal maps (.rodata*,
+ * .data*, .bss, .kconfig) and preserves their original ELF section
+ * name. This is important to be able to find corresponding BTF
+ * DATASEC information.
+ */
+ char *real_name;
int fd;
int sec_idx;
size_t sec_offset;
@@ -341,11 +533,10 @@ struct bpf_map {
struct bpf_map_def def;
__u32 numa_node;
__u32 btf_var_idx;
+ int mod_btf_fd;
__u32 btf_key_type_id;
__u32 btf_value_type_id;
__u32 btf_vmlinux_value_type_id;
- void *priv;
- bpf_map_clear_priv_t clear_priv;
enum libbpf_map_type libbpf_type;
void *mmaped;
struct bpf_struct_ops *st_ops;
@@ -355,6 +546,8 @@ struct bpf_map {
char *pin_path;
bool pinned;
bool reused;
+ bool autocreate;
+ __u64 map_extra;
};
enum extern_type {
@@ -378,6 +571,7 @@ struct extern_desc {
int btf_id;
int sec_btf_id;
const char *name;
+ char *essent_name;
bool is_set;
bool is_weak;
union {
@@ -392,15 +586,64 @@ struct extern_desc {
unsigned long long addr;
/* target btf_id of the corresponding kernel var. */
- int vmlinux_btf_id;
+ int kernel_btf_obj_fd;
+ int kernel_btf_id;
/* local btf_id of the ksym extern's type. */
__u32 type_id;
+ /* BTF fd index to be patched in for insn->off, this is
+ * 0 for vmlinux BTF, index in obj->fd_array for module
+ * BTF
+ */
+ __s16 btf_fd_idx;
} ksym;
};
};
-static LIST_HEAD(bpf_objects_list);
+struct module_btf {
+ struct btf *btf;
+ char *name;
+ __u32 id;
+ int fd;
+ int fd_array_idx;
+};
+
+enum sec_type {
+ SEC_UNUSED = 0,
+ SEC_RELO,
+ SEC_BSS,
+ SEC_DATA,
+ SEC_RODATA,
+ SEC_ST_OPS,
+};
+
+struct elf_sec_desc {
+ enum sec_type sec_type;
+ Elf64_Shdr *shdr;
+ Elf_Data *data;
+};
+
+struct elf_state {
+ int fd;
+ const void *obj_buf;
+ size_t obj_buf_sz;
+ Elf *elf;
+ Elf64_Ehdr *ehdr;
+ Elf_Data *symbols;
+ Elf_Data *arena_data;
+ size_t shstrndx; /* section index for section name strings */
+ size_t strtabidx;
+ struct elf_sec_desc *secs;
+ size_t sec_cnt;
+ int btf_maps_shndx;
+ __u32 btf_maps_sec_btf_id;
+ int text_shndx;
+ int symbols_shndx;
+ bool has_st_ops;
+ int arena_data_shndx;
+};
+
+struct usdt_manager;
struct bpf_object {
char name[BPF_OBJ_NAME_LEN];
@@ -417,95 +660,73 @@ struct bpf_object {
struct extern_desc *externs;
int nr_extern;
int kconfig_map_idx;
- int rodata_map_idx;
bool loaded;
bool has_subcalls;
+ bool has_rodata;
- /*
- * Information when doing elf related work. Only valid if fd
- * is valid.
- */
- struct {
- int fd;
- const void *obj_buf;
- size_t obj_buf_sz;
- Elf *elf;
- GElf_Ehdr ehdr;
- Elf_Data *symbols;
- Elf_Data *data;
- Elf_Data *rodata;
- Elf_Data *bss;
- Elf_Data *st_ops_data;
- size_t shstrndx; /* section index for section name strings */
- size_t strtabidx;
- struct {
- GElf_Shdr shdr;
- Elf_Data *data;
- } *reloc_sects;
- int nr_reloc_sects;
- int maps_shndx;
- int btf_maps_shndx;
- __u32 btf_maps_sec_btf_id;
- int text_shndx;
- int symbols_shndx;
- int data_shndx;
- int rodata_shndx;
- int bss_shndx;
- int st_ops_shndx;
- } efile;
- /*
- * All loaded bpf_object is linked in a list, which is
- * hidden to caller. bpf_objects__<func> handlers deal with
- * all objects.
- */
- struct list_head list;
+ struct bpf_gen *gen_loader;
+
+ /* Information when doing ELF related work. Only valid if efile.elf is not NULL */
+ struct elf_state efile;
struct btf *btf;
+ struct btf_ext *btf_ext;
+
/* Parse and load BTF vmlinux if any of the programs in the object need
* it at load time.
*/
struct btf *btf_vmlinux;
- struct btf_ext *btf_ext;
-
- void *priv;
- bpf_object_clear_priv_t clear_priv;
+ /* Path to the custom BTF to be used for BPF CO-RE relocations as an
+ * override for vmlinux BTF.
+ */
+ char *btf_custom_path;
+ /* vmlinux BTF override for CO-RE relocations */
+ struct btf *btf_vmlinux_override;
+ /* Lazily initialized kernel module BTFs */
+ struct module_btf *btf_modules;
+ bool btf_modules_loaded;
+ size_t btf_module_cnt;
+ size_t btf_module_cap;
+
+ /* optional log settings passed to BPF_BTF_LOAD and BPF_PROG_LOAD commands */
+ char *log_buf;
+ size_t log_size;
+ __u32 log_level;
+
+ int *fd_array;
+ size_t fd_array_cap;
+ size_t fd_array_cnt;
+
+ struct usdt_manager *usdt_man;
+
+ struct bpf_map *arena_map;
+ void *arena_data;
+ size_t arena_data_sz;
+
+ struct kern_feature_cache *feat_cache;
+ char *token_path;
+ int token_fd;
char path[];
};
-#define obj_elf_valid(o) ((o)->efile.elf)
static const char *elf_sym_str(const struct bpf_object *obj, size_t off);
static const char *elf_sec_str(const struct bpf_object *obj, size_t off);
static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx);
static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name);
-static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr);
+static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn);
static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn);
static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn);
-static int elf_sym_by_sec_off(const struct bpf_object *obj, size_t sec_idx,
- size_t off, __u32 sym_type, GElf_Sym *sym);
+static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx);
+static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx);
void bpf_program__unload(struct bpf_program *prog)
{
- int i;
-
if (!prog)
return;
- /*
- * If the object is opened but the program was never loaded,
- * it is possible that prog->instances.nr == -1.
- */
- if (prog->instances.nr > 0) {
- for (i = 0; i < prog->instances.nr; i++)
- zclose(prog->instances.fds[i]);
- } else if (prog->instances.nr != -1) {
- pr_warn("Internal error: instances.nr is %d\n",
- prog->instances.nr);
- }
-
- prog->instances.nr = -1;
- zfree(&prog->instances.fds);
+ zclose(prog->fd);
zfree(&prog->func_info);
zfree(&prog->line_info);
@@ -516,16 +737,9 @@ static void bpf_program__exit(struct bpf_program *prog)
if (!prog)
return;
- if (prog->clear_priv)
- prog->clear_priv(prog, prog->priv);
-
- prog->priv = NULL;
- prog->clear_priv = NULL;
-
bpf_program__unload(prog);
zfree(&prog->name);
zfree(&prog->sec_name);
- zfree(&prog->pin_name);
zfree(&prog->insns);
zfree(&prog->reloc_desc);
@@ -534,17 +748,6 @@ static void bpf_program__exit(struct bpf_program *prog)
prog->sec_idx = -1;
}
-static char *__bpf_program__pin_name(struct bpf_program *prog)
-{
- char *name, *p;
-
- name = p = strdup(prog->sec_name);
- while ((p = strchr(p, '/')))
- *p = '_';
-
- return name;
-}
-
static bool insn_is_subprog_call(const struct bpf_insn *insn)
{
return BPF_CLASS(insn->code) == BPF_JMP &&
@@ -555,13 +758,21 @@ static bool insn_is_subprog_call(const struct bpf_insn *insn)
insn->off == 0;
}
+static bool is_call_insn(const struct bpf_insn *insn)
+{
+ return insn->code == (BPF_JMP | BPF_CALL);
+}
+
+static bool insn_is_pseudo_func(struct bpf_insn *insn)
+{
+ return is_ldimm64_insn(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
+}
+
static int
bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
const char *name, size_t sec_idx, const char *sec_name,
size_t sec_off, void *insn_data, size_t insn_data_sz)
{
- int i;
-
if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) {
pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n",
sec_name, name, sec_off, insn_data_sz);
@@ -578,10 +789,25 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
prog->insns_cnt = prog->sec_insn_cnt;
prog->type = BPF_PROG_TYPE_UNSPEC;
- prog->load = true;
+ prog->fd = -1;
+ prog->exception_cb_idx = -1;
+
+ /* libbpf's convention for SEC("?abc...") is that it's just like
+ * SEC("abc...") but the corresponding bpf_program starts out with
+ * autoload set to false.
+ */
+ if (sec_name[0] == '?') {
+ prog->autoload = false;
+ /* from now on forget there was ? in section name */
+ sec_name++;
+ } else {
+ prog->autoload = true;
+ }
+
+ prog->autoattach = true;
- prog->instances.fds = NULL;
- prog->instances.nr = -1;
+ /* inherit object's log_level */
+ prog->log_level = obj->log_level;
prog->sec_name = strdup(sec_name);
if (!prog->sec_name)
@@ -591,22 +817,11 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
if (!prog->name)
goto errout;
- prog->pin_name = __bpf_program__pin_name(prog);
- if (!prog->pin_name)
- goto errout;
-
prog->insns = malloc(insn_data_sz);
if (!prog->insns)
goto errout;
memcpy(prog->insns, insn_data, insn_data_sz);
- for (i = 0; i < prog->insns_cnt; i++) {
- if (insn_is_subprog_call(&prog->insns[i])) {
- obj->has_subcalls = true;
- break;
- }
- }
-
return 0;
errout:
pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name);
@@ -618,27 +833,30 @@ static int
bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
const char *sec_name, int sec_idx)
{
+ Elf_Data *symbols = obj->efile.symbols;
struct bpf_program *prog, *progs;
void *data = sec_data->d_buf;
- size_t sec_sz = sec_data->d_size, sec_off, prog_sz;
- int nr_progs, err;
+ size_t sec_sz = sec_data->d_size, sec_off, prog_sz, nr_syms;
+ int nr_progs, err, i;
const char *name;
- GElf_Sym sym;
+ Elf64_Sym *sym;
progs = obj->programs;
nr_progs = obj->nr_programs;
- sec_off = 0;
+ nr_syms = symbols->d_size / sizeof(Elf64_Sym);
- while (sec_off < sec_sz) {
- if (elf_sym_by_sec_off(obj, sec_idx, sec_off, STT_FUNC, &sym)) {
- pr_warn("sec '%s': failed to find program symbol at offset %zu\n",
- sec_name, sec_off);
- return -LIBBPF_ERRNO__FORMAT;
- }
+ for (i = 0; i < nr_syms; i++) {
+ sym = elf_sym_by_idx(obj, i);
+
+ if (sym->st_shndx != sec_idx)
+ continue;
+ if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
+ continue;
- prog_sz = sym.st_size;
+ prog_sz = sym->st_size;
+ sec_off = sym->st_value;
- name = elf_sym_str(obj, sym.st_name);
+ name = elf_sym_str(obj, sym->st_name);
if (!name) {
pr_warn("sec '%s': failed to get symbol name for offset %zu\n",
sec_name, sec_off);
@@ -651,6 +869,11 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
return -LIBBPF_ERRNO__FORMAT;
}
+ if (sec_idx != obj->efile.text_shndx && ELF64_ST_BIND(sym->st_info) == STB_LOCAL) {
+ pr_warn("sec '%s': program '%s' is static and not supported\n", sec_name, name);
+ return -ENOTSUP;
+ }
+
pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n",
sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz);
@@ -674,26 +897,25 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
if (err)
return err;
+ if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL)
+ prog->sym_global = true;
+
+ /* if function is a global/weak symbol, but has restricted
+ * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC
+ * as static to enable more permissive BPF verification mode
+ * with more outside context available to BPF verifier
+ */
+ if (prog->sym_global && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
+ || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL))
+ prog->mark_btf_static = true;
+
nr_progs++;
obj->nr_programs = nr_progs;
-
- sec_off += prog_sz;
}
return 0;
}
-static __u32 get_kernel_version(void)
-{
- __u32 major, minor, patch;
- struct utsname info;
-
- uname(&info);
- if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3)
- return 0;
- return KERNEL_VERSION(major, minor, patch);
-}
-
static const struct btf_member *
find_member_by_offset(const struct btf_type *t, __u32 bit_offset)
{
@@ -723,22 +945,33 @@ find_member_by_name(const struct btf *btf, const struct btf_type *t,
return NULL;
}
+static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
+ __u16 kind, struct btf **res_btf,
+ struct module_btf **res_mod_btf);
+
#define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_"
static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
const char *name, __u32 kind);
static int
-find_struct_ops_kern_types(const struct btf *btf, const char *tname,
+find_struct_ops_kern_types(struct bpf_object *obj, const char *tname_raw,
+ struct module_btf **mod_btf,
const struct btf_type **type, __u32 *type_id,
const struct btf_type **vtype, __u32 *vtype_id,
const struct btf_member **data_member)
{
const struct btf_type *kern_type, *kern_vtype;
const struct btf_member *kern_data_member;
+ struct btf *btf;
__s32 kern_vtype_id, kern_type_id;
+ char tname[256];
__u32 i;
- kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT);
+ snprintf(tname, sizeof(tname), "%.*s",
+ (int)bpf_core_essential_name_len(tname_raw), tname_raw);
+
+ kern_type_id = find_ksym_btf_id(obj, tname, BTF_KIND_STRUCT,
+ &btf, mod_btf);
if (kern_type_id < 0) {
pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n",
tname);
@@ -791,15 +1024,72 @@ static bool bpf_map__is_struct_ops(const struct bpf_map *map)
return map->def.type == BPF_MAP_TYPE_STRUCT_OPS;
}
+static bool is_valid_st_ops_program(struct bpf_object *obj,
+ const struct bpf_program *prog)
+{
+ int i;
+
+ for (i = 0; i < obj->nr_programs; i++) {
+ if (&obj->programs[i] == prog)
+ return prog->type == BPF_PROG_TYPE_STRUCT_OPS;
+ }
+
+ return false;
+}
+
+/* For each struct_ops program P, referenced from some struct_ops map M,
+ * enable P.autoload if there are Ms for which M.autocreate is true,
+ * disable P.autoload if for all Ms M.autocreate is false.
+ * Don't change P.autoload for programs that are not referenced from any maps.
+ */
+static int bpf_object_adjust_struct_ops_autoload(struct bpf_object *obj)
+{
+ struct bpf_program *prog, *slot_prog;
+ struct bpf_map *map;
+ int i, j, k, vlen;
+
+ for (i = 0; i < obj->nr_programs; ++i) {
+ int should_load = false;
+ int use_cnt = 0;
+
+ prog = &obj->programs[i];
+ if (prog->type != BPF_PROG_TYPE_STRUCT_OPS)
+ continue;
+
+ for (j = 0; j < obj->nr_maps; ++j) {
+ map = &obj->maps[j];
+ if (!bpf_map__is_struct_ops(map))
+ continue;
+
+ vlen = btf_vlen(map->st_ops->type);
+ for (k = 0; k < vlen; ++k) {
+ slot_prog = map->st_ops->progs[k];
+ if (prog != slot_prog)
+ continue;
+
+ use_cnt++;
+ if (map->autocreate)
+ should_load = true;
+ }
+ }
+ if (use_cnt)
+ prog->autoload = should_load;
+ }
+
+ return 0;
+}
+
/* Init the map's fields that depend on kern_btf */
-static int bpf_map__init_kern_struct_ops(struct bpf_map *map,
- const struct btf *btf,
- const struct btf *kern_btf)
+static int bpf_map__init_kern_struct_ops(struct bpf_map *map)
{
const struct btf_member *member, *kern_member, *kern_data_member;
const struct btf_type *type, *kern_type, *kern_vtype;
__u32 i, kern_type_id, kern_vtype_id, kern_data_off;
+ struct bpf_object *obj = map->obj;
+ const struct btf *btf = obj->btf;
struct bpf_struct_ops *st_ops;
+ const struct btf *kern_btf;
+ struct module_btf *mod_btf;
void *data, *kern_data;
const char *tname;
int err;
@@ -807,16 +1097,19 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map,
st_ops = map->st_ops;
type = st_ops->type;
tname = st_ops->tname;
- err = find_struct_ops_kern_types(kern_btf, tname,
+ err = find_struct_ops_kern_types(obj, tname, &mod_btf,
&kern_type, &kern_type_id,
&kern_vtype, &kern_vtype_id,
&kern_data_member);
if (err)
return err;
+ kern_btf = mod_btf ? mod_btf->btf : obj->btf_vmlinux;
+
pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n",
map->name, st_ops->type_id, kern_type_id, kern_vtype_id);
+ map->mod_btf_fd = mod_btf ? mod_btf->fd : -1;
map->def.value_size = kern_vtype->size;
map->btf_vmlinux_value_type_id = kern_vtype_id;
@@ -874,26 +1167,59 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map,
if (btf_is_ptr(mtype)) {
struct bpf_program *prog;
- mtype = skip_mods_and_typedefs(btf, mtype->type, &mtype_id);
+ /* Update the value from the shadow type */
+ prog = *(void **)mdata;
+ st_ops->progs[i] = prog;
+ if (!prog)
+ continue;
+ if (!is_valid_st_ops_program(obj, prog)) {
+ pr_warn("struct_ops init_kern %s: member %s is not a struct_ops program\n",
+ map->name, mname);
+ return -ENOTSUP;
+ }
+
kern_mtype = skip_mods_and_typedefs(kern_btf,
kern_mtype->type,
&kern_mtype_id);
- if (!btf_is_func_proto(mtype) ||
- !btf_is_func_proto(kern_mtype)) {
- pr_warn("struct_ops init_kern %s: non func ptr %s is not supported\n",
+
+ /* mtype->type must be a func_proto which was
+ * guaranteed in bpf_object__collect_st_ops_relos(),
+ * so only check kern_mtype for func_proto here.
+ */
+ if (!btf_is_func_proto(kern_mtype)) {
+ pr_warn("struct_ops init_kern %s: kernel member %s is not a func ptr\n",
map->name, mname);
return -ENOTSUP;
}
- prog = st_ops->progs[i];
- if (!prog) {
- pr_debug("struct_ops init_kern %s: func ptr %s is not set\n",
- map->name, mname);
- continue;
+ if (mod_btf)
+ prog->attach_btf_obj_fd = mod_btf->fd;
+
+ /* if we haven't yet processed this BPF program, record proper
+ * attach_btf_id and member_idx
+ */
+ if (!prog->attach_btf_id) {
+ prog->attach_btf_id = kern_type_id;
+ prog->expected_attach_type = kern_member_idx;
}
- prog->attach_btf_id = kern_type_id;
- prog->expected_attach_type = kern_member_idx;
+ /* struct_ops BPF prog can be re-used between multiple
+ * .struct_ops & .struct_ops.link as long as it's the
+ * same struct_ops struct definition and the same
+ * function pointer field
+ */
+ if (prog->attach_btf_id != kern_type_id) {
+ pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: attach_btf_id %u != kern_type_id %u\n",
+ map->name, mname, prog->name, prog->sec_name, prog->type,
+ prog->attach_btf_id, kern_type_id);
+ return -EINVAL;
+ }
+ if (prog->expected_attach_type != kern_member_idx) {
+ pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: expected_attach_type %u != kern_member_idx %u\n",
+ map->name, mname, prog->name, prog->sec_name, prog->type,
+ prog->expected_attach_type, kern_member_idx);
+ return -EINVAL;
+ }
st_ops->kern_func_off[i] = kern_data_off + kern_moff;
@@ -934,8 +1260,10 @@ static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
if (!bpf_map__is_struct_ops(map))
continue;
- err = bpf_map__init_kern_struct_ops(map, obj->btf,
- obj->btf_vmlinux);
+ if (!map->autocreate)
+ continue;
+
+ err = bpf_map__init_kern_struct_ops(map);
if (err)
return err;
}
@@ -943,7 +1271,8 @@ static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
return 0;
}
-static int bpf_object__init_struct_ops_maps(struct bpf_object *obj)
+static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name,
+ int shndx, Elf_Data *data)
{
const struct btf_type *type, *datasec;
const struct btf_var_secinfo *vsi;
@@ -954,15 +1283,15 @@ static int bpf_object__init_struct_ops_maps(struct bpf_object *obj)
struct bpf_map *map;
__u32 i;
- if (obj->efile.st_ops_shndx == -1)
+ if (shndx == -1)
return 0;
btf = obj->btf;
- datasec_id = btf__find_by_name_kind(btf, STRUCT_OPS_SEC,
+ datasec_id = btf__find_by_name_kind(btf, sec_name,
BTF_KIND_DATASEC);
if (datasec_id < 0) {
pr_warn("struct_ops init: DATASEC %s not found\n",
- STRUCT_OPS_SEC);
+ sec_name);
return -EINVAL;
}
@@ -975,7 +1304,7 @@ static int bpf_object__init_struct_ops_maps(struct bpf_object *obj)
type_id = btf__resolve_type(obj->btf, vsi->type);
if (type_id < 0) {
pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n",
- vsi->type, STRUCT_OPS_SEC);
+ vsi->type, sec_name);
return -EINVAL;
}
@@ -994,16 +1323,27 @@ static int bpf_object__init_struct_ops_maps(struct bpf_object *obj)
if (IS_ERR(map))
return PTR_ERR(map);
- map->sec_idx = obj->efile.st_ops_shndx;
+ map->sec_idx = shndx;
map->sec_offset = vsi->offset;
map->name = strdup(var_name);
if (!map->name)
return -ENOMEM;
+ map->btf_value_type_id = type_id;
+
+ /* Follow same convention as for programs autoload:
+ * SEC("?.struct_ops") means map is not created by default.
+ */
+ if (sec_name[0] == '?') {
+ map->autocreate = false;
+ /* from now on forget there was ? in section name */
+ sec_name++;
+ }
map->def.type = BPF_MAP_TYPE_STRUCT_OPS;
map->def.key_size = sizeof(int);
map->def.value_size = type->size;
map->def.max_entries = 1;
+ map->def.map_flags = strcmp(sec_name, STRUCT_OPS_LINK_SEC) == 0 ? BPF_F_LINK : 0;
map->st_ops = calloc(1, sizeof(*map->st_ops));
if (!map->st_ops)
@@ -1016,14 +1356,14 @@ static int bpf_object__init_struct_ops_maps(struct bpf_object *obj)
if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off)
return -ENOMEM;
- if (vsi->offset + type->size > obj->efile.st_ops_data->d_size) {
+ if (vsi->offset + type->size > data->d_size) {
pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n",
- var_name, STRUCT_OPS_SEC);
+ var_name, sec_name);
return -EINVAL;
}
memcpy(st_ops->data,
- obj->efile.st_ops_data->d_buf + vsi->offset,
+ data->d_buf + vsi->offset,
type->size);
st_ops->tname = tname;
st_ops->type = type;
@@ -1036,6 +1376,29 @@ static int bpf_object__init_struct_ops_maps(struct bpf_object *obj)
return 0;
}
+static int bpf_object_init_struct_ops(struct bpf_object *obj)
+{
+ const char *sec_name;
+ int sec_idx, err;
+
+ for (sec_idx = 0; sec_idx < obj->efile.sec_cnt; ++sec_idx) {
+ struct elf_sec_desc *desc = &obj->efile.secs[sec_idx];
+
+ if (desc->sec_type != SEC_ST_OPS)
+ continue;
+
+ sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
+ if (!sec_name)
+ return -LIBBPF_ERRNO__FORMAT;
+
+ err = init_struct_ops_maps(obj, sec_name, sec_idx, desc->data);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
static struct bpf_object *bpf_object__new(const char *path,
const void *obj_buf,
size_t obj_buf_sz,
@@ -1052,12 +1415,10 @@ static struct bpf_object *bpf_object__new(const char *path,
strcpy(obj->path, path);
if (obj_name) {
- strncpy(obj->name, obj_name, sizeof(obj->name) - 1);
- obj->name[sizeof(obj->name) - 1] = 0;
+ libbpf_strlcpy(obj->name, obj_name, sizeof(obj->name));
} else {
/* Using basename() GNU version which doesn't modify arg. */
- strncpy(obj->name, basename((void *)path),
- sizeof(obj->name) - 1);
+ libbpf_strlcpy(obj->name, basename((void *)path), sizeof(obj->name));
end = strchr(obj->name, '.');
if (end)
*end = 0;
@@ -1072,69 +1433,48 @@ static struct bpf_object *bpf_object__new(const char *path,
*/
obj->efile.obj_buf = obj_buf;
obj->efile.obj_buf_sz = obj_buf_sz;
- obj->efile.maps_shndx = -1;
obj->efile.btf_maps_shndx = -1;
- obj->efile.data_shndx = -1;
- obj->efile.rodata_shndx = -1;
- obj->efile.bss_shndx = -1;
- obj->efile.st_ops_shndx = -1;
obj->kconfig_map_idx = -1;
- obj->rodata_map_idx = -1;
obj->kern_version = get_kernel_version();
obj->loaded = false;
- INIT_LIST_HEAD(&obj->list);
- list_add(&obj->list, &bpf_objects_list);
return obj;
}
static void bpf_object__elf_finish(struct bpf_object *obj)
{
- if (!obj_elf_valid(obj))
+ if (!obj->efile.elf)
return;
- if (obj->efile.elf) {
- elf_end(obj->efile.elf);
- obj->efile.elf = NULL;
- }
+ elf_end(obj->efile.elf);
+ obj->efile.elf = NULL;
obj->efile.symbols = NULL;
- obj->efile.data = NULL;
- obj->efile.rodata = NULL;
- obj->efile.bss = NULL;
- obj->efile.st_ops_data = NULL;
+ obj->efile.arena_data = NULL;
- zfree(&obj->efile.reloc_sects);
- obj->efile.nr_reloc_sects = 0;
+ zfree(&obj->efile.secs);
+ obj->efile.sec_cnt = 0;
zclose(obj->efile.fd);
obj->efile.obj_buf = NULL;
obj->efile.obj_buf_sz = 0;
}
-/* if libelf is old and doesn't support mmap(), fall back to read() */
-#ifndef ELF_C_READ_MMAP
-#define ELF_C_READ_MMAP ELF_C_READ
-#endif
-
static int bpf_object__elf_init(struct bpf_object *obj)
{
+ Elf64_Ehdr *ehdr;
int err = 0;
- GElf_Ehdr *ep;
+ Elf *elf;
- if (obj_elf_valid(obj)) {
+ if (obj->efile.elf) {
pr_warn("elf: init internal error\n");
return -LIBBPF_ERRNO__LIBELF;
}
if (obj->efile.obj_buf_sz > 0) {
- /*
- * obj_buf should have been validated by
- * bpf_object__open_buffer().
- */
- obj->efile.elf = elf_memory((char *)obj->efile.obj_buf,
- obj->efile.obj_buf_sz);
+ /* obj_buf should have been validated by bpf_object__open_mem(). */
+ elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz);
} else {
- obj->efile.fd = open(obj->path, O_RDONLY);
+ obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC);
if (obj->efile.fd < 0) {
char errmsg[STRERR_BUFSIZE], *cp;
@@ -1144,39 +1484,53 @@ static int bpf_object__elf_init(struct bpf_object *obj)
return err;
}
- obj->efile.elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL);
+ elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL);
}
- if (!obj->efile.elf) {
+ if (!elf) {
pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1));
err = -LIBBPF_ERRNO__LIBELF;
goto errout;
}
- if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) {
+ obj->efile.elf = elf;
+
+ if (elf_kind(elf) != ELF_K_ELF) {
+ err = -LIBBPF_ERRNO__FORMAT;
+ pr_warn("elf: '%s' is not a proper ELF object\n", obj->path);
+ goto errout;
+ }
+
+ if (gelf_getclass(elf) != ELFCLASS64) {
+ err = -LIBBPF_ERRNO__FORMAT;
+ pr_warn("elf: '%s' is not a 64-bit ELF object\n", obj->path);
+ goto errout;
+ }
+
+ obj->efile.ehdr = ehdr = elf64_getehdr(elf);
+ if (!obj->efile.ehdr) {
pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1));
err = -LIBBPF_ERRNO__FORMAT;
goto errout;
}
- ep = &obj->efile.ehdr;
- if (elf_getshdrstrndx(obj->efile.elf, &obj->efile.shstrndx)) {
+ if (elf_getshdrstrndx(elf, &obj->efile.shstrndx)) {
pr_warn("elf: failed to get section names section index for %s: %s\n",
obj->path, elf_errmsg(-1));
err = -LIBBPF_ERRNO__FORMAT;
goto errout;
}
- /* Elf is corrupted/truncated, avoid calling elf_strptr. */
- if (!elf_rawdata(elf_getscn(obj->efile.elf, obj->efile.shstrndx), NULL)) {
+ /* ELF is corrupted/truncated, avoid calling elf_strptr. */
+ if (!elf_rawdata(elf_getscn(elf, obj->efile.shstrndx), NULL)) {
pr_warn("elf: failed to get section names strings from %s: %s\n",
obj->path, elf_errmsg(-1));
- return -LIBBPF_ERRNO__FORMAT;
+ err = -LIBBPF_ERRNO__FORMAT;
+ goto errout;
}
/* Old LLVM set e_machine to EM_NONE */
- if (ep->e_type != ET_REL ||
- (ep->e_machine && ep->e_machine != EM_BPF)) {
+ if (ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF)) {
pr_warn("elf: %s is not a valid eBPF object file\n", obj->path);
err = -LIBBPF_ERRNO__FORMAT;
goto errout;
@@ -1190,11 +1544,11 @@ errout:
static int bpf_object__check_endianness(struct bpf_object *obj)
{
-#if __BYTE_ORDER == __LITTLE_ENDIAN
- if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2LSB)
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
return 0;
-#elif __BYTE_ORDER == __BIG_ENDIAN
- if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2MSB)
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
return 0;
#else
# error "Unrecognized __BYTE_ORDER__"
@@ -1206,7 +1560,14 @@ static int bpf_object__check_endianness(struct bpf_object *obj)
static int
bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
{
- memcpy(obj->license, data, min(size, sizeof(obj->license) - 1));
+ if (!data) {
+ pr_warn("invalid license section in %s\n", obj->path);
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+ /* libbpf_strlcpy() only copies first N - 1 bytes, so size + 1 won't
+ * go over allowed ELF data section buffer
+ */
+ libbpf_strlcpy(obj->license, data, min(size + 1, sizeof(obj->license)));
pr_debug("license of %s is %s\n", obj->path, obj->license);
return 0;
}
@@ -1216,7 +1577,7 @@ bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
{
__u32 kver;
- if (size != sizeof(kver)) {
+ if (!data || size != sizeof(kver)) {
pr_warn("invalid kver section in %s\n", obj->path);
return -LIBBPF_ERRNO__FORMAT;
}
@@ -1234,128 +1595,197 @@ static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
return false;
}
-int bpf_object__section_size(const struct bpf_object *obj, const char *name,
- __u32 *size)
+static int find_elf_sec_sz(const struct bpf_object *obj, const char *name, __u32 *size)
{
- int ret = -ENOENT;
+ Elf_Data *data;
+ Elf_Scn *scn;
- *size = 0;
- if (!name) {
+ if (!name)
return -EINVAL;
- } else if (!strcmp(name, DATA_SEC)) {
- if (obj->efile.data)
- *size = obj->efile.data->d_size;
- } else if (!strcmp(name, BSS_SEC)) {
- if (obj->efile.bss)
- *size = obj->efile.bss->d_size;
- } else if (!strcmp(name, RODATA_SEC)) {
- if (obj->efile.rodata)
- *size = obj->efile.rodata->d_size;
- } else if (!strcmp(name, STRUCT_OPS_SEC)) {
- if (obj->efile.st_ops_data)
- *size = obj->efile.st_ops_data->d_size;
- } else {
- Elf_Scn *scn = elf_sec_by_name(obj, name);
- Elf_Data *data = elf_sec_data(obj, scn);
- if (data) {
- ret = 0; /* found it */
- *size = data->d_size;
- }
+ scn = elf_sec_by_name(obj, name);
+ data = elf_sec_data(obj, scn);
+ if (data) {
+ *size = data->d_size;
+ return 0; /* found it */
}
- return *size ? 0 : ret;
+ return -ENOENT;
}
-int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
- __u32 *off)
+static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *name)
{
Elf_Data *symbols = obj->efile.symbols;
const char *sname;
size_t si;
- if (!name || !off)
- return -EINVAL;
-
- for (si = 0; si < symbols->d_size / sizeof(GElf_Sym); si++) {
- GElf_Sym sym;
+ for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) {
+ Elf64_Sym *sym = elf_sym_by_idx(obj, si);
- if (!gelf_getsym(symbols, si, &sym))
+ if (ELF64_ST_TYPE(sym->st_info) != STT_OBJECT)
continue;
- if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
- GELF_ST_TYPE(sym.st_info) != STT_OBJECT)
+
+ if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
+ ELF64_ST_BIND(sym->st_info) != STB_WEAK)
continue;
- sname = elf_sym_str(obj, sym.st_name);
+ sname = elf_sym_str(obj, sym->st_name);
if (!sname) {
- pr_warn("failed to get sym name string for var %s\n",
- name);
- return -EIO;
- }
- if (strcmp(name, sname) == 0) {
- *off = sym.st_value;
- return 0;
+ pr_warn("failed to get sym name string for var %s\n", name);
+ return ERR_PTR(-EIO);
}
+ if (strcmp(name, sname) == 0)
+ return sym;
}
- return -ENOENT;
+ return ERR_PTR(-ENOENT);
}
-static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
+/* Some versions of Android don't provide memfd_create() in their libc
+ * implementation, so avoid complications and just go straight to Linux
+ * syscall.
+ */
+static int sys_memfd_create(const char *name, unsigned flags)
{
- struct bpf_map *new_maps;
- size_t new_cap;
- int i;
+ return syscall(__NR_memfd_create, name, flags);
+}
- if (obj->nr_maps < obj->maps_cap)
- return &obj->maps[obj->nr_maps++];
+static int create_placeholder_fd(void)
+{
+ int fd;
- new_cap = max((size_t)4, obj->maps_cap * 3 / 2);
- new_maps = libbpf_reallocarray(obj->maps, new_cap, sizeof(*obj->maps));
- if (!new_maps) {
- pr_warn("alloc maps for object failed\n");
- return ERR_PTR(-ENOMEM);
- }
+ fd = ensure_good_fd(sys_memfd_create("libbpf-placeholder-fd", MFD_CLOEXEC));
+ if (fd < 0)
+ return -errno;
+ return fd;
+}
- obj->maps_cap = new_cap;
- obj->maps = new_maps;
+static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
+{
+ struct bpf_map *map;
+ int err;
- /* zero out new maps */
- memset(obj->maps + obj->nr_maps, 0,
- (obj->maps_cap - obj->nr_maps) * sizeof(*obj->maps));
- /*
- * fill all fd with -1 so won't close incorrect fd (fd=0 is stdin)
- * when failure (zclose won't close negative fd)).
+ err = libbpf_ensure_mem((void **)&obj->maps, &obj->maps_cap,
+ sizeof(*obj->maps), obj->nr_maps + 1);
+ if (err)
+ return ERR_PTR(err);
+
+ map = &obj->maps[obj->nr_maps++];
+ map->obj = obj;
+ /* Preallocate map FD without actually creating BPF map just yet.
+ * These map FD "placeholders" will be reused later without changing
+ * FD value when map is actually created in the kernel.
+ *
+ * This is useful to be able to perform BPF program relocations
+ * without having to create BPF maps before that step. This allows us
+ * to finalize and load BTF very late in BPF object's loading phase,
+ * right before BPF maps have to be created and BPF programs have to
+ * be loaded. By having these map FD placeholders we can perform all
+ * the sanitizations, relocations, and any other adjustments before we
+ * start creating actual BPF kernel objects (BTF, maps, progs).
*/
- for (i = obj->nr_maps; i < obj->maps_cap; i++) {
- obj->maps[i].fd = -1;
- obj->maps[i].inner_map_fd = -1;
- }
+ map->fd = create_placeholder_fd();
+ if (map->fd < 0)
+ return ERR_PTR(map->fd);
+ map->inner_map_fd = -1;
+ map->autocreate = true;
- return &obj->maps[obj->nr_maps++];
+ return map;
}
-static size_t bpf_map_mmap_sz(const struct bpf_map *map)
+static size_t array_map_mmap_sz(unsigned int value_sz, unsigned int max_entries)
{
- long page_sz = sysconf(_SC_PAGE_SIZE);
+ const long page_sz = sysconf(_SC_PAGE_SIZE);
size_t map_sz;
- map_sz = (size_t)roundup(map->def.value_size, 8) * map->def.max_entries;
+ map_sz = (size_t)roundup(value_sz, 8) * max_entries;
map_sz = roundup(map_sz, page_sz);
return map_sz;
}
-static char *internal_map_name(struct bpf_object *obj,
- enum libbpf_map_type type)
+static size_t bpf_map_mmap_sz(const struct bpf_map *map)
+{
+ const long page_sz = sysconf(_SC_PAGE_SIZE);
+
+ switch (map->def.type) {
+ case BPF_MAP_TYPE_ARRAY:
+ return array_map_mmap_sz(map->def.value_size, map->def.max_entries);
+ case BPF_MAP_TYPE_ARENA:
+ return page_sz * map->def.max_entries;
+ default:
+ return 0; /* not supported */
+ }
+}
+
+static int bpf_map_mmap_resize(struct bpf_map *map, size_t old_sz, size_t new_sz)
+{
+ void *mmaped;
+
+ if (!map->mmaped)
+ return -EINVAL;
+
+ if (old_sz == new_sz)
+ return 0;
+
+ mmaped = mmap(NULL, new_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ if (mmaped == MAP_FAILED)
+ return -errno;
+
+ memcpy(mmaped, map->mmaped, min(old_sz, new_sz));
+ munmap(map->mmaped, old_sz);
+ map->mmaped = mmaped;
+ return 0;
+}
+
+static char *internal_map_name(struct bpf_object *obj, const char *real_name)
{
char map_name[BPF_OBJ_NAME_LEN], *p;
- const char *sfx = libbpf_type_to_btf_name[type];
- int sfx_len = max((size_t)7, strlen(sfx));
- int pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1,
- strlen(obj->name));
+ int pfx_len, sfx_len = max((size_t)7, strlen(real_name));
+
+ /* This is one of the more confusing parts of libbpf for various
+ * reasons, some of which are historical. The original idea for naming
+ * internal names was to include as much of BPF object name prefix as
+ * possible, so that it can be distinguished from similar internal
+ * maps of a different BPF object.
+ * As an example, let's say we have bpf_object named 'my_object_name'
+ * and internal map corresponding to '.rodata' ELF section. The final
+ * map name advertised to user and to the kernel will be
+ * 'my_objec.rodata', taking first 8 characters of object name and
+ * entire 7 characters of '.rodata'.
+ * Somewhat confusingly, if internal map ELF section name is shorter
+ * than 7 characters, e.g., '.bss', we still reserve 7 characters
+ * for the suffix, even though we only have 4 actual characters, and
+ * resulting map will be called 'my_objec.bss', not even using all 15
+ * characters allowed by the kernel. Oh well, at least the truncated
+ * object name is somewhat consistent in this case. But if the map
+ * name is '.kconfig', we'll still have entirety of '.kconfig' added
+ * (8 chars) and thus will be left with only first 7 characters of the
+ * object name ('my_obje'). Happy guessing, user, that the final map
+ * name will be "my_obje.kconfig".
+ * Now, with libbpf starting to support arbitrarily named .rodata.*
+ * and .data.* data sections, it's possible that ELF section name is
+ * longer than allowed 15 chars, so we now need to be careful to take
+ * only up to 15 first characters of ELF name, taking no BPF object
+ * name characters at all. So '.rodata.abracadabra' will result in
+ * '.rodata.abracad' kernel and user-visible name.
+ * We need to keep this convoluted logic intact for .data, .bss and
+ * .rodata maps, but for new custom .data.custom and .rodata.custom
+ * maps we use their ELF names as is, not prepending bpf_object name
+ * in front. We still need to truncate them to 15 characters for the
+ * kernel. Full name can be recovered for such maps by using DATASEC
+ * BTF type associated with such map's value type, though.
+ */
+ if (sfx_len >= BPF_OBJ_NAME_LEN)
+ sfx_len = BPF_OBJ_NAME_LEN - 1;
+
+ /* if there are two or more dots in map name, it's a custom dot map */
+ if (strchr(real_name + 1, '.') != NULL)
+ pfx_len = 0;
+ else
+ pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, strlen(obj->name));
snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
- sfx_len, libbpf_type_to_btf_name[type]);
+ sfx_len, real_name);
/* sanitise map name to characters allowed by kernel */
for (p = map_name; *p && p < map_name + sizeof(map_name); p++)
@@ -1366,11 +1796,46 @@ static char *internal_map_name(struct bpf_object *obj,
}
static int
+map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map);
+
+/* Internal BPF map is mmap()'able only if at least one of corresponding
+ * DATASEC's VARs are to be exposed through BPF skeleton. I.e., it's a GLOBAL
+ * variable and it's not marked as __hidden (which turns it into, effectively,
+ * a STATIC variable).
+ */
+static bool map_is_mmapable(struct bpf_object *obj, struct bpf_map *map)
+{
+ const struct btf_type *t, *vt;
+ struct btf_var_secinfo *vsi;
+ int i, n;
+
+ if (!map->btf_value_type_id)
+ return false;
+
+ t = btf__type_by_id(obj->btf, map->btf_value_type_id);
+ if (!btf_is_datasec(t))
+ return false;
+
+ vsi = btf_var_secinfos(t);
+ for (i = 0, n = btf_vlen(t); i < n; i++, vsi++) {
+ vt = btf__type_by_id(obj->btf, vsi->type);
+ if (!btf_is_var(vt))
+ continue;
+
+ if (btf_var(vt)->linkage != BTF_VAR_STATIC)
+ return true;
+ }
+
+ return false;
+}
+
+static int
bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
- int sec_idx, void *data, size_t data_sz)
+ const char *real_name, int sec_idx, void *data, size_t data_sz)
{
struct bpf_map_def *def;
struct bpf_map *map;
+ size_t mmap_sz;
int err;
map = bpf_object__add_map(obj);
@@ -1380,9 +1845,11 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
map->libbpf_type = type;
map->sec_idx = sec_idx;
map->sec_offset = 0;
- map->name = internal_map_name(obj, type);
- if (!map->name) {
- pr_warn("failed to alloc map name\n");
+ map->real_name = strdup(real_name);
+ map->name = internal_map_name(obj, real_name);
+ if (!map->real_name || !map->name) {
+ zfree(&map->real_name);
+ zfree(&map->name);
return -ENOMEM;
}
@@ -1392,19 +1859,26 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
def->value_size = data_sz;
def->max_entries = 1;
def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
- ? BPF_F_RDONLY_PROG : 0;
- def->map_flags |= BPF_F_MMAPABLE;
+ ? BPF_F_RDONLY_PROG : 0;
+
+ /* failures are fine because of maps like .rodata.str1.1 */
+ (void) map_fill_btf_type_info(obj, map);
+
+ if (map_is_mmapable(obj, map))
+ def->map_flags |= BPF_F_MMAPABLE;
pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
map->name, map->sec_idx, map->sec_offset, def->map_flags);
- map->mmaped = mmap(NULL, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE,
+ mmap_sz = bpf_map_mmap_sz(map);
+ map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_ANONYMOUS, -1, 0);
if (map->mmaped == MAP_FAILED) {
err = -errno;
map->mmaped = NULL;
pr_warn("failed to alloc map '%s' content buffer: %d\n",
map->name, err);
+ zfree(&map->real_name);
zfree(&map->name);
return err;
}
@@ -1418,34 +1892,47 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
static int bpf_object__init_global_data_maps(struct bpf_object *obj)
{
- int err;
+ struct elf_sec_desc *sec_desc;
+ const char *sec_name;
+ int err = 0, sec_idx;
/*
* Populate obj->maps with libbpf internal maps.
*/
- if (obj->efile.data_shndx >= 0) {
- err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
- obj->efile.data_shndx,
- obj->efile.data->d_buf,
- obj->efile.data->d_size);
- if (err)
- return err;
- }
- if (obj->efile.rodata_shndx >= 0) {
- err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
- obj->efile.rodata_shndx,
- obj->efile.rodata->d_buf,
- obj->efile.rodata->d_size);
- if (err)
- return err;
+ for (sec_idx = 1; sec_idx < obj->efile.sec_cnt; sec_idx++) {
+ sec_desc = &obj->efile.secs[sec_idx];
- obj->rodata_map_idx = obj->nr_maps - 1;
- }
- if (obj->efile.bss_shndx >= 0) {
- err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
- obj->efile.bss_shndx,
- NULL,
- obj->efile.bss->d_size);
+ /* Skip recognized sections with size 0. */
+ if (!sec_desc->data || sec_desc->data->d_size == 0)
+ continue;
+
+ switch (sec_desc->sec_type) {
+ case SEC_DATA:
+ sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
+ err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
+ sec_name, sec_idx,
+ sec_desc->data->d_buf,
+ sec_desc->data->d_size);
+ break;
+ case SEC_RODATA:
+ obj->has_rodata = true;
+ sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
+ err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
+ sec_name, sec_idx,
+ sec_desc->data->d_buf,
+ sec_desc->data->d_size);
+ break;
+ case SEC_BSS:
+ sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
+ err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
+ sec_name, sec_idx,
+ NULL,
+ sec_desc->data->d_size);
+ break;
+ default:
+ /* skip */
+ break;
+ }
if (err)
return err;
}
@@ -1471,7 +1958,7 @@ static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
switch (ext->kcfg.type) {
case KCFG_BOOL:
if (value == 'm') {
- pr_warn("extern (kcfg) %s=%c should be tristate or char\n",
+ pr_warn("extern (kcfg) '%s': value '%c' implies tristate or char type\n",
ext->name, value);
return -EINVAL;
}
@@ -1492,7 +1979,7 @@ static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
case KCFG_INT:
case KCFG_CHAR_ARR:
default:
- pr_warn("extern (kcfg) %s=%c should be bool, tristate, or char\n",
+ pr_warn("extern (kcfg) '%s': value '%c' implies bool, tristate, or char type\n",
ext->name, value);
return -EINVAL;
}
@@ -1506,7 +1993,8 @@ static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
size_t len;
if (ext->kcfg.type != KCFG_CHAR_ARR) {
- pr_warn("extern (kcfg) %s=%s should be char array\n", ext->name, value);
+ pr_warn("extern (kcfg) '%s': value '%s' implies char array type\n",
+ ext->name, value);
return -EINVAL;
}
@@ -1520,7 +2008,7 @@ static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
/* strip quotes */
len -= 2;
if (len >= ext->kcfg.sz) {
- pr_warn("extern (kcfg) '%s': long string config %s of (%zu bytes) truncated to %d bytes\n",
+ pr_warn("extern (kcfg) '%s': long string '%s' of (%zu bytes) truncated to %d bytes\n",
ext->name, value, len, ext->kcfg.sz - 1);
len = ext->kcfg.sz - 1;
}
@@ -1577,23 +2065,38 @@ static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v)
static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val,
__u64 value)
{
- if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
- pr_warn("extern (kcfg) %s=%llu should be integer\n",
+ if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR &&
+ ext->kcfg.type != KCFG_BOOL) {
+ pr_warn("extern (kcfg) '%s': value '%llu' implies integer, char, or boolean type\n",
+ ext->name, (unsigned long long)value);
+ return -EINVAL;
+ }
+ if (ext->kcfg.type == KCFG_BOOL && value > 1) {
+ pr_warn("extern (kcfg) '%s': value '%llu' isn't boolean compatible\n",
ext->name, (unsigned long long)value);
return -EINVAL;
+
}
if (!is_kcfg_value_in_range(ext, value)) {
- pr_warn("extern (kcfg) %s=%llu value doesn't fit in %d bytes\n",
+ pr_warn("extern (kcfg) '%s': value '%llu' doesn't fit in %d bytes\n",
ext->name, (unsigned long long)value, ext->kcfg.sz);
return -ERANGE;
}
switch (ext->kcfg.sz) {
- case 1: *(__u8 *)ext_val = value; break;
- case 2: *(__u16 *)ext_val = value; break;
- case 4: *(__u32 *)ext_val = value; break;
- case 8: *(__u64 *)ext_val = value; break;
- default:
- return -EINVAL;
+ case 1:
+ *(__u8 *)ext_val = value;
+ break;
+ case 2:
+ *(__u16 *)ext_val = value;
+ break;
+ case 4:
+ *(__u32 *)ext_val = value;
+ break;
+ case 8:
+ *(__u64 *)ext_val = value;
+ break;
+ default:
+ return -EINVAL;
}
ext->is_set = true;
return 0;
@@ -1608,7 +2111,7 @@ static int bpf_object__process_kconfig_line(struct bpf_object *obj,
void *ext_val;
__u64 num;
- if (strncmp(buf, "CONFIG_", 7))
+ if (!str_has_pfx(buf, "CONFIG_"))
return 0;
sep = strchr(buf, '=');
@@ -1647,16 +2150,19 @@ static int bpf_object__process_kconfig_line(struct bpf_object *obj,
/* assume integer */
err = parse_u64(value, &num);
if (err) {
- pr_warn("extern (kcfg) %s=%s should be integer\n",
- ext->name, value);
+ pr_warn("extern (kcfg) '%s': value '%s' isn't a valid integer\n", ext->name, value);
return err;
}
+ if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
+ pr_warn("extern (kcfg) '%s': value '%s' implies integer type\n", ext->name, value);
+ return -EINVAL;
+ }
err = set_kcfg_value_num(ext, ext_val, num);
break;
}
if (err)
return err;
- pr_debug("extern (kcfg) %s=%s\n", ext->name, value);
+ pr_debug("extern (kcfg) '%s': set to %s\n", ext->name, value);
return 0;
}
@@ -1675,9 +2181,9 @@ static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data)
return -ENAMETOOLONG;
/* gzopen also accepts uncompressed files. */
- file = gzopen(buf, "r");
+ file = gzopen(buf, "re");
if (!file)
- file = gzopen("/proc/config.gz", "r");
+ file = gzopen("/proc/config.gz", "re");
if (!file) {
pr_warn("failed to open system Kconfig\n");
@@ -1742,7 +2248,7 @@ static int bpf_object__init_kconfig_map(struct bpf_object *obj)
map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz;
err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG,
- obj->efile.symbols_shndx,
+ ".kconfig", obj->efile.symbols_shndx,
NULL, map_sz);
if (err)
return err;
@@ -1752,130 +2258,7 @@ static int bpf_object__init_kconfig_map(struct bpf_object *obj)
return 0;
}
-static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
-{
- Elf_Data *symbols = obj->efile.symbols;
- int i, map_def_sz = 0, nr_maps = 0, nr_syms;
- Elf_Data *data = NULL;
- Elf_Scn *scn;
-
- if (obj->efile.maps_shndx < 0)
- return 0;
-
- if (!symbols)
- return -EINVAL;
-
-
- scn = elf_sec_by_idx(obj, obj->efile.maps_shndx);
- data = elf_sec_data(obj, scn);
- if (!scn || !data) {
- pr_warn("elf: failed to get legacy map definitions for %s\n",
- obj->path);
- return -EINVAL;
- }
-
- /*
- * Count number of maps. Each map has a name.
- * Array of maps is not supported: only the first element is
- * considered.
- *
- * TODO: Detect array of map and report error.
- */
- nr_syms = symbols->d_size / sizeof(GElf_Sym);
- for (i = 0; i < nr_syms; i++) {
- GElf_Sym sym;
-
- if (!gelf_getsym(symbols, i, &sym))
- continue;
- if (sym.st_shndx != obj->efile.maps_shndx)
- continue;
- nr_maps++;
- }
- /* Assume equally sized map definitions */
- pr_debug("elf: found %d legacy map definitions (%zd bytes) in %s\n",
- nr_maps, data->d_size, obj->path);
-
- if (!data->d_size || nr_maps == 0 || (data->d_size % nr_maps) != 0) {
- pr_warn("elf: unable to determine legacy map definition size in %s\n",
- obj->path);
- return -EINVAL;
- }
- map_def_sz = data->d_size / nr_maps;
-
- /* Fill obj->maps using data in "maps" section. */
- for (i = 0; i < nr_syms; i++) {
- GElf_Sym sym;
- const char *map_name;
- struct bpf_map_def *def;
- struct bpf_map *map;
-
- if (!gelf_getsym(symbols, i, &sym))
- continue;
- if (sym.st_shndx != obj->efile.maps_shndx)
- continue;
-
- map = bpf_object__add_map(obj);
- if (IS_ERR(map))
- return PTR_ERR(map);
-
- map_name = elf_sym_str(obj, sym.st_name);
- if (!map_name) {
- pr_warn("failed to get map #%d name sym string for obj %s\n",
- i, obj->path);
- return -LIBBPF_ERRNO__FORMAT;
- }
-
- map->libbpf_type = LIBBPF_MAP_UNSPEC;
- map->sec_idx = sym.st_shndx;
- map->sec_offset = sym.st_value;
- pr_debug("map '%s' (legacy): at sec_idx %d, offset %zu.\n",
- map_name, map->sec_idx, map->sec_offset);
- if (sym.st_value + map_def_sz > data->d_size) {
- pr_warn("corrupted maps section in %s: last map \"%s\" too small\n",
- obj->path, map_name);
- return -EINVAL;
- }
-
- map->name = strdup(map_name);
- if (!map->name) {
- pr_warn("failed to alloc map name\n");
- return -ENOMEM;
- }
- pr_debug("map %d is \"%s\"\n", i, map->name);
- def = (struct bpf_map_def *)(data->d_buf + sym.st_value);
- /*
- * If the definition of the map in the object file fits in
- * bpf_map_def, copy it. Any extra fields in our version
- * of bpf_map_def will default to zero as a result of the
- * calloc above.
- */
- if (map_def_sz <= sizeof(struct bpf_map_def)) {
- memcpy(&map->def, def, map_def_sz);
- } else {
- /*
- * Here the map structure being read is bigger than what
- * we expect, truncate if the excess bits are all zero.
- * If they are not zero, reject this map as
- * incompatible.
- */
- char *b;
-
- for (b = ((char *)def) + sizeof(struct bpf_map_def);
- b < ((char *)def) + map_def_sz; b++) {
- if (*b != 0) {
- pr_warn("maps section in %s: \"%s\" has unrecognized, non-zero options\n",
- obj->path, map_name);
- if (strict)
- return -EINVAL;
- }
- }
- memcpy(&map->def, def, sizeof(struct bpf_map_def));
- }
- }
- return 0;
-}
-
-static const struct btf_type *
+const struct btf_type *
skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
{
const struct btf_type *t = btf__type_by_id(btf, id);
@@ -1906,9 +2289,9 @@ resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
return btf_is_func_proto(t) ? t : NULL;
}
-static const char *btf_kind_str(const struct btf_type *t)
+static const char *__btf_kind_str(__u16 kind)
{
- switch (btf_kind(t)) {
+ switch (kind) {
case BTF_KIND_UNKN: return "void";
case BTF_KIND_INT: return "int";
case BTF_KIND_PTR: return "ptr";
@@ -1925,10 +2308,19 @@ static const char *btf_kind_str(const struct btf_type *t)
case BTF_KIND_FUNC_PROTO: return "func_proto";
case BTF_KIND_VAR: return "var";
case BTF_KIND_DATASEC: return "datasec";
+ case BTF_KIND_FLOAT: return "float";
+ case BTF_KIND_DECL_TAG: return "decl_tag";
+ case BTF_KIND_TYPE_TAG: return "type_tag";
+ case BTF_KIND_ENUM64: return "enum64";
default: return "unknown";
}
}
+const char *btf_kind_str(const struct btf_type *t)
+{
+ return __btf_kind_str(btf_kind(t));
+}
+
/*
* Fetch integer attribute of BTF map definition. Such attributes are
* represented using a pointer to an array, in which dimensionality of array
@@ -1966,271 +2358,410 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf,
return true;
}
-static int build_map_pin_path(struct bpf_map *map, const char *path)
+static bool get_map_field_long(const char *map_name, const struct btf *btf,
+ const struct btf_member *m, __u64 *res)
{
- char buf[PATH_MAX];
- int len;
+ const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
+ const char *name = btf__name_by_offset(btf, m->name_off);
- if (!path)
- path = "/sys/fs/bpf";
+ if (btf_is_ptr(t)) {
+ __u32 res32;
+ bool ret;
+
+ ret = get_map_field_int(map_name, btf, m, &res32);
+ if (ret)
+ *res = (__u64)res32;
+ return ret;
+ }
+
+ if (!btf_is_enum(t) && !btf_is_enum64(t)) {
+ pr_warn("map '%s': attr '%s': expected ENUM or ENUM64, got %s.\n",
+ map_name, name, btf_kind_str(t));
+ return false;
+ }
- len = snprintf(buf, PATH_MAX, "%s/%s", path, bpf_map__name(map));
+ if (btf_vlen(t) != 1) {
+ pr_warn("map '%s': attr '%s': invalid __ulong\n",
+ map_name, name);
+ return false;
+ }
+
+ if (btf_is_enum(t)) {
+ const struct btf_enum *e = btf_enum(t);
+
+ *res = e->val;
+ } else {
+ const struct btf_enum64 *e = btf_enum64(t);
+
+ *res = btf_enum64_value(e);
+ }
+ return true;
+}
+
+static int pathname_concat(char *buf, size_t buf_sz, const char *path, const char *name)
+{
+ int len;
+
+ len = snprintf(buf, buf_sz, "%s/%s", path, name);
if (len < 0)
return -EINVAL;
- else if (len >= PATH_MAX)
+ if (len >= buf_sz)
return -ENAMETOOLONG;
+ return 0;
+}
+
+static int build_map_pin_path(struct bpf_map *map, const char *path)
+{
+ char buf[PATH_MAX];
+ int err;
+
+ if (!path)
+ path = BPF_FS_DEFAULT_PATH;
+
+ err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
+ if (err)
+ return err;
+
return bpf_map__set_pin_path(map, buf);
}
+/* should match definition in bpf_helpers.h */
+enum libbpf_pin_type {
+ LIBBPF_PIN_NONE,
+ /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
+ LIBBPF_PIN_BY_NAME,
+};
-static int parse_btf_map_def(struct bpf_object *obj,
- struct bpf_map *map,
- const struct btf_type *def,
- bool strict, bool is_inner,
- const char *pin_root_path)
+int parse_btf_map_def(const char *map_name, struct btf *btf,
+ const struct btf_type *def_t, bool strict,
+ struct btf_map_def *map_def, struct btf_map_def *inner_def)
{
const struct btf_type *t;
const struct btf_member *m;
+ bool is_inner = inner_def == NULL;
int vlen, i;
- vlen = btf_vlen(def);
- m = btf_members(def);
+ vlen = btf_vlen(def_t);
+ m = btf_members(def_t);
for (i = 0; i < vlen; i++, m++) {
- const char *name = btf__name_by_offset(obj->btf, m->name_off);
+ const char *name = btf__name_by_offset(btf, m->name_off);
if (!name) {
- pr_warn("map '%s': invalid field #%d.\n", map->name, i);
+ pr_warn("map '%s': invalid field #%d.\n", map_name, i);
return -EINVAL;
}
if (strcmp(name, "type") == 0) {
- if (!get_map_field_int(map->name, obj->btf, m,
- &map->def.type))
+ if (!get_map_field_int(map_name, btf, m, &map_def->map_type))
return -EINVAL;
- pr_debug("map '%s': found type = %u.\n",
- map->name, map->def.type);
+ map_def->parts |= MAP_DEF_MAP_TYPE;
} else if (strcmp(name, "max_entries") == 0) {
- if (!get_map_field_int(map->name, obj->btf, m,
- &map->def.max_entries))
+ if (!get_map_field_int(map_name, btf, m, &map_def->max_entries))
return -EINVAL;
- pr_debug("map '%s': found max_entries = %u.\n",
- map->name, map->def.max_entries);
+ map_def->parts |= MAP_DEF_MAX_ENTRIES;
} else if (strcmp(name, "map_flags") == 0) {
- if (!get_map_field_int(map->name, obj->btf, m,
- &map->def.map_flags))
+ if (!get_map_field_int(map_name, btf, m, &map_def->map_flags))
return -EINVAL;
- pr_debug("map '%s': found map_flags = %u.\n",
- map->name, map->def.map_flags);
+ map_def->parts |= MAP_DEF_MAP_FLAGS;
} else if (strcmp(name, "numa_node") == 0) {
- if (!get_map_field_int(map->name, obj->btf, m, &map->numa_node))
+ if (!get_map_field_int(map_name, btf, m, &map_def->numa_node))
return -EINVAL;
- pr_debug("map '%s': found numa_node = %u.\n", map->name, map->numa_node);
+ map_def->parts |= MAP_DEF_NUMA_NODE;
} else if (strcmp(name, "key_size") == 0) {
__u32 sz;
- if (!get_map_field_int(map->name, obj->btf, m, &sz))
+ if (!get_map_field_int(map_name, btf, m, &sz))
return -EINVAL;
- pr_debug("map '%s': found key_size = %u.\n",
- map->name, sz);
- if (map->def.key_size && map->def.key_size != sz) {
+ if (map_def->key_size && map_def->key_size != sz) {
pr_warn("map '%s': conflicting key size %u != %u.\n",
- map->name, map->def.key_size, sz);
+ map_name, map_def->key_size, sz);
return -EINVAL;
}
- map->def.key_size = sz;
+ map_def->key_size = sz;
+ map_def->parts |= MAP_DEF_KEY_SIZE;
} else if (strcmp(name, "key") == 0) {
__s64 sz;
- t = btf__type_by_id(obj->btf, m->type);
+ t = btf__type_by_id(btf, m->type);
if (!t) {
pr_warn("map '%s': key type [%d] not found.\n",
- map->name, m->type);
+ map_name, m->type);
return -EINVAL;
}
if (!btf_is_ptr(t)) {
pr_warn("map '%s': key spec is not PTR: %s.\n",
- map->name, btf_kind_str(t));
+ map_name, btf_kind_str(t));
return -EINVAL;
}
- sz = btf__resolve_size(obj->btf, t->type);
+ sz = btf__resolve_size(btf, t->type);
if (sz < 0) {
pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
- map->name, t->type, (ssize_t)sz);
+ map_name, t->type, (ssize_t)sz);
return sz;
}
- pr_debug("map '%s': found key [%u], sz = %zd.\n",
- map->name, t->type, (ssize_t)sz);
- if (map->def.key_size && map->def.key_size != sz) {
+ if (map_def->key_size && map_def->key_size != sz) {
pr_warn("map '%s': conflicting key size %u != %zd.\n",
- map->name, map->def.key_size, (ssize_t)sz);
+ map_name, map_def->key_size, (ssize_t)sz);
return -EINVAL;
}
- map->def.key_size = sz;
- map->btf_key_type_id = t->type;
+ map_def->key_size = sz;
+ map_def->key_type_id = t->type;
+ map_def->parts |= MAP_DEF_KEY_SIZE | MAP_DEF_KEY_TYPE;
} else if (strcmp(name, "value_size") == 0) {
__u32 sz;
- if (!get_map_field_int(map->name, obj->btf, m, &sz))
+ if (!get_map_field_int(map_name, btf, m, &sz))
return -EINVAL;
- pr_debug("map '%s': found value_size = %u.\n",
- map->name, sz);
- if (map->def.value_size && map->def.value_size != sz) {
+ if (map_def->value_size && map_def->value_size != sz) {
pr_warn("map '%s': conflicting value size %u != %u.\n",
- map->name, map->def.value_size, sz);
+ map_name, map_def->value_size, sz);
return -EINVAL;
}
- map->def.value_size = sz;
+ map_def->value_size = sz;
+ map_def->parts |= MAP_DEF_VALUE_SIZE;
} else if (strcmp(name, "value") == 0) {
__s64 sz;
- t = btf__type_by_id(obj->btf, m->type);
+ t = btf__type_by_id(btf, m->type);
if (!t) {
pr_warn("map '%s': value type [%d] not found.\n",
- map->name, m->type);
+ map_name, m->type);
return -EINVAL;
}
if (!btf_is_ptr(t)) {
pr_warn("map '%s': value spec is not PTR: %s.\n",
- map->name, btf_kind_str(t));
+ map_name, btf_kind_str(t));
return -EINVAL;
}
- sz = btf__resolve_size(obj->btf, t->type);
+ sz = btf__resolve_size(btf, t->type);
if (sz < 0) {
pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
- map->name, t->type, (ssize_t)sz);
+ map_name, t->type, (ssize_t)sz);
return sz;
}
- pr_debug("map '%s': found value [%u], sz = %zd.\n",
- map->name, t->type, (ssize_t)sz);
- if (map->def.value_size && map->def.value_size != sz) {
+ if (map_def->value_size && map_def->value_size != sz) {
pr_warn("map '%s': conflicting value size %u != %zd.\n",
- map->name, map->def.value_size, (ssize_t)sz);
+ map_name, map_def->value_size, (ssize_t)sz);
return -EINVAL;
}
- map->def.value_size = sz;
- map->btf_value_type_id = t->type;
+ map_def->value_size = sz;
+ map_def->value_type_id = t->type;
+ map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE;
}
else if (strcmp(name, "values") == 0) {
+ bool is_map_in_map = bpf_map_type__is_map_in_map(map_def->map_type);
+ bool is_prog_array = map_def->map_type == BPF_MAP_TYPE_PROG_ARRAY;
+ const char *desc = is_map_in_map ? "map-in-map inner" : "prog-array value";
+ char inner_map_name[128];
int err;
if (is_inner) {
pr_warn("map '%s': multi-level inner maps not supported.\n",
- map->name);
+ map_name);
return -ENOTSUP;
}
if (i != vlen - 1) {
pr_warn("map '%s': '%s' member should be last.\n",
- map->name, name);
+ map_name, name);
return -EINVAL;
}
- if (!bpf_map_type__is_map_in_map(map->def.type)) {
- pr_warn("map '%s': should be map-in-map.\n",
- map->name);
+ if (!is_map_in_map && !is_prog_array) {
+ pr_warn("map '%s': should be map-in-map or prog-array.\n",
+ map_name);
return -ENOTSUP;
}
- if (map->def.value_size && map->def.value_size != 4) {
+ if (map_def->value_size && map_def->value_size != 4) {
pr_warn("map '%s': conflicting value size %u != 4.\n",
- map->name, map->def.value_size);
+ map_name, map_def->value_size);
return -EINVAL;
}
- map->def.value_size = 4;
- t = btf__type_by_id(obj->btf, m->type);
+ map_def->value_size = 4;
+ t = btf__type_by_id(btf, m->type);
if (!t) {
- pr_warn("map '%s': map-in-map inner type [%d] not found.\n",
- map->name, m->type);
+ pr_warn("map '%s': %s type [%d] not found.\n",
+ map_name, desc, m->type);
return -EINVAL;
}
if (!btf_is_array(t) || btf_array(t)->nelems) {
- pr_warn("map '%s': map-in-map inner spec is not a zero-sized array.\n",
- map->name);
+ pr_warn("map '%s': %s spec is not a zero-sized array.\n",
+ map_name, desc);
return -EINVAL;
}
- t = skip_mods_and_typedefs(obj->btf, btf_array(t)->type,
- NULL);
+ t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL);
if (!btf_is_ptr(t)) {
- pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
- map->name, btf_kind_str(t));
+ pr_warn("map '%s': %s def is of unexpected kind %s.\n",
+ map_name, desc, btf_kind_str(t));
return -EINVAL;
}
- t = skip_mods_and_typedefs(obj->btf, t->type, NULL);
+ t = skip_mods_and_typedefs(btf, t->type, NULL);
+ if (is_prog_array) {
+ if (!btf_is_func_proto(t)) {
+ pr_warn("map '%s': prog-array value def is of unexpected kind %s.\n",
+ map_name, btf_kind_str(t));
+ return -EINVAL;
+ }
+ continue;
+ }
if (!btf_is_struct(t)) {
pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
- map->name, btf_kind_str(t));
+ map_name, btf_kind_str(t));
return -EINVAL;
}
- map->inner_map = calloc(1, sizeof(*map->inner_map));
- if (!map->inner_map)
- return -ENOMEM;
- map->inner_map->sec_idx = obj->efile.btf_maps_shndx;
- map->inner_map->name = malloc(strlen(map->name) +
- sizeof(".inner") + 1);
- if (!map->inner_map->name)
- return -ENOMEM;
- sprintf(map->inner_map->name, "%s.inner", map->name);
-
- err = parse_btf_map_def(obj, map->inner_map, t, strict,
- true /* is_inner */, NULL);
+ snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", map_name);
+ err = parse_btf_map_def(inner_map_name, btf, t, strict, inner_def, NULL);
if (err)
return err;
+
+ map_def->parts |= MAP_DEF_INNER_MAP;
} else if (strcmp(name, "pinning") == 0) {
__u32 val;
- int err;
if (is_inner) {
- pr_debug("map '%s': inner def can't be pinned.\n",
- map->name);
+ pr_warn("map '%s': inner def can't be pinned.\n", map_name);
return -EINVAL;
}
- if (!get_map_field_int(map->name, obj->btf, m, &val))
+ if (!get_map_field_int(map_name, btf, m, &val))
return -EINVAL;
- pr_debug("map '%s': found pinning = %u.\n",
- map->name, val);
-
- if (val != LIBBPF_PIN_NONE &&
- val != LIBBPF_PIN_BY_NAME) {
+ if (val != LIBBPF_PIN_NONE && val != LIBBPF_PIN_BY_NAME) {
pr_warn("map '%s': invalid pinning value %u.\n",
- map->name, val);
+ map_name, val);
return -EINVAL;
}
- if (val == LIBBPF_PIN_BY_NAME) {
- err = build_map_pin_path(map, pin_root_path);
- if (err) {
- pr_warn("map '%s': couldn't build pin path.\n",
- map->name);
- return err;
- }
- }
+ map_def->pinning = val;
+ map_def->parts |= MAP_DEF_PINNING;
+ } else if (strcmp(name, "map_extra") == 0) {
+ __u64 map_extra;
+
+ if (!get_map_field_long(map_name, btf, m, &map_extra))
+ return -EINVAL;
+ map_def->map_extra = map_extra;
+ map_def->parts |= MAP_DEF_MAP_EXTRA;
} else {
if (strict) {
- pr_warn("map '%s': unknown field '%s'.\n",
- map->name, name);
+ pr_warn("map '%s': unknown field '%s'.\n", map_name, name);
return -ENOTSUP;
}
- pr_debug("map '%s': ignoring unknown field '%s'.\n",
- map->name, name);
+ pr_debug("map '%s': ignoring unknown field '%s'.\n", map_name, name);
}
}
- if (map->def.type == BPF_MAP_TYPE_UNSPEC) {
- pr_warn("map '%s': map type isn't specified.\n", map->name);
+ if (map_def->map_type == BPF_MAP_TYPE_UNSPEC) {
+ pr_warn("map '%s': map type isn't specified.\n", map_name);
return -EINVAL;
}
return 0;
}
+static size_t adjust_ringbuf_sz(size_t sz)
+{
+ __u32 page_sz = sysconf(_SC_PAGE_SIZE);
+ __u32 mul;
+
+ /* if user forgot to set any size, make sure they see error */
+ if (sz == 0)
+ return 0;
+ /* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be
+ * a power-of-2 multiple of kernel's page size. If user diligently
+ * satisified these conditions, pass the size through.
+ */
+ if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz))
+ return sz;
+
+ /* Otherwise find closest (page_sz * power_of_2) product bigger than
+ * user-set size to satisfy both user size request and kernel
+ * requirements and substitute correct max_entries for map creation.
+ */
+ for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) {
+ if (mul * page_sz > sz)
+ return mul * page_sz;
+ }
+
+ /* if it's impossible to satisfy the conditions (i.e., user size is
+ * very close to UINT_MAX but is not a power-of-2 multiple of
+ * page_size) then just return original size and let kernel reject it
+ */
+ return sz;
+}
+
+static bool map_is_ringbuf(const struct bpf_map *map)
+{
+ return map->def.type == BPF_MAP_TYPE_RINGBUF ||
+ map->def.type == BPF_MAP_TYPE_USER_RINGBUF;
+}
+
+static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def)
+{
+ map->def.type = def->map_type;
+ map->def.key_size = def->key_size;
+ map->def.value_size = def->value_size;
+ map->def.max_entries = def->max_entries;
+ map->def.map_flags = def->map_flags;
+ map->map_extra = def->map_extra;
+
+ map->numa_node = def->numa_node;
+ map->btf_key_type_id = def->key_type_id;
+ map->btf_value_type_id = def->value_type_id;
+
+ /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
+ if (map_is_ringbuf(map))
+ map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
+
+ if (def->parts & MAP_DEF_MAP_TYPE)
+ pr_debug("map '%s': found type = %u.\n", map->name, def->map_type);
+
+ if (def->parts & MAP_DEF_KEY_TYPE)
+ pr_debug("map '%s': found key [%u], sz = %u.\n",
+ map->name, def->key_type_id, def->key_size);
+ else if (def->parts & MAP_DEF_KEY_SIZE)
+ pr_debug("map '%s': found key_size = %u.\n", map->name, def->key_size);
+
+ if (def->parts & MAP_DEF_VALUE_TYPE)
+ pr_debug("map '%s': found value [%u], sz = %u.\n",
+ map->name, def->value_type_id, def->value_size);
+ else if (def->parts & MAP_DEF_VALUE_SIZE)
+ pr_debug("map '%s': found value_size = %u.\n", map->name, def->value_size);
+
+ if (def->parts & MAP_DEF_MAX_ENTRIES)
+ pr_debug("map '%s': found max_entries = %u.\n", map->name, def->max_entries);
+ if (def->parts & MAP_DEF_MAP_FLAGS)
+ pr_debug("map '%s': found map_flags = 0x%x.\n", map->name, def->map_flags);
+ if (def->parts & MAP_DEF_MAP_EXTRA)
+ pr_debug("map '%s': found map_extra = 0x%llx.\n", map->name,
+ (unsigned long long)def->map_extra);
+ if (def->parts & MAP_DEF_PINNING)
+ pr_debug("map '%s': found pinning = %u.\n", map->name, def->pinning);
+ if (def->parts & MAP_DEF_NUMA_NODE)
+ pr_debug("map '%s': found numa_node = %u.\n", map->name, def->numa_node);
+
+ if (def->parts & MAP_DEF_INNER_MAP)
+ pr_debug("map '%s': found inner map definition.\n", map->name);
+}
+
+static const char *btf_var_linkage_str(__u32 linkage)
+{
+ switch (linkage) {
+ case BTF_VAR_STATIC: return "static";
+ case BTF_VAR_GLOBAL_ALLOCATED: return "global";
+ case BTF_VAR_GLOBAL_EXTERN: return "extern";
+ default: return "unknown";
+ }
+}
+
static int bpf_object__init_user_btf_map(struct bpf_object *obj,
const struct btf_type *sec,
int var_idx, int sec_idx,
const Elf_Data *data, bool strict,
const char *pin_root_path)
{
+ struct btf_map_def map_def = {}, inner_def = {};
const struct btf_type *var, *def;
const struct btf_var_secinfo *vi;
const struct btf_var *var_extra;
const char *map_name;
struct bpf_map *map;
+ int err;
vi = btf_var_secinfos(sec) + var_idx;
var = btf__type_by_id(obj->btf, vi->type);
@@ -2250,10 +2781,9 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
map_name, btf_kind_str(var));
return -EINVAL;
}
- if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED &&
- var_extra->linkage != BTF_VAR_STATIC) {
- pr_warn("map '%s': unsupported var linkage %u.\n",
- map_name, var_extra->linkage);
+ if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED) {
+ pr_warn("map '%s': unsupported map linkage %s.\n",
+ map_name, btf_var_linkage_str(var_extra->linkage));
return -EOPNOTSUPP;
}
@@ -2284,7 +2814,67 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
map_name, map->sec_idx, map->sec_offset);
- return parse_btf_map_def(obj, map, def, strict, false, pin_root_path);
+ err = parse_btf_map_def(map->name, obj->btf, def, strict, &map_def, &inner_def);
+ if (err)
+ return err;
+
+ fill_map_from_def(map, &map_def);
+
+ if (map_def.pinning == LIBBPF_PIN_BY_NAME) {
+ err = build_map_pin_path(map, pin_root_path);
+ if (err) {
+ pr_warn("map '%s': couldn't build pin path.\n", map->name);
+ return err;
+ }
+ }
+
+ if (map_def.parts & MAP_DEF_INNER_MAP) {
+ map->inner_map = calloc(1, sizeof(*map->inner_map));
+ if (!map->inner_map)
+ return -ENOMEM;
+ map->inner_map->fd = create_placeholder_fd();
+ if (map->inner_map->fd < 0)
+ return map->inner_map->fd;
+ map->inner_map->sec_idx = sec_idx;
+ map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1);
+ if (!map->inner_map->name)
+ return -ENOMEM;
+ sprintf(map->inner_map->name, "%s.inner", map_name);
+
+ fill_map_from_def(map->inner_map, &inner_def);
+ }
+
+ err = map_fill_btf_type_info(obj, map);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int init_arena_map_data(struct bpf_object *obj, struct bpf_map *map,
+ const char *sec_name, int sec_idx,
+ void *data, size_t data_sz)
+{
+ const long page_sz = sysconf(_SC_PAGE_SIZE);
+ size_t mmap_sz;
+
+ mmap_sz = bpf_map_mmap_sz(obj->arena_map);
+ if (roundup(data_sz, page_sz) > mmap_sz) {
+ pr_warn("elf: sec '%s': declared ARENA map size (%zu) is too small to hold global __arena variables of size %zu\n",
+ sec_name, mmap_sz, data_sz);
+ return -E2BIG;
+ }
+
+ obj->arena_data = malloc(data_sz);
+ if (!obj->arena_data)
+ return -ENOMEM;
+ memcpy(obj->arena_data, data, data_sz);
+ obj->arena_data_sz = data_sz;
+
+ /* make bpf_map__init_value() work for ARENA maps */
+ map->mmaped = obj->arena_data;
+
+ return 0;
}
static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
@@ -2308,8 +2898,8 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
return -EINVAL;
}
- nr_types = btf__get_nr_types(obj->btf);
- for (i = 1; i <= nr_types; i++) {
+ nr_types = btf__type_cnt(obj->btf);
+ for (i = 1; i < nr_types; i++) {
t = btf__type_by_id(obj->btf, i);
if (!btf_is_datasec(t))
continue;
@@ -2336,6 +2926,33 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
return err;
}
+ for (i = 0; i < obj->nr_maps; i++) {
+ struct bpf_map *map = &obj->maps[i];
+
+ if (map->def.type != BPF_MAP_TYPE_ARENA)
+ continue;
+
+ if (obj->arena_map) {
+ pr_warn("map '%s': only single ARENA map is supported (map '%s' is also ARENA)\n",
+ map->name, obj->arena_map->name);
+ return -EINVAL;
+ }
+ obj->arena_map = map;
+
+ if (obj->efile.arena_data) {
+ err = init_arena_map_data(obj, map, ARENA_SEC, obj->efile.arena_data_shndx,
+ obj->efile.arena_data->d_buf,
+ obj->efile.arena_data->d_size);
+ if (err)
+ return err;
+ }
+ }
+ if (obj->efile.arena_data && !obj->arena_map) {
+ pr_warn("elf: sec '%s': to use global __arena variables the ARENA map should be explicitly declared in SEC(\".maps\")\n",
+ ARENA_SEC);
+ return -ENOENT;
+ }
+
return 0;
}
@@ -2344,54 +2961,69 @@ static int bpf_object__init_maps(struct bpf_object *obj,
{
const char *pin_root_path;
bool strict;
- int err;
+ int err = 0;
strict = !OPTS_GET(opts, relaxed_maps, false);
pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
- err = bpf_object__init_user_maps(obj, strict);
- err = err ?: bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
+ err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
err = err ?: bpf_object__init_global_data_maps(obj);
err = err ?: bpf_object__init_kconfig_map(obj);
- err = err ?: bpf_object__init_struct_ops_maps(obj);
- if (err)
- return err;
+ err = err ?: bpf_object_init_struct_ops(obj);
- return 0;
+ return err;
}
static bool section_have_execinstr(struct bpf_object *obj, int idx)
{
- GElf_Shdr sh;
+ Elf64_Shdr *sh;
- if (elf_sec_hdr(obj, elf_sec_by_idx(obj, idx), &sh))
+ sh = elf_sec_hdr(obj, elf_sec_by_idx(obj, idx));
+ if (!sh)
return false;
- return sh.sh_flags & SHF_EXECINSTR;
+ return sh->sh_flags & SHF_EXECINSTR;
}
-static bool btf_needs_sanitization(struct bpf_object *obj)
+static bool starts_with_qmark(const char *s)
{
- bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
- bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
- bool has_func = kernel_supports(FEAT_BTF_FUNC);
-
- return !has_func || !has_datasec || !has_func_global;
+ return s && s[0] == '?';
}
-static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
+static bool btf_needs_sanitization(struct bpf_object *obj)
{
- bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
- bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
- bool has_func = kernel_supports(FEAT_BTF_FUNC);
+ bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
+ bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
+ bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
+ bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
+ bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
+ bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
+ bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
+ bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC);
+
+ return !has_func || !has_datasec || !has_func_global || !has_float ||
+ !has_decl_tag || !has_type_tag || !has_enum64 || !has_qmark_datasec;
+}
+
+static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
+{
+ bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
+ bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
+ bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
+ bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
+ bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
+ bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
+ bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
+ bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC);
+ int enum64_placeholder_id = 0;
struct btf_type *t;
int i, j, vlen;
- for (i = 1; i <= btf__get_nr_types(btf); i++) {
+ for (i = 1; i < btf__type_cnt(btf); i++) {
t = (struct btf_type *)btf__type_by_id(btf, i);
- if (!has_datasec && btf_is_var(t)) {
- /* replace VAR with INT */
+ if ((!has_datasec && btf_is_var(t)) || (!has_decl_tag && btf_is_decl_tag(t))) {
+ /* replace VAR/DECL_TAG with INT */
t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0);
/*
* using size = 1 is the safest choice, 4 will be too
@@ -2409,7 +3041,7 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
name = (char *)btf__name_by_offset(btf, t->name_off);
while (*name) {
- if (*name == '.')
+ if (*name == '.' || *name == '?')
*name = '_';
name++;
}
@@ -2424,6 +3056,14 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
vt = (void *)btf__type_by_id(btf, v->type);
m->name_off = vt->name_off;
}
+ } else if (!has_qmark_datasec && btf_is_datasec(t) &&
+ starts_with_qmark(btf__name_by_offset(btf, t->name_off))) {
+ /* replace '?' prefix with '_' for DATASEC names */
+ char *name;
+
+ name = (char *)btf__name_by_offset(btf, t->name_off);
+ if (name[0] == '?')
+ name[0] = '_';
} else if (!has_func && btf_is_func_proto(t)) {
/* replace FUNC_PROTO with ENUM */
vlen = btf_vlen(t);
@@ -2435,20 +3075,55 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
} else if (!has_func_global && btf_is_func(t)) {
/* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */
t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0);
+ } else if (!has_float && btf_is_float(t)) {
+ /* replace FLOAT with an equally-sized empty STRUCT;
+ * since C compilers do not accept e.g. "float" as a
+ * valid struct name, make it anonymous
+ */
+ t->name_off = 0;
+ t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0);
+ } else if (!has_type_tag && btf_is_type_tag(t)) {
+ /* replace TYPE_TAG with a CONST */
+ t->name_off = 0;
+ t->info = BTF_INFO_ENC(BTF_KIND_CONST, 0, 0);
+ } else if (!has_enum64 && btf_is_enum(t)) {
+ /* clear the kflag */
+ t->info = btf_type_info(btf_kind(t), btf_vlen(t), false);
+ } else if (!has_enum64 && btf_is_enum64(t)) {
+ /* replace ENUM64 with a union */
+ struct btf_member *m;
+
+ if (enum64_placeholder_id == 0) {
+ enum64_placeholder_id = btf__add_int(btf, "enum64_placeholder", 1, 0);
+ if (enum64_placeholder_id < 0)
+ return enum64_placeholder_id;
+
+ t = (struct btf_type *)btf__type_by_id(btf, i);
+ }
+
+ m = btf_members(t);
+ vlen = btf_vlen(t);
+ t->info = BTF_INFO_ENC(BTF_KIND_UNION, 0, vlen);
+ for (j = 0; j < vlen; j++, m++) {
+ m->type = enum64_placeholder_id;
+ m->offset = 0;
+ }
}
}
+
+ return 0;
}
static bool libbpf_needs_btf(const struct bpf_object *obj)
{
return obj->efile.btf_maps_shndx >= 0 ||
- obj->efile.st_ops_shndx >= 0 ||
+ obj->efile.has_st_ops ||
obj->nr_extern > 0;
}
static bool kernel_needs_btf(const struct bpf_object *obj)
{
- return obj->efile.st_ops_shndx >= 0;
+ return obj->efile.has_st_ops;
}
static int bpf_object__init_btf(struct bpf_object *obj,
@@ -2459,31 +3134,69 @@ static int bpf_object__init_btf(struct bpf_object *obj,
if (btf_data) {
obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
- if (IS_ERR(obj->btf)) {
- err = PTR_ERR(obj->btf);
+ err = libbpf_get_error(obj->btf);
+ if (err) {
obj->btf = NULL;
- pr_warn("Error loading ELF section %s: %d.\n",
- BTF_ELF_SEC, err);
+ pr_warn("Error loading ELF section %s: %d.\n", BTF_ELF_SEC, err);
goto out;
}
/* enforce 8-byte pointers for BPF-targeted BTFs */
btf__set_pointer_size(obj->btf, 8);
- err = 0;
}
if (btf_ext_data) {
+ struct btf_ext_info *ext_segs[3];
+ int seg_num, sec_num;
+
if (!obj->btf) {
pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n",
BTF_EXT_ELF_SEC, BTF_ELF_SEC);
goto out;
}
- obj->btf_ext = btf_ext__new(btf_ext_data->d_buf,
- btf_ext_data->d_size);
- if (IS_ERR(obj->btf_ext)) {
- pr_warn("Error loading ELF section %s: %ld. Ignored and continue.\n",
- BTF_EXT_ELF_SEC, PTR_ERR(obj->btf_ext));
+ obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size);
+ err = libbpf_get_error(obj->btf_ext);
+ if (err) {
+ pr_warn("Error loading ELF section %s: %d. Ignored and continue.\n",
+ BTF_EXT_ELF_SEC, err);
obj->btf_ext = NULL;
goto out;
}
+
+ /* setup .BTF.ext to ELF section mapping */
+ ext_segs[0] = &obj->btf_ext->func_info;
+ ext_segs[1] = &obj->btf_ext->line_info;
+ ext_segs[2] = &obj->btf_ext->core_relo_info;
+ for (seg_num = 0; seg_num < ARRAY_SIZE(ext_segs); seg_num++) {
+ struct btf_ext_info *seg = ext_segs[seg_num];
+ const struct btf_ext_info_sec *sec;
+ const char *sec_name;
+ Elf_Scn *scn;
+
+ if (seg->sec_cnt == 0)
+ continue;
+
+ seg->sec_idxs = calloc(seg->sec_cnt, sizeof(*seg->sec_idxs));
+ if (!seg->sec_idxs) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ sec_num = 0;
+ for_each_btf_ext_sec(seg, sec) {
+ /* preventively increment index to avoid doing
+ * this before every continue below
+ */
+ sec_num++;
+
+ sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
+ if (str_is_empty(sec_name))
+ continue;
+ scn = elf_sec_by_name(obj, sec_name);
+ if (!scn)
+ continue;
+
+ seg->sec_idxs[sec_num - 1] = elf_ndxscn(scn);
+ }
+ }
}
out:
if (err && libbpf_needs_btf(obj)) {
@@ -2493,23 +3206,134 @@ out:
return 0;
}
-static int bpf_object__finalize_btf(struct bpf_object *obj)
+static int compare_vsi_off(const void *_a, const void *_b)
{
+ const struct btf_var_secinfo *a = _a;
+ const struct btf_var_secinfo *b = _b;
+
+ return a->offset - b->offset;
+}
+
+static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
+ struct btf_type *t)
+{
+ __u32 size = 0, i, vars = btf_vlen(t);
+ const char *sec_name = btf__name_by_offset(btf, t->name_off);
+ struct btf_var_secinfo *vsi;
+ bool fixup_offsets = false;
int err;
+ if (!sec_name) {
+ pr_debug("No name found in string section for DATASEC kind.\n");
+ return -ENOENT;
+ }
+
+ /* Extern-backing datasecs (.ksyms, .kconfig) have their size and
+ * variable offsets set at the previous step. Further, not every
+ * extern BTF VAR has corresponding ELF symbol preserved, so we skip
+ * all fixups altogether for such sections and go straight to sorting
+ * VARs within their DATASEC.
+ */
+ if (strcmp(sec_name, KCONFIG_SEC) == 0 || strcmp(sec_name, KSYMS_SEC) == 0)
+ goto sort_vars;
+
+ /* Clang leaves DATASEC size and VAR offsets as zeroes, so we need to
+ * fix this up. But BPF static linker already fixes this up and fills
+ * all the sizes and offsets during static linking. So this step has
+ * to be optional. But the STV_HIDDEN handling is non-optional for any
+ * non-extern DATASEC, so the variable fixup loop below handles both
+ * functions at the same time, paying the cost of BTF VAR <-> ELF
+ * symbol matching just once.
+ */
+ if (t->size == 0) {
+ err = find_elf_sec_sz(obj, sec_name, &size);
+ if (err || !size) {
+ pr_debug("sec '%s': failed to determine size from ELF: size %u, err %d\n",
+ sec_name, size, err);
+ return -ENOENT;
+ }
+
+ t->size = size;
+ fixup_offsets = true;
+ }
+
+ for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) {
+ const struct btf_type *t_var;
+ struct btf_var *var;
+ const char *var_name;
+ Elf64_Sym *sym;
+
+ t_var = btf__type_by_id(btf, vsi->type);
+ if (!t_var || !btf_is_var(t_var)) {
+ pr_debug("sec '%s': unexpected non-VAR type found\n", sec_name);
+ return -EINVAL;
+ }
+
+ var = btf_var(t_var);
+ if (var->linkage == BTF_VAR_STATIC || var->linkage == BTF_VAR_GLOBAL_EXTERN)
+ continue;
+
+ var_name = btf__name_by_offset(btf, t_var->name_off);
+ if (!var_name) {
+ pr_debug("sec '%s': failed to find name of DATASEC's member #%d\n",
+ sec_name, i);
+ return -ENOENT;
+ }
+
+ sym = find_elf_var_sym(obj, var_name);
+ if (IS_ERR(sym)) {
+ pr_debug("sec '%s': failed to find ELF symbol for VAR '%s'\n",
+ sec_name, var_name);
+ return -ENOENT;
+ }
+
+ if (fixup_offsets)
+ vsi->offset = sym->st_value;
+
+ /* if variable is a global/weak symbol, but has restricted
+ * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF VAR
+ * as static. This follows similar logic for functions (BPF
+ * subprogs) and influences libbpf's further decisions about
+ * whether to make global data BPF array maps as
+ * BPF_F_MMAPABLE.
+ */
+ if (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
+ || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL)
+ var->linkage = BTF_VAR_STATIC;
+ }
+
+sort_vars:
+ qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off);
+ return 0;
+}
+
+static int bpf_object_fixup_btf(struct bpf_object *obj)
+{
+ int i, n, err = 0;
+
if (!obj->btf)
return 0;
- err = btf__finalize_data(obj, obj->btf);
- if (err) {
- pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err);
- return err;
+ n = btf__type_cnt(obj->btf);
+ for (i = 1; i < n; i++) {
+ struct btf_type *t = btf_type_by_id(obj->btf, i);
+
+ /* Loader needs to fix up some of the things compiler
+ * couldn't get its hands on while emitting BTF. This
+ * is section size and global variable offset. We use
+ * the info from the ELF itself for this purpose.
+ */
+ if (btf_is_datasec(t)) {
+ err = btf_fixup_datasec(obj, obj->btf, t);
+ if (err)
+ return err;
+ }
}
return 0;
}
-static inline bool libbpf_prog_needs_vmlinux_btf(struct bpf_program *prog)
+static bool prog_needs_vmlinux_btf(struct bpf_program *prog)
{
if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
prog->type == BPF_PROG_TYPE_LSM)
@@ -2524,42 +3348,61 @@ static inline bool libbpf_prog_needs_vmlinux_btf(struct bpf_program *prog)
return false;
}
-static int bpf_object__load_vmlinux_btf(struct bpf_object *obj)
+static bool map_needs_vmlinux_btf(struct bpf_map *map)
+{
+ return bpf_map__is_struct_ops(map);
+}
+
+static bool obj_needs_vmlinux_btf(const struct bpf_object *obj)
{
- bool need_vmlinux_btf = false;
struct bpf_program *prog;
- int i, err;
+ struct bpf_map *map;
+ int i;
- /* CO-RE relocations need kernel BTF */
- if (obj->btf_ext && obj->btf_ext->core_relo_info.len)
- need_vmlinux_btf = true;
+ /* CO-RE relocations need kernel BTF, only when btf_custom_path
+ * is not specified
+ */
+ if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path)
+ return true;
/* Support for typed ksyms needs kernel BTF */
for (i = 0; i < obj->nr_extern; i++) {
const struct extern_desc *ext;
ext = &obj->externs[i];
- if (ext->type == EXT_KSYM && ext->ksym.type_id) {
- need_vmlinux_btf = true;
- break;
- }
+ if (ext->type == EXT_KSYM && ext->ksym.type_id)
+ return true;
}
bpf_object__for_each_program(prog, obj) {
- if (!prog->load)
+ if (!prog->autoload)
continue;
- if (libbpf_prog_needs_vmlinux_btf(prog)) {
- need_vmlinux_btf = true;
- break;
- }
+ if (prog_needs_vmlinux_btf(prog))
+ return true;
}
- if (!need_vmlinux_btf)
+ bpf_object__for_each_map(map, obj) {
+ if (map_needs_vmlinux_btf(map))
+ return true;
+ }
+
+ return false;
+}
+
+static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force)
+{
+ int err;
+
+ /* btf_vmlinux could be loaded earlier */
+ if (obj->btf_vmlinux || obj->gen_loader)
return 0;
- obj->btf_vmlinux = libbpf_find_kernel_btf();
- if (IS_ERR(obj->btf_vmlinux)) {
- err = PTR_ERR(obj->btf_vmlinux);
+ if (!force && !obj_needs_vmlinux_btf(obj))
+ return 0;
+
+ obj->btf_vmlinux = btf__load_vmlinux_btf();
+ err = libbpf_get_error(obj->btf_vmlinux);
+ if (err) {
pr_warn("Error loading vmlinux BTF: %d\n", err);
obj->btf_vmlinux = NULL;
return err;
@@ -2571,12 +3414,12 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
{
struct btf *kern_btf = obj->btf;
bool btf_mandatory, sanitize;
- int err = 0;
+ int i, err = 0;
if (!obj->btf)
return 0;
- if (!kernel_supports(FEAT_BTF)) {
+ if (!kernel_supports(obj, FEAT_BTF)) {
if (kernel_needs_btf(obj)) {
err = -EOPNOTSUPP;
goto report;
@@ -2585,23 +3428,73 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
return 0;
}
+ /* Even though some subprogs are global/weak, user might prefer more
+ * permissive BPF verification process that BPF verifier performs for
+ * static functions, taking into account more context from the caller
+ * functions. In such case, they need to mark such subprogs with
+ * __attribute__((visibility("hidden"))) and libbpf will adjust
+ * corresponding FUNC BTF type to be marked as static and trigger more
+ * involved BPF verification process.
+ */
+ for (i = 0; i < obj->nr_programs; i++) {
+ struct bpf_program *prog = &obj->programs[i];
+ struct btf_type *t;
+ const char *name;
+ int j, n;
+
+ if (!prog->mark_btf_static || !prog_is_subprog(obj, prog))
+ continue;
+
+ n = btf__type_cnt(obj->btf);
+ for (j = 1; j < n; j++) {
+ t = btf_type_by_id(obj->btf, j);
+ if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL)
+ continue;
+
+ name = btf__str_by_offset(obj->btf, t->name_off);
+ if (strcmp(name, prog->name) != 0)
+ continue;
+
+ t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_STATIC, 0);
+ break;
+ }
+ }
+
sanitize = btf_needs_sanitization(obj);
if (sanitize) {
const void *raw_data;
__u32 sz;
/* clone BTF to sanitize a copy and leave the original intact */
- raw_data = btf__get_raw_data(obj->btf, &sz);
+ raw_data = btf__raw_data(obj->btf, &sz);
kern_btf = btf__new(raw_data, sz);
- if (IS_ERR(kern_btf))
- return PTR_ERR(kern_btf);
+ err = libbpf_get_error(kern_btf);
+ if (err)
+ return err;
/* enforce 8-byte pointers for BPF-targeted BTFs */
btf__set_pointer_size(obj->btf, 8);
- bpf_object__sanitize_btf(obj, kern_btf);
+ err = bpf_object__sanitize_btf(obj, kern_btf);
+ if (err)
+ return err;
}
- err = btf__load(kern_btf);
+ if (obj->gen_loader) {
+ __u32 raw_size = 0;
+ const void *raw_data = btf__raw_data(kern_btf, &raw_size);
+
+ if (!raw_data)
+ return -ENOMEM;
+ bpf_gen__load_btf(obj->gen_loader, raw_data, raw_size);
+ /* Pretend to have valid FD to pass various fd >= 0 checks.
+ * This fd == 0 will not be used with any syscall and will be reset to -1 eventually.
+ */
+ btf__set_fd(kern_btf, 0);
+ } else {
+ /* currently BPF_BTF_LOAD only supports log_level 1 */
+ err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size,
+ obj->log_level ? 1 : 0, obj->token_fd);
+ }
if (sanitize) {
if (!err) {
/* move fd to libbpf's BTF */
@@ -2682,32 +3575,36 @@ static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name)
return NULL;
}
-static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr)
+static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn)
{
+ Elf64_Shdr *shdr;
+
if (!scn)
- return -EINVAL;
+ return NULL;
- if (gelf_getshdr(scn, hdr) != hdr) {
+ shdr = elf64_getshdr(scn);
+ if (!shdr) {
pr_warn("elf: failed to get section(%zu) header from %s: %s\n",
elf_ndxscn(scn), obj->path, elf_errmsg(-1));
- return -EINVAL;
+ return NULL;
}
- return 0;
+ return shdr;
}
static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn)
{
const char *name;
- GElf_Shdr sh;
+ Elf64_Shdr *sh;
if (!scn)
return NULL;
- if (elf_sec_hdr(obj, scn, &sh))
+ sh = elf_sec_hdr(obj, scn);
+ if (!sh)
return NULL;
- name = elf_sec_str(obj, sh.sh_name);
+ name = elf_sec_str(obj, sh->sh_name);
if (!name) {
pr_warn("elf: failed to get section(%zu) name from %s: %s\n",
elf_ndxscn(scn), obj->path, elf_errmsg(-1));
@@ -2735,40 +3632,36 @@ static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn)
return data;
}
-static int elf_sym_by_sec_off(const struct bpf_object *obj, size_t sec_idx,
- size_t off, __u32 sym_type, GElf_Sym *sym)
+static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx)
{
- Elf_Data *symbols = obj->efile.symbols;
- size_t n = symbols->d_size / sizeof(GElf_Sym);
- int i;
+ if (idx >= obj->efile.symbols->d_size / sizeof(Elf64_Sym))
+ return NULL;
- for (i = 0; i < n; i++) {
- if (!gelf_getsym(symbols, i, sym))
- continue;
- if (sym->st_shndx != sec_idx || sym->st_value != off)
- continue;
- if (GELF_ST_TYPE(sym->st_info) != sym_type)
- continue;
- return 0;
- }
+ return (Elf64_Sym *)obj->efile.symbols->d_buf + idx;
+}
- return -ENOENT;
+static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx)
+{
+ if (idx >= data->d_size / sizeof(Elf64_Rel))
+ return NULL;
+
+ return (Elf64_Rel *)data->d_buf + idx;
}
static bool is_sec_name_dwarf(const char *name)
{
/* approximation, but the actual list is too long */
- return strncmp(name, ".debug_", sizeof(".debug_") - 1) == 0;
+ return str_has_pfx(name, ".debug_");
}
-static bool ignore_elf_section(GElf_Shdr *hdr, const char *name)
+static bool ignore_elf_section(Elf64_Shdr *hdr, const char *name)
{
/* no special handling of .strtab */
if (hdr->sh_type == SHT_STRTAB)
return true;
/* ignore .llvm_addrsig section as well */
- if (hdr->sh_type == 0x6FFF4C03 /* SHT_LLVM_ADDRSIG */)
+ if (hdr->sh_type == SHT_LLVM_ADDRSIG)
return true;
/* no subprograms will lead to an empty .text section, ignore it */
@@ -2780,7 +3673,7 @@ static bool ignore_elf_section(GElf_Shdr *hdr, const char *name)
if (is_sec_name_dwarf(name))
return true;
- if (strncmp(name, ".rel", sizeof(".rel") - 1) == 0) {
+ if (str_has_pfx(name, ".rel")) {
name += sizeof(".rel") - 1;
/* DWARF section relocations */
if (is_sec_name_dwarf(name))
@@ -2809,6 +3702,7 @@ static int cmp_progs(const void *_a, const void *_b)
static int bpf_object__elf_collect(struct bpf_object *obj)
{
+ struct elf_sec_desc *sec_desc;
Elf *elf = obj->efile.elf;
Elf_Data *btf_ext_data = NULL;
Elf_Data *btf_data = NULL;
@@ -2816,17 +3710,32 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
const char *name;
Elf_Data *data;
Elf_Scn *scn;
- GElf_Shdr sh;
+ Elf64_Shdr *sh;
+
+ /* ELF section indices are 0-based, but sec #0 is special "invalid"
+ * section. Since section count retrieved by elf_getshdrnum() does
+ * include sec #0, it is already the necessary size of an array to keep
+ * all the sections.
+ */
+ if (elf_getshdrnum(obj->efile.elf, &obj->efile.sec_cnt)) {
+ pr_warn("elf: failed to get the number of sections for %s: %s\n",
+ obj->path, elf_errmsg(-1));
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+ obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs));
+ if (!obj->efile.secs)
+ return -ENOMEM;
/* a bunch of ELF parsing functionality depends on processing symbols,
* so do the first pass and find the symbol table
*/
scn = NULL;
while ((scn = elf_nextscn(elf, scn)) != NULL) {
- if (elf_sec_hdr(obj, scn, &sh))
+ sh = elf_sec_hdr(obj, scn);
+ if (!sh)
return -LIBBPF_ERRNO__FORMAT;
- if (sh.sh_type == SHT_SYMTAB) {
+ if (sh->sh_type == SHT_SYMTAB) {
if (obj->efile.symbols) {
pr_warn("elf: multiple symbol tables in %s\n", obj->path);
return -LIBBPF_ERRNO__FORMAT;
@@ -2836,24 +3745,34 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
if (!data)
return -LIBBPF_ERRNO__FORMAT;
+ idx = elf_ndxscn(scn);
+
obj->efile.symbols = data;
- obj->efile.symbols_shndx = elf_ndxscn(scn);
- obj->efile.strtabidx = sh.sh_link;
+ obj->efile.symbols_shndx = idx;
+ obj->efile.strtabidx = sh->sh_link;
}
}
+ if (!obj->efile.symbols) {
+ pr_warn("elf: couldn't find symbol table in %s, stripped object file?\n",
+ obj->path);
+ return -ENOENT;
+ }
+
scn = NULL;
while ((scn = elf_nextscn(elf, scn)) != NULL) {
- idx++;
+ idx = elf_ndxscn(scn);
+ sec_desc = &obj->efile.secs[idx];
- if (elf_sec_hdr(obj, scn, &sh))
+ sh = elf_sec_hdr(obj, scn);
+ if (!sh)
return -LIBBPF_ERRNO__FORMAT;
- name = elf_sec_str(obj, sh.sh_name);
+ name = elf_sec_str(obj, sh->sh_name);
if (!name)
return -LIBBPF_ERRNO__FORMAT;
- if (ignore_elf_section(&sh, name))
+ if (ignore_elf_section(sh, name))
continue;
data = elf_sec_data(obj, scn);
@@ -2862,8 +3781,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
idx, name, (unsigned long)data->d_size,
- (int)sh.sh_link, (unsigned long)sh.sh_flags,
- (int)sh.sh_type);
+ (int)sh->sh_link, (unsigned long)sh->sh_flags,
+ (int)sh->sh_type);
if (strcmp(name, "license") == 0) {
err = bpf_object__init_license(obj, data->d_buf, data->d_size);
@@ -2874,66 +3793,83 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
if (err)
return err;
} else if (strcmp(name, "maps") == 0) {
- obj->efile.maps_shndx = idx;
+ pr_warn("elf: legacy map definitions in 'maps' section are not supported by libbpf v1.0+\n");
+ return -ENOTSUP;
} else if (strcmp(name, MAPS_ELF_SEC) == 0) {
obj->efile.btf_maps_shndx = idx;
} else if (strcmp(name, BTF_ELF_SEC) == 0) {
+ if (sh->sh_type != SHT_PROGBITS)
+ return -LIBBPF_ERRNO__FORMAT;
btf_data = data;
} else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
+ if (sh->sh_type != SHT_PROGBITS)
+ return -LIBBPF_ERRNO__FORMAT;
btf_ext_data = data;
- } else if (sh.sh_type == SHT_SYMTAB) {
+ } else if (sh->sh_type == SHT_SYMTAB) {
/* already processed during the first pass above */
- } else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) {
- if (sh.sh_flags & SHF_EXECINSTR) {
+ } else if (sh->sh_type == SHT_PROGBITS && data->d_size > 0) {
+ if (sh->sh_flags & SHF_EXECINSTR) {
if (strcmp(name, ".text") == 0)
obj->efile.text_shndx = idx;
err = bpf_object__add_programs(obj, data, name, idx);
if (err)
return err;
- } else if (strcmp(name, DATA_SEC) == 0) {
- obj->efile.data = data;
- obj->efile.data_shndx = idx;
- } else if (strcmp(name, RODATA_SEC) == 0) {
- obj->efile.rodata = data;
- obj->efile.rodata_shndx = idx;
- } else if (strcmp(name, STRUCT_OPS_SEC) == 0) {
- obj->efile.st_ops_data = data;
- obj->efile.st_ops_shndx = idx;
+ } else if (strcmp(name, DATA_SEC) == 0 ||
+ str_has_pfx(name, DATA_SEC ".")) {
+ sec_desc->sec_type = SEC_DATA;
+ sec_desc->shdr = sh;
+ sec_desc->data = data;
+ } else if (strcmp(name, RODATA_SEC) == 0 ||
+ str_has_pfx(name, RODATA_SEC ".")) {
+ sec_desc->sec_type = SEC_RODATA;
+ sec_desc->shdr = sh;
+ sec_desc->data = data;
+ } else if (strcmp(name, STRUCT_OPS_SEC) == 0 ||
+ strcmp(name, STRUCT_OPS_LINK_SEC) == 0 ||
+ strcmp(name, "?" STRUCT_OPS_SEC) == 0 ||
+ strcmp(name, "?" STRUCT_OPS_LINK_SEC) == 0) {
+ sec_desc->sec_type = SEC_ST_OPS;
+ sec_desc->shdr = sh;
+ sec_desc->data = data;
+ obj->efile.has_st_ops = true;
+ } else if (strcmp(name, ARENA_SEC) == 0) {
+ obj->efile.arena_data = data;
+ obj->efile.arena_data_shndx = idx;
} else {
pr_info("elf: skipping unrecognized data section(%d) %s\n",
idx, name);
}
- } else if (sh.sh_type == SHT_REL) {
- int nr_sects = obj->efile.nr_reloc_sects;
- void *sects = obj->efile.reloc_sects;
- int sec = sh.sh_info; /* points to other section */
+ } else if (sh->sh_type == SHT_REL) {
+ int targ_sec_idx = sh->sh_info; /* points to other section */
+
+ if (sh->sh_entsize != sizeof(Elf64_Rel) ||
+ targ_sec_idx >= obj->efile.sec_cnt)
+ return -LIBBPF_ERRNO__FORMAT;
/* Only do relo for section with exec instructions */
- if (!section_have_execinstr(obj, sec) &&
+ if (!section_have_execinstr(obj, targ_sec_idx) &&
strcmp(name, ".rel" STRUCT_OPS_SEC) &&
+ strcmp(name, ".rel" STRUCT_OPS_LINK_SEC) &&
+ strcmp(name, ".rel?" STRUCT_OPS_SEC) &&
+ strcmp(name, ".rel?" STRUCT_OPS_LINK_SEC) &&
strcmp(name, ".rel" MAPS_ELF_SEC)) {
pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n",
- idx, name, sec,
- elf_sec_name(obj, elf_sec_by_idx(obj, sec)) ?: "<?>");
+ idx, name, targ_sec_idx,
+ elf_sec_name(obj, elf_sec_by_idx(obj, targ_sec_idx)) ?: "<?>");
continue;
}
- sects = libbpf_reallocarray(sects, nr_sects + 1,
- sizeof(*obj->efile.reloc_sects));
- if (!sects)
- return -ENOMEM;
-
- obj->efile.reloc_sects = sects;
- obj->efile.nr_reloc_sects++;
-
- obj->efile.reloc_sects[nr_sects].shdr = sh;
- obj->efile.reloc_sects[nr_sects].data = data;
- } else if (sh.sh_type == SHT_NOBITS && strcmp(name, BSS_SEC) == 0) {
- obj->efile.bss = data;
- obj->efile.bss_shndx = idx;
+ sec_desc->sec_type = SEC_RELO;
+ sec_desc->shdr = sh;
+ sec_desc->data = data;
+ } else if (sh->sh_type == SHT_NOBITS && (strcmp(name, BSS_SEC) == 0 ||
+ str_has_pfx(name, BSS_SEC "."))) {
+ sec_desc->sec_type = SEC_BSS;
+ sec_desc->shdr = sh;
+ sec_desc->data = data;
} else {
pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name,
- (size_t)sh.sh_size);
+ (size_t)sh->sh_size);
}
}
@@ -2943,42 +3879,65 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
}
/* sort BPF programs by section name and in-section instruction offset
- * for faster search */
- qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs);
+ * for faster search
+ */
+ if (obj->nr_programs)
+ qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs);
return bpf_object__init_btf(obj, btf_data, btf_ext_data);
}
-static bool sym_is_extern(const GElf_Sym *sym)
+static bool sym_is_extern(const Elf64_Sym *sym)
{
- int bind = GELF_ST_BIND(sym->st_info);
+ int bind = ELF64_ST_BIND(sym->st_info);
/* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */
return sym->st_shndx == SHN_UNDEF &&
(bind == STB_GLOBAL || bind == STB_WEAK) &&
- GELF_ST_TYPE(sym->st_info) == STT_NOTYPE;
+ ELF64_ST_TYPE(sym->st_info) == STT_NOTYPE;
+}
+
+static bool sym_is_subprog(const Elf64_Sym *sym, int text_shndx)
+{
+ int bind = ELF64_ST_BIND(sym->st_info);
+ int type = ELF64_ST_TYPE(sym->st_info);
+
+ /* in .text section */
+ if (sym->st_shndx != text_shndx)
+ return false;
+
+ /* local function */
+ if (bind == STB_LOCAL && type == STT_SECTION)
+ return true;
+
+ /* global function */
+ return bind == STB_GLOBAL && type == STT_FUNC;
}
static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
{
const struct btf_type *t;
- const char *var_name;
+ const char *tname;
int i, n;
if (!btf)
return -ESRCH;
- n = btf__get_nr_types(btf);
- for (i = 1; i <= n; i++) {
+ n = btf__type_cnt(btf);
+ for (i = 1; i < n; i++) {
t = btf__type_by_id(btf, i);
- if (!btf_is_var(t))
+ if (!btf_is_var(t) && !btf_is_func(t))
continue;
- var_name = btf__name_by_offset(btf, t->name_off);
- if (strcmp(var_name, ext_name))
+ tname = btf__name_by_offset(btf, t->name_off);
+ if (strcmp(tname, ext_name))
continue;
- if (btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
+ if (btf_is_var(t) &&
+ btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
+ return -EINVAL;
+
+ if (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_EXTERN)
return -EINVAL;
return i;
@@ -2995,8 +3954,8 @@ static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id) {
if (!btf)
return -ESRCH;
- n = btf__get_nr_types(btf);
- for (i = 1; i <= n; i++) {
+ n = btf__type_cnt(btf);
+ for (i = 1; i < n; i++) {
t = btf__type_by_id(btf, i);
if (!btf_is_datasec(t))
@@ -3043,6 +4002,10 @@ static enum kcfg_type find_kcfg_type(const struct btf *btf, int id,
if (strcmp(name, "libbpf_tristate"))
return KCFG_UNKNOWN;
return KCFG_TRISTATE;
+ case BTF_KIND_ENUM64:
+ if (strcmp(name, "libbpf_tristate"))
+ return KCFG_UNKNOWN;
+ return KCFG_TRISTATE;
case BTF_KIND_ARRAY:
if (btf_array(t)->nelems == 0)
return KCFG_UNKNOWN;
@@ -3080,8 +4043,8 @@ static int find_int_btf_id(const struct btf *btf)
const struct btf_type *t;
int i, n;
- n = btf__get_nr_types(btf);
- for (i = 1; i <= n; i++) {
+ n = btf__type_cnt(btf);
+ for (i = 1; i < n; i++) {
t = btf__type_by_id(btf, i);
if (btf_is_int(t) && btf_int_bits(t) == 32)
@@ -3091,34 +4054,79 @@ static int find_int_btf_id(const struct btf *btf)
return 0;
}
+static int add_dummy_ksym_var(struct btf *btf)
+{
+ int i, int_btf_id, sec_btf_id, dummy_var_btf_id;
+ const struct btf_var_secinfo *vs;
+ const struct btf_type *sec;
+
+ if (!btf)
+ return 0;
+
+ sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC,
+ BTF_KIND_DATASEC);
+ if (sec_btf_id < 0)
+ return 0;
+
+ sec = btf__type_by_id(btf, sec_btf_id);
+ vs = btf_var_secinfos(sec);
+ for (i = 0; i < btf_vlen(sec); i++, vs++) {
+ const struct btf_type *vt;
+
+ vt = btf__type_by_id(btf, vs->type);
+ if (btf_is_func(vt))
+ break;
+ }
+
+ /* No func in ksyms sec. No need to add dummy var. */
+ if (i == btf_vlen(sec))
+ return 0;
+
+ int_btf_id = find_int_btf_id(btf);
+ dummy_var_btf_id = btf__add_var(btf,
+ "dummy_ksym",
+ BTF_VAR_GLOBAL_ALLOCATED,
+ int_btf_id);
+ if (dummy_var_btf_id < 0)
+ pr_warn("cannot create a dummy_ksym var\n");
+
+ return dummy_var_btf_id;
+}
+
static int bpf_object__collect_externs(struct bpf_object *obj)
{
struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
const struct btf_type *t;
struct extern_desc *ext;
- int i, n, off;
+ int i, n, off, dummy_var_btf_id;
const char *ext_name, *sec_name;
+ size_t ext_essent_len;
Elf_Scn *scn;
- GElf_Shdr sh;
+ Elf64_Shdr *sh;
if (!obj->efile.symbols)
return 0;
scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx);
- if (elf_sec_hdr(obj, scn, &sh))
+ sh = elf_sec_hdr(obj, scn);
+ if (!sh || sh->sh_entsize != sizeof(Elf64_Sym))
return -LIBBPF_ERRNO__FORMAT;
- n = sh.sh_size / sh.sh_entsize;
+ dummy_var_btf_id = add_dummy_ksym_var(obj->btf);
+ if (dummy_var_btf_id < 0)
+ return dummy_var_btf_id;
+
+ n = sh->sh_size / sh->sh_entsize;
pr_debug("looking for externs among %d symbols...\n", n);
for (i = 0; i < n; i++) {
- GElf_Sym sym;
+ Elf64_Sym *sym = elf_sym_by_idx(obj, i);
- if (!gelf_getsym(obj->efile.symbols, i, &sym))
+ if (!sym)
return -LIBBPF_ERRNO__FORMAT;
- if (!sym_is_extern(&sym))
+ if (!sym_is_extern(sym))
continue;
- ext_name = elf_sym_str(obj, sym.st_name);
+ ext_name = elf_sym_str(obj, sym->st_name);
if (!ext_name || !ext_name[0])
continue;
@@ -3140,7 +4148,15 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
t = btf__type_by_id(obj->btf, ext->btf_id);
ext->name = btf__name_by_offset(obj->btf, t->name_off);
ext->sym_idx = i;
- ext->is_weak = GELF_ST_BIND(sym.st_info) == STB_WEAK;
+ ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK;
+
+ ext_essent_len = bpf_core_essential_name_len(ext->name);
+ ext->essent_name = NULL;
+ if (ext_essent_len != strlen(ext->name)) {
+ ext->essent_name = strndup(ext->name, ext_essent_len);
+ if (!ext->essent_name)
+ return -ENOMEM;
+ }
ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id);
if (ext->sec_btf_id <= 0) {
@@ -3152,6 +4168,11 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
sec_name = btf__name_by_offset(obj->btf, sec->name_off);
if (strcmp(sec_name, KCONFIG_SEC) == 0) {
+ if (btf_is_func(t)) {
+ pr_warn("extern function %s is unsupported under %s section\n",
+ ext->name, KCONFIG_SEC);
+ return -ENOTSUP;
+ }
kcfg_sec = sec;
ext->type = EXT_KCFG;
ext->kcfg.sz = btf__resolve_size(obj->btf, t->type);
@@ -3167,9 +4188,9 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
return -EINVAL;
}
ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
- &ext->kcfg.is_signed);
+ &ext->kcfg.is_signed);
if (ext->kcfg.type == KCFG_UNKNOWN) {
- pr_warn("extern (kcfg) '%s' type is unsupported\n", ext_name);
+ pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name);
return -ENOTSUP;
}
} else if (strcmp(sec_name, KSYMS_SEC) == 0) {
@@ -3199,7 +4220,14 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
* extern variables in DATASEC
*/
int int_btf_id = find_int_btf_id(obj->btf);
+ /* For extern function, a dummy_var added earlier
+ * will be used to replace the vs->type and
+ * its name string will be used to refill
+ * the missing param's name.
+ */
+ const struct btf_type *dummy_var;
+ dummy_var = btf__type_by_id(obj->btf, dummy_var_btf_id);
for (i = 0; i < obj->nr_extern; i++) {
ext = &obj->externs[i];
if (ext->type != EXT_KSYM)
@@ -3218,12 +4246,32 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
ext_name = btf__name_by_offset(obj->btf, vt->name_off);
ext = find_extern_by_name(obj, ext_name);
if (!ext) {
- pr_warn("failed to find extern definition for BTF var '%s'\n",
- ext_name);
+ pr_warn("failed to find extern definition for BTF %s '%s'\n",
+ btf_kind_str(vt), ext_name);
return -ESRCH;
}
- btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
- vt->type = int_btf_id;
+ if (btf_is_func(vt)) {
+ const struct btf_type *func_proto;
+ struct btf_param *param;
+ int j;
+
+ func_proto = btf__type_by_id(obj->btf,
+ vt->type);
+ param = btf_params(func_proto);
+ /* Reuse the dummy_var string if the
+ * func proto does not have param name.
+ */
+ for (j = 0; j < btf_vlen(func_proto); j++)
+ if (param[j].type && !param[j].name_off)
+ param[j].name_off =
+ dummy_var->name_off;
+ vs->type = dummy_var_btf_id;
+ vt->info &= ~0xffff;
+ vt->info |= BTF_FUNC_GLOBAL;
+ } else {
+ btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
+ vt->type = int_btf_id;
+ }
vs->offset = off;
vs->size = sizeof(int);
}
@@ -3264,23 +4312,9 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
return 0;
}
-struct bpf_program *
-bpf_object__find_program_by_title(const struct bpf_object *obj,
- const char *title)
+static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog)
{
- struct bpf_program *pos;
-
- bpf_object__for_each_program(pos, obj) {
- if (pos->sec_name && !strcmp(pos->sec_name, title))
- return pos;
- }
- return NULL;
-}
-
-static bool prog_is_subprog(const struct bpf_object *obj,
- const struct bpf_program *prog)
-{
- return prog->sec_idx == obj->efile.text_shndx && obj->has_subcalls;
+ return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1;
}
struct bpf_program *
@@ -3295,43 +4329,50 @@ bpf_object__find_program_by_name(const struct bpf_object *obj,
if (!strcmp(prog->name, name))
return prog;
}
- return NULL;
+ return errno = ENOENT, NULL;
}
static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
int shndx)
{
- return shndx == obj->efile.data_shndx ||
- shndx == obj->efile.bss_shndx ||
- shndx == obj->efile.rodata_shndx;
+ switch (obj->efile.secs[shndx].sec_type) {
+ case SEC_BSS:
+ case SEC_DATA:
+ case SEC_RODATA:
+ return true;
+ default:
+ return false;
+ }
}
static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
int shndx)
{
- return shndx == obj->efile.maps_shndx ||
- shndx == obj->efile.btf_maps_shndx;
+ return shndx == obj->efile.btf_maps_shndx;
}
static enum libbpf_map_type
bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
{
- if (shndx == obj->efile.data_shndx)
- return LIBBPF_MAP_DATA;
- else if (shndx == obj->efile.bss_shndx)
+ if (shndx == obj->efile.symbols_shndx)
+ return LIBBPF_MAP_KCONFIG;
+
+ switch (obj->efile.secs[shndx].sec_type) {
+ case SEC_BSS:
return LIBBPF_MAP_BSS;
- else if (shndx == obj->efile.rodata_shndx)
+ case SEC_DATA:
+ return LIBBPF_MAP_DATA;
+ case SEC_RODATA:
return LIBBPF_MAP_RODATA;
- else if (shndx == obj->efile.symbols_shndx)
- return LIBBPF_MAP_KCONFIG;
- else
+ default:
return LIBBPF_MAP_UNSPEC;
+ }
}
static int bpf_program__record_reloc(struct bpf_program *prog,
struct reloc_desc *reloc_desc,
__u32 insn_idx, const char *sym_name,
- const GElf_Sym *sym, const GElf_Rel *rel)
+ const Elf64_Sym *sym, const Elf64_Rel *rel)
{
struct bpf_insn *insn = &prog->insns[insn_idx];
size_t map_idx, nr_maps = prog->obj->nr_maps;
@@ -3341,10 +4382,40 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
const char *sym_sec_name;
struct bpf_map *map;
- reloc_desc->processed = false;
+ if (!is_call_insn(insn) && !is_ldimm64_insn(insn)) {
+ pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
+ prog->name, sym_name, insn_idx, insn->code);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+
+ if (sym_is_extern(sym)) {
+ int sym_idx = ELF64_R_SYM(rel->r_info);
+ int i, n = obj->nr_extern;
+ struct extern_desc *ext;
+
+ for (i = 0; i < n; i++) {
+ ext = &obj->externs[i];
+ if (ext->sym_idx == sym_idx)
+ break;
+ }
+ if (i >= n) {
+ pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n",
+ prog->name, sym_name, sym_idx);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n",
+ prog->name, i, ext->name, ext->sym_idx, insn_idx);
+ if (insn->code == (BPF_JMP | BPF_CALL))
+ reloc_desc->type = RELO_EXTERN_CALL;
+ else
+ reloc_desc->type = RELO_EXTERN_LD64;
+ reloc_desc->insn_idx = insn_idx;
+ reloc_desc->ext_idx = i;
+ return 0;
+ }
/* sub-program call relocation */
- if (insn->code == (BPF_JMP | BPF_CALL)) {
+ if (is_call_insn(insn)) {
if (insn->src_reg != BPF_PSEUDO_CALL) {
pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);
return -LIBBPF_ERRNO__RELOC;
@@ -3367,44 +4438,41 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
return 0;
}
- if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) {
- pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
- prog->name, sym_name, insn_idx, insn->code);
+ if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
+ pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n",
+ prog->name, sym_name, shdr_idx);
return -LIBBPF_ERRNO__RELOC;
}
- if (sym_is_extern(sym)) {
- int sym_idx = GELF_R_SYM(rel->r_info);
- int i, n = obj->nr_extern;
- struct extern_desc *ext;
-
- for (i = 0; i < n; i++) {
- ext = &obj->externs[i];
- if (ext->sym_idx == sym_idx)
- break;
- }
- if (i >= n) {
- pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n",
- prog->name, sym_name, sym_idx);
+ /* loading subprog addresses */
+ if (sym_is_subprog(sym, obj->efile.text_shndx)) {
+ /* global_func: sym->st_value = offset in the section, insn->imm = 0.
+ * local_func: sym->st_value = 0, insn->imm = offset in the section.
+ */
+ if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) {
+ pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n",
+ prog->name, sym_name, (size_t)sym->st_value, insn->imm);
return -LIBBPF_ERRNO__RELOC;
}
- pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n",
- prog->name, i, ext->name, ext->sym_idx, insn_idx);
- reloc_desc->type = RELO_EXTERN;
+
+ reloc_desc->type = RELO_SUBPROG_ADDR;
reloc_desc->insn_idx = insn_idx;
- reloc_desc->sym_off = i; /* sym_off stores extern index */
+ reloc_desc->sym_off = sym->st_value;
return 0;
}
- if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
- pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n",
- prog->name, sym_name, shdr_idx);
- return -LIBBPF_ERRNO__RELOC;
- }
-
type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
+ /* arena data relocation */
+ if (shdr_idx == obj->efile.arena_data_shndx) {
+ reloc_desc->type = RELO_DATA;
+ reloc_desc->insn_idx = insn_idx;
+ reloc_desc->map_idx = obj->arena_map - obj->maps;
+ reloc_desc->sym_off = sym->st_value;
+ return 0;
+ }
+
/* generic map reference relocation */
if (type == LIBBPF_MAP_UNSPEC) {
if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
@@ -3443,7 +4511,7 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
}
for (map_idx = 0; map_idx < nr_maps; map_idx++) {
map = &obj->maps[map_idx];
- if (map->libbpf_type != type)
+ if (map->libbpf_type != type || map->sec_idx != sym->st_shndx)
continue;
pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n",
prog->name, map_idx, map->name, map->sec_idx,
@@ -3475,6 +4543,9 @@ static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj,
int l = 0, r = obj->nr_programs - 1, m;
struct bpf_program *prog;
+ if (!obj->nr_programs)
+ return NULL;
+
while (l < r) {
m = l + (r - l + 1) / 2;
prog = &obj->programs[m];
@@ -3495,21 +4566,30 @@ static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj,
}
static int
-bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data *data)
+bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data)
{
- Elf_Data *symbols = obj->efile.symbols;
const char *relo_sec_name, *sec_name;
- size_t sec_idx = shdr->sh_info;
+ size_t sec_idx = shdr->sh_info, sym_idx;
struct bpf_program *prog;
struct reloc_desc *relos;
int err, i, nrels;
const char *sym_name;
__u32 insn_idx;
- GElf_Sym sym;
- GElf_Rel rel;
+ Elf_Scn *scn;
+ Elf_Data *scn_data;
+ Elf64_Sym *sym;
+ Elf64_Rel *rel;
+
+ if (sec_idx >= obj->efile.sec_cnt)
+ return -EINVAL;
+
+ scn = elf_sec_by_idx(obj, sec_idx);
+ scn_data = elf_sec_data(obj, scn);
+ if (!scn_data)
+ return -LIBBPF_ERRNO__FORMAT;
relo_sec_name = elf_sec_str(obj, shdr->sh_name);
- sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
+ sec_name = elf_sec_name(obj, scn);
if (!relo_sec_name || !sec_name)
return -EINVAL;
@@ -3518,32 +4598,43 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data
nrels = shdr->sh_size / shdr->sh_entsize;
for (i = 0; i < nrels; i++) {
- if (!gelf_getrel(data, i, &rel)) {
+ rel = elf_rel_by_idx(data, i);
+ if (!rel) {
pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i);
return -LIBBPF_ERRNO__FORMAT;
}
- if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
- pr_warn("sec '%s': symbol 0x%zx not found for relo #%d\n",
- relo_sec_name, (size_t)GELF_R_SYM(rel.r_info), i);
+
+ sym_idx = ELF64_R_SYM(rel->r_info);
+ sym = elf_sym_by_idx(obj, sym_idx);
+ if (!sym) {
+ pr_warn("sec '%s': symbol #%zu not found for relo #%d\n",
+ relo_sec_name, sym_idx, i);
return -LIBBPF_ERRNO__FORMAT;
}
- if (rel.r_offset % BPF_INSN_SZ) {
+
+ if (sym->st_shndx >= obj->efile.sec_cnt) {
+ pr_warn("sec '%s': corrupted symbol #%zu pointing to invalid section #%zu for relo #%d\n",
+ relo_sec_name, sym_idx, (size_t)sym->st_shndx, i);
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+
+ if (rel->r_offset % BPF_INSN_SZ || rel->r_offset >= scn_data->d_size) {
pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n",
- relo_sec_name, (size_t)GELF_R_SYM(rel.r_info), i);
+ relo_sec_name, (size_t)rel->r_offset, i);
return -LIBBPF_ERRNO__FORMAT;
}
- insn_idx = rel.r_offset / BPF_INSN_SZ;
+ insn_idx = rel->r_offset / BPF_INSN_SZ;
/* relocations against static functions are recorded as
* relocations against the section that contains a function;
* in such case, symbol will be STT_SECTION and sym.st_name
* will point to empty string (0), so fetch section name
* instead
*/
- if (GELF_ST_TYPE(sym.st_info) == STT_SECTION && sym.st_name == 0)
- sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym.st_shndx));
+ if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && sym->st_name == 0)
+ sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym->st_shndx));
else
- sym_name = elf_sym_str(obj, sym.st_name);
+ sym_name = elf_sym_str(obj, sym->st_name);
sym_name = sym_name ?: "<?";
pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n",
@@ -3551,9 +4642,9 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data
prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
if (!prog) {
- pr_warn("sec '%s': relo #%d: program not found in section '%s' for insn #%u\n",
+ pr_debug("sec '%s': relo #%d: couldn't find program in section '%s' for insn #%u, probably overridden weak function, skipping...\n",
relo_sec_name, i, sec_name, insn_idx);
- return -LIBBPF_ERRNO__RELOC;
+ continue;
}
relos = libbpf_reallocarray(prog->reloc_desc,
@@ -3565,7 +4656,7 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data
/* adjust insn_idx to local BPF program frame of reference */
insn_idx -= prog->sec_insn_off;
err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc],
- insn_idx, sym_name, &sym, &rel);
+ insn_idx, sym_name, sym, rel);
if (err)
return err;
@@ -3574,76 +4665,126 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data
return 0;
}
-static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
+static int map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map)
{
- struct bpf_map_def *def = &map->def;
- __u32 key_type_id = 0, value_type_id = 0;
- int ret;
+ int id;
+
+ if (!obj->btf)
+ return -ENOENT;
/* if it's BTF-defined map, we don't need to search for type IDs.
* For struct_ops map, it does not need btf_key_type_id and
* btf_value_type_id.
*/
- if (map->sec_idx == obj->efile.btf_maps_shndx ||
- bpf_map__is_struct_ops(map))
+ if (map->sec_idx == obj->efile.btf_maps_shndx || bpf_map__is_struct_ops(map))
return 0;
- if (!bpf_map__is_internal(map)) {
- ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size,
- def->value_size, &key_type_id,
- &value_type_id);
- } else {
- /*
- * LLVM annotates global data differently in BTF, that is,
- * only as '.data', '.bss' or '.rodata'.
- */
- ret = btf__find_by_name(obj->btf,
- libbpf_type_to_btf_name[map->libbpf_type]);
+ /*
+ * LLVM annotates global data differently in BTF, that is,
+ * only as '.data', '.bss' or '.rodata'.
+ */
+ if (!bpf_map__is_internal(map))
+ return -ENOENT;
+
+ id = btf__find_by_name(obj->btf, map->real_name);
+ if (id < 0)
+ return id;
+
+ map->btf_key_type_id = 0;
+ map->btf_value_type_id = id;
+ return 0;
+}
+
+static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info)
+{
+ char file[PATH_MAX], buff[4096];
+ FILE *fp;
+ __u32 val;
+ int err;
+
+ snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
+ memset(info, 0, sizeof(*info));
+
+ fp = fopen(file, "re");
+ if (!fp) {
+ err = -errno;
+ pr_warn("failed to open %s: %d. No procfs support?\n", file,
+ err);
+ return err;
+ }
+
+ while (fgets(buff, sizeof(buff), fp)) {
+ if (sscanf(buff, "map_type:\t%u", &val) == 1)
+ info->type = val;
+ else if (sscanf(buff, "key_size:\t%u", &val) == 1)
+ info->key_size = val;
+ else if (sscanf(buff, "value_size:\t%u", &val) == 1)
+ info->value_size = val;
+ else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
+ info->max_entries = val;
+ else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
+ info->map_flags = val;
}
- if (ret < 0)
- return ret;
- map->btf_key_type_id = key_type_id;
- map->btf_value_type_id = bpf_map__is_internal(map) ?
- ret : value_type_id;
+ fclose(fp);
+
+ return 0;
+}
+
+bool bpf_map__autocreate(const struct bpf_map *map)
+{
+ return map->autocreate;
+}
+
+int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate)
+{
+ if (map->obj->loaded)
+ return libbpf_err(-EBUSY);
+
+ map->autocreate = autocreate;
return 0;
}
int bpf_map__reuse_fd(struct bpf_map *map, int fd)
{
- struct bpf_map_info info = {};
- __u32 len = sizeof(info);
+ struct bpf_map_info info;
+ __u32 len = sizeof(info), name_len;
int new_fd, err;
char *new_name;
- err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ memset(&info, 0, len);
+ err = bpf_map_get_info_by_fd(fd, &info, &len);
+ if (err && errno == EINVAL)
+ err = bpf_get_map_info_from_fdinfo(fd, &info);
if (err)
- return err;
+ return libbpf_err(err);
+
+ name_len = strlen(info.name);
+ if (name_len == BPF_OBJ_NAME_LEN - 1 && strncmp(map->name, info.name, name_len) == 0)
+ new_name = strdup(map->name);
+ else
+ new_name = strdup(info.name);
- new_name = strdup(info.name);
if (!new_name)
- return -errno;
+ return libbpf_err(-errno);
- new_fd = open("/", O_RDONLY | O_CLOEXEC);
+ /*
+ * Like dup(), but make sure new FD is >= 3 and has O_CLOEXEC set.
+ * This is similar to what we do in ensure_good_fd(), but without
+ * closing original FD.
+ */
+ new_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
if (new_fd < 0) {
err = -errno;
goto err_free_new_name;
}
- new_fd = dup3(fd, new_fd, O_CLOEXEC);
- if (new_fd < 0) {
- err = -errno;
- goto err_close_new_fd;
- }
+ err = reuse_fd(map->fd, new_fd);
+ if (err)
+ goto err_free_new_name;
- err = zclose(map->fd);
- if (err) {
- err = -errno;
- goto err_close_new_fd;
- }
free(map->name);
- map->fd = new_fd;
map->name = new_name;
map->def.type = info.type;
map->def.key_size = info.key_size;
@@ -3653,14 +4794,13 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd)
map->btf_key_type_id = info.btf_key_type_id;
map->btf_value_type_id = info.btf_value_type_id;
map->reused = true;
+ map->map_extra = info.map_extra;
return 0;
-err_close_new_fd:
- close(new_fd);
err_free_new_name:
free(new_name);
- return err;
+ return libbpf_err(err);
}
__u32 bpf_map__max_entries(const struct bpf_map *map)
@@ -3668,42 +4808,105 @@ __u32 bpf_map__max_entries(const struct bpf_map *map)
return map->def.max_entries;
}
+struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
+{
+ if (!bpf_map_type__is_map_in_map(map->def.type))
+ return errno = EINVAL, NULL;
+
+ return map->inner_map;
+}
+
int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
{
- if (map->fd >= 0)
- return -EBUSY;
+ if (map->obj->loaded)
+ return libbpf_err(-EBUSY);
+
map->def.max_entries = max_entries;
+
+ /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
+ if (map_is_ringbuf(map))
+ map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
+
return 0;
}
-int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
+static int bpf_object_prepare_token(struct bpf_object *obj)
{
- if (!map || !max_entries)
- return -EINVAL;
+ const char *bpffs_path;
+ int bpffs_fd = -1, token_fd, err;
+ bool mandatory;
+ enum libbpf_print_level level;
+
+ /* token is explicitly prevented */
+ if (obj->token_path && obj->token_path[0] == '\0') {
+ pr_debug("object '%s': token is prevented, skipping...\n", obj->name);
+ return 0;
+ }
+
+ mandatory = obj->token_path != NULL;
+ level = mandatory ? LIBBPF_WARN : LIBBPF_DEBUG;
+
+ bpffs_path = obj->token_path ?: BPF_FS_DEFAULT_PATH;
+ bpffs_fd = open(bpffs_path, O_DIRECTORY, O_RDWR);
+ if (bpffs_fd < 0) {
+ err = -errno;
+ __pr(level, "object '%s': failed (%d) to open BPF FS mount at '%s'%s\n",
+ obj->name, err, bpffs_path,
+ mandatory ? "" : ", skipping optional step...");
+ return mandatory ? err : 0;
+ }
+
+ token_fd = bpf_token_create(bpffs_fd, 0);
+ close(bpffs_fd);
+ if (token_fd < 0) {
+ if (!mandatory && token_fd == -ENOENT) {
+ pr_debug("object '%s': BPF FS at '%s' doesn't have BPF token delegation set up, skipping...\n",
+ obj->name, bpffs_path);
+ return 0;
+ }
+ __pr(level, "object '%s': failed (%d) to create BPF token from '%s'%s\n",
+ obj->name, token_fd, bpffs_path,
+ mandatory ? "" : ", skipping optional step...");
+ return mandatory ? token_fd : 0;
+ }
+
+ obj->feat_cache = calloc(1, sizeof(*obj->feat_cache));
+ if (!obj->feat_cache) {
+ close(token_fd);
+ return -ENOMEM;
+ }
+
+ obj->token_fd = token_fd;
+ obj->feat_cache->token_fd = token_fd;
- return bpf_map__set_max_entries(map, max_entries);
+ return 0;
}
static int
bpf_object__probe_loading(struct bpf_object *obj)
{
- struct bpf_load_program_attr attr;
char *cp, errmsg[STRERR_BUFSIZE];
struct bpf_insn insns[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
};
- int ret;
+ int ret, insn_cnt = ARRAY_SIZE(insns);
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .token_fd = obj->token_fd,
+ .prog_flags = obj->token_fd ? BPF_F_TOKEN_FD : 0,
+ );
- /* make sure basic loading works */
+ if (obj->gen_loader)
+ return 0;
- memset(&attr, 0, sizeof(attr));
- attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
- attr.insns = insns;
- attr.insns_cnt = ARRAY_SIZE(insns);
- attr.license = "GPL";
+ ret = bump_rlimit_memlock();
+ if (ret)
+ pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret);
- ret = bpf_load_program_xattr(&attr, NULL, 0);
+ /* make sure basic loading works */
+ ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &opts);
+ if (ret < 0)
+ ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts);
if (ret < 0) {
ret = errno;
cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
@@ -3718,322 +4921,34 @@ bpf_object__probe_loading(struct bpf_object *obj)
return 0;
}
-static int probe_fd(int fd)
-{
- if (fd >= 0)
- close(fd);
- return fd >= 0;
-}
-
-static int probe_kern_prog_name(void)
+bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
{
- struct bpf_load_program_attr attr;
- struct bpf_insn insns[] = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- };
- int ret;
-
- /* make sure loading with name works */
-
- memset(&attr, 0, sizeof(attr));
- attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
- attr.insns = insns;
- attr.insns_cnt = ARRAY_SIZE(insns);
- attr.license = "GPL";
- attr.name = "test";
- ret = bpf_load_program_xattr(&attr, NULL, 0);
- return probe_fd(ret);
-}
-
-static int probe_kern_global_data(void)
-{
- struct bpf_load_program_attr prg_attr;
- struct bpf_create_map_attr map_attr;
- char *cp, errmsg[STRERR_BUFSIZE];
- struct bpf_insn insns[] = {
- BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16),
- BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- };
- int ret, map;
-
- memset(&map_attr, 0, sizeof(map_attr));
- map_attr.map_type = BPF_MAP_TYPE_ARRAY;
- map_attr.key_size = sizeof(int);
- map_attr.value_size = 32;
- map_attr.max_entries = 1;
-
- map = bpf_create_map_xattr(&map_attr);
- if (map < 0) {
- ret = -errno;
- cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
- pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
- __func__, cp, -ret);
- return ret;
- }
-
- insns[0].imm = map;
-
- memset(&prg_attr, 0, sizeof(prg_attr));
- prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
- prg_attr.insns = insns;
- prg_attr.insns_cnt = ARRAY_SIZE(insns);
- prg_attr.license = "GPL";
-
- ret = bpf_load_program_xattr(&prg_attr, NULL, 0);
- close(map);
- return probe_fd(ret);
-}
-
-static int probe_kern_btf(void)
-{
- static const char strs[] = "\0int";
- __u32 types[] = {
- /* int */
- BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
- };
-
- return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs)));
-}
-
-static int probe_kern_btf_func(void)
-{
- static const char strs[] = "\0int\0x\0a";
- /* void x(int a) {} */
- __u32 types[] = {
- /* int */
- BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
- /* FUNC_PROTO */ /* [2] */
- BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
- BTF_PARAM_ENC(7, 1),
- /* FUNC x */ /* [3] */
- BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2),
- };
-
- return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs)));
-}
-
-static int probe_kern_btf_func_global(void)
-{
- static const char strs[] = "\0int\0x\0a";
- /* static void x(int a) {} */
- __u32 types[] = {
- /* int */
- BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
- /* FUNC_PROTO */ /* [2] */
- BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
- BTF_PARAM_ENC(7, 1),
- /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */
- BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2),
- };
-
- return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs)));
-}
-
-static int probe_kern_btf_datasec(void)
-{
- static const char strs[] = "\0x\0.data";
- /* static int a; */
- __u32 types[] = {
- /* int */
- BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
- /* VAR x */ /* [2] */
- BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
- BTF_VAR_STATIC,
- /* DATASEC val */ /* [3] */
- BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
- BTF_VAR_SECINFO_ENC(2, 0, 4),
- };
-
- return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs)));
-}
-
-static int probe_kern_array_mmap(void)
-{
- struct bpf_create_map_attr attr = {
- .map_type = BPF_MAP_TYPE_ARRAY,
- .map_flags = BPF_F_MMAPABLE,
- .key_size = sizeof(int),
- .value_size = sizeof(int),
- .max_entries = 1,
- };
-
- return probe_fd(bpf_create_map_xattr(&attr));
-}
-
-static int probe_kern_exp_attach_type(void)
-{
- struct bpf_load_program_attr attr;
- struct bpf_insn insns[] = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- };
-
- memset(&attr, 0, sizeof(attr));
- /* use any valid combination of program type and (optional)
- * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS)
- * to see if kernel supports expected_attach_type field for
- * BPF_PROG_LOAD command
- */
- attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
- attr.expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE;
- attr.insns = insns;
- attr.insns_cnt = ARRAY_SIZE(insns);
- attr.license = "GPL";
-
- return probe_fd(bpf_load_program_xattr(&attr, NULL, 0));
-}
-
-static int probe_kern_probe_read_kernel(void)
-{
- struct bpf_load_program_attr attr;
- struct bpf_insn insns[] = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */
- BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */
- BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- };
-
- memset(&attr, 0, sizeof(attr));
- attr.prog_type = BPF_PROG_TYPE_KPROBE;
- attr.insns = insns;
- attr.insns_cnt = ARRAY_SIZE(insns);
- attr.license = "GPL";
-
- return probe_fd(bpf_load_program_xattr(&attr, NULL, 0));
-}
-
-static int probe_prog_bind_map(void)
-{
- struct bpf_load_program_attr prg_attr;
- struct bpf_create_map_attr map_attr;
- char *cp, errmsg[STRERR_BUFSIZE];
- struct bpf_insn insns[] = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- };
- int ret, map, prog;
-
- memset(&map_attr, 0, sizeof(map_attr));
- map_attr.map_type = BPF_MAP_TYPE_ARRAY;
- map_attr.key_size = sizeof(int);
- map_attr.value_size = 32;
- map_attr.max_entries = 1;
-
- map = bpf_create_map_xattr(&map_attr);
- if (map < 0) {
- ret = -errno;
- cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
- pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
- __func__, cp, -ret);
- return ret;
- }
-
- memset(&prg_attr, 0, sizeof(prg_attr));
- prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
- prg_attr.insns = insns;
- prg_attr.insns_cnt = ARRAY_SIZE(insns);
- prg_attr.license = "GPL";
-
- prog = bpf_load_program_xattr(&prg_attr, NULL, 0);
- if (prog < 0) {
- close(map);
- return 0;
- }
-
- ret = bpf_prog_bind_map(prog, map, NULL);
-
- close(map);
- close(prog);
-
- return ret >= 0;
-}
-
-enum kern_feature_result {
- FEAT_UNKNOWN = 0,
- FEAT_SUPPORTED = 1,
- FEAT_MISSING = 2,
-};
-
-typedef int (*feature_probe_fn)(void);
-
-static struct kern_feature_desc {
- const char *desc;
- feature_probe_fn probe;
- enum kern_feature_result res;
-} feature_probes[__FEAT_CNT] = {
- [FEAT_PROG_NAME] = {
- "BPF program name", probe_kern_prog_name,
- },
- [FEAT_GLOBAL_DATA] = {
- "global variables", probe_kern_global_data,
- },
- [FEAT_BTF] = {
- "minimal BTF", probe_kern_btf,
- },
- [FEAT_BTF_FUNC] = {
- "BTF functions", probe_kern_btf_func,
- },
- [FEAT_BTF_GLOBAL_FUNC] = {
- "BTF global function", probe_kern_btf_func_global,
- },
- [FEAT_BTF_DATASEC] = {
- "BTF data section and variable", probe_kern_btf_datasec,
- },
- [FEAT_ARRAY_MMAP] = {
- "ARRAY map mmap()", probe_kern_array_mmap,
- },
- [FEAT_EXP_ATTACH_TYPE] = {
- "BPF_PROG_LOAD expected_attach_type attribute",
- probe_kern_exp_attach_type,
- },
- [FEAT_PROBE_READ_KERN] = {
- "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel,
- },
- [FEAT_PROG_BIND_MAP] = {
- "BPF_PROG_BIND_MAP support", probe_prog_bind_map,
- }
-};
-
-static bool kernel_supports(enum kern_feature_id feat_id)
-{
- struct kern_feature_desc *feat = &feature_probes[feat_id];
- int ret;
+ if (obj->gen_loader)
+ /* To generate loader program assume the latest kernel
+ * to avoid doing extra prog_load, map_create syscalls.
+ */
+ return true;
- if (READ_ONCE(feat->res) == FEAT_UNKNOWN) {
- ret = feat->probe();
- if (ret > 0) {
- WRITE_ONCE(feat->res, FEAT_SUPPORTED);
- } else if (ret == 0) {
- WRITE_ONCE(feat->res, FEAT_MISSING);
- } else {
- pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret);
- WRITE_ONCE(feat->res, FEAT_MISSING);
- }
- }
+ if (obj->token_fd)
+ return feat_supported(obj->feat_cache, feat_id);
- return READ_ONCE(feat->res) == FEAT_SUPPORTED;
+ return feat_supported(NULL, feat_id);
}
static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
{
- struct bpf_map_info map_info = {};
+ struct bpf_map_info map_info;
char msg[STRERR_BUFSIZE];
- __u32 map_info_len;
-
- map_info_len = sizeof(map_info);
+ __u32 map_info_len = sizeof(map_info);
+ int err;
- if (bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len)) {
- pr_warn("failed to get map info for map FD %d: %s\n",
- map_fd, libbpf_strerror_r(errno, msg, sizeof(msg)));
+ memset(&map_info, 0, map_info_len);
+ err = bpf_map_get_info_by_fd(map_fd, &map_info, &map_info_len);
+ if (err && errno == EINVAL)
+ err = bpf_get_map_info_from_fdinfo(map_fd, &map_info);
+ if (err) {
+ pr_warn("failed to get map info for map FD %d: %s\n", map_fd,
+ libbpf_strerror_r(errno, msg, sizeof(msg)));
return false;
}
@@ -4041,7 +4956,8 @@ static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
map_info.key_size == map->def.key_size &&
map_info.value_size == map->def.value_size &&
map_info.max_entries == map->def.max_entries &&
- map_info.map_flags == map->def.map_flags);
+ map_info.map_flags == map->def.map_flags &&
+ map_info.map_extra == map->map_extra);
}
static int
@@ -4073,10 +4989,10 @@ bpf_object__reuse_map(struct bpf_map *map)
}
err = bpf_map__reuse_fd(map, pin_fd);
- if (err) {
- close(pin_fd);
+ close(pin_fd);
+ if (err)
return err;
- }
+
map->pinned = true;
pr_debug("reused pinned map at '%s'\n", map->pin_path);
@@ -4090,6 +5006,14 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
char *cp, errmsg[STRERR_BUFSIZE];
int err, zero = 0;
+ if (obj->gen_loader) {
+ bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps,
+ map->mmaped, map->def.value_size);
+ if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG)
+ bpf_gen__map_freeze(obj->gen_loader, map - obj->maps);
+ return 0;
+ }
+
err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
if (err) {
err = -errno;
@@ -4115,45 +5039,37 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
static void bpf_map__destroy(struct bpf_map *map);
-static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
+static bool map_is_created(const struct bpf_map *map)
{
- struct bpf_create_map_attr create_attr;
- struct bpf_map_def *def = &map->def;
+ return map->obj->loaded || map->reused;
+}
- memset(&create_attr, 0, sizeof(create_attr));
+static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner)
+{
+ LIBBPF_OPTS(bpf_map_create_opts, create_attr);
+ struct bpf_map_def *def = &map->def;
+ const char *map_name = NULL;
+ int err = 0, map_fd;
- if (kernel_supports(FEAT_PROG_NAME))
- create_attr.name = map->name;
+ if (kernel_supports(obj, FEAT_PROG_NAME))
+ map_name = map->name;
create_attr.map_ifindex = map->map_ifindex;
- create_attr.map_type = def->type;
create_attr.map_flags = def->map_flags;
- create_attr.key_size = def->key_size;
- create_attr.value_size = def->value_size;
create_attr.numa_node = map->numa_node;
+ create_attr.map_extra = map->map_extra;
+ create_attr.token_fd = obj->token_fd;
+ if (obj->token_fd)
+ create_attr.map_flags |= BPF_F_TOKEN_FD;
- if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !def->max_entries) {
- int nr_cpus;
-
- nr_cpus = libbpf_num_possible_cpus();
- if (nr_cpus < 0) {
- pr_warn("map '%s': failed to determine number of system CPUs: %d\n",
- map->name, nr_cpus);
- return nr_cpus;
+ if (bpf_map__is_struct_ops(map)) {
+ create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
+ if (map->mod_btf_fd >= 0) {
+ create_attr.value_type_btf_obj_fd = map->mod_btf_fd;
+ create_attr.map_flags |= BPF_F_VTYPE_BTF_OBJ_FD;
}
- pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus);
- create_attr.max_entries = nr_cpus;
- } else {
- create_attr.max_entries = def->max_entries;
}
- if (bpf_map__is_struct_ops(map))
- create_attr.btf_vmlinux_value_type_id =
- map->btf_vmlinux_value_type_id;
-
- create_attr.btf_fd = 0;
- create_attr.btf_key_type_id = 0;
- create_attr.btf_value_type_id = 0;
- if (obj->btf && btf__fd(obj->btf) >= 0 && !bpf_map_find_btf_info(obj, map)) {
+ if (obj->btf && btf__fd(obj->btf) >= 0) {
create_attr.btf_fd = btf__fd(obj->btf);
create_attr.btf_key_type_id = map->btf_key_type_id;
create_attr.btf_value_type_id = map->btf_value_type_id;
@@ -4161,26 +5077,69 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
if (bpf_map_type__is_map_in_map(def->type)) {
if (map->inner_map) {
- int err;
-
- err = bpf_object__create_map(obj, map->inner_map);
+ err = map_set_def_max_entries(map->inner_map);
+ if (err)
+ return err;
+ err = bpf_object__create_map(obj, map->inner_map, true);
if (err) {
pr_warn("map '%s': failed to create inner map: %d\n",
map->name, err);
return err;
}
- map->inner_map_fd = bpf_map__fd(map->inner_map);
+ map->inner_map_fd = map->inner_map->fd;
}
if (map->inner_map_fd >= 0)
create_attr.inner_map_fd = map->inner_map_fd;
}
- map->fd = bpf_create_map_xattr(&create_attr);
- if (map->fd < 0 && (create_attr.btf_key_type_id ||
- create_attr.btf_value_type_id)) {
+ switch (def->type) {
+ case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
+ case BPF_MAP_TYPE_CGROUP_ARRAY:
+ case BPF_MAP_TYPE_STACK_TRACE:
+ case BPF_MAP_TYPE_ARRAY_OF_MAPS:
+ case BPF_MAP_TYPE_HASH_OF_MAPS:
+ case BPF_MAP_TYPE_DEVMAP:
+ case BPF_MAP_TYPE_DEVMAP_HASH:
+ case BPF_MAP_TYPE_CPUMAP:
+ case BPF_MAP_TYPE_XSKMAP:
+ case BPF_MAP_TYPE_SOCKMAP:
+ case BPF_MAP_TYPE_SOCKHASH:
+ case BPF_MAP_TYPE_QUEUE:
+ case BPF_MAP_TYPE_STACK:
+ case BPF_MAP_TYPE_ARENA:
+ create_attr.btf_fd = 0;
+ create_attr.btf_key_type_id = 0;
+ create_attr.btf_value_type_id = 0;
+ map->btf_key_type_id = 0;
+ map->btf_value_type_id = 0;
+ break;
+ case BPF_MAP_TYPE_STRUCT_OPS:
+ create_attr.btf_value_type_id = 0;
+ break;
+ default:
+ break;
+ }
+
+ if (obj->gen_loader) {
+ bpf_gen__map_create(obj->gen_loader, def->type, map_name,
+ def->key_size, def->value_size, def->max_entries,
+ &create_attr, is_inner ? -1 : map - obj->maps);
+ /* We keep pretenting we have valid FD to pass various fd >= 0
+ * checks by just keeping original placeholder FDs in place.
+ * See bpf_object__add_map() comment.
+ * This placeholder fd will not be used with any syscall and
+ * will be reset to -1 eventually.
+ */
+ map_fd = map->fd;
+ } else {
+ map_fd = bpf_map_create(def->type, map_name,
+ def->key_size, def->value_size,
+ def->max_entries, &create_attr);
+ }
+ if (map_fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) {
char *cp, errmsg[STRERR_BUFSIZE];
- int err = -errno;
+ err = -errno;
cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
map->name, cp, err);
@@ -4189,38 +5148,57 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
create_attr.btf_value_type_id = 0;
map->btf_key_type_id = 0;
map->btf_value_type_id = 0;
- map->fd = bpf_create_map_xattr(&create_attr);
+ map_fd = bpf_map_create(def->type, map_name,
+ def->key_size, def->value_size,
+ def->max_entries, &create_attr);
}
- if (map->fd < 0)
- return -errno;
-
if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
+ if (obj->gen_loader)
+ map->inner_map->fd = -1;
bpf_map__destroy(map->inner_map);
zfree(&map->inner_map);
}
- return 0;
+ if (map_fd < 0)
+ return map_fd;
+
+ /* obj->gen_loader case, prevent reuse_fd() from closing map_fd */
+ if (map->fd == map_fd)
+ return 0;
+
+ /* Keep placeholder FD value but now point it to the BPF map object.
+ * This way everything that relied on this map's FD (e.g., relocated
+ * ldimm64 instructions) will stay valid and won't need adjustments.
+ * map->fd stays valid but now point to what map_fd points to.
+ */
+ return reuse_fd(map->fd, map_fd);
}
-static int init_map_slots(struct bpf_map *map)
+static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map)
{
const struct bpf_map *targ_map;
unsigned int i;
- int fd, err;
+ int fd, err = 0;
for (i = 0; i < map->init_slots_sz; i++) {
if (!map->init_slots[i])
continue;
targ_map = map->init_slots[i];
- fd = bpf_map__fd(targ_map);
- err = bpf_map_update_elem(map->fd, &i, &fd, 0);
+ fd = targ_map->fd;
+
+ if (obj->gen_loader) {
+ bpf_gen__populate_outer_map(obj->gen_loader,
+ map - obj->maps, i,
+ targ_map - obj->maps);
+ } else {
+ err = bpf_map_update_elem(map->fd, &i, &fd, 0);
+ }
if (err) {
err = -errno;
pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
- map->name, i, targ_map->name,
- fd, err);
+ map->name, i, targ_map->name, fd, err);
return err;
}
pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
@@ -4233,6 +5211,75 @@ static int init_map_slots(struct bpf_map *map)
return 0;
}
+static int init_prog_array_slots(struct bpf_object *obj, struct bpf_map *map)
+{
+ const struct bpf_program *targ_prog;
+ unsigned int i;
+ int fd, err;
+
+ if (obj->gen_loader)
+ return -ENOTSUP;
+
+ for (i = 0; i < map->init_slots_sz; i++) {
+ if (!map->init_slots[i])
+ continue;
+
+ targ_prog = map->init_slots[i];
+ fd = bpf_program__fd(targ_prog);
+
+ err = bpf_map_update_elem(map->fd, &i, &fd, 0);
+ if (err) {
+ err = -errno;
+ pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %d\n",
+ map->name, i, targ_prog->name, fd, err);
+ return err;
+ }
+ pr_debug("map '%s': slot [%d] set to prog '%s' fd=%d\n",
+ map->name, i, targ_prog->name, fd);
+ }
+
+ zfree(&map->init_slots);
+ map->init_slots_sz = 0;
+
+ return 0;
+}
+
+static int bpf_object_init_prog_arrays(struct bpf_object *obj)
+{
+ struct bpf_map *map;
+ int i, err;
+
+ for (i = 0; i < obj->nr_maps; i++) {
+ map = &obj->maps[i];
+
+ if (!map->init_slots_sz || map->def.type != BPF_MAP_TYPE_PROG_ARRAY)
+ continue;
+
+ err = init_prog_array_slots(obj, map);
+ if (err < 0)
+ return err;
+ }
+ return 0;
+}
+
+static int map_set_def_max_entries(struct bpf_map *map)
+{
+ if (map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !map->def.max_entries) {
+ int nr_cpus;
+
+ nr_cpus = libbpf_num_possible_cpus();
+ if (nr_cpus < 0) {
+ pr_warn("map '%s': failed to determine number of system CPUs: %d\n",
+ map->name, nr_cpus);
+ return nr_cpus;
+ }
+ pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus);
+ map->def.max_entries = nr_cpus;
+ }
+
+ return 0;
+}
+
static int
bpf_object__create_maps(struct bpf_object *obj)
{
@@ -4240,10 +5287,39 @@ bpf_object__create_maps(struct bpf_object *obj)
char *cp, errmsg[STRERR_BUFSIZE];
unsigned int i, j;
int err;
+ bool retried;
for (i = 0; i < obj->nr_maps; i++) {
map = &obj->maps[i];
+ /* To support old kernels, we skip creating global data maps
+ * (.rodata, .data, .kconfig, etc); later on, during program
+ * loading, if we detect that at least one of the to-be-loaded
+ * programs is referencing any global data map, we'll error
+ * out with program name and relocation index logged.
+ * This approach allows to accommodate Clang emitting
+ * unnecessary .rodata.str1.1 sections for string literals,
+ * but also it allows to have CO-RE applications that use
+ * global variables in some of BPF programs, but not others.
+ * If those global variable-using programs are not loaded at
+ * runtime due to bpf_program__set_autoload(prog, false),
+ * bpf_object loading will succeed just fine even on old
+ * kernels.
+ */
+ if (bpf_map__is_internal(map) && !kernel_supports(obj, FEAT_GLOBAL_DATA))
+ map->autocreate = false;
+
+ if (!map->autocreate) {
+ pr_debug("map '%s': skipped auto-creating...\n", map->name);
+ continue;
+ }
+
+ err = map_set_def_max_entries(map);
+ if (err)
+ goto err_out;
+
+ retried = false;
+retry:
if (map->pin_path) {
err = bpf_object__reuse_map(map);
if (err) {
@@ -4251,13 +5327,19 @@ bpf_object__create_maps(struct bpf_object *obj)
map->name);
goto err_out;
}
+ if (retried && map->fd < 0) {
+ pr_warn("map '%s': cannot find pinned map\n",
+ map->name);
+ err = -ENOENT;
+ goto err_out;
+ }
}
- if (map->fd >= 0) {
+ if (map->reused) {
pr_debug("map '%s': skipping creation (preset fd=%d)\n",
map->name, map->fd);
} else {
- err = bpf_object__create_map(obj, map);
+ err = bpf_object__create_map(obj, map, false);
if (err)
goto err_out;
@@ -4266,27 +5348,42 @@ bpf_object__create_maps(struct bpf_object *obj)
if (bpf_map__is_internal(map)) {
err = bpf_object__populate_internal_map(obj, map);
- if (err < 0) {
- zclose(map->fd);
+ if (err < 0)
goto err_out;
+ }
+ if (map->def.type == BPF_MAP_TYPE_ARENA) {
+ map->mmaped = mmap((void *)map->map_extra, bpf_map_mmap_sz(map),
+ PROT_READ | PROT_WRITE,
+ map->map_extra ? MAP_SHARED | MAP_FIXED : MAP_SHARED,
+ map->fd, 0);
+ if (map->mmaped == MAP_FAILED) {
+ err = -errno;
+ map->mmaped = NULL;
+ pr_warn("map '%s': failed to mmap arena: %d\n",
+ map->name, err);
+ return err;
+ }
+ if (obj->arena_data) {
+ memcpy(map->mmaped, obj->arena_data, obj->arena_data_sz);
+ zfree(&obj->arena_data);
}
}
-
- if (map->init_slots_sz) {
- err = init_map_slots(map);
- if (err < 0) {
- zclose(map->fd);
+ if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) {
+ err = init_map_in_map_slots(obj, map);
+ if (err < 0)
goto err_out;
- }
}
}
if (map->pin_path && !map->pinned) {
err = bpf_map__pin(map, NULL);
if (err) {
+ if (!retried && err == -EEXIST) {
+ retried = true;
+ goto retry;
+ }
pr_warn("map '%s': failed to auto-pin at '%s': %d\n",
map->name, map->pin_path, err);
- zclose(map->fd);
goto err_out;
}
}
@@ -4303,279 +5400,6 @@ err_out:
return err;
}
-#define BPF_CORE_SPEC_MAX_LEN 64
-
-/* represents BPF CO-RE field or array element accessor */
-struct bpf_core_accessor {
- __u32 type_id; /* struct/union type or array element type */
- __u32 idx; /* field index or array index */
- const char *name; /* field name or NULL for array accessor */
-};
-
-struct bpf_core_spec {
- const struct btf *btf;
- /* high-level spec: named fields and array indices only */
- struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
- /* original unresolved (no skip_mods_or_typedefs) root type ID */
- __u32 root_type_id;
- /* CO-RE relocation kind */
- enum bpf_core_relo_kind relo_kind;
- /* high-level spec length */
- int len;
- /* raw, low-level spec: 1-to-1 with accessor spec string */
- int raw_spec[BPF_CORE_SPEC_MAX_LEN];
- /* raw spec length */
- int raw_len;
- /* field bit offset represented by spec */
- __u32 bit_offset;
-};
-
-static bool str_is_empty(const char *s)
-{
- return !s || !s[0];
-}
-
-static bool is_flex_arr(const struct btf *btf,
- const struct bpf_core_accessor *acc,
- const struct btf_array *arr)
-{
- const struct btf_type *t;
-
- /* not a flexible array, if not inside a struct or has non-zero size */
- if (!acc->name || arr->nelems > 0)
- return false;
-
- /* has to be the last member of enclosing struct */
- t = btf__type_by_id(btf, acc->type_id);
- return acc->idx == btf_vlen(t) - 1;
-}
-
-static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
-{
- switch (kind) {
- case BPF_FIELD_BYTE_OFFSET: return "byte_off";
- case BPF_FIELD_BYTE_SIZE: return "byte_sz";
- case BPF_FIELD_EXISTS: return "field_exists";
- case BPF_FIELD_SIGNED: return "signed";
- case BPF_FIELD_LSHIFT_U64: return "lshift_u64";
- case BPF_FIELD_RSHIFT_U64: return "rshift_u64";
- case BPF_TYPE_ID_LOCAL: return "local_type_id";
- case BPF_TYPE_ID_TARGET: return "target_type_id";
- case BPF_TYPE_EXISTS: return "type_exists";
- case BPF_TYPE_SIZE: return "type_size";
- case BPF_ENUMVAL_EXISTS: return "enumval_exists";
- case BPF_ENUMVAL_VALUE: return "enumval_value";
- default: return "unknown";
- }
-}
-
-static bool core_relo_is_field_based(enum bpf_core_relo_kind kind)
-{
- switch (kind) {
- case BPF_FIELD_BYTE_OFFSET:
- case BPF_FIELD_BYTE_SIZE:
- case BPF_FIELD_EXISTS:
- case BPF_FIELD_SIGNED:
- case BPF_FIELD_LSHIFT_U64:
- case BPF_FIELD_RSHIFT_U64:
- return true;
- default:
- return false;
- }
-}
-
-static bool core_relo_is_type_based(enum bpf_core_relo_kind kind)
-{
- switch (kind) {
- case BPF_TYPE_ID_LOCAL:
- case BPF_TYPE_ID_TARGET:
- case BPF_TYPE_EXISTS:
- case BPF_TYPE_SIZE:
- return true;
- default:
- return false;
- }
-}
-
-static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
-{
- switch (kind) {
- case BPF_ENUMVAL_EXISTS:
- case BPF_ENUMVAL_VALUE:
- return true;
- default:
- return false;
- }
-}
-
-/*
- * Turn bpf_core_relo into a low- and high-level spec representation,
- * validating correctness along the way, as well as calculating resulting
- * field bit offset, specified by accessor string. Low-level spec captures
- * every single level of nestedness, including traversing anonymous
- * struct/union members. High-level one only captures semantically meaningful
- * "turning points": named fields and array indicies.
- * E.g., for this case:
- *
- * struct sample {
- * int __unimportant;
- * struct {
- * int __1;
- * int __2;
- * int a[7];
- * };
- * };
- *
- * struct sample *s = ...;
- *
- * int x = &s->a[3]; // access string = '0:1:2:3'
- *
- * Low-level spec has 1:1 mapping with each element of access string (it's
- * just a parsed access string representation): [0, 1, 2, 3].
- *
- * High-level spec will capture only 3 points:
- * - intial zero-index access by pointer (&s->... is the same as &s[0]...);
- * - field 'a' access (corresponds to '2' in low-level spec);
- * - array element #3 access (corresponds to '3' in low-level spec).
- *
- * Type-based relocations (TYPE_EXISTS/TYPE_SIZE,
- * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their
- * spec and raw_spec are kept empty.
- *
- * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access
- * string to specify enumerator's value index that need to be relocated.
- */
-static int bpf_core_parse_spec(const struct btf *btf,
- __u32 type_id,
- const char *spec_str,
- enum bpf_core_relo_kind relo_kind,
- struct bpf_core_spec *spec)
-{
- int access_idx, parsed_len, i;
- struct bpf_core_accessor *acc;
- const struct btf_type *t;
- const char *name;
- __u32 id;
- __s64 sz;
-
- if (str_is_empty(spec_str) || *spec_str == ':')
- return -EINVAL;
-
- memset(spec, 0, sizeof(*spec));
- spec->btf = btf;
- spec->root_type_id = type_id;
- spec->relo_kind = relo_kind;
-
- /* type-based relocations don't have a field access string */
- if (core_relo_is_type_based(relo_kind)) {
- if (strcmp(spec_str, "0"))
- return -EINVAL;
- return 0;
- }
-
- /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
- while (*spec_str) {
- if (*spec_str == ':')
- ++spec_str;
- if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1)
- return -EINVAL;
- if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
- return -E2BIG;
- spec_str += parsed_len;
- spec->raw_spec[spec->raw_len++] = access_idx;
- }
-
- if (spec->raw_len == 0)
- return -EINVAL;
-
- t = skip_mods_and_typedefs(btf, type_id, &id);
- if (!t)
- return -EINVAL;
-
- access_idx = spec->raw_spec[0];
- acc = &spec->spec[0];
- acc->type_id = id;
- acc->idx = access_idx;
- spec->len++;
-
- if (core_relo_is_enumval_based(relo_kind)) {
- if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t))
- return -EINVAL;
-
- /* record enumerator name in a first accessor */
- acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off);
- return 0;
- }
-
- if (!core_relo_is_field_based(relo_kind))
- return -EINVAL;
-
- sz = btf__resolve_size(btf, id);
- if (sz < 0)
- return sz;
- spec->bit_offset = access_idx * sz * 8;
-
- for (i = 1; i < spec->raw_len; i++) {
- t = skip_mods_and_typedefs(btf, id, &id);
- if (!t)
- return -EINVAL;
-
- access_idx = spec->raw_spec[i];
- acc = &spec->spec[spec->len];
-
- if (btf_is_composite(t)) {
- const struct btf_member *m;
- __u32 bit_offset;
-
- if (access_idx >= btf_vlen(t))
- return -EINVAL;
-
- bit_offset = btf_member_bit_offset(t, access_idx);
- spec->bit_offset += bit_offset;
-
- m = btf_members(t) + access_idx;
- if (m->name_off) {
- name = btf__name_by_offset(btf, m->name_off);
- if (str_is_empty(name))
- return -EINVAL;
-
- acc->type_id = id;
- acc->idx = access_idx;
- acc->name = name;
- spec->len++;
- }
-
- id = m->type;
- } else if (btf_is_array(t)) {
- const struct btf_array *a = btf_array(t);
- bool flex;
-
- t = skip_mods_and_typedefs(btf, a->type, &id);
- if (!t)
- return -EINVAL;
-
- flex = is_flex_arr(btf, acc - 1, a);
- if (!flex && access_idx >= a->nelems)
- return -EINVAL;
-
- spec->spec[spec->len].type_id = id;
- spec->spec[spec->len].idx = access_idx;
- spec->len++;
-
- sz = btf__resolve_size(btf, id);
- if (sz < 0)
- return sz;
- spec->bit_offset += access_idx * sz * 8;
- } else {
- pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n",
- type_id, spec_str, i, id, btf_kind_str(t));
- return -EINVAL;
- }
- }
-
- return 0;
-}
-
static bool bpf_core_is_flavor_sep(const char *s)
{
/* check X___Y name pattern, where X and Y are not underscores */
@@ -4588,7 +5412,7 @@ static bool bpf_core_is_flavor_sep(const char *s)
* before last triple underscore. Struct name part after last triple
* underscore is ignored by BPF CO-RE relocation during relocation matching.
*/
-static size_t bpf_core_essential_name_len(const char *name)
+size_t bpf_core_essential_name_len(const char *name)
{
size_t n = strlen(name);
int i;
@@ -4600,46 +5424,35 @@ static size_t bpf_core_essential_name_len(const char *name)
return n;
}
-/* dynamically sized list of type IDs */
-struct ids_vec {
- __u32 *data;
- int len;
-};
-
-static void bpf_core_free_cands(struct ids_vec *cand_ids)
+void bpf_core_free_cands(struct bpf_core_cand_list *cands)
{
- free(cand_ids->data);
- free(cand_ids);
+ if (!cands)
+ return;
+
+ free(cands->cands);
+ free(cands);
}
-static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
- __u32 local_type_id,
- const struct btf *targ_btf)
+int bpf_core_add_cands(struct bpf_core_cand *local_cand,
+ size_t local_essent_len,
+ const struct btf *targ_btf,
+ const char *targ_btf_name,
+ int targ_start_id,
+ struct bpf_core_cand_list *cands)
{
- size_t local_essent_len, targ_essent_len;
- const char *local_name, *targ_name;
+ struct bpf_core_cand *new_cands, *cand;
const struct btf_type *t, *local_t;
- struct ids_vec *cand_ids;
- __u32 *new_ids;
- int i, err, n;
-
- local_t = btf__type_by_id(local_btf, local_type_id);
- if (!local_t)
- return ERR_PTR(-EINVAL);
-
- local_name = btf__name_by_offset(local_btf, local_t->name_off);
- if (str_is_empty(local_name))
- return ERR_PTR(-EINVAL);
- local_essent_len = bpf_core_essential_name_len(local_name);
+ const char *targ_name, *local_name;
+ size_t targ_essent_len;
+ int n, i;
- cand_ids = calloc(1, sizeof(*cand_ids));
- if (!cand_ids)
- return ERR_PTR(-ENOMEM);
+ local_t = btf__type_by_id(local_cand->btf, local_cand->id);
+ local_name = btf__str_by_offset(local_cand->btf, local_t->name_off);
- n = btf__get_nr_types(targ_btf);
- for (i = 1; i <= n; i++) {
+ n = btf__type_cnt(targ_btf);
+ for (i = targ_start_id; i < n; i++) {
t = btf__type_by_id(targ_btf, i);
- if (btf_kind(t) != btf_kind(local_t))
+ if (!btf_kind_core_compat(t, local_t))
continue;
targ_name = btf__name_by_offset(targ_btf, t->name_off);
@@ -4650,1031 +5463,275 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
if (targ_essent_len != local_essent_len)
continue;
- if (strncmp(local_name, targ_name, local_essent_len) == 0) {
- pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s\n",
- local_type_id, btf_kind_str(local_t),
- local_name, i, btf_kind_str(t), targ_name);
- new_ids = libbpf_reallocarray(cand_ids->data,
- cand_ids->len + 1,
- sizeof(*cand_ids->data));
- if (!new_ids) {
- err = -ENOMEM;
- goto err_out;
- }
- cand_ids->data = new_ids;
- cand_ids->data[cand_ids->len++] = i;
- }
- }
- return cand_ids;
-err_out:
- bpf_core_free_cands(cand_ids);
- return ERR_PTR(err);
-}
-
-/* Check two types for compatibility for the purpose of field access
- * relocation. const/volatile/restrict and typedefs are skipped to ensure we
- * are relocating semantically compatible entities:
- * - any two STRUCTs/UNIONs are compatible and can be mixed;
- * - any two FWDs are compatible, if their names match (modulo flavor suffix);
- * - any two PTRs are always compatible;
- * - for ENUMs, names should be the same (ignoring flavor suffix) or at
- * least one of enums should be anonymous;
- * - for ENUMs, check sizes, names are ignored;
- * - for INT, size and signedness are ignored;
- * - for ARRAY, dimensionality is ignored, element types are checked for
- * compatibility recursively;
- * - everything else shouldn't be ever a target of relocation.
- * These rules are not set in stone and probably will be adjusted as we get
- * more experience with using BPF CO-RE relocations.
- */
-static int bpf_core_fields_are_compat(const struct btf *local_btf,
- __u32 local_id,
- const struct btf *targ_btf,
- __u32 targ_id)
-{
- const struct btf_type *local_type, *targ_type;
-
-recur:
- local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
- targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
- if (!local_type || !targ_type)
- return -EINVAL;
-
- if (btf_is_composite(local_type) && btf_is_composite(targ_type))
- return 1;
- if (btf_kind(local_type) != btf_kind(targ_type))
- return 0;
-
- switch (btf_kind(local_type)) {
- case BTF_KIND_PTR:
- return 1;
- case BTF_KIND_FWD:
- case BTF_KIND_ENUM: {
- const char *local_name, *targ_name;
- size_t local_len, targ_len;
-
- local_name = btf__name_by_offset(local_btf,
- local_type->name_off);
- targ_name = btf__name_by_offset(targ_btf, targ_type->name_off);
- local_len = bpf_core_essential_name_len(local_name);
- targ_len = bpf_core_essential_name_len(targ_name);
- /* one of them is anonymous or both w/ same flavor-less names */
- return local_len == 0 || targ_len == 0 ||
- (local_len == targ_len &&
- strncmp(local_name, targ_name, local_len) == 0);
- }
- case BTF_KIND_INT:
- /* just reject deprecated bitfield-like integers; all other
- * integers are by default compatible between each other
- */
- return btf_int_offset(local_type) == 0 &&
- btf_int_offset(targ_type) == 0;
- case BTF_KIND_ARRAY:
- local_id = btf_array(local_type)->type;
- targ_id = btf_array(targ_type)->type;
- goto recur;
- default:
- pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n",
- btf_kind(local_type), local_id, targ_id);
- return 0;
- }
-}
-
-/*
- * Given single high-level named field accessor in local type, find
- * corresponding high-level accessor for a target type. Along the way,
- * maintain low-level spec for target as well. Also keep updating target
- * bit offset.
- *
- * Searching is performed through recursive exhaustive enumeration of all
- * fields of a struct/union. If there are any anonymous (embedded)
- * structs/unions, they are recursively searched as well. If field with
- * desired name is found, check compatibility between local and target types,
- * before returning result.
- *
- * 1 is returned, if field is found.
- * 0 is returned if no compatible field is found.
- * <0 is returned on error.
- */
-static int bpf_core_match_member(const struct btf *local_btf,
- const struct bpf_core_accessor *local_acc,
- const struct btf *targ_btf,
- __u32 targ_id,
- struct bpf_core_spec *spec,
- __u32 *next_targ_id)
-{
- const struct btf_type *local_type, *targ_type;
- const struct btf_member *local_member, *m;
- const char *local_name, *targ_name;
- __u32 local_id;
- int i, n, found;
-
- targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
- if (!targ_type)
- return -EINVAL;
- if (!btf_is_composite(targ_type))
- return 0;
-
- local_id = local_acc->type_id;
- local_type = btf__type_by_id(local_btf, local_id);
- local_member = btf_members(local_type) + local_acc->idx;
- local_name = btf__name_by_offset(local_btf, local_member->name_off);
-
- n = btf_vlen(targ_type);
- m = btf_members(targ_type);
- for (i = 0; i < n; i++, m++) {
- __u32 bit_offset;
+ if (strncmp(local_name, targ_name, local_essent_len) != 0)
+ continue;
- bit_offset = btf_member_bit_offset(targ_type, i);
+ pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s in [%s]\n",
+ local_cand->id, btf_kind_str(local_t),
+ local_name, i, btf_kind_str(t), targ_name,
+ targ_btf_name);
+ new_cands = libbpf_reallocarray(cands->cands, cands->len + 1,
+ sizeof(*cands->cands));
+ if (!new_cands)
+ return -ENOMEM;
- /* too deep struct/union/array nesting */
- if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
- return -E2BIG;
+ cand = &new_cands[cands->len];
+ cand->btf = targ_btf;
+ cand->id = i;
- /* speculate this member will be the good one */
- spec->bit_offset += bit_offset;
- spec->raw_spec[spec->raw_len++] = i;
-
- targ_name = btf__name_by_offset(targ_btf, m->name_off);
- if (str_is_empty(targ_name)) {
- /* embedded struct/union, we need to go deeper */
- found = bpf_core_match_member(local_btf, local_acc,
- targ_btf, m->type,
- spec, next_targ_id);
- if (found) /* either found or error */
- return found;
- } else if (strcmp(local_name, targ_name) == 0) {
- /* matching named field */
- struct bpf_core_accessor *targ_acc;
-
- targ_acc = &spec->spec[spec->len++];
- targ_acc->type_id = targ_id;
- targ_acc->idx = i;
- targ_acc->name = targ_name;
-
- *next_targ_id = m->type;
- found = bpf_core_fields_are_compat(local_btf,
- local_member->type,
- targ_btf, m->type);
- if (!found)
- spec->len--; /* pop accessor */
- return found;
- }
- /* member turned out not to be what we looked for */
- spec->bit_offset -= bit_offset;
- spec->raw_len--;
+ cands->cands = new_cands;
+ cands->len++;
}
-
return 0;
}
-/* Check local and target types for compatibility. This check is used for
- * type-based CO-RE relocations and follow slightly different rules than
- * field-based relocations. This function assumes that root types were already
- * checked for name match. Beyond that initial root-level name check, names
- * are completely ignored. Compatibility rules are as follows:
- * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
- * kind should match for local and target types (i.e., STRUCT is not
- * compatible with UNION);
- * - for ENUMs, the size is ignored;
- * - for INT, size and signedness are ignored;
- * - for ARRAY, dimensionality is ignored, element types are checked for
- * compatibility recursively;
- * - CONST/VOLATILE/RESTRICT modifiers are ignored;
- * - TYPEDEFs/PTRs are compatible if types they pointing to are compatible;
- * - FUNC_PROTOs are compatible if they have compatible signature: same
- * number of input args and compatible return and argument types.
- * These rules are not set in stone and probably will be adjusted as we get
- * more experience with using BPF CO-RE relocations.
- */
-static int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
- const struct btf *targ_btf, __u32 targ_id)
+static int load_module_btfs(struct bpf_object *obj)
{
- const struct btf_type *local_type, *targ_type;
- int depth = 32; /* max recursion depth */
+ struct bpf_btf_info info;
+ struct module_btf *mod_btf;
+ struct btf *btf;
+ char name[64];
+ __u32 id = 0, len;
+ int err, fd;
- /* caller made sure that names match (ignoring flavor suffix) */
- local_type = btf__type_by_id(local_btf, local_id);
- targ_type = btf__type_by_id(targ_btf, targ_id);
- if (btf_kind(local_type) != btf_kind(targ_type))
+ if (obj->btf_modules_loaded)
return 0;
-recur:
- depth--;
- if (depth < 0)
- return -EINVAL;
-
- local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
- targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
- if (!local_type || !targ_type)
- return -EINVAL;
-
- if (btf_kind(local_type) != btf_kind(targ_type))
+ if (obj->gen_loader)
return 0;
- switch (btf_kind(local_type)) {
- case BTF_KIND_UNKN:
- case BTF_KIND_STRUCT:
- case BTF_KIND_UNION:
- case BTF_KIND_ENUM:
- case BTF_KIND_FWD:
- return 1;
- case BTF_KIND_INT:
- /* just reject deprecated bitfield-like integers; all other
- * integers are by default compatible between each other
- */
- return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0;
- case BTF_KIND_PTR:
- local_id = local_type->type;
- targ_id = targ_type->type;
- goto recur;
- case BTF_KIND_ARRAY:
- local_id = btf_array(local_type)->type;
- targ_id = btf_array(targ_type)->type;
- goto recur;
- case BTF_KIND_FUNC_PROTO: {
- struct btf_param *local_p = btf_params(local_type);
- struct btf_param *targ_p = btf_params(targ_type);
- __u16 local_vlen = btf_vlen(local_type);
- __u16 targ_vlen = btf_vlen(targ_type);
- int i, err;
-
- if (local_vlen != targ_vlen)
- return 0;
-
- for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
- skip_mods_and_typedefs(local_btf, local_p->type, &local_id);
- skip_mods_and_typedefs(targ_btf, targ_p->type, &targ_id);
- err = bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id);
- if (err <= 0)
- return err;
- }
+ /* don't do this again, even if we find no module BTFs */
+ obj->btf_modules_loaded = true;
- /* tail recurse for return type check */
- skip_mods_and_typedefs(local_btf, local_type->type, &local_id);
- skip_mods_and_typedefs(targ_btf, targ_type->type, &targ_id);
- goto recur;
- }
- default:
- pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n",
- btf_kind_str(local_type), local_id, targ_id);
+ /* kernel too old to support module BTFs */
+ if (!kernel_supports(obj, FEAT_MODULE_BTF))
return 0;
- }
-}
-
-/*
- * Try to match local spec to a target type and, if successful, produce full
- * target spec (high-level, low-level + bit offset).
- */
-static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
- const struct btf *targ_btf, __u32 targ_id,
- struct bpf_core_spec *targ_spec)
-{
- const struct btf_type *targ_type;
- const struct bpf_core_accessor *local_acc;
- struct bpf_core_accessor *targ_acc;
- int i, sz, matched;
-
- memset(targ_spec, 0, sizeof(*targ_spec));
- targ_spec->btf = targ_btf;
- targ_spec->root_type_id = targ_id;
- targ_spec->relo_kind = local_spec->relo_kind;
-
- if (core_relo_is_type_based(local_spec->relo_kind)) {
- return bpf_core_types_are_compat(local_spec->btf,
- local_spec->root_type_id,
- targ_btf, targ_id);
- }
-
- local_acc = &local_spec->spec[0];
- targ_acc = &targ_spec->spec[0];
-
- if (core_relo_is_enumval_based(local_spec->relo_kind)) {
- size_t local_essent_len, targ_essent_len;
- const struct btf_enum *e;
- const char *targ_name;
- /* has to resolve to an enum */
- targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id);
- if (!btf_is_enum(targ_type))
+ while (true) {
+ err = bpf_btf_get_next_id(id, &id);
+ if (err && errno == ENOENT)
+ return 0;
+ if (err && errno == EPERM) {
+ pr_debug("skipping module BTFs loading, missing privileges\n");
return 0;
-
- local_essent_len = bpf_core_essential_name_len(local_acc->name);
-
- for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) {
- targ_name = btf__name_by_offset(targ_spec->btf, e->name_off);
- targ_essent_len = bpf_core_essential_name_len(targ_name);
- if (targ_essent_len != local_essent_len)
- continue;
- if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) {
- targ_acc->type_id = targ_id;
- targ_acc->idx = i;
- targ_acc->name = targ_name;
- targ_spec->len++;
- targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
- targ_spec->raw_len++;
- return 1;
- }
}
- return 0;
- }
-
- if (!core_relo_is_field_based(local_spec->relo_kind))
- return -EINVAL;
-
- for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) {
- targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id,
- &targ_id);
- if (!targ_type)
- return -EINVAL;
-
- if (local_acc->name) {
- matched = bpf_core_match_member(local_spec->btf,
- local_acc,
- targ_btf, targ_id,
- targ_spec, &targ_id);
- if (matched <= 0)
- return matched;
- } else {
- /* for i=0, targ_id is already treated as array element
- * type (because it's the original struct), for others
- * we should find array element type first
- */
- if (i > 0) {
- const struct btf_array *a;
- bool flex;
-
- if (!btf_is_array(targ_type))
- return 0;
-
- a = btf_array(targ_type);
- flex = is_flex_arr(targ_btf, targ_acc - 1, a);
- if (!flex && local_acc->idx >= a->nelems)
- return 0;
- if (!skip_mods_and_typedefs(targ_btf, a->type,
- &targ_id))
- return -EINVAL;
- }
-
- /* too deep struct/union/array nesting */
- if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
- return -E2BIG;
-
- targ_acc->type_id = targ_id;
- targ_acc->idx = local_acc->idx;
- targ_acc->name = NULL;
- targ_spec->len++;
- targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
- targ_spec->raw_len++;
-
- sz = btf__resolve_size(targ_btf, targ_id);
- if (sz < 0)
- return sz;
- targ_spec->bit_offset += local_acc->idx * sz * 8;
+ if (err) {
+ err = -errno;
+ pr_warn("failed to iterate BTF objects: %d\n", err);
+ return err;
}
- }
-
- return 1;
-}
-
-static int bpf_core_calc_field_relo(const struct bpf_program *prog,
- const struct bpf_core_relo *relo,
- const struct bpf_core_spec *spec,
- __u32 *val, __u32 *field_sz, __u32 *type_id,
- bool *validate)
-{
- const struct bpf_core_accessor *acc;
- const struct btf_type *t;
- __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id;
- const struct btf_member *m;
- const struct btf_type *mt;
- bool bitfield;
- __s64 sz;
- *field_sz = 0;
-
- if (relo->kind == BPF_FIELD_EXISTS) {
- *val = spec ? 1 : 0;
- return 0;
- }
+ fd = bpf_btf_get_fd_by_id(id);
+ if (fd < 0) {
+ if (errno == ENOENT)
+ continue; /* expected race: BTF was unloaded */
+ err = -errno;
+ pr_warn("failed to get BTF object #%d FD: %d\n", id, err);
+ return err;
+ }
- if (!spec)
- return -EUCLEAN; /* request instruction poisoning */
+ len = sizeof(info);
+ memset(&info, 0, sizeof(info));
+ info.name = ptr_to_u64(name);
+ info.name_len = sizeof(name);
- acc = &spec->spec[spec->len - 1];
- t = btf__type_by_id(spec->btf, acc->type_id);
+ err = bpf_btf_get_info_by_fd(fd, &info, &len);
+ if (err) {
+ err = -errno;
+ pr_warn("failed to get BTF object #%d info: %d\n", id, err);
+ goto err_out;
+ }
- /* a[n] accessor needs special handling */
- if (!acc->name) {
- if (relo->kind == BPF_FIELD_BYTE_OFFSET) {
- *val = spec->bit_offset / 8;
- /* remember field size for load/store mem size */
- sz = btf__resolve_size(spec->btf, acc->type_id);
- if (sz < 0)
- return -EINVAL;
- *field_sz = sz;
- *type_id = acc->type_id;
- } else if (relo->kind == BPF_FIELD_BYTE_SIZE) {
- sz = btf__resolve_size(spec->btf, acc->type_id);
- if (sz < 0)
- return -EINVAL;
- *val = sz;
- } else {
- pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n",
- prog->name, relo->kind, relo->insn_off / 8);
- return -EINVAL;
+ /* ignore non-module BTFs */
+ if (!info.kernel_btf || strcmp(name, "vmlinux") == 0) {
+ close(fd);
+ continue;
}
- if (validate)
- *validate = true;
- return 0;
- }
- m = btf_members(t) + acc->idx;
- mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id);
- bit_off = spec->bit_offset;
- bit_sz = btf_member_bitfield_size(t, acc->idx);
-
- bitfield = bit_sz > 0;
- if (bitfield) {
- byte_sz = mt->size;
- byte_off = bit_off / 8 / byte_sz * byte_sz;
- /* figure out smallest int size necessary for bitfield load */
- while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) {
- if (byte_sz >= 8) {
- /* bitfield can't be read with 64-bit read */
- pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n",
- prog->name, relo->kind, relo->insn_off / 8);
- return -E2BIG;
- }
- byte_sz *= 2;
- byte_off = bit_off / 8 / byte_sz * byte_sz;
+ btf = btf_get_from_fd(fd, obj->btf_vmlinux);
+ err = libbpf_get_error(btf);
+ if (err) {
+ pr_warn("failed to load module [%s]'s BTF object #%d: %d\n",
+ name, id, err);
+ goto err_out;
}
- } else {
- sz = btf__resolve_size(spec->btf, field_type_id);
- if (sz < 0)
- return -EINVAL;
- byte_sz = sz;
- byte_off = spec->bit_offset / 8;
- bit_sz = byte_sz * 8;
- }
- /* for bitfields, all the relocatable aspects are ambiguous and we
- * might disagree with compiler, so turn off validation of expected
- * value, except for signedness
- */
- if (validate)
- *validate = !bitfield;
+ err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap,
+ sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
+ if (err)
+ goto err_out;
- switch (relo->kind) {
- case BPF_FIELD_BYTE_OFFSET:
- *val = byte_off;
- if (!bitfield) {
- *field_sz = byte_sz;
- *type_id = field_type_id;
+ mod_btf = &obj->btf_modules[obj->btf_module_cnt++];
+
+ mod_btf->btf = btf;
+ mod_btf->id = id;
+ mod_btf->fd = fd;
+ mod_btf->name = strdup(name);
+ if (!mod_btf->name) {
+ err = -ENOMEM;
+ goto err_out;
}
- break;
- case BPF_FIELD_BYTE_SIZE:
- *val = byte_sz;
- break;
- case BPF_FIELD_SIGNED:
- /* enums will be assumed unsigned */
- *val = btf_is_enum(mt) ||
- (btf_int_encoding(mt) & BTF_INT_SIGNED);
- if (validate)
- *validate = true; /* signedness is never ambiguous */
- break;
- case BPF_FIELD_LSHIFT_U64:
-#if __BYTE_ORDER == __LITTLE_ENDIAN
- *val = 64 - (bit_off + bit_sz - byte_off * 8);
-#else
- *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8);
-#endif
- break;
- case BPF_FIELD_RSHIFT_U64:
- *val = 64 - bit_sz;
- if (validate)
- *validate = true; /* right shift is never ambiguous */
- break;
- case BPF_FIELD_EXISTS:
- default:
- return -EOPNOTSUPP;
+ continue;
+
+err_out:
+ close(fd);
+ return err;
}
return 0;
}
-static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
- const struct bpf_core_spec *spec,
- __u32 *val)
+static struct bpf_core_cand_list *
+bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id)
{
- __s64 sz;
-
- /* type-based relos return zero when target type is not found */
- if (!spec) {
- *val = 0;
- return 0;
- }
+ struct bpf_core_cand local_cand = {};
+ struct bpf_core_cand_list *cands;
+ const struct btf *main_btf;
+ const struct btf_type *local_t;
+ const char *local_name;
+ size_t local_essent_len;
+ int err, i;
- switch (relo->kind) {
- case BPF_TYPE_ID_TARGET:
- *val = spec->root_type_id;
- break;
- case BPF_TYPE_EXISTS:
- *val = 1;
- break;
- case BPF_TYPE_SIZE:
- sz = btf__resolve_size(spec->btf, spec->root_type_id);
- if (sz < 0)
- return -EINVAL;
- *val = sz;
- break;
- case BPF_TYPE_ID_LOCAL:
- /* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */
- default:
- return -EOPNOTSUPP;
- }
+ local_cand.btf = local_btf;
+ local_cand.id = local_type_id;
+ local_t = btf__type_by_id(local_btf, local_type_id);
+ if (!local_t)
+ return ERR_PTR(-EINVAL);
- return 0;
-}
+ local_name = btf__name_by_offset(local_btf, local_t->name_off);
+ if (str_is_empty(local_name))
+ return ERR_PTR(-EINVAL);
+ local_essent_len = bpf_core_essential_name_len(local_name);
-static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,
- const struct bpf_core_spec *spec,
- __u32 *val)
-{
- const struct btf_type *t;
- const struct btf_enum *e;
+ cands = calloc(1, sizeof(*cands));
+ if (!cands)
+ return ERR_PTR(-ENOMEM);
- switch (relo->kind) {
- case BPF_ENUMVAL_EXISTS:
- *val = spec ? 1 : 0;
- break;
- case BPF_ENUMVAL_VALUE:
- if (!spec)
- return -EUCLEAN; /* request instruction poisoning */
- t = btf__type_by_id(spec->btf, spec->spec[0].type_id);
- e = btf_enum(t) + spec->spec[0].idx;
- *val = e->val;
- break;
- default:
- return -EOPNOTSUPP;
- }
+ /* Attempt to find target candidates in vmlinux BTF first */
+ main_btf = obj->btf_vmlinux_override ?: obj->btf_vmlinux;
+ err = bpf_core_add_cands(&local_cand, local_essent_len, main_btf, "vmlinux", 1, cands);
+ if (err)
+ goto err_out;
- return 0;
-}
+ /* if vmlinux BTF has any candidate, don't got for module BTFs */
+ if (cands->len)
+ return cands;
-struct bpf_core_relo_res
-{
- /* expected value in the instruction, unless validate == false */
- __u32 orig_val;
- /* new value that needs to be patched up to */
- __u32 new_val;
- /* relocation unsuccessful, poison instruction, but don't fail load */
- bool poison;
- /* some relocations can't be validated against orig_val */
- bool validate;
- /* for field byte offset relocations or the forms:
- * *(T *)(rX + <off>) = rY
- * rX = *(T *)(rY + <off>),
- * we remember original and resolved field size to adjust direct
- * memory loads of pointers and integers; this is necessary for 32-bit
- * host kernel architectures, but also allows to automatically
- * relocate fields that were resized from, e.g., u32 to u64, etc.
- */
- bool fail_memsz_adjust;
- __u32 orig_sz;
- __u32 orig_type_id;
- __u32 new_sz;
- __u32 new_type_id;
-};
+ /* if vmlinux BTF was overridden, don't attempt to load module BTFs */
+ if (obj->btf_vmlinux_override)
+ return cands;
-/* Calculate original and target relocation values, given local and target
- * specs and relocation kind. These values are calculated for each candidate.
- * If there are multiple candidates, resulting values should all be consistent
- * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity.
- * If instruction has to be poisoned, *poison will be set to true.
- */
-static int bpf_core_calc_relo(const struct bpf_program *prog,
- const struct bpf_core_relo *relo,
- int relo_idx,
- const struct bpf_core_spec *local_spec,
- const struct bpf_core_spec *targ_spec,
- struct bpf_core_relo_res *res)
-{
- int err = -EOPNOTSUPP;
-
- res->orig_val = 0;
- res->new_val = 0;
- res->poison = false;
- res->validate = true;
- res->fail_memsz_adjust = false;
- res->orig_sz = res->new_sz = 0;
- res->orig_type_id = res->new_type_id = 0;
-
- if (core_relo_is_field_based(relo->kind)) {
- err = bpf_core_calc_field_relo(prog, relo, local_spec,
- &res->orig_val, &res->orig_sz,
- &res->orig_type_id, &res->validate);
- err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec,
- &res->new_val, &res->new_sz,
- &res->new_type_id, NULL);
+ /* now look through module BTFs, trying to still find candidates */
+ err = load_module_btfs(obj);
+ if (err)
+ goto err_out;
+
+ for (i = 0; i < obj->btf_module_cnt; i++) {
+ err = bpf_core_add_cands(&local_cand, local_essent_len,
+ obj->btf_modules[i].btf,
+ obj->btf_modules[i].name,
+ btf__type_cnt(obj->btf_vmlinux),
+ cands);
if (err)
- goto done;
- /* Validate if it's safe to adjust load/store memory size.
- * Adjustments are performed only if original and new memory
- * sizes differ.
- */
- res->fail_memsz_adjust = false;
- if (res->orig_sz != res->new_sz) {
- const struct btf_type *orig_t, *new_t;
-
- orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id);
- new_t = btf__type_by_id(targ_spec->btf, res->new_type_id);
-
- /* There are two use cases in which it's safe to
- * adjust load/store's mem size:
- * - reading a 32-bit kernel pointer, while on BPF
- * size pointers are always 64-bit; in this case
- * it's safe to "downsize" instruction size due to
- * pointer being treated as unsigned integer with
- * zero-extended upper 32-bits;
- * - reading unsigned integers, again due to
- * zero-extension is preserving the value correctly.
- *
- * In all other cases it's incorrect to attempt to
- * load/store field because read value will be
- * incorrect, so we poison relocated instruction.
- */
- if (btf_is_ptr(orig_t) && btf_is_ptr(new_t))
- goto done;
- if (btf_is_int(orig_t) && btf_is_int(new_t) &&
- btf_int_encoding(orig_t) != BTF_INT_SIGNED &&
- btf_int_encoding(new_t) != BTF_INT_SIGNED)
- goto done;
-
- /* mark as invalid mem size adjustment, but this will
- * only be checked for LDX/STX/ST insns
- */
- res->fail_memsz_adjust = true;
- }
- } else if (core_relo_is_type_based(relo->kind)) {
- err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val);
- err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val);
- } else if (core_relo_is_enumval_based(relo->kind)) {
- err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val);
- err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val);
- }
-
-done:
- if (err == -EUCLEAN) {
- /* EUCLEAN is used to signal instruction poisoning request */
- res->poison = true;
- err = 0;
- } else if (err == -EOPNOTSUPP) {
- /* EOPNOTSUPP means unknown/unsupported relocation */
- pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n",
- prog->name, relo_idx, core_relo_kind_str(relo->kind),
- relo->kind, relo->insn_off / 8);
+ goto err_out;
}
- return err;
+ return cands;
+err_out:
+ bpf_core_free_cands(cands);
+ return ERR_PTR(err);
}
-/*
- * Turn instruction for which CO_RE relocation failed into invalid one with
- * distinct signature.
+/* Check local and target types for compatibility. This check is used for
+ * type-based CO-RE relocations and follow slightly different rules than
+ * field-based relocations. This function assumes that root types were already
+ * checked for name match. Beyond that initial root-level name check, names
+ * are completely ignored. Compatibility rules are as follows:
+ * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
+ * kind should match for local and target types (i.e., STRUCT is not
+ * compatible with UNION);
+ * - for ENUMs, the size is ignored;
+ * - for INT, size and signedness are ignored;
+ * - for ARRAY, dimensionality is ignored, element types are checked for
+ * compatibility recursively;
+ * - CONST/VOLATILE/RESTRICT modifiers are ignored;
+ * - TYPEDEFs/PTRs are compatible if types they pointing to are compatible;
+ * - FUNC_PROTOs are compatible if they have compatible signature: same
+ * number of input args and compatible return and argument types.
+ * These rules are not set in stone and probably will be adjusted as we get
+ * more experience with using BPF CO-RE relocations.
*/
-static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx,
- int insn_idx, struct bpf_insn *insn)
+int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
+ const struct btf *targ_btf, __u32 targ_id)
{
- pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n",
- prog->name, relo_idx, insn_idx);
- insn->code = BPF_JMP | BPF_CALL;
- insn->dst_reg = 0;
- insn->src_reg = 0;
- insn->off = 0;
- /* if this instruction is reachable (not a dead code),
- * verifier will complain with the following message:
- * invalid func unknown#195896080
- */
- insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
+ return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, 32);
}
-static bool is_ldimm64(struct bpf_insn *insn)
+int bpf_core_types_match(const struct btf *local_btf, __u32 local_id,
+ const struct btf *targ_btf, __u32 targ_id)
{
- return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
+ return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false, 32);
}
-static int insn_bpf_size_to_bytes(struct bpf_insn *insn)
+static size_t bpf_core_hash_fn(const long key, void *ctx)
{
- switch (BPF_SIZE(insn->code)) {
- case BPF_DW: return 8;
- case BPF_W: return 4;
- case BPF_H: return 2;
- case BPF_B: return 1;
- default: return -1;
- }
+ return key;
}
-static int insn_bytes_to_bpf_size(__u32 sz)
+static bool bpf_core_equal_fn(const long k1, const long k2, void *ctx)
{
- switch (sz) {
- case 8: return BPF_DW;
- case 4: return BPF_W;
- case 2: return BPF_H;
- case 1: return BPF_B;
- default: return -1;
- }
+ return k1 == k2;
}
-/*
- * Patch relocatable BPF instruction.
- *
- * Patched value is determined by relocation kind and target specification.
- * For existence relocations target spec will be NULL if field/type is not found.
- * Expected insn->imm value is determined using relocation kind and local
- * spec, and is checked before patching instruction. If actual insn->imm value
- * is wrong, bail out with error.
- *
- * Currently supported classes of BPF instruction are:
- * 1. rX = <imm> (assignment with immediate operand);
- * 2. rX += <imm> (arithmetic operations with immediate operand);
- * 3. rX = <imm64> (load with 64-bit immediate value);
- * 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64};
- * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64};
- * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}.
- */
-static int bpf_core_patch_insn(struct bpf_program *prog,
- const struct bpf_core_relo *relo,
- int relo_idx,
- const struct bpf_core_relo_res *res)
+static int record_relo_core(struct bpf_program *prog,
+ const struct bpf_core_relo *core_relo, int insn_idx)
{
- __u32 orig_val, new_val;
- struct bpf_insn *insn;
- int insn_idx;
- __u8 class;
-
- if (relo->insn_off % BPF_INSN_SZ)
- return -EINVAL;
- insn_idx = relo->insn_off / BPF_INSN_SZ;
- /* adjust insn_idx from section frame of reference to the local
- * program's frame of reference; (sub-)program code is not yet
- * relocated, so it's enough to just subtract in-section offset
- */
- insn_idx = insn_idx - prog->sec_insn_off;
- insn = &prog->insns[insn_idx];
- class = BPF_CLASS(insn->code);
-
- if (res->poison) {
-poison:
- /* poison second part of ldimm64 to avoid confusing error from
- * verifier about "unknown opcode 00"
- */
- if (is_ldimm64(insn))
- bpf_core_poison_insn(prog, relo_idx, insn_idx + 1, insn + 1);
- bpf_core_poison_insn(prog, relo_idx, insn_idx, insn);
- return 0;
- }
-
- orig_val = res->orig_val;
- new_val = res->new_val;
-
- switch (class) {
- case BPF_ALU:
- case BPF_ALU64:
- if (BPF_SRC(insn->code) != BPF_K)
- return -EINVAL;
- if (res->validate && insn->imm != orig_val) {
- pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n",
- prog->name, relo_idx,
- insn_idx, insn->imm, orig_val, new_val);
- return -EINVAL;
- }
- orig_val = insn->imm;
- insn->imm = new_val;
- pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n",
- prog->name, relo_idx, insn_idx,
- orig_val, new_val);
- break;
- case BPF_LDX:
- case BPF_ST:
- case BPF_STX:
- if (res->validate && insn->off != orig_val) {
- pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n",
- prog->name, relo_idx, insn_idx, insn->off, orig_val, new_val);
- return -EINVAL;
- }
- if (new_val > SHRT_MAX) {
- pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n",
- prog->name, relo_idx, insn_idx, new_val);
- return -ERANGE;
- }
- if (res->fail_memsz_adjust) {
- pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. "
- "Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n",
- prog->name, relo_idx, insn_idx);
- goto poison;
- }
-
- orig_val = insn->off;
- insn->off = new_val;
- pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n",
- prog->name, relo_idx, insn_idx, orig_val, new_val);
-
- if (res->new_sz != res->orig_sz) {
- int insn_bytes_sz, insn_bpf_sz;
-
- insn_bytes_sz = insn_bpf_size_to_bytes(insn);
- if (insn_bytes_sz != res->orig_sz) {
- pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n",
- prog->name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz);
- return -EINVAL;
- }
-
- insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz);
- if (insn_bpf_sz < 0) {
- pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n",
- prog->name, relo_idx, insn_idx, res->new_sz);
- return -EINVAL;
- }
-
- insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code);
- pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n",
- prog->name, relo_idx, insn_idx, res->orig_sz, res->new_sz);
- }
- break;
- case BPF_LD: {
- __u64 imm;
-
- if (!is_ldimm64(insn) ||
- insn[0].src_reg != 0 || insn[0].off != 0 ||
- insn_idx + 1 >= prog->insns_cnt ||
- insn[1].code != 0 || insn[1].dst_reg != 0 ||
- insn[1].src_reg != 0 || insn[1].off != 0) {
- pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n",
- prog->name, relo_idx, insn_idx);
- return -EINVAL;
- }
-
- imm = insn[0].imm + ((__u64)insn[1].imm << 32);
- if (res->validate && imm != orig_val) {
- pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n",
- prog->name, relo_idx,
- insn_idx, (unsigned long long)imm,
- orig_val, new_val);
- return -EINVAL;
- }
-
- insn[0].imm = new_val;
- insn[1].imm = 0; /* currently only 32-bit values are supported */
- pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n",
- prog->name, relo_idx, insn_idx,
- (unsigned long long)imm, new_val);
- break;
- }
- default:
- pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n",
- prog->name, relo_idx, insn_idx, insn->code,
- insn->src_reg, insn->dst_reg, insn->off, insn->imm);
- return -EINVAL;
- }
+ struct reloc_desc *relos, *relo;
+ relos = libbpf_reallocarray(prog->reloc_desc,
+ prog->nr_reloc + 1, sizeof(*relos));
+ if (!relos)
+ return -ENOMEM;
+ relo = &relos[prog->nr_reloc];
+ relo->type = RELO_CORE;
+ relo->insn_idx = insn_idx;
+ relo->core_relo = core_relo;
+ prog->reloc_desc = relos;
+ prog->nr_reloc++;
return 0;
}
-/* Output spec definition in the format:
- * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>,
- * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b
- */
-static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)
+static const struct bpf_core_relo *find_relo_core(struct bpf_program *prog, int insn_idx)
{
- const struct btf_type *t;
- const struct btf_enum *e;
- const char *s;
- __u32 type_id;
+ struct reloc_desc *relo;
int i;
- type_id = spec->root_type_id;
- t = btf__type_by_id(spec->btf, type_id);
- s = btf__name_by_offset(spec->btf, t->name_off);
-
- libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s);
-
- if (core_relo_is_type_based(spec->relo_kind))
- return;
-
- if (core_relo_is_enumval_based(spec->relo_kind)) {
- t = skip_mods_and_typedefs(spec->btf, type_id, NULL);
- e = btf_enum(t) + spec->raw_spec[0];
- s = btf__name_by_offset(spec->btf, e->name_off);
-
- libbpf_print(level, "::%s = %u", s, e->val);
- return;
- }
-
- if (core_relo_is_field_based(spec->relo_kind)) {
- for (i = 0; i < spec->len; i++) {
- if (spec->spec[i].name)
- libbpf_print(level, ".%s", spec->spec[i].name);
- else if (i > 0 || spec->spec[i].idx > 0)
- libbpf_print(level, "[%u]", spec->spec[i].idx);
- }
-
- libbpf_print(level, " (");
- for (i = 0; i < spec->raw_len; i++)
- libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]);
+ for (i = 0; i < prog->nr_reloc; i++) {
+ relo = &prog->reloc_desc[i];
+ if (relo->type != RELO_CORE || relo->insn_idx != insn_idx)
+ continue;
- if (spec->bit_offset % 8)
- libbpf_print(level, " @ offset %u.%u)",
- spec->bit_offset / 8, spec->bit_offset % 8);
- else
- libbpf_print(level, " @ offset %u)", spec->bit_offset / 8);
- return;
+ return relo->core_relo;
}
-}
-
-static size_t bpf_core_hash_fn(const void *key, void *ctx)
-{
- return (size_t)key;
-}
-static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx)
-{
- return k1 == k2;
+ return NULL;
}
-static void *u32_as_hash_key(__u32 x)
+static int bpf_core_resolve_relo(struct bpf_program *prog,
+ const struct bpf_core_relo *relo,
+ int relo_idx,
+ const struct btf *local_btf,
+ struct hashmap *cand_cache,
+ struct bpf_core_relo_res *targ_res)
{
- return (void *)(uintptr_t)x;
-}
-
-/*
- * CO-RE relocate single instruction.
- *
- * The outline and important points of the algorithm:
- * 1. For given local type, find corresponding candidate target types.
- * Candidate type is a type with the same "essential" name, ignoring
- * everything after last triple underscore (___). E.g., `sample`,
- * `sample___flavor_one`, `sample___flavor_another_one`, are all candidates
- * for each other. Names with triple underscore are referred to as
- * "flavors" and are useful, among other things, to allow to
- * specify/support incompatible variations of the same kernel struct, which
- * might differ between different kernel versions and/or build
- * configurations.
- *
- * N.B. Struct "flavors" could be generated by bpftool's BTF-to-C
- * converter, when deduplicated BTF of a kernel still contains more than
- * one different types with the same name. In that case, ___2, ___3, etc
- * are appended starting from second name conflict. But start flavors are
- * also useful to be defined "locally", in BPF program, to extract same
- * data from incompatible changes between different kernel
- * versions/configurations. For instance, to handle field renames between
- * kernel versions, one can use two flavors of the struct name with the
- * same common name and use conditional relocations to extract that field,
- * depending on target kernel version.
- * 2. For each candidate type, try to match local specification to this
- * candidate target type. Matching involves finding corresponding
- * high-level spec accessors, meaning that all named fields should match,
- * as well as all array accesses should be within the actual bounds. Also,
- * types should be compatible (see bpf_core_fields_are_compat for details).
- * 3. It is supported and expected that there might be multiple flavors
- * matching the spec. As long as all the specs resolve to the same set of
- * offsets across all candidates, there is no error. If there is any
- * ambiguity, CO-RE relocation will fail. This is necessary to accomodate
- * imprefection of BTF deduplication, which can cause slight duplication of
- * the same BTF type, if some directly or indirectly referenced (by
- * pointer) type gets resolved to different actual types in different
- * object files. If such situation occurs, deduplicated BTF will end up
- * with two (or more) structurally identical types, which differ only in
- * types they refer to through pointer. This should be OK in most cases and
- * is not an error.
- * 4. Candidate types search is performed by linearly scanning through all
- * types in target BTF. It is anticipated that this is overall more
- * efficient memory-wise and not significantly worse (if not better)
- * CPU-wise compared to prebuilding a map from all local type names to
- * a list of candidate type names. It's also sped up by caching resolved
- * list of matching candidates per each local "root" type ID, that has at
- * least one bpf_core_relo associated with it. This list is shared
- * between multiple relocations for the same type ID and is updated as some
- * of the candidates are pruned due to structural incompatibility.
- */
-static int bpf_core_apply_relo(struct bpf_program *prog,
- const struct bpf_core_relo *relo,
- int relo_idx,
- const struct btf *local_btf,
- const struct btf *targ_btf,
- struct hashmap *cand_cache)
-{
- struct bpf_core_spec local_spec, cand_spec, targ_spec = {};
- const void *type_key = u32_as_hash_key(relo->type_id);
- struct bpf_core_relo_res cand_res, targ_res;
+ struct bpf_core_spec specs_scratch[3] = {};
+ struct bpf_core_cand_list *cands = NULL;
+ const char *prog_name = prog->name;
const struct btf_type *local_type;
const char *local_name;
- struct ids_vec *cand_ids;
- __u32 local_id, cand_id;
- const char *spec_str;
- int i, j, err;
+ __u32 local_id = relo->type_id;
+ int err;
- local_id = relo->type_id;
local_type = btf__type_by_id(local_btf, local_id);
if (!local_type)
return -EINVAL;
@@ -5683,171 +5740,50 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
if (!local_name)
return -EINVAL;
- spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
- if (str_is_empty(spec_str))
- return -EINVAL;
-
- err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec);
- if (err) {
- pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",
- prog->name, relo_idx, local_id, btf_kind_str(local_type),
- str_is_empty(local_name) ? "<anon>" : local_name,
- spec_str, err);
- return -EINVAL;
- }
-
- pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog->name,
- relo_idx, core_relo_kind_str(relo->kind), relo->kind);
- bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
- libbpf_print(LIBBPF_DEBUG, "\n");
-
- /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */
- if (relo->kind == BPF_TYPE_ID_LOCAL) {
- targ_res.validate = true;
- targ_res.poison = false;
- targ_res.orig_val = local_spec.root_type_id;
- targ_res.new_val = local_spec.root_type_id;
- goto patch_insn;
- }
-
- /* libbpf doesn't support candidate search for anonymous types */
- if (str_is_empty(spec_str)) {
- pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n",
- prog->name, relo_idx, core_relo_kind_str(relo->kind), relo->kind);
- return -EOPNOTSUPP;
- }
-
- if (!hashmap__find(cand_cache, type_key, (void **)&cand_ids)) {
- cand_ids = bpf_core_find_cands(local_btf, local_id, targ_btf);
- if (IS_ERR(cand_ids)) {
- pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld",
- prog->name, relo_idx, local_id, btf_kind_str(local_type),
- local_name, PTR_ERR(cand_ids));
- return PTR_ERR(cand_ids);
+ if (relo->kind != BPF_CORE_TYPE_ID_LOCAL &&
+ !hashmap__find(cand_cache, local_id, &cands)) {
+ cands = bpf_core_find_cands(prog->obj, local_btf, local_id);
+ if (IS_ERR(cands)) {
+ pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n",
+ prog_name, relo_idx, local_id, btf_kind_str(local_type),
+ local_name, PTR_ERR(cands));
+ return PTR_ERR(cands);
}
- err = hashmap__set(cand_cache, type_key, cand_ids, NULL, NULL);
+ err = hashmap__set(cand_cache, local_id, cands, NULL, NULL);
if (err) {
- bpf_core_free_cands(cand_ids);
- return err;
- }
- }
-
- for (i = 0, j = 0; i < cand_ids->len; i++) {
- cand_id = cand_ids->data[i];
- err = bpf_core_spec_match(&local_spec, targ_btf, cand_id, &cand_spec);
- if (err < 0) {
- pr_warn("prog '%s': relo #%d: error matching candidate #%d ",
- prog->name, relo_idx, i);
- bpf_core_dump_spec(LIBBPF_WARN, &cand_spec);
- libbpf_print(LIBBPF_WARN, ": %d\n", err);
- return err;
- }
-
- pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog->name,
- relo_idx, err == 0 ? "non-matching" : "matching", i);
- bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
- libbpf_print(LIBBPF_DEBUG, "\n");
-
- if (err == 0)
- continue;
-
- err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, &cand_spec, &cand_res);
- if (err)
+ bpf_core_free_cands(cands);
return err;
-
- if (j == 0) {
- targ_res = cand_res;
- targ_spec = cand_spec;
- } else if (cand_spec.bit_offset != targ_spec.bit_offset) {
- /* if there are many field relo candidates, they
- * should all resolve to the same bit offset
- */
- pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n",
- prog->name, relo_idx, cand_spec.bit_offset,
- targ_spec.bit_offset);
- return -EINVAL;
- } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) {
- /* all candidates should result in the same relocation
- * decision and value, otherwise it's dangerous to
- * proceed due to ambiguity
- */
- pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n",
- prog->name, relo_idx,
- cand_res.poison ? "failure" : "success", cand_res.new_val,
- targ_res.poison ? "failure" : "success", targ_res.new_val);
- return -EINVAL;
}
-
- cand_ids->data[j++] = cand_spec.root_type_id;
- }
-
- /*
- * For BPF_FIELD_EXISTS relo or when used BPF program has field
- * existence checks or kernel version/config checks, it's expected
- * that we might not find any candidates. In this case, if field
- * wasn't found in any candidate, the list of candidates shouldn't
- * change at all, we'll just handle relocating appropriately,
- * depending on relo's kind.
- */
- if (j > 0)
- cand_ids->len = j;
-
- /*
- * If no candidates were found, it might be both a programmer error,
- * as well as expected case, depending whether instruction w/
- * relocation is guarded in some way that makes it unreachable (dead
- * code) if relocation can't be resolved. This is handled in
- * bpf_core_patch_insn() uniformly by replacing that instruction with
- * BPF helper call insn (using invalid helper ID). If that instruction
- * is indeed unreachable, then it will be ignored and eliminated by
- * verifier. If it was an error, then verifier will complain and point
- * to a specific instruction number in its log.
- */
- if (j == 0) {
- pr_debug("prog '%s': relo #%d: no matching targets found\n",
- prog->name, relo_idx);
-
- /* calculate single target relo result explicitly */
- err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, NULL, &targ_res);
- if (err)
- return err;
- }
-
-patch_insn:
- /* bpf_core_patch_insn() should know how to handle missing targ_spec */
- err = bpf_core_patch_insn(prog, relo, relo_idx, &targ_res);
- if (err) {
- pr_warn("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n",
- prog->name, relo_idx, relo->insn_off, err);
- return -EINVAL;
}
- return 0;
+ return bpf_core_calc_relo_insn(prog_name, relo, relo_idx, local_btf, cands, specs_scratch,
+ targ_res);
}
static int
bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
{
const struct btf_ext_info_sec *sec;
+ struct bpf_core_relo_res targ_res;
const struct bpf_core_relo *rec;
const struct btf_ext_info *seg;
struct hashmap_entry *entry;
struct hashmap *cand_cache = NULL;
struct bpf_program *prog;
- struct btf *targ_btf;
+ struct bpf_insn *insn;
const char *sec_name;
- int i, err = 0, insn_idx, sec_idx;
+ int i, err = 0, insn_idx, sec_idx, sec_num;
if (obj->btf_ext->core_relo_info.len == 0)
return 0;
- if (targ_btf_path)
- targ_btf = btf__parse(targ_btf_path, NULL);
- else
- targ_btf = obj->btf_vmlinux;
- if (IS_ERR_OR_NULL(targ_btf)) {
- pr_warn("failed to get target BTF: %ld\n", PTR_ERR(targ_btf));
- return PTR_ERR(targ_btf);
+ if (targ_btf_path) {
+ obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL);
+ err = libbpf_get_error(obj->btf_vmlinux_override);
+ if (err) {
+ pr_warn("failed to parse target BTF: %d\n", err);
+ return err;
+ }
}
cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
@@ -5857,71 +5793,146 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
}
seg = &obj->btf_ext->core_relo_info;
+ sec_num = 0;
for_each_btf_ext_sec(seg, sec) {
+ sec_idx = seg->sec_idxs[sec_num];
+ sec_num++;
+
sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
if (str_is_empty(sec_name)) {
err = -EINVAL;
goto out;
}
- /* bpf_object's ELF is gone by now so it's not easy to find
- * section index by section name, but we can find *any*
- * bpf_program within desired section name and use it's
- * prog->sec_idx to do a proper search by section index and
- * instruction offset
- */
- prog = NULL;
- for (i = 0; i < obj->nr_programs; i++) {
- prog = &obj->programs[i];
- if (strcmp(prog->sec_name, sec_name) == 0)
- break;
- }
- if (!prog) {
- pr_warn("sec '%s': failed to find a BPF program\n", sec_name);
- return -ENOENT;
- }
- sec_idx = prog->sec_idx;
- pr_debug("sec '%s': found %d CO-RE relocations\n",
- sec_name, sec->num_info);
+ pr_debug("sec '%s': found %d CO-RE relocations\n", sec_name, sec->num_info);
for_each_btf_ext_rec(seg, sec, i, rec) {
+ if (rec->insn_off % BPF_INSN_SZ)
+ return -EINVAL;
insn_idx = rec->insn_off / BPF_INSN_SZ;
prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
if (!prog) {
- pr_warn("sec '%s': failed to find program at insn #%d for CO-RE offset relocation #%d\n",
- sec_name, insn_idx, i);
- err = -EINVAL;
- goto out;
+ /* When __weak subprog is "overridden" by another instance
+ * of the subprog from a different object file, linker still
+ * appends all the .BTF.ext info that used to belong to that
+ * eliminated subprogram.
+ * This is similar to what x86-64 linker does for relocations.
+ * So just ignore such relocations just like we ignore
+ * subprog instructions when discovering subprograms.
+ */
+ pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n",
+ sec_name, i, insn_idx);
+ continue;
}
/* no need to apply CO-RE relocation if the program is
* not going to be loaded
*/
- if (!prog->load)
+ if (!prog->autoload)
+ continue;
+
+ /* adjust insn_idx from section frame of reference to the local
+ * program's frame of reference; (sub-)program code is not yet
+ * relocated, so it's enough to just subtract in-section offset
+ */
+ insn_idx = insn_idx - prog->sec_insn_off;
+ if (insn_idx >= prog->insns_cnt)
+ return -EINVAL;
+ insn = &prog->insns[insn_idx];
+
+ err = record_relo_core(prog, rec, insn_idx);
+ if (err) {
+ pr_warn("prog '%s': relo #%d: failed to record relocation: %d\n",
+ prog->name, i, err);
+ goto out;
+ }
+
+ if (prog->obj->gen_loader)
continue;
- err = bpf_core_apply_relo(prog, rec, i, obj->btf,
- targ_btf, cand_cache);
+ err = bpf_core_resolve_relo(prog, rec, i, obj->btf, cand_cache, &targ_res);
if (err) {
pr_warn("prog '%s': relo #%d: failed to relocate: %d\n",
prog->name, i, err);
goto out;
}
+
+ err = bpf_core_patch_insn(prog->name, insn, insn_idx, rec, i, &targ_res);
+ if (err) {
+ pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n",
+ prog->name, i, insn_idx, err);
+ goto out;
+ }
}
}
out:
- /* obj->btf_vmlinux is freed at the end of object load phase */
- if (targ_btf != obj->btf_vmlinux)
- btf__free(targ_btf);
+ /* obj->btf_vmlinux and module BTFs are freed after object load */
+ btf__free(obj->btf_vmlinux_override);
+ obj->btf_vmlinux_override = NULL;
+
if (!IS_ERR_OR_NULL(cand_cache)) {
hashmap__for_each_entry(cand_cache, entry, i) {
- bpf_core_free_cands(entry->value);
+ bpf_core_free_cands(entry->pvalue);
}
hashmap__free(cand_cache);
}
return err;
}
+/* base map load ldimm64 special constant, used also for log fixup logic */
+#define POISON_LDIMM64_MAP_BASE 2001000000
+#define POISON_LDIMM64_MAP_PFX "200100"
+
+static void poison_map_ldimm64(struct bpf_program *prog, int relo_idx,
+ int insn_idx, struct bpf_insn *insn,
+ int map_idx, const struct bpf_map *map)
+{
+ int i;
+
+ pr_debug("prog '%s': relo #%d: poisoning insn #%d that loads map #%d '%s'\n",
+ prog->name, relo_idx, insn_idx, map_idx, map->name);
+
+ /* we turn single ldimm64 into two identical invalid calls */
+ for (i = 0; i < 2; i++) {
+ insn->code = BPF_JMP | BPF_CALL;
+ insn->dst_reg = 0;
+ insn->src_reg = 0;
+ insn->off = 0;
+ /* if this instruction is reachable (not a dead code),
+ * verifier will complain with something like:
+ * invalid func unknown#2001000123
+ * where lower 123 is map index into obj->maps[] array
+ */
+ insn->imm = POISON_LDIMM64_MAP_BASE + map_idx;
+
+ insn++;
+ }
+}
+
+/* unresolved kfunc call special constant, used also for log fixup logic */
+#define POISON_CALL_KFUNC_BASE 2002000000
+#define POISON_CALL_KFUNC_PFX "2002"
+
+static void poison_kfunc_call(struct bpf_program *prog, int relo_idx,
+ int insn_idx, struct bpf_insn *insn,
+ int ext_idx, const struct extern_desc *ext)
+{
+ pr_debug("prog '%s': relo #%d: poisoning insn #%d that calls kfunc '%s'\n",
+ prog->name, relo_idx, insn_idx, ext->name);
+
+ /* we turn kfunc call into invalid helper call with identifiable constant */
+ insn->code = BPF_JMP | BPF_CALL;
+ insn->dst_reg = 0;
+ insn->src_reg = 0;
+ insn->off = 0;
+ /* if this instruction is reachable (not a dead code),
+ * verifier will complain with something like:
+ * invalid func unknown#2001000123
+ * where lower 123 is extern index into obj->externs[] array
+ */
+ insn->imm = POISON_CALL_KFUNC_BASE + ext_idx;
+}
+
/* Relocate data references within program code:
* - map references;
* - global variable references;
@@ -5935,39 +5946,83 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
for (i = 0; i < prog->nr_reloc; i++) {
struct reloc_desc *relo = &prog->reloc_desc[i];
struct bpf_insn *insn = &prog->insns[relo->insn_idx];
+ const struct bpf_map *map;
struct extern_desc *ext;
switch (relo->type) {
case RELO_LD64:
- insn[0].src_reg = BPF_PSEUDO_MAP_FD;
- insn[0].imm = obj->maps[relo->map_idx].fd;
- relo->processed = true;
+ map = &obj->maps[relo->map_idx];
+ if (obj->gen_loader) {
+ insn[0].src_reg = BPF_PSEUDO_MAP_IDX;
+ insn[0].imm = relo->map_idx;
+ } else if (map->autocreate) {
+ insn[0].src_reg = BPF_PSEUDO_MAP_FD;
+ insn[0].imm = map->fd;
+ } else {
+ poison_map_ldimm64(prog, i, relo->insn_idx, insn,
+ relo->map_idx, map);
+ }
break;
case RELO_DATA:
- insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
+ map = &obj->maps[relo->map_idx];
insn[1].imm = insn[0].imm + relo->sym_off;
- insn[0].imm = obj->maps[relo->map_idx].fd;
- relo->processed = true;
+ if (obj->gen_loader) {
+ insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
+ insn[0].imm = relo->map_idx;
+ } else if (map->autocreate) {
+ insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
+ insn[0].imm = map->fd;
+ } else {
+ poison_map_ldimm64(prog, i, relo->insn_idx, insn,
+ relo->map_idx, map);
+ }
break;
- case RELO_EXTERN:
- ext = &obj->externs[relo->sym_off];
+ case RELO_EXTERN_LD64:
+ ext = &obj->externs[relo->ext_idx];
if (ext->type == EXT_KCFG) {
- insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
- insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
+ if (obj->gen_loader) {
+ insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
+ insn[0].imm = obj->kconfig_map_idx;
+ } else {
+ insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
+ insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
+ }
insn[1].imm = ext->kcfg.data_off;
} else /* EXT_KSYM */ {
- if (ext->ksym.type_id) { /* typed ksyms */
+ if (ext->ksym.type_id && ext->is_set) { /* typed ksyms */
insn[0].src_reg = BPF_PSEUDO_BTF_ID;
- insn[0].imm = ext->ksym.vmlinux_btf_id;
- } else { /* typeless ksyms */
+ insn[0].imm = ext->ksym.kernel_btf_id;
+ insn[1].imm = ext->ksym.kernel_btf_obj_fd;
+ } else { /* typeless ksyms or unresolved typed ksyms */
insn[0].imm = (__u32)ext->ksym.addr;
insn[1].imm = ext->ksym.addr >> 32;
}
}
- relo->processed = true;
+ break;
+ case RELO_EXTERN_CALL:
+ ext = &obj->externs[relo->ext_idx];
+ insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL;
+ if (ext->is_set) {
+ insn[0].imm = ext->ksym.kernel_btf_id;
+ insn[0].off = ext->ksym.btf_fd_idx;
+ } else { /* unresolved weak kfunc call */
+ poison_kfunc_call(prog, i, relo->insn_idx, insn,
+ relo->ext_idx, ext);
+ }
+ break;
+ case RELO_SUBPROG_ADDR:
+ if (insn[0].src_reg != BPF_PSEUDO_FUNC) {
+ pr_warn("prog '%s': relo #%d: bad insn\n",
+ prog->name, i);
+ return -EINVAL;
+ }
+ /* handled already */
break;
case RELO_CALL:
- /* will be handled as a follow up pass */
+ /* handled already */
+ break;
+ case RELO_CORE:
+ /* will be handled by bpf_program_record_relos() */
break;
default:
pr_warn("prog '%s': relo #%d: bad relo type %d\n",
@@ -5989,14 +6044,13 @@ static int adjust_prog_btf_ext_info(const struct bpf_object *obj,
void *rec, *rec_end, *new_prog_info;
const struct btf_ext_info_sec *sec;
size_t old_sz, new_sz;
- const char *sec_name;
- int i, off_adj;
+ int i, sec_num, sec_idx, off_adj;
+ sec_num = 0;
for_each_btf_ext_sec(ext_info, sec) {
- sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
- if (!sec_name)
- return -EINVAL;
- if (strcmp(sec_name, prog->sec_name) != 0)
+ sec_idx = ext_info->sec_idxs[sec_num];
+ sec_num++;
+ if (prog->sec_idx != sec_idx)
continue;
for_each_btf_ext_rec(ext_info, sec, i, rec) {
@@ -6056,9 +6110,9 @@ reloc_prog_func_and_line_info(const struct bpf_object *obj,
int err;
/* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't
- * supprot func/line info
+ * support func/line info
*/
- if (!obj->btf_ext || !kernel_supports(FEAT_BTF_FUNC))
+ if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC))
return 0;
/* only attempt func info relocation if main program's func_info
@@ -6132,18 +6186,81 @@ static int cmp_relo_by_insn_idx(const void *key, const void *elem)
static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx)
{
+ if (!prog->nr_reloc)
+ return NULL;
return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc,
sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx);
}
+static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_program *subprog)
+{
+ int new_cnt = main_prog->nr_reloc + subprog->nr_reloc;
+ struct reloc_desc *relos;
+ int i;
+
+ if (main_prog == subprog)
+ return 0;
+ relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos));
+ /* if new count is zero, reallocarray can return a valid NULL result;
+ * in this case the previous pointer will be freed, so we *have to*
+ * reassign old pointer to the new value (even if it's NULL)
+ */
+ if (!relos && new_cnt)
+ return -ENOMEM;
+ if (subprog->nr_reloc)
+ memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc,
+ sizeof(*relos) * subprog->nr_reloc);
+
+ for (i = main_prog->nr_reloc; i < new_cnt; i++)
+ relos[i].insn_idx += subprog->sub_insn_off;
+ /* After insn_idx adjustment the 'relos' array is still sorted
+ * by insn_idx and doesn't break bsearch.
+ */
+ main_prog->reloc_desc = relos;
+ main_prog->nr_reloc = new_cnt;
+ return 0;
+}
+
+static int
+bpf_object__append_subprog_code(struct bpf_object *obj, struct bpf_program *main_prog,
+ struct bpf_program *subprog)
+{
+ struct bpf_insn *insns;
+ size_t new_cnt;
+ int err;
+
+ subprog->sub_insn_off = main_prog->insns_cnt;
+
+ new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
+ insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
+ if (!insns) {
+ pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
+ return -ENOMEM;
+ }
+ main_prog->insns = insns;
+ main_prog->insns_cnt = new_cnt;
+
+ memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
+ subprog->insns_cnt * sizeof(*insns));
+
+ pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
+ main_prog->name, subprog->insns_cnt, subprog->name);
+
+ /* The subprog insns are now appended. Append its relos too. */
+ err = append_subprog_relos(main_prog, subprog);
+ if (err)
+ return err;
+ return 0;
+}
+
static int
bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
struct bpf_program *prog)
{
- size_t sub_insn_idx, insn_idx, new_cnt;
+ size_t sub_insn_idx, insn_idx;
struct bpf_program *subprog;
- struct bpf_insn *insns, *insn;
struct reloc_desc *relo;
+ struct bpf_insn *insn;
int err;
err = reloc_prog_func_and_line_info(obj, main_prog, prog);
@@ -6152,11 +6269,16 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
- if (!insn_is_subprog_call(insn))
+ if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn))
continue;
relo = find_prog_insn_relo(prog, insn_idx);
- if (relo && relo->type != RELO_CALL) {
+ if (relo && relo->type == RELO_EXTERN_CALL)
+ /* kfunc relocations will be handled later
+ * in bpf_object__relocate_data()
+ */
+ continue;
+ if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) {
pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
prog->name, insn_idx, relo->type);
return -LIBBPF_ERRNO__RELOC;
@@ -6168,8 +6290,22 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
* call always has imm = -1, but for static functions
* relocation is against STT_SECTION and insn->imm
* points to a start of a static function
+ *
+ * for subprog addr relocation, the relo->sym_off + insn->imm is
+ * the byte offset in the corresponding section.
+ */
+ if (relo->type == RELO_CALL)
+ sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
+ else
+ sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ;
+ } else if (insn_is_pseudo_func(insn)) {
+ /*
+ * RELO_SUBPROG_ADDR relo is always emitted even if both
+ * functions are in the same section, so it shouldn't reach here.
*/
- sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
+ pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n",
+ prog->name, insn_idx);
+ return -LIBBPF_ERRNO__RELOC;
} else {
/* if subprogram call is to a static function within
* the same ELF section, there won't be any relocation
@@ -6199,23 +6335,9 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
* and relocate.
*/
if (subprog->sub_insn_off == 0) {
- subprog->sub_insn_off = main_prog->insns_cnt;
-
- new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
- insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
- if (!insns) {
- pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
- return -ENOMEM;
- }
- main_prog->insns = insns;
- main_prog->insns_cnt = new_cnt;
-
- memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
- subprog->insns_cnt * sizeof(*insns));
-
- pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
- main_prog->name, subprog->insns_cnt, subprog->name);
-
+ err = bpf_object__append_subprog_code(obj, main_prog, subprog);
+ if (err)
+ return err;
err = bpf_object__reloc_code(obj, main_prog, subprog);
if (err)
return err;
@@ -6229,12 +6351,10 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
* prog; each main prog can have a different set of
* subprograms appended (potentially in different order as
* well), so position of any subprog can be different for
- * different main programs */
+ * different main programs
+ */
insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;
- if (relo)
- relo->processed = true;
-
pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off);
}
@@ -6327,7 +6447,7 @@ static int
bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
{
struct bpf_program *subprog;
- int i, j, err;
+ int i, err;
/* mark all subprogs as not relocated (yet) within the context of
* current main program
@@ -6338,24 +6458,463 @@ bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
continue;
subprog->sub_insn_off = 0;
- for (j = 0; j < subprog->nr_reloc; j++)
- if (subprog->reloc_desc[j].type == RELO_CALL)
- subprog->reloc_desc[j].processed = false;
}
err = bpf_object__reloc_code(obj, prog, prog);
if (err)
return err;
+ return 0;
+}
+
+static void
+bpf_object__free_relocs(struct bpf_object *obj)
+{
+ struct bpf_program *prog;
+ int i;
+
+ /* free up relocation descriptors */
+ for (i = 0; i < obj->nr_programs; i++) {
+ prog = &obj->programs[i];
+ zfree(&prog->reloc_desc);
+ prog->nr_reloc = 0;
+ }
+}
+
+static int cmp_relocs(const void *_a, const void *_b)
+{
+ const struct reloc_desc *a = _a;
+ const struct reloc_desc *b = _b;
+
+ if (a->insn_idx != b->insn_idx)
+ return a->insn_idx < b->insn_idx ? -1 : 1;
+
+ /* no two relocations should have the same insn_idx, but ... */
+ if (a->type != b->type)
+ return a->type < b->type ? -1 : 1;
return 0;
}
-static int
-bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
+static void bpf_object__sort_relos(struct bpf_object *obj)
+{
+ int i;
+
+ for (i = 0; i < obj->nr_programs; i++) {
+ struct bpf_program *p = &obj->programs[i];
+
+ if (!p->nr_reloc)
+ continue;
+
+ qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs);
+ }
+}
+
+static int bpf_prog_assign_exc_cb(struct bpf_object *obj, struct bpf_program *prog)
+{
+ const char *str = "exception_callback:";
+ size_t pfx_len = strlen(str);
+ int i, j, n;
+
+ if (!obj->btf || !kernel_supports(obj, FEAT_BTF_DECL_TAG))
+ return 0;
+
+ n = btf__type_cnt(obj->btf);
+ for (i = 1; i < n; i++) {
+ const char *name;
+ struct btf_type *t;
+
+ t = btf_type_by_id(obj->btf, i);
+ if (!btf_is_decl_tag(t) || btf_decl_tag(t)->component_idx != -1)
+ continue;
+
+ name = btf__str_by_offset(obj->btf, t->name_off);
+ if (strncmp(name, str, pfx_len) != 0)
+ continue;
+
+ t = btf_type_by_id(obj->btf, t->type);
+ if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) {
+ pr_warn("prog '%s': exception_callback:<value> decl tag not applied to the main program\n",
+ prog->name);
+ return -EINVAL;
+ }
+ if (strcmp(prog->name, btf__str_by_offset(obj->btf, t->name_off)) != 0)
+ continue;
+ /* Multiple callbacks are specified for the same prog,
+ * the verifier will eventually return an error for this
+ * case, hence simply skip appending a subprog.
+ */
+ if (prog->exception_cb_idx >= 0) {
+ prog->exception_cb_idx = -1;
+ break;
+ }
+
+ name += pfx_len;
+ if (str_is_empty(name)) {
+ pr_warn("prog '%s': exception_callback:<value> decl tag contains empty value\n",
+ prog->name);
+ return -EINVAL;
+ }
+
+ for (j = 0; j < obj->nr_programs; j++) {
+ struct bpf_program *subprog = &obj->programs[j];
+
+ if (!prog_is_subprog(obj, subprog))
+ continue;
+ if (strcmp(name, subprog->name) != 0)
+ continue;
+ /* Enforce non-hidden, as from verifier point of
+ * view it expects global functions, whereas the
+ * mark_btf_static fixes up linkage as static.
+ */
+ if (!subprog->sym_global || subprog->mark_btf_static) {
+ pr_warn("prog '%s': exception callback %s must be a global non-hidden function\n",
+ prog->name, subprog->name);
+ return -EINVAL;
+ }
+ /* Let's see if we already saw a static exception callback with the same name */
+ if (prog->exception_cb_idx >= 0) {
+ pr_warn("prog '%s': multiple subprogs with same name as exception callback '%s'\n",
+ prog->name, subprog->name);
+ return -EINVAL;
+ }
+ prog->exception_cb_idx = j;
+ break;
+ }
+
+ if (prog->exception_cb_idx >= 0)
+ continue;
+
+ pr_warn("prog '%s': cannot find exception callback '%s'\n", prog->name, name);
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+static struct {
+ enum bpf_prog_type prog_type;
+ const char *ctx_name;
+} global_ctx_map[] = {
+ { BPF_PROG_TYPE_CGROUP_DEVICE, "bpf_cgroup_dev_ctx" },
+ { BPF_PROG_TYPE_CGROUP_SKB, "__sk_buff" },
+ { BPF_PROG_TYPE_CGROUP_SOCK, "bpf_sock" },
+ { BPF_PROG_TYPE_CGROUP_SOCK_ADDR, "bpf_sock_addr" },
+ { BPF_PROG_TYPE_CGROUP_SOCKOPT, "bpf_sockopt" },
+ { BPF_PROG_TYPE_CGROUP_SYSCTL, "bpf_sysctl" },
+ { BPF_PROG_TYPE_FLOW_DISSECTOR, "__sk_buff" },
+ { BPF_PROG_TYPE_KPROBE, "bpf_user_pt_regs_t" },
+ { BPF_PROG_TYPE_LWT_IN, "__sk_buff" },
+ { BPF_PROG_TYPE_LWT_OUT, "__sk_buff" },
+ { BPF_PROG_TYPE_LWT_SEG6LOCAL, "__sk_buff" },
+ { BPF_PROG_TYPE_LWT_XMIT, "__sk_buff" },
+ { BPF_PROG_TYPE_NETFILTER, "bpf_nf_ctx" },
+ { BPF_PROG_TYPE_PERF_EVENT, "bpf_perf_event_data" },
+ { BPF_PROG_TYPE_RAW_TRACEPOINT, "bpf_raw_tracepoint_args" },
+ { BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, "bpf_raw_tracepoint_args" },
+ { BPF_PROG_TYPE_SCHED_ACT, "__sk_buff" },
+ { BPF_PROG_TYPE_SCHED_CLS, "__sk_buff" },
+ { BPF_PROG_TYPE_SK_LOOKUP, "bpf_sk_lookup" },
+ { BPF_PROG_TYPE_SK_MSG, "sk_msg_md" },
+ { BPF_PROG_TYPE_SK_REUSEPORT, "sk_reuseport_md" },
+ { BPF_PROG_TYPE_SK_SKB, "__sk_buff" },
+ { BPF_PROG_TYPE_SOCK_OPS, "bpf_sock_ops" },
+ { BPF_PROG_TYPE_SOCKET_FILTER, "__sk_buff" },
+ { BPF_PROG_TYPE_XDP, "xdp_md" },
+ /* all other program types don't have "named" context structs */
+};
+
+/* forward declarations for arch-specific underlying types of bpf_user_pt_regs_t typedef,
+ * for below __builtin_types_compatible_p() checks;
+ * with this approach we don't need any extra arch-specific #ifdef guards
+ */
+struct pt_regs;
+struct user_pt_regs;
+struct user_regs_struct;
+
+static bool need_func_arg_type_fixup(const struct btf *btf, const struct bpf_program *prog,
+ const char *subprog_name, int arg_idx,
+ int arg_type_id, const char *ctx_name)
+{
+ const struct btf_type *t;
+ const char *tname;
+
+ /* check if existing parameter already matches verifier expectations */
+ t = skip_mods_and_typedefs(btf, arg_type_id, NULL);
+ if (!btf_is_ptr(t))
+ goto out_warn;
+
+ /* typedef bpf_user_pt_regs_t is a special PITA case, valid for kprobe
+ * and perf_event programs, so check this case early on and forget
+ * about it for subsequent checks
+ */
+ while (btf_is_mod(t))
+ t = btf__type_by_id(btf, t->type);
+ if (btf_is_typedef(t) &&
+ (prog->type == BPF_PROG_TYPE_KPROBE || prog->type == BPF_PROG_TYPE_PERF_EVENT)) {
+ tname = btf__str_by_offset(btf, t->name_off) ?: "<anon>";
+ if (strcmp(tname, "bpf_user_pt_regs_t") == 0)
+ return false; /* canonical type for kprobe/perf_event */
+ }
+
+ /* now we can ignore typedefs moving forward */
+ t = skip_mods_and_typedefs(btf, t->type, NULL);
+
+ /* if it's `void *`, definitely fix up BTF info */
+ if (btf_is_void(t))
+ return true;
+
+ /* if it's already proper canonical type, no need to fix up */
+ tname = btf__str_by_offset(btf, t->name_off) ?: "<anon>";
+ if (btf_is_struct(t) && strcmp(tname, ctx_name) == 0)
+ return false;
+
+ /* special cases */
+ switch (prog->type) {
+ case BPF_PROG_TYPE_KPROBE:
+ /* `struct pt_regs *` is expected, but we need to fix up */
+ if (btf_is_struct(t) && strcmp(tname, "pt_regs") == 0)
+ return true;
+ break;
+ case BPF_PROG_TYPE_PERF_EVENT:
+ if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct pt_regs) &&
+ btf_is_struct(t) && strcmp(tname, "pt_regs") == 0)
+ return true;
+ if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_pt_regs) &&
+ btf_is_struct(t) && strcmp(tname, "user_pt_regs") == 0)
+ return true;
+ if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_regs_struct) &&
+ btf_is_struct(t) && strcmp(tname, "user_regs_struct") == 0)
+ return true;
+ break;
+ case BPF_PROG_TYPE_RAW_TRACEPOINT:
+ case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
+ /* allow u64* as ctx */
+ if (btf_is_int(t) && t->size == 8)
+ return true;
+ break;
+ default:
+ break;
+ }
+
+out_warn:
+ pr_warn("prog '%s': subprog '%s' arg#%d is expected to be of `struct %s *` type\n",
+ prog->name, subprog_name, arg_idx, ctx_name);
+ return false;
+}
+
+static int clone_func_btf_info(struct btf *btf, int orig_fn_id, struct bpf_program *prog)
+{
+ int fn_id, fn_proto_id, ret_type_id, orig_proto_id;
+ int i, err, arg_cnt, fn_name_off, linkage;
+ struct btf_type *fn_t, *fn_proto_t, *t;
+ struct btf_param *p;
+
+ /* caller already validated FUNC -> FUNC_PROTO validity */
+ fn_t = btf_type_by_id(btf, orig_fn_id);
+ fn_proto_t = btf_type_by_id(btf, fn_t->type);
+
+ /* Note that each btf__add_xxx() operation invalidates
+ * all btf_type and string pointers, so we need to be
+ * very careful when cloning BTF types. BTF type
+ * pointers have to be always refetched. And to avoid
+ * problems with invalidated string pointers, we
+ * add empty strings initially, then just fix up
+ * name_off offsets in place. Offsets are stable for
+ * existing strings, so that works out.
+ */
+ fn_name_off = fn_t->name_off; /* we are about to invalidate fn_t */
+ linkage = btf_func_linkage(fn_t);
+ orig_proto_id = fn_t->type; /* original FUNC_PROTO ID */
+ ret_type_id = fn_proto_t->type; /* fn_proto_t will be invalidated */
+ arg_cnt = btf_vlen(fn_proto_t);
+
+ /* clone FUNC_PROTO and its params */
+ fn_proto_id = btf__add_func_proto(btf, ret_type_id);
+ if (fn_proto_id < 0)
+ return -EINVAL;
+
+ for (i = 0; i < arg_cnt; i++) {
+ int name_off;
+
+ /* copy original parameter data */
+ t = btf_type_by_id(btf, orig_proto_id);
+ p = &btf_params(t)[i];
+ name_off = p->name_off;
+
+ err = btf__add_func_param(btf, "", p->type);
+ if (err)
+ return err;
+
+ fn_proto_t = btf_type_by_id(btf, fn_proto_id);
+ p = &btf_params(fn_proto_t)[i];
+ p->name_off = name_off; /* use remembered str offset */
+ }
+
+ /* clone FUNC now, btf__add_func() enforces non-empty name, so use
+ * entry program's name as a placeholder, which we replace immediately
+ * with original name_off
+ */
+ fn_id = btf__add_func(btf, prog->name, linkage, fn_proto_id);
+ if (fn_id < 0)
+ return -EINVAL;
+
+ fn_t = btf_type_by_id(btf, fn_id);
+ fn_t->name_off = fn_name_off; /* reuse original string */
+
+ return fn_id;
+}
+
+/* Check if main program or global subprog's function prototype has `arg:ctx`
+ * argument tags, and, if necessary, substitute correct type to match what BPF
+ * verifier would expect, taking into account specific program type. This
+ * allows to support __arg_ctx tag transparently on old kernels that don't yet
+ * have a native support for it in the verifier, making user's life much
+ * easier.
+ */
+static int bpf_program_fixup_func_info(struct bpf_object *obj, struct bpf_program *prog)
+{
+ const char *ctx_name = NULL, *ctx_tag = "arg:ctx", *fn_name;
+ struct bpf_func_info_min *func_rec;
+ struct btf_type *fn_t, *fn_proto_t;
+ struct btf *btf = obj->btf;
+ const struct btf_type *t;
+ struct btf_param *p;
+ int ptr_id = 0, struct_id, tag_id, orig_fn_id;
+ int i, n, arg_idx, arg_cnt, err, rec_idx;
+ int *orig_ids;
+
+ /* no .BTF.ext, no problem */
+ if (!obj->btf_ext || !prog->func_info)
+ return 0;
+
+ /* don't do any fix ups if kernel natively supports __arg_ctx */
+ if (kernel_supports(obj, FEAT_ARG_CTX_TAG))
+ return 0;
+
+ /* some BPF program types just don't have named context structs, so
+ * this fallback mechanism doesn't work for them
+ */
+ for (i = 0; i < ARRAY_SIZE(global_ctx_map); i++) {
+ if (global_ctx_map[i].prog_type != prog->type)
+ continue;
+ ctx_name = global_ctx_map[i].ctx_name;
+ break;
+ }
+ if (!ctx_name)
+ return 0;
+
+ /* remember original func BTF IDs to detect if we already cloned them */
+ orig_ids = calloc(prog->func_info_cnt, sizeof(*orig_ids));
+ if (!orig_ids)
+ return -ENOMEM;
+ for (i = 0; i < prog->func_info_cnt; i++) {
+ func_rec = prog->func_info + prog->func_info_rec_size * i;
+ orig_ids[i] = func_rec->type_id;
+ }
+
+ /* go through each DECL_TAG with "arg:ctx" and see if it points to one
+ * of our subprogs; if yes and subprog is global and needs adjustment,
+ * clone and adjust FUNC -> FUNC_PROTO combo
+ */
+ for (i = 1, n = btf__type_cnt(btf); i < n; i++) {
+ /* only DECL_TAG with "arg:ctx" value are interesting */
+ t = btf__type_by_id(btf, i);
+ if (!btf_is_decl_tag(t))
+ continue;
+ if (strcmp(btf__str_by_offset(btf, t->name_off), ctx_tag) != 0)
+ continue;
+
+ /* only global funcs need adjustment, if at all */
+ orig_fn_id = t->type;
+ fn_t = btf_type_by_id(btf, orig_fn_id);
+ if (!btf_is_func(fn_t) || btf_func_linkage(fn_t) != BTF_FUNC_GLOBAL)
+ continue;
+
+ /* sanity check FUNC -> FUNC_PROTO chain, just in case */
+ fn_proto_t = btf_type_by_id(btf, fn_t->type);
+ if (!fn_proto_t || !btf_is_func_proto(fn_proto_t))
+ continue;
+
+ /* find corresponding func_info record */
+ func_rec = NULL;
+ for (rec_idx = 0; rec_idx < prog->func_info_cnt; rec_idx++) {
+ if (orig_ids[rec_idx] == t->type) {
+ func_rec = prog->func_info + prog->func_info_rec_size * rec_idx;
+ break;
+ }
+ }
+ /* current main program doesn't call into this subprog */
+ if (!func_rec)
+ continue;
+
+ /* some more sanity checking of DECL_TAG */
+ arg_cnt = btf_vlen(fn_proto_t);
+ arg_idx = btf_decl_tag(t)->component_idx;
+ if (arg_idx < 0 || arg_idx >= arg_cnt)
+ continue;
+
+ /* check if we should fix up argument type */
+ p = &btf_params(fn_proto_t)[arg_idx];
+ fn_name = btf__str_by_offset(btf, fn_t->name_off) ?: "<anon>";
+ if (!need_func_arg_type_fixup(btf, prog, fn_name, arg_idx, p->type, ctx_name))
+ continue;
+
+ /* clone fn/fn_proto, unless we already did it for another arg */
+ if (func_rec->type_id == orig_fn_id) {
+ int fn_id;
+
+ fn_id = clone_func_btf_info(btf, orig_fn_id, prog);
+ if (fn_id < 0) {
+ err = fn_id;
+ goto err_out;
+ }
+
+ /* point func_info record to a cloned FUNC type */
+ func_rec->type_id = fn_id;
+ }
+
+ /* create PTR -> STRUCT type chain to mark PTR_TO_CTX argument;
+ * we do it just once per main BPF program, as all global
+ * funcs share the same program type, so need only PTR ->
+ * STRUCT type chain
+ */
+ if (ptr_id == 0) {
+ struct_id = btf__add_struct(btf, ctx_name, 0);
+ ptr_id = btf__add_ptr(btf, struct_id);
+ if (ptr_id < 0 || struct_id < 0) {
+ err = -EINVAL;
+ goto err_out;
+ }
+ }
+
+ /* for completeness, clone DECL_TAG and point it to cloned param */
+ tag_id = btf__add_decl_tag(btf, ctx_tag, func_rec->type_id, arg_idx);
+ if (tag_id < 0) {
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ /* all the BTF manipulations invalidated pointers, refetch them */
+ fn_t = btf_type_by_id(btf, func_rec->type_id);
+ fn_proto_t = btf_type_by_id(btf, fn_t->type);
+
+ /* fix up type ID pointed to by param */
+ p = &btf_params(fn_proto_t)[arg_idx];
+ p->type = ptr_id;
+ }
+
+ free(orig_ids);
+ return 0;
+err_out:
+ free(orig_ids);
+ return err;
+}
+
+static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
{
struct bpf_program *prog;
- size_t i;
+ size_t i, j;
int err;
if (obj->btf_ext) {
@@ -6365,24 +6924,34 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
err);
return err;
}
+ bpf_object__sort_relos(obj);
}
- /* relocate data references first for all programs and sub-programs,
- * as they don't change relative to code locations, so subsequent
- * subprogram processing won't need to re-calculate any of them
+
+ /* Before relocating calls pre-process relocations and mark
+ * few ld_imm64 instructions that points to subprogs.
+ * Otherwise bpf_object__reloc_code() later would have to consider
+ * all ld_imm64 insns as relocation candidates. That would
+ * reduce relocation speed, since amount of find_prog_insn_relo()
+ * would increase and most of them will fail to find a relo.
*/
for (i = 0; i < obj->nr_programs; i++) {
prog = &obj->programs[i];
- err = bpf_object__relocate_data(obj, prog);
- if (err) {
- pr_warn("prog '%s': failed to relocate data references: %d\n",
- prog->name, err);
- return err;
+ for (j = 0; j < prog->nr_reloc; j++) {
+ struct reloc_desc *relo = &prog->reloc_desc[j];
+ struct bpf_insn *insn = &prog->insns[relo->insn_idx];
+
+ /* mark the insn, so it's recognized by insn_is_pseudo_func() */
+ if (relo->type == RELO_SUBPROG_ADDR)
+ insn[0].src_reg = BPF_PSEUDO_FUNC;
}
}
- /* now relocate subprogram calls and append used subprograms to main
+
+ /* relocate subprogram calls and append used subprograms to main
* programs; each copy of subprogram code needs to be relocated
* differently for each main program, because its code location might
- * have changed
+ * have changed.
+ * Append subprog relos to main programs to allow data relos to be
+ * processed after text is completely relocated.
*/
for (i = 0; i < obj->nr_programs; i++) {
prog = &obj->programs[i];
@@ -6391,6 +6960,8 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
*/
if (prog_is_subprog(obj, prog))
continue;
+ if (!prog->autoload)
+ continue;
err = bpf_object__relocate_calls(obj, prog);
if (err) {
@@ -6398,33 +6969,74 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
prog->name, err);
return err;
}
+
+ err = bpf_prog_assign_exc_cb(obj, prog);
+ if (err)
+ return err;
+ /* Now, also append exception callback if it has not been done already. */
+ if (prog->exception_cb_idx >= 0) {
+ struct bpf_program *subprog = &obj->programs[prog->exception_cb_idx];
+
+ /* Calling exception callback directly is disallowed, which the
+ * verifier will reject later. In case it was processed already,
+ * we can skip this step, otherwise for all other valid cases we
+ * have to append exception callback now.
+ */
+ if (subprog->sub_insn_off == 0) {
+ err = bpf_object__append_subprog_code(obj, prog, subprog);
+ if (err)
+ return err;
+ err = bpf_object__reloc_code(obj, prog, subprog);
+ if (err)
+ return err;
+ }
+ }
}
- /* free up relocation descriptors */
for (i = 0; i < obj->nr_programs; i++) {
prog = &obj->programs[i];
- zfree(&prog->reloc_desc);
- prog->nr_reloc = 0;
+ if (prog_is_subprog(obj, prog))
+ continue;
+ if (!prog->autoload)
+ continue;
+
+ /* Process data relos for main programs */
+ err = bpf_object__relocate_data(obj, prog);
+ if (err) {
+ pr_warn("prog '%s': failed to relocate data references: %d\n",
+ prog->name, err);
+ return err;
+ }
+
+ /* Fix up .BTF.ext information, if necessary */
+ err = bpf_program_fixup_func_info(obj, prog);
+ if (err) {
+ pr_warn("prog '%s': failed to perform .BTF.ext fix ups: %d\n",
+ prog->name, err);
+ return err;
+ }
}
+
return 0;
}
static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
- GElf_Shdr *shdr, Elf_Data *data);
+ Elf64_Shdr *shdr, Elf_Data *data);
static int bpf_object__collect_map_relos(struct bpf_object *obj,
- GElf_Shdr *shdr, Elf_Data *data)
+ Elf64_Shdr *shdr, Elf_Data *data)
{
const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *);
int i, j, nrels, new_sz;
const struct btf_var_secinfo *vi = NULL;
const struct btf_type *sec, *var, *def;
- struct bpf_map *map = NULL, *targ_map;
+ struct bpf_map *map = NULL, *targ_map = NULL;
+ struct bpf_program *targ_prog = NULL;
+ bool is_prog_array, is_map_in_map;
const struct btf_member *member;
- const char *name, *mname;
- Elf_Data *symbols;
+ const char *name, *mname, *type;
unsigned int moff;
- GElf_Sym sym;
- GElf_Rel rel;
+ Elf64_Sym *sym;
+ Elf64_Rel *rel;
void *tmp;
if (!obj->efile.btf_maps_sec_btf_id || !obj->btf)
@@ -6433,28 +7045,25 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj,
if (!sec)
return -EINVAL;
- symbols = obj->efile.symbols;
nrels = shdr->sh_size / shdr->sh_entsize;
for (i = 0; i < nrels; i++) {
- if (!gelf_getrel(data, i, &rel)) {
+ rel = elf_rel_by_idx(data, i);
+ if (!rel) {
pr_warn(".maps relo #%d: failed to get ELF relo\n", i);
return -LIBBPF_ERRNO__FORMAT;
}
- if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
+
+ sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
+ if (!sym) {
pr_warn(".maps relo #%d: symbol %zx not found\n",
- i, (size_t)GELF_R_SYM(rel.r_info));
+ i, (size_t)ELF64_R_SYM(rel->r_info));
return -LIBBPF_ERRNO__FORMAT;
}
- name = elf_sym_str(obj, sym.st_name) ?: "<?>";
- if (sym.st_shndx != obj->efile.btf_maps_shndx) {
- pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n",
- i, name);
- return -LIBBPF_ERRNO__RELOC;
- }
+ name = elf_sym_str(obj, sym->st_name) ?: "<?>";
- pr_debug(".maps relo #%d: for %zd value %zd rel.r_offset %zu name %d ('%s')\n",
- i, (ssize_t)(rel.r_info >> 32), (size_t)sym.st_value,
- (size_t)rel.r_offset, sym.st_name, name);
+ pr_debug(".maps relo #%d: for %zd value %zd rel->r_offset %zu name %d ('%s')\n",
+ i, (ssize_t)(rel->r_info >> 32), (size_t)sym->st_value,
+ (size_t)rel->r_offset, sym->st_name, name);
for (j = 0; j < obj->nr_maps; j++) {
map = &obj->maps[j];
@@ -6462,29 +7071,55 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj,
continue;
vi = btf_var_secinfos(sec) + map->btf_var_idx;
- if (vi->offset <= rel.r_offset &&
- rel.r_offset + bpf_ptr_sz <= vi->offset + vi->size)
+ if (vi->offset <= rel->r_offset &&
+ rel->r_offset + bpf_ptr_sz <= vi->offset + vi->size)
break;
}
if (j == obj->nr_maps) {
- pr_warn(".maps relo #%d: cannot find map '%s' at rel.r_offset %zu\n",
- i, name, (size_t)rel.r_offset);
+ pr_warn(".maps relo #%d: cannot find map '%s' at rel->r_offset %zu\n",
+ i, name, (size_t)rel->r_offset);
return -EINVAL;
}
- if (!bpf_map_type__is_map_in_map(map->def.type))
- return -EINVAL;
- if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS &&
- map->def.key_size != sizeof(int)) {
- pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n",
- i, map->name, sizeof(int));
+ is_map_in_map = bpf_map_type__is_map_in_map(map->def.type);
+ is_prog_array = map->def.type == BPF_MAP_TYPE_PROG_ARRAY;
+ type = is_map_in_map ? "map" : "prog";
+ if (is_map_in_map) {
+ if (sym->st_shndx != obj->efile.btf_maps_shndx) {
+ pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n",
+ i, name);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS &&
+ map->def.key_size != sizeof(int)) {
+ pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n",
+ i, map->name, sizeof(int));
+ return -EINVAL;
+ }
+ targ_map = bpf_object__find_map_by_name(obj, name);
+ if (!targ_map) {
+ pr_warn(".maps relo #%d: '%s' isn't a valid map reference\n",
+ i, name);
+ return -ESRCH;
+ }
+ } else if (is_prog_array) {
+ targ_prog = bpf_object__find_program_by_name(obj, name);
+ if (!targ_prog) {
+ pr_warn(".maps relo #%d: '%s' isn't a valid program reference\n",
+ i, name);
+ return -ESRCH;
+ }
+ if (targ_prog->sec_idx != sym->st_shndx ||
+ targ_prog->sec_insn_off * 8 != sym->st_value ||
+ prog_is_subprog(obj, targ_prog)) {
+ pr_warn(".maps relo #%d: '%s' isn't an entry-point program\n",
+ i, name);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ } else {
return -EINVAL;
}
- targ_map = bpf_object__find_map_by_name(obj, name);
- if (!targ_map)
- return -ESRCH;
-
var = btf__type_by_id(obj->btf, vi->type);
def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
if (btf_vlen(def) == 0)
@@ -6495,10 +7130,10 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj,
return -EINVAL;
moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8;
- if (rel.r_offset - vi->offset < moff)
+ if (rel->r_offset - vi->offset < moff)
return -EINVAL;
- moff = rel.r_offset - vi->offset - moff;
+ moff = rel->r_offset - vi->offset - moff;
/* here we use BPF pointer size, which is always 64 bit, as we
* are parsing ELF that was built for BPF target
*/
@@ -6515,45 +7150,38 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj,
(new_sz - map->init_slots_sz) * host_ptr_sz);
map->init_slots_sz = new_sz;
}
- map->init_slots[moff] = targ_map;
+ map->init_slots[moff] = is_map_in_map ? (void *)targ_map : (void *)targ_prog;
- pr_debug(".maps relo #%d: map '%s' slot [%d] points to map '%s'\n",
- i, map->name, moff, name);
+ pr_debug(".maps relo #%d: map '%s' slot [%d] points to %s '%s'\n",
+ i, map->name, moff, type, name);
}
return 0;
}
-static int cmp_relocs(const void *_a, const void *_b)
-{
- const struct reloc_desc *a = _a;
- const struct reloc_desc *b = _b;
-
- if (a->insn_idx != b->insn_idx)
- return a->insn_idx < b->insn_idx ? -1 : 1;
-
- /* no two relocations should have the same insn_idx, but ... */
- if (a->type != b->type)
- return a->type < b->type ? -1 : 1;
-
- return 0;
-}
-
static int bpf_object__collect_relos(struct bpf_object *obj)
{
int i, err;
- for (i = 0; i < obj->efile.nr_reloc_sects; i++) {
- GElf_Shdr *shdr = &obj->efile.reloc_sects[i].shdr;
- Elf_Data *data = obj->efile.reloc_sects[i].data;
- int idx = shdr->sh_info;
+ for (i = 0; i < obj->efile.sec_cnt; i++) {
+ struct elf_sec_desc *sec_desc = &obj->efile.secs[i];
+ Elf64_Shdr *shdr;
+ Elf_Data *data;
+ int idx;
- if (shdr->sh_type != SHT_REL) {
+ if (sec_desc->sec_type != SEC_RELO)
+ continue;
+
+ shdr = sec_desc->shdr;
+ data = sec_desc->data;
+ idx = shdr->sh_info;
+
+ if (shdr->sh_type != SHT_REL || idx < 0 || idx >= obj->efile.sec_cnt) {
pr_warn("internal error at %d\n", __LINE__);
return -LIBBPF_ERRNO__INTERNAL;
}
- if (idx == obj->efile.st_ops_shndx)
+ if (obj->efile.secs[idx].sec_type == SEC_ST_OPS)
err = bpf_object__collect_st_ops_relos(obj, shdr, data);
else if (idx == obj->efile.btf_maps_shndx)
err = bpf_object__collect_map_relos(obj, shdr, data);
@@ -6563,14 +7191,7 @@ static int bpf_object__collect_relos(struct bpf_object *obj)
return err;
}
- for (i = 0; i < obj->nr_programs; i++) {
- struct bpf_program *p = &obj->programs[i];
-
- if (!p->nr_reloc)
- continue;
-
- qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs);
- }
+ bpf_object__sort_relos(obj);
return 0;
}
@@ -6587,12 +7208,15 @@ static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id
return false;
}
-static int bpf_object__sanitize_prog(struct bpf_object* obj, struct bpf_program *prog)
+static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program *prog)
{
struct bpf_insn *insn = prog->insns;
enum bpf_func_id func_id;
int i;
+ if (obj->gen_loader)
+ return 0;
+
for (i = 0; i < prog->insns_cnt; i++, insn++) {
if (!insn_is_helper_call(insn, &func_id))
continue;
@@ -6604,12 +7228,12 @@ static int bpf_object__sanitize_prog(struct bpf_object* obj, struct bpf_program
switch (func_id) {
case BPF_FUNC_probe_read_kernel:
case BPF_FUNC_probe_read_user:
- if (!kernel_supports(FEAT_PROBE_READ_KERN))
+ if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
insn->imm = BPF_FUNC_probe_read;
break;
case BPF_FUNC_probe_read_kernel_str:
case BPF_FUNC_probe_read_user_str:
- if (!kernel_supports(FEAT_PROBE_READ_KERN))
+ if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
insn->imm = BPF_FUNC_probe_read_str;
break;
default:
@@ -6619,46 +7243,108 @@ static int bpf_object__sanitize_prog(struct bpf_object* obj, struct bpf_program
return 0;
}
-static int
-load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
- char *license, __u32 kern_version, int *pfd)
+static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name,
+ int *btf_obj_fd, int *btf_type_id);
+
+/* this is called as prog->sec_def->prog_prepare_load_fn for libbpf-supported sec_defs */
+static int libbpf_prepare_prog_load(struct bpf_program *prog,
+ struct bpf_prog_load_opts *opts, long cookie)
+{
+ enum sec_def_flags def = cookie;
+
+ /* old kernels might not support specifying expected_attach_type */
+ if ((def & SEC_EXP_ATTACH_OPT) && !kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE))
+ opts->expected_attach_type = 0;
+
+ if (def & SEC_SLEEPABLE)
+ opts->prog_flags |= BPF_F_SLEEPABLE;
+
+ if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS))
+ opts->prog_flags |= BPF_F_XDP_HAS_FRAGS;
+
+ /* special check for usdt to use uprobe_multi link */
+ if ((def & SEC_USDT) && kernel_supports(prog->obj, FEAT_UPROBE_MULTI_LINK))
+ prog->expected_attach_type = BPF_TRACE_UPROBE_MULTI;
+
+ if ((def & SEC_ATTACH_BTF) && !prog->attach_btf_id) {
+ int btf_obj_fd = 0, btf_type_id = 0, err;
+ const char *attach_name;
+
+ attach_name = strchr(prog->sec_name, '/');
+ if (!attach_name) {
+ /* if BPF program is annotated with just SEC("fentry")
+ * (or similar) without declaratively specifying
+ * target, then it is expected that target will be
+ * specified with bpf_program__set_attach_target() at
+ * runtime before BPF object load step. If not, then
+ * there is nothing to load into the kernel as BPF
+ * verifier won't be able to validate BPF program
+ * correctness anyways.
+ */
+ pr_warn("prog '%s': no BTF-based attach target is specified, use bpf_program__set_attach_target()\n",
+ prog->name);
+ return -EINVAL;
+ }
+ attach_name++; /* skip over / */
+
+ err = libbpf_find_attach_btf_id(prog, attach_name, &btf_obj_fd, &btf_type_id);
+ if (err)
+ return err;
+
+ /* cache resolved BTF FD and BTF type ID in the prog */
+ prog->attach_btf_obj_fd = btf_obj_fd;
+ prog->attach_btf_id = btf_type_id;
+
+ /* but by now libbpf common logic is not utilizing
+ * prog->atach_btf_obj_fd/prog->attach_btf_id anymore because
+ * this callback is called after opts were populated by
+ * libbpf, so this callback has to update opts explicitly here
+ */
+ opts->attach_btf_obj_fd = btf_obj_fd;
+ opts->attach_btf_id = btf_type_id;
+ }
+ return 0;
+}
+
+static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz);
+
+static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog,
+ struct bpf_insn *insns, int insns_cnt,
+ const char *license, __u32 kern_version, int *prog_fd)
{
- struct bpf_load_program_attr load_attr;
+ LIBBPF_OPTS(bpf_prog_load_opts, load_attr);
+ const char *prog_name = NULL;
char *cp, errmsg[STRERR_BUFSIZE];
size_t log_buf_size = 0;
- char *log_buf = NULL;
- int btf_fd, ret;
+ char *log_buf = NULL, *tmp;
+ int btf_fd, ret, err;
+ bool own_log_buf = true;
+ __u32 log_level = prog->log_level;
+
+ if (prog->type == BPF_PROG_TYPE_UNSPEC) {
+ /*
+ * The program type must be set. Most likely we couldn't find a proper
+ * section definition at load time, and thus we didn't infer the type.
+ */
+ pr_warn("prog '%s': missing BPF prog type, check ELF section name '%s'\n",
+ prog->name, prog->sec_name);
+ return -EINVAL;
+ }
if (!insns || !insns_cnt)
return -EINVAL;
- memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
- load_attr.prog_type = prog->type;
- /* old kernels might not support specifying expected_attach_type */
- if (!kernel_supports(FEAT_EXP_ATTACH_TYPE) && prog->sec_def &&
- prog->sec_def->is_exp_attach_type_optional)
- load_attr.expected_attach_type = 0;
- else
- load_attr.expected_attach_type = prog->expected_attach_type;
- if (kernel_supports(FEAT_PROG_NAME))
- load_attr.name = prog->name;
- load_attr.insns = insns;
- load_attr.insns_cnt = insns_cnt;
- load_attr.license = license;
- if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
- prog->type == BPF_PROG_TYPE_LSM) {
- load_attr.attach_btf_id = prog->attach_btf_id;
- } else if (prog->type == BPF_PROG_TYPE_TRACING ||
- prog->type == BPF_PROG_TYPE_EXT) {
- load_attr.attach_prog_fd = prog->attach_prog_fd;
- load_attr.attach_btf_id = prog->attach_btf_id;
- } else {
- load_attr.kern_version = kern_version;
- load_attr.prog_ifindex = prog->prog_ifindex;
- }
+ if (kernel_supports(obj, FEAT_PROG_NAME))
+ prog_name = prog->name;
+ load_attr.attach_prog_fd = prog->attach_prog_fd;
+ load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd;
+ load_attr.attach_btf_id = prog->attach_btf_id;
+ load_attr.kern_version = kern_version;
+ load_attr.prog_ifindex = prog->prog_ifindex;
+
/* specify func_info/line_info only if kernel supports them */
- btf_fd = bpf_object__btf_fd(prog->obj);
- if (btf_fd >= 0 && kernel_supports(FEAT_BTF_FUNC)) {
+ btf_fd = btf__fd(obj->btf);
+ if (btf_fd >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) {
load_attr.prog_btf_fd = btf_fd;
load_attr.func_info = prog->func_info;
load_attr.func_info_rec_size = prog->func_info_rec_size;
@@ -6667,170 +7353,380 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
load_attr.line_info_rec_size = prog->line_info_rec_size;
load_attr.line_info_cnt = prog->line_info_cnt;
}
- load_attr.log_level = prog->log_level;
+ load_attr.log_level = log_level;
load_attr.prog_flags = prog->prog_flags;
+ load_attr.fd_array = obj->fd_array;
-retry_load:
- if (log_buf_size) {
- log_buf = malloc(log_buf_size);
- if (!log_buf)
- return -ENOMEM;
+ load_attr.token_fd = obj->token_fd;
+ if (obj->token_fd)
+ load_attr.prog_flags |= BPF_F_TOKEN_FD;
- *log_buf = 0;
+ /* adjust load_attr if sec_def provides custom preload callback */
+ if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) {
+ err = prog->sec_def->prog_prepare_load_fn(prog, &load_attr, prog->sec_def->cookie);
+ if (err < 0) {
+ pr_warn("prog '%s': failed to prepare load attributes: %d\n",
+ prog->name, err);
+ return err;
+ }
+ insns = prog->insns;
+ insns_cnt = prog->insns_cnt;
}
- ret = bpf_load_program_xattr(&load_attr, log_buf, log_buf_size);
+ /* allow prog_prepare_load_fn to change expected_attach_type */
+ load_attr.expected_attach_type = prog->expected_attach_type;
+ if (obj->gen_loader) {
+ bpf_gen__prog_load(obj->gen_loader, prog->type, prog->name,
+ license, insns, insns_cnt, &load_attr,
+ prog - obj->programs);
+ *prog_fd = -1;
+ return 0;
+ }
+
+retry_load:
+ /* if log_level is zero, we don't request logs initially even if
+ * custom log_buf is specified; if the program load fails, then we'll
+ * bump log_level to 1 and use either custom log_buf or we'll allocate
+ * our own and retry the load to get details on what failed
+ */
+ if (log_level) {
+ if (prog->log_buf) {
+ log_buf = prog->log_buf;
+ log_buf_size = prog->log_size;
+ own_log_buf = false;
+ } else if (obj->log_buf) {
+ log_buf = obj->log_buf;
+ log_buf_size = obj->log_size;
+ own_log_buf = false;
+ } else {
+ log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, log_buf_size * 2);
+ tmp = realloc(log_buf, log_buf_size);
+ if (!tmp) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ log_buf = tmp;
+ log_buf[0] = '\0';
+ own_log_buf = true;
+ }
+ }
+
+ load_attr.log_buf = log_buf;
+ load_attr.log_size = log_buf_size;
+ load_attr.log_level = log_level;
+
+ ret = bpf_prog_load(prog->type, prog_name, license, insns, insns_cnt, &load_attr);
if (ret >= 0) {
- if (log_buf && load_attr.log_level)
- pr_debug("verifier log:\n%s", log_buf);
-
- if (prog->obj->rodata_map_idx >= 0 &&
- kernel_supports(FEAT_PROG_BIND_MAP)) {
- struct bpf_map *rodata_map =
- &prog->obj->maps[prog->obj->rodata_map_idx];
-
- if (bpf_prog_bind_map(ret, bpf_map__fd(rodata_map), NULL)) {
- cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
- pr_warn("prog '%s': failed to bind .rodata map: %s\n",
- prog->name, cp);
- /* Don't fail hard if can't bind rodata. */
+ if (log_level && own_log_buf) {
+ pr_debug("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
+ prog->name, log_buf);
+ }
+
+ if (obj->has_rodata && kernel_supports(obj, FEAT_PROG_BIND_MAP)) {
+ struct bpf_map *map;
+ int i;
+
+ for (i = 0; i < obj->nr_maps; i++) {
+ map = &prog->obj->maps[i];
+ if (map->libbpf_type != LIBBPF_MAP_RODATA)
+ continue;
+
+ if (bpf_prog_bind_map(ret, map->fd, NULL)) {
+ cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+ pr_warn("prog '%s': failed to bind map '%s': %s\n",
+ prog->name, map->real_name, cp);
+ /* Don't fail hard if can't bind rodata. */
+ }
}
}
- *pfd = ret;
+ *prog_fd = ret;
ret = 0;
goto out;
}
- if (!log_buf || errno == ENOSPC) {
- log_buf_size = max((size_t)BPF_LOG_BUF_SIZE,
- log_buf_size << 1);
-
- free(log_buf);
+ if (log_level == 0) {
+ log_level = 1;
goto retry_load;
}
- ret = errno ? -errno : -LIBBPF_ERRNO__LOAD;
+ /* On ENOSPC, increase log buffer size and retry, unless custom
+ * log_buf is specified.
+ * Be careful to not overflow u32, though. Kernel's log buf size limit
+ * isn't part of UAPI so it can always be bumped to full 4GB. So don't
+ * multiply by 2 unless we are sure we'll fit within 32 bits.
+ * Currently, we'll get -EINVAL when we reach (UINT_MAX >> 2).
+ */
+ if (own_log_buf && errno == ENOSPC && log_buf_size <= UINT_MAX / 2)
+ goto retry_load;
+
+ ret = -errno;
+
+ /* post-process verifier log to improve error descriptions */
+ fixup_verifier_log(prog, log_buf, log_buf_size);
+
cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
- pr_warn("load bpf program failed: %s\n", cp);
+ pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp);
pr_perm_msg(ret);
- if (log_buf && log_buf[0] != '\0') {
- ret = -LIBBPF_ERRNO__VERIFY;
- pr_warn("-- BEGIN DUMP LOG ---\n");
- pr_warn("\n%s\n", log_buf);
- pr_warn("-- END LOG --\n");
- } else if (load_attr.insns_cnt >= BPF_MAXINSNS) {
- pr_warn("Program too large (%zu insns), at most %d insns\n",
- load_attr.insns_cnt, BPF_MAXINSNS);
- ret = -LIBBPF_ERRNO__PROG2BIG;
- } else if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) {
- /* Wrong program type? */
- int fd;
-
- load_attr.prog_type = BPF_PROG_TYPE_KPROBE;
- load_attr.expected_attach_type = 0;
- fd = bpf_load_program_xattr(&load_attr, NULL, 0);
- if (fd >= 0) {
- close(fd);
- ret = -LIBBPF_ERRNO__PROGTYPE;
- goto out;
- }
+ if (own_log_buf && log_buf && log_buf[0] != '\0') {
+ pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
+ prog->name, log_buf);
}
out:
- free(log_buf);
+ if (own_log_buf)
+ free(log_buf);
return ret;
}
-static int libbpf_find_attach_btf_id(struct bpf_program *prog);
-
-int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
+static char *find_prev_line(char *buf, char *cur)
{
- int err = 0, fd, i, btf_id;
+ char *p;
- if (prog->obj->loaded) {
- pr_warn("prog '%s': can't load after object was loaded\n", prog->name);
- return -EINVAL;
- }
+ if (cur == buf) /* end of a log buf */
+ return NULL;
- if ((prog->type == BPF_PROG_TYPE_TRACING ||
- prog->type == BPF_PROG_TYPE_LSM ||
- prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) {
- btf_id = libbpf_find_attach_btf_id(prog);
- if (btf_id <= 0)
- return btf_id;
- prog->attach_btf_id = btf_id;
- }
+ p = cur - 1;
+ while (p - 1 >= buf && *(p - 1) != '\n')
+ p--;
- if (prog->instances.nr < 0 || !prog->instances.fds) {
- if (prog->preprocessor) {
- pr_warn("Internal error: can't load program '%s'\n",
- prog->name);
- return -LIBBPF_ERRNO__INTERNAL;
- }
+ return p;
+}
- prog->instances.fds = malloc(sizeof(int));
- if (!prog->instances.fds) {
- pr_warn("Not enough memory for BPF fds\n");
- return -ENOMEM;
+static void patch_log(char *buf, size_t buf_sz, size_t log_sz,
+ char *orig, size_t orig_sz, const char *patch)
+{
+ /* size of the remaining log content to the right from the to-be-replaced part */
+ size_t rem_sz = (buf + log_sz) - (orig + orig_sz);
+ size_t patch_sz = strlen(patch);
+
+ if (patch_sz != orig_sz) {
+ /* If patch line(s) are longer than original piece of verifier log,
+ * shift log contents by (patch_sz - orig_sz) bytes to the right
+ * starting from after to-be-replaced part of the log.
+ *
+ * If patch line(s) are shorter than original piece of verifier log,
+ * shift log contents by (orig_sz - patch_sz) bytes to the left
+ * starting from after to-be-replaced part of the log
+ *
+ * We need to be careful about not overflowing available
+ * buf_sz capacity. If that's the case, we'll truncate the end
+ * of the original log, as necessary.
+ */
+ if (patch_sz > orig_sz) {
+ if (orig + patch_sz >= buf + buf_sz) {
+ /* patch is big enough to cover remaining space completely */
+ patch_sz -= (orig + patch_sz) - (buf + buf_sz) + 1;
+ rem_sz = 0;
+ } else if (patch_sz - orig_sz > buf_sz - log_sz) {
+ /* patch causes part of remaining log to be truncated */
+ rem_sz -= (patch_sz - orig_sz) - (buf_sz - log_sz);
+ }
}
- prog->instances.nr = 1;
- prog->instances.fds[0] = -1;
+ /* shift remaining log to the right by calculated amount */
+ memmove(orig + patch_sz, orig + orig_sz, rem_sz);
}
- if (!prog->preprocessor) {
- if (prog->instances.nr != 1) {
- pr_warn("prog '%s': inconsistent nr(%d) != 1\n",
- prog->name, prog->instances.nr);
+ memcpy(orig, patch, patch_sz);
+}
+
+static void fixup_log_failed_core_relo(struct bpf_program *prog,
+ char *buf, size_t buf_sz, size_t log_sz,
+ char *line1, char *line2, char *line3)
+{
+ /* Expected log for failed and not properly guarded CO-RE relocation:
+ * line1 -> 123: (85) call unknown#195896080
+ * line2 -> invalid func unknown#195896080
+ * line3 -> <anything else or end of buffer>
+ *
+ * "123" is the index of the instruction that was poisoned. We extract
+ * instruction index to find corresponding CO-RE relocation and
+ * replace this part of the log with more relevant information about
+ * failed CO-RE relocation.
+ */
+ const struct bpf_core_relo *relo;
+ struct bpf_core_spec spec;
+ char patch[512], spec_buf[256];
+ int insn_idx, err, spec_len;
+
+ if (sscanf(line1, "%d: (%*d) call unknown#195896080\n", &insn_idx) != 1)
+ return;
+
+ relo = find_relo_core(prog, insn_idx);
+ if (!relo)
+ return;
+
+ err = bpf_core_parse_spec(prog->name, prog->obj->btf, relo, &spec);
+ if (err)
+ return;
+
+ spec_len = bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec);
+ snprintf(patch, sizeof(patch),
+ "%d: <invalid CO-RE relocation>\n"
+ "failed to resolve CO-RE relocation %s%s\n",
+ insn_idx, spec_buf, spec_len >= sizeof(spec_buf) ? "..." : "");
+
+ patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
+}
+
+static void fixup_log_missing_map_load(struct bpf_program *prog,
+ char *buf, size_t buf_sz, size_t log_sz,
+ char *line1, char *line2, char *line3)
+{
+ /* Expected log for failed and not properly guarded map reference:
+ * line1 -> 123: (85) call unknown#2001000345
+ * line2 -> invalid func unknown#2001000345
+ * line3 -> <anything else or end of buffer>
+ *
+ * "123" is the index of the instruction that was poisoned.
+ * "345" in "2001000345" is a map index in obj->maps to fetch map name.
+ */
+ struct bpf_object *obj = prog->obj;
+ const struct bpf_map *map;
+ int insn_idx, map_idx;
+ char patch[128];
+
+ if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &map_idx) != 2)
+ return;
+
+ map_idx -= POISON_LDIMM64_MAP_BASE;
+ if (map_idx < 0 || map_idx >= obj->nr_maps)
+ return;
+ map = &obj->maps[map_idx];
+
+ snprintf(patch, sizeof(patch),
+ "%d: <invalid BPF map reference>\n"
+ "BPF map '%s' is referenced but wasn't created\n",
+ insn_idx, map->name);
+
+ patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
+}
+
+static void fixup_log_missing_kfunc_call(struct bpf_program *prog,
+ char *buf, size_t buf_sz, size_t log_sz,
+ char *line1, char *line2, char *line3)
+{
+ /* Expected log for failed and not properly guarded kfunc call:
+ * line1 -> 123: (85) call unknown#2002000345
+ * line2 -> invalid func unknown#2002000345
+ * line3 -> <anything else or end of buffer>
+ *
+ * "123" is the index of the instruction that was poisoned.
+ * "345" in "2002000345" is an extern index in obj->externs to fetch kfunc name.
+ */
+ struct bpf_object *obj = prog->obj;
+ const struct extern_desc *ext;
+ int insn_idx, ext_idx;
+ char patch[128];
+
+ if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &ext_idx) != 2)
+ return;
+
+ ext_idx -= POISON_CALL_KFUNC_BASE;
+ if (ext_idx < 0 || ext_idx >= obj->nr_extern)
+ return;
+ ext = &obj->externs[ext_idx];
+
+ snprintf(patch, sizeof(patch),
+ "%d: <invalid kfunc call>\n"
+ "kfunc '%s' is referenced but wasn't resolved\n",
+ insn_idx, ext->name);
+
+ patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
+}
+
+static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz)
+{
+ /* look for familiar error patterns in last N lines of the log */
+ const size_t max_last_line_cnt = 10;
+ char *prev_line, *cur_line, *next_line;
+ size_t log_sz;
+ int i;
+
+ if (!buf)
+ return;
+
+ log_sz = strlen(buf) + 1;
+ next_line = buf + log_sz - 1;
+
+ for (i = 0; i < max_last_line_cnt; i++, next_line = cur_line) {
+ cur_line = find_prev_line(buf, next_line);
+ if (!cur_line)
+ return;
+
+ if (str_has_pfx(cur_line, "invalid func unknown#195896080\n")) {
+ prev_line = find_prev_line(buf, cur_line);
+ if (!prev_line)
+ continue;
+
+ /* failed CO-RE relocation case */
+ fixup_log_failed_core_relo(prog, buf, buf_sz, log_sz,
+ prev_line, cur_line, next_line);
+ return;
+ } else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_LDIMM64_MAP_PFX)) {
+ prev_line = find_prev_line(buf, cur_line);
+ if (!prev_line)
+ continue;
+
+ /* reference to uncreated BPF map */
+ fixup_log_missing_map_load(prog, buf, buf_sz, log_sz,
+ prev_line, cur_line, next_line);
+ return;
+ } else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_CALL_KFUNC_PFX)) {
+ prev_line = find_prev_line(buf, cur_line);
+ if (!prev_line)
+ continue;
+
+ /* reference to unresolved kfunc */
+ fixup_log_missing_kfunc_call(prog, buf, buf_sz, log_sz,
+ prev_line, cur_line, next_line);
+ return;
}
- err = load_program(prog, prog->insns, prog->insns_cnt,
- license, kern_ver, &fd);
- if (!err)
- prog->instances.fds[0] = fd;
- goto out;
}
+}
- for (i = 0; i < prog->instances.nr; i++) {
- struct bpf_prog_prep_result result;
- bpf_program_prep_t preprocessor = prog->preprocessor;
+static int bpf_program_record_relos(struct bpf_program *prog)
+{
+ struct bpf_object *obj = prog->obj;
+ int i;
- memset(&result, 0, sizeof(result));
- err = preprocessor(prog, i, prog->insns,
- prog->insns_cnt, &result);
- if (err) {
- pr_warn("Preprocessing the %dth instance of program '%s' failed\n",
- i, prog->name);
- goto out;
- }
+ for (i = 0; i < prog->nr_reloc; i++) {
+ struct reloc_desc *relo = &prog->reloc_desc[i];
+ struct extern_desc *ext = &obj->externs[relo->ext_idx];
+ int kind;
- if (!result.new_insn_ptr || !result.new_insn_cnt) {
- pr_debug("Skip loading the %dth instance of program '%s'\n",
- i, prog->name);
- prog->instances.fds[i] = -1;
- if (result.pfd)
- *result.pfd = -1;
- continue;
+ switch (relo->type) {
+ case RELO_EXTERN_LD64:
+ if (ext->type != EXT_KSYM)
+ continue;
+ kind = btf_is_var(btf__type_by_id(obj->btf, ext->btf_id)) ?
+ BTF_KIND_VAR : BTF_KIND_FUNC;
+ bpf_gen__record_extern(obj->gen_loader, ext->name,
+ ext->is_weak, !ext->ksym.type_id,
+ true, kind, relo->insn_idx);
+ break;
+ case RELO_EXTERN_CALL:
+ bpf_gen__record_extern(obj->gen_loader, ext->name,
+ ext->is_weak, false, false, BTF_KIND_FUNC,
+ relo->insn_idx);
+ break;
+ case RELO_CORE: {
+ struct bpf_core_relo cr = {
+ .insn_off = relo->insn_idx * 8,
+ .type_id = relo->core_relo->type_id,
+ .access_str_off = relo->core_relo->access_str_off,
+ .kind = relo->core_relo->kind,
+ };
+
+ bpf_gen__record_relo_core(obj->gen_loader, &cr);
+ break;
}
-
- err = load_program(prog, result.new_insn_ptr,
- result.new_insn_cnt, license, kern_ver, &fd);
- if (err) {
- pr_warn("Loading the %dth instance of program '%s' failed\n",
- i, prog->name);
- goto out;
+ default:
+ continue;
}
-
- if (result.pfd)
- *result.pfd = fd;
- prog->instances.fds[i] = fd;
}
-out:
- if (err)
- pr_warn("failed to load program '%s'\n", prog->name);
- zfree(&prog->insns);
- prog->insns_cnt = 0;
- return err;
+ return 0;
}
static int
@@ -6851,29 +7747,72 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level)
prog = &obj->programs[i];
if (prog_is_subprog(obj, prog))
continue;
- if (!prog->load) {
+ if (!prog->autoload) {
pr_debug("prog '%s': skipped loading\n", prog->name);
continue;
}
prog->log_level |= log_level;
- err = bpf_program__load(prog, obj->license, obj->kern_version);
- if (err)
+
+ if (obj->gen_loader)
+ bpf_program_record_relos(prog);
+
+ err = bpf_object_load_prog(obj, prog, prog->insns, prog->insns_cnt,
+ obj->license, obj->kern_version, &prog->fd);
+ if (err) {
+ pr_warn("prog '%s': failed to load: %d\n", prog->name, err);
return err;
+ }
}
+
+ bpf_object__free_relocs(obj);
return 0;
}
static const struct bpf_sec_def *find_sec_def(const char *sec_name);
-static struct bpf_object *
-__bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
- const struct bpf_object_open_opts *opts)
+static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object_open_opts *opts)
{
- const char *obj_name, *kconfig;
struct bpf_program *prog;
+ int err;
+
+ bpf_object__for_each_program(prog, obj) {
+ prog->sec_def = find_sec_def(prog->sec_name);
+ if (!prog->sec_def) {
+ /* couldn't guess, but user might manually specify */
+ pr_debug("prog '%s': unrecognized ELF section name '%s'\n",
+ prog->name, prog->sec_name);
+ continue;
+ }
+
+ prog->type = prog->sec_def->prog_type;
+ prog->expected_attach_type = prog->sec_def->expected_attach_type;
+
+ /* sec_def can have custom callback which should be called
+ * after bpf_program is initialized to adjust its properties
+ */
+ if (prog->sec_def->prog_setup_fn) {
+ err = prog->sec_def->prog_setup_fn(prog, prog->sec_def->cookie);
+ if (err < 0) {
+ pr_warn("prog '%s': failed to initialize: %d\n",
+ prog->name, err);
+ return err;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz,
+ const struct bpf_object_open_opts *opts)
+{
+ const char *obj_name, *kconfig, *btf_tmp_path, *token_path;
struct bpf_object *obj;
char tmp_name[64];
int err;
+ char *log_buf;
+ size_t log_size;
+ __u32 log_level;
if (elf_version(EV_CURRENT) == EV_NONE) {
pr_warn("failed to init libelf for %s\n",
@@ -6896,44 +7835,74 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
pr_debug("loading object '%s' from buffer\n", obj_name);
}
+ log_buf = OPTS_GET(opts, kernel_log_buf, NULL);
+ log_size = OPTS_GET(opts, kernel_log_size, 0);
+ log_level = OPTS_GET(opts, kernel_log_level, 0);
+ if (log_size > UINT_MAX)
+ return ERR_PTR(-EINVAL);
+ if (log_size && !log_buf)
+ return ERR_PTR(-EINVAL);
+
+ token_path = OPTS_GET(opts, bpf_token_path, NULL);
+ /* if user didn't specify bpf_token_path explicitly, check if
+ * LIBBPF_BPF_TOKEN_PATH envvar was set and treat it as bpf_token_path
+ * option
+ */
+ if (!token_path)
+ token_path = getenv("LIBBPF_BPF_TOKEN_PATH");
+ if (token_path && strlen(token_path) >= PATH_MAX)
+ return ERR_PTR(-ENAMETOOLONG);
+
obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);
if (IS_ERR(obj))
return obj;
+ obj->log_buf = log_buf;
+ obj->log_size = log_size;
+ obj->log_level = log_level;
+
+ if (token_path) {
+ obj->token_path = strdup(token_path);
+ if (!obj->token_path) {
+ err = -ENOMEM;
+ goto out;
+ }
+ }
+
+ btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL);
+ if (btf_tmp_path) {
+ if (strlen(btf_tmp_path) >= PATH_MAX) {
+ err = -ENAMETOOLONG;
+ goto out;
+ }
+ obj->btf_custom_path = strdup(btf_tmp_path);
+ if (!obj->btf_custom_path) {
+ err = -ENOMEM;
+ goto out;
+ }
+ }
+
kconfig = OPTS_GET(opts, kconfig, NULL);
if (kconfig) {
obj->kconfig = strdup(kconfig);
- if (!obj->kconfig)
- return ERR_PTR(-ENOMEM);
+ if (!obj->kconfig) {
+ err = -ENOMEM;
+ goto out;
+ }
}
err = bpf_object__elf_init(obj);
err = err ? : bpf_object__check_endianness(obj);
err = err ? : bpf_object__elf_collect(obj);
err = err ? : bpf_object__collect_externs(obj);
- err = err ? : bpf_object__finalize_btf(obj);
+ err = err ? : bpf_object_fixup_btf(obj);
err = err ? : bpf_object__init_maps(obj, opts);
+ err = err ? : bpf_object_init_progs(obj, opts);
err = err ? : bpf_object__collect_relos(obj);
if (err)
goto out;
- bpf_object__elf_finish(obj);
-
- bpf_object__for_each_program(prog, obj) {
- prog->sec_def = find_sec_def(prog->sec_name);
- if (!prog->sec_def)
- /* couldn't guess, but user might manually specify */
- continue;
-
- if (prog->sec_def->is_sleepable)
- prog->prog_flags |= BPF_F_SLEEPABLE;
- bpf_program__set_type(prog, prog->sec_def->prog_type);
- bpf_program__set_expected_attach_type(prog,
- prog->sec_def->expected_attach_type);
- if (prog->sec_def->prog_type == BPF_PROG_TYPE_TRACING ||
- prog->sec_def->prog_type == BPF_PROG_TYPE_EXT)
- prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0);
- }
+ bpf_object__elf_finish(obj);
return obj;
out:
@@ -6941,80 +7910,38 @@ out:
return ERR_PTR(err);
}
-static struct bpf_object *
-__bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags)
-{
- DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
- .relaxed_maps = flags & MAPS_RELAX_COMPAT,
- );
-
- /* param validation */
- if (!attr->file)
- return NULL;
-
- pr_debug("loading %s\n", attr->file);
- return __bpf_object__open(attr->file, NULL, 0, &opts);
-}
-
-struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr)
-{
- return __bpf_object__open_xattr(attr, 0);
-}
-
-struct bpf_object *bpf_object__open(const char *path)
-{
- struct bpf_object_open_attr attr = {
- .file = path,
- .prog_type = BPF_PROG_TYPE_UNSPEC,
- };
-
- return bpf_object__open_xattr(&attr);
-}
-
struct bpf_object *
bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
{
if (!path)
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
pr_debug("loading %s\n", path);
- return __bpf_object__open(path, NULL, 0, opts);
+ return libbpf_ptr(bpf_object_open(path, NULL, 0, opts));
}
-struct bpf_object *
-bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
- const struct bpf_object_open_opts *opts)
+struct bpf_object *bpf_object__open(const char *path)
{
- if (!obj_buf || obj_buf_sz == 0)
- return ERR_PTR(-EINVAL);
-
- return __bpf_object__open(NULL, obj_buf, obj_buf_sz, opts);
+ return bpf_object__open_file(path, NULL);
}
struct bpf_object *
-bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz,
- const char *name)
+bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
+ const struct bpf_object_open_opts *opts)
{
- DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
- .object_name = name,
- /* wrong default, but backwards-compatible */
- .relaxed_maps = true,
- );
-
- /* returning NULL is wrong, but backwards-compatible */
if (!obj_buf || obj_buf_sz == 0)
- return NULL;
+ return libbpf_err_ptr(-EINVAL);
- return bpf_object__open_mem(obj_buf, obj_buf_sz, &opts);
+ return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, opts));
}
-int bpf_object__unload(struct bpf_object *obj)
+static int bpf_object_unload(struct bpf_object *obj)
{
size_t i;
if (!obj)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
for (i = 0; i < obj->nr_maps; i++) {
zclose(obj->maps[i].fd);
@@ -7035,26 +7962,21 @@ static int bpf_object__sanitize_maps(struct bpf_object *obj)
bpf_object__for_each_map(m, obj) {
if (!bpf_map__is_internal(m))
continue;
- if (!kernel_supports(FEAT_GLOBAL_DATA)) {
- pr_warn("kernel doesn't support global data\n");
- return -ENOTSUP;
- }
- if (!kernel_supports(FEAT_ARRAY_MMAP))
- m->def.map_flags ^= BPF_F_MMAPABLE;
+ if (!kernel_supports(obj, FEAT_ARRAY_MMAP))
+ m->def.map_flags &= ~BPF_F_MMAPABLE;
}
return 0;
}
-static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
+int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *ctx)
{
char sym_type, sym_name[500];
unsigned long long sym_addr;
- struct extern_desc *ext;
int ret, err = 0;
FILE *f;
- f = fopen("/proc/kallsyms", "r");
+ f = fopen("/proc/kallsyms", "re");
if (!f) {
err = -errno;
pr_warn("failed to open /proc/kallsyms: %d\n", err);
@@ -7069,88 +7991,234 @@ static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
if (ret != 3) {
pr_warn("failed to read kallsyms entry: %d\n", ret);
err = -EINVAL;
- goto out;
+ break;
}
- ext = find_extern_by_name(obj, sym_name);
- if (!ext || ext->type != EXT_KSYM)
- continue;
+ err = cb(sym_addr, sym_type, sym_name, ctx);
+ if (err)
+ break;
+ }
- if (ext->is_set && ext->ksym.addr != sym_addr) {
- pr_warn("extern (ksym) '%s' resolution is ambiguous: 0x%llx or 0x%llx\n",
- sym_name, ext->ksym.addr, sym_addr);
- err = -EINVAL;
- goto out;
+ fclose(f);
+ return err;
+}
+
+static int kallsyms_cb(unsigned long long sym_addr, char sym_type,
+ const char *sym_name, void *ctx)
+{
+ struct bpf_object *obj = ctx;
+ const struct btf_type *t;
+ struct extern_desc *ext;
+
+ ext = find_extern_by_name(obj, sym_name);
+ if (!ext || ext->type != EXT_KSYM)
+ return 0;
+
+ t = btf__type_by_id(obj->btf, ext->btf_id);
+ if (!btf_is_var(t))
+ return 0;
+
+ if (ext->is_set && ext->ksym.addr != sym_addr) {
+ pr_warn("extern (ksym) '%s': resolution is ambiguous: 0x%llx or 0x%llx\n",
+ sym_name, ext->ksym.addr, sym_addr);
+ return -EINVAL;
+ }
+ if (!ext->is_set) {
+ ext->is_set = true;
+ ext->ksym.addr = sym_addr;
+ pr_debug("extern (ksym) '%s': set to 0x%llx\n", sym_name, sym_addr);
+ }
+ return 0;
+}
+
+static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
+{
+ return libbpf_kallsyms_parse(kallsyms_cb, obj);
+}
+
+static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
+ __u16 kind, struct btf **res_btf,
+ struct module_btf **res_mod_btf)
+{
+ struct module_btf *mod_btf;
+ struct btf *btf;
+ int i, id, err;
+
+ btf = obj->btf_vmlinux;
+ mod_btf = NULL;
+ id = btf__find_by_name_kind(btf, ksym_name, kind);
+
+ if (id == -ENOENT) {
+ err = load_module_btfs(obj);
+ if (err)
+ return err;
+
+ for (i = 0; i < obj->btf_module_cnt; i++) {
+ /* we assume module_btf's BTF FD is always >0 */
+ mod_btf = &obj->btf_modules[i];
+ btf = mod_btf->btf;
+ id = btf__find_by_name_kind_own(btf, ksym_name, kind);
+ if (id != -ENOENT)
+ break;
}
- if (!ext->is_set) {
- ext->is_set = true;
- ext->ksym.addr = sym_addr;
- pr_debug("extern (ksym) %s=0x%llx\n", sym_name, sym_addr);
+ }
+ if (id <= 0)
+ return -ESRCH;
+
+ *res_btf = btf;
+ *res_mod_btf = mod_btf;
+ return id;
+}
+
+static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj,
+ struct extern_desc *ext)
+{
+ const struct btf_type *targ_var, *targ_type;
+ __u32 targ_type_id, local_type_id;
+ struct module_btf *mod_btf = NULL;
+ const char *targ_var_name;
+ struct btf *btf = NULL;
+ int id, err;
+
+ id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &mod_btf);
+ if (id < 0) {
+ if (id == -ESRCH && ext->is_weak)
+ return 0;
+ pr_warn("extern (var ksym) '%s': not found in kernel BTF\n",
+ ext->name);
+ return id;
+ }
+
+ /* find local type_id */
+ local_type_id = ext->ksym.type_id;
+
+ /* find target type_id */
+ targ_var = btf__type_by_id(btf, id);
+ targ_var_name = btf__name_by_offset(btf, targ_var->name_off);
+ targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id);
+
+ err = bpf_core_types_are_compat(obj->btf, local_type_id,
+ btf, targ_type_id);
+ if (err <= 0) {
+ const struct btf_type *local_type;
+ const char *targ_name, *local_name;
+
+ local_type = btf__type_by_id(obj->btf, local_type_id);
+ local_name = btf__name_by_offset(obj->btf, local_type->name_off);
+ targ_name = btf__name_by_offset(btf, targ_type->name_off);
+
+ pr_warn("extern (var ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
+ ext->name, local_type_id,
+ btf_kind_str(local_type), local_name, targ_type_id,
+ btf_kind_str(targ_type), targ_name);
+ return -EINVAL;
+ }
+
+ ext->is_set = true;
+ ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0;
+ ext->ksym.kernel_btf_id = id;
+ pr_debug("extern (var ksym) '%s': resolved to [%d] %s %s\n",
+ ext->name, id, btf_kind_str(targ_var), targ_var_name);
+
+ return 0;
+}
+
+static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj,
+ struct extern_desc *ext)
+{
+ int local_func_proto_id, kfunc_proto_id, kfunc_id;
+ struct module_btf *mod_btf = NULL;
+ const struct btf_type *kern_func;
+ struct btf *kern_btf = NULL;
+ int ret;
+
+ local_func_proto_id = ext->ksym.type_id;
+
+ kfunc_id = find_ksym_btf_id(obj, ext->essent_name ?: ext->name, BTF_KIND_FUNC, &kern_btf,
+ &mod_btf);
+ if (kfunc_id < 0) {
+ if (kfunc_id == -ESRCH && ext->is_weak)
+ return 0;
+ pr_warn("extern (func ksym) '%s': not found in kernel or module BTFs\n",
+ ext->name);
+ return kfunc_id;
+ }
+
+ kern_func = btf__type_by_id(kern_btf, kfunc_id);
+ kfunc_proto_id = kern_func->type;
+
+ ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id,
+ kern_btf, kfunc_proto_id);
+ if (ret <= 0) {
+ if (ext->is_weak)
+ return 0;
+
+ pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with %s [%d]\n",
+ ext->name, local_func_proto_id,
+ mod_btf ? mod_btf->name : "vmlinux", kfunc_proto_id);
+ return -EINVAL;
+ }
+
+ /* set index for module BTF fd in fd_array, if unset */
+ if (mod_btf && !mod_btf->fd_array_idx) {
+ /* insn->off is s16 */
+ if (obj->fd_array_cnt == INT16_MAX) {
+ pr_warn("extern (func ksym) '%s': module BTF fd index %d too big to fit in bpf_insn offset\n",
+ ext->name, mod_btf->fd_array_idx);
+ return -E2BIG;
}
+ /* Cannot use index 0 for module BTF fd */
+ if (!obj->fd_array_cnt)
+ obj->fd_array_cnt = 1;
+
+ ret = libbpf_ensure_mem((void **)&obj->fd_array, &obj->fd_array_cap, sizeof(int),
+ obj->fd_array_cnt + 1);
+ if (ret)
+ return ret;
+ mod_btf->fd_array_idx = obj->fd_array_cnt;
+ /* we assume module BTF FD is always >0 */
+ obj->fd_array[obj->fd_array_cnt++] = mod_btf->fd;
}
-out:
- fclose(f);
- return err;
+ ext->is_set = true;
+ ext->ksym.kernel_btf_id = kfunc_id;
+ ext->ksym.btf_fd_idx = mod_btf ? mod_btf->fd_array_idx : 0;
+ /* Also set kernel_btf_obj_fd to make sure that bpf_object__relocate_data()
+ * populates FD into ld_imm64 insn when it's used to point to kfunc.
+ * {kernel_btf_id, btf_fd_idx} -> fixup bpf_call.
+ * {kernel_btf_id, kernel_btf_obj_fd} -> fixup ld_imm64.
+ */
+ ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0;
+ pr_debug("extern (func ksym) '%s': resolved to %s [%d]\n",
+ ext->name, mod_btf ? mod_btf->name : "vmlinux", kfunc_id);
+
+ return 0;
}
static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
{
+ const struct btf_type *t;
struct extern_desc *ext;
- int i, id;
+ int i, err;
for (i = 0; i < obj->nr_extern; i++) {
- const struct btf_type *targ_var, *targ_type;
- __u32 targ_type_id, local_type_id;
- const char *targ_var_name;
- int ret;
-
ext = &obj->externs[i];
if (ext->type != EXT_KSYM || !ext->ksym.type_id)
continue;
- id = btf__find_by_name_kind(obj->btf_vmlinux, ext->name,
- BTF_KIND_VAR);
- if (id <= 0) {
- pr_warn("extern (ksym) '%s': failed to find BTF ID in vmlinux BTF.\n",
- ext->name);
- return -ESRCH;
- }
-
- /* find local type_id */
- local_type_id = ext->ksym.type_id;
-
- /* find target type_id */
- targ_var = btf__type_by_id(obj->btf_vmlinux, id);
- targ_var_name = btf__name_by_offset(obj->btf_vmlinux,
- targ_var->name_off);
- targ_type = skip_mods_and_typedefs(obj->btf_vmlinux,
- targ_var->type,
- &targ_type_id);
-
- ret = bpf_core_types_are_compat(obj->btf, local_type_id,
- obj->btf_vmlinux, targ_type_id);
- if (ret <= 0) {
- const struct btf_type *local_type;
- const char *targ_name, *local_name;
-
- local_type = btf__type_by_id(obj->btf, local_type_id);
- local_name = btf__name_by_offset(obj->btf,
- local_type->name_off);
- targ_name = btf__name_by_offset(obj->btf_vmlinux,
- targ_type->name_off);
-
- pr_warn("extern (ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
- ext->name, local_type_id,
- btf_kind_str(local_type), local_name, targ_type_id,
- btf_kind_str(targ_type), targ_name);
- return -EINVAL;
+ if (obj->gen_loader) {
+ ext->is_set = true;
+ ext->ksym.kernel_btf_obj_fd = 0;
+ ext->ksym.kernel_btf_id = 0;
+ continue;
}
-
- ext->is_set = true;
- ext->ksym.vmlinux_btf_id = id;
- pr_debug("extern (ksym) '%s': resolved to [%d] %s %s\n",
- ext->name, id, btf_kind_str(targ_var), targ_var_name);
+ t = btf__type_by_id(obj->btf, ext->btf_id);
+ if (btf_is_var(t))
+ err = bpf_object__resolve_ksym_var_btf_id(obj, ext);
+ else
+ err = bpf_object__resolve_ksym_func_btf_id(obj, ext);
+ if (err)
+ return err;
}
return 0;
}
@@ -7173,29 +8241,52 @@ static int bpf_object__resolve_externs(struct bpf_object *obj,
for (i = 0; i < obj->nr_extern; i++) {
ext = &obj->externs[i];
- if (ext->type == EXT_KCFG &&
- strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
- void *ext_val = kcfg_data + ext->kcfg.data_off;
- __u32 kver = get_kernel_version();
+ if (ext->type == EXT_KSYM) {
+ if (ext->ksym.type_id)
+ need_vmlinux_btf = true;
+ else
+ need_kallsyms = true;
+ continue;
+ } else if (ext->type == EXT_KCFG) {
+ void *ext_ptr = kcfg_data + ext->kcfg.data_off;
+ __u64 value = 0;
- if (!kver) {
- pr_warn("failed to get kernel version\n");
+ /* Kconfig externs need actual /proc/config.gz */
+ if (str_has_pfx(ext->name, "CONFIG_")) {
+ need_config = true;
+ continue;
+ }
+
+ /* Virtual kcfg externs are customly handled by libbpf */
+ if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
+ value = get_kernel_version();
+ if (!value) {
+ pr_warn("extern (kcfg) '%s': failed to get kernel version\n", ext->name);
+ return -EINVAL;
+ }
+ } else if (strcmp(ext->name, "LINUX_HAS_BPF_COOKIE") == 0) {
+ value = kernel_supports(obj, FEAT_BPF_COOKIE);
+ } else if (strcmp(ext->name, "LINUX_HAS_SYSCALL_WRAPPER") == 0) {
+ value = kernel_supports(obj, FEAT_SYSCALL_WRAPPER);
+ } else if (!str_has_pfx(ext->name, "LINUX_") || !ext->is_weak) {
+ /* Currently libbpf supports only CONFIG_ and LINUX_ prefixed
+ * __kconfig externs, where LINUX_ ones are virtual and filled out
+ * customly by libbpf (their values don't come from Kconfig).
+ * If LINUX_xxx variable is not recognized by libbpf, but is marked
+ * __weak, it defaults to zero value, just like for CONFIG_xxx
+ * externs.
+ */
+ pr_warn("extern (kcfg) '%s': unrecognized virtual extern\n", ext->name);
return -EINVAL;
}
- err = set_kcfg_value_num(ext, ext_val, kver);
+
+ err = set_kcfg_value_num(ext, ext_ptr, value);
if (err)
return err;
- pr_debug("extern (kcfg) %s=0x%x\n", ext->name, kver);
- } else if (ext->type == EXT_KCFG &&
- strncmp(ext->name, "CONFIG_", 7) == 0) {
- need_config = true;
- } else if (ext->type == EXT_KSYM) {
- if (ext->ksym.type_id)
- need_vmlinux_btf = true;
- else
- need_kallsyms = true;
+ pr_debug("extern (kcfg) '%s': set to 0x%llx\n",
+ ext->name, (long long)value);
} else {
- pr_warn("unrecognized extern '%s'\n", ext->name);
+ pr_warn("extern '%s': unrecognized extern kind\n", ext->name);
return -EINVAL;
}
}
@@ -7231,10 +8322,10 @@ static int bpf_object__resolve_externs(struct bpf_object *obj,
ext = &obj->externs[i];
if (!ext->is_set && !ext->is_weak) {
- pr_warn("extern %s (strong) not resolved\n", ext->name);
+ pr_warn("extern '%s' (strong): not resolved\n", ext->name);
return -ESRCH;
} else if (!ext->is_set) {
- pr_debug("extern %s (weak) not resolved, defaulting to zero\n",
+ pr_debug("extern '%s' (weak): not resolved, defaulting to zero\n",
ext->name);
}
}
@@ -7242,32 +8333,95 @@ static int bpf_object__resolve_externs(struct bpf_object *obj,
return 0;
}
-int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
+static void bpf_map_prepare_vdata(const struct bpf_map *map)
+{
+ struct bpf_struct_ops *st_ops;
+ __u32 i;
+
+ st_ops = map->st_ops;
+ for (i = 0; i < btf_vlen(st_ops->type); i++) {
+ struct bpf_program *prog = st_ops->progs[i];
+ void *kern_data;
+ int prog_fd;
+
+ if (!prog)
+ continue;
+
+ prog_fd = bpf_program__fd(prog);
+ kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i];
+ *(unsigned long *)kern_data = prog_fd;
+ }
+}
+
+static int bpf_object_prepare_struct_ops(struct bpf_object *obj)
+{
+ struct bpf_map *map;
+ int i;
+
+ for (i = 0; i < obj->nr_maps; i++) {
+ map = &obj->maps[i];
+
+ if (!bpf_map__is_struct_ops(map))
+ continue;
+
+ if (!map->autocreate)
+ continue;
+
+ bpf_map_prepare_vdata(map);
+ }
+
+ return 0;
+}
+
+static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path)
{
- struct bpf_object *obj;
int err, i;
- if (!attr)
- return -EINVAL;
- obj = attr->obj;
if (!obj)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (obj->loaded) {
pr_warn("object '%s': load can't be attempted twice\n", obj->name);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
- err = bpf_object__probe_loading(obj);
- err = err ? : bpf_object__load_vmlinux_btf(obj);
+ if (obj->gen_loader)
+ bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps);
+
+ err = bpf_object_prepare_token(obj);
+ err = err ? : bpf_object__probe_loading(obj);
+ err = err ? : bpf_object__load_vmlinux_btf(obj, false);
err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
- err = err ? : bpf_object__sanitize_and_load_btf(obj);
err = err ? : bpf_object__sanitize_maps(obj);
err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
+ err = err ? : bpf_object_adjust_struct_ops_autoload(obj);
+ err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path);
+ err = err ? : bpf_object__sanitize_and_load_btf(obj);
err = err ? : bpf_object__create_maps(obj);
- err = err ? : bpf_object__relocate(obj, attr->target_btf_path);
- err = err ? : bpf_object__load_progs(obj, attr->log_level);
+ err = err ? : bpf_object__load_progs(obj, extra_log_level);
+ err = err ? : bpf_object_init_prog_arrays(obj);
+ err = err ? : bpf_object_prepare_struct_ops(obj);
+
+ if (obj->gen_loader) {
+ /* reset FDs */
+ if (obj->btf)
+ btf__set_fd(obj->btf, -1);
+ if (!err)
+ err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps);
+ }
+ /* clean up fd_array */
+ zfree(&obj->fd_array);
+
+ /* clean up module BTFs */
+ for (i = 0; i < obj->btf_module_cnt; i++) {
+ close(obj->btf_modules[i].fd);
+ btf__free(obj->btf_modules[i].btf);
+ free(obj->btf_modules[i].name);
+ }
+ free(obj->btf_modules);
+
+ /* clean up vmlinux BTF */
btf__free(obj->btf_vmlinux);
obj->btf_vmlinux = NULL;
@@ -7283,18 +8437,14 @@ out:
if (obj->maps[i].pinned && !obj->maps[i].reused)
bpf_map__unpin(&obj->maps[i], NULL);
- bpf_object__unload(obj);
+ bpf_object_unload(obj);
pr_warn("failed to load object '%s'\n", obj->path);
- return err;
+ return libbpf_err(err);
}
int bpf_object__load(struct bpf_object *obj)
{
- struct bpf_object_load_attr attr = {
- .obj = obj,
- };
-
- return bpf_object__load_xattr(&attr);
+ return bpf_object_load(obj, 0, NULL);
}
static int make_parent_dir(const char *path)
@@ -7349,178 +8499,53 @@ static int check_path(const char *path)
return err;
}
-int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
- int instance)
+int bpf_program__pin(struct bpf_program *prog, const char *path)
{
char *cp, errmsg[STRERR_BUFSIZE];
int err;
+ if (prog->fd < 0) {
+ pr_warn("prog '%s': can't pin program that wasn't loaded\n", prog->name);
+ return libbpf_err(-EINVAL);
+ }
+
err = make_parent_dir(path);
if (err)
- return err;
+ return libbpf_err(err);
err = check_path(path);
if (err)
- return err;
+ return libbpf_err(err);
- if (prog == NULL) {
- pr_warn("invalid program pointer\n");
- return -EINVAL;
- }
-
- if (instance < 0 || instance >= prog->instances.nr) {
- pr_warn("invalid prog instance %d of prog %s (max %d)\n",
- instance, prog->name, prog->instances.nr);
- return -EINVAL;
- }
-
- if (bpf_obj_pin(prog->instances.fds[instance], path)) {
+ if (bpf_obj_pin(prog->fd, path)) {
err = -errno;
cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
- pr_warn("failed to pin program: %s\n", cp);
- return err;
+ pr_warn("prog '%s': failed to pin at '%s': %s\n", prog->name, path, cp);
+ return libbpf_err(err);
}
- pr_debug("pinned program '%s'\n", path);
+ pr_debug("prog '%s': pinned at '%s'\n", prog->name, path);
return 0;
}
-int bpf_program__unpin_instance(struct bpf_program *prog, const char *path,
- int instance)
+int bpf_program__unpin(struct bpf_program *prog, const char *path)
{
int err;
- err = check_path(path);
- if (err)
- return err;
-
- if (prog == NULL) {
- pr_warn("invalid program pointer\n");
- return -EINVAL;
- }
-
- if (instance < 0 || instance >= prog->instances.nr) {
- pr_warn("invalid prog instance %d of prog %s (max %d)\n",
- instance, prog->name, prog->instances.nr);
- return -EINVAL;
+ if (prog->fd < 0) {
+ pr_warn("prog '%s': can't unpin program that wasn't loaded\n", prog->name);
+ return libbpf_err(-EINVAL);
}
- err = unlink(path);
- if (err != 0)
- return -errno;
- pr_debug("unpinned program '%s'\n", path);
-
- return 0;
-}
-
-int bpf_program__pin(struct bpf_program *prog, const char *path)
-{
- int i, err;
-
- err = make_parent_dir(path);
- if (err)
- return err;
-
err = check_path(path);
if (err)
- return err;
-
- if (prog == NULL) {
- pr_warn("invalid program pointer\n");
- return -EINVAL;
- }
-
- if (prog->instances.nr <= 0) {
- pr_warn("no instances of prog %s to pin\n", prog->name);
- return -EINVAL;
- }
-
- if (prog->instances.nr == 1) {
- /* don't create subdirs when pinning single instance */
- return bpf_program__pin_instance(prog, path, 0);
- }
-
- for (i = 0; i < prog->instances.nr; i++) {
- char buf[PATH_MAX];
- int len;
-
- len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
- if (len < 0) {
- err = -EINVAL;
- goto err_unpin;
- } else if (len >= PATH_MAX) {
- err = -ENAMETOOLONG;
- goto err_unpin;
- }
-
- err = bpf_program__pin_instance(prog, buf, i);
- if (err)
- goto err_unpin;
- }
-
- return 0;
-
-err_unpin:
- for (i = i - 1; i >= 0; i--) {
- char buf[PATH_MAX];
- int len;
-
- len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
- if (len < 0)
- continue;
- else if (len >= PATH_MAX)
- continue;
-
- bpf_program__unpin_instance(prog, buf, i);
- }
-
- rmdir(path);
-
- return err;
-}
+ return libbpf_err(err);
-int bpf_program__unpin(struct bpf_program *prog, const char *path)
-{
- int i, err;
-
- err = check_path(path);
- if (err)
- return err;
-
- if (prog == NULL) {
- pr_warn("invalid program pointer\n");
- return -EINVAL;
- }
-
- if (prog->instances.nr <= 0) {
- pr_warn("no instances of prog %s to pin\n", prog->name);
- return -EINVAL;
- }
-
- if (prog->instances.nr == 1) {
- /* don't create subdirs when pinning single instance */
- return bpf_program__unpin_instance(prog, path, 0);
- }
-
- for (i = 0; i < prog->instances.nr; i++) {
- char buf[PATH_MAX];
- int len;
-
- len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
- if (len < 0)
- return -EINVAL;
- else if (len >= PATH_MAX)
- return -ENAMETOOLONG;
-
- err = bpf_program__unpin_instance(prog, buf, i);
- if (err)
- return err;
- }
-
- err = rmdir(path);
+ err = unlink(path);
if (err)
- return -errno;
+ return libbpf_err(-errno);
+ pr_debug("prog '%s': unpinned from '%s'\n", prog->name, path);
return 0;
}
@@ -7531,14 +8556,14 @@ int bpf_map__pin(struct bpf_map *map, const char *path)
if (map == NULL) {
pr_warn("invalid map pointer\n");
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
if (map->pin_path) {
if (path && strcmp(path, map->pin_path)) {
pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
bpf_map__name(map), map->pin_path, path);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
} else if (map->pinned) {
pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
bpf_map__name(map), map->pin_path);
@@ -7548,10 +8573,10 @@ int bpf_map__pin(struct bpf_map *map, const char *path)
if (!path) {
pr_warn("missing a path to pin map '%s' at\n",
bpf_map__name(map));
- return -EINVAL;
+ return libbpf_err(-EINVAL);
} else if (map->pinned) {
pr_warn("map '%s' already pinned\n", bpf_map__name(map));
- return -EEXIST;
+ return libbpf_err(-EEXIST);
}
map->pin_path = strdup(path);
@@ -7563,11 +8588,11 @@ int bpf_map__pin(struct bpf_map *map, const char *path)
err = make_parent_dir(map->pin_path);
if (err)
- return err;
+ return libbpf_err(err);
err = check_path(map->pin_path);
if (err)
- return err;
+ return libbpf_err(err);
if (bpf_obj_pin(map->fd, map->pin_path)) {
err = -errno;
@@ -7582,7 +8607,7 @@ int bpf_map__pin(struct bpf_map *map, const char *path)
out_err:
cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
pr_warn("failed to pin map: %s\n", cp);
- return err;
+ return libbpf_err(err);
}
int bpf_map__unpin(struct bpf_map *map, const char *path)
@@ -7591,29 +8616,29 @@ int bpf_map__unpin(struct bpf_map *map, const char *path)
if (map == NULL) {
pr_warn("invalid map pointer\n");
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
if (map->pin_path) {
if (path && strcmp(path, map->pin_path)) {
pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
bpf_map__name(map), map->pin_path, path);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
path = map->pin_path;
} else if (!path) {
pr_warn("no path to unpin map '%s' from\n",
bpf_map__name(map));
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
err = check_path(path);
if (err)
- return err;
+ return libbpf_err(err);
err = unlink(path);
if (err != 0)
- return -errno;
+ return libbpf_err(-errno);
map->pinned = false;
pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path);
@@ -7628,7 +8653,7 @@ int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
if (path) {
new = strdup(path);
if (!new)
- return -errno;
+ return libbpf_err(-errno);
}
free(map->pin_path);
@@ -7636,7 +8661,10 @@ int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
return 0;
}
-const char *bpf_map__get_pin_path(const struct bpf_map *map)
+__alias(bpf_map__pin_path)
+const char *bpf_map__get_pin_path(const struct bpf_map *map);
+
+const char *bpf_map__pin_path(const struct bpf_map *map)
{
return map->pin_path;
}
@@ -7646,35 +8674,41 @@ bool bpf_map__is_pinned(const struct bpf_map *map)
return map->pinned;
}
+static void sanitize_pin_path(char *s)
+{
+ /* bpffs disallows periods in path names */
+ while (*s) {
+ if (*s == '.')
+ *s = '_';
+ s++;
+ }
+}
+
int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
{
struct bpf_map *map;
int err;
if (!obj)
- return -ENOENT;
+ return libbpf_err(-ENOENT);
if (!obj->loaded) {
pr_warn("object not yet loaded; load it first\n");
- return -ENOENT;
+ return libbpf_err(-ENOENT);
}
bpf_object__for_each_map(map, obj) {
char *pin_path = NULL;
char buf[PATH_MAX];
- if (path) {
- int len;
+ if (!map->autocreate)
+ continue;
- len = snprintf(buf, PATH_MAX, "%s/%s", path,
- bpf_map__name(map));
- if (len < 0) {
- err = -EINVAL;
- goto err_unpin_maps;
- } else if (len >= PATH_MAX) {
- err = -ENAMETOOLONG;
+ if (path) {
+ err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
+ if (err)
goto err_unpin_maps;
- }
+ sanitize_pin_path(buf);
pin_path = buf;
} else if (!map->pin_path) {
continue;
@@ -7688,14 +8722,14 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
return 0;
err_unpin_maps:
- while ((map = bpf_map__prev(map, obj))) {
+ while ((map = bpf_object__prev_map(obj, map))) {
if (!map->pin_path)
continue;
bpf_map__unpin(map, NULL);
}
- return err;
+ return libbpf_err(err);
}
int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
@@ -7704,21 +8738,17 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
int err;
if (!obj)
- return -ENOENT;
+ return libbpf_err(-ENOENT);
bpf_object__for_each_map(map, obj) {
char *pin_path = NULL;
char buf[PATH_MAX];
if (path) {
- int len;
-
- len = snprintf(buf, PATH_MAX, "%s/%s", path,
- bpf_map__name(map));
- if (len < 0)
- return -EINVAL;
- else if (len >= PATH_MAX)
- return -ENAMETOOLONG;
+ err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
+ if (err)
+ return libbpf_err(err);
+ sanitize_pin_path(buf);
pin_path = buf;
} else if (!map->pin_path) {
continue;
@@ -7726,7 +8756,7 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
err = bpf_map__unpin(map, pin_path);
if (err)
- return err;
+ return libbpf_err(err);
}
return 0;
@@ -7735,29 +8765,21 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
{
struct bpf_program *prog;
+ char buf[PATH_MAX];
int err;
if (!obj)
- return -ENOENT;
+ return libbpf_err(-ENOENT);
if (!obj->loaded) {
pr_warn("object not yet loaded; load it first\n");
- return -ENOENT;
+ return libbpf_err(-ENOENT);
}
bpf_object__for_each_program(prog, obj) {
- char buf[PATH_MAX];
- int len;
-
- len = snprintf(buf, PATH_MAX, "%s/%s", path,
- prog->pin_name);
- if (len < 0) {
- err = -EINVAL;
- goto err_unpin_programs;
- } else if (len >= PATH_MAX) {
- err = -ENAMETOOLONG;
+ err = pathname_concat(buf, sizeof(buf), path, prog->name);
+ if (err)
goto err_unpin_programs;
- }
err = bpf_program__pin(prog, buf);
if (err)
@@ -7767,21 +8789,14 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
return 0;
err_unpin_programs:
- while ((prog = bpf_program__prev(prog, obj))) {
- char buf[PATH_MAX];
- int len;
-
- len = snprintf(buf, PATH_MAX, "%s/%s", path,
- prog->pin_name);
- if (len < 0)
- continue;
- else if (len >= PATH_MAX)
+ while ((prog = bpf_object__prev_program(obj, prog))) {
+ if (pathname_concat(buf, sizeof(buf), path, prog->name))
continue;
bpf_program__unpin(prog, buf);
}
- return err;
+ return libbpf_err(err);
}
int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
@@ -7790,22 +8805,18 @@ int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
int err;
if (!obj)
- return -ENOENT;
+ return libbpf_err(-ENOENT);
bpf_object__for_each_program(prog, obj) {
char buf[PATH_MAX];
- int len;
- len = snprintf(buf, PATH_MAX, "%s/%s", path,
- prog->pin_name);
- if (len < 0)
- return -EINVAL;
- else if (len >= PATH_MAX)
- return -ENAMETOOLONG;
+ err = pathname_concat(buf, sizeof(buf), path, prog->name);
+ if (err)
+ return libbpf_err(err);
err = bpf_program__unpin(prog, buf);
if (err)
- return err;
+ return libbpf_err(err);
}
return 0;
@@ -7817,24 +8828,34 @@ int bpf_object__pin(struct bpf_object *obj, const char *path)
err = bpf_object__pin_maps(obj, path);
if (err)
- return err;
+ return libbpf_err(err);
err = bpf_object__pin_programs(obj, path);
if (err) {
bpf_object__unpin_maps(obj, path);
- return err;
+ return libbpf_err(err);
}
return 0;
}
-static void bpf_map__destroy(struct bpf_map *map)
+int bpf_object__unpin(struct bpf_object *obj, const char *path)
{
- if (map->clear_priv)
- map->clear_priv(map, map->priv);
- map->priv = NULL;
- map->clear_priv = NULL;
+ int err;
+
+ err = bpf_object__unpin_programs(obj, path);
+ if (err)
+ return libbpf_err(err);
+
+ err = bpf_object__unpin_maps(obj, path);
+ if (err)
+ return libbpf_err(err);
+
+ return 0;
+}
+static void bpf_map__destroy(struct bpf_map *map)
+{
if (map->inner_map) {
bpf_map__destroy(map->inner_map);
zfree(&map->inner_map);
@@ -7843,10 +8864,9 @@ static void bpf_map__destroy(struct bpf_map *map)
zfree(&map->init_slots);
map->init_slots_sz = 0;
- if (map->mmaped) {
+ if (map->mmaped && map->mmaped != map->obj->arena_data)
munmap(map->mmaped, bpf_map_mmap_sz(map));
- map->mmaped = NULL;
- }
+ map->mmaped = NULL;
if (map->st_ops) {
zfree(&map->st_ops->data);
@@ -7856,6 +8876,7 @@ static void bpf_map__destroy(struct bpf_map *map)
}
zfree(&map->name);
+ zfree(&map->real_name);
zfree(&map->pin_path);
if (map->fd >= 0)
@@ -7869,18 +8890,25 @@ void bpf_object__close(struct bpf_object *obj)
if (IS_ERR_OR_NULL(obj))
return;
- if (obj->clear_priv)
- obj->clear_priv(obj, obj->priv);
+ usdt_manager_free(obj->usdt_man);
+ obj->usdt_man = NULL;
+ bpf_gen__free(obj->gen_loader);
bpf_object__elf_finish(obj);
- bpf_object__unload(obj);
+ bpf_object_unload(obj);
btf__free(obj->btf);
+ btf__free(obj->btf_vmlinux);
btf_ext__free(obj->btf_ext);
for (i = 0; i < obj->nr_maps; i++)
bpf_map__destroy(&obj->maps[i]);
+ zfree(&obj->btf_custom_path);
zfree(&obj->kconfig);
+
+ for (i = 0; i < obj->nr_extern; i++)
+ zfree(&obj->externs[i].essent_name);
+
zfree(&obj->externs);
obj->nr_extern = 0;
@@ -7893,32 +8921,19 @@ void bpf_object__close(struct bpf_object *obj)
}
zfree(&obj->programs);
- list_del(&obj->list);
- free(obj);
-}
-
-struct bpf_object *
-bpf_object__next(struct bpf_object *prev)
-{
- struct bpf_object *next;
-
- if (!prev)
- next = list_first_entry(&bpf_objects_list,
- struct bpf_object,
- list);
- else
- next = list_next_entry(prev, list);
+ zfree(&obj->feat_cache);
+ zfree(&obj->token_path);
+ if (obj->token_fd > 0)
+ close(obj->token_fd);
- /* Empty list is noticed here so don't need checking on entry. */
- if (&next->list == &bpf_objects_list)
- return NULL;
+ zfree(&obj->arena_data);
- return next;
+ free(obj);
}
const char *bpf_object__name(const struct bpf_object *obj)
{
- return obj ? obj->name : ERR_PTR(-EINVAL);
+ return obj ? obj->name : libbpf_err_ptr(-EINVAL);
}
unsigned int bpf_object__kversion(const struct bpf_object *obj)
@@ -7936,20 +8951,30 @@ int bpf_object__btf_fd(const struct bpf_object *obj)
return obj->btf ? btf__fd(obj->btf) : -1;
}
-int bpf_object__set_priv(struct bpf_object *obj, void *priv,
- bpf_object_clear_priv_t clear_priv)
+int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version)
{
- if (obj->priv && obj->clear_priv)
- obj->clear_priv(obj, obj->priv);
+ if (obj->loaded)
+ return libbpf_err(-EINVAL);
+
+ obj->kern_version = kern_version;
- obj->priv = priv;
- obj->clear_priv = clear_priv;
return 0;
}
-void *bpf_object__priv(const struct bpf_object *obj)
+int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts)
{
- return obj ? obj->priv : ERR_PTR(-EINVAL);
+ struct bpf_gen *gen;
+
+ if (!opts)
+ return -EFAULT;
+ if (!OPTS_VALID(opts, gen_loader_opts))
+ return -EINVAL;
+ gen = calloc(sizeof(*gen), 1);
+ if (!gen)
+ return -ENOMEM;
+ gen->opts = opts;
+ obj->gen_loader = gen;
+ return 0;
}
static struct bpf_program *
@@ -7969,7 +8994,7 @@ __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
if (p->obj != obj) {
pr_warn("error: program handler doesn't match object\n");
- return NULL;
+ return errno = EINVAL, NULL;
}
idx = (p - obj->programs) + (forward ? 1 : -1);
@@ -7979,7 +9004,7 @@ __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
}
struct bpf_program *
-bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj)
+bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
{
struct bpf_program *prog = prev;
@@ -7991,7 +9016,7 @@ bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj)
}
struct bpf_program *
-bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj)
+bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next)
{
struct bpf_program *prog = next;
@@ -8002,22 +9027,6 @@ bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj)
return prog;
}
-int bpf_program__set_priv(struct bpf_program *prog, void *priv,
- bpf_program_clear_priv_t clear_priv)
-{
- if (prog->priv && prog->clear_priv)
- prog->clear_priv(prog, prog->priv);
-
- prog->priv = priv;
- prog->clear_priv = clear_priv;
- return 0;
-}
-
-void *bpf_program__priv(const struct bpf_program *prog)
-{
- return prog ? prog->priv : ERR_PTR(-EINVAL);
-}
-
void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
{
prog->prog_ifindex = ifindex;
@@ -8033,369 +9042,441 @@ const char *bpf_program__section_name(const struct bpf_program *prog)
return prog->sec_name;
}
-const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
-{
- const char *title;
-
- title = prog->sec_name;
- if (needs_copy) {
- title = strdup(title);
- if (!title) {
- pr_warn("failed to strdup program title\n");
- return ERR_PTR(-ENOMEM);
- }
- }
-
- return title;
-}
-
bool bpf_program__autoload(const struct bpf_program *prog)
{
- return prog->load;
+ return prog->autoload;
}
int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
{
if (prog->obj->loaded)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
- prog->load = autoload;
+ prog->autoload = autoload;
return 0;
}
-int bpf_program__fd(const struct bpf_program *prog)
+bool bpf_program__autoattach(const struct bpf_program *prog)
{
- return bpf_program__nth_fd(prog, 0);
+ return prog->autoattach;
}
-size_t bpf_program__size(const struct bpf_program *prog)
+void bpf_program__set_autoattach(struct bpf_program *prog, bool autoattach)
{
- return prog->insns_cnt * BPF_INSN_SZ;
+ prog->autoattach = autoattach;
}
-int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
- bpf_program_prep_t prep)
+const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog)
{
- int *instances_fds;
+ return prog->insns;
+}
- if (nr_instances <= 0 || !prep)
- return -EINVAL;
+size_t bpf_program__insn_cnt(const struct bpf_program *prog)
+{
+ return prog->insns_cnt;
+}
- if (prog->instances.nr > 0 || prog->instances.fds) {
- pr_warn("Can't set pre-processor after loading\n");
- return -EINVAL;
- }
+int bpf_program__set_insns(struct bpf_program *prog,
+ struct bpf_insn *new_insns, size_t new_insn_cnt)
+{
+ struct bpf_insn *insns;
+
+ if (prog->obj->loaded)
+ return -EBUSY;
- instances_fds = malloc(sizeof(int) * nr_instances);
- if (!instances_fds) {
- pr_warn("alloc memory failed for fds\n");
+ insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns));
+ /* NULL is a valid return from reallocarray if the new count is zero */
+ if (!insns && new_insn_cnt) {
+ pr_warn("prog '%s': failed to realloc prog code\n", prog->name);
return -ENOMEM;
}
+ memcpy(insns, new_insns, new_insn_cnt * sizeof(*insns));
- /* fill all fd with -1 */
- memset(instances_fds, -1, sizeof(int) * nr_instances);
-
- prog->instances.nr = nr_instances;
- prog->instances.fds = instances_fds;
- prog->preprocessor = prep;
+ prog->insns = insns;
+ prog->insns_cnt = new_insn_cnt;
return 0;
}
-int bpf_program__nth_fd(const struct bpf_program *prog, int n)
+int bpf_program__fd(const struct bpf_program *prog)
{
- int fd;
-
if (!prog)
- return -EINVAL;
-
- if (n >= prog->instances.nr || n < 0) {
- pr_warn("Can't get the %dth fd from program %s: only %d instances\n",
- n, prog->name, prog->instances.nr);
- return -EINVAL;
- }
+ return libbpf_err(-EINVAL);
- fd = prog->instances.fds[n];
- if (fd < 0) {
- pr_warn("%dth instance of program '%s' is invalid\n",
- n, prog->name);
- return -ENOENT;
- }
+ if (prog->fd < 0)
+ return libbpf_err(-ENOENT);
- return fd;
+ return prog->fd;
}
-enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog)
+__alias(bpf_program__type)
+enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog);
+
+enum bpf_prog_type bpf_program__type(const struct bpf_program *prog)
{
return prog->type;
}
-void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
+static size_t custom_sec_def_cnt;
+static struct bpf_sec_def *custom_sec_defs;
+static struct bpf_sec_def custom_fallback_def;
+static bool has_custom_fallback_def;
+static int last_custom_sec_def_handler_id;
+
+int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
{
+ if (prog->obj->loaded)
+ return libbpf_err(-EBUSY);
+
+ /* if type is not changed, do nothing */
+ if (prog->type == type)
+ return 0;
+
prog->type = type;
+
+ /* If a program type was changed, we need to reset associated SEC()
+ * handler, as it will be invalid now. The only exception is a generic
+ * fallback handler, which by definition is program type-agnostic and
+ * is a catch-all custom handler, optionally set by the application,
+ * so should be able to handle any type of BPF program.
+ */
+ if (prog->sec_def != &custom_fallback_def)
+ prog->sec_def = NULL;
+ return 0;
}
-static bool bpf_program__is_type(const struct bpf_program *prog,
- enum bpf_prog_type type)
-{
- return prog ? (prog->type == type) : false;
-}
-
-#define BPF_PROG_TYPE_FNS(NAME, TYPE) \
-int bpf_program__set_##NAME(struct bpf_program *prog) \
-{ \
- if (!prog) \
- return -EINVAL; \
- bpf_program__set_type(prog, TYPE); \
- return 0; \
-} \
- \
-bool bpf_program__is_##NAME(const struct bpf_program *prog) \
-{ \
- return bpf_program__is_type(prog, TYPE); \
-} \
-
-BPF_PROG_TYPE_FNS(socket_filter, BPF_PROG_TYPE_SOCKET_FILTER);
-BPF_PROG_TYPE_FNS(lsm, BPF_PROG_TYPE_LSM);
-BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE);
-BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS);
-BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT);
-BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);
-BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT);
-BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
-BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
-BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING);
-BPF_PROG_TYPE_FNS(struct_ops, BPF_PROG_TYPE_STRUCT_OPS);
-BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT);
-BPF_PROG_TYPE_FNS(sk_lookup, BPF_PROG_TYPE_SK_LOOKUP);
-
-enum bpf_attach_type
-bpf_program__get_expected_attach_type(struct bpf_program *prog)
+__alias(bpf_program__expected_attach_type)
+enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog);
+
+enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program *prog)
{
return prog->expected_attach_type;
}
-void bpf_program__set_expected_attach_type(struct bpf_program *prog,
+int bpf_program__set_expected_attach_type(struct bpf_program *prog,
enum bpf_attach_type type)
{
+ if (prog->obj->loaded)
+ return libbpf_err(-EBUSY);
+
prog->expected_attach_type = type;
+ return 0;
}
-#define BPF_PROG_SEC_IMPL(string, ptype, eatype, eatype_optional, \
- attachable, attach_btf) \
- { \
- .sec = string, \
- .len = sizeof(string) - 1, \
- .prog_type = ptype, \
- .expected_attach_type = eatype, \
- .is_exp_attach_type_optional = eatype_optional, \
- .is_attachable = attachable, \
- .is_attach_btf = attach_btf, \
- }
+__u32 bpf_program__flags(const struct bpf_program *prog)
+{
+ return prog->prog_flags;
+}
-/* Programs that can NOT be attached. */
-#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0, 0)
+int bpf_program__set_flags(struct bpf_program *prog, __u32 flags)
+{
+ if (prog->obj->loaded)
+ return libbpf_err(-EBUSY);
-/* Programs that can be attached. */
-#define BPF_APROG_SEC(string, ptype, atype) \
- BPF_PROG_SEC_IMPL(string, ptype, atype, true, 1, 0)
+ prog->prog_flags = flags;
+ return 0;
+}
-/* Programs that must specify expected attach type at load time. */
-#define BPF_EAPROG_SEC(string, ptype, eatype) \
- BPF_PROG_SEC_IMPL(string, ptype, eatype, false, 1, 0)
+__u32 bpf_program__log_level(const struct bpf_program *prog)
+{
+ return prog->log_level;
+}
-/* Programs that use BTF to identify attach point */
-#define BPF_PROG_BTF(string, ptype, eatype) \
- BPF_PROG_SEC_IMPL(string, ptype, eatype, false, 0, 1)
+int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level)
+{
+ if (prog->obj->loaded)
+ return libbpf_err(-EBUSY);
-/* Programs that can be attached but attach type can't be identified by section
- * name. Kept for backward compatibility.
- */
-#define BPF_APROG_COMPAT(string, ptype) BPF_PROG_SEC(string, ptype)
+ prog->log_level = log_level;
+ return 0;
+}
-#define SEC_DEF(sec_pfx, ptype, ...) { \
- .sec = sec_pfx, \
- .len = sizeof(sec_pfx) - 1, \
+const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size)
+{
+ *log_size = prog->log_size;
+ return prog->log_buf;
+}
+
+int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size)
+{
+ if (log_size && !log_buf)
+ return -EINVAL;
+ if (prog->log_size > UINT_MAX)
+ return -EINVAL;
+ if (prog->obj->loaded)
+ return -EBUSY;
+
+ prog->log_buf = log_buf;
+ prog->log_size = log_size;
+ return 0;
+}
+
+#define SEC_DEF(sec_pfx, ptype, atype, flags, ...) { \
+ .sec = (char *)sec_pfx, \
.prog_type = BPF_PROG_TYPE_##ptype, \
+ .expected_attach_type = atype, \
+ .cookie = (long)(flags), \
+ .prog_prepare_load_fn = libbpf_prepare_prog_load, \
__VA_ARGS__ \
}
-static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
- struct bpf_program *prog);
-static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
- struct bpf_program *prog);
-static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
- struct bpf_program *prog);
-static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
- struct bpf_program *prog);
-static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
- struct bpf_program *prog);
-static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
- struct bpf_program *prog);
+static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
+static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
+static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link);
+static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link);
+static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
+static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
+static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link);
+static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link);
+static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link);
+static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link);
+static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static const struct bpf_sec_def section_defs[] = {
- BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER),
- BPF_PROG_SEC("sk_reuseport", BPF_PROG_TYPE_SK_REUSEPORT),
- SEC_DEF("kprobe/", KPROBE,
- .attach_fn = attach_kprobe),
- BPF_PROG_SEC("uprobe/", BPF_PROG_TYPE_KPROBE),
- SEC_DEF("kretprobe/", KPROBE,
- .attach_fn = attach_kprobe),
- BPF_PROG_SEC("uretprobe/", BPF_PROG_TYPE_KPROBE),
- BPF_PROG_SEC("classifier", BPF_PROG_TYPE_SCHED_CLS),
- BPF_PROG_SEC("action", BPF_PROG_TYPE_SCHED_ACT),
- SEC_DEF("tracepoint/", TRACEPOINT,
- .attach_fn = attach_tp),
- SEC_DEF("tp/", TRACEPOINT,
- .attach_fn = attach_tp),
- SEC_DEF("raw_tracepoint/", RAW_TRACEPOINT,
- .attach_fn = attach_raw_tp),
- SEC_DEF("raw_tp/", RAW_TRACEPOINT,
- .attach_fn = attach_raw_tp),
- SEC_DEF("tp_btf/", TRACING,
- .expected_attach_type = BPF_TRACE_RAW_TP,
- .is_attach_btf = true,
- .attach_fn = attach_trace),
- SEC_DEF("fentry/", TRACING,
- .expected_attach_type = BPF_TRACE_FENTRY,
- .is_attach_btf = true,
- .attach_fn = attach_trace),
- SEC_DEF("fmod_ret/", TRACING,
- .expected_attach_type = BPF_MODIFY_RETURN,
- .is_attach_btf = true,
- .attach_fn = attach_trace),
- SEC_DEF("fexit/", TRACING,
- .expected_attach_type = BPF_TRACE_FEXIT,
- .is_attach_btf = true,
- .attach_fn = attach_trace),
- SEC_DEF("fentry.s/", TRACING,
- .expected_attach_type = BPF_TRACE_FENTRY,
- .is_attach_btf = true,
- .is_sleepable = true,
- .attach_fn = attach_trace),
- SEC_DEF("fmod_ret.s/", TRACING,
- .expected_attach_type = BPF_MODIFY_RETURN,
- .is_attach_btf = true,
- .is_sleepable = true,
- .attach_fn = attach_trace),
- SEC_DEF("fexit.s/", TRACING,
- .expected_attach_type = BPF_TRACE_FEXIT,
- .is_attach_btf = true,
- .is_sleepable = true,
- .attach_fn = attach_trace),
- SEC_DEF("freplace/", EXT,
- .is_attach_btf = true,
- .attach_fn = attach_trace),
- SEC_DEF("lsm/", LSM,
- .is_attach_btf = true,
- .expected_attach_type = BPF_LSM_MAC,
- .attach_fn = attach_lsm),
- SEC_DEF("lsm.s/", LSM,
- .is_attach_btf = true,
- .is_sleepable = true,
- .expected_attach_type = BPF_LSM_MAC,
- .attach_fn = attach_lsm),
- SEC_DEF("iter/", TRACING,
- .expected_attach_type = BPF_TRACE_ITER,
- .is_attach_btf = true,
- .attach_fn = attach_iter),
- BPF_EAPROG_SEC("xdp_devmap/", BPF_PROG_TYPE_XDP,
- BPF_XDP_DEVMAP),
- BPF_EAPROG_SEC("xdp_cpumap/", BPF_PROG_TYPE_XDP,
- BPF_XDP_CPUMAP),
- BPF_APROG_SEC("xdp", BPF_PROG_TYPE_XDP,
- BPF_XDP),
- BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT),
- BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN),
- BPF_PROG_SEC("lwt_out", BPF_PROG_TYPE_LWT_OUT),
- BPF_PROG_SEC("lwt_xmit", BPF_PROG_TYPE_LWT_XMIT),
- BPF_PROG_SEC("lwt_seg6local", BPF_PROG_TYPE_LWT_SEG6LOCAL),
- BPF_APROG_SEC("cgroup_skb/ingress", BPF_PROG_TYPE_CGROUP_SKB,
- BPF_CGROUP_INET_INGRESS),
- BPF_APROG_SEC("cgroup_skb/egress", BPF_PROG_TYPE_CGROUP_SKB,
- BPF_CGROUP_INET_EGRESS),
- BPF_APROG_COMPAT("cgroup/skb", BPF_PROG_TYPE_CGROUP_SKB),
- BPF_EAPROG_SEC("cgroup/sock_create", BPF_PROG_TYPE_CGROUP_SOCK,
- BPF_CGROUP_INET_SOCK_CREATE),
- BPF_EAPROG_SEC("cgroup/sock_release", BPF_PROG_TYPE_CGROUP_SOCK,
- BPF_CGROUP_INET_SOCK_RELEASE),
- BPF_APROG_SEC("cgroup/sock", BPF_PROG_TYPE_CGROUP_SOCK,
- BPF_CGROUP_INET_SOCK_CREATE),
- BPF_EAPROG_SEC("cgroup/post_bind4", BPF_PROG_TYPE_CGROUP_SOCK,
- BPF_CGROUP_INET4_POST_BIND),
- BPF_EAPROG_SEC("cgroup/post_bind6", BPF_PROG_TYPE_CGROUP_SOCK,
- BPF_CGROUP_INET6_POST_BIND),
- BPF_APROG_SEC("cgroup/dev", BPF_PROG_TYPE_CGROUP_DEVICE,
- BPF_CGROUP_DEVICE),
- BPF_APROG_SEC("sockops", BPF_PROG_TYPE_SOCK_OPS,
- BPF_CGROUP_SOCK_OPS),
- BPF_APROG_SEC("sk_skb/stream_parser", BPF_PROG_TYPE_SK_SKB,
- BPF_SK_SKB_STREAM_PARSER),
- BPF_APROG_SEC("sk_skb/stream_verdict", BPF_PROG_TYPE_SK_SKB,
- BPF_SK_SKB_STREAM_VERDICT),
- BPF_APROG_COMPAT("sk_skb", BPF_PROG_TYPE_SK_SKB),
- BPF_APROG_SEC("sk_msg", BPF_PROG_TYPE_SK_MSG,
- BPF_SK_MSG_VERDICT),
- BPF_APROG_SEC("lirc_mode2", BPF_PROG_TYPE_LIRC_MODE2,
- BPF_LIRC_MODE2),
- BPF_APROG_SEC("flow_dissector", BPF_PROG_TYPE_FLOW_DISSECTOR,
- BPF_FLOW_DISSECTOR),
- BPF_EAPROG_SEC("cgroup/bind4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
- BPF_CGROUP_INET4_BIND),
- BPF_EAPROG_SEC("cgroup/bind6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
- BPF_CGROUP_INET6_BIND),
- BPF_EAPROG_SEC("cgroup/connect4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
- BPF_CGROUP_INET4_CONNECT),
- BPF_EAPROG_SEC("cgroup/connect6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
- BPF_CGROUP_INET6_CONNECT),
- BPF_EAPROG_SEC("cgroup/sendmsg4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
- BPF_CGROUP_UDP4_SENDMSG),
- BPF_EAPROG_SEC("cgroup/sendmsg6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
- BPF_CGROUP_UDP6_SENDMSG),
- BPF_EAPROG_SEC("cgroup/recvmsg4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
- BPF_CGROUP_UDP4_RECVMSG),
- BPF_EAPROG_SEC("cgroup/recvmsg6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
- BPF_CGROUP_UDP6_RECVMSG),
- BPF_EAPROG_SEC("cgroup/getpeername4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
- BPF_CGROUP_INET4_GETPEERNAME),
- BPF_EAPROG_SEC("cgroup/getpeername6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
- BPF_CGROUP_INET6_GETPEERNAME),
- BPF_EAPROG_SEC("cgroup/getsockname4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
- BPF_CGROUP_INET4_GETSOCKNAME),
- BPF_EAPROG_SEC("cgroup/getsockname6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
- BPF_CGROUP_INET6_GETSOCKNAME),
- BPF_EAPROG_SEC("cgroup/sysctl", BPF_PROG_TYPE_CGROUP_SYSCTL,
- BPF_CGROUP_SYSCTL),
- BPF_EAPROG_SEC("cgroup/getsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT,
- BPF_CGROUP_GETSOCKOPT),
- BPF_EAPROG_SEC("cgroup/setsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT,
- BPF_CGROUP_SETSOCKOPT),
- BPF_PROG_SEC("struct_ops", BPF_PROG_TYPE_STRUCT_OPS),
- BPF_EAPROG_SEC("sk_lookup/", BPF_PROG_TYPE_SK_LOOKUP,
- BPF_SK_LOOKUP),
+ SEC_DEF("socket", SOCKET_FILTER, 0, SEC_NONE),
+ SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE),
+ SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE),
+ SEC_DEF("kprobe+", KPROBE, 0, SEC_NONE, attach_kprobe),
+ SEC_DEF("uprobe+", KPROBE, 0, SEC_NONE, attach_uprobe),
+ SEC_DEF("uprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
+ SEC_DEF("kretprobe+", KPROBE, 0, SEC_NONE, attach_kprobe),
+ SEC_DEF("uretprobe+", KPROBE, 0, SEC_NONE, attach_uprobe),
+ SEC_DEF("uretprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
+ SEC_DEF("kprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
+ SEC_DEF("kretprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
+ SEC_DEF("uprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi),
+ SEC_DEF("uretprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi),
+ SEC_DEF("uprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi),
+ SEC_DEF("uretprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi),
+ SEC_DEF("ksyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall),
+ SEC_DEF("kretsyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall),
+ SEC_DEF("usdt+", KPROBE, 0, SEC_USDT, attach_usdt),
+ SEC_DEF("usdt.s+", KPROBE, 0, SEC_USDT | SEC_SLEEPABLE, attach_usdt),
+ SEC_DEF("tc/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE), /* alias for tcx */
+ SEC_DEF("tc/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE), /* alias for tcx */
+ SEC_DEF("tcx/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE),
+ SEC_DEF("tcx/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE),
+ SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */
+ SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */
+ SEC_DEF("action", SCHED_ACT, 0, SEC_NONE), /* deprecated / legacy, use tcx */
+ SEC_DEF("netkit/primary", SCHED_CLS, BPF_NETKIT_PRIMARY, SEC_NONE),
+ SEC_DEF("netkit/peer", SCHED_CLS, BPF_NETKIT_PEER, SEC_NONE),
+ SEC_DEF("tracepoint+", TRACEPOINT, 0, SEC_NONE, attach_tp),
+ SEC_DEF("tp+", TRACEPOINT, 0, SEC_NONE, attach_tp),
+ SEC_DEF("raw_tracepoint+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
+ SEC_DEF("raw_tp+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
+ SEC_DEF("raw_tracepoint.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
+ SEC_DEF("raw_tp.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
+ SEC_DEF("tp_btf+", TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace),
+ SEC_DEF("fentry+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace),
+ SEC_DEF("fmod_ret+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace),
+ SEC_DEF("fexit+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace),
+ SEC_DEF("fentry.s+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
+ SEC_DEF("fmod_ret.s+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
+ SEC_DEF("fexit.s+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
+ SEC_DEF("freplace+", EXT, 0, SEC_ATTACH_BTF, attach_trace),
+ SEC_DEF("lsm+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
+ SEC_DEF("lsm.s+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
+ SEC_DEF("lsm_cgroup+", LSM, BPF_LSM_CGROUP, SEC_ATTACH_BTF),
+ SEC_DEF("iter+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
+ SEC_DEF("iter.s+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter),
+ SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE),
+ SEC_DEF("xdp.frags/devmap", XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS),
+ SEC_DEF("xdp/devmap", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
+ SEC_DEF("xdp.frags/cpumap", XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS),
+ SEC_DEF("xdp/cpumap", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE),
+ SEC_DEF("xdp.frags", XDP, BPF_XDP, SEC_XDP_FRAGS),
+ SEC_DEF("xdp", XDP, BPF_XDP, SEC_ATTACHABLE_OPT),
+ SEC_DEF("perf_event", PERF_EVENT, 0, SEC_NONE),
+ SEC_DEF("lwt_in", LWT_IN, 0, SEC_NONE),
+ SEC_DEF("lwt_out", LWT_OUT, 0, SEC_NONE),
+ SEC_DEF("lwt_xmit", LWT_XMIT, 0, SEC_NONE),
+ SEC_DEF("lwt_seg6local", LWT_SEG6LOCAL, 0, SEC_NONE),
+ SEC_DEF("sockops", SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT),
+ SEC_DEF("sk_skb/stream_parser", SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT),
+ SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT),
+ SEC_DEF("sk_skb", SK_SKB, 0, SEC_NONE),
+ SEC_DEF("sk_msg", SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT),
+ SEC_DEF("lirc_mode2", LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT),
+ SEC_DEF("flow_dissector", FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT),
+ SEC_DEF("cgroup_skb/ingress", CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT),
+ SEC_DEF("cgroup_skb/egress", CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT),
+ SEC_DEF("cgroup/skb", CGROUP_SKB, 0, SEC_NONE),
+ SEC_DEF("cgroup/sock_create", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/sock_release", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/sock", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT),
+ SEC_DEF("cgroup/post_bind4", CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/post_bind6", CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/bind4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/bind6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/connect4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/connect6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/connect_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_CONNECT, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/sendmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/sendmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/sendmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_SENDMSG, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/recvmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/recvmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/recvmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_RECVMSG, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/getpeername4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/getpeername6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/getpeername_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETPEERNAME, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/getsockname4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/getsockname6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/getsockname_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETSOCKNAME, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/sysctl", CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/getsockopt", CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/setsockopt", CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/dev", CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT),
+ SEC_DEF("struct_ops+", STRUCT_OPS, 0, SEC_NONE),
+ SEC_DEF("struct_ops.s+", STRUCT_OPS, 0, SEC_SLEEPABLE),
+ SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE),
+ SEC_DEF("netfilter", NETFILTER, BPF_NETFILTER, SEC_NONE),
};
-#undef BPF_PROG_SEC_IMPL
-#undef BPF_PROG_SEC
-#undef BPF_APROG_SEC
-#undef BPF_EAPROG_SEC
-#undef BPF_APROG_COMPAT
-#undef SEC_DEF
+int libbpf_register_prog_handler(const char *sec,
+ enum bpf_prog_type prog_type,
+ enum bpf_attach_type exp_attach_type,
+ const struct libbpf_prog_handler_opts *opts)
+{
+ struct bpf_sec_def *sec_def;
+
+ if (!OPTS_VALID(opts, libbpf_prog_handler_opts))
+ return libbpf_err(-EINVAL);
-#define MAX_TYPE_NAME_SIZE 32
+ if (last_custom_sec_def_handler_id == INT_MAX) /* prevent overflow */
+ return libbpf_err(-E2BIG);
+
+ if (sec) {
+ sec_def = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt + 1,
+ sizeof(*sec_def));
+ if (!sec_def)
+ return libbpf_err(-ENOMEM);
+
+ custom_sec_defs = sec_def;
+ sec_def = &custom_sec_defs[custom_sec_def_cnt];
+ } else {
+ if (has_custom_fallback_def)
+ return libbpf_err(-EBUSY);
+
+ sec_def = &custom_fallback_def;
+ }
+
+ sec_def->sec = sec ? strdup(sec) : NULL;
+ if (sec && !sec_def->sec)
+ return libbpf_err(-ENOMEM);
+
+ sec_def->prog_type = prog_type;
+ sec_def->expected_attach_type = exp_attach_type;
+ sec_def->cookie = OPTS_GET(opts, cookie, 0);
+
+ sec_def->prog_setup_fn = OPTS_GET(opts, prog_setup_fn, NULL);
+ sec_def->prog_prepare_load_fn = OPTS_GET(opts, prog_prepare_load_fn, NULL);
+ sec_def->prog_attach_fn = OPTS_GET(opts, prog_attach_fn, NULL);
+
+ sec_def->handler_id = ++last_custom_sec_def_handler_id;
+
+ if (sec)
+ custom_sec_def_cnt++;
+ else
+ has_custom_fallback_def = true;
+
+ return sec_def->handler_id;
+}
+
+int libbpf_unregister_prog_handler(int handler_id)
+{
+ struct bpf_sec_def *sec_defs;
+ int i;
+
+ if (handler_id <= 0)
+ return libbpf_err(-EINVAL);
+
+ if (has_custom_fallback_def && custom_fallback_def.handler_id == handler_id) {
+ memset(&custom_fallback_def, 0, sizeof(custom_fallback_def));
+ has_custom_fallback_def = false;
+ return 0;
+ }
+
+ for (i = 0; i < custom_sec_def_cnt; i++) {
+ if (custom_sec_defs[i].handler_id == handler_id)
+ break;
+ }
+
+ if (i == custom_sec_def_cnt)
+ return libbpf_err(-ENOENT);
+
+ free(custom_sec_defs[i].sec);
+ for (i = i + 1; i < custom_sec_def_cnt; i++)
+ custom_sec_defs[i - 1] = custom_sec_defs[i];
+ custom_sec_def_cnt--;
+
+ /* try to shrink the array, but it's ok if we couldn't */
+ sec_defs = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt, sizeof(*sec_defs));
+ /* if new count is zero, reallocarray can return a valid NULL result;
+ * in this case the previous pointer will be freed, so we *have to*
+ * reassign old pointer to the new value (even if it's NULL)
+ */
+ if (sec_defs || custom_sec_def_cnt == 0)
+ custom_sec_defs = sec_defs;
+
+ return 0;
+}
+
+static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_name)
+{
+ size_t len = strlen(sec_def->sec);
+
+ /* "type/" always has to have proper SEC("type/extras") form */
+ if (sec_def->sec[len - 1] == '/') {
+ if (str_has_pfx(sec_name, sec_def->sec))
+ return true;
+ return false;
+ }
+
+ /* "type+" means it can be either exact SEC("type") or
+ * well-formed SEC("type/extras") with proper '/' separator
+ */
+ if (sec_def->sec[len - 1] == '+') {
+ len--;
+ /* not even a prefix */
+ if (strncmp(sec_name, sec_def->sec, len) != 0)
+ return false;
+ /* exact match or has '/' separator */
+ if (sec_name[len] == '\0' || sec_name[len] == '/')
+ return true;
+ return false;
+ }
+
+ return strcmp(sec_name, sec_def->sec) == 0;
+}
static const struct bpf_sec_def *find_sec_def(const char *sec_name)
{
- int i, n = ARRAY_SIZE(section_defs);
+ const struct bpf_sec_def *sec_def;
+ int i, n;
+ n = custom_sec_def_cnt;
for (i = 0; i < n; i++) {
- if (strncmp(sec_name,
- section_defs[i].sec, section_defs[i].len))
- continue;
- return &section_defs[i];
+ sec_def = &custom_sec_defs[i];
+ if (sec_def_matches(sec_def, sec_name))
+ return sec_def;
+ }
+
+ n = ARRAY_SIZE(section_defs);
+ for (i = 0; i < n; i++) {
+ sec_def = &section_defs[i];
+ if (sec_def_matches(sec_def, sec_name))
+ return sec_def;
}
+
+ if (has_custom_fallback_def)
+ return &custom_fallback_def;
+
return NULL;
}
+#define MAX_TYPE_NAME_SIZE 32
+
static char *libbpf_get_type_names(bool attach_type)
{
int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE;
@@ -8408,8 +9489,15 @@ static char *libbpf_get_type_names(bool attach_type)
buf[0] = '\0';
/* Forge string buf with all available names */
for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
- if (attach_type && !section_defs[i].is_attachable)
- continue;
+ const struct bpf_sec_def *sec_def = &section_defs[i];
+
+ if (attach_type) {
+ if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load)
+ continue;
+
+ if (!(sec_def->cookie & SEC_ATTACHABLE))
+ continue;
+ }
if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) {
free(buf);
@@ -8429,7 +9517,7 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
char *type_names;
if (!name)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
sec_def = find_sec_def(name);
if (sec_def) {
@@ -8445,10 +9533,43 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
free(type_names);
}
- return -ESRCH;
+ return libbpf_err(-ESRCH);
+}
+
+const char *libbpf_bpf_attach_type_str(enum bpf_attach_type t)
+{
+ if (t < 0 || t >= ARRAY_SIZE(attach_type_name))
+ return NULL;
+
+ return attach_type_name[t];
+}
+
+const char *libbpf_bpf_link_type_str(enum bpf_link_type t)
+{
+ if (t < 0 || t >= ARRAY_SIZE(link_type_name))
+ return NULL;
+
+ return link_type_name[t];
+}
+
+const char *libbpf_bpf_map_type_str(enum bpf_map_type t)
+{
+ if (t < 0 || t >= ARRAY_SIZE(map_type_name))
+ return NULL;
+
+ return map_type_name[t];
+}
+
+const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t)
+{
+ if (t < 0 || t >= ARRAY_SIZE(prog_type_name))
+ return NULL;
+
+ return prog_type_name[t];
}
static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
+ int sec_idx,
size_t offset)
{
struct bpf_map *map;
@@ -8458,7 +9579,8 @@ static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
map = &obj->maps[i];
if (!bpf_map__is_struct_ops(map))
continue;
- if (map->sec_offset <= offset &&
+ if (map->sec_idx == sec_idx &&
+ map->sec_offset <= offset &&
offset - map->sec_offset < map->def.value_size)
return map;
}
@@ -8466,9 +9588,11 @@ static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
return NULL;
}
-/* Collect the reloc from ELF and populate the st_ops->progs[] */
+/* Collect the reloc from ELF, populate the st_ops->progs[], and update
+ * st_ops->data for shadow type.
+ */
static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
- GElf_Shdr *shdr, Elf_Data *data)
+ Elf64_Shdr *shdr, Elf_Data *data)
{
const struct btf_member *member;
struct bpf_struct_ops *st_ops;
@@ -8476,58 +9600,58 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
unsigned int shdr_idx;
const struct btf *btf;
struct bpf_map *map;
- Elf_Data *symbols;
unsigned int moff, insn_idx;
const char *name;
__u32 member_idx;
- GElf_Sym sym;
- GElf_Rel rel;
+ Elf64_Sym *sym;
+ Elf64_Rel *rel;
int i, nrels;
- symbols = obj->efile.symbols;
btf = obj->btf;
nrels = shdr->sh_size / shdr->sh_entsize;
for (i = 0; i < nrels; i++) {
- if (!gelf_getrel(data, i, &rel)) {
+ rel = elf_rel_by_idx(data, i);
+ if (!rel) {
pr_warn("struct_ops reloc: failed to get %d reloc\n", i);
return -LIBBPF_ERRNO__FORMAT;
}
- if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
+ sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
+ if (!sym) {
pr_warn("struct_ops reloc: symbol %zx not found\n",
- (size_t)GELF_R_SYM(rel.r_info));
+ (size_t)ELF64_R_SYM(rel->r_info));
return -LIBBPF_ERRNO__FORMAT;
}
- name = elf_sym_str(obj, sym.st_name) ?: "<?>";
- map = find_struct_ops_map_by_offset(obj, rel.r_offset);
+ name = elf_sym_str(obj, sym->st_name) ?: "<?>";
+ map = find_struct_ops_map_by_offset(obj, shdr->sh_info, rel->r_offset);
if (!map) {
- pr_warn("struct_ops reloc: cannot find map at rel.r_offset %zu\n",
- (size_t)rel.r_offset);
+ pr_warn("struct_ops reloc: cannot find map at rel->r_offset %zu\n",
+ (size_t)rel->r_offset);
return -EINVAL;
}
- moff = rel.r_offset - map->sec_offset;
- shdr_idx = sym.st_shndx;
+ moff = rel->r_offset - map->sec_offset;
+ shdr_idx = sym->st_shndx;
st_ops = map->st_ops;
- pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel.r_offset %zu map->sec_offset %zu name %d (\'%s\')\n",
+ pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel->r_offset %zu map->sec_offset %zu name %d (\'%s\')\n",
map->name,
- (long long)(rel.r_info >> 32),
- (long long)sym.st_value,
- shdr_idx, (size_t)rel.r_offset,
- map->sec_offset, sym.st_name, name);
+ (long long)(rel->r_info >> 32),
+ (long long)sym->st_value,
+ shdr_idx, (size_t)rel->r_offset,
+ map->sec_offset, sym->st_name, name);
if (shdr_idx >= SHN_LORESERVE) {
- pr_warn("struct_ops reloc %s: rel.r_offset %zu shdr_idx %u unsupported non-static function\n",
- map->name, (size_t)rel.r_offset, shdr_idx);
+ pr_warn("struct_ops reloc %s: rel->r_offset %zu shdr_idx %u unsupported non-static function\n",
+ map->name, (size_t)rel->r_offset, shdr_idx);
return -LIBBPF_ERRNO__RELOC;
}
- if (sym.st_value % BPF_INSN_SZ) {
+ if (sym->st_value % BPF_INSN_SZ) {
pr_warn("struct_ops reloc %s: invalid target program offset %llu\n",
- map->name, (unsigned long long)sym.st_value);
+ map->name, (unsigned long long)sym->st_value);
return -LIBBPF_ERRNO__FORMAT;
}
- insn_idx = sym.st_value / BPF_INSN_SZ;
+ insn_idx = sym->st_value / BPF_INSN_SZ;
member = find_member_by_offset(st_ops->type, moff * 8);
if (!member) {
@@ -8551,35 +9675,25 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
return -EINVAL;
}
- if (prog->type == BPF_PROG_TYPE_UNSPEC) {
- const struct bpf_sec_def *sec_def;
-
- sec_def = find_sec_def(prog->sec_name);
- if (sec_def &&
- sec_def->prog_type != BPF_PROG_TYPE_STRUCT_OPS) {
- /* for pr_warn */
- prog->type = sec_def->prog_type;
- goto invalid_prog;
- }
-
- prog->type = BPF_PROG_TYPE_STRUCT_OPS;
- prog->attach_btf_id = st_ops->type_id;
- prog->expected_attach_type = member_idx;
- } else if (prog->type != BPF_PROG_TYPE_STRUCT_OPS ||
- prog->attach_btf_id != st_ops->type_id ||
- prog->expected_attach_type != member_idx) {
- goto invalid_prog;
+ /* prevent the use of BPF prog with invalid type */
+ if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) {
+ pr_warn("struct_ops reloc %s: prog %s is not struct_ops BPF program\n",
+ map->name, prog->name);
+ return -EINVAL;
}
+
st_ops->progs[member_idx] = prog;
+
+ /* st_ops->data will be exposed to users, being returned by
+ * bpf_map__initial_value() as a pointer to the shadow
+ * type. All function pointers in the original struct type
+ * should be converted to a pointer to struct bpf_program
+ * in the shadow type.
+ */
+ *((struct bpf_program **)(st_ops->data + moff)) = prog;
}
return 0;
-
-invalid_prog:
- pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n",
- map->name, prog->name, prog->sec_name, prog->type,
- prog->attach_btf_id, prog->expected_attach_type, name);
- return -EINVAL;
}
#define BTF_TRACE_PREFIX "btf_trace_"
@@ -8587,6 +9701,29 @@ invalid_prog:
#define BTF_ITER_PREFIX "bpf_iter_"
#define BTF_MAX_NAME_SIZE 128
+void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
+ const char **prefix, int *kind)
+{
+ switch (attach_type) {
+ case BPF_TRACE_RAW_TP:
+ *prefix = BTF_TRACE_PREFIX;
+ *kind = BTF_KIND_TYPEDEF;
+ break;
+ case BPF_LSM_MAC:
+ case BPF_LSM_CGROUP:
+ *prefix = BTF_LSM_PREFIX;
+ *kind = BTF_KIND_FUNC;
+ break;
+ case BPF_TRACE_ITER:
+ *prefix = BTF_ITER_PREFIX;
+ *kind = BTF_KIND_FUNC;
+ break;
+ default:
+ *prefix = "";
+ *kind = BTF_KIND_FUNC;
+ }
+}
+
static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
const char *name, __u32 kind)
{
@@ -8596,7 +9733,7 @@ static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
ret = snprintf(btf_type_name, sizeof(btf_type_name),
"%s%s", prefix, name);
/* snprintf returns the number of characters written excluding the
- * the terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it
+ * terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it
* indicates truncation.
*/
if (ret < 0 || ret >= sizeof(btf_type_name))
@@ -8604,27 +9741,14 @@ static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
return btf__find_by_name_kind(btf, btf_type_name, kind);
}
-static inline int __find_vmlinux_btf_id(struct btf *btf, const char *name,
- enum bpf_attach_type attach_type)
+static inline int find_attach_btf_id(struct btf *btf, const char *name,
+ enum bpf_attach_type attach_type)
{
- int err;
-
- if (attach_type == BPF_TRACE_RAW_TP)
- err = find_btf_by_prefix_kind(btf, BTF_TRACE_PREFIX, name,
- BTF_KIND_TYPEDEF);
- else if (attach_type == BPF_LSM_MAC)
- err = find_btf_by_prefix_kind(btf, BTF_LSM_PREFIX, name,
- BTF_KIND_FUNC);
- else if (attach_type == BPF_TRACE_ITER)
- err = find_btf_by_prefix_kind(btf, BTF_ITER_PREFIX, name,
- BTF_KIND_FUNC);
- else
- err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
-
- if (err <= 0)
- pr_warn("%s is not found in vmlinux BTF\n", name);
+ const char *prefix;
+ int kind;
- return err;
+ btf_get_kernel_prefix_kind(attach_type, &prefix, &kind);
+ return find_btf_by_prefix_kind(btf, prefix, name, kind);
}
int libbpf_find_vmlinux_btf_id(const char *name,
@@ -8633,37 +9757,45 @@ int libbpf_find_vmlinux_btf_id(const char *name,
struct btf *btf;
int err;
- btf = libbpf_find_kernel_btf();
- if (IS_ERR(btf)) {
+ btf = btf__load_vmlinux_btf();
+ err = libbpf_get_error(btf);
+ if (err) {
pr_warn("vmlinux BTF is not found\n");
- return -EINVAL;
+ return libbpf_err(err);
}
- err = __find_vmlinux_btf_id(btf, name, attach_type);
+ err = find_attach_btf_id(btf, name, attach_type);
+ if (err <= 0)
+ pr_warn("%s is not found in vmlinux BTF\n", name);
+
btf__free(btf);
- return err;
+ return libbpf_err(err);
}
static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
{
- struct bpf_prog_info_linear *info_linear;
- struct bpf_prog_info *info;
- struct btf *btf = NULL;
- int err = -EINVAL;
+ struct bpf_prog_info info;
+ __u32 info_len = sizeof(info);
+ struct btf *btf;
+ int err;
- info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0);
- if (IS_ERR_OR_NULL(info_linear)) {
- pr_warn("failed get_prog_info_linear for FD %d\n",
- attach_prog_fd);
- return -EINVAL;
+ memset(&info, 0, info_len);
+ err = bpf_prog_get_info_by_fd(attach_prog_fd, &info, &info_len);
+ if (err) {
+ pr_warn("failed bpf_prog_get_info_by_fd for FD %d: %d\n",
+ attach_prog_fd, err);
+ return err;
}
- info = &info_linear->info;
- if (!info->btf_id) {
+
+ err = -EINVAL;
+ if (!info.btf_id) {
pr_warn("The target program doesn't have BTF\n");
goto out;
}
- if (btf__get_from_id(info->btf_id, &btf)) {
- pr_warn("Failed to get BTF of the program\n");
+ btf = btf__load_from_kernel_by_id(info.btf_id);
+ err = libbpf_get_error(btf);
+ if (err) {
+ pr_warn("Failed to get BTF %d of the program: %d\n", info.btf_id, err);
goto out;
}
err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
@@ -8673,78 +9805,151 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
goto out;
}
out:
- free(info_linear);
return err;
}
-static int libbpf_find_attach_btf_id(struct bpf_program *prog)
+static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name,
+ enum bpf_attach_type attach_type,
+ int *btf_obj_fd, int *btf_type_id)
+{
+ int ret, i;
+
+ ret = find_attach_btf_id(obj->btf_vmlinux, attach_name, attach_type);
+ if (ret > 0) {
+ *btf_obj_fd = 0; /* vmlinux BTF */
+ *btf_type_id = ret;
+ return 0;
+ }
+ if (ret != -ENOENT)
+ return ret;
+
+ ret = load_module_btfs(obj);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < obj->btf_module_cnt; i++) {
+ const struct module_btf *mod = &obj->btf_modules[i];
+
+ ret = find_attach_btf_id(mod->btf, attach_name, attach_type);
+ if (ret > 0) {
+ *btf_obj_fd = mod->fd;
+ *btf_type_id = ret;
+ return 0;
+ }
+ if (ret == -ENOENT)
+ continue;
+
+ return ret;
+ }
+
+ return -ESRCH;
+}
+
+static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name,
+ int *btf_obj_fd, int *btf_type_id)
{
enum bpf_attach_type attach_type = prog->expected_attach_type;
__u32 attach_prog_fd = prog->attach_prog_fd;
- const char *name = prog->sec_name;
- int i, err;
+ int err = 0;
- if (!name)
- return -EINVAL;
+ /* BPF program's BTF ID */
+ if (prog->type == BPF_PROG_TYPE_EXT || attach_prog_fd) {
+ if (!attach_prog_fd) {
+ pr_warn("prog '%s': attach program FD is not set\n", prog->name);
+ return -EINVAL;
+ }
+ err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd);
+ if (err < 0) {
+ pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %d\n",
+ prog->name, attach_prog_fd, attach_name, err);
+ return err;
+ }
+ *btf_obj_fd = 0;
+ *btf_type_id = err;
+ return 0;
+ }
- for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
- if (!section_defs[i].is_attach_btf)
- continue;
- if (strncmp(name, section_defs[i].sec, section_defs[i].len))
- continue;
- if (attach_prog_fd)
- err = libbpf_find_prog_btf_id(name + section_defs[i].len,
- attach_prog_fd);
- else
- err = __find_vmlinux_btf_id(prog->obj->btf_vmlinux,
- name + section_defs[i].len,
- attach_type);
+ /* kernel/module BTF ID */
+ if (prog->obj->gen_loader) {
+ bpf_gen__record_attach_target(prog->obj->gen_loader, attach_name, attach_type);
+ *btf_obj_fd = 0;
+ *btf_type_id = 1;
+ } else {
+ err = find_kernel_btf_id(prog->obj, attach_name,
+ attach_type, btf_obj_fd,
+ btf_type_id);
+ }
+ if (err) {
+ pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %d\n",
+ prog->name, attach_name, err);
return err;
}
- pr_warn("failed to identify btf_id based on ELF section name '%s'\n", name);
- return -ESRCH;
+ return 0;
}
int libbpf_attach_type_by_name(const char *name,
enum bpf_attach_type *attach_type)
{
char *type_names;
- int i;
+ const struct bpf_sec_def *sec_def;
if (!name)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
- for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
- if (strncmp(name, section_defs[i].sec, section_defs[i].len))
- continue;
- if (!section_defs[i].is_attachable)
- return -EINVAL;
- *attach_type = section_defs[i].expected_attach_type;
- return 0;
- }
- pr_debug("failed to guess attach type based on ELF section name '%s'\n", name);
- type_names = libbpf_get_type_names(true);
- if (type_names != NULL) {
- pr_debug("attachable section(type) names are:%s\n", type_names);
- free(type_names);
+ sec_def = find_sec_def(name);
+ if (!sec_def) {
+ pr_debug("failed to guess attach type based on ELF section name '%s'\n", name);
+ type_names = libbpf_get_type_names(true);
+ if (type_names != NULL) {
+ pr_debug("attachable section(type) names are:%s\n", type_names);
+ free(type_names);
+ }
+
+ return libbpf_err(-EINVAL);
}
- return -EINVAL;
+ if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load)
+ return libbpf_err(-EINVAL);
+ if (!(sec_def->cookie & SEC_ATTACHABLE))
+ return libbpf_err(-EINVAL);
+
+ *attach_type = sec_def->expected_attach_type;
+ return 0;
}
int bpf_map__fd(const struct bpf_map *map)
{
- return map ? map->fd : -EINVAL;
+ if (!map)
+ return libbpf_err(-EINVAL);
+ if (!map_is_created(map))
+ return -1;
+ return map->fd;
}
-const struct bpf_map_def *bpf_map__def(const struct bpf_map *map)
+static bool map_uses_real_name(const struct bpf_map *map)
{
- return map ? &map->def : ERR_PTR(-EINVAL);
+ /* Since libbpf started to support custom .data.* and .rodata.* maps,
+ * their user-visible name differs from kernel-visible name. Users see
+ * such map's corresponding ELF section name as a map name.
+ * This check distinguishes .data/.rodata from .data.* and .rodata.*
+ * maps to know which name has to be returned to the user.
+ */
+ if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0)
+ return true;
+ if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0)
+ return true;
+ return false;
}
const char *bpf_map__name(const struct bpf_map *map)
{
- return map ? map->name : NULL;
+ if (!map)
+ return NULL;
+
+ if (map_uses_real_name(map))
+ return map->real_name;
+
+ return map->name;
}
enum bpf_map_type bpf_map__type(const struct bpf_map *map)
@@ -8754,8 +9959,8 @@ enum bpf_map_type bpf_map__type(const struct bpf_map *map)
int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type)
{
- if (map->fd >= 0)
- return -EBUSY;
+ if (map_is_created(map))
+ return libbpf_err(-EBUSY);
map->def.type = type;
return 0;
}
@@ -8767,12 +9972,25 @@ __u32 bpf_map__map_flags(const struct bpf_map *map)
int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags)
{
- if (map->fd >= 0)
- return -EBUSY;
+ if (map_is_created(map))
+ return libbpf_err(-EBUSY);
map->def.map_flags = flags;
return 0;
}
+__u64 bpf_map__map_extra(const struct bpf_map *map)
+{
+ return map->map_extra;
+}
+
+int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra)
+{
+ if (map_is_created(map))
+ return libbpf_err(-EBUSY);
+ map->map_extra = map_extra;
+ return 0;
+}
+
__u32 bpf_map__numa_node(const struct bpf_map *map)
{
return map->numa_node;
@@ -8780,8 +9998,8 @@ __u32 bpf_map__numa_node(const struct bpf_map *map)
int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node)
{
- if (map->fd >= 0)
- return -EBUSY;
+ if (map_is_created(map))
+ return libbpf_err(-EBUSY);
map->numa_node = numa_node;
return 0;
}
@@ -8793,8 +10011,8 @@ __u32 bpf_map__key_size(const struct bpf_map *map)
int bpf_map__set_key_size(struct bpf_map *map, __u32 size)
{
- if (map->fd >= 0)
- return -EBUSY;
+ if (map_is_created(map))
+ return libbpf_err(-EBUSY);
map->def.key_size = size;
return 0;
}
@@ -8804,10 +10022,106 @@ __u32 bpf_map__value_size(const struct bpf_map *map)
return map->def.value_size;
}
+static int map_btf_datasec_resize(struct bpf_map *map, __u32 size)
+{
+ struct btf *btf;
+ struct btf_type *datasec_type, *var_type;
+ struct btf_var_secinfo *var;
+ const struct btf_type *array_type;
+ const struct btf_array *array;
+ int vlen, element_sz, new_array_id;
+ __u32 nr_elements;
+
+ /* check btf existence */
+ btf = bpf_object__btf(map->obj);
+ if (!btf)
+ return -ENOENT;
+
+ /* verify map is datasec */
+ datasec_type = btf_type_by_id(btf, bpf_map__btf_value_type_id(map));
+ if (!btf_is_datasec(datasec_type)) {
+ pr_warn("map '%s': cannot be resized, map value type is not a datasec\n",
+ bpf_map__name(map));
+ return -EINVAL;
+ }
+
+ /* verify datasec has at least one var */
+ vlen = btf_vlen(datasec_type);
+ if (vlen == 0) {
+ pr_warn("map '%s': cannot be resized, map value datasec is empty\n",
+ bpf_map__name(map));
+ return -EINVAL;
+ }
+
+ /* verify last var in the datasec is an array */
+ var = &btf_var_secinfos(datasec_type)[vlen - 1];
+ var_type = btf_type_by_id(btf, var->type);
+ array_type = skip_mods_and_typedefs(btf, var_type->type, NULL);
+ if (!btf_is_array(array_type)) {
+ pr_warn("map '%s': cannot be resized, last var must be an array\n",
+ bpf_map__name(map));
+ return -EINVAL;
+ }
+
+ /* verify request size aligns with array */
+ array = btf_array(array_type);
+ element_sz = btf__resolve_size(btf, array->type);
+ if (element_sz <= 0 || (size - var->offset) % element_sz != 0) {
+ pr_warn("map '%s': cannot be resized, element size (%d) doesn't align with new total size (%u)\n",
+ bpf_map__name(map), element_sz, size);
+ return -EINVAL;
+ }
+
+ /* create a new array based on the existing array, but with new length */
+ nr_elements = (size - var->offset) / element_sz;
+ new_array_id = btf__add_array(btf, array->index_type, array->type, nr_elements);
+ if (new_array_id < 0)
+ return new_array_id;
+
+ /* adding a new btf type invalidates existing pointers to btf objects,
+ * so refresh pointers before proceeding
+ */
+ datasec_type = btf_type_by_id(btf, map->btf_value_type_id);
+ var = &btf_var_secinfos(datasec_type)[vlen - 1];
+ var_type = btf_type_by_id(btf, var->type);
+
+ /* finally update btf info */
+ datasec_type->size = size;
+ var->size = size - var->offset;
+ var_type->type = new_array_id;
+
+ return 0;
+}
+
int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
{
- if (map->fd >= 0)
- return -EBUSY;
+ if (map->obj->loaded || map->reused)
+ return libbpf_err(-EBUSY);
+
+ if (map->mmaped) {
+ size_t mmap_old_sz, mmap_new_sz;
+ int err;
+
+ if (map->def.type != BPF_MAP_TYPE_ARRAY)
+ return -EOPNOTSUPP;
+
+ mmap_old_sz = bpf_map_mmap_sz(map);
+ mmap_new_sz = array_map_mmap_sz(size, map->def.max_entries);
+ err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz);
+ if (err) {
+ pr_warn("map '%s': failed to resize memory-mapped region: %d\n",
+ bpf_map__name(map), err);
+ return err;
+ }
+ err = map_btf_datasec_resize(map, size);
+ if (err && err != -ENOENT) {
+ pr_warn("map '%s': failed to adjust resized BTF, clearing BTF key/value info: %d\n",
+ bpf_map__name(map), err);
+ map->btf_value_type_id = 0;
+ map->btf_key_type_id = 0;
+ }
+ }
+
map->def.value_size = size;
return 0;
}
@@ -8822,41 +10136,45 @@ __u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
return map ? map->btf_value_type_id : 0;
}
-int bpf_map__set_priv(struct bpf_map *map, void *priv,
- bpf_map_clear_priv_t clear_priv)
+int bpf_map__set_initial_value(struct bpf_map *map,
+ const void *data, size_t size)
{
- if (!map)
- return -EINVAL;
+ size_t actual_sz;
- if (map->priv) {
- if (map->clear_priv)
- map->clear_priv(map, map->priv);
- }
+ if (map->obj->loaded || map->reused)
+ return libbpf_err(-EBUSY);
- map->priv = priv;
- map->clear_priv = clear_priv;
- return 0;
-}
-
-void *bpf_map__priv(const struct bpf_map *map)
-{
- return map ? map->priv : ERR_PTR(-EINVAL);
-}
+ if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG)
+ return libbpf_err(-EINVAL);
-int bpf_map__set_initial_value(struct bpf_map *map,
- const void *data, size_t size)
-{
- if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG ||
- size != map->def.value_size || map->fd >= 0)
- return -EINVAL;
+ if (map->def.type == BPF_MAP_TYPE_ARENA)
+ actual_sz = map->obj->arena_data_sz;
+ else
+ actual_sz = map->def.value_size;
+ if (size != actual_sz)
+ return libbpf_err(-EINVAL);
memcpy(map->mmaped, data, size);
return 0;
}
-bool bpf_map__is_offload_neutral(const struct bpf_map *map)
+void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize)
{
- return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
+ if (bpf_map__is_struct_ops(map)) {
+ if (psize)
+ *psize = map->def.value_size;
+ return map->st_ops->data;
+ }
+
+ if (!map->mmaped)
+ return NULL;
+
+ if (map->def.type == BPF_MAP_TYPE_ARENA)
+ *psize = map->obj->arena_data_sz;
+ else
+ *psize = map->def.value_size;
+
+ return map->mmaped;
}
bool bpf_map__is_internal(const struct bpf_map *map)
@@ -8871,8 +10189,8 @@ __u32 bpf_map__ifindex(const struct bpf_map *map)
int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
{
- if (map->fd >= 0)
- return -EBUSY;
+ if (map_is_created(map))
+ return libbpf_err(-EBUSY);
map->map_ifindex = ifindex;
return 0;
}
@@ -8881,11 +10199,15 @@ int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
{
if (!bpf_map_type__is_map_in_map(map->def.type)) {
pr_warn("error: unsupported map type\n");
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
if (map->inner_map_fd != -1) {
pr_warn("error: inner_map_fd already specified\n");
- return -EINVAL;
+ return libbpf_err(-EINVAL);
+ }
+ if (map->inner_map) {
+ bpf_map__destroy(map->inner_map);
+ zfree(&map->inner_map);
}
map->inner_map_fd = fd;
return 0;
@@ -8898,7 +10220,7 @@ __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
struct bpf_map *s, *e;
if (!obj || !obj->maps)
- return NULL;
+ return errno = EINVAL, NULL;
s = obj->maps;
e = obj->maps + obj->nr_maps;
@@ -8906,7 +10228,7 @@ __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
if ((m < s) || (m >= e)) {
pr_warn("error in %s: map handler doesn't belong to object\n",
__func__);
- return NULL;
+ return errno = EINVAL, NULL;
}
idx = (m - obj->maps) + i;
@@ -8916,7 +10238,7 @@ __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
}
struct bpf_map *
-bpf_map__next(const struct bpf_map *prev, const struct bpf_object *obj)
+bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
{
if (prev == NULL)
return obj->maps;
@@ -8925,7 +10247,7 @@ bpf_map__next(const struct bpf_map *prev, const struct bpf_object *obj)
}
struct bpf_map *
-bpf_map__prev(const struct bpf_map *next, const struct bpf_object *obj)
+bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next)
{
if (next == NULL) {
if (!obj->nr_maps)
@@ -8942,10 +10264,25 @@ bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
struct bpf_map *pos;
bpf_object__for_each_map(pos, obj) {
- if (pos->name && !strcmp(pos->name, name))
+ /* if it's a special internal map name (which always starts
+ * with dot) then check if that special name matches the
+ * real map name (ELF section name)
+ */
+ if (name[0] == '.') {
+ if (pos->real_name && strcmp(pos->real_name, name) == 0)
+ return pos;
+ continue;
+ }
+ /* otherwise map name has to be an exact match */
+ if (map_uses_real_name(pos)) {
+ if (strcmp(pos->real_name, name) == 0)
+ return pos;
+ continue;
+ }
+ if (strcmp(pos->name, name) == 0)
return pos;
}
- return NULL;
+ return errno = ENOENT, NULL;
}
int
@@ -8954,113 +10291,133 @@ bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
return bpf_map__fd(bpf_object__find_map_by_name(obj, name));
}
-struct bpf_map *
-bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset)
+static int validate_map_op(const struct bpf_map *map, size_t key_sz,
+ size_t value_sz, bool check_value_sz)
{
- return ERR_PTR(-ENOTSUP);
+ if (!map_is_created(map)) /* map is not yet created */
+ return -ENOENT;
+
+ if (map->def.key_size != key_sz) {
+ pr_warn("map '%s': unexpected key size %zu provided, expected %u\n",
+ map->name, key_sz, map->def.key_size);
+ return -EINVAL;
+ }
+
+ if (!check_value_sz)
+ return 0;
+
+ switch (map->def.type) {
+ case BPF_MAP_TYPE_PERCPU_ARRAY:
+ case BPF_MAP_TYPE_PERCPU_HASH:
+ case BPF_MAP_TYPE_LRU_PERCPU_HASH:
+ case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: {
+ int num_cpu = libbpf_num_possible_cpus();
+ size_t elem_sz = roundup(map->def.value_size, 8);
+
+ if (value_sz != num_cpu * elem_sz) {
+ pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %d * %zu = %zd\n",
+ map->name, value_sz, num_cpu, elem_sz, num_cpu * elem_sz);
+ return -EINVAL;
+ }
+ break;
+ }
+ default:
+ if (map->def.value_size != value_sz) {
+ pr_warn("map '%s': unexpected value size %zu provided, expected %u\n",
+ map->name, value_sz, map->def.value_size);
+ return -EINVAL;
+ }
+ break;
+ }
+ return 0;
}
-long libbpf_get_error(const void *ptr)
+int bpf_map__lookup_elem(const struct bpf_map *map,
+ const void *key, size_t key_sz,
+ void *value, size_t value_sz, __u64 flags)
{
- return PTR_ERR_OR_ZERO(ptr);
+ int err;
+
+ err = validate_map_op(map, key_sz, value_sz, true);
+ if (err)
+ return libbpf_err(err);
+
+ return bpf_map_lookup_elem_flags(map->fd, key, value, flags);
}
-int bpf_prog_load(const char *file, enum bpf_prog_type type,
- struct bpf_object **pobj, int *prog_fd)
+int bpf_map__update_elem(const struct bpf_map *map,
+ const void *key, size_t key_sz,
+ const void *value, size_t value_sz, __u64 flags)
{
- struct bpf_prog_load_attr attr;
+ int err;
- memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
- attr.file = file;
- attr.prog_type = type;
- attr.expected_attach_type = 0;
+ err = validate_map_op(map, key_sz, value_sz, true);
+ if (err)
+ return libbpf_err(err);
- return bpf_prog_load_xattr(&attr, pobj, prog_fd);
+ return bpf_map_update_elem(map->fd, key, value, flags);
}
-int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
- struct bpf_object **pobj, int *prog_fd)
+int bpf_map__delete_elem(const struct bpf_map *map,
+ const void *key, size_t key_sz, __u64 flags)
{
- struct bpf_object_open_attr open_attr = {};
- struct bpf_program *prog, *first_prog = NULL;
- struct bpf_object *obj;
- struct bpf_map *map;
int err;
- if (!attr)
- return -EINVAL;
- if (!attr->file)
- return -EINVAL;
-
- open_attr.file = attr->file;
- open_attr.prog_type = attr->prog_type;
+ err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
+ if (err)
+ return libbpf_err(err);
- obj = bpf_object__open_xattr(&open_attr);
- if (IS_ERR_OR_NULL(obj))
- return -ENOENT;
+ return bpf_map_delete_elem_flags(map->fd, key, flags);
+}
- bpf_object__for_each_program(prog, obj) {
- enum bpf_attach_type attach_type = attr->expected_attach_type;
- /*
- * to preserve backwards compatibility, bpf_prog_load treats
- * attr->prog_type, if specified, as an override to whatever
- * bpf_object__open guessed
- */
- if (attr->prog_type != BPF_PROG_TYPE_UNSPEC) {
- bpf_program__set_type(prog, attr->prog_type);
- bpf_program__set_expected_attach_type(prog,
- attach_type);
- }
- if (bpf_program__get_type(prog) == BPF_PROG_TYPE_UNSPEC) {
- /*
- * we haven't guessed from section name and user
- * didn't provide a fallback type, too bad...
- */
- bpf_object__close(obj);
- return -EINVAL;
- }
+int bpf_map__lookup_and_delete_elem(const struct bpf_map *map,
+ const void *key, size_t key_sz,
+ void *value, size_t value_sz, __u64 flags)
+{
+ int err;
- prog->prog_ifindex = attr->ifindex;
- prog->log_level = attr->log_level;
- prog->prog_flags |= attr->prog_flags;
- if (!first_prog)
- first_prog = prog;
- }
+ err = validate_map_op(map, key_sz, value_sz, true);
+ if (err)
+ return libbpf_err(err);
- bpf_object__for_each_map(map, obj) {
- if (!bpf_map__is_offload_neutral(map))
- map->map_ifindex = attr->ifindex;
- }
+ return bpf_map_lookup_and_delete_elem_flags(map->fd, key, value, flags);
+}
- if (!first_prog) {
- pr_warn("object file doesn't contain bpf program\n");
- bpf_object__close(obj);
- return -ENOENT;
- }
+int bpf_map__get_next_key(const struct bpf_map *map,
+ const void *cur_key, void *next_key, size_t key_sz)
+{
+ int err;
- err = bpf_object__load(obj);
- if (err) {
- bpf_object__close(obj);
- return err;
- }
+ err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
+ if (err)
+ return libbpf_err(err);
- *pobj = obj;
- *prog_fd = bpf_program__fd(first_prog);
- return 0;
+ return bpf_map_get_next_key(map->fd, cur_key, next_key);
}
-struct bpf_link {
- int (*detach)(struct bpf_link *link);
- int (*destroy)(struct bpf_link *link);
- char *pin_path; /* NULL, if not pinned */
- int fd; /* hook FD, -1 if not applicable */
- bool disconnected;
-};
+long libbpf_get_error(const void *ptr)
+{
+ if (!IS_ERR_OR_NULL(ptr))
+ return 0;
+
+ if (IS_ERR(ptr))
+ errno = -PTR_ERR(ptr);
+
+ /* If ptr == NULL, then errno should be already set by the failing
+ * API, because libbpf never returns NULL on success and it now always
+ * sets errno on error. So no extra errno handling for ptr == NULL
+ * case.
+ */
+ return -errno;
+}
/* Replace link's underlying BPF program with the new one */
int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
{
- return bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
+ int ret;
+
+ ret = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
+ return libbpf_err_errno(ret);
}
/* Release "ownership" of underlying BPF resource (typically, BPF program
@@ -9087,13 +10444,14 @@ int bpf_link__destroy(struct bpf_link *link)
if (!link->disconnected && link->detach)
err = link->detach(link);
- if (link->destroy)
- link->destroy(link);
if (link->pin_path)
free(link->pin_path);
- free(link);
+ if (link->dealloc)
+ link->dealloc(link);
+ else
+ free(link);
- return err;
+ return libbpf_err(err);
}
int bpf_link__fd(const struct bpf_link *link)
@@ -9108,7 +10466,7 @@ const char *bpf_link__pin_path(const struct bpf_link *link)
static int bpf_link__detach_fd(struct bpf_link *link)
{
- return close(link->fd);
+ return libbpf_err_errno(close(link->fd));
}
struct bpf_link *bpf_link__open(const char *path)
@@ -9120,13 +10478,13 @@ struct bpf_link *bpf_link__open(const char *path)
if (fd < 0) {
fd = -errno;
pr_warn("failed to open link at %s: %d\n", path, fd);
- return ERR_PTR(fd);
+ return libbpf_err_ptr(fd);
}
link = calloc(1, sizeof(*link));
if (!link) {
close(fd);
- return ERR_PTR(-ENOMEM);
+ return libbpf_err_ptr(-ENOMEM);
}
link->detach = &bpf_link__detach_fd;
link->fd = fd;
@@ -9134,7 +10492,7 @@ struct bpf_link *bpf_link__open(const char *path)
link->pin_path = strdup(path);
if (!link->pin_path) {
bpf_link__destroy(link);
- return ERR_PTR(-ENOMEM);
+ return libbpf_err_ptr(-ENOMEM);
}
return link;
@@ -9150,22 +10508,22 @@ int bpf_link__pin(struct bpf_link *link, const char *path)
int err;
if (link->pin_path)
- return -EBUSY;
+ return libbpf_err(-EBUSY);
err = make_parent_dir(path);
if (err)
- return err;
+ return libbpf_err(err);
err = check_path(path);
if (err)
- return err;
+ return libbpf_err(err);
link->pin_path = strdup(path);
if (!link->pin_path)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
if (bpf_obj_pin(link->fd, link->pin_path)) {
err = -errno;
zfree(&link->pin_path);
- return err;
+ return libbpf_err(err);
}
pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path);
@@ -9177,7 +10535,7 @@ int bpf_link__unpin(struct bpf_link *link)
int err;
if (!link->pin_path)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
err = unlink(link->pin_path);
if (err != 0)
@@ -9188,61 +10546,132 @@ int bpf_link__unpin(struct bpf_link *link)
return 0;
}
-static int bpf_link__detach_perf_event(struct bpf_link *link)
+struct bpf_link_perf {
+ struct bpf_link link;
+ int perf_event_fd;
+ /* legacy kprobe support: keep track of probe identifier and type */
+ char *legacy_probe_name;
+ bool legacy_is_kprobe;
+ bool legacy_is_retprobe;
+};
+
+static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe);
+static int remove_uprobe_event_legacy(const char *probe_name, bool retprobe);
+
+static int bpf_link_perf_detach(struct bpf_link *link)
{
- int err;
+ struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
+ int err = 0;
- err = ioctl(link->fd, PERF_EVENT_IOC_DISABLE, 0);
- if (err)
+ if (ioctl(perf_link->perf_event_fd, PERF_EVENT_IOC_DISABLE, 0) < 0)
err = -errno;
+ if (perf_link->perf_event_fd != link->fd)
+ close(perf_link->perf_event_fd);
close(link->fd);
+
+ /* legacy uprobe/kprobe needs to be removed after perf event fd closure */
+ if (perf_link->legacy_probe_name) {
+ if (perf_link->legacy_is_kprobe) {
+ err = remove_kprobe_event_legacy(perf_link->legacy_probe_name,
+ perf_link->legacy_is_retprobe);
+ } else {
+ err = remove_uprobe_event_legacy(perf_link->legacy_probe_name,
+ perf_link->legacy_is_retprobe);
+ }
+ }
+
return err;
}
-struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
- int pfd)
+static void bpf_link_perf_dealloc(struct bpf_link *link)
+{
+ struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
+
+ free(perf_link->legacy_probe_name);
+ free(perf_link);
+}
+
+struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd,
+ const struct bpf_perf_event_opts *opts)
{
char errmsg[STRERR_BUFSIZE];
- struct bpf_link *link;
- int prog_fd, err;
+ struct bpf_link_perf *link;
+ int prog_fd, link_fd = -1, err;
+ bool force_ioctl_attach;
+
+ if (!OPTS_VALID(opts, bpf_perf_event_opts))
+ return libbpf_err_ptr(-EINVAL);
if (pfd < 0) {
pr_warn("prog '%s': invalid perf event FD %d\n",
prog->name, pfd);
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
}
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
prog->name);
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
}
link = calloc(1, sizeof(*link));
if (!link)
- return ERR_PTR(-ENOMEM);
- link->detach = &bpf_link__detach_perf_event;
- link->fd = pfd;
+ return libbpf_err_ptr(-ENOMEM);
+ link->link.detach = &bpf_link_perf_detach;
+ link->link.dealloc = &bpf_link_perf_dealloc;
+ link->perf_event_fd = pfd;
+
+ force_ioctl_attach = OPTS_GET(opts, force_ioctl_attach, false);
+ if (kernel_supports(prog->obj, FEAT_PERF_LINK) && !force_ioctl_attach) {
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_opts,
+ .perf_event.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0));
+
+ link_fd = bpf_link_create(prog_fd, pfd, BPF_PERF_EVENT, &link_opts);
+ if (link_fd < 0) {
+ err = -errno;
+ pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %d (%s)\n",
+ prog->name, pfd,
+ err, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+ goto err_out;
+ }
+ link->link.fd = link_fd;
+ } else {
+ if (OPTS_GET(opts, bpf_cookie, 0)) {
+ pr_warn("prog '%s': user context value is not supported\n", prog->name);
+ err = -EOPNOTSUPP;
+ goto err_out;
+ }
- if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
- err = -errno;
- free(link);
- pr_warn("prog '%s': failed to attach to pfd %d: %s\n",
- prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
- if (err == -EPROTO)
- pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
- prog->name, pfd);
- return ERR_PTR(err);
+ if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
+ err = -errno;
+ pr_warn("prog '%s': failed to attach to perf_event FD %d: %s\n",
+ prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+ if (err == -EPROTO)
+ pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
+ prog->name, pfd);
+ goto err_out;
+ }
+ link->link.fd = pfd;
}
if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
err = -errno;
- free(link);
- pr_warn("prog '%s': failed to enable pfd %d: %s\n",
+ pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n",
prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
- return ERR_PTR(err);
+ goto err_out;
}
- return link;
+
+ return &link->link;
+err_out:
+ if (link_fd >= 0)
+ close(link_fd);
+ free(link);
+ return libbpf_err_ptr(err);
+}
+
+struct bpf_link *bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd)
+{
+ return bpf_program__attach_perf_event_opts(prog, pfd, NULL);
}
/*
@@ -9256,7 +10685,7 @@ static int parse_uint_from_file(const char *file, const char *fmt)
int err, ret;
FILE *f;
- f = fopen(file, "r");
+ f = fopen(file, "re");
if (!f) {
err = -errno;
pr_debug("failed to open '%s': %s\n", file,
@@ -9303,12 +10732,21 @@ static int determine_uprobe_retprobe_bit(void)
return parse_uint_from_file(file, "config:%d\n");
}
+#define PERF_UPROBE_REF_CTR_OFFSET_BITS 32
+#define PERF_UPROBE_REF_CTR_OFFSET_SHIFT 32
+
static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
- uint64_t offset, int pid)
+ uint64_t offset, int pid, size_t ref_ctr_off)
{
- struct perf_event_attr attr = {};
+ const size_t attr_sz = sizeof(struct perf_event_attr);
+ struct perf_event_attr attr;
char errmsg[STRERR_BUFSIZE];
- int type, pfd, err;
+ int type, pfd;
+
+ if ((__u64)ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS))
+ return -EINVAL;
+
+ memset(&attr, 0, attr_sz);
type = uprobe ? determine_uprobe_perf_type()
: determine_kprobe_perf_type();
@@ -9330,8 +10768,9 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
}
attr.config |= 1 << bit;
}
- attr.size = sizeof(attr);
+ attr.size = attr_sz;
attr.type = type;
+ attr.config |= (__u64)ref_ctr_off << PERF_UPROBE_REF_CTR_OFFSET_SHIFT;
attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
attr.config2 = offset; /* kprobe_addr or probe_offset */
@@ -9340,85 +10779,1368 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
pid < 0 ? -1 : pid /* pid */,
pid == -1 ? 0 : -1 /* cpu */,
-1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
- if (pfd < 0) {
+ return pfd >= 0 ? pfd : -errno;
+}
+
+static int append_to_file(const char *file, const char *fmt, ...)
+{
+ int fd, n, err = 0;
+ va_list ap;
+ char buf[1024];
+
+ va_start(ap, fmt);
+ n = vsnprintf(buf, sizeof(buf), fmt, ap);
+ va_end(ap);
+
+ if (n < 0 || n >= sizeof(buf))
+ return -EINVAL;
+
+ fd = open(file, O_WRONLY | O_APPEND | O_CLOEXEC, 0);
+ if (fd < 0)
+ return -errno;
+
+ if (write(fd, buf, n) < 0)
err = -errno;
- pr_warn("%s perf_event_open() failed: %s\n",
- uprobe ? "uprobe" : "kprobe",
+
+ close(fd);
+ return err;
+}
+
+#define DEBUGFS "/sys/kernel/debug/tracing"
+#define TRACEFS "/sys/kernel/tracing"
+
+static bool use_debugfs(void)
+{
+ static int has_debugfs = -1;
+
+ if (has_debugfs < 0)
+ has_debugfs = faccessat(AT_FDCWD, DEBUGFS, F_OK, AT_EACCESS) == 0;
+
+ return has_debugfs == 1;
+}
+
+static const char *tracefs_path(void)
+{
+ return use_debugfs() ? DEBUGFS : TRACEFS;
+}
+
+static const char *tracefs_kprobe_events(void)
+{
+ return use_debugfs() ? DEBUGFS"/kprobe_events" : TRACEFS"/kprobe_events";
+}
+
+static const char *tracefs_uprobe_events(void)
+{
+ return use_debugfs() ? DEBUGFS"/uprobe_events" : TRACEFS"/uprobe_events";
+}
+
+static const char *tracefs_available_filter_functions(void)
+{
+ return use_debugfs() ? DEBUGFS"/available_filter_functions"
+ : TRACEFS"/available_filter_functions";
+}
+
+static const char *tracefs_available_filter_functions_addrs(void)
+{
+ return use_debugfs() ? DEBUGFS"/available_filter_functions_addrs"
+ : TRACEFS"/available_filter_functions_addrs";
+}
+
+static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz,
+ const char *kfunc_name, size_t offset)
+{
+ static int index = 0;
+ int i;
+
+ snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset,
+ __sync_fetch_and_add(&index, 1));
+
+ /* sanitize binary_path in the probe name */
+ for (i = 0; buf[i]; i++) {
+ if (!isalnum(buf[i]))
+ buf[i] = '_';
+ }
+}
+
+static int add_kprobe_event_legacy(const char *probe_name, bool retprobe,
+ const char *kfunc_name, size_t offset)
+{
+ return append_to_file(tracefs_kprobe_events(), "%c:%s/%s %s+0x%zx",
+ retprobe ? 'r' : 'p',
+ retprobe ? "kretprobes" : "kprobes",
+ probe_name, kfunc_name, offset);
+}
+
+static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe)
+{
+ return append_to_file(tracefs_kprobe_events(), "-:%s/%s",
+ retprobe ? "kretprobes" : "kprobes", probe_name);
+}
+
+static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retprobe)
+{
+ char file[256];
+
+ snprintf(file, sizeof(file), "%s/events/%s/%s/id",
+ tracefs_path(), retprobe ? "kretprobes" : "kprobes", probe_name);
+
+ return parse_uint_from_file(file, "%d\n");
+}
+
+static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe,
+ const char *kfunc_name, size_t offset, int pid)
+{
+ const size_t attr_sz = sizeof(struct perf_event_attr);
+ struct perf_event_attr attr;
+ char errmsg[STRERR_BUFSIZE];
+ int type, pfd, err;
+
+ err = add_kprobe_event_legacy(probe_name, retprobe, kfunc_name, offset);
+ if (err < 0) {
+ pr_warn("failed to add legacy kprobe event for '%s+0x%zx': %s\n",
+ kfunc_name, offset,
libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
return err;
}
+ type = determine_kprobe_perf_type_legacy(probe_name, retprobe);
+ if (type < 0) {
+ err = type;
+ pr_warn("failed to determine legacy kprobe event id for '%s+0x%zx': %s\n",
+ kfunc_name, offset,
+ libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+ goto err_clean_legacy;
+ }
+
+ memset(&attr, 0, attr_sz);
+ attr.size = attr_sz;
+ attr.config = type;
+ attr.type = PERF_TYPE_TRACEPOINT;
+
+ pfd = syscall(__NR_perf_event_open, &attr,
+ pid < 0 ? -1 : pid, /* pid */
+ pid == -1 ? 0 : -1, /* cpu */
+ -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
+ if (pfd < 0) {
+ err = -errno;
+ pr_warn("legacy kprobe perf_event_open() failed: %s\n",
+ libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+ goto err_clean_legacy;
+ }
return pfd;
+
+err_clean_legacy:
+ /* Clear the newly added legacy kprobe_event */
+ remove_kprobe_event_legacy(probe_name, retprobe);
+ return err;
}
-struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
- bool retprobe,
- const char *func_name)
+static const char *arch_specific_syscall_pfx(void)
+{
+#if defined(__x86_64__)
+ return "x64";
+#elif defined(__i386__)
+ return "ia32";
+#elif defined(__s390x__)
+ return "s390x";
+#elif defined(__s390__)
+ return "s390";
+#elif defined(__arm__)
+ return "arm";
+#elif defined(__aarch64__)
+ return "arm64";
+#elif defined(__mips__)
+ return "mips";
+#elif defined(__riscv)
+ return "riscv";
+#elif defined(__powerpc__)
+ return "powerpc";
+#elif defined(__powerpc64__)
+ return "powerpc64";
+#else
+ return NULL;
+#endif
+}
+
+int probe_kern_syscall_wrapper(int token_fd)
{
+ char syscall_name[64];
+ const char *ksys_pfx;
+
+ ksys_pfx = arch_specific_syscall_pfx();
+ if (!ksys_pfx)
+ return 0;
+
+ snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx);
+
+ if (determine_kprobe_perf_type() >= 0) {
+ int pfd;
+
+ pfd = perf_event_open_probe(false, false, syscall_name, 0, getpid(), 0);
+ if (pfd >= 0)
+ close(pfd);
+
+ return pfd >= 0 ? 1 : 0;
+ } else { /* legacy mode */
+ char probe_name[128];
+
+ gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0);
+ if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0)
+ return 0;
+
+ (void)remove_kprobe_event_legacy(probe_name, false);
+ return 1;
+ }
+}
+
+struct bpf_link *
+bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
+ const char *func_name,
+ const struct bpf_kprobe_opts *opts)
+{
+ DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
+ enum probe_attach_mode attach_mode;
char errmsg[STRERR_BUFSIZE];
+ char *legacy_probe = NULL;
struct bpf_link *link;
+ size_t offset;
+ bool retprobe, legacy;
int pfd, err;
- pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,
- 0 /* offset */, -1 /* pid */);
+ if (!OPTS_VALID(opts, bpf_kprobe_opts))
+ return libbpf_err_ptr(-EINVAL);
+
+ attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT);
+ retprobe = OPTS_GET(opts, retprobe, false);
+ offset = OPTS_GET(opts, offset, 0);
+ pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
+
+ legacy = determine_kprobe_perf_type() < 0;
+ switch (attach_mode) {
+ case PROBE_ATTACH_MODE_LEGACY:
+ legacy = true;
+ pe_opts.force_ioctl_attach = true;
+ break;
+ case PROBE_ATTACH_MODE_PERF:
+ if (legacy)
+ return libbpf_err_ptr(-ENOTSUP);
+ pe_opts.force_ioctl_attach = true;
+ break;
+ case PROBE_ATTACH_MODE_LINK:
+ if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK))
+ return libbpf_err_ptr(-ENOTSUP);
+ break;
+ case PROBE_ATTACH_MODE_DEFAULT:
+ break;
+ default:
+ return libbpf_err_ptr(-EINVAL);
+ }
+
+ if (!legacy) {
+ pfd = perf_event_open_probe(false /* uprobe */, retprobe,
+ func_name, offset,
+ -1 /* pid */, 0 /* ref_ctr_off */);
+ } else {
+ char probe_name[256];
+
+ gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name),
+ func_name, offset);
+
+ legacy_probe = strdup(probe_name);
+ if (!legacy_probe)
+ return libbpf_err_ptr(-ENOMEM);
+
+ pfd = perf_event_kprobe_open_legacy(legacy_probe, retprobe, func_name,
+ offset, -1 /* pid */);
+ }
if (pfd < 0) {
- pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
- prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
- libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
- return ERR_PTR(pfd);
+ err = -errno;
+ pr_warn("prog '%s': failed to create %s '%s+0x%zx' perf event: %s\n",
+ prog->name, retprobe ? "kretprobe" : "kprobe",
+ func_name, offset,
+ libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+ goto err_out;
}
- link = bpf_program__attach_perf_event(prog, pfd);
- if (IS_ERR(link)) {
+ link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
+ err = libbpf_get_error(link);
+ if (err) {
close(pfd);
- err = PTR_ERR(link);
- pr_warn("prog '%s': failed to attach to %s '%s': %s\n",
- prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
+ pr_warn("prog '%s': failed to attach to %s '%s+0x%zx': %s\n",
+ prog->name, retprobe ? "kretprobe" : "kprobe",
+ func_name, offset,
libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
- return link;
+ goto err_clean_legacy;
+ }
+ if (legacy) {
+ struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
+
+ perf_link->legacy_probe_name = legacy_probe;
+ perf_link->legacy_is_kprobe = true;
+ perf_link->legacy_is_retprobe = retprobe;
}
+
return link;
+
+err_clean_legacy:
+ if (legacy)
+ remove_kprobe_event_legacy(legacy_probe, retprobe);
+err_out:
+ free(legacy_probe);
+ return libbpf_err_ptr(err);
}
-static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
- struct bpf_program *prog)
+struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog,
+ bool retprobe,
+ const char *func_name)
{
- const char *func_name;
+ DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts,
+ .retprobe = retprobe,
+ );
+
+ return bpf_program__attach_kprobe_opts(prog, func_name, &opts);
+}
+
+struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog,
+ const char *syscall_name,
+ const struct bpf_ksyscall_opts *opts)
+{
+ LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts);
+ char func_name[128];
+
+ if (!OPTS_VALID(opts, bpf_ksyscall_opts))
+ return libbpf_err_ptr(-EINVAL);
+
+ if (kernel_supports(prog->obj, FEAT_SYSCALL_WRAPPER)) {
+ /* arch_specific_syscall_pfx() should never return NULL here
+ * because it is guarded by kernel_supports(). However, since
+ * compiler does not know that we have an explicit conditional
+ * as well.
+ */
+ snprintf(func_name, sizeof(func_name), "__%s_sys_%s",
+ arch_specific_syscall_pfx() ? : "", syscall_name);
+ } else {
+ snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name);
+ }
+
+ kprobe_opts.retprobe = OPTS_GET(opts, retprobe, false);
+ kprobe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
+
+ return bpf_program__attach_kprobe_opts(prog, func_name, &kprobe_opts);
+}
+
+/* Adapted from perf/util/string.c */
+bool glob_match(const char *str, const char *pat)
+{
+ while (*str && *pat && *pat != '*') {
+ if (*pat == '?') { /* Matches any single character */
+ str++;
+ pat++;
+ continue;
+ }
+ if (*str != *pat)
+ return false;
+ str++;
+ pat++;
+ }
+ /* Check wild card */
+ if (*pat == '*') {
+ while (*pat == '*')
+ pat++;
+ if (!*pat) /* Tail wild card matches all */
+ return true;
+ while (*str)
+ if (glob_match(str++, pat))
+ return true;
+ }
+ return !*str && !*pat;
+}
+
+struct kprobe_multi_resolve {
+ const char *pattern;
+ unsigned long *addrs;
+ size_t cap;
+ size_t cnt;
+};
+
+struct avail_kallsyms_data {
+ char **syms;
+ size_t cnt;
+ struct kprobe_multi_resolve *res;
+};
+
+static int avail_func_cmp(const void *a, const void *b)
+{
+ return strcmp(*(const char **)a, *(const char **)b);
+}
+
+static int avail_kallsyms_cb(unsigned long long sym_addr, char sym_type,
+ const char *sym_name, void *ctx)
+{
+ struct avail_kallsyms_data *data = ctx;
+ struct kprobe_multi_resolve *res = data->res;
+ int err;
+
+ if (!bsearch(&sym_name, data->syms, data->cnt, sizeof(*data->syms), avail_func_cmp))
+ return 0;
+
+ err = libbpf_ensure_mem((void **)&res->addrs, &res->cap, sizeof(*res->addrs), res->cnt + 1);
+ if (err)
+ return err;
+
+ res->addrs[res->cnt++] = (unsigned long)sym_addr;
+ return 0;
+}
+
+static int libbpf_available_kallsyms_parse(struct kprobe_multi_resolve *res)
+{
+ const char *available_functions_file = tracefs_available_filter_functions();
+ struct avail_kallsyms_data data;
+ char sym_name[500];
+ FILE *f;
+ int err = 0, ret, i;
+ char **syms = NULL;
+ size_t cap = 0, cnt = 0;
+
+ f = fopen(available_functions_file, "re");
+ if (!f) {
+ err = -errno;
+ pr_warn("failed to open %s: %d\n", available_functions_file, err);
+ return err;
+ }
+
+ while (true) {
+ char *name;
+
+ ret = fscanf(f, "%499s%*[^\n]\n", sym_name);
+ if (ret == EOF && feof(f))
+ break;
+
+ if (ret != 1) {
+ pr_warn("failed to parse available_filter_functions entry: %d\n", ret);
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ if (!glob_match(sym_name, res->pattern))
+ continue;
+
+ err = libbpf_ensure_mem((void **)&syms, &cap, sizeof(*syms), cnt + 1);
+ if (err)
+ goto cleanup;
+
+ name = strdup(sym_name);
+ if (!name) {
+ err = -errno;
+ goto cleanup;
+ }
+
+ syms[cnt++] = name;
+ }
+
+ /* no entries found, bail out */
+ if (cnt == 0) {
+ err = -ENOENT;
+ goto cleanup;
+ }
+
+ /* sort available functions */
+ qsort(syms, cnt, sizeof(*syms), avail_func_cmp);
+
+ data.syms = syms;
+ data.res = res;
+ data.cnt = cnt;
+ libbpf_kallsyms_parse(avail_kallsyms_cb, &data);
+
+ if (res->cnt == 0)
+ err = -ENOENT;
+
+cleanup:
+ for (i = 0; i < cnt; i++)
+ free((char *)syms[i]);
+ free(syms);
+
+ fclose(f);
+ return err;
+}
+
+static bool has_available_filter_functions_addrs(void)
+{
+ return access(tracefs_available_filter_functions_addrs(), R_OK) != -1;
+}
+
+static int libbpf_available_kprobes_parse(struct kprobe_multi_resolve *res)
+{
+ const char *available_path = tracefs_available_filter_functions_addrs();
+ char sym_name[500];
+ FILE *f;
+ int ret, err = 0;
+ unsigned long long sym_addr;
+
+ f = fopen(available_path, "re");
+ if (!f) {
+ err = -errno;
+ pr_warn("failed to open %s: %d\n", available_path, err);
+ return err;
+ }
+
+ while (true) {
+ ret = fscanf(f, "%llx %499s%*[^\n]\n", &sym_addr, sym_name);
+ if (ret == EOF && feof(f))
+ break;
+
+ if (ret != 2) {
+ pr_warn("failed to parse available_filter_functions_addrs entry: %d\n",
+ ret);
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ if (!glob_match(sym_name, res->pattern))
+ continue;
+
+ err = libbpf_ensure_mem((void **)&res->addrs, &res->cap,
+ sizeof(*res->addrs), res->cnt + 1);
+ if (err)
+ goto cleanup;
+
+ res->addrs[res->cnt++] = (unsigned long)sym_addr;
+ }
+
+ if (res->cnt == 0)
+ err = -ENOENT;
+
+cleanup:
+ fclose(f);
+ return err;
+}
+
+struct bpf_link *
+bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog,
+ const char *pattern,
+ const struct bpf_kprobe_multi_opts *opts)
+{
+ LIBBPF_OPTS(bpf_link_create_opts, lopts);
+ struct kprobe_multi_resolve res = {
+ .pattern = pattern,
+ };
+ struct bpf_link *link = NULL;
+ char errmsg[STRERR_BUFSIZE];
+ const unsigned long *addrs;
+ int err, link_fd, prog_fd;
+ const __u64 *cookies;
+ const char **syms;
bool retprobe;
+ size_t cnt;
+
+ if (!OPTS_VALID(opts, bpf_kprobe_multi_opts))
+ return libbpf_err_ptr(-EINVAL);
+
+ syms = OPTS_GET(opts, syms, false);
+ addrs = OPTS_GET(opts, addrs, false);
+ cnt = OPTS_GET(opts, cnt, false);
+ cookies = OPTS_GET(opts, cookies, false);
+
+ if (!pattern && !addrs && !syms)
+ return libbpf_err_ptr(-EINVAL);
+ if (pattern && (addrs || syms || cookies || cnt))
+ return libbpf_err_ptr(-EINVAL);
+ if (!pattern && !cnt)
+ return libbpf_err_ptr(-EINVAL);
+ if (addrs && syms)
+ return libbpf_err_ptr(-EINVAL);
+
+ if (pattern) {
+ if (has_available_filter_functions_addrs())
+ err = libbpf_available_kprobes_parse(&res);
+ else
+ err = libbpf_available_kallsyms_parse(&res);
+ if (err)
+ goto error;
+ addrs = res.addrs;
+ cnt = res.cnt;
+ }
+
+ retprobe = OPTS_GET(opts, retprobe, false);
+
+ lopts.kprobe_multi.syms = syms;
+ lopts.kprobe_multi.addrs = addrs;
+ lopts.kprobe_multi.cookies = cookies;
+ lopts.kprobe_multi.cnt = cnt;
+ lopts.kprobe_multi.flags = retprobe ? BPF_F_KPROBE_MULTI_RETURN : 0;
+
+ link = calloc(1, sizeof(*link));
+ if (!link) {
+ err = -ENOMEM;
+ goto error;
+ }
+ link->detach = &bpf_link__detach_fd;
- func_name = prog->sec_name + sec->len;
- retprobe = strcmp(sec->sec, "kretprobe/") == 0;
+ prog_fd = bpf_program__fd(prog);
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_KPROBE_MULTI, &lopts);
+ if (link_fd < 0) {
+ err = -errno;
+ pr_warn("prog '%s': failed to attach: %s\n",
+ prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+ goto error;
+ }
+ link->fd = link_fd;
+ free(res.addrs);
+ return link;
- return bpf_program__attach_kprobe(prog, retprobe, func_name);
+error:
+ free(link);
+ free(res.addrs);
+ return libbpf_err_ptr(err);
}
-struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
- bool retprobe, pid_t pid,
- const char *binary_path,
- size_t func_offset)
+static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
+{
+ DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
+ unsigned long offset = 0;
+ const char *func_name;
+ char *func;
+ int n;
+
+ *link = NULL;
+
+ /* no auto-attach for SEC("kprobe") and SEC("kretprobe") */
+ if (strcmp(prog->sec_name, "kprobe") == 0 || strcmp(prog->sec_name, "kretprobe") == 0)
+ return 0;
+
+ opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe/");
+ if (opts.retprobe)
+ func_name = prog->sec_name + sizeof("kretprobe/") - 1;
+ else
+ func_name = prog->sec_name + sizeof("kprobe/") - 1;
+
+ n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset);
+ if (n < 1) {
+ pr_warn("kprobe name is invalid: %s\n", func_name);
+ return -EINVAL;
+ }
+ if (opts.retprobe && offset != 0) {
+ free(func);
+ pr_warn("kretprobes do not support offset specification\n");
+ return -EINVAL;
+ }
+
+ opts.offset = offset;
+ *link = bpf_program__attach_kprobe_opts(prog, func, &opts);
+ free(func);
+ return libbpf_get_error(*link);
+}
+
+static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link)
+{
+ LIBBPF_OPTS(bpf_ksyscall_opts, opts);
+ const char *syscall_name;
+
+ *link = NULL;
+
+ /* no auto-attach for SEC("ksyscall") and SEC("kretsyscall") */
+ if (strcmp(prog->sec_name, "ksyscall") == 0 || strcmp(prog->sec_name, "kretsyscall") == 0)
+ return 0;
+
+ opts.retprobe = str_has_pfx(prog->sec_name, "kretsyscall/");
+ if (opts.retprobe)
+ syscall_name = prog->sec_name + sizeof("kretsyscall/") - 1;
+ else
+ syscall_name = prog->sec_name + sizeof("ksyscall/") - 1;
+
+ *link = bpf_program__attach_ksyscall(prog, syscall_name, &opts);
+ return *link ? 0 : -errno;
+}
+
+static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link)
+{
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ const char *spec;
+ char *pattern;
+ int n;
+
+ *link = NULL;
+
+ /* no auto-attach for SEC("kprobe.multi") and SEC("kretprobe.multi") */
+ if (strcmp(prog->sec_name, "kprobe.multi") == 0 ||
+ strcmp(prog->sec_name, "kretprobe.multi") == 0)
+ return 0;
+
+ opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe.multi/");
+ if (opts.retprobe)
+ spec = prog->sec_name + sizeof("kretprobe.multi/") - 1;
+ else
+ spec = prog->sec_name + sizeof("kprobe.multi/") - 1;
+
+ n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern);
+ if (n < 1) {
+ pr_warn("kprobe multi pattern is invalid: %s\n", pattern);
+ return -EINVAL;
+ }
+
+ *link = bpf_program__attach_kprobe_multi_opts(prog, pattern, &opts);
+ free(pattern);
+ return libbpf_get_error(*link);
+}
+
+static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
+ char *probe_type = NULL, *binary_path = NULL, *func_name = NULL;
+ LIBBPF_OPTS(bpf_uprobe_multi_opts, opts);
+ int n, ret = -EINVAL;
+
+ *link = NULL;
+
+ n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]",
+ &probe_type, &binary_path, &func_name);
+ switch (n) {
+ case 1:
+ /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
+ ret = 0;
+ break;
+ case 3:
+ opts.retprobe = strcmp(probe_type, "uretprobe.multi") == 0;
+ *link = bpf_program__attach_uprobe_multi(prog, -1, binary_path, func_name, &opts);
+ ret = libbpf_get_error(*link);
+ break;
+ default:
+ pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name,
+ prog->sec_name);
+ break;
+ }
+ free(probe_type);
+ free(binary_path);
+ free(func_name);
+ return ret;
+}
+
+static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz,
+ const char *binary_path, uint64_t offset)
+{
+ int i;
+
+ snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), binary_path, (size_t)offset);
+
+ /* sanitize binary_path in the probe name */
+ for (i = 0; buf[i]; i++) {
+ if (!isalnum(buf[i]))
+ buf[i] = '_';
+ }
+}
+
+static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe,
+ const char *binary_path, size_t offset)
+{
+ return append_to_file(tracefs_uprobe_events(), "%c:%s/%s %s:0x%zx",
+ retprobe ? 'r' : 'p',
+ retprobe ? "uretprobes" : "uprobes",
+ probe_name, binary_path, offset);
+}
+
+static inline int remove_uprobe_event_legacy(const char *probe_name, bool retprobe)
+{
+ return append_to_file(tracefs_uprobe_events(), "-:%s/%s",
+ retprobe ? "uretprobes" : "uprobes", probe_name);
+}
+
+static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retprobe)
+{
+ char file[512];
+
+ snprintf(file, sizeof(file), "%s/events/%s/%s/id",
+ tracefs_path(), retprobe ? "uretprobes" : "uprobes", probe_name);
+
+ return parse_uint_from_file(file, "%d\n");
+}
+
+static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe,
+ const char *binary_path, size_t offset, int pid)
+{
+ const size_t attr_sz = sizeof(struct perf_event_attr);
+ struct perf_event_attr attr;
+ int type, pfd, err;
+
+ err = add_uprobe_event_legacy(probe_name, retprobe, binary_path, offset);
+ if (err < 0) {
+ pr_warn("failed to add legacy uprobe event for %s:0x%zx: %d\n",
+ binary_path, (size_t)offset, err);
+ return err;
+ }
+ type = determine_uprobe_perf_type_legacy(probe_name, retprobe);
+ if (type < 0) {
+ err = type;
+ pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %d\n",
+ binary_path, offset, err);
+ goto err_clean_legacy;
+ }
+
+ memset(&attr, 0, attr_sz);
+ attr.size = attr_sz;
+ attr.config = type;
+ attr.type = PERF_TYPE_TRACEPOINT;
+
+ pfd = syscall(__NR_perf_event_open, &attr,
+ pid < 0 ? -1 : pid, /* pid */
+ pid == -1 ? 0 : -1, /* cpu */
+ -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
+ if (pfd < 0) {
+ err = -errno;
+ pr_warn("legacy uprobe perf_event_open() failed: %d\n", err);
+ goto err_clean_legacy;
+ }
+ return pfd;
+
+err_clean_legacy:
+ /* Clear the newly added legacy uprobe_event */
+ remove_uprobe_event_legacy(probe_name, retprobe);
+ return err;
+}
+
+/* Find offset of function name in archive specified by path. Currently
+ * supported are .zip files that do not compress their contents, as used on
+ * Android in the form of APKs, for example. "file_name" is the name of the ELF
+ * file inside the archive. "func_name" matches symbol name or name@@LIB for
+ * library functions.
+ *
+ * An overview of the APK format specifically provided here:
+ * https://en.wikipedia.org/w/index.php?title=Apk_(file_format)&oldid=1139099120#Package_contents
+ */
+static long elf_find_func_offset_from_archive(const char *archive_path, const char *file_name,
+ const char *func_name)
+{
+ struct zip_archive *archive;
+ struct zip_entry entry;
+ long ret;
+ Elf *elf;
+
+ archive = zip_archive_open(archive_path);
+ if (IS_ERR(archive)) {
+ ret = PTR_ERR(archive);
+ pr_warn("zip: failed to open %s: %ld\n", archive_path, ret);
+ return ret;
+ }
+
+ ret = zip_archive_find_entry(archive, file_name, &entry);
+ if (ret) {
+ pr_warn("zip: could not find archive member %s in %s: %ld\n", file_name,
+ archive_path, ret);
+ goto out;
+ }
+ pr_debug("zip: found entry for %s in %s at 0x%lx\n", file_name, archive_path,
+ (unsigned long)entry.data_offset);
+
+ if (entry.compression) {
+ pr_warn("zip: entry %s of %s is compressed and cannot be handled\n", file_name,
+ archive_path);
+ ret = -LIBBPF_ERRNO__FORMAT;
+ goto out;
+ }
+
+ elf = elf_memory((void *)entry.data, entry.data_length);
+ if (!elf) {
+ pr_warn("elf: could not read elf file %s from %s: %s\n", file_name, archive_path,
+ elf_errmsg(-1));
+ ret = -LIBBPF_ERRNO__LIBELF;
+ goto out;
+ }
+
+ ret = elf_find_func_offset(elf, file_name, func_name);
+ if (ret > 0) {
+ pr_debug("elf: symbol address match for %s of %s in %s: 0x%x + 0x%lx = 0x%lx\n",
+ func_name, file_name, archive_path, entry.data_offset, ret,
+ ret + entry.data_offset);
+ ret += entry.data_offset;
+ }
+ elf_end(elf);
+
+out:
+ zip_archive_close(archive);
+ return ret;
+}
+
+static const char *arch_specific_lib_paths(void)
+{
+ /*
+ * Based on https://packages.debian.org/sid/libc6.
+ *
+ * Assume that the traced program is built for the same architecture
+ * as libbpf, which should cover the vast majority of cases.
+ */
+#if defined(__x86_64__)
+ return "/lib/x86_64-linux-gnu";
+#elif defined(__i386__)
+ return "/lib/i386-linux-gnu";
+#elif defined(__s390x__)
+ return "/lib/s390x-linux-gnu";
+#elif defined(__s390__)
+ return "/lib/s390-linux-gnu";
+#elif defined(__arm__) && defined(__SOFTFP__)
+ return "/lib/arm-linux-gnueabi";
+#elif defined(__arm__) && !defined(__SOFTFP__)
+ return "/lib/arm-linux-gnueabihf";
+#elif defined(__aarch64__)
+ return "/lib/aarch64-linux-gnu";
+#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 64
+ return "/lib/mips64el-linux-gnuabi64";
+#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 32
+ return "/lib/mipsel-linux-gnu";
+#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ return "/lib/powerpc64le-linux-gnu";
+#elif defined(__sparc__) && defined(__arch64__)
+ return "/lib/sparc64-linux-gnu";
+#elif defined(__riscv) && __riscv_xlen == 64
+ return "/lib/riscv64-linux-gnu";
+#else
+ return NULL;
+#endif
+}
+
+/* Get full path to program/shared library. */
+static int resolve_full_path(const char *file, char *result, size_t result_sz)
+{
+ const char *search_paths[3] = {};
+ int i, perm;
+
+ if (str_has_sfx(file, ".so") || strstr(file, ".so.")) {
+ search_paths[0] = getenv("LD_LIBRARY_PATH");
+ search_paths[1] = "/usr/lib64:/usr/lib";
+ search_paths[2] = arch_specific_lib_paths();
+ perm = R_OK;
+ } else {
+ search_paths[0] = getenv("PATH");
+ search_paths[1] = "/usr/bin:/usr/sbin";
+ perm = R_OK | X_OK;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(search_paths); i++) {
+ const char *s;
+
+ if (!search_paths[i])
+ continue;
+ for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) {
+ char *next_path;
+ int seg_len;
+
+ if (s[0] == ':')
+ s++;
+ next_path = strchr(s, ':');
+ seg_len = next_path ? next_path - s : strlen(s);
+ if (!seg_len)
+ continue;
+ snprintf(result, result_sz, "%.*s/%s", seg_len, s, file);
+ /* ensure it has required permissions */
+ if (faccessat(AT_FDCWD, result, perm, AT_EACCESS) < 0)
+ continue;
+ pr_debug("resolved '%s' to '%s'\n", file, result);
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
+
+struct bpf_link *
+bpf_program__attach_uprobe_multi(const struct bpf_program *prog,
+ pid_t pid,
+ const char *path,
+ const char *func_pattern,
+ const struct bpf_uprobe_multi_opts *opts)
+{
+ const unsigned long *ref_ctr_offsets = NULL, *offsets = NULL;
+ LIBBPF_OPTS(bpf_link_create_opts, lopts);
+ unsigned long *resolved_offsets = NULL;
+ int err = 0, link_fd, prog_fd;
+ struct bpf_link *link = NULL;
char errmsg[STRERR_BUFSIZE];
+ char full_path[PATH_MAX];
+ const __u64 *cookies;
+ const char **syms;
+ size_t cnt;
+
+ if (!OPTS_VALID(opts, bpf_uprobe_multi_opts))
+ return libbpf_err_ptr(-EINVAL);
+
+ syms = OPTS_GET(opts, syms, NULL);
+ offsets = OPTS_GET(opts, offsets, NULL);
+ ref_ctr_offsets = OPTS_GET(opts, ref_ctr_offsets, NULL);
+ cookies = OPTS_GET(opts, cookies, NULL);
+ cnt = OPTS_GET(opts, cnt, 0);
+
+ /*
+ * User can specify 2 mutually exclusive set of inputs:
+ *
+ * 1) use only path/func_pattern/pid arguments
+ *
+ * 2) use path/pid with allowed combinations of:
+ * syms/offsets/ref_ctr_offsets/cookies/cnt
+ *
+ * - syms and offsets are mutually exclusive
+ * - ref_ctr_offsets and cookies are optional
+ *
+ * Any other usage results in error.
+ */
+
+ if (!path)
+ return libbpf_err_ptr(-EINVAL);
+ if (!func_pattern && cnt == 0)
+ return libbpf_err_ptr(-EINVAL);
+
+ if (func_pattern) {
+ if (syms || offsets || ref_ctr_offsets || cookies || cnt)
+ return libbpf_err_ptr(-EINVAL);
+ } else {
+ if (!!syms == !!offsets)
+ return libbpf_err_ptr(-EINVAL);
+ }
+
+ if (func_pattern) {
+ if (!strchr(path, '/')) {
+ err = resolve_full_path(path, full_path, sizeof(full_path));
+ if (err) {
+ pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
+ prog->name, path, err);
+ return libbpf_err_ptr(err);
+ }
+ path = full_path;
+ }
+
+ err = elf_resolve_pattern_offsets(path, func_pattern,
+ &resolved_offsets, &cnt);
+ if (err < 0)
+ return libbpf_err_ptr(err);
+ offsets = resolved_offsets;
+ } else if (syms) {
+ err = elf_resolve_syms_offsets(path, cnt, syms, &resolved_offsets, STT_FUNC);
+ if (err < 0)
+ return libbpf_err_ptr(err);
+ offsets = resolved_offsets;
+ }
+
+ lopts.uprobe_multi.path = path;
+ lopts.uprobe_multi.offsets = offsets;
+ lopts.uprobe_multi.ref_ctr_offsets = ref_ctr_offsets;
+ lopts.uprobe_multi.cookies = cookies;
+ lopts.uprobe_multi.cnt = cnt;
+ lopts.uprobe_multi.flags = OPTS_GET(opts, retprobe, false) ? BPF_F_UPROBE_MULTI_RETURN : 0;
+
+ if (pid == 0)
+ pid = getpid();
+ if (pid > 0)
+ lopts.uprobe_multi.pid = pid;
+
+ link = calloc(1, sizeof(*link));
+ if (!link) {
+ err = -ENOMEM;
+ goto error;
+ }
+ link->detach = &bpf_link__detach_fd;
+
+ prog_fd = bpf_program__fd(prog);
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &lopts);
+ if (link_fd < 0) {
+ err = -errno;
+ pr_warn("prog '%s': failed to attach multi-uprobe: %s\n",
+ prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+ goto error;
+ }
+ link->fd = link_fd;
+ free(resolved_offsets);
+ return link;
+
+error:
+ free(resolved_offsets);
+ free(link);
+ return libbpf_err_ptr(err);
+}
+
+LIBBPF_API struct bpf_link *
+bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
+ const char *binary_path, size_t func_offset,
+ const struct bpf_uprobe_opts *opts)
+{
+ const char *archive_path = NULL, *archive_sep = NULL;
+ char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL;
+ DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
+ enum probe_attach_mode attach_mode;
+ char full_path[PATH_MAX];
struct bpf_link *link;
+ size_t ref_ctr_off;
int pfd, err;
+ bool retprobe, legacy;
+ const char *func_name;
+
+ if (!OPTS_VALID(opts, bpf_uprobe_opts))
+ return libbpf_err_ptr(-EINVAL);
+
+ attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT);
+ retprobe = OPTS_GET(opts, retprobe, false);
+ ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0);
+ pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
+
+ if (!binary_path)
+ return libbpf_err_ptr(-EINVAL);
+
+ /* Check if "binary_path" refers to an archive. */
+ archive_sep = strstr(binary_path, "!/");
+ if (archive_sep) {
+ full_path[0] = '\0';
+ libbpf_strlcpy(full_path, binary_path,
+ min(sizeof(full_path), (size_t)(archive_sep - binary_path + 1)));
+ archive_path = full_path;
+ binary_path = archive_sep + 2;
+ } else if (!strchr(binary_path, '/')) {
+ err = resolve_full_path(binary_path, full_path, sizeof(full_path));
+ if (err) {
+ pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
+ prog->name, binary_path, err);
+ return libbpf_err_ptr(err);
+ }
+ binary_path = full_path;
+ }
+ func_name = OPTS_GET(opts, func_name, NULL);
+ if (func_name) {
+ long sym_off;
+
+ if (archive_path) {
+ sym_off = elf_find_func_offset_from_archive(archive_path, binary_path,
+ func_name);
+ binary_path = archive_path;
+ } else {
+ sym_off = elf_find_func_offset_from_file(binary_path, func_name);
+ }
+ if (sym_off < 0)
+ return libbpf_err_ptr(sym_off);
+ func_offset += sym_off;
+ }
- pfd = perf_event_open_probe(true /* uprobe */, retprobe,
- binary_path, func_offset, pid);
+ legacy = determine_uprobe_perf_type() < 0;
+ switch (attach_mode) {
+ case PROBE_ATTACH_MODE_LEGACY:
+ legacy = true;
+ pe_opts.force_ioctl_attach = true;
+ break;
+ case PROBE_ATTACH_MODE_PERF:
+ if (legacy)
+ return libbpf_err_ptr(-ENOTSUP);
+ pe_opts.force_ioctl_attach = true;
+ break;
+ case PROBE_ATTACH_MODE_LINK:
+ if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK))
+ return libbpf_err_ptr(-ENOTSUP);
+ break;
+ case PROBE_ATTACH_MODE_DEFAULT:
+ break;
+ default:
+ return libbpf_err_ptr(-EINVAL);
+ }
+
+ if (!legacy) {
+ pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path,
+ func_offset, pid, ref_ctr_off);
+ } else {
+ char probe_name[PATH_MAX + 64];
+
+ if (ref_ctr_off)
+ return libbpf_err_ptr(-EINVAL);
+
+ gen_uprobe_legacy_event_name(probe_name, sizeof(probe_name),
+ binary_path, func_offset);
+
+ legacy_probe = strdup(probe_name);
+ if (!legacy_probe)
+ return libbpf_err_ptr(-ENOMEM);
+
+ pfd = perf_event_uprobe_open_legacy(legacy_probe, retprobe,
+ binary_path, func_offset, pid);
+ }
if (pfd < 0) {
+ err = -errno;
pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
prog->name, retprobe ? "uretprobe" : "uprobe",
binary_path, func_offset,
- libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
- return ERR_PTR(pfd);
+ libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+ goto err_out;
}
- link = bpf_program__attach_perf_event(prog, pfd);
- if (IS_ERR(link)) {
+
+ link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
+ err = libbpf_get_error(link);
+ if (err) {
close(pfd);
- err = PTR_ERR(link);
pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n",
prog->name, retprobe ? "uretprobe" : "uprobe",
binary_path, func_offset,
libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
- return link;
+ goto err_clean_legacy;
+ }
+ if (legacy) {
+ struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
+
+ perf_link->legacy_probe_name = legacy_probe;
+ perf_link->legacy_is_kprobe = false;
+ perf_link->legacy_is_retprobe = retprobe;
}
return link;
+
+err_clean_legacy:
+ if (legacy)
+ remove_uprobe_event_legacy(legacy_probe, retprobe);
+err_out:
+ free(legacy_probe);
+ return libbpf_err_ptr(err);
+}
+
+/* Format of u[ret]probe section definition supporting auto-attach:
+ * u[ret]probe/binary:function[+offset]
+ *
+ * binary can be an absolute/relative path or a filename; the latter is resolved to a
+ * full binary path via bpf_program__attach_uprobe_opts.
+ *
+ * Specifying uprobe+ ensures we carry out strict matching; either "uprobe" must be
+ * specified (and auto-attach is not possible) or the above format is specified for
+ * auto-attach.
+ */
+static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
+{
+ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
+ char *probe_type = NULL, *binary_path = NULL, *func_name = NULL, *func_off;
+ int n, c, ret = -EINVAL;
+ long offset = 0;
+
+ *link = NULL;
+
+ n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]",
+ &probe_type, &binary_path, &func_name);
+ switch (n) {
+ case 1:
+ /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
+ ret = 0;
+ break;
+ case 2:
+ pr_warn("prog '%s': section '%s' missing ':function[+offset]' specification\n",
+ prog->name, prog->sec_name);
+ break;
+ case 3:
+ /* check if user specifies `+offset`, if yes, this should be
+ * the last part of the string, make sure sscanf read to EOL
+ */
+ func_off = strrchr(func_name, '+');
+ if (func_off) {
+ n = sscanf(func_off, "+%li%n", &offset, &c);
+ if (n == 1 && *(func_off + c) == '\0')
+ func_off[0] = '\0';
+ else
+ offset = 0;
+ }
+ opts.retprobe = strcmp(probe_type, "uretprobe") == 0 ||
+ strcmp(probe_type, "uretprobe.s") == 0;
+ if (opts.retprobe && offset != 0) {
+ pr_warn("prog '%s': uretprobes do not support offset specification\n",
+ prog->name);
+ break;
+ }
+ opts.func_name = func_name;
+ *link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts);
+ ret = libbpf_get_error(*link);
+ break;
+ default:
+ pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name,
+ prog->sec_name);
+ break;
+ }
+ free(probe_type);
+ free(binary_path);
+ free(func_name);
+
+ return ret;
+}
+
+struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog,
+ bool retprobe, pid_t pid,
+ const char *binary_path,
+ size_t func_offset)
+{
+ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts, .retprobe = retprobe);
+
+ return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts);
+}
+
+struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog,
+ pid_t pid, const char *binary_path,
+ const char *usdt_provider, const char *usdt_name,
+ const struct bpf_usdt_opts *opts)
+{
+ char resolved_path[512];
+ struct bpf_object *obj = prog->obj;
+ struct bpf_link *link;
+ __u64 usdt_cookie;
+ int err;
+
+ if (!OPTS_VALID(opts, bpf_uprobe_opts))
+ return libbpf_err_ptr(-EINVAL);
+
+ if (bpf_program__fd(prog) < 0) {
+ pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
+ prog->name);
+ return libbpf_err_ptr(-EINVAL);
+ }
+
+ if (!binary_path)
+ return libbpf_err_ptr(-EINVAL);
+
+ if (!strchr(binary_path, '/')) {
+ err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path));
+ if (err) {
+ pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
+ prog->name, binary_path, err);
+ return libbpf_err_ptr(err);
+ }
+ binary_path = resolved_path;
+ }
+
+ /* USDT manager is instantiated lazily on first USDT attach. It will
+ * be destroyed together with BPF object in bpf_object__close().
+ */
+ if (IS_ERR(obj->usdt_man))
+ return libbpf_ptr(obj->usdt_man);
+ if (!obj->usdt_man) {
+ obj->usdt_man = usdt_manager_new(obj);
+ if (IS_ERR(obj->usdt_man))
+ return libbpf_ptr(obj->usdt_man);
+ }
+
+ usdt_cookie = OPTS_GET(opts, usdt_cookie, 0);
+ link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path,
+ usdt_provider, usdt_name, usdt_cookie);
+ err = libbpf_get_error(link);
+ if (err)
+ return libbpf_err_ptr(err);
+ return link;
+}
+
+static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link)
+{
+ char *path = NULL, *provider = NULL, *name = NULL;
+ const char *sec_name;
+ int n, err;
+
+ sec_name = bpf_program__section_name(prog);
+ if (strcmp(sec_name, "usdt") == 0) {
+ /* no auto-attach for just SEC("usdt") */
+ *link = NULL;
+ return 0;
+ }
+
+ n = sscanf(sec_name, "usdt/%m[^:]:%m[^:]:%m[^:]", &path, &provider, &name);
+ if (n != 3) {
+ pr_warn("invalid section '%s', expected SEC(\"usdt/<path>:<provider>:<name>\")\n",
+ sec_name);
+ err = -EINVAL;
+ } else {
+ *link = bpf_program__attach_usdt(prog, -1 /* any process */, path,
+ provider, name, NULL);
+ err = libbpf_get_error(*link);
+ }
+ free(path);
+ free(provider);
+ free(name);
+ return err;
}
static int determine_tracepoint_id(const char *tp_category,
@@ -9427,9 +12149,8 @@ static int determine_tracepoint_id(const char *tp_category,
char file[PATH_MAX];
int ret;
- ret = snprintf(file, sizeof(file),
- "/sys/kernel/debug/tracing/events/%s/%s/id",
- tp_category, tp_name);
+ ret = snprintf(file, sizeof(file), "%s/events/%s/%s/id",
+ tracefs_path(), tp_category, tp_name);
if (ret < 0)
return -errno;
if (ret >= sizeof(file)) {
@@ -9443,7 +12164,8 @@ static int determine_tracepoint_id(const char *tp_category,
static int perf_event_open_tracepoint(const char *tp_category,
const char *tp_name)
{
- struct perf_event_attr attr = {};
+ const size_t attr_sz = sizeof(struct perf_event_attr);
+ struct perf_event_attr attr;
char errmsg[STRERR_BUFSIZE];
int tp_id, pfd, err;
@@ -9455,8 +12177,9 @@ static int perf_event_open_tracepoint(const char *tp_category,
return tp_id;
}
+ memset(&attr, 0, attr_sz);
attr.type = PERF_TYPE_TRACEPOINT;
- attr.size = sizeof(attr);
+ attr.size = attr_sz;
attr.config = tp_id;
pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
@@ -9471,60 +12194,80 @@ static int perf_event_open_tracepoint(const char *tp_category,
return pfd;
}
-struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
- const char *tp_category,
- const char *tp_name)
+struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *prog,
+ const char *tp_category,
+ const char *tp_name,
+ const struct bpf_tracepoint_opts *opts)
{
+ DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
char errmsg[STRERR_BUFSIZE];
struct bpf_link *link;
int pfd, err;
+ if (!OPTS_VALID(opts, bpf_tracepoint_opts))
+ return libbpf_err_ptr(-EINVAL);
+
+ pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
+
pfd = perf_event_open_tracepoint(tp_category, tp_name);
if (pfd < 0) {
pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
prog->name, tp_category, tp_name,
libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
- return ERR_PTR(pfd);
+ return libbpf_err_ptr(pfd);
}
- link = bpf_program__attach_perf_event(prog, pfd);
- if (IS_ERR(link)) {
+ link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
+ err = libbpf_get_error(link);
+ if (err) {
close(pfd);
- err = PTR_ERR(link);
pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n",
prog->name, tp_category, tp_name,
libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
- return link;
+ return libbpf_err_ptr(err);
}
return link;
}
-static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
- struct bpf_program *prog)
+struct bpf_link *bpf_program__attach_tracepoint(const struct bpf_program *prog,
+ const char *tp_category,
+ const char *tp_name)
+{
+ return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL);
+}
+
+static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
char *sec_name, *tp_cat, *tp_name;
- struct bpf_link *link;
+
+ *link = NULL;
+
+ /* no auto-attach for SEC("tp") or SEC("tracepoint") */
+ if (strcmp(prog->sec_name, "tp") == 0 || strcmp(prog->sec_name, "tracepoint") == 0)
+ return 0;
sec_name = strdup(prog->sec_name);
if (!sec_name)
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
- /* extract "tp/<category>/<name>" */
- tp_cat = sec_name + sec->len;
+ /* extract "tp/<category>/<name>" or "tracepoint/<category>/<name>" */
+ if (str_has_pfx(prog->sec_name, "tp/"))
+ tp_cat = sec_name + sizeof("tp/") - 1;
+ else
+ tp_cat = sec_name + sizeof("tracepoint/") - 1;
tp_name = strchr(tp_cat, '/');
if (!tp_name) {
- link = ERR_PTR(-EINVAL);
- goto out;
+ free(sec_name);
+ return -EINVAL;
}
*tp_name = '\0';
tp_name++;
- link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name);
-out:
+ *link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name);
free(sec_name);
- return link;
+ return libbpf_get_error(*link);
}
-struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
+struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *prog,
const char *tp_name)
{
char errmsg[STRERR_BUFSIZE];
@@ -9534,12 +12277,12 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
pr_warn("prog '%s': can't attach before loaded\n", prog->name);
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
}
link = calloc(1, sizeof(*link));
if (!link)
- return ERR_PTR(-ENOMEM);
+ return libbpf_err_ptr(-ENOMEM);
link->detach = &bpf_link__detach_fd;
pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
@@ -9548,84 +12291,123 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
free(link);
pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n",
prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
- return ERR_PTR(pfd);
+ return libbpf_err_ptr(pfd);
}
link->fd = pfd;
return link;
}
-static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
- struct bpf_program *prog)
+static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
- const char *tp_name = prog->sec_name + sec->len;
+ static const char *const prefixes[] = {
+ "raw_tp",
+ "raw_tracepoint",
+ "raw_tp.w",
+ "raw_tracepoint.w",
+ };
+ size_t i;
+ const char *tp_name = NULL;
+
+ *link = NULL;
+
+ for (i = 0; i < ARRAY_SIZE(prefixes); i++) {
+ size_t pfx_len;
+
+ if (!str_has_pfx(prog->sec_name, prefixes[i]))
+ continue;
- return bpf_program__attach_raw_tracepoint(prog, tp_name);
+ pfx_len = strlen(prefixes[i]);
+ /* no auto-attach case of, e.g., SEC("raw_tp") */
+ if (prog->sec_name[pfx_len] == '\0')
+ return 0;
+
+ if (prog->sec_name[pfx_len] != '/')
+ continue;
+
+ tp_name = prog->sec_name + pfx_len + 1;
+ break;
+ }
+
+ if (!tp_name) {
+ pr_warn("prog '%s': invalid section name '%s'\n",
+ prog->name, prog->sec_name);
+ return -EINVAL;
+ }
+
+ *link = bpf_program__attach_raw_tracepoint(prog, tp_name);
+ return libbpf_get_error(*link);
}
/* Common logic for all BPF program types that attach to a btf_id */
-static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog)
+static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog,
+ const struct bpf_trace_opts *opts)
{
+ LIBBPF_OPTS(bpf_link_create_opts, link_opts);
char errmsg[STRERR_BUFSIZE];
struct bpf_link *link;
int prog_fd, pfd;
+ if (!OPTS_VALID(opts, bpf_trace_opts))
+ return libbpf_err_ptr(-EINVAL);
+
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
pr_warn("prog '%s': can't attach before loaded\n", prog->name);
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
}
link = calloc(1, sizeof(*link));
if (!link)
- return ERR_PTR(-ENOMEM);
+ return libbpf_err_ptr(-ENOMEM);
link->detach = &bpf_link__detach_fd;
- pfd = bpf_raw_tracepoint_open(NULL, prog_fd);
+ /* libbpf is smart enough to redirect to BPF_RAW_TRACEPOINT_OPEN on old kernels */
+ link_opts.tracing.cookie = OPTS_GET(opts, cookie, 0);
+ pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), &link_opts);
if (pfd < 0) {
pfd = -errno;
free(link);
pr_warn("prog '%s': failed to attach: %s\n",
prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
- return ERR_PTR(pfd);
+ return libbpf_err_ptr(pfd);
}
link->fd = pfd;
- return (struct bpf_link *)link;
+ return link;
}
-struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog)
+struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog)
{
- return bpf_program__attach_btf_id(prog);
+ return bpf_program__attach_btf_id(prog, NULL);
}
-struct bpf_link *bpf_program__attach_lsm(struct bpf_program *prog)
+struct bpf_link *bpf_program__attach_trace_opts(const struct bpf_program *prog,
+ const struct bpf_trace_opts *opts)
{
- return bpf_program__attach_btf_id(prog);
+ return bpf_program__attach_btf_id(prog, opts);
}
-static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
- struct bpf_program *prog)
+struct bpf_link *bpf_program__attach_lsm(const struct bpf_program *prog)
{
- return bpf_program__attach_trace(prog);
+ return bpf_program__attach_btf_id(prog, NULL);
}
-static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
- struct bpf_program *prog)
+static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
- return bpf_program__attach_lsm(prog);
+ *link = bpf_program__attach_trace(prog);
+ return libbpf_get_error(*link);
}
-static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
- struct bpf_program *prog)
+static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
- return bpf_program__attach_iter(prog, NULL);
+ *link = bpf_program__attach_lsm(prog);
+ return libbpf_get_error(*link);
}
static struct bpf_link *
-bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id,
- const char *target_name)
+bpf_program_attach_fd(const struct bpf_program *prog,
+ int target_fd, const char *target_name,
+ const struct bpf_link_create_opts *opts)
{
- DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts,
- .target_btf_id = btf_id);
enum bpf_attach_type attach_type;
char errmsg[STRERR_BUFSIZE];
struct bpf_link *link;
@@ -9634,47 +12416,116 @@ bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id,
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
pr_warn("prog '%s': can't attach before loaded\n", prog->name);
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
}
link = calloc(1, sizeof(*link));
if (!link)
- return ERR_PTR(-ENOMEM);
+ return libbpf_err_ptr(-ENOMEM);
link->detach = &bpf_link__detach_fd;
- attach_type = bpf_program__get_expected_attach_type(prog);
- link_fd = bpf_link_create(prog_fd, target_fd, attach_type, &opts);
+ attach_type = bpf_program__expected_attach_type(prog);
+ link_fd = bpf_link_create(prog_fd, target_fd, attach_type, opts);
if (link_fd < 0) {
link_fd = -errno;
free(link);
pr_warn("prog '%s': failed to attach to %s: %s\n",
prog->name, target_name,
libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
- return ERR_PTR(link_fd);
+ return libbpf_err_ptr(link_fd);
}
link->fd = link_fd;
return link;
}
struct bpf_link *
-bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
+bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd)
{
- return bpf_program__attach_fd(prog, cgroup_fd, 0, "cgroup");
+ return bpf_program_attach_fd(prog, cgroup_fd, "cgroup", NULL);
}
struct bpf_link *
-bpf_program__attach_netns(struct bpf_program *prog, int netns_fd)
+bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd)
+{
+ return bpf_program_attach_fd(prog, netns_fd, "netns", NULL);
+}
+
+struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex)
{
- return bpf_program__attach_fd(prog, netns_fd, 0, "netns");
+ /* target_fd/target_ifindex use the same field in LINK_CREATE */
+ return bpf_program_attach_fd(prog, ifindex, "xdp", NULL);
}
-struct bpf_link *bpf_program__attach_xdp(struct bpf_program *prog, int ifindex)
+struct bpf_link *
+bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex,
+ const struct bpf_tcx_opts *opts)
{
+ LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
+ __u32 relative_id;
+ int relative_fd;
+
+ if (!OPTS_VALID(opts, bpf_tcx_opts))
+ return libbpf_err_ptr(-EINVAL);
+
+ relative_id = OPTS_GET(opts, relative_id, 0);
+ relative_fd = OPTS_GET(opts, relative_fd, 0);
+
+ /* validate we don't have unexpected combinations of non-zero fields */
+ if (!ifindex) {
+ pr_warn("prog '%s': target netdevice ifindex cannot be zero\n",
+ prog->name);
+ return libbpf_err_ptr(-EINVAL);
+ }
+ if (relative_fd && relative_id) {
+ pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n",
+ prog->name);
+ return libbpf_err_ptr(-EINVAL);
+ }
+
+ link_create_opts.tcx.expected_revision = OPTS_GET(opts, expected_revision, 0);
+ link_create_opts.tcx.relative_fd = relative_fd;
+ link_create_opts.tcx.relative_id = relative_id;
+ link_create_opts.flags = OPTS_GET(opts, flags, 0);
+
/* target_fd/target_ifindex use the same field in LINK_CREATE */
- return bpf_program__attach_fd(prog, ifindex, 0, "xdp");
+ return bpf_program_attach_fd(prog, ifindex, "tcx", &link_create_opts);
}
-struct bpf_link *bpf_program__attach_freplace(struct bpf_program *prog,
+struct bpf_link *
+bpf_program__attach_netkit(const struct bpf_program *prog, int ifindex,
+ const struct bpf_netkit_opts *opts)
+{
+ LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
+ __u32 relative_id;
+ int relative_fd;
+
+ if (!OPTS_VALID(opts, bpf_netkit_opts))
+ return libbpf_err_ptr(-EINVAL);
+
+ relative_id = OPTS_GET(opts, relative_id, 0);
+ relative_fd = OPTS_GET(opts, relative_fd, 0);
+
+ /* validate we don't have unexpected combinations of non-zero fields */
+ if (!ifindex) {
+ pr_warn("prog '%s': target netdevice ifindex cannot be zero\n",
+ prog->name);
+ return libbpf_err_ptr(-EINVAL);
+ }
+ if (relative_fd && relative_id) {
+ pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n",
+ prog->name);
+ return libbpf_err_ptr(-EINVAL);
+ }
+
+ link_create_opts.netkit.expected_revision = OPTS_GET(opts, expected_revision, 0);
+ link_create_opts.netkit.relative_fd = relative_fd;
+ link_create_opts.netkit.relative_id = relative_id;
+ link_create_opts.flags = OPTS_GET(opts, flags, 0);
+
+ return bpf_program_attach_fd(prog, ifindex, "netkit", &link_create_opts);
+}
+
+struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog,
int target_fd,
const char *attach_func_name)
{
@@ -9683,21 +12534,26 @@ struct bpf_link *bpf_program__attach_freplace(struct bpf_program *prog,
if (!!target_fd != !!attach_func_name) {
pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n",
prog->name);
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
}
if (prog->type != BPF_PROG_TYPE_EXT) {
pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace",
prog->name);
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
}
if (target_fd) {
+ LIBBPF_OPTS(bpf_link_create_opts, target_opts);
+
btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd);
if (btf_id < 0)
- return ERR_PTR(btf_id);
+ return libbpf_err_ptr(btf_id);
- return bpf_program__attach_fd(prog, target_fd, btf_id, "freplace");
+ target_opts.target_btf_id = btf_id;
+
+ return bpf_program_attach_fd(prog, target_fd, "freplace",
+ &target_opts);
} else {
/* no target, so use raw_tracepoint_open for compatibility
* with old kernels
@@ -9707,7 +12563,7 @@ struct bpf_link *bpf_program__attach_freplace(struct bpf_program *prog,
}
struct bpf_link *
-bpf_program__attach_iter(struct bpf_program *prog,
+bpf_program__attach_iter(const struct bpf_program *prog,
const struct bpf_iter_attach_opts *opts)
{
DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
@@ -9717,7 +12573,7 @@ bpf_program__attach_iter(struct bpf_program *prog,
__u32 target_fd = 0;
if (!OPTS_VALID(opts, bpf_iter_attach_opts))
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);
@@ -9725,12 +12581,12 @@ bpf_program__attach_iter(struct bpf_program *prog,
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
pr_warn("prog '%s': can't attach before loaded\n", prog->name);
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
}
link = calloc(1, sizeof(*link));
if (!link)
- return ERR_PTR(-ENOMEM);
+ return libbpf_err_ptr(-ENOMEM);
link->detach = &bpf_link__detach_fd;
link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER,
@@ -9740,78 +12596,190 @@ bpf_program__attach_iter(struct bpf_program *prog,
free(link);
pr_warn("prog '%s': failed to attach to iterator: %s\n",
prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
- return ERR_PTR(link_fd);
+ return libbpf_err_ptr(link_fd);
}
link->fd = link_fd;
return link;
}
-struct bpf_link *bpf_program__attach(struct bpf_program *prog)
+static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
- const struct bpf_sec_def *sec_def;
+ *link = bpf_program__attach_iter(prog, NULL);
+ return libbpf_get_error(*link);
+}
+
+struct bpf_link *bpf_program__attach_netfilter(const struct bpf_program *prog,
+ const struct bpf_netfilter_opts *opts)
+{
+ LIBBPF_OPTS(bpf_link_create_opts, lopts);
+ struct bpf_link *link;
+ int prog_fd, link_fd;
+
+ if (!OPTS_VALID(opts, bpf_netfilter_opts))
+ return libbpf_err_ptr(-EINVAL);
+
+ prog_fd = bpf_program__fd(prog);
+ if (prog_fd < 0) {
+ pr_warn("prog '%s': can't attach before loaded\n", prog->name);
+ return libbpf_err_ptr(-EINVAL);
+ }
+
+ link = calloc(1, sizeof(*link));
+ if (!link)
+ return libbpf_err_ptr(-ENOMEM);
+
+ link->detach = &bpf_link__detach_fd;
+
+ lopts.netfilter.pf = OPTS_GET(opts, pf, 0);
+ lopts.netfilter.hooknum = OPTS_GET(opts, hooknum, 0);
+ lopts.netfilter.priority = OPTS_GET(opts, priority, 0);
+ lopts.netfilter.flags = OPTS_GET(opts, flags, 0);
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_NETFILTER, &lopts);
+ if (link_fd < 0) {
+ char errmsg[STRERR_BUFSIZE];
- sec_def = find_sec_def(prog->sec_name);
- if (!sec_def || !sec_def->attach_fn)
- return ERR_PTR(-ESRCH);
+ link_fd = -errno;
+ free(link);
+ pr_warn("prog '%s': failed to attach to netfilter: %s\n",
+ prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
+ return libbpf_err_ptr(link_fd);
+ }
+ link->fd = link_fd;
- return sec_def->attach_fn(sec_def, prog);
+ return link;
}
+struct bpf_link *bpf_program__attach(const struct bpf_program *prog)
+{
+ struct bpf_link *link = NULL;
+ int err;
+
+ if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
+ return libbpf_err_ptr(-EOPNOTSUPP);
+
+ err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, &link);
+ if (err)
+ return libbpf_err_ptr(err);
+
+ /* When calling bpf_program__attach() explicitly, auto-attach support
+ * is expected to work, so NULL returned link is considered an error.
+ * This is different for skeleton's attach, see comment in
+ * bpf_object__attach_skeleton().
+ */
+ if (!link)
+ return libbpf_err_ptr(-EOPNOTSUPP);
+
+ return link;
+}
+
+struct bpf_link_struct_ops {
+ struct bpf_link link;
+ int map_fd;
+};
+
static int bpf_link__detach_struct_ops(struct bpf_link *link)
{
+ struct bpf_link_struct_ops *st_link;
__u32 zero = 0;
- if (bpf_map_delete_elem(link->fd, &zero))
- return -errno;
+ st_link = container_of(link, struct bpf_link_struct_ops, link);
- return 0;
+ if (st_link->map_fd < 0)
+ /* w/o a real link */
+ return bpf_map_delete_elem(link->fd, &zero);
+
+ return close(link->fd);
}
-struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map)
+struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map)
{
- struct bpf_struct_ops *st_ops;
- struct bpf_link *link;
- __u32 i, zero = 0;
- int err;
+ struct bpf_link_struct_ops *link;
+ __u32 zero = 0;
+ int err, fd;
if (!bpf_map__is_struct_ops(map) || map->fd == -1)
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
link = calloc(1, sizeof(*link));
if (!link)
- return ERR_PTR(-EINVAL);
-
- st_ops = map->st_ops;
- for (i = 0; i < btf_vlen(st_ops->type); i++) {
- struct bpf_program *prog = st_ops->progs[i];
- void *kern_data;
- int prog_fd;
+ return libbpf_err_ptr(-EINVAL);
+
+ /* kern_vdata should be prepared during the loading phase. */
+ err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
+ /* It can be EBUSY if the map has been used to create or
+ * update a link before. We don't allow updating the value of
+ * a struct_ops once it is set. That ensures that the value
+ * never changed. So, it is safe to skip EBUSY.
+ */
+ if (err && (!(map->def.map_flags & BPF_F_LINK) || err != -EBUSY)) {
+ free(link);
+ return libbpf_err_ptr(err);
+ }
- if (!prog)
- continue;
+ link->link.detach = bpf_link__detach_struct_ops;
- prog_fd = bpf_program__fd(prog);
- kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i];
- *(unsigned long *)kern_data = prog_fd;
+ if (!(map->def.map_flags & BPF_F_LINK)) {
+ /* w/o a real link */
+ link->link.fd = map->fd;
+ link->map_fd = -1;
+ return &link->link;
}
- err = bpf_map_update_elem(map->fd, &zero, st_ops->kern_vdata, 0);
- if (err) {
- err = -errno;
+ fd = bpf_link_create(map->fd, 0, BPF_STRUCT_OPS, NULL);
+ if (fd < 0) {
free(link);
- return ERR_PTR(err);
+ return libbpf_err_ptr(fd);
}
- link->detach = bpf_link__detach_struct_ops;
- link->fd = map->fd;
+ link->link.fd = fd;
+ link->map_fd = map->fd;
- return link;
+ return &link->link;
}
-enum bpf_perf_event_ret
-bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
- void **copy_mem, size_t *copy_size,
- bpf_perf_event_print_t fn, void *private_data)
+/*
+ * Swap the back struct_ops of a link with a new struct_ops map.
+ */
+int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map)
+{
+ struct bpf_link_struct_ops *st_ops_link;
+ __u32 zero = 0;
+ int err;
+
+ if (!bpf_map__is_struct_ops(map) || !map_is_created(map))
+ return -EINVAL;
+
+ st_ops_link = container_of(link, struct bpf_link_struct_ops, link);
+ /* Ensure the type of a link is correct */
+ if (st_ops_link->map_fd < 0)
+ return -EINVAL;
+
+ err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
+ /* It can be EBUSY if the map has been used to create or
+ * update a link before. We don't allow updating the value of
+ * a struct_ops once it is set. That ensures that the value
+ * never changed. So, it is safe to skip EBUSY.
+ */
+ if (err && err != -EBUSY)
+ return err;
+
+ err = bpf_link_update(link->fd, map->fd, NULL);
+ if (err < 0)
+ return err;
+
+ st_ops_link->map_fd = map->fd;
+
+ return 0;
+}
+
+typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr,
+ void *private_data);
+
+static enum bpf_perf_event_ret
+perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
+ void **copy_mem, size_t *copy_size,
+ bpf_perf_event_print_t fn, void *private_data)
{
struct perf_event_mmap_page *header = mmap_mem;
__u64 data_head = ring_buffer_read_head(header);
@@ -9853,7 +12821,7 @@ bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
}
ring_buffer_write_tail(header, data_tail);
- return ret;
+ return libbpf_err(ret);
}
struct perf_buffer;
@@ -9990,39 +12958,60 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
struct perf_buffer_params *p);
struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
+ perf_buffer_sample_fn sample_cb,
+ perf_buffer_lost_fn lost_cb,
+ void *ctx,
const struct perf_buffer_opts *opts)
{
+ const size_t attr_sz = sizeof(struct perf_event_attr);
struct perf_buffer_params p = {};
- struct perf_event_attr attr = { 0, };
+ struct perf_event_attr attr;
+ __u32 sample_period;
+
+ if (!OPTS_VALID(opts, perf_buffer_opts))
+ return libbpf_err_ptr(-EINVAL);
+ sample_period = OPTS_GET(opts, sample_period, 1);
+ if (!sample_period)
+ sample_period = 1;
+
+ memset(&attr, 0, attr_sz);
+ attr.size = attr_sz;
attr.config = PERF_COUNT_SW_BPF_OUTPUT;
attr.type = PERF_TYPE_SOFTWARE;
attr.sample_type = PERF_SAMPLE_RAW;
- attr.sample_period = 1;
- attr.wakeup_events = 1;
+ attr.sample_period = sample_period;
+ attr.wakeup_events = sample_period;
p.attr = &attr;
- p.sample_cb = opts ? opts->sample_cb : NULL;
- p.lost_cb = opts ? opts->lost_cb : NULL;
- p.ctx = opts ? opts->ctx : NULL;
+ p.sample_cb = sample_cb;
+ p.lost_cb = lost_cb;
+ p.ctx = ctx;
- return __perf_buffer__new(map_fd, page_cnt, &p);
+ return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
}
-struct perf_buffer *
-perf_buffer__new_raw(int map_fd, size_t page_cnt,
- const struct perf_buffer_raw_opts *opts)
+struct perf_buffer *perf_buffer__new_raw(int map_fd, size_t page_cnt,
+ struct perf_event_attr *attr,
+ perf_buffer_event_fn event_cb, void *ctx,
+ const struct perf_buffer_raw_opts *opts)
{
struct perf_buffer_params p = {};
- p.attr = opts->attr;
- p.event_cb = opts->event_cb;
- p.ctx = opts->ctx;
- p.cpu_cnt = opts->cpu_cnt;
- p.cpus = opts->cpus;
- p.map_keys = opts->map_keys;
+ if (!attr)
+ return libbpf_err_ptr(-EINVAL);
+
+ if (!OPTS_VALID(opts, perf_buffer_raw_opts))
+ return libbpf_err_ptr(-EINVAL);
+
+ p.attr = attr;
+ p.event_cb = event_cb;
+ p.ctx = ctx;
+ p.cpu_cnt = OPTS_GET(opts, cpu_cnt, 0);
+ p.cpus = OPTS_GET(opts, cpus, NULL);
+ p.map_keys = OPTS_GET(opts, map_keys, NULL);
- return __perf_buffer__new(map_fd, page_cnt, &p);
+ return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
}
static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
@@ -10036,7 +13025,7 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
__u32 map_info_len;
int err, i, j, n;
- if (page_cnt & (page_cnt - 1)) {
+ if (page_cnt == 0 || (page_cnt & (page_cnt - 1))) {
pr_warn("page count should be power of two, but is %zu\n",
page_cnt);
return ERR_PTR(-EINVAL);
@@ -10045,7 +13034,7 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
/* best-effort sanity checks */
memset(&map, 0, sizeof(map));
map_info_len = sizeof(map);
- err = bpf_obj_get_info_by_fd(map_fd, &map, &map_info_len);
+ err = bpf_map_get_info_by_fd(map_fd, &map, &map_info_len);
if (err) {
err = -errno;
/* if BPF_OBJ_GET_INFO_BY_FD is supported, will return
@@ -10224,10 +13213,10 @@ static int perf_buffer__process_records(struct perf_buffer *pb,
{
enum bpf_perf_event_ret ret;
- ret = bpf_perf_event_read_simple(cpu_buf->base, pb->mmap_size,
- pb->page_size, &cpu_buf->buf,
- &cpu_buf->buf_size,
- perf_buffer__process_record, cpu_buf);
+ ret = perf_event_read_simple(cpu_buf->base, pb->mmap_size,
+ pb->page_size, &cpu_buf->buf,
+ &cpu_buf->buf_size,
+ perf_buffer__process_record, cpu_buf);
if (ret != LIBBPF_PERF_EVENT_CONT)
return ret;
return 0;
@@ -10243,16 +13232,19 @@ int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
int i, cnt, err;
cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
+ if (cnt < 0)
+ return -errno;
+
for (i = 0; i < cnt; i++) {
struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
err = perf_buffer__process_records(pb, cpu_buf);
if (err) {
pr_warn("error while processing records: %d\n", err);
- return err;
+ return libbpf_err(err);
}
}
- return cnt < 0 ? -errno : cnt;
+ return cnt;
}
/* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer
@@ -10273,15 +13265,31 @@ int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx)
struct perf_cpu_buf *cpu_buf;
if (buf_idx >= pb->cpu_cnt)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
cpu_buf = pb->cpu_bufs[buf_idx];
if (!cpu_buf)
- return -ENOENT;
+ return libbpf_err(-ENOENT);
return cpu_buf->fd;
}
+int perf_buffer__buffer(struct perf_buffer *pb, int buf_idx, void **buf, size_t *buf_size)
+{
+ struct perf_cpu_buf *cpu_buf;
+
+ if (buf_idx >= pb->cpu_cnt)
+ return libbpf_err(-EINVAL);
+
+ cpu_buf = pb->cpu_bufs[buf_idx];
+ if (!cpu_buf)
+ return libbpf_err(-ENOENT);
+
+ *buf = cpu_buf->base;
+ *buf_size = pb->mmap_size;
+ return 0;
+}
+
/*
* Consume data from perf ring buffer corresponding to slot *buf_idx* in
* PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to
@@ -10295,11 +13303,11 @@ int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx)
struct perf_cpu_buf *cpu_buf;
if (buf_idx >= pb->cpu_cnt)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
cpu_buf = pb->cpu_bufs[buf_idx];
if (!cpu_buf)
- return -ENOENT;
+ return libbpf_err(-ENOENT);
return perf_buffer__process_records(pb, cpu_buf);
}
@@ -10317,280 +13325,54 @@ int perf_buffer__consume(struct perf_buffer *pb)
err = perf_buffer__process_records(pb, cpu_buf);
if (err) {
pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err);
- return err;
+ return libbpf_err(err);
}
}
return 0;
}
-struct bpf_prog_info_array_desc {
- int array_offset; /* e.g. offset of jited_prog_insns */
- int count_offset; /* e.g. offset of jited_prog_len */
- int size_offset; /* > 0: offset of rec size,
- * < 0: fix size of -size_offset
- */
-};
-
-static struct bpf_prog_info_array_desc bpf_prog_info_array_desc[] = {
- [BPF_PROG_INFO_JITED_INSNS] = {
- offsetof(struct bpf_prog_info, jited_prog_insns),
- offsetof(struct bpf_prog_info, jited_prog_len),
- -1,
- },
- [BPF_PROG_INFO_XLATED_INSNS] = {
- offsetof(struct bpf_prog_info, xlated_prog_insns),
- offsetof(struct bpf_prog_info, xlated_prog_len),
- -1,
- },
- [BPF_PROG_INFO_MAP_IDS] = {
- offsetof(struct bpf_prog_info, map_ids),
- offsetof(struct bpf_prog_info, nr_map_ids),
- -(int)sizeof(__u32),
- },
- [BPF_PROG_INFO_JITED_KSYMS] = {
- offsetof(struct bpf_prog_info, jited_ksyms),
- offsetof(struct bpf_prog_info, nr_jited_ksyms),
- -(int)sizeof(__u64),
- },
- [BPF_PROG_INFO_JITED_FUNC_LENS] = {
- offsetof(struct bpf_prog_info, jited_func_lens),
- offsetof(struct bpf_prog_info, nr_jited_func_lens),
- -(int)sizeof(__u32),
- },
- [BPF_PROG_INFO_FUNC_INFO] = {
- offsetof(struct bpf_prog_info, func_info),
- offsetof(struct bpf_prog_info, nr_func_info),
- offsetof(struct bpf_prog_info, func_info_rec_size),
- },
- [BPF_PROG_INFO_LINE_INFO] = {
- offsetof(struct bpf_prog_info, line_info),
- offsetof(struct bpf_prog_info, nr_line_info),
- offsetof(struct bpf_prog_info, line_info_rec_size),
- },
- [BPF_PROG_INFO_JITED_LINE_INFO] = {
- offsetof(struct bpf_prog_info, jited_line_info),
- offsetof(struct bpf_prog_info, nr_jited_line_info),
- offsetof(struct bpf_prog_info, jited_line_info_rec_size),
- },
- [BPF_PROG_INFO_PROG_TAGS] = {
- offsetof(struct bpf_prog_info, prog_tags),
- offsetof(struct bpf_prog_info, nr_prog_tags),
- -(int)sizeof(__u8) * BPF_TAG_SIZE,
- },
-
-};
-
-static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info,
- int offset)
-{
- __u32 *array = (__u32 *)info;
-
- if (offset >= 0)
- return array[offset / sizeof(__u32)];
- return -(int)offset;
-}
-
-static __u64 bpf_prog_info_read_offset_u64(struct bpf_prog_info *info,
- int offset)
-{
- __u64 *array = (__u64 *)info;
-
- if (offset >= 0)
- return array[offset / sizeof(__u64)];
- return -(int)offset;
-}
-
-static void bpf_prog_info_set_offset_u32(struct bpf_prog_info *info, int offset,
- __u32 val)
-{
- __u32 *array = (__u32 *)info;
-
- if (offset >= 0)
- array[offset / sizeof(__u32)] = val;
-}
-
-static void bpf_prog_info_set_offset_u64(struct bpf_prog_info *info, int offset,
- __u64 val)
-{
- __u64 *array = (__u64 *)info;
-
- if (offset >= 0)
- array[offset / sizeof(__u64)] = val;
-}
-
-struct bpf_prog_info_linear *
-bpf_program__get_prog_info_linear(int fd, __u64 arrays)
-{
- struct bpf_prog_info_linear *info_linear;
- struct bpf_prog_info info = {};
- __u32 info_len = sizeof(info);
- __u32 data_len = 0;
- int i, err;
- void *ptr;
-
- if (arrays >> BPF_PROG_INFO_LAST_ARRAY)
- return ERR_PTR(-EINVAL);
-
- /* step 1: get array dimensions */
- err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
- if (err) {
- pr_debug("can't get prog info: %s", strerror(errno));
- return ERR_PTR(-EFAULT);
- }
-
- /* step 2: calculate total size of all arrays */
- for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
- bool include_array = (arrays & (1UL << i)) > 0;
- struct bpf_prog_info_array_desc *desc;
- __u32 count, size;
-
- desc = bpf_prog_info_array_desc + i;
-
- /* kernel is too old to support this field */
- if (info_len < desc->array_offset + sizeof(__u32) ||
- info_len < desc->count_offset + sizeof(__u32) ||
- (desc->size_offset > 0 && info_len < desc->size_offset))
- include_array = false;
-
- if (!include_array) {
- arrays &= ~(1UL << i); /* clear the bit */
- continue;
- }
-
- count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
- size = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
-
- data_len += count * size;
- }
-
- /* step 3: allocate continuous memory */
- data_len = roundup(data_len, sizeof(__u64));
- info_linear = malloc(sizeof(struct bpf_prog_info_linear) + data_len);
- if (!info_linear)
- return ERR_PTR(-ENOMEM);
-
- /* step 4: fill data to info_linear->info */
- info_linear->arrays = arrays;
- memset(&info_linear->info, 0, sizeof(info));
- ptr = info_linear->data;
-
- for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
- struct bpf_prog_info_array_desc *desc;
- __u32 count, size;
-
- if ((arrays & (1UL << i)) == 0)
- continue;
-
- desc = bpf_prog_info_array_desc + i;
- count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
- size = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
- bpf_prog_info_set_offset_u32(&info_linear->info,
- desc->count_offset, count);
- bpf_prog_info_set_offset_u32(&info_linear->info,
- desc->size_offset, size);
- bpf_prog_info_set_offset_u64(&info_linear->info,
- desc->array_offset,
- ptr_to_u64(ptr));
- ptr += count * size;
- }
-
- /* step 5: call syscall again to get required arrays */
- err = bpf_obj_get_info_by_fd(fd, &info_linear->info, &info_len);
- if (err) {
- pr_debug("can't get prog info: %s", strerror(errno));
- free(info_linear);
- return ERR_PTR(-EFAULT);
- }
-
- /* step 6: verify the data */
- for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
- struct bpf_prog_info_array_desc *desc;
- __u32 v1, v2;
-
- if ((arrays & (1UL << i)) == 0)
- continue;
-
- desc = bpf_prog_info_array_desc + i;
- v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
- v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
- desc->count_offset);
- if (v1 != v2)
- pr_warn("%s: mismatch in element count\n", __func__);
-
- v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
- v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
- desc->size_offset);
- if (v1 != v2)
- pr_warn("%s: mismatch in rec size\n", __func__);
- }
-
- /* step 7: update info_len and data_len */
- info_linear->info_len = sizeof(struct bpf_prog_info);
- info_linear->data_len = data_len;
-
- return info_linear;
-}
-
-void bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear)
-{
- int i;
-
- for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
- struct bpf_prog_info_array_desc *desc;
- __u64 addr, offs;
-
- if ((info_linear->arrays & (1UL << i)) == 0)
- continue;
-
- desc = bpf_prog_info_array_desc + i;
- addr = bpf_prog_info_read_offset_u64(&info_linear->info,
- desc->array_offset);
- offs = addr - ptr_to_u64(info_linear->data);
- bpf_prog_info_set_offset_u64(&info_linear->info,
- desc->array_offset, offs);
- }
-}
-
-void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear)
-{
- int i;
-
- for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
- struct bpf_prog_info_array_desc *desc;
- __u64 addr, offs;
-
- if ((info_linear->arrays & (1UL << i)) == 0)
- continue;
-
- desc = bpf_prog_info_array_desc + i;
- offs = bpf_prog_info_read_offset_u64(&info_linear->info,
- desc->array_offset);
- addr = offs + ptr_to_u64(info_linear->data);
- bpf_prog_info_set_offset_u64(&info_linear->info,
- desc->array_offset, addr);
- }
-}
-
int bpf_program__set_attach_target(struct bpf_program *prog,
int attach_prog_fd,
const char *attach_func_name)
{
- int btf_id;
+ int btf_obj_fd = 0, btf_id = 0, err;
- if (!prog || attach_prog_fd < 0 || !attach_func_name)
- return -EINVAL;
+ if (!prog || attach_prog_fd < 0)
+ return libbpf_err(-EINVAL);
+
+ if (prog->obj->loaded)
+ return libbpf_err(-EINVAL);
+
+ if (attach_prog_fd && !attach_func_name) {
+ /* remember attach_prog_fd and let bpf_program__load() find
+ * BTF ID during the program load
+ */
+ prog->attach_prog_fd = attach_prog_fd;
+ return 0;
+ }
- if (attach_prog_fd)
+ if (attach_prog_fd) {
btf_id = libbpf_find_prog_btf_id(attach_func_name,
attach_prog_fd);
- else
- btf_id = libbpf_find_vmlinux_btf_id(attach_func_name,
- prog->expected_attach_type);
+ if (btf_id < 0)
+ return libbpf_err(btf_id);
+ } else {
+ if (!attach_func_name)
+ return libbpf_err(-EINVAL);
- if (btf_id < 0)
- return btf_id;
+ /* load btf_vmlinux, if not yet */
+ err = bpf_object__load_vmlinux_btf(prog->obj, true);
+ if (err)
+ return libbpf_err(err);
+ err = find_kernel_btf_id(prog->obj, attach_func_name,
+ prog->expected_attach_type,
+ &btf_obj_fd, &btf_id);
+ if (err)
+ return libbpf_err(err);
+ }
prog->attach_btf_id = btf_id;
+ prog->attach_btf_obj_fd = btf_obj_fd;
prog->attach_prog_fd = attach_prog_fd;
return 0;
}
@@ -10650,7 +13432,7 @@ int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz)
int fd, err = 0, len;
char buf[128];
- fd = open(fcpu, O_RDONLY);
+ fd = open(fcpu, O_RDONLY | O_CLOEXEC);
if (fd < 0) {
err = -errno;
pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err);
@@ -10685,7 +13467,7 @@ int libbpf_num_possible_cpus(void)
err = parse_cpu_mask_file(fcpu, &mask, &n);
if (err)
- return err;
+ return libbpf_err(err);
tmp_cpus = 0;
for (i = 0; i < n; i++) {
@@ -10698,6 +13480,49 @@ int libbpf_num_possible_cpus(void)
return tmp_cpus;
}
+static int populate_skeleton_maps(const struct bpf_object *obj,
+ struct bpf_map_skeleton *maps,
+ size_t map_cnt)
+{
+ int i;
+
+ for (i = 0; i < map_cnt; i++) {
+ struct bpf_map **map = maps[i].map;
+ const char *name = maps[i].name;
+ void **mmaped = maps[i].mmaped;
+
+ *map = bpf_object__find_map_by_name(obj, name);
+ if (!*map) {
+ pr_warn("failed to find skeleton map '%s'\n", name);
+ return -ESRCH;
+ }
+
+ /* externs shouldn't be pre-setup from user code */
+ if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
+ *mmaped = (*map)->mmaped;
+ }
+ return 0;
+}
+
+static int populate_skeleton_progs(const struct bpf_object *obj,
+ struct bpf_prog_skeleton *progs,
+ size_t prog_cnt)
+{
+ int i;
+
+ for (i = 0; i < prog_cnt; i++) {
+ struct bpf_program **prog = progs[i].prog;
+ const char *name = progs[i].name;
+
+ *prog = bpf_object__find_program_by_name(obj, name);
+ if (!*prog) {
+ pr_warn("failed to find skeleton program '%s'\n", name);
+ return -ESRCH;
+ }
+ }
+ return 0;
+}
+
int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
const struct bpf_object_open_opts *opts)
{
@@ -10705,7 +13530,7 @@ int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
.object_name = s->name,
);
struct bpf_object *obj;
- int i;
+ int err;
/* Attempt to preserve opts->object_name, unless overriden by user
* explicitly. Overwriting object name for skeletons is discouraged,
@@ -10720,44 +13545,99 @@ int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
}
obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts);
- if (IS_ERR(obj)) {
- pr_warn("failed to initialize skeleton BPF object '%s': %ld\n",
- s->name, PTR_ERR(obj));
- return PTR_ERR(obj);
+ err = libbpf_get_error(obj);
+ if (err) {
+ pr_warn("failed to initialize skeleton BPF object '%s': %d\n",
+ s->name, err);
+ return libbpf_err(err);
}
*s->obj = obj;
+ err = populate_skeleton_maps(obj, s->maps, s->map_cnt);
+ if (err) {
+ pr_warn("failed to populate skeleton maps for '%s': %d\n", s->name, err);
+ return libbpf_err(err);
+ }
- for (i = 0; i < s->map_cnt; i++) {
- struct bpf_map **map = s->maps[i].map;
- const char *name = s->maps[i].name;
- void **mmaped = s->maps[i].mmaped;
+ err = populate_skeleton_progs(obj, s->progs, s->prog_cnt);
+ if (err) {
+ pr_warn("failed to populate skeleton progs for '%s': %d\n", s->name, err);
+ return libbpf_err(err);
+ }
- *map = bpf_object__find_map_by_name(obj, name);
- if (!*map) {
- pr_warn("failed to find skeleton map '%s'\n", name);
- return -ESRCH;
- }
+ return 0;
+}
- /* externs shouldn't be pre-setup from user code */
- if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
- *mmaped = (*map)->mmaped;
+int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s)
+{
+ int err, len, var_idx, i;
+ const char *var_name;
+ const struct bpf_map *map;
+ struct btf *btf;
+ __u32 map_type_id;
+ const struct btf_type *map_type, *var_type;
+ const struct bpf_var_skeleton *var_skel;
+ struct btf_var_secinfo *var;
+
+ if (!s->obj)
+ return libbpf_err(-EINVAL);
+
+ btf = bpf_object__btf(s->obj);
+ if (!btf) {
+ pr_warn("subskeletons require BTF at runtime (object %s)\n",
+ bpf_object__name(s->obj));
+ return libbpf_err(-errno);
}
- for (i = 0; i < s->prog_cnt; i++) {
- struct bpf_program **prog = s->progs[i].prog;
- const char *name = s->progs[i].name;
+ err = populate_skeleton_maps(s->obj, s->maps, s->map_cnt);
+ if (err) {
+ pr_warn("failed to populate subskeleton maps: %d\n", err);
+ return libbpf_err(err);
+ }
- *prog = bpf_object__find_program_by_name(obj, name);
- if (!*prog) {
- pr_warn("failed to find skeleton program '%s'\n", name);
- return -ESRCH;
+ err = populate_skeleton_progs(s->obj, s->progs, s->prog_cnt);
+ if (err) {
+ pr_warn("failed to populate subskeleton maps: %d\n", err);
+ return libbpf_err(err);
+ }
+
+ for (var_idx = 0; var_idx < s->var_cnt; var_idx++) {
+ var_skel = &s->vars[var_idx];
+ map = *var_skel->map;
+ map_type_id = bpf_map__btf_value_type_id(map);
+ map_type = btf__type_by_id(btf, map_type_id);
+
+ if (!btf_is_datasec(map_type)) {
+ pr_warn("type for map '%1$s' is not a datasec: %2$s",
+ bpf_map__name(map),
+ __btf_kind_str(btf_kind(map_type)));
+ return libbpf_err(-EINVAL);
+ }
+
+ len = btf_vlen(map_type);
+ var = btf_var_secinfos(map_type);
+ for (i = 0; i < len; i++, var++) {
+ var_type = btf__type_by_id(btf, var->type);
+ var_name = btf__name_by_offset(btf, var_type->name_off);
+ if (strcmp(var_name, var_skel->name) == 0) {
+ *var_skel->addr = map->mmaped + var->offset;
+ break;
+ }
}
}
-
return 0;
}
+void bpf_object__destroy_subskeleton(struct bpf_object_subskeleton *s)
+{
+ if (!s)
+ return;
+ free(s->maps);
+ free(s->progs);
+ free(s->vars);
+ free(s);
+}
+
int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
{
int i, err;
@@ -10765,13 +13645,13 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
err = bpf_object__load(*s->obj);
if (err) {
pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err);
- return err;
+ return libbpf_err(err);
}
for (i = 0; i < s->map_cnt; i++) {
struct bpf_map *map = *s->maps[i].map;
size_t mmap_sz = bpf_map_mmap_sz(map);
- int prot, map_fd = bpf_map__fd(map);
+ int prot, map_fd = map->fd;
void **mmaped = s->maps[i].mmaped;
if (!mmaped)
@@ -10782,6 +13662,11 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
continue;
}
+ if (map->def.type == BPF_MAP_TYPE_ARENA) {
+ *mmaped = map->mmaped;
+ continue;
+ }
+
if (map->def.map_flags & BPF_F_RDONLY_PROG)
prot = PROT_READ;
else
@@ -10797,14 +13682,13 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
* as per normal clean up procedure, so we don't need to worry
* about it from skeleton's clean up perspective.
*/
- *mmaped = mmap(map->mmaped, mmap_sz, prot,
- MAP_SHARED | MAP_FIXED, map_fd, 0);
+ *mmaped = mmap(map->mmaped, mmap_sz, prot, MAP_SHARED | MAP_FIXED, map_fd, 0);
if (*mmaped == MAP_FAILED) {
err = -errno;
*mmaped = NULL;
pr_warn("failed to re-mmap() map '%s': %d\n",
bpf_map__name(map), err);
- return err;
+ return libbpf_err(err);
}
}
@@ -10813,26 +13697,40 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
{
- int i;
+ int i, err;
for (i = 0; i < s->prog_cnt; i++) {
struct bpf_program *prog = *s->progs[i].prog;
struct bpf_link **link = s->progs[i].link;
- const struct bpf_sec_def *sec_def;
- if (!prog->load)
+ if (!prog->autoload || !prog->autoattach)
continue;
- sec_def = find_sec_def(prog->sec_name);
- if (!sec_def || !sec_def->attach_fn)
+ /* auto-attaching not supported for this program */
+ if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
continue;
- *link = sec_def->attach_fn(sec_def, prog);
- if (IS_ERR(*link)) {
- pr_warn("failed to auto-attach program '%s': %ld\n",
- bpf_program__name(prog), PTR_ERR(*link));
- return PTR_ERR(*link);
- }
+ /* if user already set the link manually, don't attempt auto-attach */
+ if (*link)
+ continue;
+
+ err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, link);
+ if (err) {
+ pr_warn("prog '%s': failed to auto-attach: %d\n",
+ bpf_program__name(prog), err);
+ return libbpf_err(err);
+ }
+
+ /* It's possible that for some SEC() definitions auto-attach
+ * is supported in some cases (e.g., if definition completely
+ * specifies target information), but is not in other cases.
+ * SEC("uprobe") is one such case. If user specified target
+ * binary and function name, such BPF program can be
+ * auto-attached. But if not, it shouldn't trigger skeleton's
+ * attach to fail. It should just be skipped.
+ * attach_fn signals such case with returning 0 (no error) and
+ * setting link to NULL.
+ */
}
return 0;
@@ -10852,6 +13750,9 @@ void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
{
+ if (!s)
+ return;
+
if (s->progs)
bpf_object__detach_skeleton(s);
if (s->obj)
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 6909ee81113a..7b510761f545 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -18,11 +18,16 @@
#include <linux/bpf.h>
#include "libbpf_common.h"
+#include "libbpf_legacy.h"
#ifdef __cplusplus
extern "C" {
#endif
+LIBBPF_API __u32 libbpf_major_version(void);
+LIBBPF_API __u32 libbpf_minor_version(void);
+LIBBPF_API const char *libbpf_version_string(void);
+
enum libbpf_errno {
__LIBBPF_ERRNO__START = 4000,
@@ -46,6 +51,42 @@ enum libbpf_errno {
LIBBPF_API int libbpf_strerror(int err, char *buf, size_t size);
+/**
+ * @brief **libbpf_bpf_attach_type_str()** converts the provided attach type
+ * value into a textual representation.
+ * @param t The attach type.
+ * @return Pointer to a static string identifying the attach type. NULL is
+ * returned for unknown **bpf_attach_type** values.
+ */
+LIBBPF_API const char *libbpf_bpf_attach_type_str(enum bpf_attach_type t);
+
+/**
+ * @brief **libbpf_bpf_link_type_str()** converts the provided link type value
+ * into a textual representation.
+ * @param t The link type.
+ * @return Pointer to a static string identifying the link type. NULL is
+ * returned for unknown **bpf_link_type** values.
+ */
+LIBBPF_API const char *libbpf_bpf_link_type_str(enum bpf_link_type t);
+
+/**
+ * @brief **libbpf_bpf_map_type_str()** converts the provided map type value
+ * into a textual representation.
+ * @param t The map type.
+ * @return Pointer to a static string identifying the map type. NULL is
+ * returned for unknown **bpf_map_type** values.
+ */
+LIBBPF_API const char *libbpf_bpf_map_type_str(enum bpf_map_type t);
+
+/**
+ * @brief **libbpf_bpf_prog_type_str()** converts the provided program type
+ * value into a textual representation.
+ * @param t The program type.
+ * @return Pointer to a static string identifying the program type. NULL is
+ * returned for unknown **bpf_prog_type** values.
+ */
+LIBBPF_API const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t);
+
enum libbpf_print_level {
LIBBPF_WARN,
LIBBPF_INFO,
@@ -55,18 +96,21 @@ enum libbpf_print_level {
typedef int (*libbpf_print_fn_t)(enum libbpf_print_level level,
const char *, va_list ap);
+/**
+ * @brief **libbpf_set_print()** sets user-provided log callback function to
+ * be used for libbpf warnings and informational messages.
+ * @param fn The log print function. If NULL, libbpf won't print anything.
+ * @return Pointer to old print function.
+ *
+ * This function is thread-safe.
+ */
LIBBPF_API libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn);
/* Hide internal to user */
struct bpf_object;
-struct bpf_object_open_attr {
- const char *file;
- enum bpf_prog_type prog_type;
-};
-
struct bpf_object_open_opts {
- /* size of this struct, for forward/backward compatiblity */
+ /* size of this struct, for forward/backward compatibility */
size_t sz;
/* object name override, if provided:
* - for object open from file, this will override setting object
@@ -77,50 +121,163 @@ struct bpf_object_open_opts {
const char *object_name;
/* parse map definitions non-strictly, allowing extra attributes/data */
bool relaxed_maps;
- /* DEPRECATED: handle CO-RE relocations non-strictly, allowing failures.
- * Value is ignored. Relocations always are processed non-strictly.
- * Non-relocatable instructions are replaced with invalid ones to
- * prevent accidental errors.
- * */
- bool relaxed_core_relocs;
/* maps that set the 'pinning' attribute in their definition will have
* their pin_path attribute set to a file in this directory, and be
* auto-pinned to that path on load; defaults to "/sys/fs/bpf".
*/
const char *pin_root_path;
- __u32 attach_prog_fd;
+
+ __u32 :32; /* stub out now removed attach_prog_fd */
+
/* Additional kernel config content that augments and overrides
* system Kconfig for CONFIG_xxx externs.
*/
const char *kconfig;
-};
-#define bpf_object_open_opts__last_field kconfig
+ /* Path to the custom BTF to be used for BPF CO-RE relocations.
+ * This custom BTF completely replaces the use of vmlinux BTF
+ * for the purpose of CO-RE relocations.
+ * NOTE: any other BPF feature (e.g., fentry/fexit programs,
+ * struct_ops, etc) will need actual kernel BTF at /sys/kernel/btf/vmlinux.
+ */
+ const char *btf_custom_path;
+ /* Pointer to a buffer for storing kernel logs for applicable BPF
+ * commands. Valid kernel_log_size has to be specified as well and are
+ * passed-through to bpf() syscall. Keep in mind that kernel might
+ * fail operation with -ENOSPC error if provided buffer is too small
+ * to contain entire log output.
+ * See the comment below for kernel_log_level for interaction between
+ * log_buf and log_level settings.
+ *
+ * If specified, this log buffer will be passed for:
+ * - each BPF progral load (BPF_PROG_LOAD) attempt, unless overriden
+ * with bpf_program__set_log() on per-program level, to get
+ * BPF verifier log output.
+ * - during BPF object's BTF load into kernel (BPF_BTF_LOAD) to get
+ * BTF sanity checking log.
+ *
+ * Each BPF command (BPF_BTF_LOAD or BPF_PROG_LOAD) will overwrite
+ * previous contents, so if you need more fine-grained control, set
+ * per-program buffer with bpf_program__set_log_buf() to preserve each
+ * individual program's verification log. Keep using kernel_log_buf
+ * for BTF verification log, if necessary.
+ */
+ char *kernel_log_buf;
+ size_t kernel_log_size;
+ /*
+ * Log level can be set independently from log buffer. Log_level=0
+ * means that libbpf will attempt loading BTF or program without any
+ * logging requested, but will retry with either its own or custom log
+ * buffer, if provided, and log_level=1 on any error.
+ * And vice versa, setting log_level>0 will request BTF or prog
+ * loading with verbose log from the first attempt (and as such also
+ * for successfully loaded BTF or program), and the actual log buffer
+ * could be either libbpf's own auto-allocated log buffer, if
+ * kernel_log_buffer is NULL, or user-provided custom kernel_log_buf.
+ * If user didn't provide custom log buffer, libbpf will emit captured
+ * logs through its print callback.
+ */
+ __u32 kernel_log_level;
+ /* Path to BPF FS mount point to derive BPF token from.
+ *
+ * Created BPF token will be used for all bpf() syscall operations
+ * that accept BPF token (e.g., map creation, BTF and program loads,
+ * etc) automatically within instantiated BPF object.
+ *
+ * If bpf_token_path is not specified, libbpf will consult
+ * LIBBPF_BPF_TOKEN_PATH environment variable. If set, it will be
+ * taken as a value of bpf_token_path option and will force libbpf to
+ * either create BPF token from provided custom BPF FS path, or will
+ * disable implicit BPF token creation, if envvar value is an empty
+ * string. bpf_token_path overrides LIBBPF_BPF_TOKEN_PATH, if both are
+ * set at the same time.
+ *
+ * Setting bpf_token_path option to empty string disables libbpf's
+ * automatic attempt to create BPF token from default BPF FS mount
+ * point (/sys/fs/bpf), in case this default behavior is undesirable.
+ */
+ const char *bpf_token_path;
+ size_t :0;
+};
+#define bpf_object_open_opts__last_field bpf_token_path
+
+/**
+ * @brief **bpf_object__open()** creates a bpf_object by opening
+ * the BPF ELF object file pointed to by the passed path and loading it
+ * into memory.
+ * @param path BPF object file path.
+ * @return pointer to the new bpf_object; or NULL is returned on error,
+ * error code is stored in errno
+ */
LIBBPF_API struct bpf_object *bpf_object__open(const char *path);
+
+/**
+ * @brief **bpf_object__open_file()** creates a bpf_object by opening
+ * the BPF ELF object file pointed to by the passed path and loading it
+ * into memory.
+ * @param path BPF object file path
+ * @param opts options for how to load the bpf object, this parameter is
+ * optional and can be set to NULL
+ * @return pointer to the new bpf_object; or NULL is returned on error,
+ * error code is stored in errno
+ */
LIBBPF_API struct bpf_object *
bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts);
+
+/**
+ * @brief **bpf_object__open_mem()** creates a bpf_object by reading
+ * the BPF objects raw bytes from a memory buffer containing a valid
+ * BPF ELF object file.
+ * @param obj_buf pointer to the buffer containing ELF file bytes
+ * @param obj_buf_sz number of bytes in the buffer
+ * @param opts options for how to load the bpf object
+ * @return pointer to the new bpf_object; or NULL is returned on error,
+ * error code is stored in errno
+ */
LIBBPF_API struct bpf_object *
bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
const struct bpf_object_open_opts *opts);
-/* deprecated bpf_object__open variants */
-LIBBPF_API struct bpf_object *
-bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz,
- const char *name);
-LIBBPF_API struct bpf_object *
-bpf_object__open_xattr(struct bpf_object_open_attr *attr);
-
-enum libbpf_pin_type {
- LIBBPF_PIN_NONE,
- /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
- LIBBPF_PIN_BY_NAME,
-};
+/**
+ * @brief **bpf_object__load()** loads BPF object into kernel.
+ * @param obj Pointer to a valid BPF object instance returned by
+ * **bpf_object__open*()** APIs
+ * @return 0, on success; negative error code, otherwise, error code is
+ * stored in errno
+ */
+LIBBPF_API int bpf_object__load(struct bpf_object *obj);
-/* pin_maps and unpin_maps can both be called with a NULL path, in which case
- * they will use the pin_path attribute of each map (and ignore all maps that
- * don't have a pin_path set).
+/**
+ * @brief **bpf_object__close()** closes a BPF object and releases all
+ * resources.
+ * @param obj Pointer to a valid BPF object
+ */
+LIBBPF_API void bpf_object__close(struct bpf_object *obj);
+
+/**
+ * @brief **bpf_object__pin_maps()** pins each map contained within
+ * the BPF object at the passed directory.
+ * @param obj Pointer to a valid BPF object
+ * @param path A directory where maps should be pinned.
+ * @return 0, on success; negative error code, otherwise
+ *
+ * If `path` is NULL `bpf_map__pin` (which is being used on each map)
+ * will use the pin_path attribute of each map. In this case, maps that
+ * don't have a pin_path set will be ignored.
*/
LIBBPF_API int bpf_object__pin_maps(struct bpf_object *obj, const char *path);
+
+/**
+ * @brief **bpf_object__unpin_maps()** unpins each map contained within
+ * the BPF object found in the passed directory.
+ * @param obj Pointer to a valid BPF object
+ * @param path A directory where pinned maps should be searched for.
+ * @return 0, on success; negative error code, otherwise
+ *
+ * If `path` is NULL `bpf_map__unpin` (which is being used on each map)
+ * will use the pin_path attribute of each map. In this case, maps that
+ * don't have a pin_path set will be ignored.
+ */
LIBBPF_API int bpf_object__unpin_maps(struct bpf_object *obj,
const char *path);
LIBBPF_API int bpf_object__pin_programs(struct bpf_object *obj,
@@ -128,45 +285,20 @@ LIBBPF_API int bpf_object__pin_programs(struct bpf_object *obj,
LIBBPF_API int bpf_object__unpin_programs(struct bpf_object *obj,
const char *path);
LIBBPF_API int bpf_object__pin(struct bpf_object *object, const char *path);
-LIBBPF_API void bpf_object__close(struct bpf_object *object);
-
-struct bpf_object_load_attr {
- struct bpf_object *obj;
- int log_level;
- const char *target_btf_path;
-};
-
-/* Load/unload object into/from kernel */
-LIBBPF_API int bpf_object__load(struct bpf_object *obj);
-LIBBPF_API int bpf_object__load_xattr(struct bpf_object_load_attr *attr);
-LIBBPF_API int bpf_object__unload(struct bpf_object *obj);
+LIBBPF_API int bpf_object__unpin(struct bpf_object *object, const char *path);
LIBBPF_API const char *bpf_object__name(const struct bpf_object *obj);
LIBBPF_API unsigned int bpf_object__kversion(const struct bpf_object *obj);
+LIBBPF_API int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version);
struct btf;
LIBBPF_API struct btf *bpf_object__btf(const struct bpf_object *obj);
LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj);
LIBBPF_API struct bpf_program *
-bpf_object__find_program_by_title(const struct bpf_object *obj,
- const char *title);
-LIBBPF_API struct bpf_program *
bpf_object__find_program_by_name(const struct bpf_object *obj,
const char *name);
-LIBBPF_API struct bpf_object *bpf_object__next(struct bpf_object *prev);
-#define bpf_object__for_each_safe(pos, tmp) \
- for ((pos) = bpf_object__next(NULL), \
- (tmp) = bpf_object__next(pos); \
- (pos) != NULL; \
- (pos) = (tmp), (tmp) = bpf_object__next(tmp))
-
-typedef void (*bpf_object_clear_priv_t)(struct bpf_object *, void *);
-LIBBPF_API int bpf_object__set_priv(struct bpf_object *obj, void *priv,
- bpf_object_clear_priv_t clear_priv);
-LIBBPF_API void *bpf_object__priv(const struct bpf_object *prog);
-
LIBBPF_API int
libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
enum bpf_attach_type *expected_attach_type);
@@ -177,46 +309,106 @@ LIBBPF_API int libbpf_find_vmlinux_btf_id(const char *name,
/* Accessors of bpf_program */
struct bpf_program;
-LIBBPF_API struct bpf_program *bpf_program__next(struct bpf_program *prog,
- const struct bpf_object *obj);
-#define bpf_object__for_each_program(pos, obj) \
- for ((pos) = bpf_program__next(NULL, (obj)); \
- (pos) != NULL; \
- (pos) = bpf_program__next((pos), (obj)))
-
-LIBBPF_API struct bpf_program *bpf_program__prev(struct bpf_program *prog,
- const struct bpf_object *obj);
+LIBBPF_API struct bpf_program *
+bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prog);
-typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, void *);
+#define bpf_object__for_each_program(pos, obj) \
+ for ((pos) = bpf_object__next_program((obj), NULL); \
+ (pos) != NULL; \
+ (pos) = bpf_object__next_program((obj), (pos)))
-LIBBPF_API int bpf_program__set_priv(struct bpf_program *prog, void *priv,
- bpf_program_clear_priv_t clear_priv);
+LIBBPF_API struct bpf_program *
+bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *prog);
-LIBBPF_API void *bpf_program__priv(const struct bpf_program *prog);
LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog,
__u32 ifindex);
LIBBPF_API const char *bpf_program__name(const struct bpf_program *prog);
LIBBPF_API const char *bpf_program__section_name(const struct bpf_program *prog);
-LIBBPF_API LIBBPF_DEPRECATED("BPF program title is confusing term; please use bpf_program__section_name() instead")
-const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy);
LIBBPF_API bool bpf_program__autoload(const struct bpf_program *prog);
LIBBPF_API int bpf_program__set_autoload(struct bpf_program *prog, bool autoload);
+LIBBPF_API bool bpf_program__autoattach(const struct bpf_program *prog);
+LIBBPF_API void bpf_program__set_autoattach(struct bpf_program *prog, bool autoattach);
+
+struct bpf_insn;
+
+/**
+ * @brief **bpf_program__insns()** gives read-only access to BPF program's
+ * underlying BPF instructions.
+ * @param prog BPF program for which to return instructions
+ * @return a pointer to an array of BPF instructions that belong to the
+ * specified BPF program
+ *
+ * Returned pointer is always valid and not NULL. Number of `struct bpf_insn`
+ * pointed to can be fetched using **bpf_program__insn_cnt()** API.
+ *
+ * Keep in mind, libbpf can modify and append/delete BPF program's
+ * instructions as it processes BPF object file and prepares everything for
+ * uploading into the kernel. So depending on the point in BPF object
+ * lifetime, **bpf_program__insns()** can return different sets of
+ * instructions. As an example, during BPF object load phase BPF program
+ * instructions will be CO-RE-relocated, BPF subprograms instructions will be
+ * appended, ldimm64 instructions will have FDs embedded, etc. So instructions
+ * returned before **bpf_object__load()** and after it might be quite
+ * different.
+ */
+LIBBPF_API const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog);
-/* returns program size in bytes */
-LIBBPF_API size_t bpf_program__size(const struct bpf_program *prog);
+/**
+ * @brief **bpf_program__set_insns()** can set BPF program's underlying
+ * BPF instructions.
+ *
+ * WARNING: This is a very advanced libbpf API and users need to know
+ * what they are doing. This should be used from prog_prepare_load_fn
+ * callback only.
+ *
+ * @param prog BPF program for which to return instructions
+ * @param new_insns a pointer to an array of BPF instructions
+ * @param new_insn_cnt number of `struct bpf_insn`'s that form
+ * specified BPF program
+ * @return 0, on success; negative error code, otherwise
+ */
+LIBBPF_API int bpf_program__set_insns(struct bpf_program *prog,
+ struct bpf_insn *new_insns, size_t new_insn_cnt);
+
+/**
+ * @brief **bpf_program__insn_cnt()** returns number of `struct bpf_insn`'s
+ * that form specified BPF program.
+ * @param prog BPF program for which to return number of BPF instructions
+ *
+ * See **bpf_program__insns()** documentation for notes on how libbpf can
+ * change instructions and their count during different phases of
+ * **bpf_object** lifetime.
+ */
+LIBBPF_API size_t bpf_program__insn_cnt(const struct bpf_program *prog);
-LIBBPF_API int bpf_program__load(struct bpf_program *prog, char *license,
- __u32 kern_version);
LIBBPF_API int bpf_program__fd(const struct bpf_program *prog);
-LIBBPF_API int bpf_program__pin_instance(struct bpf_program *prog,
- const char *path,
- int instance);
-LIBBPF_API int bpf_program__unpin_instance(struct bpf_program *prog,
- const char *path,
- int instance);
+
+/**
+ * @brief **bpf_program__pin()** pins the BPF program to a file
+ * in the BPF FS specified by a path. This increments the programs
+ * reference count, allowing it to stay loaded after the process
+ * which loaded it has exited.
+ *
+ * @param prog BPF program to pin, must already be loaded
+ * @param path file path in a BPF file system
+ * @return 0, on success; negative error code, otherwise
+ */
LIBBPF_API int bpf_program__pin(struct bpf_program *prog, const char *path);
+
+/**
+ * @brief **bpf_program__unpin()** unpins the BPF program from a file
+ * in the BPFFS specified by a path. This decrements the programs
+ * reference count.
+ *
+ * The file pinning the BPF program can also be unlinked by a different
+ * process in which case this function will return an error.
+ *
+ * @param prog BPF program to unpin
+ * @param path file path to the pin in a BPF file system
+ * @return 0, on success; negative error code, otherwise
+ */
LIBBPF_API int bpf_program__unpin(struct bpf_program *prog, const char *path);
LIBBPF_API void bpf_program__unload(struct bpf_program *prog);
@@ -225,7 +417,31 @@ struct bpf_link;
LIBBPF_API struct bpf_link *bpf_link__open(const char *path);
LIBBPF_API int bpf_link__fd(const struct bpf_link *link);
LIBBPF_API const char *bpf_link__pin_path(const struct bpf_link *link);
+/**
+ * @brief **bpf_link__pin()** pins the BPF link to a file
+ * in the BPF FS specified by a path. This increments the links
+ * reference count, allowing it to stay loaded after the process
+ * which loaded it has exited.
+ *
+ * @param link BPF link to pin, must already be loaded
+ * @param path file path in a BPF file system
+ * @return 0, on success; negative error code, otherwise
+ */
+
LIBBPF_API int bpf_link__pin(struct bpf_link *link, const char *path);
+
+/**
+ * @brief **bpf_link__unpin()** unpins the BPF link from a file
+ * in the BPFFS specified by a path. This decrements the links
+ * reference count.
+ *
+ * The file pinning the BPF link can also be unlinked by a different
+ * process in which case this function will return an error.
+ *
+ * @param prog BPF program to unpin
+ * @param path file path to the pin in a BPF file system
+ * @return 0, on success; negative error code, otherwise
+ */
LIBBPF_API int bpf_link__unpin(struct bpf_link *link);
LIBBPF_API int bpf_link__update_program(struct bpf_link *link,
struct bpf_program *prog);
@@ -233,41 +449,395 @@ LIBBPF_API void bpf_link__disconnect(struct bpf_link *link);
LIBBPF_API int bpf_link__detach(struct bpf_link *link);
LIBBPF_API int bpf_link__destroy(struct bpf_link *link);
+/**
+ * @brief **bpf_program__attach()** is a generic function for attaching
+ * a BPF program based on auto-detection of program type, attach type,
+ * and extra paremeters, where applicable.
+ *
+ * @param prog BPF program to attach
+ * @return Reference to the newly created BPF link; or NULL is returned on error,
+ * error code is stored in errno
+ *
+ * This is supported for:
+ * - kprobe/kretprobe (depends on SEC() definition)
+ * - uprobe/uretprobe (depends on SEC() definition)
+ * - tracepoint
+ * - raw tracepoint
+ * - tracing programs (typed raw TP/fentry/fexit/fmod_ret)
+ */
LIBBPF_API struct bpf_link *
-bpf_program__attach(struct bpf_program *prog);
+bpf_program__attach(const struct bpf_program *prog);
+
+struct bpf_perf_event_opts {
+ /* size of this struct, for forward/backward compatibility */
+ size_t sz;
+ /* custom user-provided value fetchable through bpf_get_attach_cookie() */
+ __u64 bpf_cookie;
+ /* don't use BPF link when attach BPF program */
+ bool force_ioctl_attach;
+ size_t :0;
+};
+#define bpf_perf_event_opts__last_field force_ioctl_attach
+
LIBBPF_API struct bpf_link *
-bpf_program__attach_perf_event(struct bpf_program *prog, int pfd);
+bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd);
+
LIBBPF_API struct bpf_link *
-bpf_program__attach_kprobe(struct bpf_program *prog, bool retprobe,
+bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd,
+ const struct bpf_perf_event_opts *opts);
+
+/**
+ * enum probe_attach_mode - the mode to attach kprobe/uprobe
+ *
+ * force libbpf to attach kprobe/uprobe in specific mode, -ENOTSUP will
+ * be returned if it is not supported by the kernel.
+ */
+enum probe_attach_mode {
+ /* attach probe in latest supported mode by kernel */
+ PROBE_ATTACH_MODE_DEFAULT = 0,
+ /* attach probe in legacy mode, using debugfs/tracefs */
+ PROBE_ATTACH_MODE_LEGACY,
+ /* create perf event with perf_event_open() syscall */
+ PROBE_ATTACH_MODE_PERF,
+ /* attach probe with BPF link */
+ PROBE_ATTACH_MODE_LINK,
+};
+
+struct bpf_kprobe_opts {
+ /* size of this struct, for forward/backward compatibility */
+ size_t sz;
+ /* custom user-provided value fetchable through bpf_get_attach_cookie() */
+ __u64 bpf_cookie;
+ /* function's offset to install kprobe to */
+ size_t offset;
+ /* kprobe is return probe */
+ bool retprobe;
+ /* kprobe attach mode */
+ enum probe_attach_mode attach_mode;
+ size_t :0;
+};
+#define bpf_kprobe_opts__last_field attach_mode
+
+LIBBPF_API struct bpf_link *
+bpf_program__attach_kprobe(const struct bpf_program *prog, bool retprobe,
const char *func_name);
LIBBPF_API struct bpf_link *
-bpf_program__attach_uprobe(struct bpf_program *prog, bool retprobe,
+bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
+ const char *func_name,
+ const struct bpf_kprobe_opts *opts);
+
+struct bpf_kprobe_multi_opts {
+ /* size of this struct, for forward/backward compatibility */
+ size_t sz;
+ /* array of function symbols to attach */
+ const char **syms;
+ /* array of function addresses to attach */
+ const unsigned long *addrs;
+ /* array of user-provided values fetchable through bpf_get_attach_cookie */
+ const __u64 *cookies;
+ /* number of elements in syms/addrs/cookies arrays */
+ size_t cnt;
+ /* create return kprobes */
+ bool retprobe;
+ size_t :0;
+};
+
+#define bpf_kprobe_multi_opts__last_field retprobe
+
+LIBBPF_API struct bpf_link *
+bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog,
+ const char *pattern,
+ const struct bpf_kprobe_multi_opts *opts);
+
+struct bpf_uprobe_multi_opts {
+ /* size of this struct, for forward/backward compatibility */
+ size_t sz;
+ /* array of function symbols to attach to */
+ const char **syms;
+ /* array of function addresses to attach to */
+ const unsigned long *offsets;
+ /* optional, array of associated ref counter offsets */
+ const unsigned long *ref_ctr_offsets;
+ /* optional, array of associated BPF cookies */
+ const __u64 *cookies;
+ /* number of elements in syms/addrs/cookies arrays */
+ size_t cnt;
+ /* create return uprobes */
+ bool retprobe;
+ size_t :0;
+};
+
+#define bpf_uprobe_multi_opts__last_field retprobe
+
+/**
+ * @brief **bpf_program__attach_uprobe_multi()** attaches a BPF program
+ * to multiple uprobes with uprobe_multi link.
+ *
+ * User can specify 2 mutually exclusive set of inputs:
+ *
+ * 1) use only path/func_pattern/pid arguments
+ *
+ * 2) use path/pid with allowed combinations of
+ * syms/offsets/ref_ctr_offsets/cookies/cnt
+ *
+ * - syms and offsets are mutually exclusive
+ * - ref_ctr_offsets and cookies are optional
+ *
+ *
+ * @param prog BPF program to attach
+ * @param pid Process ID to attach the uprobe to, 0 for self (own process),
+ * -1 for all processes
+ * @param binary_path Path to binary
+ * @param func_pattern Regular expression to specify functions to attach
+ * BPF program to
+ * @param opts Additional options (see **struct bpf_uprobe_multi_opts**)
+ * @return 0, on success; negative error code, otherwise
+ */
+LIBBPF_API struct bpf_link *
+bpf_program__attach_uprobe_multi(const struct bpf_program *prog,
+ pid_t pid,
+ const char *binary_path,
+ const char *func_pattern,
+ const struct bpf_uprobe_multi_opts *opts);
+
+struct bpf_ksyscall_opts {
+ /* size of this struct, for forward/backward compatibility */
+ size_t sz;
+ /* custom user-provided value fetchable through bpf_get_attach_cookie() */
+ __u64 bpf_cookie;
+ /* attach as return probe? */
+ bool retprobe;
+ size_t :0;
+};
+#define bpf_ksyscall_opts__last_field retprobe
+
+/**
+ * @brief **bpf_program__attach_ksyscall()** attaches a BPF program
+ * to kernel syscall handler of a specified syscall. Optionally it's possible
+ * to request to install retprobe that will be triggered at syscall exit. It's
+ * also possible to associate BPF cookie (though options).
+ *
+ * Libbpf automatically will determine correct full kernel function name,
+ * which depending on system architecture and kernel version/configuration
+ * could be of the form __<arch>_sys_<syscall> or __se_sys_<syscall>, and will
+ * attach specified program using kprobe/kretprobe mechanism.
+ *
+ * **bpf_program__attach_ksyscall()** is an API counterpart of declarative
+ * **SEC("ksyscall/<syscall>")** annotation of BPF programs.
+ *
+ * At the moment **SEC("ksyscall")** and **bpf_program__attach_ksyscall()** do
+ * not handle all the calling convention quirks for mmap(), clone() and compat
+ * syscalls. It also only attaches to "native" syscall interfaces. If host
+ * system supports compat syscalls or defines 32-bit syscalls in 64-bit
+ * kernel, such syscall interfaces won't be attached to by libbpf.
+ *
+ * These limitations may or may not change in the future. Therefore it is
+ * recommended to use SEC("kprobe") for these syscalls or if working with
+ * compat and 32-bit interfaces is required.
+ *
+ * @param prog BPF program to attach
+ * @param syscall_name Symbolic name of the syscall (e.g., "bpf")
+ * @param opts Additional options (see **struct bpf_ksyscall_opts**)
+ * @return Reference to the newly created BPF link; or NULL is returned on
+ * error, error code is stored in errno
+ */
+LIBBPF_API struct bpf_link *
+bpf_program__attach_ksyscall(const struct bpf_program *prog,
+ const char *syscall_name,
+ const struct bpf_ksyscall_opts *opts);
+
+struct bpf_uprobe_opts {
+ /* size of this struct, for forward/backward compatibility */
+ size_t sz;
+ /* offset of kernel reference counted USDT semaphore, added in
+ * a6ca88b241d5 ("trace_uprobe: support reference counter in fd-based uprobe")
+ */
+ size_t ref_ctr_offset;
+ /* custom user-provided value fetchable through bpf_get_attach_cookie() */
+ __u64 bpf_cookie;
+ /* uprobe is return probe, invoked at function return time */
+ bool retprobe;
+ /* Function name to attach to. Could be an unqualified ("abc") or library-qualified
+ * "abc@LIBXYZ" name. To specify function entry, func_name should be set while
+ * func_offset argument to bpf_prog__attach_uprobe_opts() should be 0. To trace an
+ * offset within a function, specify func_name and use func_offset argument to specify
+ * offset within the function. Shared library functions must specify the shared library
+ * binary_path.
+ */
+ const char *func_name;
+ /* uprobe attach mode */
+ enum probe_attach_mode attach_mode;
+ size_t :0;
+};
+#define bpf_uprobe_opts__last_field attach_mode
+
+/**
+ * @brief **bpf_program__attach_uprobe()** attaches a BPF program
+ * to the userspace function which is found by binary path and
+ * offset. You can optionally specify a particular proccess to attach
+ * to. You can also optionally attach the program to the function
+ * exit instead of entry.
+ *
+ * @param prog BPF program to attach
+ * @param retprobe Attach to function exit
+ * @param pid Process ID to attach the uprobe to, 0 for self (own process),
+ * -1 for all processes
+ * @param binary_path Path to binary that contains the function symbol
+ * @param func_offset Offset within the binary of the function symbol
+ * @return Reference to the newly created BPF link; or NULL is returned on error,
+ * error code is stored in errno
+ */
+LIBBPF_API struct bpf_link *
+bpf_program__attach_uprobe(const struct bpf_program *prog, bool retprobe,
pid_t pid, const char *binary_path,
size_t func_offset);
+
+/**
+ * @brief **bpf_program__attach_uprobe_opts()** is just like
+ * bpf_program__attach_uprobe() except with a options struct
+ * for various configurations.
+ *
+ * @param prog BPF program to attach
+ * @param pid Process ID to attach the uprobe to, 0 for self (own process),
+ * -1 for all processes
+ * @param binary_path Path to binary that contains the function symbol
+ * @param func_offset Offset within the binary of the function symbol
+ * @param opts Options for altering program attachment
+ * @return Reference to the newly created BPF link; or NULL is returned on error,
+ * error code is stored in errno
+ */
+LIBBPF_API struct bpf_link *
+bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
+ const char *binary_path, size_t func_offset,
+ const struct bpf_uprobe_opts *opts);
+
+struct bpf_usdt_opts {
+ /* size of this struct, for forward/backward compatibility */
+ size_t sz;
+ /* custom user-provided value accessible through usdt_cookie() */
+ __u64 usdt_cookie;
+ size_t :0;
+};
+#define bpf_usdt_opts__last_field usdt_cookie
+
+/**
+ * @brief **bpf_program__attach_usdt()** is just like
+ * bpf_program__attach_uprobe_opts() except it covers USDT (User-space
+ * Statically Defined Tracepoint) attachment, instead of attaching to
+ * user-space function entry or exit.
+ *
+ * @param prog BPF program to attach
+ * @param pid Process ID to attach the uprobe to, 0 for self (own process),
+ * -1 for all processes
+ * @param binary_path Path to binary that contains provided USDT probe
+ * @param usdt_provider USDT provider name
+ * @param usdt_name USDT probe name
+ * @param opts Options for altering program attachment
+ * @return Reference to the newly created BPF link; or NULL is returned on error,
+ * error code is stored in errno
+ */
+LIBBPF_API struct bpf_link *
+bpf_program__attach_usdt(const struct bpf_program *prog,
+ pid_t pid, const char *binary_path,
+ const char *usdt_provider, const char *usdt_name,
+ const struct bpf_usdt_opts *opts);
+
+struct bpf_tracepoint_opts {
+ /* size of this struct, for forward/backward compatibility */
+ size_t sz;
+ /* custom user-provided value fetchable through bpf_get_attach_cookie() */
+ __u64 bpf_cookie;
+};
+#define bpf_tracepoint_opts__last_field bpf_cookie
+
LIBBPF_API struct bpf_link *
-bpf_program__attach_tracepoint(struct bpf_program *prog,
+bpf_program__attach_tracepoint(const struct bpf_program *prog,
const char *tp_category,
const char *tp_name);
LIBBPF_API struct bpf_link *
-bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
+bpf_program__attach_tracepoint_opts(const struct bpf_program *prog,
+ const char *tp_category,
+ const char *tp_name,
+ const struct bpf_tracepoint_opts *opts);
+
+LIBBPF_API struct bpf_link *
+bpf_program__attach_raw_tracepoint(const struct bpf_program *prog,
const char *tp_name);
+
+struct bpf_trace_opts {
+ /* size of this struct, for forward/backward compatibility */
+ size_t sz;
+ /* custom user-provided value fetchable through bpf_get_attach_cookie() */
+ __u64 cookie;
+};
+#define bpf_trace_opts__last_field cookie
+
+LIBBPF_API struct bpf_link *
+bpf_program__attach_trace(const struct bpf_program *prog);
LIBBPF_API struct bpf_link *
-bpf_program__attach_trace(struct bpf_program *prog);
+bpf_program__attach_trace_opts(const struct bpf_program *prog, const struct bpf_trace_opts *opts);
+
LIBBPF_API struct bpf_link *
-bpf_program__attach_lsm(struct bpf_program *prog);
+bpf_program__attach_lsm(const struct bpf_program *prog);
LIBBPF_API struct bpf_link *
-bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd);
+bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd);
LIBBPF_API struct bpf_link *
-bpf_program__attach_netns(struct bpf_program *prog, int netns_fd);
+bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd);
LIBBPF_API struct bpf_link *
-bpf_program__attach_xdp(struct bpf_program *prog, int ifindex);
+bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex);
LIBBPF_API struct bpf_link *
-bpf_program__attach_freplace(struct bpf_program *prog,
+bpf_program__attach_freplace(const struct bpf_program *prog,
int target_fd, const char *attach_func_name);
+struct bpf_netfilter_opts {
+ /* size of this struct, for forward/backward compatibility */
+ size_t sz;
+
+ __u32 pf;
+ __u32 hooknum;
+ __s32 priority;
+ __u32 flags;
+};
+#define bpf_netfilter_opts__last_field flags
+
+LIBBPF_API struct bpf_link *
+bpf_program__attach_netfilter(const struct bpf_program *prog,
+ const struct bpf_netfilter_opts *opts);
+
+struct bpf_tcx_opts {
+ /* size of this struct, for forward/backward compatibility */
+ size_t sz;
+ __u32 flags;
+ __u32 relative_fd;
+ __u32 relative_id;
+ __u64 expected_revision;
+ size_t :0;
+};
+#define bpf_tcx_opts__last_field expected_revision
+
+LIBBPF_API struct bpf_link *
+bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex,
+ const struct bpf_tcx_opts *opts);
+
+struct bpf_netkit_opts {
+ /* size of this struct, for forward/backward compatibility */
+ size_t sz;
+ __u32 flags;
+ __u32 relative_fd;
+ __u32 relative_id;
+ __u64 expected_revision;
+ size_t :0;
+};
+#define bpf_netkit_opts__last_field expected_revision
+
+LIBBPF_API struct bpf_link *
+bpf_program__attach_netkit(const struct bpf_program *prog, int ifindex,
+ const struct bpf_netkit_opts *opts);
+
struct bpf_map;
-LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map);
+LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map);
+LIBBPF_API int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map);
struct bpf_iter_attach_opts {
size_t sz; /* size of this struct for forward/backward compatibility */
@@ -277,135 +847,78 @@ struct bpf_iter_attach_opts {
#define bpf_iter_attach_opts__last_field link_info_len
LIBBPF_API struct bpf_link *
-bpf_program__attach_iter(struct bpf_program *prog,
+bpf_program__attach_iter(const struct bpf_program *prog,
const struct bpf_iter_attach_opts *opts);
-struct bpf_insn;
+LIBBPF_API enum bpf_prog_type bpf_program__type(const struct bpf_program *prog);
-/*
- * Libbpf allows callers to adjust BPF programs before being loaded
- * into kernel. One program in an object file can be transformed into
- * multiple variants to be attached to different hooks.
- *
- * bpf_program_prep_t, bpf_program__set_prep and bpf_program__nth_fd
- * form an API for this purpose.
- *
- * - bpf_program_prep_t:
- * Defines a 'preprocessor', which is a caller defined function
- * passed to libbpf through bpf_program__set_prep(), and will be
- * called before program is loaded. The processor should adjust
- * the program one time for each instance according to the instance id
- * passed to it.
+/**
+ * @brief **bpf_program__set_type()** sets the program
+ * type of the passed BPF program.
+ * @param prog BPF program to set the program type for
+ * @param type program type to set the BPF map to have
+ * @return error code; or 0 if no error. An error occurs
+ * if the object is already loaded.
*
- * - bpf_program__set_prep:
- * Attaches a preprocessor to a BPF program. The number of instances
- * that should be created is also passed through this function.
- *
- * - bpf_program__nth_fd:
- * After the program is loaded, get resulting FD of a given instance
- * of the BPF program.
- *
- * If bpf_program__set_prep() is not used, the program would be loaded
- * without adjustment during bpf_object__load(). The program has only
- * one instance. In this case bpf_program__fd(prog) is equal to
- * bpf_program__nth_fd(prog, 0).
+ * This must be called before the BPF object is loaded,
+ * otherwise it has no effect and an error is returned.
*/
+LIBBPF_API int bpf_program__set_type(struct bpf_program *prog,
+ enum bpf_prog_type type);
-struct bpf_prog_prep_result {
- /*
- * If not NULL, load new instruction array.
- * If set to NULL, don't load this instance.
- */
- struct bpf_insn *new_insn_ptr;
- int new_insn_cnt;
-
- /* If not NULL, result FD is written to it. */
- int *pfd;
-};
-
-/*
- * Parameters of bpf_program_prep_t:
- * - prog: The bpf_program being loaded.
- * - n: Index of instance being generated.
- * - insns: BPF instructions array.
- * - insns_cnt:Number of instructions in insns.
- * - res: Output parameter, result of transformation.
+LIBBPF_API enum bpf_attach_type
+bpf_program__expected_attach_type(const struct bpf_program *prog);
+
+/**
+ * @brief **bpf_program__set_expected_attach_type()** sets the
+ * attach type of the passed BPF program. This is used for
+ * auto-detection of attachment when programs are loaded.
+ * @param prog BPF program to set the attach type for
+ * @param type attach type to set the BPF map to have
+ * @return error code; or 0 if no error. An error occurs
+ * if the object is already loaded.
*
- * Return value:
- * - Zero: pre-processing success.
- * - Non-zero: pre-processing error, stop loading.
+ * This must be called before the BPF object is loaded,
+ * otherwise it has no effect and an error is returned.
*/
-typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n,
- struct bpf_insn *insns, int insns_cnt,
- struct bpf_prog_prep_result *res);
-
-LIBBPF_API int bpf_program__set_prep(struct bpf_program *prog, int nr_instance,
- bpf_program_prep_t prep);
-
-LIBBPF_API int bpf_program__nth_fd(const struct bpf_program *prog, int n);
-
-/*
- * Adjust type of BPF program. Default is kprobe.
- */
-LIBBPF_API int bpf_program__set_socket_filter(struct bpf_program *prog);
-LIBBPF_API int bpf_program__set_tracepoint(struct bpf_program *prog);
-LIBBPF_API int bpf_program__set_raw_tracepoint(struct bpf_program *prog);
-LIBBPF_API int bpf_program__set_kprobe(struct bpf_program *prog);
-LIBBPF_API int bpf_program__set_lsm(struct bpf_program *prog);
-LIBBPF_API int bpf_program__set_sched_cls(struct bpf_program *prog);
-LIBBPF_API int bpf_program__set_sched_act(struct bpf_program *prog);
-LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog);
-LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog);
-LIBBPF_API int bpf_program__set_tracing(struct bpf_program *prog);
-LIBBPF_API int bpf_program__set_struct_ops(struct bpf_program *prog);
-LIBBPF_API int bpf_program__set_extension(struct bpf_program *prog);
-LIBBPF_API int bpf_program__set_sk_lookup(struct bpf_program *prog);
-
-LIBBPF_API enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog);
-LIBBPF_API void bpf_program__set_type(struct bpf_program *prog,
- enum bpf_prog_type type);
-
-LIBBPF_API enum bpf_attach_type
-bpf_program__get_expected_attach_type(struct bpf_program *prog);
-LIBBPF_API void
+LIBBPF_API int
bpf_program__set_expected_attach_type(struct bpf_program *prog,
enum bpf_attach_type type);
+LIBBPF_API __u32 bpf_program__flags(const struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_flags(struct bpf_program *prog, __u32 flags);
+
+/* Per-program log level and log buffer getters/setters.
+ * See bpf_object_open_opts comments regarding log_level and log_buf
+ * interactions.
+ */
+LIBBPF_API __u32 bpf_program__log_level(const struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level);
+LIBBPF_API const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size);
+LIBBPF_API int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size);
+
+/**
+ * @brief **bpf_program__set_attach_target()** sets BTF-based attach target
+ * for supported BPF program types:
+ * - BTF-aware raw tracepoints (tp_btf);
+ * - fentry/fexit/fmod_ret;
+ * - lsm;
+ * - freplace.
+ * @param prog BPF program to set the attach type for
+ * @param type attach type to set the BPF map to have
+ * @return error code; or 0 if no error occurred.
+ */
LIBBPF_API int
bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd,
const char *attach_func_name);
-LIBBPF_API bool bpf_program__is_socket_filter(const struct bpf_program *prog);
-LIBBPF_API bool bpf_program__is_tracepoint(const struct bpf_program *prog);
-LIBBPF_API bool bpf_program__is_raw_tracepoint(const struct bpf_program *prog);
-LIBBPF_API bool bpf_program__is_kprobe(const struct bpf_program *prog);
-LIBBPF_API bool bpf_program__is_lsm(const struct bpf_program *prog);
-LIBBPF_API bool bpf_program__is_sched_cls(const struct bpf_program *prog);
-LIBBPF_API bool bpf_program__is_sched_act(const struct bpf_program *prog);
-LIBBPF_API bool bpf_program__is_xdp(const struct bpf_program *prog);
-LIBBPF_API bool bpf_program__is_perf_event(const struct bpf_program *prog);
-LIBBPF_API bool bpf_program__is_tracing(const struct bpf_program *prog);
-LIBBPF_API bool bpf_program__is_struct_ops(const struct bpf_program *prog);
-LIBBPF_API bool bpf_program__is_extension(const struct bpf_program *prog);
-LIBBPF_API bool bpf_program__is_sk_lookup(const struct bpf_program *prog);
-
-/*
- * No need for __attribute__((packed)), all members of 'bpf_map_def'
- * are all aligned. In addition, using __attribute__((packed))
- * would trigger a -Wpacked warning message, and lead to an error
- * if -Werror is set.
- */
-struct bpf_map_def {
- unsigned int type;
- unsigned int key_size;
- unsigned int value_size;
- unsigned int max_entries;
- unsigned int map_flags;
-};
-
-/*
- * The 'struct bpf_map' in include/linux/bpf.h is internal to the kernel,
- * so no need to worry about a name clash.
+/**
+ * @brief **bpf_object__find_map_by_name()** returns BPF map of
+ * the given name, if it exists within the passed BPF object
+ * @param obj BPF object
+ * @param name name of the BPF map
+ * @return BPF map instance, if such map exists within the BPF object;
+ * or NULL otherwise.
*/
LIBBPF_API struct bpf_map *
bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name);
@@ -413,29 +926,48 @@ bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name);
LIBBPF_API int
bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name);
-/*
- * Get bpf_map through the offset of corresponding struct bpf_map_def
- * in the BPF object file.
- */
LIBBPF_API struct bpf_map *
-bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset);
+bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *map);
-LIBBPF_API struct bpf_map *
-bpf_map__next(const struct bpf_map *map, const struct bpf_object *obj);
#define bpf_object__for_each_map(pos, obj) \
- for ((pos) = bpf_map__next(NULL, (obj)); \
+ for ((pos) = bpf_object__next_map((obj), NULL); \
(pos) != NULL; \
- (pos) = bpf_map__next((pos), (obj)))
+ (pos) = bpf_object__next_map((obj), (pos)))
#define bpf_map__for_each bpf_object__for_each_map
LIBBPF_API struct bpf_map *
-bpf_map__prev(const struct bpf_map *map, const struct bpf_object *obj);
-
-/* get/set map FD */
+bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *map);
+
+/**
+ * @brief **bpf_map__set_autocreate()** sets whether libbpf has to auto-create
+ * BPF map during BPF object load phase.
+ * @param map the BPF map instance
+ * @param autocreate whether to create BPF map during BPF object load
+ * @return 0 on success; -EBUSY if BPF object was already loaded
+ *
+ * **bpf_map__set_autocreate()** allows to opt-out from libbpf auto-creating
+ * BPF map. By default, libbpf will attempt to create every single BPF map
+ * defined in BPF object file using BPF_MAP_CREATE command of bpf() syscall
+ * and fill in map FD in BPF instructions.
+ *
+ * This API allows to opt-out of this process for specific map instance. This
+ * can be useful if host kernel doesn't support such BPF map type or used
+ * combination of flags and user application wants to avoid creating such
+ * a map in the first place. User is still responsible to make sure that their
+ * BPF-side code that expects to use such missing BPF map is recognized by BPF
+ * verifier as dead code, otherwise BPF verifier will reject such BPF program.
+ */
+LIBBPF_API int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate);
+LIBBPF_API bool bpf_map__autocreate(const struct bpf_map *map);
+
+/**
+ * @brief **bpf_map__fd()** gets the file descriptor of the passed
+ * BPF map
+ * @param map the BPF map instance
+ * @return the file descriptor; or -EINVAL in case of an error
+ */
LIBBPF_API int bpf_map__fd(const struct bpf_map *map);
LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
-/* get map definition */
-LIBBPF_API const struct bpf_map_def *bpf_map__def(const struct bpf_map *map);
/* get map name */
LIBBPF_API const char *bpf_map__name(const struct bpf_map *map);
/* get/set map type */
@@ -444,7 +976,6 @@ LIBBPF_API int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type);
/* get/set map size (max_entries) */
LIBBPF_API __u32 bpf_map__max_entries(const struct bpf_map *map);
LIBBPF_API int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries);
-LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries);
/* get/set map flags */
LIBBPF_API __u32 bpf_map__map_flags(const struct bpf_map *map);
LIBBPF_API int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags);
@@ -454,8 +985,22 @@ LIBBPF_API int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node);
/* get/set map key size */
LIBBPF_API __u32 bpf_map__key_size(const struct bpf_map *map);
LIBBPF_API int bpf_map__set_key_size(struct bpf_map *map, __u32 size);
-/* get/set map value size */
+/* get map value size */
LIBBPF_API __u32 bpf_map__value_size(const struct bpf_map *map);
+/**
+ * @brief **bpf_map__set_value_size()** sets map value size.
+ * @param map the BPF map instance
+ * @return 0, on success; negative error, otherwise
+ *
+ * There is a special case for maps with associated memory-mapped regions, like
+ * the global data section maps (bss, data, rodata). When this function is used
+ * on such a map, the mapped region is resized. Afterward, an attempt is made to
+ * adjust the corresponding BTF info. This attempt is best-effort and can only
+ * succeed if the last variable of the data section map is an array. The array
+ * BTF type is replaced by a new BTF array type with a different length.
+ * Any previously existing pointers returned from bpf_map__initial_value() or
+ * corresponding data section skeleton pointer must be reinitialized.
+ */
LIBBPF_API int bpf_map__set_value_size(struct bpf_map *map, __u32 size);
/* get map key/value BTF type IDs */
LIBBPF_API __u32 bpf_map__btf_key_type_id(const struct bpf_map *map);
@@ -463,67 +1008,268 @@ LIBBPF_API __u32 bpf_map__btf_value_type_id(const struct bpf_map *map);
/* get/set map if_index */
LIBBPF_API __u32 bpf_map__ifindex(const struct bpf_map *map);
LIBBPF_API int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
+/* get/set map map_extra flags */
+LIBBPF_API __u64 bpf_map__map_extra(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra);
-typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
-LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv,
- bpf_map_clear_priv_t clear_priv);
-LIBBPF_API void *bpf_map__priv(const struct bpf_map *map);
LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map,
const void *data, size_t size);
-LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);
+LIBBPF_API void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize);
+
+/**
+ * @brief **bpf_map__is_internal()** tells the caller whether or not the
+ * passed map is a special map created by libbpf automatically for things like
+ * global variables, __ksym externs, Kconfig values, etc
+ * @param map the bpf_map
+ * @return true, if the map is an internal map; false, otherwise
+ */
LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map);
+
+/**
+ * @brief **bpf_map__set_pin_path()** sets the path attribute that tells where the
+ * BPF map should be pinned. This does not actually create the 'pin'.
+ * @param map The bpf_map
+ * @param path The path
+ * @return 0, on success; negative error, otherwise
+ */
LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path);
-LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map);
+
+/**
+ * @brief **bpf_map__pin_path()** gets the path attribute that tells where the
+ * BPF map should be pinned.
+ * @param map The bpf_map
+ * @return The path string; which can be NULL
+ */
+LIBBPF_API const char *bpf_map__pin_path(const struct bpf_map *map);
+
+/**
+ * @brief **bpf_map__is_pinned()** tells the caller whether or not the
+ * passed map has been pinned via a 'pin' file.
+ * @param map The bpf_map
+ * @return true, if the map is pinned; false, otherwise
+ */
LIBBPF_API bool bpf_map__is_pinned(const struct bpf_map *map);
+
+/**
+ * @brief **bpf_map__pin()** creates a file that serves as a 'pin'
+ * for the BPF map. This increments the reference count on the
+ * BPF map which will keep the BPF map loaded even after the
+ * userspace process which loaded it has exited.
+ * @param map The bpf_map to pin
+ * @param path A file path for the 'pin'
+ * @return 0, on success; negative error, otherwise
+ *
+ * If `path` is NULL the maps `pin_path` attribute will be used. If this is
+ * also NULL, an error will be returned and the map will not be pinned.
+ */
LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path);
+
+/**
+ * @brief **bpf_map__unpin()** removes the file that serves as a
+ * 'pin' for the BPF map.
+ * @param map The bpf_map to unpin
+ * @param path A file path for the 'pin'
+ * @return 0, on success; negative error, otherwise
+ *
+ * The `path` parameter can be NULL, in which case the `pin_path`
+ * map attribute is unpinned. If both the `path` parameter and
+ * `pin_path` map attribute are set, they must be equal.
+ */
LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path);
LIBBPF_API int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd);
+LIBBPF_API struct bpf_map *bpf_map__inner_map(struct bpf_map *map);
+
+/**
+ * @brief **bpf_map__lookup_elem()** allows to lookup BPF map value
+ * corresponding to provided key.
+ * @param map BPF map to lookup element in
+ * @param key pointer to memory containing bytes of the key used for lookup
+ * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size**
+ * @param value pointer to memory in which looked up value will be stored
+ * @param value_sz size in byte of value data memory; it has to match BPF map
+ * definition's **value_size**. For per-CPU BPF maps value size has to be
+ * a product of BPF map value size and number of possible CPUs in the system
+ * (could be fetched with **libbpf_num_possible_cpus()**). Note also that for
+ * per-CPU values value size has to be aligned up to closest 8 bytes for
+ * alignment reasons, so expected size is: `round_up(value_size, 8)
+ * * libbpf_num_possible_cpus()`.
+ * @flags extra flags passed to kernel for this operation
+ * @return 0, on success; negative error, otherwise
+ *
+ * **bpf_map__lookup_elem()** is high-level equivalent of
+ * **bpf_map_lookup_elem()** API with added check for key and value size.
+ */
+LIBBPF_API int bpf_map__lookup_elem(const struct bpf_map *map,
+ const void *key, size_t key_sz,
+ void *value, size_t value_sz, __u64 flags);
+
+/**
+ * @brief **bpf_map__update_elem()** allows to insert or update value in BPF
+ * map that corresponds to provided key.
+ * @param map BPF map to insert to or update element in
+ * @param key pointer to memory containing bytes of the key
+ * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size**
+ * @param value pointer to memory containing bytes of the value
+ * @param value_sz size in byte of value data memory; it has to match BPF map
+ * definition's **value_size**. For per-CPU BPF maps value size has to be
+ * a product of BPF map value size and number of possible CPUs in the system
+ * (could be fetched with **libbpf_num_possible_cpus()**). Note also that for
+ * per-CPU values value size has to be aligned up to closest 8 bytes for
+ * alignment reasons, so expected size is: `round_up(value_size, 8)
+ * * libbpf_num_possible_cpus()`.
+ * @flags extra flags passed to kernel for this operation
+ * @return 0, on success; negative error, otherwise
+ *
+ * **bpf_map__update_elem()** is high-level equivalent of
+ * **bpf_map_update_elem()** API with added check for key and value size.
+ */
+LIBBPF_API int bpf_map__update_elem(const struct bpf_map *map,
+ const void *key, size_t key_sz,
+ const void *value, size_t value_sz, __u64 flags);
+
+/**
+ * @brief **bpf_map__delete_elem()** allows to delete element in BPF map that
+ * corresponds to provided key.
+ * @param map BPF map to delete element from
+ * @param key pointer to memory containing bytes of the key
+ * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size**
+ * @flags extra flags passed to kernel for this operation
+ * @return 0, on success; negative error, otherwise
+ *
+ * **bpf_map__delete_elem()** is high-level equivalent of
+ * **bpf_map_delete_elem()** API with added check for key size.
+ */
+LIBBPF_API int bpf_map__delete_elem(const struct bpf_map *map,
+ const void *key, size_t key_sz, __u64 flags);
+
+/**
+ * @brief **bpf_map__lookup_and_delete_elem()** allows to lookup BPF map value
+ * corresponding to provided key and atomically delete it afterwards.
+ * @param map BPF map to lookup element in
+ * @param key pointer to memory containing bytes of the key used for lookup
+ * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size**
+ * @param value pointer to memory in which looked up value will be stored
+ * @param value_sz size in byte of value data memory; it has to match BPF map
+ * definition's **value_size**. For per-CPU BPF maps value size has to be
+ * a product of BPF map value size and number of possible CPUs in the system
+ * (could be fetched with **libbpf_num_possible_cpus()**). Note also that for
+ * per-CPU values value size has to be aligned up to closest 8 bytes for
+ * alignment reasons, so expected size is: `round_up(value_size, 8)
+ * * libbpf_num_possible_cpus()`.
+ * @flags extra flags passed to kernel for this operation
+ * @return 0, on success; negative error, otherwise
+ *
+ * **bpf_map__lookup_and_delete_elem()** is high-level equivalent of
+ * **bpf_map_lookup_and_delete_elem()** API with added check for key and value size.
+ */
+LIBBPF_API int bpf_map__lookup_and_delete_elem(const struct bpf_map *map,
+ const void *key, size_t key_sz,
+ void *value, size_t value_sz, __u64 flags);
+
+/**
+ * @brief **bpf_map__get_next_key()** allows to iterate BPF map keys by
+ * fetching next key that follows current key.
+ * @param map BPF map to fetch next key from
+ * @param cur_key pointer to memory containing bytes of current key or NULL to
+ * fetch the first key
+ * @param next_key pointer to memory to write next key into
+ * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size**
+ * @return 0, on success; -ENOENT if **cur_key** is the last key in BPF map;
+ * negative error, otherwise
+ *
+ * **bpf_map__get_next_key()** is high-level equivalent of
+ * **bpf_map_get_next_key()** API with added check for key size.
+ */
+LIBBPF_API int bpf_map__get_next_key(const struct bpf_map *map,
+ const void *cur_key, void *next_key, size_t key_sz);
+
+struct bpf_xdp_set_link_opts {
+ size_t sz;
+ int old_fd;
+ size_t :0;
+};
+#define bpf_xdp_set_link_opts__last_field old_fd
-LIBBPF_API long libbpf_get_error(const void *ptr);
+struct bpf_xdp_attach_opts {
+ size_t sz;
+ int old_prog_fd;
+ size_t :0;
+};
+#define bpf_xdp_attach_opts__last_field old_prog_fd
-struct bpf_prog_load_attr {
- const char *file;
- enum bpf_prog_type prog_type;
- enum bpf_attach_type expected_attach_type;
- int ifindex;
- int log_level;
- int prog_flags;
+struct bpf_xdp_query_opts {
+ size_t sz;
+ __u32 prog_id; /* output */
+ __u32 drv_prog_id; /* output */
+ __u32 hw_prog_id; /* output */
+ __u32 skb_prog_id; /* output */
+ __u8 attach_mode; /* output */
+ __u64 feature_flags; /* output */
+ __u32 xdp_zc_max_segs; /* output */
+ size_t :0;
+};
+#define bpf_xdp_query_opts__last_field xdp_zc_max_segs
+
+LIBBPF_API int bpf_xdp_attach(int ifindex, int prog_fd, __u32 flags,
+ const struct bpf_xdp_attach_opts *opts);
+LIBBPF_API int bpf_xdp_detach(int ifindex, __u32 flags,
+ const struct bpf_xdp_attach_opts *opts);
+LIBBPF_API int bpf_xdp_query(int ifindex, int flags, struct bpf_xdp_query_opts *opts);
+LIBBPF_API int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id);
+
+/* TC related API */
+enum bpf_tc_attach_point {
+ BPF_TC_INGRESS = 1 << 0,
+ BPF_TC_EGRESS = 1 << 1,
+ BPF_TC_CUSTOM = 1 << 2,
};
-LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
- struct bpf_object **pobj, int *prog_fd);
-LIBBPF_API int bpf_prog_load(const char *file, enum bpf_prog_type type,
- struct bpf_object **pobj, int *prog_fd);
+#define BPF_TC_PARENT(a, b) \
+ ((((a) << 16) & 0xFFFF0000U) | ((b) & 0x0000FFFFU))
-struct xdp_link_info {
- __u32 prog_id;
- __u32 drv_prog_id;
- __u32 hw_prog_id;
- __u32 skb_prog_id;
- __u8 attach_mode;
+enum bpf_tc_flags {
+ BPF_TC_F_REPLACE = 1 << 0,
};
-struct bpf_xdp_set_link_opts {
+struct bpf_tc_hook {
size_t sz;
- int old_fd;
+ int ifindex;
+ enum bpf_tc_attach_point attach_point;
+ __u32 parent;
+ size_t :0;
};
-#define bpf_xdp_set_link_opts__last_field old_fd
+#define bpf_tc_hook__last_field parent
-LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
-LIBBPF_API int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
- const struct bpf_xdp_set_link_opts *opts);
-LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags);
-LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
- size_t info_size, __u32 flags);
+struct bpf_tc_opts {
+ size_t sz;
+ int prog_fd;
+ __u32 flags;
+ __u32 prog_id;
+ __u32 handle;
+ __u32 priority;
+ size_t :0;
+};
+#define bpf_tc_opts__last_field priority
+
+LIBBPF_API int bpf_tc_hook_create(struct bpf_tc_hook *hook);
+LIBBPF_API int bpf_tc_hook_destroy(struct bpf_tc_hook *hook);
+LIBBPF_API int bpf_tc_attach(const struct bpf_tc_hook *hook,
+ struct bpf_tc_opts *opts);
+LIBBPF_API int bpf_tc_detach(const struct bpf_tc_hook *hook,
+ const struct bpf_tc_opts *opts);
+LIBBPF_API int bpf_tc_query(const struct bpf_tc_hook *hook,
+ struct bpf_tc_opts *opts);
/* Ring buffer APIs */
struct ring_buffer;
+struct ring;
+struct user_ring_buffer;
typedef int (*ring_buffer_sample_fn)(void *ctx, void *data, size_t size);
struct ring_buffer_opts {
- size_t sz; /* size of this struct, for forward/backward compatiblity */
+ size_t sz; /* size of this struct, for forward/backward compatibility */
};
#define ring_buffer_opts__last_field sz
@@ -536,6 +1282,191 @@ LIBBPF_API int ring_buffer__add(struct ring_buffer *rb, int map_fd,
ring_buffer_sample_fn sample_cb, void *ctx);
LIBBPF_API int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms);
LIBBPF_API int ring_buffer__consume(struct ring_buffer *rb);
+LIBBPF_API int ring_buffer__epoll_fd(const struct ring_buffer *rb);
+
+/**
+ * @brief **ring_buffer__ring()** returns the ringbuffer object inside a given
+ * ringbuffer manager representing a single BPF_MAP_TYPE_RINGBUF map instance.
+ *
+ * @param rb A ringbuffer manager object.
+ * @param idx An index into the ringbuffers contained within the ringbuffer
+ * manager object. The index is 0-based and corresponds to the order in which
+ * ring_buffer__add was called.
+ * @return A ringbuffer object on success; NULL and errno set if the index is
+ * invalid.
+ */
+LIBBPF_API struct ring *ring_buffer__ring(struct ring_buffer *rb,
+ unsigned int idx);
+
+/**
+ * @brief **ring__consumer_pos()** returns the current consumer position in the
+ * given ringbuffer.
+ *
+ * @param r A ringbuffer object.
+ * @return The current consumer position.
+ */
+LIBBPF_API unsigned long ring__consumer_pos(const struct ring *r);
+
+/**
+ * @brief **ring__producer_pos()** returns the current producer position in the
+ * given ringbuffer.
+ *
+ * @param r A ringbuffer object.
+ * @return The current producer position.
+ */
+LIBBPF_API unsigned long ring__producer_pos(const struct ring *r);
+
+/**
+ * @brief **ring__avail_data_size()** returns the number of bytes in the
+ * ringbuffer not yet consumed. This has no locking associated with it, so it
+ * can be inaccurate if operations are ongoing while this is called. However, it
+ * should still show the correct trend over the long-term.
+ *
+ * @param r A ringbuffer object.
+ * @return The number of bytes not yet consumed.
+ */
+LIBBPF_API size_t ring__avail_data_size(const struct ring *r);
+
+/**
+ * @brief **ring__size()** returns the total size of the ringbuffer's map data
+ * area (excluding special producer/consumer pages). Effectively this gives the
+ * amount of usable bytes of data inside the ringbuffer.
+ *
+ * @param r A ringbuffer object.
+ * @return The total size of the ringbuffer map data area.
+ */
+LIBBPF_API size_t ring__size(const struct ring *r);
+
+/**
+ * @brief **ring__map_fd()** returns the file descriptor underlying the given
+ * ringbuffer.
+ *
+ * @param r A ringbuffer object.
+ * @return The underlying ringbuffer file descriptor
+ */
+LIBBPF_API int ring__map_fd(const struct ring *r);
+
+/**
+ * @brief **ring__consume()** consumes available ringbuffer data without event
+ * polling.
+ *
+ * @param r A ringbuffer object.
+ * @return The number of records consumed (or INT_MAX, whichever is less), or
+ * a negative number if any of the callbacks return an error.
+ */
+LIBBPF_API int ring__consume(struct ring *r);
+
+struct user_ring_buffer_opts {
+ size_t sz; /* size of this struct, for forward/backward compatibility */
+};
+
+#define user_ring_buffer_opts__last_field sz
+
+/**
+ * @brief **user_ring_buffer__new()** creates a new instance of a user ring
+ * buffer.
+ *
+ * @param map_fd A file descriptor to a BPF_MAP_TYPE_USER_RINGBUF map.
+ * @param opts Options for how the ring buffer should be created.
+ * @return A user ring buffer on success; NULL and errno being set on a
+ * failure.
+ */
+LIBBPF_API struct user_ring_buffer *
+user_ring_buffer__new(int map_fd, const struct user_ring_buffer_opts *opts);
+
+/**
+ * @brief **user_ring_buffer__reserve()** reserves a pointer to a sample in the
+ * user ring buffer.
+ * @param rb A pointer to a user ring buffer.
+ * @param size The size of the sample, in bytes.
+ * @return A pointer to an 8-byte aligned reserved region of the user ring
+ * buffer; NULL, and errno being set if a sample could not be reserved.
+ *
+ * This function is *not* thread safe, and callers must synchronize accessing
+ * this function if there are multiple producers. If a size is requested that
+ * is larger than the size of the entire ring buffer, errno will be set to
+ * E2BIG and NULL is returned. If the ring buffer could accommodate the size,
+ * but currently does not have enough space, errno is set to ENOSPC and NULL is
+ * returned.
+ *
+ * After initializing the sample, callers must invoke
+ * **user_ring_buffer__submit()** to post the sample to the kernel. Otherwise,
+ * the sample must be freed with **user_ring_buffer__discard()**.
+ */
+LIBBPF_API void *user_ring_buffer__reserve(struct user_ring_buffer *rb, __u32 size);
+
+/**
+ * @brief **user_ring_buffer__reserve_blocking()** reserves a record in the
+ * ring buffer, possibly blocking for up to @timeout_ms until a sample becomes
+ * available.
+ * @param rb The user ring buffer.
+ * @param size The size of the sample, in bytes.
+ * @param timeout_ms The amount of time, in milliseconds, for which the caller
+ * should block when waiting for a sample. -1 causes the caller to block
+ * indefinitely.
+ * @return A pointer to an 8-byte aligned reserved region of the user ring
+ * buffer; NULL, and errno being set if a sample could not be reserved.
+ *
+ * This function is *not* thread safe, and callers must synchronize
+ * accessing this function if there are multiple producers
+ *
+ * If **timeout_ms** is -1, the function will block indefinitely until a sample
+ * becomes available. Otherwise, **timeout_ms** must be non-negative, or errno
+ * is set to EINVAL, and NULL is returned. If **timeout_ms** is 0, no blocking
+ * will occur and the function will return immediately after attempting to
+ * reserve a sample.
+ *
+ * If **size** is larger than the size of the entire ring buffer, errno is set
+ * to E2BIG and NULL is returned. If the ring buffer could accommodate
+ * **size**, but currently does not have enough space, the caller will block
+ * until at most **timeout_ms** has elapsed. If insufficient space is available
+ * at that time, errno is set to ENOSPC, and NULL is returned.
+ *
+ * The kernel guarantees that it will wake up this thread to check if
+ * sufficient space is available in the ring buffer at least once per
+ * invocation of the **bpf_ringbuf_drain()** helper function, provided that at
+ * least one sample is consumed, and the BPF program did not invoke the
+ * function with BPF_RB_NO_WAKEUP. A wakeup may occur sooner than that, but the
+ * kernel does not guarantee this. If the helper function is invoked with
+ * BPF_RB_FORCE_WAKEUP, a wakeup event will be sent even if no sample is
+ * consumed.
+ *
+ * When a sample of size **size** is found within **timeout_ms**, a pointer to
+ * the sample is returned. After initializing the sample, callers must invoke
+ * **user_ring_buffer__submit()** to post the sample to the ring buffer.
+ * Otherwise, the sample must be freed with **user_ring_buffer__discard()**.
+ */
+LIBBPF_API void *user_ring_buffer__reserve_blocking(struct user_ring_buffer *rb,
+ __u32 size,
+ int timeout_ms);
+
+/**
+ * @brief **user_ring_buffer__submit()** submits a previously reserved sample
+ * into the ring buffer.
+ * @param rb The user ring buffer.
+ * @param sample A reserved sample.
+ *
+ * It is not necessary to synchronize amongst multiple producers when invoking
+ * this function.
+ */
+LIBBPF_API void user_ring_buffer__submit(struct user_ring_buffer *rb, void *sample);
+
+/**
+ * @brief **user_ring_buffer__discard()** discards a previously reserved sample.
+ * @param rb The user ring buffer.
+ * @param sample A reserved sample.
+ *
+ * It is not necessary to synchronize amongst multiple producers when invoking
+ * this function.
+ */
+LIBBPF_API void user_ring_buffer__discard(struct user_ring_buffer *rb, void *sample);
+
+/**
+ * @brief **user_ring_buffer__free()** frees a ring buffer that was previously
+ * created with **user_ring_buffer__new()**.
+ * @param rb The user ring buffer being freed.
+ */
+LIBBPF_API void user_ring_buffer__free(struct user_ring_buffer *rb);
/* Perf buffer APIs */
struct perf_buffer;
@@ -546,16 +1477,27 @@ typedef void (*perf_buffer_lost_fn)(void *ctx, int cpu, __u64 cnt);
/* common use perf buffer options */
struct perf_buffer_opts {
- /* if specified, sample_cb is called for each sample */
- perf_buffer_sample_fn sample_cb;
- /* if specified, lost_cb is called for each batch of lost samples */
- perf_buffer_lost_fn lost_cb;
- /* ctx is provided to sample_cb and lost_cb */
- void *ctx;
+ size_t sz;
+ __u32 sample_period;
+ size_t :0;
};
-
+#define perf_buffer_opts__last_field sample_period
+
+/**
+ * @brief **perf_buffer__new()** creates BPF perfbuf manager for a specified
+ * BPF_PERF_EVENT_ARRAY map
+ * @param map_fd FD of BPF_PERF_EVENT_ARRAY BPF map that will be used by BPF
+ * code to send data over to user-space
+ * @param page_cnt number of memory pages allocated for each per-CPU buffer
+ * @param sample_cb function called on each received data record
+ * @param lost_cb function called when record loss has occurred
+ * @param ctx user-provided extra context passed into *sample_cb* and *lost_cb*
+ * @return a new instance of struct perf_buffer on success, NULL on error with
+ * *errno* containing an error code
+ */
LIBBPF_API struct perf_buffer *
perf_buffer__new(int map_fd, size_t page_cnt,
+ perf_buffer_sample_fn sample_cb, perf_buffer_lost_fn lost_cb, void *ctx,
const struct perf_buffer_opts *opts);
enum bpf_perf_event_ret {
@@ -571,12 +1513,9 @@ typedef enum bpf_perf_event_ret
/* raw perf buffer options, giving most power and control */
struct perf_buffer_raw_opts {
- /* perf event attrs passed directly into perf_event_open() */
- struct perf_event_attr *attr;
- /* raw event callback */
- perf_buffer_event_fn event_cb;
- /* ctx is provided to event_cb */
- void *ctx;
+ size_t sz;
+ long :0;
+ long :0;
/* if cpu_cnt == 0, open all on all possible CPUs (up to the number of
* max_entries of given PERF_EVENT_ARRAY map)
*/
@@ -586,9 +1525,13 @@ struct perf_buffer_raw_opts {
/* if cpu_cnt > 0, map_keys specify map keys to set per-CPU FDs for */
int *map_keys;
};
+#define perf_buffer_raw_opts__last_field map_keys
+
+struct perf_event_attr;
LIBBPF_API struct perf_buffer *
-perf_buffer__new_raw(int map_fd, size_t page_cnt,
+perf_buffer__new_raw(int map_fd, size_t page_cnt, struct perf_event_attr *attr,
+ perf_buffer_event_fn event_cb, void *ctx,
const struct perf_buffer_raw_opts *opts);
LIBBPF_API void perf_buffer__free(struct perf_buffer *pb);
@@ -598,14 +1541,22 @@ LIBBPF_API int perf_buffer__consume(struct perf_buffer *pb);
LIBBPF_API int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx);
LIBBPF_API size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb);
LIBBPF_API int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx);
-
-typedef enum bpf_perf_event_ret
- (*bpf_perf_event_print_t)(struct perf_event_header *hdr,
- void *private_data);
-LIBBPF_API enum bpf_perf_event_ret
-bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
- void **copy_mem, size_t *copy_size,
- bpf_perf_event_print_t fn, void *private_data);
+/**
+ * @brief **perf_buffer__buffer()** returns the per-cpu raw mmap()'ed underlying
+ * memory region of the ring buffer.
+ * This ring buffer can be used to implement a custom events consumer.
+ * The ring buffer starts with the *struct perf_event_mmap_page*, which
+ * holds the ring buffer managment fields, when accessing the header
+ * structure it's important to be SMP aware.
+ * You can refer to *perf_event_read_simple* for a simple example.
+ * @param pb the perf buffer structure
+ * @param buf_idx the buffer index to retreive
+ * @param buf (out) gets the base pointer of the mmap()'ed memory
+ * @param buf_size (out) gets the size of the mmap()'ed region
+ * @return 0 on success, negative error code for failure
+ */
+LIBBPF_API int perf_buffer__buffer(struct perf_buffer *pb, int buf_idx, void **buf,
+ size_t *buf_size);
struct bpf_prog_linfo;
struct bpf_prog_info;
@@ -628,79 +1579,53 @@ bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo,
* user, causing subsequent probes to fail. In this case, the caller may want
* to adjust that limit with setrlimit().
*/
-LIBBPF_API bool bpf_probe_prog_type(enum bpf_prog_type prog_type,
- __u32 ifindex);
-LIBBPF_API bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex);
-LIBBPF_API bool bpf_probe_helper(enum bpf_func_id id,
- enum bpf_prog_type prog_type, __u32 ifindex);
-LIBBPF_API bool bpf_probe_large_insn_limit(__u32 ifindex);
-
-/*
- * Get bpf_prog_info in continuous memory
- *
- * struct bpf_prog_info has multiple arrays. The user has option to choose
- * arrays to fetch from kernel. The following APIs provide an uniform way to
- * fetch these data. All arrays in bpf_prog_info are stored in a single
- * continuous memory region. This makes it easy to store the info in a
- * file.
- *
- * Before writing bpf_prog_info_linear to files, it is necessary to
- * translate pointers in bpf_prog_info to offsets. Helper functions
- * bpf_program__bpil_addr_to_offs() and bpf_program__bpil_offs_to_addr()
- * are introduced to switch between pointers and offsets.
- *
- * Examples:
- * # To fetch map_ids and prog_tags:
- * __u64 arrays = (1UL << BPF_PROG_INFO_MAP_IDS) |
- * (1UL << BPF_PROG_INFO_PROG_TAGS);
- * struct bpf_prog_info_linear *info_linear =
- * bpf_program__get_prog_info_linear(fd, arrays);
- *
- * # To save data in file
- * bpf_program__bpil_addr_to_offs(info_linear);
- * write(f, info_linear, sizeof(*info_linear) + info_linear->data_len);
- *
- * # To read data from file
- * read(f, info_linear, <proper_size>);
- * bpf_program__bpil_offs_to_addr(info_linear);
- */
-enum bpf_prog_info_array {
- BPF_PROG_INFO_FIRST_ARRAY = 0,
- BPF_PROG_INFO_JITED_INSNS = 0,
- BPF_PROG_INFO_XLATED_INSNS,
- BPF_PROG_INFO_MAP_IDS,
- BPF_PROG_INFO_JITED_KSYMS,
- BPF_PROG_INFO_JITED_FUNC_LENS,
- BPF_PROG_INFO_FUNC_INFO,
- BPF_PROG_INFO_LINE_INFO,
- BPF_PROG_INFO_JITED_LINE_INFO,
- BPF_PROG_INFO_PROG_TAGS,
- BPF_PROG_INFO_LAST_ARRAY,
-};
-
-struct bpf_prog_info_linear {
- /* size of struct bpf_prog_info, when the tool is compiled */
- __u32 info_len;
- /* total bytes allocated for data, round up to 8 bytes */
- __u32 data_len;
- /* which arrays are included in data */
- __u64 arrays;
- struct bpf_prog_info info;
- __u8 data[];
-};
-
-LIBBPF_API struct bpf_prog_info_linear *
-bpf_program__get_prog_info_linear(int fd, __u64 arrays);
-
-LIBBPF_API void
-bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear);
-LIBBPF_API void
-bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear);
+/**
+ * @brief **libbpf_probe_bpf_prog_type()** detects if host kernel supports
+ * BPF programs of a given type.
+ * @param prog_type BPF program type to detect kernel support for
+ * @param opts reserved for future extensibility, should be NULL
+ * @return 1, if given program type is supported; 0, if given program type is
+ * not supported; negative error code if feature detection failed or can't be
+ * performed
+ *
+ * Make sure the process has required set of CAP_* permissions (or runs as
+ * root) when performing feature checking.
+ */
+LIBBPF_API int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts);
+/**
+ * @brief **libbpf_probe_bpf_map_type()** detects if host kernel supports
+ * BPF maps of a given type.
+ * @param map_type BPF map type to detect kernel support for
+ * @param opts reserved for future extensibility, should be NULL
+ * @return 1, if given map type is supported; 0, if given map type is
+ * not supported; negative error code if feature detection failed or can't be
+ * performed
+ *
+ * Make sure the process has required set of CAP_* permissions (or runs as
+ * root) when performing feature checking.
+ */
+LIBBPF_API int libbpf_probe_bpf_map_type(enum bpf_map_type map_type, const void *opts);
+/**
+ * @brief **libbpf_probe_bpf_helper()** detects if host kernel supports the
+ * use of a given BPF helper from specified BPF program type.
+ * @param prog_type BPF program type used to check the support of BPF helper
+ * @param helper_id BPF helper ID (enum bpf_func_id) to check support for
+ * @param opts reserved for future extensibility, should be NULL
+ * @return 1, if given combination of program type and helper is supported; 0,
+ * if the combination is not supported; negative error code if feature
+ * detection for provided input arguments failed or can't be performed
+ *
+ * Make sure the process has required set of CAP_* permissions (or runs as
+ * root) when performing feature checking.
+ */
+LIBBPF_API int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type,
+ enum bpf_func_id helper_id, const void *opts);
-/*
- * A helper function to get the number of possible CPUs before looking up
- * per-CPU maps. Negative errno is returned on failure.
+/**
+ * @brief **libbpf_num_possible_cpus()** is a helper function to get the
+ * number of possible CPUs that the host kernel supports and expects.
+ * @return number of possible CPUs; or error code on failure
*
* Example usage:
*
@@ -710,7 +1635,6 @@ bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear);
* }
* long values[ncpus];
* bpf_map_lookup_elem(per_cpu_map_fd, key, values);
- *
*/
LIBBPF_API int libbpf_num_possible_cpus(void);
@@ -730,17 +1654,17 @@ struct bpf_object_skeleton {
size_t sz; /* size of this struct, for forward/backward compatibility */
const char *name;
- void *data;
+ const void *data;
size_t data_sz;
struct bpf_object **obj;
int map_cnt;
- int map_skel_sz; /* sizeof(struct bpf_skeleton_map) */
+ int map_skel_sz; /* sizeof(struct bpf_map_skeleton) */
struct bpf_map_skeleton *maps;
int prog_cnt;
- int prog_skel_sz; /* sizeof(struct bpf_skeleton_prog) */
+ int prog_skel_sz; /* sizeof(struct bpf_prog_skeleton) */
struct bpf_prog_skeleton *progs;
};
@@ -752,12 +1676,183 @@ LIBBPF_API int bpf_object__attach_skeleton(struct bpf_object_skeleton *s);
LIBBPF_API void bpf_object__detach_skeleton(struct bpf_object_skeleton *s);
LIBBPF_API void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s);
+struct bpf_var_skeleton {
+ const char *name;
+ struct bpf_map **map;
+ void **addr;
+};
+
+struct bpf_object_subskeleton {
+ size_t sz; /* size of this struct, for forward/backward compatibility */
+
+ const struct bpf_object *obj;
+
+ int map_cnt;
+ int map_skel_sz; /* sizeof(struct bpf_map_skeleton) */
+ struct bpf_map_skeleton *maps;
+
+ int prog_cnt;
+ int prog_skel_sz; /* sizeof(struct bpf_prog_skeleton) */
+ struct bpf_prog_skeleton *progs;
+
+ int var_cnt;
+ int var_skel_sz; /* sizeof(struct bpf_var_skeleton) */
+ struct bpf_var_skeleton *vars;
+};
+
+LIBBPF_API int
+bpf_object__open_subskeleton(struct bpf_object_subskeleton *s);
+LIBBPF_API void
+bpf_object__destroy_subskeleton(struct bpf_object_subskeleton *s);
+
+struct gen_loader_opts {
+ size_t sz; /* size of this struct, for forward/backward compatibility */
+ const char *data;
+ const char *insns;
+ __u32 data_sz;
+ __u32 insns_sz;
+};
+
+#define gen_loader_opts__last_field insns_sz
+LIBBPF_API int bpf_object__gen_loader(struct bpf_object *obj,
+ struct gen_loader_opts *opts);
+
enum libbpf_tristate {
TRI_NO = 0,
TRI_YES = 1,
TRI_MODULE = 2,
};
+struct bpf_linker_opts {
+ /* size of this struct, for forward/backward compatibility */
+ size_t sz;
+};
+#define bpf_linker_opts__last_field sz
+
+struct bpf_linker_file_opts {
+ /* size of this struct, for forward/backward compatibility */
+ size_t sz;
+};
+#define bpf_linker_file_opts__last_field sz
+
+struct bpf_linker;
+
+LIBBPF_API struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts *opts);
+LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker,
+ const char *filename,
+ const struct bpf_linker_file_opts *opts);
+LIBBPF_API int bpf_linker__finalize(struct bpf_linker *linker);
+LIBBPF_API void bpf_linker__free(struct bpf_linker *linker);
+
+/*
+ * Custom handling of BPF program's SEC() definitions
+ */
+
+struct bpf_prog_load_opts; /* defined in bpf.h */
+
+/* Called during bpf_object__open() for each recognized BPF program. Callback
+ * can use various bpf_program__set_*() setters to adjust whatever properties
+ * are necessary.
+ */
+typedef int (*libbpf_prog_setup_fn_t)(struct bpf_program *prog, long cookie);
+
+/* Called right before libbpf performs bpf_prog_load() to load BPF program
+ * into the kernel. Callback can adjust opts as necessary.
+ */
+typedef int (*libbpf_prog_prepare_load_fn_t)(struct bpf_program *prog,
+ struct bpf_prog_load_opts *opts, long cookie);
+
+/* Called during skeleton attach or through bpf_program__attach(). If
+ * auto-attach is not supported, callback should return 0 and set link to
+ * NULL (it's not considered an error during skeleton attach, but it will be
+ * an error for bpf_program__attach() calls). On error, error should be
+ * returned directly and link set to NULL. On success, return 0 and set link
+ * to a valid struct bpf_link.
+ */
+typedef int (*libbpf_prog_attach_fn_t)(const struct bpf_program *prog, long cookie,
+ struct bpf_link **link);
+
+struct libbpf_prog_handler_opts {
+ /* size of this struct, for forward/backward compatibility */
+ size_t sz;
+ /* User-provided value that is passed to prog_setup_fn,
+ * prog_prepare_load_fn, and prog_attach_fn callbacks. Allows user to
+ * register one set of callbacks for multiple SEC() definitions and
+ * still be able to distinguish them, if necessary. For example,
+ * libbpf itself is using this to pass necessary flags (e.g.,
+ * sleepable flag) to a common internal SEC() handler.
+ */
+ long cookie;
+ /* BPF program initialization callback (see libbpf_prog_setup_fn_t).
+ * Callback is optional, pass NULL if it's not necessary.
+ */
+ libbpf_prog_setup_fn_t prog_setup_fn;
+ /* BPF program loading callback (see libbpf_prog_prepare_load_fn_t).
+ * Callback is optional, pass NULL if it's not necessary.
+ */
+ libbpf_prog_prepare_load_fn_t prog_prepare_load_fn;
+ /* BPF program attach callback (see libbpf_prog_attach_fn_t).
+ * Callback is optional, pass NULL if it's not necessary.
+ */
+ libbpf_prog_attach_fn_t prog_attach_fn;
+};
+#define libbpf_prog_handler_opts__last_field prog_attach_fn
+
+/**
+ * @brief **libbpf_register_prog_handler()** registers a custom BPF program
+ * SEC() handler.
+ * @param sec section prefix for which custom handler is registered
+ * @param prog_type BPF program type associated with specified section
+ * @param exp_attach_type Expected BPF attach type associated with specified section
+ * @param opts optional cookie, callbacks, and other extra options
+ * @return Non-negative handler ID is returned on success. This handler ID has
+ * to be passed to *libbpf_unregister_prog_handler()* to unregister such
+ * custom handler. Negative error code is returned on error.
+ *
+ * *sec* defines which SEC() definitions are handled by this custom handler
+ * registration. *sec* can have few different forms:
+ * - if *sec* is just a plain string (e.g., "abc"), it will match only
+ * SEC("abc"). If BPF program specifies SEC("abc/whatever") it will result
+ * in an error;
+ * - if *sec* is of the form "abc/", proper SEC() form is
+ * SEC("abc/something"), where acceptable "something" should be checked by
+ * *prog_init_fn* callback, if there are additional restrictions;
+ * - if *sec* is of the form "abc+", it will successfully match both
+ * SEC("abc") and SEC("abc/whatever") forms;
+ * - if *sec* is NULL, custom handler is registered for any BPF program that
+ * doesn't match any of the registered (custom or libbpf's own) SEC()
+ * handlers. There could be only one such generic custom handler registered
+ * at any given time.
+ *
+ * All custom handlers (except the one with *sec* == NULL) are processed
+ * before libbpf's own SEC() handlers. It is allowed to "override" libbpf's
+ * SEC() handlers by registering custom ones for the same section prefix
+ * (i.e., it's possible to have custom SEC("perf_event/LLC-load-misses")
+ * handler).
+ *
+ * Note, like much of global libbpf APIs (e.g., libbpf_set_print(),
+ * libbpf_set_strict_mode(), etc)) these APIs are not thread-safe. User needs
+ * to ensure synchronization if there is a risk of running this API from
+ * multiple threads simultaneously.
+ */
+LIBBPF_API int libbpf_register_prog_handler(const char *sec,
+ enum bpf_prog_type prog_type,
+ enum bpf_attach_type exp_attach_type,
+ const struct libbpf_prog_handler_opts *opts);
+/**
+ * @brief *libbpf_unregister_prog_handler()* unregisters previously registered
+ * custom BPF program SEC() handler.
+ * @param handler_id handler ID returned by *libbpf_register_prog_handler()*
+ * after successful registration
+ * @return 0 on success, negative error code if handler isn't found
+ *
+ * Note, like much of global libbpf APIs (e.g., libbpf_set_print(),
+ * libbpf_set_strict_mode(), etc)) these APIs are not thread-safe. User needs
+ * to ensure synchronization if there is a risk of running this API from
+ * multiple threads simultaneously.
+ */
+LIBBPF_API int libbpf_unregister_prog_handler(int handler_id);
+
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 4ebfadf45b47..86804fd90dd1 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -1,29 +1,14 @@
LIBBPF_0.0.1 {
global:
bpf_btf_get_fd_by_id;
- bpf_create_map;
- bpf_create_map_in_map;
- bpf_create_map_in_map_node;
- bpf_create_map_name;
- bpf_create_map_node;
- bpf_create_map_xattr;
- bpf_load_btf;
- bpf_load_program;
- bpf_load_program_xattr;
bpf_map__btf_key_type_id;
bpf_map__btf_value_type_id;
- bpf_map__def;
bpf_map__fd;
- bpf_map__is_offload_neutral;
bpf_map__name;
- bpf_map__next;
bpf_map__pin;
- bpf_map__prev;
- bpf_map__priv;
bpf_map__reuse_fd;
bpf_map__set_ifindex;
bpf_map__set_inner_map_fd;
- bpf_map__set_priv;
bpf_map__unpin;
bpf_map_delete_elem;
bpf_map_get_fd_by_id;
@@ -38,79 +23,37 @@ LIBBPF_0.0.1 {
bpf_object__btf_fd;
bpf_object__close;
bpf_object__find_map_by_name;
- bpf_object__find_map_by_offset;
- bpf_object__find_program_by_title;
bpf_object__kversion;
bpf_object__load;
bpf_object__name;
- bpf_object__next;
bpf_object__open;
- bpf_object__open_buffer;
- bpf_object__open_xattr;
bpf_object__pin;
bpf_object__pin_maps;
bpf_object__pin_programs;
- bpf_object__priv;
- bpf_object__set_priv;
- bpf_object__unload;
bpf_object__unpin_maps;
bpf_object__unpin_programs;
- bpf_perf_event_read_simple;
bpf_prog_attach;
bpf_prog_detach;
bpf_prog_detach2;
bpf_prog_get_fd_by_id;
bpf_prog_get_next_id;
- bpf_prog_load;
- bpf_prog_load_xattr;
bpf_prog_query;
- bpf_prog_test_run;
- bpf_prog_test_run_xattr;
bpf_program__fd;
- bpf_program__is_kprobe;
- bpf_program__is_perf_event;
- bpf_program__is_raw_tracepoint;
- bpf_program__is_sched_act;
- bpf_program__is_sched_cls;
- bpf_program__is_socket_filter;
- bpf_program__is_tracepoint;
- bpf_program__is_xdp;
- bpf_program__load;
- bpf_program__next;
- bpf_program__nth_fd;
bpf_program__pin;
- bpf_program__pin_instance;
- bpf_program__prev;
- bpf_program__priv;
bpf_program__set_expected_attach_type;
bpf_program__set_ifindex;
- bpf_program__set_kprobe;
- bpf_program__set_perf_event;
- bpf_program__set_prep;
- bpf_program__set_priv;
- bpf_program__set_raw_tracepoint;
- bpf_program__set_sched_act;
- bpf_program__set_sched_cls;
- bpf_program__set_socket_filter;
- bpf_program__set_tracepoint;
bpf_program__set_type;
- bpf_program__set_xdp;
- bpf_program__title;
bpf_program__unload;
bpf_program__unpin;
- bpf_program__unpin_instance;
bpf_prog_linfo__free;
bpf_prog_linfo__new;
bpf_prog_linfo__lfind_addr_func;
bpf_prog_linfo__lfind;
bpf_raw_tracepoint_open;
- bpf_set_link_xdp_fd;
bpf_task_fd_query;
- bpf_verify_program;
btf__fd;
btf__find_by_name;
btf__free;
- btf__get_from_id;
btf__name_by_offset;
btf__new;
btf__resolve_size;
@@ -127,48 +70,24 @@ LIBBPF_0.0.1 {
LIBBPF_0.0.2 {
global:
- bpf_probe_helper;
- bpf_probe_map_type;
- bpf_probe_prog_type;
- bpf_map__resize;
bpf_map_lookup_elem_flags;
bpf_object__btf;
bpf_object__find_map_fd_by_name;
- bpf_get_link_xdp_id;
- btf__dedup;
- btf__get_map_kv_tids;
- btf__get_nr_types;
btf__get_raw_data;
- btf__load;
btf_ext__free;
- btf_ext__func_info_rec_size;
btf_ext__get_raw_data;
- btf_ext__line_info_rec_size;
btf_ext__new;
- btf_ext__reloc_func_info;
- btf_ext__reloc_line_info;
- xsk_umem__create;
- xsk_socket__create;
- xsk_umem__delete;
- xsk_socket__delete;
- xsk_umem__fd;
- xsk_socket__fd;
- bpf_program__get_prog_info_linear;
- bpf_program__bpil_addr_to_offs;
- bpf_program__bpil_offs_to_addr;
} LIBBPF_0.0.1;
LIBBPF_0.0.3 {
global:
bpf_map__is_internal;
bpf_map_freeze;
- btf__finalize_data;
} LIBBPF_0.0.2;
LIBBPF_0.0.4 {
global:
bpf_link__destroy;
- bpf_object__load_xattr;
bpf_program__attach_kprobe;
bpf_program__attach_perf_event;
bpf_program__attach_raw_tracepoint;
@@ -176,14 +95,10 @@ LIBBPF_0.0.4 {
bpf_program__attach_uprobe;
btf_dump__dump_type;
btf_dump__free;
- btf_dump__new;
btf__parse_elf;
libbpf_num_possible_cpus;
perf_buffer__free;
- perf_buffer__new;
- perf_buffer__new_raw;
perf_buffer__poll;
- xsk_umem__create;
} LIBBPF_0.0.3;
LIBBPF_0.0.5 {
@@ -193,7 +108,6 @@ LIBBPF_0.0.5 {
LIBBPF_0.0.6 {
global:
- bpf_get_link_xdp_info;
bpf_map__get_pin_path;
bpf_map__is_pinned;
bpf_map__set_pin_path;
@@ -202,9 +116,6 @@ LIBBPF_0.0.6 {
bpf_program__attach_trace;
bpf_program__get_expected_attach_type;
bpf_program__get_type;
- bpf_program__is_tracing;
- bpf_program__set_tracing;
- bpf_program__size;
btf__find_by_name_kind;
libbpf_find_vmlinux_btf_id;
} LIBBPF_0.0.5;
@@ -224,14 +135,8 @@ LIBBPF_0.0.7 {
bpf_object__detach_skeleton;
bpf_object__load_skeleton;
bpf_object__open_skeleton;
- bpf_probe_large_insn_limit;
- bpf_prog_attach_xattr;
bpf_program__attach;
bpf_program__name;
- bpf_program__is_extension;
- bpf_program__is_struct_ops;
- bpf_program__set_extension;
- bpf_program__set_struct_ops;
btf__align_of;
libbpf_find_kernel_btf;
} LIBBPF_0.0.6;
@@ -247,12 +152,10 @@ LIBBPF_0.0.8 {
bpf_link_create;
bpf_link_update;
bpf_map__set_initial_value;
+ bpf_prog_attach_opts;
bpf_program__attach_cgroup;
bpf_program__attach_lsm;
- bpf_program__is_lsm;
bpf_program__set_attach_target;
- bpf_program__set_lsm;
- bpf_set_link_xdp_fd_opts;
} LIBBPF_0.0.7;
LIBBPF_0.0.9 {
@@ -290,9 +193,7 @@ LIBBPF_0.1.0 {
bpf_map__value_size;
bpf_program__attach_xdp;
bpf_program__autoload;
- bpf_program__is_sk_lookup;
bpf_program__set_autoload;
- bpf_program__set_sk_lookup;
btf__parse;
btf__parse_raw;
btf__pointer_size;
@@ -335,5 +236,181 @@ LIBBPF_0.2.0 {
perf_buffer__buffer_fd;
perf_buffer__epoll_fd;
perf_buffer__consume_buffer;
- xsk_socket__create_shared;
} LIBBPF_0.1.0;
+
+LIBBPF_0.3.0 {
+ global:
+ btf__base_btf;
+ btf__parse_elf_split;
+ btf__parse_raw_split;
+ btf__parse_split;
+ btf__new_empty_split;
+ ring_buffer__epoll_fd;
+} LIBBPF_0.2.0;
+
+LIBBPF_0.4.0 {
+ global:
+ btf__add_float;
+ btf__add_type;
+ bpf_linker__add_file;
+ bpf_linker__finalize;
+ bpf_linker__free;
+ bpf_linker__new;
+ bpf_map__inner_map;
+ bpf_object__set_kversion;
+ bpf_tc_attach;
+ bpf_tc_detach;
+ bpf_tc_hook_create;
+ bpf_tc_hook_destroy;
+ bpf_tc_query;
+} LIBBPF_0.3.0;
+
+LIBBPF_0.5.0 {
+ global:
+ bpf_map__initial_value;
+ bpf_map__pin_path;
+ bpf_map_lookup_and_delete_elem_flags;
+ bpf_program__attach_kprobe_opts;
+ bpf_program__attach_perf_event_opts;
+ bpf_program__attach_tracepoint_opts;
+ bpf_program__attach_uprobe_opts;
+ bpf_object__gen_loader;
+ btf__load_from_kernel_by_id;
+ btf__load_from_kernel_by_id_split;
+ btf__load_into_kernel;
+ btf__load_module_btf;
+ btf__load_vmlinux_btf;
+ btf_dump__dump_type_data;
+ libbpf_set_strict_mode;
+} LIBBPF_0.4.0;
+
+LIBBPF_0.6.0 {
+ global:
+ bpf_map__map_extra;
+ bpf_map__set_map_extra;
+ bpf_map_create;
+ bpf_object__next_map;
+ bpf_object__next_program;
+ bpf_object__prev_map;
+ bpf_object__prev_program;
+ bpf_prog_load;
+ bpf_program__flags;
+ bpf_program__insn_cnt;
+ bpf_program__insns;
+ bpf_program__set_flags;
+ btf__add_btf;
+ btf__add_decl_tag;
+ btf__add_type_tag;
+ btf__dedup;
+ btf__raw_data;
+ btf__type_cnt;
+ btf_dump__new;
+ libbpf_major_version;
+ libbpf_minor_version;
+ libbpf_version_string;
+ perf_buffer__new;
+ perf_buffer__new_raw;
+} LIBBPF_0.5.0;
+
+LIBBPF_0.7.0 {
+ global:
+ bpf_btf_load;
+ bpf_program__expected_attach_type;
+ bpf_program__log_buf;
+ bpf_program__log_level;
+ bpf_program__set_log_buf;
+ bpf_program__set_log_level;
+ bpf_program__type;
+ bpf_xdp_attach;
+ bpf_xdp_detach;
+ bpf_xdp_query;
+ bpf_xdp_query_id;
+ libbpf_probe_bpf_helper;
+ libbpf_probe_bpf_map_type;
+ libbpf_probe_bpf_prog_type;
+ libbpf_set_memlock_rlim;
+} LIBBPF_0.6.0;
+
+LIBBPF_0.8.0 {
+ global:
+ bpf_map__autocreate;
+ bpf_map__get_next_key;
+ bpf_map__delete_elem;
+ bpf_map__lookup_and_delete_elem;
+ bpf_map__lookup_elem;
+ bpf_map__set_autocreate;
+ bpf_map__update_elem;
+ bpf_map_delete_elem_flags;
+ bpf_object__destroy_subskeleton;
+ bpf_object__open_subskeleton;
+ bpf_program__attach_kprobe_multi_opts;
+ bpf_program__attach_trace_opts;
+ bpf_program__attach_usdt;
+ bpf_program__set_insns;
+ libbpf_register_prog_handler;
+ libbpf_unregister_prog_handler;
+} LIBBPF_0.7.0;
+
+LIBBPF_1.0.0 {
+ global:
+ bpf_obj_get_opts;
+ bpf_prog_query_opts;
+ bpf_program__attach_ksyscall;
+ bpf_program__autoattach;
+ bpf_program__set_autoattach;
+ btf__add_enum64;
+ btf__add_enum64_value;
+ libbpf_bpf_attach_type_str;
+ libbpf_bpf_link_type_str;
+ libbpf_bpf_map_type_str;
+ libbpf_bpf_prog_type_str;
+ perf_buffer__buffer;
+} LIBBPF_0.8.0;
+
+LIBBPF_1.1.0 {
+ global:
+ bpf_btf_get_fd_by_id_opts;
+ bpf_link_get_fd_by_id_opts;
+ bpf_map_get_fd_by_id_opts;
+ bpf_prog_get_fd_by_id_opts;
+ user_ring_buffer__discard;
+ user_ring_buffer__free;
+ user_ring_buffer__new;
+ user_ring_buffer__reserve;
+ user_ring_buffer__reserve_blocking;
+ user_ring_buffer__submit;
+} LIBBPF_1.0.0;
+
+LIBBPF_1.2.0 {
+ global:
+ bpf_btf_get_info_by_fd;
+ bpf_link__update_map;
+ bpf_link_get_info_by_fd;
+ bpf_map_get_info_by_fd;
+ bpf_prog_get_info_by_fd;
+} LIBBPF_1.1.0;
+
+LIBBPF_1.3.0 {
+ global:
+ bpf_obj_pin_opts;
+ bpf_object__unpin;
+ bpf_prog_detach_opts;
+ bpf_program__attach_netfilter;
+ bpf_program__attach_netkit;
+ bpf_program__attach_tcx;
+ bpf_program__attach_uprobe_multi;
+ ring__avail_data_size;
+ ring__consume;
+ ring__consumer_pos;
+ ring__map_fd;
+ ring__producer_pos;
+ ring__size;
+ ring_buffer__ring;
+} LIBBPF_1.2.0;
+
+LIBBPF_1.4.0 {
+ global:
+ bpf_token_create;
+ btf__new_split;
+ btf_ext__raw_data;
+} LIBBPF_1.3.0;
diff --git a/tools/lib/bpf/libbpf_common.h b/tools/lib/bpf/libbpf_common.h
index 947d8bd8a7bb..8fe248e14eb6 100644
--- a/tools/lib/bpf/libbpf_common.h
+++ b/tools/lib/bpf/libbpf_common.h
@@ -10,6 +10,7 @@
#define __LIBBPF_LIBBPF_COMMON_H
#include <string.h>
+#include "libbpf_version.h"
#ifndef LIBBPF_API
#define LIBBPF_API __attribute__((visibility("default")))
@@ -17,6 +18,36 @@
#define LIBBPF_DEPRECATED(msg) __attribute__((deprecated(msg)))
+/* Mark a symbol as deprecated when libbpf version is >= {major}.{minor} */
+#define LIBBPF_DEPRECATED_SINCE(major, minor, msg) \
+ __LIBBPF_MARK_DEPRECATED_ ## major ## _ ## minor \
+ (LIBBPF_DEPRECATED("libbpf v" # major "." # minor "+: " msg))
+
+#define __LIBBPF_CURRENT_VERSION_GEQ(major, minor) \
+ (LIBBPF_MAJOR_VERSION > (major) || \
+ (LIBBPF_MAJOR_VERSION == (major) && LIBBPF_MINOR_VERSION >= (minor)))
+
+/* Add checks for other versions below when planning deprecation of API symbols
+ * with the LIBBPF_DEPRECATED_SINCE macro.
+ */
+#if __LIBBPF_CURRENT_VERSION_GEQ(1, 0)
+#define __LIBBPF_MARK_DEPRECATED_1_0(X) X
+#else
+#define __LIBBPF_MARK_DEPRECATED_1_0(X)
+#endif
+
+/* This set of internal macros allows to do "function overloading" based on
+ * number of arguments provided by used in backwards-compatible way during the
+ * transition to libbpf 1.0
+ * It's ugly but necessary evil that will be cleaned up when we get to 1.0.
+ * See bpf_prog_load() overload for example.
+ */
+#define ___libbpf_cat(A, B) A ## B
+#define ___libbpf_select(NAME, NUM) ___libbpf_cat(NAME, NUM)
+#define ___libbpf_nth(_1, _2, _3, _4, _5, _6, N, ...) N
+#define ___libbpf_cnt(...) ___libbpf_nth(__VA_ARGS__, 6, 5, 4, 3, 2, 1)
+#define ___libbpf_overload(NAME, ...) ___libbpf_select(NAME, ___libbpf_cnt(__VA_ARGS__))(__VA_ARGS__)
+
/* Helper macro to declare and initialize libbpf options struct
*
* This dance with uninitialized declaration, followed by memset to zero,
@@ -30,7 +61,7 @@
* including any extra padding, it with memset() and then assigns initial
* values provided by users in struct initializer-syntax as varargs.
*/
-#define DECLARE_LIBBPF_OPTS(TYPE, NAME, ...) \
+#define LIBBPF_OPTS(TYPE, NAME, ...) \
struct TYPE NAME = ({ \
memset(&NAME, 0, sizeof(struct TYPE)); \
(struct TYPE) { \
@@ -39,4 +70,23 @@
}; \
})
+/* Helper macro to clear and optionally reinitialize libbpf options struct
+ *
+ * Small helper macro to reset all fields and to reinitialize the common
+ * structure size member. Values provided by users in struct initializer-
+ * syntax as varargs can be provided as well to reinitialize options struct
+ * specific members.
+ */
+#define LIBBPF_OPTS_RESET(NAME, ...) \
+ do { \
+ typeof(NAME) ___##NAME = ({ \
+ memset(&___##NAME, 0, sizeof(NAME)); \
+ (typeof(NAME)) { \
+ .sz = sizeof(NAME), \
+ __VA_ARGS__ \
+ }; \
+ }); \
+ memcpy(&NAME, &___##NAME, sizeof(NAME)); \
+ } while (0)
+
#endif /* __LIBBPF_LIBBPF_COMMON_H */
diff --git a/tools/lib/bpf/libbpf_errno.c b/tools/lib/bpf/libbpf_errno.c
index 0afb51f7a919..6b180172ec6b 100644
--- a/tools/lib/bpf/libbpf_errno.c
+++ b/tools/lib/bpf/libbpf_errno.c
@@ -12,6 +12,7 @@
#include <string.h>
#include "libbpf.h"
+#include "libbpf_internal.h"
/* make sure libbpf doesn't use kernel-only integer typedefs */
#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
@@ -38,29 +39,37 @@ static const char *libbpf_strerror_table[NR_ERRNO] = {
int libbpf_strerror(int err, char *buf, size_t size)
{
+ int ret;
+
if (!buf || !size)
- return -1;
+ return libbpf_err(-EINVAL);
err = err > 0 ? err : -err;
if (err < __LIBBPF_ERRNO__START) {
- int ret;
-
ret = strerror_r(err, buf, size);
buf[size - 1] = '\0';
- return ret;
+ return libbpf_err_errno(ret);
}
if (err < __LIBBPF_ERRNO__END) {
const char *msg;
msg = libbpf_strerror_table[ERRNO_OFFSET(err)];
- snprintf(buf, size, "%s", msg);
+ ret = snprintf(buf, size, "%s", msg);
buf[size - 1] = '\0';
+ /* The length of the buf and msg is positive.
+ * A negative number may be returned only when the
+ * size exceeds INT_MAX. Not likely to appear.
+ */
+ if (ret >= size)
+ return libbpf_err(-ERANGE);
return 0;
}
- snprintf(buf, size, "Unknown libbpf error %d", err);
+ ret = snprintf(buf, size, "Unknown libbpf error %d", err);
buf[size - 1] = '\0';
- return -1;
+ if (ret >= size)
+ return libbpf_err(-ERANGE);
+ return libbpf_err(-ENOENT);
}
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index d99bc847bf84..864b36177424 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -11,6 +11,27 @@
#include <stdlib.h>
#include <limits.h>
+#include <errno.h>
+#include <linux/err.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <libelf.h>
+#include "relo_core.h"
+
+/* Android's libc doesn't support AT_EACCESS in faccessat() implementation
+ * ([0]), and just returns -EINVAL even if file exists and is accessible.
+ * See [1] for issues caused by this.
+ *
+ * So just redefine it to 0 on Android.
+ *
+ * [0] https://android.googlesource.com/platform/bionic/+/refs/heads/android13-release/libc/bionic/faccessat.cpp#50
+ * [1] https://github.com/libbpf/libbpf-bootstrap/issues/250#issuecomment-1911324250
+ */
+#ifdef __ANDROID__
+#undef AT_EACCESS
+#define AT_EACCESS 0
+#endif
/* make sure libbpf doesn't use kernel-only integer typedefs */
#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
@@ -19,6 +40,38 @@
#pragma GCC poison reallocarray
#include "libbpf.h"
+#include "btf.h"
+
+#ifndef EM_BPF
+#define EM_BPF 247
+#endif
+
+#ifndef R_BPF_64_64
+#define R_BPF_64_64 1
+#endif
+#ifndef R_BPF_64_ABS64
+#define R_BPF_64_ABS64 2
+#endif
+#ifndef R_BPF_64_ABS32
+#define R_BPF_64_ABS32 3
+#endif
+#ifndef R_BPF_64_32
+#define R_BPF_64_32 10
+#endif
+
+#ifndef SHT_LLVM_ADDRSIG
+#define SHT_LLVM_ADDRSIG 0x6FFF4C03
+#endif
+
+/* if libelf is old and doesn't support mmap(), fall back to read() */
+#ifndef ELF_C_READ_MMAP
+#define ELF_C_READ_MMAP ELF_C_READ
+#endif
+
+/* Older libelf all end up in this expression, for both 32 and 64 bit */
+#ifndef ELF64_ST_VISIBILITY
+#define ELF64_ST_VISIBILITY(o) ((o) & 0x03)
+#endif
#define BTF_INFO_ENC(kind, kind_flag, vlen) \
((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
@@ -31,6 +84,12 @@
#define BTF_MEMBER_ENC(name, type, bits_offset) (name), (type), (bits_offset)
#define BTF_PARAM_ENC(name, type) (name), (type)
#define BTF_VAR_SECINFO_ENC(type, offset, size) (type), (offset), (size)
+#define BTF_TYPE_FLOAT_ENC(name, sz) \
+ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz)
+#define BTF_TYPE_DECL_TAG_ENC(value, type, component_idx) \
+ BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), type), (component_idx)
+#define BTF_TYPE_TYPE_TAG_ENC(value, type) \
+ BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_TYPE_TAG, 0, 0), type)
#ifndef likely
#define likely(x) __builtin_expect(!!(x), 1)
@@ -48,21 +107,55 @@
# define offsetofend(TYPE, FIELD) \
(offsetof(TYPE, FIELD) + sizeof(((TYPE *)0)->FIELD))
#endif
+#ifndef __alias
+#define __alias(symbol) __attribute__((alias(#symbol)))
+#endif
+
+/* Check whether a string `str` has prefix `pfx`, regardless if `pfx` is
+ * a string literal known at compilation time or char * pointer known only at
+ * runtime.
+ */
+#define str_has_pfx(str, pfx) \
+ (strncmp(str, pfx, __builtin_constant_p(pfx) ? sizeof(pfx) - 1 : strlen(pfx)) == 0)
+
+/* suffix check */
+static inline bool str_has_sfx(const char *str, const char *sfx)
+{
+ size_t str_len = strlen(str);
+ size_t sfx_len = strlen(sfx);
+
+ if (sfx_len > str_len)
+ return false;
+ return strcmp(str + str_len - sfx_len, sfx) == 0;
+}
/* Symbol versioning is different between static and shared library.
* Properly versioned symbols are needed for shared library, but
* only the symbol of the new version is needed for static library.
+ * Starting with GNU C 10, use symver attribute instead of .symver assembler
+ * directive, which works better with GCC LTO builds.
*/
-#ifdef SHARED
-# define COMPAT_VERSION(internal_name, api_name, version) \
+#if defined(SHARED) && defined(__GNUC__) && __GNUC__ >= 10
+
+#define DEFAULT_VERSION(internal_name, api_name, version) \
+ __attribute__((symver(#api_name "@@" #version)))
+#define COMPAT_VERSION(internal_name, api_name, version) \
+ __attribute__((symver(#api_name "@" #version)))
+
+#elif defined(SHARED)
+
+#define COMPAT_VERSION(internal_name, api_name, version) \
asm(".symver " #internal_name "," #api_name "@" #version);
-# define DEFAULT_VERSION(internal_name, api_name, version) \
+#define DEFAULT_VERSION(internal_name, api_name, version) \
asm(".symver " #internal_name "," #api_name "@@" #version);
-#else
-# define COMPAT_VERSION(internal_name, api_name, version)
-# define DEFAULT_VERSION(internal_name, api_name, version) \
+
+#else /* !SHARED */
+
+#define COMPAT_VERSION(internal_name, api_name, version)
+#define DEFAULT_VERSION(internal_name, api_name, version) \
extern typeof(internal_name) api_name \
__attribute__((alias(#internal_name)));
+
#endif
extern void libbpf_print(enum libbpf_print_level level,
@@ -81,6 +174,15 @@ do { \
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif
+
+struct bpf_link {
+ int (*detach)(struct bpf_link *link);
+ void (*dealloc)(struct bpf_link *link);
+ char *pin_path; /* NULL, if not pinned */
+ int fd; /* hook FD, -1 if not applicable */
+ bool disconnected;
+};
+
/*
* Re-implement glibc's reallocarray() for libbpf internal-only use.
* reallocarray(), unfortunately, is not available in all versions of glibc,
@@ -105,9 +207,92 @@ static inline void *libbpf_reallocarray(void *ptr, size_t nmemb, size_t size)
return realloc(ptr, total);
}
-void *btf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
- size_t cur_cnt, size_t max_cnt, size_t add_cnt);
-int btf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt);
+/* Copy up to sz - 1 bytes from zero-terminated src string and ensure that dst
+ * is zero-terminated string no matter what (unless sz == 0, in which case
+ * it's a no-op). It's conceptually close to FreeBSD's strlcpy(), but differs
+ * in what is returned. Given this is internal helper, it's trivial to extend
+ * this, when necessary. Use this instead of strncpy inside libbpf source code.
+ */
+static inline void libbpf_strlcpy(char *dst, const char *src, size_t sz)
+{
+ size_t i;
+
+ if (sz == 0)
+ return;
+
+ sz--;
+ for (i = 0; i < sz && src[i]; i++)
+ dst[i] = src[i];
+ dst[i] = '\0';
+}
+
+__u32 get_kernel_version(void);
+
+struct btf;
+struct btf_type;
+
+struct btf_type *btf_type_by_id(const struct btf *btf, __u32 type_id);
+const char *btf_kind_str(const struct btf_type *t);
+const struct btf_type *skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id);
+
+static inline enum btf_func_linkage btf_func_linkage(const struct btf_type *t)
+{
+ return (enum btf_func_linkage)(int)btf_vlen(t);
+}
+
+static inline __u32 btf_type_info(int kind, int vlen, int kflag)
+{
+ return (kflag << 31) | (kind << 24) | vlen;
+}
+
+enum map_def_parts {
+ MAP_DEF_MAP_TYPE = 0x001,
+ MAP_DEF_KEY_TYPE = 0x002,
+ MAP_DEF_KEY_SIZE = 0x004,
+ MAP_DEF_VALUE_TYPE = 0x008,
+ MAP_DEF_VALUE_SIZE = 0x010,
+ MAP_DEF_MAX_ENTRIES = 0x020,
+ MAP_DEF_MAP_FLAGS = 0x040,
+ MAP_DEF_NUMA_NODE = 0x080,
+ MAP_DEF_PINNING = 0x100,
+ MAP_DEF_INNER_MAP = 0x200,
+ MAP_DEF_MAP_EXTRA = 0x400,
+
+ MAP_DEF_ALL = 0x7ff, /* combination of all above */
+};
+
+struct btf_map_def {
+ enum map_def_parts parts;
+ __u32 map_type;
+ __u32 key_type_id;
+ __u32 key_size;
+ __u32 value_type_id;
+ __u32 value_size;
+ __u32 max_entries;
+ __u32 map_flags;
+ __u32 numa_node;
+ __u32 pinning;
+ __u64 map_extra;
+};
+
+int parse_btf_map_def(const char *map_name, struct btf *btf,
+ const struct btf_type *def_t, bool strict,
+ struct btf_map_def *map_def, struct btf_map_def *inner_def);
+
+void *libbpf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
+ size_t cur_cnt, size_t max_cnt, size_t add_cnt);
+int libbpf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt);
+
+static inline bool libbpf_is_mem_zeroed(const char *p, ssize_t len)
+{
+ while (len > 0) {
+ if (*p)
+ return false;
+ p++;
+ len--;
+ }
+ return true;
+}
static inline bool libbpf_validate_opts(const char *opts,
size_t opts_sz, size_t user_sz,
@@ -117,16 +302,9 @@ static inline bool libbpf_validate_opts(const char *opts,
pr_warn("%s size (%zu) is too small\n", type_name, user_sz);
return false;
}
- if (user_sz > opts_sz) {
- size_t i;
-
- for (i = opts_sz; i < user_sz; i++) {
- if (opts[i]) {
- pr_warn("%s has non-zero extra bytes\n",
- type_name);
- return false;
- }
- }
+ if (!libbpf_is_mem_zeroed(opts + opts_sz, (ssize_t)user_sz - opts_sz)) {
+ pr_warn("%s has non-zero extra bytes\n", type_name);
+ return false;
}
return true;
}
@@ -146,15 +324,91 @@ static inline bool libbpf_validate_opts(const char *opts,
(opts)->field = value; \
} while (0)
+#define OPTS_ZEROED(opts, last_nonzero_field) \
+({ \
+ ssize_t __off = offsetofend(typeof(*(opts)), last_nonzero_field); \
+ !(opts) || libbpf_is_mem_zeroed((const void *)opts + __off, \
+ (opts)->sz - __off); \
+})
+
+enum kern_feature_id {
+ /* v4.14: kernel support for program & map names. */
+ FEAT_PROG_NAME,
+ /* v5.2: kernel support for global data sections. */
+ FEAT_GLOBAL_DATA,
+ /* BTF support */
+ FEAT_BTF,
+ /* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */
+ FEAT_BTF_FUNC,
+ /* BTF_KIND_VAR and BTF_KIND_DATASEC support */
+ FEAT_BTF_DATASEC,
+ /* BTF_FUNC_GLOBAL is supported */
+ FEAT_BTF_GLOBAL_FUNC,
+ /* BPF_F_MMAPABLE is supported for arrays */
+ FEAT_ARRAY_MMAP,
+ /* kernel support for expected_attach_type in BPF_PROG_LOAD */
+ FEAT_EXP_ATTACH_TYPE,
+ /* bpf_probe_read_{kernel,user}[_str] helpers */
+ FEAT_PROBE_READ_KERN,
+ /* BPF_PROG_BIND_MAP is supported */
+ FEAT_PROG_BIND_MAP,
+ /* Kernel support for module BTFs */
+ FEAT_MODULE_BTF,
+ /* BTF_KIND_FLOAT support */
+ FEAT_BTF_FLOAT,
+ /* BPF perf link support */
+ FEAT_PERF_LINK,
+ /* BTF_KIND_DECL_TAG support */
+ FEAT_BTF_DECL_TAG,
+ /* BTF_KIND_TYPE_TAG support */
+ FEAT_BTF_TYPE_TAG,
+ /* memcg-based accounting for BPF maps and progs */
+ FEAT_MEMCG_ACCOUNT,
+ /* BPF cookie (bpf_get_attach_cookie() BPF helper) support */
+ FEAT_BPF_COOKIE,
+ /* BTF_KIND_ENUM64 support and BTF_KIND_ENUM kflag support */
+ FEAT_BTF_ENUM64,
+ /* Kernel uses syscall wrapper (CONFIG_ARCH_HAS_SYSCALL_WRAPPER) */
+ FEAT_SYSCALL_WRAPPER,
+ /* BPF multi-uprobe link support */
+ FEAT_UPROBE_MULTI_LINK,
+ /* Kernel supports arg:ctx tag (__arg_ctx) for global subprogs natively */
+ FEAT_ARG_CTX_TAG,
+ /* Kernel supports '?' at the front of datasec names */
+ FEAT_BTF_QMARK_DATASEC,
+ __FEAT_CNT,
+};
+
+enum kern_feature_result {
+ FEAT_UNKNOWN = 0,
+ FEAT_SUPPORTED = 1,
+ FEAT_MISSING = 2,
+};
+
+struct kern_feature_cache {
+ enum kern_feature_result res[__FEAT_CNT];
+ int token_fd;
+};
+
+bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id);
+bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id);
+
+int probe_kern_syscall_wrapper(int token_fd);
+int probe_memcg_account(int token_fd);
+int bump_rlimit_memlock(void);
+
int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz);
int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz);
int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
- const char *str_sec, size_t str_len);
+ const char *str_sec, size_t str_len,
+ int token_fd);
+int btf_load_into_kernel(struct btf *btf,
+ char *log_buf, size_t log_sz, __u32 log_level,
+ int token_fd);
-int bpf_object__section_size(const struct bpf_object *obj, const char *name,
- __u32 *size);
-int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
- __u32 *off);
+struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf);
+void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
+ const char **prefix, int *kind);
struct btf_ext_info {
/*
@@ -164,6 +418,13 @@ struct btf_ext_info {
void *info;
__u32 rec_size;
__u32 len;
+ /* optional (maintained internally by libbpf) mapping between .BTF.ext
+ * section and corresponding ELF section. This is used to join
+ * information like CO-RE relocation records with corresponding BPF
+ * programs defined in ELF sections
+ */
+ __u32 *sec_idxs;
+ int sec_cnt;
};
#define for_each_btf_ext_sec(seg, sec) \
@@ -247,75 +508,167 @@ struct bpf_line_info_min {
__u32 line_col;
};
-/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value
- * has to be adjusted by relocations.
+
+typedef int (*type_id_visit_fn)(__u32 *type_id, void *ctx);
+typedef int (*str_off_visit_fn)(__u32 *str_off, void *ctx);
+int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ctx);
+int btf_type_visit_str_offs(struct btf_type *t, str_off_visit_fn visit, void *ctx);
+int btf_ext_visit_type_ids(struct btf_ext *btf_ext, type_id_visit_fn visit, void *ctx);
+int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void *ctx);
+__s32 btf__find_by_name_kind_own(const struct btf *btf, const char *type_name,
+ __u32 kind);
+
+typedef int (*kallsyms_cb_t)(unsigned long long sym_addr, char sym_type,
+ const char *sym_name, void *ctx);
+
+int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *arg);
+
+/* handle direct returned errors */
+static inline int libbpf_err(int ret)
+{
+ if (ret < 0)
+ errno = -ret;
+ return ret;
+}
+
+/* handle errno-based (e.g., syscall or libc) errors according to libbpf's
+ * strict mode settings
*/
-enum bpf_core_relo_kind {
- BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */
- BPF_FIELD_BYTE_SIZE = 1, /* field size in bytes */
- BPF_FIELD_EXISTS = 2, /* field existence in target kernel */
- BPF_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */
- BPF_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */
- BPF_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */
- BPF_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */
- BPF_TYPE_ID_TARGET = 7, /* type ID in target kernel */
- BPF_TYPE_EXISTS = 8, /* type existence in target kernel */
- BPF_TYPE_SIZE = 9, /* type size in bytes */
- BPF_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */
- BPF_ENUMVAL_VALUE = 11, /* enum value integer value */
-};
+static inline int libbpf_err_errno(int ret)
+{
+ /* errno is already assumed to be set on error */
+ return ret < 0 ? -errno : ret;
+}
-/* The minimum bpf_core_relo checked by the loader
- *
- * CO-RE relocation captures the following data:
- * - insn_off - instruction offset (in bytes) within a BPF program that needs
- * its insn->imm field to be relocated with actual field info;
- * - type_id - BTF type ID of the "root" (containing) entity of a relocatable
- * type or field;
- * - access_str_off - offset into corresponding .BTF string section. String
- * interpretation depends on specific relocation kind:
- * - for field-based relocations, string encodes an accessed field using
- * a sequence of field and array indices, separated by colon (:). It's
- * conceptually very close to LLVM's getelementptr ([0]) instruction's
- * arguments for identifying offset to a field.
- * - for type-based relocations, strings is expected to be just "0";
- * - for enum value-based relocations, string contains an index of enum
- * value within its enum type;
- *
- * Example to provide a better feel.
- *
- * struct sample {
- * int a;
- * struct {
- * int b[10];
- * };
- * };
- *
- * struct sample *s = ...;
- * int x = &s->a; // encoded as "0:0" (a is field #0)
- * int y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1,
- * // b is field #0 inside anon struct, accessing elem #5)
- * int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array)
- *
- * type_id for all relocs in this example will capture BTF type id of
- * `struct sample`.
- *
- * Such relocation is emitted when using __builtin_preserve_access_index()
- * Clang built-in, passing expression that captures field address, e.g.:
- *
- * bpf_probe_read(&dst, sizeof(dst),
- * __builtin_preserve_access_index(&src->a.b.c));
- *
- * In this case Clang will emit field relocation recording necessary data to
- * be able to find offset of embedded `a.b.c` field within `src` struct.
- *
- * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction
+/* handle error for pointer-returning APIs, err is assumed to be < 0 always */
+static inline void *libbpf_err_ptr(int err)
+{
+ /* set errno on error, this doesn't break anything */
+ errno = -err;
+ return NULL;
+}
+
+/* handle pointer-returning APIs' error handling */
+static inline void *libbpf_ptr(void *ret)
+{
+ /* set errno on error, this doesn't break anything */
+ if (IS_ERR(ret))
+ errno = -PTR_ERR(ret);
+
+ return IS_ERR(ret) ? NULL : ret;
+}
+
+static inline bool str_is_empty(const char *s)
+{
+ return !s || !s[0];
+}
+
+static inline bool is_ldimm64_insn(struct bpf_insn *insn)
+{
+ return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
+}
+
+/* Unconditionally dup FD, ensuring it doesn't use [0, 2] range.
+ * Original FD is not closed or altered in any other way.
+ * Preserves original FD value, if it's invalid (negative).
*/
-struct bpf_core_relo {
- __u32 insn_off;
- __u32 type_id;
- __u32 access_str_off;
- enum bpf_core_relo_kind kind;
+static inline int dup_good_fd(int fd)
+{
+ if (fd < 0)
+ return fd;
+ return fcntl(fd, F_DUPFD_CLOEXEC, 3);
+}
+
+/* if fd is stdin, stdout, or stderr, dup to a fd greater than 2
+ * Takes ownership of the fd passed in, and closes it if calling
+ * fcntl(fd, F_DUPFD_CLOEXEC, 3).
+ */
+static inline int ensure_good_fd(int fd)
+{
+ int old_fd = fd, saved_errno;
+
+ if (fd < 0)
+ return fd;
+ if (fd < 3) {
+ fd = dup_good_fd(fd);
+ saved_errno = errno;
+ close(old_fd);
+ errno = saved_errno;
+ if (fd < 0) {
+ pr_warn("failed to dup FD %d to FD > 2: %d\n", old_fd, -saved_errno);
+ errno = saved_errno;
+ }
+ }
+ return fd;
+}
+
+static inline int sys_dup2(int oldfd, int newfd)
+{
+#ifdef __NR_dup2
+ return syscall(__NR_dup2, oldfd, newfd);
+#else
+ return syscall(__NR_dup3, oldfd, newfd, 0);
+#endif
+}
+
+/* Point *fixed_fd* to the same file that *tmp_fd* points to.
+ * Regardless of success, *tmp_fd* is closed.
+ * Whatever *fixed_fd* pointed to is closed silently.
+ */
+static inline int reuse_fd(int fixed_fd, int tmp_fd)
+{
+ int err;
+
+ err = sys_dup2(tmp_fd, fixed_fd);
+ err = err < 0 ? -errno : 0;
+ close(tmp_fd); /* clean up temporary FD */
+ return err;
+}
+
+/* The following two functions are exposed to bpftool */
+int bpf_core_add_cands(struct bpf_core_cand *local_cand,
+ size_t local_essent_len,
+ const struct btf *targ_btf,
+ const char *targ_btf_name,
+ int targ_start_id,
+ struct bpf_core_cand_list *cands);
+void bpf_core_free_cands(struct bpf_core_cand_list *cands);
+
+struct usdt_manager *usdt_manager_new(struct bpf_object *obj);
+void usdt_manager_free(struct usdt_manager *man);
+struct bpf_link * usdt_manager_attach_usdt(struct usdt_manager *man,
+ const struct bpf_program *prog,
+ pid_t pid, const char *path,
+ const char *usdt_provider, const char *usdt_name,
+ __u64 usdt_cookie);
+
+static inline bool is_pow_of_2(size_t x)
+{
+ return x && (x & (x - 1)) == 0;
+}
+
+#define PROG_LOAD_ATTEMPTS 5
+int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts);
+
+bool glob_match(const char *str, const char *pat);
+
+long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name);
+long elf_find_func_offset_from_file(const char *binary_path, const char *name);
+
+struct elf_fd {
+ Elf *elf;
+ int fd;
};
+int elf_open(const char *binary_path, struct elf_fd *elf_fd);
+void elf_close(struct elf_fd *elf_fd);
+
+int elf_resolve_syms_offsets(const char *binary_path, int cnt,
+ const char **syms, unsigned long **poffsets,
+ int st_type);
+int elf_resolve_pattern_offsets(const char *binary_path, const char *pattern,
+ unsigned long **poffsets, size_t *pcnt);
+
+int probe_fd(int fd);
+
#endif /* __LIBBPF_LIBBPF_INTERNAL_H */
diff --git a/tools/lib/bpf/libbpf_legacy.h b/tools/lib/bpf/libbpf_legacy.h
new file mode 100644
index 000000000000..1e1be467bede
--- /dev/null
+++ b/tools/lib/bpf/libbpf_legacy.h
@@ -0,0 +1,140 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * Libbpf legacy APIs (either discouraged or deprecated, as mentioned in [0])
+ *
+ * [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY
+ *
+ * Copyright (C) 2021 Facebook
+ */
+#ifndef __LIBBPF_LEGACY_BPF_H
+#define __LIBBPF_LEGACY_BPF_H
+
+#include <linux/bpf.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include "libbpf_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* As of libbpf 1.0 libbpf_set_strict_mode() and enum libbpf_struct_mode have
+ * no effect. But they are left in libbpf_legacy.h so that applications that
+ * prepared for libbpf 1.0 before final release by using
+ * libbpf_set_strict_mode() still work with libbpf 1.0+ without any changes.
+ */
+enum libbpf_strict_mode {
+ /* Turn on all supported strict features of libbpf to simulate libbpf
+ * v1.0 behavior.
+ * This will be the default behavior in libbpf v1.0.
+ */
+ LIBBPF_STRICT_ALL = 0xffffffff,
+
+ /*
+ * Disable any libbpf 1.0 behaviors. This is the default before libbpf
+ * v1.0. It won't be supported anymore in v1.0, please update your
+ * code so that it handles LIBBPF_STRICT_ALL mode before libbpf v1.0.
+ */
+ LIBBPF_STRICT_NONE = 0x00,
+ /*
+ * Return NULL pointers on error, not ERR_PTR(err).
+ * Additionally, libbpf also always sets errno to corresponding Exx
+ * (positive) error code.
+ */
+ LIBBPF_STRICT_CLEAN_PTRS = 0x01,
+ /*
+ * Return actual error codes from low-level APIs directly, not just -1.
+ * Additionally, libbpf also always sets errno to corresponding Exx
+ * (positive) error code.
+ */
+ LIBBPF_STRICT_DIRECT_ERRS = 0x02,
+ /*
+ * Enforce strict BPF program section (SEC()) names.
+ * E.g., while prefiously SEC("xdp_whatever") or SEC("perf_event_blah") were
+ * allowed, with LIBBPF_STRICT_SEC_PREFIX this will become
+ * unrecognized by libbpf and would have to be just SEC("xdp") and
+ * SEC("xdp") and SEC("perf_event").
+ *
+ * Note, in this mode the program pin path will be based on the
+ * function name instead of section name.
+ *
+ * Additionally, routines in the .text section are always considered
+ * sub-programs. Legacy behavior allows for a single routine in .text
+ * to be a program.
+ */
+ LIBBPF_STRICT_SEC_NAME = 0x04,
+ /*
+ * Disable the global 'bpf_objects_list'. Maintaining this list adds
+ * a race condition to bpf_object__open() and bpf_object__close().
+ * Clients can maintain it on their own if it is valuable for them.
+ */
+ LIBBPF_STRICT_NO_OBJECT_LIST = 0x08,
+ /*
+ * Automatically bump RLIMIT_MEMLOCK using setrlimit() before the
+ * first BPF program or map creation operation. This is done only if
+ * kernel is too old to support memcg-based memory accounting for BPF
+ * subsystem. By default, RLIMIT_MEMLOCK limit is set to RLIM_INFINITY,
+ * but it can be overriden with libbpf_set_memlock_rlim() API.
+ * Note that libbpf_set_memlock_rlim() needs to be called before
+ * the very first bpf_prog_load(), bpf_map_create() or bpf_object__load()
+ * operation.
+ */
+ LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK = 0x10,
+ /*
+ * Error out on any SEC("maps") map definition, which are deprecated
+ * in favor of BTF-defined map definitions in SEC(".maps").
+ */
+ LIBBPF_STRICT_MAP_DEFINITIONS = 0x20,
+
+ __LIBBPF_STRICT_LAST,
+};
+
+LIBBPF_API int libbpf_set_strict_mode(enum libbpf_strict_mode mode);
+
+/**
+ * @brief **libbpf_get_error()** extracts the error code from the passed
+ * pointer
+ * @param ptr pointer returned from libbpf API function
+ * @return error code; or 0 if no error occured
+ *
+ * Note, as of libbpf 1.0 this function is not necessary and not recommended
+ * to be used. Libbpf doesn't return error code embedded into the pointer
+ * itself. Instead, NULL is returned on error and error code is passed through
+ * thread-local errno variable. **libbpf_get_error()** is just returning -errno
+ * value if it receives NULL, which is correct only if errno hasn't been
+ * modified between libbpf API call and corresponding **libbpf_get_error()**
+ * call. Prefer to check return for NULL and use errno directly.
+ *
+ * This API is left in libbpf 1.0 to allow applications that were 1.0-ready
+ * before final libbpf 1.0 without needing to change them.
+ */
+LIBBPF_API long libbpf_get_error(const void *ptr);
+
+#define DECLARE_LIBBPF_OPTS LIBBPF_OPTS
+
+/* "Discouraged" APIs which don't follow consistent libbpf naming patterns.
+ * They are normally a trivial aliases or wrappers for proper APIs and are
+ * left to minimize unnecessary disruption for users of libbpf. But they
+ * shouldn't be used going forward.
+ */
+
+struct bpf_program;
+struct bpf_map;
+struct btf;
+struct btf_ext;
+
+LIBBPF_API struct btf *libbpf_find_kernel_btf(void);
+
+LIBBPF_API enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog);
+LIBBPF_API enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog);
+LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map);
+LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size);
+LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* __LIBBPF_LEGACY_BPF_H */
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 5482a9b7ae2d..302188122439 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -12,74 +12,151 @@
#include <linux/btf.h>
#include <linux/filter.h>
#include <linux/kernel.h>
+#include <linux/version.h>
#include "bpf.h"
#include "libbpf.h"
#include "libbpf_internal.h"
-static bool grep(const char *buffer, const char *pattern)
+/* On Ubuntu LINUX_VERSION_CODE doesn't correspond to info.release,
+ * but Ubuntu provides /proc/version_signature file, as described at
+ * https://ubuntu.com/kernel, with an example contents below, which we
+ * can use to get a proper LINUX_VERSION_CODE.
+ *
+ * Ubuntu 5.4.0-12.15-generic 5.4.8
+ *
+ * In the above, 5.4.8 is what kernel is actually expecting, while
+ * uname() call will return 5.4.0 in info.release.
+ */
+static __u32 get_ubuntu_kernel_version(void)
{
- return !!strstr(buffer, pattern);
-}
+ const char *ubuntu_kver_file = "/proc/version_signature";
+ __u32 major, minor, patch;
+ int ret;
+ FILE *f;
-static int get_vendor_id(int ifindex)
-{
- char ifname[IF_NAMESIZE], path[64], buf[8];
- ssize_t len;
- int fd;
+ if (faccessat(AT_FDCWD, ubuntu_kver_file, R_OK, AT_EACCESS) != 0)
+ return 0;
- if (!if_indextoname(ifindex, ifname))
- return -1;
+ f = fopen(ubuntu_kver_file, "re");
+ if (!f)
+ return 0;
- snprintf(path, sizeof(path), "/sys/class/net/%s/device/vendor", ifname);
+ ret = fscanf(f, "%*s %*s %u.%u.%u\n", &major, &minor, &patch);
+ fclose(f);
+ if (ret != 3)
+ return 0;
- fd = open(path, O_RDONLY);
- if (fd < 0)
- return -1;
+ return KERNEL_VERSION(major, minor, patch);
+}
- len = read(fd, buf, sizeof(buf));
- close(fd);
- if (len < 0)
- return -1;
- if (len >= (ssize_t)sizeof(buf))
- return -1;
- buf[len] = '\0';
+/* On Debian LINUX_VERSION_CODE doesn't correspond to info.release.
+ * Instead, it is provided in info.version. An example content of
+ * Debian 10 looks like the below.
+ *
+ * utsname::release 4.19.0-22-amd64
+ * utsname::version #1 SMP Debian 4.19.260-1 (2022-09-29)
+ *
+ * In the above, 4.19.260 is what kernel is actually expecting, while
+ * uname() call will return 4.19.0 in info.release.
+ */
+static __u32 get_debian_kernel_version(struct utsname *info)
+{
+ __u32 major, minor, patch;
+ char *p;
+
+ p = strstr(info->version, "Debian ");
+ if (!p) {
+ /* This is not a Debian kernel. */
+ return 0;
+ }
- return strtol(buf, NULL, 0);
+ if (sscanf(p, "Debian %u.%u.%u", &major, &minor, &patch) != 3)
+ return 0;
+
+ return KERNEL_VERSION(major, minor, patch);
}
-static int get_kernel_version(void)
+__u32 get_kernel_version(void)
{
- int version, subversion, patchlevel;
- struct utsname utsn;
+ __u32 major, minor, patch, version;
+ struct utsname info;
- /* Return 0 on failure, and attempt to probe with empty kversion */
- if (uname(&utsn))
- return 0;
+ /* Check if this is an Ubuntu kernel. */
+ version = get_ubuntu_kernel_version();
+ if (version != 0)
+ return version;
+
+ uname(&info);
+
+ /* Check if this is a Debian kernel. */
+ version = get_debian_kernel_version(&info);
+ if (version != 0)
+ return version;
- if (sscanf(utsn.release, "%d.%d.%d",
- &version, &subversion, &patchlevel) != 3)
+ if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3)
return 0;
- return (version << 16) + (subversion << 8) + patchlevel;
+ return KERNEL_VERSION(major, minor, patch);
}
-static void
-probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,
- size_t insns_cnt, char *buf, size_t buf_len, __u32 ifindex)
+static int probe_prog_load(enum bpf_prog_type prog_type,
+ const struct bpf_insn *insns, size_t insns_cnt,
+ char *log_buf, size_t log_buf_sz)
{
- struct bpf_load_program_attr xattr = {};
- int fd;
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .log_buf = log_buf,
+ .log_size = log_buf_sz,
+ .log_level = log_buf ? 1 : 0,
+ );
+ int fd, err, exp_err = 0;
+ const char *exp_msg = NULL;
+ char buf[4096];
switch (prog_type) {
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
- xattr.expected_attach_type = BPF_CGROUP_INET4_CONNECT;
+ opts.expected_attach_type = BPF_CGROUP_INET4_CONNECT;
+ break;
+ case BPF_PROG_TYPE_CGROUP_SOCKOPT:
+ opts.expected_attach_type = BPF_CGROUP_GETSOCKOPT;
break;
case BPF_PROG_TYPE_SK_LOOKUP:
- xattr.expected_attach_type = BPF_SK_LOOKUP;
+ opts.expected_attach_type = BPF_SK_LOOKUP;
break;
case BPF_PROG_TYPE_KPROBE:
- xattr.kern_version = get_kernel_version();
+ opts.kern_version = get_kernel_version();
+ break;
+ case BPF_PROG_TYPE_LIRC_MODE2:
+ opts.expected_attach_type = BPF_LIRC_MODE2;
+ break;
+ case BPF_PROG_TYPE_TRACING:
+ case BPF_PROG_TYPE_LSM:
+ opts.log_buf = buf;
+ opts.log_size = sizeof(buf);
+ opts.log_level = 1;
+ if (prog_type == BPF_PROG_TYPE_TRACING)
+ opts.expected_attach_type = BPF_TRACE_FENTRY;
+ else
+ opts.expected_attach_type = BPF_MODIFY_RETURN;
+ opts.attach_btf_id = 1;
+
+ exp_err = -EINVAL;
+ exp_msg = "attach_btf_id 1 is not a function";
+ break;
+ case BPF_PROG_TYPE_EXT:
+ opts.log_buf = buf;
+ opts.log_size = sizeof(buf);
+ opts.log_level = 1;
+ opts.attach_btf_id = 1;
+
+ exp_err = -EINVAL;
+ exp_msg = "Cannot replace kernel functions";
+ break;
+ case BPF_PROG_TYPE_SYSCALL:
+ opts.prog_flags = BPF_F_SLEEPABLE;
+ break;
+ case BPF_PROG_TYPE_STRUCT_OPS:
+ exp_err = -524; /* -ENOTSUPP */
break;
case BPF_PROG_TYPE_UNSPEC:
case BPF_PROG_TYPE_SOCKET_FILTER:
@@ -100,49 +177,50 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,
case BPF_PROG_TYPE_RAW_TRACEPOINT:
case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
case BPF_PROG_TYPE_LWT_SEG6LOCAL:
- case BPF_PROG_TYPE_LIRC_MODE2:
case BPF_PROG_TYPE_SK_REUSEPORT:
case BPF_PROG_TYPE_FLOW_DISSECTOR:
case BPF_PROG_TYPE_CGROUP_SYSCTL:
- case BPF_PROG_TYPE_CGROUP_SOCKOPT:
- case BPF_PROG_TYPE_TRACING:
- case BPF_PROG_TYPE_STRUCT_OPS:
- case BPF_PROG_TYPE_EXT:
- case BPF_PROG_TYPE_LSM:
- default:
break;
+ case BPF_PROG_TYPE_NETFILTER:
+ opts.expected_attach_type = BPF_NETFILTER;
+ break;
+ default:
+ return -EOPNOTSUPP;
}
- xattr.prog_type = prog_type;
- xattr.insns = insns;
- xattr.insns_cnt = insns_cnt;
- xattr.license = "GPL";
- xattr.prog_ifindex = ifindex;
-
- fd = bpf_load_program_xattr(&xattr, buf, buf_len);
+ fd = bpf_prog_load(prog_type, NULL, "GPL", insns, insns_cnt, &opts);
+ err = -errno;
if (fd >= 0)
close(fd);
+ if (exp_err) {
+ if (fd >= 0 || err != exp_err)
+ return 0;
+ if (exp_msg && !strstr(buf, exp_msg))
+ return 0;
+ return 1;
+ }
+ return fd >= 0 ? 1 : 0;
}
-bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex)
+int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts)
{
- struct bpf_insn insns[2] = {
+ struct bpf_insn insns[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN()
};
+ const size_t insn_cnt = ARRAY_SIZE(insns);
+ int ret;
- if (ifindex && prog_type == BPF_PROG_TYPE_SCHED_CLS)
- /* nfp returns -EINVAL on exit(0) with TC offload */
- insns[0].imm = 2;
-
- errno = 0;
- probe_load(prog_type, insns, ARRAY_SIZE(insns), NULL, 0, ifindex);
+ if (opts)
+ return libbpf_err(-EINVAL);
- return errno != EINVAL && errno != EOPNOTSUPP;
+ ret = probe_prog_load(prog_type, insns, insn_cnt, NULL, 0);
+ return libbpf_err(ret);
}
int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
- const char *str_sec, size_t str_len)
+ const char *str_sec, size_t str_len,
+ int token_fd)
{
struct btf_header hdr = {
.magic = BTF_MAGIC,
@@ -152,6 +230,10 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
.str_off = types_len,
.str_len = str_len,
};
+ LIBBPF_OPTS(bpf_btf_load_opts, opts,
+ .token_fd = token_fd,
+ .btf_flags = token_fd ? BPF_F_TOKEN_FD : 0,
+ );
int btf_fd, btf_len;
__u8 *raw_btf;
@@ -164,7 +246,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
memcpy(raw_btf + hdr.hdr_len, raw_types, hdr.type_len);
memcpy(raw_btf + hdr.hdr_len + hdr.type_len, str_sec, hdr.str_len);
- btf_fd = bpf_load_btf(raw_btf, btf_len, NULL, 0, false);
+ btf_fd = bpf_btf_load(raw_btf, btf_len, &opts);
free(raw_btf);
return btf_fd;
@@ -194,20 +276,19 @@ static int load_local_storage_btf(void)
};
return libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs));
+ strs, sizeof(strs), 0);
}
-bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
+static int probe_map_create(enum bpf_map_type map_type)
{
- int key_size, value_size, max_entries, map_flags;
+ LIBBPF_OPTS(bpf_map_create_opts, opts);
+ int key_size, value_size, max_entries;
__u32 btf_key_type_id = 0, btf_value_type_id = 0;
- struct bpf_create_map_attr attr = {};
- int fd = -1, btf_fd = -1, fd_inner;
+ int fd = -1, btf_fd = -1, fd_inner = -1, exp_err = 0, err = 0;
key_size = sizeof(__u32);
value_size = sizeof(__u32);
max_entries = 1;
- map_flags = 0;
switch (map_type) {
case BPF_MAP_TYPE_STACK_TRACE:
@@ -216,7 +297,7 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
case BPF_MAP_TYPE_LPM_TRIE:
key_size = sizeof(__u64);
value_size = sizeof(__u64);
- map_flags = BPF_F_NO_PREALLOC;
+ opts.map_flags = BPF_F_NO_PREALLOC;
break;
case BPF_MAP_TYPE_CGROUP_STORAGE:
case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
@@ -230,21 +311,40 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
break;
case BPF_MAP_TYPE_SK_STORAGE:
case BPF_MAP_TYPE_INODE_STORAGE:
+ case BPF_MAP_TYPE_TASK_STORAGE:
+ case BPF_MAP_TYPE_CGRP_STORAGE:
btf_key_type_id = 1;
btf_value_type_id = 3;
value_size = 8;
max_entries = 0;
- map_flags = BPF_F_NO_PREALLOC;
+ opts.map_flags = BPF_F_NO_PREALLOC;
btf_fd = load_local_storage_btf();
if (btf_fd < 0)
- return false;
+ return btf_fd;
break;
case BPF_MAP_TYPE_RINGBUF:
+ case BPF_MAP_TYPE_USER_RINGBUF:
key_size = 0;
value_size = 0;
- max_entries = 4096;
+ max_entries = sysconf(_SC_PAGE_SIZE);
+ break;
+ case BPF_MAP_TYPE_STRUCT_OPS:
+ /* we'll get -ENOTSUPP for invalid BTF type ID for struct_ops */
+ opts.btf_vmlinux_value_type_id = 1;
+ opts.value_type_btf_obj_fd = -1;
+ exp_err = -524; /* -ENOTSUPP */
+ break;
+ case BPF_MAP_TYPE_BLOOM_FILTER:
+ key_size = 0;
+ max_entries = 1;
+ break;
+ case BPF_MAP_TYPE_ARENA:
+ key_size = 0;
+ value_size = 0;
+ max_entries = 1; /* one page */
+ opts.map_extra = 0; /* can mmap() at any address */
+ opts.map_flags = BPF_F_MMAPABLE;
break;
- case BPF_MAP_TYPE_UNSPEC:
case BPF_MAP_TYPE_HASH:
case BPF_MAP_TYPE_ARRAY:
case BPF_MAP_TYPE_PROG_ARRAY:
@@ -263,95 +363,101 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
case BPF_MAP_TYPE_XSKMAP:
case BPF_MAP_TYPE_SOCKHASH:
case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
- case BPF_MAP_TYPE_STRUCT_OPS:
- default:
break;
+ case BPF_MAP_TYPE_UNSPEC:
+ default:
+ return -EOPNOTSUPP;
}
if (map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
- /* TODO: probe for device, once libbpf has a function to create
- * map-in-map for offload
- */
- if (ifindex)
- return false;
-
- fd_inner = bpf_create_map(BPF_MAP_TYPE_HASH,
- sizeof(__u32), sizeof(__u32), 1, 0);
+ fd_inner = bpf_map_create(BPF_MAP_TYPE_HASH, NULL,
+ sizeof(__u32), sizeof(__u32), 1, NULL);
if (fd_inner < 0)
- return false;
- fd = bpf_create_map_in_map(map_type, NULL, sizeof(__u32),
- fd_inner, 1, 0);
- close(fd_inner);
- } else {
- /* Note: No other restriction on map type probes for offload */
- attr.map_type = map_type;
- attr.key_size = key_size;
- attr.value_size = value_size;
- attr.max_entries = max_entries;
- attr.map_flags = map_flags;
- attr.map_ifindex = ifindex;
- if (btf_fd >= 0) {
- attr.btf_fd = btf_fd;
- attr.btf_key_type_id = btf_key_type_id;
- attr.btf_value_type_id = btf_value_type_id;
- }
-
- fd = bpf_create_map_xattr(&attr);
+ goto cleanup;
+
+ opts.inner_map_fd = fd_inner;
}
+
+ if (btf_fd >= 0) {
+ opts.btf_fd = btf_fd;
+ opts.btf_key_type_id = btf_key_type_id;
+ opts.btf_value_type_id = btf_value_type_id;
+ }
+
+ fd = bpf_map_create(map_type, NULL, key_size, value_size, max_entries, &opts);
+ err = -errno;
+
+cleanup:
if (fd >= 0)
close(fd);
+ if (fd_inner >= 0)
+ close(fd_inner);
if (btf_fd >= 0)
close(btf_fd);
- return fd >= 0;
+ if (exp_err)
+ return fd < 0 && err == exp_err ? 1 : 0;
+ else
+ return fd >= 0 ? 1 : 0;
}
-bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type,
- __u32 ifindex)
+int libbpf_probe_bpf_map_type(enum bpf_map_type map_type, const void *opts)
{
- struct bpf_insn insns[2] = {
- BPF_EMIT_CALL(id),
- BPF_EXIT_INSN()
- };
- char buf[4096] = {};
- bool res;
-
- probe_load(prog_type, insns, ARRAY_SIZE(insns), buf, sizeof(buf),
- ifindex);
- res = !grep(buf, "invalid func ") && !grep(buf, "unknown func ");
-
- if (ifindex) {
- switch (get_vendor_id(ifindex)) {
- case 0x19ee: /* Netronome specific */
- res = res && !grep(buf, "not supported by FW") &&
- !grep(buf, "unsupported function id");
- break;
- default:
- break;
- }
- }
+ int ret;
- return res;
+ if (opts)
+ return libbpf_err(-EINVAL);
+
+ ret = probe_map_create(map_type);
+ return libbpf_err(ret);
}
-/*
- * Probe for availability of kernel commit (5.3):
- *
- * c04c0d2b968a ("bpf: increase complexity limit and maximum program size")
- */
-bool bpf_probe_large_insn_limit(__u32 ifindex)
+int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type, enum bpf_func_id helper_id,
+ const void *opts)
{
- struct bpf_insn insns[BPF_MAXINSNS + 1];
- int i;
+ struct bpf_insn insns[] = {
+ BPF_EMIT_CALL((__u32)helper_id),
+ BPF_EXIT_INSN(),
+ };
+ const size_t insn_cnt = ARRAY_SIZE(insns);
+ char buf[4096];
+ int ret;
- for (i = 0; i < BPF_MAXINSNS; i++)
- insns[i] = BPF_MOV64_IMM(BPF_REG_0, 1);
- insns[BPF_MAXINSNS] = BPF_EXIT_INSN();
+ if (opts)
+ return libbpf_err(-EINVAL);
- errno = 0;
- probe_load(BPF_PROG_TYPE_SCHED_CLS, insns, ARRAY_SIZE(insns), NULL, 0,
- ifindex);
+ /* we can't successfully load all prog types to check for BPF helper
+ * support, so bail out with -EOPNOTSUPP error
+ */
+ switch (prog_type) {
+ case BPF_PROG_TYPE_TRACING:
+ case BPF_PROG_TYPE_EXT:
+ case BPF_PROG_TYPE_LSM:
+ case BPF_PROG_TYPE_STRUCT_OPS:
+ return -EOPNOTSUPP;
+ default:
+ break;
+ }
- return errno != E2BIG && errno != EINVAL;
+ buf[0] = '\0';
+ ret = probe_prog_load(prog_type, insns, insn_cnt, buf, sizeof(buf));
+ if (ret < 0)
+ return libbpf_err(ret);
+
+ /* If BPF verifier doesn't recognize BPF helper ID (enum bpf_func_id)
+ * at all, it will emit something like "invalid func unknown#181".
+ * If BPF verifier recognizes BPF helper but it's not supported for
+ * given BPF program type, it will emit "unknown func bpf_sys_bpf#166".
+ * In both cases, provided combination of BPF program type and BPF
+ * helper is not supported by the kernel.
+ * In all other cases, probe_prog_load() above will either succeed (e.g.,
+ * because BPF helper happens to accept no input arguments or it
+ * accepts one input argument and initial PTR_TO_CTX is fine for
+ * that), or we'll get some more specific BPF verifier error about
+ * some unsatisfied conditions.
+ */
+ if (ret == 0 && (strstr(buf, "invalid func ") || strstr(buf, "unknown func ")))
+ return 0;
+ return 1; /* assume supported */
}
diff --git a/tools/lib/bpf/libbpf_util.h b/tools/lib/bpf/libbpf_util.h
deleted file mode 100644
index 59c779c5790c..000000000000
--- a/tools/lib/bpf/libbpf_util.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
-/* Copyright (c) 2019 Facebook */
-
-#ifndef __LIBBPF_LIBBPF_UTIL_H
-#define __LIBBPF_LIBBPF_UTIL_H
-
-#include <stdbool.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Use these barrier functions instead of smp_[rw]mb() when they are
- * used in a libbpf header file. That way they can be built into the
- * application that uses libbpf.
- */
-#if defined(__i386__) || defined(__x86_64__)
-# define libbpf_smp_rmb() asm volatile("" : : : "memory")
-# define libbpf_smp_wmb() asm volatile("" : : : "memory")
-# define libbpf_smp_mb() \
- asm volatile("lock; addl $0,-4(%%rsp)" : : : "memory", "cc")
-/* Hinders stores to be observed before older loads. */
-# define libbpf_smp_rwmb() asm volatile("" : : : "memory")
-#elif defined(__aarch64__)
-# define libbpf_smp_rmb() asm volatile("dmb ishld" : : : "memory")
-# define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory")
-# define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory")
-# define libbpf_smp_rwmb() libbpf_smp_mb()
-#elif defined(__arm__)
-/* These are only valid for armv7 and above */
-# define libbpf_smp_rmb() asm volatile("dmb ish" : : : "memory")
-# define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory")
-# define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory")
-# define libbpf_smp_rwmb() libbpf_smp_mb()
-#else
-/* Architecture missing native barrier functions. */
-# define libbpf_smp_rmb() __sync_synchronize()
-# define libbpf_smp_wmb() __sync_synchronize()
-# define libbpf_smp_mb() __sync_synchronize()
-# define libbpf_smp_rwmb() __sync_synchronize()
-#endif
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif
diff --git a/tools/lib/bpf/libbpf_version.h b/tools/lib/bpf/libbpf_version.h
new file mode 100644
index 000000000000..e783a47da815
--- /dev/null
+++ b/tools/lib/bpf/libbpf_version.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (C) 2021 Facebook */
+#ifndef __LIBBPF_VERSION_H
+#define __LIBBPF_VERSION_H
+
+#define LIBBPF_MAJOR_VERSION 1
+#define LIBBPF_MINOR_VERSION 4
+
+#endif /* __LIBBPF_VERSION_H */
diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
new file mode 100644
index 000000000000..0d4be829551b
--- /dev/null
+++ b/tools/lib/bpf/linker.c
@@ -0,0 +1,2924 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/*
+ * BPF static linker
+ *
+ * Copyright (c) 2021 Facebook
+ */
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <linux/err.h>
+#include <linux/btf.h>
+#include <elf.h>
+#include <libelf.h>
+#include <fcntl.h>
+#include "libbpf.h"
+#include "btf.h"
+#include "libbpf_internal.h"
+#include "strset.h"
+
+#define BTF_EXTERN_SEC ".extern"
+
+struct src_sec {
+ const char *sec_name;
+ /* positional (not necessarily ELF) index in an array of sections */
+ int id;
+ /* positional (not necessarily ELF) index of a matching section in a final object file */
+ int dst_id;
+ /* section data offset in a matching output section */
+ int dst_off;
+ /* whether section is omitted from the final ELF file */
+ bool skipped;
+ /* whether section is an ephemeral section, not mapped to an ELF section */
+ bool ephemeral;
+
+ /* ELF info */
+ size_t sec_idx;
+ Elf_Scn *scn;
+ Elf64_Shdr *shdr;
+ Elf_Data *data;
+
+ /* corresponding BTF DATASEC type ID */
+ int sec_type_id;
+};
+
+struct src_obj {
+ const char *filename;
+ int fd;
+ Elf *elf;
+ /* Section header strings section index */
+ size_t shstrs_sec_idx;
+ /* SYMTAB section index */
+ size_t symtab_sec_idx;
+
+ struct btf *btf;
+ struct btf_ext *btf_ext;
+
+ /* List of sections (including ephemeral). Slot zero is unused. */
+ struct src_sec *secs;
+ int sec_cnt;
+
+ /* mapping of symbol indices from src to dst ELF */
+ int *sym_map;
+ /* mapping from the src BTF type IDs to dst ones */
+ int *btf_type_map;
+};
+
+/* single .BTF.ext data section */
+struct btf_ext_sec_data {
+ size_t rec_cnt;
+ __u32 rec_sz;
+ void *recs;
+};
+
+struct glob_sym {
+ /* ELF symbol index */
+ int sym_idx;
+ /* associated section id for .ksyms, .kconfig, etc, but not .extern */
+ int sec_id;
+ /* extern name offset in STRTAB */
+ int name_off;
+ /* optional associated BTF type ID */
+ int btf_id;
+ /* BTF type ID to which VAR/FUNC type is pointing to; used for
+ * rewriting types when extern VAR/FUNC is resolved to a concrete
+ * definition
+ */
+ int underlying_btf_id;
+ /* sec_var index in the corresponding dst_sec, if exists */
+ int var_idx;
+
+ /* extern or resolved/global symbol */
+ bool is_extern;
+ /* weak or strong symbol, never goes back from strong to weak */
+ bool is_weak;
+};
+
+struct dst_sec {
+ char *sec_name;
+ /* positional (not necessarily ELF) index in an array of sections */
+ int id;
+
+ bool ephemeral;
+
+ /* ELF info */
+ size_t sec_idx;
+ Elf_Scn *scn;
+ Elf64_Shdr *shdr;
+ Elf_Data *data;
+
+ /* final output section size */
+ int sec_sz;
+ /* final output contents of the section */
+ void *raw_data;
+
+ /* corresponding STT_SECTION symbol index in SYMTAB */
+ int sec_sym_idx;
+
+ /* section's DATASEC variable info, emitted on BTF finalization */
+ bool has_btf;
+ int sec_var_cnt;
+ struct btf_var_secinfo *sec_vars;
+
+ /* section's .BTF.ext data */
+ struct btf_ext_sec_data func_info;
+ struct btf_ext_sec_data line_info;
+ struct btf_ext_sec_data core_relo_info;
+};
+
+struct bpf_linker {
+ char *filename;
+ int fd;
+ Elf *elf;
+ Elf64_Ehdr *elf_hdr;
+
+ /* Output sections metadata */
+ struct dst_sec *secs;
+ int sec_cnt;
+
+ struct strset *strtab_strs; /* STRTAB unique strings */
+ size_t strtab_sec_idx; /* STRTAB section index */
+ size_t symtab_sec_idx; /* SYMTAB section index */
+
+ struct btf *btf;
+ struct btf_ext *btf_ext;
+
+ /* global (including extern) ELF symbols */
+ int glob_sym_cnt;
+ struct glob_sym *glob_syms;
+};
+
+#define pr_warn_elf(fmt, ...) \
+ libbpf_print(LIBBPF_WARN, "libbpf: " fmt ": %s\n", ##__VA_ARGS__, elf_errmsg(-1))
+
+static int init_output_elf(struct bpf_linker *linker, const char *file);
+
+static int linker_load_obj_file(struct bpf_linker *linker, const char *filename,
+ const struct bpf_linker_file_opts *opts,
+ struct src_obj *obj);
+static int linker_sanity_check_elf(struct src_obj *obj);
+static int linker_sanity_check_elf_symtab(struct src_obj *obj, struct src_sec *sec);
+static int linker_sanity_check_elf_relos(struct src_obj *obj, struct src_sec *sec);
+static int linker_sanity_check_btf(struct src_obj *obj);
+static int linker_sanity_check_btf_ext(struct src_obj *obj);
+static int linker_fixup_btf(struct src_obj *obj);
+static int linker_append_sec_data(struct bpf_linker *linker, struct src_obj *obj);
+static int linker_append_elf_syms(struct bpf_linker *linker, struct src_obj *obj);
+static int linker_append_elf_sym(struct bpf_linker *linker, struct src_obj *obj,
+ Elf64_Sym *sym, const char *sym_name, int src_sym_idx);
+static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *obj);
+static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj);
+static int linker_append_btf_ext(struct bpf_linker *linker, struct src_obj *obj);
+
+static int finalize_btf(struct bpf_linker *linker);
+static int finalize_btf_ext(struct bpf_linker *linker);
+
+void bpf_linker__free(struct bpf_linker *linker)
+{
+ int i;
+
+ if (!linker)
+ return;
+
+ free(linker->filename);
+
+ if (linker->elf)
+ elf_end(linker->elf);
+
+ if (linker->fd >= 0)
+ close(linker->fd);
+
+ strset__free(linker->strtab_strs);
+
+ btf__free(linker->btf);
+ btf_ext__free(linker->btf_ext);
+
+ for (i = 1; i < linker->sec_cnt; i++) {
+ struct dst_sec *sec = &linker->secs[i];
+
+ free(sec->sec_name);
+ free(sec->raw_data);
+ free(sec->sec_vars);
+
+ free(sec->func_info.recs);
+ free(sec->line_info.recs);
+ free(sec->core_relo_info.recs);
+ }
+ free(linker->secs);
+
+ free(linker->glob_syms);
+ free(linker);
+}
+
+struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts *opts)
+{
+ struct bpf_linker *linker;
+ int err;
+
+ if (!OPTS_VALID(opts, bpf_linker_opts))
+ return errno = EINVAL, NULL;
+
+ if (elf_version(EV_CURRENT) == EV_NONE) {
+ pr_warn_elf("libelf initialization failed");
+ return errno = EINVAL, NULL;
+ }
+
+ linker = calloc(1, sizeof(*linker));
+ if (!linker)
+ return errno = ENOMEM, NULL;
+
+ linker->fd = -1;
+
+ err = init_output_elf(linker, filename);
+ if (err)
+ goto err_out;
+
+ return linker;
+
+err_out:
+ bpf_linker__free(linker);
+ return errno = -err, NULL;
+}
+
+static struct dst_sec *add_dst_sec(struct bpf_linker *linker, const char *sec_name)
+{
+ struct dst_sec *secs = linker->secs, *sec;
+ size_t new_cnt = linker->sec_cnt ? linker->sec_cnt + 1 : 2;
+
+ secs = libbpf_reallocarray(secs, new_cnt, sizeof(*secs));
+ if (!secs)
+ return NULL;
+
+ /* zero out newly allocated memory */
+ memset(secs + linker->sec_cnt, 0, (new_cnt - linker->sec_cnt) * sizeof(*secs));
+
+ linker->secs = secs;
+ linker->sec_cnt = new_cnt;
+
+ sec = &linker->secs[new_cnt - 1];
+ sec->id = new_cnt - 1;
+ sec->sec_name = strdup(sec_name);
+ if (!sec->sec_name)
+ return NULL;
+
+ return sec;
+}
+
+static Elf64_Sym *add_new_sym(struct bpf_linker *linker, size_t *sym_idx)
+{
+ struct dst_sec *symtab = &linker->secs[linker->symtab_sec_idx];
+ Elf64_Sym *syms, *sym;
+ size_t sym_cnt = symtab->sec_sz / sizeof(*sym);
+
+ syms = libbpf_reallocarray(symtab->raw_data, sym_cnt + 1, sizeof(*sym));
+ if (!syms)
+ return NULL;
+
+ sym = &syms[sym_cnt];
+ memset(sym, 0, sizeof(*sym));
+
+ symtab->raw_data = syms;
+ symtab->sec_sz += sizeof(*sym);
+ symtab->shdr->sh_size += sizeof(*sym);
+ symtab->data->d_size += sizeof(*sym);
+
+ if (sym_idx)
+ *sym_idx = sym_cnt;
+
+ return sym;
+}
+
+static int init_output_elf(struct bpf_linker *linker, const char *file)
+{
+ int err, str_off;
+ Elf64_Sym *init_sym;
+ struct dst_sec *sec;
+
+ linker->filename = strdup(file);
+ if (!linker->filename)
+ return -ENOMEM;
+
+ linker->fd = open(file, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0644);
+ if (linker->fd < 0) {
+ err = -errno;
+ pr_warn("failed to create '%s': %d\n", file, err);
+ return err;
+ }
+
+ linker->elf = elf_begin(linker->fd, ELF_C_WRITE, NULL);
+ if (!linker->elf) {
+ pr_warn_elf("failed to create ELF object");
+ return -EINVAL;
+ }
+
+ /* ELF header */
+ linker->elf_hdr = elf64_newehdr(linker->elf);
+ if (!linker->elf_hdr) {
+ pr_warn_elf("failed to create ELF header");
+ return -EINVAL;
+ }
+
+ linker->elf_hdr->e_machine = EM_BPF;
+ linker->elf_hdr->e_type = ET_REL;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ linker->elf_hdr->e_ident[EI_DATA] = ELFDATA2LSB;
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ linker->elf_hdr->e_ident[EI_DATA] = ELFDATA2MSB;
+#else
+#error "Unknown __BYTE_ORDER__"
+#endif
+
+ /* STRTAB */
+ /* initialize strset with an empty string to conform to ELF */
+ linker->strtab_strs = strset__new(INT_MAX, "", sizeof(""));
+ if (libbpf_get_error(linker->strtab_strs))
+ return libbpf_get_error(linker->strtab_strs);
+
+ sec = add_dst_sec(linker, ".strtab");
+ if (!sec)
+ return -ENOMEM;
+
+ sec->scn = elf_newscn(linker->elf);
+ if (!sec->scn) {
+ pr_warn_elf("failed to create STRTAB section");
+ return -EINVAL;
+ }
+
+ sec->shdr = elf64_getshdr(sec->scn);
+ if (!sec->shdr)
+ return -EINVAL;
+
+ sec->data = elf_newdata(sec->scn);
+ if (!sec->data) {
+ pr_warn_elf("failed to create STRTAB data");
+ return -EINVAL;
+ }
+
+ str_off = strset__add_str(linker->strtab_strs, sec->sec_name);
+ if (str_off < 0)
+ return str_off;
+
+ sec->sec_idx = elf_ndxscn(sec->scn);
+ linker->elf_hdr->e_shstrndx = sec->sec_idx;
+ linker->strtab_sec_idx = sec->sec_idx;
+
+ sec->shdr->sh_name = str_off;
+ sec->shdr->sh_type = SHT_STRTAB;
+ sec->shdr->sh_flags = SHF_STRINGS;
+ sec->shdr->sh_offset = 0;
+ sec->shdr->sh_link = 0;
+ sec->shdr->sh_info = 0;
+ sec->shdr->sh_addralign = 1;
+ sec->shdr->sh_size = sec->sec_sz = 0;
+ sec->shdr->sh_entsize = 0;
+
+ /* SYMTAB */
+ sec = add_dst_sec(linker, ".symtab");
+ if (!sec)
+ return -ENOMEM;
+
+ sec->scn = elf_newscn(linker->elf);
+ if (!sec->scn) {
+ pr_warn_elf("failed to create SYMTAB section");
+ return -EINVAL;
+ }
+
+ sec->shdr = elf64_getshdr(sec->scn);
+ if (!sec->shdr)
+ return -EINVAL;
+
+ sec->data = elf_newdata(sec->scn);
+ if (!sec->data) {
+ pr_warn_elf("failed to create SYMTAB data");
+ return -EINVAL;
+ }
+
+ str_off = strset__add_str(linker->strtab_strs, sec->sec_name);
+ if (str_off < 0)
+ return str_off;
+
+ sec->sec_idx = elf_ndxscn(sec->scn);
+ linker->symtab_sec_idx = sec->sec_idx;
+
+ sec->shdr->sh_name = str_off;
+ sec->shdr->sh_type = SHT_SYMTAB;
+ sec->shdr->sh_flags = 0;
+ sec->shdr->sh_offset = 0;
+ sec->shdr->sh_link = linker->strtab_sec_idx;
+ /* sh_info should be one greater than the index of the last local
+ * symbol (i.e., binding is STB_LOCAL). But why and who cares?
+ */
+ sec->shdr->sh_info = 0;
+ sec->shdr->sh_addralign = 8;
+ sec->shdr->sh_entsize = sizeof(Elf64_Sym);
+
+ /* .BTF */
+ linker->btf = btf__new_empty();
+ err = libbpf_get_error(linker->btf);
+ if (err)
+ return err;
+
+ /* add the special all-zero symbol */
+ init_sym = add_new_sym(linker, NULL);
+ if (!init_sym)
+ return -EINVAL;
+
+ init_sym->st_name = 0;
+ init_sym->st_info = 0;
+ init_sym->st_other = 0;
+ init_sym->st_shndx = SHN_UNDEF;
+ init_sym->st_value = 0;
+ init_sym->st_size = 0;
+
+ return 0;
+}
+
+int bpf_linker__add_file(struct bpf_linker *linker, const char *filename,
+ const struct bpf_linker_file_opts *opts)
+{
+ struct src_obj obj = {};
+ int err = 0;
+
+ if (!OPTS_VALID(opts, bpf_linker_file_opts))
+ return libbpf_err(-EINVAL);
+
+ if (!linker->elf)
+ return libbpf_err(-EINVAL);
+
+ err = err ?: linker_load_obj_file(linker, filename, opts, &obj);
+ err = err ?: linker_append_sec_data(linker, &obj);
+ err = err ?: linker_append_elf_syms(linker, &obj);
+ err = err ?: linker_append_elf_relos(linker, &obj);
+ err = err ?: linker_append_btf(linker, &obj);
+ err = err ?: linker_append_btf_ext(linker, &obj);
+
+ /* free up src_obj resources */
+ free(obj.btf_type_map);
+ btf__free(obj.btf);
+ btf_ext__free(obj.btf_ext);
+ free(obj.secs);
+ free(obj.sym_map);
+ if (obj.elf)
+ elf_end(obj.elf);
+ if (obj.fd >= 0)
+ close(obj.fd);
+
+ return libbpf_err(err);
+}
+
+static bool is_dwarf_sec_name(const char *name)
+{
+ /* approximation, but the actual list is too long */
+ return strncmp(name, ".debug_", sizeof(".debug_") - 1) == 0;
+}
+
+static bool is_ignored_sec(struct src_sec *sec)
+{
+ Elf64_Shdr *shdr = sec->shdr;
+ const char *name = sec->sec_name;
+
+ /* no special handling of .strtab */
+ if (shdr->sh_type == SHT_STRTAB)
+ return true;
+
+ /* ignore .llvm_addrsig section as well */
+ if (shdr->sh_type == SHT_LLVM_ADDRSIG)
+ return true;
+
+ /* no subprograms will lead to an empty .text section, ignore it */
+ if (shdr->sh_type == SHT_PROGBITS && shdr->sh_size == 0 &&
+ strcmp(sec->sec_name, ".text") == 0)
+ return true;
+
+ /* DWARF sections */
+ if (is_dwarf_sec_name(sec->sec_name))
+ return true;
+
+ if (strncmp(name, ".rel", sizeof(".rel") - 1) == 0) {
+ name += sizeof(".rel") - 1;
+ /* DWARF section relocations */
+ if (is_dwarf_sec_name(name))
+ return true;
+
+ /* .BTF and .BTF.ext don't need relocations */
+ if (strcmp(name, BTF_ELF_SEC) == 0 ||
+ strcmp(name, BTF_EXT_ELF_SEC) == 0)
+ return true;
+ }
+
+ return false;
+}
+
+static struct src_sec *add_src_sec(struct src_obj *obj, const char *sec_name)
+{
+ struct src_sec *secs = obj->secs, *sec;
+ size_t new_cnt = obj->sec_cnt ? obj->sec_cnt + 1 : 2;
+
+ secs = libbpf_reallocarray(secs, new_cnt, sizeof(*secs));
+ if (!secs)
+ return NULL;
+
+ /* zero out newly allocated memory */
+ memset(secs + obj->sec_cnt, 0, (new_cnt - obj->sec_cnt) * sizeof(*secs));
+
+ obj->secs = secs;
+ obj->sec_cnt = new_cnt;
+
+ sec = &obj->secs[new_cnt - 1];
+ sec->id = new_cnt - 1;
+ sec->sec_name = sec_name;
+
+ return sec;
+}
+
+static int linker_load_obj_file(struct bpf_linker *linker, const char *filename,
+ const struct bpf_linker_file_opts *opts,
+ struct src_obj *obj)
+{
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ const int host_endianness = ELFDATA2LSB;
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ const int host_endianness = ELFDATA2MSB;
+#else
+#error "Unknown __BYTE_ORDER__"
+#endif
+ int err = 0;
+ Elf_Scn *scn;
+ Elf_Data *data;
+ Elf64_Ehdr *ehdr;
+ Elf64_Shdr *shdr;
+ struct src_sec *sec;
+
+ pr_debug("linker: adding object file '%s'...\n", filename);
+
+ obj->filename = filename;
+
+ obj->fd = open(filename, O_RDONLY | O_CLOEXEC);
+ if (obj->fd < 0) {
+ err = -errno;
+ pr_warn("failed to open file '%s': %d\n", filename, err);
+ return err;
+ }
+ obj->elf = elf_begin(obj->fd, ELF_C_READ_MMAP, NULL);
+ if (!obj->elf) {
+ err = -errno;
+ pr_warn_elf("failed to parse ELF file '%s'", filename);
+ return err;
+ }
+
+ /* Sanity check ELF file high-level properties */
+ ehdr = elf64_getehdr(obj->elf);
+ if (!ehdr) {
+ err = -errno;
+ pr_warn_elf("failed to get ELF header for %s", filename);
+ return err;
+ }
+ if (ehdr->e_ident[EI_DATA] != host_endianness) {
+ err = -EOPNOTSUPP;
+ pr_warn_elf("unsupported byte order of ELF file %s", filename);
+ return err;
+ }
+ if (ehdr->e_type != ET_REL
+ || ehdr->e_machine != EM_BPF
+ || ehdr->e_ident[EI_CLASS] != ELFCLASS64) {
+ err = -EOPNOTSUPP;
+ pr_warn_elf("unsupported kind of ELF file %s", filename);
+ return err;
+ }
+
+ if (elf_getshdrstrndx(obj->elf, &obj->shstrs_sec_idx)) {
+ err = -errno;
+ pr_warn_elf("failed to get SHSTRTAB section index for %s", filename);
+ return err;
+ }
+
+ scn = NULL;
+ while ((scn = elf_nextscn(obj->elf, scn)) != NULL) {
+ size_t sec_idx = elf_ndxscn(scn);
+ const char *sec_name;
+
+ shdr = elf64_getshdr(scn);
+ if (!shdr) {
+ err = -errno;
+ pr_warn_elf("failed to get section #%zu header for %s",
+ sec_idx, filename);
+ return err;
+ }
+
+ sec_name = elf_strptr(obj->elf, obj->shstrs_sec_idx, shdr->sh_name);
+ if (!sec_name) {
+ err = -errno;
+ pr_warn_elf("failed to get section #%zu name for %s",
+ sec_idx, filename);
+ return err;
+ }
+
+ data = elf_getdata(scn, 0);
+ if (!data) {
+ err = -errno;
+ pr_warn_elf("failed to get section #%zu (%s) data from %s",
+ sec_idx, sec_name, filename);
+ return err;
+ }
+
+ sec = add_src_sec(obj, sec_name);
+ if (!sec)
+ return -ENOMEM;
+
+ sec->scn = scn;
+ sec->shdr = shdr;
+ sec->data = data;
+ sec->sec_idx = elf_ndxscn(scn);
+
+ if (is_ignored_sec(sec)) {
+ sec->skipped = true;
+ continue;
+ }
+
+ switch (shdr->sh_type) {
+ case SHT_SYMTAB:
+ if (obj->symtab_sec_idx) {
+ err = -EOPNOTSUPP;
+ pr_warn("multiple SYMTAB sections found, not supported\n");
+ return err;
+ }
+ obj->symtab_sec_idx = sec_idx;
+ break;
+ case SHT_STRTAB:
+ /* we'll construct our own string table */
+ break;
+ case SHT_PROGBITS:
+ if (strcmp(sec_name, BTF_ELF_SEC) == 0) {
+ obj->btf = btf__new(data->d_buf, shdr->sh_size);
+ err = libbpf_get_error(obj->btf);
+ if (err) {
+ pr_warn("failed to parse .BTF from %s: %d\n", filename, err);
+ return err;
+ }
+ sec->skipped = true;
+ continue;
+ }
+ if (strcmp(sec_name, BTF_EXT_ELF_SEC) == 0) {
+ obj->btf_ext = btf_ext__new(data->d_buf, shdr->sh_size);
+ err = libbpf_get_error(obj->btf_ext);
+ if (err) {
+ pr_warn("failed to parse .BTF.ext from '%s': %d\n", filename, err);
+ return err;
+ }
+ sec->skipped = true;
+ continue;
+ }
+
+ /* data & code */
+ break;
+ case SHT_NOBITS:
+ /* BSS */
+ break;
+ case SHT_REL:
+ /* relocations */
+ break;
+ default:
+ pr_warn("unrecognized section #%zu (%s) in %s\n",
+ sec_idx, sec_name, filename);
+ err = -EINVAL;
+ return err;
+ }
+ }
+
+ err = err ?: linker_sanity_check_elf(obj);
+ err = err ?: linker_sanity_check_btf(obj);
+ err = err ?: linker_sanity_check_btf_ext(obj);
+ err = err ?: linker_fixup_btf(obj);
+
+ return err;
+}
+
+static int linker_sanity_check_elf(struct src_obj *obj)
+{
+ struct src_sec *sec;
+ int i, err;
+
+ if (!obj->symtab_sec_idx) {
+ pr_warn("ELF is missing SYMTAB section in %s\n", obj->filename);
+ return -EINVAL;
+ }
+ if (!obj->shstrs_sec_idx) {
+ pr_warn("ELF is missing section headers STRTAB section in %s\n", obj->filename);
+ return -EINVAL;
+ }
+
+ for (i = 1; i < obj->sec_cnt; i++) {
+ sec = &obj->secs[i];
+
+ if (sec->sec_name[0] == '\0') {
+ pr_warn("ELF section #%zu has empty name in %s\n", sec->sec_idx, obj->filename);
+ return -EINVAL;
+ }
+
+ if (is_dwarf_sec_name(sec->sec_name))
+ continue;
+
+ if (sec->shdr->sh_addralign && !is_pow_of_2(sec->shdr->sh_addralign)) {
+ pr_warn("ELF section #%zu alignment %llu is non pow-of-2 alignment in %s\n",
+ sec->sec_idx, (long long unsigned)sec->shdr->sh_addralign,
+ obj->filename);
+ return -EINVAL;
+ }
+ if (sec->shdr->sh_addralign != sec->data->d_align) {
+ pr_warn("ELF section #%zu has inconsistent alignment addr=%llu != d=%llu in %s\n",
+ sec->sec_idx, (long long unsigned)sec->shdr->sh_addralign,
+ (long long unsigned)sec->data->d_align, obj->filename);
+ return -EINVAL;
+ }
+
+ if (sec->shdr->sh_size != sec->data->d_size) {
+ pr_warn("ELF section #%zu has inconsistent section size sh=%llu != d=%llu in %s\n",
+ sec->sec_idx, (long long unsigned)sec->shdr->sh_size,
+ (long long unsigned)sec->data->d_size, obj->filename);
+ return -EINVAL;
+ }
+
+ switch (sec->shdr->sh_type) {
+ case SHT_SYMTAB:
+ err = linker_sanity_check_elf_symtab(obj, sec);
+ if (err)
+ return err;
+ break;
+ case SHT_STRTAB:
+ break;
+ case SHT_PROGBITS:
+ if (sec->shdr->sh_flags & SHF_EXECINSTR) {
+ if (sec->shdr->sh_size % sizeof(struct bpf_insn) != 0) {
+ pr_warn("ELF section #%zu has unexpected size alignment %llu in %s\n",
+ sec->sec_idx, (long long unsigned)sec->shdr->sh_size,
+ obj->filename);
+ return -EINVAL;
+ }
+ }
+ break;
+ case SHT_NOBITS:
+ break;
+ case SHT_REL:
+ err = linker_sanity_check_elf_relos(obj, sec);
+ if (err)
+ return err;
+ break;
+ case SHT_LLVM_ADDRSIG:
+ break;
+ default:
+ pr_warn("ELF section #%zu (%s) has unrecognized type %zu in %s\n",
+ sec->sec_idx, sec->sec_name, (size_t)sec->shdr->sh_type, obj->filename);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int linker_sanity_check_elf_symtab(struct src_obj *obj, struct src_sec *sec)
+{
+ struct src_sec *link_sec;
+ Elf64_Sym *sym;
+ int i, n;
+
+ if (sec->shdr->sh_entsize != sizeof(Elf64_Sym))
+ return -EINVAL;
+ if (sec->shdr->sh_size % sec->shdr->sh_entsize != 0)
+ return -EINVAL;
+
+ if (!sec->shdr->sh_link || sec->shdr->sh_link >= obj->sec_cnt) {
+ pr_warn("ELF SYMTAB section #%zu points to missing STRTAB section #%zu in %s\n",
+ sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename);
+ return -EINVAL;
+ }
+ link_sec = &obj->secs[sec->shdr->sh_link];
+ if (link_sec->shdr->sh_type != SHT_STRTAB) {
+ pr_warn("ELF SYMTAB section #%zu points to invalid STRTAB section #%zu in %s\n",
+ sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename);
+ return -EINVAL;
+ }
+
+ n = sec->shdr->sh_size / sec->shdr->sh_entsize;
+ sym = sec->data->d_buf;
+ for (i = 0; i < n; i++, sym++) {
+ int sym_type = ELF64_ST_TYPE(sym->st_info);
+ int sym_bind = ELF64_ST_BIND(sym->st_info);
+ int sym_vis = ELF64_ST_VISIBILITY(sym->st_other);
+
+ if (i == 0) {
+ if (sym->st_name != 0 || sym->st_info != 0
+ || sym->st_other != 0 || sym->st_shndx != 0
+ || sym->st_value != 0 || sym->st_size != 0) {
+ pr_warn("ELF sym #0 is invalid in %s\n", obj->filename);
+ return -EINVAL;
+ }
+ continue;
+ }
+ if (sym_bind != STB_LOCAL && sym_bind != STB_GLOBAL && sym_bind != STB_WEAK) {
+ pr_warn("ELF sym #%d in section #%zu has unsupported symbol binding %d\n",
+ i, sec->sec_idx, sym_bind);
+ return -EINVAL;
+ }
+ if (sym_vis != STV_DEFAULT && sym_vis != STV_HIDDEN) {
+ pr_warn("ELF sym #%d in section #%zu has unsupported symbol visibility %d\n",
+ i, sec->sec_idx, sym_vis);
+ return -EINVAL;
+ }
+ if (sym->st_shndx == 0) {
+ if (sym_type != STT_NOTYPE || sym_bind == STB_LOCAL
+ || sym->st_value != 0 || sym->st_size != 0) {
+ pr_warn("ELF sym #%d is invalid extern symbol in %s\n",
+ i, obj->filename);
+
+ return -EINVAL;
+ }
+ continue;
+ }
+ if (sym->st_shndx < SHN_LORESERVE && sym->st_shndx >= obj->sec_cnt) {
+ pr_warn("ELF sym #%d in section #%zu points to missing section #%zu in %s\n",
+ i, sec->sec_idx, (size_t)sym->st_shndx, obj->filename);
+ return -EINVAL;
+ }
+ if (sym_type == STT_SECTION) {
+ if (sym->st_value != 0)
+ return -EINVAL;
+ continue;
+ }
+ }
+
+ return 0;
+}
+
+static int linker_sanity_check_elf_relos(struct src_obj *obj, struct src_sec *sec)
+{
+ struct src_sec *link_sec, *sym_sec;
+ Elf64_Rel *relo;
+ int i, n;
+
+ if (sec->shdr->sh_entsize != sizeof(Elf64_Rel))
+ return -EINVAL;
+ if (sec->shdr->sh_size % sec->shdr->sh_entsize != 0)
+ return -EINVAL;
+
+ /* SHT_REL's sh_link should point to SYMTAB */
+ if (sec->shdr->sh_link != obj->symtab_sec_idx) {
+ pr_warn("ELF relo section #%zu points to invalid SYMTAB section #%zu in %s\n",
+ sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename);
+ return -EINVAL;
+ }
+
+ /* SHT_REL's sh_info points to relocated section */
+ if (!sec->shdr->sh_info || sec->shdr->sh_info >= obj->sec_cnt) {
+ pr_warn("ELF relo section #%zu points to missing section #%zu in %s\n",
+ sec->sec_idx, (size_t)sec->shdr->sh_info, obj->filename);
+ return -EINVAL;
+ }
+ link_sec = &obj->secs[sec->shdr->sh_info];
+
+ /* .rel<secname> -> <secname> pattern is followed */
+ if (strncmp(sec->sec_name, ".rel", sizeof(".rel") - 1) != 0
+ || strcmp(sec->sec_name + sizeof(".rel") - 1, link_sec->sec_name) != 0) {
+ pr_warn("ELF relo section #%zu name has invalid name in %s\n",
+ sec->sec_idx, obj->filename);
+ return -EINVAL;
+ }
+
+ /* don't further validate relocations for ignored sections */
+ if (link_sec->skipped)
+ return 0;
+
+ /* relocatable section is data or instructions */
+ if (link_sec->shdr->sh_type != SHT_PROGBITS && link_sec->shdr->sh_type != SHT_NOBITS) {
+ pr_warn("ELF relo section #%zu points to invalid section #%zu in %s\n",
+ sec->sec_idx, (size_t)sec->shdr->sh_info, obj->filename);
+ return -EINVAL;
+ }
+
+ /* check sanity of each relocation */
+ n = sec->shdr->sh_size / sec->shdr->sh_entsize;
+ relo = sec->data->d_buf;
+ sym_sec = &obj->secs[obj->symtab_sec_idx];
+ for (i = 0; i < n; i++, relo++) {
+ size_t sym_idx = ELF64_R_SYM(relo->r_info);
+ size_t sym_type = ELF64_R_TYPE(relo->r_info);
+
+ if (sym_type != R_BPF_64_64 && sym_type != R_BPF_64_32 &&
+ sym_type != R_BPF_64_ABS64 && sym_type != R_BPF_64_ABS32) {
+ pr_warn("ELF relo #%d in section #%zu has unexpected type %zu in %s\n",
+ i, sec->sec_idx, sym_type, obj->filename);
+ return -EINVAL;
+ }
+
+ if (!sym_idx || sym_idx * sizeof(Elf64_Sym) >= sym_sec->shdr->sh_size) {
+ pr_warn("ELF relo #%d in section #%zu points to invalid symbol #%zu in %s\n",
+ i, sec->sec_idx, sym_idx, obj->filename);
+ return -EINVAL;
+ }
+
+ if (link_sec->shdr->sh_flags & SHF_EXECINSTR) {
+ if (relo->r_offset % sizeof(struct bpf_insn) != 0) {
+ pr_warn("ELF relo #%d in section #%zu points to missing symbol #%zu in %s\n",
+ i, sec->sec_idx, sym_idx, obj->filename);
+ return -EINVAL;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int check_btf_type_id(__u32 *type_id, void *ctx)
+{
+ struct btf *btf = ctx;
+
+ if (*type_id >= btf__type_cnt(btf))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int check_btf_str_off(__u32 *str_off, void *ctx)
+{
+ struct btf *btf = ctx;
+ const char *s;
+
+ s = btf__str_by_offset(btf, *str_off);
+
+ if (!s)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int linker_sanity_check_btf(struct src_obj *obj)
+{
+ struct btf_type *t;
+ int i, n, err = 0;
+
+ if (!obj->btf)
+ return 0;
+
+ n = btf__type_cnt(obj->btf);
+ for (i = 1; i < n; i++) {
+ t = btf_type_by_id(obj->btf, i);
+
+ err = err ?: btf_type_visit_type_ids(t, check_btf_type_id, obj->btf);
+ err = err ?: btf_type_visit_str_offs(t, check_btf_str_off, obj->btf);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int linker_sanity_check_btf_ext(struct src_obj *obj)
+{
+ int err = 0;
+
+ if (!obj->btf_ext)
+ return 0;
+
+ /* can't use .BTF.ext without .BTF */
+ if (!obj->btf)
+ return -EINVAL;
+
+ err = err ?: btf_ext_visit_type_ids(obj->btf_ext, check_btf_type_id, obj->btf);
+ err = err ?: btf_ext_visit_str_offs(obj->btf_ext, check_btf_str_off, obj->btf);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int init_sec(struct bpf_linker *linker, struct dst_sec *dst_sec, struct src_sec *src_sec)
+{
+ Elf_Scn *scn;
+ Elf_Data *data;
+ Elf64_Shdr *shdr;
+ int name_off;
+
+ dst_sec->sec_sz = 0;
+ dst_sec->sec_idx = 0;
+ dst_sec->ephemeral = src_sec->ephemeral;
+
+ /* ephemeral sections are just thin section shells lacking most parts */
+ if (src_sec->ephemeral)
+ return 0;
+
+ scn = elf_newscn(linker->elf);
+ if (!scn)
+ return -ENOMEM;
+ data = elf_newdata(scn);
+ if (!data)
+ return -ENOMEM;
+ shdr = elf64_getshdr(scn);
+ if (!shdr)
+ return -ENOMEM;
+
+ dst_sec->scn = scn;
+ dst_sec->shdr = shdr;
+ dst_sec->data = data;
+ dst_sec->sec_idx = elf_ndxscn(scn);
+
+ name_off = strset__add_str(linker->strtab_strs, src_sec->sec_name);
+ if (name_off < 0)
+ return name_off;
+
+ shdr->sh_name = name_off;
+ shdr->sh_type = src_sec->shdr->sh_type;
+ shdr->sh_flags = src_sec->shdr->sh_flags;
+ shdr->sh_size = 0;
+ /* sh_link and sh_info have different meaning for different types of
+ * sections, so we leave it up to the caller code to fill them in, if
+ * necessary
+ */
+ shdr->sh_link = 0;
+ shdr->sh_info = 0;
+ shdr->sh_addralign = src_sec->shdr->sh_addralign;
+ shdr->sh_entsize = src_sec->shdr->sh_entsize;
+
+ data->d_type = src_sec->data->d_type;
+ data->d_size = 0;
+ data->d_buf = NULL;
+ data->d_align = src_sec->data->d_align;
+ data->d_off = 0;
+
+ return 0;
+}
+
+static struct dst_sec *find_dst_sec_by_name(struct bpf_linker *linker, const char *sec_name)
+{
+ struct dst_sec *sec;
+ int i;
+
+ for (i = 1; i < linker->sec_cnt; i++) {
+ sec = &linker->secs[i];
+
+ if (strcmp(sec->sec_name, sec_name) == 0)
+ return sec;
+ }
+
+ return NULL;
+}
+
+static bool secs_match(struct dst_sec *dst, struct src_sec *src)
+{
+ if (dst->ephemeral || src->ephemeral)
+ return true;
+
+ if (dst->shdr->sh_type != src->shdr->sh_type) {
+ pr_warn("sec %s types mismatch\n", dst->sec_name);
+ return false;
+ }
+ if (dst->shdr->sh_flags != src->shdr->sh_flags) {
+ pr_warn("sec %s flags mismatch\n", dst->sec_name);
+ return false;
+ }
+ if (dst->shdr->sh_entsize != src->shdr->sh_entsize) {
+ pr_warn("sec %s entsize mismatch\n", dst->sec_name);
+ return false;
+ }
+
+ return true;
+}
+
+static bool sec_content_is_same(struct dst_sec *dst_sec, struct src_sec *src_sec)
+{
+ if (dst_sec->sec_sz != src_sec->shdr->sh_size)
+ return false;
+ if (memcmp(dst_sec->raw_data, src_sec->data->d_buf, dst_sec->sec_sz) != 0)
+ return false;
+ return true;
+}
+
+static int extend_sec(struct bpf_linker *linker, struct dst_sec *dst, struct src_sec *src)
+{
+ void *tmp;
+ size_t dst_align, src_align;
+ size_t dst_align_sz, dst_final_sz;
+ int err;
+
+ /* Ephemeral source section doesn't contribute anything to ELF
+ * section data.
+ */
+ if (src->ephemeral)
+ return 0;
+
+ /* Some sections (like .maps) can contain both externs (and thus be
+ * ephemeral) and non-externs (map definitions). So it's possible that
+ * it has to be "upgraded" from ephemeral to non-ephemeral when the
+ * first non-ephemeral entity appears. In such case, we add ELF
+ * section, data, etc.
+ */
+ if (dst->ephemeral) {
+ err = init_sec(linker, dst, src);
+ if (err)
+ return err;
+ }
+
+ dst_align = dst->shdr->sh_addralign;
+ src_align = src->shdr->sh_addralign;
+ if (dst_align == 0)
+ dst_align = 1;
+ if (dst_align < src_align)
+ dst_align = src_align;
+
+ dst_align_sz = (dst->sec_sz + dst_align - 1) / dst_align * dst_align;
+
+ /* no need to re-align final size */
+ dst_final_sz = dst_align_sz + src->shdr->sh_size;
+
+ if (src->shdr->sh_type != SHT_NOBITS) {
+ tmp = realloc(dst->raw_data, dst_final_sz);
+ /* If dst_align_sz == 0, realloc() behaves in a special way:
+ * 1. When dst->raw_data is NULL it returns:
+ * "either NULL or a pointer suitable to be passed to free()" [1].
+ * 2. When dst->raw_data is not-NULL it frees dst->raw_data and returns NULL,
+ * thus invalidating any "pointer suitable to be passed to free()" obtained
+ * at step (1).
+ *
+ * The dst_align_sz > 0 check avoids error exit after (2), otherwise
+ * dst->raw_data would be freed again in bpf_linker__free().
+ *
+ * [1] man 3 realloc
+ */
+ if (!tmp && dst_align_sz > 0)
+ return -ENOMEM;
+ dst->raw_data = tmp;
+
+ /* pad dst section, if it's alignment forced size increase */
+ memset(dst->raw_data + dst->sec_sz, 0, dst_align_sz - dst->sec_sz);
+ /* now copy src data at a properly aligned offset */
+ memcpy(dst->raw_data + dst_align_sz, src->data->d_buf, src->shdr->sh_size);
+ }
+
+ dst->sec_sz = dst_final_sz;
+ dst->shdr->sh_size = dst_final_sz;
+ dst->data->d_size = dst_final_sz;
+
+ dst->shdr->sh_addralign = dst_align;
+ dst->data->d_align = dst_align;
+
+ src->dst_off = dst_align_sz;
+
+ return 0;
+}
+
+static bool is_data_sec(struct src_sec *sec)
+{
+ if (!sec || sec->skipped)
+ return false;
+ /* ephemeral sections are data sections, e.g., .kconfig, .ksyms */
+ if (sec->ephemeral)
+ return true;
+ return sec->shdr->sh_type == SHT_PROGBITS || sec->shdr->sh_type == SHT_NOBITS;
+}
+
+static bool is_relo_sec(struct src_sec *sec)
+{
+ if (!sec || sec->skipped || sec->ephemeral)
+ return false;
+ return sec->shdr->sh_type == SHT_REL;
+}
+
+static int linker_append_sec_data(struct bpf_linker *linker, struct src_obj *obj)
+{
+ int i, err;
+
+ for (i = 1; i < obj->sec_cnt; i++) {
+ struct src_sec *src_sec;
+ struct dst_sec *dst_sec;
+
+ src_sec = &obj->secs[i];
+ if (!is_data_sec(src_sec))
+ continue;
+
+ dst_sec = find_dst_sec_by_name(linker, src_sec->sec_name);
+ if (!dst_sec) {
+ dst_sec = add_dst_sec(linker, src_sec->sec_name);
+ if (!dst_sec)
+ return -ENOMEM;
+ err = init_sec(linker, dst_sec, src_sec);
+ if (err) {
+ pr_warn("failed to init section '%s'\n", src_sec->sec_name);
+ return err;
+ }
+ } else {
+ if (!secs_match(dst_sec, src_sec)) {
+ pr_warn("ELF sections %s are incompatible\n", src_sec->sec_name);
+ return -1;
+ }
+
+ /* "license" and "version" sections are deduped */
+ if (strcmp(src_sec->sec_name, "license") == 0
+ || strcmp(src_sec->sec_name, "version") == 0) {
+ if (!sec_content_is_same(dst_sec, src_sec)) {
+ pr_warn("non-identical contents of section '%s' are not supported\n", src_sec->sec_name);
+ return -EINVAL;
+ }
+ src_sec->skipped = true;
+ src_sec->dst_id = dst_sec->id;
+ continue;
+ }
+ }
+
+ /* record mapped section index */
+ src_sec->dst_id = dst_sec->id;
+
+ err = extend_sec(linker, dst_sec, src_sec);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int linker_append_elf_syms(struct bpf_linker *linker, struct src_obj *obj)
+{
+ struct src_sec *symtab = &obj->secs[obj->symtab_sec_idx];
+ Elf64_Sym *sym = symtab->data->d_buf;
+ int i, n = symtab->shdr->sh_size / symtab->shdr->sh_entsize, err;
+ int str_sec_idx = symtab->shdr->sh_link;
+ const char *sym_name;
+
+ obj->sym_map = calloc(n + 1, sizeof(*obj->sym_map));
+ if (!obj->sym_map)
+ return -ENOMEM;
+
+ for (i = 0; i < n; i++, sym++) {
+ /* We already validated all-zero symbol #0 and we already
+ * appended it preventively to the final SYMTAB, so skip it.
+ */
+ if (i == 0)
+ continue;
+
+ sym_name = elf_strptr(obj->elf, str_sec_idx, sym->st_name);
+ if (!sym_name) {
+ pr_warn("can't fetch symbol name for symbol #%d in '%s'\n", i, obj->filename);
+ return -EINVAL;
+ }
+
+ err = linker_append_elf_sym(linker, obj, sym, sym_name, i);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static Elf64_Sym *get_sym_by_idx(struct bpf_linker *linker, size_t sym_idx)
+{
+ struct dst_sec *symtab = &linker->secs[linker->symtab_sec_idx];
+ Elf64_Sym *syms = symtab->raw_data;
+
+ return &syms[sym_idx];
+}
+
+static struct glob_sym *find_glob_sym(struct bpf_linker *linker, const char *sym_name)
+{
+ struct glob_sym *glob_sym;
+ const char *name;
+ int i;
+
+ for (i = 0; i < linker->glob_sym_cnt; i++) {
+ glob_sym = &linker->glob_syms[i];
+ name = strset__data(linker->strtab_strs) + glob_sym->name_off;
+
+ if (strcmp(name, sym_name) == 0)
+ return glob_sym;
+ }
+
+ return NULL;
+}
+
+static struct glob_sym *add_glob_sym(struct bpf_linker *linker)
+{
+ struct glob_sym *syms, *sym;
+
+ syms = libbpf_reallocarray(linker->glob_syms, linker->glob_sym_cnt + 1,
+ sizeof(*linker->glob_syms));
+ if (!syms)
+ return NULL;
+
+ sym = &syms[linker->glob_sym_cnt];
+ memset(sym, 0, sizeof(*sym));
+ sym->var_idx = -1;
+
+ linker->glob_syms = syms;
+ linker->glob_sym_cnt++;
+
+ return sym;
+}
+
+static bool glob_sym_btf_matches(const char *sym_name, bool exact,
+ const struct btf *btf1, __u32 id1,
+ const struct btf *btf2, __u32 id2)
+{
+ const struct btf_type *t1, *t2;
+ bool is_static1, is_static2;
+ const char *n1, *n2;
+ int i, n;
+
+recur:
+ n1 = n2 = NULL;
+ t1 = skip_mods_and_typedefs(btf1, id1, &id1);
+ t2 = skip_mods_and_typedefs(btf2, id2, &id2);
+
+ /* check if only one side is FWD, otherwise handle with common logic */
+ if (!exact && btf_is_fwd(t1) != btf_is_fwd(t2)) {
+ n1 = btf__str_by_offset(btf1, t1->name_off);
+ n2 = btf__str_by_offset(btf2, t2->name_off);
+ if (strcmp(n1, n2) != 0) {
+ pr_warn("global '%s': incompatible forward declaration names '%s' and '%s'\n",
+ sym_name, n1, n2);
+ return false;
+ }
+ /* validate if FWD kind matches concrete kind */
+ if (btf_is_fwd(t1)) {
+ if (btf_kflag(t1) && btf_is_union(t2))
+ return true;
+ if (!btf_kflag(t1) && btf_is_struct(t2))
+ return true;
+ pr_warn("global '%s': incompatible %s forward declaration and concrete kind %s\n",
+ sym_name, btf_kflag(t1) ? "union" : "struct", btf_kind_str(t2));
+ } else {
+ if (btf_kflag(t2) && btf_is_union(t1))
+ return true;
+ if (!btf_kflag(t2) && btf_is_struct(t1))
+ return true;
+ pr_warn("global '%s': incompatible %s forward declaration and concrete kind %s\n",
+ sym_name, btf_kflag(t2) ? "union" : "struct", btf_kind_str(t1));
+ }
+ return false;
+ }
+
+ if (btf_kind(t1) != btf_kind(t2)) {
+ pr_warn("global '%s': incompatible BTF kinds %s and %s\n",
+ sym_name, btf_kind_str(t1), btf_kind_str(t2));
+ return false;
+ }
+
+ switch (btf_kind(t1)) {
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
+ case BTF_KIND_FWD:
+ case BTF_KIND_FUNC:
+ case BTF_KIND_VAR:
+ n1 = btf__str_by_offset(btf1, t1->name_off);
+ n2 = btf__str_by_offset(btf2, t2->name_off);
+ if (strcmp(n1, n2) != 0) {
+ pr_warn("global '%s': incompatible %s names '%s' and '%s'\n",
+ sym_name, btf_kind_str(t1), n1, n2);
+ return false;
+ }
+ break;
+ default:
+ break;
+ }
+
+ switch (btf_kind(t1)) {
+ case BTF_KIND_UNKN: /* void */
+ case BTF_KIND_FWD:
+ return true;
+ case BTF_KIND_INT:
+ case BTF_KIND_FLOAT:
+ case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
+ /* ignore encoding for int and enum values for enum */
+ if (t1->size != t2->size) {
+ pr_warn("global '%s': incompatible %s '%s' size %u and %u\n",
+ sym_name, btf_kind_str(t1), n1, t1->size, t2->size);
+ return false;
+ }
+ return true;
+ case BTF_KIND_PTR:
+ /* just validate overall shape of the referenced type, so no
+ * contents comparison for struct/union, and allowd fwd vs
+ * struct/union
+ */
+ exact = false;
+ id1 = t1->type;
+ id2 = t2->type;
+ goto recur;
+ case BTF_KIND_ARRAY:
+ /* ignore index type and array size */
+ id1 = btf_array(t1)->type;
+ id2 = btf_array(t2)->type;
+ goto recur;
+ case BTF_KIND_FUNC:
+ /* extern and global linkages are compatible */
+ is_static1 = btf_func_linkage(t1) == BTF_FUNC_STATIC;
+ is_static2 = btf_func_linkage(t2) == BTF_FUNC_STATIC;
+ if (is_static1 != is_static2) {
+ pr_warn("global '%s': incompatible func '%s' linkage\n", sym_name, n1);
+ return false;
+ }
+
+ id1 = t1->type;
+ id2 = t2->type;
+ goto recur;
+ case BTF_KIND_VAR:
+ /* extern and global linkages are compatible */
+ is_static1 = btf_var(t1)->linkage == BTF_VAR_STATIC;
+ is_static2 = btf_var(t2)->linkage == BTF_VAR_STATIC;
+ if (is_static1 != is_static2) {
+ pr_warn("global '%s': incompatible var '%s' linkage\n", sym_name, n1);
+ return false;
+ }
+
+ id1 = t1->type;
+ id2 = t2->type;
+ goto recur;
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION: {
+ const struct btf_member *m1, *m2;
+
+ if (!exact)
+ return true;
+
+ if (btf_vlen(t1) != btf_vlen(t2)) {
+ pr_warn("global '%s': incompatible number of %s fields %u and %u\n",
+ sym_name, btf_kind_str(t1), btf_vlen(t1), btf_vlen(t2));
+ return false;
+ }
+
+ n = btf_vlen(t1);
+ m1 = btf_members(t1);
+ m2 = btf_members(t2);
+ for (i = 0; i < n; i++, m1++, m2++) {
+ n1 = btf__str_by_offset(btf1, m1->name_off);
+ n2 = btf__str_by_offset(btf2, m2->name_off);
+ if (strcmp(n1, n2) != 0) {
+ pr_warn("global '%s': incompatible field #%d names '%s' and '%s'\n",
+ sym_name, i, n1, n2);
+ return false;
+ }
+ if (m1->offset != m2->offset) {
+ pr_warn("global '%s': incompatible field #%d ('%s') offsets\n",
+ sym_name, i, n1);
+ return false;
+ }
+ if (!glob_sym_btf_matches(sym_name, exact, btf1, m1->type, btf2, m2->type))
+ return false;
+ }
+
+ return true;
+ }
+ case BTF_KIND_FUNC_PROTO: {
+ const struct btf_param *m1, *m2;
+
+ if (btf_vlen(t1) != btf_vlen(t2)) {
+ pr_warn("global '%s': incompatible number of %s params %u and %u\n",
+ sym_name, btf_kind_str(t1), btf_vlen(t1), btf_vlen(t2));
+ return false;
+ }
+
+ n = btf_vlen(t1);
+ m1 = btf_params(t1);
+ m2 = btf_params(t2);
+ for (i = 0; i < n; i++, m1++, m2++) {
+ /* ignore func arg names */
+ if (!glob_sym_btf_matches(sym_name, exact, btf1, m1->type, btf2, m2->type))
+ return false;
+ }
+
+ /* now check return type as well */
+ id1 = t1->type;
+ id2 = t2->type;
+ goto recur;
+ }
+
+ /* skip_mods_and_typedefs() make this impossible */
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_CONST:
+ case BTF_KIND_RESTRICT:
+ /* DATASECs are never compared with each other */
+ case BTF_KIND_DATASEC:
+ default:
+ pr_warn("global '%s': unsupported BTF kind %s\n",
+ sym_name, btf_kind_str(t1));
+ return false;
+ }
+}
+
+static bool map_defs_match(const char *sym_name,
+ const struct btf *main_btf,
+ const struct btf_map_def *main_def,
+ const struct btf_map_def *main_inner_def,
+ const struct btf *extra_btf,
+ const struct btf_map_def *extra_def,
+ const struct btf_map_def *extra_inner_def)
+{
+ const char *reason;
+
+ if (main_def->map_type != extra_def->map_type) {
+ reason = "type";
+ goto mismatch;
+ }
+
+ /* check key type/size match */
+ if (main_def->key_size != extra_def->key_size) {
+ reason = "key_size";
+ goto mismatch;
+ }
+ if (!!main_def->key_type_id != !!extra_def->key_type_id) {
+ reason = "key type";
+ goto mismatch;
+ }
+ if ((main_def->parts & MAP_DEF_KEY_TYPE)
+ && !glob_sym_btf_matches(sym_name, true /*exact*/,
+ main_btf, main_def->key_type_id,
+ extra_btf, extra_def->key_type_id)) {
+ reason = "key type";
+ goto mismatch;
+ }
+
+ /* validate value type/size match */
+ if (main_def->value_size != extra_def->value_size) {
+ reason = "value_size";
+ goto mismatch;
+ }
+ if (!!main_def->value_type_id != !!extra_def->value_type_id) {
+ reason = "value type";
+ goto mismatch;
+ }
+ if ((main_def->parts & MAP_DEF_VALUE_TYPE)
+ && !glob_sym_btf_matches(sym_name, true /*exact*/,
+ main_btf, main_def->value_type_id,
+ extra_btf, extra_def->value_type_id)) {
+ reason = "key type";
+ goto mismatch;
+ }
+
+ if (main_def->max_entries != extra_def->max_entries) {
+ reason = "max_entries";
+ goto mismatch;
+ }
+ if (main_def->map_flags != extra_def->map_flags) {
+ reason = "map_flags";
+ goto mismatch;
+ }
+ if (main_def->numa_node != extra_def->numa_node) {
+ reason = "numa_node";
+ goto mismatch;
+ }
+ if (main_def->pinning != extra_def->pinning) {
+ reason = "pinning";
+ goto mismatch;
+ }
+
+ if ((main_def->parts & MAP_DEF_INNER_MAP) != (extra_def->parts & MAP_DEF_INNER_MAP)) {
+ reason = "inner map";
+ goto mismatch;
+ }
+
+ if (main_def->parts & MAP_DEF_INNER_MAP) {
+ char inner_map_name[128];
+
+ snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", sym_name);
+
+ return map_defs_match(inner_map_name,
+ main_btf, main_inner_def, NULL,
+ extra_btf, extra_inner_def, NULL);
+ }
+
+ return true;
+
+mismatch:
+ pr_warn("global '%s': map %s mismatch\n", sym_name, reason);
+ return false;
+}
+
+static bool glob_map_defs_match(const char *sym_name,
+ struct bpf_linker *linker, struct glob_sym *glob_sym,
+ struct src_obj *obj, Elf64_Sym *sym, int btf_id)
+{
+ struct btf_map_def dst_def = {}, dst_inner_def = {};
+ struct btf_map_def src_def = {}, src_inner_def = {};
+ const struct btf_type *t;
+ int err;
+
+ t = btf__type_by_id(obj->btf, btf_id);
+ if (!btf_is_var(t)) {
+ pr_warn("global '%s': invalid map definition type [%d]\n", sym_name, btf_id);
+ return false;
+ }
+ t = skip_mods_and_typedefs(obj->btf, t->type, NULL);
+
+ err = parse_btf_map_def(sym_name, obj->btf, t, true /*strict*/, &src_def, &src_inner_def);
+ if (err) {
+ pr_warn("global '%s': invalid map definition\n", sym_name);
+ return false;
+ }
+
+ /* re-parse existing map definition */
+ t = btf__type_by_id(linker->btf, glob_sym->btf_id);
+ t = skip_mods_and_typedefs(linker->btf, t->type, NULL);
+ err = parse_btf_map_def(sym_name, linker->btf, t, true /*strict*/, &dst_def, &dst_inner_def);
+ if (err) {
+ /* this should not happen, because we already validated it */
+ pr_warn("global '%s': invalid dst map definition\n", sym_name);
+ return false;
+ }
+
+ /* Currently extern map definition has to be complete and match
+ * concrete map definition exactly. This restriction might be lifted
+ * in the future.
+ */
+ return map_defs_match(sym_name, linker->btf, &dst_def, &dst_inner_def,
+ obj->btf, &src_def, &src_inner_def);
+}
+
+static bool glob_syms_match(const char *sym_name,
+ struct bpf_linker *linker, struct glob_sym *glob_sym,
+ struct src_obj *obj, Elf64_Sym *sym, size_t sym_idx, int btf_id)
+{
+ const struct btf_type *src_t;
+
+ /* if we are dealing with externs, BTF types describing both global
+ * and extern VARs/FUNCs should be completely present in all files
+ */
+ if (!glob_sym->btf_id || !btf_id) {
+ pr_warn("BTF info is missing for global symbol '%s'\n", sym_name);
+ return false;
+ }
+
+ src_t = btf__type_by_id(obj->btf, btf_id);
+ if (!btf_is_var(src_t) && !btf_is_func(src_t)) {
+ pr_warn("only extern variables and functions are supported, but got '%s' for '%s'\n",
+ btf_kind_str(src_t), sym_name);
+ return false;
+ }
+
+ /* deal with .maps definitions specially */
+ if (glob_sym->sec_id && strcmp(linker->secs[glob_sym->sec_id].sec_name, MAPS_ELF_SEC) == 0)
+ return glob_map_defs_match(sym_name, linker, glob_sym, obj, sym, btf_id);
+
+ if (!glob_sym_btf_matches(sym_name, true /*exact*/,
+ linker->btf, glob_sym->btf_id, obj->btf, btf_id))
+ return false;
+
+ return true;
+}
+
+static bool btf_is_non_static(const struct btf_type *t)
+{
+ return (btf_is_var(t) && btf_var(t)->linkage != BTF_VAR_STATIC)
+ || (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_STATIC);
+}
+
+static int find_glob_sym_btf(struct src_obj *obj, Elf64_Sym *sym, const char *sym_name,
+ int *out_btf_sec_id, int *out_btf_id)
+{
+ int i, j, n, m, btf_id = 0;
+ const struct btf_type *t;
+ const struct btf_var_secinfo *vi;
+ const char *name;
+
+ if (!obj->btf) {
+ pr_warn("failed to find BTF info for object '%s'\n", obj->filename);
+ return -EINVAL;
+ }
+
+ n = btf__type_cnt(obj->btf);
+ for (i = 1; i < n; i++) {
+ t = btf__type_by_id(obj->btf, i);
+
+ /* some global and extern FUNCs and VARs might not be associated with any
+ * DATASEC, so try to detect them in the same pass
+ */
+ if (btf_is_non_static(t)) {
+ name = btf__str_by_offset(obj->btf, t->name_off);
+ if (strcmp(name, sym_name) != 0)
+ continue;
+
+ /* remember and still try to find DATASEC */
+ btf_id = i;
+ continue;
+ }
+
+ if (!btf_is_datasec(t))
+ continue;
+
+ vi = btf_var_secinfos(t);
+ for (j = 0, m = btf_vlen(t); j < m; j++, vi++) {
+ t = btf__type_by_id(obj->btf, vi->type);
+ name = btf__str_by_offset(obj->btf, t->name_off);
+
+ if (strcmp(name, sym_name) != 0)
+ continue;
+ if (btf_is_var(t) && btf_var(t)->linkage == BTF_VAR_STATIC)
+ continue;
+ if (btf_is_func(t) && btf_func_linkage(t) == BTF_FUNC_STATIC)
+ continue;
+
+ if (btf_id && btf_id != vi->type) {
+ pr_warn("global/extern '%s' BTF is ambiguous: both types #%d and #%u match\n",
+ sym_name, btf_id, vi->type);
+ return -EINVAL;
+ }
+
+ *out_btf_sec_id = i;
+ *out_btf_id = vi->type;
+
+ return 0;
+ }
+ }
+
+ /* free-floating extern or global FUNC */
+ if (btf_id) {
+ *out_btf_sec_id = 0;
+ *out_btf_id = btf_id;
+ return 0;
+ }
+
+ pr_warn("failed to find BTF info for global/extern symbol '%s'\n", sym_name);
+ return -ENOENT;
+}
+
+static struct src_sec *find_src_sec_by_name(struct src_obj *obj, const char *sec_name)
+{
+ struct src_sec *sec;
+ int i;
+
+ for (i = 1; i < obj->sec_cnt; i++) {
+ sec = &obj->secs[i];
+
+ if (strcmp(sec->sec_name, sec_name) == 0)
+ return sec;
+ }
+
+ return NULL;
+}
+
+static int complete_extern_btf_info(struct btf *dst_btf, int dst_id,
+ struct btf *src_btf, int src_id)
+{
+ struct btf_type *dst_t = btf_type_by_id(dst_btf, dst_id);
+ struct btf_type *src_t = btf_type_by_id(src_btf, src_id);
+ struct btf_param *src_p, *dst_p;
+ const char *s;
+ int i, n, off;
+
+ /* We already made sure that source and destination types (FUNC or
+ * VAR) match in terms of types and argument names.
+ */
+ if (btf_is_var(dst_t)) {
+ btf_var(dst_t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
+ return 0;
+ }
+
+ dst_t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_GLOBAL, 0);
+
+ /* now onto FUNC_PROTO types */
+ src_t = btf_type_by_id(src_btf, src_t->type);
+ dst_t = btf_type_by_id(dst_btf, dst_t->type);
+
+ /* Fill in all the argument names, which for extern FUNCs are missing.
+ * We'll end up with two copies of FUNCs/VARs for externs, but that
+ * will be taken care of by BTF dedup at the very end.
+ * It might be that BTF types for extern in one file has less/more BTF
+ * information (e.g., FWD instead of full STRUCT/UNION information),
+ * but that should be (in most cases, subject to BTF dedup rules)
+ * handled and resolved by BTF dedup algorithm as well, so we won't
+ * worry about it. Our only job is to make sure that argument names
+ * are populated on both sides, otherwise BTF dedup will pedantically
+ * consider them different.
+ */
+ src_p = btf_params(src_t);
+ dst_p = btf_params(dst_t);
+ for (i = 0, n = btf_vlen(dst_t); i < n; i++, src_p++, dst_p++) {
+ if (!src_p->name_off)
+ continue;
+
+ /* src_btf has more complete info, so add name to dst_btf */
+ s = btf__str_by_offset(src_btf, src_p->name_off);
+ off = btf__add_str(dst_btf, s);
+ if (off < 0)
+ return off;
+ dst_p->name_off = off;
+ }
+ return 0;
+}
+
+static void sym_update_bind(Elf64_Sym *sym, int sym_bind)
+{
+ sym->st_info = ELF64_ST_INFO(sym_bind, ELF64_ST_TYPE(sym->st_info));
+}
+
+static void sym_update_type(Elf64_Sym *sym, int sym_type)
+{
+ sym->st_info = ELF64_ST_INFO(ELF64_ST_BIND(sym->st_info), sym_type);
+}
+
+static void sym_update_visibility(Elf64_Sym *sym, int sym_vis)
+{
+ /* libelf doesn't provide setters for ST_VISIBILITY,
+ * but it is stored in the lower 2 bits of st_other
+ */
+ sym->st_other &= ~0x03;
+ sym->st_other |= sym_vis;
+}
+
+static int linker_append_elf_sym(struct bpf_linker *linker, struct src_obj *obj,
+ Elf64_Sym *sym, const char *sym_name, int src_sym_idx)
+{
+ struct src_sec *src_sec = NULL;
+ struct dst_sec *dst_sec = NULL;
+ struct glob_sym *glob_sym = NULL;
+ int name_off, sym_type, sym_bind, sym_vis, err;
+ int btf_sec_id = 0, btf_id = 0;
+ size_t dst_sym_idx;
+ Elf64_Sym *dst_sym;
+ bool sym_is_extern;
+
+ sym_type = ELF64_ST_TYPE(sym->st_info);
+ sym_bind = ELF64_ST_BIND(sym->st_info);
+ sym_vis = ELF64_ST_VISIBILITY(sym->st_other);
+ sym_is_extern = sym->st_shndx == SHN_UNDEF;
+
+ if (sym_is_extern) {
+ if (!obj->btf) {
+ pr_warn("externs without BTF info are not supported\n");
+ return -ENOTSUP;
+ }
+ } else if (sym->st_shndx < SHN_LORESERVE) {
+ src_sec = &obj->secs[sym->st_shndx];
+ if (src_sec->skipped)
+ return 0;
+ dst_sec = &linker->secs[src_sec->dst_id];
+
+ /* allow only one STT_SECTION symbol per section */
+ if (sym_type == STT_SECTION && dst_sec->sec_sym_idx) {
+ obj->sym_map[src_sym_idx] = dst_sec->sec_sym_idx;
+ return 0;
+ }
+ }
+
+ if (sym_bind == STB_LOCAL)
+ goto add_sym;
+
+ /* find matching BTF info */
+ err = find_glob_sym_btf(obj, sym, sym_name, &btf_sec_id, &btf_id);
+ if (err)
+ return err;
+
+ if (sym_is_extern && btf_sec_id) {
+ const char *sec_name = NULL;
+ const struct btf_type *t;
+
+ t = btf__type_by_id(obj->btf, btf_sec_id);
+ sec_name = btf__str_by_offset(obj->btf, t->name_off);
+
+ /* Clang puts unannotated extern vars into
+ * '.extern' BTF DATASEC. Treat them the same
+ * as unannotated extern funcs (which are
+ * currently not put into any DATASECs).
+ * Those don't have associated src_sec/dst_sec.
+ */
+ if (strcmp(sec_name, BTF_EXTERN_SEC) != 0) {
+ src_sec = find_src_sec_by_name(obj, sec_name);
+ if (!src_sec) {
+ pr_warn("failed to find matching ELF sec '%s'\n", sec_name);
+ return -ENOENT;
+ }
+ dst_sec = &linker->secs[src_sec->dst_id];
+ }
+ }
+
+ glob_sym = find_glob_sym(linker, sym_name);
+ if (glob_sym) {
+ /* Preventively resolve to existing symbol. This is
+ * needed for further relocation symbol remapping in
+ * the next step of linking.
+ */
+ obj->sym_map[src_sym_idx] = glob_sym->sym_idx;
+
+ /* If both symbols are non-externs, at least one of
+ * them has to be STB_WEAK, otherwise they are in
+ * a conflict with each other.
+ */
+ if (!sym_is_extern && !glob_sym->is_extern
+ && !glob_sym->is_weak && sym_bind != STB_WEAK) {
+ pr_warn("conflicting non-weak symbol #%d (%s) definition in '%s'\n",
+ src_sym_idx, sym_name, obj->filename);
+ return -EINVAL;
+ }
+
+ if (!glob_syms_match(sym_name, linker, glob_sym, obj, sym, src_sym_idx, btf_id))
+ return -EINVAL;
+
+ dst_sym = get_sym_by_idx(linker, glob_sym->sym_idx);
+
+ /* If new symbol is strong, then force dst_sym to be strong as
+ * well; this way a mix of weak and non-weak extern
+ * definitions will end up being strong.
+ */
+ if (sym_bind == STB_GLOBAL) {
+ /* We still need to preserve type (NOTYPE or
+ * OBJECT/FUNC, depending on whether the symbol is
+ * extern or not)
+ */
+ sym_update_bind(dst_sym, STB_GLOBAL);
+ glob_sym->is_weak = false;
+ }
+
+ /* Non-default visibility is "contaminating", with stricter
+ * visibility overwriting more permissive ones, even if more
+ * permissive visibility comes from just an extern definition.
+ * Currently only STV_DEFAULT and STV_HIDDEN are allowed and
+ * ensured by ELF symbol sanity checks above.
+ */
+ if (sym_vis > ELF64_ST_VISIBILITY(dst_sym->st_other))
+ sym_update_visibility(dst_sym, sym_vis);
+
+ /* If the new symbol is extern, then regardless if
+ * existing symbol is extern or resolved global, just
+ * keep the existing one untouched.
+ */
+ if (sym_is_extern)
+ return 0;
+
+ /* If existing symbol is a strong resolved symbol, bail out,
+ * because we lost resolution battle have nothing to
+ * contribute. We already checked abover that there is no
+ * strong-strong conflict. We also already tightened binding
+ * and visibility, so nothing else to contribute at that point.
+ */
+ if (!glob_sym->is_extern && sym_bind == STB_WEAK)
+ return 0;
+
+ /* At this point, new symbol is strong non-extern,
+ * so overwrite glob_sym with new symbol information.
+ * Preserve binding and visibility.
+ */
+ sym_update_type(dst_sym, sym_type);
+ dst_sym->st_shndx = dst_sec->sec_idx;
+ dst_sym->st_value = src_sec->dst_off + sym->st_value;
+ dst_sym->st_size = sym->st_size;
+
+ /* see comment below about dst_sec->id vs dst_sec->sec_idx */
+ glob_sym->sec_id = dst_sec->id;
+ glob_sym->is_extern = false;
+
+ if (complete_extern_btf_info(linker->btf, glob_sym->btf_id,
+ obj->btf, btf_id))
+ return -EINVAL;
+
+ /* request updating VAR's/FUNC's underlying BTF type when appending BTF type */
+ glob_sym->underlying_btf_id = 0;
+
+ obj->sym_map[src_sym_idx] = glob_sym->sym_idx;
+ return 0;
+ }
+
+add_sym:
+ name_off = strset__add_str(linker->strtab_strs, sym_name);
+ if (name_off < 0)
+ return name_off;
+
+ dst_sym = add_new_sym(linker, &dst_sym_idx);
+ if (!dst_sym)
+ return -ENOMEM;
+
+ dst_sym->st_name = name_off;
+ dst_sym->st_info = sym->st_info;
+ dst_sym->st_other = sym->st_other;
+ dst_sym->st_shndx = dst_sec ? dst_sec->sec_idx : sym->st_shndx;
+ dst_sym->st_value = (src_sec ? src_sec->dst_off : 0) + sym->st_value;
+ dst_sym->st_size = sym->st_size;
+
+ obj->sym_map[src_sym_idx] = dst_sym_idx;
+
+ if (sym_type == STT_SECTION && dst_sym) {
+ dst_sec->sec_sym_idx = dst_sym_idx;
+ dst_sym->st_value = 0;
+ }
+
+ if (sym_bind != STB_LOCAL) {
+ glob_sym = add_glob_sym(linker);
+ if (!glob_sym)
+ return -ENOMEM;
+
+ glob_sym->sym_idx = dst_sym_idx;
+ /* we use dst_sec->id (and not dst_sec->sec_idx), because
+ * ephemeral sections (.kconfig, .ksyms, etc) don't have
+ * sec_idx (as they don't have corresponding ELF section), but
+ * still have id. .extern doesn't have even ephemeral section
+ * associated with it, so dst_sec->id == dst_sec->sec_idx == 0.
+ */
+ glob_sym->sec_id = dst_sec ? dst_sec->id : 0;
+ glob_sym->name_off = name_off;
+ /* we will fill btf_id in during BTF merging step */
+ glob_sym->btf_id = 0;
+ glob_sym->is_extern = sym_is_extern;
+ glob_sym->is_weak = sym_bind == STB_WEAK;
+ }
+
+ return 0;
+}
+
+static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *obj)
+{
+ struct src_sec *src_symtab = &obj->secs[obj->symtab_sec_idx];
+ int i, err;
+
+ for (i = 1; i < obj->sec_cnt; i++) {
+ struct src_sec *src_sec, *src_linked_sec;
+ struct dst_sec *dst_sec, *dst_linked_sec;
+ Elf64_Rel *src_rel, *dst_rel;
+ int j, n;
+
+ src_sec = &obj->secs[i];
+ if (!is_relo_sec(src_sec))
+ continue;
+
+ /* shdr->sh_info points to relocatable section */
+ src_linked_sec = &obj->secs[src_sec->shdr->sh_info];
+ if (src_linked_sec->skipped)
+ continue;
+
+ dst_sec = find_dst_sec_by_name(linker, src_sec->sec_name);
+ if (!dst_sec) {
+ dst_sec = add_dst_sec(linker, src_sec->sec_name);
+ if (!dst_sec)
+ return -ENOMEM;
+ err = init_sec(linker, dst_sec, src_sec);
+ if (err) {
+ pr_warn("failed to init section '%s'\n", src_sec->sec_name);
+ return err;
+ }
+ } else if (!secs_match(dst_sec, src_sec)) {
+ pr_warn("sections %s are not compatible\n", src_sec->sec_name);
+ return -1;
+ }
+
+ /* shdr->sh_link points to SYMTAB */
+ dst_sec->shdr->sh_link = linker->symtab_sec_idx;
+
+ /* shdr->sh_info points to relocated section */
+ dst_linked_sec = &linker->secs[src_linked_sec->dst_id];
+ dst_sec->shdr->sh_info = dst_linked_sec->sec_idx;
+
+ src_sec->dst_id = dst_sec->id;
+ err = extend_sec(linker, dst_sec, src_sec);
+ if (err)
+ return err;
+
+ src_rel = src_sec->data->d_buf;
+ dst_rel = dst_sec->raw_data + src_sec->dst_off;
+ n = src_sec->shdr->sh_size / src_sec->shdr->sh_entsize;
+ for (j = 0; j < n; j++, src_rel++, dst_rel++) {
+ size_t src_sym_idx, dst_sym_idx, sym_type;
+ Elf64_Sym *src_sym;
+
+ src_sym_idx = ELF64_R_SYM(src_rel->r_info);
+ src_sym = src_symtab->data->d_buf + sizeof(*src_sym) * src_sym_idx;
+
+ dst_sym_idx = obj->sym_map[src_sym_idx];
+ dst_rel->r_offset += src_linked_sec->dst_off;
+ sym_type = ELF64_R_TYPE(src_rel->r_info);
+ dst_rel->r_info = ELF64_R_INFO(dst_sym_idx, sym_type);
+
+ if (ELF64_ST_TYPE(src_sym->st_info) == STT_SECTION) {
+ struct src_sec *sec = &obj->secs[src_sym->st_shndx];
+ struct bpf_insn *insn;
+
+ if (src_linked_sec->shdr->sh_flags & SHF_EXECINSTR) {
+ /* calls to the very first static function inside
+ * .text section at offset 0 will
+ * reference section symbol, not the
+ * function symbol. Fix that up,
+ * otherwise it won't be possible to
+ * relocate calls to two different
+ * static functions with the same name
+ * (rom two different object files)
+ */
+ insn = dst_linked_sec->raw_data + dst_rel->r_offset;
+ if (insn->code == (BPF_JMP | BPF_CALL))
+ insn->imm += sec->dst_off / sizeof(struct bpf_insn);
+ else
+ insn->imm += sec->dst_off;
+ } else {
+ pr_warn("relocation against STT_SECTION in non-exec section is not supported!\n");
+ return -EINVAL;
+ }
+ }
+
+ }
+ }
+
+ return 0;
+}
+
+static Elf64_Sym *find_sym_by_name(struct src_obj *obj, size_t sec_idx,
+ int sym_type, const char *sym_name)
+{
+ struct src_sec *symtab = &obj->secs[obj->symtab_sec_idx];
+ Elf64_Sym *sym = symtab->data->d_buf;
+ int i, n = symtab->shdr->sh_size / symtab->shdr->sh_entsize;
+ int str_sec_idx = symtab->shdr->sh_link;
+ const char *name;
+
+ for (i = 0; i < n; i++, sym++) {
+ if (sym->st_shndx != sec_idx)
+ continue;
+ if (ELF64_ST_TYPE(sym->st_info) != sym_type)
+ continue;
+
+ name = elf_strptr(obj->elf, str_sec_idx, sym->st_name);
+ if (!name)
+ return NULL;
+
+ if (strcmp(sym_name, name) != 0)
+ continue;
+
+ return sym;
+ }
+
+ return NULL;
+}
+
+static int linker_fixup_btf(struct src_obj *obj)
+{
+ const char *sec_name;
+ struct src_sec *sec;
+ int i, j, n, m;
+
+ if (!obj->btf)
+ return 0;
+
+ n = btf__type_cnt(obj->btf);
+ for (i = 1; i < n; i++) {
+ struct btf_var_secinfo *vi;
+ struct btf_type *t;
+
+ t = btf_type_by_id(obj->btf, i);
+ if (btf_kind(t) != BTF_KIND_DATASEC)
+ continue;
+
+ sec_name = btf__str_by_offset(obj->btf, t->name_off);
+ sec = find_src_sec_by_name(obj, sec_name);
+ if (sec) {
+ /* record actual section size, unless ephemeral */
+ if (sec->shdr)
+ t->size = sec->shdr->sh_size;
+ } else {
+ /* BTF can have some sections that are not represented
+ * in ELF, e.g., .kconfig, .ksyms, .extern, which are used
+ * for special extern variables.
+ *
+ * For all but one such special (ephemeral)
+ * sections, we pre-create "section shells" to be able
+ * to keep track of extra per-section metadata later
+ * (e.g., those BTF extern variables).
+ *
+ * .extern is even more special, though, because it
+ * contains extern variables that need to be resolved
+ * by static linker, not libbpf and kernel. When such
+ * externs are resolved, we are going to remove them
+ * from .extern BTF section and might end up not
+ * needing it at all. Each resolved extern should have
+ * matching non-extern VAR/FUNC in other sections.
+ *
+ * We do support leaving some of the externs
+ * unresolved, though, to support cases of building
+ * libraries, which will later be linked against final
+ * BPF applications. So if at finalization we still
+ * see unresolved externs, we'll create .extern
+ * section on our own.
+ */
+ if (strcmp(sec_name, BTF_EXTERN_SEC) == 0)
+ continue;
+
+ sec = add_src_sec(obj, sec_name);
+ if (!sec)
+ return -ENOMEM;
+
+ sec->ephemeral = true;
+ sec->sec_idx = 0; /* will match UNDEF shndx in ELF */
+ }
+
+ /* remember ELF section and its BTF type ID match */
+ sec->sec_type_id = i;
+
+ /* fix up variable offsets */
+ vi = btf_var_secinfos(t);
+ for (j = 0, m = btf_vlen(t); j < m; j++, vi++) {
+ const struct btf_type *vt = btf__type_by_id(obj->btf, vi->type);
+ const char *var_name = btf__str_by_offset(obj->btf, vt->name_off);
+ int var_linkage = btf_var(vt)->linkage;
+ Elf64_Sym *sym;
+
+ /* no need to patch up static or extern vars */
+ if (var_linkage != BTF_VAR_GLOBAL_ALLOCATED)
+ continue;
+
+ sym = find_sym_by_name(obj, sec->sec_idx, STT_OBJECT, var_name);
+ if (!sym) {
+ pr_warn("failed to find symbol for variable '%s' in section '%s'\n", var_name, sec_name);
+ return -ENOENT;
+ }
+
+ vi->offset = sym->st_value;
+ }
+ }
+
+ return 0;
+}
+
+static int remap_type_id(__u32 *type_id, void *ctx)
+{
+ int *id_map = ctx;
+ int new_id = id_map[*type_id];
+
+ /* Error out if the type wasn't remapped. Ignore VOID which stays VOID. */
+ if (new_id == 0 && *type_id != 0) {
+ pr_warn("failed to find new ID mapping for original BTF type ID %u\n", *type_id);
+ return -EINVAL;
+ }
+
+ *type_id = id_map[*type_id];
+
+ return 0;
+}
+
+static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj)
+{
+ const struct btf_type *t;
+ int i, j, n, start_id, id;
+ const char *name;
+
+ if (!obj->btf)
+ return 0;
+
+ start_id = btf__type_cnt(linker->btf);
+ n = btf__type_cnt(obj->btf);
+
+ obj->btf_type_map = calloc(n + 1, sizeof(int));
+ if (!obj->btf_type_map)
+ return -ENOMEM;
+
+ for (i = 1; i < n; i++) {
+ struct glob_sym *glob_sym = NULL;
+
+ t = btf__type_by_id(obj->btf, i);
+
+ /* DATASECs are handled specially below */
+ if (btf_kind(t) == BTF_KIND_DATASEC)
+ continue;
+
+ if (btf_is_non_static(t)) {
+ /* there should be glob_sym already */
+ name = btf__str_by_offset(obj->btf, t->name_off);
+ glob_sym = find_glob_sym(linker, name);
+
+ /* VARs without corresponding glob_sym are those that
+ * belong to skipped/deduplicated sections (i.e.,
+ * license and version), so just skip them
+ */
+ if (!glob_sym)
+ continue;
+
+ /* linker_append_elf_sym() might have requested
+ * updating underlying type ID, if extern was resolved
+ * to strong symbol or weak got upgraded to non-weak
+ */
+ if (glob_sym->underlying_btf_id == 0)
+ glob_sym->underlying_btf_id = -t->type;
+
+ /* globals from previous object files that match our
+ * VAR/FUNC already have a corresponding associated
+ * BTF type, so just make sure to use it
+ */
+ if (glob_sym->btf_id) {
+ /* reuse existing BTF type for global var/func */
+ obj->btf_type_map[i] = glob_sym->btf_id;
+ continue;
+ }
+ }
+
+ id = btf__add_type(linker->btf, obj->btf, t);
+ if (id < 0) {
+ pr_warn("failed to append BTF type #%d from file '%s'\n", i, obj->filename);
+ return id;
+ }
+
+ obj->btf_type_map[i] = id;
+
+ /* record just appended BTF type for var/func */
+ if (glob_sym) {
+ glob_sym->btf_id = id;
+ glob_sym->underlying_btf_id = -t->type;
+ }
+ }
+
+ /* remap all the types except DATASECs */
+ n = btf__type_cnt(linker->btf);
+ for (i = start_id; i < n; i++) {
+ struct btf_type *dst_t = btf_type_by_id(linker->btf, i);
+
+ if (btf_type_visit_type_ids(dst_t, remap_type_id, obj->btf_type_map))
+ return -EINVAL;
+ }
+
+ /* Rewrite VAR/FUNC underlying types (i.e., FUNC's FUNC_PROTO and VAR's
+ * actual type), if necessary
+ */
+ for (i = 0; i < linker->glob_sym_cnt; i++) {
+ struct glob_sym *glob_sym = &linker->glob_syms[i];
+ struct btf_type *glob_t;
+
+ if (glob_sym->underlying_btf_id >= 0)
+ continue;
+
+ glob_sym->underlying_btf_id = obj->btf_type_map[-glob_sym->underlying_btf_id];
+
+ glob_t = btf_type_by_id(linker->btf, glob_sym->btf_id);
+ glob_t->type = glob_sym->underlying_btf_id;
+ }
+
+ /* append DATASEC info */
+ for (i = 1; i < obj->sec_cnt; i++) {
+ struct src_sec *src_sec;
+ struct dst_sec *dst_sec;
+ const struct btf_var_secinfo *src_var;
+ struct btf_var_secinfo *dst_var;
+
+ src_sec = &obj->secs[i];
+ if (!src_sec->sec_type_id || src_sec->skipped)
+ continue;
+ dst_sec = &linker->secs[src_sec->dst_id];
+
+ /* Mark section as having BTF regardless of the presence of
+ * variables. In some cases compiler might generate empty BTF
+ * with no variables information. E.g., when promoting local
+ * array/structure variable initial values and BPF object
+ * file otherwise has no read-only static variables in
+ * .rodata. We need to preserve such empty BTF and just set
+ * correct section size.
+ */
+ dst_sec->has_btf = true;
+
+ t = btf__type_by_id(obj->btf, src_sec->sec_type_id);
+ src_var = btf_var_secinfos(t);
+ n = btf_vlen(t);
+ for (j = 0; j < n; j++, src_var++) {
+ void *sec_vars = dst_sec->sec_vars;
+ int new_id = obj->btf_type_map[src_var->type];
+ struct glob_sym *glob_sym = NULL;
+
+ t = btf_type_by_id(linker->btf, new_id);
+ if (btf_is_non_static(t)) {
+ name = btf__str_by_offset(linker->btf, t->name_off);
+ glob_sym = find_glob_sym(linker, name);
+ if (glob_sym->sec_id != dst_sec->id) {
+ pr_warn("global '%s': section mismatch %d vs %d\n",
+ name, glob_sym->sec_id, dst_sec->id);
+ return -EINVAL;
+ }
+ }
+
+ /* If there is already a member (VAR or FUNC) mapped
+ * to the same type, don't add a duplicate entry.
+ * This will happen when multiple object files define
+ * the same extern VARs/FUNCs.
+ */
+ if (glob_sym && glob_sym->var_idx >= 0) {
+ __s64 sz;
+
+ dst_var = &dst_sec->sec_vars[glob_sym->var_idx];
+ /* Because underlying BTF type might have
+ * changed, so might its size have changed, so
+ * re-calculate and update it in sec_var.
+ */
+ sz = btf__resolve_size(linker->btf, glob_sym->underlying_btf_id);
+ if (sz < 0) {
+ pr_warn("global '%s': failed to resolve size of underlying type: %d\n",
+ name, (int)sz);
+ return -EINVAL;
+ }
+ dst_var->size = sz;
+ continue;
+ }
+
+ sec_vars = libbpf_reallocarray(sec_vars,
+ dst_sec->sec_var_cnt + 1,
+ sizeof(*dst_sec->sec_vars));
+ if (!sec_vars)
+ return -ENOMEM;
+
+ dst_sec->sec_vars = sec_vars;
+ dst_sec->sec_var_cnt++;
+
+ dst_var = &dst_sec->sec_vars[dst_sec->sec_var_cnt - 1];
+ dst_var->type = obj->btf_type_map[src_var->type];
+ dst_var->size = src_var->size;
+ dst_var->offset = src_sec->dst_off + src_var->offset;
+
+ if (glob_sym)
+ glob_sym->var_idx = dst_sec->sec_var_cnt - 1;
+ }
+ }
+
+ return 0;
+}
+
+static void *add_btf_ext_rec(struct btf_ext_sec_data *ext_data, const void *src_rec)
+{
+ void *tmp;
+
+ tmp = libbpf_reallocarray(ext_data->recs, ext_data->rec_cnt + 1, ext_data->rec_sz);
+ if (!tmp)
+ return NULL;
+ ext_data->recs = tmp;
+
+ tmp += ext_data->rec_cnt * ext_data->rec_sz;
+ memcpy(tmp, src_rec, ext_data->rec_sz);
+
+ ext_data->rec_cnt++;
+
+ return tmp;
+}
+
+static int linker_append_btf_ext(struct bpf_linker *linker, struct src_obj *obj)
+{
+ const struct btf_ext_info_sec *ext_sec;
+ const char *sec_name, *s;
+ struct src_sec *src_sec;
+ struct dst_sec *dst_sec;
+ int rec_sz, str_off, i;
+
+ if (!obj->btf_ext)
+ return 0;
+
+ rec_sz = obj->btf_ext->func_info.rec_size;
+ for_each_btf_ext_sec(&obj->btf_ext->func_info, ext_sec) {
+ struct bpf_func_info_min *src_rec, *dst_rec;
+
+ sec_name = btf__name_by_offset(obj->btf, ext_sec->sec_name_off);
+ src_sec = find_src_sec_by_name(obj, sec_name);
+ if (!src_sec) {
+ pr_warn("can't find section '%s' referenced from .BTF.ext\n", sec_name);
+ return -EINVAL;
+ }
+ dst_sec = &linker->secs[src_sec->dst_id];
+
+ if (dst_sec->func_info.rec_sz == 0)
+ dst_sec->func_info.rec_sz = rec_sz;
+ if (dst_sec->func_info.rec_sz != rec_sz) {
+ pr_warn("incompatible .BTF.ext record sizes for section '%s'\n", sec_name);
+ return -EINVAL;
+ }
+
+ for_each_btf_ext_rec(&obj->btf_ext->func_info, ext_sec, i, src_rec) {
+ dst_rec = add_btf_ext_rec(&dst_sec->func_info, src_rec);
+ if (!dst_rec)
+ return -ENOMEM;
+
+ dst_rec->insn_off += src_sec->dst_off;
+ dst_rec->type_id = obj->btf_type_map[dst_rec->type_id];
+ }
+ }
+
+ rec_sz = obj->btf_ext->line_info.rec_size;
+ for_each_btf_ext_sec(&obj->btf_ext->line_info, ext_sec) {
+ struct bpf_line_info_min *src_rec, *dst_rec;
+
+ sec_name = btf__name_by_offset(obj->btf, ext_sec->sec_name_off);
+ src_sec = find_src_sec_by_name(obj, sec_name);
+ if (!src_sec) {
+ pr_warn("can't find section '%s' referenced from .BTF.ext\n", sec_name);
+ return -EINVAL;
+ }
+ dst_sec = &linker->secs[src_sec->dst_id];
+
+ if (dst_sec->line_info.rec_sz == 0)
+ dst_sec->line_info.rec_sz = rec_sz;
+ if (dst_sec->line_info.rec_sz != rec_sz) {
+ pr_warn("incompatible .BTF.ext record sizes for section '%s'\n", sec_name);
+ return -EINVAL;
+ }
+
+ for_each_btf_ext_rec(&obj->btf_ext->line_info, ext_sec, i, src_rec) {
+ dst_rec = add_btf_ext_rec(&dst_sec->line_info, src_rec);
+ if (!dst_rec)
+ return -ENOMEM;
+
+ dst_rec->insn_off += src_sec->dst_off;
+
+ s = btf__str_by_offset(obj->btf, src_rec->file_name_off);
+ str_off = btf__add_str(linker->btf, s);
+ if (str_off < 0)
+ return -ENOMEM;
+ dst_rec->file_name_off = str_off;
+
+ s = btf__str_by_offset(obj->btf, src_rec->line_off);
+ str_off = btf__add_str(linker->btf, s);
+ if (str_off < 0)
+ return -ENOMEM;
+ dst_rec->line_off = str_off;
+
+ /* dst_rec->line_col is fine */
+ }
+ }
+
+ rec_sz = obj->btf_ext->core_relo_info.rec_size;
+ for_each_btf_ext_sec(&obj->btf_ext->core_relo_info, ext_sec) {
+ struct bpf_core_relo *src_rec, *dst_rec;
+
+ sec_name = btf__name_by_offset(obj->btf, ext_sec->sec_name_off);
+ src_sec = find_src_sec_by_name(obj, sec_name);
+ if (!src_sec) {
+ pr_warn("can't find section '%s' referenced from .BTF.ext\n", sec_name);
+ return -EINVAL;
+ }
+ dst_sec = &linker->secs[src_sec->dst_id];
+
+ if (dst_sec->core_relo_info.rec_sz == 0)
+ dst_sec->core_relo_info.rec_sz = rec_sz;
+ if (dst_sec->core_relo_info.rec_sz != rec_sz) {
+ pr_warn("incompatible .BTF.ext record sizes for section '%s'\n", sec_name);
+ return -EINVAL;
+ }
+
+ for_each_btf_ext_rec(&obj->btf_ext->core_relo_info, ext_sec, i, src_rec) {
+ dst_rec = add_btf_ext_rec(&dst_sec->core_relo_info, src_rec);
+ if (!dst_rec)
+ return -ENOMEM;
+
+ dst_rec->insn_off += src_sec->dst_off;
+ dst_rec->type_id = obj->btf_type_map[dst_rec->type_id];
+
+ s = btf__str_by_offset(obj->btf, src_rec->access_str_off);
+ str_off = btf__add_str(linker->btf, s);
+ if (str_off < 0)
+ return -ENOMEM;
+ dst_rec->access_str_off = str_off;
+
+ /* dst_rec->kind is fine */
+ }
+ }
+
+ return 0;
+}
+
+int bpf_linker__finalize(struct bpf_linker *linker)
+{
+ struct dst_sec *sec;
+ size_t strs_sz;
+ const void *strs;
+ int err, i;
+
+ if (!linker->elf)
+ return libbpf_err(-EINVAL);
+
+ err = finalize_btf(linker);
+ if (err)
+ return libbpf_err(err);
+
+ /* Finalize strings */
+ strs_sz = strset__data_size(linker->strtab_strs);
+ strs = strset__data(linker->strtab_strs);
+
+ sec = &linker->secs[linker->strtab_sec_idx];
+ sec->data->d_align = 1;
+ sec->data->d_off = 0LL;
+ sec->data->d_buf = (void *)strs;
+ sec->data->d_type = ELF_T_BYTE;
+ sec->data->d_size = strs_sz;
+ sec->shdr->sh_size = strs_sz;
+
+ for (i = 1; i < linker->sec_cnt; i++) {
+ sec = &linker->secs[i];
+
+ /* STRTAB is handled specially above */
+ if (sec->sec_idx == linker->strtab_sec_idx)
+ continue;
+
+ /* special ephemeral sections (.ksyms, .kconfig, etc) */
+ if (!sec->scn)
+ continue;
+
+ sec->data->d_buf = sec->raw_data;
+ }
+
+ /* Finalize ELF layout */
+ if (elf_update(linker->elf, ELF_C_NULL) < 0) {
+ err = -errno;
+ pr_warn_elf("failed to finalize ELF layout");
+ return libbpf_err(err);
+ }
+
+ /* Write out final ELF contents */
+ if (elf_update(linker->elf, ELF_C_WRITE) < 0) {
+ err = -errno;
+ pr_warn_elf("failed to write ELF contents");
+ return libbpf_err(err);
+ }
+
+ elf_end(linker->elf);
+ close(linker->fd);
+
+ linker->elf = NULL;
+ linker->fd = -1;
+
+ return 0;
+}
+
+static int emit_elf_data_sec(struct bpf_linker *linker, const char *sec_name,
+ size_t align, const void *raw_data, size_t raw_sz)
+{
+ Elf_Scn *scn;
+ Elf_Data *data;
+ Elf64_Shdr *shdr;
+ int name_off;
+
+ name_off = strset__add_str(linker->strtab_strs, sec_name);
+ if (name_off < 0)
+ return name_off;
+
+ scn = elf_newscn(linker->elf);
+ if (!scn)
+ return -ENOMEM;
+ data = elf_newdata(scn);
+ if (!data)
+ return -ENOMEM;
+ shdr = elf64_getshdr(scn);
+ if (!shdr)
+ return -EINVAL;
+
+ shdr->sh_name = name_off;
+ shdr->sh_type = SHT_PROGBITS;
+ shdr->sh_flags = 0;
+ shdr->sh_size = raw_sz;
+ shdr->sh_link = 0;
+ shdr->sh_info = 0;
+ shdr->sh_addralign = align;
+ shdr->sh_entsize = 0;
+
+ data->d_type = ELF_T_BYTE;
+ data->d_size = raw_sz;
+ data->d_buf = (void *)raw_data;
+ data->d_align = align;
+ data->d_off = 0;
+
+ return 0;
+}
+
+static int finalize_btf(struct bpf_linker *linker)
+{
+ LIBBPF_OPTS(btf_dedup_opts, opts);
+ struct btf *btf = linker->btf;
+ const void *raw_data;
+ int i, j, id, err;
+ __u32 raw_sz;
+
+ /* bail out if no BTF data was produced */
+ if (btf__type_cnt(linker->btf) == 1)
+ return 0;
+
+ for (i = 1; i < linker->sec_cnt; i++) {
+ struct dst_sec *sec = &linker->secs[i];
+
+ if (!sec->has_btf)
+ continue;
+
+ id = btf__add_datasec(btf, sec->sec_name, sec->sec_sz);
+ if (id < 0) {
+ pr_warn("failed to add consolidated BTF type for datasec '%s': %d\n",
+ sec->sec_name, id);
+ return id;
+ }
+
+ for (j = 0; j < sec->sec_var_cnt; j++) {
+ struct btf_var_secinfo *vi = &sec->sec_vars[j];
+
+ if (btf__add_datasec_var_info(btf, vi->type, vi->offset, vi->size))
+ return -EINVAL;
+ }
+ }
+
+ err = finalize_btf_ext(linker);
+ if (err) {
+ pr_warn(".BTF.ext generation failed: %d\n", err);
+ return err;
+ }
+
+ opts.btf_ext = linker->btf_ext;
+ err = btf__dedup(linker->btf, &opts);
+ if (err) {
+ pr_warn("BTF dedup failed: %d\n", err);
+ return err;
+ }
+
+ /* Emit .BTF section */
+ raw_data = btf__raw_data(linker->btf, &raw_sz);
+ if (!raw_data)
+ return -ENOMEM;
+
+ err = emit_elf_data_sec(linker, BTF_ELF_SEC, 8, raw_data, raw_sz);
+ if (err) {
+ pr_warn("failed to write out .BTF ELF section: %d\n", err);
+ return err;
+ }
+
+ /* Emit .BTF.ext section */
+ if (linker->btf_ext) {
+ raw_data = btf_ext__raw_data(linker->btf_ext, &raw_sz);
+ if (!raw_data)
+ return -ENOMEM;
+
+ err = emit_elf_data_sec(linker, BTF_EXT_ELF_SEC, 8, raw_data, raw_sz);
+ if (err) {
+ pr_warn("failed to write out .BTF.ext ELF section: %d\n", err);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int emit_btf_ext_data(struct bpf_linker *linker, void *output,
+ const char *sec_name, struct btf_ext_sec_data *sec_data)
+{
+ struct btf_ext_info_sec *sec_info;
+ void *cur = output;
+ int str_off;
+ size_t sz;
+
+ if (!sec_data->rec_cnt)
+ return 0;
+
+ str_off = btf__add_str(linker->btf, sec_name);
+ if (str_off < 0)
+ return -ENOMEM;
+
+ sec_info = cur;
+ sec_info->sec_name_off = str_off;
+ sec_info->num_info = sec_data->rec_cnt;
+ cur += sizeof(struct btf_ext_info_sec);
+
+ sz = sec_data->rec_cnt * sec_data->rec_sz;
+ memcpy(cur, sec_data->recs, sz);
+ cur += sz;
+
+ return cur - output;
+}
+
+static int finalize_btf_ext(struct bpf_linker *linker)
+{
+ size_t funcs_sz = 0, lines_sz = 0, core_relos_sz = 0, total_sz = 0;
+ size_t func_rec_sz = 0, line_rec_sz = 0, core_relo_rec_sz = 0;
+ struct btf_ext_header *hdr;
+ void *data, *cur;
+ int i, err, sz;
+
+ /* validate that all sections have the same .BTF.ext record sizes
+ * and calculate total data size for each type of data (func info,
+ * line info, core relos)
+ */
+ for (i = 1; i < linker->sec_cnt; i++) {
+ struct dst_sec *sec = &linker->secs[i];
+
+ if (sec->func_info.rec_cnt) {
+ if (func_rec_sz == 0)
+ func_rec_sz = sec->func_info.rec_sz;
+ if (func_rec_sz != sec->func_info.rec_sz) {
+ pr_warn("mismatch in func_info record size %zu != %u\n",
+ func_rec_sz, sec->func_info.rec_sz);
+ return -EINVAL;
+ }
+
+ funcs_sz += sizeof(struct btf_ext_info_sec) + func_rec_sz * sec->func_info.rec_cnt;
+ }
+ if (sec->line_info.rec_cnt) {
+ if (line_rec_sz == 0)
+ line_rec_sz = sec->line_info.rec_sz;
+ if (line_rec_sz != sec->line_info.rec_sz) {
+ pr_warn("mismatch in line_info record size %zu != %u\n",
+ line_rec_sz, sec->line_info.rec_sz);
+ return -EINVAL;
+ }
+
+ lines_sz += sizeof(struct btf_ext_info_sec) + line_rec_sz * sec->line_info.rec_cnt;
+ }
+ if (sec->core_relo_info.rec_cnt) {
+ if (core_relo_rec_sz == 0)
+ core_relo_rec_sz = sec->core_relo_info.rec_sz;
+ if (core_relo_rec_sz != sec->core_relo_info.rec_sz) {
+ pr_warn("mismatch in core_relo_info record size %zu != %u\n",
+ core_relo_rec_sz, sec->core_relo_info.rec_sz);
+ return -EINVAL;
+ }
+
+ core_relos_sz += sizeof(struct btf_ext_info_sec) + core_relo_rec_sz * sec->core_relo_info.rec_cnt;
+ }
+ }
+
+ if (!funcs_sz && !lines_sz && !core_relos_sz)
+ return 0;
+
+ total_sz += sizeof(struct btf_ext_header);
+ if (funcs_sz) {
+ funcs_sz += sizeof(__u32); /* record size prefix */
+ total_sz += funcs_sz;
+ }
+ if (lines_sz) {
+ lines_sz += sizeof(__u32); /* record size prefix */
+ total_sz += lines_sz;
+ }
+ if (core_relos_sz) {
+ core_relos_sz += sizeof(__u32); /* record size prefix */
+ total_sz += core_relos_sz;
+ }
+
+ cur = data = calloc(1, total_sz);
+ if (!data)
+ return -ENOMEM;
+
+ hdr = cur;
+ hdr->magic = BTF_MAGIC;
+ hdr->version = BTF_VERSION;
+ hdr->flags = 0;
+ hdr->hdr_len = sizeof(struct btf_ext_header);
+ cur += sizeof(struct btf_ext_header);
+
+ /* All offsets are in bytes relative to the end of this header */
+ hdr->func_info_off = 0;
+ hdr->func_info_len = funcs_sz;
+ hdr->line_info_off = funcs_sz;
+ hdr->line_info_len = lines_sz;
+ hdr->core_relo_off = funcs_sz + lines_sz;
+ hdr->core_relo_len = core_relos_sz;
+
+ if (funcs_sz) {
+ *(__u32 *)cur = func_rec_sz;
+ cur += sizeof(__u32);
+
+ for (i = 1; i < linker->sec_cnt; i++) {
+ struct dst_sec *sec = &linker->secs[i];
+
+ sz = emit_btf_ext_data(linker, cur, sec->sec_name, &sec->func_info);
+ if (sz < 0) {
+ err = sz;
+ goto out;
+ }
+
+ cur += sz;
+ }
+ }
+
+ if (lines_sz) {
+ *(__u32 *)cur = line_rec_sz;
+ cur += sizeof(__u32);
+
+ for (i = 1; i < linker->sec_cnt; i++) {
+ struct dst_sec *sec = &linker->secs[i];
+
+ sz = emit_btf_ext_data(linker, cur, sec->sec_name, &sec->line_info);
+ if (sz < 0) {
+ err = sz;
+ goto out;
+ }
+
+ cur += sz;
+ }
+ }
+
+ if (core_relos_sz) {
+ *(__u32 *)cur = core_relo_rec_sz;
+ cur += sizeof(__u32);
+
+ for (i = 1; i < linker->sec_cnt; i++) {
+ struct dst_sec *sec = &linker->secs[i];
+
+ sz = emit_btf_ext_data(linker, cur, sec->sec_name, &sec->core_relo_info);
+ if (sz < 0) {
+ err = sz;
+ goto out;
+ }
+
+ cur += sz;
+ }
+ }
+
+ linker->btf_ext = btf_ext__new(data, total_sz);
+ err = libbpf_get_error(linker->btf_ext);
+ if (err) {
+ linker->btf_ext = NULL;
+ pr_warn("failed to parse final .BTF.ext data: %d\n", err);
+ goto out;
+ }
+
+out:
+ free(data);
+ return err;
+}
diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index 4dd73de00b6f..68a2def17175 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -4,8 +4,12 @@
#include <stdlib.h>
#include <memory.h>
#include <unistd.h>
+#include <arpa/inet.h>
#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/pkt_cls.h>
#include <linux/rtnetlink.h>
+#include <linux/netdev.h>
#include <sys/socket.h>
#include <errno.h>
#include <time.h>
@@ -24,13 +28,28 @@ typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb);
typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, libbpf_dump_nlmsg_t,
void *cookie);
+struct xdp_link_info {
+ __u32 prog_id;
+ __u32 drv_prog_id;
+ __u32 hw_prog_id;
+ __u32 skb_prog_id;
+ __u8 attach_mode;
+};
+
struct xdp_id_md {
int ifindex;
__u32 flags;
struct xdp_link_info info;
+ __u64 feature_flags;
};
-static int libbpf_netlink_open(__u32 *nl_pid)
+struct xdp_features_md {
+ int ifindex;
+ __u32 xdp_zc_max_segs;
+ __u64 flags;
+};
+
+static int libbpf_netlink_open(__u32 *nl_pid, int proto)
{
struct sockaddr_nl sa;
socklen_t addrlen;
@@ -40,7 +59,7 @@ static int libbpf_netlink_open(__u32 *nl_pid)
memset(&sa, 0, sizeof(sa));
sa.nl_family = AF_NETLINK;
- sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, proto);
if (sock < 0)
return -errno;
@@ -73,28 +92,86 @@ cleanup:
return ret;
}
-static int bpf_netlink_recv(int sock, __u32 nl_pid, int seq,
- __dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn,
- void *cookie)
+static void libbpf_netlink_close(int sock)
{
+ close(sock);
+}
+
+enum {
+ NL_CONT,
+ NL_NEXT,
+ NL_DONE,
+};
+
+static int netlink_recvmsg(int sock, struct msghdr *mhdr, int flags)
+{
+ int len;
+
+ do {
+ len = recvmsg(sock, mhdr, flags);
+ } while (len < 0 && (errno == EINTR || errno == EAGAIN));
+
+ if (len < 0)
+ return -errno;
+ return len;
+}
+
+static int alloc_iov(struct iovec *iov, int len)
+{
+ void *nbuf;
+
+ nbuf = realloc(iov->iov_base, len);
+ if (!nbuf)
+ return -ENOMEM;
+
+ iov->iov_base = nbuf;
+ iov->iov_len = len;
+ return 0;
+}
+
+static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq,
+ __dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn,
+ void *cookie)
+{
+ struct iovec iov = {};
+ struct msghdr mhdr = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
bool multipart = true;
struct nlmsgerr *err;
struct nlmsghdr *nh;
- char buf[4096];
int len, ret;
+ ret = alloc_iov(&iov, 4096);
+ if (ret)
+ goto done;
+
while (multipart) {
+start:
multipart = false;
- len = recv(sock, buf, sizeof(buf), 0);
+ len = netlink_recvmsg(sock, &mhdr, MSG_PEEK | MSG_TRUNC);
if (len < 0) {
- ret = -errno;
+ ret = len;
+ goto done;
+ }
+
+ if (len > iov.iov_len) {
+ ret = alloc_iov(&iov, len);
+ if (ret)
+ goto done;
+ }
+
+ len = netlink_recvmsg(sock, &mhdr, 0);
+ if (len < 0) {
+ ret = len;
goto done;
}
if (len == 0)
break;
- for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
+ for (nh = (struct nlmsghdr *)iov.iov_base; NLMSG_OK(nh, len);
nh = NLMSG_NEXT(nh, len)) {
if (nh->nlmsg_pid != nl_pid) {
ret = -LIBBPF_ERRNO__WRNGPID;
@@ -115,111 +192,153 @@ static int bpf_netlink_recv(int sock, __u32 nl_pid, int seq,
libbpf_nla_dump_errormsg(nh);
goto done;
case NLMSG_DONE:
- return 0;
+ ret = 0;
+ goto done;
default:
break;
}
if (_fn) {
ret = _fn(nh, fn, cookie);
- if (ret)
- return ret;
+ switch (ret) {
+ case NL_CONT:
+ break;
+ case NL_NEXT:
+ goto start;
+ case NL_DONE:
+ ret = 0;
+ goto done;
+ default:
+ goto done;
+ }
}
}
}
ret = 0;
done:
+ free(iov.iov_base);
return ret;
}
-static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd,
- __u32 flags)
+static int libbpf_netlink_send_recv(struct libbpf_nla_req *req,
+ int proto, __dump_nlmsg_t parse_msg,
+ libbpf_dump_nlmsg_t parse_attr,
+ void *cookie)
{
- int sock, seq = 0, ret;
- struct nlattr *nla, *nla_xdp;
- struct {
- struct nlmsghdr nh;
- struct ifinfomsg ifinfo;
- char attrbuf[64];
- } req;
__u32 nl_pid = 0;
+ int sock, ret;
- sock = libbpf_netlink_open(&nl_pid);
+ sock = libbpf_netlink_open(&nl_pid, proto);
if (sock < 0)
return sock;
+ req->nh.nlmsg_pid = 0;
+ req->nh.nlmsg_seq = time(NULL);
+
+ if (send(sock, req, req->nh.nlmsg_len, 0) < 0) {
+ ret = -errno;
+ goto out;
+ }
+
+ ret = libbpf_netlink_recv(sock, nl_pid, req->nh.nlmsg_seq,
+ parse_msg, parse_attr, cookie);
+out:
+ libbpf_netlink_close(sock);
+ return ret;
+}
+
+static int parse_genl_family_id(struct nlmsghdr *nh, libbpf_dump_nlmsg_t fn,
+ void *cookie)
+{
+ struct genlmsghdr *gnl = NLMSG_DATA(nh);
+ struct nlattr *na = (struct nlattr *)((void *)gnl + GENL_HDRLEN);
+ struct nlattr *tb[CTRL_ATTR_FAMILY_ID + 1];
+ __u16 *id = cookie;
+
+ libbpf_nla_parse(tb, CTRL_ATTR_FAMILY_ID, na,
+ NLMSG_PAYLOAD(nh, sizeof(*gnl)), NULL);
+ if (!tb[CTRL_ATTR_FAMILY_ID])
+ return NL_CONT;
+
+ *id = libbpf_nla_getattr_u16(tb[CTRL_ATTR_FAMILY_ID]);
+ return NL_DONE;
+}
+
+static int libbpf_netlink_resolve_genl_family_id(const char *name,
+ __u16 len, __u16 *id)
+{
+ struct libbpf_nla_req req = {
+ .nh.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN),
+ .nh.nlmsg_type = GENL_ID_CTRL,
+ .nh.nlmsg_flags = NLM_F_REQUEST,
+ .gnl.cmd = CTRL_CMD_GETFAMILY,
+ .gnl.version = 2,
+ };
+ int err;
+
+ err = nlattr_add(&req, CTRL_ATTR_FAMILY_NAME, name, len);
+ if (err < 0)
+ return err;
+
+ return libbpf_netlink_send_recv(&req, NETLINK_GENERIC,
+ parse_genl_family_id, NULL, id);
+}
+
+static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd,
+ __u32 flags)
+{
+ struct nlattr *nla;
+ int ret;
+ struct libbpf_nla_req req;
+
memset(&req, 0, sizeof(req));
- req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
- req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
- req.nh.nlmsg_type = RTM_SETLINK;
- req.nh.nlmsg_pid = 0;
- req.nh.nlmsg_seq = ++seq;
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_type = RTM_SETLINK;
req.ifinfo.ifi_family = AF_UNSPEC;
- req.ifinfo.ifi_index = ifindex;
-
- /* started nested attribute for XDP */
- nla = (struct nlattr *)(((char *)&req)
- + NLMSG_ALIGN(req.nh.nlmsg_len));
- nla->nla_type = NLA_F_NESTED | IFLA_XDP;
- nla->nla_len = NLA_HDRLEN;
-
- /* add XDP fd */
- nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
- nla_xdp->nla_type = IFLA_XDP_FD;
- nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
- memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
- nla->nla_len += nla_xdp->nla_len;
-
- /* if user passed in any flags, add those too */
+ req.ifinfo.ifi_index = ifindex;
+
+ nla = nlattr_begin_nested(&req, IFLA_XDP);
+ if (!nla)
+ return -EMSGSIZE;
+ ret = nlattr_add(&req, IFLA_XDP_FD, &fd, sizeof(fd));
+ if (ret < 0)
+ return ret;
if (flags) {
- nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
- nla_xdp->nla_type = IFLA_XDP_FLAGS;
- nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags);
- memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags));
- nla->nla_len += nla_xdp->nla_len;
+ ret = nlattr_add(&req, IFLA_XDP_FLAGS, &flags, sizeof(flags));
+ if (ret < 0)
+ return ret;
}
-
if (flags & XDP_FLAGS_REPLACE) {
- nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
- nla_xdp->nla_type = IFLA_XDP_EXPECTED_FD;
- nla_xdp->nla_len = NLA_HDRLEN + sizeof(old_fd);
- memcpy((char *)nla_xdp + NLA_HDRLEN, &old_fd, sizeof(old_fd));
- nla->nla_len += nla_xdp->nla_len;
+ ret = nlattr_add(&req, IFLA_XDP_EXPECTED_FD, &old_fd,
+ sizeof(old_fd));
+ if (ret < 0)
+ return ret;
}
+ nlattr_end_nested(&req, nla);
- req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
-
- if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
- ret = -errno;
- goto cleanup;
- }
- ret = bpf_netlink_recv(sock, nl_pid, seq, NULL, NULL, NULL);
-
-cleanup:
- close(sock);
- return ret;
+ return libbpf_netlink_send_recv(&req, NETLINK_ROUTE, NULL, NULL, NULL);
}
-int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
- const struct bpf_xdp_set_link_opts *opts)
+int bpf_xdp_attach(int ifindex, int prog_fd, __u32 flags, const struct bpf_xdp_attach_opts *opts)
{
- int old_fd = -1;
+ int old_prog_fd, err;
- if (!OPTS_VALID(opts, bpf_xdp_set_link_opts))
- return -EINVAL;
+ if (!OPTS_VALID(opts, bpf_xdp_attach_opts))
+ return libbpf_err(-EINVAL);
- if (OPTS_HAS(opts, old_fd)) {
- old_fd = OPTS_GET(opts, old_fd, -1);
+ old_prog_fd = OPTS_GET(opts, old_prog_fd, 0);
+ if (old_prog_fd)
flags |= XDP_FLAGS_REPLACE;
- }
+ else
+ old_prog_fd = -1;
- return __bpf_set_link_xdp_fd_replace(ifindex, fd,
- old_fd,
- flags);
+ err = __bpf_set_link_xdp_fd_replace(ifindex, prog_fd, old_prog_fd, flags);
+ return libbpf_err(err);
}
-int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
+int bpf_xdp_detach(int ifindex, __u32 flags, const struct bpf_xdp_attach_opts *opts)
{
- return __bpf_set_link_xdp_fd_replace(ifindex, fd, 0, flags);
+ return bpf_xdp_attach(ifindex, -1, flags, opts);
}
static int __dump_link_nlmsg(struct nlmsghdr *nlh,
@@ -231,6 +350,7 @@ static int __dump_link_nlmsg(struct nlmsghdr *nlh,
len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
attr = (struct nlattr *) ((void *) ifi + NLMSG_ALIGN(sizeof(*ifi)));
+
if (libbpf_nla_parse(tb, IFLA_MAX, attr, len, NULL) != 0)
return -LIBBPF_ERRNO__NLPARSE;
@@ -282,91 +402,521 @@ static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb)
return 0;
}
-static int libbpf_nl_get_link(int sock, unsigned int nl_pid,
- libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie);
+static int parse_xdp_features(struct nlmsghdr *nh, libbpf_dump_nlmsg_t fn,
+ void *cookie)
+{
+ struct genlmsghdr *gnl = NLMSG_DATA(nh);
+ struct nlattr *na = (struct nlattr *)((void *)gnl + GENL_HDRLEN);
+ struct nlattr *tb[NETDEV_CMD_MAX + 1];
+ struct xdp_features_md *md = cookie;
+ __u32 ifindex;
+
+ libbpf_nla_parse(tb, NETDEV_CMD_MAX, na,
+ NLMSG_PAYLOAD(nh, sizeof(*gnl)), NULL);
+
+ if (!tb[NETDEV_A_DEV_IFINDEX] || !tb[NETDEV_A_DEV_XDP_FEATURES])
+ return NL_CONT;
+
+ ifindex = libbpf_nla_getattr_u32(tb[NETDEV_A_DEV_IFINDEX]);
+ if (ifindex != md->ifindex)
+ return NL_CONT;
+
+ md->flags = libbpf_nla_getattr_u64(tb[NETDEV_A_DEV_XDP_FEATURES]);
+ if (tb[NETDEV_A_DEV_XDP_ZC_MAX_SEGS])
+ md->xdp_zc_max_segs =
+ libbpf_nla_getattr_u32(tb[NETDEV_A_DEV_XDP_ZC_MAX_SEGS]);
+ return NL_DONE;
+}
-int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
- size_t info_size, __u32 flags)
+int bpf_xdp_query(int ifindex, int xdp_flags, struct bpf_xdp_query_opts *opts)
{
+ struct libbpf_nla_req req = {
+ .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
+ .nh.nlmsg_type = RTM_GETLINK,
+ .nh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+ .ifinfo.ifi_family = AF_PACKET,
+ };
struct xdp_id_md xdp_id = {};
- int sock, ret;
- __u32 nl_pid = 0;
- __u32 mask;
+ struct xdp_features_md md = {
+ .ifindex = ifindex,
+ };
+ __u16 id;
+ int err;
- if (flags & ~XDP_FLAGS_MASK || !info_size)
- return -EINVAL;
+ if (!OPTS_VALID(opts, bpf_xdp_query_opts))
+ return libbpf_err(-EINVAL);
- /* Check whether the single {HW,DRV,SKB} mode is set */
- flags &= (XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE);
- mask = flags - 1;
- if (flags && flags & mask)
- return -EINVAL;
+ if (xdp_flags & ~XDP_FLAGS_MASK)
+ return libbpf_err(-EINVAL);
- sock = libbpf_netlink_open(&nl_pid);
- if (sock < 0)
- return sock;
+ /* Check whether the single {HW,DRV,SKB} mode is set */
+ xdp_flags &= XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE;
+ if (xdp_flags & (xdp_flags - 1))
+ return libbpf_err(-EINVAL);
xdp_id.ifindex = ifindex;
- xdp_id.flags = flags;
+ xdp_id.flags = xdp_flags;
+
+ err = libbpf_netlink_send_recv(&req, NETLINK_ROUTE, __dump_link_nlmsg,
+ get_xdp_info, &xdp_id);
+ if (err)
+ return libbpf_err(err);
- ret = libbpf_nl_get_link(sock, nl_pid, get_xdp_info, &xdp_id);
- if (!ret) {
- size_t sz = min(info_size, sizeof(xdp_id.info));
+ OPTS_SET(opts, prog_id, xdp_id.info.prog_id);
+ OPTS_SET(opts, drv_prog_id, xdp_id.info.drv_prog_id);
+ OPTS_SET(opts, hw_prog_id, xdp_id.info.hw_prog_id);
+ OPTS_SET(opts, skb_prog_id, xdp_id.info.skb_prog_id);
+ OPTS_SET(opts, attach_mode, xdp_id.info.attach_mode);
- memcpy(info, &xdp_id.info, sz);
- memset((void *) info + sz, 0, info_size - sz);
+ if (!OPTS_HAS(opts, feature_flags))
+ return 0;
+
+ err = libbpf_netlink_resolve_genl_family_id("netdev", sizeof("netdev"), &id);
+ if (err < 0) {
+ if (err == -ENOENT) {
+ opts->feature_flags = 0;
+ goto skip_feature_flags;
+ }
+ return libbpf_err(err);
}
- close(sock);
- return ret;
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
+ req.nh.nlmsg_flags = NLM_F_REQUEST;
+ req.nh.nlmsg_type = id;
+ req.gnl.cmd = NETDEV_CMD_DEV_GET;
+ req.gnl.version = 2;
+
+ err = nlattr_add(&req, NETDEV_A_DEV_IFINDEX, &ifindex, sizeof(ifindex));
+ if (err < 0)
+ return libbpf_err(err);
+
+ err = libbpf_netlink_send_recv(&req, NETLINK_GENERIC,
+ parse_xdp_features, NULL, &md);
+ if (err)
+ return libbpf_err(err);
+
+ OPTS_SET(opts, feature_flags, md.flags);
+ OPTS_SET(opts, xdp_zc_max_segs, md.xdp_zc_max_segs);
+
+skip_feature_flags:
+ return 0;
}
-static __u32 get_xdp_id(struct xdp_link_info *info, __u32 flags)
+int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id)
{
+ LIBBPF_OPTS(bpf_xdp_query_opts, opts);
+ int ret;
+
+ ret = bpf_xdp_query(ifindex, flags, &opts);
+ if (ret)
+ return libbpf_err(ret);
+
flags &= XDP_FLAGS_MODES;
- if (info->attach_mode != XDP_ATTACHED_MULTI && !flags)
- return info->prog_id;
- if (flags & XDP_FLAGS_DRV_MODE)
- return info->drv_prog_id;
- if (flags & XDP_FLAGS_HW_MODE)
- return info->hw_prog_id;
- if (flags & XDP_FLAGS_SKB_MODE)
- return info->skb_prog_id;
+ if (opts.attach_mode != XDP_ATTACHED_MULTI && !flags)
+ *prog_id = opts.prog_id;
+ else if (flags & XDP_FLAGS_DRV_MODE)
+ *prog_id = opts.drv_prog_id;
+ else if (flags & XDP_FLAGS_HW_MODE)
+ *prog_id = opts.hw_prog_id;
+ else if (flags & XDP_FLAGS_SKB_MODE)
+ *prog_id = opts.skb_prog_id;
+ else
+ *prog_id = 0;
return 0;
}
-int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags)
+
+typedef int (*qdisc_config_t)(struct libbpf_nla_req *req);
+
+static int clsact_config(struct libbpf_nla_req *req)
{
- struct xdp_link_info info;
+ req->tc.tcm_parent = TC_H_CLSACT;
+ req->tc.tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0);
+
+ return nlattr_add(req, TCA_KIND, "clsact", sizeof("clsact"));
+}
+
+static int attach_point_to_config(struct bpf_tc_hook *hook,
+ qdisc_config_t *config)
+{
+ switch (OPTS_GET(hook, attach_point, 0)) {
+ case BPF_TC_INGRESS:
+ case BPF_TC_EGRESS:
+ case BPF_TC_INGRESS | BPF_TC_EGRESS:
+ if (OPTS_GET(hook, parent, 0))
+ return -EINVAL;
+ *config = &clsact_config;
+ return 0;
+ case BPF_TC_CUSTOM:
+ return -EOPNOTSUPP;
+ default:
+ return -EINVAL;
+ }
+}
+
+static int tc_get_tcm_parent(enum bpf_tc_attach_point attach_point,
+ __u32 *parent)
+{
+ switch (attach_point) {
+ case BPF_TC_INGRESS:
+ case BPF_TC_EGRESS:
+ if (*parent)
+ return -EINVAL;
+ *parent = TC_H_MAKE(TC_H_CLSACT,
+ attach_point == BPF_TC_INGRESS ?
+ TC_H_MIN_INGRESS : TC_H_MIN_EGRESS);
+ break;
+ case BPF_TC_CUSTOM:
+ if (!*parent)
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int tc_qdisc_modify(struct bpf_tc_hook *hook, int cmd, int flags)
+{
+ qdisc_config_t config;
int ret;
+ struct libbpf_nla_req req;
- ret = bpf_get_link_xdp_info(ifindex, &info, sizeof(info), flags);
- if (!ret)
- *prog_id = get_xdp_id(&info, flags);
+ ret = attach_point_to_config(hook, &config);
+ if (ret < 0)
+ return ret;
- return ret;
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
+ req.nh.nlmsg_type = cmd;
+ req.tc.tcm_family = AF_UNSPEC;
+ req.tc.tcm_ifindex = OPTS_GET(hook, ifindex, 0);
+
+ ret = config(&req);
+ if (ret < 0)
+ return ret;
+
+ return libbpf_netlink_send_recv(&req, NETLINK_ROUTE, NULL, NULL, NULL);
}
-int libbpf_nl_get_link(int sock, unsigned int nl_pid,
- libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie)
+static int tc_qdisc_create_excl(struct bpf_tc_hook *hook)
{
- struct {
- struct nlmsghdr nlh;
- struct ifinfomsg ifm;
- } req = {
- .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
- .nlh.nlmsg_type = RTM_GETLINK,
- .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
- .ifm.ifi_family = AF_PACKET,
- };
- int seq = time(NULL);
+ return tc_qdisc_modify(hook, RTM_NEWQDISC, NLM_F_CREATE | NLM_F_EXCL);
+}
+
+static int tc_qdisc_delete(struct bpf_tc_hook *hook)
+{
+ return tc_qdisc_modify(hook, RTM_DELQDISC, 0);
+}
+
+int bpf_tc_hook_create(struct bpf_tc_hook *hook)
+{
+ int ret;
+
+ if (!hook || !OPTS_VALID(hook, bpf_tc_hook) ||
+ OPTS_GET(hook, ifindex, 0) <= 0)
+ return libbpf_err(-EINVAL);
+
+ ret = tc_qdisc_create_excl(hook);
+ return libbpf_err(ret);
+}
+
+static int __bpf_tc_detach(const struct bpf_tc_hook *hook,
+ const struct bpf_tc_opts *opts,
+ const bool flush);
+
+int bpf_tc_hook_destroy(struct bpf_tc_hook *hook)
+{
+ if (!hook || !OPTS_VALID(hook, bpf_tc_hook) ||
+ OPTS_GET(hook, ifindex, 0) <= 0)
+ return libbpf_err(-EINVAL);
+
+ switch (OPTS_GET(hook, attach_point, 0)) {
+ case BPF_TC_INGRESS:
+ case BPF_TC_EGRESS:
+ return libbpf_err(__bpf_tc_detach(hook, NULL, true));
+ case BPF_TC_INGRESS | BPF_TC_EGRESS:
+ return libbpf_err(tc_qdisc_delete(hook));
+ case BPF_TC_CUSTOM:
+ return libbpf_err(-EOPNOTSUPP);
+ default:
+ return libbpf_err(-EINVAL);
+ }
+}
+
+struct bpf_cb_ctx {
+ struct bpf_tc_opts *opts;
+ bool processed;
+};
+
+static int __get_tc_info(void *cookie, struct tcmsg *tc, struct nlattr **tb,
+ bool unicast)
+{
+ struct nlattr *tbb[TCA_BPF_MAX + 1];
+ struct bpf_cb_ctx *info = cookie;
+
+ if (!info || !info->opts)
+ return -EINVAL;
+ if (unicast && info->processed)
+ return -EINVAL;
+ if (!tb[TCA_OPTIONS])
+ return NL_CONT;
+
+ libbpf_nla_parse_nested(tbb, TCA_BPF_MAX, tb[TCA_OPTIONS], NULL);
+ if (!tbb[TCA_BPF_ID])
+ return -EINVAL;
+
+ OPTS_SET(info->opts, prog_id, libbpf_nla_getattr_u32(tbb[TCA_BPF_ID]));
+ OPTS_SET(info->opts, handle, tc->tcm_handle);
+ OPTS_SET(info->opts, priority, TC_H_MAJ(tc->tcm_info) >> 16);
+
+ info->processed = true;
+ return unicast ? NL_NEXT : NL_DONE;
+}
+
+static int get_tc_info(struct nlmsghdr *nh, libbpf_dump_nlmsg_t fn,
+ void *cookie)
+{
+ struct tcmsg *tc = NLMSG_DATA(nh);
+ struct nlattr *tb[TCA_MAX + 1];
+
+ libbpf_nla_parse(tb, TCA_MAX,
+ (struct nlattr *)((void *)tc + NLMSG_ALIGN(sizeof(*tc))),
+ NLMSG_PAYLOAD(nh, sizeof(*tc)), NULL);
+ if (!tb[TCA_KIND])
+ return NL_CONT;
+ return __get_tc_info(cookie, tc, tb, nh->nlmsg_flags & NLM_F_ECHO);
+}
- req.nlh.nlmsg_seq = seq;
- if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
+static int tc_add_fd_and_name(struct libbpf_nla_req *req, int fd)
+{
+ struct bpf_prog_info info;
+ __u32 info_len = sizeof(info);
+ char name[256];
+ int len, ret;
+
+ memset(&info, 0, info_len);
+ ret = bpf_prog_get_info_by_fd(fd, &info, &info_len);
+ if (ret < 0)
+ return ret;
+
+ ret = nlattr_add(req, TCA_BPF_FD, &fd, sizeof(fd));
+ if (ret < 0)
+ return ret;
+ len = snprintf(name, sizeof(name), "%s:[%u]", info.name, info.id);
+ if (len < 0)
return -errno;
+ if (len >= sizeof(name))
+ return -ENAMETOOLONG;
+ return nlattr_add(req, TCA_BPF_NAME, name, len + 1);
+}
+
+int bpf_tc_attach(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
+{
+ __u32 protocol, bpf_flags, handle, priority, parent, prog_id, flags;
+ int ret, ifindex, attach_point, prog_fd;
+ struct bpf_cb_ctx info = {};
+ struct libbpf_nla_req req;
+ struct nlattr *nla;
+
+ if (!hook || !opts ||
+ !OPTS_VALID(hook, bpf_tc_hook) ||
+ !OPTS_VALID(opts, bpf_tc_opts))
+ return libbpf_err(-EINVAL);
+
+ ifindex = OPTS_GET(hook, ifindex, 0);
+ parent = OPTS_GET(hook, parent, 0);
+ attach_point = OPTS_GET(hook, attach_point, 0);
+
+ handle = OPTS_GET(opts, handle, 0);
+ priority = OPTS_GET(opts, priority, 0);
+ prog_fd = OPTS_GET(opts, prog_fd, 0);
+ prog_id = OPTS_GET(opts, prog_id, 0);
+ flags = OPTS_GET(opts, flags, 0);
+
+ if (ifindex <= 0 || !prog_fd || prog_id)
+ return libbpf_err(-EINVAL);
+ if (priority > UINT16_MAX)
+ return libbpf_err(-EINVAL);
+ if (flags & ~BPF_TC_F_REPLACE)
+ return libbpf_err(-EINVAL);
+
+ flags = (flags & BPF_TC_F_REPLACE) ? NLM_F_REPLACE : NLM_F_EXCL;
+ protocol = ETH_P_ALL;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE |
+ NLM_F_ECHO | flags;
+ req.nh.nlmsg_type = RTM_NEWTFILTER;
+ req.tc.tcm_family = AF_UNSPEC;
+ req.tc.tcm_ifindex = ifindex;
+ req.tc.tcm_handle = handle;
+ req.tc.tcm_info = TC_H_MAKE(priority << 16, htons(protocol));
+
+ ret = tc_get_tcm_parent(attach_point, &parent);
+ if (ret < 0)
+ return libbpf_err(ret);
+ req.tc.tcm_parent = parent;
+
+ ret = nlattr_add(&req, TCA_KIND, "bpf", sizeof("bpf"));
+ if (ret < 0)
+ return libbpf_err(ret);
+ nla = nlattr_begin_nested(&req, TCA_OPTIONS);
+ if (!nla)
+ return libbpf_err(-EMSGSIZE);
+ ret = tc_add_fd_and_name(&req, prog_fd);
+ if (ret < 0)
+ return libbpf_err(ret);
+ bpf_flags = TCA_BPF_FLAG_ACT_DIRECT;
+ ret = nlattr_add(&req, TCA_BPF_FLAGS, &bpf_flags, sizeof(bpf_flags));
+ if (ret < 0)
+ return libbpf_err(ret);
+ nlattr_end_nested(&req, nla);
+
+ info.opts = opts;
+
+ ret = libbpf_netlink_send_recv(&req, NETLINK_ROUTE, get_tc_info, NULL,
+ &info);
+ if (ret < 0)
+ return libbpf_err(ret);
+ if (!info.processed)
+ return libbpf_err(-ENOENT);
+ return ret;
+}
+
+static int __bpf_tc_detach(const struct bpf_tc_hook *hook,
+ const struct bpf_tc_opts *opts,
+ const bool flush)
+{
+ __u32 protocol = 0, handle, priority, parent, prog_id, flags;
+ int ret, ifindex, attach_point, prog_fd;
+ struct libbpf_nla_req req;
+
+ if (!hook ||
+ !OPTS_VALID(hook, bpf_tc_hook) ||
+ !OPTS_VALID(opts, bpf_tc_opts))
+ return -EINVAL;
+
+ ifindex = OPTS_GET(hook, ifindex, 0);
+ parent = OPTS_GET(hook, parent, 0);
+ attach_point = OPTS_GET(hook, attach_point, 0);
- return bpf_netlink_recv(sock, nl_pid, seq, __dump_link_nlmsg,
- dump_link_nlmsg, cookie);
+ handle = OPTS_GET(opts, handle, 0);
+ priority = OPTS_GET(opts, priority, 0);
+ prog_fd = OPTS_GET(opts, prog_fd, 0);
+ prog_id = OPTS_GET(opts, prog_id, 0);
+ flags = OPTS_GET(opts, flags, 0);
+
+ if (ifindex <= 0 || flags || prog_fd || prog_id)
+ return -EINVAL;
+ if (priority > UINT16_MAX)
+ return -EINVAL;
+ if (!flush) {
+ if (!handle || !priority)
+ return -EINVAL;
+ protocol = ETH_P_ALL;
+ } else {
+ if (handle || priority)
+ return -EINVAL;
+ }
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_type = RTM_DELTFILTER;
+ req.tc.tcm_family = AF_UNSPEC;
+ req.tc.tcm_ifindex = ifindex;
+ if (!flush) {
+ req.tc.tcm_handle = handle;
+ req.tc.tcm_info = TC_H_MAKE(priority << 16, htons(protocol));
+ }
+
+ ret = tc_get_tcm_parent(attach_point, &parent);
+ if (ret < 0)
+ return ret;
+ req.tc.tcm_parent = parent;
+
+ if (!flush) {
+ ret = nlattr_add(&req, TCA_KIND, "bpf", sizeof("bpf"));
+ if (ret < 0)
+ return ret;
+ }
+
+ return libbpf_netlink_send_recv(&req, NETLINK_ROUTE, NULL, NULL, NULL);
+}
+
+int bpf_tc_detach(const struct bpf_tc_hook *hook,
+ const struct bpf_tc_opts *opts)
+{
+ int ret;
+
+ if (!opts)
+ return libbpf_err(-EINVAL);
+
+ ret = __bpf_tc_detach(hook, opts, false);
+ return libbpf_err(ret);
+}
+
+int bpf_tc_query(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
+{
+ __u32 protocol, handle, priority, parent, prog_id, flags;
+ int ret, ifindex, attach_point, prog_fd;
+ struct bpf_cb_ctx info = {};
+ struct libbpf_nla_req req;
+
+ if (!hook || !opts ||
+ !OPTS_VALID(hook, bpf_tc_hook) ||
+ !OPTS_VALID(opts, bpf_tc_opts))
+ return libbpf_err(-EINVAL);
+
+ ifindex = OPTS_GET(hook, ifindex, 0);
+ parent = OPTS_GET(hook, parent, 0);
+ attach_point = OPTS_GET(hook, attach_point, 0);
+
+ handle = OPTS_GET(opts, handle, 0);
+ priority = OPTS_GET(opts, priority, 0);
+ prog_fd = OPTS_GET(opts, prog_fd, 0);
+ prog_id = OPTS_GET(opts, prog_id, 0);
+ flags = OPTS_GET(opts, flags, 0);
+
+ if (ifindex <= 0 || flags || prog_fd || prog_id ||
+ !handle || !priority)
+ return libbpf_err(-EINVAL);
+ if (priority > UINT16_MAX)
+ return libbpf_err(-EINVAL);
+
+ protocol = ETH_P_ALL;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
+ req.nh.nlmsg_flags = NLM_F_REQUEST;
+ req.nh.nlmsg_type = RTM_GETTFILTER;
+ req.tc.tcm_family = AF_UNSPEC;
+ req.tc.tcm_ifindex = ifindex;
+ req.tc.tcm_handle = handle;
+ req.tc.tcm_info = TC_H_MAKE(priority << 16, htons(protocol));
+
+ ret = tc_get_tcm_parent(attach_point, &parent);
+ if (ret < 0)
+ return libbpf_err(ret);
+ req.tc.tcm_parent = parent;
+
+ ret = nlattr_add(&req, TCA_KIND, "bpf", sizeof("bpf"));
+ if (ret < 0)
+ return libbpf_err(ret);
+
+ info.opts = opts;
+
+ ret = libbpf_netlink_send_recv(&req, NETLINK_ROUTE, get_tc_info, NULL,
+ &info);
+ if (ret < 0)
+ return libbpf_err(ret);
+ if (!info.processed)
+ return libbpf_err(-ENOENT);
+ return ret;
}
diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c
index b607fa9852b1..975e265eab3b 100644
--- a/tools/lib/bpf/nlattr.c
+++ b/tools/lib/bpf/nlattr.c
@@ -27,12 +27,12 @@ static struct nlattr *nla_next(const struct nlattr *nla, int *remaining)
int totlen = NLA_ALIGN(nla->nla_len);
*remaining -= totlen;
- return (struct nlattr *) ((char *) nla + totlen);
+ return (struct nlattr *)((void *)nla + totlen);
}
static int nla_ok(const struct nlattr *nla, int remaining)
{
- return remaining >= sizeof(*nla) &&
+ return remaining >= (int)sizeof(*nla) &&
nla->nla_len >= sizeof(*nla) &&
nla->nla_len <= remaining;
}
@@ -178,7 +178,7 @@ int libbpf_nla_dump_errormsg(struct nlmsghdr *nlh)
hlen += nlmsg_len(&err->msg);
attr = (struct nlattr *) ((void *) err + hlen);
- alen = nlh->nlmsg_len - hlen;
+ alen = (void *)nlh + nlh->nlmsg_len - (void *)attr;
if (libbpf_nla_parse(tb, NLMSGERR_ATTR_MAX, attr, alen,
extack_policy) != 0) {
diff --git a/tools/lib/bpf/nlattr.h b/tools/lib/bpf/nlattr.h
index 6cc3ac91690f..d92d1c1de700 100644
--- a/tools/lib/bpf/nlattr.h
+++ b/tools/lib/bpf/nlattr.h
@@ -10,7 +10,12 @@
#define __LIBBPF_NLATTR_H
#include <stdint.h>
+#include <string.h>
+#include <errno.h>
#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/genetlink.h>
+
/* avoid multiple definition of netlink features */
#define __LINUX_NETLINK_H
@@ -49,6 +54,16 @@ struct libbpf_nla_policy {
uint16_t maxlen;
};
+struct libbpf_nla_req {
+ struct nlmsghdr nh;
+ union {
+ struct ifinfomsg ifinfo;
+ struct tcmsg tc;
+ struct genlmsghdr gnl;
+ };
+ char buf[128];
+};
+
/**
* @ingroup attr
* Iterate over a stream of attributes
@@ -68,7 +83,7 @@ struct libbpf_nla_policy {
*/
static inline void *libbpf_nla_data(const struct nlattr *nla)
{
- return (char *) nla + NLA_HDRLEN;
+ return (void *)nla + NLA_HDRLEN;
}
static inline uint8_t libbpf_nla_getattr_u8(const struct nlattr *nla)
@@ -76,11 +91,21 @@ static inline uint8_t libbpf_nla_getattr_u8(const struct nlattr *nla)
return *(uint8_t *)libbpf_nla_data(nla);
}
+static inline uint16_t libbpf_nla_getattr_u16(const struct nlattr *nla)
+{
+ return *(uint16_t *)libbpf_nla_data(nla);
+}
+
static inline uint32_t libbpf_nla_getattr_u32(const struct nlattr *nla)
{
return *(uint32_t *)libbpf_nla_data(nla);
}
+static inline uint64_t libbpf_nla_getattr_u64(const struct nlattr *nla)
+{
+ return *(uint64_t *)libbpf_nla_data(nla);
+}
+
static inline const char *libbpf_nla_getattr_str(const struct nlattr *nla)
{
return (const char *)libbpf_nla_data(nla);
@@ -103,4 +128,49 @@ int libbpf_nla_parse_nested(struct nlattr *tb[], int maxtype,
int libbpf_nla_dump_errormsg(struct nlmsghdr *nlh);
+static inline struct nlattr *nla_data(struct nlattr *nla)
+{
+ return (struct nlattr *)((void *)nla + NLA_HDRLEN);
+}
+
+static inline struct nlattr *req_tail(struct libbpf_nla_req *req)
+{
+ return (struct nlattr *)((void *)req + NLMSG_ALIGN(req->nh.nlmsg_len));
+}
+
+static inline int nlattr_add(struct libbpf_nla_req *req, int type,
+ const void *data, int len)
+{
+ struct nlattr *nla;
+
+ if (NLMSG_ALIGN(req->nh.nlmsg_len) + NLA_ALIGN(NLA_HDRLEN + len) > sizeof(*req))
+ return -EMSGSIZE;
+ if (!!data != !!len)
+ return -EINVAL;
+
+ nla = req_tail(req);
+ nla->nla_type = type;
+ nla->nla_len = NLA_HDRLEN + len;
+ if (data)
+ memcpy(nla_data(nla), data, len);
+ req->nh.nlmsg_len = NLMSG_ALIGN(req->nh.nlmsg_len) + NLA_ALIGN(nla->nla_len);
+ return 0;
+}
+
+static inline struct nlattr *nlattr_begin_nested(struct libbpf_nla_req *req, int type)
+{
+ struct nlattr *tail;
+
+ tail = req_tail(req);
+ if (nlattr_add(req, type | NLA_F_NESTED, NULL, 0))
+ return NULL;
+ return tail;
+}
+
+static inline void nlattr_end_nested(struct libbpf_nla_req *req,
+ struct nlattr *tail)
+{
+ tail->nla_len = (void *)req_tail(req) - (void *)tail;
+}
+
#endif /* __LIBBPF_NLATTR_H */
diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c
new file mode 100644
index 000000000000..63a4d5ad12d1
--- /dev/null
+++ b/tools/lib/bpf/relo_core.c
@@ -0,0 +1,1687 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2019 Facebook */
+
+#ifdef __KERNEL__
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/string.h>
+#include <linux/bpf_verifier.h>
+#include "relo_core.h"
+
+static const char *btf_kind_str(const struct btf_type *t)
+{
+ return btf_type_str(t);
+}
+
+static bool is_ldimm64_insn(struct bpf_insn *insn)
+{
+ return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
+}
+
+static const struct btf_type *
+skip_mods_and_typedefs(const struct btf *btf, u32 id, u32 *res_id)
+{
+ return btf_type_skip_modifiers(btf, id, res_id);
+}
+
+static const char *btf__name_by_offset(const struct btf *btf, u32 offset)
+{
+ return btf_name_by_offset(btf, offset);
+}
+
+static s64 btf__resolve_size(const struct btf *btf, u32 type_id)
+{
+ const struct btf_type *t;
+ int size;
+
+ t = btf_type_by_id(btf, type_id);
+ t = btf_resolve_size(btf, t, &size);
+ if (IS_ERR(t))
+ return PTR_ERR(t);
+ return size;
+}
+
+enum libbpf_print_level {
+ LIBBPF_WARN,
+ LIBBPF_INFO,
+ LIBBPF_DEBUG,
+};
+
+#undef pr_warn
+#undef pr_info
+#undef pr_debug
+#define pr_warn(fmt, log, ...) bpf_log((void *)log, fmt, "", ##__VA_ARGS__)
+#define pr_info(fmt, log, ...) bpf_log((void *)log, fmt, "", ##__VA_ARGS__)
+#define pr_debug(fmt, log, ...) bpf_log((void *)log, fmt, "", ##__VA_ARGS__)
+#define libbpf_print(level, fmt, ...) bpf_log((void *)prog_name, fmt, ##__VA_ARGS__)
+#else
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <ctype.h>
+#include <linux/err.h>
+
+#include "libbpf.h"
+#include "bpf.h"
+#include "btf.h"
+#include "str_error.h"
+#include "libbpf_internal.h"
+#endif
+
+static bool is_flex_arr(const struct btf *btf,
+ const struct bpf_core_accessor *acc,
+ const struct btf_array *arr)
+{
+ const struct btf_type *t;
+
+ /* not a flexible array, if not inside a struct or has non-zero size */
+ if (!acc->name || arr->nelems > 0)
+ return false;
+
+ /* has to be the last member of enclosing struct */
+ t = btf_type_by_id(btf, acc->type_id);
+ return acc->idx == btf_vlen(t) - 1;
+}
+
+static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
+{
+ switch (kind) {
+ case BPF_CORE_FIELD_BYTE_OFFSET: return "byte_off";
+ case BPF_CORE_FIELD_BYTE_SIZE: return "byte_sz";
+ case BPF_CORE_FIELD_EXISTS: return "field_exists";
+ case BPF_CORE_FIELD_SIGNED: return "signed";
+ case BPF_CORE_FIELD_LSHIFT_U64: return "lshift_u64";
+ case BPF_CORE_FIELD_RSHIFT_U64: return "rshift_u64";
+ case BPF_CORE_TYPE_ID_LOCAL: return "local_type_id";
+ case BPF_CORE_TYPE_ID_TARGET: return "target_type_id";
+ case BPF_CORE_TYPE_EXISTS: return "type_exists";
+ case BPF_CORE_TYPE_MATCHES: return "type_matches";
+ case BPF_CORE_TYPE_SIZE: return "type_size";
+ case BPF_CORE_ENUMVAL_EXISTS: return "enumval_exists";
+ case BPF_CORE_ENUMVAL_VALUE: return "enumval_value";
+ default: return "unknown";
+ }
+}
+
+static bool core_relo_is_field_based(enum bpf_core_relo_kind kind)
+{
+ switch (kind) {
+ case BPF_CORE_FIELD_BYTE_OFFSET:
+ case BPF_CORE_FIELD_BYTE_SIZE:
+ case BPF_CORE_FIELD_EXISTS:
+ case BPF_CORE_FIELD_SIGNED:
+ case BPF_CORE_FIELD_LSHIFT_U64:
+ case BPF_CORE_FIELD_RSHIFT_U64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool core_relo_is_type_based(enum bpf_core_relo_kind kind)
+{
+ switch (kind) {
+ case BPF_CORE_TYPE_ID_LOCAL:
+ case BPF_CORE_TYPE_ID_TARGET:
+ case BPF_CORE_TYPE_EXISTS:
+ case BPF_CORE_TYPE_MATCHES:
+ case BPF_CORE_TYPE_SIZE:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
+{
+ switch (kind) {
+ case BPF_CORE_ENUMVAL_EXISTS:
+ case BPF_CORE_ENUMVAL_VALUE:
+ return true;
+ default:
+ return false;
+ }
+}
+
+int __bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
+ const struct btf *targ_btf, __u32 targ_id, int level)
+{
+ const struct btf_type *local_type, *targ_type;
+ int depth = 32; /* max recursion depth */
+
+ /* caller made sure that names match (ignoring flavor suffix) */
+ local_type = btf_type_by_id(local_btf, local_id);
+ targ_type = btf_type_by_id(targ_btf, targ_id);
+ if (!btf_kind_core_compat(local_type, targ_type))
+ return 0;
+
+recur:
+ depth--;
+ if (depth < 0)
+ return -EINVAL;
+
+ local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
+ targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+ if (!local_type || !targ_type)
+ return -EINVAL;
+
+ if (!btf_kind_core_compat(local_type, targ_type))
+ return 0;
+
+ switch (btf_kind(local_type)) {
+ case BTF_KIND_UNKN:
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ case BTF_KIND_ENUM:
+ case BTF_KIND_FWD:
+ case BTF_KIND_ENUM64:
+ return 1;
+ case BTF_KIND_INT:
+ /* just reject deprecated bitfield-like integers; all other
+ * integers are by default compatible between each other
+ */
+ return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0;
+ case BTF_KIND_PTR:
+ local_id = local_type->type;
+ targ_id = targ_type->type;
+ goto recur;
+ case BTF_KIND_ARRAY:
+ local_id = btf_array(local_type)->type;
+ targ_id = btf_array(targ_type)->type;
+ goto recur;
+ case BTF_KIND_FUNC_PROTO: {
+ struct btf_param *local_p = btf_params(local_type);
+ struct btf_param *targ_p = btf_params(targ_type);
+ __u16 local_vlen = btf_vlen(local_type);
+ __u16 targ_vlen = btf_vlen(targ_type);
+ int i, err;
+
+ if (local_vlen != targ_vlen)
+ return 0;
+
+ for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
+ if (level <= 0)
+ return -EINVAL;
+
+ skip_mods_and_typedefs(local_btf, local_p->type, &local_id);
+ skip_mods_and_typedefs(targ_btf, targ_p->type, &targ_id);
+ err = __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id,
+ level - 1);
+ if (err <= 0)
+ return err;
+ }
+
+ /* tail recurse for return type check */
+ skip_mods_and_typedefs(local_btf, local_type->type, &local_id);
+ skip_mods_and_typedefs(targ_btf, targ_type->type, &targ_id);
+ goto recur;
+ }
+ default:
+ pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n",
+ btf_kind_str(local_type), local_id, targ_id);
+ return 0;
+ }
+}
+
+/*
+ * Turn bpf_core_relo into a low- and high-level spec representation,
+ * validating correctness along the way, as well as calculating resulting
+ * field bit offset, specified by accessor string. Low-level spec captures
+ * every single level of nestedness, including traversing anonymous
+ * struct/union members. High-level one only captures semantically meaningful
+ * "turning points": named fields and array indicies.
+ * E.g., for this case:
+ *
+ * struct sample {
+ * int __unimportant;
+ * struct {
+ * int __1;
+ * int __2;
+ * int a[7];
+ * };
+ * };
+ *
+ * struct sample *s = ...;
+ *
+ * int x = &s->a[3]; // access string = '0:1:2:3'
+ *
+ * Low-level spec has 1:1 mapping with each element of access string (it's
+ * just a parsed access string representation): [0, 1, 2, 3].
+ *
+ * High-level spec will capture only 3 points:
+ * - initial zero-index access by pointer (&s->... is the same as &s[0]...);
+ * - field 'a' access (corresponds to '2' in low-level spec);
+ * - array element #3 access (corresponds to '3' in low-level spec).
+ *
+ * Type-based relocations (TYPE_EXISTS/TYPE_MATCHES/TYPE_SIZE,
+ * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their
+ * spec and raw_spec are kept empty.
+ *
+ * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access
+ * string to specify enumerator's value index that need to be relocated.
+ */
+int bpf_core_parse_spec(const char *prog_name, const struct btf *btf,
+ const struct bpf_core_relo *relo,
+ struct bpf_core_spec *spec)
+{
+ int access_idx, parsed_len, i;
+ struct bpf_core_accessor *acc;
+ const struct btf_type *t;
+ const char *name, *spec_str;
+ __u32 id, name_off;
+ __s64 sz;
+
+ spec_str = btf__name_by_offset(btf, relo->access_str_off);
+ if (str_is_empty(spec_str) || *spec_str == ':')
+ return -EINVAL;
+
+ memset(spec, 0, sizeof(*spec));
+ spec->btf = btf;
+ spec->root_type_id = relo->type_id;
+ spec->relo_kind = relo->kind;
+
+ /* type-based relocations don't have a field access string */
+ if (core_relo_is_type_based(relo->kind)) {
+ if (strcmp(spec_str, "0"))
+ return -EINVAL;
+ return 0;
+ }
+
+ /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
+ while (*spec_str) {
+ if (*spec_str == ':')
+ ++spec_str;
+ if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1)
+ return -EINVAL;
+ if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
+ return -E2BIG;
+ spec_str += parsed_len;
+ spec->raw_spec[spec->raw_len++] = access_idx;
+ }
+
+ if (spec->raw_len == 0)
+ return -EINVAL;
+
+ t = skip_mods_and_typedefs(btf, relo->type_id, &id);
+ if (!t)
+ return -EINVAL;
+
+ access_idx = spec->raw_spec[0];
+ acc = &spec->spec[0];
+ acc->type_id = id;
+ acc->idx = access_idx;
+ spec->len++;
+
+ if (core_relo_is_enumval_based(relo->kind)) {
+ if (!btf_is_any_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t))
+ return -EINVAL;
+
+ /* record enumerator name in a first accessor */
+ name_off = btf_is_enum(t) ? btf_enum(t)[access_idx].name_off
+ : btf_enum64(t)[access_idx].name_off;
+ acc->name = btf__name_by_offset(btf, name_off);
+ return 0;
+ }
+
+ if (!core_relo_is_field_based(relo->kind))
+ return -EINVAL;
+
+ sz = btf__resolve_size(btf, id);
+ if (sz < 0)
+ return sz;
+ spec->bit_offset = access_idx * sz * 8;
+
+ for (i = 1; i < spec->raw_len; i++) {
+ t = skip_mods_and_typedefs(btf, id, &id);
+ if (!t)
+ return -EINVAL;
+
+ access_idx = spec->raw_spec[i];
+ acc = &spec->spec[spec->len];
+
+ if (btf_is_composite(t)) {
+ const struct btf_member *m;
+ __u32 bit_offset;
+
+ if (access_idx >= btf_vlen(t))
+ return -EINVAL;
+
+ bit_offset = btf_member_bit_offset(t, access_idx);
+ spec->bit_offset += bit_offset;
+
+ m = btf_members(t) + access_idx;
+ if (m->name_off) {
+ name = btf__name_by_offset(btf, m->name_off);
+ if (str_is_empty(name))
+ return -EINVAL;
+
+ acc->type_id = id;
+ acc->idx = access_idx;
+ acc->name = name;
+ spec->len++;
+ }
+
+ id = m->type;
+ } else if (btf_is_array(t)) {
+ const struct btf_array *a = btf_array(t);
+ bool flex;
+
+ t = skip_mods_and_typedefs(btf, a->type, &id);
+ if (!t)
+ return -EINVAL;
+
+ flex = is_flex_arr(btf, acc - 1, a);
+ if (!flex && access_idx >= a->nelems)
+ return -EINVAL;
+
+ spec->spec[spec->len].type_id = id;
+ spec->spec[spec->len].idx = access_idx;
+ spec->len++;
+
+ sz = btf__resolve_size(btf, id);
+ if (sz < 0)
+ return sz;
+ spec->bit_offset += access_idx * sz * 8;
+ } else {
+ pr_warn("prog '%s': relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n",
+ prog_name, relo->type_id, spec_str, i, id, btf_kind_str(t));
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+/* Check two types for compatibility for the purpose of field access
+ * relocation. const/volatile/restrict and typedefs are skipped to ensure we
+ * are relocating semantically compatible entities:
+ * - any two STRUCTs/UNIONs are compatible and can be mixed;
+ * - any two FWDs are compatible, if their names match (modulo flavor suffix);
+ * - any two PTRs are always compatible;
+ * - for ENUMs, names should be the same (ignoring flavor suffix) or at
+ * least one of enums should be anonymous;
+ * - for ENUMs, check sizes, names are ignored;
+ * - for INT, size and signedness are ignored;
+ * - any two FLOATs are always compatible;
+ * - for ARRAY, dimensionality is ignored, element types are checked for
+ * compatibility recursively;
+ * - everything else shouldn't be ever a target of relocation.
+ * These rules are not set in stone and probably will be adjusted as we get
+ * more experience with using BPF CO-RE relocations.
+ */
+static int bpf_core_fields_are_compat(const struct btf *local_btf,
+ __u32 local_id,
+ const struct btf *targ_btf,
+ __u32 targ_id)
+{
+ const struct btf_type *local_type, *targ_type;
+
+recur:
+ local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
+ targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+ if (!local_type || !targ_type)
+ return -EINVAL;
+
+ if (btf_is_composite(local_type) && btf_is_composite(targ_type))
+ return 1;
+ if (!btf_kind_core_compat(local_type, targ_type))
+ return 0;
+
+ switch (btf_kind(local_type)) {
+ case BTF_KIND_PTR:
+ case BTF_KIND_FLOAT:
+ return 1;
+ case BTF_KIND_FWD:
+ case BTF_KIND_ENUM64:
+ case BTF_KIND_ENUM: {
+ const char *local_name, *targ_name;
+ size_t local_len, targ_len;
+
+ local_name = btf__name_by_offset(local_btf,
+ local_type->name_off);
+ targ_name = btf__name_by_offset(targ_btf, targ_type->name_off);
+ local_len = bpf_core_essential_name_len(local_name);
+ targ_len = bpf_core_essential_name_len(targ_name);
+ /* one of them is anonymous or both w/ same flavor-less names */
+ return local_len == 0 || targ_len == 0 ||
+ (local_len == targ_len &&
+ strncmp(local_name, targ_name, local_len) == 0);
+ }
+ case BTF_KIND_INT:
+ /* just reject deprecated bitfield-like integers; all other
+ * integers are by default compatible between each other
+ */
+ return btf_int_offset(local_type) == 0 &&
+ btf_int_offset(targ_type) == 0;
+ case BTF_KIND_ARRAY:
+ local_id = btf_array(local_type)->type;
+ targ_id = btf_array(targ_type)->type;
+ goto recur;
+ default:
+ return 0;
+ }
+}
+
+/*
+ * Given single high-level named field accessor in local type, find
+ * corresponding high-level accessor for a target type. Along the way,
+ * maintain low-level spec for target as well. Also keep updating target
+ * bit offset.
+ *
+ * Searching is performed through recursive exhaustive enumeration of all
+ * fields of a struct/union. If there are any anonymous (embedded)
+ * structs/unions, they are recursively searched as well. If field with
+ * desired name is found, check compatibility between local and target types,
+ * before returning result.
+ *
+ * 1 is returned, if field is found.
+ * 0 is returned if no compatible field is found.
+ * <0 is returned on error.
+ */
+static int bpf_core_match_member(const struct btf *local_btf,
+ const struct bpf_core_accessor *local_acc,
+ const struct btf *targ_btf,
+ __u32 targ_id,
+ struct bpf_core_spec *spec,
+ __u32 *next_targ_id)
+{
+ const struct btf_type *local_type, *targ_type;
+ const struct btf_member *local_member, *m;
+ const char *local_name, *targ_name;
+ __u32 local_id;
+ int i, n, found;
+
+ targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+ if (!targ_type)
+ return -EINVAL;
+ if (!btf_is_composite(targ_type))
+ return 0;
+
+ local_id = local_acc->type_id;
+ local_type = btf_type_by_id(local_btf, local_id);
+ local_member = btf_members(local_type) + local_acc->idx;
+ local_name = btf__name_by_offset(local_btf, local_member->name_off);
+
+ n = btf_vlen(targ_type);
+ m = btf_members(targ_type);
+ for (i = 0; i < n; i++, m++) {
+ __u32 bit_offset;
+
+ bit_offset = btf_member_bit_offset(targ_type, i);
+
+ /* too deep struct/union/array nesting */
+ if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
+ return -E2BIG;
+
+ /* speculate this member will be the good one */
+ spec->bit_offset += bit_offset;
+ spec->raw_spec[spec->raw_len++] = i;
+
+ targ_name = btf__name_by_offset(targ_btf, m->name_off);
+ if (str_is_empty(targ_name)) {
+ /* embedded struct/union, we need to go deeper */
+ found = bpf_core_match_member(local_btf, local_acc,
+ targ_btf, m->type,
+ spec, next_targ_id);
+ if (found) /* either found or error */
+ return found;
+ } else if (strcmp(local_name, targ_name) == 0) {
+ /* matching named field */
+ struct bpf_core_accessor *targ_acc;
+
+ targ_acc = &spec->spec[spec->len++];
+ targ_acc->type_id = targ_id;
+ targ_acc->idx = i;
+ targ_acc->name = targ_name;
+
+ *next_targ_id = m->type;
+ found = bpf_core_fields_are_compat(local_btf,
+ local_member->type,
+ targ_btf, m->type);
+ if (!found)
+ spec->len--; /* pop accessor */
+ return found;
+ }
+ /* member turned out not to be what we looked for */
+ spec->bit_offset -= bit_offset;
+ spec->raw_len--;
+ }
+
+ return 0;
+}
+
+/*
+ * Try to match local spec to a target type and, if successful, produce full
+ * target spec (high-level, low-level + bit offset).
+ */
+static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
+ const struct btf *targ_btf, __u32 targ_id,
+ struct bpf_core_spec *targ_spec)
+{
+ const struct btf_type *targ_type;
+ const struct bpf_core_accessor *local_acc;
+ struct bpf_core_accessor *targ_acc;
+ int i, sz, matched;
+ __u32 name_off;
+
+ memset(targ_spec, 0, sizeof(*targ_spec));
+ targ_spec->btf = targ_btf;
+ targ_spec->root_type_id = targ_id;
+ targ_spec->relo_kind = local_spec->relo_kind;
+
+ if (core_relo_is_type_based(local_spec->relo_kind)) {
+ if (local_spec->relo_kind == BPF_CORE_TYPE_MATCHES)
+ return bpf_core_types_match(local_spec->btf,
+ local_spec->root_type_id,
+ targ_btf, targ_id);
+ else
+ return bpf_core_types_are_compat(local_spec->btf,
+ local_spec->root_type_id,
+ targ_btf, targ_id);
+ }
+
+ local_acc = &local_spec->spec[0];
+ targ_acc = &targ_spec->spec[0];
+
+ if (core_relo_is_enumval_based(local_spec->relo_kind)) {
+ size_t local_essent_len, targ_essent_len;
+ const char *targ_name;
+
+ /* has to resolve to an enum */
+ targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id);
+ if (!btf_is_any_enum(targ_type))
+ return 0;
+
+ local_essent_len = bpf_core_essential_name_len(local_acc->name);
+
+ for (i = 0; i < btf_vlen(targ_type); i++) {
+ if (btf_is_enum(targ_type))
+ name_off = btf_enum(targ_type)[i].name_off;
+ else
+ name_off = btf_enum64(targ_type)[i].name_off;
+
+ targ_name = btf__name_by_offset(targ_spec->btf, name_off);
+ targ_essent_len = bpf_core_essential_name_len(targ_name);
+ if (targ_essent_len != local_essent_len)
+ continue;
+ if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) {
+ targ_acc->type_id = targ_id;
+ targ_acc->idx = i;
+ targ_acc->name = targ_name;
+ targ_spec->len++;
+ targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
+ targ_spec->raw_len++;
+ return 1;
+ }
+ }
+ return 0;
+ }
+
+ if (!core_relo_is_field_based(local_spec->relo_kind))
+ return -EINVAL;
+
+ for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) {
+ targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id,
+ &targ_id);
+ if (!targ_type)
+ return -EINVAL;
+
+ if (local_acc->name) {
+ matched = bpf_core_match_member(local_spec->btf,
+ local_acc,
+ targ_btf, targ_id,
+ targ_spec, &targ_id);
+ if (matched <= 0)
+ return matched;
+ } else {
+ /* for i=0, targ_id is already treated as array element
+ * type (because it's the original struct), for others
+ * we should find array element type first
+ */
+ if (i > 0) {
+ const struct btf_array *a;
+ bool flex;
+
+ if (!btf_is_array(targ_type))
+ return 0;
+
+ a = btf_array(targ_type);
+ flex = is_flex_arr(targ_btf, targ_acc - 1, a);
+ if (!flex && local_acc->idx >= a->nelems)
+ return 0;
+ if (!skip_mods_and_typedefs(targ_btf, a->type,
+ &targ_id))
+ return -EINVAL;
+ }
+
+ /* too deep struct/union/array nesting */
+ if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
+ return -E2BIG;
+
+ targ_acc->type_id = targ_id;
+ targ_acc->idx = local_acc->idx;
+ targ_acc->name = NULL;
+ targ_spec->len++;
+ targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
+ targ_spec->raw_len++;
+
+ sz = btf__resolve_size(targ_btf, targ_id);
+ if (sz < 0)
+ return sz;
+ targ_spec->bit_offset += local_acc->idx * sz * 8;
+ }
+ }
+
+ return 1;
+}
+
+static int bpf_core_calc_field_relo(const char *prog_name,
+ const struct bpf_core_relo *relo,
+ const struct bpf_core_spec *spec,
+ __u64 *val, __u32 *field_sz, __u32 *type_id,
+ bool *validate)
+{
+ const struct bpf_core_accessor *acc;
+ const struct btf_type *t;
+ __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id;
+ const struct btf_member *m;
+ const struct btf_type *mt;
+ bool bitfield;
+ __s64 sz;
+
+ *field_sz = 0;
+
+ if (relo->kind == BPF_CORE_FIELD_EXISTS) {
+ *val = spec ? 1 : 0;
+ return 0;
+ }
+
+ if (!spec)
+ return -EUCLEAN; /* request instruction poisoning */
+
+ acc = &spec->spec[spec->len - 1];
+ t = btf_type_by_id(spec->btf, acc->type_id);
+
+ /* a[n] accessor needs special handling */
+ if (!acc->name) {
+ if (relo->kind == BPF_CORE_FIELD_BYTE_OFFSET) {
+ *val = spec->bit_offset / 8;
+ /* remember field size for load/store mem size */
+ sz = btf__resolve_size(spec->btf, acc->type_id);
+ if (sz < 0)
+ return -EINVAL;
+ *field_sz = sz;
+ *type_id = acc->type_id;
+ } else if (relo->kind == BPF_CORE_FIELD_BYTE_SIZE) {
+ sz = btf__resolve_size(spec->btf, acc->type_id);
+ if (sz < 0)
+ return -EINVAL;
+ *val = sz;
+ } else {
+ pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n",
+ prog_name, relo->kind, relo->insn_off / 8);
+ return -EINVAL;
+ }
+ if (validate)
+ *validate = true;
+ return 0;
+ }
+
+ m = btf_members(t) + acc->idx;
+ mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id);
+ bit_off = spec->bit_offset;
+ bit_sz = btf_member_bitfield_size(t, acc->idx);
+
+ bitfield = bit_sz > 0;
+ if (bitfield) {
+ byte_sz = mt->size;
+ byte_off = bit_off / 8 / byte_sz * byte_sz;
+ /* figure out smallest int size necessary for bitfield load */
+ while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) {
+ if (byte_sz >= 8) {
+ /* bitfield can't be read with 64-bit read */
+ pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n",
+ prog_name, relo->kind, relo->insn_off / 8);
+ return -E2BIG;
+ }
+ byte_sz *= 2;
+ byte_off = bit_off / 8 / byte_sz * byte_sz;
+ }
+ } else {
+ sz = btf__resolve_size(spec->btf, field_type_id);
+ if (sz < 0)
+ return -EINVAL;
+ byte_sz = sz;
+ byte_off = spec->bit_offset / 8;
+ bit_sz = byte_sz * 8;
+ }
+
+ /* for bitfields, all the relocatable aspects are ambiguous and we
+ * might disagree with compiler, so turn off validation of expected
+ * value, except for signedness
+ */
+ if (validate)
+ *validate = !bitfield;
+
+ switch (relo->kind) {
+ case BPF_CORE_FIELD_BYTE_OFFSET:
+ *val = byte_off;
+ if (!bitfield) {
+ *field_sz = byte_sz;
+ *type_id = field_type_id;
+ }
+ break;
+ case BPF_CORE_FIELD_BYTE_SIZE:
+ *val = byte_sz;
+ break;
+ case BPF_CORE_FIELD_SIGNED:
+ *val = (btf_is_any_enum(mt) && BTF_INFO_KFLAG(mt->info)) ||
+ (btf_is_int(mt) && (btf_int_encoding(mt) & BTF_INT_SIGNED));
+ if (validate)
+ *validate = true; /* signedness is never ambiguous */
+ break;
+ case BPF_CORE_FIELD_LSHIFT_U64:
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ *val = 64 - (bit_off + bit_sz - byte_off * 8);
+#else
+ *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8);
+#endif
+ break;
+ case BPF_CORE_FIELD_RSHIFT_U64:
+ *val = 64 - bit_sz;
+ if (validate)
+ *validate = true; /* right shift is never ambiguous */
+ break;
+ case BPF_CORE_FIELD_EXISTS:
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
+ const struct bpf_core_spec *spec,
+ __u64 *val, bool *validate)
+{
+ __s64 sz;
+
+ /* by default, always check expected value in bpf_insn */
+ if (validate)
+ *validate = true;
+
+ /* type-based relos return zero when target type is not found */
+ if (!spec) {
+ *val = 0;
+ return 0;
+ }
+
+ switch (relo->kind) {
+ case BPF_CORE_TYPE_ID_TARGET:
+ *val = spec->root_type_id;
+ /* type ID, embedded in bpf_insn, might change during linking,
+ * so enforcing it is pointless
+ */
+ if (validate)
+ *validate = false;
+ break;
+ case BPF_CORE_TYPE_EXISTS:
+ case BPF_CORE_TYPE_MATCHES:
+ *val = 1;
+ break;
+ case BPF_CORE_TYPE_SIZE:
+ sz = btf__resolve_size(spec->btf, spec->root_type_id);
+ if (sz < 0)
+ return -EINVAL;
+ *val = sz;
+ break;
+ case BPF_CORE_TYPE_ID_LOCAL:
+ /* BPF_CORE_TYPE_ID_LOCAL is handled specially and shouldn't get here */
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,
+ const struct bpf_core_spec *spec,
+ __u64 *val)
+{
+ const struct btf_type *t;
+
+ switch (relo->kind) {
+ case BPF_CORE_ENUMVAL_EXISTS:
+ *val = spec ? 1 : 0;
+ break;
+ case BPF_CORE_ENUMVAL_VALUE:
+ if (!spec)
+ return -EUCLEAN; /* request instruction poisoning */
+ t = btf_type_by_id(spec->btf, spec->spec[0].type_id);
+ if (btf_is_enum(t))
+ *val = btf_enum(t)[spec->spec[0].idx].val;
+ else
+ *val = btf_enum64_value(btf_enum64(t) + spec->spec[0].idx);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+/* Calculate original and target relocation values, given local and target
+ * specs and relocation kind. These values are calculated for each candidate.
+ * If there are multiple candidates, resulting values should all be consistent
+ * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity.
+ * If instruction has to be poisoned, *poison will be set to true.
+ */
+static int bpf_core_calc_relo(const char *prog_name,
+ const struct bpf_core_relo *relo,
+ int relo_idx,
+ const struct bpf_core_spec *local_spec,
+ const struct bpf_core_spec *targ_spec,
+ struct bpf_core_relo_res *res)
+{
+ int err = -EOPNOTSUPP;
+
+ res->orig_val = 0;
+ res->new_val = 0;
+ res->poison = false;
+ res->validate = true;
+ res->fail_memsz_adjust = false;
+ res->orig_sz = res->new_sz = 0;
+ res->orig_type_id = res->new_type_id = 0;
+
+ if (core_relo_is_field_based(relo->kind)) {
+ err = bpf_core_calc_field_relo(prog_name, relo, local_spec,
+ &res->orig_val, &res->orig_sz,
+ &res->orig_type_id, &res->validate);
+ err = err ?: bpf_core_calc_field_relo(prog_name, relo, targ_spec,
+ &res->new_val, &res->new_sz,
+ &res->new_type_id, NULL);
+ if (err)
+ goto done;
+ /* Validate if it's safe to adjust load/store memory size.
+ * Adjustments are performed only if original and new memory
+ * sizes differ.
+ */
+ res->fail_memsz_adjust = false;
+ if (res->orig_sz != res->new_sz) {
+ const struct btf_type *orig_t, *new_t;
+
+ orig_t = btf_type_by_id(local_spec->btf, res->orig_type_id);
+ new_t = btf_type_by_id(targ_spec->btf, res->new_type_id);
+
+ /* There are two use cases in which it's safe to
+ * adjust load/store's mem size:
+ * - reading a 32-bit kernel pointer, while on BPF
+ * size pointers are always 64-bit; in this case
+ * it's safe to "downsize" instruction size due to
+ * pointer being treated as unsigned integer with
+ * zero-extended upper 32-bits;
+ * - reading unsigned integers, again due to
+ * zero-extension is preserving the value correctly.
+ *
+ * In all other cases it's incorrect to attempt to
+ * load/store field because read value will be
+ * incorrect, so we poison relocated instruction.
+ */
+ if (btf_is_ptr(orig_t) && btf_is_ptr(new_t))
+ goto done;
+ if (btf_is_int(orig_t) && btf_is_int(new_t) &&
+ btf_int_encoding(orig_t) != BTF_INT_SIGNED &&
+ btf_int_encoding(new_t) != BTF_INT_SIGNED)
+ goto done;
+
+ /* mark as invalid mem size adjustment, but this will
+ * only be checked for LDX/STX/ST insns
+ */
+ res->fail_memsz_adjust = true;
+ }
+ } else if (core_relo_is_type_based(relo->kind)) {
+ err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val, &res->validate);
+ err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val, NULL);
+ } else if (core_relo_is_enumval_based(relo->kind)) {
+ err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val);
+ err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val);
+ }
+
+done:
+ if (err == -EUCLEAN) {
+ /* EUCLEAN is used to signal instruction poisoning request */
+ res->poison = true;
+ err = 0;
+ } else if (err == -EOPNOTSUPP) {
+ /* EOPNOTSUPP means unknown/unsupported relocation */
+ pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n",
+ prog_name, relo_idx, core_relo_kind_str(relo->kind),
+ relo->kind, relo->insn_off / 8);
+ }
+
+ return err;
+}
+
+/*
+ * Turn instruction for which CO_RE relocation failed into invalid one with
+ * distinct signature.
+ */
+static void bpf_core_poison_insn(const char *prog_name, int relo_idx,
+ int insn_idx, struct bpf_insn *insn)
+{
+ pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n",
+ prog_name, relo_idx, insn_idx);
+ insn->code = BPF_JMP | BPF_CALL;
+ insn->dst_reg = 0;
+ insn->src_reg = 0;
+ insn->off = 0;
+ /* if this instruction is reachable (not a dead code),
+ * verifier will complain with the following message:
+ * invalid func unknown#195896080
+ */
+ insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
+}
+
+static int insn_bpf_size_to_bytes(struct bpf_insn *insn)
+{
+ switch (BPF_SIZE(insn->code)) {
+ case BPF_DW: return 8;
+ case BPF_W: return 4;
+ case BPF_H: return 2;
+ case BPF_B: return 1;
+ default: return -1;
+ }
+}
+
+static int insn_bytes_to_bpf_size(__u32 sz)
+{
+ switch (sz) {
+ case 8: return BPF_DW;
+ case 4: return BPF_W;
+ case 2: return BPF_H;
+ case 1: return BPF_B;
+ default: return -1;
+ }
+}
+
+/*
+ * Patch relocatable BPF instruction.
+ *
+ * Patched value is determined by relocation kind and target specification.
+ * For existence relocations target spec will be NULL if field/type is not found.
+ * Expected insn->imm value is determined using relocation kind and local
+ * spec, and is checked before patching instruction. If actual insn->imm value
+ * is wrong, bail out with error.
+ *
+ * Currently supported classes of BPF instruction are:
+ * 1. rX = <imm> (assignment with immediate operand);
+ * 2. rX += <imm> (arithmetic operations with immediate operand);
+ * 3. rX = <imm64> (load with 64-bit immediate value);
+ * 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64};
+ * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64};
+ * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}.
+ */
+int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn,
+ int insn_idx, const struct bpf_core_relo *relo,
+ int relo_idx, const struct bpf_core_relo_res *res)
+{
+ __u64 orig_val, new_val;
+ __u8 class;
+
+ class = BPF_CLASS(insn->code);
+
+ if (res->poison) {
+poison:
+ /* poison second part of ldimm64 to avoid confusing error from
+ * verifier about "unknown opcode 00"
+ */
+ if (is_ldimm64_insn(insn))
+ bpf_core_poison_insn(prog_name, relo_idx, insn_idx + 1, insn + 1);
+ bpf_core_poison_insn(prog_name, relo_idx, insn_idx, insn);
+ return 0;
+ }
+
+ orig_val = res->orig_val;
+ new_val = res->new_val;
+
+ switch (class) {
+ case BPF_ALU:
+ case BPF_ALU64:
+ if (BPF_SRC(insn->code) != BPF_K)
+ return -EINVAL;
+ if (res->validate && insn->imm != orig_val) {
+ pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %llu -> %llu\n",
+ prog_name, relo_idx,
+ insn_idx, insn->imm, (unsigned long long)orig_val,
+ (unsigned long long)new_val);
+ return -EINVAL;
+ }
+ orig_val = insn->imm;
+ insn->imm = new_val;
+ pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %llu -> %llu\n",
+ prog_name, relo_idx, insn_idx,
+ (unsigned long long)orig_val, (unsigned long long)new_val);
+ break;
+ case BPF_LDX:
+ case BPF_ST:
+ case BPF_STX:
+ if (res->validate && insn->off != orig_val) {
+ pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %llu -> %llu\n",
+ prog_name, relo_idx, insn_idx, insn->off, (unsigned long long)orig_val,
+ (unsigned long long)new_val);
+ return -EINVAL;
+ }
+ if (new_val > SHRT_MAX) {
+ pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %llu\n",
+ prog_name, relo_idx, insn_idx, (unsigned long long)new_val);
+ return -ERANGE;
+ }
+ if (res->fail_memsz_adjust) {
+ pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. "
+ "Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n",
+ prog_name, relo_idx, insn_idx);
+ goto poison;
+ }
+
+ orig_val = insn->off;
+ insn->off = new_val;
+ pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %llu -> %llu\n",
+ prog_name, relo_idx, insn_idx, (unsigned long long)orig_val,
+ (unsigned long long)new_val);
+
+ if (res->new_sz != res->orig_sz) {
+ int insn_bytes_sz, insn_bpf_sz;
+
+ insn_bytes_sz = insn_bpf_size_to_bytes(insn);
+ if (insn_bytes_sz != res->orig_sz) {
+ pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n",
+ prog_name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz);
+ return -EINVAL;
+ }
+
+ insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz);
+ if (insn_bpf_sz < 0) {
+ pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n",
+ prog_name, relo_idx, insn_idx, res->new_sz);
+ return -EINVAL;
+ }
+
+ insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code);
+ pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n",
+ prog_name, relo_idx, insn_idx, res->orig_sz, res->new_sz);
+ }
+ break;
+ case BPF_LD: {
+ __u64 imm;
+
+ if (!is_ldimm64_insn(insn) ||
+ insn[0].src_reg != 0 || insn[0].off != 0 ||
+ insn[1].code != 0 || insn[1].dst_reg != 0 ||
+ insn[1].src_reg != 0 || insn[1].off != 0) {
+ pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n",
+ prog_name, relo_idx, insn_idx);
+ return -EINVAL;
+ }
+
+ imm = (__u32)insn[0].imm | ((__u64)insn[1].imm << 32);
+ if (res->validate && imm != orig_val) {
+ pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %llu -> %llu\n",
+ prog_name, relo_idx,
+ insn_idx, (unsigned long long)imm,
+ (unsigned long long)orig_val, (unsigned long long)new_val);
+ return -EINVAL;
+ }
+
+ insn[0].imm = new_val;
+ insn[1].imm = new_val >> 32;
+ pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %llu\n",
+ prog_name, relo_idx, insn_idx,
+ (unsigned long long)imm, (unsigned long long)new_val);
+ break;
+ }
+ default:
+ pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n",
+ prog_name, relo_idx, insn_idx, insn->code,
+ insn->src_reg, insn->dst_reg, insn->off, insn->imm);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* Output spec definition in the format:
+ * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>,
+ * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b
+ */
+int bpf_core_format_spec(char *buf, size_t buf_sz, const struct bpf_core_spec *spec)
+{
+ const struct btf_type *t;
+ const char *s;
+ __u32 type_id;
+ int i, len = 0;
+
+#define append_buf(fmt, args...) \
+ ({ \
+ int r; \
+ r = snprintf(buf, buf_sz, fmt, ##args); \
+ len += r; \
+ if (r >= buf_sz) \
+ r = buf_sz; \
+ buf += r; \
+ buf_sz -= r; \
+ })
+
+ type_id = spec->root_type_id;
+ t = btf_type_by_id(spec->btf, type_id);
+ s = btf__name_by_offset(spec->btf, t->name_off);
+
+ append_buf("<%s> [%u] %s %s",
+ core_relo_kind_str(spec->relo_kind),
+ type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s);
+
+ if (core_relo_is_type_based(spec->relo_kind))
+ return len;
+
+ if (core_relo_is_enumval_based(spec->relo_kind)) {
+ t = skip_mods_and_typedefs(spec->btf, type_id, NULL);
+ if (btf_is_enum(t)) {
+ const struct btf_enum *e;
+ const char *fmt_str;
+
+ e = btf_enum(t) + spec->raw_spec[0];
+ s = btf__name_by_offset(spec->btf, e->name_off);
+ fmt_str = BTF_INFO_KFLAG(t->info) ? "::%s = %d" : "::%s = %u";
+ append_buf(fmt_str, s, e->val);
+ } else {
+ const struct btf_enum64 *e;
+ const char *fmt_str;
+
+ e = btf_enum64(t) + spec->raw_spec[0];
+ s = btf__name_by_offset(spec->btf, e->name_off);
+ fmt_str = BTF_INFO_KFLAG(t->info) ? "::%s = %lld" : "::%s = %llu";
+ append_buf(fmt_str, s, (unsigned long long)btf_enum64_value(e));
+ }
+ return len;
+ }
+
+ if (core_relo_is_field_based(spec->relo_kind)) {
+ for (i = 0; i < spec->len; i++) {
+ if (spec->spec[i].name)
+ append_buf(".%s", spec->spec[i].name);
+ else if (i > 0 || spec->spec[i].idx > 0)
+ append_buf("[%u]", spec->spec[i].idx);
+ }
+
+ append_buf(" (");
+ for (i = 0; i < spec->raw_len; i++)
+ append_buf("%s%d", i == 0 ? "" : ":", spec->raw_spec[i]);
+
+ if (spec->bit_offset % 8)
+ append_buf(" @ offset %u.%u)", spec->bit_offset / 8, spec->bit_offset % 8);
+ else
+ append_buf(" @ offset %u)", spec->bit_offset / 8);
+ return len;
+ }
+
+ return len;
+#undef append_buf
+}
+
+/*
+ * Calculate CO-RE relocation target result.
+ *
+ * The outline and important points of the algorithm:
+ * 1. For given local type, find corresponding candidate target types.
+ * Candidate type is a type with the same "essential" name, ignoring
+ * everything after last triple underscore (___). E.g., `sample`,
+ * `sample___flavor_one`, `sample___flavor_another_one`, are all candidates
+ * for each other. Names with triple underscore are referred to as
+ * "flavors" and are useful, among other things, to allow to
+ * specify/support incompatible variations of the same kernel struct, which
+ * might differ between different kernel versions and/or build
+ * configurations.
+ *
+ * N.B. Struct "flavors" could be generated by bpftool's BTF-to-C
+ * converter, when deduplicated BTF of a kernel still contains more than
+ * one different types with the same name. In that case, ___2, ___3, etc
+ * are appended starting from second name conflict. But start flavors are
+ * also useful to be defined "locally", in BPF program, to extract same
+ * data from incompatible changes between different kernel
+ * versions/configurations. For instance, to handle field renames between
+ * kernel versions, one can use two flavors of the struct name with the
+ * same common name and use conditional relocations to extract that field,
+ * depending on target kernel version.
+ * 2. For each candidate type, try to match local specification to this
+ * candidate target type. Matching involves finding corresponding
+ * high-level spec accessors, meaning that all named fields should match,
+ * as well as all array accesses should be within the actual bounds. Also,
+ * types should be compatible (see bpf_core_fields_are_compat for details).
+ * 3. It is supported and expected that there might be multiple flavors
+ * matching the spec. As long as all the specs resolve to the same set of
+ * offsets across all candidates, there is no error. If there is any
+ * ambiguity, CO-RE relocation will fail. This is necessary to accommodate
+ * imperfection of BTF deduplication, which can cause slight duplication of
+ * the same BTF type, if some directly or indirectly referenced (by
+ * pointer) type gets resolved to different actual types in different
+ * object files. If such a situation occurs, deduplicated BTF will end up
+ * with two (or more) structurally identical types, which differ only in
+ * types they refer to through pointer. This should be OK in most cases and
+ * is not an error.
+ * 4. Candidate types search is performed by linearly scanning through all
+ * types in target BTF. It is anticipated that this is overall more
+ * efficient memory-wise and not significantly worse (if not better)
+ * CPU-wise compared to prebuilding a map from all local type names to
+ * a list of candidate type names. It's also sped up by caching resolved
+ * list of matching candidates per each local "root" type ID, that has at
+ * least one bpf_core_relo associated with it. This list is shared
+ * between multiple relocations for the same type ID and is updated as some
+ * of the candidates are pruned due to structural incompatibility.
+ */
+int bpf_core_calc_relo_insn(const char *prog_name,
+ const struct bpf_core_relo *relo,
+ int relo_idx,
+ const struct btf *local_btf,
+ struct bpf_core_cand_list *cands,
+ struct bpf_core_spec *specs_scratch,
+ struct bpf_core_relo_res *targ_res)
+{
+ struct bpf_core_spec *local_spec = &specs_scratch[0];
+ struct bpf_core_spec *cand_spec = &specs_scratch[1];
+ struct bpf_core_spec *targ_spec = &specs_scratch[2];
+ struct bpf_core_relo_res cand_res;
+ const struct btf_type *local_type;
+ const char *local_name;
+ __u32 local_id;
+ char spec_buf[256];
+ int i, j, err;
+
+ local_id = relo->type_id;
+ local_type = btf_type_by_id(local_btf, local_id);
+ local_name = btf__name_by_offset(local_btf, local_type->name_off);
+ if (!local_name)
+ return -EINVAL;
+
+ err = bpf_core_parse_spec(prog_name, local_btf, relo, local_spec);
+ if (err) {
+ const char *spec_str;
+
+ spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
+ pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",
+ prog_name, relo_idx, local_id, btf_kind_str(local_type),
+ str_is_empty(local_name) ? "<anon>" : local_name,
+ spec_str ?: "<?>", err);
+ return -EINVAL;
+ }
+
+ bpf_core_format_spec(spec_buf, sizeof(spec_buf), local_spec);
+ pr_debug("prog '%s': relo #%d: %s\n", prog_name, relo_idx, spec_buf);
+
+ /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */
+ if (relo->kind == BPF_CORE_TYPE_ID_LOCAL) {
+ /* bpf_insn's imm value could get out of sync during linking */
+ memset(targ_res, 0, sizeof(*targ_res));
+ targ_res->validate = false;
+ targ_res->poison = false;
+ targ_res->orig_val = local_spec->root_type_id;
+ targ_res->new_val = local_spec->root_type_id;
+ return 0;
+ }
+
+ /* libbpf doesn't support candidate search for anonymous types */
+ if (str_is_empty(local_name)) {
+ pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n",
+ prog_name, relo_idx, core_relo_kind_str(relo->kind), relo->kind);
+ return -EOPNOTSUPP;
+ }
+
+ for (i = 0, j = 0; i < cands->len; i++) {
+ err = bpf_core_spec_match(local_spec, cands->cands[i].btf,
+ cands->cands[i].id, cand_spec);
+ if (err < 0) {
+ bpf_core_format_spec(spec_buf, sizeof(spec_buf), cand_spec);
+ pr_warn("prog '%s': relo #%d: error matching candidate #%d %s: %d\n ",
+ prog_name, relo_idx, i, spec_buf, err);
+ return err;
+ }
+
+ bpf_core_format_spec(spec_buf, sizeof(spec_buf), cand_spec);
+ pr_debug("prog '%s': relo #%d: %s candidate #%d %s\n", prog_name,
+ relo_idx, err == 0 ? "non-matching" : "matching", i, spec_buf);
+
+ if (err == 0)
+ continue;
+
+ err = bpf_core_calc_relo(prog_name, relo, relo_idx, local_spec, cand_spec, &cand_res);
+ if (err)
+ return err;
+
+ if (j == 0) {
+ *targ_res = cand_res;
+ *targ_spec = *cand_spec;
+ } else if (cand_spec->bit_offset != targ_spec->bit_offset) {
+ /* if there are many field relo candidates, they
+ * should all resolve to the same bit offset
+ */
+ pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n",
+ prog_name, relo_idx, cand_spec->bit_offset,
+ targ_spec->bit_offset);
+ return -EINVAL;
+ } else if (cand_res.poison != targ_res->poison ||
+ cand_res.new_val != targ_res->new_val) {
+ /* all candidates should result in the same relocation
+ * decision and value, otherwise it's dangerous to
+ * proceed due to ambiguity
+ */
+ pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %llu != %s %llu\n",
+ prog_name, relo_idx,
+ cand_res.poison ? "failure" : "success",
+ (unsigned long long)cand_res.new_val,
+ targ_res->poison ? "failure" : "success",
+ (unsigned long long)targ_res->new_val);
+ return -EINVAL;
+ }
+
+ cands->cands[j++] = cands->cands[i];
+ }
+
+ /*
+ * For BPF_CORE_FIELD_EXISTS relo or when used BPF program has field
+ * existence checks or kernel version/config checks, it's expected
+ * that we might not find any candidates. In this case, if field
+ * wasn't found in any candidate, the list of candidates shouldn't
+ * change at all, we'll just handle relocating appropriately,
+ * depending on relo's kind.
+ */
+ if (j > 0)
+ cands->len = j;
+
+ /*
+ * If no candidates were found, it might be both a programmer error,
+ * as well as expected case, depending whether instruction w/
+ * relocation is guarded in some way that makes it unreachable (dead
+ * code) if relocation can't be resolved. This is handled in
+ * bpf_core_patch_insn() uniformly by replacing that instruction with
+ * BPF helper call insn (using invalid helper ID). If that instruction
+ * is indeed unreachable, then it will be ignored and eliminated by
+ * verifier. If it was an error, then verifier will complain and point
+ * to a specific instruction number in its log.
+ */
+ if (j == 0) {
+ pr_debug("prog '%s': relo #%d: no matching targets found\n",
+ prog_name, relo_idx);
+
+ /* calculate single target relo result explicitly */
+ err = bpf_core_calc_relo(prog_name, relo, relo_idx, local_spec, NULL, targ_res);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static bool bpf_core_names_match(const struct btf *local_btf, size_t local_name_off,
+ const struct btf *targ_btf, size_t targ_name_off)
+{
+ const char *local_n, *targ_n;
+ size_t local_len, targ_len;
+
+ local_n = btf__name_by_offset(local_btf, local_name_off);
+ targ_n = btf__name_by_offset(targ_btf, targ_name_off);
+
+ if (str_is_empty(targ_n))
+ return str_is_empty(local_n);
+
+ targ_len = bpf_core_essential_name_len(targ_n);
+ local_len = bpf_core_essential_name_len(local_n);
+
+ return targ_len == local_len && strncmp(local_n, targ_n, local_len) == 0;
+}
+
+static int bpf_core_enums_match(const struct btf *local_btf, const struct btf_type *local_t,
+ const struct btf *targ_btf, const struct btf_type *targ_t)
+{
+ __u16 local_vlen = btf_vlen(local_t);
+ __u16 targ_vlen = btf_vlen(targ_t);
+ int i, j;
+
+ if (local_t->size != targ_t->size)
+ return 0;
+
+ if (local_vlen > targ_vlen)
+ return 0;
+
+ /* iterate over the local enum's variants and make sure each has
+ * a symbolic name correspondent in the target
+ */
+ for (i = 0; i < local_vlen; i++) {
+ bool matched = false;
+ __u32 local_n_off, targ_n_off;
+
+ local_n_off = btf_is_enum(local_t) ? btf_enum(local_t)[i].name_off :
+ btf_enum64(local_t)[i].name_off;
+
+ for (j = 0; j < targ_vlen; j++) {
+ targ_n_off = btf_is_enum(targ_t) ? btf_enum(targ_t)[j].name_off :
+ btf_enum64(targ_t)[j].name_off;
+
+ if (bpf_core_names_match(local_btf, local_n_off, targ_btf, targ_n_off)) {
+ matched = true;
+ break;
+ }
+ }
+
+ if (!matched)
+ return 0;
+ }
+ return 1;
+}
+
+static int bpf_core_composites_match(const struct btf *local_btf, const struct btf_type *local_t,
+ const struct btf *targ_btf, const struct btf_type *targ_t,
+ bool behind_ptr, int level)
+{
+ const struct btf_member *local_m = btf_members(local_t);
+ __u16 local_vlen = btf_vlen(local_t);
+ __u16 targ_vlen = btf_vlen(targ_t);
+ int i, j, err;
+
+ if (local_vlen > targ_vlen)
+ return 0;
+
+ /* check that all local members have a match in the target */
+ for (i = 0; i < local_vlen; i++, local_m++) {
+ const struct btf_member *targ_m = btf_members(targ_t);
+ bool matched = false;
+
+ for (j = 0; j < targ_vlen; j++, targ_m++) {
+ if (!bpf_core_names_match(local_btf, local_m->name_off,
+ targ_btf, targ_m->name_off))
+ continue;
+
+ err = __bpf_core_types_match(local_btf, local_m->type, targ_btf,
+ targ_m->type, behind_ptr, level - 1);
+ if (err < 0)
+ return err;
+ if (err > 0) {
+ matched = true;
+ break;
+ }
+ }
+
+ if (!matched)
+ return 0;
+ }
+ return 1;
+}
+
+/* Check that two types "match". This function assumes that root types were
+ * already checked for name match.
+ *
+ * The matching relation is defined as follows:
+ * - modifiers and typedefs are stripped (and, hence, effectively ignored)
+ * - generally speaking types need to be of same kind (struct vs. struct, union
+ * vs. union, etc.)
+ * - exceptions are struct/union behind a pointer which could also match a
+ * forward declaration of a struct or union, respectively, and enum vs.
+ * enum64 (see below)
+ * Then, depending on type:
+ * - integers:
+ * - match if size and signedness match
+ * - arrays & pointers:
+ * - target types are recursively matched
+ * - structs & unions:
+ * - local members need to exist in target with the same name
+ * - for each member we recursively check match unless it is already behind a
+ * pointer, in which case we only check matching names and compatible kind
+ * - enums:
+ * - local variants have to have a match in target by symbolic name (but not
+ * numeric value)
+ * - size has to match (but enum may match enum64 and vice versa)
+ * - function pointers:
+ * - number and position of arguments in local type has to match target
+ * - for each argument and the return value we recursively check match
+ */
+int __bpf_core_types_match(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf,
+ __u32 targ_id, bool behind_ptr, int level)
+{
+ const struct btf_type *local_t, *targ_t;
+ int depth = 32; /* max recursion depth */
+ __u16 local_k, targ_k;
+
+ if (level <= 0)
+ return -EINVAL;
+
+recur:
+ depth--;
+ if (depth < 0)
+ return -EINVAL;
+
+ local_t = skip_mods_and_typedefs(local_btf, local_id, &local_id);
+ targ_t = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+ if (!local_t || !targ_t)
+ return -EINVAL;
+
+ /* While the name check happens after typedefs are skipped, root-level
+ * typedefs would still be name-matched as that's the contract with
+ * callers.
+ */
+ if (!bpf_core_names_match(local_btf, local_t->name_off, targ_btf, targ_t->name_off))
+ return 0;
+
+ local_k = btf_kind(local_t);
+ targ_k = btf_kind(targ_t);
+
+ switch (local_k) {
+ case BTF_KIND_UNKN:
+ return local_k == targ_k;
+ case BTF_KIND_FWD: {
+ bool local_f = BTF_INFO_KFLAG(local_t->info);
+
+ if (behind_ptr) {
+ if (local_k == targ_k)
+ return local_f == BTF_INFO_KFLAG(targ_t->info);
+
+ /* for forward declarations kflag dictates whether the
+ * target is a struct (0) or union (1)
+ */
+ return (targ_k == BTF_KIND_STRUCT && !local_f) ||
+ (targ_k == BTF_KIND_UNION && local_f);
+ } else {
+ if (local_k != targ_k)
+ return 0;
+
+ /* match if the forward declaration is for the same kind */
+ return local_f == BTF_INFO_KFLAG(targ_t->info);
+ }
+ }
+ case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
+ if (!btf_is_any_enum(targ_t))
+ return 0;
+
+ return bpf_core_enums_match(local_btf, local_t, targ_btf, targ_t);
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ if (behind_ptr) {
+ bool targ_f = BTF_INFO_KFLAG(targ_t->info);
+
+ if (local_k == targ_k)
+ return 1;
+
+ if (targ_k != BTF_KIND_FWD)
+ return 0;
+
+ return (local_k == BTF_KIND_UNION) == targ_f;
+ } else {
+ if (local_k != targ_k)
+ return 0;
+
+ return bpf_core_composites_match(local_btf, local_t, targ_btf, targ_t,
+ behind_ptr, level);
+ }
+ case BTF_KIND_INT: {
+ __u8 local_sgn;
+ __u8 targ_sgn;
+
+ if (local_k != targ_k)
+ return 0;
+
+ local_sgn = btf_int_encoding(local_t) & BTF_INT_SIGNED;
+ targ_sgn = btf_int_encoding(targ_t) & BTF_INT_SIGNED;
+
+ return local_t->size == targ_t->size && local_sgn == targ_sgn;
+ }
+ case BTF_KIND_PTR:
+ if (local_k != targ_k)
+ return 0;
+
+ behind_ptr = true;
+
+ local_id = local_t->type;
+ targ_id = targ_t->type;
+ goto recur;
+ case BTF_KIND_ARRAY: {
+ const struct btf_array *local_array = btf_array(local_t);
+ const struct btf_array *targ_array = btf_array(targ_t);
+
+ if (local_k != targ_k)
+ return 0;
+
+ if (local_array->nelems != targ_array->nelems)
+ return 0;
+
+ local_id = local_array->type;
+ targ_id = targ_array->type;
+ goto recur;
+ }
+ case BTF_KIND_FUNC_PROTO: {
+ struct btf_param *local_p = btf_params(local_t);
+ struct btf_param *targ_p = btf_params(targ_t);
+ __u16 local_vlen = btf_vlen(local_t);
+ __u16 targ_vlen = btf_vlen(targ_t);
+ int i, err;
+
+ if (local_k != targ_k)
+ return 0;
+
+ if (local_vlen != targ_vlen)
+ return 0;
+
+ for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
+ err = __bpf_core_types_match(local_btf, local_p->type, targ_btf,
+ targ_p->type, behind_ptr, level - 1);
+ if (err <= 0)
+ return err;
+ }
+
+ /* tail recurse for return type check */
+ local_id = local_t->type;
+ targ_id = targ_t->type;
+ goto recur;
+ }
+ default:
+ pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n",
+ btf_kind_str(local_t), local_id, targ_id);
+ return 0;
+ }
+}
diff --git a/tools/lib/bpf/relo_core.h b/tools/lib/bpf/relo_core.h
new file mode 100644
index 000000000000..1c0566daf8e8
--- /dev/null
+++ b/tools/lib/bpf/relo_core.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (c) 2019 Facebook */
+
+#ifndef __RELO_CORE_H
+#define __RELO_CORE_H
+
+#include <linux/bpf.h>
+
+struct bpf_core_cand {
+ const struct btf *btf;
+ __u32 id;
+};
+
+/* dynamically sized list of type IDs and its associated struct btf */
+struct bpf_core_cand_list {
+ struct bpf_core_cand *cands;
+ int len;
+};
+
+#define BPF_CORE_SPEC_MAX_LEN 64
+
+/* represents BPF CO-RE field or array element accessor */
+struct bpf_core_accessor {
+ __u32 type_id; /* struct/union type or array element type */
+ __u32 idx; /* field index or array index */
+ const char *name; /* field name or NULL for array accessor */
+};
+
+struct bpf_core_spec {
+ const struct btf *btf;
+ /* high-level spec: named fields and array indices only */
+ struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
+ /* original unresolved (no skip_mods_or_typedefs) root type ID */
+ __u32 root_type_id;
+ /* CO-RE relocation kind */
+ enum bpf_core_relo_kind relo_kind;
+ /* high-level spec length */
+ int len;
+ /* raw, low-level spec: 1-to-1 with accessor spec string */
+ int raw_spec[BPF_CORE_SPEC_MAX_LEN];
+ /* raw spec length */
+ int raw_len;
+ /* field bit offset represented by spec */
+ __u32 bit_offset;
+};
+
+struct bpf_core_relo_res {
+ /* expected value in the instruction, unless validate == false */
+ __u64 orig_val;
+ /* new value that needs to be patched up to */
+ __u64 new_val;
+ /* relocation unsuccessful, poison instruction, but don't fail load */
+ bool poison;
+ /* some relocations can't be validated against orig_val */
+ bool validate;
+ /* for field byte offset relocations or the forms:
+ * *(T *)(rX + <off>) = rY
+ * rX = *(T *)(rY + <off>),
+ * we remember original and resolved field size to adjust direct
+ * memory loads of pointers and integers; this is necessary for 32-bit
+ * host kernel architectures, but also allows to automatically
+ * relocate fields that were resized from, e.g., u32 to u64, etc.
+ */
+ bool fail_memsz_adjust;
+ __u32 orig_sz;
+ __u32 orig_type_id;
+ __u32 new_sz;
+ __u32 new_type_id;
+};
+
+int __bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
+ const struct btf *targ_btf, __u32 targ_id, int level);
+int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
+ const struct btf *targ_btf, __u32 targ_id);
+int __bpf_core_types_match(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf,
+ __u32 targ_id, bool behind_ptr, int level);
+int bpf_core_types_match(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf,
+ __u32 targ_id);
+
+size_t bpf_core_essential_name_len(const char *name);
+
+int bpf_core_calc_relo_insn(const char *prog_name,
+ const struct bpf_core_relo *relo, int relo_idx,
+ const struct btf *local_btf,
+ struct bpf_core_cand_list *cands,
+ struct bpf_core_spec *specs_scratch,
+ struct bpf_core_relo_res *targ_res);
+
+int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn,
+ int insn_idx, const struct bpf_core_relo *relo,
+ int relo_idx, const struct bpf_core_relo_res *res);
+
+int bpf_core_parse_spec(const char *prog_name, const struct btf *btf,
+ const struct bpf_core_relo *relo,
+ struct bpf_core_spec *spec);
+
+int bpf_core_format_spec(char *buf, size_t buf_sz, const struct bpf_core_spec *spec);
+
+#endif
diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c
index 5c6522c89af1..aacb64278a01 100644
--- a/tools/lib/bpf/ringbuf.c
+++ b/tools/lib/bpf/ringbuf.c
@@ -16,6 +16,7 @@
#include <asm/barrier.h>
#include <sys/mman.h>
#include <sys/epoll.h>
+#include <time.h>
#include "libbpf.h"
#include "libbpf_internal.h"
@@ -33,13 +34,30 @@ struct ring {
struct ring_buffer {
struct epoll_event *events;
- struct ring *rings;
+ struct ring **rings;
size_t page_size;
int epoll_fd;
int ring_cnt;
};
-static void ringbuf_unmap_ring(struct ring_buffer *rb, struct ring *r)
+struct user_ring_buffer {
+ struct epoll_event event;
+ unsigned long *consumer_pos;
+ unsigned long *producer_pos;
+ void *data;
+ unsigned long mask;
+ size_t page_size;
+ int map_fd;
+ int epoll_fd;
+};
+
+/* 8-byte ring buffer header structure */
+struct ringbuf_hdr {
+ __u32 len;
+ __u32 pad;
+};
+
+static void ringbuf_free_ring(struct ring_buffer *rb, struct ring *r)
{
if (r->consumer_pos) {
munmap(r->consumer_pos, rb->page_size);
@@ -49,6 +67,8 @@ static void ringbuf_unmap_ring(struct ring_buffer *rb, struct ring *r)
munmap(r->producer_pos, rb->page_size + 2 * (r->mask + 1));
r->producer_pos = NULL;
}
+
+ free(r);
}
/* Add extra RINGBUF maps to this ring buffer manager */
@@ -59,37 +79,40 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
__u32 len = sizeof(info);
struct epoll_event *e;
struct ring *r;
+ __u64 mmap_sz;
void *tmp;
int err;
memset(&info, 0, sizeof(info));
- err = bpf_obj_get_info_by_fd(map_fd, &info, &len);
+ err = bpf_map_get_info_by_fd(map_fd, &info, &len);
if (err) {
err = -errno;
pr_warn("ringbuf: failed to get map info for fd=%d: %d\n",
map_fd, err);
- return err;
+ return libbpf_err(err);
}
if (info.type != BPF_MAP_TYPE_RINGBUF) {
pr_warn("ringbuf: map fd=%d is not BPF_MAP_TYPE_RINGBUF\n",
map_fd);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
tmp = libbpf_reallocarray(rb->rings, rb->ring_cnt + 1, sizeof(*rb->rings));
if (!tmp)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
rb->rings = tmp;
tmp = libbpf_reallocarray(rb->events, rb->ring_cnt + 1, sizeof(*rb->events));
if (!tmp)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
rb->events = tmp;
- r = &rb->rings[rb->ring_cnt];
- memset(r, 0, sizeof(*r));
+ r = calloc(1, sizeof(*r));
+ if (!r)
+ return libbpf_err(-ENOMEM);
+ rb->rings[rb->ring_cnt] = r;
r->map_fd = map_fd;
r->sample_cb = sample_cb;
@@ -97,28 +120,31 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
r->mask = info.max_entries - 1;
/* Map writable consumer page */
- tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
- map_fd, 0);
+ tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, 0);
if (tmp == MAP_FAILED) {
err = -errno;
pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n",
map_fd, err);
- return err;
+ goto err_out;
}
r->consumer_pos = tmp;
/* Map read-only producer page and data pages. We map twice as big
* data size to allow simple reading of samples that wrap around the
* end of a ring buffer. See kernel implementation for details.
- * */
- tmp = mmap(NULL, rb->page_size + 2 * info.max_entries, PROT_READ,
- MAP_SHARED, map_fd, rb->page_size);
+ */
+ mmap_sz = rb->page_size + 2 * (__u64)info.max_entries;
+ if (mmap_sz != (__u64)(size_t)mmap_sz) {
+ err = -E2BIG;
+ pr_warn("ringbuf: ring buffer size (%u) is too big\n", info.max_entries);
+ goto err_out;
+ }
+ tmp = mmap(NULL, (size_t)mmap_sz, PROT_READ, MAP_SHARED, map_fd, rb->page_size);
if (tmp == MAP_FAILED) {
err = -errno;
- ringbuf_unmap_ring(rb, r);
pr_warn("ringbuf: failed to mmap data pages for map fd=%d: %d\n",
map_fd, err);
- return err;
+ goto err_out;
}
r->producer_pos = tmp;
r->data = tmp + rb->page_size;
@@ -130,14 +156,17 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
e->data.fd = rb->ring_cnt;
if (epoll_ctl(rb->epoll_fd, EPOLL_CTL_ADD, map_fd, e) < 0) {
err = -errno;
- ringbuf_unmap_ring(rb, r);
pr_warn("ringbuf: failed to epoll add map fd=%d: %d\n",
map_fd, err);
- return err;
+ goto err_out;
}
rb->ring_cnt++;
return 0;
+
+err_out:
+ ringbuf_free_ring(rb, r);
+ return libbpf_err(err);
}
void ring_buffer__free(struct ring_buffer *rb)
@@ -148,7 +177,7 @@ void ring_buffer__free(struct ring_buffer *rb)
return;
for (i = 0; i < rb->ring_cnt; ++i)
- ringbuf_unmap_ring(rb, &rb->rings[i]);
+ ringbuf_free_ring(rb, rb->rings[i]);
if (rb->epoll_fd >= 0)
close(rb->epoll_fd);
@@ -165,11 +194,11 @@ ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx,
int err;
if (!OPTS_VALID(opts, ring_buffer_opts))
- return NULL;
+ return errno = EINVAL, NULL;
rb = calloc(1, sizeof(*rb));
if (!rb)
- return NULL;
+ return errno = ENOMEM, NULL;
rb->page_size = getpagesize();
@@ -188,7 +217,7 @@ ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx,
err_out:
ring_buffer__free(rb);
- return NULL;
+ return errno = -err, NULL;
}
static inline int roundup_len(__u32 len)
@@ -202,9 +231,11 @@ static inline int roundup_len(__u32 len)
return (len + 7) / 8 * 8;
}
-static int ringbuf_process_ring(struct ring* r)
+static int64_t ringbuf_process_ring(struct ring *r)
{
- int *len_ptr, len, err, cnt = 0;
+ int *len_ptr, len, err;
+ /* 64-bit to avoid overflow in case of extreme application behavior */
+ int64_t cnt = 0;
unsigned long cons_pos, prod_pos;
bool got_new_data;
void *sample;
@@ -227,7 +258,7 @@ static int ringbuf_process_ring(struct ring* r)
if ((len & BPF_RINGBUF_DISCARD_BIT) == 0) {
sample = (void *)len_ptr + BPF_RINGBUF_HDR_SZ;
err = r->sample_cb(r->ctx, sample, len);
- if (err) {
+ if (err < 0) {
/* update consumer pos and bail out */
smp_store_release(r->consumer_pos,
cons_pos);
@@ -244,41 +275,372 @@ done:
}
/* Consume available ring buffer(s) data without event polling.
- * Returns number of records consumed across all registered ring buffers, or
- * negative number if any of the callbacks return error.
+ * Returns number of records consumed across all registered ring buffers (or
+ * INT_MAX, whichever is less), or negative number if any of the callbacks
+ * return error.
*/
int ring_buffer__consume(struct ring_buffer *rb)
{
- int i, err, res = 0;
+ int64_t err, res = 0;
+ int i;
for (i = 0; i < rb->ring_cnt; i++) {
- struct ring *ring = &rb->rings[i];
+ struct ring *ring = rb->rings[i];
err = ringbuf_process_ring(ring);
if (err < 0)
- return err;
+ return libbpf_err(err);
res += err;
}
+ if (res > INT_MAX)
+ return INT_MAX;
return res;
}
/* Poll for available data and consume records, if any are available.
- * Returns number of records consumed, or negative number, if any of the
- * registered callbacks returned error.
+ * Returns number of records consumed (or INT_MAX, whichever is less), or
+ * negative number, if any of the registered callbacks returned error.
*/
int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms)
{
- int i, cnt, err, res = 0;
+ int i, cnt;
+ int64_t err, res = 0;
cnt = epoll_wait(rb->epoll_fd, rb->events, rb->ring_cnt, timeout_ms);
+ if (cnt < 0)
+ return libbpf_err(-errno);
+
for (i = 0; i < cnt; i++) {
__u32 ring_id = rb->events[i].data.fd;
- struct ring *ring = &rb->rings[ring_id];
+ struct ring *ring = rb->rings[ring_id];
err = ringbuf_process_ring(ring);
if (err < 0)
- return err;
- res += cnt;
+ return libbpf_err(err);
+ res += err;
+ }
+ if (res > INT_MAX)
+ return INT_MAX;
+ return res;
+}
+
+/* Get an fd that can be used to sleep until data is available in the ring(s) */
+int ring_buffer__epoll_fd(const struct ring_buffer *rb)
+{
+ return rb->epoll_fd;
+}
+
+struct ring *ring_buffer__ring(struct ring_buffer *rb, unsigned int idx)
+{
+ if (idx >= rb->ring_cnt)
+ return errno = ERANGE, NULL;
+
+ return rb->rings[idx];
+}
+
+unsigned long ring__consumer_pos(const struct ring *r)
+{
+ /* Synchronizes with smp_store_release() in ringbuf_process_ring(). */
+ return smp_load_acquire(r->consumer_pos);
+}
+
+unsigned long ring__producer_pos(const struct ring *r)
+{
+ /* Synchronizes with smp_store_release() in __bpf_ringbuf_reserve() in
+ * the kernel.
+ */
+ return smp_load_acquire(r->producer_pos);
+}
+
+size_t ring__avail_data_size(const struct ring *r)
+{
+ unsigned long cons_pos, prod_pos;
+
+ cons_pos = ring__consumer_pos(r);
+ prod_pos = ring__producer_pos(r);
+ return prod_pos - cons_pos;
+}
+
+size_t ring__size(const struct ring *r)
+{
+ return r->mask + 1;
+}
+
+int ring__map_fd(const struct ring *r)
+{
+ return r->map_fd;
+}
+
+int ring__consume(struct ring *r)
+{
+ int64_t res;
+
+ res = ringbuf_process_ring(r);
+ if (res < 0)
+ return libbpf_err(res);
+
+ return res > INT_MAX ? INT_MAX : res;
+}
+
+static void user_ringbuf_unmap_ring(struct user_ring_buffer *rb)
+{
+ if (rb->consumer_pos) {
+ munmap(rb->consumer_pos, rb->page_size);
+ rb->consumer_pos = NULL;
+ }
+ if (rb->producer_pos) {
+ munmap(rb->producer_pos, rb->page_size + 2 * (rb->mask + 1));
+ rb->producer_pos = NULL;
+ }
+}
+
+void user_ring_buffer__free(struct user_ring_buffer *rb)
+{
+ if (!rb)
+ return;
+
+ user_ringbuf_unmap_ring(rb);
+
+ if (rb->epoll_fd >= 0)
+ close(rb->epoll_fd);
+
+ free(rb);
+}
+
+static int user_ringbuf_map(struct user_ring_buffer *rb, int map_fd)
+{
+ struct bpf_map_info info;
+ __u32 len = sizeof(info);
+ __u64 mmap_sz;
+ void *tmp;
+ struct epoll_event *rb_epoll;
+ int err;
+
+ memset(&info, 0, sizeof(info));
+
+ err = bpf_map_get_info_by_fd(map_fd, &info, &len);
+ if (err) {
+ err = -errno;
+ pr_warn("user ringbuf: failed to get map info for fd=%d: %d\n", map_fd, err);
+ return err;
}
- return cnt < 0 ? -errno : res;
+
+ if (info.type != BPF_MAP_TYPE_USER_RINGBUF) {
+ pr_warn("user ringbuf: map fd=%d is not BPF_MAP_TYPE_USER_RINGBUF\n", map_fd);
+ return -EINVAL;
+ }
+
+ rb->map_fd = map_fd;
+ rb->mask = info.max_entries - 1;
+
+ /* Map read-only consumer page */
+ tmp = mmap(NULL, rb->page_size, PROT_READ, MAP_SHARED, map_fd, 0);
+ if (tmp == MAP_FAILED) {
+ err = -errno;
+ pr_warn("user ringbuf: failed to mmap consumer page for map fd=%d: %d\n",
+ map_fd, err);
+ return err;
+ }
+ rb->consumer_pos = tmp;
+
+ /* Map read-write the producer page and data pages. We map the data
+ * region as twice the total size of the ring buffer to allow the
+ * simple reading and writing of samples that wrap around the end of
+ * the buffer. See the kernel implementation for details.
+ */
+ mmap_sz = rb->page_size + 2 * (__u64)info.max_entries;
+ if (mmap_sz != (__u64)(size_t)mmap_sz) {
+ pr_warn("user ringbuf: ring buf size (%u) is too big\n", info.max_entries);
+ return -E2BIG;
+ }
+ tmp = mmap(NULL, (size_t)mmap_sz, PROT_READ | PROT_WRITE, MAP_SHARED,
+ map_fd, rb->page_size);
+ if (tmp == MAP_FAILED) {
+ err = -errno;
+ pr_warn("user ringbuf: failed to mmap data pages for map fd=%d: %d\n",
+ map_fd, err);
+ return err;
+ }
+
+ rb->producer_pos = tmp;
+ rb->data = tmp + rb->page_size;
+
+ rb_epoll = &rb->event;
+ rb_epoll->events = EPOLLOUT;
+ if (epoll_ctl(rb->epoll_fd, EPOLL_CTL_ADD, map_fd, rb_epoll) < 0) {
+ err = -errno;
+ pr_warn("user ringbuf: failed to epoll add map fd=%d: %d\n", map_fd, err);
+ return err;
+ }
+
+ return 0;
+}
+
+struct user_ring_buffer *
+user_ring_buffer__new(int map_fd, const struct user_ring_buffer_opts *opts)
+{
+ struct user_ring_buffer *rb;
+ int err;
+
+ if (!OPTS_VALID(opts, user_ring_buffer_opts))
+ return errno = EINVAL, NULL;
+
+ rb = calloc(1, sizeof(*rb));
+ if (!rb)
+ return errno = ENOMEM, NULL;
+
+ rb->page_size = getpagesize();
+
+ rb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+ if (rb->epoll_fd < 0) {
+ err = -errno;
+ pr_warn("user ringbuf: failed to create epoll instance: %d\n", err);
+ goto err_out;
+ }
+
+ err = user_ringbuf_map(rb, map_fd);
+ if (err)
+ goto err_out;
+
+ return rb;
+
+err_out:
+ user_ring_buffer__free(rb);
+ return errno = -err, NULL;
+}
+
+static void user_ringbuf_commit(struct user_ring_buffer *rb, void *sample, bool discard)
+{
+ __u32 new_len;
+ struct ringbuf_hdr *hdr;
+ uintptr_t hdr_offset;
+
+ hdr_offset = rb->mask + 1 + (sample - rb->data) - BPF_RINGBUF_HDR_SZ;
+ hdr = rb->data + (hdr_offset & rb->mask);
+
+ new_len = hdr->len & ~BPF_RINGBUF_BUSY_BIT;
+ if (discard)
+ new_len |= BPF_RINGBUF_DISCARD_BIT;
+
+ /* Synchronizes with smp_load_acquire() in __bpf_user_ringbuf_peek() in
+ * the kernel.
+ */
+ __atomic_exchange_n(&hdr->len, new_len, __ATOMIC_ACQ_REL);
+}
+
+void user_ring_buffer__discard(struct user_ring_buffer *rb, void *sample)
+{
+ user_ringbuf_commit(rb, sample, true);
+}
+
+void user_ring_buffer__submit(struct user_ring_buffer *rb, void *sample)
+{
+ user_ringbuf_commit(rb, sample, false);
+}
+
+void *user_ring_buffer__reserve(struct user_ring_buffer *rb, __u32 size)
+{
+ __u32 avail_size, total_size, max_size;
+ /* 64-bit to avoid overflow in case of extreme application behavior */
+ __u64 cons_pos, prod_pos;
+ struct ringbuf_hdr *hdr;
+
+ /* The top two bits are used as special flags */
+ if (size & (BPF_RINGBUF_BUSY_BIT | BPF_RINGBUF_DISCARD_BIT))
+ return errno = E2BIG, NULL;
+
+ /* Synchronizes with smp_store_release() in __bpf_user_ringbuf_peek() in
+ * the kernel.
+ */
+ cons_pos = smp_load_acquire(rb->consumer_pos);
+ /* Synchronizes with smp_store_release() in user_ringbuf_commit() */
+ prod_pos = smp_load_acquire(rb->producer_pos);
+
+ max_size = rb->mask + 1;
+ avail_size = max_size - (prod_pos - cons_pos);
+ /* Round up total size to a multiple of 8. */
+ total_size = (size + BPF_RINGBUF_HDR_SZ + 7) / 8 * 8;
+
+ if (total_size > max_size)
+ return errno = E2BIG, NULL;
+
+ if (avail_size < total_size)
+ return errno = ENOSPC, NULL;
+
+ hdr = rb->data + (prod_pos & rb->mask);
+ hdr->len = size | BPF_RINGBUF_BUSY_BIT;
+ hdr->pad = 0;
+
+ /* Synchronizes with smp_load_acquire() in __bpf_user_ringbuf_peek() in
+ * the kernel.
+ */
+ smp_store_release(rb->producer_pos, prod_pos + total_size);
+
+ return (void *)rb->data + ((prod_pos + BPF_RINGBUF_HDR_SZ) & rb->mask);
+}
+
+static __u64 ns_elapsed_timespec(const struct timespec *start, const struct timespec *end)
+{
+ __u64 start_ns, end_ns, ns_per_s = 1000000000;
+
+ start_ns = (__u64)start->tv_sec * ns_per_s + start->tv_nsec;
+ end_ns = (__u64)end->tv_sec * ns_per_s + end->tv_nsec;
+
+ return end_ns - start_ns;
+}
+
+void *user_ring_buffer__reserve_blocking(struct user_ring_buffer *rb, __u32 size, int timeout_ms)
+{
+ void *sample;
+ int err, ms_remaining = timeout_ms;
+ struct timespec start;
+
+ if (timeout_ms < 0 && timeout_ms != -1)
+ return errno = EINVAL, NULL;
+
+ if (timeout_ms != -1) {
+ err = clock_gettime(CLOCK_MONOTONIC, &start);
+ if (err)
+ return NULL;
+ }
+
+ do {
+ int cnt, ms_elapsed;
+ struct timespec curr;
+ __u64 ns_per_ms = 1000000;
+
+ sample = user_ring_buffer__reserve(rb, size);
+ if (sample)
+ return sample;
+ else if (errno != ENOSPC)
+ return NULL;
+
+ /* The kernel guarantees at least one event notification
+ * delivery whenever at least one sample is drained from the
+ * ring buffer in an invocation to bpf_ringbuf_drain(). Other
+ * additional events may be delivered at any time, but only one
+ * event is guaranteed per bpf_ringbuf_drain() invocation,
+ * provided that a sample is drained, and the BPF program did
+ * not pass BPF_RB_NO_WAKEUP to bpf_ringbuf_drain(). If
+ * BPF_RB_FORCE_WAKEUP is passed to bpf_ringbuf_drain(), a
+ * wakeup event will be delivered even if no samples are
+ * drained.
+ */
+ cnt = epoll_wait(rb->epoll_fd, &rb->event, 1, ms_remaining);
+ if (cnt < 0)
+ return NULL;
+
+ if (timeout_ms == -1)
+ continue;
+
+ err = clock_gettime(CLOCK_MONOTONIC, &curr);
+ if (err)
+ return NULL;
+
+ ms_elapsed = ns_elapsed_timespec(&start, &curr) / ns_per_ms;
+ ms_remaining = timeout_ms - ms_elapsed;
+ } while (ms_remaining > 0);
+
+ /* Try one more time to reserve a sample after the specified timeout has elapsed. */
+ return user_ring_buffer__reserve(rb, size);
}
diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h
new file mode 100644
index 000000000000..1e82ab06c3eb
--- /dev/null
+++ b/tools/lib/bpf/skel_internal.h
@@ -0,0 +1,374 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (c) 2021 Facebook */
+#ifndef __SKEL_INTERNAL_H
+#define __SKEL_INTERNAL_H
+
+#ifdef __KERNEL__
+#include <linux/fdtable.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/slab.h>
+#include <linux/bpf.h>
+#else
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/mman.h>
+#include <stdlib.h>
+#include "bpf.h"
+#endif
+
+#ifndef __NR_bpf
+# if defined(__mips__) && defined(_ABIO32)
+# define __NR_bpf 4355
+# elif defined(__mips__) && defined(_ABIN32)
+# define __NR_bpf 6319
+# elif defined(__mips__) && defined(_ABI64)
+# define __NR_bpf 5315
+# endif
+#endif
+
+/* This file is a base header for auto-generated *.lskel.h files.
+ * Its contents will change and may become part of auto-generation in the future.
+ *
+ * The layout of bpf_[map|prog]_desc and bpf_loader_ctx is feature dependent
+ * and will change from one version of libbpf to another and features
+ * requested during loader program generation.
+ */
+struct bpf_map_desc {
+ /* output of the loader prog */
+ int map_fd;
+ /* input for the loader prog */
+ __u32 max_entries;
+ __aligned_u64 initial_value;
+};
+struct bpf_prog_desc {
+ int prog_fd;
+};
+
+enum {
+ BPF_SKEL_KERNEL = (1ULL << 0),
+};
+
+struct bpf_loader_ctx {
+ __u32 sz;
+ __u32 flags;
+ __u32 log_level;
+ __u32 log_size;
+ __u64 log_buf;
+};
+
+struct bpf_load_and_run_opts {
+ struct bpf_loader_ctx *ctx;
+ const void *data;
+ const void *insns;
+ __u32 data_sz;
+ __u32 insns_sz;
+ const char *errstr;
+};
+
+long kern_sys_bpf(__u32 cmd, void *attr, __u32 attr_size);
+
+static inline int skel_sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
+ unsigned int size)
+{
+#ifdef __KERNEL__
+ return kern_sys_bpf(cmd, attr, size);
+#else
+ return syscall(__NR_bpf, cmd, attr, size);
+#endif
+}
+
+#ifdef __KERNEL__
+static inline int close(int fd)
+{
+ return close_fd(fd);
+}
+
+static inline void *skel_alloc(size_t size)
+{
+ struct bpf_loader_ctx *ctx = kzalloc(size, GFP_KERNEL);
+
+ if (!ctx)
+ return NULL;
+ ctx->flags |= BPF_SKEL_KERNEL;
+ return ctx;
+}
+
+static inline void skel_free(const void *p)
+{
+ kfree(p);
+}
+
+/* skel->bss/rodata maps are populated the following way:
+ *
+ * For kernel use:
+ * skel_prep_map_data() allocates kernel memory that kernel module can directly access.
+ * Generated lskel stores the pointer in skel->rodata and in skel->maps.rodata.initial_value.
+ * The loader program will perform probe_read_kernel() from maps.rodata.initial_value.
+ * skel_finalize_map_data() sets skel->rodata to point to actual value in a bpf map and
+ * does maps.rodata.initial_value = ~0ULL to signal skel_free_map_data() that kvfree
+ * is not nessary.
+ *
+ * For user space:
+ * skel_prep_map_data() mmaps anon memory into skel->rodata that can be accessed directly.
+ * Generated lskel stores the pointer in skel->rodata and in skel->maps.rodata.initial_value.
+ * The loader program will perform copy_from_user() from maps.rodata.initial_value.
+ * skel_finalize_map_data() remaps bpf array map value from the kernel memory into
+ * skel->rodata address.
+ *
+ * The "bpftool gen skeleton -L" command generates lskel.h that is suitable for
+ * both kernel and user space. The generated loader program does
+ * either bpf_probe_read_kernel() or bpf_copy_from_user() from initial_value
+ * depending on bpf_loader_ctx->flags.
+ */
+static inline void skel_free_map_data(void *p, __u64 addr, size_t sz)
+{
+ if (addr != ~0ULL)
+ kvfree(p);
+ /* When addr == ~0ULL the 'p' points to
+ * ((struct bpf_array *)map)->value. See skel_finalize_map_data.
+ */
+}
+
+static inline void *skel_prep_map_data(const void *val, size_t mmap_sz, size_t val_sz)
+{
+ void *addr;
+
+ addr = kvmalloc(val_sz, GFP_KERNEL);
+ if (!addr)
+ return NULL;
+ memcpy(addr, val, val_sz);
+ return addr;
+}
+
+static inline void *skel_finalize_map_data(__u64 *init_val, size_t mmap_sz, int flags, int fd)
+{
+ struct bpf_map *map;
+ void *addr = NULL;
+
+ kvfree((void *) (long) *init_val);
+ *init_val = ~0ULL;
+
+ /* At this point bpf_load_and_run() finished without error and
+ * 'fd' is a valid bpf map FD. All sanity checks below should succeed.
+ */
+ map = bpf_map_get(fd);
+ if (IS_ERR(map))
+ return NULL;
+ if (map->map_type != BPF_MAP_TYPE_ARRAY)
+ goto out;
+ addr = ((struct bpf_array *)map)->value;
+ /* the addr stays valid, since FD is not closed */
+out:
+ bpf_map_put(map);
+ return addr;
+}
+
+#else
+
+static inline void *skel_alloc(size_t size)
+{
+ return calloc(1, size);
+}
+
+static inline void skel_free(void *p)
+{
+ free(p);
+}
+
+static inline void skel_free_map_data(void *p, __u64 addr, size_t sz)
+{
+ munmap(p, sz);
+}
+
+static inline void *skel_prep_map_data(const void *val, size_t mmap_sz, size_t val_sz)
+{
+ void *addr;
+
+ addr = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ if (addr == (void *) -1)
+ return NULL;
+ memcpy(addr, val, val_sz);
+ return addr;
+}
+
+static inline void *skel_finalize_map_data(__u64 *init_val, size_t mmap_sz, int flags, int fd)
+{
+ void *addr;
+
+ addr = mmap((void *) (long) *init_val, mmap_sz, flags, MAP_SHARED | MAP_FIXED, fd, 0);
+ if (addr == (void *) -1)
+ return NULL;
+ return addr;
+}
+#endif
+
+static inline int skel_closenz(int fd)
+{
+ if (fd > 0)
+ return close(fd);
+ return -EINVAL;
+}
+
+#ifndef offsetofend
+#define offsetofend(TYPE, MEMBER) \
+ (offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER)))
+#endif
+
+static inline int skel_map_create(enum bpf_map_type map_type,
+ const char *map_name,
+ __u32 key_size,
+ __u32 value_size,
+ __u32 max_entries)
+{
+ const size_t attr_sz = offsetofend(union bpf_attr, map_extra);
+ union bpf_attr attr;
+
+ memset(&attr, 0, attr_sz);
+
+ attr.map_type = map_type;
+ strncpy(attr.map_name, map_name, sizeof(attr.map_name));
+ attr.key_size = key_size;
+ attr.value_size = value_size;
+ attr.max_entries = max_entries;
+
+ return skel_sys_bpf(BPF_MAP_CREATE, &attr, attr_sz);
+}
+
+static inline int skel_map_update_elem(int fd, const void *key,
+ const void *value, __u64 flags)
+{
+ const size_t attr_sz = offsetofend(union bpf_attr, flags);
+ union bpf_attr attr;
+
+ memset(&attr, 0, attr_sz);
+ attr.map_fd = fd;
+ attr.key = (long) key;
+ attr.value = (long) value;
+ attr.flags = flags;
+
+ return skel_sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, attr_sz);
+}
+
+static inline int skel_map_delete_elem(int fd, const void *key)
+{
+ const size_t attr_sz = offsetofend(union bpf_attr, flags);
+ union bpf_attr attr;
+
+ memset(&attr, 0, attr_sz);
+ attr.map_fd = fd;
+ attr.key = (long)key;
+
+ return skel_sys_bpf(BPF_MAP_DELETE_ELEM, &attr, attr_sz);
+}
+
+static inline int skel_map_get_fd_by_id(__u32 id)
+{
+ const size_t attr_sz = offsetofend(union bpf_attr, flags);
+ union bpf_attr attr;
+
+ memset(&attr, 0, attr_sz);
+ attr.map_id = id;
+
+ return skel_sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, attr_sz);
+}
+
+static inline int skel_raw_tracepoint_open(const char *name, int prog_fd)
+{
+ const size_t attr_sz = offsetofend(union bpf_attr, raw_tracepoint.prog_fd);
+ union bpf_attr attr;
+
+ memset(&attr, 0, attr_sz);
+ attr.raw_tracepoint.name = (long) name;
+ attr.raw_tracepoint.prog_fd = prog_fd;
+
+ return skel_sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, attr_sz);
+}
+
+static inline int skel_link_create(int prog_fd, int target_fd,
+ enum bpf_attach_type attach_type)
+{
+ const size_t attr_sz = offsetofend(union bpf_attr, link_create.iter_info_len);
+ union bpf_attr attr;
+
+ memset(&attr, 0, attr_sz);
+ attr.link_create.prog_fd = prog_fd;
+ attr.link_create.target_fd = target_fd;
+ attr.link_create.attach_type = attach_type;
+
+ return skel_sys_bpf(BPF_LINK_CREATE, &attr, attr_sz);
+}
+
+#ifdef __KERNEL__
+#define set_err
+#else
+#define set_err err = -errno
+#endif
+
+static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts)
+{
+ const size_t prog_load_attr_sz = offsetofend(union bpf_attr, fd_array);
+ const size_t test_run_attr_sz = offsetofend(union bpf_attr, test);
+ int map_fd = -1, prog_fd = -1, key = 0, err;
+ union bpf_attr attr;
+
+ err = map_fd = skel_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1);
+ if (map_fd < 0) {
+ opts->errstr = "failed to create loader map";
+ set_err;
+ goto out;
+ }
+
+ err = skel_map_update_elem(map_fd, &key, opts->data, 0);
+ if (err < 0) {
+ opts->errstr = "failed to update loader map";
+ set_err;
+ goto out;
+ }
+
+ memset(&attr, 0, prog_load_attr_sz);
+ attr.prog_type = BPF_PROG_TYPE_SYSCALL;
+ attr.insns = (long) opts->insns;
+ attr.insn_cnt = opts->insns_sz / sizeof(struct bpf_insn);
+ attr.license = (long) "Dual BSD/GPL";
+ memcpy(attr.prog_name, "__loader.prog", sizeof("__loader.prog"));
+ attr.fd_array = (long) &map_fd;
+ attr.log_level = opts->ctx->log_level;
+ attr.log_size = opts->ctx->log_size;
+ attr.log_buf = opts->ctx->log_buf;
+ attr.prog_flags = BPF_F_SLEEPABLE;
+ err = prog_fd = skel_sys_bpf(BPF_PROG_LOAD, &attr, prog_load_attr_sz);
+ if (prog_fd < 0) {
+ opts->errstr = "failed to load loader prog";
+ set_err;
+ goto out;
+ }
+
+ memset(&attr, 0, test_run_attr_sz);
+ attr.test.prog_fd = prog_fd;
+ attr.test.ctx_in = (long) opts->ctx;
+ attr.test.ctx_size_in = opts->ctx->sz;
+ err = skel_sys_bpf(BPF_PROG_RUN, &attr, test_run_attr_sz);
+ if (err < 0 || (int)attr.test.retval < 0) {
+ opts->errstr = "failed to execute loader prog";
+ if (err < 0) {
+ set_err;
+ } else {
+ err = (int)attr.test.retval;
+#ifndef __KERNEL__
+ errno = -err;
+#endif
+ }
+ goto out;
+ }
+ err = 0;
+out:
+ if (map_fd >= 0)
+ close(map_fd);
+ if (prog_fd >= 0)
+ close(prog_fd);
+ return err;
+}
+
+#endif
diff --git a/tools/lib/bpf/str_error.h b/tools/lib/bpf/str_error.h
index a139334d57b6..626d7ffb03d6 100644
--- a/tools/lib/bpf/str_error.h
+++ b/tools/lib/bpf/str_error.h
@@ -2,5 +2,8 @@
#ifndef __LIBBPF_STR_ERROR_H
#define __LIBBPF_STR_ERROR_H
+#define STRERR_BUFSIZE 128
+
char *libbpf_strerror_r(int err, char *dst, int len);
+
#endif /* __LIBBPF_STR_ERROR_H */
diff --git a/tools/lib/bpf/strset.c b/tools/lib/bpf/strset.c
new file mode 100644
index 000000000000..2464bcbd04e0
--- /dev/null
+++ b/tools/lib/bpf/strset.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2021 Facebook */
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <linux/err.h>
+#include "hashmap.h"
+#include "libbpf_internal.h"
+#include "strset.h"
+
+struct strset {
+ void *strs_data;
+ size_t strs_data_len;
+ size_t strs_data_cap;
+ size_t strs_data_max_len;
+
+ /* lookup index for each unique string in strings set */
+ struct hashmap *strs_hash;
+};
+
+static size_t strset_hash_fn(long key, void *ctx)
+{
+ const struct strset *s = ctx;
+ const char *str = s->strs_data + key;
+
+ return str_hash(str);
+}
+
+static bool strset_equal_fn(long key1, long key2, void *ctx)
+{
+ const struct strset *s = ctx;
+ const char *str1 = s->strs_data + key1;
+ const char *str2 = s->strs_data + key2;
+
+ return strcmp(str1, str2) == 0;
+}
+
+struct strset *strset__new(size_t max_data_sz, const char *init_data, size_t init_data_sz)
+{
+ struct strset *set = calloc(1, sizeof(*set));
+ struct hashmap *hash;
+ int err = -ENOMEM;
+
+ if (!set)
+ return ERR_PTR(-ENOMEM);
+
+ hash = hashmap__new(strset_hash_fn, strset_equal_fn, set);
+ if (IS_ERR(hash))
+ goto err_out;
+
+ set->strs_data_max_len = max_data_sz;
+ set->strs_hash = hash;
+
+ if (init_data) {
+ long off;
+
+ set->strs_data = malloc(init_data_sz);
+ if (!set->strs_data)
+ goto err_out;
+
+ memcpy(set->strs_data, init_data, init_data_sz);
+ set->strs_data_len = init_data_sz;
+ set->strs_data_cap = init_data_sz;
+
+ for (off = 0; off < set->strs_data_len; off += strlen(set->strs_data + off) + 1) {
+ /* hashmap__add() returns EEXIST if string with the same
+ * content already is in the hash map
+ */
+ err = hashmap__add(hash, off, off);
+ if (err == -EEXIST)
+ continue; /* duplicate */
+ if (err)
+ goto err_out;
+ }
+ }
+
+ return set;
+err_out:
+ strset__free(set);
+ return ERR_PTR(err);
+}
+
+void strset__free(struct strset *set)
+{
+ if (IS_ERR_OR_NULL(set))
+ return;
+
+ hashmap__free(set->strs_hash);
+ free(set->strs_data);
+ free(set);
+}
+
+size_t strset__data_size(const struct strset *set)
+{
+ return set->strs_data_len;
+}
+
+const char *strset__data(const struct strset *set)
+{
+ return set->strs_data;
+}
+
+static void *strset_add_str_mem(struct strset *set, size_t add_sz)
+{
+ return libbpf_add_mem(&set->strs_data, &set->strs_data_cap, 1,
+ set->strs_data_len, set->strs_data_max_len, add_sz);
+}
+
+/* Find string offset that corresponds to a given string *s*.
+ * Returns:
+ * - >0 offset into string data, if string is found;
+ * - -ENOENT, if string is not in the string data;
+ * - <0, on any other error.
+ */
+int strset__find_str(struct strset *set, const char *s)
+{
+ long old_off, new_off, len;
+ void *p;
+
+ /* see strset__add_str() for why we do this */
+ len = strlen(s) + 1;
+ p = strset_add_str_mem(set, len);
+ if (!p)
+ return -ENOMEM;
+
+ new_off = set->strs_data_len;
+ memcpy(p, s, len);
+
+ if (hashmap__find(set->strs_hash, new_off, &old_off))
+ return old_off;
+
+ return -ENOENT;
+}
+
+/* Add a string s to the string data. If the string already exists, return its
+ * offset within string data.
+ * Returns:
+ * - > 0 offset into string data, on success;
+ * - < 0, on error.
+ */
+int strset__add_str(struct strset *set, const char *s)
+{
+ long old_off, new_off, len;
+ void *p;
+ int err;
+
+ /* Hashmap keys are always offsets within set->strs_data, so to even
+ * look up some string from the "outside", we need to first append it
+ * at the end, so that it can be addressed with an offset. Luckily,
+ * until set->strs_data_len is incremented, that string is just a piece
+ * of garbage for the rest of the code, so no harm, no foul. On the
+ * other hand, if the string is unique, it's already appended and
+ * ready to be used, only a simple set->strs_data_len increment away.
+ */
+ len = strlen(s) + 1;
+ p = strset_add_str_mem(set, len);
+ if (!p)
+ return -ENOMEM;
+
+ new_off = set->strs_data_len;
+ memcpy(p, s, len);
+
+ /* Now attempt to add the string, but only if the string with the same
+ * contents doesn't exist already (HASHMAP_ADD strategy). If such
+ * string exists, we'll get its offset in old_off (that's old_key).
+ */
+ err = hashmap__insert(set->strs_hash, new_off, new_off,
+ HASHMAP_ADD, &old_off, NULL);
+ if (err == -EEXIST)
+ return old_off; /* duplicated string, return existing offset */
+ if (err)
+ return err;
+
+ set->strs_data_len += len; /* new unique string, adjust data length */
+ return new_off;
+}
diff --git a/tools/lib/bpf/strset.h b/tools/lib/bpf/strset.h
new file mode 100644
index 000000000000..b6ddf77a83c2
--- /dev/null
+++ b/tools/lib/bpf/strset.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/* Copyright (c) 2021 Facebook */
+#ifndef __LIBBPF_STRSET_H
+#define __LIBBPF_STRSET_H
+
+#include <stdbool.h>
+#include <stddef.h>
+
+struct strset;
+
+struct strset *strset__new(size_t max_data_sz, const char *init_data, size_t init_data_sz);
+void strset__free(struct strset *set);
+
+const char *strset__data(const struct strset *set);
+size_t strset__data_size(const struct strset *set);
+
+int strset__find_str(struct strset *set, const char *s);
+int strset__add_str(struct strset *set, const char *s);
+
+#endif /* __LIBBPF_STRSET_H */
diff --git a/tools/lib/bpf/usdt.bpf.h b/tools/lib/bpf/usdt.bpf.h
new file mode 100644
index 000000000000..f6763300b26a
--- /dev/null
+++ b/tools/lib/bpf/usdt.bpf.h
@@ -0,0 +1,250 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#ifndef __USDT_BPF_H__
+#define __USDT_BPF_H__
+
+#include <linux/errno.h>
+#include "bpf_helpers.h"
+#include "bpf_tracing.h"
+
+/* Below types and maps are internal implementation details of libbpf's USDT
+ * support and are subjects to change. Also, bpf_usdt_xxx() API helpers should
+ * be considered an unstable API as well and might be adjusted based on user
+ * feedback from using libbpf's USDT support in production.
+ */
+
+/* User can override BPF_USDT_MAX_SPEC_CNT to change default size of internal
+ * map that keeps track of USDT argument specifications. This might be
+ * necessary if there are a lot of USDT attachments.
+ */
+#ifndef BPF_USDT_MAX_SPEC_CNT
+#define BPF_USDT_MAX_SPEC_CNT 256
+#endif
+/* User can override BPF_USDT_MAX_IP_CNT to change default size of internal
+ * map that keeps track of IP (memory address) mapping to USDT argument
+ * specification.
+ * Note, if kernel supports BPF cookies, this map is not used and could be
+ * resized all the way to 1 to save a bit of memory.
+ */
+#ifndef BPF_USDT_MAX_IP_CNT
+#define BPF_USDT_MAX_IP_CNT (4 * BPF_USDT_MAX_SPEC_CNT)
+#endif
+
+enum __bpf_usdt_arg_type {
+ BPF_USDT_ARG_CONST,
+ BPF_USDT_ARG_REG,
+ BPF_USDT_ARG_REG_DEREF,
+};
+
+struct __bpf_usdt_arg_spec {
+ /* u64 scalar interpreted depending on arg_type, see below */
+ __u64 val_off;
+ /* arg location case, see bpf_udst_arg() for details */
+ enum __bpf_usdt_arg_type arg_type;
+ /* offset of referenced register within struct pt_regs */
+ short reg_off;
+ /* whether arg should be interpreted as signed value */
+ bool arg_signed;
+ /* number of bits that need to be cleared and, optionally,
+ * sign-extended to cast arguments that are 1, 2, or 4 bytes
+ * long into final 8-byte u64/s64 value returned to user
+ */
+ char arg_bitshift;
+};
+
+/* should match USDT_MAX_ARG_CNT in usdt.c exactly */
+#define BPF_USDT_MAX_ARG_CNT 12
+struct __bpf_usdt_spec {
+ struct __bpf_usdt_arg_spec args[BPF_USDT_MAX_ARG_CNT];
+ __u64 usdt_cookie;
+ short arg_cnt;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, BPF_USDT_MAX_SPEC_CNT);
+ __type(key, int);
+ __type(value, struct __bpf_usdt_spec);
+} __bpf_usdt_specs SEC(".maps") __weak;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, BPF_USDT_MAX_IP_CNT);
+ __type(key, long);
+ __type(value, __u32);
+} __bpf_usdt_ip_to_spec_id SEC(".maps") __weak;
+
+extern const _Bool LINUX_HAS_BPF_COOKIE __kconfig;
+
+static __always_inline
+int __bpf_usdt_spec_id(struct pt_regs *ctx)
+{
+ if (!LINUX_HAS_BPF_COOKIE) {
+ long ip = PT_REGS_IP(ctx);
+ int *spec_id_ptr;
+
+ spec_id_ptr = bpf_map_lookup_elem(&__bpf_usdt_ip_to_spec_id, &ip);
+ return spec_id_ptr ? *spec_id_ptr : -ESRCH;
+ }
+
+ return bpf_get_attach_cookie(ctx);
+}
+
+/* Return number of USDT arguments defined for currently traced USDT. */
+__weak __hidden
+int bpf_usdt_arg_cnt(struct pt_regs *ctx)
+{
+ struct __bpf_usdt_spec *spec;
+ int spec_id;
+
+ spec_id = __bpf_usdt_spec_id(ctx);
+ if (spec_id < 0)
+ return -ESRCH;
+
+ spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id);
+ if (!spec)
+ return -ESRCH;
+
+ return spec->arg_cnt;
+}
+
+/* Fetch USDT argument #*arg_num* (zero-indexed) and put its value into *res.
+ * Returns 0 on success; negative error, otherwise.
+ * On error *res is guaranteed to be set to zero.
+ */
+__weak __hidden
+int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
+{
+ struct __bpf_usdt_spec *spec;
+ struct __bpf_usdt_arg_spec *arg_spec;
+ unsigned long val;
+ int err, spec_id;
+
+ *res = 0;
+
+ spec_id = __bpf_usdt_spec_id(ctx);
+ if (spec_id < 0)
+ return -ESRCH;
+
+ spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id);
+ if (!spec)
+ return -ESRCH;
+
+ if (arg_num >= BPF_USDT_MAX_ARG_CNT)
+ return -ENOENT;
+ barrier_var(arg_num);
+ if (arg_num >= spec->arg_cnt)
+ return -ENOENT;
+
+ arg_spec = &spec->args[arg_num];
+ switch (arg_spec->arg_type) {
+ case BPF_USDT_ARG_CONST:
+ /* Arg is just a constant ("-4@$-9" in USDT arg spec).
+ * value is recorded in arg_spec->val_off directly.
+ */
+ val = arg_spec->val_off;
+ break;
+ case BPF_USDT_ARG_REG:
+ /* Arg is in a register (e.g, "8@%rax" in USDT arg spec),
+ * so we read the contents of that register directly from
+ * struct pt_regs. To keep things simple user-space parts
+ * record offsetof(struct pt_regs, <regname>) in arg_spec->reg_off.
+ */
+ err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off);
+ if (err)
+ return err;
+ break;
+ case BPF_USDT_ARG_REG_DEREF:
+ /* Arg is in memory addressed by register, plus some offset
+ * (e.g., "-4@-1204(%rbp)" in USDT arg spec). Register is
+ * identified like with BPF_USDT_ARG_REG case, and the offset
+ * is in arg_spec->val_off. We first fetch register contents
+ * from pt_regs, then do another user-space probe read to
+ * fetch argument value itself.
+ */
+ err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off);
+ if (err)
+ return err;
+ err = bpf_probe_read_user(&val, sizeof(val), (void *)val + arg_spec->val_off);
+ if (err)
+ return err;
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ val >>= arg_spec->arg_bitshift;
+#endif
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* cast arg from 1, 2, or 4 bytes to final 8 byte size clearing
+ * necessary upper arg_bitshift bits, with sign extension if argument
+ * is signed
+ */
+ val <<= arg_spec->arg_bitshift;
+ if (arg_spec->arg_signed)
+ val = ((long)val) >> arg_spec->arg_bitshift;
+ else
+ val = val >> arg_spec->arg_bitshift;
+ *res = val;
+ return 0;
+}
+
+/* Retrieve user-specified cookie value provided during attach as
+ * bpf_usdt_opts.usdt_cookie. This serves the same purpose as BPF cookie
+ * returned by bpf_get_attach_cookie(). Libbpf's support for USDT is itself
+ * utilizing BPF cookies internally, so user can't use BPF cookie directly
+ * for USDT programs and has to use bpf_usdt_cookie() API instead.
+ */
+__weak __hidden
+long bpf_usdt_cookie(struct pt_regs *ctx)
+{
+ struct __bpf_usdt_spec *spec;
+ int spec_id;
+
+ spec_id = __bpf_usdt_spec_id(ctx);
+ if (spec_id < 0)
+ return 0;
+
+ spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id);
+ if (!spec)
+ return 0;
+
+ return spec->usdt_cookie;
+}
+
+/* we rely on ___bpf_apply() and ___bpf_narg() macros already defined in bpf_tracing.h */
+#define ___bpf_usdt_args0() ctx
+#define ___bpf_usdt_args1(x) ___bpf_usdt_args0(), ({ long _x; bpf_usdt_arg(ctx, 0, &_x); (void *)_x; })
+#define ___bpf_usdt_args2(x, args...) ___bpf_usdt_args1(args), ({ long _x; bpf_usdt_arg(ctx, 1, &_x); (void *)_x; })
+#define ___bpf_usdt_args3(x, args...) ___bpf_usdt_args2(args), ({ long _x; bpf_usdt_arg(ctx, 2, &_x); (void *)_x; })
+#define ___bpf_usdt_args4(x, args...) ___bpf_usdt_args3(args), ({ long _x; bpf_usdt_arg(ctx, 3, &_x); (void *)_x; })
+#define ___bpf_usdt_args5(x, args...) ___bpf_usdt_args4(args), ({ long _x; bpf_usdt_arg(ctx, 4, &_x); (void *)_x; })
+#define ___bpf_usdt_args6(x, args...) ___bpf_usdt_args5(args), ({ long _x; bpf_usdt_arg(ctx, 5, &_x); (void *)_x; })
+#define ___bpf_usdt_args7(x, args...) ___bpf_usdt_args6(args), ({ long _x; bpf_usdt_arg(ctx, 6, &_x); (void *)_x; })
+#define ___bpf_usdt_args8(x, args...) ___bpf_usdt_args7(args), ({ long _x; bpf_usdt_arg(ctx, 7, &_x); (void *)_x; })
+#define ___bpf_usdt_args9(x, args...) ___bpf_usdt_args8(args), ({ long _x; bpf_usdt_arg(ctx, 8, &_x); (void *)_x; })
+#define ___bpf_usdt_args10(x, args...) ___bpf_usdt_args9(args), ({ long _x; bpf_usdt_arg(ctx, 9, &_x); (void *)_x; })
+#define ___bpf_usdt_args11(x, args...) ___bpf_usdt_args10(args), ({ long _x; bpf_usdt_arg(ctx, 10, &_x); (void *)_x; })
+#define ___bpf_usdt_args12(x, args...) ___bpf_usdt_args11(args), ({ long _x; bpf_usdt_arg(ctx, 11, &_x); (void *)_x; })
+#define ___bpf_usdt_args(args...) ___bpf_apply(___bpf_usdt_args, ___bpf_narg(args))(args)
+
+/*
+ * BPF_USDT serves the same purpose for USDT handlers as BPF_PROG for
+ * tp_btf/fentry/fexit BPF programs and BPF_KPROBE for kprobes.
+ * Original struct pt_regs * context is preserved as 'ctx' argument.
+ */
+#define BPF_USDT(name, args...) \
+name(struct pt_regs *ctx); \
+static __always_inline typeof(name(0)) \
+____##name(struct pt_regs *ctx, ##args); \
+typeof(name(0)) name(struct pt_regs *ctx) \
+{ \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
+ return ____##name(___bpf_usdt_args(args)); \
+ _Pragma("GCC diagnostic pop") \
+} \
+static __always_inline typeof(name(0)) \
+____##name(struct pt_regs *ctx, ##args)
+
+#endif /* __USDT_BPF_H__ */
diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c
new file mode 100644
index 000000000000..93794f01bb67
--- /dev/null
+++ b/tools/lib/bpf/usdt.c
@@ -0,0 +1,1600 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <libelf.h>
+#include <gelf.h>
+#include <unistd.h>
+#include <linux/ptrace.h>
+#include <linux/kernel.h>
+
+/* s8 will be marked as poison while it's a reg of riscv */
+#if defined(__riscv)
+#define rv_s8 s8
+#endif
+
+#include "bpf.h"
+#include "libbpf.h"
+#include "libbpf_common.h"
+#include "libbpf_internal.h"
+#include "hashmap.h"
+
+/* libbpf's USDT support consists of BPF-side state/code and user-space
+ * state/code working together in concert. BPF-side parts are defined in
+ * usdt.bpf.h header library. User-space state is encapsulated by struct
+ * usdt_manager and all the supporting code centered around usdt_manager.
+ *
+ * usdt.bpf.h defines two BPF maps that usdt_manager expects: USDT spec map
+ * and IP-to-spec-ID map, which is auxiliary map necessary for kernels that
+ * don't support BPF cookie (see below). These two maps are implicitly
+ * embedded into user's end BPF object file when user's code included
+ * usdt.bpf.h. This means that libbpf doesn't do anything special to create
+ * these USDT support maps. They are created by normal libbpf logic of
+ * instantiating BPF maps when opening and loading BPF object.
+ *
+ * As such, libbpf is basically unaware of the need to do anything
+ * USDT-related until the very first call to bpf_program__attach_usdt(), which
+ * can be called by user explicitly or happen automatically during skeleton
+ * attach (or, equivalently, through generic bpf_program__attach() call). At
+ * this point, libbpf will instantiate and initialize struct usdt_manager and
+ * store it in bpf_object. USDT manager is per-BPF object construct, as each
+ * independent BPF object might or might not have USDT programs, and thus all
+ * the expected USDT-related state. There is no coordination between two
+ * bpf_object in parts of USDT attachment, they are oblivious of each other's
+ * existence and libbpf is just oblivious, dealing with bpf_object-specific
+ * USDT state.
+ *
+ * Quick crash course on USDTs.
+ *
+ * From user-space application's point of view, USDT is essentially just
+ * a slightly special function call that normally has zero overhead, unless it
+ * is being traced by some external entity (e.g, BPF-based tool). Here's how
+ * a typical application can trigger USDT probe:
+ *
+ * #include <sys/sdt.h> // provided by systemtap-sdt-devel package
+ * // folly also provide similar functionality in folly/tracing/StaticTracepoint.h
+ *
+ * STAP_PROBE3(my_usdt_provider, my_usdt_probe_name, 123, x, &y);
+ *
+ * USDT is identified by it's <provider-name>:<probe-name> pair of names. Each
+ * individual USDT has a fixed number of arguments (3 in the above example)
+ * and specifies values of each argument as if it was a function call.
+ *
+ * USDT call is actually not a function call, but is instead replaced by
+ * a single NOP instruction (thus zero overhead, effectively). But in addition
+ * to that, those USDT macros generate special SHT_NOTE ELF records in
+ * .note.stapsdt ELF section. Here's an example USDT definition as emitted by
+ * `readelf -n <binary>`:
+ *
+ * stapsdt 0x00000089 NT_STAPSDT (SystemTap probe descriptors)
+ * Provider: test
+ * Name: usdt12
+ * Location: 0x0000000000549df3, Base: 0x00000000008effa4, Semaphore: 0x0000000000a4606e
+ * Arguments: -4@-1204(%rbp) -4@%edi -8@-1216(%rbp) -8@%r8 -4@$5 -8@%r9 8@%rdx 8@%r10 -4@$-9 -2@%cx -2@%ax -1@%sil
+ *
+ * In this case we have USDT test:usdt12 with 12 arguments.
+ *
+ * Location and base are offsets used to calculate absolute IP address of that
+ * NOP instruction that kernel can replace with an interrupt instruction to
+ * trigger instrumentation code (BPF program for all that we care about).
+ *
+ * Semaphore above is and optional feature. It records an address of a 2-byte
+ * refcount variable (normally in '.probes' ELF section) used for signaling if
+ * there is anything that is attached to USDT. This is useful for user
+ * applications if, for example, they need to prepare some arguments that are
+ * passed only to USDTs and preparation is expensive. By checking if USDT is
+ * "activated", an application can avoid paying those costs unnecessarily.
+ * Recent enough kernel has built-in support for automatically managing this
+ * refcount, which libbpf expects and relies on. If USDT is defined without
+ * associated semaphore, this value will be zero. See selftests for semaphore
+ * examples.
+ *
+ * Arguments is the most interesting part. This USDT specification string is
+ * providing information about all the USDT arguments and their locations. The
+ * part before @ sign defined byte size of the argument (1, 2, 4, or 8) and
+ * whether the argument is signed or unsigned (negative size means signed).
+ * The part after @ sign is assembly-like definition of argument location
+ * (see [0] for more details). Technically, assembler can provide some pretty
+ * advanced definitions, but libbpf is currently supporting three most common
+ * cases:
+ * 1) immediate constant, see 5th and 9th args above (-4@$5 and -4@-9);
+ * 2) register value, e.g., 8@%rdx, which means "unsigned 8-byte integer
+ * whose value is in register %rdx";
+ * 3) memory dereference addressed by register, e.g., -4@-1204(%rbp), which
+ * specifies signed 32-bit integer stored at offset -1204 bytes from
+ * memory address stored in %rbp.
+ *
+ * [0] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation
+ *
+ * During attachment, libbpf parses all the relevant USDT specifications and
+ * prepares `struct usdt_spec` (USDT spec), which is then provided to BPF-side
+ * code through spec map. This allows BPF applications to quickly fetch the
+ * actual value at runtime using a simple BPF-side code.
+ *
+ * With basics out of the way, let's go over less immediately obvious aspects
+ * of supporting USDTs.
+ *
+ * First, there is no special USDT BPF program type. It is actually just
+ * a uprobe BPF program (which for kernel, at least currently, is just a kprobe
+ * program, so BPF_PROG_TYPE_KPROBE program type). With the only difference
+ * that uprobe is usually attached at the function entry, while USDT will
+ * normally will be somewhere inside the function. But it should always be
+ * pointing to NOP instruction, which makes such uprobes the fastest uprobe
+ * kind.
+ *
+ * Second, it's important to realize that such STAP_PROBEn(provider, name, ...)
+ * macro invocations can end up being inlined many-many times, depending on
+ * specifics of each individual user application. So single conceptual USDT
+ * (identified by provider:name pair of identifiers) is, generally speaking,
+ * multiple uprobe locations (USDT call sites) in different places in user
+ * application. Further, again due to inlining, each USDT call site might end
+ * up having the same argument #N be located in a different place. In one call
+ * site it could be a constant, in another will end up in a register, and in
+ * yet another could be some other register or even somewhere on the stack.
+ *
+ * As such, "attaching to USDT" means (in general case) attaching the same
+ * uprobe BPF program to multiple target locations in user application, each
+ * potentially having a completely different USDT spec associated with it.
+ * To wire all this up together libbpf allocates a unique integer spec ID for
+ * each unique USDT spec. Spec IDs are allocated as sequential small integers
+ * so that they can be used as keys in array BPF map (for performance reasons).
+ * Spec ID allocation and accounting is big part of what usdt_manager is
+ * about. This state has to be maintained per-BPF object and coordinate
+ * between different USDT attachments within the same BPF object.
+ *
+ * Spec ID is the key in spec BPF map, value is the actual USDT spec layed out
+ * as struct usdt_spec. Each invocation of BPF program at runtime needs to
+ * know its associated spec ID. It gets it either through BPF cookie, which
+ * libbpf sets to spec ID during attach time, or, if kernel is too old to
+ * support BPF cookie, through IP-to-spec-ID map that libbpf maintains in such
+ * case. The latter means that some modes of operation can't be supported
+ * without BPF cookie. Such mode is attaching to shared library "generically",
+ * without specifying target process. In such case, it's impossible to
+ * calculate absolute IP addresses for IP-to-spec-ID map, and thus such mode
+ * is not supported without BPF cookie support.
+ *
+ * Note that libbpf is using BPF cookie functionality for its own internal
+ * needs, so user itself can't rely on BPF cookie feature. To that end, libbpf
+ * provides conceptually equivalent USDT cookie support. It's still u64
+ * user-provided value that can be associated with USDT attachment. Note that
+ * this will be the same value for all USDT call sites within the same single
+ * *logical* USDT attachment. This makes sense because to user attaching to
+ * USDT is a single BPF program triggered for singular USDT probe. The fact
+ * that this is done at multiple actual locations is a mostly hidden
+ * implementation details. This USDT cookie value can be fetched with
+ * bpf_usdt_cookie(ctx) API provided by usdt.bpf.h
+ *
+ * Lastly, while single USDT can have tons of USDT call sites, it doesn't
+ * necessarily have that many different USDT specs. It very well might be
+ * that 1000 USDT call sites only need 5 different USDT specs, because all the
+ * arguments are typically contained in a small set of registers or stack
+ * locations. As such, it's wasteful to allocate as many USDT spec IDs as
+ * there are USDT call sites. So libbpf tries to be frugal and performs
+ * on-the-fly deduplication during a single USDT attachment to only allocate
+ * the minimal required amount of unique USDT specs (and thus spec IDs). This
+ * is trivially achieved by using USDT spec string (Arguments string from USDT
+ * note) as a lookup key in a hashmap. USDT spec string uniquely defines
+ * everything about how to fetch USDT arguments, so two USDT call sites
+ * sharing USDT spec string can safely share the same USDT spec and spec ID.
+ * Note, this spec string deduplication is happening only during the same USDT
+ * attachment, so each USDT spec shares the same USDT cookie value. This is
+ * not generally true for other USDT attachments within the same BPF object,
+ * as even if USDT spec string is the same, USDT cookie value can be
+ * different. It was deemed excessive to try to deduplicate across independent
+ * USDT attachments by taking into account USDT spec string *and* USDT cookie
+ * value, which would complicated spec ID accounting significantly for little
+ * gain.
+ */
+
+#define USDT_BASE_SEC ".stapsdt.base"
+#define USDT_SEMA_SEC ".probes"
+#define USDT_NOTE_SEC ".note.stapsdt"
+#define USDT_NOTE_TYPE 3
+#define USDT_NOTE_NAME "stapsdt"
+
+/* should match exactly enum __bpf_usdt_arg_type from usdt.bpf.h */
+enum usdt_arg_type {
+ USDT_ARG_CONST,
+ USDT_ARG_REG,
+ USDT_ARG_REG_DEREF,
+};
+
+/* should match exactly struct __bpf_usdt_arg_spec from usdt.bpf.h */
+struct usdt_arg_spec {
+ __u64 val_off;
+ enum usdt_arg_type arg_type;
+ short reg_off;
+ bool arg_signed;
+ char arg_bitshift;
+};
+
+/* should match BPF_USDT_MAX_ARG_CNT in usdt.bpf.h */
+#define USDT_MAX_ARG_CNT 12
+
+/* should match struct __bpf_usdt_spec from usdt.bpf.h */
+struct usdt_spec {
+ struct usdt_arg_spec args[USDT_MAX_ARG_CNT];
+ __u64 usdt_cookie;
+ short arg_cnt;
+};
+
+struct usdt_note {
+ const char *provider;
+ const char *name;
+ /* USDT args specification string, e.g.:
+ * "-4@%esi -4@-24(%rbp) -4@%ecx 2@%ax 8@%rdx"
+ */
+ const char *args;
+ long loc_addr;
+ long base_addr;
+ long sema_addr;
+};
+
+struct usdt_target {
+ long abs_ip;
+ long rel_ip;
+ long sema_off;
+ struct usdt_spec spec;
+ const char *spec_str;
+};
+
+struct usdt_manager {
+ struct bpf_map *specs_map;
+ struct bpf_map *ip_to_spec_id_map;
+
+ int *free_spec_ids;
+ size_t free_spec_cnt;
+ size_t next_free_spec_id;
+
+ bool has_bpf_cookie;
+ bool has_sema_refcnt;
+ bool has_uprobe_multi;
+};
+
+struct usdt_manager *usdt_manager_new(struct bpf_object *obj)
+{
+ static const char *ref_ctr_sysfs_path = "/sys/bus/event_source/devices/uprobe/format/ref_ctr_offset";
+ struct usdt_manager *man;
+ struct bpf_map *specs_map, *ip_to_spec_id_map;
+
+ specs_map = bpf_object__find_map_by_name(obj, "__bpf_usdt_specs");
+ ip_to_spec_id_map = bpf_object__find_map_by_name(obj, "__bpf_usdt_ip_to_spec_id");
+ if (!specs_map || !ip_to_spec_id_map) {
+ pr_warn("usdt: failed to find USDT support BPF maps, did you forget to include bpf/usdt.bpf.h?\n");
+ return ERR_PTR(-ESRCH);
+ }
+
+ man = calloc(1, sizeof(*man));
+ if (!man)
+ return ERR_PTR(-ENOMEM);
+
+ man->specs_map = specs_map;
+ man->ip_to_spec_id_map = ip_to_spec_id_map;
+
+ /* Detect if BPF cookie is supported for kprobes.
+ * We don't need IP-to-ID mapping if we can use BPF cookies.
+ * Added in: 7adfc6c9b315 ("bpf: Add bpf_get_attach_cookie() BPF helper to access bpf_cookie value")
+ */
+ man->has_bpf_cookie = kernel_supports(obj, FEAT_BPF_COOKIE);
+
+ /* Detect kernel support for automatic refcounting of USDT semaphore.
+ * If this is not supported, USDTs with semaphores will not be supported.
+ * Added in: a6ca88b241d5 ("trace_uprobe: support reference counter in fd-based uprobe")
+ */
+ man->has_sema_refcnt = faccessat(AT_FDCWD, ref_ctr_sysfs_path, F_OK, AT_EACCESS) == 0;
+
+ /*
+ * Detect kernel support for uprobe multi link to be used for attaching
+ * usdt probes.
+ */
+ man->has_uprobe_multi = kernel_supports(obj, FEAT_UPROBE_MULTI_LINK);
+ return man;
+}
+
+void usdt_manager_free(struct usdt_manager *man)
+{
+ if (IS_ERR_OR_NULL(man))
+ return;
+
+ free(man->free_spec_ids);
+ free(man);
+}
+
+static int sanity_check_usdt_elf(Elf *elf, const char *path)
+{
+ GElf_Ehdr ehdr;
+ int endianness;
+
+ if (elf_kind(elf) != ELF_K_ELF) {
+ pr_warn("usdt: unrecognized ELF kind %d for '%s'\n", elf_kind(elf), path);
+ return -EBADF;
+ }
+
+ switch (gelf_getclass(elf)) {
+ case ELFCLASS64:
+ if (sizeof(void *) != 8) {
+ pr_warn("usdt: attaching to 64-bit ELF binary '%s' is not supported\n", path);
+ return -EBADF;
+ }
+ break;
+ case ELFCLASS32:
+ if (sizeof(void *) != 4) {
+ pr_warn("usdt: attaching to 32-bit ELF binary '%s' is not supported\n", path);
+ return -EBADF;
+ }
+ break;
+ default:
+ pr_warn("usdt: unsupported ELF class for '%s'\n", path);
+ return -EBADF;
+ }
+
+ if (!gelf_getehdr(elf, &ehdr))
+ return -EINVAL;
+
+ if (ehdr.e_type != ET_EXEC && ehdr.e_type != ET_DYN) {
+ pr_warn("usdt: unsupported type of ELF binary '%s' (%d), only ET_EXEC and ET_DYN are supported\n",
+ path, ehdr.e_type);
+ return -EBADF;
+ }
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ endianness = ELFDATA2LSB;
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ endianness = ELFDATA2MSB;
+#else
+# error "Unrecognized __BYTE_ORDER__"
+#endif
+ if (endianness != ehdr.e_ident[EI_DATA]) {
+ pr_warn("usdt: ELF endianness mismatch for '%s'\n", path);
+ return -EBADF;
+ }
+
+ return 0;
+}
+
+static int find_elf_sec_by_name(Elf *elf, const char *sec_name, GElf_Shdr *shdr, Elf_Scn **scn)
+{
+ Elf_Scn *sec = NULL;
+ size_t shstrndx;
+
+ if (elf_getshdrstrndx(elf, &shstrndx))
+ return -EINVAL;
+
+ /* check if ELF is corrupted and avoid calling elf_strptr if yes */
+ if (!elf_rawdata(elf_getscn(elf, shstrndx), NULL))
+ return -EINVAL;
+
+ while ((sec = elf_nextscn(elf, sec)) != NULL) {
+ char *name;
+
+ if (!gelf_getshdr(sec, shdr))
+ return -EINVAL;
+
+ name = elf_strptr(elf, shstrndx, shdr->sh_name);
+ if (name && strcmp(sec_name, name) == 0) {
+ *scn = sec;
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
+struct elf_seg {
+ long start;
+ long end;
+ long offset;
+ bool is_exec;
+};
+
+static int cmp_elf_segs(const void *_a, const void *_b)
+{
+ const struct elf_seg *a = _a;
+ const struct elf_seg *b = _b;
+
+ return a->start < b->start ? -1 : 1;
+}
+
+static int parse_elf_segs(Elf *elf, const char *path, struct elf_seg **segs, size_t *seg_cnt)
+{
+ GElf_Phdr phdr;
+ size_t n;
+ int i, err;
+ struct elf_seg *seg;
+ void *tmp;
+
+ *seg_cnt = 0;
+
+ if (elf_getphdrnum(elf, &n)) {
+ err = -errno;
+ return err;
+ }
+
+ for (i = 0; i < n; i++) {
+ if (!gelf_getphdr(elf, i, &phdr)) {
+ err = -errno;
+ return err;
+ }
+
+ pr_debug("usdt: discovered PHDR #%d in '%s': vaddr 0x%lx memsz 0x%lx offset 0x%lx type 0x%lx flags 0x%lx\n",
+ i, path, (long)phdr.p_vaddr, (long)phdr.p_memsz, (long)phdr.p_offset,
+ (long)phdr.p_type, (long)phdr.p_flags);
+ if (phdr.p_type != PT_LOAD)
+ continue;
+
+ tmp = libbpf_reallocarray(*segs, *seg_cnt + 1, sizeof(**segs));
+ if (!tmp)
+ return -ENOMEM;
+
+ *segs = tmp;
+ seg = *segs + *seg_cnt;
+ (*seg_cnt)++;
+
+ seg->start = phdr.p_vaddr;
+ seg->end = phdr.p_vaddr + phdr.p_memsz;
+ seg->offset = phdr.p_offset;
+ seg->is_exec = phdr.p_flags & PF_X;
+ }
+
+ if (*seg_cnt == 0) {
+ pr_warn("usdt: failed to find PT_LOAD program headers in '%s'\n", path);
+ return -ESRCH;
+ }
+
+ qsort(*segs, *seg_cnt, sizeof(**segs), cmp_elf_segs);
+ return 0;
+}
+
+static int parse_vma_segs(int pid, const char *lib_path, struct elf_seg **segs, size_t *seg_cnt)
+{
+ char path[PATH_MAX], line[PATH_MAX], mode[16];
+ size_t seg_start, seg_end, seg_off;
+ struct elf_seg *seg;
+ int tmp_pid, i, err;
+ FILE *f;
+
+ *seg_cnt = 0;
+
+ /* Handle containerized binaries only accessible from
+ * /proc/<pid>/root/<path>. They will be reported as just /<path> in
+ * /proc/<pid>/maps.
+ */
+ if (sscanf(lib_path, "/proc/%d/root%s", &tmp_pid, path) == 2 && pid == tmp_pid)
+ goto proceed;
+
+ if (!realpath(lib_path, path)) {
+ pr_warn("usdt: failed to get absolute path of '%s' (err %d), using path as is...\n",
+ lib_path, -errno);
+ libbpf_strlcpy(path, lib_path, sizeof(path));
+ }
+
+proceed:
+ sprintf(line, "/proc/%d/maps", pid);
+ f = fopen(line, "re");
+ if (!f) {
+ err = -errno;
+ pr_warn("usdt: failed to open '%s' to get base addr of '%s': %d\n",
+ line, lib_path, err);
+ return err;
+ }
+
+ /* We need to handle lines with no path at the end:
+ *
+ * 7f5c6f5d1000-7f5c6f5d3000 rw-p 001c7000 08:04 21238613 /usr/lib64/libc-2.17.so
+ * 7f5c6f5d3000-7f5c6f5d8000 rw-p 00000000 00:00 0
+ * 7f5c6f5d8000-7f5c6f5d9000 r-xp 00000000 103:01 362990598 /data/users/andriin/linux/tools/bpf/usdt/libhello_usdt.so
+ */
+ while (fscanf(f, "%zx-%zx %s %zx %*s %*d%[^\n]\n",
+ &seg_start, &seg_end, mode, &seg_off, line) == 5) {
+ void *tmp;
+
+ /* to handle no path case (see above) we need to capture line
+ * without skipping any whitespaces. So we need to strip
+ * leading whitespaces manually here
+ */
+ i = 0;
+ while (isblank(line[i]))
+ i++;
+ if (strcmp(line + i, path) != 0)
+ continue;
+
+ pr_debug("usdt: discovered segment for lib '%s': addrs %zx-%zx mode %s offset %zx\n",
+ path, seg_start, seg_end, mode, seg_off);
+
+ /* ignore non-executable sections for shared libs */
+ if (mode[2] != 'x')
+ continue;
+
+ tmp = libbpf_reallocarray(*segs, *seg_cnt + 1, sizeof(**segs));
+ if (!tmp) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ *segs = tmp;
+ seg = *segs + *seg_cnt;
+ *seg_cnt += 1;
+
+ seg->start = seg_start;
+ seg->end = seg_end;
+ seg->offset = seg_off;
+ seg->is_exec = true;
+ }
+
+ if (*seg_cnt == 0) {
+ pr_warn("usdt: failed to find '%s' (resolved to '%s') within PID %d memory mappings\n",
+ lib_path, path, pid);
+ err = -ESRCH;
+ goto err_out;
+ }
+
+ qsort(*segs, *seg_cnt, sizeof(**segs), cmp_elf_segs);
+ err = 0;
+err_out:
+ fclose(f);
+ return err;
+}
+
+static struct elf_seg *find_elf_seg(struct elf_seg *segs, size_t seg_cnt, long virtaddr)
+{
+ struct elf_seg *seg;
+ int i;
+
+ /* for ELF binaries (both executables and shared libraries), we are
+ * given virtual address (absolute for executables, relative for
+ * libraries) which should match address range of [seg_start, seg_end)
+ */
+ for (i = 0, seg = segs; i < seg_cnt; i++, seg++) {
+ if (seg->start <= virtaddr && virtaddr < seg->end)
+ return seg;
+ }
+ return NULL;
+}
+
+static struct elf_seg *find_vma_seg(struct elf_seg *segs, size_t seg_cnt, long offset)
+{
+ struct elf_seg *seg;
+ int i;
+
+ /* for VMA segments from /proc/<pid>/maps file, provided "address" is
+ * actually a file offset, so should be fall within logical
+ * offset-based range of [offset_start, offset_end)
+ */
+ for (i = 0, seg = segs; i < seg_cnt; i++, seg++) {
+ if (seg->offset <= offset && offset < seg->offset + (seg->end - seg->start))
+ return seg;
+ }
+ return NULL;
+}
+
+static int parse_usdt_note(Elf *elf, const char *path, GElf_Nhdr *nhdr,
+ const char *data, size_t name_off, size_t desc_off,
+ struct usdt_note *usdt_note);
+
+static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie);
+
+static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *path, pid_t pid,
+ const char *usdt_provider, const char *usdt_name, __u64 usdt_cookie,
+ struct usdt_target **out_targets, size_t *out_target_cnt)
+{
+ size_t off, name_off, desc_off, seg_cnt = 0, vma_seg_cnt = 0, target_cnt = 0;
+ struct elf_seg *segs = NULL, *vma_segs = NULL;
+ struct usdt_target *targets = NULL, *target;
+ long base_addr = 0;
+ Elf_Scn *notes_scn, *base_scn;
+ GElf_Shdr base_shdr, notes_shdr;
+ GElf_Ehdr ehdr;
+ GElf_Nhdr nhdr;
+ Elf_Data *data;
+ int err;
+
+ *out_targets = NULL;
+ *out_target_cnt = 0;
+
+ err = find_elf_sec_by_name(elf, USDT_NOTE_SEC, &notes_shdr, &notes_scn);
+ if (err) {
+ pr_warn("usdt: no USDT notes section (%s) found in '%s'\n", USDT_NOTE_SEC, path);
+ return err;
+ }
+
+ if (notes_shdr.sh_type != SHT_NOTE || !gelf_getehdr(elf, &ehdr)) {
+ pr_warn("usdt: invalid USDT notes section (%s) in '%s'\n", USDT_NOTE_SEC, path);
+ return -EINVAL;
+ }
+
+ err = parse_elf_segs(elf, path, &segs, &seg_cnt);
+ if (err) {
+ pr_warn("usdt: failed to process ELF program segments for '%s': %d\n", path, err);
+ goto err_out;
+ }
+
+ /* .stapsdt.base ELF section is optional, but is used for prelink
+ * offset compensation (see a big comment further below)
+ */
+ if (find_elf_sec_by_name(elf, USDT_BASE_SEC, &base_shdr, &base_scn) == 0)
+ base_addr = base_shdr.sh_addr;
+
+ data = elf_getdata(notes_scn, 0);
+ off = 0;
+ while ((off = gelf_getnote(data, off, &nhdr, &name_off, &desc_off)) > 0) {
+ long usdt_abs_ip, usdt_rel_ip, usdt_sema_off = 0;
+ struct usdt_note note;
+ struct elf_seg *seg = NULL;
+ void *tmp;
+
+ err = parse_usdt_note(elf, path, &nhdr, data->d_buf, name_off, desc_off, &note);
+ if (err)
+ goto err_out;
+
+ if (strcmp(note.provider, usdt_provider) != 0 || strcmp(note.name, usdt_name) != 0)
+ continue;
+
+ /* We need to compensate "prelink effect". See [0] for details,
+ * relevant parts quoted here:
+ *
+ * Each SDT probe also expands into a non-allocated ELF note. You can
+ * find this by looking at SHT_NOTE sections and decoding the format;
+ * see below for details. Because the note is non-allocated, it means
+ * there is no runtime cost, and also preserved in both stripped files
+ * and .debug files.
+ *
+ * However, this means that prelink won't adjust the note's contents
+ * for address offsets. Instead, this is done via the .stapsdt.base
+ * section. This is a special section that is added to the text. We
+ * will only ever have one of these sections in a final link and it
+ * will only ever be one byte long. Nothing about this section itself
+ * matters, we just use it as a marker to detect prelink address
+ * adjustments.
+ *
+ * Each probe note records the link-time address of the .stapsdt.base
+ * section alongside the probe PC address. The decoder compares the
+ * base address stored in the note with the .stapsdt.base section's
+ * sh_addr. Initially these are the same, but the section header will
+ * be adjusted by prelink. So the decoder applies the difference to
+ * the probe PC address to get the correct prelinked PC address; the
+ * same adjustment is applied to the semaphore address, if any.
+ *
+ * [0] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation
+ */
+ usdt_abs_ip = note.loc_addr;
+ if (base_addr)
+ usdt_abs_ip += base_addr - note.base_addr;
+
+ /* When attaching uprobes (which is what USDTs basically are)
+ * kernel expects file offset to be specified, not a relative
+ * virtual address, so we need to translate virtual address to
+ * file offset, for both ET_EXEC and ET_DYN binaries.
+ */
+ seg = find_elf_seg(segs, seg_cnt, usdt_abs_ip);
+ if (!seg) {
+ err = -ESRCH;
+ pr_warn("usdt: failed to find ELF program segment for '%s:%s' in '%s' at IP 0x%lx\n",
+ usdt_provider, usdt_name, path, usdt_abs_ip);
+ goto err_out;
+ }
+ if (!seg->is_exec) {
+ err = -ESRCH;
+ pr_warn("usdt: matched ELF binary '%s' segment [0x%lx, 0x%lx) for '%s:%s' at IP 0x%lx is not executable\n",
+ path, seg->start, seg->end, usdt_provider, usdt_name,
+ usdt_abs_ip);
+ goto err_out;
+ }
+ /* translate from virtual address to file offset */
+ usdt_rel_ip = usdt_abs_ip - seg->start + seg->offset;
+
+ if (ehdr.e_type == ET_DYN && !man->has_bpf_cookie) {
+ /* If we don't have BPF cookie support but need to
+ * attach to a shared library, we'll need to know and
+ * record absolute addresses of attach points due to
+ * the need to lookup USDT spec by absolute IP of
+ * triggered uprobe. Doing this resolution is only
+ * possible when we have a specific PID of the process
+ * that's using specified shared library. BPF cookie
+ * removes the absolute address limitation as we don't
+ * need to do this lookup (we just use BPF cookie as
+ * an index of USDT spec), so for newer kernels with
+ * BPF cookie support libbpf supports USDT attachment
+ * to shared libraries with no PID filter.
+ */
+ if (pid < 0) {
+ pr_warn("usdt: attaching to shared libraries without specific PID is not supported on current kernel\n");
+ err = -ENOTSUP;
+ goto err_out;
+ }
+
+ /* vma_segs are lazily initialized only if necessary */
+ if (vma_seg_cnt == 0) {
+ err = parse_vma_segs(pid, path, &vma_segs, &vma_seg_cnt);
+ if (err) {
+ pr_warn("usdt: failed to get memory segments in PID %d for shared library '%s': %d\n",
+ pid, path, err);
+ goto err_out;
+ }
+ }
+
+ seg = find_vma_seg(vma_segs, vma_seg_cnt, usdt_rel_ip);
+ if (!seg) {
+ err = -ESRCH;
+ pr_warn("usdt: failed to find shared lib memory segment for '%s:%s' in '%s' at relative IP 0x%lx\n",
+ usdt_provider, usdt_name, path, usdt_rel_ip);
+ goto err_out;
+ }
+
+ usdt_abs_ip = seg->start - seg->offset + usdt_rel_ip;
+ }
+
+ pr_debug("usdt: probe for '%s:%s' in %s '%s': addr 0x%lx base 0x%lx (resolved abs_ip 0x%lx rel_ip 0x%lx) args '%s' in segment [0x%lx, 0x%lx) at offset 0x%lx\n",
+ usdt_provider, usdt_name, ehdr.e_type == ET_EXEC ? "exec" : "lib ", path,
+ note.loc_addr, note.base_addr, usdt_abs_ip, usdt_rel_ip, note.args,
+ seg ? seg->start : 0, seg ? seg->end : 0, seg ? seg->offset : 0);
+
+ /* Adjust semaphore address to be a file offset */
+ if (note.sema_addr) {
+ if (!man->has_sema_refcnt) {
+ pr_warn("usdt: kernel doesn't support USDT semaphore refcounting for '%s:%s' in '%s'\n",
+ usdt_provider, usdt_name, path);
+ err = -ENOTSUP;
+ goto err_out;
+ }
+
+ seg = find_elf_seg(segs, seg_cnt, note.sema_addr);
+ if (!seg) {
+ err = -ESRCH;
+ pr_warn("usdt: failed to find ELF loadable segment with semaphore of '%s:%s' in '%s' at 0x%lx\n",
+ usdt_provider, usdt_name, path, note.sema_addr);
+ goto err_out;
+ }
+ if (seg->is_exec) {
+ err = -ESRCH;
+ pr_warn("usdt: matched ELF binary '%s' segment [0x%lx, 0x%lx] for semaphore of '%s:%s' at 0x%lx is executable\n",
+ path, seg->start, seg->end, usdt_provider, usdt_name,
+ note.sema_addr);
+ goto err_out;
+ }
+
+ usdt_sema_off = note.sema_addr - seg->start + seg->offset;
+
+ pr_debug("usdt: sema for '%s:%s' in %s '%s': addr 0x%lx base 0x%lx (resolved 0x%lx) in segment [0x%lx, 0x%lx] at offset 0x%lx\n",
+ usdt_provider, usdt_name, ehdr.e_type == ET_EXEC ? "exec" : "lib ",
+ path, note.sema_addr, note.base_addr, usdt_sema_off,
+ seg->start, seg->end, seg->offset);
+ }
+
+ /* Record adjusted addresses and offsets and parse USDT spec */
+ tmp = libbpf_reallocarray(targets, target_cnt + 1, sizeof(*targets));
+ if (!tmp) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+ targets = tmp;
+
+ target = &targets[target_cnt];
+ memset(target, 0, sizeof(*target));
+
+ target->abs_ip = usdt_abs_ip;
+ target->rel_ip = usdt_rel_ip;
+ target->sema_off = usdt_sema_off;
+
+ /* notes.args references strings from ELF itself, so they can
+ * be referenced safely until elf_end() call
+ */
+ target->spec_str = note.args;
+
+ err = parse_usdt_spec(&target->spec, &note, usdt_cookie);
+ if (err)
+ goto err_out;
+
+ target_cnt++;
+ }
+
+ *out_targets = targets;
+ *out_target_cnt = target_cnt;
+ err = target_cnt;
+
+err_out:
+ free(segs);
+ free(vma_segs);
+ if (err < 0)
+ free(targets);
+ return err;
+}
+
+struct bpf_link_usdt {
+ struct bpf_link link;
+
+ struct usdt_manager *usdt_man;
+
+ size_t spec_cnt;
+ int *spec_ids;
+
+ size_t uprobe_cnt;
+ struct {
+ long abs_ip;
+ struct bpf_link *link;
+ } *uprobes;
+
+ struct bpf_link *multi_link;
+};
+
+static int bpf_link_usdt_detach(struct bpf_link *link)
+{
+ struct bpf_link_usdt *usdt_link = container_of(link, struct bpf_link_usdt, link);
+ struct usdt_manager *man = usdt_link->usdt_man;
+ int i;
+
+ bpf_link__destroy(usdt_link->multi_link);
+
+ /* When having multi_link, uprobe_cnt is 0 */
+ for (i = 0; i < usdt_link->uprobe_cnt; i++) {
+ /* detach underlying uprobe link */
+ bpf_link__destroy(usdt_link->uprobes[i].link);
+ /* there is no need to update specs map because it will be
+ * unconditionally overwritten on subsequent USDT attaches,
+ * but if BPF cookies are not used we need to remove entry
+ * from ip_to_spec_id map, otherwise we'll run into false
+ * conflicting IP errors
+ */
+ if (!man->has_bpf_cookie) {
+ /* not much we can do about errors here */
+ (void)bpf_map_delete_elem(bpf_map__fd(man->ip_to_spec_id_map),
+ &usdt_link->uprobes[i].abs_ip);
+ }
+ }
+
+ /* try to return the list of previously used spec IDs to usdt_manager
+ * for future reuse for subsequent USDT attaches
+ */
+ if (!man->free_spec_ids) {
+ /* if there were no free spec IDs yet, just transfer our IDs */
+ man->free_spec_ids = usdt_link->spec_ids;
+ man->free_spec_cnt = usdt_link->spec_cnt;
+ usdt_link->spec_ids = NULL;
+ } else {
+ /* otherwise concat IDs */
+ size_t new_cnt = man->free_spec_cnt + usdt_link->spec_cnt;
+ int *new_free_ids;
+
+ new_free_ids = libbpf_reallocarray(man->free_spec_ids, new_cnt,
+ sizeof(*new_free_ids));
+ /* If we couldn't resize free_spec_ids, we'll just leak
+ * a bunch of free IDs; this is very unlikely to happen and if
+ * system is so exhausted on memory, it's the least of user's
+ * concerns, probably.
+ * So just do our best here to return those IDs to usdt_manager.
+ * Another edge case when we can legitimately get NULL is when
+ * new_cnt is zero, which can happen in some edge cases, so we
+ * need to be careful about that.
+ */
+ if (new_free_ids || new_cnt == 0) {
+ memcpy(new_free_ids + man->free_spec_cnt, usdt_link->spec_ids,
+ usdt_link->spec_cnt * sizeof(*usdt_link->spec_ids));
+ man->free_spec_ids = new_free_ids;
+ man->free_spec_cnt = new_cnt;
+ }
+ }
+
+ return 0;
+}
+
+static void bpf_link_usdt_dealloc(struct bpf_link *link)
+{
+ struct bpf_link_usdt *usdt_link = container_of(link, struct bpf_link_usdt, link);
+
+ free(usdt_link->spec_ids);
+ free(usdt_link->uprobes);
+ free(usdt_link);
+}
+
+static size_t specs_hash_fn(long key, void *ctx)
+{
+ return str_hash((char *)key);
+}
+
+static bool specs_equal_fn(long key1, long key2, void *ctx)
+{
+ return strcmp((char *)key1, (char *)key2) == 0;
+}
+
+static int allocate_spec_id(struct usdt_manager *man, struct hashmap *specs_hash,
+ struct bpf_link_usdt *link, struct usdt_target *target,
+ int *spec_id, bool *is_new)
+{
+ long tmp;
+ void *new_ids;
+ int err;
+
+ /* check if we already allocated spec ID for this spec string */
+ if (hashmap__find(specs_hash, target->spec_str, &tmp)) {
+ *spec_id = tmp;
+ *is_new = false;
+ return 0;
+ }
+
+ /* otherwise it's a new ID that needs to be set up in specs map and
+ * returned back to usdt_manager when USDT link is detached
+ */
+ new_ids = libbpf_reallocarray(link->spec_ids, link->spec_cnt + 1, sizeof(*link->spec_ids));
+ if (!new_ids)
+ return -ENOMEM;
+ link->spec_ids = new_ids;
+
+ /* get next free spec ID, giving preference to free list, if not empty */
+ if (man->free_spec_cnt) {
+ *spec_id = man->free_spec_ids[man->free_spec_cnt - 1];
+
+ /* cache spec ID for current spec string for future lookups */
+ err = hashmap__add(specs_hash, target->spec_str, *spec_id);
+ if (err)
+ return err;
+
+ man->free_spec_cnt--;
+ } else {
+ /* don't allocate spec ID bigger than what fits in specs map */
+ if (man->next_free_spec_id >= bpf_map__max_entries(man->specs_map))
+ return -E2BIG;
+
+ *spec_id = man->next_free_spec_id;
+
+ /* cache spec ID for current spec string for future lookups */
+ err = hashmap__add(specs_hash, target->spec_str, *spec_id);
+ if (err)
+ return err;
+
+ man->next_free_spec_id++;
+ }
+
+ /* remember new spec ID in the link for later return back to free list on detach */
+ link->spec_ids[link->spec_cnt] = *spec_id;
+ link->spec_cnt++;
+ *is_new = true;
+ return 0;
+}
+
+struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct bpf_program *prog,
+ pid_t pid, const char *path,
+ const char *usdt_provider, const char *usdt_name,
+ __u64 usdt_cookie)
+{
+ unsigned long *offsets = NULL, *ref_ctr_offsets = NULL;
+ int i, err, spec_map_fd, ip_map_fd;
+ LIBBPF_OPTS(bpf_uprobe_opts, opts);
+ struct hashmap *specs_hash = NULL;
+ struct bpf_link_usdt *link = NULL;
+ struct usdt_target *targets = NULL;
+ __u64 *cookies = NULL;
+ struct elf_fd elf_fd;
+ size_t target_cnt;
+
+ spec_map_fd = bpf_map__fd(man->specs_map);
+ ip_map_fd = bpf_map__fd(man->ip_to_spec_id_map);
+
+ err = elf_open(path, &elf_fd);
+ if (err)
+ return libbpf_err_ptr(err);
+
+ err = sanity_check_usdt_elf(elf_fd.elf, path);
+ if (err)
+ goto err_out;
+
+ /* normalize PID filter */
+ if (pid < 0)
+ pid = -1;
+ else if (pid == 0)
+ pid = getpid();
+
+ /* discover USDT in given binary, optionally limiting
+ * activations to a given PID, if pid > 0
+ */
+ err = collect_usdt_targets(man, elf_fd.elf, path, pid, usdt_provider, usdt_name,
+ usdt_cookie, &targets, &target_cnt);
+ if (err <= 0) {
+ err = (err == 0) ? -ENOENT : err;
+ goto err_out;
+ }
+
+ specs_hash = hashmap__new(specs_hash_fn, specs_equal_fn, NULL);
+ if (IS_ERR(specs_hash)) {
+ err = PTR_ERR(specs_hash);
+ goto err_out;
+ }
+
+ link = calloc(1, sizeof(*link));
+ if (!link) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ link->usdt_man = man;
+ link->link.detach = &bpf_link_usdt_detach;
+ link->link.dealloc = &bpf_link_usdt_dealloc;
+
+ if (man->has_uprobe_multi) {
+ offsets = calloc(target_cnt, sizeof(*offsets));
+ cookies = calloc(target_cnt, sizeof(*cookies));
+ ref_ctr_offsets = calloc(target_cnt, sizeof(*ref_ctr_offsets));
+
+ if (!offsets || !ref_ctr_offsets || !cookies) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+ } else {
+ link->uprobes = calloc(target_cnt, sizeof(*link->uprobes));
+ if (!link->uprobes) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+ }
+
+ for (i = 0; i < target_cnt; i++) {
+ struct usdt_target *target = &targets[i];
+ struct bpf_link *uprobe_link;
+ bool is_new;
+ int spec_id;
+
+ /* Spec ID can be either reused or newly allocated. If it is
+ * newly allocated, we'll need to fill out spec map, otherwise
+ * entire spec should be valid and can be just used by a new
+ * uprobe. We reuse spec when USDT arg spec is identical. We
+ * also never share specs between two different USDT
+ * attachments ("links"), so all the reused specs already
+ * share USDT cookie value implicitly.
+ */
+ err = allocate_spec_id(man, specs_hash, link, target, &spec_id, &is_new);
+ if (err)
+ goto err_out;
+
+ if (is_new && bpf_map_update_elem(spec_map_fd, &spec_id, &target->spec, BPF_ANY)) {
+ err = -errno;
+ pr_warn("usdt: failed to set USDT spec #%d for '%s:%s' in '%s': %d\n",
+ spec_id, usdt_provider, usdt_name, path, err);
+ goto err_out;
+ }
+ if (!man->has_bpf_cookie &&
+ bpf_map_update_elem(ip_map_fd, &target->abs_ip, &spec_id, BPF_NOEXIST)) {
+ err = -errno;
+ if (err == -EEXIST) {
+ pr_warn("usdt: IP collision detected for spec #%d for '%s:%s' in '%s'\n",
+ spec_id, usdt_provider, usdt_name, path);
+ } else {
+ pr_warn("usdt: failed to map IP 0x%lx to spec #%d for '%s:%s' in '%s': %d\n",
+ target->abs_ip, spec_id, usdt_provider, usdt_name,
+ path, err);
+ }
+ goto err_out;
+ }
+
+ if (man->has_uprobe_multi) {
+ offsets[i] = target->rel_ip;
+ ref_ctr_offsets[i] = target->sema_off;
+ cookies[i] = spec_id;
+ } else {
+ opts.ref_ctr_offset = target->sema_off;
+ opts.bpf_cookie = man->has_bpf_cookie ? spec_id : 0;
+ uprobe_link = bpf_program__attach_uprobe_opts(prog, pid, path,
+ target->rel_ip, &opts);
+ err = libbpf_get_error(uprobe_link);
+ if (err) {
+ pr_warn("usdt: failed to attach uprobe #%d for '%s:%s' in '%s': %d\n",
+ i, usdt_provider, usdt_name, path, err);
+ goto err_out;
+ }
+
+ link->uprobes[i].link = uprobe_link;
+ link->uprobes[i].abs_ip = target->abs_ip;
+ link->uprobe_cnt++;
+ }
+ }
+
+ if (man->has_uprobe_multi) {
+ LIBBPF_OPTS(bpf_uprobe_multi_opts, opts_multi,
+ .ref_ctr_offsets = ref_ctr_offsets,
+ .offsets = offsets,
+ .cookies = cookies,
+ .cnt = target_cnt,
+ );
+
+ link->multi_link = bpf_program__attach_uprobe_multi(prog, pid, path,
+ NULL, &opts_multi);
+ if (!link->multi_link) {
+ err = -errno;
+ pr_warn("usdt: failed to attach uprobe multi for '%s:%s' in '%s': %d\n",
+ usdt_provider, usdt_name, path, err);
+ goto err_out;
+ }
+
+ free(offsets);
+ free(ref_ctr_offsets);
+ free(cookies);
+ }
+
+ free(targets);
+ hashmap__free(specs_hash);
+ elf_close(&elf_fd);
+ return &link->link;
+
+err_out:
+ free(offsets);
+ free(ref_ctr_offsets);
+ free(cookies);
+
+ if (link)
+ bpf_link__destroy(&link->link);
+ free(targets);
+ hashmap__free(specs_hash);
+ elf_close(&elf_fd);
+ return libbpf_err_ptr(err);
+}
+
+/* Parse out USDT ELF note from '.note.stapsdt' section.
+ * Logic inspired by perf's code.
+ */
+static int parse_usdt_note(Elf *elf, const char *path, GElf_Nhdr *nhdr,
+ const char *data, size_t name_off, size_t desc_off,
+ struct usdt_note *note)
+{
+ const char *provider, *name, *args;
+ long addrs[3];
+ size_t len;
+
+ /* sanity check USDT note name and type first */
+ if (strncmp(data + name_off, USDT_NOTE_NAME, nhdr->n_namesz) != 0)
+ return -EINVAL;
+ if (nhdr->n_type != USDT_NOTE_TYPE)
+ return -EINVAL;
+
+ /* sanity check USDT note contents ("description" in ELF terminology) */
+ len = nhdr->n_descsz;
+ data = data + desc_off;
+
+ /* +3 is the very minimum required to store three empty strings */
+ if (len < sizeof(addrs) + 3)
+ return -EINVAL;
+
+ /* get location, base, and semaphore addrs */
+ memcpy(&addrs, data, sizeof(addrs));
+
+ /* parse string fields: provider, name, args */
+ provider = data + sizeof(addrs);
+
+ name = (const char *)memchr(provider, '\0', data + len - provider);
+ if (!name) /* non-zero-terminated provider */
+ return -EINVAL;
+ name++;
+ if (name >= data + len || *name == '\0') /* missing or empty name */
+ return -EINVAL;
+
+ args = memchr(name, '\0', data + len - name);
+ if (!args) /* non-zero-terminated name */
+ return -EINVAL;
+ ++args;
+ if (args >= data + len) /* missing arguments spec */
+ return -EINVAL;
+
+ note->provider = provider;
+ note->name = name;
+ if (*args == '\0' || *args == ':')
+ note->args = "";
+ else
+ note->args = args;
+ note->loc_addr = addrs[0];
+ note->base_addr = addrs[1];
+ note->sema_addr = addrs[2];
+
+ return 0;
+}
+
+static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz);
+
+static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie)
+{
+ struct usdt_arg_spec *arg;
+ const char *s;
+ int arg_sz, len;
+
+ spec->usdt_cookie = usdt_cookie;
+ spec->arg_cnt = 0;
+
+ s = note->args;
+ while (s[0]) {
+ if (spec->arg_cnt >= USDT_MAX_ARG_CNT) {
+ pr_warn("usdt: too many USDT arguments (> %d) for '%s:%s' with args spec '%s'\n",
+ USDT_MAX_ARG_CNT, note->provider, note->name, note->args);
+ return -E2BIG;
+ }
+
+ arg = &spec->args[spec->arg_cnt];
+ len = parse_usdt_arg(s, spec->arg_cnt, arg, &arg_sz);
+ if (len < 0)
+ return len;
+
+ arg->arg_signed = arg_sz < 0;
+ if (arg_sz < 0)
+ arg_sz = -arg_sz;
+
+ switch (arg_sz) {
+ case 1: case 2: case 4: case 8:
+ arg->arg_bitshift = 64 - arg_sz * 8;
+ break;
+ default:
+ pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n",
+ spec->arg_cnt, s, arg_sz);
+ return -EINVAL;
+ }
+
+ s += len;
+ spec->arg_cnt++;
+ }
+
+ return 0;
+}
+
+/* Architecture-specific logic for parsing USDT argument location specs */
+
+#if defined(__x86_64__) || defined(__i386__)
+
+static int calc_pt_regs_off(const char *reg_name)
+{
+ static struct {
+ const char *names[4];
+ size_t pt_regs_off;
+ } reg_map[] = {
+#ifdef __x86_64__
+#define reg_off(reg64, reg32) offsetof(struct pt_regs, reg64)
+#else
+#define reg_off(reg64, reg32) offsetof(struct pt_regs, reg32)
+#endif
+ { {"rip", "eip", "", ""}, reg_off(rip, eip) },
+ { {"rax", "eax", "ax", "al"}, reg_off(rax, eax) },
+ { {"rbx", "ebx", "bx", "bl"}, reg_off(rbx, ebx) },
+ { {"rcx", "ecx", "cx", "cl"}, reg_off(rcx, ecx) },
+ { {"rdx", "edx", "dx", "dl"}, reg_off(rdx, edx) },
+ { {"rsi", "esi", "si", "sil"}, reg_off(rsi, esi) },
+ { {"rdi", "edi", "di", "dil"}, reg_off(rdi, edi) },
+ { {"rbp", "ebp", "bp", "bpl"}, reg_off(rbp, ebp) },
+ { {"rsp", "esp", "sp", "spl"}, reg_off(rsp, esp) },
+#undef reg_off
+#ifdef __x86_64__
+ { {"r8", "r8d", "r8w", "r8b"}, offsetof(struct pt_regs, r8) },
+ { {"r9", "r9d", "r9w", "r9b"}, offsetof(struct pt_regs, r9) },
+ { {"r10", "r10d", "r10w", "r10b"}, offsetof(struct pt_regs, r10) },
+ { {"r11", "r11d", "r11w", "r11b"}, offsetof(struct pt_regs, r11) },
+ { {"r12", "r12d", "r12w", "r12b"}, offsetof(struct pt_regs, r12) },
+ { {"r13", "r13d", "r13w", "r13b"}, offsetof(struct pt_regs, r13) },
+ { {"r14", "r14d", "r14w", "r14b"}, offsetof(struct pt_regs, r14) },
+ { {"r15", "r15d", "r15w", "r15b"}, offsetof(struct pt_regs, r15) },
+#endif
+ };
+ int i, j;
+
+ for (i = 0; i < ARRAY_SIZE(reg_map); i++) {
+ for (j = 0; j < ARRAY_SIZE(reg_map[i].names); j++) {
+ if (strcmp(reg_name, reg_map[i].names[j]) == 0)
+ return reg_map[i].pt_regs_off;
+ }
+ }
+
+ pr_warn("usdt: unrecognized register '%s'\n", reg_name);
+ return -ENOENT;
+}
+
+static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz)
+{
+ char reg_name[16];
+ int len, reg_off;
+ long off;
+
+ if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n", arg_sz, &off, reg_name, &len) == 3) {
+ /* Memory dereference case, e.g., -4@-20(%rbp) */
+ arg->arg_type = USDT_ARG_REG_DEREF;
+ arg->val_off = off;
+ reg_off = calc_pt_regs_off(reg_name);
+ if (reg_off < 0)
+ return reg_off;
+ arg->reg_off = reg_off;
+ } else if (sscanf(arg_str, " %d @ ( %%%15[^)] ) %n", arg_sz, reg_name, &len) == 2) {
+ /* Memory dereference case without offset, e.g., 8@(%rsp) */
+ arg->arg_type = USDT_ARG_REG_DEREF;
+ arg->val_off = 0;
+ reg_off = calc_pt_regs_off(reg_name);
+ if (reg_off < 0)
+ return reg_off;
+ arg->reg_off = reg_off;
+ } else if (sscanf(arg_str, " %d @ %%%15s %n", arg_sz, reg_name, &len) == 2) {
+ /* Register read case, e.g., -4@%eax */
+ arg->arg_type = USDT_ARG_REG;
+ arg->val_off = 0;
+
+ reg_off = calc_pt_regs_off(reg_name);
+ if (reg_off < 0)
+ return reg_off;
+ arg->reg_off = reg_off;
+ } else if (sscanf(arg_str, " %d @ $%ld %n", arg_sz, &off, &len) == 2) {
+ /* Constant value case, e.g., 4@$71 */
+ arg->arg_type = USDT_ARG_CONST;
+ arg->val_off = off;
+ arg->reg_off = 0;
+ } else {
+ pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str);
+ return -EINVAL;
+ }
+
+ return len;
+}
+
+#elif defined(__s390x__)
+
+/* Do not support __s390__ for now, since user_pt_regs is broken with -m31. */
+
+static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz)
+{
+ unsigned int reg;
+ int len;
+ long off;
+
+ if (sscanf(arg_str, " %d @ %ld ( %%r%u ) %n", arg_sz, &off, &reg, &len) == 3) {
+ /* Memory dereference case, e.g., -2@-28(%r15) */
+ arg->arg_type = USDT_ARG_REG_DEREF;
+ arg->val_off = off;
+ if (reg > 15) {
+ pr_warn("usdt: unrecognized register '%%r%u'\n", reg);
+ return -EINVAL;
+ }
+ arg->reg_off = offsetof(user_pt_regs, gprs[reg]);
+ } else if (sscanf(arg_str, " %d @ %%r%u %n", arg_sz, &reg, &len) == 2) {
+ /* Register read case, e.g., -8@%r0 */
+ arg->arg_type = USDT_ARG_REG;
+ arg->val_off = 0;
+ if (reg > 15) {
+ pr_warn("usdt: unrecognized register '%%r%u'\n", reg);
+ return -EINVAL;
+ }
+ arg->reg_off = offsetof(user_pt_regs, gprs[reg]);
+ } else if (sscanf(arg_str, " %d @ %ld %n", arg_sz, &off, &len) == 2) {
+ /* Constant value case, e.g., 4@71 */
+ arg->arg_type = USDT_ARG_CONST;
+ arg->val_off = off;
+ arg->reg_off = 0;
+ } else {
+ pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str);
+ return -EINVAL;
+ }
+
+ return len;
+}
+
+#elif defined(__aarch64__)
+
+static int calc_pt_regs_off(const char *reg_name)
+{
+ int reg_num;
+
+ if (sscanf(reg_name, "x%d", &reg_num) == 1) {
+ if (reg_num >= 0 && reg_num < 31)
+ return offsetof(struct user_pt_regs, regs[reg_num]);
+ } else if (strcmp(reg_name, "sp") == 0) {
+ return offsetof(struct user_pt_regs, sp);
+ }
+ pr_warn("usdt: unrecognized register '%s'\n", reg_name);
+ return -ENOENT;
+}
+
+static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz)
+{
+ char reg_name[16];
+ int len, reg_off;
+ long off;
+
+ if (sscanf(arg_str, " %d @ \[ %15[a-z0-9] , %ld ] %n", arg_sz, reg_name, &off, &len) == 3) {
+ /* Memory dereference case, e.g., -4@[sp, 96] */
+ arg->arg_type = USDT_ARG_REG_DEREF;
+ arg->val_off = off;
+ reg_off = calc_pt_regs_off(reg_name);
+ if (reg_off < 0)
+ return reg_off;
+ arg->reg_off = reg_off;
+ } else if (sscanf(arg_str, " %d @ \[ %15[a-z0-9] ] %n", arg_sz, reg_name, &len) == 2) {
+ /* Memory dereference case, e.g., -4@[sp] */
+ arg->arg_type = USDT_ARG_REG_DEREF;
+ arg->val_off = 0;
+ reg_off = calc_pt_regs_off(reg_name);
+ if (reg_off < 0)
+ return reg_off;
+ arg->reg_off = reg_off;
+ } else if (sscanf(arg_str, " %d @ %ld %n", arg_sz, &off, &len) == 2) {
+ /* Constant value case, e.g., 4@5 */
+ arg->arg_type = USDT_ARG_CONST;
+ arg->val_off = off;
+ arg->reg_off = 0;
+ } else if (sscanf(arg_str, " %d @ %15[a-z0-9] %n", arg_sz, reg_name, &len) == 2) {
+ /* Register read case, e.g., -8@x4 */
+ arg->arg_type = USDT_ARG_REG;
+ arg->val_off = 0;
+ reg_off = calc_pt_regs_off(reg_name);
+ if (reg_off < 0)
+ return reg_off;
+ arg->reg_off = reg_off;
+ } else {
+ pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str);
+ return -EINVAL;
+ }
+
+ return len;
+}
+
+#elif defined(__riscv)
+
+static int calc_pt_regs_off(const char *reg_name)
+{
+ static struct {
+ const char *name;
+ size_t pt_regs_off;
+ } reg_map[] = {
+ { "ra", offsetof(struct user_regs_struct, ra) },
+ { "sp", offsetof(struct user_regs_struct, sp) },
+ { "gp", offsetof(struct user_regs_struct, gp) },
+ { "tp", offsetof(struct user_regs_struct, tp) },
+ { "a0", offsetof(struct user_regs_struct, a0) },
+ { "a1", offsetof(struct user_regs_struct, a1) },
+ { "a2", offsetof(struct user_regs_struct, a2) },
+ { "a3", offsetof(struct user_regs_struct, a3) },
+ { "a4", offsetof(struct user_regs_struct, a4) },
+ { "a5", offsetof(struct user_regs_struct, a5) },
+ { "a6", offsetof(struct user_regs_struct, a6) },
+ { "a7", offsetof(struct user_regs_struct, a7) },
+ { "s0", offsetof(struct user_regs_struct, s0) },
+ { "s1", offsetof(struct user_regs_struct, s1) },
+ { "s2", offsetof(struct user_regs_struct, s2) },
+ { "s3", offsetof(struct user_regs_struct, s3) },
+ { "s4", offsetof(struct user_regs_struct, s4) },
+ { "s5", offsetof(struct user_regs_struct, s5) },
+ { "s6", offsetof(struct user_regs_struct, s6) },
+ { "s7", offsetof(struct user_regs_struct, s7) },
+ { "s8", offsetof(struct user_regs_struct, rv_s8) },
+ { "s9", offsetof(struct user_regs_struct, s9) },
+ { "s10", offsetof(struct user_regs_struct, s10) },
+ { "s11", offsetof(struct user_regs_struct, s11) },
+ { "t0", offsetof(struct user_regs_struct, t0) },
+ { "t1", offsetof(struct user_regs_struct, t1) },
+ { "t2", offsetof(struct user_regs_struct, t2) },
+ { "t3", offsetof(struct user_regs_struct, t3) },
+ { "t4", offsetof(struct user_regs_struct, t4) },
+ { "t5", offsetof(struct user_regs_struct, t5) },
+ { "t6", offsetof(struct user_regs_struct, t6) },
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(reg_map); i++) {
+ if (strcmp(reg_name, reg_map[i].name) == 0)
+ return reg_map[i].pt_regs_off;
+ }
+
+ pr_warn("usdt: unrecognized register '%s'\n", reg_name);
+ return -ENOENT;
+}
+
+static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz)
+{
+ char reg_name[16];
+ int len, reg_off;
+ long off;
+
+ if (sscanf(arg_str, " %d @ %ld ( %15[a-z0-9] ) %n", arg_sz, &off, reg_name, &len) == 3) {
+ /* Memory dereference case, e.g., -8@-88(s0) */
+ arg->arg_type = USDT_ARG_REG_DEREF;
+ arg->val_off = off;
+ reg_off = calc_pt_regs_off(reg_name);
+ if (reg_off < 0)
+ return reg_off;
+ arg->reg_off = reg_off;
+ } else if (sscanf(arg_str, " %d @ %ld %n", arg_sz, &off, &len) == 2) {
+ /* Constant value case, e.g., 4@5 */
+ arg->arg_type = USDT_ARG_CONST;
+ arg->val_off = off;
+ arg->reg_off = 0;
+ } else if (sscanf(arg_str, " %d @ %15[a-z0-9] %n", arg_sz, reg_name, &len) == 2) {
+ /* Register read case, e.g., -8@a1 */
+ arg->arg_type = USDT_ARG_REG;
+ arg->val_off = 0;
+ reg_off = calc_pt_regs_off(reg_name);
+ if (reg_off < 0)
+ return reg_off;
+ arg->reg_off = reg_off;
+ } else {
+ pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str);
+ return -EINVAL;
+ }
+
+ return len;
+}
+
+#elif defined(__arm__)
+
+static int calc_pt_regs_off(const char *reg_name)
+{
+ static struct {
+ const char *name;
+ size_t pt_regs_off;
+ } reg_map[] = {
+ { "r0", offsetof(struct pt_regs, uregs[0]) },
+ { "r1", offsetof(struct pt_regs, uregs[1]) },
+ { "r2", offsetof(struct pt_regs, uregs[2]) },
+ { "r3", offsetof(struct pt_regs, uregs[3]) },
+ { "r4", offsetof(struct pt_regs, uregs[4]) },
+ { "r5", offsetof(struct pt_regs, uregs[5]) },
+ { "r6", offsetof(struct pt_regs, uregs[6]) },
+ { "r7", offsetof(struct pt_regs, uregs[7]) },
+ { "r8", offsetof(struct pt_regs, uregs[8]) },
+ { "r9", offsetof(struct pt_regs, uregs[9]) },
+ { "r10", offsetof(struct pt_regs, uregs[10]) },
+ { "fp", offsetof(struct pt_regs, uregs[11]) },
+ { "ip", offsetof(struct pt_regs, uregs[12]) },
+ { "sp", offsetof(struct pt_regs, uregs[13]) },
+ { "lr", offsetof(struct pt_regs, uregs[14]) },
+ { "pc", offsetof(struct pt_regs, uregs[15]) },
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(reg_map); i++) {
+ if (strcmp(reg_name, reg_map[i].name) == 0)
+ return reg_map[i].pt_regs_off;
+ }
+
+ pr_warn("usdt: unrecognized register '%s'\n", reg_name);
+ return -ENOENT;
+}
+
+static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz)
+{
+ char reg_name[16];
+ int len, reg_off;
+ long off;
+
+ if (sscanf(arg_str, " %d @ \[ %15[a-z0-9] , #%ld ] %n",
+ arg_sz, reg_name, &off, &len) == 3) {
+ /* Memory dereference case, e.g., -4@[fp, #96] */
+ arg->arg_type = USDT_ARG_REG_DEREF;
+ arg->val_off = off;
+ reg_off = calc_pt_regs_off(reg_name);
+ if (reg_off < 0)
+ return reg_off;
+ arg->reg_off = reg_off;
+ } else if (sscanf(arg_str, " %d @ \[ %15[a-z0-9] ] %n", arg_sz, reg_name, &len) == 2) {
+ /* Memory dereference case, e.g., -4@[sp] */
+ arg->arg_type = USDT_ARG_REG_DEREF;
+ arg->val_off = 0;
+ reg_off = calc_pt_regs_off(reg_name);
+ if (reg_off < 0)
+ return reg_off;
+ arg->reg_off = reg_off;
+ } else if (sscanf(arg_str, " %d @ #%ld %n", arg_sz, &off, &len) == 2) {
+ /* Constant value case, e.g., 4@#5 */
+ arg->arg_type = USDT_ARG_CONST;
+ arg->val_off = off;
+ arg->reg_off = 0;
+ } else if (sscanf(arg_str, " %d @ %15[a-z0-9] %n", arg_sz, reg_name, &len) == 2) {
+ /* Register read case, e.g., -8@r4 */
+ arg->arg_type = USDT_ARG_REG;
+ arg->val_off = 0;
+ reg_off = calc_pt_regs_off(reg_name);
+ if (reg_off < 0)
+ return reg_off;
+ arg->reg_off = reg_off;
+ } else {
+ pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str);
+ return -EINVAL;
+ }
+
+ return len;
+}
+
+#else
+
+static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz)
+{
+ pr_warn("usdt: libbpf doesn't support USDTs on current architecture\n");
+ return -ENOTSUP;
+}
+
+#endif
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
deleted file mode 100644
index e3c98c007825..000000000000
--- a/tools/lib/bpf/xsk.c
+++ /dev/null
@@ -1,927 +0,0 @@
-// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
-
-/*
- * AF_XDP user-space access library.
- *
- * Copyright(c) 2018 - 2019 Intel Corporation.
- *
- * Author(s): Magnus Karlsson <magnus.karlsson@intel.com>
- */
-
-#include <errno.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <arpa/inet.h>
-#include <asm/barrier.h>
-#include <linux/compiler.h>
-#include <linux/ethtool.h>
-#include <linux/filter.h>
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-#include <linux/if_xdp.h>
-#include <linux/kernel.h>
-#include <linux/list.h>
-#include <linux/sockios.h>
-#include <net/if.h>
-#include <sys/ioctl.h>
-#include <sys/mman.h>
-#include <sys/socket.h>
-#include <sys/types.h>
-
-#include "bpf.h"
-#include "libbpf.h"
-#include "libbpf_internal.h"
-#include "xsk.h"
-
-#ifndef SOL_XDP
- #define SOL_XDP 283
-#endif
-
-#ifndef AF_XDP
- #define AF_XDP 44
-#endif
-
-#ifndef PF_XDP
- #define PF_XDP AF_XDP
-#endif
-
-struct xsk_umem {
- struct xsk_ring_prod *fill_save;
- struct xsk_ring_cons *comp_save;
- char *umem_area;
- struct xsk_umem_config config;
- int fd;
- int refcount;
- struct list_head ctx_list;
-};
-
-struct xsk_ctx {
- struct xsk_ring_prod *fill;
- struct xsk_ring_cons *comp;
- __u32 queue_id;
- struct xsk_umem *umem;
- int refcount;
- int ifindex;
- struct list_head list;
- int prog_fd;
- int xsks_map_fd;
- char ifname[IFNAMSIZ];
-};
-
-struct xsk_socket {
- struct xsk_ring_cons *rx;
- struct xsk_ring_prod *tx;
- __u64 outstanding_tx;
- struct xsk_ctx *ctx;
- struct xsk_socket_config config;
- int fd;
-};
-
-struct xsk_nl_info {
- bool xdp_prog_attached;
- int ifindex;
- int fd;
-};
-
-/* Up until and including Linux 5.3 */
-struct xdp_ring_offset_v1 {
- __u64 producer;
- __u64 consumer;
- __u64 desc;
-};
-
-/* Up until and including Linux 5.3 */
-struct xdp_mmap_offsets_v1 {
- struct xdp_ring_offset_v1 rx;
- struct xdp_ring_offset_v1 tx;
- struct xdp_ring_offset_v1 fr;
- struct xdp_ring_offset_v1 cr;
-};
-
-int xsk_umem__fd(const struct xsk_umem *umem)
-{
- return umem ? umem->fd : -EINVAL;
-}
-
-int xsk_socket__fd(const struct xsk_socket *xsk)
-{
- return xsk ? xsk->fd : -EINVAL;
-}
-
-static bool xsk_page_aligned(void *buffer)
-{
- unsigned long addr = (unsigned long)buffer;
-
- return !(addr & (getpagesize() - 1));
-}
-
-static void xsk_set_umem_config(struct xsk_umem_config *cfg,
- const struct xsk_umem_config *usr_cfg)
-{
- if (!usr_cfg) {
- cfg->fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
- cfg->comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
- cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
- cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
- cfg->flags = XSK_UMEM__DEFAULT_FLAGS;
- return;
- }
-
- cfg->fill_size = usr_cfg->fill_size;
- cfg->comp_size = usr_cfg->comp_size;
- cfg->frame_size = usr_cfg->frame_size;
- cfg->frame_headroom = usr_cfg->frame_headroom;
- cfg->flags = usr_cfg->flags;
-}
-
-static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
- const struct xsk_socket_config *usr_cfg)
-{
- if (!usr_cfg) {
- cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
- cfg->tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
- cfg->libbpf_flags = 0;
- cfg->xdp_flags = 0;
- cfg->bind_flags = 0;
- return 0;
- }
-
- if (usr_cfg->libbpf_flags & ~XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)
- return -EINVAL;
-
- cfg->rx_size = usr_cfg->rx_size;
- cfg->tx_size = usr_cfg->tx_size;
- cfg->libbpf_flags = usr_cfg->libbpf_flags;
- cfg->xdp_flags = usr_cfg->xdp_flags;
- cfg->bind_flags = usr_cfg->bind_flags;
-
- return 0;
-}
-
-static void xsk_mmap_offsets_v1(struct xdp_mmap_offsets *off)
-{
- struct xdp_mmap_offsets_v1 off_v1;
-
- /* getsockopt on a kernel <= 5.3 has no flags fields.
- * Copy over the offsets to the correct places in the >=5.4 format
- * and put the flags where they would have been on that kernel.
- */
- memcpy(&off_v1, off, sizeof(off_v1));
-
- off->rx.producer = off_v1.rx.producer;
- off->rx.consumer = off_v1.rx.consumer;
- off->rx.desc = off_v1.rx.desc;
- off->rx.flags = off_v1.rx.consumer + sizeof(__u32);
-
- off->tx.producer = off_v1.tx.producer;
- off->tx.consumer = off_v1.tx.consumer;
- off->tx.desc = off_v1.tx.desc;
- off->tx.flags = off_v1.tx.consumer + sizeof(__u32);
-
- off->fr.producer = off_v1.fr.producer;
- off->fr.consumer = off_v1.fr.consumer;
- off->fr.desc = off_v1.fr.desc;
- off->fr.flags = off_v1.fr.consumer + sizeof(__u32);
-
- off->cr.producer = off_v1.cr.producer;
- off->cr.consumer = off_v1.cr.consumer;
- off->cr.desc = off_v1.cr.desc;
- off->cr.flags = off_v1.cr.consumer + sizeof(__u32);
-}
-
-static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off)
-{
- socklen_t optlen;
- int err;
-
- optlen = sizeof(*off);
- err = getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, off, &optlen);
- if (err)
- return err;
-
- if (optlen == sizeof(*off))
- return 0;
-
- if (optlen == sizeof(struct xdp_mmap_offsets_v1)) {
- xsk_mmap_offsets_v1(off);
- return 0;
- }
-
- return -EINVAL;
-}
-
-static int xsk_create_umem_rings(struct xsk_umem *umem, int fd,
- struct xsk_ring_prod *fill,
- struct xsk_ring_cons *comp)
-{
- struct xdp_mmap_offsets off;
- void *map;
- int err;
-
- err = setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING,
- &umem->config.fill_size,
- sizeof(umem->config.fill_size));
- if (err)
- return -errno;
-
- err = setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING,
- &umem->config.comp_size,
- sizeof(umem->config.comp_size));
- if (err)
- return -errno;
-
- err = xsk_get_mmap_offsets(fd, &off);
- if (err)
- return -errno;
-
- map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64),
- PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
- XDP_UMEM_PGOFF_FILL_RING);
- if (map == MAP_FAILED)
- return -errno;
-
- fill->mask = umem->config.fill_size - 1;
- fill->size = umem->config.fill_size;
- fill->producer = map + off.fr.producer;
- fill->consumer = map + off.fr.consumer;
- fill->flags = map + off.fr.flags;
- fill->ring = map + off.fr.desc;
- fill->cached_cons = umem->config.fill_size;
-
- map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64),
- PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
- XDP_UMEM_PGOFF_COMPLETION_RING);
- if (map == MAP_FAILED) {
- err = -errno;
- goto out_mmap;
- }
-
- comp->mask = umem->config.comp_size - 1;
- comp->size = umem->config.comp_size;
- comp->producer = map + off.cr.producer;
- comp->consumer = map + off.cr.consumer;
- comp->flags = map + off.cr.flags;
- comp->ring = map + off.cr.desc;
-
- return 0;
-
-out_mmap:
- munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64));
- return err;
-}
-
-int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
- __u64 size, struct xsk_ring_prod *fill,
- struct xsk_ring_cons *comp,
- const struct xsk_umem_config *usr_config)
-{
- struct xdp_umem_reg mr;
- struct xsk_umem *umem;
- int err;
-
- if (!umem_area || !umem_ptr || !fill || !comp)
- return -EFAULT;
- if (!size && !xsk_page_aligned(umem_area))
- return -EINVAL;
-
- umem = calloc(1, sizeof(*umem));
- if (!umem)
- return -ENOMEM;
-
- umem->fd = socket(AF_XDP, SOCK_RAW, 0);
- if (umem->fd < 0) {
- err = -errno;
- goto out_umem_alloc;
- }
-
- umem->umem_area = umem_area;
- INIT_LIST_HEAD(&umem->ctx_list);
- xsk_set_umem_config(&umem->config, usr_config);
-
- memset(&mr, 0, sizeof(mr));
- mr.addr = (uintptr_t)umem_area;
- mr.len = size;
- mr.chunk_size = umem->config.frame_size;
- mr.headroom = umem->config.frame_headroom;
- mr.flags = umem->config.flags;
-
- err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
- if (err) {
- err = -errno;
- goto out_socket;
- }
-
- err = xsk_create_umem_rings(umem, umem->fd, fill, comp);
- if (err)
- goto out_socket;
-
- umem->fill_save = fill;
- umem->comp_save = comp;
- *umem_ptr = umem;
- return 0;
-
-out_socket:
- close(umem->fd);
-out_umem_alloc:
- free(umem);
- return err;
-}
-
-struct xsk_umem_config_v1 {
- __u32 fill_size;
- __u32 comp_size;
- __u32 frame_size;
- __u32 frame_headroom;
-};
-
-int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area,
- __u64 size, struct xsk_ring_prod *fill,
- struct xsk_ring_cons *comp,
- const struct xsk_umem_config *usr_config)
-{
- struct xsk_umem_config config;
-
- memcpy(&config, usr_config, sizeof(struct xsk_umem_config_v1));
- config.flags = 0;
-
- return xsk_umem__create_v0_0_4(umem_ptr, umem_area, size, fill, comp,
- &config);
-}
-COMPAT_VERSION(xsk_umem__create_v0_0_2, xsk_umem__create, LIBBPF_0.0.2)
-DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4)
-
-static int xsk_load_xdp_prog(struct xsk_socket *xsk)
-{
- static const int log_buf_size = 16 * 1024;
- struct xsk_ctx *ctx = xsk->ctx;
- char log_buf[log_buf_size];
- int err, prog_fd;
-
- /* This is the C-program:
- * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
- * {
- * int ret, index = ctx->rx_queue_index;
- *
- * // A set entry here means that the correspnding queue_id
- * // has an active AF_XDP socket bound to it.
- * ret = bpf_redirect_map(&xsks_map, index, XDP_PASS);
- * if (ret > 0)
- * return ret;
- *
- * // Fallback for pre-5.3 kernels, not supporting default
- * // action in the flags parameter.
- * if (bpf_map_lookup_elem(&xsks_map, &index))
- * return bpf_redirect_map(&xsks_map, index, 0);
- * return XDP_PASS;
- * }
- */
- struct bpf_insn prog[] = {
- /* r2 = *(u32 *)(r1 + 16) */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16),
- /* *(u32 *)(r10 - 4) = r2 */
- BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -4),
- /* r1 = xskmap[] */
- BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
- /* r3 = XDP_PASS */
- BPF_MOV64_IMM(BPF_REG_3, 2),
- /* call bpf_redirect_map */
- BPF_EMIT_CALL(BPF_FUNC_redirect_map),
- /* if w0 != 0 goto pc+13 */
- BPF_JMP32_IMM(BPF_JSGT, BPF_REG_0, 0, 13),
- /* r2 = r10 */
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- /* r2 += -4 */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- /* r1 = xskmap[] */
- BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
- /* call bpf_map_lookup_elem */
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- /* r1 = r0 */
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- /* r0 = XDP_PASS */
- BPF_MOV64_IMM(BPF_REG_0, 2),
- /* if r1 == 0 goto pc+5 */
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5),
- /* r2 = *(u32 *)(r10 - 4) */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4),
- /* r1 = xskmap[] */
- BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
- /* r3 = 0 */
- BPF_MOV64_IMM(BPF_REG_3, 0),
- /* call bpf_redirect_map */
- BPF_EMIT_CALL(BPF_FUNC_redirect_map),
- /* The jumps are to this instruction */
- BPF_EXIT_INSN(),
- };
- size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
-
- prog_fd = bpf_load_program(BPF_PROG_TYPE_XDP, prog, insns_cnt,
- "LGPL-2.1 or BSD-2-Clause", 0, log_buf,
- log_buf_size);
- if (prog_fd < 0) {
- pr_warn("BPF log buffer:\n%s", log_buf);
- return prog_fd;
- }
-
- err = bpf_set_link_xdp_fd(xsk->ctx->ifindex, prog_fd,
- xsk->config.xdp_flags);
- if (err) {
- close(prog_fd);
- return err;
- }
-
- ctx->prog_fd = prog_fd;
- return 0;
-}
-
-static int xsk_get_max_queues(struct xsk_socket *xsk)
-{
- struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS };
- struct xsk_ctx *ctx = xsk->ctx;
- struct ifreq ifr = {};
- int fd, err, ret;
-
- fd = socket(AF_INET, SOCK_DGRAM, 0);
- if (fd < 0)
- return -errno;
-
- ifr.ifr_data = (void *)&channels;
- memcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ - 1);
- ifr.ifr_name[IFNAMSIZ - 1] = '\0';
- err = ioctl(fd, SIOCETHTOOL, &ifr);
- if (err && errno != EOPNOTSUPP) {
- ret = -errno;
- goto out;
- }
-
- if (err) {
- /* If the device says it has no channels, then all traffic
- * is sent to a single stream, so max queues = 1.
- */
- ret = 1;
- } else {
- /* Take the max of rx, tx, combined. Drivers return
- * the number of channels in different ways.
- */
- ret = max(channels.max_rx, channels.max_tx);
- ret = max(ret, (int)channels.max_combined);
- }
-
-out:
- close(fd);
- return ret;
-}
-
-static int xsk_create_bpf_maps(struct xsk_socket *xsk)
-{
- struct xsk_ctx *ctx = xsk->ctx;
- int max_queues;
- int fd;
-
- max_queues = xsk_get_max_queues(xsk);
- if (max_queues < 0)
- return max_queues;
-
- fd = bpf_create_map_name(BPF_MAP_TYPE_XSKMAP, "xsks_map",
- sizeof(int), sizeof(int), max_queues, 0);
- if (fd < 0)
- return fd;
-
- ctx->xsks_map_fd = fd;
-
- return 0;
-}
-
-static void xsk_delete_bpf_maps(struct xsk_socket *xsk)
-{
- struct xsk_ctx *ctx = xsk->ctx;
-
- bpf_map_delete_elem(ctx->xsks_map_fd, &ctx->queue_id);
- close(ctx->xsks_map_fd);
-}
-
-static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
-{
- __u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info);
- __u32 map_len = sizeof(struct bpf_map_info);
- struct bpf_prog_info prog_info = {};
- struct xsk_ctx *ctx = xsk->ctx;
- struct bpf_map_info map_info;
- int fd, err;
-
- err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len);
- if (err)
- return err;
-
- num_maps = prog_info.nr_map_ids;
-
- map_ids = calloc(prog_info.nr_map_ids, sizeof(*map_ids));
- if (!map_ids)
- return -ENOMEM;
-
- memset(&prog_info, 0, prog_len);
- prog_info.nr_map_ids = num_maps;
- prog_info.map_ids = (__u64)(unsigned long)map_ids;
-
- err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len);
- if (err)
- goto out_map_ids;
-
- ctx->xsks_map_fd = -1;
-
- for (i = 0; i < prog_info.nr_map_ids; i++) {
- fd = bpf_map_get_fd_by_id(map_ids[i]);
- if (fd < 0)
- continue;
-
- err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len);
- if (err) {
- close(fd);
- continue;
- }
-
- if (!strcmp(map_info.name, "xsks_map")) {
- ctx->xsks_map_fd = fd;
- continue;
- }
-
- close(fd);
- }
-
- err = 0;
- if (ctx->xsks_map_fd == -1)
- err = -ENOENT;
-
-out_map_ids:
- free(map_ids);
- return err;
-}
-
-static int xsk_set_bpf_maps(struct xsk_socket *xsk)
-{
- struct xsk_ctx *ctx = xsk->ctx;
-
- return bpf_map_update_elem(ctx->xsks_map_fd, &ctx->queue_id,
- &xsk->fd, 0);
-}
-
-static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
-{
- struct xsk_ctx *ctx = xsk->ctx;
- __u32 prog_id = 0;
- int err;
-
- err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id,
- xsk->config.xdp_flags);
- if (err)
- return err;
-
- if (!prog_id) {
- err = xsk_create_bpf_maps(xsk);
- if (err)
- return err;
-
- err = xsk_load_xdp_prog(xsk);
- if (err) {
- xsk_delete_bpf_maps(xsk);
- return err;
- }
- } else {
- ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id);
- if (ctx->prog_fd < 0)
- return -errno;
- err = xsk_lookup_bpf_maps(xsk);
- if (err) {
- close(ctx->prog_fd);
- return err;
- }
- }
-
- if (xsk->rx)
- err = xsk_set_bpf_maps(xsk);
- if (err) {
- xsk_delete_bpf_maps(xsk);
- close(ctx->prog_fd);
- return err;
- }
-
- return 0;
-}
-
-static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex,
- __u32 queue_id)
-{
- struct xsk_ctx *ctx;
-
- if (list_empty(&umem->ctx_list))
- return NULL;
-
- list_for_each_entry(ctx, &umem->ctx_list, list) {
- if (ctx->ifindex == ifindex && ctx->queue_id == queue_id) {
- ctx->refcount++;
- return ctx;
- }
- }
-
- return NULL;
-}
-
-static void xsk_put_ctx(struct xsk_ctx *ctx)
-{
- struct xsk_umem *umem = ctx->umem;
- struct xdp_mmap_offsets off;
- int err;
-
- if (--ctx->refcount == 0) {
- err = xsk_get_mmap_offsets(umem->fd, &off);
- if (!err) {
- munmap(ctx->fill->ring - off.fr.desc,
- off.fr.desc + umem->config.fill_size *
- sizeof(__u64));
- munmap(ctx->comp->ring - off.cr.desc,
- off.cr.desc + umem->config.comp_size *
- sizeof(__u64));
- }
-
- list_del(&ctx->list);
- free(ctx);
- }
-}
-
-static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,
- struct xsk_umem *umem, int ifindex,
- const char *ifname, __u32 queue_id,
- struct xsk_ring_prod *fill,
- struct xsk_ring_cons *comp)
-{
- struct xsk_ctx *ctx;
- int err;
-
- ctx = calloc(1, sizeof(*ctx));
- if (!ctx)
- return NULL;
-
- if (!umem->fill_save) {
- err = xsk_create_umem_rings(umem, xsk->fd, fill, comp);
- if (err) {
- free(ctx);
- return NULL;
- }
- } else if (umem->fill_save != fill || umem->comp_save != comp) {
- /* Copy over rings to new structs. */
- memcpy(fill, umem->fill_save, sizeof(*fill));
- memcpy(comp, umem->comp_save, sizeof(*comp));
- }
-
- ctx->ifindex = ifindex;
- ctx->refcount = 1;
- ctx->umem = umem;
- ctx->queue_id = queue_id;
- memcpy(ctx->ifname, ifname, IFNAMSIZ - 1);
- ctx->ifname[IFNAMSIZ - 1] = '\0';
-
- umem->fill_save = NULL;
- umem->comp_save = NULL;
- ctx->fill = fill;
- ctx->comp = comp;
- list_add(&ctx->list, &umem->ctx_list);
- return ctx;
-}
-
-int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
- const char *ifname,
- __u32 queue_id, struct xsk_umem *umem,
- struct xsk_ring_cons *rx,
- struct xsk_ring_prod *tx,
- struct xsk_ring_prod *fill,
- struct xsk_ring_cons *comp,
- const struct xsk_socket_config *usr_config)
-{
- void *rx_map = NULL, *tx_map = NULL;
- struct sockaddr_xdp sxdp = {};
- struct xdp_mmap_offsets off;
- struct xsk_socket *xsk;
- struct xsk_ctx *ctx;
- int err, ifindex;
-
- if (!umem || !xsk_ptr || !(rx || tx))
- return -EFAULT;
-
- xsk = calloc(1, sizeof(*xsk));
- if (!xsk)
- return -ENOMEM;
-
- err = xsk_set_xdp_socket_config(&xsk->config, usr_config);
- if (err)
- goto out_xsk_alloc;
-
- xsk->outstanding_tx = 0;
- ifindex = if_nametoindex(ifname);
- if (!ifindex) {
- err = -errno;
- goto out_xsk_alloc;
- }
-
- if (umem->refcount++ > 0) {
- xsk->fd = socket(AF_XDP, SOCK_RAW, 0);
- if (xsk->fd < 0) {
- err = -errno;
- goto out_xsk_alloc;
- }
- } else {
- xsk->fd = umem->fd;
- }
-
- ctx = xsk_get_ctx(umem, ifindex, queue_id);
- if (!ctx) {
- if (!fill || !comp) {
- err = -EFAULT;
- goto out_socket;
- }
-
- ctx = xsk_create_ctx(xsk, umem, ifindex, ifname, queue_id,
- fill, comp);
- if (!ctx) {
- err = -ENOMEM;
- goto out_socket;
- }
- }
- xsk->ctx = ctx;
-
- if (rx) {
- err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
- &xsk->config.rx_size,
- sizeof(xsk->config.rx_size));
- if (err) {
- err = -errno;
- goto out_put_ctx;
- }
- }
- if (tx) {
- err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING,
- &xsk->config.tx_size,
- sizeof(xsk->config.tx_size));
- if (err) {
- err = -errno;
- goto out_put_ctx;
- }
- }
-
- err = xsk_get_mmap_offsets(xsk->fd, &off);
- if (err) {
- err = -errno;
- goto out_put_ctx;
- }
-
- if (rx) {
- rx_map = mmap(NULL, off.rx.desc +
- xsk->config.rx_size * sizeof(struct xdp_desc),
- PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
- xsk->fd, XDP_PGOFF_RX_RING);
- if (rx_map == MAP_FAILED) {
- err = -errno;
- goto out_put_ctx;
- }
-
- rx->mask = xsk->config.rx_size - 1;
- rx->size = xsk->config.rx_size;
- rx->producer = rx_map + off.rx.producer;
- rx->consumer = rx_map + off.rx.consumer;
- rx->flags = rx_map + off.rx.flags;
- rx->ring = rx_map + off.rx.desc;
- rx->cached_prod = *rx->producer;
- rx->cached_cons = *rx->consumer;
- }
- xsk->rx = rx;
-
- if (tx) {
- tx_map = mmap(NULL, off.tx.desc +
- xsk->config.tx_size * sizeof(struct xdp_desc),
- PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
- xsk->fd, XDP_PGOFF_TX_RING);
- if (tx_map == MAP_FAILED) {
- err = -errno;
- goto out_mmap_rx;
- }
-
- tx->mask = xsk->config.tx_size - 1;
- tx->size = xsk->config.tx_size;
- tx->producer = tx_map + off.tx.producer;
- tx->consumer = tx_map + off.tx.consumer;
- tx->flags = tx_map + off.tx.flags;
- tx->ring = tx_map + off.tx.desc;
- tx->cached_prod = *tx->producer;
- /* cached_cons is r->size bigger than the real consumer pointer
- * See xsk_prod_nb_free
- */
- tx->cached_cons = *tx->consumer + xsk->config.tx_size;
- }
- xsk->tx = tx;
-
- sxdp.sxdp_family = PF_XDP;
- sxdp.sxdp_ifindex = ctx->ifindex;
- sxdp.sxdp_queue_id = ctx->queue_id;
- if (umem->refcount > 1) {
- sxdp.sxdp_flags |= XDP_SHARED_UMEM;
- sxdp.sxdp_shared_umem_fd = umem->fd;
- } else {
- sxdp.sxdp_flags = xsk->config.bind_flags;
- }
-
- err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
- if (err) {
- err = -errno;
- goto out_mmap_tx;
- }
-
- ctx->prog_fd = -1;
-
- if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
- err = xsk_setup_xdp_prog(xsk);
- if (err)
- goto out_mmap_tx;
- }
-
- *xsk_ptr = xsk;
- return 0;
-
-out_mmap_tx:
- if (tx)
- munmap(tx_map, off.tx.desc +
- xsk->config.tx_size * sizeof(struct xdp_desc));
-out_mmap_rx:
- if (rx)
- munmap(rx_map, off.rx.desc +
- xsk->config.rx_size * sizeof(struct xdp_desc));
-out_put_ctx:
- xsk_put_ctx(ctx);
-out_socket:
- if (--umem->refcount)
- close(xsk->fd);
-out_xsk_alloc:
- free(xsk);
- return err;
-}
-
-int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
- __u32 queue_id, struct xsk_umem *umem,
- struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
- const struct xsk_socket_config *usr_config)
-{
- return xsk_socket__create_shared(xsk_ptr, ifname, queue_id, umem,
- rx, tx, umem->fill_save,
- umem->comp_save, usr_config);
-}
-
-int xsk_umem__delete(struct xsk_umem *umem)
-{
- if (!umem)
- return 0;
-
- if (umem->refcount)
- return -EBUSY;
-
- close(umem->fd);
- free(umem);
-
- return 0;
-}
-
-void xsk_socket__delete(struct xsk_socket *xsk)
-{
- size_t desc_sz = sizeof(struct xdp_desc);
- struct xsk_ctx *ctx = xsk->ctx;
- struct xdp_mmap_offsets off;
- int err;
-
- if (!xsk)
- return;
-
- if (ctx->prog_fd != -1) {
- xsk_delete_bpf_maps(xsk);
- close(ctx->prog_fd);
- }
-
- err = xsk_get_mmap_offsets(xsk->fd, &off);
- if (!err) {
- if (xsk->rx) {
- munmap(xsk->rx->ring - off.rx.desc,
- off.rx.desc + xsk->config.rx_size * desc_sz);
- }
- if (xsk->tx) {
- munmap(xsk->tx->ring - off.tx.desc,
- off.tx.desc + xsk->config.tx_size * desc_sz);
- }
- }
-
- xsk_put_ctx(ctx);
-
- ctx->umem->refcount--;
- /* Do not close an fd that also has an associated umem connected
- * to it.
- */
- if (xsk->fd != ctx->umem->fd)
- close(xsk->fd);
- free(xsk);
-}
diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
deleted file mode 100644
index 1069c46364ff..000000000000
--- a/tools/lib/bpf/xsk.h
+++ /dev/null
@@ -1,255 +0,0 @@
-/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
-
-/*
- * AF_XDP user-space access library.
- *
- * Copyright(c) 2018 - 2019 Intel Corporation.
- *
- * Author(s): Magnus Karlsson <magnus.karlsson@intel.com>
- */
-
-#ifndef __LIBBPF_XSK_H
-#define __LIBBPF_XSK_H
-
-#include <stdio.h>
-#include <stdint.h>
-#include <linux/if_xdp.h>
-
-#include "libbpf.h"
-#include "libbpf_util.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Do not access these members directly. Use the functions below. */
-#define DEFINE_XSK_RING(name) \
-struct name { \
- __u32 cached_prod; \
- __u32 cached_cons; \
- __u32 mask; \
- __u32 size; \
- __u32 *producer; \
- __u32 *consumer; \
- void *ring; \
- __u32 *flags; \
-}
-
-DEFINE_XSK_RING(xsk_ring_prod);
-DEFINE_XSK_RING(xsk_ring_cons);
-
-/* For a detailed explanation on the memory barriers associated with the
- * ring, please take a look at net/xdp/xsk_queue.h.
- */
-
-struct xsk_umem;
-struct xsk_socket;
-
-static inline __u64 *xsk_ring_prod__fill_addr(struct xsk_ring_prod *fill,
- __u32 idx)
-{
- __u64 *addrs = (__u64 *)fill->ring;
-
- return &addrs[idx & fill->mask];
-}
-
-static inline const __u64 *
-xsk_ring_cons__comp_addr(const struct xsk_ring_cons *comp, __u32 idx)
-{
- const __u64 *addrs = (const __u64 *)comp->ring;
-
- return &addrs[idx & comp->mask];
-}
-
-static inline struct xdp_desc *xsk_ring_prod__tx_desc(struct xsk_ring_prod *tx,
- __u32 idx)
-{
- struct xdp_desc *descs = (struct xdp_desc *)tx->ring;
-
- return &descs[idx & tx->mask];
-}
-
-static inline const struct xdp_desc *
-xsk_ring_cons__rx_desc(const struct xsk_ring_cons *rx, __u32 idx)
-{
- const struct xdp_desc *descs = (const struct xdp_desc *)rx->ring;
-
- return &descs[idx & rx->mask];
-}
-
-static inline int xsk_ring_prod__needs_wakeup(const struct xsk_ring_prod *r)
-{
- return *r->flags & XDP_RING_NEED_WAKEUP;
-}
-
-static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb)
-{
- __u32 free_entries = r->cached_cons - r->cached_prod;
-
- if (free_entries >= nb)
- return free_entries;
-
- /* Refresh the local tail pointer.
- * cached_cons is r->size bigger than the real consumer pointer so
- * that this addition can be avoided in the more frequently
- * executed code that computs free_entries in the beginning of
- * this function. Without this optimization it whould have been
- * free_entries = r->cached_prod - r->cached_cons + r->size.
- */
- r->cached_cons = *r->consumer + r->size;
-
- return r->cached_cons - r->cached_prod;
-}
-
-static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb)
-{
- __u32 entries = r->cached_prod - r->cached_cons;
-
- if (entries == 0) {
- r->cached_prod = *r->producer;
- entries = r->cached_prod - r->cached_cons;
- }
-
- return (entries > nb) ? nb : entries;
-}
-
-static inline size_t xsk_ring_prod__reserve(struct xsk_ring_prod *prod,
- size_t nb, __u32 *idx)
-{
- if (xsk_prod_nb_free(prod, nb) < nb)
- return 0;
-
- *idx = prod->cached_prod;
- prod->cached_prod += nb;
-
- return nb;
-}
-
-static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, size_t nb)
-{
- /* Make sure everything has been written to the ring before indicating
- * this to the kernel by writing the producer pointer.
- */
- libbpf_smp_wmb();
-
- *prod->producer += nb;
-}
-
-static inline size_t xsk_ring_cons__peek(struct xsk_ring_cons *cons,
- size_t nb, __u32 *idx)
-{
- size_t entries = xsk_cons_nb_avail(cons, nb);
-
- if (entries > 0) {
- /* Make sure we do not speculatively read the data before
- * we have received the packet buffers from the ring.
- */
- libbpf_smp_rmb();
-
- *idx = cons->cached_cons;
- cons->cached_cons += entries;
- }
-
- return entries;
-}
-
-static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, size_t nb)
-{
- /* Make sure data has been read before indicating we are done
- * with the entries by updating the consumer pointer.
- */
- libbpf_smp_rwmb();
-
- *cons->consumer += nb;
-}
-
-static inline void *xsk_umem__get_data(void *umem_area, __u64 addr)
-{
- return &((char *)umem_area)[addr];
-}
-
-static inline __u64 xsk_umem__extract_addr(__u64 addr)
-{
- return addr & XSK_UNALIGNED_BUF_ADDR_MASK;
-}
-
-static inline __u64 xsk_umem__extract_offset(__u64 addr)
-{
- return addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT;
-}
-
-static inline __u64 xsk_umem__add_offset_to_addr(__u64 addr)
-{
- return xsk_umem__extract_addr(addr) + xsk_umem__extract_offset(addr);
-}
-
-LIBBPF_API int xsk_umem__fd(const struct xsk_umem *umem);
-LIBBPF_API int xsk_socket__fd(const struct xsk_socket *xsk);
-
-#define XSK_RING_CONS__DEFAULT_NUM_DESCS 2048
-#define XSK_RING_PROD__DEFAULT_NUM_DESCS 2048
-#define XSK_UMEM__DEFAULT_FRAME_SHIFT 12 /* 4096 bytes */
-#define XSK_UMEM__DEFAULT_FRAME_SIZE (1 << XSK_UMEM__DEFAULT_FRAME_SHIFT)
-#define XSK_UMEM__DEFAULT_FRAME_HEADROOM 0
-#define XSK_UMEM__DEFAULT_FLAGS 0
-
-struct xsk_umem_config {
- __u32 fill_size;
- __u32 comp_size;
- __u32 frame_size;
- __u32 frame_headroom;
- __u32 flags;
-};
-
-/* Flags for the libbpf_flags field. */
-#define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0)
-
-struct xsk_socket_config {
- __u32 rx_size;
- __u32 tx_size;
- __u32 libbpf_flags;
- __u32 xdp_flags;
- __u16 bind_flags;
-};
-
-/* Set config to NULL to get the default configuration. */
-LIBBPF_API int xsk_umem__create(struct xsk_umem **umem,
- void *umem_area, __u64 size,
- struct xsk_ring_prod *fill,
- struct xsk_ring_cons *comp,
- const struct xsk_umem_config *config);
-LIBBPF_API int xsk_umem__create_v0_0_2(struct xsk_umem **umem,
- void *umem_area, __u64 size,
- struct xsk_ring_prod *fill,
- struct xsk_ring_cons *comp,
- const struct xsk_umem_config *config);
-LIBBPF_API int xsk_umem__create_v0_0_4(struct xsk_umem **umem,
- void *umem_area, __u64 size,
- struct xsk_ring_prod *fill,
- struct xsk_ring_cons *comp,
- const struct xsk_umem_config *config);
-LIBBPF_API int xsk_socket__create(struct xsk_socket **xsk,
- const char *ifname, __u32 queue_id,
- struct xsk_umem *umem,
- struct xsk_ring_cons *rx,
- struct xsk_ring_prod *tx,
- const struct xsk_socket_config *config);
-LIBBPF_API int
-xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
- const char *ifname,
- __u32 queue_id, struct xsk_umem *umem,
- struct xsk_ring_cons *rx,
- struct xsk_ring_prod *tx,
- struct xsk_ring_prod *fill,
- struct xsk_ring_cons *comp,
- const struct xsk_socket_config *config);
-
-/* Returns 0 for success and -EBUSY if the umem is still in use. */
-LIBBPF_API int xsk_umem__delete(struct xsk_umem *umem);
-LIBBPF_API void xsk_socket__delete(struct xsk_socket *xsk);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* __LIBBPF_XSK_H */
diff --git a/tools/lib/bpf/zip.c b/tools/lib/bpf/zip.c
new file mode 100644
index 000000000000..3f26d629b2b4
--- /dev/null
+++ b/tools/lib/bpf/zip.c
@@ -0,0 +1,333 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/*
+ * Routines for dealing with .zip archives.
+ *
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "libbpf_internal.h"
+#include "zip.h"
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpacked"
+#pragma GCC diagnostic ignored "-Wattributes"
+
+/* Specification of ZIP file format can be found here:
+ * https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
+ * For a high level overview of the structure of a ZIP file see
+ * sections 4.3.1 - 4.3.6.
+ *
+ * Data structures appearing in ZIP files do not contain any
+ * padding and they might be misaligned. To allow us to safely
+ * operate on pointers to such structures and their members, we
+ * declare the types as packed.
+ */
+
+#define END_OF_CD_RECORD_MAGIC 0x06054b50
+
+/* See section 4.3.16 of the spec. */
+struct end_of_cd_record {
+ /* Magic value equal to END_OF_CD_RECORD_MAGIC */
+ __u32 magic;
+
+ /* Number of the file containing this structure or 0xFFFF if ZIP64 archive.
+ * Zip archive might span multiple files (disks).
+ */
+ __u16 this_disk;
+
+ /* Number of the file containing the beginning of the central directory or
+ * 0xFFFF if ZIP64 archive.
+ */
+ __u16 cd_disk;
+
+ /* Number of central directory records on this disk or 0xFFFF if ZIP64
+ * archive.
+ */
+ __u16 cd_records;
+
+ /* Number of central directory records on all disks or 0xFFFF if ZIP64
+ * archive.
+ */
+ __u16 cd_records_total;
+
+ /* Size of the central directory record or 0xFFFFFFFF if ZIP64 archive. */
+ __u32 cd_size;
+
+ /* Offset of the central directory from the beginning of the archive or
+ * 0xFFFFFFFF if ZIP64 archive.
+ */
+ __u32 cd_offset;
+
+ /* Length of comment data following end of central directory record. */
+ __u16 comment_length;
+
+ /* Up to 64k of arbitrary bytes. */
+ /* uint8_t comment[comment_length] */
+} __attribute__((packed));
+
+#define CD_FILE_HEADER_MAGIC 0x02014b50
+#define FLAG_ENCRYPTED (1 << 0)
+#define FLAG_HAS_DATA_DESCRIPTOR (1 << 3)
+
+/* See section 4.3.12 of the spec. */
+struct cd_file_header {
+ /* Magic value equal to CD_FILE_HEADER_MAGIC. */
+ __u32 magic;
+ __u16 version;
+ /* Minimum zip version needed to extract the file. */
+ __u16 min_version;
+ __u16 flags;
+ __u16 compression;
+ __u16 last_modified_time;
+ __u16 last_modified_date;
+ __u32 crc;
+ __u32 compressed_size;
+ __u32 uncompressed_size;
+ __u16 file_name_length;
+ __u16 extra_field_length;
+ __u16 file_comment_length;
+ /* Number of the disk where the file starts or 0xFFFF if ZIP64 archive. */
+ __u16 disk;
+ __u16 internal_attributes;
+ __u32 external_attributes;
+ /* Offset from the start of the disk containing the local file header to the
+ * start of the local file header.
+ */
+ __u32 offset;
+} __attribute__((packed));
+
+#define LOCAL_FILE_HEADER_MAGIC 0x04034b50
+
+/* See section 4.3.7 of the spec. */
+struct local_file_header {
+ /* Magic value equal to LOCAL_FILE_HEADER_MAGIC. */
+ __u32 magic;
+ /* Minimum zip version needed to extract the file. */
+ __u16 min_version;
+ __u16 flags;
+ __u16 compression;
+ __u16 last_modified_time;
+ __u16 last_modified_date;
+ __u32 crc;
+ __u32 compressed_size;
+ __u32 uncompressed_size;
+ __u16 file_name_length;
+ __u16 extra_field_length;
+} __attribute__((packed));
+
+#pragma GCC diagnostic pop
+
+struct zip_archive {
+ void *data;
+ __u32 size;
+ __u32 cd_offset;
+ __u32 cd_records;
+};
+
+static void *check_access(struct zip_archive *archive, __u32 offset, __u32 size)
+{
+ if (offset + size > archive->size || offset > offset + size)
+ return NULL;
+
+ return archive->data + offset;
+}
+
+/* Returns 0 on success, -EINVAL on error and -ENOTSUP if the eocd indicates the
+ * archive uses features which are not supported.
+ */
+static int try_parse_end_of_cd(struct zip_archive *archive, __u32 offset)
+{
+ __u16 comment_length, cd_records;
+ struct end_of_cd_record *eocd;
+ __u32 cd_offset, cd_size;
+
+ eocd = check_access(archive, offset, sizeof(*eocd));
+ if (!eocd || eocd->magic != END_OF_CD_RECORD_MAGIC)
+ return -EINVAL;
+
+ comment_length = eocd->comment_length;
+ if (offset + sizeof(*eocd) + comment_length != archive->size)
+ return -EINVAL;
+
+ cd_records = eocd->cd_records;
+ if (eocd->this_disk != 0 || eocd->cd_disk != 0 || eocd->cd_records_total != cd_records)
+ /* This is a valid eocd, but we only support single-file non-ZIP64 archives. */
+ return -ENOTSUP;
+
+ cd_offset = eocd->cd_offset;
+ cd_size = eocd->cd_size;
+ if (!check_access(archive, cd_offset, cd_size))
+ return -EINVAL;
+
+ archive->cd_offset = cd_offset;
+ archive->cd_records = cd_records;
+ return 0;
+}
+
+static int find_cd(struct zip_archive *archive)
+{
+ int64_t limit, offset;
+ int rc = -EINVAL;
+
+ if (archive->size <= sizeof(struct end_of_cd_record))
+ return -EINVAL;
+
+ /* Because the end of central directory ends with a variable length array of
+ * up to 0xFFFF bytes we can't know exactly where it starts and need to
+ * search for it at the end of the file, scanning the (limit, offset] range.
+ */
+ offset = archive->size - sizeof(struct end_of_cd_record);
+ limit = (int64_t)offset - (1 << 16);
+
+ for (; offset >= 0 && offset > limit && rc != 0; offset--) {
+ rc = try_parse_end_of_cd(archive, offset);
+ if (rc == -ENOTSUP)
+ break;
+ }
+ return rc;
+}
+
+struct zip_archive *zip_archive_open(const char *path)
+{
+ struct zip_archive *archive;
+ int err, fd;
+ off_t size;
+ void *data;
+
+ fd = open(path, O_RDONLY | O_CLOEXEC);
+ if (fd < 0)
+ return ERR_PTR(-errno);
+
+ size = lseek(fd, 0, SEEK_END);
+ if (size == (off_t)-1 || size > UINT32_MAX) {
+ close(fd);
+ return ERR_PTR(-EINVAL);
+ }
+
+ data = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
+ err = -errno;
+ close(fd);
+
+ if (data == MAP_FAILED)
+ return ERR_PTR(err);
+
+ archive = malloc(sizeof(*archive));
+ if (!archive) {
+ munmap(data, size);
+ return ERR_PTR(-ENOMEM);
+ };
+
+ archive->data = data;
+ archive->size = size;
+
+ err = find_cd(archive);
+ if (err) {
+ munmap(data, size);
+ free(archive);
+ return ERR_PTR(err);
+ }
+
+ return archive;
+}
+
+void zip_archive_close(struct zip_archive *archive)
+{
+ munmap(archive->data, archive->size);
+ free(archive);
+}
+
+static struct local_file_header *local_file_header_at_offset(struct zip_archive *archive,
+ __u32 offset)
+{
+ struct local_file_header *lfh;
+
+ lfh = check_access(archive, offset, sizeof(*lfh));
+ if (!lfh || lfh->magic != LOCAL_FILE_HEADER_MAGIC)
+ return NULL;
+
+ return lfh;
+}
+
+static int get_entry_at_offset(struct zip_archive *archive, __u32 offset, struct zip_entry *out)
+{
+ struct local_file_header *lfh;
+ __u32 compressed_size;
+ const char *name;
+ void *data;
+
+ lfh = local_file_header_at_offset(archive, offset);
+ if (!lfh)
+ return -EINVAL;
+
+ offset += sizeof(*lfh);
+ if ((lfh->flags & FLAG_ENCRYPTED) || (lfh->flags & FLAG_HAS_DATA_DESCRIPTOR))
+ return -EINVAL;
+
+ name = check_access(archive, offset, lfh->file_name_length);
+ if (!name)
+ return -EINVAL;
+
+ offset += lfh->file_name_length;
+ if (!check_access(archive, offset, lfh->extra_field_length))
+ return -EINVAL;
+
+ offset += lfh->extra_field_length;
+ compressed_size = lfh->compressed_size;
+ data = check_access(archive, offset, compressed_size);
+ if (!data)
+ return -EINVAL;
+
+ out->compression = lfh->compression;
+ out->name_length = lfh->file_name_length;
+ out->name = name;
+ out->data = data;
+ out->data_length = compressed_size;
+ out->data_offset = offset;
+
+ return 0;
+}
+
+int zip_archive_find_entry(struct zip_archive *archive, const char *file_name,
+ struct zip_entry *out)
+{
+ size_t file_name_length = strlen(file_name);
+ __u32 i, offset = archive->cd_offset;
+
+ for (i = 0; i < archive->cd_records; ++i) {
+ __u16 cdfh_name_length, cdfh_flags;
+ struct cd_file_header *cdfh;
+ const char *cdfh_name;
+
+ cdfh = check_access(archive, offset, sizeof(*cdfh));
+ if (!cdfh || cdfh->magic != CD_FILE_HEADER_MAGIC)
+ return -EINVAL;
+
+ offset += sizeof(*cdfh);
+ cdfh_name_length = cdfh->file_name_length;
+ cdfh_name = check_access(archive, offset, cdfh_name_length);
+ if (!cdfh_name)
+ return -EINVAL;
+
+ cdfh_flags = cdfh->flags;
+ if ((cdfh_flags & FLAG_ENCRYPTED) == 0 &&
+ (cdfh_flags & FLAG_HAS_DATA_DESCRIPTOR) == 0 &&
+ file_name_length == cdfh_name_length &&
+ memcmp(file_name, archive->data + offset, file_name_length) == 0) {
+ return get_entry_at_offset(archive, cdfh->offset, out);
+ }
+
+ offset += cdfh_name_length;
+ offset += cdfh->extra_field_length;
+ offset += cdfh->file_comment_length;
+ }
+
+ return -ENOENT;
+}
diff --git a/tools/lib/bpf/zip.h b/tools/lib/bpf/zip.h
new file mode 100644
index 000000000000..1c1bb21fba76
--- /dev/null
+++ b/tools/lib/bpf/zip.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+#ifndef __LIBBPF_ZIP_H
+#define __LIBBPF_ZIP_H
+
+#include <linux/types.h>
+
+/* Represents an open zip archive.
+ * Only basic ZIP files are supported, in particular the following are not
+ * supported:
+ * - encryption
+ * - streaming
+ * - multi-part ZIP files
+ * - ZIP64
+ */
+struct zip_archive;
+
+/* Carries information on name, compression method, and data corresponding to a
+ * file in a zip archive.
+ */
+struct zip_entry {
+ /* Compression method as defined in pkzip spec. 0 means data is uncompressed. */
+ __u16 compression;
+
+ /* Non-null terminated name of the file. */
+ const char *name;
+ /* Length of the file name. */
+ __u16 name_length;
+
+ /* Pointer to the file data. */
+ const void *data;
+ /* Length of the file data. */
+ __u32 data_length;
+ /* Offset of the file data within the archive. */
+ __u32 data_offset;
+};
+
+/* Open a zip archive. Returns NULL in case of an error. */
+struct zip_archive *zip_archive_open(const char *path);
+
+/* Close a zip archive and release resources. */
+void zip_archive_close(struct zip_archive *archive);
+
+/* Look up an entry corresponding to a file in given zip archive. */
+int zip_archive_find_entry(struct zip_archive *archive, const char *name, struct zip_entry *out);
+
+#endif
diff --git a/tools/lib/find_bit.c b/tools/lib/find_bit.c
index ac37022e9486..6a3dc167d30e 100644
--- a/tools/lib/find_bit.c
+++ b/tools/lib/find_bit.c
@@ -18,74 +18,74 @@
#include <linux/bitmap.h>
#include <linux/kernel.h>
-#if !defined(find_next_bit) || !defined(find_next_zero_bit) || \
- !defined(find_next_and_bit)
-
/*
- * This is a common helper function for find_next_bit, find_next_zero_bit, and
- * find_next_and_bit. The differences are:
- * - The "invert" argument, which is XORed with each fetched word before
- * searching it for one bits.
- * - The optional "addr2", which is anded with "addr1" if present.
+ * Common helper for find_bit() function family
+ * @FETCH: The expression that fetches and pre-processes each word of bitmap(s)
+ * @MUNGE: The expression that post-processes a word containing found bit (may be empty)
+ * @size: The bitmap size in bits
*/
-static inline unsigned long _find_next_bit(const unsigned long *addr1,
- const unsigned long *addr2, unsigned long nbits,
- unsigned long start, unsigned long invert)
-{
- unsigned long tmp;
-
- if (unlikely(start >= nbits))
- return nbits;
-
- tmp = addr1[start / BITS_PER_LONG];
- if (addr2)
- tmp &= addr2[start / BITS_PER_LONG];
- tmp ^= invert;
-
- /* Handle 1st word. */
- tmp &= BITMAP_FIRST_WORD_MASK(start);
- start = round_down(start, BITS_PER_LONG);
-
- while (!tmp) {
- start += BITS_PER_LONG;
- if (start >= nbits)
- return nbits;
-
- tmp = addr1[start / BITS_PER_LONG];
- if (addr2)
- tmp &= addr2[start / BITS_PER_LONG];
- tmp ^= invert;
- }
+#define FIND_FIRST_BIT(FETCH, MUNGE, size) \
+({ \
+ unsigned long idx, val, sz = (size); \
+ \
+ for (idx = 0; idx * BITS_PER_LONG < sz; idx++) { \
+ val = (FETCH); \
+ if (val) { \
+ sz = min(idx * BITS_PER_LONG + __ffs(MUNGE(val)), sz); \
+ break; \
+ } \
+ } \
+ \
+ sz; \
+})
- return min(start + __ffs(tmp), nbits);
-}
-#endif
+/*
+ * Common helper for find_next_bit() function family
+ * @FETCH: The expression that fetches and pre-processes each word of bitmap(s)
+ * @MUNGE: The expression that post-processes a word containing found bit (may be empty)
+ * @size: The bitmap size in bits
+ * @start: The bitnumber to start searching at
+ */
+#define FIND_NEXT_BIT(FETCH, MUNGE, size, start) \
+({ \
+ unsigned long mask, idx, tmp, sz = (size), __start = (start); \
+ \
+ if (unlikely(__start >= sz)) \
+ goto out; \
+ \
+ mask = MUNGE(BITMAP_FIRST_WORD_MASK(__start)); \
+ idx = __start / BITS_PER_LONG; \
+ \
+ for (tmp = (FETCH) & mask; !tmp; tmp = (FETCH)) { \
+ if ((idx + 1) * BITS_PER_LONG >= sz) \
+ goto out; \
+ idx++; \
+ } \
+ \
+ sz = min(idx * BITS_PER_LONG + __ffs(MUNGE(tmp)), sz); \
+out: \
+ sz; \
+})
-#ifndef find_next_bit
+#ifndef find_first_bit
/*
- * Find the next set bit in a memory region.
+ * Find the first set bit in a memory region.
*/
-unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
- unsigned long offset)
+unsigned long _find_first_bit(const unsigned long *addr, unsigned long size)
{
- return _find_next_bit(addr, NULL, size, offset, 0UL);
+ return FIND_FIRST_BIT(addr[idx], /* nop */, size);
}
#endif
-#ifndef find_first_bit
+#ifndef find_first_and_bit
/*
- * Find the first set bit in a memory region.
+ * Find the first set bit in two memory regions.
*/
-unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
+unsigned long _find_first_and_bit(const unsigned long *addr1,
+ const unsigned long *addr2,
+ unsigned long size)
{
- unsigned long idx;
-
- for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
- if (addr[idx])
- return min(idx * BITS_PER_LONG + __ffs(addr[idx]), size);
- }
-
- return size;
+ return FIND_FIRST_BIT(addr1[idx] & addr2[idx], /* nop */, size);
}
#endif
@@ -93,32 +93,31 @@ unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
/*
* Find the first cleared bit in a memory region.
*/
-unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
+unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size)
{
- unsigned long idx;
-
- for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
- if (addr[idx] != ~0UL)
- return min(idx * BITS_PER_LONG + ffz(addr[idx]), size);
- }
-
- return size;
+ return FIND_FIRST_BIT(~addr[idx], /* nop */, size);
}
#endif
-#ifndef find_next_zero_bit
-unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
- unsigned long offset)
+#ifndef find_next_bit
+unsigned long _find_next_bit(const unsigned long *addr, unsigned long nbits, unsigned long start)
{
- return _find_next_bit(addr, NULL, size, offset, ~0UL);
+ return FIND_NEXT_BIT(addr[idx], /* nop */, nbits, start);
}
#endif
#ifndef find_next_and_bit
-unsigned long find_next_and_bit(const unsigned long *addr1,
- const unsigned long *addr2, unsigned long size,
- unsigned long offset)
+unsigned long _find_next_and_bit(const unsigned long *addr1, const unsigned long *addr2,
+ unsigned long nbits, unsigned long start)
+{
+ return FIND_NEXT_BIT(addr1[idx] & addr2[idx], /* nop */, nbits, start);
+}
+#endif
+
+#ifndef find_next_zero_bit
+unsigned long _find_next_zero_bit(const unsigned long *addr, unsigned long nbits,
+ unsigned long start)
{
- return _find_next_bit(addr1, addr2, size, offset, 0UL);
+ return FIND_NEXT_BIT(~addr[idx], /* nop */, nbits, start);
}
#endif
diff --git a/tools/lib/list_sort.c b/tools/lib/list_sort.c
new file mode 100644
index 000000000000..10c067e3a8d2
--- /dev/null
+++ b/tools/lib/list_sort.c
@@ -0,0 +1,252 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/compiler.h>
+#include <linux/export.h>
+#include <linux/string.h>
+#include <linux/list_sort.h>
+#include <linux/list.h>
+
+/*
+ * Returns a list organized in an intermediate format suited
+ * to chaining of merge() calls: null-terminated, no reserved or
+ * sentinel head node, "prev" links not maintained.
+ */
+__attribute__((nonnull(2,3,4)))
+static struct list_head *merge(void *priv, list_cmp_func_t cmp,
+ struct list_head *a, struct list_head *b)
+{
+ struct list_head *head, **tail = &head;
+
+ for (;;) {
+ /* if equal, take 'a' -- important for sort stability */
+ if (cmp(priv, a, b) <= 0) {
+ *tail = a;
+ tail = &a->next;
+ a = a->next;
+ if (!a) {
+ *tail = b;
+ break;
+ }
+ } else {
+ *tail = b;
+ tail = &b->next;
+ b = b->next;
+ if (!b) {
+ *tail = a;
+ break;
+ }
+ }
+ }
+ return head;
+}
+
+/*
+ * Combine final list merge with restoration of standard doubly-linked
+ * list structure. This approach duplicates code from merge(), but
+ * runs faster than the tidier alternatives of either a separate final
+ * prev-link restoration pass, or maintaining the prev links
+ * throughout.
+ */
+__attribute__((nonnull(2,3,4,5)))
+static void merge_final(void *priv, list_cmp_func_t cmp, struct list_head *head,
+ struct list_head *a, struct list_head *b)
+{
+ struct list_head *tail = head;
+ u8 count = 0;
+
+ for (;;) {
+ /* if equal, take 'a' -- important for sort stability */
+ if (cmp(priv, a, b) <= 0) {
+ tail->next = a;
+ a->prev = tail;
+ tail = a;
+ a = a->next;
+ if (!a)
+ break;
+ } else {
+ tail->next = b;
+ b->prev = tail;
+ tail = b;
+ b = b->next;
+ if (!b) {
+ b = a;
+ break;
+ }
+ }
+ }
+
+ /* Finish linking remainder of list b on to tail */
+ tail->next = b;
+ do {
+ /*
+ * If the merge is highly unbalanced (e.g. the input is
+ * already sorted), this loop may run many iterations.
+ * Continue callbacks to the client even though no
+ * element comparison is needed, so the client's cmp()
+ * routine can invoke cond_resched() periodically.
+ */
+ if (unlikely(!++count))
+ cmp(priv, b, b);
+ b->prev = tail;
+ tail = b;
+ b = b->next;
+ } while (b);
+
+ /* And the final links to make a circular doubly-linked list */
+ tail->next = head;
+ head->prev = tail;
+}
+
+/**
+ * list_sort - sort a list
+ * @priv: private data, opaque to list_sort(), passed to @cmp
+ * @head: the list to sort
+ * @cmp: the elements comparison function
+ *
+ * The comparison function @cmp must return > 0 if @a should sort after
+ * @b ("@a > @b" if you want an ascending sort), and <= 0 if @a should
+ * sort before @b *or* their original order should be preserved. It is
+ * always called with the element that came first in the input in @a,
+ * and list_sort is a stable sort, so it is not necessary to distinguish
+ * the @a < @b and @a == @b cases.
+ *
+ * This is compatible with two styles of @cmp function:
+ * - The traditional style which returns <0 / =0 / >0, or
+ * - Returning a boolean 0/1.
+ * The latter offers a chance to save a few cycles in the comparison
+ * (which is used by e.g. plug_ctx_cmp() in block/blk-mq.c).
+ *
+ * A good way to write a multi-word comparison is::
+ *
+ * if (a->high != b->high)
+ * return a->high > b->high;
+ * if (a->middle != b->middle)
+ * return a->middle > b->middle;
+ * return a->low > b->low;
+ *
+ *
+ * This mergesort is as eager as possible while always performing at least
+ * 2:1 balanced merges. Given two pending sublists of size 2^k, they are
+ * merged to a size-2^(k+1) list as soon as we have 2^k following elements.
+ *
+ * Thus, it will avoid cache thrashing as long as 3*2^k elements can
+ * fit into the cache. Not quite as good as a fully-eager bottom-up
+ * mergesort, but it does use 0.2*n fewer comparisons, so is faster in
+ * the common case that everything fits into L1.
+ *
+ *
+ * The merging is controlled by "count", the number of elements in the
+ * pending lists. This is beautifully simple code, but rather subtle.
+ *
+ * Each time we increment "count", we set one bit (bit k) and clear
+ * bits k-1 .. 0. Each time this happens (except the very first time
+ * for each bit, when count increments to 2^k), we merge two lists of
+ * size 2^k into one list of size 2^(k+1).
+ *
+ * This merge happens exactly when the count reaches an odd multiple of
+ * 2^k, which is when we have 2^k elements pending in smaller lists,
+ * so it's safe to merge away two lists of size 2^k.
+ *
+ * After this happens twice, we have created two lists of size 2^(k+1),
+ * which will be merged into a list of size 2^(k+2) before we create
+ * a third list of size 2^(k+1), so there are never more than two pending.
+ *
+ * The number of pending lists of size 2^k is determined by the
+ * state of bit k of "count" plus two extra pieces of information:
+ *
+ * - The state of bit k-1 (when k == 0, consider bit -1 always set), and
+ * - Whether the higher-order bits are zero or non-zero (i.e.
+ * is count >= 2^(k+1)).
+ *
+ * There are six states we distinguish. "x" represents some arbitrary
+ * bits, and "y" represents some arbitrary non-zero bits:
+ * 0: 00x: 0 pending of size 2^k; x pending of sizes < 2^k
+ * 1: 01x: 0 pending of size 2^k; 2^(k-1) + x pending of sizes < 2^k
+ * 2: x10x: 0 pending of size 2^k; 2^k + x pending of sizes < 2^k
+ * 3: x11x: 1 pending of size 2^k; 2^(k-1) + x pending of sizes < 2^k
+ * 4: y00x: 1 pending of size 2^k; 2^k + x pending of sizes < 2^k
+ * 5: y01x: 2 pending of size 2^k; 2^(k-1) + x pending of sizes < 2^k
+ * (merge and loop back to state 2)
+ *
+ * We gain lists of size 2^k in the 2->3 and 4->5 transitions (because
+ * bit k-1 is set while the more significant bits are non-zero) and
+ * merge them away in the 5->2 transition. Note in particular that just
+ * before the 5->2 transition, all lower-order bits are 11 (state 3),
+ * so there is one list of each smaller size.
+ *
+ * When we reach the end of the input, we merge all the pending
+ * lists, from smallest to largest. If you work through cases 2 to
+ * 5 above, you can see that the number of elements we merge with a list
+ * of size 2^k varies from 2^(k-1) (cases 3 and 5 when x == 0) to
+ * 2^(k+1) - 1 (second merge of case 5 when x == 2^(k-1) - 1).
+ */
+__attribute__((nonnull(2,3)))
+void list_sort(void *priv, struct list_head *head, list_cmp_func_t cmp)
+{
+ struct list_head *list = head->next, *pending = NULL;
+ size_t count = 0; /* Count of pending */
+
+ if (list == head->prev) /* Zero or one elements */
+ return;
+
+ /* Convert to a null-terminated singly-linked list. */
+ head->prev->next = NULL;
+
+ /*
+ * Data structure invariants:
+ * - All lists are singly linked and null-terminated; prev
+ * pointers are not maintained.
+ * - pending is a prev-linked "list of lists" of sorted
+ * sublists awaiting further merging.
+ * - Each of the sorted sublists is power-of-two in size.
+ * - Sublists are sorted by size and age, smallest & newest at front.
+ * - There are zero to two sublists of each size.
+ * - A pair of pending sublists are merged as soon as the number
+ * of following pending elements equals their size (i.e.
+ * each time count reaches an odd multiple of that size).
+ * That ensures each later final merge will be at worst 2:1.
+ * - Each round consists of:
+ * - Merging the two sublists selected by the highest bit
+ * which flips when count is incremented, and
+ * - Adding an element from the input as a size-1 sublist.
+ */
+ do {
+ size_t bits;
+ struct list_head **tail = &pending;
+
+ /* Find the least-significant clear bit in count */
+ for (bits = count; bits & 1; bits >>= 1)
+ tail = &(*tail)->prev;
+ /* Do the indicated merge */
+ if (likely(bits)) {
+ struct list_head *a = *tail, *b = a->prev;
+
+ a = merge(priv, cmp, b, a);
+ /* Install the merged result in place of the inputs */
+ a->prev = b->prev;
+ *tail = a;
+ }
+
+ /* Move one element from input list to pending */
+ list->prev = pending;
+ pending = list;
+ list = list->next;
+ pending->next = NULL;
+ count++;
+ } while (list);
+
+ /* End of input; merge together all the pending lists. */
+ list = pending;
+ pending = pending->prev;
+ for (;;) {
+ struct list_head *next = pending->prev;
+
+ if (!next)
+ break;
+ list = merge(priv, cmp, pending, list);
+ pending = next;
+ }
+ /* The final merge, rebuilding prev links */
+ merge_final(priv, cmp, head, pending, list);
+}
+EXPORT_SYMBOL(list_sort);
diff --git a/tools/lib/lockdep/.gitignore b/tools/lib/lockdep/.gitignore
deleted file mode 100644
index 6c308ac4388c..000000000000
--- a/tools/lib/lockdep/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-liblockdep.so.*
diff --git a/tools/lib/lockdep/Build b/tools/lib/lockdep/Build
deleted file mode 100644
index 6f667355b068..000000000000
--- a/tools/lib/lockdep/Build
+++ /dev/null
@@ -1 +0,0 @@
-liblockdep-y += common.o lockdep.o preload.o rbtree.o
diff --git a/tools/lib/lockdep/Makefile b/tools/lib/lockdep/Makefile
deleted file mode 100644
index 9dafb8cb752f..000000000000
--- a/tools/lib/lockdep/Makefile
+++ /dev/null
@@ -1,162 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-# file format version
-FILE_VERSION = 1
-
-LIBLOCKDEP_VERSION=$(shell make --no-print-directory -sC ../../.. kernelversion)
-
-# Makefiles suck: This macro sets a default value of $(2) for the
-# variable named by $(1), unless the variable has been set by
-# environment or command line. This is necessary for CC and AR
-# because make sets default values, so the simpler ?= approach
-# won't work as expected.
-define allow-override
- $(if $(or $(findstring environment,$(origin $(1))),\
- $(findstring command line,$(origin $(1)))),,\
- $(eval $(1) = $(2)))
-endef
-
-# Allow setting CC and AR and LD, or setting CROSS_COMPILE as a prefix.
-$(call allow-override,CC,$(CROSS_COMPILE)gcc)
-$(call allow-override,AR,$(CROSS_COMPILE)ar)
-$(call allow-override,LD,$(CROSS_COMPILE)ld)
-
-INSTALL = install
-
-# Use DESTDIR for installing into a different root directory.
-# This is useful for building a package. The program will be
-# installed in this directory as if it was the root directory.
-# Then the build tool can move it later.
-DESTDIR ?=
-DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
-
-prefix ?= /usr/local
-libdir_relative = lib
-libdir = $(prefix)/$(libdir_relative)
-bindir_relative = bin
-bindir = $(prefix)/$(bindir_relative)
-
-export DESTDIR DESTDIR_SQ INSTALL
-
-MAKEFLAGS += --no-print-directory
-
-include ../../scripts/Makefile.include
-
-# copy a bit from Linux kbuild
-
-ifeq ("$(origin V)", "command line")
- VERBOSE = $(V)
-endif
-ifndef VERBOSE
- VERBOSE = 0
-endif
-
-ifeq ($(srctree),)
-srctree := $(patsubst %/,%,$(dir $(CURDIR)))
-srctree := $(patsubst %/,%,$(dir $(srctree)))
-srctree := $(patsubst %/,%,$(dir $(srctree)))
-#$(info Determined 'srctree' to be $(srctree))
-endif
-
-# Shell quotes
-libdir_SQ = $(subst ','\'',$(libdir))
-bindir_SQ = $(subst ','\'',$(bindir))
-
-LIB_IN := $(OUTPUT)liblockdep-in.o
-
-BIN_FILE = lockdep
-LIB_FILE = $(OUTPUT)liblockdep.a $(OUTPUT)liblockdep.so.$(LIBLOCKDEP_VERSION)
-
-CONFIG_INCLUDES =
-CONFIG_LIBS =
-CONFIG_FLAGS =
-
-OBJ = $@
-N =
-
-export Q VERBOSE
-
-INCLUDES = -I. -I./uinclude -I./include -I../../include $(CONFIG_INCLUDES)
-
-# Set compile option CFLAGS if not set elsewhere
-CFLAGS ?= -g -DCONFIG_LOCKDEP -DCONFIG_STACKTRACE -DCONFIG_PROVE_LOCKING -DBITS_PER_LONG=__WORDSIZE -DLIBLOCKDEP_VERSION='"$(LIBLOCKDEP_VERSION)"' -rdynamic -O0 -g
-CFLAGS += -fPIC
-CFLAGS += -Wall
-
-override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ)
-
-ifeq ($(VERBOSE),1)
- Q =
- print_shared_lib_compile =
- print_install =
-else
- Q = @
- print_shared_lib_compile = echo ' LD '$(OBJ);
- print_static_lib_build = echo ' LD '$(OBJ);
- print_install = echo ' INSTALL '$1' to $(DESTDIR_SQ)$2';
-endif
-
-all:
-
-export srctree OUTPUT CC LD CFLAGS V
-include $(srctree)/tools/build/Makefile.include
-
-do_compile_shared_library = \
- ($(print_shared_lib_compile) \
- $(CC) $(LDFLAGS) --shared $^ -o $@ -lpthread -ldl -Wl,-soname='$(@F)';$(shell ln -sf $(@F) $(@D)/liblockdep.so))
-
-do_build_static_lib = \
- ($(print_static_lib_build) \
- $(RM) $@; $(AR) rcs $@ $^)
-
-CMD_TARGETS = $(LIB_FILE)
-
-TARGETS = $(CMD_TARGETS)
-
-
-all: fixdep all_cmd
-
-all_cmd: $(CMD_TARGETS)
-
-$(LIB_IN): force
- $(Q)$(MAKE) $(build)=liblockdep
-
-$(OUTPUT)liblockdep.so.$(LIBLOCKDEP_VERSION): $(LIB_IN)
- $(Q)$(do_compile_shared_library)
-
-$(OUTPUT)liblockdep.a: $(LIB_IN)
- $(Q)$(do_build_static_lib)
-
-tags: force
- $(RM) tags
- find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \
- --regex-c++='/_PE\(([^,)]*).*/TEP_ERRNO__\1/'
-
-TAGS: force
- $(RM) TAGS
- find . -name '*.[ch]' | xargs etags \
- --regex='/_PE(\([^,)]*\).*/TEP_ERRNO__\1/'
-
-define do_install
- $(print_install) \
- if [ ! -d '$(DESTDIR_SQ)$2' ]; then \
- $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
- fi; \
- $(INSTALL) $1 '$(DESTDIR_SQ)$2'
-endef
-
-install_lib: all_cmd
- $(Q)$(call do_install,$(LIB_FILE),$(libdir_SQ))
- $(Q)$(call do_install,$(BIN_FILE),$(bindir_SQ))
-
-install: install_lib
-
-clean:
- $(RM) $(OUTPUT)*.o *~ $(TARGETS) $(OUTPUT)*.a $(OUTPUT)*liblockdep*.so* $(VERSION_FILES) $(OUTPUT).*.d $(OUTPUT).*.cmd
- $(RM) tags TAGS
-
-PHONY += force
-force:
-
-# Declare the contents of the .PHONY variable as phony. We keep that
-# information in a variable so we can use it in if_changed and friends.
-.PHONY: $(PHONY)
diff --git a/tools/lib/lockdep/common.c b/tools/lib/lockdep/common.c
deleted file mode 100644
index 5c3b58cce8a9..000000000000
--- a/tools/lib/lockdep/common.c
+++ /dev/null
@@ -1,29 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <stddef.h>
-#include <stdbool.h>
-#include <linux/compiler.h>
-#include <linux/lockdep.h>
-#include <unistd.h>
-#include <sys/syscall.h>
-
-static __thread struct task_struct current_obj;
-
-/* lockdep wants these */
-bool debug_locks = true;
-bool debug_locks_silent;
-
-__attribute__((destructor)) static void liblockdep_exit(void)
-{
- debug_check_no_locks_held();
-}
-
-struct task_struct *__curr(void)
-{
- if (current_obj.pid == 0) {
- /* Makes lockdep output pretty */
- prctl(PR_GET_NAME, current_obj.comm);
- current_obj.pid = syscall(__NR_gettid);
- }
-
- return &current_obj;
-}
diff --git a/tools/lib/lockdep/include/liblockdep/common.h b/tools/lib/lockdep/include/liblockdep/common.h
deleted file mode 100644
index a6d7ee5f18ba..000000000000
--- a/tools/lib/lockdep/include/liblockdep/common.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LIBLOCKDEP_COMMON_H
-#define _LIBLOCKDEP_COMMON_H
-
-#include <pthread.h>
-
-#define NR_LOCKDEP_CACHING_CLASSES 2
-#define MAX_LOCKDEP_SUBCLASSES 8UL
-
-#ifndef CALLER_ADDR0
-#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
-#endif
-
-#ifndef _RET_IP_
-#define _RET_IP_ CALLER_ADDR0
-#endif
-
-#ifndef _THIS_IP_
-#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; })
-#endif
-
-struct lockdep_subclass_key {
- char __one_byte;
-};
-
-struct lock_class_key {
- struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES];
-};
-
-struct lockdep_map {
- struct lock_class_key *key;
- struct lock_class *class_cache[NR_LOCKDEP_CACHING_CLASSES];
- const char *name;
-#ifdef CONFIG_LOCK_STAT
- int cpu;
- unsigned long ip;
-#endif
-};
-
-void lockdep_init_map(struct lockdep_map *lock, const char *name,
- struct lock_class_key *key, int subclass);
-void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
- int trylock, int read, int check,
- struct lockdep_map *nest_lock, unsigned long ip);
-void lock_release(struct lockdep_map *lock, unsigned long ip);
-void lockdep_reset_lock(struct lockdep_map *lock);
-void lockdep_register_key(struct lock_class_key *key);
-void lockdep_unregister_key(struct lock_class_key *key);
-extern void debug_check_no_locks_freed(const void *from, unsigned long len);
-
-#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
- { .name = (_name), .key = (void *)(_key), }
-
-#endif
diff --git a/tools/lib/lockdep/include/liblockdep/mutex.h b/tools/lib/lockdep/include/liblockdep/mutex.h
deleted file mode 100644
index bd106b82759b..000000000000
--- a/tools/lib/lockdep/include/liblockdep/mutex.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LIBLOCKDEP_MUTEX_H
-#define _LIBLOCKDEP_MUTEX_H
-
-#include <pthread.h>
-#include "common.h"
-
-struct liblockdep_pthread_mutex {
- pthread_mutex_t mutex;
- struct lock_class_key key;
- struct lockdep_map dep_map;
-};
-
-typedef struct liblockdep_pthread_mutex liblockdep_pthread_mutex_t;
-
-#define LIBLOCKDEP_PTHREAD_MUTEX_INITIALIZER(mtx) \
- (const struct liblockdep_pthread_mutex) { \
- .mutex = PTHREAD_MUTEX_INITIALIZER, \
- .dep_map = STATIC_LOCKDEP_MAP_INIT(#mtx, &((&(mtx))->dep_map)), \
-}
-
-static inline int __mutex_init(liblockdep_pthread_mutex_t *lock,
- const char *name,
- struct lock_class_key *key,
- const pthread_mutexattr_t *__mutexattr)
-{
- lockdep_init_map(&lock->dep_map, name, key, 0);
- return pthread_mutex_init(&lock->mutex, __mutexattr);
-}
-
-#define liblockdep_pthread_mutex_init(mutex, mutexattr) \
-({ \
- lockdep_register_key(&(mutex)->key); \
- __mutex_init((mutex), #mutex, &(mutex)->key, (mutexattr)); \
-})
-
-static inline int liblockdep_pthread_mutex_lock(liblockdep_pthread_mutex_t *lock)
-{
- lock_acquire(&lock->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_);
- return pthread_mutex_lock(&lock->mutex);
-}
-
-static inline int liblockdep_pthread_mutex_unlock(liblockdep_pthread_mutex_t *lock)
-{
- lock_release(&lock->dep_map, (unsigned long)_RET_IP_);
- return pthread_mutex_unlock(&lock->mutex);
-}
-
-static inline int liblockdep_pthread_mutex_trylock(liblockdep_pthread_mutex_t *lock)
-{
- lock_acquire(&lock->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_);
- return pthread_mutex_trylock(&lock->mutex) == 0 ? 1 : 0;
-}
-
-static inline int liblockdep_pthread_mutex_destroy(liblockdep_pthread_mutex_t *lock)
-{
- lockdep_reset_lock(&lock->dep_map);
- lockdep_unregister_key(&lock->key);
- return pthread_mutex_destroy(&lock->mutex);
-}
-
-#ifdef __USE_LIBLOCKDEP
-
-#define pthread_mutex_t liblockdep_pthread_mutex_t
-#define pthread_mutex_init liblockdep_pthread_mutex_init
-#define pthread_mutex_lock liblockdep_pthread_mutex_lock
-#define pthread_mutex_unlock liblockdep_pthread_mutex_unlock
-#define pthread_mutex_trylock liblockdep_pthread_mutex_trylock
-#define pthread_mutex_destroy liblockdep_pthread_mutex_destroy
-
-#endif
-
-#endif
diff --git a/tools/lib/lockdep/include/liblockdep/rwlock.h b/tools/lib/lockdep/include/liblockdep/rwlock.h
deleted file mode 100644
index 6d5d2932bf4d..000000000000
--- a/tools/lib/lockdep/include/liblockdep/rwlock.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LIBLOCKDEP_RWLOCK_H
-#define _LIBLOCKDEP_RWLOCK_H
-
-#include <pthread.h>
-#include "common.h"
-
-struct liblockdep_pthread_rwlock {
- pthread_rwlock_t rwlock;
- struct lockdep_map dep_map;
-};
-
-typedef struct liblockdep_pthread_rwlock liblockdep_pthread_rwlock_t;
-
-#define LIBLOCKDEP_PTHREAD_RWLOCK_INITIALIZER(rwl) \
- (struct liblockdep_pthread_rwlock) { \
- .rwlock = PTHREAD_RWLOCK_INITIALIZER, \
- .dep_map = STATIC_LOCKDEP_MAP_INIT(#rwl, &((&(rwl))->dep_map)), \
-}
-
-static inline int __rwlock_init(liblockdep_pthread_rwlock_t *lock,
- const char *name,
- struct lock_class_key *key,
- const pthread_rwlockattr_t *attr)
-{
- lockdep_init_map(&lock->dep_map, name, key, 0);
-
- return pthread_rwlock_init(&lock->rwlock, attr);
-}
-
-#define liblockdep_pthread_rwlock_init(lock, attr) \
-({ \
- static struct lock_class_key __key; \
- \
- __rwlock_init((lock), #lock, &__key, (attr)); \
-})
-
-static inline int liblockdep_pthread_rwlock_rdlock(liblockdep_pthread_rwlock_t *lock)
-{
- lock_acquire(&lock->dep_map, 0, 0, 2, 1, NULL, (unsigned long)_RET_IP_);
- return pthread_rwlock_rdlock(&lock->rwlock);
-
-}
-
-static inline int liblockdep_pthread_rwlock_unlock(liblockdep_pthread_rwlock_t *lock)
-{
- lock_release(&lock->dep_map, (unsigned long)_RET_IP_);
- return pthread_rwlock_unlock(&lock->rwlock);
-}
-
-static inline int liblockdep_pthread_rwlock_wrlock(liblockdep_pthread_rwlock_t *lock)
-{
- lock_acquire(&lock->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_);
- return pthread_rwlock_wrlock(&lock->rwlock);
-}
-
-static inline int liblockdep_pthread_rwlock_tryrdlock(liblockdep_pthread_rwlock_t *lock)
-{
- lock_acquire(&lock->dep_map, 0, 1, 2, 1, NULL, (unsigned long)_RET_IP_);
- return pthread_rwlock_tryrdlock(&lock->rwlock) == 0 ? 1 : 0;
-}
-
-static inline int liblockdep_pthread_rwlock_trywrlock(liblockdep_pthread_rwlock_t *lock)
-{
- lock_acquire(&lock->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_);
- return pthread_rwlock_trywrlock(&lock->rwlock) == 0 ? 1 : 0;
-}
-
-static inline int liblockdep_rwlock_destroy(liblockdep_pthread_rwlock_t *lock)
-{
- return pthread_rwlock_destroy(&lock->rwlock);
-}
-
-#ifdef __USE_LIBLOCKDEP
-
-#define pthread_rwlock_t liblockdep_pthread_rwlock_t
-#define pthread_rwlock_init liblockdep_pthread_rwlock_init
-#define pthread_rwlock_rdlock liblockdep_pthread_rwlock_rdlock
-#define pthread_rwlock_unlock liblockdep_pthread_rwlock_unlock
-#define pthread_rwlock_wrlock liblockdep_pthread_rwlock_wrlock
-#define pthread_rwlock_tryrdlock liblockdep_pthread_rwlock_tryrdlock
-#define pthread_rwlock_trywrlock liblockdep_pthread_rwlock_trywrlock
-#define pthread_rwlock_destroy liblockdep_rwlock_destroy
-
-#endif
-
-#endif
diff --git a/tools/lib/lockdep/lockdep b/tools/lib/lockdep/lockdep
deleted file mode 100755
index 49af9fe19f5b..000000000000
--- a/tools/lib/lockdep/lockdep
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/bin/bash
-
-LD_PRELOAD="./liblockdep.so $LD_PRELOAD" "$@"
diff --git a/tools/lib/lockdep/lockdep.c b/tools/lib/lockdep/lockdep.c
deleted file mode 100644
index 348a9d0fb766..000000000000
--- a/tools/lib/lockdep/lockdep.c
+++ /dev/null
@@ -1,33 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/lockdep.h>
-#include <stdlib.h>
-
-/* Trivial API wrappers, we don't (yet) have RCU in user-space: */
-#define hlist_for_each_entry_rcu hlist_for_each_entry
-#define hlist_add_head_rcu hlist_add_head
-#define hlist_del_rcu hlist_del
-#define list_for_each_entry_rcu list_for_each_entry
-#define list_add_tail_rcu list_add_tail
-
-u32 prandom_u32(void)
-{
- /* Used only by lock_pin_lock() which is dead code */
- abort();
-}
-
-void print_irqtrace_events(struct task_struct *curr)
-{
- abort();
-}
-
-static struct new_utsname *init_utsname(void)
-{
- static struct new_utsname n = (struct new_utsname) {
- .release = "liblockdep",
- .version = LIBLOCKDEP_VERSION,
- };
-
- return &n;
-}
-
-#include "../../../kernel/locking/lockdep.c"
diff --git a/tools/lib/lockdep/lockdep_internals.h b/tools/lib/lockdep/lockdep_internals.h
deleted file mode 100644
index 29d0c954cc24..000000000000
--- a/tools/lib/lockdep/lockdep_internals.h
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../../kernel/locking/lockdep_internals.h"
diff --git a/tools/lib/lockdep/lockdep_states.h b/tools/lib/lockdep/lockdep_states.h
deleted file mode 100644
index 248d235efda9..000000000000
--- a/tools/lib/lockdep/lockdep_states.h
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../../kernel/locking/lockdep_states.h"
diff --git a/tools/lib/lockdep/preload.c b/tools/lib/lockdep/preload.c
deleted file mode 100644
index 8f1adbe887b2..000000000000
--- a/tools/lib/lockdep/preload.c
+++ /dev/null
@@ -1,443 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#define _GNU_SOURCE
-#include <pthread.h>
-#include <stdio.h>
-#include <dlfcn.h>
-#include <stdlib.h>
-#include <sysexits.h>
-#include <unistd.h>
-#include "include/liblockdep/mutex.h"
-#include "../../include/linux/rbtree.h"
-
-/**
- * struct lock_lookup - liblockdep's view of a single unique lock
- * @orig: pointer to the original pthread lock, used for lookups
- * @dep_map: lockdep's dep_map structure
- * @key: lockdep's key structure
- * @node: rb-tree node used to store the lock in a global tree
- * @name: a unique name for the lock
- */
-struct lock_lookup {
- void *orig; /* Original pthread lock, used for lookups */
- struct lockdep_map dep_map; /* Since all locks are dynamic, we need
- * a dep_map and a key for each lock */
- /*
- * Wait, there's no support for key classes? Yup :(
- * Most big projects wrap the pthread api with their own calls to
- * be compatible with different locking methods. This means that
- * "classes" will be brokes since the function that creates all
- * locks will point to a generic locking function instead of the
- * actual code that wants to do the locking.
- */
- struct lock_class_key key;
- struct rb_node node;
-#define LIBLOCKDEP_MAX_LOCK_NAME 22
- char name[LIBLOCKDEP_MAX_LOCK_NAME];
-};
-
-/* This is where we store our locks */
-static struct rb_root locks = RB_ROOT;
-static pthread_rwlock_t locks_rwlock = PTHREAD_RWLOCK_INITIALIZER;
-
-/* pthread mutex API */
-
-#ifdef __GLIBC__
-extern int __pthread_mutex_init(pthread_mutex_t *mutex, const pthread_mutexattr_t *attr);
-extern int __pthread_mutex_lock(pthread_mutex_t *mutex);
-extern int __pthread_mutex_trylock(pthread_mutex_t *mutex);
-extern int __pthread_mutex_unlock(pthread_mutex_t *mutex);
-extern int __pthread_mutex_destroy(pthread_mutex_t *mutex);
-#else
-#define __pthread_mutex_init NULL
-#define __pthread_mutex_lock NULL
-#define __pthread_mutex_trylock NULL
-#define __pthread_mutex_unlock NULL
-#define __pthread_mutex_destroy NULL
-#endif
-static int (*ll_pthread_mutex_init)(pthread_mutex_t *mutex,
- const pthread_mutexattr_t *attr) = __pthread_mutex_init;
-static int (*ll_pthread_mutex_lock)(pthread_mutex_t *mutex) = __pthread_mutex_lock;
-static int (*ll_pthread_mutex_trylock)(pthread_mutex_t *mutex) = __pthread_mutex_trylock;
-static int (*ll_pthread_mutex_unlock)(pthread_mutex_t *mutex) = __pthread_mutex_unlock;
-static int (*ll_pthread_mutex_destroy)(pthread_mutex_t *mutex) = __pthread_mutex_destroy;
-
-/* pthread rwlock API */
-
-#ifdef __GLIBC__
-extern int __pthread_rwlock_init(pthread_rwlock_t *rwlock, const pthread_rwlockattr_t *attr);
-extern int __pthread_rwlock_destroy(pthread_rwlock_t *rwlock);
-extern int __pthread_rwlock_wrlock(pthread_rwlock_t *rwlock);
-extern int __pthread_rwlock_trywrlock(pthread_rwlock_t *rwlock);
-extern int __pthread_rwlock_rdlock(pthread_rwlock_t *rwlock);
-extern int __pthread_rwlock_tryrdlock(pthread_rwlock_t *rwlock);
-extern int __pthread_rwlock_unlock(pthread_rwlock_t *rwlock);
-#else
-#define __pthread_rwlock_init NULL
-#define __pthread_rwlock_destroy NULL
-#define __pthread_rwlock_wrlock NULL
-#define __pthread_rwlock_trywrlock NULL
-#define __pthread_rwlock_rdlock NULL
-#define __pthread_rwlock_tryrdlock NULL
-#define __pthread_rwlock_unlock NULL
-#endif
-
-static int (*ll_pthread_rwlock_init)(pthread_rwlock_t *rwlock,
- const pthread_rwlockattr_t *attr) = __pthread_rwlock_init;
-static int (*ll_pthread_rwlock_destroy)(pthread_rwlock_t *rwlock) = __pthread_rwlock_destroy;
-static int (*ll_pthread_rwlock_rdlock)(pthread_rwlock_t *rwlock) = __pthread_rwlock_rdlock;
-static int (*ll_pthread_rwlock_tryrdlock)(pthread_rwlock_t *rwlock) = __pthread_rwlock_tryrdlock;
-static int (*ll_pthread_rwlock_trywrlock)(pthread_rwlock_t *rwlock) = __pthread_rwlock_trywrlock;
-static int (*ll_pthread_rwlock_wrlock)(pthread_rwlock_t *rwlock) = __pthread_rwlock_wrlock;
-static int (*ll_pthread_rwlock_unlock)(pthread_rwlock_t *rwlock) = __pthread_rwlock_unlock;
-
-enum { none, prepare, done, } __init_state;
-static void init_preload(void);
-static void try_init_preload(void)
-{
- if (__init_state != done)
- init_preload();
-}
-
-static struct rb_node **__get_lock_node(void *lock, struct rb_node **parent)
-{
- struct rb_node **node = &locks.rb_node;
- struct lock_lookup *l;
-
- *parent = NULL;
-
- while (*node) {
- l = rb_entry(*node, struct lock_lookup, node);
-
- *parent = *node;
- if (lock < l->orig)
- node = &l->node.rb_left;
- else if (lock > l->orig)
- node = &l->node.rb_right;
- else
- return node;
- }
-
- return node;
-}
-
-#ifndef LIBLOCKDEP_STATIC_ENTRIES
-#define LIBLOCKDEP_STATIC_ENTRIES 1024
-#endif
-
-static struct lock_lookup __locks[LIBLOCKDEP_STATIC_ENTRIES];
-static int __locks_nr;
-
-static inline bool is_static_lock(struct lock_lookup *lock)
-{
- return lock >= __locks && lock < __locks + ARRAY_SIZE(__locks);
-}
-
-static struct lock_lookup *alloc_lock(void)
-{
- if (__init_state != done) {
- /*
- * Some programs attempt to initialize and use locks in their
- * allocation path. This means that a call to malloc() would
- * result in locks being initialized and locked.
- *
- * Why is it an issue for us? dlsym() below will try allocating
- * to give us the original function. Since this allocation will
- * result in a locking operations, we have to let pthread deal
- * with it, but we can't! we don't have the pointer to the
- * original API since we're inside dlsym() trying to get it
- */
-
- int idx = __locks_nr++;
- if (idx >= ARRAY_SIZE(__locks)) {
- dprintf(STDERR_FILENO,
- "LOCKDEP error: insufficient LIBLOCKDEP_STATIC_ENTRIES\n");
- exit(EX_UNAVAILABLE);
- }
- return __locks + idx;
- }
-
- return malloc(sizeof(struct lock_lookup));
-}
-
-static inline void free_lock(struct lock_lookup *lock)
-{
- if (likely(!is_static_lock(lock)))
- free(lock);
-}
-
-/**
- * __get_lock - find or create a lock instance
- * @lock: pointer to a pthread lock function
- *
- * Try to find an existing lock in the rbtree using the provided pointer. If
- * one wasn't found - create it.
- */
-static struct lock_lookup *__get_lock(void *lock)
-{
- struct rb_node **node, *parent;
- struct lock_lookup *l;
-
- ll_pthread_rwlock_rdlock(&locks_rwlock);
- node = __get_lock_node(lock, &parent);
- ll_pthread_rwlock_unlock(&locks_rwlock);
- if (*node) {
- return rb_entry(*node, struct lock_lookup, node);
- }
-
- /* We didn't find the lock, let's create it */
- l = alloc_lock();
- if (l == NULL)
- return NULL;
-
- l->orig = lock;
- /*
- * Currently the name of the lock is the ptr value of the pthread lock,
- * while not optimal, it makes debugging a bit easier.
- *
- * TODO: Get the real name of the lock using libdwarf
- */
- sprintf(l->name, "%p", lock);
- lockdep_init_map(&l->dep_map, l->name, &l->key, 0);
-
- ll_pthread_rwlock_wrlock(&locks_rwlock);
- /* This might have changed since the last time we fetched it */
- node = __get_lock_node(lock, &parent);
- rb_link_node(&l->node, parent, node);
- rb_insert_color(&l->node, &locks);
- ll_pthread_rwlock_unlock(&locks_rwlock);
-
- return l;
-}
-
-static void __del_lock(struct lock_lookup *lock)
-{
- ll_pthread_rwlock_wrlock(&locks_rwlock);
- rb_erase(&lock->node, &locks);
- ll_pthread_rwlock_unlock(&locks_rwlock);
- free_lock(lock);
-}
-
-int pthread_mutex_init(pthread_mutex_t *mutex,
- const pthread_mutexattr_t *attr)
-{
- int r;
-
- /*
- * We keep trying to init our preload module because there might be
- * code in init sections that tries to touch locks before we are
- * initialized, in that case we'll need to manually call preload
- * to get us going.
- *
- * Funny enough, kernel's lockdep had the same issue, and used
- * (almost) the same solution. See look_up_lock_class() in
- * kernel/locking/lockdep.c for details.
- */
- try_init_preload();
-
- r = ll_pthread_mutex_init(mutex, attr);
- if (r == 0)
- /*
- * We do a dummy initialization here so that lockdep could
- * warn us if something fishy is going on - such as
- * initializing a held lock.
- */
- __get_lock(mutex);
-
- return r;
-}
-
-int pthread_mutex_lock(pthread_mutex_t *mutex)
-{
- int r;
-
- try_init_preload();
-
- lock_acquire(&__get_lock(mutex)->dep_map, 0, 0, 0, 1, NULL,
- (unsigned long)_RET_IP_);
- /*
- * Here's the thing with pthread mutexes: unlike the kernel variant,
- * they can fail.
- *
- * This means that the behaviour here is a bit different from what's
- * going on in the kernel: there we just tell lockdep that we took the
- * lock before actually taking it, but here we must deal with the case
- * that locking failed.
- *
- * To do that we'll "release" the lock if locking failed - this way
- * we'll get lockdep doing the correct checks when we try to take
- * the lock, and if that fails - we'll be back to the correct
- * state by releasing it.
- */
- r = ll_pthread_mutex_lock(mutex);
- if (r)
- lock_release(&__get_lock(mutex)->dep_map, (unsigned long)_RET_IP_);
-
- return r;
-}
-
-int pthread_mutex_trylock(pthread_mutex_t *mutex)
-{
- int r;
-
- try_init_preload();
-
- lock_acquire(&__get_lock(mutex)->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_);
- r = ll_pthread_mutex_trylock(mutex);
- if (r)
- lock_release(&__get_lock(mutex)->dep_map, (unsigned long)_RET_IP_);
-
- return r;
-}
-
-int pthread_mutex_unlock(pthread_mutex_t *mutex)
-{
- int r;
-
- try_init_preload();
-
- lock_release(&__get_lock(mutex)->dep_map, (unsigned long)_RET_IP_);
- /*
- * Just like taking a lock, only in reverse!
- *
- * If we fail releasing the lock, tell lockdep we're holding it again.
- */
- r = ll_pthread_mutex_unlock(mutex);
- if (r)
- lock_acquire(&__get_lock(mutex)->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_);
-
- return r;
-}
-
-int pthread_mutex_destroy(pthread_mutex_t *mutex)
-{
- try_init_preload();
-
- /*
- * Let's see if we're releasing a lock that's held.
- *
- * TODO: Hook into free() and add that check there as well.
- */
- debug_check_no_locks_freed(mutex, sizeof(*mutex));
- __del_lock(__get_lock(mutex));
- return ll_pthread_mutex_destroy(mutex);
-}
-
-/* This is the rwlock part, very similar to what happened with mutex above */
-int pthread_rwlock_init(pthread_rwlock_t *rwlock,
- const pthread_rwlockattr_t *attr)
-{
- int r;
-
- try_init_preload();
-
- r = ll_pthread_rwlock_init(rwlock, attr);
- if (r == 0)
- __get_lock(rwlock);
-
- return r;
-}
-
-int pthread_rwlock_destroy(pthread_rwlock_t *rwlock)
-{
- try_init_preload();
-
- debug_check_no_locks_freed(rwlock, sizeof(*rwlock));
- __del_lock(__get_lock(rwlock));
- return ll_pthread_rwlock_destroy(rwlock);
-}
-
-int pthread_rwlock_rdlock(pthread_rwlock_t *rwlock)
-{
- int r;
-
- init_preload();
-
- lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 2, 1, NULL, (unsigned long)_RET_IP_);
- r = ll_pthread_rwlock_rdlock(rwlock);
- if (r)
- lock_release(&__get_lock(rwlock)->dep_map, (unsigned long)_RET_IP_);
-
- return r;
-}
-
-int pthread_rwlock_tryrdlock(pthread_rwlock_t *rwlock)
-{
- int r;
-
- init_preload();
-
- lock_acquire(&__get_lock(rwlock)->dep_map, 0, 1, 2, 1, NULL, (unsigned long)_RET_IP_);
- r = ll_pthread_rwlock_tryrdlock(rwlock);
- if (r)
- lock_release(&__get_lock(rwlock)->dep_map, (unsigned long)_RET_IP_);
-
- return r;
-}
-
-int pthread_rwlock_trywrlock(pthread_rwlock_t *rwlock)
-{
- int r;
-
- init_preload();
-
- lock_acquire(&__get_lock(rwlock)->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_);
- r = ll_pthread_rwlock_trywrlock(rwlock);
- if (r)
- lock_release(&__get_lock(rwlock)->dep_map, (unsigned long)_RET_IP_);
-
- return r;
-}
-
-int pthread_rwlock_wrlock(pthread_rwlock_t *rwlock)
-{
- int r;
-
- init_preload();
-
- lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_);
- r = ll_pthread_rwlock_wrlock(rwlock);
- if (r)
- lock_release(&__get_lock(rwlock)->dep_map, (unsigned long)_RET_IP_);
-
- return r;
-}
-
-int pthread_rwlock_unlock(pthread_rwlock_t *rwlock)
-{
- int r;
-
- init_preload();
-
- lock_release(&__get_lock(rwlock)->dep_map, (unsigned long)_RET_IP_);
- r = ll_pthread_rwlock_unlock(rwlock);
- if (r)
- lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_);
-
- return r;
-}
-
-__attribute__((constructor)) static void init_preload(void)
-{
- if (__init_state == done)
- return;
-
-#ifndef __GLIBC__
- __init_state = prepare;
-
- ll_pthread_mutex_init = dlsym(RTLD_NEXT, "pthread_mutex_init");
- ll_pthread_mutex_lock = dlsym(RTLD_NEXT, "pthread_mutex_lock");
- ll_pthread_mutex_trylock = dlsym(RTLD_NEXT, "pthread_mutex_trylock");
- ll_pthread_mutex_unlock = dlsym(RTLD_NEXT, "pthread_mutex_unlock");
- ll_pthread_mutex_destroy = dlsym(RTLD_NEXT, "pthread_mutex_destroy");
-
- ll_pthread_rwlock_init = dlsym(RTLD_NEXT, "pthread_rwlock_init");
- ll_pthread_rwlock_destroy = dlsym(RTLD_NEXT, "pthread_rwlock_destroy");
- ll_pthread_rwlock_rdlock = dlsym(RTLD_NEXT, "pthread_rwlock_rdlock");
- ll_pthread_rwlock_tryrdlock = dlsym(RTLD_NEXT, "pthread_rwlock_tryrdlock");
- ll_pthread_rwlock_wrlock = dlsym(RTLD_NEXT, "pthread_rwlock_wrlock");
- ll_pthread_rwlock_trywrlock = dlsym(RTLD_NEXT, "pthread_rwlock_trywrlock");
- ll_pthread_rwlock_unlock = dlsym(RTLD_NEXT, "pthread_rwlock_unlock");
-#endif
-
- __init_state = done;
-}
diff --git a/tools/lib/lockdep/rbtree.c b/tools/lib/lockdep/rbtree.c
deleted file mode 100644
index 297c304571f8..000000000000
--- a/tools/lib/lockdep/rbtree.c
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../lib/rbtree.c"
diff --git a/tools/lib/lockdep/run_tests.sh b/tools/lib/lockdep/run_tests.sh
deleted file mode 100755
index 11f425662b43..000000000000
--- a/tools/lib/lockdep/run_tests.sh
+++ /dev/null
@@ -1,47 +0,0 @@
-#! /bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-if ! make >/dev/null; then
- echo "Building liblockdep failed."
- echo "FAILED!"
- exit 1
-fi
-
-find tests -name '*.c' | sort | while read -r i; do
- testname=$(basename "$i" .c)
- echo -ne "$testname... "
- if gcc -o "tests/$testname" -pthread "$i" liblockdep.a -Iinclude -D__USE_LIBLOCKDEP &&
- timeout 1 "tests/$testname" 2>&1 | /bin/bash "tests/${testname}.sh"; then
- echo "PASSED!"
- else
- echo "FAILED!"
- fi
- rm -f "tests/$testname"
-done
-
-find tests -name '*.c' | sort | while read -r i; do
- testname=$(basename "$i" .c)
- echo -ne "(PRELOAD) $testname... "
- if gcc -o "tests/$testname" -pthread -Iinclude "$i" &&
- timeout 1 ./lockdep "tests/$testname" 2>&1 |
- /bin/bash "tests/${testname}.sh"; then
- echo "PASSED!"
- else
- echo "FAILED!"
- fi
- rm -f "tests/$testname"
-done
-
-find tests -name '*.c' | sort | while read -r i; do
- testname=$(basename "$i" .c)
- echo -ne "(PRELOAD + Valgrind) $testname... "
- if gcc -o "tests/$testname" -pthread -Iinclude "$i" &&
- { timeout 10 valgrind --read-var-info=yes ./lockdep "./tests/$testname" >& "tests/${testname}.vg.out"; true; } &&
- /bin/bash "tests/${testname}.sh" < "tests/${testname}.vg.out" &&
- ! grep -Eq '(^==[0-9]*== (Invalid |Uninitialised ))|Mismatched free|Source and destination overlap| UME ' "tests/${testname}.vg.out"; then
- echo "PASSED!"
- else
- echo "FAILED!"
- fi
- rm -f "tests/$testname"
-done
diff --git a/tools/lib/lockdep/tests/AA.c b/tools/lib/lockdep/tests/AA.c
deleted file mode 100644
index 63c7ce97bda3..000000000000
--- a/tools/lib/lockdep/tests/AA.c
+++ /dev/null
@@ -1,14 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <liblockdep/mutex.h>
-
-int main(void)
-{
- pthread_mutex_t a;
-
- pthread_mutex_init(&a, NULL);
-
- pthread_mutex_lock(&a);
- pthread_mutex_lock(&a);
-
- return 0;
-}
diff --git a/tools/lib/lockdep/tests/AA.sh b/tools/lib/lockdep/tests/AA.sh
deleted file mode 100644
index f39b32865074..000000000000
--- a/tools/lib/lockdep/tests/AA.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-grep -q 'WARNING: possible recursive locking detected'
diff --git a/tools/lib/lockdep/tests/ABA.c b/tools/lib/lockdep/tests/ABA.c
deleted file mode 100644
index efa39b23f05d..000000000000
--- a/tools/lib/lockdep/tests/ABA.c
+++ /dev/null
@@ -1,14 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <liblockdep/mutex.h>
-
-void main(void)
-{
- pthread_mutex_t a, b;
-
- pthread_mutex_init(&a, NULL);
- pthread_mutex_init(&b, NULL);
-
- pthread_mutex_lock(&a);
- pthread_mutex_lock(&b);
- pthread_mutex_lock(&a);
-}
diff --git a/tools/lib/lockdep/tests/ABA.sh b/tools/lib/lockdep/tests/ABA.sh
deleted file mode 100644
index f39b32865074..000000000000
--- a/tools/lib/lockdep/tests/ABA.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-grep -q 'WARNING: possible recursive locking detected'
diff --git a/tools/lib/lockdep/tests/ABBA.c b/tools/lib/lockdep/tests/ABBA.c
deleted file mode 100644
index 543789bc3e37..000000000000
--- a/tools/lib/lockdep/tests/ABBA.c
+++ /dev/null
@@ -1,26 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <liblockdep/mutex.h>
-#include "common.h"
-
-void main(void)
-{
- pthread_mutex_t a, b;
-
- pthread_mutex_init(&a, NULL);
- pthread_mutex_init(&b, NULL);
-
- LOCK_UNLOCK_2(a, b);
- LOCK_UNLOCK_2(b, a);
-
- pthread_mutex_destroy(&b);
- pthread_mutex_destroy(&a);
-
- pthread_mutex_init(&a, NULL);
- pthread_mutex_init(&b, NULL);
-
- LOCK_UNLOCK_2(a, b);
- LOCK_UNLOCK_2(b, a);
-
- pthread_mutex_destroy(&b);
- pthread_mutex_destroy(&a);
-}
diff --git a/tools/lib/lockdep/tests/ABBA.sh b/tools/lib/lockdep/tests/ABBA.sh
deleted file mode 100644
index fc31c607a5a8..000000000000
--- a/tools/lib/lockdep/tests/ABBA.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-grep -q 'WARNING: possible circular locking dependency detected'
diff --git a/tools/lib/lockdep/tests/ABBA_2threads.c b/tools/lib/lockdep/tests/ABBA_2threads.c
deleted file mode 100644
index 39325ef8a2ac..000000000000
--- a/tools/lib/lockdep/tests/ABBA_2threads.c
+++ /dev/null
@@ -1,47 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <stdio.h>
-#include <pthread.h>
-
-pthread_mutex_t a = PTHREAD_MUTEX_INITIALIZER;
-pthread_mutex_t b = PTHREAD_MUTEX_INITIALIZER;
-pthread_barrier_t bar;
-
-void *ba_lock(void *arg)
-{
- int ret, i;
-
- pthread_mutex_lock(&b);
-
- if (pthread_barrier_wait(&bar) == PTHREAD_BARRIER_SERIAL_THREAD)
- pthread_barrier_destroy(&bar);
-
- pthread_mutex_lock(&a);
-
- pthread_mutex_unlock(&a);
- pthread_mutex_unlock(&b);
-}
-
-int main(void)
-{
- pthread_t t;
-
- pthread_barrier_init(&bar, NULL, 2);
-
- if (pthread_create(&t, NULL, ba_lock, NULL)) {
- fprintf(stderr, "pthread_create() failed\n");
- return 1;
- }
- pthread_mutex_lock(&a);
-
- if (pthread_barrier_wait(&bar) == PTHREAD_BARRIER_SERIAL_THREAD)
- pthread_barrier_destroy(&bar);
-
- pthread_mutex_lock(&b);
-
- pthread_mutex_unlock(&b);
- pthread_mutex_unlock(&a);
-
- pthread_join(t, NULL);
-
- return 0;
-}
diff --git a/tools/lib/lockdep/tests/ABBA_2threads.sh b/tools/lib/lockdep/tests/ABBA_2threads.sh
deleted file mode 100644
index fc31c607a5a8..000000000000
--- a/tools/lib/lockdep/tests/ABBA_2threads.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-grep -q 'WARNING: possible circular locking dependency detected'
diff --git a/tools/lib/lockdep/tests/ABBCCA.c b/tools/lib/lockdep/tests/ABBCCA.c
deleted file mode 100644
index 48446129d496..000000000000
--- a/tools/lib/lockdep/tests/ABBCCA.c
+++ /dev/null
@@ -1,20 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <liblockdep/mutex.h>
-#include "common.h"
-
-void main(void)
-{
- pthread_mutex_t a, b, c;
-
- pthread_mutex_init(&a, NULL);
- pthread_mutex_init(&b, NULL);
- pthread_mutex_init(&c, NULL);
-
- LOCK_UNLOCK_2(a, b);
- LOCK_UNLOCK_2(b, c);
- LOCK_UNLOCK_2(c, a);
-
- pthread_mutex_destroy(&c);
- pthread_mutex_destroy(&b);
- pthread_mutex_destroy(&a);
-}
diff --git a/tools/lib/lockdep/tests/ABBCCA.sh b/tools/lib/lockdep/tests/ABBCCA.sh
deleted file mode 100644
index fc31c607a5a8..000000000000
--- a/tools/lib/lockdep/tests/ABBCCA.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-grep -q 'WARNING: possible circular locking dependency detected'
diff --git a/tools/lib/lockdep/tests/ABBCCDDA.c b/tools/lib/lockdep/tests/ABBCCDDA.c
deleted file mode 100644
index 3570bf7b3804..000000000000
--- a/tools/lib/lockdep/tests/ABBCCDDA.c
+++ /dev/null
@@ -1,23 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <liblockdep/mutex.h>
-#include "common.h"
-
-void main(void)
-{
- pthread_mutex_t a, b, c, d;
-
- pthread_mutex_init(&a, NULL);
- pthread_mutex_init(&b, NULL);
- pthread_mutex_init(&c, NULL);
- pthread_mutex_init(&d, NULL);
-
- LOCK_UNLOCK_2(a, b);
- LOCK_UNLOCK_2(b, c);
- LOCK_UNLOCK_2(c, d);
- LOCK_UNLOCK_2(d, a);
-
- pthread_mutex_destroy(&d);
- pthread_mutex_destroy(&c);
- pthread_mutex_destroy(&b);
- pthread_mutex_destroy(&a);
-}
diff --git a/tools/lib/lockdep/tests/ABBCCDDA.sh b/tools/lib/lockdep/tests/ABBCCDDA.sh
deleted file mode 100644
index fc31c607a5a8..000000000000
--- a/tools/lib/lockdep/tests/ABBCCDDA.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-grep -q 'WARNING: possible circular locking dependency detected'
diff --git a/tools/lib/lockdep/tests/ABCABC.c b/tools/lib/lockdep/tests/ABCABC.c
deleted file mode 100644
index a1c4659894cd..000000000000
--- a/tools/lib/lockdep/tests/ABCABC.c
+++ /dev/null
@@ -1,20 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <liblockdep/mutex.h>
-#include "common.h"
-
-void main(void)
-{
- pthread_mutex_t a, b, c;
-
- pthread_mutex_init(&a, NULL);
- pthread_mutex_init(&b, NULL);
- pthread_mutex_init(&c, NULL);
-
- LOCK_UNLOCK_2(a, b);
- LOCK_UNLOCK_2(c, a);
- LOCK_UNLOCK_2(b, c);
-
- pthread_mutex_destroy(&c);
- pthread_mutex_destroy(&b);
- pthread_mutex_destroy(&a);
-}
diff --git a/tools/lib/lockdep/tests/ABCABC.sh b/tools/lib/lockdep/tests/ABCABC.sh
deleted file mode 100644
index fc31c607a5a8..000000000000
--- a/tools/lib/lockdep/tests/ABCABC.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-grep -q 'WARNING: possible circular locking dependency detected'
diff --git a/tools/lib/lockdep/tests/ABCDBCDA.c b/tools/lib/lockdep/tests/ABCDBCDA.c
deleted file mode 100644
index 335af1c90ab5..000000000000
--- a/tools/lib/lockdep/tests/ABCDBCDA.c
+++ /dev/null
@@ -1,23 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <liblockdep/mutex.h>
-#include "common.h"
-
-void main(void)
-{
- pthread_mutex_t a, b, c, d;
-
- pthread_mutex_init(&a, NULL);
- pthread_mutex_init(&b, NULL);
- pthread_mutex_init(&c, NULL);
- pthread_mutex_init(&d, NULL);
-
- LOCK_UNLOCK_2(a, b);
- LOCK_UNLOCK_2(c, d);
- LOCK_UNLOCK_2(b, c);
- LOCK_UNLOCK_2(d, a);
-
- pthread_mutex_destroy(&d);
- pthread_mutex_destroy(&c);
- pthread_mutex_destroy(&b);
- pthread_mutex_destroy(&a);
-}
diff --git a/tools/lib/lockdep/tests/ABCDBCDA.sh b/tools/lib/lockdep/tests/ABCDBCDA.sh
deleted file mode 100644
index fc31c607a5a8..000000000000
--- a/tools/lib/lockdep/tests/ABCDBCDA.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-grep -q 'WARNING: possible circular locking dependency detected'
diff --git a/tools/lib/lockdep/tests/ABCDBDDA.c b/tools/lib/lockdep/tests/ABCDBDDA.c
deleted file mode 100644
index 3c5972863049..000000000000
--- a/tools/lib/lockdep/tests/ABCDBDDA.c
+++ /dev/null
@@ -1,23 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <liblockdep/mutex.h>
-#include "common.h"
-
-void main(void)
-{
- pthread_mutex_t a, b, c, d;
-
- pthread_mutex_init(&a, NULL);
- pthread_mutex_init(&b, NULL);
- pthread_mutex_init(&c, NULL);
- pthread_mutex_init(&d, NULL);
-
- LOCK_UNLOCK_2(a, b);
- LOCK_UNLOCK_2(c, d);
- LOCK_UNLOCK_2(b, d);
- LOCK_UNLOCK_2(d, a);
-
- pthread_mutex_destroy(&d);
- pthread_mutex_destroy(&c);
- pthread_mutex_destroy(&b);
- pthread_mutex_destroy(&a);
-}
diff --git a/tools/lib/lockdep/tests/ABCDBDDA.sh b/tools/lib/lockdep/tests/ABCDBDDA.sh
deleted file mode 100644
index fc31c607a5a8..000000000000
--- a/tools/lib/lockdep/tests/ABCDBDDA.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-grep -q 'WARNING: possible circular locking dependency detected'
diff --git a/tools/lib/lockdep/tests/WW.c b/tools/lib/lockdep/tests/WW.c
deleted file mode 100644
index eee88df7fc41..000000000000
--- a/tools/lib/lockdep/tests/WW.c
+++ /dev/null
@@ -1,14 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <liblockdep/rwlock.h>
-
-void main(void)
-{
- pthread_rwlock_t a, b;
-
- pthread_rwlock_init(&a, NULL);
- pthread_rwlock_init(&b, NULL);
-
- pthread_rwlock_wrlock(&a);
- pthread_rwlock_rdlock(&b);
- pthread_rwlock_wrlock(&a);
-}
diff --git a/tools/lib/lockdep/tests/WW.sh b/tools/lib/lockdep/tests/WW.sh
deleted file mode 100644
index f39b32865074..000000000000
--- a/tools/lib/lockdep/tests/WW.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-grep -q 'WARNING: possible recursive locking detected'
diff --git a/tools/lib/lockdep/tests/common.h b/tools/lib/lockdep/tests/common.h
deleted file mode 100644
index 3026c29ccb5c..000000000000
--- a/tools/lib/lockdep/tests/common.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LIBLOCKDEP_TEST_COMMON_H
-#define _LIBLOCKDEP_TEST_COMMON_H
-
-#define LOCK_UNLOCK_2(a, b) \
- do { \
- pthread_mutex_lock(&(a)); \
- pthread_mutex_lock(&(b)); \
- pthread_mutex_unlock(&(b)); \
- pthread_mutex_unlock(&(a)); \
- } while(0)
-
-#endif
diff --git a/tools/lib/lockdep/tests/unlock_balance.c b/tools/lib/lockdep/tests/unlock_balance.c
deleted file mode 100644
index dba25064b50a..000000000000
--- a/tools/lib/lockdep/tests/unlock_balance.c
+++ /dev/null
@@ -1,15 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <liblockdep/mutex.h>
-
-void main(void)
-{
- pthread_mutex_t a;
-
- pthread_mutex_init(&a, NULL);
-
- pthread_mutex_lock(&a);
- pthread_mutex_unlock(&a);
- pthread_mutex_unlock(&a);
-
- pthread_mutex_destroy(&a);
-}
diff --git a/tools/lib/lockdep/tests/unlock_balance.sh b/tools/lib/lockdep/tests/unlock_balance.sh
deleted file mode 100644
index c6e3952303fe..000000000000
--- a/tools/lib/lockdep/tests/unlock_balance.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-grep -q 'WARNING: bad unlock balance detected'
diff --git a/tools/lib/perf/Build b/tools/lib/perf/Build
index 2ef9a4ec6d99..e8f5b7fb9973 100644
--- a/tools/lib/perf/Build
+++ b/tools/lib/perf/Build
@@ -11,3 +11,5 @@ libperf-y += lib.o
$(OUTPUT)zalloc.o: ../../lib/zalloc.c FORCE
$(call rule_mkdir)
$(call if_changed_dep,cc_o_c)
+
+tests-y += tests/
diff --git a/tools/lib/perf/Documentation/examples/sampling.c b/tools/lib/perf/Documentation/examples/sampling.c
index 8e1a926a9cfe..bc142f0664b5 100644
--- a/tools/lib/perf/Documentation/examples/sampling.c
+++ b/tools/lib/perf/Documentation/examples/sampling.c
@@ -39,7 +39,7 @@ int main(int argc, char **argv)
libperf_init(libperf_print);
- cpus = perf_cpu_map__new(NULL);
+ cpus = perf_cpu_map__new_online_cpus();
if (!cpus) {
fprintf(stderr, "failed to create cpus\n");
return -1;
diff --git a/tools/lib/perf/Documentation/libperf-sampling.txt b/tools/lib/perf/Documentation/libperf-sampling.txt
index d6ca24f6ef78..2378980fab8a 100644
--- a/tools/lib/perf/Documentation/libperf-sampling.txt
+++ b/tools/lib/perf/Documentation/libperf-sampling.txt
@@ -97,7 +97,7 @@ In this case we will monitor all the available CPUs:
[source,c]
--
- 42 cpus = perf_cpu_map__new(NULL);
+ 42 cpus = perf_cpu_map__new_online_cpus();
43 if (!cpus) {
44 fprintf(stderr, "failed to create cpus\n");
45 return -1;
diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt
index 0c74c30ed23a..fcfb9499ef9c 100644
--- a/tools/lib/perf/Documentation/libperf.txt
+++ b/tools/lib/perf/Documentation/libperf.txt
@@ -37,7 +37,7 @@ SYNOPSIS
struct perf_cpu_map;
- struct perf_cpu_map *perf_cpu_map__dummy_new(void);
+ struct perf_cpu_map *perf_cpu_map__new_any_cpu(void);
struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);
struct perf_cpu_map *perf_cpu_map__read(FILE *file);
struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);
@@ -46,8 +46,9 @@ SYNOPSIS
void perf_cpu_map__put(struct perf_cpu_map *map);
int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
- bool perf_cpu_map__empty(const struct perf_cpu_map *map);
+ bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map);
int perf_cpu_map__max(struct perf_cpu_map *map);
+ bool perf_cpu_map__has(const struct perf_cpu_map *map, int cpu);
#define perf_cpu_map__for_each_cpu(cpu, idx, cpus)
--
@@ -61,11 +62,12 @@ SYNOPSIS
struct perf_thread_map;
struct perf_thread_map *perf_thread_map__new_dummy(void);
+ struct perf_thread_map *perf_thread_map__new_array(int nr_threads, pid_t *array);
- void perf_thread_map__set_pid(struct perf_thread_map *map, int thread, pid_t pid);
- char *perf_thread_map__comm(struct perf_thread_map *map, int thread);
+ void perf_thread_map__set_pid(struct perf_thread_map *map, int idx, pid_t pid);
+ char *perf_thread_map__comm(struct perf_thread_map *map, int idx);
int perf_thread_map__nr(struct perf_thread_map *threads);
- pid_t perf_thread_map__pid(struct perf_thread_map *map, int thread);
+ pid_t perf_thread_map__pid(struct perf_thread_map *map, int idx);
struct perf_thread_map *perf_thread_map__get(struct perf_thread_map *map);
void perf_thread_map__put(struct perf_thread_map *map);
@@ -135,13 +137,16 @@ SYNOPSIS
int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads);
void perf_evsel__close(struct perf_evsel *evsel);
- void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu);
- int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
+ void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu_map_idx);
+ int perf_evsel__mmap(struct perf_evsel *evsel, int pages);
+ void perf_evsel__munmap(struct perf_evsel *evsel);
+ void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu_map_idx, int thread);
+ int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread,
struct perf_counts_values *count);
int perf_evsel__enable(struct perf_evsel *evsel);
- int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu);
+ int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx);
int perf_evsel__disable(struct perf_evsel *evsel);
- int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu);
+ int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx);
struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel);
struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel);
struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel);
diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile
index 3718d65cffac..3a9b2140aa04 100644
--- a/tools/lib/perf/Makefile
+++ b/tools/lib/perf/Makefile
@@ -52,6 +52,8 @@ else
Q = @
endif
+TEST_ARGS := $(if $(V),-v)
+
# Set compile option CFLAGS
ifdef EXTRA_CFLAGS
CFLAGS := $(EXTRA_CFLAGS)
@@ -136,12 +138,30 @@ all: fixdep
clean: $(LIBAPI)-clean
$(call QUIET_CLEAN, libperf) $(RM) $(LIBPERF_A) \
- *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBPERF_VERSION) .*.d .*.cmd LIBPERF-CFLAGS $(LIBPERF_PC)
- $(Q)$(MAKE) -C tests clean
+ *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBPERF_VERSION) .*.d .*.cmd tests/*.o LIBPERF-CFLAGS $(LIBPERF_PC) \
+ $(TESTS_STATIC) $(TESTS_SHARED)
+
+TESTS_IN = tests-in.o
+
+TESTS_STATIC = $(OUTPUT)tests-static
+TESTS_SHARED = $(OUTPUT)tests-shared
+
+$(TESTS_IN): FORCE
+ $(Q)$(MAKE) $(build)=tests
+
+$(TESTS_STATIC): $(TESTS_IN) $(LIBPERF_A) $(LIBAPI)
+ $(QUIET_LINK)$(CC) -o $@ $^
+
+$(TESTS_SHARED): $(TESTS_IN) $(LIBAPI)
+ $(QUIET_LINK)$(CC) -o $@ -L$(or $(OUTPUT),.) $^ -lperf
-tests: libs
- $(Q)$(MAKE) -C tests
- $(Q)$(MAKE) -C tests run
+make-tests: libs $(TESTS_SHARED) $(TESTS_STATIC)
+
+tests: make-tests
+ @echo "running static:"
+ @./$(TESTS_STATIC) $(TEST_ARGS)
+ @echo "running dynamic:"
+ @LD_LIBRARY_PATH=. ./$(TESTS_SHARED) $(TEST_ARGS)
$(LIBPERF_PC):
$(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \
@@ -156,10 +176,10 @@ define do_install_mkdir
endef
define do_install
- if [ ! -d '$(DESTDIR_SQ)$2' ]; then \
- $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
- fi; \
- $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2'
+ if [ ! -d '$2' ]; then \
+ $(INSTALL) -d -m 755 '$2'; \
+ fi; \
+ $(INSTALL) $1 $(if $3,-m $3,) '$2'
endef
install_lib: libs
@@ -167,19 +187,28 @@ install_lib: libs
$(call do_install_mkdir,$(libdir_SQ)); \
cp -fpR $(LIBPERF_ALL) $(DESTDIR)$(libdir_SQ)
-install_headers:
- $(call QUIET_INSTALL, headers) \
- $(call do_install,include/perf/core.h,$(prefix)/include/perf,644); \
- $(call do_install,include/perf/cpumap.h,$(prefix)/include/perf,644); \
- $(call do_install,include/perf/threadmap.h,$(prefix)/include/perf,644); \
- $(call do_install,include/perf/evlist.h,$(prefix)/include/perf,644); \
- $(call do_install,include/perf/evsel.h,$(prefix)/include/perf,644); \
- $(call do_install,include/perf/event.h,$(prefix)/include/perf,644); \
- $(call do_install,include/perf/mmap.h,$(prefix)/include/perf,644);
+HDRS := bpf_perf.h core.h cpumap.h threadmap.h evlist.h evsel.h event.h mmap.h
+INTERNAL_HDRS := cpumap.h evlist.h evsel.h lib.h mmap.h rc_check.h threadmap.h xyarray.h
+
+INSTALL_HDRS_PFX := $(DESTDIR)$(prefix)/include/perf
+INSTALL_HDRS := $(addprefix $(INSTALL_HDRS_PFX)/, $(HDRS))
+INSTALL_INTERNAL_HDRS_PFX := $(DESTDIR)$(prefix)/include/internal
+INSTALL_INTERNAL_HDRS := $(addprefix $(INSTALL_INTERNAL_HDRS_PFX)/, $(INTERNAL_HDRS))
+
+$(INSTALL_HDRS): $(INSTALL_HDRS_PFX)/%.h: include/perf/%.h
+ $(call QUIET_INSTALL, $@) \
+ $(call do_install,$<,$(INSTALL_HDRS_PFX)/,644)
+
+$(INSTALL_INTERNAL_HDRS): $(INSTALL_INTERNAL_HDRS_PFX)/%.h: include/internal/%.h
+ $(call QUIET_INSTALL, $@) \
+ $(call do_install,$<,$(INSTALL_INTERNAL_HDRS_PFX)/,644)
+
+install_headers: $(INSTALL_HDRS) $(INSTALL_INTERNAL_HDRS)
+ $(call QUIET_INSTALL, libperf_headers)
install_pkgconfig: $(LIBPERF_PC)
$(call QUIET_INSTALL, $(LIBPERF_PC)) \
- $(call do_install,$(LIBPERF_PC),$(libdir_SQ)/pkgconfig,644)
+ $(call do_install,$(LIBPERF_PC),$(DESTDIR_SQ)$(libdir_SQ)/pkgconfig,644)
install_doc:
$(Q)$(MAKE) -C Documentation install-man install-html install-examples
diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
index ca0215047c32..4adcd7920d03 100644
--- a/tools/lib/perf/cpumap.c
+++ b/tools/lib/perf/cpumap.c
@@ -9,16 +9,31 @@
#include <unistd.h>
#include <ctype.h>
#include <limits.h>
+#include "internal.h"
-struct perf_cpu_map *perf_cpu_map__dummy_new(void)
+void perf_cpu_map__set_nr(struct perf_cpu_map *map, int nr_cpus)
{
- struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int));
+ RC_CHK_ACCESS(map)->nr = nr_cpus;
+}
- if (cpus != NULL) {
- cpus->nr = 1;
- cpus->map[0] = -1;
+struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus)
+{
+ RC_STRUCT(perf_cpu_map) *cpus = malloc(sizeof(*cpus) + sizeof(struct perf_cpu) * nr_cpus);
+ struct perf_cpu_map *result;
+
+ if (ADD_RC_CHK(result, cpus)) {
+ cpus->nr = nr_cpus;
refcount_set(&cpus->refcnt, 1);
}
+ return result;
+}
+
+struct perf_cpu_map *perf_cpu_map__new_any_cpu(void)
+{
+ struct perf_cpu_map *cpus = perf_cpu_map__alloc(1);
+
+ if (cpus)
+ RC_CHK_ACCESS(cpus)->map[0].cpu = -1;
return cpus;
}
@@ -26,73 +41,116 @@ struct perf_cpu_map *perf_cpu_map__dummy_new(void)
static void cpu_map__delete(struct perf_cpu_map *map)
{
if (map) {
- WARN_ONCE(refcount_read(&map->refcnt) != 0,
+ WARN_ONCE(refcount_read(perf_cpu_map__refcnt(map)) != 0,
"cpu_map refcnt unbalanced\n");
- free(map);
+ RC_CHK_FREE(map);
}
}
struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map)
{
- if (map)
- refcount_inc(&map->refcnt);
- return map;
+ struct perf_cpu_map *result;
+
+ if (RC_CHK_GET(result, map))
+ refcount_inc(perf_cpu_map__refcnt(map));
+
+ return result;
}
void perf_cpu_map__put(struct perf_cpu_map *map)
{
- if (map && refcount_dec_and_test(&map->refcnt))
- cpu_map__delete(map);
+ if (map) {
+ if (refcount_dec_and_test(perf_cpu_map__refcnt(map)))
+ cpu_map__delete(map);
+ else
+ RC_CHK_PUT(map);
+ }
}
-static struct perf_cpu_map *cpu_map__default_new(void)
+static struct perf_cpu_map *cpu_map__new_sysconf(void)
{
struct perf_cpu_map *cpus;
- int nr_cpus;
+ int nr_cpus, nr_cpus_conf;
nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
if (nr_cpus < 0)
return NULL;
- cpus = malloc(sizeof(*cpus) + nr_cpus * sizeof(int));
+ nr_cpus_conf = sysconf(_SC_NPROCESSORS_CONF);
+ if (nr_cpus != nr_cpus_conf) {
+ pr_warning("Number of online CPUs (%d) differs from the number configured (%d) the CPU map will only cover the first %d CPUs.",
+ nr_cpus, nr_cpus_conf, nr_cpus);
+ }
+
+ cpus = perf_cpu_map__alloc(nr_cpus);
if (cpus != NULL) {
int i;
for (i = 0; i < nr_cpus; ++i)
- cpus->map[i] = i;
-
- cpus->nr = nr_cpus;
- refcount_set(&cpus->refcnt, 1);
+ RC_CHK_ACCESS(cpus)->map[i].cpu = i;
}
return cpus;
}
-static int cmp_int(const void *a, const void *b)
+static struct perf_cpu_map *cpu_map__new_sysfs_online(void)
{
- return *(const int *)a - *(const int*)b;
+ struct perf_cpu_map *cpus = NULL;
+ FILE *onlnf;
+
+ onlnf = fopen("/sys/devices/system/cpu/online", "r");
+ if (onlnf) {
+ cpus = perf_cpu_map__read(onlnf);
+ fclose(onlnf);
+ }
+ return cpus;
+}
+
+struct perf_cpu_map *perf_cpu_map__new_online_cpus(void)
+{
+ struct perf_cpu_map *cpus = cpu_map__new_sysfs_online();
+
+ if (cpus)
+ return cpus;
+
+ return cpu_map__new_sysconf();
}
-static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, int *tmp_cpus)
+
+static int cmp_cpu(const void *a, const void *b)
+{
+ const struct perf_cpu *cpu_a = a, *cpu_b = b;
+
+ return cpu_a->cpu - cpu_b->cpu;
+}
+
+static struct perf_cpu __perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx)
{
- size_t payload_size = nr_cpus * sizeof(int);
- struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + payload_size);
+ return RC_CHK_ACCESS(cpus)->map[idx];
+}
+
+static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, const struct perf_cpu *tmp_cpus)
+{
+ size_t payload_size = nr_cpus * sizeof(struct perf_cpu);
+ struct perf_cpu_map *cpus = perf_cpu_map__alloc(nr_cpus);
int i, j;
if (cpus != NULL) {
- memcpy(cpus->map, tmp_cpus, payload_size);
- qsort(cpus->map, nr_cpus, sizeof(int), cmp_int);
+ memcpy(RC_CHK_ACCESS(cpus)->map, tmp_cpus, payload_size);
+ qsort(RC_CHK_ACCESS(cpus)->map, nr_cpus, sizeof(struct perf_cpu), cmp_cpu);
/* Remove dups */
j = 0;
for (i = 0; i < nr_cpus; i++) {
- if (i == 0 || cpus->map[i] != cpus->map[i - 1])
- cpus->map[j++] = cpus->map[i];
+ if (i == 0 ||
+ __perf_cpu_map__cpu(cpus, i).cpu !=
+ __perf_cpu_map__cpu(cpus, i - 1).cpu) {
+ RC_CHK_ACCESS(cpus)->map[j++].cpu =
+ __perf_cpu_map__cpu(cpus, i).cpu;
+ }
}
- cpus->nr = j;
+ perf_cpu_map__set_nr(cpus, j);
assert(j <= nr_cpus);
- refcount_set(&cpus->refcnt, 1);
}
-
return cpus;
}
@@ -100,7 +158,7 @@ struct perf_cpu_map *perf_cpu_map__read(FILE *file)
{
struct perf_cpu_map *cpus = NULL;
int nr_cpus = 0;
- int *tmp_cpus = NULL, *tmp;
+ struct perf_cpu *tmp_cpus = NULL, *tmp;
int max_entries = 0;
int n, cpu, prev;
char sep;
@@ -119,24 +177,24 @@ struct perf_cpu_map *perf_cpu_map__read(FILE *file)
if (new_max >= max_entries) {
max_entries = new_max + MAX_NR_CPUS / 2;
- tmp = realloc(tmp_cpus, max_entries * sizeof(int));
+ tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu));
if (tmp == NULL)
goto out_free_tmp;
tmp_cpus = tmp;
}
while (++prev < cpu)
- tmp_cpus[nr_cpus++] = prev;
+ tmp_cpus[nr_cpus++].cpu = prev;
}
if (nr_cpus == max_entries) {
max_entries += MAX_NR_CPUS;
- tmp = realloc(tmp_cpus, max_entries * sizeof(int));
+ tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu));
if (tmp == NULL)
goto out_free_tmp;
tmp_cpus = tmp;
}
- tmp_cpus[nr_cpus++] = cpu;
+ tmp_cpus[nr_cpus++].cpu = cpu;
if (n == 2 && sep == '-')
prev = cpu;
else
@@ -147,38 +205,22 @@ struct perf_cpu_map *perf_cpu_map__read(FILE *file)
if (nr_cpus > 0)
cpus = cpu_map__trim_new(nr_cpus, tmp_cpus);
- else
- cpus = cpu_map__default_new();
out_free_tmp:
free(tmp_cpus);
return cpus;
}
-static struct perf_cpu_map *cpu_map__read_all_cpu_map(void)
-{
- struct perf_cpu_map *cpus = NULL;
- FILE *onlnf;
-
- onlnf = fopen("/sys/devices/system/cpu/online", "r");
- if (!onlnf)
- return cpu_map__default_new();
-
- cpus = perf_cpu_map__read(onlnf);
- fclose(onlnf);
- return cpus;
-}
-
struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
{
struct perf_cpu_map *cpus = NULL;
unsigned long start_cpu, end_cpu = 0;
char *p = NULL;
int i, nr_cpus = 0;
- int *tmp_cpus = NULL, *tmp;
+ struct perf_cpu *tmp_cpus = NULL, *tmp;
int max_entries = 0;
if (!cpu_list)
- return cpu_map__read_all_cpu_map();
+ return perf_cpu_map__new_online_cpus();
/*
* must handle the case of empty cpumap to cover
@@ -215,17 +257,17 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
for (; start_cpu <= end_cpu; start_cpu++) {
/* check for duplicates */
for (i = 0; i < nr_cpus; i++)
- if (tmp_cpus[i] == (int)start_cpu)
+ if (tmp_cpus[i].cpu == (int)start_cpu)
goto invalid;
if (nr_cpus == max_entries) {
max_entries += MAX_NR_CPUS;
- tmp = realloc(tmp_cpus, max_entries * sizeof(int));
+ tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu));
if (tmp == NULL)
goto invalid;
tmp_cpus = tmp;
}
- tmp_cpus[nr_cpus++] = (int)start_cpu;
+ tmp_cpus[nr_cpus++].cpu = (int)start_cpu;
}
if (*p)
++p;
@@ -235,56 +277,131 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
if (nr_cpus > 0)
cpus = cpu_map__trim_new(nr_cpus, tmp_cpus);
- else if (*cpu_list != '\0')
- cpus = cpu_map__default_new();
- else
- cpus = perf_cpu_map__dummy_new();
+ else if (*cpu_list != '\0') {
+ pr_warning("Unexpected characters at end of cpu list ('%s'), using online CPUs.",
+ cpu_list);
+ cpus = perf_cpu_map__new_online_cpus();
+ } else
+ cpus = perf_cpu_map__new_any_cpu();
invalid:
free(tmp_cpus);
out:
return cpus;
}
-int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx)
+static int __perf_cpu_map__nr(const struct perf_cpu_map *cpus)
{
- if (cpus && idx < cpus->nr)
- return cpus->map[idx];
+ return RC_CHK_ACCESS(cpus)->nr;
+}
- return -1;
+struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx)
+{
+ struct perf_cpu result = {
+ .cpu = -1
+ };
+
+ if (cpus && idx < __perf_cpu_map__nr(cpus))
+ return __perf_cpu_map__cpu(cpus, idx);
+
+ return result;
}
int perf_cpu_map__nr(const struct perf_cpu_map *cpus)
{
- return cpus ? cpus->nr : 1;
+ return cpus ? __perf_cpu_map__nr(cpus) : 1;
}
-bool perf_cpu_map__empty(const struct perf_cpu_map *map)
+bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map)
{
- return map ? map->map[0] == -1 : true;
+ return map ? __perf_cpu_map__cpu(map, 0).cpu == -1 : true;
}
-int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu)
+int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
{
- int i;
+ int low, high;
+
+ if (!cpus)
+ return -1;
- for (i = 0; i < cpus->nr; ++i) {
- if (cpus->map[i] == cpu)
- return i;
+ low = 0;
+ high = __perf_cpu_map__nr(cpus);
+ while (low < high) {
+ int idx = (low + high) / 2;
+ struct perf_cpu cpu_at_idx = __perf_cpu_map__cpu(cpus, idx);
+
+ if (cpu_at_idx.cpu == cpu.cpu)
+ return idx;
+
+ if (cpu_at_idx.cpu > cpu.cpu)
+ high = idx;
+ else
+ low = idx + 1;
}
return -1;
}
-int perf_cpu_map__max(struct perf_cpu_map *map)
+bool perf_cpu_map__has(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
{
- int i, max = -1;
+ return perf_cpu_map__idx(cpus, cpu) != -1;
+}
+
+bool perf_cpu_map__equal(const struct perf_cpu_map *lhs, const struct perf_cpu_map *rhs)
+{
+ int nr;
+
+ if (lhs == rhs)
+ return true;
+
+ if (!lhs || !rhs)
+ return false;
+
+ nr = __perf_cpu_map__nr(lhs);
+ if (nr != __perf_cpu_map__nr(rhs))
+ return false;
- for (i = 0; i < map->nr; i++) {
- if (map->map[i] > max)
- max = map->map[i];
+ for (int idx = 0; idx < nr; idx++) {
+ if (__perf_cpu_map__cpu(lhs, idx).cpu != __perf_cpu_map__cpu(rhs, idx).cpu)
+ return false;
}
+ return true;
+}
+
+bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map)
+{
+ return map && __perf_cpu_map__cpu(map, 0).cpu == -1;
+}
- return max;
+struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map)
+{
+ struct perf_cpu result = {
+ .cpu = -1
+ };
+
+ // cpu_map__trim_new() qsort()s it, cpu_map__default_new() sorts it as well.
+ return __perf_cpu_map__nr(map) > 0
+ ? __perf_cpu_map__cpu(map, __perf_cpu_map__nr(map) - 1)
+ : result;
+}
+
+/** Is 'b' a subset of 'a'. */
+bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu_map *b)
+{
+ if (a == b || !b)
+ return true;
+ if (!a || __perf_cpu_map__nr(b) > __perf_cpu_map__nr(a))
+ return false;
+
+ for (int i = 0, j = 0; i < __perf_cpu_map__nr(a); i++) {
+ if (__perf_cpu_map__cpu(a, i).cpu > __perf_cpu_map__cpu(b, j).cpu)
+ return false;
+ if (__perf_cpu_map__cpu(a, i).cpu == __perf_cpu_map__cpu(b, j).cpu) {
+ j++;
+ if (j == __perf_cpu_map__nr(b))
+ return true;
+ }
+ }
+ return false;
}
/*
@@ -298,44 +415,39 @@ int perf_cpu_map__max(struct perf_cpu_map *map)
struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
struct perf_cpu_map *other)
{
- int *tmp_cpus;
+ struct perf_cpu *tmp_cpus;
int tmp_len;
int i, j, k;
struct perf_cpu_map *merged;
- if (!orig && !other)
- return NULL;
- if (!orig) {
- perf_cpu_map__get(other);
- return other;
- }
- if (!other)
- return orig;
- if (orig->nr == other->nr &&
- !memcmp(orig->map, other->map, orig->nr * sizeof(int)))
+ if (perf_cpu_map__is_subset(orig, other))
return orig;
+ if (perf_cpu_map__is_subset(other, orig)) {
+ perf_cpu_map__put(orig);
+ return perf_cpu_map__get(other);
+ }
- tmp_len = orig->nr + other->nr;
- tmp_cpus = malloc(tmp_len * sizeof(int));
+ tmp_len = __perf_cpu_map__nr(orig) + __perf_cpu_map__nr(other);
+ tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu));
if (!tmp_cpus)
return NULL;
/* Standard merge algorithm from wikipedia */
i = j = k = 0;
- while (i < orig->nr && j < other->nr) {
- if (orig->map[i] <= other->map[j]) {
- if (orig->map[i] == other->map[j])
+ while (i < __perf_cpu_map__nr(orig) && j < __perf_cpu_map__nr(other)) {
+ if (__perf_cpu_map__cpu(orig, i).cpu <= __perf_cpu_map__cpu(other, j).cpu) {
+ if (__perf_cpu_map__cpu(orig, i).cpu == __perf_cpu_map__cpu(other, j).cpu)
j++;
- tmp_cpus[k++] = orig->map[i++];
+ tmp_cpus[k++] = __perf_cpu_map__cpu(orig, i++);
} else
- tmp_cpus[k++] = other->map[j++];
+ tmp_cpus[k++] = __perf_cpu_map__cpu(other, j++);
}
- while (i < orig->nr)
- tmp_cpus[k++] = orig->map[i++];
+ while (i < __perf_cpu_map__nr(orig))
+ tmp_cpus[k++] = __perf_cpu_map__cpu(orig, i++);
- while (j < other->nr)
- tmp_cpus[k++] = other->map[j++];
+ while (j < __perf_cpu_map__nr(other))
+ tmp_cpus[k++] = __perf_cpu_map__cpu(other, j++);
assert(k <= tmp_len);
merged = cpu_map__trim_new(k, tmp_cpus);
@@ -343,3 +455,38 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
perf_cpu_map__put(orig);
return merged;
}
+
+struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig,
+ struct perf_cpu_map *other)
+{
+ struct perf_cpu *tmp_cpus;
+ int tmp_len;
+ int i, j, k;
+ struct perf_cpu_map *merged = NULL;
+
+ if (perf_cpu_map__is_subset(other, orig))
+ return perf_cpu_map__get(orig);
+ if (perf_cpu_map__is_subset(orig, other))
+ return perf_cpu_map__get(other);
+
+ tmp_len = max(__perf_cpu_map__nr(orig), __perf_cpu_map__nr(other));
+ tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu));
+ if (!tmp_cpus)
+ return NULL;
+
+ i = j = k = 0;
+ while (i < __perf_cpu_map__nr(orig) && j < __perf_cpu_map__nr(other)) {
+ if (__perf_cpu_map__cpu(orig, i).cpu < __perf_cpu_map__cpu(other, j).cpu)
+ i++;
+ else if (__perf_cpu_map__cpu(orig, i).cpu > __perf_cpu_map__cpu(other, j).cpu)
+ j++;
+ else {
+ j++;
+ tmp_cpus[k++] = __perf_cpu_map__cpu(orig, i++);
+ }
+ }
+ if (k)
+ merged = cpu_map__trim_new(k, tmp_cpus);
+ free(tmp_cpus);
+ return merged;
+}
diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c
index cfcdbd7be066..c6d67fc9e57e 100644
--- a/tools/lib/perf/evlist.c
+++ b/tools/lib/perf/evlist.c
@@ -23,38 +23,58 @@
#include <perf/cpumap.h>
#include <perf/threadmap.h>
#include <api/fd/array.h>
+#include "internal.h"
void perf_evlist__init(struct perf_evlist *evlist)
{
- int i;
-
- for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
- INIT_HLIST_HEAD(&evlist->heads[i]);
INIT_LIST_HEAD(&evlist->entries);
evlist->nr_entries = 0;
fdarray__init(&evlist->pollfd, 64);
+ perf_evlist__reset_id_hash(evlist);
}
static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
struct perf_evsel *evsel)
{
- /*
- * We already have cpus for evsel (via PMU sysfs) so
- * keep it, if there's no target cpu list defined.
- */
- if (!evsel->own_cpus || evlist->has_user_cpus) {
+ if (evsel->system_wide) {
+ /* System wide: set the cpu map of the evsel to all online CPUs. */
+ perf_cpu_map__put(evsel->cpus);
+ evsel->cpus = perf_cpu_map__new_online_cpus();
+ } else if (evlist->has_user_cpus && evsel->is_pmu_core) {
+ /*
+ * User requested CPUs on a core PMU, ensure the requested CPUs
+ * are valid by intersecting with those of the PMU.
+ */
perf_cpu_map__put(evsel->cpus);
- evsel->cpus = perf_cpu_map__get(evlist->cpus);
- } else if (!evsel->system_wide && perf_cpu_map__empty(evlist->cpus)) {
+ evsel->cpus = perf_cpu_map__intersect(evlist->user_requested_cpus, evsel->own_cpus);
+ } else if (!evsel->own_cpus || evlist->has_user_cpus ||
+ (!evsel->requires_cpu && perf_cpu_map__has_any_cpu(evlist->user_requested_cpus))) {
+ /*
+ * The PMU didn't specify a default cpu map, this isn't a core
+ * event and the user requested CPUs or the evlist user
+ * requested CPUs have the "any CPU" (aka dummy) CPU value. In
+ * which case use the user requested CPUs rather than the PMU
+ * ones.
+ */
perf_cpu_map__put(evsel->cpus);
- evsel->cpus = perf_cpu_map__get(evlist->cpus);
+ evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus);
} else if (evsel->cpus != evsel->own_cpus) {
+ /*
+ * No user requested cpu map but the PMU cpu map doesn't match
+ * the evsel's. Reset it back to the PMU cpu map.
+ */
perf_cpu_map__put(evsel->cpus);
evsel->cpus = perf_cpu_map__get(evsel->own_cpus);
}
- perf_thread_map__put(evsel->threads);
- evsel->threads = perf_thread_map__get(evlist->threads);
+ if (evsel->system_wide) {
+ perf_thread_map__put(evsel->threads);
+ evsel->threads = perf_thread_map__new_dummy();
+ } else {
+ perf_thread_map__put(evsel->threads);
+ evsel->threads = perf_thread_map__get(evlist->threads);
+ }
+
evlist->all_cpus = perf_cpu_map__merge(evlist->all_cpus, evsel->cpus);
}
@@ -62,6 +82,8 @@ static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
{
struct perf_evsel *evsel;
+ evlist->needs_map_propagation = true;
+
perf_evlist__for_each_evsel(evlist, evsel)
__perf_evlist__propagate_maps(evlist, evsel);
}
@@ -69,9 +91,12 @@ static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
void perf_evlist__add(struct perf_evlist *evlist,
struct perf_evsel *evsel)
{
+ evsel->idx = evlist->nr_entries;
list_add_tail(&evsel->node, &evlist->entries);
evlist->nr_entries += 1;
- __perf_evlist__propagate_maps(evlist, evsel);
+
+ if (evlist->needs_map_propagation)
+ __perf_evlist__propagate_maps(evlist, evsel);
}
void perf_evlist__remove(struct perf_evlist *evlist,
@@ -125,10 +150,10 @@ static void perf_evlist__purge(struct perf_evlist *evlist)
void perf_evlist__exit(struct perf_evlist *evlist)
{
- perf_cpu_map__put(evlist->cpus);
+ perf_cpu_map__put(evlist->user_requested_cpus);
perf_cpu_map__put(evlist->all_cpus);
perf_thread_map__put(evlist->threads);
- evlist->cpus = NULL;
+ evlist->user_requested_cpus = NULL;
evlist->all_cpus = NULL;
evlist->threads = NULL;
fdarray__exit(&evlist->pollfd);
@@ -157,9 +182,9 @@ void perf_evlist__set_maps(struct perf_evlist *evlist,
* original reference count of 1. If that is not the case it is up to
* the caller to increase the reference count.
*/
- if (cpus != evlist->cpus) {
- perf_cpu_map__put(evlist->cpus);
- evlist->cpus = perf_cpu_map__get(cpus);
+ if (cpus != evlist->user_requested_cpus) {
+ perf_cpu_map__put(evlist->user_requested_cpus);
+ evlist->user_requested_cpus = perf_cpu_map__get(cpus);
}
if (threads != evlist->threads) {
@@ -167,9 +192,6 @@ void perf_evlist__set_maps(struct perf_evlist *evlist,
evlist->threads = perf_thread_map__get(threads);
}
- if (!evlist->all_cpus && cpus)
- evlist->all_cpus = perf_cpu_map__get(cpus);
-
perf_evlist__propagate_maps(evlist);
}
@@ -226,10 +248,10 @@ u64 perf_evlist__read_format(struct perf_evlist *evlist)
static void perf_evlist__id_hash(struct perf_evlist *evlist,
struct perf_evsel *evsel,
- int cpu, int thread, u64 id)
+ int cpu_map_idx, int thread, u64 id)
{
int hash;
- struct perf_sample_id *sid = SID(evsel, cpu, thread);
+ struct perf_sample_id *sid = SID(evsel, cpu_map_idx, thread);
sid->id = id;
sid->evsel = evsel;
@@ -237,23 +259,37 @@ static void perf_evlist__id_hash(struct perf_evlist *evlist,
hlist_add_head(&sid->node, &evlist->heads[hash]);
}
+void perf_evlist__reset_id_hash(struct perf_evlist *evlist)
+{
+ int i;
+
+ for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
+ INIT_HLIST_HEAD(&evlist->heads[i]);
+}
+
void perf_evlist__id_add(struct perf_evlist *evlist,
struct perf_evsel *evsel,
- int cpu, int thread, u64 id)
+ int cpu_map_idx, int thread, u64 id)
{
- perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
+ if (!SID(evsel, cpu_map_idx, thread))
+ return;
+
+ perf_evlist__id_hash(evlist, evsel, cpu_map_idx, thread, id);
evsel->id[evsel->ids++] = id;
}
int perf_evlist__id_add_fd(struct perf_evlist *evlist,
struct perf_evsel *evsel,
- int cpu, int thread, int fd)
+ int cpu_map_idx, int thread, int fd)
{
u64 read_data[4] = { 0, };
int id_idx = 1; /* The first entry is the counter value */
u64 id;
int ret;
+ if (!SID(evsel, cpu_map_idx, thread))
+ return -1;
+
ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
if (!ret)
goto add;
@@ -282,13 +318,13 @@ int perf_evlist__id_add_fd(struct perf_evlist *evlist,
id = read_data[id_idx];
add:
- perf_evlist__id_add(evlist, evsel, cpu, thread, id);
+ perf_evlist__id_add(evlist, evsel, cpu_map_idx, thread, id);
return 0;
}
int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
{
- int nr_cpus = perf_cpu_map__nr(evlist->cpus);
+ int nr_cpus = perf_cpu_map__nr(evlist->all_cpus);
int nr_threads = perf_thread_map__nr(evlist->threads);
int nfds = 0;
struct perf_evsel *evsel;
@@ -367,21 +403,13 @@ static struct perf_mmap* perf_evlist__alloc_mmap(struct perf_evlist *evlist, boo
return map;
}
-static void perf_evlist__set_sid_idx(struct perf_evlist *evlist,
- struct perf_evsel *evsel, int idx, int cpu,
- int thread)
+static void perf_evsel__set_sid_idx(struct perf_evsel *evsel, int idx, int cpu, int thread)
{
struct perf_sample_id *sid = SID(evsel, cpu, thread);
sid->idx = idx;
- if (evlist->cpus && cpu >= 0)
- sid->cpu = evlist->cpus->map[cpu];
- else
- sid->cpu = -1;
- if (!evsel->system_wide && evlist->threads && thread >= 0)
- sid->tid = perf_thread_map__pid(evlist->threads, thread);
- else
- sid->tid = -1;
+ sid->cpu = perf_cpu_map__cpu(evsel->cpus, cpu);
+ sid->tid = perf_thread_map__pid(evsel->threads, thread);
}
static struct perf_mmap*
@@ -409,7 +437,7 @@ perf_evlist__mmap_cb_get(struct perf_evlist *evlist, bool overwrite, int idx)
static int
perf_evlist__mmap_cb_mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
- int output, int cpu)
+ int output, struct perf_cpu cpu)
{
return perf_mmap__mmap(map, mp, output, cpu);
}
@@ -426,14 +454,15 @@ static void perf_evlist__set_mmap_first(struct perf_evlist *evlist, struct perf_
static int
mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
int idx, struct perf_mmap_param *mp, int cpu_idx,
- int thread, int *_output, int *_output_overwrite)
+ int thread, int *_output, int *_output_overwrite, int *nr_mmaps)
{
- int evlist_cpu = perf_cpu_map__cpu(evlist->cpus, cpu_idx);
+ struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->all_cpus, cpu_idx);
struct perf_evsel *evsel;
int revent;
perf_evlist__for_each_entry(evlist, evsel) {
bool overwrite = evsel->attr.write_backward;
+ enum fdarray_flags flgs;
struct perf_mmap *map;
int *output, fd, cpu;
@@ -476,12 +505,21 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
*/
refcount_set(&map->refcnt, 2);
+ if (ops->idx)
+ ops->idx(evlist, evsel, mp, idx);
+
+ /* Debug message used by test scripts */
+ pr_debug("idx %d: mmapping fd %d\n", idx, *output);
if (ops->mmap(map, mp, *output, evlist_cpu) < 0)
return -1;
+ *nr_mmaps += 1;
+
if (!idx)
perf_evlist__set_mmap_first(evlist, map, overwrite);
} else {
+ /* Debug message used by test scripts */
+ pr_debug("idx %d: set output fd %d -> %d\n", idx, fd, *output);
if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
return -1;
@@ -490,8 +528,8 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
revent = !overwrite ? POLLIN : 0;
- if (!evsel->system_wide &&
- perf_evlist__add_pollfd(evlist, fd, map, revent, fdarray_flag__default) < 0) {
+ flgs = evsel->system_wide ? fdarray_flag__nonfilterable : fdarray_flag__default;
+ if (perf_evlist__add_pollfd(evlist, fd, map, revent, flgs) < 0) {
perf_mmap__put(map);
return -1;
}
@@ -500,8 +538,7 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
fd) < 0)
return -1;
- perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
- thread);
+ perf_evsel__set_sid_idx(evsel, idx, cpu, thread);
}
}
@@ -512,21 +549,37 @@ static int
mmap_per_thread(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
struct perf_mmap_param *mp)
{
- int thread;
int nr_threads = perf_thread_map__nr(evlist->threads);
+ int nr_cpus = perf_cpu_map__nr(evlist->all_cpus);
+ int cpu, thread, idx = 0;
+ int nr_mmaps = 0;
+
+ pr_debug("%s: nr cpu values (may include -1) %d nr threads %d\n",
+ __func__, nr_cpus, nr_threads);
- for (thread = 0; thread < nr_threads; thread++) {
+ /* per-thread mmaps */
+ for (thread = 0; thread < nr_threads; thread++, idx++) {
int output = -1;
int output_overwrite = -1;
- if (ops->idx)
- ops->idx(evlist, mp, thread, false);
+ if (mmap_per_evsel(evlist, ops, idx, mp, 0, thread, &output,
+ &output_overwrite, &nr_mmaps))
+ goto out_unmap;
+ }
- if (mmap_per_evsel(evlist, ops, thread, mp, 0, thread,
- &output, &output_overwrite))
+ /* system-wide mmaps i.e. per-cpu */
+ for (cpu = 1; cpu < nr_cpus; cpu++, idx++) {
+ int output = -1;
+ int output_overwrite = -1;
+
+ if (mmap_per_evsel(evlist, ops, idx, mp, cpu, 0, &output,
+ &output_overwrite, &nr_mmaps))
goto out_unmap;
}
+ if (nr_mmaps != evlist->nr_mmaps)
+ pr_err("Miscounted nr_mmaps %d vs %d\n", nr_mmaps, evlist->nr_mmaps);
+
return 0;
out_unmap:
@@ -539,23 +592,26 @@ mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
struct perf_mmap_param *mp)
{
int nr_threads = perf_thread_map__nr(evlist->threads);
- int nr_cpus = perf_cpu_map__nr(evlist->cpus);
+ int nr_cpus = perf_cpu_map__nr(evlist->all_cpus);
+ int nr_mmaps = 0;
int cpu, thread;
+ pr_debug("%s: nr cpu values %d nr threads %d\n", __func__, nr_cpus, nr_threads);
+
for (cpu = 0; cpu < nr_cpus; cpu++) {
int output = -1;
int output_overwrite = -1;
- if (ops->idx)
- ops->idx(evlist, mp, cpu, true);
-
for (thread = 0; thread < nr_threads; thread++) {
if (mmap_per_evsel(evlist, ops, cpu, mp, cpu,
- thread, &output, &output_overwrite))
+ thread, &output, &output_overwrite, &nr_mmaps))
goto out_unmap;
}
}
+ if (nr_mmaps != evlist->nr_mmaps)
+ pr_err("Miscounted nr_mmaps %d vs %d\n", nr_mmaps, evlist->nr_mmaps);
+
return 0;
out_unmap:
@@ -567,9 +623,14 @@ static int perf_evlist__nr_mmaps(struct perf_evlist *evlist)
{
int nr_mmaps;
- nr_mmaps = perf_cpu_map__nr(evlist->cpus);
- if (perf_cpu_map__empty(evlist->cpus))
- nr_mmaps = perf_thread_map__nr(evlist->threads);
+ /* One for each CPU */
+ nr_mmaps = perf_cpu_map__nr(evlist->all_cpus);
+ if (perf_cpu_map__has_any_cpu_or_is_empty(evlist->all_cpus)) {
+ /* Plus one for each thread */
+ nr_mmaps += perf_thread_map__nr(evlist->threads);
+ /* Minus the per-thread CPU (-1) */
+ nr_mmaps -= 1;
+ }
return nr_mmaps;
}
@@ -578,9 +639,8 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist,
struct perf_evlist_mmap_ops *ops,
struct perf_mmap_param *mp)
{
+ const struct perf_cpu_map *cpus = evlist->all_cpus;
struct perf_evsel *evsel;
- const struct perf_cpu_map *cpus = evlist->cpus;
- const struct perf_thread_map *threads = evlist->threads;
if (!ops || !ops->get || !ops->mmap)
return -EINVAL;
@@ -592,14 +652,14 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist,
perf_evlist__for_each_entry(evlist, evsel) {
if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
evsel->sample_id == NULL &&
- perf_evsel__alloc_id(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0)
+ perf_evsel__alloc_id(evsel, evsel->fd->max_x, evsel->fd->max_y) < 0)
return -ENOMEM;
}
if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
return -ENOMEM;
- if (perf_cpu_map__empty(cpus))
+ if (perf_cpu_map__has_any_cpu_or_is_empty(cpus))
return mmap_per_thread(evlist, ops, mp);
return mmap_per_cpu(evlist, ops, mp);
@@ -645,3 +705,51 @@ perf_evlist__next_mmap(struct perf_evlist *evlist, struct perf_mmap *map,
return overwrite ? evlist->mmap_ovw_first : evlist->mmap_first;
}
+
+void __perf_evlist__set_leader(struct list_head *list, struct perf_evsel *leader)
+{
+ struct perf_evsel *evsel;
+ int n = 0;
+
+ __perf_evlist__for_each_entry(list, evsel) {
+ evsel->leader = leader;
+ n++;
+ }
+ leader->nr_members = n;
+}
+
+void perf_evlist__set_leader(struct perf_evlist *evlist)
+{
+ if (evlist->nr_entries) {
+ struct perf_evsel *first = list_entry(evlist->entries.next,
+ struct perf_evsel, node);
+
+ __perf_evlist__set_leader(&evlist->entries, first);
+ }
+}
+
+int perf_evlist__nr_groups(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+ int nr_groups = 0;
+
+ perf_evlist__for_each_evsel(evlist, evsel) {
+ /*
+ * evsels by default have a nr_members of 1, and they are their
+ * own leader. If the nr_members is >1 then this is an
+ * indication of a group.
+ */
+ if (evsel->leader == evsel && evsel->nr_members > 1)
+ nr_groups++;
+ }
+ return nr_groups;
+}
+
+void perf_evlist__go_system_wide(struct perf_evlist *evlist, struct perf_evsel *evsel)
+{
+ if (!evsel->system_wide) {
+ evsel->system_wide = true;
+ if (evlist->needs_map_propagation)
+ __perf_evlist__propagate_maps(evlist, evsel);
+ }
+}
diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c
index 4dc06289f4c7..c07160953224 100644
--- a/tools/lib/perf/evsel.c
+++ b/tools/lib/perf/evsel.c
@@ -11,15 +11,21 @@
#include <stdlib.h>
#include <internal/xyarray.h>
#include <internal/cpumap.h>
+#include <internal/mmap.h>
#include <internal/threadmap.h>
#include <internal/lib.h>
#include <linux/string.h>
#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <asm/bug.h>
-void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr)
+void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr,
+ int idx)
{
INIT_LIST_HEAD(&evsel->node);
evsel->attr = *attr;
+ evsel->idx = idx;
+ evsel->leader = evsel;
}
struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr)
@@ -27,7 +33,7 @@ struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr)
struct perf_evsel *evsel = zalloc(sizeof(*evsel));
if (evsel != NULL)
- perf_evsel__init(evsel, attr);
+ perf_evsel__init(evsel, attr, 0);
return evsel;
}
@@ -37,17 +43,25 @@ void perf_evsel__delete(struct perf_evsel *evsel)
free(evsel);
}
-#define FD(e, x, y) (*(int *) xyarray__entry(e->fd, x, y))
+#define FD(_evsel, _cpu_map_idx, _thread) \
+ ((int *)xyarray__entry(_evsel->fd, _cpu_map_idx, _thread))
+#define MMAP(_evsel, _cpu_map_idx, _thread) \
+ (_evsel->mmap ? ((struct perf_mmap *) xyarray__entry(_evsel->mmap, _cpu_map_idx, _thread)) \
+ : NULL)
int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));
if (evsel->fd) {
- int cpu, thread;
- for (cpu = 0; cpu < ncpus; cpu++) {
+ int idx, thread;
+
+ for (idx = 0; idx < ncpus; idx++) {
for (thread = 0; thread < nthreads; thread++) {
- FD(evsel, cpu, thread) = -1;
+ int *fd = FD(evsel, idx, thread);
+
+ if (fd)
+ *fd = -1;
}
}
}
@@ -55,24 +69,58 @@ int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
return evsel->fd != NULL ? 0 : -ENOMEM;
}
+static int perf_evsel__alloc_mmap(struct perf_evsel *evsel, int ncpus, int nthreads)
+{
+ evsel->mmap = xyarray__new(ncpus, nthreads, sizeof(struct perf_mmap));
+
+ return evsel->mmap != NULL ? 0 : -ENOMEM;
+}
+
static int
sys_perf_event_open(struct perf_event_attr *attr,
- pid_t pid, int cpu, int group_fd,
+ pid_t pid, struct perf_cpu cpu, int group_fd,
unsigned long flags)
{
- return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+ return syscall(__NR_perf_event_open, attr, pid, cpu.cpu, group_fd, flags);
+}
+
+static int get_group_fd(struct perf_evsel *evsel, int cpu_map_idx, int thread, int *group_fd)
+{
+ struct perf_evsel *leader = evsel->leader;
+ int *fd;
+
+ if (evsel == leader) {
+ *group_fd = -1;
+ return 0;
+ }
+
+ /*
+ * Leader must be already processed/open,
+ * if not it's a bug.
+ */
+ if (!leader->fd)
+ return -ENOTCONN;
+
+ fd = FD(leader, cpu_map_idx, thread);
+ if (fd == NULL || *fd == -1)
+ return -EBADF;
+
+ *group_fd = *fd;
+
+ return 0;
}
int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads)
{
- int cpu, thread, err = 0;
+ struct perf_cpu cpu;
+ int idx, thread, err = 0;
if (cpus == NULL) {
static struct perf_cpu_map *empty_cpu_map;
if (empty_cpu_map == NULL) {
- empty_cpu_map = perf_cpu_map__dummy_new();
+ empty_cpu_map = perf_cpu_map__new_any_cpu();
if (empty_cpu_map == NULL)
return -ENOMEM;
}
@@ -93,44 +141,60 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
}
if (evsel->fd == NULL &&
- perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
+ perf_evsel__alloc_fd(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0)
return -ENOMEM;
- for (cpu = 0; cpu < cpus->nr; cpu++) {
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
for (thread = 0; thread < threads->nr; thread++) {
- int fd;
+ int fd, group_fd, *evsel_fd;
+
+ evsel_fd = FD(evsel, idx, thread);
+ if (evsel_fd == NULL) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = get_group_fd(evsel, idx, thread, &group_fd);
+ if (err < 0)
+ goto out;
fd = sys_perf_event_open(&evsel->attr,
threads->map[thread].pid,
- cpus->map[cpu], -1, 0);
+ cpu, group_fd, 0);
- if (fd < 0)
- return -errno;
+ if (fd < 0) {
+ err = -errno;
+ goto out;
+ }
- FD(evsel, cpu, thread) = fd;
+ *evsel_fd = fd;
}
}
+out:
+ if (err)
+ perf_evsel__close(evsel);
return err;
}
-static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu)
+static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu_map_idx)
{
int thread;
for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) {
- if (FD(evsel, cpu, thread) >= 0)
- close(FD(evsel, cpu, thread));
- FD(evsel, cpu, thread) = -1;
+ int *fd = FD(evsel, cpu_map_idx, thread);
+
+ if (fd && *fd >= 0) {
+ close(*fd);
+ *fd = -1;
+ }
}
}
void perf_evsel__close_fd(struct perf_evsel *evsel)
{
- int cpu;
-
- for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++)
- perf_evsel__close_fd_cpu(evsel, cpu);
+ for (int idx = 0; idx < xyarray__max_x(evsel->fd); idx++)
+ perf_evsel__close_fd_cpu(evsel, idx);
}
void perf_evsel__free_fd(struct perf_evsel *evsel)
@@ -148,12 +212,81 @@ void perf_evsel__close(struct perf_evsel *evsel)
perf_evsel__free_fd(evsel);
}
-void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu)
+void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu_map_idx)
{
if (evsel->fd == NULL)
return;
- perf_evsel__close_fd_cpu(evsel, cpu);
+ perf_evsel__close_fd_cpu(evsel, cpu_map_idx);
+}
+
+void perf_evsel__munmap(struct perf_evsel *evsel)
+{
+ int idx, thread;
+
+ if (evsel->fd == NULL || evsel->mmap == NULL)
+ return;
+
+ for (idx = 0; idx < xyarray__max_x(evsel->fd); idx++) {
+ for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
+ int *fd = FD(evsel, idx, thread);
+
+ if (fd == NULL || *fd < 0)
+ continue;
+
+ perf_mmap__munmap(MMAP(evsel, idx, thread));
+ }
+ }
+
+ xyarray__delete(evsel->mmap);
+ evsel->mmap = NULL;
+}
+
+int perf_evsel__mmap(struct perf_evsel *evsel, int pages)
+{
+ int ret, idx, thread;
+ struct perf_mmap_param mp = {
+ .prot = PROT_READ | PROT_WRITE,
+ .mask = (pages * page_size) - 1,
+ };
+
+ if (evsel->fd == NULL || evsel->mmap)
+ return -EINVAL;
+
+ if (perf_evsel__alloc_mmap(evsel, xyarray__max_x(evsel->fd), xyarray__max_y(evsel->fd)) < 0)
+ return -ENOMEM;
+
+ for (idx = 0; idx < xyarray__max_x(evsel->fd); idx++) {
+ for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
+ int *fd = FD(evsel, idx, thread);
+ struct perf_mmap *map;
+ struct perf_cpu cpu = perf_cpu_map__cpu(evsel->cpus, idx);
+
+ if (fd == NULL || *fd < 0)
+ continue;
+
+ map = MMAP(evsel, idx, thread);
+ perf_mmap__init(map, NULL, false, NULL);
+
+ ret = perf_mmap__mmap(map, &mp, *fd, cpu);
+ if (ret) {
+ perf_evsel__munmap(evsel);
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+}
+
+void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu_map_idx, int thread)
+{
+ int *fd = FD(evsel, cpu_map_idx, thread);
+
+ if (fd == NULL || *fd < 0 || MMAP(evsel, cpu_map_idx, thread) == NULL)
+ return NULL;
+
+ return MMAP(evsel, cpu_map_idx, thread)->base;
}
int perf_evsel__read_size(struct perf_evsel *evsel)
@@ -172,6 +305,9 @@ int perf_evsel__read_size(struct perf_evsel *evsel)
if (read_format & PERF_FORMAT_ID)
entry += sizeof(u64);
+ if (read_format & PERF_FORMAT_LOST)
+ entry += sizeof(u64);
+
if (read_format & PERF_FORMAT_GROUP) {
nr = evsel->nr_members;
size += sizeof(u64);
@@ -181,31 +317,120 @@ int perf_evsel__read_size(struct perf_evsel *evsel)
return size;
}
-int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
+/* This only reads values for the leader */
+static int perf_evsel__read_group(struct perf_evsel *evsel, int cpu_map_idx,
+ int thread, struct perf_counts_values *count)
+{
+ size_t size = perf_evsel__read_size(evsel);
+ int *fd = FD(evsel, cpu_map_idx, thread);
+ u64 read_format = evsel->attr.read_format;
+ u64 *data;
+ int idx = 1;
+
+ if (fd == NULL || *fd < 0)
+ return -EINVAL;
+
+ data = calloc(1, size);
+ if (data == NULL)
+ return -ENOMEM;
+
+ if (readn(*fd, data, size) <= 0) {
+ free(data);
+ return -errno;
+ }
+
+ /*
+ * This reads only the leader event intentionally since we don't have
+ * perf counts values for sibling events.
+ */
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ count->ena = data[idx++];
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ count->run = data[idx++];
+
+ /* value is always available */
+ count->val = data[idx++];
+ if (read_format & PERF_FORMAT_ID)
+ count->id = data[idx++];
+ if (read_format & PERF_FORMAT_LOST)
+ count->lost = data[idx++];
+
+ free(data);
+ return 0;
+}
+
+/*
+ * The perf read format is very flexible. It needs to set the proper
+ * values according to the read format.
+ */
+static void perf_evsel__adjust_values(struct perf_evsel *evsel, u64 *buf,
+ struct perf_counts_values *count)
+{
+ u64 read_format = evsel->attr.read_format;
+ int n = 0;
+
+ count->val = buf[n++];
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ count->ena = buf[n++];
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ count->run = buf[n++];
+
+ if (read_format & PERF_FORMAT_ID)
+ count->id = buf[n++];
+
+ if (read_format & PERF_FORMAT_LOST)
+ count->lost = buf[n++];
+}
+
+int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread,
struct perf_counts_values *count)
{
size_t size = perf_evsel__read_size(evsel);
+ int *fd = FD(evsel, cpu_map_idx, thread);
+ u64 read_format = evsel->attr.read_format;
+ struct perf_counts_values buf;
memset(count, 0, sizeof(*count));
- if (FD(evsel, cpu, thread) < 0)
+ if (fd == NULL || *fd < 0)
return -EINVAL;
- if (readn(FD(evsel, cpu, thread), count->values, size) <= 0)
+ if (read_format & PERF_FORMAT_GROUP)
+ return perf_evsel__read_group(evsel, cpu_map_idx, thread, count);
+
+ if (MMAP(evsel, cpu_map_idx, thread) &&
+ !(read_format & (PERF_FORMAT_ID | PERF_FORMAT_LOST)) &&
+ !perf_mmap__read_self(MMAP(evsel, cpu_map_idx, thread), count))
+ return 0;
+
+ if (readn(*fd, buf.values, size) <= 0)
return -errno;
+ perf_evsel__adjust_values(evsel, buf.values, count);
return 0;
}
+static int perf_evsel__ioctl(struct perf_evsel *evsel, int ioc, void *arg,
+ int cpu_map_idx, int thread)
+{
+ int *fd = FD(evsel, cpu_map_idx, thread);
+
+ if (fd == NULL || *fd < 0)
+ return -1;
+
+ return ioctl(*fd, ioc, arg);
+}
+
static int perf_evsel__run_ioctl(struct perf_evsel *evsel,
int ioc, void *arg,
- int cpu)
+ int cpu_map_idx)
{
int thread;
for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
- int fd = FD(evsel, cpu, thread),
- err = ioctl(fd, ioc, arg);
+ int err = perf_evsel__ioctl(evsel, ioc, arg, cpu_map_idx, thread);
if (err)
return err;
@@ -214,9 +439,24 @@ static int perf_evsel__run_ioctl(struct perf_evsel *evsel,
return 0;
}
-int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu)
+int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx)
+{
+ return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, cpu_map_idx);
+}
+
+int perf_evsel__enable_thread(struct perf_evsel *evsel, int thread)
{
- return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, cpu);
+ struct perf_cpu cpu __maybe_unused;
+ int idx;
+ int err;
+
+ perf_cpu_map__for_each_cpu(cpu, idx, evsel->cpus) {
+ err = perf_evsel__ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, idx, thread);
+ if (err)
+ return err;
+ }
+
+ return 0;
}
int perf_evsel__enable(struct perf_evsel *evsel)
@@ -229,9 +469,9 @@ int perf_evsel__enable(struct perf_evsel *evsel)
return err;
}
-int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu)
+int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx)
{
- return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, NULL, cpu);
+ return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, NULL, cpu_map_idx);
}
int perf_evsel__disable(struct perf_evsel *evsel)
@@ -248,7 +488,7 @@ int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter)
{
int err = 0, i;
- for (i = 0; i < evsel->cpus->nr && !err; i++)
+ for (i = 0; i < perf_cpu_map__nr(evsel->cpus) && !err; i++)
err = perf_evsel__run_ioctl(evsel,
PERF_EVENT_IOC_SET_FILTER,
(void *)filter, i);
@@ -275,9 +515,6 @@ int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
if (ncpus == 0 || nthreads == 0)
return 0;
- if (evsel->system_wide)
- nthreads = 1;
-
evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
if (evsel->sample_id == NULL)
return -ENOMEM;
@@ -299,3 +536,22 @@ void perf_evsel__free_id(struct perf_evsel *evsel)
zfree(&evsel->id);
evsel->ids = 0;
}
+
+void perf_counts_values__scale(struct perf_counts_values *count,
+ bool scale, __s8 *pscaled)
+{
+ s8 scaled = 0;
+
+ if (scale) {
+ if (count->run == 0) {
+ scaled = -1;
+ count->val = 0;
+ } else if (count->run < count->ena) {
+ scaled = 1;
+ count->val = (u64)((double)count->val * count->ena / count->run);
+ }
+ }
+
+ if (pscaled)
+ *pscaled = scaled;
+}
diff --git a/tools/lib/perf/include/internal/cpumap.h b/tools/lib/perf/include/internal/cpumap.h
index 840d4032587b..49649eb51ce4 100644
--- a/tools/lib/perf/include/internal/cpumap.h
+++ b/tools/lib/perf/include/internal/cpumap.h
@@ -3,17 +3,36 @@
#define __LIBPERF_INTERNAL_CPUMAP_H
#include <linux/refcount.h>
+#include <perf/cpumap.h>
+#include <internal/rc_check.h>
-struct perf_cpu_map {
+/**
+ * A sized, reference counted, sorted array of integers representing CPU
+ * numbers. This is commonly used to capture which CPUs a PMU is associated
+ * with. The indices into the cpumap are frequently used as they avoid having
+ * gaps if CPU numbers were used. For events associated with a pid, rather than
+ * a CPU, a single dummy map with an entry of -1 is used.
+ */
+DECLARE_RC_STRUCT(perf_cpu_map) {
refcount_t refcnt;
+ /** Length of the map array. */
int nr;
- int map[];
+ /** The CPU values. */
+ struct perf_cpu map[];
};
#ifndef MAX_NR_CPUS
#define MAX_NR_CPUS 2048
#endif
-int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu);
+struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus);
+int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu);
+bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu_map *b);
+void perf_cpu_map__set_nr(struct perf_cpu_map *map, int nr_cpus);
+
+static inline refcount_t *perf_cpu_map__refcnt(struct perf_cpu_map *map)
+{
+ return &RC_CHK_ACCESS(map)->refcnt;
+}
#endif /* __LIBPERF_INTERNAL_CPUMAP_H */
diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h
index 2d0fa02b036f..f43bdb9b6227 100644
--- a/tools/lib/perf/include/internal/evlist.h
+++ b/tools/lib/perf/include/internal/evlist.h
@@ -4,6 +4,7 @@
#include <linux/list.h>
#include <api/fd/array.h>
+#include <internal/cpumap.h>
#include <internal/evsel.h>
#define PERF_EVLIST__HLIST_BITS 8
@@ -17,7 +18,13 @@ struct perf_evlist {
struct list_head entries;
int nr_entries;
bool has_user_cpus;
- struct perf_cpu_map *cpus;
+ bool needs_map_propagation;
+ /**
+ * The cpus passed from the command line or all online CPUs by
+ * default.
+ */
+ struct perf_cpu_map *user_requested_cpus;
+ /** The union of all evsel cpu maps. */
struct perf_cpu_map *all_cpus;
struct perf_thread_map *threads;
int nr_mmaps;
@@ -31,11 +38,12 @@ struct perf_evlist {
};
typedef void
-(*perf_evlist_mmap__cb_idx_t)(struct perf_evlist*, struct perf_mmap_param*, int, bool);
+(*perf_evlist_mmap__cb_idx_t)(struct perf_evlist*, struct perf_evsel*,
+ struct perf_mmap_param*, int);
typedef struct perf_mmap*
(*perf_evlist_mmap__cb_get_t)(struct perf_evlist*, bool, int);
typedef int
-(*perf_evlist_mmap__cb_mmap_t)(struct perf_mmap*, struct perf_mmap_param*, int, int);
+(*perf_evlist_mmap__cb_mmap_t)(struct perf_mmap*, struct perf_mmap_param*, int, struct perf_cpu);
struct perf_evlist_mmap_ops {
perf_evlist_mmap__cb_idx_t idx;
@@ -118,10 +126,15 @@ u64 perf_evlist__read_format(struct perf_evlist *evlist);
void perf_evlist__id_add(struct perf_evlist *evlist,
struct perf_evsel *evsel,
- int cpu, int thread, u64 id);
+ int cpu_map_idx, int thread, u64 id);
int perf_evlist__id_add_fd(struct perf_evlist *evlist,
struct perf_evsel *evsel,
- int cpu, int thread, int fd);
+ int cpu_map_idx, int thread, int fd);
+void perf_evlist__reset_id_hash(struct perf_evlist *evlist);
+
+void __perf_evlist__set_leader(struct list_head *list, struct perf_evsel *leader);
+
+void perf_evlist__go_system_wide(struct perf_evlist *evlist, struct perf_evsel *evsel);
#endif /* __LIBPERF_INTERNAL_EVLIST_H */
diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h
index 1ffd083b235e..5cd220a61962 100644
--- a/tools/lib/perf/include/internal/evsel.h
+++ b/tools/lib/perf/include/internal/evsel.h
@@ -6,8 +6,8 @@
#include <linux/perf_event.h>
#include <stdbool.h>
#include <sys/types.h>
+#include <internal/cpumap.h>
-struct perf_cpu_map;
struct perf_thread_map;
struct xyarray;
@@ -27,9 +27,13 @@ struct perf_sample_id {
* queue number.
*/
int idx;
- int cpu;
+ struct perf_cpu cpu;
pid_t tid;
+ /* Guest machine pid and VCPU, valid only if machine_pid is non-zero */
+ pid_t machine_pid;
+ struct perf_cpu vcpu;
+
/* Holds total ID period value for PERF_SAMPLE_READ processing. */
u64 period;
};
@@ -37,20 +41,44 @@ struct perf_sample_id {
struct perf_evsel {
struct list_head node;
struct perf_event_attr attr;
+ /** The commonly used cpu map of CPUs the event should be opened upon, etc. */
struct perf_cpu_map *cpus;
+ /**
+ * The cpu map read from the PMU. For core PMUs this is the list of all
+ * CPUs the event can be opened upon. For other PMUs this is the default
+ * cpu map for opening the event on, for example, the first CPU on a
+ * socket for an uncore event.
+ */
struct perf_cpu_map *own_cpus;
struct perf_thread_map *threads;
struct xyarray *fd;
+ struct xyarray *mmap;
struct xyarray *sample_id;
u64 *id;
u32 ids;
+ struct perf_evsel *leader;
/* parse modifier helper */
int nr_members;
+ /*
+ * system_wide is for events that need to be on every CPU, irrespective
+ * of user requested CPUs or threads. Tha main example of this is the
+ * dummy event. Map propagation will set cpus for this event to all CPUs
+ * as software PMU events like dummy, have a CPU map that is empty.
+ */
bool system_wide;
+ /*
+ * Some events, for example uncore events, require a CPU.
+ * i.e. it cannot be the 'any CPU' value of -1.
+ */
+ bool requires_cpu;
+ /** Is the PMU for the event a core one? Effects the handling of own_cpus. */
+ bool is_pmu_core;
+ int idx;
};
-void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr);
+void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr,
+ int idx);
int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
void perf_evsel__close_fd(struct perf_evsel *evsel);
void perf_evsel__free_fd(struct perf_evsel *evsel);
diff --git a/tools/lib/perf/include/internal/lib.h b/tools/lib/perf/include/internal/lib.h
index 5175d491b2d4..85471a4b900f 100644
--- a/tools/lib/perf/include/internal/lib.h
+++ b/tools/lib/perf/include/internal/lib.h
@@ -9,4 +9,6 @@ extern unsigned int page_size;
ssize_t readn(int fd, void *buf, size_t n);
ssize_t writen(int fd, const void *buf, size_t n);
+ssize_t preadn(int fd, void *buf, size_t n, off_t offs);
+
#endif /* __LIBPERF_INTERNAL_CPUMAP_H */
diff --git a/tools/lib/perf/include/internal/mmap.h b/tools/lib/perf/include/internal/mmap.h
index be7556e0a2b2..5f08cab61ece 100644
--- a/tools/lib/perf/include/internal/mmap.h
+++ b/tools/lib/perf/include/internal/mmap.h
@@ -6,11 +6,13 @@
#include <linux/refcount.h>
#include <linux/types.h>
#include <stdbool.h>
+#include <internal/cpumap.h>
/* perf sample has 16 bits size limit */
#define PERF_SAMPLE_MAX_SIZE (1 << 16)
struct perf_mmap;
+struct perf_counts_values;
typedef void (*libperf_unmap_cb_t)(struct perf_mmap *map);
@@ -23,7 +25,7 @@ struct perf_mmap {
void *base;
int mask;
int fd;
- int cpu;
+ struct perf_cpu cpu;
refcount_t refcnt;
u64 prev;
u64 start;
@@ -31,7 +33,8 @@ struct perf_mmap {
bool overwrite;
u64 flush;
libperf_unmap_cb_t unmap_cb;
- char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
+ void *event_copy;
+ size_t event_copy_sz;
struct perf_mmap *next;
};
@@ -45,11 +48,13 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map);
void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,
bool overwrite, libperf_unmap_cb_t unmap_cb);
int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
- int fd, int cpu);
+ int fd, struct perf_cpu cpu);
void perf_mmap__munmap(struct perf_mmap *map);
void perf_mmap__get(struct perf_mmap *map);
void perf_mmap__put(struct perf_mmap *map);
u64 perf_mmap__read_head(struct perf_mmap *map);
+int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count);
+
#endif /* __LIBPERF_INTERNAL_MMAP_H */
diff --git a/tools/lib/perf/include/internal/rc_check.h b/tools/lib/perf/include/internal/rc_check.h
new file mode 100644
index 000000000000..f80ddfc80129
--- /dev/null
+++ b/tools/lib/perf/include/internal/rc_check.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __LIBPERF_INTERNAL_RC_CHECK_H
+#define __LIBPERF_INTERNAL_RC_CHECK_H
+
+#include <stdlib.h>
+#include <linux/zalloc.h>
+
+/*
+ * Enable reference count checking implicitly with leak checking, which is
+ * integrated into address sanitizer.
+ */
+#if defined(__SANITIZE_ADDRESS__) || defined(LEAK_SANITIZER) || defined(ADDRESS_SANITIZER)
+#define REFCNT_CHECKING 1
+#elif defined(__has_feature)
+#if __has_feature(address_sanitizer) || __has_feature(leak_sanitizer)
+#define REFCNT_CHECKING 1
+#endif
+#endif
+
+/*
+ * Shared reference count checking macros.
+ *
+ * Reference count checking is an approach to sanitizing the use of reference
+ * counted structs. It leverages address and leak sanitizers to make sure gets
+ * are paired with a put. Reference count checking adds a malloc-ed layer of
+ * indirection on a get, and frees it on a put. A missed put will be reported as
+ * a memory leak. A double put will be reported as a double free. Accessing
+ * after a put will cause a use-after-free and/or a segfault.
+ */
+
+#ifndef REFCNT_CHECKING
+/* Replaces "struct foo" so that the pointer may be interposed. */
+#define DECLARE_RC_STRUCT(struct_name) \
+ struct struct_name
+
+/* Declare a reference counted struct variable. */
+#define RC_STRUCT(struct_name) struct struct_name
+
+/*
+ * Interpose the indirection. Result will hold the indirection and object is the
+ * reference counted struct.
+ */
+#define ADD_RC_CHK(result, object) (result = object, object)
+
+/* Strip the indirection layer. */
+#define RC_CHK_ACCESS(object) object
+
+/* Frees the object and the indirection layer. */
+#define RC_CHK_FREE(object) free(object)
+
+/* A get operation adding the indirection layer. */
+#define RC_CHK_GET(result, object) ADD_RC_CHK(result, object)
+
+/* A put operation removing the indirection layer. */
+#define RC_CHK_PUT(object) {}
+
+/* Pointer equality when the indirection may or may not be there. */
+#define RC_CHK_EQUAL(object1, object2) (object1 == object2)
+
+#else
+
+/* Replaces "struct foo" so that the pointer may be interposed. */
+#define DECLARE_RC_STRUCT(struct_name) \
+ struct original_##struct_name; \
+ struct struct_name { \
+ struct original_##struct_name *orig; \
+ }; \
+ struct original_##struct_name
+
+/* Declare a reference counted struct variable. */
+#define RC_STRUCT(struct_name) struct original_##struct_name
+
+/*
+ * Interpose the indirection. Result will hold the indirection and object is the
+ * reference counted struct.
+ */
+#define ADD_RC_CHK(result, object) \
+ ( \
+ object ? (result = malloc(sizeof(*result)), \
+ result ? (result->orig = object, result) \
+ : (result = NULL, NULL)) \
+ : (result = NULL, NULL) \
+ )
+
+/* Strip the indirection layer. */
+#define RC_CHK_ACCESS(object) object->orig
+
+/* Frees the object and the indirection layer. */
+#define RC_CHK_FREE(object) \
+ do { \
+ zfree(&object->orig); \
+ free(object); \
+ } while(0)
+
+/* A get operation adding the indirection layer. */
+#define RC_CHK_GET(result, object) ADD_RC_CHK(result, (object ? object->orig : NULL))
+
+/* A put operation removing the indirection layer. */
+#define RC_CHK_PUT(object) \
+ do { \
+ if (object) { \
+ object->orig = NULL; \
+ free(object); \
+ } \
+ } while(0)
+
+/* Pointer equality when the indirection may or may not be there. */
+#define RC_CHK_EQUAL(object1, object2) (object1 == object2 || \
+ (object1 && object2 && object1->orig == object2->orig))
+
+#endif
+
+#endif /* __LIBPERF_INTERNAL_RC_CHECK_H */
diff --git a/tools/lib/perf/include/internal/tests.h b/tools/lib/perf/include/internal/tests.h
index 2093e8868a67..b130a6663ff8 100644
--- a/tools/lib/perf/include/internal/tests.h
+++ b/tools/lib/perf/include/internal/tests.h
@@ -3,11 +3,34 @@
#define __LIBPERF_INTERNAL_TESTS_H
#include <stdio.h>
+#include <unistd.h>
-int tests_failed;
+extern int tests_failed;
+extern int tests_verbose;
+
+static inline int get_verbose(char **argv, int argc)
+{
+ int c;
+ int verbose = 0;
+
+ while ((c = getopt(argc, argv, "v")) != -1) {
+ switch (c)
+ {
+ case 'v':
+ verbose = 1;
+ break;
+ default:
+ break;
+ }
+ }
+ optind = 1;
+
+ return verbose;
+}
#define __T_START \
do { \
+ tests_verbose = get_verbose(argv, argc); \
fprintf(stdout, "- running %s...", __FILE__); \
fflush(NULL); \
tests_failed = 0; \
@@ -30,4 +53,15 @@ do {
} \
} while (0)
+#define __T_VERBOSE(...) \
+do { \
+ if (tests_verbose) { \
+ if (tests_verbose == 1) { \
+ fputc('\n', stderr); \
+ tests_verbose++; \
+ } \
+ fprintf(stderr, ##__VA_ARGS__); \
+ } \
+} while (0)
+
#endif /* __LIBPERF_INTERNAL_TESTS_H */
diff --git a/tools/lib/perf/include/internal/xyarray.h b/tools/lib/perf/include/internal/xyarray.h
index 51e35d6c8ec4..f10af3da7b21 100644
--- a/tools/lib/perf/include/internal/xyarray.h
+++ b/tools/lib/perf/include/internal/xyarray.h
@@ -18,11 +18,18 @@ struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size);
void xyarray__delete(struct xyarray *xy);
void xyarray__reset(struct xyarray *xy);
-static inline void *xyarray__entry(struct xyarray *xy, int x, int y)
+static inline void *__xyarray__entry(struct xyarray *xy, int x, int y)
{
return &xy->contents[x * xy->row_size + y * xy->entry_size];
}
+static inline void *xyarray__entry(struct xyarray *xy, size_t x, size_t y)
+{
+ if (x >= xy->max_x || y >= xy->max_y)
+ return NULL;
+ return __xyarray__entry(xy, x, y);
+}
+
static inline int xyarray__max_y(struct xyarray *xy)
{
return xy->max_y;
diff --git a/tools/lib/perf/include/perf/bpf_perf.h b/tools/lib/perf/include/perf/bpf_perf.h
new file mode 100644
index 000000000000..e7cf6ba7b674
--- /dev/null
+++ b/tools/lib/perf/include/perf/bpf_perf.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __LIBPERF_BPF_PERF_H
+#define __LIBPERF_BPF_PERF_H
+
+#include <linux/types.h> /* for __u32 */
+
+/*
+ * bpf_perf uses a hashmap, the attr_map, to track all the leader programs.
+ * The hashmap is pinned in bpffs. flock() on this file is used to ensure
+ * no concurrent access to the attr_map. The key of attr_map is struct
+ * perf_event_attr, and the value is struct perf_event_attr_map_entry.
+ *
+ * struct perf_event_attr_map_entry contains two __u32 IDs, bpf_link of the
+ * leader prog, and the diff_map. Each perf-stat session holds a reference
+ * to the bpf_link to make sure the leader prog is attached to sched_switch
+ * tracepoint.
+ *
+ * Since the hashmap only contains IDs of the bpf_link and diff_map, it
+ * does not hold any references to the leader program. Once all perf-stat
+ * sessions of these events exit, the leader prog, its maps, and the
+ * perf_events will be freed.
+ */
+struct perf_event_attr_map_entry {
+ __u32 link_id;
+ __u32 diff_map_id;
+};
+
+/* default attr_map name */
+#define BPF_PERF_DEFAULT_ATTR_MAP_PATH "perf_attr_map"
+
+#endif /* __LIBPERF_BPF_PERF_H */
diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
index 6a17ad730cbc..228c6c629b0c 100644
--- a/tools/lib/perf/include/perf/cpumap.h
+++ b/tools/lib/perf/include/perf/cpumap.h
@@ -6,23 +6,82 @@
#include <stdio.h>
#include <stdbool.h>
+/** A wrapper around a CPU to avoid confusion with the perf_cpu_map's map's indices. */
+struct perf_cpu {
+ int cpu;
+};
+
+struct perf_cache {
+ int cache_lvl;
+ int cache;
+};
+
struct perf_cpu_map;
-LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void);
+/**
+ * perf_cpu_map__new_any_cpu - a map with a singular "any CPU"/dummy -1 value.
+ */
+LIBPERF_API struct perf_cpu_map *perf_cpu_map__new_any_cpu(void);
+/**
+ * perf_cpu_map__new_online_cpus - a map read from
+ * /sys/devices/system/cpu/online if
+ * available. If reading wasn't possible a map
+ * is created using the online processors
+ * assuming the first 'n' processors are all
+ * online.
+ */
+LIBPERF_API struct perf_cpu_map *perf_cpu_map__new_online_cpus(void);
+/**
+ * perf_cpu_map__new - create a map from the given cpu_list such as "0-7". If no
+ * cpu_list argument is provided then
+ * perf_cpu_map__new_online_cpus is returned.
+ */
LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__read(FILE *file);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
struct perf_cpu_map *other);
+LIBPERF_API struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig,
+ struct perf_cpu_map *other);
LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map);
-LIBPERF_API int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
+/**
+ * perf_cpu_map__cpu - get the CPU value at the given index. Returns -1 if index
+ * is invalid.
+ */
+LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
+/**
+ * perf_cpu_map__nr - for an empty map returns 1, as perf_cpu_map__cpu returns a
+ * cpu of -1 for an invalid index, this makes an empty map
+ * look like it contains the "any CPU"/dummy value. Otherwise
+ * the result is the number CPUs in the map plus one if the
+ * "any CPU"/dummy value is present.
+ */
LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
-LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map);
-LIBPERF_API int perf_cpu_map__max(struct perf_cpu_map *map);
+/**
+ * perf_cpu_map__has_any_cpu_or_is_empty - is map either empty or has the "any CPU"/dummy value.
+ */
+LIBPERF_API bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map);
+LIBPERF_API struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map);
+LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu);
+LIBPERF_API bool perf_cpu_map__equal(const struct perf_cpu_map *lhs,
+ const struct perf_cpu_map *rhs);
+/**
+ * perf_cpu_map__any_cpu - Does the map contain the "any CPU"/dummy -1 value?
+ */
+LIBPERF_API bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map);
#define perf_cpu_map__for_each_cpu(cpu, idx, cpus) \
for ((idx) = 0, (cpu) = perf_cpu_map__cpu(cpus, idx); \
(idx) < perf_cpu_map__nr(cpus); \
(idx)++, (cpu) = perf_cpu_map__cpu(cpus, idx))
+#define perf_cpu_map__for_each_cpu_skip_any(_cpu, idx, cpus) \
+ for ((idx) = 0, (_cpu) = perf_cpu_map__cpu(cpus, idx); \
+ (idx) < perf_cpu_map__nr(cpus); \
+ (idx)++, (_cpu) = perf_cpu_map__cpu(cpus, idx)) \
+ if ((_cpu).cpu != -1)
+
+#define perf_cpu_map__for_each_idx(idx, cpus) \
+ for ((idx) = 0; (idx) < perf_cpu_map__nr(cpus); (idx)++)
+
#endif /* __LIBPERF_CPUMAP_H */
diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h
index 988c539bedb6..ae64090184d3 100644
--- a/tools/lib/perf/include/perf/event.h
+++ b/tools/lib/perf/include/perf/event.h
@@ -8,6 +8,8 @@
#include <linux/bpf.h>
#include <sys/types.h> /* pid_t */
+#define event_contains(obj, mem) ((obj).header.size > offsetof(typeof(obj), mem))
+
struct perf_record_mmap {
struct perf_event_header header;
__u32 pid, tid;
@@ -23,10 +25,20 @@ struct perf_record_mmap2 {
__u64 start;
__u64 len;
__u64 pgoff;
- __u32 maj;
- __u32 min;
- __u64 ino;
- __u64 ino_generation;
+ union {
+ struct {
+ __u32 maj;
+ __u32 min;
+ __u64 ino;
+ __u64 ino_generation;
+ };
+ struct {
+ __u8 build_id_size;
+ __u8 __reserved_1;
+ __u16 __reserved_2;
+ __u8 build_id[20];
+ };
+ };
__u32 prot;
__u32 flags;
char filename[PATH_MAX];
@@ -58,13 +70,15 @@ struct perf_record_lost {
__u64 lost;
};
+#define PERF_RECORD_MISC_LOST_SAMPLES_BPF (1 << 15)
+
struct perf_record_lost_samples {
struct perf_event_header header;
__u64 lost;
};
/*
- * PERF_FORMAT_ENABLED | PERF_FORMAT_RUNNING | PERF_FORMAT_ID
+ * PERF_FORMAT_ENABLED | PERF_FORMAT_RUNNING | PERF_FORMAT_ID | PERF_FORMAT_LOST
*/
struct perf_record_read {
struct perf_event_header header;
@@ -73,6 +87,7 @@ struct perf_record_read {
__u64 time_enabled;
__u64 time_running;
__u64 id;
+ __u64 lost;
};
struct perf_record_throttle {
@@ -83,7 +98,7 @@ struct perf_record_throttle {
};
#ifndef KSYM_NAME_LEN
-#define KSYM_NAME_LEN 256
+#define KSYM_NAME_LEN 512
#endif
struct perf_record_ksymbol {
@@ -133,29 +148,91 @@ struct perf_record_switch {
struct perf_record_header_attr {
struct perf_event_header header;
struct perf_event_attr attr;
- __u64 id[];
+ /*
+ * Array of u64 id follows here but we cannot use a flexible array
+ * because size of attr in the data can be different then current
+ * version. Please use perf_record_header_attr_id() below.
+ *
+ * __u64 id[]; // do not use this
+ */
};
+/* Returns the pointer to id array based on the actual attr size. */
+#define perf_record_header_attr_id(evt) \
+ ((void *)&(evt)->attr.attr + (evt)->attr.attr.size)
+
enum {
PERF_CPU_MAP__CPUS = 0,
PERF_CPU_MAP__MASK = 1,
+ PERF_CPU_MAP__RANGE_CPUS = 2,
};
+/*
+ * Array encoding of a perf_cpu_map where nr is the number of entries in cpu[]
+ * and each entry is a value for a CPU in the map.
+ */
struct cpu_map_entries {
__u16 nr;
__u16 cpu[];
};
-struct perf_record_record_cpu_map {
+/* Bitmap encoding of a perf_cpu_map where bitmap entries are 32-bit. */
+struct perf_record_mask_cpu_map32 {
+ /* Number of mask values. */
__u16 nr;
+ /* Constant 4. */
__u16 long_size;
- unsigned long mask[];
+ /* Bitmap data. */
+ __u32 mask[];
+};
+
+/* Bitmap encoding of a perf_cpu_map where bitmap entries are 64-bit. */
+struct perf_record_mask_cpu_map64 {
+ /* Number of mask values. */
+ __u16 nr;
+ /* Constant 8. */
+ __u16 long_size;
+ /* Legacy padding. */
+ char __pad[4];
+ /* Bitmap data. */
+ __u64 mask[];
+};
+
+/*
+ * 'struct perf_record_cpu_map_data' is packed as unfortunately an earlier
+ * version had unaligned data and we wish to retain file format compatibility.
+ * -irogers
+ */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpacked"
+#pragma GCC diagnostic ignored "-Wattributes"
+
+/*
+ * An encoding of a CPU map for a range starting at start_cpu through to
+ * end_cpu. If any_cpu is 1, an any CPU (-1) value (aka dummy value) is present.
+ */
+struct perf_record_range_cpu_map {
+ __u8 any_cpu;
+ __u8 __pad;
+ __u16 start_cpu;
+ __u16 end_cpu;
};
struct perf_record_cpu_map_data {
__u16 type;
- char data[];
-};
+ union {
+ /* Used when type == PERF_CPU_MAP__CPUS. */
+ struct cpu_map_entries cpus_data;
+ /* Used when type == PERF_CPU_MAP__MASK and long_size == 4. */
+ struct perf_record_mask_cpu_map32 mask32_data;
+ /* Used when type == PERF_CPU_MAP__MASK and long_size == 8. */
+ struct perf_record_mask_cpu_map64 mask64_data;
+ /* Used when type == PERF_CPU_MAP__RANGE_CPUS. */
+ struct perf_record_range_cpu_map range_cpu_data;
+ };
+} __attribute__((packed));
+
+#pragma GCC diagnostic pop
struct perf_record_cpu_map {
struct perf_event_header header;
@@ -181,7 +258,16 @@ struct perf_record_event_update {
struct perf_event_header header;
__u64 type;
__u64 id;
- char data[];
+ union {
+ /* Used when type == PERF_EVENT_UPDATE__SCALE. */
+ struct perf_record_event_update_scale scale;
+ /* Used when type == PERF_EVENT_UPDATE__UNIT. */
+ char unit[0];
+ /* Used when type == PERF_EVENT_UPDATE__NAME. */
+ char name[0];
+ /* Used when type == PERF_EVENT_UPDATE__CPUS. */
+ struct perf_record_event_update_cpus cpus;
+ };
};
#define MAX_EVENT_NAME 64
@@ -225,10 +311,15 @@ struct id_index_entry {
__u64 tid;
};
+struct id_index_entry_2 {
+ __u64 machine_pid;
+ __u64 vcpu;
+};
+
struct perf_record_id_index {
struct perf_event_header header;
__u64 nr;
- struct id_index_entry entries[0];
+ struct id_index_entry entries[];
};
struct perf_record_auxtrace_info {
@@ -262,6 +353,8 @@ struct perf_record_auxtrace_error {
__u64 ip;
__u64 time;
char msg[MAX_AUXTRACE_ERROR_MSG];
+ __u32 machine_pid;
+ __u32 vcpu;
};
struct perf_record_aux {
@@ -277,6 +370,11 @@ struct perf_record_itrace_start {
__u32 tid;
};
+struct perf_record_aux_output_hw_id {
+ struct perf_event_header header;
+ __u64 hw_id;
+};
+
struct perf_record_thread_map_entry {
__u64 pid;
char comm[16];
@@ -292,7 +390,8 @@ enum {
PERF_STAT_CONFIG_TERM__AGGR_MODE = 0,
PERF_STAT_CONFIG_TERM__INTERVAL = 1,
PERF_STAT_CONFIG_TERM__SCALE = 2,
- PERF_STAT_CONFIG_TERM__MAX = 3,
+ PERF_STAT_CONFIG_TERM__AGGR_LEVEL = 3,
+ PERF_STAT_CONFIG_TERM__MAX = 4,
};
struct perf_record_stat_config_entry {
@@ -336,8 +435,9 @@ struct perf_record_time_conv {
__u64 time_zero;
__u64 time_cycles;
__u64 time_mask;
- bool cap_user_time_zero;
- bool cap_user_time_short;
+ __u8 cap_user_time_zero;
+ __u8 cap_user_time_short;
+ __u8 reserved[6]; /* For alignment */
};
struct perf_record_header_feature {
@@ -371,6 +471,7 @@ enum perf_user_event_type { /* above any possible kernel type */
PERF_RECORD_TIME_CONV = 79,
PERF_RECORD_HEADER_FEATURE = 80,
PERF_RECORD_COMPRESSED = 81,
+ PERF_RECORD_FINISHED_INIT = 82,
PERF_RECORD_HEADER_MAX
};
@@ -401,6 +502,7 @@ union perf_event {
struct perf_record_auxtrace_error auxtrace_error;
struct perf_record_aux aux;
struct perf_record_itrace_start itrace_start;
+ struct perf_record_aux_output_hw_id aux_output_hw_id;
struct perf_record_switch context_switch;
struct perf_record_thread_map thread_map;
struct perf_record_cpu_map cpu_map;
diff --git a/tools/lib/perf/include/perf/evlist.h b/tools/lib/perf/include/perf/evlist.h
index 0a7479dc13bf..e894b770779e 100644
--- a/tools/lib/perf/include/perf/evlist.h
+++ b/tools/lib/perf/include/perf/evlist.h
@@ -46,4 +46,6 @@ LIBPERF_API struct perf_mmap *perf_evlist__next_mmap(struct perf_evlist *evlist,
(pos) != NULL; \
(pos) = perf_evlist__next_mmap((evlist), (pos), overwrite))
+LIBPERF_API void perf_evlist__set_leader(struct perf_evlist *evlist);
+LIBPERF_API int perf_evlist__nr_groups(struct perf_evlist *evlist);
#endif /* __LIBPERF_EVLIST_H */
diff --git a/tools/lib/perf/include/perf/evsel.h b/tools/lib/perf/include/perf/evsel.h
index c82ec39a4ad0..6f92204075c2 100644
--- a/tools/lib/perf/include/perf/evsel.h
+++ b/tools/lib/perf/include/perf/evsel.h
@@ -4,6 +4,8 @@
#include <stdint.h>
#include <perf/core.h>
+#include <stdbool.h>
+#include <linux/types.h>
struct perf_evsel;
struct perf_event_attr;
@@ -16,8 +18,10 @@ struct perf_counts_values {
uint64_t val;
uint64_t ena;
uint64_t run;
+ uint64_t id;
+ uint64_t lost;
};
- uint64_t values[3];
+ uint64_t values[5];
};
};
@@ -26,15 +30,21 @@ LIBPERF_API void perf_evsel__delete(struct perf_evsel *evsel);
LIBPERF_API int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads);
LIBPERF_API void perf_evsel__close(struct perf_evsel *evsel);
-LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu);
-LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
+LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu_map_idx);
+LIBPERF_API int perf_evsel__mmap(struct perf_evsel *evsel, int pages);
+LIBPERF_API void perf_evsel__munmap(struct perf_evsel *evsel);
+LIBPERF_API void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu_map_idx, int thread);
+LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread,
struct perf_counts_values *count);
LIBPERF_API int perf_evsel__enable(struct perf_evsel *evsel);
-LIBPERF_API int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu);
+LIBPERF_API int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx);
+LIBPERF_API int perf_evsel__enable_thread(struct perf_evsel *evsel, int thread);
LIBPERF_API int perf_evsel__disable(struct perf_evsel *evsel);
-LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu);
+LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx);
LIBPERF_API struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel);
LIBPERF_API struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel);
LIBPERF_API struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel);
+LIBPERF_API void perf_counts_values__scale(struct perf_counts_values *count,
+ bool scale, __s8 *pscaled);
#endif /* __LIBPERF_EVSEL_H */
diff --git a/tools/lib/perf/include/perf/threadmap.h b/tools/lib/perf/include/perf/threadmap.h
index a7c50de8d010..8b40e7777cea 100644
--- a/tools/lib/perf/include/perf/threadmap.h
+++ b/tools/lib/perf/include/perf/threadmap.h
@@ -8,11 +8,12 @@
struct perf_thread_map;
LIBPERF_API struct perf_thread_map *perf_thread_map__new_dummy(void);
+LIBPERF_API struct perf_thread_map *perf_thread_map__new_array(int nr_threads, pid_t *array);
-LIBPERF_API void perf_thread_map__set_pid(struct perf_thread_map *map, int thread, pid_t pid);
-LIBPERF_API char *perf_thread_map__comm(struct perf_thread_map *map, int thread);
+LIBPERF_API void perf_thread_map__set_pid(struct perf_thread_map *map, int idx, pid_t pid);
+LIBPERF_API char *perf_thread_map__comm(struct perf_thread_map *map, int idx);
LIBPERF_API int perf_thread_map__nr(struct perf_thread_map *threads);
-LIBPERF_API pid_t perf_thread_map__pid(struct perf_thread_map *map, int thread);
+LIBPERF_API pid_t perf_thread_map__pid(struct perf_thread_map *map, int idx);
LIBPERF_API struct perf_thread_map *perf_thread_map__get(struct perf_thread_map *map);
LIBPERF_API void perf_thread_map__put(struct perf_thread_map *map);
diff --git a/tools/lib/perf/lib.c b/tools/lib/perf/lib.c
index 18658931fc71..696fb0ea67c6 100644
--- a/tools/lib/perf/lib.c
+++ b/tools/lib/perf/lib.c
@@ -38,6 +38,26 @@ ssize_t readn(int fd, void *buf, size_t n)
return ion(true, fd, buf, n);
}
+ssize_t preadn(int fd, void *buf, size_t n, off_t offs)
+{
+ size_t left = n;
+
+ while (left) {
+ ssize_t ret = pread(fd, buf, left, offs);
+
+ if (ret < 0 && errno == EINTR)
+ continue;
+ if (ret <= 0)
+ return ret;
+
+ left -= ret;
+ buf += ret;
+ offs += ret;
+ }
+
+ return n;
+}
+
/*
* Write exactly 'n' bytes or return an error.
*/
diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map
index 7be1af8a546c..10b3f3722642 100644
--- a/tools/lib/perf/libperf.map
+++ b/tools/lib/perf/libperf.map
@@ -1,15 +1,18 @@
LIBPERF_0.0.1 {
global:
libperf_init;
- perf_cpu_map__dummy_new;
+ perf_cpu_map__new_any_cpu;
+ perf_cpu_map__new_online_cpus;
perf_cpu_map__get;
perf_cpu_map__put;
perf_cpu_map__new;
perf_cpu_map__read;
perf_cpu_map__nr;
perf_cpu_map__cpu;
- perf_cpu_map__empty;
+ perf_cpu_map__has_any_cpu_or_is_empty;
perf_cpu_map__max;
+ perf_cpu_map__has;
+ perf_thread_map__new_array;
perf_thread_map__new_dummy;
perf_thread_map__set_pid;
perf_thread_map__comm;
@@ -23,6 +26,9 @@ LIBPERF_0.0.1 {
perf_evsel__disable;
perf_evsel__open;
perf_evsel__close;
+ perf_evsel__mmap;
+ perf_evsel__munmap;
+ perf_evsel__mmap_base;
perf_evsel__read;
perf_evsel__cpus;
perf_evsel__threads;
@@ -42,10 +48,12 @@ LIBPERF_0.0.1 {
perf_evlist__munmap;
perf_evlist__filter_pollfd;
perf_evlist__next_mmap;
+ perf_evlist__set_leader;
perf_mmap__consume;
perf_mmap__read_init;
perf_mmap__read_done;
perf_mmap__read_event;
+ perf_counts_values__scale;
local:
*;
};
diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c
index 79d5ed6c38cc..0c903c2372c9 100644
--- a/tools/lib/perf/mmap.c
+++ b/tools/lib/perf/mmap.c
@@ -8,14 +8,18 @@
#include <linux/perf_event.h>
#include <perf/mmap.h>
#include <perf/event.h>
+#include <perf/evsel.h>
#include <internal/mmap.h>
#include <internal/lib.h>
#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/stringify.h>
#include "internal.h"
void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,
bool overwrite, libperf_unmap_cb_t unmap_cb)
{
+ /* Assume fields were zero initialized. */
map->fd = -1;
map->overwrite = overwrite;
map->unmap_cb = unmap_cb;
@@ -30,7 +34,7 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map)
}
int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
- int fd, int cpu)
+ int fd, struct perf_cpu cpu)
{
map->prev = 0;
map->mask = mp->mask;
@@ -48,13 +52,18 @@ int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
void perf_mmap__munmap(struct perf_mmap *map)
{
- if (map && map->base != NULL) {
+ if (!map)
+ return;
+
+ zfree(&map->event_copy);
+ map->event_copy_sz = 0;
+ if (map->base) {
munmap(map->base, perf_mmap__mmap_len(map));
map->base = NULL;
map->fd = -1;
refcount_set(&map->refcnt, 0);
}
- if (map && map->unmap_cb)
+ if (map->unmap_cb)
map->unmap_cb(map);
}
@@ -220,9 +229,17 @@ static union perf_event *perf_mmap__read(struct perf_mmap *map,
*/
if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) {
unsigned int offset = *startp;
- unsigned int len = min(sizeof(*event), size), cpy;
+ unsigned int len = size, cpy;
void *dst = map->event_copy;
+ if (size > map->event_copy_sz) {
+ dst = realloc(map->event_copy, size);
+ if (!dst)
+ return NULL;
+ map->event_copy = dst;
+ map->event_copy_sz = size;
+ }
+
do {
cpy = min(map->mask + 1 - (offset & map->mask), len);
memcpy(dst, &data[offset & map->mask], cpy);
@@ -273,3 +290,250 @@ union perf_event *perf_mmap__read_event(struct perf_mmap *map)
return event;
}
+
+#if defined(__i386__) || defined(__x86_64__)
+static u64 read_perf_counter(unsigned int counter)
+{
+ unsigned int low, high;
+
+ asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));
+
+ return low | ((u64)high) << 32;
+}
+
+static u64 read_timestamp(void)
+{
+ unsigned int low, high;
+
+ asm volatile("rdtsc" : "=a" (low), "=d" (high));
+
+ return low | ((u64)high) << 32;
+}
+#elif defined(__aarch64__)
+#define read_sysreg(r) ({ \
+ u64 __val; \
+ asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \
+ __val; \
+})
+
+static u64 read_pmccntr(void)
+{
+ return read_sysreg(pmccntr_el0);
+}
+
+#define PMEVCNTR_READ(idx) \
+ static u64 read_pmevcntr_##idx(void) { \
+ return read_sysreg(pmevcntr##idx##_el0); \
+ }
+
+PMEVCNTR_READ(0);
+PMEVCNTR_READ(1);
+PMEVCNTR_READ(2);
+PMEVCNTR_READ(3);
+PMEVCNTR_READ(4);
+PMEVCNTR_READ(5);
+PMEVCNTR_READ(6);
+PMEVCNTR_READ(7);
+PMEVCNTR_READ(8);
+PMEVCNTR_READ(9);
+PMEVCNTR_READ(10);
+PMEVCNTR_READ(11);
+PMEVCNTR_READ(12);
+PMEVCNTR_READ(13);
+PMEVCNTR_READ(14);
+PMEVCNTR_READ(15);
+PMEVCNTR_READ(16);
+PMEVCNTR_READ(17);
+PMEVCNTR_READ(18);
+PMEVCNTR_READ(19);
+PMEVCNTR_READ(20);
+PMEVCNTR_READ(21);
+PMEVCNTR_READ(22);
+PMEVCNTR_READ(23);
+PMEVCNTR_READ(24);
+PMEVCNTR_READ(25);
+PMEVCNTR_READ(26);
+PMEVCNTR_READ(27);
+PMEVCNTR_READ(28);
+PMEVCNTR_READ(29);
+PMEVCNTR_READ(30);
+
+/*
+ * Read a value direct from PMEVCNTR<idx>
+ */
+static u64 read_perf_counter(unsigned int counter)
+{
+ static u64 (* const read_f[])(void) = {
+ read_pmevcntr_0,
+ read_pmevcntr_1,
+ read_pmevcntr_2,
+ read_pmevcntr_3,
+ read_pmevcntr_4,
+ read_pmevcntr_5,
+ read_pmevcntr_6,
+ read_pmevcntr_7,
+ read_pmevcntr_8,
+ read_pmevcntr_9,
+ read_pmevcntr_10,
+ read_pmevcntr_11,
+ read_pmevcntr_13,
+ read_pmevcntr_12,
+ read_pmevcntr_14,
+ read_pmevcntr_15,
+ read_pmevcntr_16,
+ read_pmevcntr_17,
+ read_pmevcntr_18,
+ read_pmevcntr_19,
+ read_pmevcntr_20,
+ read_pmevcntr_21,
+ read_pmevcntr_22,
+ read_pmevcntr_23,
+ read_pmevcntr_24,
+ read_pmevcntr_25,
+ read_pmevcntr_26,
+ read_pmevcntr_27,
+ read_pmevcntr_28,
+ read_pmevcntr_29,
+ read_pmevcntr_30,
+ read_pmccntr
+ };
+
+ if (counter < ARRAY_SIZE(read_f))
+ return (read_f[counter])();
+
+ return 0;
+}
+
+static u64 read_timestamp(void) { return read_sysreg(cntvct_el0); }
+
+/* __riscv_xlen contains the witdh of the native base integer, here 64-bit */
+#elif defined(__riscv) && __riscv_xlen == 64
+
+/* TODO: implement rv32 support */
+
+#define CSR_CYCLE 0xc00
+#define CSR_TIME 0xc01
+
+#define csr_read(csr) \
+({ \
+ register unsigned long __v; \
+ __asm__ __volatile__ ("csrr %0, %1" \
+ : "=r" (__v) \
+ : "i" (csr) : ); \
+ __v; \
+})
+
+static unsigned long csr_read_num(int csr_num)
+{
+#define switchcase_csr_read(__csr_num, __val) {\
+ case __csr_num: \
+ __val = csr_read(__csr_num); \
+ break; }
+#define switchcase_csr_read_2(__csr_num, __val) {\
+ switchcase_csr_read(__csr_num + 0, __val) \
+ switchcase_csr_read(__csr_num + 1, __val)}
+#define switchcase_csr_read_4(__csr_num, __val) {\
+ switchcase_csr_read_2(__csr_num + 0, __val) \
+ switchcase_csr_read_2(__csr_num + 2, __val)}
+#define switchcase_csr_read_8(__csr_num, __val) {\
+ switchcase_csr_read_4(__csr_num + 0, __val) \
+ switchcase_csr_read_4(__csr_num + 4, __val)}
+#define switchcase_csr_read_16(__csr_num, __val) {\
+ switchcase_csr_read_8(__csr_num + 0, __val) \
+ switchcase_csr_read_8(__csr_num + 8, __val)}
+#define switchcase_csr_read_32(__csr_num, __val) {\
+ switchcase_csr_read_16(__csr_num + 0, __val) \
+ switchcase_csr_read_16(__csr_num + 16, __val)}
+
+ unsigned long ret = 0;
+
+ switch (csr_num) {
+ switchcase_csr_read_32(CSR_CYCLE, ret)
+ default:
+ break;
+ }
+
+ return ret;
+#undef switchcase_csr_read_32
+#undef switchcase_csr_read_16
+#undef switchcase_csr_read_8
+#undef switchcase_csr_read_4
+#undef switchcase_csr_read_2
+#undef switchcase_csr_read
+}
+
+static u64 read_perf_counter(unsigned int counter)
+{
+ return csr_read_num(CSR_CYCLE + counter);
+}
+
+static u64 read_timestamp(void)
+{
+ return csr_read_num(CSR_TIME);
+}
+
+#else
+static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; }
+static u64 read_timestamp(void) { return 0; }
+#endif
+
+int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count)
+{
+ struct perf_event_mmap_page *pc = map->base;
+ u32 seq, idx, time_mult = 0, time_shift = 0;
+ u64 cnt, cyc = 0, time_offset = 0, time_cycles = 0, time_mask = ~0ULL;
+
+ if (!pc || !pc->cap_user_rdpmc)
+ return -1;
+
+ do {
+ seq = READ_ONCE(pc->lock);
+ barrier();
+
+ count->ena = READ_ONCE(pc->time_enabled);
+ count->run = READ_ONCE(pc->time_running);
+
+ if (pc->cap_user_time && count->ena != count->run) {
+ cyc = read_timestamp();
+ time_mult = READ_ONCE(pc->time_mult);
+ time_shift = READ_ONCE(pc->time_shift);
+ time_offset = READ_ONCE(pc->time_offset);
+
+ if (pc->cap_user_time_short) {
+ time_cycles = READ_ONCE(pc->time_cycles);
+ time_mask = READ_ONCE(pc->time_mask);
+ }
+ }
+
+ idx = READ_ONCE(pc->index);
+ cnt = READ_ONCE(pc->offset);
+ if (pc->cap_user_rdpmc && idx) {
+ s64 evcnt = read_perf_counter(idx - 1);
+ u16 width = READ_ONCE(pc->pmc_width);
+
+ evcnt <<= 64 - width;
+ evcnt >>= 64 - width;
+ cnt += evcnt;
+ } else
+ return -1;
+
+ barrier();
+ } while (READ_ONCE(pc->lock) != seq);
+
+ if (count->ena != count->run) {
+ u64 delta;
+
+ /* Adjust for cap_usr_time_short, a nop if not */
+ cyc = time_cycles + ((cyc - time_cycles) & time_mask);
+
+ delta = time_offset + mul_u64_u32_shr(cyc, time_mult, time_shift);
+
+ count->ena += delta;
+ if (idx)
+ count->run += delta;
+ }
+
+ count->val = cnt;
+
+ return 0;
+}
diff --git a/tools/lib/perf/tests/Build b/tools/lib/perf/tests/Build
new file mode 100644
index 000000000000..56e81378d443
--- /dev/null
+++ b/tools/lib/perf/tests/Build
@@ -0,0 +1,5 @@
+tests-y += main.o
+tests-y += test-evsel.o
+tests-y += test-evlist.o
+tests-y += test-cpumap.o
+tests-y += test-threadmap.o
diff --git a/tools/lib/perf/tests/Makefile b/tools/lib/perf/tests/Makefile
deleted file mode 100644
index 96841775feaf..000000000000
--- a/tools/lib/perf/tests/Makefile
+++ /dev/null
@@ -1,38 +0,0 @@
-# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
-
-TESTS = test-cpumap test-threadmap test-evlist test-evsel
-
-TESTS_SO := $(addsuffix -so,$(TESTS))
-TESTS_A := $(addsuffix -a,$(TESTS))
-
-# Set compile option CFLAGS
-ifdef EXTRA_CFLAGS
- CFLAGS := $(EXTRA_CFLAGS)
-else
- CFLAGS := -g -Wall
-endif
-
-all:
-
-include $(srctree)/tools/scripts/Makefile.include
-
-INCLUDE = -I$(srctree)/tools/lib/perf/include -I$(srctree)/tools/include -I$(srctree)/tools/lib
-
-$(TESTS_A): FORCE
- $(QUIET_LINK)$(CC) $(INCLUDE) $(CFLAGS) -o $@ $(subst -a,.c,$@) ../libperf.a $(LIBAPI)
-
-$(TESTS_SO): FORCE
- $(QUIET_LINK)$(CC) $(INCLUDE) $(CFLAGS) -L.. -o $@ $(subst -so,.c,$@) $(LIBAPI) -lperf
-
-all: $(TESTS_A) $(TESTS_SO)
-
-run:
- @echo "running static:"
- @for i in $(TESTS_A); do ./$$i; done
- @echo "running dynamic:"
- @for i in $(TESTS_SO); do LD_LIBRARY_PATH=../ ./$$i; done
-
-clean:
- $(call QUIET_CLEAN, tests)$(RM) $(TESTS_A) $(TESTS_SO)
-
-.PHONY: all clean FORCE
diff --git a/tools/lib/perf/tests/main.c b/tools/lib/perf/tests/main.c
new file mode 100644
index 000000000000..56423fd4db19
--- /dev/null
+++ b/tools/lib/perf/tests/main.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <internal/tests.h>
+#include "tests.h"
+
+int tests_failed;
+int tests_verbose;
+
+int main(int argc, char **argv)
+{
+ __T("test cpumap", !test_cpumap(argc, argv));
+ __T("test threadmap", !test_threadmap(argc, argv));
+ __T("test evlist", !test_evlist(argc, argv));
+ __T("test evsel", !test_evsel(argc, argv));
+ return 0;
+}
diff --git a/tools/lib/perf/tests/test-cpumap.c b/tools/lib/perf/tests/test-cpumap.c
index c8d45091e7c2..c998b1dae863 100644
--- a/tools/lib/perf/tests/test-cpumap.c
+++ b/tools/lib/perf/tests/test-cpumap.c
@@ -3,6 +3,7 @@
#include <stdio.h>
#include <perf/cpumap.h>
#include <internal/tests.h>
+#include "tests.h"
static int libperf_print(enum libperf_print_level level,
const char *fmt, va_list ap)
@@ -10,15 +11,17 @@ static int libperf_print(enum libperf_print_level level,
return vfprintf(stderr, fmt, ap);
}
-int main(int argc, char **argv)
+int test_cpumap(int argc, char **argv)
{
struct perf_cpu_map *cpus;
+ struct perf_cpu cpu;
+ int idx;
__T_START;
libperf_init(libperf_print);
- cpus = perf_cpu_map__dummy_new();
+ cpus = perf_cpu_map__new_any_cpu();
if (!cpus)
return -1;
@@ -26,6 +29,15 @@ int main(int argc, char **argv)
perf_cpu_map__put(cpus);
perf_cpu_map__put(cpus);
+ cpus = perf_cpu_map__new_online_cpus();
+ if (!cpus)
+ return -1;
+
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus)
+ __T("wrong cpu number", cpu.cpu != -1);
+
+ perf_cpu_map__put(cpus);
+
__T_END;
- return 0;
+ return tests_failed == 0 ? 0 : -1;
}
diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c
index 6d8ebe0c2504..10f70cb41ff1 100644
--- a/tools/lib/perf/tests/test-evlist.c
+++ b/tools/lib/perf/tests/test-evlist.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE // needed for sched.h to get sched_[gs]etaffinity and CPU_(ZERO,SET)
+#include <inttypes.h>
#include <sched.h>
#include <stdio.h>
#include <stdarg.h>
@@ -18,6 +19,11 @@
#include <perf/event.h>
#include <internal/tests.h>
#include <api/fs/fs.h>
+#include "tests.h"
+#include <internal/evsel.h>
+
+#define EVENT_NUM 15
+#define WAIT_COUNT 100000000UL
static int libperf_print(enum libperf_print_level level,
const char *fmt, va_list ap)
@@ -29,7 +35,7 @@ static int test_stat_cpu(void)
{
struct perf_cpu_map *cpus;
struct perf_evlist *evlist;
- struct perf_evsel *evsel;
+ struct perf_evsel *evsel, *leader;
struct perf_event_attr attr1 = {
.type = PERF_TYPE_SOFTWARE,
.config = PERF_COUNT_SW_CPU_CLOCK,
@@ -38,15 +44,15 @@ static int test_stat_cpu(void)
.type = PERF_TYPE_SOFTWARE,
.config = PERF_COUNT_SW_TASK_CLOCK,
};
- int err, cpu, tmp;
+ int err, idx;
- cpus = perf_cpu_map__new(NULL);
+ cpus = perf_cpu_map__new_online_cpus();
__T("failed to create cpus", cpus);
evlist = perf_evlist__new();
__T("failed to create evlist", evlist);
- evsel = perf_evsel__new(&attr1);
+ evsel = leader = perf_evsel__new(&attr1);
__T("failed to create evsel1", evsel);
perf_evlist__add(evlist, evsel);
@@ -56,18 +62,22 @@ static int test_stat_cpu(void)
perf_evlist__add(evlist, evsel);
+ perf_evlist__set_leader(evlist);
+ __T("failed to set leader", leader->leader == leader);
+ __T("failed to set leader", evsel->leader == leader);
+
perf_evlist__set_maps(evlist, cpus, NULL);
err = perf_evlist__open(evlist);
- __T("failed to open evsel", err == 0);
+ __T("failed to open evlist", err == 0);
perf_evlist__for_each_evsel(evlist, evsel) {
cpus = perf_evsel__cpus(evsel);
- perf_cpu_map__for_each_cpu(cpu, tmp, cpus) {
+ for (idx = 0; idx < perf_cpu_map__nr(cpus); idx++) {
struct perf_counts_values counts = { .val = 0 };
- perf_evsel__read(evsel, cpu, 0, &counts);
+ perf_evsel__read(evsel, idx, 0, &counts);
__T("failed to read value for evsel", counts.val != 0);
}
}
@@ -84,7 +94,7 @@ static int test_stat_thread(void)
struct perf_counts_values counts = { .val = 0 };
struct perf_thread_map *threads;
struct perf_evlist *evlist;
- struct perf_evsel *evsel;
+ struct perf_evsel *evsel, *leader;
struct perf_event_attr attr1 = {
.type = PERF_TYPE_SOFTWARE,
.config = PERF_COUNT_SW_CPU_CLOCK,
@@ -103,7 +113,7 @@ static int test_stat_thread(void)
evlist = perf_evlist__new();
__T("failed to create evlist", evlist);
- evsel = perf_evsel__new(&attr1);
+ evsel = leader = perf_evsel__new(&attr1);
__T("failed to create evsel1", evsel);
perf_evlist__add(evlist, evsel);
@@ -113,10 +123,14 @@ static int test_stat_thread(void)
perf_evlist__add(evlist, evsel);
+ perf_evlist__set_leader(evlist);
+ __T("failed to set leader", leader->leader == leader);
+ __T("failed to set leader", evsel->leader == leader);
+
perf_evlist__set_maps(evlist, NULL, threads);
err = perf_evlist__open(evlist);
- __T("failed to open evsel", err == 0);
+ __T("failed to open evlist", err == 0);
perf_evlist__for_each_evsel(evlist, evsel) {
perf_evsel__read(evsel, 0, 0, &counts);
@@ -135,7 +149,7 @@ static int test_stat_thread_enable(void)
struct perf_counts_values counts = { .val = 0 };
struct perf_thread_map *threads;
struct perf_evlist *evlist;
- struct perf_evsel *evsel;
+ struct perf_evsel *evsel, *leader;
struct perf_event_attr attr1 = {
.type = PERF_TYPE_SOFTWARE,
.config = PERF_COUNT_SW_CPU_CLOCK,
@@ -156,7 +170,7 @@ static int test_stat_thread_enable(void)
evlist = perf_evlist__new();
__T("failed to create evlist", evlist);
- evsel = perf_evsel__new(&attr1);
+ evsel = leader = perf_evsel__new(&attr1);
__T("failed to create evsel1", evsel);
perf_evlist__add(evlist, evsel);
@@ -166,10 +180,14 @@ static int test_stat_thread_enable(void)
perf_evlist__add(evlist, evsel);
+ perf_evlist__set_leader(evlist);
+ __T("failed to set leader", leader->leader == leader);
+ __T("failed to set leader", evsel->leader == leader);
+
perf_evlist__set_maps(evlist, NULL, threads);
err = perf_evlist__open(evlist);
- __T("failed to open evsel", err == 0);
+ __T("failed to open evlist", err == 0);
perf_evlist__for_each_evsel(evlist, evsel) {
perf_evsel__read(evsel, 0, 0, &counts);
@@ -208,13 +226,13 @@ static int test_mmap_thread(void)
char path[PATH_MAX];
int id, err, pid, go_pipe[2];
union perf_event *event;
- char bf;
int count = 0;
snprintf(path, PATH_MAX, "%s/kernel/debug/tracing/events/syscalls/sys_enter_prctl/id",
sysfs__mountpoint());
if (filename__read_int(path, &id)) {
+ tests_failed++;
fprintf(stderr, "error: failed to get tracepoint id: %s\n", path);
return -1;
}
@@ -229,6 +247,7 @@ static int test_mmap_thread(void)
pid = fork();
if (!pid) {
int i;
+ char bf;
read(go_pipe[0], &bf, 1);
@@ -242,7 +261,7 @@ static int test_mmap_thread(void)
threads = perf_thread_map__new_dummy();
__T("failed to create threads", threads);
- cpus = perf_cpu_map__dummy_new();
+ cpus = perf_cpu_map__new_any_cpu();
__T("failed to create cpus", cpus);
perf_thread_map__set_pid(threads, 0, pid);
@@ -252,6 +271,7 @@ static int test_mmap_thread(void)
evsel = perf_evsel__new(&attr);
__T("failed to create evsel1", evsel);
+ __T("failed to set leader", evsel->leader == evsel);
perf_evlist__add(evlist, evsel);
@@ -266,7 +286,7 @@ static int test_mmap_thread(void)
perf_evlist__enable(evlist);
/* kick the child and wait for it to finish */
- write(go_pipe[1], &bf, 1);
+ write(go_pipe[1], "A", 1);
waitpid(pid, NULL, 0);
/*
@@ -315,7 +335,8 @@ static int test_mmap_cpus(void)
};
cpu_set_t saved_mask;
char path[PATH_MAX];
- int id, err, cpu, tmp;
+ int id, err, tmp;
+ struct perf_cpu cpu;
union perf_event *event;
int count = 0;
@@ -329,7 +350,7 @@ static int test_mmap_cpus(void)
attr.config = id;
- cpus = perf_cpu_map__new(NULL);
+ cpus = perf_cpu_map__new_online_cpus();
__T("failed to create cpus", cpus);
evlist = perf_evlist__new();
@@ -337,6 +358,7 @@ static int test_mmap_cpus(void)
evsel = perf_evsel__new(&attr);
__T("failed to create evsel1", evsel);
+ __T("failed to set leader", evsel->leader == evsel);
perf_evlist__add(evlist, evsel);
@@ -357,7 +379,7 @@ static int test_mmap_cpus(void)
cpu_set_t mask;
CPU_ZERO(&mask);
- CPU_SET(cpu, &mask);
+ CPU_SET(cpu.cpu, &mask);
err = sched_setaffinity(0, sizeof(mask), &mask);
__T("sched_setaffinity failed", err == 0);
@@ -396,7 +418,160 @@ static int test_mmap_cpus(void)
return 0;
}
-int main(int argc, char **argv)
+static double display_error(long long average,
+ long long high,
+ long long low,
+ long long expected)
+{
+ double error;
+
+ error = (((double)average - expected) / expected) * 100.0;
+
+ __T_VERBOSE(" Expected: %lld\n", expected);
+ __T_VERBOSE(" High: %lld Low: %lld Average: %lld\n",
+ high, low, average);
+
+ __T_VERBOSE(" Average Error = %.2f%%\n", error);
+
+ return error;
+}
+
+static int test_stat_multiplexing(void)
+{
+ struct perf_counts_values expected_counts = { .val = 0 };
+ struct perf_counts_values counts[EVENT_NUM] = {{ .val = 0 },};
+ struct perf_thread_map *threads;
+ struct perf_evlist *evlist;
+ struct perf_evsel *evsel;
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_INSTRUCTIONS,
+ .read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
+ PERF_FORMAT_TOTAL_TIME_RUNNING,
+ .disabled = 1,
+ };
+ int err, i, nonzero = 0;
+ unsigned long count;
+ long long max = 0, min = 0, avg = 0;
+ double error = 0.0;
+ s8 scaled = 0;
+
+ /* read for non-multiplexing event count */
+ threads = perf_thread_map__new_dummy();
+ __T("failed to create threads", threads);
+
+ perf_thread_map__set_pid(threads, 0, 0);
+
+ evsel = perf_evsel__new(&attr);
+ __T("failed to create evsel", evsel);
+
+ err = perf_evsel__open(evsel, NULL, threads);
+ __T("failed to open evsel", err == 0);
+
+ err = perf_evsel__enable(evsel);
+ __T("failed to enable evsel", err == 0);
+
+ /* wait loop */
+ count = WAIT_COUNT;
+ while (count--)
+ ;
+
+ perf_evsel__read(evsel, 0, 0, &expected_counts);
+ __T("failed to read value for evsel", expected_counts.val != 0);
+ __T("failed to read non-multiplexing event count",
+ expected_counts.ena == expected_counts.run);
+
+ err = perf_evsel__disable(evsel);
+ __T("failed to enable evsel", err == 0);
+
+ perf_evsel__close(evsel);
+ perf_evsel__delete(evsel);
+
+ perf_thread_map__put(threads);
+
+ /* read for multiplexing event count */
+ threads = perf_thread_map__new_dummy();
+ __T("failed to create threads", threads);
+
+ perf_thread_map__set_pid(threads, 0, 0);
+
+ evlist = perf_evlist__new();
+ __T("failed to create evlist", evlist);
+
+ for (i = 0; i < EVENT_NUM; i++) {
+ evsel = perf_evsel__new(&attr);
+ __T("failed to create evsel", evsel);
+
+ perf_evlist__add(evlist, evsel);
+ }
+ perf_evlist__set_maps(evlist, NULL, threads);
+
+ err = perf_evlist__open(evlist);
+ __T("failed to open evlist", err == 0);
+
+ perf_evlist__enable(evlist);
+
+ /* wait loop */
+ count = WAIT_COUNT;
+ while (count--)
+ ;
+
+ i = 0;
+ perf_evlist__for_each_evsel(evlist, evsel) {
+ perf_evsel__read(evsel, 0, 0, &counts[i]);
+ __T("failed to read value for evsel", counts[i].val != 0);
+ i++;
+ }
+
+ perf_evlist__disable(evlist);
+
+ min = counts[0].val;
+ for (i = 0; i < EVENT_NUM; i++) {
+ __T_VERBOSE("Event %2d -- Raw count = %" PRIu64 ", run = %" PRIu64 ", enable = %" PRIu64 "\n",
+ i, counts[i].val, counts[i].run, counts[i].ena);
+
+ perf_counts_values__scale(&counts[i], true, &scaled);
+ if (scaled == 1) {
+ __T_VERBOSE("\t Scaled count = %" PRIu64 " (%.2lf%%, %" PRIu64 "/%" PRIu64 ")\n",
+ counts[i].val,
+ (double)counts[i].run / (double)counts[i].ena * 100.0,
+ counts[i].run, counts[i].ena);
+ } else if (scaled == -1) {
+ __T_VERBOSE("\t Not Running\n");
+ } else {
+ __T_VERBOSE("\t Not Scaling\n");
+ }
+
+ if (counts[i].val > max)
+ max = counts[i].val;
+
+ if (counts[i].val < min)
+ min = counts[i].val;
+
+ avg += counts[i].val;
+
+ if (counts[i].val != 0)
+ nonzero++;
+ }
+
+ if (nonzero != 0)
+ avg = avg / nonzero;
+ else
+ avg = 0;
+
+ error = display_error(avg, max, min, expected_counts.val);
+
+ __T("Error out of range!", ((error <= 1.0) && (error >= -1.0)));
+
+ perf_evlist__close(evlist);
+ perf_evlist__delete(evlist);
+
+ perf_thread_map__put(threads);
+
+ return 0;
+}
+
+int test_evlist(int argc, char **argv)
{
__T_START;
@@ -407,7 +582,8 @@ int main(int argc, char **argv)
test_stat_thread_enable();
test_mmap_thread();
test_mmap_cpus();
+ test_stat_multiplexing();
__T_END;
- return 0;
+ return tests_failed == 0 ? 0 : -1;
}
diff --git a/tools/lib/perf/tests/test-evsel.c b/tools/lib/perf/tests/test-evsel.c
index 135722ac965b..545ec3150546 100644
--- a/tools/lib/perf/tests/test-evsel.c
+++ b/tools/lib/perf/tests/test-evsel.c
@@ -1,11 +1,15 @@
// SPDX-License-Identifier: GPL-2.0
#include <stdarg.h>
#include <stdio.h>
+#include <string.h>
#include <linux/perf_event.h>
+#include <linux/kernel.h>
#include <perf/cpumap.h>
#include <perf/threadmap.h>
#include <perf/evsel.h>
+#include <internal/evsel.h>
#include <internal/tests.h>
+#include "tests.h"
static int libperf_print(enum libperf_print_level level,
const char *fmt, va_list ap)
@@ -21,9 +25,9 @@ static int test_stat_cpu(void)
.type = PERF_TYPE_SOFTWARE,
.config = PERF_COUNT_SW_CPU_CLOCK,
};
- int err, cpu, tmp;
+ int err, idx;
- cpus = perf_cpu_map__new(NULL);
+ cpus = perf_cpu_map__new_online_cpus();
__T("failed to create cpus", cpus);
evsel = perf_evsel__new(&attr);
@@ -32,10 +36,10 @@ static int test_stat_cpu(void)
err = perf_evsel__open(evsel, cpus, NULL);
__T("failed to open evsel", err == 0);
- perf_cpu_map__for_each_cpu(cpu, tmp, cpus) {
+ for (idx = 0; idx < perf_cpu_map__nr(cpus); idx++) {
struct perf_counts_values counts = { .val = 0 };
- perf_evsel__read(evsel, cpu, 0, &counts);
+ perf_evsel__read(evsel, idx, 0, &counts);
__T("failed to read value for evsel", counts.val != 0);
}
@@ -120,7 +124,232 @@ static int test_stat_thread_enable(void)
return 0;
}
-int main(int argc, char **argv)
+static int test_stat_user_read(int event)
+{
+ struct perf_counts_values counts = { .val = 0 };
+ struct perf_thread_map *threads;
+ struct perf_evsel *evsel;
+ struct perf_event_mmap_page *pc;
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = event,
+#ifdef __aarch64__
+ .config1 = 0x2, /* Request user access */
+#endif
+ };
+ int err, i;
+
+ threads = perf_thread_map__new_dummy();
+ __T("failed to create threads", threads);
+
+ perf_thread_map__set_pid(threads, 0, 0);
+
+ evsel = perf_evsel__new(&attr);
+ __T("failed to create evsel", evsel);
+
+ err = perf_evsel__open(evsel, NULL, threads);
+ __T("failed to open evsel", err == 0);
+
+ err = perf_evsel__mmap(evsel, 0);
+ __T("failed to mmap evsel", err == 0);
+
+ pc = perf_evsel__mmap_base(evsel, 0, 0);
+ __T("failed to get mmapped address", pc);
+
+#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)
+ __T("userspace counter access not supported", pc->cap_user_rdpmc);
+ __T("userspace counter access not enabled", pc->index);
+ __T("userspace counter width not set", pc->pmc_width >= 32);
+#endif
+
+ perf_evsel__read(evsel, 0, 0, &counts);
+ __T("failed to read value for evsel", counts.val != 0);
+
+ for (i = 0; i < 5; i++) {
+ volatile int count = 0x10000 << i;
+ __u64 start, end, last = 0;
+
+ __T_VERBOSE("\tloop = %u, ", count);
+
+ perf_evsel__read(evsel, 0, 0, &counts);
+ start = counts.val;
+
+ while (count--) ;
+
+ perf_evsel__read(evsel, 0, 0, &counts);
+ end = counts.val;
+
+ __T("invalid counter data", (end - start) > last);
+ last = end - start;
+ __T_VERBOSE("count = %llu\n", end - start);
+ }
+
+ perf_evsel__munmap(evsel);
+ perf_evsel__close(evsel);
+ perf_evsel__delete(evsel);
+
+ perf_thread_map__put(threads);
+ return 0;
+}
+
+static int test_stat_read_format_single(struct perf_event_attr *attr, struct perf_thread_map *threads)
+{
+ struct perf_evsel *evsel;
+ struct perf_counts_values counts;
+ volatile int count = 0x100000;
+ int err;
+
+ evsel = perf_evsel__new(attr);
+ __T("failed to create evsel", evsel);
+
+ /* skip old kernels that don't support the format */
+ err = perf_evsel__open(evsel, NULL, threads);
+ if (err < 0)
+ return 0;
+
+ while (count--) ;
+
+ memset(&counts, -1, sizeof(counts));
+ perf_evsel__read(evsel, 0, 0, &counts);
+
+ __T("failed to read value", counts.val);
+ if (attr->read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ __T("failed to read TOTAL_TIME_ENABLED", counts.ena);
+ if (attr->read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ __T("failed to read TOTAL_TIME_RUNNING", counts.run);
+ if (attr->read_format & PERF_FORMAT_ID)
+ __T("failed to read ID", counts.id);
+ if (attr->read_format & PERF_FORMAT_LOST)
+ __T("failed to read LOST", counts.lost == 0);
+
+ perf_evsel__close(evsel);
+ perf_evsel__delete(evsel);
+ return 0;
+}
+
+static int test_stat_read_format_group(struct perf_event_attr *attr, struct perf_thread_map *threads)
+{
+ struct perf_evsel *leader, *member;
+ struct perf_counts_values counts;
+ volatile int count = 0x100000;
+ int err;
+
+ attr->read_format |= PERF_FORMAT_GROUP;
+ leader = perf_evsel__new(attr);
+ __T("failed to create leader", leader);
+
+ attr->read_format &= ~PERF_FORMAT_GROUP;
+ member = perf_evsel__new(attr);
+ __T("failed to create member", member);
+
+ member->leader = leader;
+ leader->nr_members = 2;
+
+ /* skip old kernels that don't support the format */
+ err = perf_evsel__open(leader, NULL, threads);
+ if (err < 0)
+ return 0;
+ err = perf_evsel__open(member, NULL, threads);
+ if (err < 0)
+ return 0;
+
+ while (count--) ;
+
+ memset(&counts, -1, sizeof(counts));
+ perf_evsel__read(leader, 0, 0, &counts);
+
+ __T("failed to read leader value", counts.val);
+ if (attr->read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ __T("failed to read leader TOTAL_TIME_ENABLED", counts.ena);
+ if (attr->read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ __T("failed to read leader TOTAL_TIME_RUNNING", counts.run);
+ if (attr->read_format & PERF_FORMAT_ID)
+ __T("failed to read leader ID", counts.id);
+ if (attr->read_format & PERF_FORMAT_LOST)
+ __T("failed to read leader LOST", counts.lost == 0);
+
+ memset(&counts, -1, sizeof(counts));
+ perf_evsel__read(member, 0, 0, &counts);
+
+ __T("failed to read member value", counts.val);
+ if (attr->read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ __T("failed to read member TOTAL_TIME_ENABLED", counts.ena);
+ if (attr->read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ __T("failed to read member TOTAL_TIME_RUNNING", counts.run);
+ if (attr->read_format & PERF_FORMAT_ID)
+ __T("failed to read member ID", counts.id);
+ if (attr->read_format & PERF_FORMAT_LOST)
+ __T("failed to read member LOST", counts.lost == 0);
+
+ perf_evsel__close(member);
+ perf_evsel__close(leader);
+ perf_evsel__delete(member);
+ perf_evsel__delete(leader);
+ return 0;
+}
+
+static int test_stat_read_format(void)
+{
+ struct perf_thread_map *threads;
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_TASK_CLOCK,
+ };
+ int err, i;
+
+#define FMT(_fmt) PERF_FORMAT_ ## _fmt
+#define FMT_TIME (FMT(TOTAL_TIME_ENABLED) | FMT(TOTAL_TIME_RUNNING))
+
+ uint64_t test_formats [] = {
+ 0,
+ FMT_TIME,
+ FMT(ID),
+ FMT(LOST),
+ FMT_TIME | FMT(ID),
+ FMT_TIME | FMT(LOST),
+ FMT_TIME | FMT(ID) | FMT(LOST),
+ FMT(ID) | FMT(LOST),
+ };
+
+#undef FMT
+#undef FMT_TIME
+
+ threads = perf_thread_map__new_dummy();
+ __T("failed to create threads", threads);
+
+ perf_thread_map__set_pid(threads, 0, 0);
+
+ for (i = 0; i < (int)ARRAY_SIZE(test_formats); i++) {
+ attr.read_format = test_formats[i];
+ __T_VERBOSE("testing single read with read_format: %lx\n",
+ (unsigned long)test_formats[i]);
+
+ err = test_stat_read_format_single(&attr, threads);
+ __T("failed to read single format", err == 0);
+ }
+
+ perf_thread_map__put(threads);
+
+ threads = perf_thread_map__new_array(2, NULL);
+ __T("failed to create threads", threads);
+
+ perf_thread_map__set_pid(threads, 0, 0);
+ perf_thread_map__set_pid(threads, 1, 0);
+
+ for (i = 0; i < (int)ARRAY_SIZE(test_formats); i++) {
+ attr.read_format = test_formats[i];
+ __T_VERBOSE("testing group read with read_format: %lx\n",
+ (unsigned long)test_formats[i]);
+
+ err = test_stat_read_format_group(&attr, threads);
+ __T("failed to read group format", err == 0);
+ }
+
+ perf_thread_map__put(threads);
+ return 0;
+}
+
+int test_evsel(int argc, char **argv)
{
__T_START;
@@ -129,7 +358,10 @@ int main(int argc, char **argv)
test_stat_cpu();
test_stat_thread();
test_stat_thread_enable();
+ test_stat_user_read(PERF_COUNT_HW_INSTRUCTIONS);
+ test_stat_user_read(PERF_COUNT_HW_CPU_CYCLES);
+ test_stat_read_format();
__T_END;
- return 0;
+ return tests_failed == 0 ? 0 : -1;
}
diff --git a/tools/lib/perf/tests/test-threadmap.c b/tools/lib/perf/tests/test-threadmap.c
index 7dc4d6fbedde..f728ad7002bb 100644
--- a/tools/lib/perf/tests/test-threadmap.c
+++ b/tools/lib/perf/tests/test-threadmap.c
@@ -3,6 +3,7 @@
#include <stdio.h>
#include <perf/threadmap.h>
#include <internal/tests.h>
+#include "tests.h"
static int libperf_print(enum libperf_print_level level,
const char *fmt, va_list ap)
@@ -10,9 +11,43 @@ static int libperf_print(enum libperf_print_level level,
return vfprintf(stderr, fmt, ap);
}
-int main(int argc, char **argv)
+static int test_threadmap_array(int nr, pid_t *array)
{
struct perf_thread_map *threads;
+ int i;
+
+ threads = perf_thread_map__new_array(nr, array);
+ __T("Failed to allocate new thread map", threads);
+
+ __T("Unexpected number of threads", perf_thread_map__nr(threads) == nr);
+
+ for (i = 0; i < nr; i++) {
+ __T("Unexpected initial value of thread",
+ perf_thread_map__pid(threads, i) == (array ? array[i] : -1));
+ }
+
+ for (i = 1; i < nr; i++)
+ perf_thread_map__set_pid(threads, i, i * 100);
+
+ __T("Unexpected value of thread 0",
+ perf_thread_map__pid(threads, 0) == (array ? array[0] : -1));
+
+ for (i = 1; i < nr; i++) {
+ __T("Unexpected thread value",
+ perf_thread_map__pid(threads, i) == i * 100);
+ }
+
+ perf_thread_map__put(threads);
+
+ return 0;
+}
+
+#define THREADS_NR 10
+int test_threadmap(int argc, char **argv)
+{
+ struct perf_thread_map *threads;
+ pid_t thr_array[THREADS_NR];
+ int i;
__T_START;
@@ -26,6 +61,13 @@ int main(int argc, char **argv)
perf_thread_map__put(threads);
perf_thread_map__put(threads);
+ test_threadmap_array(THREADS_NR, NULL);
+
+ for (i = 0; i < THREADS_NR; i++)
+ thr_array[i] = i + 100;
+
+ test_threadmap_array(THREADS_NR, thr_array);
+
__T_END;
- return 0;
+ return tests_failed == 0 ? 0 : -1;
}
diff --git a/tools/lib/perf/tests/tests.h b/tools/lib/perf/tests/tests.h
new file mode 100644
index 000000000000..604838f21b2b
--- /dev/null
+++ b/tools/lib/perf/tests/tests.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef TESTS_H
+#define TESTS_H
+
+int test_cpumap(int argc, char **argv);
+int test_threadmap(int argc, char **argv);
+int test_evlist(int argc, char **argv);
+int test_evsel(int argc, char **argv);
+
+#endif /* TESTS_H */
diff --git a/tools/lib/perf/threadmap.c b/tools/lib/perf/threadmap.c
index e92c368b0a6c..07968f3ea093 100644
--- a/tools/lib/perf/threadmap.c
+++ b/tools/lib/perf/threadmap.c
@@ -32,28 +32,38 @@ struct perf_thread_map *perf_thread_map__realloc(struct perf_thread_map *map, in
#define thread_map__alloc(__nr) perf_thread_map__realloc(NULL, __nr)
-void perf_thread_map__set_pid(struct perf_thread_map *map, int thread, pid_t pid)
+void perf_thread_map__set_pid(struct perf_thread_map *map, int idx, pid_t pid)
{
- map->map[thread].pid = pid;
+ map->map[idx].pid = pid;
}
-char *perf_thread_map__comm(struct perf_thread_map *map, int thread)
+char *perf_thread_map__comm(struct perf_thread_map *map, int idx)
{
- return map->map[thread].comm;
+ return map->map[idx].comm;
}
-struct perf_thread_map *perf_thread_map__new_dummy(void)
+struct perf_thread_map *perf_thread_map__new_array(int nr_threads, pid_t *array)
{
- struct perf_thread_map *threads = thread_map__alloc(1);
+ struct perf_thread_map *threads = thread_map__alloc(nr_threads);
+ int i;
+
+ if (!threads)
+ return NULL;
+
+ for (i = 0; i < nr_threads; i++)
+ perf_thread_map__set_pid(threads, i, array ? array[i] : -1);
+
+ threads->nr = nr_threads;
+ refcount_set(&threads->refcnt, 1);
- if (threads != NULL) {
- perf_thread_map__set_pid(threads, 0, -1);
- threads->nr = 1;
- refcount_set(&threads->refcnt, 1);
- }
return threads;
}
+struct perf_thread_map *perf_thread_map__new_dummy(void)
+{
+ return perf_thread_map__new_array(1, NULL);
+}
+
static void perf_thread_map__delete(struct perf_thread_map *threads)
{
if (threads) {
@@ -85,7 +95,7 @@ int perf_thread_map__nr(struct perf_thread_map *threads)
return threads ? threads->nr : 1;
}
-pid_t perf_thread_map__pid(struct perf_thread_map *map, int thread)
+pid_t perf_thread_map__pid(struct perf_thread_map *map, int idx)
{
- return map->map[thread].pid;
+ return map->map[idx].pid;
}
diff --git a/tools/lib/slab.c b/tools/lib/slab.c
new file mode 100644
index 000000000000..959997fb0652
--- /dev/null
+++ b/tools/lib/slab.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <string.h>
+
+#include <urcu/uatomic.h>
+#include <linux/slab.h>
+#include <malloc.h>
+#include <linux/gfp.h>
+
+int kmalloc_nr_allocated;
+int kmalloc_verbose;
+
+void *kmalloc(size_t size, gfp_t gfp)
+{
+ void *ret;
+
+ if (!(gfp & __GFP_DIRECT_RECLAIM))
+ return NULL;
+
+ ret = malloc(size);
+ uatomic_inc(&kmalloc_nr_allocated);
+ if (kmalloc_verbose)
+ printf("Allocating %p from malloc\n", ret);
+ if (gfp & __GFP_ZERO)
+ memset(ret, 0, size);
+ return ret;
+}
+
+void kfree(void *p)
+{
+ if (!p)
+ return;
+ uatomic_dec(&kmalloc_nr_allocated);
+ if (kmalloc_verbose)
+ printf("Freeing %p to malloc\n", p);
+ free(p);
+}
diff --git a/tools/lib/string.c b/tools/lib/string.c
index f645343815de..8b6892f959ab 100644
--- a/tools/lib/string.c
+++ b/tools/lib/string.c
@@ -168,3 +168,61 @@ char *strreplace(char *s, char old, char new)
*s = new;
return s;
}
+
+static void *check_bytes8(const u8 *start, u8 value, unsigned int bytes)
+{
+ while (bytes) {
+ if (*start != value)
+ return (void *)start;
+ start++;
+ bytes--;
+ }
+ return NULL;
+}
+
+/**
+ * memchr_inv - Find an unmatching character in an area of memory.
+ * @start: The memory area
+ * @c: Find a character other than c
+ * @bytes: The size of the area.
+ *
+ * returns the address of the first character other than @c, or %NULL
+ * if the whole buffer contains just @c.
+ */
+void *memchr_inv(const void *start, int c, size_t bytes)
+{
+ u8 value = c;
+ u64 value64;
+ unsigned int words, prefix;
+
+ if (bytes <= 16)
+ return check_bytes8(start, value, bytes);
+
+ value64 = value;
+ value64 |= value64 << 8;
+ value64 |= value64 << 16;
+ value64 |= value64 << 32;
+
+ prefix = (unsigned long)start % 8;
+ if (prefix) {
+ u8 *r;
+
+ prefix = 8 - prefix;
+ r = check_bytes8(start, value, prefix);
+ if (r)
+ return r;
+ start += prefix;
+ bytes -= prefix;
+ }
+
+ words = bytes / 8;
+
+ while (words) {
+ if (*(u64 *)start != value64)
+ return check_bytes8(start, value, 8);
+ start += 8;
+ words--;
+ }
+
+ return check_bytes8(start, value, bytes % 8);
+}
diff --git a/tools/lib/subcmd/Makefile b/tools/lib/subcmd/Makefile
index 1c777a72bb39..b87213263a5e 100644
--- a/tools/lib/subcmd/Makefile
+++ b/tools/lib/subcmd/Makefile
@@ -17,6 +17,15 @@ RM = rm -f
MAKEFLAGS += --no-print-directory
+INSTALL = install
+
+# Use DESTDIR for installing into a different root directory.
+# This is useful for building a package. The program will be
+# installed in this directory as if it was the root directory.
+# Then the build tool can move it later.
+DESTDIR ?=
+DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
+
LIBFILE = $(OUTPUT)libsubcmd.a
CFLAGS := -ggdb3 -Wall -Wextra -std=gnu99 -fPIC
@@ -48,6 +57,18 @@ CFLAGS += $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
SUBCMD_IN := $(OUTPUT)libsubcmd-in.o
+ifeq ($(LP64), 1)
+ libdir_relative = lib64
+else
+ libdir_relative = lib
+endif
+
+prefix ?=
+libdir = $(prefix)/$(libdir_relative)
+
+# Shell quotes
+libdir_SQ = $(subst ','\'',$(libdir))
+
all:
export srctree OUTPUT CC LD CFLAGS V
@@ -61,9 +82,40 @@ $(SUBCMD_IN): FORCE
$(LIBFILE): $(SUBCMD_IN)
$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(SUBCMD_IN)
+define do_install_mkdir
+ if [ ! -d '$(DESTDIR_SQ)$1' ]; then \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \
+ fi
+endef
+
+define do_install
+ if [ ! -d '$2' ]; then \
+ $(INSTALL) -d -m 755 '$2'; \
+ fi; \
+ $(INSTALL) $1 $(if $3,-m $3,) '$2'
+endef
+
+install_lib: $(LIBFILE)
+ $(call QUIET_INSTALL, $(LIBFILE)) \
+ $(call do_install_mkdir,$(libdir_SQ)); \
+ cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ)
+
+HDRS := exec-cmd.h help.h pager.h parse-options.h run-command.h
+INSTALL_HDRS_PFX := $(DESTDIR)$(prefix)/include/subcmd
+INSTALL_HDRS := $(addprefix $(INSTALL_HDRS_PFX)/, $(HDRS))
+
+$(INSTALL_HDRS): $(INSTALL_HDRS_PFX)/%.h: %.h
+ $(call QUIET_INSTALL, $@) \
+ $(call do_install,$<,$(INSTALL_HDRS_PFX)/,644)
+
+install_headers: $(INSTALL_HDRS)
+ $(call QUIET_INSTALL, libsubcmd_headers)
+
+install: install_lib install_headers
+
clean:
$(call QUIET_CLEAN, libsubcmd) $(RM) $(LIBFILE); \
- find $(if $(OUTPUT),$(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM)
+ find $(or $(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM)
FORCE:
diff --git a/tools/lib/subcmd/exec-cmd.c b/tools/lib/subcmd/exec-cmd.c
index 33e94fb83986..7739b5217cf6 100644
--- a/tools/lib/subcmd/exec-cmd.c
+++ b/tools/lib/subcmd/exec-cmd.c
@@ -24,6 +24,9 @@ void exec_cmd_init(const char *exec_name, const char *prefix,
subcmd_config.prefix = prefix;
subcmd_config.exec_path = exec_path;
subcmd_config.exec_path_env = exec_path_env;
+
+ /* Setup environment variable for invoked shell script. */
+ setenv("PREFIX", prefix, 1);
}
#define is_dir_sep(c) ((c) == '/')
@@ -33,38 +36,40 @@ static int is_absolute_path(const char *path)
return path[0] == '/';
}
-static const char *get_pwd_cwd(void)
+static const char *get_pwd_cwd(char *buf, size_t sz)
{
- static char cwd[PATH_MAX + 1];
char *pwd;
struct stat cwd_stat, pwd_stat;
- if (getcwd(cwd, PATH_MAX) == NULL)
+ if (getcwd(buf, sz) == NULL)
return NULL;
pwd = getenv("PWD");
- if (pwd && strcmp(pwd, cwd)) {
- stat(cwd, &cwd_stat);
+ if (pwd && strcmp(pwd, buf)) {
+ stat(buf, &cwd_stat);
if (!stat(pwd, &pwd_stat) &&
pwd_stat.st_dev == cwd_stat.st_dev &&
pwd_stat.st_ino == cwd_stat.st_ino) {
- strlcpy(cwd, pwd, PATH_MAX);
+ strlcpy(buf, pwd, sz);
}
}
- return cwd;
+ return buf;
}
-static const char *make_nonrelative_path(const char *path)
+static const char *make_nonrelative_path(char *buf, size_t sz, const char *path)
{
- static char buf[PATH_MAX + 1];
-
if (is_absolute_path(path)) {
- if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX)
+ if (strlcpy(buf, path, sz) >= sz)
die("Too long path: %.*s", 60, path);
} else {
- const char *cwd = get_pwd_cwd();
+ const char *cwd = get_pwd_cwd(buf, sz);
+
if (!cwd)
die("Cannot determine the current working directory");
- if (snprintf(buf, PATH_MAX, "%s/%s", cwd, path) >= PATH_MAX)
+
+ if (strlen(cwd) + strlen(path) + 2 >= sz)
die("Too long path: %.*s", 60, path);
+
+ strcat(buf, "/");
+ strcat(buf, path);
}
return buf;
}
@@ -130,8 +135,11 @@ static void add_path(char **out, const char *path)
if (path && *path) {
if (is_absolute_path(path))
astrcat(out, path);
- else
- astrcat(out, make_nonrelative_path(path));
+ else {
+ char buf[PATH_MAX];
+
+ astrcat(out, make_nonrelative_path(buf, sizeof(buf), path));
+ }
astrcat(out, ":");
}
diff --git a/tools/lib/subcmd/help.c b/tools/lib/subcmd/help.c
index bf02d62a3b2b..8561b0f01a24 100644
--- a/tools/lib/subcmd/help.c
+++ b/tools/lib/subcmd/help.c
@@ -16,6 +16,8 @@
void add_cmdname(struct cmdnames *cmds, const char *name, size_t len)
{
struct cmdname *ent = malloc(sizeof(*ent) + len + 1);
+ if (!ent)
+ return;
ent->len = len;
memcpy(ent->name, name, len);
@@ -50,11 +52,21 @@ void uniq(struct cmdnames *cmds)
if (!cmds->cnt)
return;
- for (i = j = 1; i < cmds->cnt; i++)
- if (strcmp(cmds->names[i]->name, cmds->names[i-1]->name))
- cmds->names[j++] = cmds->names[i];
-
+ for (i = 1; i < cmds->cnt; i++) {
+ if (!strcmp(cmds->names[i]->name, cmds->names[i-1]->name))
+ zfree(&cmds->names[i - 1]);
+ }
+ for (i = 0, j = 0; i < cmds->cnt; i++) {
+ if (cmds->names[i]) {
+ if (i == j)
+ j++;
+ else
+ cmds->names[j++] = cmds->names[i];
+ }
+ }
cmds->cnt = j;
+ while (j < i)
+ cmds->names[j++] = NULL;
}
void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
@@ -66,7 +78,13 @@ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
while (ci < cmds->cnt && ei < excludes->cnt) {
cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name);
if (cmp < 0) {
- cmds->names[cj++] = cmds->names[ci++];
+ if (ci == cj) {
+ ci++;
+ cj++;
+ } else {
+ zfree(&cmds->names[cj]);
+ cmds->names[cj++] = cmds->names[ci++];
+ }
} else if (cmp == 0) {
ci++;
ei++;
@@ -74,10 +92,14 @@ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
ei++;
}
}
-
- while (ci < cmds->cnt)
- cmds->names[cj++] = cmds->names[ci++];
-
+ if (ci != cj) {
+ while (ci < cmds->cnt) {
+ zfree(&cmds->names[cj]);
+ cmds->names[cj++] = cmds->names[ci++];
+ }
+ }
+ for (ci = cj; ci < cmds->cnt; ci++)
+ zfree(&cmds->names[ci]);
cmds->cnt = cj;
}
diff --git a/tools/lib/subcmd/parse-options.c b/tools/lib/subcmd/parse-options.c
index 39ebf6192016..9fa75943f2ed 100644
--- a/tools/lib/subcmd/parse-options.c
+++ b/tools/lib/subcmd/parse-options.c
@@ -806,9 +806,9 @@ static int option__cmp(const void *va, const void *vb)
static struct option *options__order(const struct option *opts)
{
- int nr_opts = 0, len;
+ int nr_opts = 0, nr_group = 0, len;
const struct option *o = opts;
- struct option *ordered;
+ struct option *opt, *ordered, *group;
for (o = opts; o->type != OPTION_END; o++)
++nr_opts;
@@ -819,7 +819,18 @@ static struct option *options__order(const struct option *opts)
goto out;
memcpy(ordered, opts, len);
- qsort(ordered, nr_opts, sizeof(*o), option__cmp);
+ /* sort each option group individually */
+ for (opt = group = ordered; opt->type != OPTION_END; opt++) {
+ if (opt->type == OPTION_GROUP) {
+ qsort(group, nr_group, sizeof(*opt), option__cmp);
+ group = opt + 1;
+ nr_group = 0;
+ continue;
+ }
+ nr_group++;
+ }
+ qsort(group, nr_group, sizeof(*opt), option__cmp);
+
out:
return ordered;
}
diff --git a/tools/lib/subcmd/parse-options.h b/tools/lib/subcmd/parse-options.h
index d2414144eb8c..8e9147358a28 100644
--- a/tools/lib/subcmd/parse-options.h
+++ b/tools/lib/subcmd/parse-options.h
@@ -6,10 +6,6 @@
#include <stdbool.h>
#include <stdint.h>
-#ifndef NORETURN
-#define NORETURN __attribute__((__noreturn__))
-#endif
-
enum parse_opt_type {
/* special types */
OPTION_END,
@@ -133,6 +129,7 @@ struct option {
#define OPT_SET_PTR(s, l, v, h, p) { .type = OPTION_SET_PTR, .short_name = (s), .long_name = (l), .value = (v), .help = (h), .defval = (p) }
#define OPT_INTEGER(s, l, v, h) { .type = OPTION_INTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, int *), .help = (h) }
#define OPT_UINTEGER(s, l, v, h) { .type = OPTION_UINTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned int *), .help = (h) }
+#define OPT_UINTEGER_OPTARG(s, l, v, d, h) { .type = OPTION_UINTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned int *), .help = (h), .flags = PARSE_OPT_OPTARG, .defval = (intptr_t)(d) }
#define OPT_LONG(s, l, v, h) { .type = OPTION_LONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, long *), .help = (h) }
#define OPT_ULONG(s, l, v, h) { .type = OPTION_ULONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned long *), .help = (h) }
#define OPT_U64(s, l, v, h) { .type = OPTION_U64, .short_name = (s), .long_name = (l), .value = check_vtype(v, u64 *), .help = (h) }
@@ -182,9 +179,9 @@ extern int parse_options_subcommand(int argc, const char **argv,
const char *const subcommands[],
const char *usagestr[], int flags);
-extern NORETURN void usage_with_options(const char * const *usagestr,
+extern __noreturn void usage_with_options(const char * const *usagestr,
const struct option *options);
-extern NORETURN __attribute__((format(printf,3,4)))
+extern __noreturn __attribute__((format(printf,3,4)))
void usage_with_options_msg(const char * const *usagestr,
const struct option *options,
const char *fmt, ...);
diff --git a/tools/lib/subcmd/run-command.c b/tools/lib/subcmd/run-command.c
index 5cdac2162532..d435eb42354b 100644
--- a/tools/lib/subcmd/run-command.c
+++ b/tools/lib/subcmd/run-command.c
@@ -122,6 +122,8 @@ int start_command(struct child_process *cmd)
}
if (cmd->preexec_cb)
cmd->preexec_cb();
+ if (cmd->no_exec_cmd)
+ exit(cmd->no_exec_cmd(cmd));
if (cmd->exec_cmd) {
execv_cmd(cmd->argv);
} else {
diff --git a/tools/lib/subcmd/run-command.h b/tools/lib/subcmd/run-command.h
index 17d969c6add3..d794138a797f 100644
--- a/tools/lib/subcmd/run-command.h
+++ b/tools/lib/subcmd/run-command.h
@@ -47,6 +47,8 @@ struct child_process {
unsigned exec_cmd:1; /* if this is to be external sub-command */
unsigned stdout_to_stderr:1;
void (*preexec_cb)(void);
+ /* If set, call function in child rather than doing an exec. */
+ int (*no_exec_cmd)(struct child_process *process);
};
int start_command(struct child_process *);
diff --git a/tools/lib/subcmd/subcmd-util.h b/tools/lib/subcmd/subcmd-util.h
index 794a375dad36..dfac76e35ac7 100644
--- a/tools/lib/subcmd/subcmd-util.h
+++ b/tools/lib/subcmd/subcmd-util.h
@@ -5,8 +5,7 @@
#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
-
-#define NORETURN __attribute__((__noreturn__))
+#include <linux/compiler.h>
static inline void report(const char *prefix, const char *err, va_list params)
{
@@ -15,7 +14,7 @@ static inline void report(const char *prefix, const char *err, va_list params)
fprintf(stderr, " %s%s\n", prefix, msg);
}
-static NORETURN inline void die(const char *err, ...)
+static __noreturn inline void die(const char *err, ...)
{
va_list params;
@@ -50,15 +49,8 @@ static NORETURN inline void die(const char *err, ...)
static inline void *xrealloc(void *ptr, size_t size)
{
void *ret = realloc(ptr, size);
- if (!ret && !size)
- ret = realloc(ptr, 1);
- if (!ret) {
- ret = realloc(ptr, size);
- if (!ret && !size)
- ret = realloc(ptr, 1);
- if (!ret)
- die("Out of memory, realloc failed");
- }
+ if (!ret)
+ die("Out of memory, realloc failed");
return ret;
}
diff --git a/tools/lib/symbol/Build b/tools/lib/symbol/Build
new file mode 100644
index 000000000000..9b9a9c78d3c9
--- /dev/null
+++ b/tools/lib/symbol/Build
@@ -0,0 +1 @@
+libsymbol-y += kallsyms.o
diff --git a/tools/lib/symbol/Makefile b/tools/lib/symbol/Makefile
new file mode 100644
index 000000000000..13d43c6f92b4
--- /dev/null
+++ b/tools/lib/symbol/Makefile
@@ -0,0 +1,122 @@
+# SPDX-License-Identifier: GPL-2.0
+include ../../scripts/Makefile.include
+include ../../scripts/utilities.mak # QUIET_CLEAN
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+#$(info Determined 'srctree' to be $(srctree))
+endif
+
+CC ?= $(CROSS_COMPILE)gcc
+AR ?= $(CROSS_COMPILE)ar
+LD ?= $(CROSS_COMPILE)ld
+
+MAKEFLAGS += --no-print-directory
+
+INSTALL = install
+
+
+# Use DESTDIR for installing into a different root directory.
+# This is useful for building a package. The program will be
+# installed in this directory as if it was the root directory.
+# Then the build tool can move it later.
+DESTDIR ?=
+DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
+
+LIBFILE = $(OUTPUT)libsymbol.a
+
+CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
+CFLAGS += -ggdb3 -Wall -Wextra -std=gnu11 -U_FORTIFY_SOURCE -fPIC
+
+ifeq ($(DEBUG),0)
+ifeq ($(CC_NO_CLANG), 0)
+ CFLAGS += -O3
+else
+ CFLAGS += -O6
+endif
+endif
+
+ifeq ($(DEBUG),0)
+ CFLAGS += -D_FORTIFY_SOURCE
+endif
+
+# Treat warnings as errors unless directed not to
+ifneq ($(WERROR),0)
+ CFLAGS += -Werror
+endif
+
+CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
+
+CFLAGS += -I$(srctree)/tools/lib
+CFLAGS += -I$(srctree)/tools/include
+
+RM = rm -f
+
+SYMBOL_IN := $(OUTPUT)libsymbol-in.o
+
+ifeq ($(LP64), 1)
+ libdir_relative = lib64
+else
+ libdir_relative = lib
+endif
+
+prefix ?=
+libdir = $(prefix)/$(libdir_relative)
+
+# Shell quotes
+libdir_SQ = $(subst ','\'',$(libdir))
+
+all:
+
+export srctree OUTPUT CC LD CFLAGS V
+include $(srctree)/tools/build/Makefile.include
+include $(srctree)/tools/scripts/Makefile.include
+
+all: fixdep $(LIBFILE)
+
+$(SYMBOL_IN): FORCE
+ @$(MAKE) $(build)=libsymbol
+
+$(LIBFILE): $(SYMBOL_IN)
+ $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(SYMBOL_IN)
+
+define do_install_mkdir
+ if [ ! -d '$(DESTDIR_SQ)$1' ]; then \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \
+ fi
+endef
+
+define do_install
+ if [ ! -d '$2' ]; then \
+ $(INSTALL) -d -m 755 '$2'; \
+ fi; \
+ $(INSTALL) $1 $(if $3,-m $3,) '$2'
+endef
+
+install_lib: $(LIBFILE)
+ $(call QUIET_INSTALL, $(LIBFILE)) \
+ $(call do_install_mkdir,$(libdir_SQ)); \
+ cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ)
+
+HDRS := kallsyms.h
+INSTALL_HDRS_PFX := $(DESTDIR)$(prefix)/include/symbol
+INSTALL_HDRS := $(addprefix $(INSTALL_HDRS_PFX)/, $(HDRS))
+
+$(INSTALL_HDRS): $(INSTALL_HDRS_PFX)/%.h: %.h
+ $(call QUIET_INSTALL, $@) \
+ $(call do_install,$<,$(INSTALL_HDRS_PFX)/,644)
+
+install_headers: $(INSTALL_HDRS)
+ $(call QUIET_INSTALL, libsymbol_headers)
+
+install: install_lib install_headers
+
+clean:
+ $(call QUIET_CLEAN, libsymbol) $(RM) $(LIBFILE); \
+ find $(or $(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM)
+
+FORCE:
+
+.PHONY: clean FORCE
diff --git a/tools/lib/symbol/kallsyms.h b/tools/lib/symbol/kallsyms.h
index 72ab9870454b..542f9b059c3b 100644
--- a/tools/lib/symbol/kallsyms.h
+++ b/tools/lib/symbol/kallsyms.h
@@ -7,7 +7,7 @@
#include <linux/types.h>
#ifndef KSYM_NAME_LEN
-#define KSYM_NAME_LEN 256
+#define KSYM_NAME_LEN 512
#endif
static inline u8 kallsyms2elf_binding(char type)
diff --git a/tools/lib/thermal/.gitignore b/tools/lib/thermal/.gitignore
new file mode 100644
index 000000000000..5d2aeda80fea
--- /dev/null
+++ b/tools/lib/thermal/.gitignore
@@ -0,0 +1,2 @@
+libthermal.so*
+libthermal.pc
diff --git a/tools/lib/thermal/Build b/tools/lib/thermal/Build
new file mode 100644
index 000000000000..4a892d9e24f9
--- /dev/null
+++ b/tools/lib/thermal/Build
@@ -0,0 +1,5 @@
+libthermal-y += commands.o
+libthermal-y += events.o
+libthermal-y += thermal_nl.o
+libthermal-y += sampling.o
+libthermal-y += thermal.o
diff --git a/tools/lib/thermal/Makefile b/tools/lib/thermal/Makefile
new file mode 100644
index 000000000000..2d0d255fd0e1
--- /dev/null
+++ b/tools/lib/thermal/Makefile
@@ -0,0 +1,165 @@
+# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+# Most of this file is copied from tools/lib/perf/Makefile
+
+LIBTHERMAL_VERSION = 0
+LIBTHERMAL_PATCHLEVEL = 0
+LIBTHERMAL_EXTRAVERSION = 1
+
+MAKEFLAGS += --no-print-directory
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+# $(info Determined 'srctree' to be $(srctree))
+endif
+
+INSTALL = install
+
+# Use DESTDIR for installing into a different root directory.
+# This is useful for building a package. The program will be
+# installed in this directory as if it was the root directory.
+# Then the build tool can move it later.
+DESTDIR ?=
+DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
+
+include $(srctree)/tools/scripts/Makefile.include
+include $(srctree)/tools/scripts/Makefile.arch
+
+ifeq ($(LP64), 1)
+ libdir_relative = lib64
+else
+ libdir_relative = lib
+endif
+
+prefix ?=
+libdir = $(prefix)/$(libdir_relative)
+
+# Shell quotes
+libdir_SQ = $(subst ','\'',$(libdir))
+libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
+
+ifeq ("$(origin V)", "command line")
+ VERBOSE = $(V)
+endif
+ifndef VERBOSE
+ VERBOSE = 0
+endif
+
+ifeq ($(VERBOSE),1)
+ Q =
+else
+ Q = @
+endif
+
+# Set compile option CFLAGS
+ifdef EXTRA_CFLAGS
+ CFLAGS := $(EXTRA_CFLAGS)
+else
+ CFLAGS := -g -Wall
+endif
+
+INCLUDES = \
+-I/usr/include/libnl3 \
+-I$(srctree)/tools/lib/thermal/include \
+-I$(srctree)/tools/lib/ \
+-I$(srctree)/tools/include \
+-I$(srctree)/tools/arch/$(SRCARCH)/include/ \
+-I$(srctree)/tools/arch/$(SRCARCH)/include/uapi \
+-I$(srctree)/tools/include/uapi
+
+# Append required CFLAGS
+override CFLAGS += $(EXTRA_WARNINGS)
+override CFLAGS += -Werror -Wall
+override CFLAGS += -fPIC
+override CFLAGS += $(INCLUDES)
+override CFLAGS += -fvisibility=hidden
+override CFGLAS += -Wl,-L.
+override CFGLAS += -Wl,-lthermal
+
+all:
+
+export srctree OUTPUT CC LD CFLAGS V
+export DESTDIR DESTDIR_SQ
+
+include $(srctree)/tools/build/Makefile.include
+
+VERSION_SCRIPT := libthermal.map
+
+PATCHLEVEL = $(LIBTHERMAL_PATCHLEVEL)
+EXTRAVERSION = $(LIBTHERMAL_EXTRAVERSION)
+VERSION = $(LIBTHERMAL_VERSION).$(LIBTHERMAL_PATCHLEVEL).$(LIBTHERMAL_EXTRAVERSION)
+
+LIBTHERMAL_SO := $(OUTPUT)libthermal.so.$(VERSION)
+LIBTHERMAL_A := $(OUTPUT)libthermal.a
+LIBTHERMAL_IN := $(OUTPUT)libthermal-in.o
+LIBTHERMAL_PC := $(OUTPUT)libthermal.pc
+LIBTHERMAL_ALL := $(LIBTHERMAL_A) $(OUTPUT)libthermal.so*
+
+THERMAL_UAPI := include/uapi/linux/thermal.h
+
+$(THERMAL_UAPI): FORCE
+ ln -sf $(srctree)/$@ $(srctree)/tools/$@
+
+$(LIBTHERMAL_IN): FORCE
+ $(Q)$(MAKE) $(build)=libthermal
+
+$(LIBTHERMAL_A): $(LIBTHERMAL_IN)
+ $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBTHERMAL_IN)
+
+$(LIBTHERMAL_SO): $(LIBTHERMAL_IN)
+ $(QUIET_LINK)$(CC) --shared -Wl,-soname,libthermal.so \
+ -Wl,--version-script=$(VERSION_SCRIPT) $^ -o $@
+ @ln -sf $(@F) $(OUTPUT)libthermal.so
+ @ln -sf $(@F) $(OUTPUT)libthermal.so.$(LIBTHERMAL_VERSION)
+
+
+libs: $(THERMAL_UAPI) $(LIBTHERMAL_A) $(LIBTHERMAL_SO) $(LIBTHERMAL_PC)
+
+all: fixdep
+ $(Q)$(MAKE) libs
+
+clean:
+ $(call QUIET_CLEAN, libthermal) $(RM) $(LIBTHERMAL_A) \
+ *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBTHERMAL_VERSION) .*.d .*.cmd LIBTHERMAL-CFLAGS $(LIBTHERMAL_PC)
+
+$(LIBTHERMAL_PC):
+ $(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \
+ -e "s|@LIBDIR@|$(libdir_SQ)|" \
+ -e "s|@VERSION@|$(VERSION)|" \
+ < libthermal.pc.template > $@
+
+define do_install_mkdir
+ if [ ! -d '$(DESTDIR_SQ)$1' ]; then \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \
+ fi
+endef
+
+define do_install
+ if [ ! -d '$(DESTDIR_SQ)$2' ]; then \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
+ fi; \
+ $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2'
+endef
+
+install_lib: libs
+ $(call QUIET_INSTALL, $(LIBTHERMAL_ALL)) \
+ $(call do_install_mkdir,$(libdir_SQ)); \
+ cp -fpR $(LIBTHERMAL_ALL) $(DESTDIR)$(libdir_SQ)
+
+install_headers:
+ $(call QUIET_INSTALL, headers) \
+ $(call do_install,include/thermal.h,$(prefix)/include/thermal,644); \
+
+install_pkgconfig: $(LIBTHERMAL_PC)
+ $(call QUIET_INSTALL, $(LIBTHERMAL_PC)) \
+ $(call do_install,$(LIBTHERMAL_PC),$(libdir_SQ)/pkgconfig,644)
+
+install_doc:
+ $(Q)$(MAKE) -C Documentation install-man install-html install-examples
+
+install: install_lib install_headers install_pkgconfig
+
+FORCE:
+
+.PHONY: all install clean FORCE
diff --git a/tools/lib/thermal/commands.c b/tools/lib/thermal/commands.c
new file mode 100644
index 000000000000..73d4d4e8d6ec
--- /dev/null
+++ b/tools/lib/thermal/commands.c
@@ -0,0 +1,349 @@
+// SPDX-License-Identifier: LGPL-2.1+
+// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
+#define _GNU_SOURCE
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <thermal.h>
+#include "thermal_nl.h"
+
+static struct nla_policy thermal_genl_policy[THERMAL_GENL_ATTR_MAX + 1] = {
+ /* Thermal zone */
+ [THERMAL_GENL_ATTR_TZ] = { .type = NLA_NESTED },
+ [THERMAL_GENL_ATTR_TZ_ID] = { .type = NLA_U32 },
+ [THERMAL_GENL_ATTR_TZ_TEMP] = { .type = NLA_U32 },
+ [THERMAL_GENL_ATTR_TZ_TRIP] = { .type = NLA_NESTED },
+ [THERMAL_GENL_ATTR_TZ_TRIP_ID] = { .type = NLA_U32 },
+ [THERMAL_GENL_ATTR_TZ_TRIP_TEMP] = { .type = NLA_U32 },
+ [THERMAL_GENL_ATTR_TZ_TRIP_TYPE] = { .type = NLA_U32 },
+ [THERMAL_GENL_ATTR_TZ_TRIP_HYST] = { .type = NLA_U32 },
+ [THERMAL_GENL_ATTR_TZ_MODE] = { .type = NLA_U32 },
+ [THERMAL_GENL_ATTR_TZ_CDEV_WEIGHT] = { .type = NLA_U32 },
+ [THERMAL_GENL_ATTR_TZ_NAME] = { .type = NLA_STRING },
+
+ /* Governor(s) */
+ [THERMAL_GENL_ATTR_TZ_GOV] = { .type = NLA_NESTED },
+ [THERMAL_GENL_ATTR_TZ_GOV_NAME] = { .type = NLA_STRING },
+
+ /* Cooling devices */
+ [THERMAL_GENL_ATTR_CDEV] = { .type = NLA_NESTED },
+ [THERMAL_GENL_ATTR_CDEV_ID] = { .type = NLA_U32 },
+ [THERMAL_GENL_ATTR_CDEV_CUR_STATE] = { .type = NLA_U32 },
+ [THERMAL_GENL_ATTR_CDEV_MAX_STATE] = { .type = NLA_U32 },
+ [THERMAL_GENL_ATTR_CDEV_NAME] = { .type = NLA_STRING },
+};
+
+static int parse_tz_get(struct genl_info *info, struct thermal_zone **tz)
+{
+ struct nlattr *attr;
+ struct thermal_zone *__tz = NULL;
+ size_t size = 0;
+ int rem;
+
+ nla_for_each_nested(attr, info->attrs[THERMAL_GENL_ATTR_TZ], rem) {
+
+ if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_ID) {
+
+ size++;
+
+ __tz = realloc(__tz, sizeof(*__tz) * (size + 2));
+ if (!__tz)
+ return THERMAL_ERROR;
+
+ __tz[size - 1].id = nla_get_u32(attr);
+ }
+
+
+ if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_NAME)
+ nla_strlcpy(__tz[size - 1].name, attr,
+ THERMAL_NAME_LENGTH);
+ }
+
+ if (__tz)
+ __tz[size].id = -1;
+
+ *tz = __tz;
+
+ return THERMAL_SUCCESS;
+}
+
+static int parse_cdev_get(struct genl_info *info, struct thermal_cdev **cdev)
+{
+ struct nlattr *attr;
+ struct thermal_cdev *__cdev = NULL;
+ size_t size = 0;
+ int rem;
+
+ nla_for_each_nested(attr, info->attrs[THERMAL_GENL_ATTR_CDEV], rem) {
+
+ if (nla_type(attr) == THERMAL_GENL_ATTR_CDEV_ID) {
+
+ size++;
+
+ __cdev = realloc(__cdev, sizeof(*__cdev) * (size + 2));
+ if (!__cdev)
+ return THERMAL_ERROR;
+
+ __cdev[size - 1].id = nla_get_u32(attr);
+ }
+
+ if (nla_type(attr) == THERMAL_GENL_ATTR_CDEV_NAME) {
+ nla_strlcpy(__cdev[size - 1].name, attr,
+ THERMAL_NAME_LENGTH);
+ }
+
+ if (nla_type(attr) == THERMAL_GENL_ATTR_CDEV_CUR_STATE)
+ __cdev[size - 1].cur_state = nla_get_u32(attr);
+
+ if (nla_type(attr) == THERMAL_GENL_ATTR_CDEV_MAX_STATE)
+ __cdev[size - 1].max_state = nla_get_u32(attr);
+ }
+
+ if (__cdev)
+ __cdev[size].id = -1;
+
+ *cdev = __cdev;
+
+ return THERMAL_SUCCESS;
+}
+
+static int parse_tz_get_trip(struct genl_info *info, struct thermal_zone *tz)
+{
+ struct nlattr *attr;
+ struct thermal_trip *__tt = NULL;
+ size_t size = 0;
+ int rem;
+
+ nla_for_each_nested(attr, info->attrs[THERMAL_GENL_ATTR_TZ_TRIP], rem) {
+
+ if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_TRIP_ID) {
+
+ size++;
+
+ __tt = realloc(__tt, sizeof(*__tt) * (size + 2));
+ if (!__tt)
+ return THERMAL_ERROR;
+
+ __tt[size - 1].id = nla_get_u32(attr);
+ }
+
+ if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_TRIP_TYPE)
+ __tt[size - 1].type = nla_get_u32(attr);
+
+ if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_TRIP_TEMP)
+ __tt[size - 1].temp = nla_get_u32(attr);
+
+ if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_TRIP_HYST)
+ __tt[size - 1].hyst = nla_get_u32(attr);
+ }
+
+ if (__tt)
+ __tt[size].id = -1;
+
+ tz->trip = __tt;
+
+ return THERMAL_SUCCESS;
+}
+
+static int parse_tz_get_temp(struct genl_info *info, struct thermal_zone *tz)
+{
+ int id = -1;
+
+ if (info->attrs[THERMAL_GENL_ATTR_TZ_ID])
+ id = nla_get_u32(info->attrs[THERMAL_GENL_ATTR_TZ_ID]);
+
+ if (tz->id != id)
+ return THERMAL_ERROR;
+
+ if (info->attrs[THERMAL_GENL_ATTR_TZ_TEMP])
+ tz->temp = nla_get_u32(info->attrs[THERMAL_GENL_ATTR_TZ_TEMP]);
+
+ return THERMAL_SUCCESS;
+}
+
+static int parse_tz_get_gov(struct genl_info *info, struct thermal_zone *tz)
+{
+ int id = -1;
+
+ if (info->attrs[THERMAL_GENL_ATTR_TZ_ID])
+ id = nla_get_u32(info->attrs[THERMAL_GENL_ATTR_TZ_ID]);
+
+ if (tz->id != id)
+ return THERMAL_ERROR;
+
+ if (info->attrs[THERMAL_GENL_ATTR_TZ_GOV_NAME]) {
+ nla_strlcpy(tz->governor,
+ info->attrs[THERMAL_GENL_ATTR_TZ_GOV_NAME],
+ THERMAL_NAME_LENGTH);
+ }
+
+ return THERMAL_SUCCESS;
+}
+
+static int handle_netlink(struct nl_cache_ops *unused,
+ struct genl_cmd *cmd,
+ struct genl_info *info, void *arg)
+{
+ int ret;
+
+ switch (cmd->c_id) {
+
+ case THERMAL_GENL_CMD_TZ_GET_ID:
+ ret = parse_tz_get(info, arg);
+ break;
+
+ case THERMAL_GENL_CMD_CDEV_GET:
+ ret = parse_cdev_get(info, arg);
+ break;
+
+ case THERMAL_GENL_CMD_TZ_GET_TEMP:
+ ret = parse_tz_get_temp(info, arg);
+ break;
+
+ case THERMAL_GENL_CMD_TZ_GET_TRIP:
+ ret = parse_tz_get_trip(info, arg);
+ break;
+
+ case THERMAL_GENL_CMD_TZ_GET_GOV:
+ ret = parse_tz_get_gov(info, arg);
+ break;
+
+ default:
+ return THERMAL_ERROR;
+ }
+
+ return ret;
+}
+
+static struct genl_cmd thermal_cmds[] = {
+ {
+ .c_id = THERMAL_GENL_CMD_TZ_GET_ID,
+ .c_name = (char *)"List thermal zones",
+ .c_msg_parser = handle_netlink,
+ .c_maxattr = THERMAL_GENL_ATTR_MAX,
+ .c_attr_policy = thermal_genl_policy,
+ },
+ {
+ .c_id = THERMAL_GENL_CMD_TZ_GET_GOV,
+ .c_name = (char *)"Get governor",
+ .c_msg_parser = handle_netlink,
+ .c_maxattr = THERMAL_GENL_ATTR_MAX,
+ .c_attr_policy = thermal_genl_policy,
+ },
+ {
+ .c_id = THERMAL_GENL_CMD_TZ_GET_TEMP,
+ .c_name = (char *)"Get thermal zone temperature",
+ .c_msg_parser = handle_netlink,
+ .c_maxattr = THERMAL_GENL_ATTR_MAX,
+ .c_attr_policy = thermal_genl_policy,
+ },
+ {
+ .c_id = THERMAL_GENL_CMD_TZ_GET_TRIP,
+ .c_name = (char *)"Get thermal zone trip points",
+ .c_msg_parser = handle_netlink,
+ .c_maxattr = THERMAL_GENL_ATTR_MAX,
+ .c_attr_policy = thermal_genl_policy,
+ },
+ {
+ .c_id = THERMAL_GENL_CMD_CDEV_GET,
+ .c_name = (char *)"Get cooling devices",
+ .c_msg_parser = handle_netlink,
+ .c_maxattr = THERMAL_GENL_ATTR_MAX,
+ .c_attr_policy = thermal_genl_policy,
+ },
+};
+
+static struct genl_ops thermal_cmd_ops = {
+ .o_name = (char *)"thermal",
+ .o_cmds = thermal_cmds,
+ .o_ncmds = ARRAY_SIZE(thermal_cmds),
+};
+
+static thermal_error_t thermal_genl_auto(struct thermal_handler *th, int id, int cmd,
+ int flags, void *arg)
+{
+ struct nl_msg *msg;
+ void *hdr;
+
+ msg = nlmsg_alloc();
+ if (!msg)
+ return THERMAL_ERROR;
+
+ hdr = genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, thermal_cmd_ops.o_id,
+ 0, flags, cmd, THERMAL_GENL_VERSION);
+ if (!hdr)
+ return THERMAL_ERROR;
+
+ if (id >= 0 && nla_put_u32(msg, THERMAL_GENL_ATTR_TZ_ID, id))
+ return THERMAL_ERROR;
+
+ if (nl_send_msg(th->sk_cmd, th->cb_cmd, msg, genl_handle_msg, arg))
+ return THERMAL_ERROR;
+
+ nlmsg_free(msg);
+
+ return THERMAL_SUCCESS;
+}
+
+thermal_error_t thermal_cmd_get_tz(struct thermal_handler *th, struct thermal_zone **tz)
+{
+ return thermal_genl_auto(th, -1, THERMAL_GENL_CMD_TZ_GET_ID,
+ NLM_F_DUMP | NLM_F_ACK, tz);
+}
+
+thermal_error_t thermal_cmd_get_cdev(struct thermal_handler *th, struct thermal_cdev **tc)
+{
+ return thermal_genl_auto(th, -1, THERMAL_GENL_CMD_CDEV_GET,
+ NLM_F_DUMP | NLM_F_ACK, tc);
+}
+
+thermal_error_t thermal_cmd_get_trip(struct thermal_handler *th, struct thermal_zone *tz)
+{
+ return thermal_genl_auto(th, tz->id, THERMAL_GENL_CMD_TZ_GET_TRIP,
+ 0, tz);
+}
+
+thermal_error_t thermal_cmd_get_governor(struct thermal_handler *th, struct thermal_zone *tz)
+{
+ return thermal_genl_auto(th, tz->id, THERMAL_GENL_CMD_TZ_GET_GOV, 0, tz);
+}
+
+thermal_error_t thermal_cmd_get_temp(struct thermal_handler *th, struct thermal_zone *tz)
+{
+ return thermal_genl_auto(th, tz->id, THERMAL_GENL_CMD_TZ_GET_TEMP, 0, tz);
+}
+
+thermal_error_t thermal_cmd_exit(struct thermal_handler *th)
+{
+ if (genl_unregister_family(&thermal_cmd_ops))
+ return THERMAL_ERROR;
+
+ nl_thermal_disconnect(th->sk_cmd, th->cb_cmd);
+
+ return THERMAL_SUCCESS;
+}
+
+thermal_error_t thermal_cmd_init(struct thermal_handler *th)
+{
+ int ret;
+ int family;
+
+ if (nl_thermal_connect(&th->sk_cmd, &th->cb_cmd))
+ return THERMAL_ERROR;
+
+ ret = genl_register_family(&thermal_cmd_ops);
+ if (ret)
+ return THERMAL_ERROR;
+
+ ret = genl_ops_resolve(th->sk_cmd, &thermal_cmd_ops);
+ if (ret)
+ return THERMAL_ERROR;
+
+ family = genl_ctrl_resolve(th->sk_cmd, "nlctrl");
+ if (family != GENL_ID_CTRL)
+ return THERMAL_ERROR;
+
+ return THERMAL_SUCCESS;
+}
diff --git a/tools/lib/thermal/events.c b/tools/lib/thermal/events.c
new file mode 100644
index 000000000000..a7a55d1a0c4c
--- /dev/null
+++ b/tools/lib/thermal/events.c
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: LGPL-2.1+
+// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
+#include <linux/netlink.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+
+#include <thermal.h>
+#include "thermal_nl.h"
+
+/*
+ * Optimization: fill this array to tell which event we do want to pay
+ * attention to. That happens at init time with the ops
+ * structure. Each ops will enable the event and the general handler
+ * will be able to discard the event if there is not ops associated
+ * with it.
+ */
+static int enabled_ops[__THERMAL_GENL_EVENT_MAX];
+
+static int handle_thermal_event(struct nl_msg *n, void *arg)
+{
+ struct nlmsghdr *nlh = nlmsg_hdr(n);
+ struct genlmsghdr *genlhdr = genlmsg_hdr(nlh);
+ struct nlattr *attrs[THERMAL_GENL_ATTR_MAX + 1];
+ struct thermal_handler_param *thp = arg;
+ struct thermal_events_ops *ops = &thp->th->ops->events;
+
+ genlmsg_parse(nlh, 0, attrs, THERMAL_GENL_ATTR_MAX, NULL);
+
+ arg = thp->arg;
+
+ /*
+ * This is an event we don't care of, bail out.
+ */
+ if (!enabled_ops[genlhdr->cmd])
+ return THERMAL_SUCCESS;
+
+ switch (genlhdr->cmd) {
+
+ case THERMAL_GENL_EVENT_TZ_CREATE:
+ return ops->tz_create(nla_get_string(attrs[THERMAL_GENL_ATTR_TZ_NAME]),
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), arg);
+
+ case THERMAL_GENL_EVENT_TZ_DELETE:
+ return ops->tz_delete(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), arg);
+
+ case THERMAL_GENL_EVENT_TZ_ENABLE:
+ return ops->tz_enable(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), arg);
+
+ case THERMAL_GENL_EVENT_TZ_DISABLE:
+ return ops->tz_disable(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), arg);
+
+ case THERMAL_GENL_EVENT_TZ_TRIP_CHANGE:
+ return ops->trip_change(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]),
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_ID]),
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_TYPE]),
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_TEMP]),
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_HYST]), arg);
+
+ case THERMAL_GENL_EVENT_TZ_TRIP_ADD:
+ return ops->trip_add(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]),
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_ID]),
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_TYPE]),
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_TEMP]),
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_HYST]), arg);
+
+ case THERMAL_GENL_EVENT_TZ_TRIP_DELETE:
+ return ops->trip_delete(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]),
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_ID]), arg);
+
+ case THERMAL_GENL_EVENT_TZ_TRIP_UP:
+ return ops->trip_high(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]),
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_ID]),
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TEMP]), arg);
+
+ case THERMAL_GENL_EVENT_TZ_TRIP_DOWN:
+ return ops->trip_low(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]),
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_ID]),
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TEMP]), arg);
+
+ case THERMAL_GENL_EVENT_CDEV_ADD:
+ return ops->cdev_add(nla_get_string(attrs[THERMAL_GENL_ATTR_CDEV_NAME]),
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_CDEV_ID]),
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_CDEV_MAX_STATE]), arg);
+
+ case THERMAL_GENL_EVENT_CDEV_DELETE:
+ return ops->cdev_delete(nla_get_u32(attrs[THERMAL_GENL_ATTR_CDEV_ID]), arg);
+
+ case THERMAL_GENL_EVENT_CDEV_STATE_UPDATE:
+ return ops->cdev_update(nla_get_u32(attrs[THERMAL_GENL_ATTR_CDEV_ID]),
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_CDEV_CUR_STATE]), arg);
+
+ case THERMAL_GENL_EVENT_TZ_GOV_CHANGE:
+ return ops->gov_change(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]),
+ nla_get_string(attrs[THERMAL_GENL_ATTR_GOV_NAME]), arg);
+ default:
+ return -1;
+ }
+}
+
+static void thermal_events_ops_init(struct thermal_events_ops *ops)
+{
+ enabled_ops[THERMAL_GENL_EVENT_TZ_CREATE] = !!ops->tz_create;
+ enabled_ops[THERMAL_GENL_EVENT_TZ_DELETE] = !!ops->tz_delete;
+ enabled_ops[THERMAL_GENL_EVENT_TZ_DISABLE] = !!ops->tz_disable;
+ enabled_ops[THERMAL_GENL_EVENT_TZ_ENABLE] = !!ops->tz_enable;
+ enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_UP] = !!ops->trip_high;
+ enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_DOWN] = !!ops->trip_low;
+ enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_CHANGE] = !!ops->trip_change;
+ enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_ADD] = !!ops->trip_add;
+ enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_DELETE] = !!ops->trip_delete;
+ enabled_ops[THERMAL_GENL_EVENT_CDEV_ADD] = !!ops->cdev_add;
+ enabled_ops[THERMAL_GENL_EVENT_CDEV_DELETE] = !!ops->cdev_delete;
+ enabled_ops[THERMAL_GENL_EVENT_CDEV_STATE_UPDATE] = !!ops->cdev_update;
+ enabled_ops[THERMAL_GENL_EVENT_TZ_GOV_CHANGE] = !!ops->gov_change;
+}
+
+thermal_error_t thermal_events_handle(struct thermal_handler *th, void *arg)
+{
+ struct thermal_handler_param thp = { .th = th, .arg = arg };
+
+ if (!th)
+ return THERMAL_ERROR;
+
+ if (nl_cb_set(th->cb_event, NL_CB_VALID, NL_CB_CUSTOM,
+ handle_thermal_event, &thp))
+ return THERMAL_ERROR;
+
+ return nl_recvmsgs(th->sk_event, th->cb_event);
+}
+
+int thermal_events_fd(struct thermal_handler *th)
+{
+ if (!th)
+ return -1;
+
+ return nl_socket_get_fd(th->sk_event);
+}
+
+thermal_error_t thermal_events_exit(struct thermal_handler *th)
+{
+ if (nl_unsubscribe_thermal(th->sk_event, th->cb_event,
+ THERMAL_GENL_EVENT_GROUP_NAME))
+ return THERMAL_ERROR;
+
+ nl_thermal_disconnect(th->sk_event, th->cb_event);
+
+ return THERMAL_SUCCESS;
+}
+
+thermal_error_t thermal_events_init(struct thermal_handler *th)
+{
+ thermal_events_ops_init(&th->ops->events);
+
+ if (nl_thermal_connect(&th->sk_event, &th->cb_event))
+ return THERMAL_ERROR;
+
+ if (nl_subscribe_thermal(th->sk_event, th->cb_event,
+ THERMAL_GENL_EVENT_GROUP_NAME))
+ return THERMAL_ERROR;
+
+ return THERMAL_SUCCESS;
+}
diff --git a/tools/lib/thermal/include/thermal.h b/tools/lib/thermal/include/thermal.h
new file mode 100644
index 000000000000..1abc560602cf
--- /dev/null
+++ b/tools/lib/thermal/include/thermal.h
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/* Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> */
+#ifndef __LIBTHERMAL_H
+#define __LIBTHERMAL_H
+
+#include <linux/thermal.h>
+
+#ifndef LIBTHERMAL_API
+#define LIBTHERMAL_API __attribute__((visibility("default")))
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct thermal_sampling_ops {
+ int (*tz_temp)(int tz_id, int temp, void *arg);
+};
+
+struct thermal_events_ops {
+ int (*tz_create)(const char *name, int tz_id, void *arg);
+ int (*tz_delete)(int tz_id, void *arg);
+ int (*tz_enable)(int tz_id, void *arg);
+ int (*tz_disable)(int tz_id, void *arg);
+ int (*trip_high)(int tz_id, int trip_id, int temp, void *arg);
+ int (*trip_low)(int tz_id, int trip_id, int temp, void *arg);
+ int (*trip_add)(int tz_id, int trip_id, int type, int temp, int hyst, void *arg);
+ int (*trip_change)(int tz_id, int trip_id, int type, int temp, int hyst, void *arg);
+ int (*trip_delete)(int tz_id, int trip_id, void *arg);
+ int (*cdev_add)(const char *name, int cdev_id, int max_state, void *arg);
+ int (*cdev_delete)(int cdev_id, void *arg);
+ int (*cdev_update)(int cdev_id, int cur_state, void *arg);
+ int (*gov_change)(int tz_id, const char *gov_name, void *arg);
+};
+
+struct thermal_ops {
+ struct thermal_sampling_ops sampling;
+ struct thermal_events_ops events;
+};
+
+struct thermal_trip {
+ int id;
+ int type;
+ int temp;
+ int hyst;
+};
+
+struct thermal_zone {
+ int id;
+ int temp;
+ char name[THERMAL_NAME_LENGTH];
+ char governor[THERMAL_NAME_LENGTH];
+ struct thermal_trip *trip;
+};
+
+struct thermal_cdev {
+ int id;
+ char name[THERMAL_NAME_LENGTH];
+ int max_state;
+ int min_state;
+ int cur_state;
+};
+
+typedef enum {
+ THERMAL_ERROR = -1,
+ THERMAL_SUCCESS = 0,
+} thermal_error_t;
+
+struct thermal_handler;
+
+typedef int (*cb_tz_t)(struct thermal_zone *, void *);
+
+typedef int (*cb_tt_t)(struct thermal_trip *, void *);
+
+typedef int (*cb_tc_t)(struct thermal_cdev *, void *);
+
+LIBTHERMAL_API int for_each_thermal_zone(struct thermal_zone *tz, cb_tz_t cb, void *arg);
+
+LIBTHERMAL_API int for_each_thermal_trip(struct thermal_trip *tt, cb_tt_t cb, void *arg);
+
+LIBTHERMAL_API int for_each_thermal_cdev(struct thermal_cdev *cdev, cb_tc_t cb, void *arg);
+
+LIBTHERMAL_API struct thermal_zone *thermal_zone_find_by_name(struct thermal_zone *tz,
+ const char *name);
+
+LIBTHERMAL_API struct thermal_zone *thermal_zone_find_by_id(struct thermal_zone *tz, int id);
+
+LIBTHERMAL_API struct thermal_zone *thermal_zone_discover(struct thermal_handler *th);
+
+LIBTHERMAL_API struct thermal_handler *thermal_init(struct thermal_ops *ops);
+
+LIBTHERMAL_API void thermal_exit(struct thermal_handler *th);
+
+/*
+ * Netlink thermal events
+ */
+LIBTHERMAL_API thermal_error_t thermal_events_exit(struct thermal_handler *th);
+
+LIBTHERMAL_API thermal_error_t thermal_events_init(struct thermal_handler *th);
+
+LIBTHERMAL_API thermal_error_t thermal_events_handle(struct thermal_handler *th, void *arg);
+
+LIBTHERMAL_API int thermal_events_fd(struct thermal_handler *th);
+
+/*
+ * Netlink thermal commands
+ */
+LIBTHERMAL_API thermal_error_t thermal_cmd_exit(struct thermal_handler *th);
+
+LIBTHERMAL_API thermal_error_t thermal_cmd_init(struct thermal_handler *th);
+
+LIBTHERMAL_API thermal_error_t thermal_cmd_get_tz(struct thermal_handler *th,
+ struct thermal_zone **tz);
+
+LIBTHERMAL_API thermal_error_t thermal_cmd_get_cdev(struct thermal_handler *th,
+ struct thermal_cdev **tc);
+
+LIBTHERMAL_API thermal_error_t thermal_cmd_get_trip(struct thermal_handler *th,
+ struct thermal_zone *tz);
+
+LIBTHERMAL_API thermal_error_t thermal_cmd_get_governor(struct thermal_handler *th,
+ struct thermal_zone *tz);
+
+LIBTHERMAL_API thermal_error_t thermal_cmd_get_temp(struct thermal_handler *th,
+ struct thermal_zone *tz);
+
+/*
+ * Netlink thermal samples
+ */
+LIBTHERMAL_API thermal_error_t thermal_sampling_exit(struct thermal_handler *th);
+
+LIBTHERMAL_API thermal_error_t thermal_sampling_init(struct thermal_handler *th);
+
+LIBTHERMAL_API thermal_error_t thermal_sampling_handle(struct thermal_handler *th, void *arg);
+
+LIBTHERMAL_API int thermal_sampling_fd(struct thermal_handler *th);
+
+#endif /* __LIBTHERMAL_H */
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/tools/lib/thermal/libthermal.map b/tools/lib/thermal/libthermal.map
new file mode 100644
index 000000000000..d5e77738c7a4
--- /dev/null
+++ b/tools/lib/thermal/libthermal.map
@@ -0,0 +1,25 @@
+LIBTHERMAL_0.0.1 {
+ global:
+ thermal_init;
+ for_each_thermal_zone;
+ for_each_thermal_trip;
+ for_each_thermal_cdev;
+ thermal_zone_find_by_name;
+ thermal_zone_find_by_id;
+ thermal_zone_discover;
+ thermal_init;
+ thermal_events_init;
+ thermal_events_handle;
+ thermal_events_fd;
+ thermal_cmd_init;
+ thermal_cmd_get_tz;
+ thermal_cmd_get_cdev;
+ thermal_cmd_get_trip;
+ thermal_cmd_get_governor;
+ thermal_cmd_get_temp;
+ thermal_sampling_init;
+ thermal_sampling_handle;
+ thermal_sampling_fd;
+local:
+ *;
+};
diff --git a/tools/lib/thermal/libthermal.pc.template b/tools/lib/thermal/libthermal.pc.template
new file mode 100644
index 000000000000..ac24d0ab17f5
--- /dev/null
+++ b/tools/lib/thermal/libthermal.pc.template
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+prefix=@PREFIX@
+libdir=@LIBDIR@
+includedir=${prefix}/include
+
+Name: libthermal
+Description: thermal library
+Requires: libnl-3.0 libnl-genl-3.0
+Version: @VERSION@
+Libs: -L${libdir} -lnl-genl-3 -lnl-3
+Cflags: -I${includedir} -I${include}/libnl3
diff --git a/tools/lib/thermal/sampling.c b/tools/lib/thermal/sampling.c
new file mode 100644
index 000000000000..70577423a9f0
--- /dev/null
+++ b/tools/lib/thermal/sampling.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: LGPL-2.1+
+// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <thermal.h>
+#include "thermal_nl.h"
+
+static int handle_thermal_sample(struct nl_msg *n, void *arg)
+{
+ struct nlmsghdr *nlh = nlmsg_hdr(n);
+ struct genlmsghdr *genlhdr = genlmsg_hdr(nlh);
+ struct nlattr *attrs[THERMAL_GENL_ATTR_MAX + 1];
+ struct thermal_handler_param *thp = arg;
+ struct thermal_handler *th = thp->th;
+
+ genlmsg_parse(nlh, 0, attrs, THERMAL_GENL_ATTR_MAX, NULL);
+
+ switch (genlhdr->cmd) {
+
+ case THERMAL_GENL_SAMPLING_TEMP:
+ return th->ops->sampling.tz_temp(
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]),
+ nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TEMP]), arg);
+ default:
+ return THERMAL_ERROR;
+ }
+}
+
+thermal_error_t thermal_sampling_handle(struct thermal_handler *th, void *arg)
+{
+ struct thermal_handler_param thp = { .th = th, .arg = arg };
+
+ if (!th)
+ return THERMAL_ERROR;
+
+ if (nl_cb_set(th->cb_sampling, NL_CB_VALID, NL_CB_CUSTOM,
+ handle_thermal_sample, &thp))
+ return THERMAL_ERROR;
+
+ return nl_recvmsgs(th->sk_sampling, th->cb_sampling);
+}
+
+int thermal_sampling_fd(struct thermal_handler *th)
+{
+ if (!th)
+ return -1;
+
+ return nl_socket_get_fd(th->sk_sampling);
+}
+
+thermal_error_t thermal_sampling_exit(struct thermal_handler *th)
+{
+ if (nl_unsubscribe_thermal(th->sk_sampling, th->cb_sampling,
+ THERMAL_GENL_SAMPLING_GROUP_NAME))
+ return THERMAL_ERROR;
+
+ nl_thermal_disconnect(th->sk_sampling, th->cb_sampling);
+
+ return THERMAL_SUCCESS;
+}
+
+thermal_error_t thermal_sampling_init(struct thermal_handler *th)
+{
+ if (nl_thermal_connect(&th->sk_sampling, &th->cb_sampling))
+ return THERMAL_ERROR;
+
+ if (nl_subscribe_thermal(th->sk_sampling, th->cb_sampling,
+ THERMAL_GENL_SAMPLING_GROUP_NAME))
+ return THERMAL_ERROR;
+
+ return THERMAL_SUCCESS;
+}
diff --git a/tools/lib/thermal/thermal.c b/tools/lib/thermal/thermal.c
new file mode 100644
index 000000000000..72a76dc205bc
--- /dev/null
+++ b/tools/lib/thermal/thermal.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: LGPL-2.1+
+// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
+#include <stdio.h>
+#include <thermal.h>
+
+#include "thermal_nl.h"
+
+int for_each_thermal_cdev(struct thermal_cdev *cdev, cb_tc_t cb, void *arg)
+{
+ int i, ret = 0;
+
+ if (!cdev)
+ return 0;
+
+ for (i = 0; cdev[i].id != -1; i++)
+ ret |= cb(&cdev[i], arg);
+
+ return ret;
+}
+
+int for_each_thermal_trip(struct thermal_trip *tt, cb_tt_t cb, void *arg)
+{
+ int i, ret = 0;
+
+ if (!tt)
+ return 0;
+
+ for (i = 0; tt[i].id != -1; i++)
+ ret |= cb(&tt[i], arg);
+
+ return ret;
+}
+
+int for_each_thermal_zone(struct thermal_zone *tz, cb_tz_t cb, void *arg)
+{
+ int i, ret = 0;
+
+ if (!tz)
+ return 0;
+
+ for (i = 0; tz[i].id != -1; i++)
+ ret |= cb(&tz[i], arg);
+
+ return ret;
+}
+
+struct thermal_zone *thermal_zone_find_by_name(struct thermal_zone *tz,
+ const char *name)
+{
+ int i;
+
+ if (!tz || !name)
+ return NULL;
+
+ for (i = 0; tz[i].id != -1; i++) {
+ if (!strcmp(tz[i].name, name))
+ return &tz[i];
+ }
+
+ return NULL;
+}
+
+struct thermal_zone *thermal_zone_find_by_id(struct thermal_zone *tz, int id)
+{
+ int i;
+
+ if (!tz || id < 0)
+ return NULL;
+
+ for (i = 0; tz[i].id != -1; i++) {
+ if (tz[i].id == id)
+ return &tz[i];
+ }
+
+ return NULL;
+}
+
+static int __thermal_zone_discover(struct thermal_zone *tz, void *th)
+{
+ if (thermal_cmd_get_trip(th, tz) < 0)
+ return -1;
+
+ if (thermal_cmd_get_governor(th, tz))
+ return -1;
+
+ return 0;
+}
+
+struct thermal_zone *thermal_zone_discover(struct thermal_handler *th)
+{
+ struct thermal_zone *tz;
+
+ if (thermal_cmd_get_tz(th, &tz) < 0)
+ return NULL;
+
+ if (for_each_thermal_zone(tz, __thermal_zone_discover, th))
+ return NULL;
+
+ return tz;
+}
+
+void thermal_exit(struct thermal_handler *th)
+{
+ thermal_cmd_exit(th);
+ thermal_events_exit(th);
+ thermal_sampling_exit(th);
+
+ free(th);
+}
+
+struct thermal_handler *thermal_init(struct thermal_ops *ops)
+{
+ struct thermal_handler *th;
+
+ th = malloc(sizeof(*th));
+ if (!th)
+ return NULL;
+ th->ops = ops;
+
+ if (thermal_events_init(th))
+ goto out_free;
+
+ if (thermal_sampling_init(th))
+ goto out_free;
+
+ if (thermal_cmd_init(th))
+ goto out_free;
+
+ return th;
+
+out_free:
+ free(th);
+
+ return NULL;
+}
diff --git a/tools/lib/thermal/thermal_nl.c b/tools/lib/thermal/thermal_nl.c
new file mode 100644
index 000000000000..b05cf9569858
--- /dev/null
+++ b/tools/lib/thermal/thermal_nl.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: LGPL-2.1+
+// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <thermal.h>
+#include "thermal_nl.h"
+
+struct handler_args {
+ const char *group;
+ int id;
+};
+
+static __thread int err;
+static __thread int done;
+
+static int nl_seq_check_handler(struct nl_msg *msg, void *arg)
+{
+ return NL_OK;
+}
+
+static int nl_error_handler(struct sockaddr_nl *nla, struct nlmsgerr *nl_err,
+ void *arg)
+{
+ int *ret = arg;
+
+ if (ret)
+ *ret = nl_err->error;
+
+ return NL_STOP;
+}
+
+static int nl_finish_handler(struct nl_msg *msg, void *arg)
+{
+ int *ret = arg;
+
+ if (ret)
+ *ret = 1;
+
+ return NL_OK;
+}
+
+static int nl_ack_handler(struct nl_msg *msg, void *arg)
+{
+ int *ret = arg;
+
+ if (ret)
+ *ret = 1;
+
+ return NL_OK;
+}
+
+int nl_send_msg(struct nl_sock *sock, struct nl_cb *cb, struct nl_msg *msg,
+ int (*rx_handler)(struct nl_msg *, void *), void *data)
+{
+ if (!rx_handler)
+ return THERMAL_ERROR;
+
+ err = nl_send_auto_complete(sock, msg);
+ if (err < 0)
+ return err;
+
+ nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, rx_handler, data);
+
+ err = done = 0;
+
+ while (err == 0 && done == 0)
+ nl_recvmsgs(sock, cb);
+
+ return err;
+}
+
+static int nl_family_handler(struct nl_msg *msg, void *arg)
+{
+ struct handler_args *grp = arg;
+ struct nlattr *tb[CTRL_ATTR_MAX + 1];
+ struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
+ struct nlattr *mcgrp;
+ int rem_mcgrp;
+
+ nla_parse(tb, CTRL_ATTR_MAX, genlmsg_attrdata(gnlh, 0),
+ genlmsg_attrlen(gnlh, 0), NULL);
+
+ if (!tb[CTRL_ATTR_MCAST_GROUPS])
+ return THERMAL_ERROR;
+
+ nla_for_each_nested(mcgrp, tb[CTRL_ATTR_MCAST_GROUPS], rem_mcgrp) {
+
+ struct nlattr *tb_mcgrp[CTRL_ATTR_MCAST_GRP_MAX + 1];
+
+ nla_parse(tb_mcgrp, CTRL_ATTR_MCAST_GRP_MAX,
+ nla_data(mcgrp), nla_len(mcgrp), NULL);
+
+ if (!tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME] ||
+ !tb_mcgrp[CTRL_ATTR_MCAST_GRP_ID])
+ continue;
+
+ if (strncmp(nla_data(tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME]),
+ grp->group,
+ nla_len(tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME])))
+ continue;
+
+ grp->id = nla_get_u32(tb_mcgrp[CTRL_ATTR_MCAST_GRP_ID]);
+
+ break;
+ }
+
+ return THERMAL_SUCCESS;
+}
+
+static int nl_get_multicast_id(struct nl_sock *sock, struct nl_cb *cb,
+ const char *family, const char *group)
+{
+ struct nl_msg *msg;
+ int ret = 0, ctrlid;
+ struct handler_args grp = {
+ .group = group,
+ .id = -ENOENT,
+ };
+
+ msg = nlmsg_alloc();
+ if (!msg)
+ return THERMAL_ERROR;
+
+ ctrlid = genl_ctrl_resolve(sock, "nlctrl");
+
+ genlmsg_put(msg, 0, 0, ctrlid, 0, 0, CTRL_CMD_GETFAMILY, 0);
+
+ nla_put_string(msg, CTRL_ATTR_FAMILY_NAME, family);
+
+ ret = nl_send_msg(sock, cb, msg, nl_family_handler, &grp);
+ if (ret)
+ goto nla_put_failure;
+
+ ret = grp.id;
+
+nla_put_failure:
+ nlmsg_free(msg);
+ return ret;
+}
+
+int nl_thermal_connect(struct nl_sock **nl_sock, struct nl_cb **nl_cb)
+{
+ struct nl_cb *cb;
+ struct nl_sock *sock;
+
+ cb = nl_cb_alloc(NL_CB_DEFAULT);
+ if (!cb)
+ return THERMAL_ERROR;
+
+ sock = nl_socket_alloc();
+ if (!sock)
+ goto out_cb_free;
+
+ if (genl_connect(sock))
+ goto out_socket_free;
+
+ if (nl_cb_err(cb, NL_CB_CUSTOM, nl_error_handler, &err) ||
+ nl_cb_set(cb, NL_CB_FINISH, NL_CB_CUSTOM, nl_finish_handler, &done) ||
+ nl_cb_set(cb, NL_CB_ACK, NL_CB_CUSTOM, nl_ack_handler, &done) ||
+ nl_cb_set(cb, NL_CB_SEQ_CHECK, NL_CB_CUSTOM, nl_seq_check_handler, &done))
+ return THERMAL_ERROR;
+
+ *nl_sock = sock;
+ *nl_cb = cb;
+
+ return THERMAL_SUCCESS;
+
+out_socket_free:
+ nl_socket_free(sock);
+out_cb_free:
+ nl_cb_put(cb);
+ return THERMAL_ERROR;
+}
+
+void nl_thermal_disconnect(struct nl_sock *nl_sock, struct nl_cb *nl_cb)
+{
+ nl_close(nl_sock);
+ nl_socket_free(nl_sock);
+ nl_cb_put(nl_cb);
+}
+
+int nl_unsubscribe_thermal(struct nl_sock *nl_sock, struct nl_cb *nl_cb,
+ const char *group)
+{
+ int mcid;
+
+ mcid = nl_get_multicast_id(nl_sock, nl_cb, THERMAL_GENL_FAMILY_NAME,
+ group);
+ if (mcid < 0)
+ return THERMAL_ERROR;
+
+ if (nl_socket_drop_membership(nl_sock, mcid))
+ return THERMAL_ERROR;
+
+ return THERMAL_SUCCESS;
+}
+
+int nl_subscribe_thermal(struct nl_sock *nl_sock, struct nl_cb *nl_cb,
+ const char *group)
+{
+ int mcid;
+
+ mcid = nl_get_multicast_id(nl_sock, nl_cb, THERMAL_GENL_FAMILY_NAME,
+ group);
+ if (mcid < 0)
+ return THERMAL_ERROR;
+
+ if (nl_socket_add_membership(nl_sock, mcid))
+ return THERMAL_ERROR;
+
+ return THERMAL_SUCCESS;
+}
diff --git a/tools/lib/thermal/thermal_nl.h b/tools/lib/thermal/thermal_nl.h
new file mode 100644
index 000000000000..ddf635642f07
--- /dev/null
+++ b/tools/lib/thermal/thermal_nl.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/* Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> */
+#ifndef __THERMAL_H
+#define __THERMAL_H
+
+#include <netlink/netlink.h>
+#include <netlink/genl/genl.h>
+#include <netlink/genl/mngt.h>
+#include <netlink/genl/ctrl.h>
+
+struct thermal_handler {
+ int done;
+ int error;
+ struct thermal_ops *ops;
+ struct nl_msg *msg;
+ struct nl_sock *sk_event;
+ struct nl_sock *sk_sampling;
+ struct nl_sock *sk_cmd;
+ struct nl_cb *cb_cmd;
+ struct nl_cb *cb_event;
+ struct nl_cb *cb_sampling;
+};
+
+struct thermal_handler_param {
+ struct thermal_handler *th;
+ void *arg;
+};
+
+/*
+ * Low level netlink
+ */
+extern int nl_subscribe_thermal(struct nl_sock *nl_sock, struct nl_cb *nl_cb,
+ const char *group);
+
+extern int nl_unsubscribe_thermal(struct nl_sock *nl_sock, struct nl_cb *nl_cb,
+ const char *group);
+
+extern int nl_thermal_connect(struct nl_sock **nl_sock, struct nl_cb **nl_cb);
+
+extern void nl_thermal_disconnect(struct nl_sock *nl_sock, struct nl_cb *nl_cb);
+
+extern int nl_send_msg(struct nl_sock *sock, struct nl_cb *nl_cb, struct nl_msg *msg,
+ int (*rx_handler)(struct nl_msg *, void *),
+ void *data);
+
+#endif /* __THERMAL_H */
diff --git a/tools/lib/traceevent/.gitignore b/tools/lib/traceevent/.gitignore
deleted file mode 100644
index 7123c70b9ebc..000000000000
--- a/tools/lib/traceevent/.gitignore
+++ /dev/null
@@ -1,4 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-TRACEEVENT-CFLAGS
-libtraceevent-dynamic-list
-libtraceevent.so.*
diff --git a/tools/lib/traceevent/Build b/tools/lib/traceevent/Build
deleted file mode 100644
index f9a5d79578f5..000000000000
--- a/tools/lib/traceevent/Build
+++ /dev/null
@@ -1,8 +0,0 @@
-libtraceevent-y += event-parse.o
-libtraceevent-y += event-plugin.o
-libtraceevent-y += trace-seq.o
-libtraceevent-y += parse-filter.o
-libtraceevent-y += parse-utils.o
-libtraceevent-y += kbuffer-parse.o
-libtraceevent-y += tep_strerror.o
-libtraceevent-y += event-parse-api.o
diff --git a/tools/lib/traceevent/Documentation/Makefile b/tools/lib/traceevent/Documentation/Makefile
deleted file mode 100644
index aa72ab96c3c1..000000000000
--- a/tools/lib/traceevent/Documentation/Makefile
+++ /dev/null
@@ -1,207 +0,0 @@
-include ../../../scripts/Makefile.include
-include ../../../scripts/utilities.mak
-
-# This Makefile and manpage XSL files were taken from tools/perf/Documentation
-# and modified for libtraceevent.
-
-MAN3_TXT= \
- $(wildcard libtraceevent-*.txt) \
- libtraceevent.txt
-
-MAN_TXT = $(MAN3_TXT)
-_MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT))
-_MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT))
-_DOC_MAN3=$(patsubst %.txt,%.3,$(MAN3_TXT))
-
-MAN_XML=$(addprefix $(OUTPUT),$(_MAN_XML))
-MAN_HTML=$(addprefix $(OUTPUT),$(_MAN_HTML))
-DOC_MAN3=$(addprefix $(OUTPUT),$(_DOC_MAN3))
-
-# Make the path relative to DESTDIR, not prefix
-ifndef DESTDIR
-prefix?=$(HOME)
-endif
-bindir?=$(prefix)/bin
-htmldir?=$(prefix)/share/doc/libtraceevent-doc
-pdfdir?=$(prefix)/share/doc/libtraceevent-doc
-mandir?=$(prefix)/share/man
-man3dir=$(mandir)/man3
-
-ASCIIDOC=asciidoc
-ASCIIDOC_EXTRA = --unsafe -f asciidoc.conf
-ASCIIDOC_HTML = xhtml11
-MANPAGE_XSL = manpage-normal.xsl
-XMLTO_EXTRA =
-INSTALL?=install
-RM ?= rm -f
-
-ifdef USE_ASCIIDOCTOR
-ASCIIDOC = asciidoctor
-ASCIIDOC_EXTRA = -a compat-mode
-ASCIIDOC_EXTRA += -I. -rasciidoctor-extensions
-ASCIIDOC_EXTRA += -a mansource="libtraceevent" -a manmanual="libtraceevent Manual"
-ASCIIDOC_HTML = xhtml5
-endif
-
-XMLTO=xmlto
-
-_tmp_tool_path := $(call get-executable,$(ASCIIDOC))
-ifeq ($(_tmp_tool_path),)
- missing_tools = $(ASCIIDOC)
-endif
-
-ifndef USE_ASCIIDOCTOR
-_tmp_tool_path := $(call get-executable,$(XMLTO))
-ifeq ($(_tmp_tool_path),)
- missing_tools += $(XMLTO)
-endif
-endif
-
-#
-# For asciidoc ...
-# -7.1.2, no extra settings are needed.
-# 8.0-, set ASCIIDOC8.
-#
-
-#
-# For docbook-xsl ...
-# -1.68.1, set ASCIIDOC_NO_ROFF? (based on changelog from 1.73.0)
-# 1.69.0, no extra settings are needed?
-# 1.69.1-1.71.0, set DOCBOOK_SUPPRESS_SP?
-# 1.71.1, no extra settings are needed?
-# 1.72.0, set DOCBOOK_XSL_172.
-# 1.73.0-, set ASCIIDOC_NO_ROFF
-#
-
-#
-# If you had been using DOCBOOK_XSL_172 in an attempt to get rid
-# of 'the ".ft C" problem' in your generated manpages, and you
-# instead ended up with weird characters around callouts, try
-# using ASCIIDOC_NO_ROFF instead (it works fine with ASCIIDOC8).
-#
-
-ifdef ASCIIDOC8
-ASCIIDOC_EXTRA += -a asciidoc7compatible
-endif
-ifdef DOCBOOK_XSL_172
-ASCIIDOC_EXTRA += -a libtraceevent-asciidoc-no-roff
-MANPAGE_XSL = manpage-1.72.xsl
-else
- ifdef ASCIIDOC_NO_ROFF
- # docbook-xsl after 1.72 needs the regular XSL, but will not
- # pass-thru raw roff codes from asciidoc.conf, so turn them off.
- ASCIIDOC_EXTRA += -a libtraceevent-asciidoc-no-roff
- endif
-endif
-ifdef MAN_BOLD_LITERAL
-XMLTO_EXTRA += -m manpage-bold-literal.xsl
-endif
-ifdef DOCBOOK_SUPPRESS_SP
-XMLTO_EXTRA += -m manpage-suppress-sp.xsl
-endif
-
-SHELL_PATH ?= $(SHELL)
-# Shell quote;
-SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH))
-
-DESTDIR ?=
-DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
-
-export DESTDIR DESTDIR_SQ
-
-#
-# Please note that there is a minor bug in asciidoc.
-# The version after 6.0.3 _will_ include the patch found here:
-# http://marc.theaimsgroup.com/?l=libtraceevent&m=111558757202243&w=2
-#
-# Until that version is released you may have to apply the patch
-# yourself - yes, all 6 characters of it!
-#
-QUIET_SUBDIR0 = +$(MAKE) -C # space to separate -C and subdir
-QUIET_SUBDIR1 =
-
-ifneq ($(findstring $(MAKEFLAGS),w),w)
-PRINT_DIR = --no-print-directory
-else # "make -w"
-NO_SUBDIR = :
-endif
-
-ifneq ($(findstring $(MAKEFLAGS),s),s)
-ifneq ($(V),1)
- QUIET_ASCIIDOC = @echo ' ASCIIDOC '$@;
- QUIET_XMLTO = @echo ' XMLTO '$@;
- QUIET_SUBDIR0 = +@subdir=
- QUIET_SUBDIR1 = ;$(NO_SUBDIR) \
- echo ' SUBDIR ' $$subdir; \
- $(MAKE) $(PRINT_DIR) -C $$subdir
- export V
-endif
-endif
-
-all: html man
-
-man: man3
-man3: $(DOC_MAN3)
-
-html: $(MAN_HTML)
-
-$(MAN_HTML) $(DOC_MAN3): asciidoc.conf
-
-install: install-man
-
-check-man-tools:
-ifdef missing_tools
- $(error "You need to install $(missing_tools) for man pages")
-endif
-
-do-install-man: man
- $(call QUIET_INSTALL, Documentation-man) \
- $(INSTALL) -d -m 755 $(DESTDIR)$(man3dir); \
- $(INSTALL) -m 644 $(DOC_MAN3) $(DESTDIR)$(man3dir);
-
-install-man: check-man-tools man do-install-man
-
-uninstall: uninstall-man
-
-uninstall-man:
- $(call QUIET_UNINST, Documentation-man) \
- $(Q)$(RM) $(addprefix $(DESTDIR)$(man3dir)/,$(DOC_MAN3))
-
-
-ifdef missing_tools
- DO_INSTALL_MAN = $(warning Please install $(missing_tools) to have the man pages installed)
-else
- DO_INSTALL_MAN = do-install-man
-endif
-
-CLEAN_FILES = \
- $(MAN_XML) $(addsuffix +,$(MAN_XML)) \
- $(MAN_HTML) $(addsuffix +,$(MAN_HTML)) \
- $(DOC_MAN3) *.3
-
-clean:
- $(call QUIET_CLEAN, Documentation) $(RM) $(CLEAN_FILES)
-
-ifdef USE_ASCIIDOCTOR
-$(OUTPUT)%.3 : $(OUTPUT)%.txt
- $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
- $(ASCIIDOC) -b manpage -d manpage \
- $(ASCIIDOC_EXTRA) -alibtraceevent_version=$(EVENT_PARSE_VERSION) -o $@+ $< && \
- mv $@+ $@
-endif
-
-$(OUTPUT)%.3 : $(OUTPUT)%.xml
- $(QUIET_XMLTO)$(RM) $@ && \
- $(XMLTO) -o $(OUTPUT). -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
-
-$(OUTPUT)%.xml : %.txt
- $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
- $(ASCIIDOC) -b docbook -d manpage \
- $(ASCIIDOC_EXTRA) -alibtraceevent_version=$(EVENT_PARSE_VERSION) -o $@+ $< && \
- mv $@+ $@
-
-$(MAN_HTML): $(OUTPUT)%.html : %.txt
- $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
- $(ASCIIDOC) -b $(ASCIIDOC_HTML) -d manpage \
- $(ASCIIDOC_EXTRA) -aperf_version=$(EVENT_PARSE_VERSION) -o $@+ $< && \
- mv $@+ $@
diff --git a/tools/lib/traceevent/Documentation/asciidoc.conf b/tools/lib/traceevent/Documentation/asciidoc.conf
deleted file mode 100644
index 07595717f06e..000000000000
--- a/tools/lib/traceevent/Documentation/asciidoc.conf
+++ /dev/null
@@ -1,120 +0,0 @@
-## linktep: macro
-#
-# Usage: linktep:command[manpage-section]
-#
-# Note, {0} is the manpage section, while {target} is the command.
-#
-# Show TEP link as: <command>(<section>); if section is defined, else just show
-# the command.
-
-[macros]
-(?su)[\\]?(?P<name>linktep):(?P<target>\S*?)\[(?P<attrlist>.*?)\]=
-
-[attributes]
-asterisk=&#42;
-plus=&#43;
-caret=&#94;
-startsb=&#91;
-endsb=&#93;
-tilde=&#126;
-
-ifdef::backend-docbook[]
-[linktep-inlinemacro]
-{0%{target}}
-{0#<citerefentry>}
-{0#<refentrytitle>{target}</refentrytitle><manvolnum>{0}</manvolnum>}
-{0#</citerefentry>}
-endif::backend-docbook[]
-
-ifdef::backend-docbook[]
-ifndef::tep-asciidoc-no-roff[]
-# "unbreak" docbook-xsl v1.68 for manpages. v1.69 works with or without this.
-# v1.72 breaks with this because it replaces dots not in roff requests.
-[listingblock]
-<example><title>{title}</title>
-<literallayout>
-ifdef::doctype-manpage[]
-&#10;.ft C&#10;
-endif::doctype-manpage[]
-|
-ifdef::doctype-manpage[]
-&#10;.ft&#10;
-endif::doctype-manpage[]
-</literallayout>
-{title#}</example>
-endif::tep-asciidoc-no-roff[]
-
-ifdef::tep-asciidoc-no-roff[]
-ifdef::doctype-manpage[]
-# The following two small workarounds insert a simple paragraph after screen
-[listingblock]
-<example><title>{title}</title>
-<literallayout>
-|
-</literallayout><simpara></simpara>
-{title#}</example>
-
-[verseblock]
-<formalpara{id? id="{id}"}><title>{title}</title><para>
-{title%}<literallayout{id? id="{id}"}>
-{title#}<literallayout>
-|
-</literallayout>
-{title#}</para></formalpara>
-{title%}<simpara></simpara>
-endif::doctype-manpage[]
-endif::tep-asciidoc-no-roff[]
-endif::backend-docbook[]
-
-ifdef::doctype-manpage[]
-ifdef::backend-docbook[]
-[header]
-template::[header-declarations]
-<refentry>
-<refmeta>
-<refentrytitle>{mantitle}</refentrytitle>
-<manvolnum>{manvolnum}</manvolnum>
-<refmiscinfo class="source">libtraceevent</refmiscinfo>
-<refmiscinfo class="version">{libtraceevent_version}</refmiscinfo>
-<refmiscinfo class="manual">libtraceevent Manual</refmiscinfo>
-</refmeta>
-<refnamediv>
- <refname>{manname1}</refname>
- <refname>{manname2}</refname>
- <refname>{manname3}</refname>
- <refname>{manname4}</refname>
- <refname>{manname5}</refname>
- <refname>{manname6}</refname>
- <refname>{manname7}</refname>
- <refname>{manname8}</refname>
- <refname>{manname9}</refname>
- <refname>{manname10}</refname>
- <refname>{manname11}</refname>
- <refname>{manname12}</refname>
- <refname>{manname13}</refname>
- <refname>{manname14}</refname>
- <refname>{manname15}</refname>
- <refname>{manname16}</refname>
- <refname>{manname17}</refname>
- <refname>{manname18}</refname>
- <refname>{manname19}</refname>
- <refname>{manname20}</refname>
- <refname>{manname21}</refname>
- <refname>{manname22}</refname>
- <refname>{manname23}</refname>
- <refname>{manname24}</refname>
- <refname>{manname25}</refname>
- <refname>{manname26}</refname>
- <refname>{manname27}</refname>
- <refname>{manname28}</refname>
- <refname>{manname29}</refname>
- <refname>{manname30}</refname>
- <refpurpose>{manpurpose}</refpurpose>
-</refnamediv>
-endif::backend-docbook[]
-endif::doctype-manpage[]
-
-ifdef::backend-xhtml11[]
-[linktep-inlinemacro]
-<a href="{target}.html">{target}{0?({0})}</a>
-endif::backend-xhtml11[]
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-commands.txt b/tools/lib/traceevent/Documentation/libtraceevent-commands.txt
deleted file mode 100644
index bec552001f8e..000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-commands.txt
+++ /dev/null
@@ -1,153 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_register_comm, tep_override_comm, tep_pid_is_registered,
-tep_data_comm_from_pid, tep_data_pid_from_comm, tep_cmdline_pid -
-Manage pid to process name mappings.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-int *tep_register_comm*(struct tep_handle pass:[*]_tep_, const char pass:[*]_comm_, int _pid_);
-int *tep_override_comm*(struct tep_handle pass:[*]_tep_, const char pass:[*]_comm_, int _pid_);
-bool *tep_is_pid_registered*(struct tep_handle pass:[*]_tep_, int _pid_);
-const char pass:[*]*tep_data_comm_from_pid*(struct tep_handle pass:[*]_pevent_, int _pid_);
-struct cmdline pass:[*]*tep_data_pid_from_comm*(struct tep_handle pass:[*]_pevent_, const char pass:[*]_comm_, struct cmdline pass:[*]_next_);
-int *tep_cmdline_pid*(struct tep_handle pass:[*]_pevent_, struct cmdline pass:[*]_cmdline_);
---
-
-DESCRIPTION
------------
-These functions can be used to handle the mapping between pid and process name.
-The library builds a cache of these mappings, which is used to display the name
-of the process, instead of its pid. This information can be retrieved from
-tracefs/saved_cmdlines file.
-
-The _tep_register_comm()_ function registers a _pid_ / process name mapping.
-If a command with the same _pid_ is already registered, an error is returned.
-The _pid_ argument is the process ID, the _comm_ argument is the process name,
-_tep_ is the event context. The _comm_ is duplicated internally.
-
-The _tep_override_comm()_ function registers a _pid_ / process name mapping.
-If a process with the same pid is already registered, the process name string is
-udapted with the new one. The _pid_ argument is the process ID, the _comm_
-argument is the process name, _tep_ is the event context. The _comm_ is
-duplicated internally.
-
-The _tep_is_pid_registered()_ function checks if a pid has a process name
-mapping registered. The _pid_ argument is the process ID, _tep_ is the event
-context.
-
-The _tep_data_comm_from_pid()_ function returns the process name for a given
-pid. The _pid_ argument is the process ID, _tep_ is the event context.
-The returned string should not be freed, but will be freed when the _tep_
-handler is closed.
-
-The _tep_data_pid_from_comm()_ function returns a pid for a given process name.
-The _comm_ argument is the process name, _tep_ is the event context.
-The argument _next_ is the cmdline structure to search for the next pid.
-As there may be more than one pid for a given process, the result of this call
-can be passed back into a recurring call in the _next_ parameter, to search for
-the next pid. If _next_ is NULL, it will return the first pid associated with
-the _comm_. The function performs a linear search, so it may be slow.
-
-The _tep_cmdline_pid()_ function returns the pid associated with a given
-_cmdline_. The _tep_ argument is the event context.
-
-RETURN VALUE
-------------
-_tep_register_comm()_ function returns 0 on success. In case of an error -1 is
-returned and errno is set to indicate the cause of the problem: ENOMEM, if there
-is not enough memory to duplicate the _comm_ or EEXIST if a mapping for this
-_pid_ is already registered.
-
-_tep_override_comm()_ function returns 0 on success. In case of an error -1 is
-returned and errno is set to indicate the cause of the problem: ENOMEM, if there
-is not enough memory to duplicate the _comm_.
-
-_tep_is_pid_registered()_ function returns true if the _pid_ has a process name
-mapped to it, false otherwise.
-
-_tep_data_comm_from_pid()_ function returns the process name as string, or the
-string "<...>" if there is no mapping for the given pid.
-
-_tep_data_pid_from_comm()_ function returns a pointer to a struct cmdline, that
-holds a pid for a given process, or NULL if none is found. This result can be
-passed back into a recurring call as the _next_ parameter of the function.
-
-_tep_cmdline_pid()_ functions returns the pid for the give cmdline. If _cmdline_
- is NULL, then -1 is returned.
-
-EXAMPLE
--------
-The following example registers pid for command "ls", in context of event _tep_
-and performs various searches for pid / process name mappings:
-[source,c]
---
-#include <event-parse.h>
-...
-int ret;
-int ls_pid = 1021;
-struct tep_handle *tep = tep_alloc();
-...
- ret = tep_register_comm(tep, "ls", ls_pid);
- if (ret != 0 && errno == EEXIST)
- ret = tep_override_comm(tep, "ls", ls_pid);
- if (ret != 0) {
- /* Failed to register pid / command mapping */
- }
-...
- if (tep_is_pid_registered(tep, ls_pid) == 0) {
- /* Command mapping for ls_pid is not registered */
- }
-...
- const char *comm = tep_data_comm_from_pid(tep, ls_pid);
- if (comm) {
- /* Found process name for ls_pid */
- }
-...
- int pid;
- struct cmdline *cmd = tep_data_pid_from_comm(tep, "ls", NULL);
- while (cmd) {
- pid = tep_cmdline_pid(tep, cmd);
- /* Found pid for process "ls" */
- cmd = tep_data_pid_from_comm(tep, "ls", cmd);
- }
---
-FILES
------
-[verse]
---
-*event-parse.h*
- Header file to include in order to have access to the library APIs.
-*-ltraceevent*
- Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-cpus.txt b/tools/lib/traceevent/Documentation/libtraceevent-cpus.txt
deleted file mode 100644
index 5ad70e43b752..000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-cpus.txt
+++ /dev/null
@@ -1,77 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_get_cpus, tep_set_cpus - Get / set the number of CPUs, which have a tracing
-buffer representing it. Note, the buffer may be empty.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-int *tep_get_cpus*(struct tep_handle pass:[*]_tep_);
-void *tep_set_cpus*(struct tep_handle pass:[*]_tep_, int _cpus_);
---
-
-DESCRIPTION
------------
-The _tep_get_cpus()_ function gets the number of CPUs, which have a tracing
-buffer representing it. The _tep_ argument is trace event parser context.
-
-The _tep_set_cpus()_ function sets the number of CPUs, which have a tracing
-buffer representing it. The _tep_ argument is trace event parser context.
-The _cpu_ argument is the number of CPUs with tracing data.
-
-RETURN VALUE
-------------
-The _tep_get_cpus()_ functions returns the number of CPUs, which have tracing
-data recorded.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
- tep_set_cpus(tep, 5);
-...
- printf("We have tracing data for %d CPUs", tep_get_cpus(tep));
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
- Header file to include in order to have access to the library APIs.
-*-ltraceevent*
- Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-endian_read.txt b/tools/lib/traceevent/Documentation/libtraceevent-endian_read.txt
deleted file mode 100644
index e64851b6e189..000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-endian_read.txt
+++ /dev/null
@@ -1,78 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_read_number - Reads a number from raw data.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-unsigned long long *tep_read_number*(struct tep_handle pass:[*]_tep_, const void pass:[*]_ptr_, int _size_);
---
-
-DESCRIPTION
------------
-The _tep_read_number()_ function reads an integer from raw data, taking into
-account the endianness of the raw data and the current host. The _tep_ argument
-is the trace event parser context. The _ptr_ is a pointer to the raw data, where
-the integer is, and the _size_ is the size of the integer.
-
-RETURN VALUE
-------------
-The _tep_read_number()_ function returns the integer in the byte order of
-the current host. In case of an error, 0 is returned.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-void process_record(struct tep_record *record)
-{
- int offset = 24;
- int data = tep_read_number(tep, record->data + offset, 4);
-
- /* Read the 4 bytes at the offset 24 of data as an integer */
-}
-...
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
- Header file to include in order to have access to the library APIs.
-*-ltraceevent*
- Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-event_find.txt b/tools/lib/traceevent/Documentation/libtraceevent-event_find.txt
deleted file mode 100644
index 7bc062c9f76f..000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-event_find.txt
+++ /dev/null
@@ -1,103 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_find_event,tep_find_event_by_name,tep_find_event_by_record -
-Find events by given key.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-struct tep_event pass:[*]*tep_find_event*(struct tep_handle pass:[*]_tep_, int _id_);
-struct tep_event pass:[*]*tep_find_event_by_name*(struct tep_handle pass:[*]_tep_, const char pass:[*]_sys_, const char pass:[*]_name_);
-struct tep_event pass:[*]*tep_find_event_by_record*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_record_);
---
-
-DESCRIPTION
------------
-This set of functions can be used to search for an event, based on a given
-criteria. All functions require a pointer to a _tep_, trace event parser
-context.
-
-The _tep_find_event()_ function searches for an event by given event _id_. The
-event ID is assigned dynamically and can be viewed in event's format file,
-"ID" field.
-
-The tep_find_event_by_name()_ function searches for an event by given
-event _name_, under the system _sys_. If the _sys_ is NULL (not specified),
-the first event with _name_ is returned.
-
-The tep_find_event_by_record()_ function searches for an event from a given
-_record_.
-
-RETURN VALUE
-------------
-All these functions return a pointer to the found event, or NULL if there is no
-such event.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-struct tep_event *event;
-
-event = tep_find_event(tep, 1857);
-if (event == NULL) {
- /* There is no event with ID 1857 */
-}
-
-event = tep_find_event_by_name(tep, "kvm", "kvm_exit");
-if (event == NULL) {
- /* There is no kvm_exit event, from kvm system */
-}
-
-void event_from_record(struct tep_record *record)
-{
- struct tep_event *event = tep_find_event_by_record(tep, record);
- if (event == NULL) {
- /* There is no event from given record */
- }
-}
-...
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
- Header file to include in order to have access to the library APIs.
-*-ltraceevent*
- Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-event_get.txt b/tools/lib/traceevent/Documentation/libtraceevent-event_get.txt
deleted file mode 100644
index 6525092fc417..000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-event_get.txt
+++ /dev/null
@@ -1,99 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_get_event, tep_get_first_event, tep_get_events_count - Access events.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-struct tep_event pass:[*]*tep_get_event*(struct tep_handle pass:[*]_tep_, int _index_);
-struct tep_event pass:[*]*tep_get_first_event*(struct tep_handle pass:[*]_tep_);
-int *tep_get_events_count*(struct tep_handle pass:[*]_tep_);
---
-
-DESCRIPTION
------------
-The _tep_get_event()_ function returns a pointer to event at the given _index_.
-The _tep_ argument is trace event parser context, the _index_ is the index of
-the requested event.
-
-The _tep_get_first_event()_ function returns a pointer to the first event.
-As events are stored in an array, this function returns the pointer to the
-beginning of the array. The _tep_ argument is trace event parser context.
-
-The _tep_get_events_count()_ function returns the number of the events
-in the array. The _tep_ argument is trace event parser context.
-
-RETURN VALUE
-------------
-The _tep_get_event()_ returns a pointer to the event located at _index_.
-NULL is returned in case of error, in case there are no events or _index_ is
-out of range.
-
-The _tep_get_first_event()_ returns a pointer to the first event. NULL is
-returned in case of error, or in case there are no events.
-
-The _tep_get_events_count()_ returns the number of the events. 0 is
-returned in case of error, or in case there are no events.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-int i,count = tep_get_events_count(tep);
-struct tep_event *event, *events = tep_get_first_event(tep);
-
-if (events == NULL) {
- /* There are no events */
-} else {
- for (i = 0; i < count; i++) {
- event = (events+i);
- /* process events[i] */
- }
-
- /* Get the last event */
- event = tep_get_event(tep, count-1);
-}
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
- Header file to include in order to have access to the library APIs.
-*-ltraceevent*
- Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-event_list.txt b/tools/lib/traceevent/Documentation/libtraceevent-event_list.txt
deleted file mode 100644
index fba350e5a4cb..000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-event_list.txt
+++ /dev/null
@@ -1,122 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_list_events, tep_list_events_copy -
-Get list of events, sorted by given criteria.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-enum *tep_event_sort_type* {
- _TEP_EVENT_SORT_ID_,
- _TEP_EVENT_SORT_NAME_,
- _TEP_EVENT_SORT_SYSTEM_,
-};
-
-struct tep_event pass:[*]pass:[*]*tep_list_events*(struct tep_handle pass:[*]_tep_, enum tep_event_sort_type _sort_type_);
-struct tep_event pass:[*]pass:[*]*tep_list_events_copy*(struct tep_handle pass:[*]_tep_, enum tep_event_sort_type _sort_type_);
---
-
-DESCRIPTION
------------
-The _tep_list_events()_ function returns an array of pointers to the events,
-sorted by the _sort_type_ criteria. The last element of the array is NULL.
-The returned memory must not be freed, it is managed by the library.
-The function is not thread safe. The _tep_ argument is trace event parser
-context. The _sort_type_ argument is the required sort criteria:
-[verse]
---
- _TEP_EVENT_SORT_ID_ - sort by the event ID.
- _TEP_EVENT_SORT_NAME_ - sort by the event (name, system, id) triplet.
- _TEP_EVENT_SORT_SYSTEM_ - sort by the event (system, name, id) triplet.
---
-
-The _tep_list_events_copy()_ is a thread safe version of _tep_list_events()_.
-It has the same behavior, but the returned array is allocated internally and
-must be freed by the caller. Note that the content of the array must not be
-freed (see the EXAMPLE below).
-
-RETURN VALUE
-------------
-The _tep_list_events()_ function returns an array of pointers to events.
-In case of an error, NULL is returned. The returned array must not be freed,
-it is managed by the library.
-
-The _tep_list_events_copy()_ function returns an array of pointers to events.
-In case of an error, NULL is returned. The returned array must be freed by
-the caller.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-int i;
-struct tep_event_format **events;
-
-i=0;
-events = tep_list_events(tep, TEP_EVENT_SORT_ID);
-if (events == NULL) {
- /* Failed to get the events, sorted by ID */
-} else {
- while(events[i]) {
- /* walk through the list of the events, sorted by ID */
- i++;
- }
-}
-
-i=0;
-events = tep_list_events_copy(tep, TEP_EVENT_SORT_NAME);
-if (events == NULL) {
- /* Failed to get the events, sorted by name */
-} else {
- while(events[i]) {
- /* walk through the list of the events, sorted by name */
- i++;
- }
- free(events);
-}
-
-...
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
- Header file to include in order to have access to the library APIs.
-*-ltraceevent*
- Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-event_print.txt b/tools/lib/traceevent/Documentation/libtraceevent-event_print.txt
deleted file mode 100644
index 2c6a61811118..000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-event_print.txt
+++ /dev/null
@@ -1,130 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_print_event - Writes event information into a trace sequence.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-*#include <trace-seq.h>*
-
-void *tep_print_event*(struct tep_handle pass:[*]_tep_, struct trace_seqpass:[*]_s_, struct tep_record pass:[*]_record_, const char pass:[*]_fmt_, _..._)
---
-
-DESCRIPTION
------------
-
-The _tep_print_event()_ function parses the event information of the given
-_record_ and writes it into the trace sequence _s_, according to the format
-string _fmt_. The desired information is specified after the format string.
-The _fmt_ is printf-like format string, following arguments are supported:
-[verse]
---
- TEP_PRINT_PID, "%d" - PID of the event.
- TEP_PRINT_CPU, "%d" - Event CPU.
- TEP_PRINT_COMM, "%s" - Event command string.
- TEP_PRINT_NAME, "%s" - Event name.
- TEP_PRINT_LATENCY, "%s" - Latency of the event. It prints 4 or more
- fields - interrupt state, scheduling state,
- current context, and preemption count.
- Field 1 is the interrupt enabled state:
- d : Interrupts are disabled
- . : Interrupts are enabled
- X : The architecture does not support this
- information
- Field 2 is the "need resched" state.
- N : The task is set to call the scheduler when
- possible, as another higher priority task
- may need to be scheduled in.
- . : The task is not set to call the scheduler.
- Field 3 is the context state.
- . : Normal context
- s : Soft interrupt context
- h : Hard interrupt context
- H : Hard interrupt context which triggered
- during soft interrupt context.
- z : NMI context
- Z : NMI context which triggered during hard
- interrupt context
- Field 4 is the preemption count.
- . : The preempt count is zero.
- On preemptible kernels (where the task can be scheduled
- out in arbitrary locations while in kernel context), the
- preempt count, when non zero, will prevent the kernel
- from scheduling out the current task. The preempt count
- number is displayed when it is not zero.
- Depending on the kernel, it may show other fields
- (lock depth, or migration disabled, which are unique to
- specialized kernels).
- TEP_PRINT_TIME, %d - event time stamp. A divisor and precision can be
- specified as part of this format string:
- "%precision.divisord". Example:
- "%3.1000d" - divide the time by 1000 and print the first
- 3 digits before the dot. Thus, the time stamp
- "123456000" will be printed as "123.456"
- TEP_PRINT_INFO, "%s" - event information.
- TEP_PRINT_INFO_RAW, "%s" - event information, in raw format.
-
---
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-#include <trace-seq.h>
-...
-struct trace_seq seq;
-trace_seq_init(&seq);
-struct tep_handle *tep = tep_alloc();
-...
-void print_my_event(struct tep_record *record)
-{
- trace_seq_reset(&seq);
- tep_print_event(tep, s, record, "%16s-%-5d [%03d] %s %6.1000d %s %s",
- TEP_PRINT_COMM, TEP_PRINT_PID, TEP_PRINT_CPU,
- TEP_PRINT_LATENCY, TEP_PRINT_TIME, TEP_PRINT_NAME,
- TEP_PRINT_INFO);
-}
-...
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
- Header file to include in order to have access to the library APIs.
-*trace-seq.h*
- Header file to include in order to have access to trace sequences related APIs.
- Trace sequences are used to allow a function to call several other functions
- to create a string of data to use.
-*-ltraceevent*
- Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-field_find.txt b/tools/lib/traceevent/Documentation/libtraceevent-field_find.txt
deleted file mode 100644
index 0896af5b9eff..000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-field_find.txt
+++ /dev/null
@@ -1,118 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_find_common_field, tep_find_field, tep_find_any_field -
-Search for a field in an event.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-struct tep_format_field pass:[*]*tep_find_common_field*(struct tep_event pass:[*]_event_, const char pass:[*]_name_);
-struct tep_format_field pass:[*]*tep_find_field*(struct tep_event_ormat pass:[*]_event_, const char pass:[*]_name_);
-struct tep_format_field pass:[*]*tep_find_any_field*(struct tep_event pass:[*]_event_, const char pass:[*]_name_);
---
-
-DESCRIPTION
------------
-These functions search for a field with given name in an event. The field
-returned can be used to find the field content from within a data record.
-
-The _tep_find_common_field()_ function searches for a common field with _name_
-in the _event_.
-
-The _tep_find_field()_ function searches for an event specific field with
-_name_ in the _event_.
-
-The _tep_find_any_field()_ function searches for any field with _name_ in the
-_event_.
-
-RETURN VALUE
-------------
-The _tep_find_common_field(), _tep_find_field()_ and _tep_find_any_field()_
-functions return a pointer to the found field, or NULL in case there is no field
-with the requested name.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-void get_htimer_info(struct tep_handle *tep, struct tep_record *record)
-{
- struct tep_format_field *field;
- struct tep_event *event;
- long long softexpires;
- int mode;
- int pid;
-
- event = tep_find_event_by_name(tep, "timer", "hrtimer_start");
-
- field = tep_find_common_field(event, "common_pid");
- if (field == NULL) {
- /* Cannot find "common_pid" field in the event */
- } else {
- /* Get pid from the data record */
- pid = tep_read_number(tep, record->data + field->offset,
- field->size);
- }
-
- field = tep_find_field(event, "softexpires");
- if (field == NULL) {
- /* Cannot find "softexpires" event specific field in the event */
- } else {
- /* Get softexpires parameter from the data record */
- softexpires = tep_read_number(tep, record->data + field->offset,
- field->size);
- }
-
- field = tep_find_any_field(event, "mode");
- if (field == NULL) {
- /* Cannot find "mode" field in the event */
- } else
- {
- /* Get mode parameter from the data record */
- mode = tep_read_number(tep, record->data + field->offset,
- field->size);
- }
-}
-...
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
- Header file to include in order to have access to the library APIs.
-*-ltraceevent*
- Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-field_get_val.txt b/tools/lib/traceevent/Documentation/libtraceevent-field_get_val.txt
deleted file mode 100644
index 6324f0d48aeb..000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-field_get_val.txt
+++ /dev/null
@@ -1,122 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_get_any_field_val, tep_get_common_field_val, tep_get_field_val,
-tep_get_field_raw - Get value of a field.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-*#include <trace-seq.h>*
-
-int *tep_get_any_field_val*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, unsigned long long pass:[*]_val_, int _err_);
-int *tep_get_common_field_val*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, unsigned long long pass:[*]_val_, int _err_);
-int *tep_get_field_val*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, unsigned long long pass:[*]_val_, int _err_);
-void pass:[*]*tep_get_field_raw*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, int pass:[*]_len_, int _err_);
---
-
-DESCRIPTION
------------
-These functions can be used to find a field and retrieve its value.
-
-The _tep_get_any_field_val()_ function searches in the _record_ for a field
-with _name_, part of the _event_. If the field is found, its value is stored in
-_val_. If there is an error and _err_ is not zero, then an error string is
-written into _s_.
-
-The _tep_get_common_field_val()_ function does the same as
-_tep_get_any_field_val()_, but searches only in the common fields. This works
-for any event as all events include the common fields.
-
-The _tep_get_field_val()_ function does the same as _tep_get_any_field_val()_,
-but searches only in the event specific fields.
-
-The _tep_get_field_raw()_ function searches in the _record_ for a field with
-_name_, part of the _event_. If the field is found, a pointer to where the field
-exists in the record's raw data is returned. The size of the data is stored in
-_len_. If there is an error and _err_ is not zero, then an error string is
-written into _s_.
-
-RETURN VALUE
-------------
-The _tep_get_any_field_val()_, _tep_get_common_field_val()_ and
-_tep_get_field_val()_ functions return 0 on success, or -1 in case of an error.
-
-The _tep_get_field_raw()_ function returns a pointer to field's raw data, and
-places the length of this data in _len_. In case of an error NULL is returned.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-#include <trace-seq.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-struct tep_event *event = tep_find_event_by_name(tep, "kvm", "kvm_exit");
-...
-void process_record(struct tep_record *record)
-{
- int len;
- char *comm;
- struct tep_event_format *event;
- unsigned long long val;
-
- event = tep_find_event_by_record(pevent, record);
- if (event != NULL) {
- if (tep_get_common_field_val(NULL, event, "common_type",
- record, &val, 0) == 0) {
- /* Got the value of common type field */
- }
- if (tep_get_field_val(NULL, event, "pid", record, &val, 0) == 0) {
- /* Got the value of pid specific field */
- }
- comm = tep_get_field_raw(NULL, event, "comm", record, &len, 0);
- if (comm != NULL) {
- /* Got a pointer to the comm event specific field */
- }
- }
-}
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
- Header file to include in order to have access to the library APIs.
-*trace-seq.h*
- Header file to include in order to have access to trace sequences
- related APIs. Trace sequences are used to allow a function to call
- several other functions to create a string of data to use.
-*-ltraceevent*
- Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-field_print.txt b/tools/lib/traceevent/Documentation/libtraceevent-field_print.txt
deleted file mode 100644
index 9a9df98ac44d..000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-field_print.txt
+++ /dev/null
@@ -1,126 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_print_field, tep_print_fields, tep_print_num_field, tep_print_func_field -
-Print the field content.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-*#include <trace-seq.h>*
-
-void *tep_print_field*(struct trace_seq pass:[*]_s_, void pass:[*]_data_, struct tep_format_field pass:[*]_field_);
-void *tep_print_fields*(struct trace_seq pass:[*]_s_, void pass:[*]_data_, int _size_, struct tep_event pass:[*]_event_);
-int *tep_print_num_field*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, int _err_);
-int *tep_print_func_field*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, int _err_);
---
-
-DESCRIPTION
------------
-These functions print recorded field's data, according to the field's type.
-
-The _tep_print_field()_ function extracts from the recorded raw _data_ value of
-the _field_ and prints it into _s_, according to the field type.
-
-The _tep_print_fields()_ prints each field name followed by the record's field
-value according to the field's type:
-[verse]
---
-"field1_name=field1_value field2_name=field2_value ..."
---
-It iterates all fields of the _event_, and calls _tep_print_field()_ for each of
-them.
-
-The _tep_print_num_field()_ function prints a numeric field with given format
-string. A search is performed in the _event_ for a field with _name_. If such
-field is found, its value is extracted from the _record_ and is printed in the
-_s_, according to the given format string _fmt_. If the argument _err_ is
-non-zero, and an error occures - it is printed in the _s_.
-
-The _tep_print_func_field()_ function prints a function field with given format
-string. A search is performed in the _event_ for a field with _name_. If such
-field is found, its value is extracted from the _record_. The value is assumed
-to be a function address, and a search is perform to find the name of this
-function. The function name (if found) and its address are printed in the _s_,
-according to the given format string _fmt_. If the argument _err_ is non-zero,
-and an error occures - it is printed in _s_.
-
-RETURN VALUE
-------------
-The _tep_print_num_field()_ and _tep_print_func_field()_ functions return 1
-on success, -1 in case of an error or 0 if the print buffer _s_ is full.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-#include <trace-seq.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-struct trace_seq seq;
-trace_seq_init(&seq);
-struct tep_event *event = tep_find_event_by_name(tep, "timer", "hrtimer_start");
-...
-void process_record(struct tep_record *record)
-{
- struct tep_format_field *field_pid = tep_find_common_field(event, "common_pid");
-
- trace_seq_reset(&seq);
-
- /* Print the value of "common_pid" */
- tep_print_field(&seq, record->data, field_pid);
-
- /* Print all fields of the "hrtimer_start" event */
- tep_print_fields(&seq, record->data, record->size, event);
-
- /* Print the value of "expires" field with custom format string */
- tep_print_num_field(&seq, " timer expires in %llu ", event, "expires", record, 0);
-
- /* Print the address and the name of "function" field with custom format string */
- tep_print_func_field(&seq, " timer function is %s ", event, "function", record, 0);
- }
- ...
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
- Header file to include in order to have access to the library APIs.
-*trace-seq.h*
- Header file to include in order to have access to trace sequences related APIs.
- Trace sequences are used to allow a function to call several other functions
- to create a string of data to use.
-*-ltraceevent*
- Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-field_read.txt b/tools/lib/traceevent/Documentation/libtraceevent-field_read.txt
deleted file mode 100644
index 64e9e25d3fd9..000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-field_read.txt
+++ /dev/null
@@ -1,81 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_read_number_field - Reads a number from raw data.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-int *tep_read_number_field*(struct tep_format_field pass:[*]_field_, const void pass:[*]_data_, unsigned long long pass:[*]_value_);
---
-
-DESCRIPTION
------------
-The _tep_read_number_field()_ function reads the value of the _field_ from the
-raw _data_ and stores it in the _value_. The function sets the _value_ according
-to the endianness of the raw data and the current machine and stores it in
-_value_.
-
-RETURN VALUE
-------------
-The _tep_read_number_field()_ function retunrs 0 in case of success, or -1 in
-case of an error.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-struct tep_event *event = tep_find_event_by_name(tep, "timer", "hrtimer_start");
-...
-void process_record(struct tep_record *record)
-{
- unsigned long long pid;
- struct tep_format_field *field_pid = tep_find_common_field(event, "common_pid");
-
- if (tep_read_number_field(field_pid, record->data, &pid) != 0) {
- /* Failed to get "common_pid" value */
- }
-}
-...
---
-FILES
------
-[verse]
---
-*event-parse.h*
- Header file to include in order to have access to the library APIs.
-*-ltraceevent*
- Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-fields.txt b/tools/lib/traceevent/Documentation/libtraceevent-fields.txt
deleted file mode 100644
index 1ccb531d5114..000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-fields.txt
+++ /dev/null
@@ -1,105 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_event_common_fields, tep_event_fields - Get a list of fields for an event.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-struct tep_format_field pass:[*]pass:[*]*tep_event_common_fields*(struct tep_event pass:[*]_event_);
-struct tep_format_field pass:[*]pass:[*]*tep_event_fields*(struct tep_event pass:[*]_event_);
---
-
-DESCRIPTION
------------
-The _tep_event_common_fields()_ function returns an array of pointers to common
-fields for the _event_. The array is allocated in the function and must be freed
-by free(). The last element of the array is NULL.
-
-The _tep_event_fields()_ function returns an array of pointers to event specific
-fields for the _event_. The array is allocated in the function and must be freed
-by free(). The last element of the array is NULL.
-
-RETURN VALUE
-------------
-Both _tep_event_common_fields()_ and _tep_event_fields()_ functions return
-an array of pointers to tep_format_field structures in case of success, or
-NULL in case of an error.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-int i;
-struct tep_format_field **fields;
-struct tep_event *event = tep_find_event_by_name(tep, "kvm", "kvm_exit");
-if (event != NULL) {
- fields = tep_event_common_fields(event);
- if (fields != NULL) {
- i = 0;
- while (fields[i]) {
- /*
- walk through the list of the common fields
- of the kvm_exit event
- */
- i++;
- }
- free(fields);
- }
- fields = tep_event_fields(event);
- if (fields != NULL) {
- i = 0;
- while (fields[i]) {
- /*
- walk through the list of the event specific
- fields of the kvm_exit event
- */
- i++;
- }
- free(fields);
- }
-}
-...
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
- Header file to include in order to have access to the library APIs.
-*-ltraceevent*
- Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-file_endian.txt b/tools/lib/traceevent/Documentation/libtraceevent-file_endian.txt
deleted file mode 100644
index f401ad311047..000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-file_endian.txt
+++ /dev/null
@@ -1,91 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_is_file_bigendian, tep_set_file_bigendian - Get / set the endianness of the
-raw data being accessed by the tep handler.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-enum *tep_endian* {
- TEP_LITTLE_ENDIAN = 0,
- TEP_BIG_ENDIAN
-};
-
-bool *tep_is_file_bigendian*(struct tep_handle pass:[*]_tep_);
-void *tep_set_file_bigendian*(struct tep_handle pass:[*]_tep_, enum tep_endian _endian_);
-
---
-DESCRIPTION
------------
-The _tep_is_file_bigendian()_ function gets the endianness of the raw data,
-being accessed by the tep handler. The _tep_ argument is trace event parser
-context.
-
-The _tep_set_file_bigendian()_ function sets the endianness of raw data being
-accessed by the tep handler. The _tep_ argument is trace event parser context.
-[verse]
---
-The _endian_ argument is the endianness:
- _TEP_LITTLE_ENDIAN_ - the raw data is in little endian format,
- _TEP_BIG_ENDIAN_ - the raw data is in big endian format.
---
-RETURN VALUE
-------------
-The _tep_is_file_bigendian()_ function returns true if the data is in bigendian
-format, false otherwise.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
- tep_set_file_bigendian(tep, TEP_LITTLE_ENDIAN);
-...
- if (tep_is_file_bigendian(tep)) {
- /* The raw data is in big endian */
- } else {
- /* The raw data is in little endian */
- }
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
- Header file to include in order to have access to the library APIs.
-*-ltraceevent*
- Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-filter.txt b/tools/lib/traceevent/Documentation/libtraceevent-filter.txt
deleted file mode 100644
index 4a9962d8cb59..000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-filter.txt
+++ /dev/null
@@ -1,209 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_filter_alloc, tep_filter_free, tep_filter_reset, tep_filter_make_string,
-tep_filter_copy, tep_filter_compare, tep_filter_match, tep_event_filtered,
-tep_filter_remove_event, tep_filter_strerror, tep_filter_add_filter_str -
-Event filter related APIs.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-struct tep_event_filter pass:[*]*tep_filter_alloc*(struct tep_handle pass:[*]_tep_);
-void *tep_filter_free*(struct tep_event_filter pass:[*]_filter_);
-void *tep_filter_reset*(struct tep_event_filter pass:[*]_filter_);
-enum tep_errno *tep_filter_add_filter_str*(struct tep_event_filter pass:[*]_filter_, const char pass:[*]_filter_str_);
-int *tep_event_filtered*(struct tep_event_filter pass:[*]_filter_, int _event_id_);
-int *tep_filter_remove_event*(struct tep_event_filter pass:[*]_filter_, int _event_id_);
-enum tep_errno *tep_filter_match*(struct tep_event_filter pass:[*]_filter_, struct tep_record pass:[*]_record_);
-int *tep_filter_copy*(struct tep_event_filter pass:[*]_dest_, struct tep_event_filter pass:[*]_source_);
-int *tep_filter_compare*(struct tep_event_filter pass:[*]_filter1_, struct tep_event_filter pass:[*]_filter2_);
-char pass:[*]*tep_filter_make_string*(struct tep_event_filter pass:[*]_filter_, int _event_id_);
-int *tep_filter_strerror*(struct tep_event_filter pass:[*]_filter_, enum tep_errno _err_, char pass:[*]buf, size_t _buflen_);
---
-
-DESCRIPTION
------------
-Filters can be attached to traced events. They can be used to filter out various
-events when outputting them. Each event can be filtered based on its parameters,
-described in the event's format file. This set of functions can be used to
-create, delete, modify and attach event filters.
-
-The _tep_filter_alloc()_ function creates a new event filter. The _tep_ argument
-is the trace event parser context.
-
-The _tep_filter_free()_ function frees an event filter and all resources that it
-had used.
-
-The _tep_filter_reset()_ function removes all rules from an event filter and
-resets it.
-
-The _tep_filter_add_filter_str()_ function adds a new rule to the _filter_. The
-_filter_str_ argument is the filter string, that contains the rule.
-
-The _tep_event_filtered()_ function checks if the event with _event_id_ has
-_filter_.
-
-The _tep_filter_remove_event()_ function removes a _filter_ for an event with
-_event_id_.
-
-The _tep_filter_match()_ function tests if a _record_ matches given _filter_.
-
-The _tep_filter_copy()_ function copies a _source_ filter into a _dest_ filter.
-
-The _tep_filter_compare()_ function compares two filers - _filter1_ and _filter2_.
-
-The _tep_filter_make_string()_ function constructs a string, displaying
-the _filter_ contents for given _event_id_.
-
-The _tep_filter_strerror()_ function copies the _filter_ error buffer into the
-given _buf_ with the size _buflen_. If the error buffer is empty, in the _buf_
-is copied a string, describing the error _err_.
-
-RETURN VALUE
-------------
-The _tep_filter_alloc()_ function returns a pointer to the newly created event
-filter, or NULL in case of an error.
-
-The _tep_filter_add_filter_str()_ function returns 0 if the rule was
-successfully added or a negative error code. Use _tep_filter_strerror()_ to see
-actual error message in case of an error.
-
-The _tep_event_filtered()_ function returns 1 if the filter is found for given
-event, or 0 otherwise.
-
-The _tep_filter_remove_event()_ function returns 1 if the vent was removed, or
-0 if the event was not found.
-
-The _tep_filter_match()_ function returns _tep_errno_, according to the result:
-[verse]
---
-_pass:[TEP_ERRNO__FILTER_MATCH]_ - filter found for event, the record matches.
-_pass:[TEP_ERRNO__FILTER_MISS]_ - filter found for event, the record does not match.
-_pass:[TEP_ERRNO__FILTER_NOT_FOUND]_ - no filter found for record's event.
-_pass:[TEP_ERRNO__NO_FILTER]_ - no rules in the filter.
---
-or any other _tep_errno_, if an error occurred during the test.
-
-The _tep_filter_copy()_ function returns 0 on success or -1 if not all rules
- were copied.
-
-The _tep_filter_compare()_ function returns 1 if the two filters hold the same
-content, or 0 if they do not.
-
-The _tep_filter_make_string()_ function returns a string, which must be freed
-with free(), or NULL in case of an error.
-
-The _tep_filter_strerror()_ function returns 0 if message was filled
-successfully, or -1 in case of an error.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-char errstr[200];
-int ret;
-
-struct tep_event_filter *filter = tep_filter_alloc(tep);
-struct tep_event_filter *filter1 = tep_filter_alloc(tep);
-ret = tep_filter_add_filter_str(filter, "sched/sched_wakeup:target_cpu==1");
-if(ret < 0) {
- tep_filter_strerror(filter, ret, errstr, sizeof(errstr));
- /* Failed to add a new rule to the filter, the error string is in errstr */
-}
-if (tep_filter_copy(filter1, filter) != 0) {
- /* Failed to copy filter in filter1 */
-}
-...
-if (tep_filter_compare(filter, filter1) != 1) {
- /* Both filters are different */
-}
-...
-void process_record(struct tep_handle *tep, struct tep_record *record)
-{
- struct tep_event *event;
- char *fstring;
-
- event = tep_find_event_by_record(tep, record);
-
- if (tep_event_filtered(filter, event->id) == 1) {
- /* The event has filter */
- fstring = tep_filter_make_string(filter, event->id);
- if (fstring != NULL) {
- /* The filter for the event is in fstring */
- free(fstring);
- }
- }
-
- switch (tep_filter_match(filter, record)) {
- case TEP_ERRNO__FILTER_MATCH:
- /* The filter matches the record */
- break;
- case TEP_ERRNO__FILTER_MISS:
- /* The filter does not match the record */
- break;
- case TEP_ERRNO__FILTER_NOT_FOUND:
- /* No filter found for record's event */
- break;
- case TEP_ERRNO__NO_FILTER:
- /* There are no rules in the filter */
- break
- default:
- /* An error occurred during the test */
- break;
- }
-
- if (tep_filter_remove_event(filter, event->id) == 1) {
- /* The event was removed from the filter */
- }
-}
-
-...
-tep_filter_reset(filter);
-...
-tep_filter_free(filter);
-tep_filter_free(filter1);
-...
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
- Header file to include in order to have access to the library APIs.
-*-ltraceevent*
- Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-func_apis.txt b/tools/lib/traceevent/Documentation/libtraceevent-func_apis.txt
deleted file mode 100644
index f6aca0df2151..000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-func_apis.txt
+++ /dev/null
@@ -1,183 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_find_function, tep_find_function_address, tep_set_function_resolver,
-tep_reset_function_resolver, tep_register_function, tep_register_print_string -
-function related tep APIs
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-typedef char pass:[*](*tep_func_resolver_t*)(void pass:[*]_priv_, unsigned long long pass:[*]_addrp_, char pass:[**]_modp_);
-int *tep_set_function_resolver*(struct tep_handle pass:[*]_tep_, tep_func_resolver_t pass:[*]_func_, void pass:[*]_priv_);
-void *tep_reset_function_resolver*(struct tep_handle pass:[*]_tep_);
-const char pass:[*]*tep_find_function*(struct tep_handle pass:[*]_tep_, unsigned long long _addr_);
-unsigned long long *tep_find_function_address*(struct tep_handle pass:[*]_tep_, unsigned long long _addr_);
-int *tep_register_function*(struct tep_handle pass:[*]_tep_, char pass:[*]_name_, unsigned long long _addr_, char pass:[*]_mod_);
-int *tep_register_print_string*(struct tep_handle pass:[*]_tep_, const char pass:[*]_fmt_, unsigned long long _addr_);
---
-
-DESCRIPTION
------------
-Some tools may have already a way to resolve the kernel functions. These APIs
-allow them to keep using it instead of duplicating all the entries inside.
-
-The _tep_func_resolver_t_ type is the prototype of the alternative kernel
-functions resolver. This function receives a pointer to its custom context
-(set with the _tep_set_function_resolver()_ call ) and the address of a kernel
-function, which has to be resolved. In case of success, it should return
-the name of the function and its module (if any) in _modp_.
-
-The _tep_set_function_resolver()_ function registers _func_ as an alternative
-kernel functions resolver. The _tep_ argument is trace event parser context.
-The _priv_ argument is a custom context of the _func_ function. The function
-resolver is used by the APIs _tep_find_function()_,
-_tep_find_function_address()_, and _tep_print_func_field()_ to resolve
-a function address to a function name.
-
-The _tep_reset_function_resolver()_ function resets the kernel functions
-resolver to the default function. The _tep_ argument is trace event parser
-context.
-
-
-These APIs can be used to find function name and start address, by given
-address. The given address does not have to be exact, it will select
-the function that would contain it.
-
-The _tep_find_function()_ function returns the function name, which contains the
-given address _addr_. The _tep_ argument is the trace event parser context.
-
-The _tep_find_function_address()_ function returns the function start address,
-by given address _addr_. The _addr_ does not have to be exact, it will select
-the function that would contain it. The _tep_ argument is the trace event
-parser context.
-
-The _tep_register_function()_ function registers a function name mapped to an
-address and (optional) module. This mapping is used in case the function tracer
-or events have "%pS" parameter in its format string. It is common to pass in
-the kallsyms function names with their corresponding addresses with this
-function. The _tep_ argument is the trace event parser context. The _name_ is
-the name of the function, the string is copied internally. The _addr_ is the
-start address of the function. The _mod_ is the kernel module the function may
-be in (NULL for none).
-
-The _tep_register_print_string()_ function registers a string by the address
-it was stored in the kernel. Some strings internal to the kernel with static
-address are passed to certain events. The "%s" in the event's format field
-which has an address needs to know what string would be at that address. The
-tep_register_print_string() supplies the parsing with the mapping between kernel
-addresses and those strings. The _tep_ argument is the trace event parser
-context. The _fmt_ is the string to register, it is copied internally.
-The _addr_ is the address the string was located at.
-
-
-RETURN VALUE
-------------
-The _tep_set_function_resolver()_ function returns 0 in case of success, or -1
-in case of an error.
-
-The _tep_find_function()_ function returns the function name, or NULL in case
-it cannot be found.
-
-The _tep_find_function_address()_ function returns the function start address,
-or 0 in case it cannot be found.
-
-The _tep_register_function()_ function returns 0 in case of success. In case of
-an error -1 is returned, and errno is set to the appropriate error number.
-
-The _tep_register_print_string()_ function returns 0 in case of success. In case
-of an error -1 is returned, and errno is set to the appropriate error number.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-char *my_resolve_kernel_addr(void *context,
- unsigned long long *addrp, char **modp)
-{
- struct db *function_database = context;
- struct symbol *sym = sql_lookup(function_database, *addrp);
-
- if (!sym)
- return NULL;
-
- *modp = sym->module_name;
- return sym->name;
-}
-
-void show_function( unsigned long long addr)
-{
- unsigned long long fstart;
- const char *fname;
-
- if (tep_set_function_resolver(tep, my_resolve_kernel_addr,
- function_database) != 0) {
- /* failed to register my_resolve_kernel_addr */
- }
-
- /* These APIs use my_resolve_kernel_addr() to resolve the addr */
- fname = tep_find_function(tep, addr);
- fstart = tep_find_function_address(tep, addr);
-
- /*
- addr is in function named fname, starting at fstart address,
- at offset (addr - fstart)
- */
-
- tep_reset_function_resolver(tep);
-
-}
-...
- if (tep_register_function(tep, "kvm_exit",
- (unsigned long long) 0x12345678, "kvm") != 0) {
- /* Failed to register kvm_exit address mapping */
- }
-...
- if (tep_register_print_string(tep, "print string",
- (unsigned long long) 0x87654321, NULL) != 0) {
- /* Failed to register "print string" address mapping */
- }
-...
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
- Header file to include in order to have access to the library APIs.
-*-ltraceevent*
- Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-func_find.txt b/tools/lib/traceevent/Documentation/libtraceevent-func_find.txt
deleted file mode 100644
index 04840e244445..000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-func_find.txt
+++ /dev/null
@@ -1,88 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_find_function,tep_find_function_address - Find function name / start address.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-const char pass:[*]*tep_find_function*(struct tep_handle pass:[*]_tep_, unsigned long long _addr_);
-unsigned long long *tep_find_function_address*(struct tep_handle pass:[*]_tep_, unsigned long long _addr_);
---
-
-DESCRIPTION
------------
-These functions can be used to find function name and start address, by given
-address. The given address does not have to be exact, it will select the function
-that would contain it.
-
-The _tep_find_function()_ function returns the function name, which contains the
-given address _addr_. The _tep_ argument is the trace event parser context.
-
-The _tep_find_function_address()_ function returns the function start address,
-by given address _addr_. The _addr_ does not have to be exact, it will select the
-function that would contain it. The _tep_ argument is the trace event parser context.
-
-RETURN VALUE
-------------
-The _tep_find_function()_ function returns the function name, or NULL in case
-it cannot be found.
-
-The _tep_find_function_address()_ function returns the function start address,
-or 0 in case it cannot be found.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-void show_function( unsigned long long addr)
-{
- const char *fname = tep_find_function(tep, addr);
- unsigned long long fstart = tep_find_function_address(tep, addr);
-
- /* addr is in function named fname, starting at fstart address, at offset (addr - fstart) */
-}
-...
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
- Header file to include in order to have access to the library APIs.
-*-ltraceevent*
- Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-handle.txt b/tools/lib/traceevent/Documentation/libtraceevent-handle.txt
deleted file mode 100644
index 45b20172e262..000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-handle.txt
+++ /dev/null
@@ -1,101 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_alloc, tep_free,tep_ref, tep_unref,tep_get_ref - Create, destroy, manage
-references of trace event parser context.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-struct tep_handle pass:[*]*tep_alloc*(void);
-void *tep_free*(struct tep_handle pass:[*]_tep_);
-void *tep_ref*(struct tep_handle pass:[*]_tep_);
-void *tep_unref*(struct tep_handle pass:[*]_tep_);
-int *tep_get_ref*(struct tep_handle pass:[*]_tep_);
---
-
-DESCRIPTION
------------
-These are the main functions to create and destroy tep_handle - the main
-structure, representing the trace event parser context. This context is used as
-the input parameter of most library APIs.
-
-The _tep_alloc()_ function allocates and initializes the tep context.
-
-The _tep_free()_ function will decrement the reference of the _tep_ handler.
-When there is no more references, then it will free the handler, as well
-as clean up all its resources that it had used. The argument _tep_ is
-the pointer to the trace event parser context.
-
-The _tep_ref()_ function adds a reference to the _tep_ handler.
-
-The _tep_unref()_ function removes a reference from the _tep_ handler. When
-the last reference is removed, the _tep_ is destroyed, and all resources that
-it had used are cleaned up.
-
-The _tep_ref_get()_ functions gets the current references of the _tep_ handler.
-
-RETURN VALUE
-------------
-_tep_alloc()_ returns a pointer to a newly created tep_handle structure.
-NULL is returned in case there is not enough free memory to allocate it.
-
-_tep_ref_get()_ returns the current references of _tep_.
-If _tep_ is NULL, 0 is returned.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-
-...
-struct tep_handle *tep = tep_alloc();
-...
-int ref = tep_get_ref(tep);
-tep_ref(tep);
-if ( (ref+1) != tep_get_ref(tep)) {
- /* Something wrong happened, the counter is not incremented by 1 */
-}
-tep_unref(tep);
-...
-tep_free(tep);
-...
---
-FILES
------
-[verse]
---
-*event-parse.h*
- Header file to include in order to have access to the library APIs.
-*-ltraceevent*
- Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-header_page.txt b/tools/lib/traceevent/Documentation/libtraceevent-header_page.txt
deleted file mode 100644
index 615d117dc39f..000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-header_page.txt
+++ /dev/null
@@ -1,102 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_get_header_page_size, tep_get_header_timestamp_size, tep_is_old_format -
-Get the data stored in the header page, in kernel context.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-int *tep_get_header_page_size*(struct tep_handle pass:[*]_tep_);
-int *tep_get_header_timestamp_size*(struct tep_handle pass:[*]_tep_);
-bool *tep_is_old_format*(struct tep_handle pass:[*]_tep_);
---
-DESCRIPTION
------------
-These functions retrieve information from kernel context, stored in tracefs
-events/header_page. Old kernels do not have header page info, so default values
-from user space context are used.
-
-The _tep_get_header_page_size()_ function returns the size of a long integer,
-in kernel context. The _tep_ argument is trace event parser context.
-This information is retrieved from tracefs events/header_page, "commit" field.
-
-The _tep_get_header_timestamp_size()_ function returns the size of timestamps,
-in kernel context. The _tep_ argument is trace event parser context. This
-information is retrieved from tracefs events/header_page, "timestamp" field.
-
-The _tep_is_old_format()_ function returns true if the kernel predates
-the addition of events/header_page, otherwise it returns false.
-
-RETURN VALUE
-------------
-The _tep_get_header_page_size()_ function returns the size of a long integer,
-in bytes.
-
-The _tep_get_header_timestamp_size()_ function returns the size of timestamps,
-in bytes.
-
-The _tep_is_old_format()_ function returns true, if an old kernel is used to
-generate the tracing data, which has no event/header_page. If the kernel is new,
-or _tep_ is NULL, false is returned.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
- int longsize;
- int timesize;
- bool old;
-
- longsize = tep_get_header_page_size(tep);
- timesize = tep_get_header_timestamp_size(tep);
- old = tep_is_old_format(tep);
-
- printf ("%s kernel is used to generate the tracing data.\n",
- old?"Old":"New");
- printf("The size of a long integer is %d bytes.\n", longsize);
- printf("The timestamps size is %d bytes.\n", timesize);
-...
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
- Header file to include in order to have access to the library APIs.
-*-ltraceevent*
- Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-host_endian.txt b/tools/lib/traceevent/Documentation/libtraceevent-host_endian.txt
deleted file mode 100644
index d5d375eb8d1e..000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-host_endian.txt
+++ /dev/null
@@ -1,104 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_is_bigendian, tep_is_local_bigendian, tep_set_local_bigendian - Get / set
-the endianness of the local machine.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-enum *tep_endian* {
- TEP_LITTLE_ENDIAN = 0,
- TEP_BIG_ENDIAN
-};
-
-int *tep_is_bigendian*(void);
-bool *tep_is_local_bigendian*(struct tep_handle pass:[*]_tep_);
-void *tep_set_local_bigendian*(struct tep_handle pass:[*]_tep_, enum tep_endian _endian_);
---
-
-DESCRIPTION
------------
-
-The _tep_is_bigendian()_ gets the endianness of the machine, executing
-the function.
-
-The _tep_is_local_bigendian()_ function gets the endianness of the local
-machine, saved in the _tep_ handler. The _tep_ argument is the trace event
-parser context. This API is a bit faster than _tep_is_bigendian()_, as it
-returns cached endianness of the local machine instead of checking it each time.
-
-The _tep_set_local_bigendian()_ function sets the endianness of the local
-machine in the _tep_ handler. The _tep_ argument is trace event parser context.
-The _endian_ argument is the endianness:
-[verse]
---
- _TEP_LITTLE_ENDIAN_ - the machine is little endian,
- _TEP_BIG_ENDIAN_ - the machine is big endian.
---
-
-RETURN VALUE
-------------
-The _tep_is_bigendian()_ function returns non zero if the endianness of the
-machine, executing the code, is big endian and zero otherwise.
-
-The _tep_is_local_bigendian()_ function returns true, if the endianness of the
-local machine, saved in the _tep_ handler, is big endian, or false otherwise.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
- if (tep_is_bigendian())
- tep_set_local_bigendian(tep, TEP_BIG_ENDIAN);
- else
- tep_set_local_bigendian(tep, TEP_LITTLE_ENDIAN);
-...
- if (tep_is_local_bigendian(tep))
- printf("This machine you are running on is bigendian\n");
- else
- printf("This machine you are running on is little endian\n");
-
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
- Header file to include in order to have access to the library APIs.
-*-ltraceevent*
- Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-long_size.txt b/tools/lib/traceevent/Documentation/libtraceevent-long_size.txt
deleted file mode 100644
index 01d78ea2519a..000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-long_size.txt
+++ /dev/null
@@ -1,78 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_get_long_size, tep_set_long_size - Get / set the size of a long integer on
-the machine, where the trace is generated, in bytes
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-int *tep_get_long_size*(strucqt tep_handle pass:[*]_tep_);
-void *tep_set_long_size*(struct tep_handle pass:[*]_tep_, int _long_size_);
---
-
-DESCRIPTION
------------
-The _tep_get_long_size()_ function returns the size of a long integer on the machine,
-where the trace is generated. The _tep_ argument is trace event parser context.
-
-The _tep_set_long_size()_ function sets the size of a long integer on the machine,
-where the trace is generated. The _tep_ argument is trace event parser context.
-The _long_size_ is the size of a long integer, in bytes.
-
-RETURN VALUE
-------------
-The _tep_get_long_size()_ function returns the size of a long integer on the machine,
-where the trace is generated, in bytes.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-tep_set_long_size(tep, 4);
-...
-int long_size = tep_get_long_size(tep);
-...
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
- Header file to include in order to have access to the library APIs.
-*-ltraceevent*
- Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-page_size.txt b/tools/lib/traceevent/Documentation/libtraceevent-page_size.txt
deleted file mode 100644
index 452c0cfa1822..000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-page_size.txt
+++ /dev/null
@@ -1,82 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_get_page_size, tep_set_page_size - Get / set the size of a memory page on
-the machine, where the trace is generated
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-int *tep_get_page_size*(struct tep_handle pass:[*]_tep_);
-void *tep_set_page_size*(struct tep_handle pass:[*]_tep_, int _page_size_);
---
-
-DESCRIPTION
------------
-The _tep_get_page_size()_ function returns the size of a memory page on
-the machine, where the trace is generated. The _tep_ argument is trace
-event parser context.
-
-The _tep_set_page_size()_ function stores in the _tep_ context the size of a
-memory page on the machine, where the trace is generated.
-The _tep_ argument is trace event parser context.
-The _page_size_ argument is the size of a memory page, in bytes.
-
-RETURN VALUE
-------------
-The _tep_get_page_size()_ function returns size of the memory page, in bytes.
-
-EXAMPLE
--------
-[source,c]
---
-#include <unistd.h>
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
- int page_size = getpagesize();
-
- tep_set_page_size(tep, page_size);
-
- printf("The page size for this machine is %d\n", tep_get_page_size(tep));
-
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
- Header file to include in order to have access to the library APIs.
-*-ltraceevent*
- Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-parse_event.txt b/tools/lib/traceevent/Documentation/libtraceevent-parse_event.txt
deleted file mode 100644
index f248114ca1ff..000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-parse_event.txt
+++ /dev/null
@@ -1,90 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_parse_event, tep_parse_format - Parse the event format information
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-enum tep_errno *tep_parse_event*(struct tep_handle pass:[*]_tep_, const char pass:[*]_buf_, unsigned long _size_, const char pass:[*]_sys_);
-enum tep_errno *tep_parse_format*(struct tep_handle pass:[*]_tep_, struct tep_event pass:[*]pass:[*]_eventp_, const char pass:[*]_buf_, unsigned long _size_, const char pass:[*]_sys_);
---
-
-DESCRIPTION
------------
-The _tep_parse_event()_ function parses the event format and creates an event
-structure to quickly parse raw data for a given event. The _tep_ argument is
-the trace event parser context. The created event structure is stored in the
-_tep_ context. The _buf_ argument is a buffer with _size_, where the event
-format data is. The event format data can be taken from
-tracefs/events/.../.../format files. The _sys_ argument is the system of
-the event.
-
-The _tep_parse_format()_ function does the same as _tep_parse_event()_. The only
-difference is in the extra _eventp_ argument, where the newly created event
-structure is returned.
-
-RETURN VALUE
-------------
-Both _tep_parse_event()_ and _tep_parse_format()_ functions return 0 on success,
-or TEP_ERRNO__... in case of an error.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-char *buf;
-int size;
-struct tep_event *event = NULL;
-buf = read_file("/sys/kernel/tracing/events/ftrace/print/format", &size);
-if (tep_parse_event(tep, buf, size, "ftrace") != 0) {
- /* Failed to parse the ftrace print format */
-}
-
-if (tep_parse_format(tep, &event, buf, size, "ftrace") != 0) {
- /* Failed to parse the ftrace print format */
-}
-...
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
- Header file to include in order to have access to the library APIs.
-*-ltraceevent*
- Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-parse_head.txt b/tools/lib/traceevent/Documentation/libtraceevent-parse_head.txt
deleted file mode 100644
index c90f16c7d8e6..000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-parse_head.txt
+++ /dev/null
@@ -1,82 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_parse_header_page - Parses the data stored in the header page.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-int *tep_parse_header_page*(struct tep_handle pass:[*]_tep_, char pass:[*]_buf_, unsigned long _size_, int _long_size_);
---
-
-DESCRIPTION
------------
-The _tep_parse_header_page()_ function parses the header page data from _buf_,
-and initializes the _tep_, trace event parser context, with it. The buffer
-_buf_ is with _size_, and is supposed to be copied from
-tracefs/events/header_page.
-
-Some old kernels do not have header page info, in this case the
-_tep_parse_header_page()_ function can be called with _size_ equal to 0. The
-_tep_ context is initialized with default values. The _long_size_ can be used in
-this use case, to set the size of a long integer to be used.
-
-RETURN VALUE
-------------
-The _tep_parse_header_page()_ function returns 0 in case of success, or -1
-in case of an error.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-char *buf;
-int size;
-buf = read_file("/sys/kernel/tracing/events/header_page", &size);
-if (tep_parse_header_page(tep, buf, size, sizeof(unsigned long)) != 0) {
- /* Failed to parse the header page */
-}
-...
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
- Header file to include in order to have access to the library APIs.
-*-ltraceevent*
- Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt b/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt
deleted file mode 100644
index 4d6394397d92..000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt
+++ /dev/null
@@ -1,122 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_load_plugins, tep_unload_plugins, tep_load_plugins_hook - Load / unload traceevent plugins.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-struct tep_plugin_list pass:[*]*tep_load_plugins*(struct tep_handle pass:[*]_tep_);
-void *tep_unload_plugins*(struct tep_plugin_list pass:[*]_plugin_list_, struct tep_handle pass:[*]_tep_);
-void *tep_load_plugins_hook*(struct tep_handle pass:[*]_tep_, const char pass:[*]_suffix_,
- void (pass:[*]_load_plugin_)(struct tep_handle pass:[*]tep,
- const char pass:[*]path,
- const char pass:[*]name,
- void pass:[*]data),
- void pass:[*]_data_);
---
-
-DESCRIPTION
------------
-The _tep_load_plugins()_ function loads all plugins, located in the plugin
-directories. The _tep_ argument is trace event parser context.
-The plugin directories are :
-[verse]
---
- - Directories, specified in _tep_->plugins_dir with priority TEP_PLUGIN_FIRST
- - System's plugin directory, defined at the library compile time. It
- depends on the library installation prefix and usually is
- _(install_preffix)/lib/traceevent/plugins_
- - Directory, defined by the environment variable _TRACEEVENT_PLUGIN_DIR_
- - User's plugin directory, located at _~/.local/lib/traceevent/plugins_
- - Directories, specified in _tep_->plugins_dir with priority TEP_PLUGIN_LAST
---
-Loading of plugins can be controlled by the _tep_flags_, using the
-_tep_set_flag()_ API:
-[verse]
---
- _TEP_DISABLE_SYS_PLUGINS_ - do not load plugins, located in
- the system's plugin directory.
- _TEP_DISABLE_PLUGINS_ - do not load any plugins.
---
-The _tep_set_flag()_ API needs to be called before _tep_load_plugins()_, if
-loading of all plugins is not the desired case.
-
-The _tep_unload_plugins()_ function unloads the plugins, previously loaded by
-_tep_load_plugins()_. The _tep_ argument is trace event parser context. The
-_plugin_list_ is the list of loaded plugins, returned by
-the _tep_load_plugins()_ function.
-
-The _tep_load_plugins_hook_ function walks through all directories with plugins
-and calls user specified _load_plugin()_ hook for each plugin file. Only files
-with given _suffix_ are considered to be plugins. The _data_ is a user specified
-context, passed to _load_plugin()_. Directories and the walk order are the same
-as in _tep_load_plugins()_ API.
-
-RETURN VALUE
-------------
-The _tep_load_plugins()_ function returns a list of successfully loaded plugins,
-or NULL in case no plugins are loaded.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-struct tep_plugin_list *plugins = tep_load_plugins(tep);
-if (plugins == NULL) {
- /* no plugins are loaded */
-}
-...
-tep_unload_plugins(plugins, tep);
-...
-void print_plugin(struct tep_handle *tep, const char *path,
- const char *name, void *data)
-{
- pritnf("Found libtraceevent plugin %s/%s\n", path, name);
-}
-...
-tep_load_plugins_hook(tep, ".so", print_plugin, NULL);
-...
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
- Header file to include in order to have access to the library APIs.
-*-ltraceevent*
- Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_, _tep_set_flag(3)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-record_parse.txt b/tools/lib/traceevent/Documentation/libtraceevent-record_parse.txt
deleted file mode 100644
index e9a69116c78b..000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-record_parse.txt
+++ /dev/null
@@ -1,137 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_data_type, tep_data_pid,tep_data_preempt_count, tep_data_flags -
-Extract common fields from a record.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-enum *trace_flag_type* {
- _TRACE_FLAG_IRQS_OFF_,
- _TRACE_FLAG_IRQS_NOSUPPORT_,
- _TRACE_FLAG_NEED_RESCHED_,
- _TRACE_FLAG_HARDIRQ_,
- _TRACE_FLAG_SOFTIRQ_,
-};
-
-int *tep_data_type*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_);
-int *tep_data_pid*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_);
-int *tep_data_preempt_count*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_);
-int *tep_data_flags*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_);
---
-
-DESCRIPTION
------------
-This set of functions can be used to extract common fields from a record.
-
-The _tep_data_type()_ function gets the event id from the record _rec_.
-It reads the "common_type" field. The _tep_ argument is the trace event parser
-context.
-
-The _tep_data_pid()_ function gets the process id from the record _rec_.
-It reads the "common_pid" field. The _tep_ argument is the trace event parser
-context.
-
-The _tep_data_preempt_count()_ function gets the preemption count from the
-record _rec_. It reads the "common_preempt_count" field. The _tep_ argument is
-the trace event parser context.
-
-The _tep_data_flags()_ function gets the latency flags from the record _rec_.
-It reads the "common_flags" field. The _tep_ argument is the trace event parser
-context. Supported latency flags are:
-[verse]
---
- _TRACE_FLAG_IRQS_OFF_, Interrupts are disabled.
- _TRACE_FLAG_IRQS_NOSUPPORT_, Reading IRQ flag is not supported by the architecture.
- _TRACE_FLAG_NEED_RESCHED_, Task needs rescheduling.
- _TRACE_FLAG_HARDIRQ_, Hard IRQ is running.
- _TRACE_FLAG_SOFTIRQ_, Soft IRQ is running.
---
-
-RETURN VALUE
-------------
-The _tep_data_type()_ function returns an integer, representing the event id.
-
-The _tep_data_pid()_ function returns an integer, representing the process id
-
-The _tep_data_preempt_count()_ function returns an integer, representing the
-preemption count.
-
-The _tep_data_flags()_ function returns an integer, representing the latency
-flags. Look at the _trace_flag_type_ enum for supported flags.
-
-All these functions in case of an error return a negative integer.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-void process_record(struct tep_record *record)
-{
- int data;
-
- data = tep_data_type(tep, record);
- if (data >= 0) {
- /* Got the ID of the event */
- }
-
- data = tep_data_pid(tep, record);
- if (data >= 0) {
- /* Got the process ID */
- }
-
- data = tep_data_preempt_count(tep, record);
- if (data >= 0) {
- /* Got the preemption count */
- }
-
- data = tep_data_flags(tep, record);
- if (data >= 0) {
- /* Got the latency flags */
- }
-}
-...
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
- Header file to include in order to have access to the library APIs.
-*-ltraceevent*
- Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-reg_event_handler.txt b/tools/lib/traceevent/Documentation/libtraceevent-reg_event_handler.txt
deleted file mode 100644
index 53d37d72a1c1..000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-reg_event_handler.txt
+++ /dev/null
@@ -1,156 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_register_event_handler, tep_unregister_event_handler - Register /
-unregisters a callback function to parse an event information.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-enum *tep_reg_handler* {
- _TEP_REGISTER_SUCCESS_,
- _TEP_REGISTER_SUCCESS_OVERWRITE_,
-};
-
-int *tep_register_event_handler*(struct tep_handle pass:[*]_tep_, int _id_, const char pass:[*]_sys_name_, const char pass:[*]_event_name_, tep_event_handler_func _func_, void pass:[*]_context_);
-int *tep_unregister_event_handler*(struct tep_handle pass:[*]tep, int id, const char pass:[*]sys_name, const char pass:[*]event_name, tep_event_handler_func func, void pass:[*]_context_);
-
-typedef int (*pass:[*]tep_event_handler_func*)(struct trace_seq pass:[*]s, struct tep_record pass:[*]record, struct tep_event pass:[*]event, void pass:[*]context);
---
-
-DESCRIPTION
------------
-The _tep_register_event_handler()_ function registers a handler function,
-which is going to be called to parse the information for a given event.
-The _tep_ argument is the trace event parser context. The _id_ argument is
-the id of the event. The _sys_name_ argument is the name of the system,
-the event belongs to. The _event_name_ argument is the name of the event.
-If _id_ is >= 0, it is used to find the event, otherwise _sys_name_ and
-_event_name_ are used. The _func_ is a pointer to the function, which is going
-to be called to parse the event information. The _context_ argument is a pointer
-to the context data, which will be passed to the _func_. If a handler function
-for the same event is already registered, it will be overridden with the new
-one. This mechanism allows a developer to override the parsing of a given event.
-If for some reason the default print format is not sufficient, the developer
-can register a function for an event to be used to parse the data instead.
-
-The _tep_unregister_event_handler()_ function unregisters the handler function,
-previously registered with _tep_register_event_handler()_. The _tep_ argument
-is the trace event parser context. The _id_, _sys_name_, _event_name_, _func_,
-and _context_ are the same arguments, as when the callback function _func_ was
-registered.
-
-The _tep_event_handler_func_ is the type of the custom event handler
-function. The _s_ argument is the trace sequence, it can be used to create a
-custom string, describing the event. A _record_ to get the event from is passed
-as input parameter and also the _event_ - the handle to the record's event. The
-_context_ is custom context, set when the custom event handler is registered.
-
-RETURN VALUE
-------------
-The _tep_register_event_handler()_ function returns _TEP_REGISTER_SUCCESS_
-if the new handler is registered successfully or
-_TEP_REGISTER_SUCCESS_OVERWRITE_ if an existing handler is overwritten.
-If there is not enough memory to complete the registration,
-TEP_ERRNO__MEM_ALLOC_FAILED is returned.
-
-The _tep_unregister_event_handler()_ function returns 0 if _func_ was removed
-successful or, -1 if the event was not found.
-
-The _tep_event_handler_func_ should return -1 in case of an error,
-or 0 otherwise.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-#include <trace-seq.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-int timer_expire_handler(struct trace_seq *s, struct tep_record *record,
- struct tep_event *event, void *context)
-{
- trace_seq_printf(s, "hrtimer=");
-
- if (tep_print_num_field(s, "0x%llx", event, "timer", record, 0) == -1)
- tep_print_num_field(s, "0x%llx", event, "hrtimer", record, 1);
-
- trace_seq_printf(s, " now=");
-
- tep_print_num_field(s, "%llu", event, "now", record, 1);
-
- tep_print_func_field(s, " function=%s", event, "function", record, 0);
-
- return 0;
-}
-...
- int ret;
-
- ret = tep_register_event_handler(tep, -1, "timer", "hrtimer_expire_entry",
- timer_expire_handler, NULL);
- if (ret < 0) {
- char buf[32];
-
- tep_strerror(tep, ret, buf, 32)
- printf("Failed to register handler for hrtimer_expire_entry: %s\n", buf);
- } else {
- switch (ret) {
- case TEP_REGISTER_SUCCESS:
- printf ("Registered handler for hrtimer_expire_entry\n");
- break;
- case TEP_REGISTER_SUCCESS_OVERWRITE:
- printf ("Overwrote handler for hrtimer_expire_entry\n");
- break;
- }
- }
-...
- ret = tep_unregister_event_handler(tep, -1, "timer", "hrtimer_expire_entry",
- timer_expire_handler, NULL);
- if ( ret )
- printf ("Failed to unregister handler for hrtimer_expire_entry\n");
-
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
- Header file to include in order to have access to the library APIs.
-*trace-seq.h*
- Header file to include in order to have access to trace sequences
- related APIs. Trace sequences are used to allow a function to call
- several other functions to create a string of data to use.
-*-ltraceevent*
- Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-reg_print_func.txt b/tools/lib/traceevent/Documentation/libtraceevent-reg_print_func.txt
deleted file mode 100644
index 708dce91ebd8..000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-reg_print_func.txt
+++ /dev/null
@@ -1,155 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_register_print_function,tep_unregister_print_function -
-Registers / Unregisters a helper function.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-enum *tep_func_arg_type* {
- TEP_FUNC_ARG_VOID,
- TEP_FUNC_ARG_INT,
- TEP_FUNC_ARG_LONG,
- TEP_FUNC_ARG_STRING,
- TEP_FUNC_ARG_PTR,
- TEP_FUNC_ARG_MAX_TYPES
-};
-
-typedef unsigned long long (*pass:[*]tep_func_handler*)(struct trace_seq pass:[*]s, unsigned long long pass:[*]args);
-
-int *tep_register_print_function*(struct tep_handle pass:[*]_tep_, tep_func_handler _func_, enum tep_func_arg_type _ret_type_, char pass:[*]_name_, _..._);
-int *tep_unregister_print_function*(struct tep_handle pass:[*]_tep_, tep_func_handler _func_, char pass:[*]_name_);
---
-
-DESCRIPTION
------------
-Some events may have helper functions in the print format arguments.
-This allows a plugin to dynamically create a way to process one of
-these functions.
-
-The _tep_register_print_function()_ registers such helper function. The _tep_
-argument is the trace event parser context. The _func_ argument is a pointer
-to the helper function. The _ret_type_ argument is the return type of the
-helper function, value from the _tep_func_arg_type_ enum. The _name_ is the name
-of the helper function, as seen in the print format arguments. The _..._ is a
-variable list of _tep_func_arg_type_ enums, the _func_ function arguments.
-This list must end with _TEP_FUNC_ARG_VOID_. See 'EXAMPLE' section.
-
-The _tep_unregister_print_function()_ unregisters a helper function, previously
-registered with _tep_register_print_function()_. The _tep_ argument is the
-trace event parser context. The _func_ and _name_ arguments are the same, used
-when the helper function was registered.
-
-The _tep_func_handler_ is the type of the helper function. The _s_ argument is
-the trace sequence, it can be used to create a custom string.
-The _args_ is a list of arguments, defined when the helper function was
-registered.
-
-RETURN VALUE
-------------
-The _tep_register_print_function()_ function returns 0 in case of success.
-In case of an error, TEP_ERRNO_... code is returned.
-
-The _tep_unregister_print_function()_ returns 0 in case of success, or -1 in
-case of an error.
-
-EXAMPLE
--------
-Some events have internal functions calls, that appear in the print format
-output. For example "tracefs/events/i915/g4x_wm/format" has:
-[source,c]
---
-print fmt: "pipe %c, frame=%u, scanline=%u, wm %d/%d/%d, sr %s/%d/%d/%d, hpll %s/%d/%d/%d, fbc %s",
- ((REC->pipe) + 'A'), REC->frame, REC->scanline, REC->primary,
- REC->sprite, REC->cursor, yesno(REC->cxsr), REC->sr_plane,
- REC->sr_cursor, REC->sr_fbc, yesno(REC->hpll), REC->hpll_plane,
- REC->hpll_cursor, REC->hpll_fbc, yesno(REC->fbc)
---
-Notice the call to function _yesno()_ in the print arguments. In the kernel
-context, this function has the following implementation:
-[source,c]
---
-static const char *yesno(int x)
-{
- static const char *yes = "yes";
- static const char *no = "no";
-
- return x ? yes : no;
-}
---
-The user space event parser has no idea how to handle this _yesno()_ function.
-The _tep_register_print_function()_ API can be used to register a user space
-helper function, mapped to the kernel's _yesno()_:
-[source,c]
---
-#include <event-parse.h>
-#include <trace-seq.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-static const char *yes_no_helper(int x)
-{
- return x ? "yes" : "no";
-}
-...
- if ( tep_register_print_function(tep,
- yes_no_helper,
- TEP_FUNC_ARG_STRING,
- "yesno",
- TEP_FUNC_ARG_INT,
- TEP_FUNC_ARG_VOID) != 0) {
- /* Failed to register yes_no_helper function */
- }
-
-/*
- Now, when the event parser encounters this yesno() function, it will know
- how to handle it.
-*/
-...
- if (tep_unregister_print_function(tep, yes_no_helper, "yesno") != 0) {
- /* Failed to unregister yes_no_helper function */
- }
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
- Header file to include in order to have access to the library APIs.
-*trace-seq.h*
- Header file to include in order to have access to trace sequences
- related APIs. Trace sequences are used to allow a function to call
- several other functions to create a string of data to use.
-*-ltraceevent*
- Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-set_flag.txt b/tools/lib/traceevent/Documentation/libtraceevent-set_flag.txt
deleted file mode 100644
index b0599780b9a6..000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-set_flag.txt
+++ /dev/null
@@ -1,104 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_set_flag, tep_clear_flag, tep_test_flag -
-Manage flags of trace event parser context.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-enum *tep_flag* {
- _TEP_NSEC_OUTPUT_,
- _TEP_DISABLE_SYS_PLUGINS_,
- _TEP_DISABLE_PLUGINS_
-};
-void *tep_set_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_);
-void *tep_clear_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_);
-bool *tep_test_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_);
---
-
-DESCRIPTION
------------
-Trace event parser context flags are defined in *enum tep_flag*:
-[verse]
---
-_TEP_NSEC_OUTPUT_ - print event's timestamp in nano seconds, instead of micro seconds.
-_TEP_DISABLE_SYS_PLUGINS_ - disable plugins, located in system's plugin
- directory. This directory is defined at library compile
- time, and usually depends on library installation
- prefix: (install_preffix)/lib/traceevent/plugins
-_TEP_DISABLE_PLUGINS_ - disable all library plugins:
- - in system's plugin directory
- - in directory, defined by the environment variable _TRACEEVENT_PLUGIN_DIR_
- - in user's home directory, _~/.traceevent/plugins_
---
-Note: plugin related flags must me set before calling _tep_load_plugins()_ API.
-
-The _tep_set_flag()_ function sets _flag_ to _tep_ context.
-
-The _tep_clear_flag()_ function clears _flag_ from _tep_ context.
-
-The _tep_test_flag()_ function tests if _flag_ is set to _tep_ context.
-
-RETURN VALUE
-------------
-_tep_test_flag()_ function returns true if _flag_ is set, false otherwise.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-/* Print timestamps in nanoseconds */
-tep_set_flag(tep, TEP_NSEC_OUTPUT);
-...
-if (tep_test_flag(tep, TEP_NSEC_OUTPUT)) {
- /* print timestamps in nanoseconds */
-} else {
- /* print timestamps in microseconds */
-}
-...
-/* Print timestamps in microseconds */
-tep_clear_flag(tep, TEP_NSEC_OUTPUT);
-...
---
-FILES
------
-[verse]
---
-*event-parse.h*
- Header file to include in order to have access to the library APIs.
-*-ltraceevent*
- Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-strerror.txt b/tools/lib/traceevent/Documentation/libtraceevent-strerror.txt
deleted file mode 100644
index ee4062a00c9f..000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-strerror.txt
+++ /dev/null
@@ -1,85 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_strerror - Returns a string describing regular errno and tep error number.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-int *tep_strerror*(struct tep_handle pass:[*]_tep_, enum tep_errno _errnum_, char pass:[*]_buf_, size_t _buflen_);
-
---
-DESCRIPTION
------------
-The _tep_strerror()_ function converts tep error number into a human
-readable string.
-The _tep_ argument is trace event parser context. The _errnum_ is a regular
-errno, defined in errno.h, or a tep error number. The string, describing this
-error number is copied in the _buf_ argument. The _buflen_ argument is
-the size of the _buf_.
-
-It as a thread safe wrapper around strerror_r(). The library function has two
-different behaviors - POSIX and GNU specific. The _tep_strerror()_ API always
-behaves as the POSIX version - the error string is copied in the user supplied
-buffer.
-
-RETURN VALUE
-------------
-The _tep_strerror()_ function returns 0, if a valid _errnum_ is passed and the
-string is copied into _buf_. If _errnum_ is not a valid error number,
--1 is returned and _buf_ is not modified.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-char buf[32];
-char *pool = calloc(1, 128);
-if (tep == NULL) {
- tep_strerror(tep, TEP_ERRNO__MEM_ALLOC_FAILED, buf, 32);
- printf ("The pool is not initialized, %s", buf);
-}
-...
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
- Header file to include in order to have access to the library APIs.
-*-ltraceevent*
- Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-tseq.txt b/tools/lib/traceevent/Documentation/libtraceevent-tseq.txt
deleted file mode 100644
index 8ac6aa174e12..000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-tseq.txt
+++ /dev/null
@@ -1,158 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-trace_seq_init, trace_seq_destroy, trace_seq_reset, trace_seq_terminate,
-trace_seq_putc, trace_seq_puts, trace_seq_printf, trace_seq_vprintf,
-trace_seq_do_fprintf, trace_seq_do_printf -
-Initialize / destroy a trace sequence.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-*#include <trace-seq.h>*
-
-void *trace_seq_init*(struct trace_seq pass:[*]_s_);
-void *trace_seq_destroy*(struct trace_seq pass:[*]_s_);
-void *trace_seq_reset*(struct trace_seq pass:[*]_s_);
-void *trace_seq_terminate*(struct trace_seq pass:[*]_s_);
-int *trace_seq_putc*(struct trace_seq pass:[*]_s_, unsigned char _c_);
-int *trace_seq_puts*(struct trace_seq pass:[*]_s_, const char pass:[*]_str_);
-int *trace_seq_printf*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, _..._);
-int *trace_seq_vprintf*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, va_list _args_);
-int *trace_seq_do_printf*(struct trace_seq pass:[*]_s_);
-int *trace_seq_do_fprintf*(struct trace_seq pass:[*]_s_, FILE pass:[*]_fp_);
---
-
-DESCRIPTION
------------
-Trace sequences are used to allow a function to call several other functions
-to create a string of data to use.
-
-The _trace_seq_init()_ function initializes the trace sequence _s_.
-
-The _trace_seq_destroy()_ function destroys the trace sequence _s_ and frees
-all its resources that it had used.
-
-The _trace_seq_reset()_ function re-initializes the trace sequence _s_. All
-characters already written in _s_ will be deleted.
-
-The _trace_seq_terminate()_ function terminates the trace sequence _s_. It puts
-the null character pass:['\0'] at the end of the buffer.
-
-The _trace_seq_putc()_ function puts a single character _c_ in the trace
-sequence _s_.
-
-The _trace_seq_puts()_ function puts a NULL terminated string _str_ in the
-trace sequence _s_.
-
-The _trace_seq_printf()_ function puts a formated string _fmt _with
-variable arguments _..._ in the trace sequence _s_.
-
-The _trace_seq_vprintf()_ function puts a formated string _fmt _with
-list of arguments _args_ in the trace sequence _s_.
-
-The _trace_seq_do_printf()_ function prints the buffer of trace sequence _s_ to
-the standard output stdout.
-
-The _trace_seq_do_fprintf()_ function prints the buffer of trace sequence _s_
-to the given file _fp_.
-
-RETURN VALUE
-------------
-Both _trace_seq_putc()_ and _trace_seq_puts()_ functions return the number of
-characters put in the trace sequence, or 0 in case of an error
-
-Both _trace_seq_printf()_ and _trace_seq_vprintf()_ functions return 0 if the
-trace oversizes the buffer's free space, the number of characters printed, or
-a negative value in case of an error.
-
-Both _trace_seq_do_printf()_ and _trace_seq_do_fprintf()_ functions return the
-number of printed characters, or -1 in case of an error.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-#include <trace-seq.h>
-...
-struct trace_seq seq;
-trace_seq_init(&seq);
-...
-void foo_seq_print(struct trace_seq *tseq, char *format, ...)
-{
- va_list ap;
- va_start(ap, format);
- if (trace_seq_vprintf(tseq, format, ap) <= 0) {
- /* Failed to print in the trace sequence */
- }
- va_end(ap);
-}
-
-trace_seq_reset(&seq);
-
-char *str = " MAN page example";
-if (trace_seq_puts(&seq, str) != strlen(str)) {
- /* Failed to put str in the trace sequence */
-}
-if (trace_seq_putc(&seq, ':') != 1) {
- /* Failed to put ':' in the trace sequence */
-}
-if (trace_seq_printf(&seq, " trace sequence: %d", 1) <= 0) {
- /* Failed to print in the trace sequence */
-}
-foo_seq_print( &seq, " %d\n", 2);
-
-trace_seq_terminate(&seq);
-...
-
-if (trace_seq_do_printf(&seq) < 0 ) {
- /* Failed to print the sequence buffer to the standard output */
-}
-FILE *fp = fopen("trace.txt", "w");
-if (trace_seq_do_fprintf(&seq, fp) < 0 ) [
- /* Failed to print the sequence buffer to the trace.txt file */
-}
-
-trace_seq_destroy(&seq);
-...
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
- Header file to include in order to have access to the library APIs.
-*trace-seq.h*
- Header file to include in order to have access to trace sequences related APIs.
-*-ltraceevent*
- Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent.txt b/tools/lib/traceevent/Documentation/libtraceevent.txt
deleted file mode 100644
index d530a7ce8fb2..000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent.txt
+++ /dev/null
@@ -1,192 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-libtraceevent - Linux kernel trace event library
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-Management of tep handler data structure and access of its members:
- struct tep_handle pass:[*]*tep_alloc*(void);
- void *tep_free*(struct tep_handle pass:[*]_tep_);
- void *tep_ref*(struct tep_handle pass:[*]_tep_);
- void *tep_unref*(struct tep_handle pass:[*]_tep_);
- int *tep_get_ref*(struct tep_handle pass:[*]_tep_);
- void *tep_set_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_);
- void *tep_clear_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_);
- bool *tep_test_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flags_);
- int *tep_get_cpus*(struct tep_handle pass:[*]_tep_);
- void *tep_set_cpus*(struct tep_handle pass:[*]_tep_, int _cpus_);
- int *tep_get_long_size*(strucqt tep_handle pass:[*]_tep_);
- void *tep_set_long_size*(struct tep_handle pass:[*]_tep_, int _long_size_);
- int *tep_get_page_size*(struct tep_handle pass:[*]_tep_);
- void *tep_set_page_size*(struct tep_handle pass:[*]_tep_, int _page_size_);
- int *tep_get_header_page_size*(struct tep_handle pass:[*]_tep_);
- int *tep_get_header_timestamp_size*(struct tep_handle pass:[*]_tep_);
- bool *tep_is_old_format*(struct tep_handle pass:[*]_tep_);
- int *tep_strerror*(struct tep_handle pass:[*]_tep_, enum tep_errno _errnum_, char pass:[*]_buf_, size_t _buflen_);
-
-Register / unregister APIs:
- int *tep_register_function*(struct tep_handle pass:[*]_tep_, char pass:[*]_name_, unsigned long long _addr_, char pass:[*]_mod_);
- int *tep_register_event_handler*(struct tep_handle pass:[*]_tep_, int _id_, const char pass:[*]_sys_name_, const char pass:[*]_event_name_, tep_event_handler_func _func_, void pass:[*]_context_);
- int *tep_unregister_event_handler*(struct tep_handle pass:[*]tep, int id, const char pass:[*]sys_name, const char pass:[*]event_name, tep_event_handler_func func, void pass:[*]_context_);
- int *tep_register_print_string*(struct tep_handle pass:[*]_tep_, const char pass:[*]_fmt_, unsigned long long _addr_);
- int *tep_register_print_function*(struct tep_handle pass:[*]_tep_, tep_func_handler _func_, enum tep_func_arg_type _ret_type_, char pass:[*]_name_, _..._);
- int *tep_unregister_print_function*(struct tep_handle pass:[*]_tep_, tep_func_handler _func_, char pass:[*]_name_);
-
-Plugins management:
- struct tep_plugin_list pass:[*]*tep_load_plugins*(struct tep_handle pass:[*]_tep_);
- void *tep_unload_plugins*(struct tep_plugin_list pass:[*]_plugin_list_, struct tep_handle pass:[*]_tep_);
- char pass:[*]pass:[*]*tep_plugin_list_options*(void);
- void *tep_plugin_free_options_list*(char pass:[*]pass:[*]_list_);
- int *tep_plugin_add_options*(const char pass:[*]_name_, struct tep_plugin_option pass:[*]_options_);
- void *tep_plugin_remove_options*(struct tep_plugin_option pass:[*]_options_);
- void *tep_print_plugins*(struct trace_seq pass:[*]_s_, const char pass:[*]_prefix_, const char pass:[*]_suffix_, const struct tep_plugin_list pass:[*]_list_);
-
-Event related APIs:
- struct tep_event pass:[*]*tep_get_event*(struct tep_handle pass:[*]_tep_, int _index_);
- struct tep_event pass:[*]*tep_get_first_event*(struct tep_handle pass:[*]_tep_);
- int *tep_get_events_count*(struct tep_handle pass:[*]_tep_);
- struct tep_event pass:[*]pass:[*]*tep_list_events*(struct tep_handle pass:[*]_tep_, enum tep_event_sort_type _sort_type_);
- struct tep_event pass:[*]pass:[*]*tep_list_events_copy*(struct tep_handle pass:[*]_tep_, enum tep_event_sort_type _sort_type_);
- void *tep_print_event*(struct tep_handle pass:[*]_tep_, struct trace_seq pass:[*]_s_, struct tep_record pass:[*]_record_, const char pass:[*]_fmt_, _..._);
-
-Event finding:
- struct tep_event pass:[*]*tep_find_event*(struct tep_handle pass:[*]_tep_, int _id_);
- struct tep_event pass:[*]*tep_find_event_by_name*(struct tep_handle pass:[*]_tep_, const char pass:[*]_sys_, const char pass:[*]_name_);
- struct tep_event pass:[*]*tep_find_event_by_record*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_record_);
-
-Parsing of event files:
- int *tep_parse_header_page*(struct tep_handle pass:[*]_tep_, char pass:[*]_buf_, unsigned long _size_, int _long_size_);
- enum tep_errno *tep_parse_event*(struct tep_handle pass:[*]_tep_, const char pass:[*]_buf_, unsigned long _size_, const char pass:[*]_sys_);
- enum tep_errno *tep_parse_format*(struct tep_handle pass:[*]_tep_, struct tep_event pass:[*]pass:[*]_eventp_, const char pass:[*]_buf_, unsigned long _size_, const char pass:[*]_sys_);
-
-APIs related to fields from event's format files:
- struct tep_format_field pass:[*]pass:[*]*tep_event_common_fields*(struct tep_event pass:[*]_event_);
- struct tep_format_field pass:[*]pass:[*]*tep_event_fields*(struct tep_event pass:[*]_event_);
- void pass:[*]*tep_get_field_raw*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, int pass:[*]_len_, int _err_);
- int *tep_get_field_val*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, unsigned long long pass:[*]_val_, int _err_);
- int *tep_get_common_field_val*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, unsigned long long pass:[*]_val_, int _err_);
- int *tep_get_any_field_val*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, unsigned long long pass:[*]_val_, int _err_);
- int *tep_read_number_field*(struct tep_format_field pass:[*]_field_, const void pass:[*]_data_, unsigned long long pass:[*]_value_);
-
-Event fields printing:
- void *tep_print_field*(struct trace_seq pass:[*]_s_, void pass:[*]_data_, struct tep_format_field pass:[*]_field_);
- void *tep_print_fields*(struct trace_seq pass:[*]_s_, void pass:[*]_data_, int _size_, struct tep_event pass:[*]_event_);
- int *tep_print_num_field*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, int _err_);
- int *tep_print_func_field*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, int _err_);
-
-Event fields finding:
- struct tep_format_field pass:[*]*tep_find_common_field*(struct tep_event pass:[*]_event_, const char pass:[*]_name_);
- struct tep_format_field pass:[*]*tep_find_field*(struct tep_event_ormat pass:[*]_event_, const char pass:[*]_name_);
- struct tep_format_field pass:[*]*tep_find_any_field*(struct tep_event pass:[*]_event_, const char pass:[*]_name_);
-
-Functions resolver:
- int *tep_set_function_resolver*(struct tep_handle pass:[*]_tep_, tep_func_resolver_t pass:[*]_func_, void pass:[*]_priv_);
- void *tep_reset_function_resolver*(struct tep_handle pass:[*]_tep_);
- const char pass:[*]*tep_find_function*(struct tep_handle pass:[*]_tep_, unsigned long long _addr_);
- unsigned long long *tep_find_function_address*(struct tep_handle pass:[*]_tep_, unsigned long long _addr_);
-
-Filter management:
- struct tep_event_filter pass:[*]*tep_filter_alloc*(struct tep_handle pass:[*]_tep_);
- enum tep_errno *tep_filter_add_filter_str*(struct tep_event_filter pass:[*]_filter_, const char pass:[*]_filter_str_);
- enum tep_errno *tep_filter_match*(struct tep_event_filter pass:[*]_filter_, struct tep_record pass:[*]_record_);
- int *tep_filter_strerror*(struct tep_event_filter pass:[*]_filter_, enum tep_errno _err_, char pass:[*]buf, size_t _buflen_);
- int *tep_event_filtered*(struct tep_event_filter pass:[*]_filter_, int _event_id_);
- void *tep_filter_reset*(struct tep_event_filter pass:[*]_filter_);
- void *tep_filter_free*(struct tep_event_filter pass:[*]_filter_);
- char pass:[*]*tep_filter_make_string*(struct tep_event_filter pass:[*]_filter_, int _event_id_);
- int *tep_filter_remove_event*(struct tep_event_filter pass:[*]_filter_, int _event_id_);
- int *tep_filter_copy*(struct tep_event_filter pass:[*]_dest_, struct tep_event_filter pass:[*]_source_);
- int *tep_filter_compare*(struct tep_event_filter pass:[*]_filter1_, struct tep_event_filter pass:[*]_filter2_);
-
-Parsing various data from the records:
- int *tep_data_type*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_);
- int *tep_data_pid*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_);
- int *tep_data_preempt_count*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_);
- int *tep_data_flags*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_);
-
-Command and task related APIs:
- const char pass:[*]*tep_data_comm_from_pid*(struct tep_handle pass:[*]_tep_, int _pid_);
- struct cmdline pass:[*]*tep_data_pid_from_comm*(struct tep_handle pass:[*]_tep_, const char pass:[*]_comm_, struct cmdline pass:[*]_next_);
- int *tep_register_comm*(struct tep_handle pass:[*]_tep_, const char pass:[*]_comm_, int _pid_);
- int *tep_override_comm*(struct tep_handle pass:[*]_tep_, const char pass:[*]_comm_, int _pid_);
- bool *tep_is_pid_registered*(struct tep_handle pass:[*]_tep_, int _pid_);
- int *tep_cmdline_pid*(struct tep_handle pass:[*]_tep_, struct cmdline pass:[*]_cmdline_);
-
-Endian related APIs:
- int *tep_is_bigendian*(void);
- unsigned long long *tep_read_number*(struct tep_handle pass:[*]_tep_, const void pass:[*]_ptr_, int _size_);
- bool *tep_is_file_bigendian*(struct tep_handle pass:[*]_tep_);
- void *tep_set_file_bigendian*(struct tep_handle pass:[*]_tep_, enum tep_endian _endian_);
- bool *tep_is_local_bigendian*(struct tep_handle pass:[*]_tep_);
- void *tep_set_local_bigendian*(struct tep_handle pass:[*]_tep_, enum tep_endian _endian_);
-
-Trace sequences:
-*#include <trace-seq.h>*
- void *trace_seq_init*(struct trace_seq pass:[*]_s_);
- void *trace_seq_reset*(struct trace_seq pass:[*]_s_);
- void *trace_seq_destroy*(struct trace_seq pass:[*]_s_);
- int *trace_seq_printf*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, ...);
- int *trace_seq_vprintf*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, va_list _args_);
- int *trace_seq_puts*(struct trace_seq pass:[*]_s_, const char pass:[*]_str_);
- int *trace_seq_putc*(struct trace_seq pass:[*]_s_, unsigned char _c_);
- void *trace_seq_terminate*(struct trace_seq pass:[*]_s_);
- int *trace_seq_do_fprintf*(struct trace_seq pass:[*]_s_, FILE pass:[*]_fp_);
- int *trace_seq_do_printf*(struct trace_seq pass:[*]_s_);
---
-
-DESCRIPTION
------------
-The libtraceevent(3) library provides APIs to access kernel tracepoint events,
-located in the tracefs file system under the events directory.
-
-ENVIRONMENT
------------
-[verse]
---
-TRACEEVENT_PLUGIN_DIR
- Additional plugin directory. All shared object files, located in this directory will be loaded as traceevent plugins.
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
- Header file to include in order to have access to the library APIs.
-*trace-seq.h*
- Header file to include in order to have access to trace sequences related APIs.
- Trace sequences are used to allow a function to call several other functions
- to create a string of data to use.
-*-ltraceevent*
- Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/manpage-1.72.xsl b/tools/lib/traceevent/Documentation/manpage-1.72.xsl
deleted file mode 100644
index b4d315cb8c47..000000000000
--- a/tools/lib/traceevent/Documentation/manpage-1.72.xsl
+++ /dev/null
@@ -1,14 +0,0 @@
-<!-- manpage-1.72.xsl:
- special settings for manpages rendered from asciidoc+docbook
- handles peculiarities in docbook-xsl 1.72.0 -->
-<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
- version="1.0">
-
-<xsl:import href="manpage-base.xsl"/>
-
-<!-- these are the special values for the roff control characters
- needed for docbook-xsl 1.72.0 -->
-<xsl:param name="git.docbook.backslash">&#x2593;</xsl:param>
-<xsl:param name="git.docbook.dot" >&#x2302;</xsl:param>
-
-</xsl:stylesheet>
diff --git a/tools/lib/traceevent/Documentation/manpage-base.xsl b/tools/lib/traceevent/Documentation/manpage-base.xsl
deleted file mode 100644
index a264fa616093..000000000000
--- a/tools/lib/traceevent/Documentation/manpage-base.xsl
+++ /dev/null
@@ -1,35 +0,0 @@
-<!-- manpage-base.xsl:
- special formatting for manpages rendered from asciidoc+docbook -->
-<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
- version="1.0">
-
-<!-- these params silence some output from xmlto -->
-<xsl:param name="man.output.quietly" select="1"/>
-<xsl:param name="refentry.meta.get.quietly" select="1"/>
-
-<!-- convert asciidoc callouts to man page format;
- git.docbook.backslash and git.docbook.dot params
- must be supplied by another XSL file or other means -->
-<xsl:template match="co">
- <xsl:value-of select="concat(
- $git.docbook.backslash,'fB(',
- substring-after(@id,'-'),')',
- $git.docbook.backslash,'fR')"/>
-</xsl:template>
-<xsl:template match="calloutlist">
- <xsl:value-of select="$git.docbook.dot"/>
- <xsl:text>sp&#10;</xsl:text>
- <xsl:apply-templates/>
- <xsl:text>&#10;</xsl:text>
-</xsl:template>
-<xsl:template match="callout">
- <xsl:value-of select="concat(
- $git.docbook.backslash,'fB',
- substring-after(@arearefs,'-'),
- '. ',$git.docbook.backslash,'fR')"/>
- <xsl:apply-templates/>
- <xsl:value-of select="$git.docbook.dot"/>
- <xsl:text>br&#10;</xsl:text>
-</xsl:template>
-
-</xsl:stylesheet>
diff --git a/tools/lib/traceevent/Documentation/manpage-bold-literal.xsl b/tools/lib/traceevent/Documentation/manpage-bold-literal.xsl
deleted file mode 100644
index 608eb5df6281..000000000000
--- a/tools/lib/traceevent/Documentation/manpage-bold-literal.xsl
+++ /dev/null
@@ -1,17 +0,0 @@
-<!-- manpage-bold-literal.xsl:
- special formatting for manpages rendered from asciidoc+docbook -->
-<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
- version="1.0">
-
-<!-- render literal text as bold (instead of plain or monospace);
- this makes literal text easier to distinguish in manpages
- viewed on a tty -->
-<xsl:template match="literal">
- <xsl:value-of select="$git.docbook.backslash"/>
- <xsl:text>fB</xsl:text>
- <xsl:apply-templates/>
- <xsl:value-of select="$git.docbook.backslash"/>
- <xsl:text>fR</xsl:text>
-</xsl:template>
-
-</xsl:stylesheet>
diff --git a/tools/lib/traceevent/Documentation/manpage-normal.xsl b/tools/lib/traceevent/Documentation/manpage-normal.xsl
deleted file mode 100644
index a48f5b11f3dc..000000000000
--- a/tools/lib/traceevent/Documentation/manpage-normal.xsl
+++ /dev/null
@@ -1,13 +0,0 @@
-<!-- manpage-normal.xsl:
- special settings for manpages rendered from asciidoc+docbook
- handles anything we want to keep away from docbook-xsl 1.72.0 -->
-<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
- version="1.0">
-
-<xsl:import href="manpage-base.xsl"/>
-
-<!-- these are the normal values for the roff control characters -->
-<xsl:param name="git.docbook.backslash">\</xsl:param>
-<xsl:param name="git.docbook.dot" >.</xsl:param>
-
-</xsl:stylesheet>
diff --git a/tools/lib/traceevent/Documentation/manpage-suppress-sp.xsl b/tools/lib/traceevent/Documentation/manpage-suppress-sp.xsl
deleted file mode 100644
index a63c7632a87d..000000000000
--- a/tools/lib/traceevent/Documentation/manpage-suppress-sp.xsl
+++ /dev/null
@@ -1,21 +0,0 @@
-<!-- manpage-suppress-sp.xsl:
- special settings for manpages rendered from asciidoc+docbook
- handles erroneous, inline .sp in manpage output of some
- versions of docbook-xsl -->
-<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
- version="1.0">
-
-<!-- attempt to work around spurious .sp at the tail of the line
- that some versions of docbook stylesheets seem to add -->
-<xsl:template match="simpara">
- <xsl:variable name="content">
- <xsl:apply-templates/>
- </xsl:variable>
- <xsl:value-of select="normalize-space($content)"/>
- <xsl:if test="not(ancestor::authorblurb) and
- not(ancestor::personblurb)">
- <xsl:text>&#10;&#10;</xsl:text>
- </xsl:if>
-</xsl:template>
-
-</xsl:stylesheet>
diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile
deleted file mode 100644
index c874c017c636..000000000000
--- a/tools/lib/traceevent/Makefile
+++ /dev/null
@@ -1,300 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-# trace-cmd version
-EP_VERSION = 1
-EP_PATCHLEVEL = 1
-EP_EXTRAVERSION = 0
-
-# file format version
-FILE_VERSION = 6
-
-MAKEFLAGS += --no-print-directory
-
-
-# Makefiles suck: This macro sets a default value of $(2) for the
-# variable named by $(1), unless the variable has been set by
-# environment or command line. This is necessary for CC and AR
-# because make sets default values, so the simpler ?= approach
-# won't work as expected.
-define allow-override
- $(if $(or $(findstring environment,$(origin $(1))),\
- $(findstring command line,$(origin $(1)))),,\
- $(eval $(1) = $(2)))
-endef
-
-# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix.
-$(call allow-override,CC,$(CROSS_COMPILE)gcc)
-$(call allow-override,AR,$(CROSS_COMPILE)ar)
-$(call allow-override,NM,$(CROSS_COMPILE)nm)
-$(call allow-override,PKG_CONFIG,pkg-config)
-
-EXT = -std=gnu99
-INSTALL = install
-
-# Use DESTDIR for installing into a different root directory.
-# This is useful for building a package. The program will be
-# installed in this directory as if it was the root directory.
-# Then the build tool can move it later.
-DESTDIR ?=
-DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
-
-LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1)
-ifeq ($(LP64), 1)
- libdir_relative_temp = lib64
-else
- libdir_relative_temp = lib
-endif
-
-libdir_relative ?= $(libdir_relative_temp)
-prefix ?= /usr/local
-libdir = $(prefix)/$(libdir_relative)
-man_dir = $(prefix)/share/man
-man_dir_SQ = '$(subst ','\'',$(man_dir))'
-pkgconfig_dir ?= $(word 1,$(shell $(PKG_CONFIG) \
- --variable pc_path pkg-config | tr ":" " "))
-includedir_relative = traceevent
-includedir = $(prefix)/include/$(includedir_relative)
-includedir_SQ = '$(subst ','\'',$(includedir))'
-
-export man_dir man_dir_SQ INSTALL
-export DESTDIR DESTDIR_SQ
-export EVENT_PARSE_VERSION
-
-include ../../scripts/Makefile.include
-
-# copy a bit from Linux kbuild
-
-ifeq ("$(origin V)", "command line")
- VERBOSE = $(V)
-endif
-ifndef VERBOSE
- VERBOSE = 0
-endif
-
-ifeq ($(srctree),)
-srctree := $(patsubst %/,%,$(dir $(CURDIR)))
-srctree := $(patsubst %/,%,$(dir $(srctree)))
-srctree := $(patsubst %/,%,$(dir $(srctree)))
-#$(info Determined 'srctree' to be $(srctree))
-endif
-
-export prefix libdir src obj
-
-# Shell quotes
-libdir_SQ = $(subst ','\'',$(libdir))
-libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
-
-CONFIG_INCLUDES =
-CONFIG_LIBS =
-CONFIG_FLAGS =
-
-VERSION = $(EP_VERSION)
-PATCHLEVEL = $(EP_PATCHLEVEL)
-EXTRAVERSION = $(EP_EXTRAVERSION)
-
-OBJ = $@
-N =
-
-EVENT_PARSE_VERSION = $(EP_VERSION).$(EP_PATCHLEVEL).$(EP_EXTRAVERSION)
-
-LIB_TARGET = libtraceevent.a libtraceevent.so.$(EVENT_PARSE_VERSION)
-LIB_INSTALL = libtraceevent.a libtraceevent.so*
-LIB_INSTALL := $(addprefix $(OUTPUT),$(LIB_INSTALL))
-
-INCLUDES = -I. -I $(srctree)/tools/include $(CONFIG_INCLUDES)
-
-# Set compile option CFLAGS
-ifdef EXTRA_CFLAGS
- CFLAGS := $(EXTRA_CFLAGS)
-else
- CFLAGS := -g -Wall
-endif
-
-# Append required CFLAGS
-override CFLAGS += -fPIC
-override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ)
-override CFLAGS += $(udis86-flags) -D_GNU_SOURCE
-
-ifeq ($(VERBOSE),1)
- Q =
-else
- Q = @
-endif
-
-# Disable command line variables (CFLAGS) override from top
-# level Makefile (perf), otherwise build Makefile will get
-# the same command line setup.
-MAKEOVERRIDES=
-
-export srctree OUTPUT CC LD CFLAGS V
-build := -f $(srctree)/tools/build/Makefile.build dir=. obj
-
-TE_IN := $(OUTPUT)libtraceevent-in.o
-LIB_TARGET := $(addprefix $(OUTPUT),$(LIB_TARGET))
-
-CMD_TARGETS = $(LIB_TARGET)
-
-TARGETS = $(CMD_TARGETS)
-
-all: all_cmd plugins
-
-all_cmd: $(CMD_TARGETS)
-
-$(TE_IN): force
- $(Q)$(MAKE) $(build)=libtraceevent
-
-$(OUTPUT)libtraceevent.so.$(EVENT_PARSE_VERSION): $(TE_IN)
- $(QUIET_LINK)$(CC) --shared $(LDFLAGS) $^ -Wl,-soname,libtraceevent.so.$(EP_VERSION) -o $@
- @ln -sf $(@F) $(OUTPUT)libtraceevent.so
- @ln -sf $(@F) $(OUTPUT)libtraceevent.so.$(EP_VERSION)
-
-$(OUTPUT)libtraceevent.a: $(TE_IN)
- $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^
-
-$(OUTPUT)%.so: $(OUTPUT)%-in.o
- $(QUIET_LINK)$(CC) $(CFLAGS) -shared $(LDFLAGS) -nostartfiles -o $@ $^
-
-define make_version.h
- (echo '/* This file is automatically generated. Do not modify. */'; \
- echo \#define VERSION_CODE $(shell \
- expr $(VERSION) \* 256 + $(PATCHLEVEL)); \
- echo '#define EXTRAVERSION ' $(EXTRAVERSION); \
- echo '#define VERSION_STRING "'$(VERSION).$(PATCHLEVEL).$(EXTRAVERSION)'"'; \
- echo '#define FILE_VERSION '$(FILE_VERSION); \
- ) > $1
-endef
-
-define update_version.h
- ($(call make_version.h, $@.tmp); \
- if [ -r $@ ] && cmp -s $@ $@.tmp; then \
- rm -f $@.tmp; \
- else \
- echo ' UPDATE $@'; \
- mv -f $@.tmp $@; \
- fi);
-endef
-
-ep_version.h: force
- $(Q)$(N)$(call update_version.h)
-
-VERSION_FILES = ep_version.h
-
-define update_dir
- (echo $1 > $@.tmp; \
- if [ -r $@ ] && cmp -s $@ $@.tmp; then \
- rm -f $@.tmp; \
- else \
- echo ' UPDATE $@'; \
- mv -f $@.tmp $@; \
- fi);
-endef
-
-tags: force
- $(RM) tags
- find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \
- --regex-c++='/_PE\(([^,)]*).*/TEP_ERRNO__\1/'
-
-TAGS: force
- $(RM) TAGS
- find . -name '*.[ch]' | xargs etags \
- --regex='/_PE(\([^,)]*\).*/TEP_ERRNO__\1/'
-
-define do_install_mkdir
- if [ ! -d '$(DESTDIR_SQ)$1' ]; then \
- $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \
- fi
-endef
-
-define do_install
- $(call do_install_mkdir,$2); \
- $(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2'
-endef
-
-PKG_CONFIG_SOURCE_FILE = libtraceevent.pc
-PKG_CONFIG_FILE := $(addprefix $(OUTPUT),$(PKG_CONFIG_SOURCE_FILE))
-define do_install_pkgconfig_file
- if [ -n "${pkgconfig_dir}" ]; then \
- cp -f ${PKG_CONFIG_SOURCE_FILE}.template ${PKG_CONFIG_FILE}; \
- sed -i "s|INSTALL_PREFIX|${1}|g" ${PKG_CONFIG_FILE}; \
- sed -i "s|LIB_VERSION|${EVENT_PARSE_VERSION}|g" ${PKG_CONFIG_FILE}; \
- sed -i "s|LIB_DIR|${libdir}|g" ${PKG_CONFIG_FILE}; \
- sed -i "s|HEADER_DIR|$(includedir)|g" ${PKG_CONFIG_FILE}; \
- $(call do_install,$(PKG_CONFIG_FILE),$(pkgconfig_dir),644); \
- else \
- (echo Failed to locate pkg-config directory) 1>&2; \
- fi
-endef
-
-install_lib: all_cmd install_plugins install_headers install_pkgconfig
- $(call QUIET_INSTALL, $(LIB_TARGET)) \
- $(call do_install_mkdir,$(libdir_SQ)); \
- cp -fpR $(LIB_INSTALL) $(DESTDIR)$(libdir_SQ)
-
-install_pkgconfig:
- $(call QUIET_INSTALL, $(PKG_CONFIG_FILE)) \
- $(call do_install_pkgconfig_file,$(prefix))
-
-install_headers:
- $(call QUIET_INSTALL, headers) \
- $(call do_install,event-parse.h,$(includedir_SQ),644); \
- $(call do_install,event-utils.h,$(includedir_SQ),644); \
- $(call do_install,trace-seq.h,$(includedir_SQ),644); \
- $(call do_install,kbuffer.h,$(includedir_SQ),644)
-
-install: install_lib
-
-clean: clean_plugins
- $(call QUIET_CLEAN, libtraceevent) \
- $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d .*.cmd; \
- $(RM) TRACEEVENT-CFLAGS tags TAGS; \
- $(RM) $(PKG_CONFIG_FILE)
-
-PHONY += doc
-doc:
- $(call descend,Documentation)
-
-PHONY += doc-clean
-doc-clean:
- $(call descend,Documentation,clean)
-
-PHONY += doc-install
-doc-install:
- $(call descend,Documentation,install)
-
-PHONY += doc-uninstall
-doc-uninstall:
- $(call descend,Documentation,uninstall)
-
-PHONY += help
-help:
- @echo 'Possible targets:'
- @echo''
- @echo ' all - default, compile the library and the'\
- 'plugins'
- @echo ' plugins - compile the plugins'
- @echo ' install - install the library, the plugins,'\
- 'the header and pkgconfig files'
- @echo ' clean - clean the library and the plugins object files'
- @echo ' doc - compile the documentation files - man'\
- 'and html pages, in the Documentation directory'
- @echo ' doc-clean - clean the documentation files'
- @echo ' doc-install - install the man pages'
- @echo ' doc-uninstall - uninstall the man pages'
- @echo''
-
-PHONY += plugins
-plugins:
- $(call descend,plugins)
-
-PHONY += install_plugins
-install_plugins:
- $(call descend,plugins,install)
-
-PHONY += clean_plugins
-clean_plugins:
- $(call descend,plugins,clean)
-
-force:
-
-# Declare the contents of the .PHONY variable as phony. We keep that
-# information in a variable so we can use it in if_changed and friends.
-.PHONY: $(PHONY)
diff --git a/tools/lib/traceevent/event-parse-api.c b/tools/lib/traceevent/event-parse-api.c
deleted file mode 100644
index f8361e45d446..000000000000
--- a/tools/lib/traceevent/event-parse-api.c
+++ /dev/null
@@ -1,333 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- */
-
-#include "event-parse.h"
-#include "event-parse-local.h"
-#include "event-utils.h"
-
-/**
- * tep_get_event - returns the event with the given index
- * @tep: a handle to the tep_handle
- * @index: index of the requested event, in the range 0 .. nr_events
- *
- * This returns pointer to the element of the events array with the given index
- * If @tep is NULL, or @index is not in the range 0 .. nr_events, NULL is returned.
- */
-struct tep_event *tep_get_event(struct tep_handle *tep, int index)
-{
- if (tep && tep->events && index < tep->nr_events)
- return tep->events[index];
-
- return NULL;
-}
-
-/**
- * tep_get_first_event - returns the first event in the events array
- * @tep: a handle to the tep_handle
- *
- * This returns pointer to the first element of the events array
- * If @tep is NULL, NULL is returned.
- */
-struct tep_event *tep_get_first_event(struct tep_handle *tep)
-{
- return tep_get_event(tep, 0);
-}
-
-/**
- * tep_get_events_count - get the number of defined events
- * @tep: a handle to the tep_handle
- *
- * This returns number of elements in event array
- * If @tep is NULL, 0 is returned.
- */
-int tep_get_events_count(struct tep_handle *tep)
-{
- if (tep)
- return tep->nr_events;
- return 0;
-}
-
-/**
- * tep_set_flag - set event parser flag
- * @tep: a handle to the tep_handle
- * @flag: flag, or combination of flags to be set
- * can be any combination from enum tep_flag
- *
- * This sets a flag or combination of flags from enum tep_flag
- */
-void tep_set_flag(struct tep_handle *tep, int flag)
-{
- if (tep)
- tep->flags |= flag;
-}
-
-/**
- * tep_clear_flag - clear event parser flag
- * @tep: a handle to the tep_handle
- * @flag: flag to be cleared
- *
- * This clears a tep flag
- */
-void tep_clear_flag(struct tep_handle *tep, enum tep_flag flag)
-{
- if (tep)
- tep->flags &= ~flag;
-}
-
-/**
- * tep_test_flag - check the state of event parser flag
- * @tep: a handle to the tep_handle
- * @flag: flag to be checked
- *
- * This returns the state of the requested tep flag.
- * Returns: true if the flag is set, false otherwise.
- */
-bool tep_test_flag(struct tep_handle *tep, enum tep_flag flag)
-{
- if (tep)
- return tep->flags & flag;
- return false;
-}
-
-__hidden unsigned short data2host2(struct tep_handle *tep, unsigned short data)
-{
- unsigned short swap;
-
- if (!tep || tep->host_bigendian == tep->file_bigendian)
- return data;
-
- swap = ((data & 0xffULL) << 8) |
- ((data & (0xffULL << 8)) >> 8);
-
- return swap;
-}
-
-__hidden unsigned int data2host4(struct tep_handle *tep, unsigned int data)
-{
- unsigned int swap;
-
- if (!tep || tep->host_bigendian == tep->file_bigendian)
- return data;
-
- swap = ((data & 0xffULL) << 24) |
- ((data & (0xffULL << 8)) << 8) |
- ((data & (0xffULL << 16)) >> 8) |
- ((data & (0xffULL << 24)) >> 24);
-
- return swap;
-}
-
-__hidden unsigned long long
-data2host8(struct tep_handle *tep, unsigned long long data)
-{
- unsigned long long swap;
-
- if (!tep || tep->host_bigendian == tep->file_bigendian)
- return data;
-
- swap = ((data & 0xffULL) << 56) |
- ((data & (0xffULL << 8)) << 40) |
- ((data & (0xffULL << 16)) << 24) |
- ((data & (0xffULL << 24)) << 8) |
- ((data & (0xffULL << 32)) >> 8) |
- ((data & (0xffULL << 40)) >> 24) |
- ((data & (0xffULL << 48)) >> 40) |
- ((data & (0xffULL << 56)) >> 56);
-
- return swap;
-}
-
-/**
- * tep_get_header_page_size - get size of the header page
- * @tep: a handle to the tep_handle
- *
- * This returns size of the header page
- * If @tep is NULL, 0 is returned.
- */
-int tep_get_header_page_size(struct tep_handle *tep)
-{
- if (tep)
- return tep->header_page_size_size;
- return 0;
-}
-
-/**
- * tep_get_header_timestamp_size - get size of the timestamp in the header page
- * @tep: a handle to the tep_handle
- *
- * This returns size of the timestamp in the header page
- * If @tep is NULL, 0 is returned.
- */
-int tep_get_header_timestamp_size(struct tep_handle *tep)
-{
- if (tep)
- return tep->header_page_ts_size;
- return 0;
-}
-
-/**
- * tep_get_cpus - get the number of CPUs
- * @tep: a handle to the tep_handle
- *
- * This returns the number of CPUs
- * If @tep is NULL, 0 is returned.
- */
-int tep_get_cpus(struct tep_handle *tep)
-{
- if (tep)
- return tep->cpus;
- return 0;
-}
-
-/**
- * tep_set_cpus - set the number of CPUs
- * @tep: a handle to the tep_handle
- *
- * This sets the number of CPUs
- */
-void tep_set_cpus(struct tep_handle *tep, int cpus)
-{
- if (tep)
- tep->cpus = cpus;
-}
-
-/**
- * tep_get_long_size - get the size of a long integer on the traced machine
- * @tep: a handle to the tep_handle
- *
- * This returns the size of a long integer on the traced machine
- * If @tep is NULL, 0 is returned.
- */
-int tep_get_long_size(struct tep_handle *tep)
-{
- if (tep)
- return tep->long_size;
- return 0;
-}
-
-/**
- * tep_set_long_size - set the size of a long integer on the traced machine
- * @tep: a handle to the tep_handle
- * @size: size, in bytes, of a long integer
- *
- * This sets the size of a long integer on the traced machine
- */
-void tep_set_long_size(struct tep_handle *tep, int long_size)
-{
- if (tep)
- tep->long_size = long_size;
-}
-
-/**
- * tep_get_page_size - get the size of a memory page on the traced machine
- * @tep: a handle to the tep_handle
- *
- * This returns the size of a memory page on the traced machine
- * If @tep is NULL, 0 is returned.
- */
-int tep_get_page_size(struct tep_handle *tep)
-{
- if (tep)
- return tep->page_size;
- return 0;
-}
-
-/**
- * tep_set_page_size - set the size of a memory page on the traced machine
- * @tep: a handle to the tep_handle
- * @_page_size: size of a memory page, in bytes
- *
- * This sets the size of a memory page on the traced machine
- */
-void tep_set_page_size(struct tep_handle *tep, int _page_size)
-{
- if (tep)
- tep->page_size = _page_size;
-}
-
-/**
- * tep_is_file_bigendian - return the endian of the file
- * @tep: a handle to the tep_handle
- *
- * This returns true if the file is in big endian order
- * If @tep is NULL, false is returned.
- */
-bool tep_is_file_bigendian(struct tep_handle *tep)
-{
- if (tep)
- return (tep->file_bigendian == TEP_BIG_ENDIAN);
- return false;
-}
-
-/**
- * tep_set_file_bigendian - set if the file is in big endian order
- * @tep: a handle to the tep_handle
- * @endian: non zero, if the file is in big endian order
- *
- * This sets if the file is in big endian order
- */
-void tep_set_file_bigendian(struct tep_handle *tep, enum tep_endian endian)
-{
- if (tep)
- tep->file_bigendian = endian;
-}
-
-/**
- * tep_is_local_bigendian - return the endian of the saved local machine
- * @tep: a handle to the tep_handle
- *
- * This returns true if the saved local machine in @tep is big endian.
- * If @tep is NULL, false is returned.
- */
-bool tep_is_local_bigendian(struct tep_handle *tep)
-{
- if (tep)
- return (tep->host_bigendian == TEP_BIG_ENDIAN);
- return 0;
-}
-
-/**
- * tep_set_local_bigendian - set the stored local machine endian order
- * @tep: a handle to the tep_handle
- * @endian: non zero, if the local host has big endian order
- *
- * This sets the endian order for the local machine.
- */
-void tep_set_local_bigendian(struct tep_handle *tep, enum tep_endian endian)
-{
- if (tep)
- tep->host_bigendian = endian;
-}
-
-/**
- * tep_is_old_format - get if an old kernel is used
- * @tep: a handle to the tep_handle
- *
- * This returns true, if an old kernel is used to generate the tracing events or
- * false if a new kernel is used. Old kernels did not have header page info.
- * If @tep is NULL, false is returned.
- */
-bool tep_is_old_format(struct tep_handle *tep)
-{
- if (tep)
- return tep->old_format;
- return false;
-}
-
-/**
- * tep_set_test_filters - set a flag to test a filter string
- * @tep: a handle to the tep_handle
- * @test_filters: the new value of the test_filters flag
- *
- * This sets a flag to test a filter string. If this flag is set, when
- * tep_filter_add_filter_str() API as called,it will print the filter string
- * instead of adding it.
- */
-void tep_set_test_filters(struct tep_handle *tep, int test_filters)
-{
- if (tep)
- tep->test_filters = test_filters;
-}
diff --git a/tools/lib/traceevent/event-parse-local.h b/tools/lib/traceevent/event-parse-local.h
deleted file mode 100644
index fd4bbcfbb849..000000000000
--- a/tools/lib/traceevent/event-parse-local.h
+++ /dev/null
@@ -1,123 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- */
-
-#ifndef _PARSE_EVENTS_INT_H
-#define _PARSE_EVENTS_INT_H
-
-struct tep_cmdline;
-struct cmdline_list;
-struct func_map;
-struct func_list;
-struct event_handler;
-struct func_resolver;
-struct tep_plugins_dir;
-
-#define __hidden __attribute__((visibility ("hidden")))
-
-struct tep_handle {
- int ref_count;
-
- int header_page_ts_offset;
- int header_page_ts_size;
- int header_page_size_offset;
- int header_page_size_size;
- int header_page_data_offset;
- int header_page_data_size;
- int header_page_overwrite;
-
- enum tep_endian file_bigendian;
- enum tep_endian host_bigendian;
-
- int old_format;
-
- int cpus;
- int long_size;
- int page_size;
-
- struct tep_cmdline *cmdlines;
- struct cmdline_list *cmdlist;
- int cmdline_count;
-
- struct func_map *func_map;
- struct func_resolver *func_resolver;
- struct func_list *funclist;
- unsigned int func_count;
-
- struct printk_map *printk_map;
- struct printk_list *printklist;
- unsigned int printk_count;
-
- struct tep_event **events;
- int nr_events;
- struct tep_event **sort_events;
- enum tep_event_sort_type last_type;
-
- int type_offset;
- int type_size;
-
- int pid_offset;
- int pid_size;
-
- int pc_offset;
- int pc_size;
-
- int flags_offset;
- int flags_size;
-
- int ld_offset;
- int ld_size;
-
- int test_filters;
-
- int flags;
-
- struct tep_format_field *bprint_ip_field;
- struct tep_format_field *bprint_fmt_field;
- struct tep_format_field *bprint_buf_field;
-
- struct event_handler *handlers;
- struct tep_function_handler *func_handlers;
-
- /* cache */
- struct tep_event *last_event;
-
- struct tep_plugins_dir *plugins_dir;
-};
-
-enum tep_print_parse_type {
- PRINT_FMT_STRING,
- PRINT_FMT_ARG_DIGIT,
- PRINT_FMT_ARG_POINTER,
- PRINT_FMT_ARG_STRING,
-};
-
-struct tep_print_parse {
- struct tep_print_parse *next;
-
- char *format;
- int ls;
- enum tep_print_parse_type type;
- struct tep_print_arg *arg;
- struct tep_print_arg *len_as_arg;
-};
-
-void free_tep_event(struct tep_event *event);
-void free_tep_format_field(struct tep_format_field *field);
-void free_tep_plugin_paths(struct tep_handle *tep);
-
-unsigned short data2host2(struct tep_handle *tep, unsigned short data);
-unsigned int data2host4(struct tep_handle *tep, unsigned int data);
-unsigned long long data2host8(struct tep_handle *tep, unsigned long long data);
-
-/* access to the internal parser */
-int peek_char(void);
-void init_input_buf(const char *buf, unsigned long long size);
-unsigned long long get_input_buf_ptr(void);
-const char *get_input_buf(void);
-enum tep_event_type read_token(char **tok);
-void free_token(char *tok);
-
-#endif /* _PARSE_EVENTS_INT_H */
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
deleted file mode 100644
index fe58843d047c..000000000000
--- a/tools/lib/traceevent/event-parse.c
+++ /dev/null
@@ -1,7603 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- *
- * The parts for function graph printing was taken and modified from the
- * Linux Kernel that were written by
- * - Copyright (C) 2009 Frederic Weisbecker,
- * Frederic Weisbecker gave his permission to relicense the code to
- * the Lesser General Public License.
- */
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdarg.h>
-#include <ctype.h>
-#include <errno.h>
-#include <stdint.h>
-#include <limits.h>
-#include <linux/time64.h>
-
-#include <netinet/in.h>
-#include "event-parse.h"
-
-#include "event-parse-local.h"
-#include "event-utils.h"
-#include "trace-seq.h"
-
-static const char *input_buf;
-static unsigned long long input_buf_ptr;
-static unsigned long long input_buf_siz;
-
-static int is_flag_field;
-static int is_symbolic_field;
-
-static int show_warning = 1;
-
-#define do_warning(fmt, ...) \
- do { \
- if (show_warning) \
- warning(fmt, ##__VA_ARGS__); \
- } while (0)
-
-#define do_warning_event(event, fmt, ...) \
- do { \
- if (!show_warning) \
- continue; \
- \
- if (event) \
- warning("[%s:%s] " fmt, event->system, \
- event->name, ##__VA_ARGS__); \
- else \
- warning(fmt, ##__VA_ARGS__); \
- } while (0)
-
-/**
- * init_input_buf - init buffer for parsing
- * @buf: buffer to parse
- * @size: the size of the buffer
- *
- * Initializes the internal buffer that tep_read_token() will parse.
- */
-__hidden void init_input_buf(const char *buf, unsigned long long size)
-{
- input_buf = buf;
- input_buf_siz = size;
- input_buf_ptr = 0;
-}
-
-__hidden const char *get_input_buf(void)
-{
- return input_buf;
-}
-
-__hidden unsigned long long get_input_buf_ptr(void)
-{
- return input_buf_ptr;
-}
-
-struct event_handler {
- struct event_handler *next;
- int id;
- const char *sys_name;
- const char *event_name;
- tep_event_handler_func func;
- void *context;
-};
-
-struct func_params {
- struct func_params *next;
- enum tep_func_arg_type type;
-};
-
-struct tep_function_handler {
- struct tep_function_handler *next;
- enum tep_func_arg_type ret_type;
- char *name;
- tep_func_handler func;
- struct func_params *params;
- int nr_args;
-};
-
-static unsigned long long
-process_defined_func(struct trace_seq *s, void *data, int size,
- struct tep_event *event, struct tep_print_arg *arg);
-
-static void free_func_handle(struct tep_function_handler *func);
-
-void breakpoint(void)
-{
- static int x;
- x++;
-}
-
-static struct tep_print_arg *alloc_arg(void)
-{
- return calloc(1, sizeof(struct tep_print_arg));
-}
-
-struct tep_cmdline {
- char *comm;
- int pid;
-};
-
-static int cmdline_cmp(const void *a, const void *b)
-{
- const struct tep_cmdline *ca = a;
- const struct tep_cmdline *cb = b;
-
- if (ca->pid < cb->pid)
- return -1;
- if (ca->pid > cb->pid)
- return 1;
-
- return 0;
-}
-
-/* Looking for where to place the key */
-static int cmdline_slot_cmp(const void *a, const void *b)
-{
- const struct tep_cmdline *ca = a;
- const struct tep_cmdline *cb = b;
- const struct tep_cmdline *cb1 = cb + 1;
-
- if (ca->pid < cb->pid)
- return -1;
-
- if (ca->pid > cb->pid) {
- if (ca->pid <= cb1->pid)
- return 0;
- return 1;
- }
-
- return 0;
-}
-
-struct cmdline_list {
- struct cmdline_list *next;
- char *comm;
- int pid;
-};
-
-static int cmdline_init(struct tep_handle *tep)
-{
- struct cmdline_list *cmdlist = tep->cmdlist;
- struct cmdline_list *item;
- struct tep_cmdline *cmdlines;
- int i;
-
- cmdlines = malloc(sizeof(*cmdlines) * tep->cmdline_count);
- if (!cmdlines)
- return -1;
-
- i = 0;
- while (cmdlist) {
- cmdlines[i].pid = cmdlist->pid;
- cmdlines[i].comm = cmdlist->comm;
- i++;
- item = cmdlist;
- cmdlist = cmdlist->next;
- free(item);
- }
-
- qsort(cmdlines, tep->cmdline_count, sizeof(*cmdlines), cmdline_cmp);
-
- tep->cmdlines = cmdlines;
- tep->cmdlist = NULL;
-
- return 0;
-}
-
-static const char *find_cmdline(struct tep_handle *tep, int pid)
-{
- const struct tep_cmdline *comm;
- struct tep_cmdline key;
-
- if (!pid)
- return "<idle>";
-
- if (!tep->cmdlines && cmdline_init(tep))
- return "<not enough memory for cmdlines!>";
-
- key.pid = pid;
-
- comm = bsearch(&key, tep->cmdlines, tep->cmdline_count,
- sizeof(*tep->cmdlines), cmdline_cmp);
-
- if (comm)
- return comm->comm;
- return "<...>";
-}
-
-/**
- * tep_is_pid_registered - return if a pid has a cmdline registered
- * @tep: a handle to the trace event parser context
- * @pid: The pid to check if it has a cmdline registered with.
- *
- * Returns true if the pid has a cmdline mapped to it
- * false otherwise.
- */
-bool tep_is_pid_registered(struct tep_handle *tep, int pid)
-{
- const struct tep_cmdline *comm;
- struct tep_cmdline key;
-
- if (!pid)
- return true;
-
- if (!tep->cmdlines && cmdline_init(tep))
- return false;
-
- key.pid = pid;
-
- comm = bsearch(&key, tep->cmdlines, tep->cmdline_count,
- sizeof(*tep->cmdlines), cmdline_cmp);
-
- if (comm)
- return true;
- return false;
-}
-
-/*
- * If the command lines have been converted to an array, then
- * we must add this pid. This is much slower than when cmdlines
- * are added before the array is initialized.
- */
-static int add_new_comm(struct tep_handle *tep,
- const char *comm, int pid, bool override)
-{
- struct tep_cmdline *cmdlines = tep->cmdlines;
- struct tep_cmdline *cmdline;
- struct tep_cmdline key;
- char *new_comm;
- int cnt;
-
- if (!pid)
- return 0;
-
- /* avoid duplicates */
- key.pid = pid;
-
- cmdline = bsearch(&key, tep->cmdlines, tep->cmdline_count,
- sizeof(*tep->cmdlines), cmdline_cmp);
- if (cmdline) {
- if (!override) {
- errno = EEXIST;
- return -1;
- }
- new_comm = strdup(comm);
- if (!new_comm) {
- errno = ENOMEM;
- return -1;
- }
- free(cmdline->comm);
- cmdline->comm = new_comm;
-
- return 0;
- }
-
- cmdlines = realloc(cmdlines, sizeof(*cmdlines) * (tep->cmdline_count + 1));
- if (!cmdlines) {
- errno = ENOMEM;
- return -1;
- }
- tep->cmdlines = cmdlines;
-
- key.comm = strdup(comm);
- if (!key.comm) {
- errno = ENOMEM;
- return -1;
- }
-
- if (!tep->cmdline_count) {
- /* no entries yet */
- tep->cmdlines[0] = key;
- tep->cmdline_count++;
- return 0;
- }
-
- /* Now find where we want to store the new cmdline */
- cmdline = bsearch(&key, tep->cmdlines, tep->cmdline_count - 1,
- sizeof(*tep->cmdlines), cmdline_slot_cmp);
-
- cnt = tep->cmdline_count;
- if (cmdline) {
- /* cmdline points to the one before the spot we want */
- cmdline++;
- cnt -= cmdline - tep->cmdlines;
-
- } else {
- /* The new entry is either before or after the list */
- if (key.pid > tep->cmdlines[tep->cmdline_count - 1].pid) {
- tep->cmdlines[tep->cmdline_count++] = key;
- return 0;
- }
- cmdline = &tep->cmdlines[0];
- }
- memmove(cmdline + 1, cmdline, (cnt * sizeof(*cmdline)));
- *cmdline = key;
-
- tep->cmdline_count++;
-
- return 0;
-}
-
-static int _tep_register_comm(struct tep_handle *tep,
- const char *comm, int pid, bool override)
-{
- struct cmdline_list *item;
-
- if (tep->cmdlines)
- return add_new_comm(tep, comm, pid, override);
-
- item = malloc(sizeof(*item));
- if (!item)
- return -1;
-
- if (comm)
- item->comm = strdup(comm);
- else
- item->comm = strdup("<...>");
- if (!item->comm) {
- free(item);
- return -1;
- }
- item->pid = pid;
- item->next = tep->cmdlist;
-
- tep->cmdlist = item;
- tep->cmdline_count++;
-
- return 0;
-}
-
-/**
- * tep_register_comm - register a pid / comm mapping
- * @tep: a handle to the trace event parser context
- * @comm: the command line to register
- * @pid: the pid to map the command line to
- *
- * This adds a mapping to search for command line names with
- * a given pid. The comm is duplicated. If a command with the same pid
- * already exist, -1 is returned and errno is set to EEXIST
- */
-int tep_register_comm(struct tep_handle *tep, const char *comm, int pid)
-{
- return _tep_register_comm(tep, comm, pid, false);
-}
-
-/**
- * tep_override_comm - register a pid / comm mapping
- * @tep: a handle to the trace event parser context
- * @comm: the command line to register
- * @pid: the pid to map the command line to
- *
- * This adds a mapping to search for command line names with
- * a given pid. The comm is duplicated. If a command with the same pid
- * already exist, the command string is udapted with the new one
- */
-int tep_override_comm(struct tep_handle *tep, const char *comm, int pid)
-{
- if (!tep->cmdlines && cmdline_init(tep)) {
- errno = ENOMEM;
- return -1;
- }
- return _tep_register_comm(tep, comm, pid, true);
-}
-
-struct func_map {
- unsigned long long addr;
- char *func;
- char *mod;
-};
-
-struct func_list {
- struct func_list *next;
- unsigned long long addr;
- char *func;
- char *mod;
-};
-
-static int func_cmp(const void *a, const void *b)
-{
- const struct func_map *fa = a;
- const struct func_map *fb = b;
-
- if (fa->addr < fb->addr)
- return -1;
- if (fa->addr > fb->addr)
- return 1;
-
- return 0;
-}
-
-/*
- * We are searching for a record in between, not an exact
- * match.
- */
-static int func_bcmp(const void *a, const void *b)
-{
- const struct func_map *fa = a;
- const struct func_map *fb = b;
-
- if ((fa->addr == fb->addr) ||
-
- (fa->addr > fb->addr &&
- fa->addr < (fb+1)->addr))
- return 0;
-
- if (fa->addr < fb->addr)
- return -1;
-
- return 1;
-}
-
-static int func_map_init(struct tep_handle *tep)
-{
- struct func_list *funclist;
- struct func_list *item;
- struct func_map *func_map;
- int i;
-
- func_map = malloc(sizeof(*func_map) * (tep->func_count + 1));
- if (!func_map)
- return -1;
-
- funclist = tep->funclist;
-
- i = 0;
- while (funclist) {
- func_map[i].func = funclist->func;
- func_map[i].addr = funclist->addr;
- func_map[i].mod = funclist->mod;
- i++;
- item = funclist;
- funclist = funclist->next;
- free(item);
- }
-
- qsort(func_map, tep->func_count, sizeof(*func_map), func_cmp);
-
- /*
- * Add a special record at the end.
- */
- func_map[tep->func_count].func = NULL;
- func_map[tep->func_count].addr = 0;
- func_map[tep->func_count].mod = NULL;
-
- tep->func_map = func_map;
- tep->funclist = NULL;
-
- return 0;
-}
-
-static struct func_map *
-__find_func(struct tep_handle *tep, unsigned long long addr)
-{
- struct func_map *func;
- struct func_map key;
-
- if (!tep->func_map)
- func_map_init(tep);
-
- key.addr = addr;
-
- func = bsearch(&key, tep->func_map, tep->func_count,
- sizeof(*tep->func_map), func_bcmp);
-
- return func;
-}
-
-struct func_resolver {
- tep_func_resolver_t *func;
- void *priv;
- struct func_map map;
-};
-
-/**
- * tep_set_function_resolver - set an alternative function resolver
- * @tep: a handle to the trace event parser context
- * @resolver: function to be used
- * @priv: resolver function private state.
- *
- * Some tools may have already a way to resolve kernel functions, allow them to
- * keep using it instead of duplicating all the entries inside tep->funclist.
- */
-int tep_set_function_resolver(struct tep_handle *tep,
- tep_func_resolver_t *func, void *priv)
-{
- struct func_resolver *resolver = malloc(sizeof(*resolver));
-
- if (resolver == NULL)
- return -1;
-
- resolver->func = func;
- resolver->priv = priv;
-
- free(tep->func_resolver);
- tep->func_resolver = resolver;
-
- return 0;
-}
-
-/**
- * tep_reset_function_resolver - reset alternative function resolver
- * @tep: a handle to the trace event parser context
- *
- * Stop using whatever alternative resolver was set, use the default
- * one instead.
- */
-void tep_reset_function_resolver(struct tep_handle *tep)
-{
- free(tep->func_resolver);
- tep->func_resolver = NULL;
-}
-
-static struct func_map *
-find_func(struct tep_handle *tep, unsigned long long addr)
-{
- struct func_map *map;
-
- if (!tep->func_resolver)
- return __find_func(tep, addr);
-
- map = &tep->func_resolver->map;
- map->mod = NULL;
- map->addr = addr;
- map->func = tep->func_resolver->func(tep->func_resolver->priv,
- &map->addr, &map->mod);
- if (map->func == NULL)
- return NULL;
-
- return map;
-}
-
-/**
- * tep_find_function - find a function by a given address
- * @tep: a handle to the trace event parser context
- * @addr: the address to find the function with
- *
- * Returns a pointer to the function stored that has the given
- * address. Note, the address does not have to be exact, it
- * will select the function that would contain the address.
- */
-const char *tep_find_function(struct tep_handle *tep, unsigned long long addr)
-{
- struct func_map *map;
-
- map = find_func(tep, addr);
- if (!map)
- return NULL;
-
- return map->func;
-}
-
-/**
- * tep_find_function_address - find a function address by a given address
- * @tep: a handle to the trace event parser context
- * @addr: the address to find the function with
- *
- * Returns the address the function starts at. This can be used in
- * conjunction with tep_find_function to print both the function
- * name and the function offset.
- */
-unsigned long long
-tep_find_function_address(struct tep_handle *tep, unsigned long long addr)
-{
- struct func_map *map;
-
- map = find_func(tep, addr);
- if (!map)
- return 0;
-
- return map->addr;
-}
-
-/**
- * tep_register_function - register a function with a given address
- * @tep: a handle to the trace event parser context
- * @function: the function name to register
- * @addr: the address the function starts at
- * @mod: the kernel module the function may be in (NULL for none)
- *
- * This registers a function name with an address and module.
- * The @func passed in is duplicated.
- */
-int tep_register_function(struct tep_handle *tep, char *func,
- unsigned long long addr, char *mod)
-{
- struct func_list *item = malloc(sizeof(*item));
-
- if (!item)
- return -1;
-
- item->next = tep->funclist;
- item->func = strdup(func);
- if (!item->func)
- goto out_free;
-
- if (mod) {
- item->mod = strdup(mod);
- if (!item->mod)
- goto out_free_func;
- } else
- item->mod = NULL;
- item->addr = addr;
-
- tep->funclist = item;
- tep->func_count++;
-
- return 0;
-
-out_free_func:
- free(item->func);
- item->func = NULL;
-out_free:
- free(item);
- errno = ENOMEM;
- return -1;
-}
-
-/**
- * tep_print_funcs - print out the stored functions
- * @tep: a handle to the trace event parser context
- *
- * This prints out the stored functions.
- */
-void tep_print_funcs(struct tep_handle *tep)
-{
- int i;
-
- if (!tep->func_map)
- func_map_init(tep);
-
- for (i = 0; i < (int)tep->func_count; i++) {
- printf("%016llx %s",
- tep->func_map[i].addr,
- tep->func_map[i].func);
- if (tep->func_map[i].mod)
- printf(" [%s]\n", tep->func_map[i].mod);
- else
- printf("\n");
- }
-}
-
-struct printk_map {
- unsigned long long addr;
- char *printk;
-};
-
-struct printk_list {
- struct printk_list *next;
- unsigned long long addr;
- char *printk;
-};
-
-static int printk_cmp(const void *a, const void *b)
-{
- const struct printk_map *pa = a;
- const struct printk_map *pb = b;
-
- if (pa->addr < pb->addr)
- return -1;
- if (pa->addr > pb->addr)
- return 1;
-
- return 0;
-}
-
-static int printk_map_init(struct tep_handle *tep)
-{
- struct printk_list *printklist;
- struct printk_list *item;
- struct printk_map *printk_map;
- int i;
-
- printk_map = malloc(sizeof(*printk_map) * (tep->printk_count + 1));
- if (!printk_map)
- return -1;
-
- printklist = tep->printklist;
-
- i = 0;
- while (printklist) {
- printk_map[i].printk = printklist->printk;
- printk_map[i].addr = printklist->addr;
- i++;
- item = printklist;
- printklist = printklist->next;
- free(item);
- }
-
- qsort(printk_map, tep->printk_count, sizeof(*printk_map), printk_cmp);
-
- tep->printk_map = printk_map;
- tep->printklist = NULL;
-
- return 0;
-}
-
-static struct printk_map *
-find_printk(struct tep_handle *tep, unsigned long long addr)
-{
- struct printk_map *printk;
- struct printk_map key;
-
- if (!tep->printk_map && printk_map_init(tep))
- return NULL;
-
- key.addr = addr;
-
- printk = bsearch(&key, tep->printk_map, tep->printk_count,
- sizeof(*tep->printk_map), printk_cmp);
-
- return printk;
-}
-
-/**
- * tep_register_print_string - register a string by its address
- * @tep: a handle to the trace event parser context
- * @fmt: the string format to register
- * @addr: the address the string was located at
- *
- * This registers a string by the address it was stored in the kernel.
- * The @fmt passed in is duplicated.
- */
-int tep_register_print_string(struct tep_handle *tep, const char *fmt,
- unsigned long long addr)
-{
- struct printk_list *item = malloc(sizeof(*item));
- char *p;
-
- if (!item)
- return -1;
-
- item->next = tep->printklist;
- item->addr = addr;
-
- /* Strip off quotes and '\n' from the end */
- if (fmt[0] == '"')
- fmt++;
- item->printk = strdup(fmt);
- if (!item->printk)
- goto out_free;
-
- p = item->printk + strlen(item->printk) - 1;
- if (*p == '"')
- *p = 0;
-
- p -= 2;
- if (strcmp(p, "\\n") == 0)
- *p = 0;
-
- tep->printklist = item;
- tep->printk_count++;
-
- return 0;
-
-out_free:
- free(item);
- errno = ENOMEM;
- return -1;
-}
-
-/**
- * tep_print_printk - print out the stored strings
- * @tep: a handle to the trace event parser context
- *
- * This prints the string formats that were stored.
- */
-void tep_print_printk(struct tep_handle *tep)
-{
- int i;
-
- if (!tep->printk_map)
- printk_map_init(tep);
-
- for (i = 0; i < (int)tep->printk_count; i++) {
- printf("%016llx %s\n",
- tep->printk_map[i].addr,
- tep->printk_map[i].printk);
- }
-}
-
-static struct tep_event *alloc_event(void)
-{
- return calloc(1, sizeof(struct tep_event));
-}
-
-static int add_event(struct tep_handle *tep, struct tep_event *event)
-{
- int i;
- struct tep_event **events = realloc(tep->events, sizeof(event) *
- (tep->nr_events + 1));
- if (!events)
- return -1;
-
- tep->events = events;
-
- for (i = 0; i < tep->nr_events; i++) {
- if (tep->events[i]->id > event->id)
- break;
- }
- if (i < tep->nr_events)
- memmove(&tep->events[i + 1],
- &tep->events[i],
- sizeof(event) * (tep->nr_events - i));
-
- tep->events[i] = event;
- tep->nr_events++;
-
- event->tep = tep;
-
- return 0;
-}
-
-static int event_item_type(enum tep_event_type type)
-{
- switch (type) {
- case TEP_EVENT_ITEM ... TEP_EVENT_SQUOTE:
- return 1;
- case TEP_EVENT_ERROR ... TEP_EVENT_DELIM:
- default:
- return 0;
- }
-}
-
-static void free_flag_sym(struct tep_print_flag_sym *fsym)
-{
- struct tep_print_flag_sym *next;
-
- while (fsym) {
- next = fsym->next;
- free(fsym->value);
- free(fsym->str);
- free(fsym);
- fsym = next;
- }
-}
-
-static void free_arg(struct tep_print_arg *arg)
-{
- struct tep_print_arg *farg;
-
- if (!arg)
- return;
-
- switch (arg->type) {
- case TEP_PRINT_ATOM:
- free(arg->atom.atom);
- break;
- case TEP_PRINT_FIELD:
- free(arg->field.name);
- break;
- case TEP_PRINT_FLAGS:
- free_arg(arg->flags.field);
- free(arg->flags.delim);
- free_flag_sym(arg->flags.flags);
- break;
- case TEP_PRINT_SYMBOL:
- free_arg(arg->symbol.field);
- free_flag_sym(arg->symbol.symbols);
- break;
- case TEP_PRINT_HEX:
- case TEP_PRINT_HEX_STR:
- free_arg(arg->hex.field);
- free_arg(arg->hex.size);
- break;
- case TEP_PRINT_INT_ARRAY:
- free_arg(arg->int_array.field);
- free_arg(arg->int_array.count);
- free_arg(arg->int_array.el_size);
- break;
- case TEP_PRINT_TYPE:
- free(arg->typecast.type);
- free_arg(arg->typecast.item);
- break;
- case TEP_PRINT_STRING:
- case TEP_PRINT_BSTRING:
- free(arg->string.string);
- break;
- case TEP_PRINT_BITMASK:
- free(arg->bitmask.bitmask);
- break;
- case TEP_PRINT_DYNAMIC_ARRAY:
- case TEP_PRINT_DYNAMIC_ARRAY_LEN:
- free(arg->dynarray.index);
- break;
- case TEP_PRINT_OP:
- free(arg->op.op);
- free_arg(arg->op.left);
- free_arg(arg->op.right);
- break;
- case TEP_PRINT_FUNC:
- while (arg->func.args) {
- farg = arg->func.args;
- arg->func.args = farg->next;
- free_arg(farg);
- }
- break;
-
- case TEP_PRINT_NULL:
- default:
- break;
- }
-
- free(arg);
-}
-
-static enum tep_event_type get_type(int ch)
-{
- if (ch == '\n')
- return TEP_EVENT_NEWLINE;
- if (isspace(ch))
- return TEP_EVENT_SPACE;
- if (isalnum(ch) || ch == '_')
- return TEP_EVENT_ITEM;
- if (ch == '\'')
- return TEP_EVENT_SQUOTE;
- if (ch == '"')
- return TEP_EVENT_DQUOTE;
- if (!isprint(ch))
- return TEP_EVENT_NONE;
- if (ch == '(' || ch == ')' || ch == ',')
- return TEP_EVENT_DELIM;
-
- return TEP_EVENT_OP;
-}
-
-static int __read_char(void)
-{
- if (input_buf_ptr >= input_buf_siz)
- return -1;
-
- return input_buf[input_buf_ptr++];
-}
-
-/**
- * peek_char - peek at the next character that will be read
- *
- * Returns the next character read, or -1 if end of buffer.
- */
-__hidden int peek_char(void)
-{
- if (input_buf_ptr >= input_buf_siz)
- return -1;
-
- return input_buf[input_buf_ptr];
-}
-
-static int extend_token(char **tok, char *buf, int size)
-{
- char *newtok = realloc(*tok, size);
-
- if (!newtok) {
- free(*tok);
- *tok = NULL;
- return -1;
- }
-
- if (!*tok)
- strcpy(newtok, buf);
- else
- strcat(newtok, buf);
- *tok = newtok;
-
- return 0;
-}
-
-static enum tep_event_type force_token(const char *str, char **tok);
-
-static enum tep_event_type __read_token(char **tok)
-{
- char buf[BUFSIZ];
- int ch, last_ch, quote_ch, next_ch;
- int i = 0;
- int tok_size = 0;
- enum tep_event_type type;
-
- *tok = NULL;
-
-
- ch = __read_char();
- if (ch < 0)
- return TEP_EVENT_NONE;
-
- type = get_type(ch);
- if (type == TEP_EVENT_NONE)
- return type;
-
- buf[i++] = ch;
-
- switch (type) {
- case TEP_EVENT_NEWLINE:
- case TEP_EVENT_DELIM:
- if (asprintf(tok, "%c", ch) < 0)
- return TEP_EVENT_ERROR;
-
- return type;
-
- case TEP_EVENT_OP:
- switch (ch) {
- case '-':
- next_ch = peek_char();
- if (next_ch == '>') {
- buf[i++] = __read_char();
- break;
- }
- /* fall through */
- case '+':
- case '|':
- case '&':
- case '>':
- case '<':
- last_ch = ch;
- ch = peek_char();
- if (ch != last_ch)
- goto test_equal;
- buf[i++] = __read_char();
- switch (last_ch) {
- case '>':
- case '<':
- goto test_equal;
- default:
- break;
- }
- break;
- case '!':
- case '=':
- goto test_equal;
- default: /* what should we do instead? */
- break;
- }
- buf[i] = 0;
- *tok = strdup(buf);
- return type;
-
- test_equal:
- ch = peek_char();
- if (ch == '=')
- buf[i++] = __read_char();
- goto out;
-
- case TEP_EVENT_DQUOTE:
- case TEP_EVENT_SQUOTE:
- /* don't keep quotes */
- i--;
- quote_ch = ch;
- last_ch = 0;
- concat:
- do {
- if (i == (BUFSIZ - 1)) {
- buf[i] = 0;
- tok_size += BUFSIZ;
-
- if (extend_token(tok, buf, tok_size) < 0)
- return TEP_EVENT_NONE;
- i = 0;
- }
- last_ch = ch;
- ch = __read_char();
- buf[i++] = ch;
- /* the '\' '\' will cancel itself */
- if (ch == '\\' && last_ch == '\\')
- last_ch = 0;
- } while (ch != quote_ch || last_ch == '\\');
- /* remove the last quote */
- i--;
-
- /*
- * For strings (double quotes) check the next token.
- * If it is another string, concatinate the two.
- */
- if (type == TEP_EVENT_DQUOTE) {
- unsigned long long save_input_buf_ptr = input_buf_ptr;
-
- do {
- ch = __read_char();
- } while (isspace(ch));
- if (ch == '"')
- goto concat;
- input_buf_ptr = save_input_buf_ptr;
- }
-
- goto out;
-
- case TEP_EVENT_ERROR ... TEP_EVENT_SPACE:
- case TEP_EVENT_ITEM:
- default:
- break;
- }
-
- while (get_type(peek_char()) == type) {
- if (i == (BUFSIZ - 1)) {
- buf[i] = 0;
- tok_size += BUFSIZ;
-
- if (extend_token(tok, buf, tok_size) < 0)
- return TEP_EVENT_NONE;
- i = 0;
- }
- ch = __read_char();
- buf[i++] = ch;
- }
-
- out:
- buf[i] = 0;
- if (extend_token(tok, buf, tok_size + i + 1) < 0)
- return TEP_EVENT_NONE;
-
- if (type == TEP_EVENT_ITEM) {
- /*
- * Older versions of the kernel has a bug that
- * creates invalid symbols and will break the mac80211
- * parsing. This is a work around to that bug.
- *
- * See Linux kernel commit:
- * 811cb50baf63461ce0bdb234927046131fc7fa8b
- */
- if (strcmp(*tok, "LOCAL_PR_FMT") == 0) {
- free(*tok);
- *tok = NULL;
- return force_token("\"%s\" ", tok);
- } else if (strcmp(*tok, "STA_PR_FMT") == 0) {
- free(*tok);
- *tok = NULL;
- return force_token("\" sta:%pM\" ", tok);
- } else if (strcmp(*tok, "VIF_PR_FMT") == 0) {
- free(*tok);
- *tok = NULL;
- return force_token("\" vif:%p(%d)\" ", tok);
- }
- }
-
- return type;
-}
-
-static enum tep_event_type force_token(const char *str, char **tok)
-{
- const char *save_input_buf;
- unsigned long long save_input_buf_ptr;
- unsigned long long save_input_buf_siz;
- enum tep_event_type type;
-
- /* save off the current input pointers */
- save_input_buf = input_buf;
- save_input_buf_ptr = input_buf_ptr;
- save_input_buf_siz = input_buf_siz;
-
- init_input_buf(str, strlen(str));
-
- type = __read_token(tok);
-
- /* reset back to original token */
- input_buf = save_input_buf;
- input_buf_ptr = save_input_buf_ptr;
- input_buf_siz = save_input_buf_siz;
-
- return type;
-}
-
-/**
- * free_token - free a token returned by tep_read_token
- * @token: the token to free
- */
-__hidden void free_token(char *tok)
-{
- if (tok)
- free(tok);
-}
-
-/**
- * read_token - access to utilities to use the tep parser
- * @tok: The token to return
- *
- * This will parse tokens from the string given by
- * tep_init_data().
- *
- * Returns the token type.
- */
-__hidden enum tep_event_type read_token(char **tok)
-{
- enum tep_event_type type;
-
- for (;;) {
- type = __read_token(tok);
- if (type != TEP_EVENT_SPACE)
- return type;
-
- free_token(*tok);
- }
-
- /* not reached */
- *tok = NULL;
- return TEP_EVENT_NONE;
-}
-
-/* no newline */
-static enum tep_event_type read_token_item(char **tok)
-{
- enum tep_event_type type;
-
- for (;;) {
- type = __read_token(tok);
- if (type != TEP_EVENT_SPACE && type != TEP_EVENT_NEWLINE)
- return type;
- free_token(*tok);
- *tok = NULL;
- }
-
- /* not reached */
- *tok = NULL;
- return TEP_EVENT_NONE;
-}
-
-static int test_type(enum tep_event_type type, enum tep_event_type expect)
-{
- if (type != expect) {
- do_warning("Error: expected type %d but read %d",
- expect, type);
- return -1;
- }
- return 0;
-}
-
-static int test_type_token(enum tep_event_type type, const char *token,
- enum tep_event_type expect, const char *expect_tok)
-{
- if (type != expect) {
- do_warning("Error: expected type %d but read %d",
- expect, type);
- return -1;
- }
-
- if (strcmp(token, expect_tok) != 0) {
- do_warning("Error: expected '%s' but read '%s'",
- expect_tok, token);
- return -1;
- }
- return 0;
-}
-
-static int __read_expect_type(enum tep_event_type expect, char **tok, int newline_ok)
-{
- enum tep_event_type type;
-
- if (newline_ok)
- type = read_token(tok);
- else
- type = read_token_item(tok);
- return test_type(type, expect);
-}
-
-static int read_expect_type(enum tep_event_type expect, char **tok)
-{
- return __read_expect_type(expect, tok, 1);
-}
-
-static int __read_expected(enum tep_event_type expect, const char *str,
- int newline_ok)
-{
- enum tep_event_type type;
- char *token;
- int ret;
-
- if (newline_ok)
- type = read_token(&token);
- else
- type = read_token_item(&token);
-
- ret = test_type_token(type, token, expect, str);
-
- free_token(token);
-
- return ret;
-}
-
-static int read_expected(enum tep_event_type expect, const char *str)
-{
- return __read_expected(expect, str, 1);
-}
-
-static int read_expected_item(enum tep_event_type expect, const char *str)
-{
- return __read_expected(expect, str, 0);
-}
-
-static char *event_read_name(void)
-{
- char *token;
-
- if (read_expected(TEP_EVENT_ITEM, "name") < 0)
- return NULL;
-
- if (read_expected(TEP_EVENT_OP, ":") < 0)
- return NULL;
-
- if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
- goto fail;
-
- return token;
-
- fail:
- free_token(token);
- return NULL;
-}
-
-static int event_read_id(void)
-{
- char *token;
- int id;
-
- if (read_expected_item(TEP_EVENT_ITEM, "ID") < 0)
- return -1;
-
- if (read_expected(TEP_EVENT_OP, ":") < 0)
- return -1;
-
- if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
- goto fail;
-
- id = strtoul(token, NULL, 0);
- free_token(token);
- return id;
-
- fail:
- free_token(token);
- return -1;
-}
-
-static int field_is_string(struct tep_format_field *field)
-{
- if ((field->flags & TEP_FIELD_IS_ARRAY) &&
- (strstr(field->type, "char") || strstr(field->type, "u8") ||
- strstr(field->type, "s8")))
- return 1;
-
- return 0;
-}
-
-static int field_is_dynamic(struct tep_format_field *field)
-{
- if (strncmp(field->type, "__data_loc", 10) == 0)
- return 1;
-
- return 0;
-}
-
-static int field_is_long(struct tep_format_field *field)
-{
- /* includes long long */
- if (strstr(field->type, "long"))
- return 1;
-
- return 0;
-}
-
-static unsigned int type_size(const char *name)
-{
- /* This covers all TEP_FIELD_IS_STRING types. */
- static struct {
- const char *type;
- unsigned int size;
- } table[] = {
- { "u8", 1 },
- { "u16", 2 },
- { "u32", 4 },
- { "u64", 8 },
- { "s8", 1 },
- { "s16", 2 },
- { "s32", 4 },
- { "s64", 8 },
- { "char", 1 },
- { },
- };
- int i;
-
- for (i = 0; table[i].type; i++) {
- if (!strcmp(table[i].type, name))
- return table[i].size;
- }
-
- return 0;
-}
-
-static int append(char **buf, const char *delim, const char *str)
-{
- char *new_buf;
-
- new_buf = realloc(*buf, strlen(*buf) + strlen(delim) + strlen(str) + 1);
- if (!new_buf)
- return -1;
- strcat(new_buf, delim);
- strcat(new_buf, str);
- *buf = new_buf;
- return 0;
-}
-
-static int event_read_fields(struct tep_event *event, struct tep_format_field **fields)
-{
- struct tep_format_field *field = NULL;
- enum tep_event_type type;
- char *token;
- char *last_token;
- char *delim = " ";
- int count = 0;
- int ret;
-
- do {
- unsigned int size_dynamic = 0;
-
- type = read_token(&token);
- if (type == TEP_EVENT_NEWLINE) {
- free_token(token);
- return count;
- }
-
- count++;
-
- if (test_type_token(type, token, TEP_EVENT_ITEM, "field"))
- goto fail;
- free_token(token);
-
- type = read_token(&token);
- /*
- * The ftrace fields may still use the "special" name.
- * Just ignore it.
- */
- if (event->flags & TEP_EVENT_FL_ISFTRACE &&
- type == TEP_EVENT_ITEM && strcmp(token, "special") == 0) {
- free_token(token);
- type = read_token(&token);
- }
-
- if (test_type_token(type, token, TEP_EVENT_OP, ":") < 0)
- goto fail;
-
- free_token(token);
- if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
- goto fail;
-
- last_token = token;
-
- field = calloc(1, sizeof(*field));
- if (!field)
- goto fail;
-
- field->event = event;
-
- /* read the rest of the type */
- for (;;) {
- type = read_token(&token);
- if (type == TEP_EVENT_ITEM ||
- (type == TEP_EVENT_OP && strcmp(token, "*") == 0) ||
- /*
- * Some of the ftrace fields are broken and have
- * an illegal "." in them.
- */
- (event->flags & TEP_EVENT_FL_ISFTRACE &&
- type == TEP_EVENT_OP && strcmp(token, ".") == 0)) {
-
- if (strcmp(token, "*") == 0)
- field->flags |= TEP_FIELD_IS_POINTER;
-
- if (field->type) {
- ret = append(&field->type, delim, last_token);
- free(last_token);
- if (ret < 0)
- goto fail;
- } else
- field->type = last_token;
- last_token = token;
- delim = " ";
- continue;
- }
-
- /* Handle __attribute__((user)) */
- if ((type == TEP_EVENT_DELIM) &&
- strcmp("__attribute__", last_token) == 0 &&
- token[0] == '(') {
- int depth = 1;
- int ret;
-
- ret = append(&field->type, " ", last_token);
- ret |= append(&field->type, "", "(");
- if (ret < 0)
- goto fail;
-
- delim = " ";
- while ((type = read_token(&token)) != TEP_EVENT_NONE) {
- if (type == TEP_EVENT_DELIM) {
- if (token[0] == '(')
- depth++;
- else if (token[0] == ')')
- depth--;
- if (!depth)
- break;
- ret = append(&field->type, "", token);
- delim = "";
- } else {
- ret = append(&field->type, delim, token);
- delim = " ";
- }
- if (ret < 0)
- goto fail;
- free(last_token);
- last_token = token;
- }
- continue;
- }
- break;
- }
-
- if (!field->type) {
- do_warning_event(event, "%s: no type found", __func__);
- goto fail;
- }
- field->name = field->alias = last_token;
-
- if (test_type(type, TEP_EVENT_OP))
- goto fail;
-
- if (strcmp(token, "[") == 0) {
- enum tep_event_type last_type = type;
- char *brackets = token;
-
- field->flags |= TEP_FIELD_IS_ARRAY;
-
- type = read_token(&token);
-
- if (type == TEP_EVENT_ITEM)
- field->arraylen = strtoul(token, NULL, 0);
- else
- field->arraylen = 0;
-
- while (strcmp(token, "]") != 0) {
- const char *delim;
-
- if (last_type == TEP_EVENT_ITEM &&
- type == TEP_EVENT_ITEM)
- delim = " ";
- else
- delim = "";
-
- last_type = type;
-
- ret = append(&brackets, delim, token);
- if (ret < 0) {
- free(brackets);
- goto fail;
- }
- /* We only care about the last token */
- field->arraylen = strtoul(token, NULL, 0);
- free_token(token);
- type = read_token(&token);
- if (type == TEP_EVENT_NONE) {
- free(brackets);
- do_warning_event(event, "failed to find token");
- goto fail;
- }
- }
-
- free_token(token);
-
- ret = append(&brackets, "", "]");
- if (ret < 0) {
- free(brackets);
- goto fail;
- }
-
- /* add brackets to type */
-
- type = read_token(&token);
- /*
- * If the next token is not an OP, then it is of
- * the format: type [] item;
- */
- if (type == TEP_EVENT_ITEM) {
- ret = append(&field->type, " ", field->name);
- if (ret < 0) {
- free(brackets);
- goto fail;
- }
- ret = append(&field->type, "", brackets);
-
- size_dynamic = type_size(field->name);
- free_token(field->name);
- field->name = field->alias = token;
- type = read_token(&token);
- } else {
- ret = append(&field->type, "", brackets);
- if (ret < 0) {
- free(brackets);
- goto fail;
- }
- }
- free(brackets);
- }
-
- if (field_is_string(field))
- field->flags |= TEP_FIELD_IS_STRING;
- if (field_is_dynamic(field))
- field->flags |= TEP_FIELD_IS_DYNAMIC;
- if (field_is_long(field))
- field->flags |= TEP_FIELD_IS_LONG;
-
- if (test_type_token(type, token, TEP_EVENT_OP, ";"))
- goto fail;
- free_token(token);
-
- if (read_expected(TEP_EVENT_ITEM, "offset") < 0)
- goto fail_expect;
-
- if (read_expected(TEP_EVENT_OP, ":") < 0)
- goto fail_expect;
-
- if (read_expect_type(TEP_EVENT_ITEM, &token))
- goto fail;
- field->offset = strtoul(token, NULL, 0);
- free_token(token);
-
- if (read_expected(TEP_EVENT_OP, ";") < 0)
- goto fail_expect;
-
- if (read_expected(TEP_EVENT_ITEM, "size") < 0)
- goto fail_expect;
-
- if (read_expected(TEP_EVENT_OP, ":") < 0)
- goto fail_expect;
-
- if (read_expect_type(TEP_EVENT_ITEM, &token))
- goto fail;
- field->size = strtoul(token, NULL, 0);
- free_token(token);
-
- if (read_expected(TEP_EVENT_OP, ";") < 0)
- goto fail_expect;
-
- type = read_token(&token);
- if (type != TEP_EVENT_NEWLINE) {
- /* newer versions of the kernel have a "signed" type */
- if (test_type_token(type, token, TEP_EVENT_ITEM, "signed"))
- goto fail;
-
- free_token(token);
-
- if (read_expected(TEP_EVENT_OP, ":") < 0)
- goto fail_expect;
-
- if (read_expect_type(TEP_EVENT_ITEM, &token))
- goto fail;
-
- if (strtoul(token, NULL, 0))
- field->flags |= TEP_FIELD_IS_SIGNED;
-
- free_token(token);
- if (read_expected(TEP_EVENT_OP, ";") < 0)
- goto fail_expect;
-
- if (read_expect_type(TEP_EVENT_NEWLINE, &token))
- goto fail;
- }
-
- free_token(token);
-
- if (field->flags & TEP_FIELD_IS_ARRAY) {
- if (field->arraylen)
- field->elementsize = field->size / field->arraylen;
- else if (field->flags & TEP_FIELD_IS_DYNAMIC)
- field->elementsize = size_dynamic;
- else if (field->flags & TEP_FIELD_IS_STRING)
- field->elementsize = 1;
- else if (field->flags & TEP_FIELD_IS_LONG)
- field->elementsize = event->tep ?
- event->tep->long_size :
- sizeof(long);
- } else
- field->elementsize = field->size;
-
- *fields = field;
- fields = &field->next;
-
- } while (1);
-
- return 0;
-
-fail:
- free_token(token);
-fail_expect:
- if (field) {
- free(field->type);
- free(field->name);
- free(field);
- }
- return -1;
-}
-
-static int event_read_format(struct tep_event *event)
-{
- char *token;
- int ret;
-
- if (read_expected_item(TEP_EVENT_ITEM, "format") < 0)
- return -1;
-
- if (read_expected(TEP_EVENT_OP, ":") < 0)
- return -1;
-
- if (read_expect_type(TEP_EVENT_NEWLINE, &token))
- goto fail;
- free_token(token);
-
- ret = event_read_fields(event, &event->format.common_fields);
- if (ret < 0)
- return ret;
- event->format.nr_common = ret;
-
- ret = event_read_fields(event, &event->format.fields);
- if (ret < 0)
- return ret;
- event->format.nr_fields = ret;
-
- return 0;
-
- fail:
- free_token(token);
- return -1;
-}
-
-static enum tep_event_type
-process_arg_token(struct tep_event *event, struct tep_print_arg *arg,
- char **tok, enum tep_event_type type);
-
-static enum tep_event_type
-process_arg(struct tep_event *event, struct tep_print_arg *arg, char **tok)
-{
- enum tep_event_type type;
- char *token;
-
- type = read_token(&token);
- *tok = token;
-
- return process_arg_token(event, arg, tok, type);
-}
-
-static enum tep_event_type
-process_op(struct tep_event *event, struct tep_print_arg *arg, char **tok);
-
-/*
- * For __print_symbolic() and __print_flags, we need to completely
- * evaluate the first argument, which defines what to print next.
- */
-static enum tep_event_type
-process_field_arg(struct tep_event *event, struct tep_print_arg *arg, char **tok)
-{
- enum tep_event_type type;
-
- type = process_arg(event, arg, tok);
-
- while (type == TEP_EVENT_OP) {
- type = process_op(event, arg, tok);
- }
-
- return type;
-}
-
-static enum tep_event_type
-process_cond(struct tep_event *event, struct tep_print_arg *top, char **tok)
-{
- struct tep_print_arg *arg, *left, *right;
- enum tep_event_type type;
- char *token = NULL;
-
- arg = alloc_arg();
- left = alloc_arg();
- right = alloc_arg();
-
- if (!arg || !left || !right) {
- do_warning_event(event, "%s: not enough memory!", __func__);
- /* arg will be freed at out_free */
- free_arg(left);
- free_arg(right);
- goto out_free;
- }
-
- arg->type = TEP_PRINT_OP;
- arg->op.left = left;
- arg->op.right = right;
-
- *tok = NULL;
- type = process_arg(event, left, &token);
-
- again:
- if (type == TEP_EVENT_ERROR)
- goto out_free;
-
- /* Handle other operations in the arguments */
- if (type == TEP_EVENT_OP && strcmp(token, ":") != 0) {
- type = process_op(event, left, &token);
- goto again;
- }
-
- if (test_type_token(type, token, TEP_EVENT_OP, ":"))
- goto out_free;
-
- arg->op.op = token;
-
- type = process_arg(event, right, &token);
-
- top->op.right = arg;
-
- *tok = token;
- return type;
-
-out_free:
- /* Top may point to itself */
- top->op.right = NULL;
- free_token(token);
- free_arg(arg);
- return TEP_EVENT_ERROR;
-}
-
-static enum tep_event_type
-process_array(struct tep_event *event, struct tep_print_arg *top, char **tok)
-{
- struct tep_print_arg *arg;
- enum tep_event_type type;
- char *token = NULL;
-
- arg = alloc_arg();
- if (!arg) {
- do_warning_event(event, "%s: not enough memory!", __func__);
- /* '*tok' is set to top->op.op. No need to free. */
- *tok = NULL;
- return TEP_EVENT_ERROR;
- }
-
- *tok = NULL;
- type = process_arg(event, arg, &token);
- if (test_type_token(type, token, TEP_EVENT_OP, "]"))
- goto out_free;
-
- top->op.right = arg;
-
- free_token(token);
- type = read_token_item(&token);
- *tok = token;
-
- return type;
-
-out_free:
- free_token(token);
- free_arg(arg);
- return TEP_EVENT_ERROR;
-}
-
-static int get_op_prio(char *op)
-{
- if (!op[1]) {
- switch (op[0]) {
- case '~':
- case '!':
- return 4;
- case '*':
- case '/':
- case '%':
- return 6;
- case '+':
- case '-':
- return 7;
- /* '>>' and '<<' are 8 */
- case '<':
- case '>':
- return 9;
- /* '==' and '!=' are 10 */
- case '&':
- return 11;
- case '^':
- return 12;
- case '|':
- return 13;
- case '?':
- return 16;
- default:
- do_warning("unknown op '%c'", op[0]);
- return -1;
- }
- } else {
- if (strcmp(op, "++") == 0 ||
- strcmp(op, "--") == 0) {
- return 3;
- } else if (strcmp(op, ">>") == 0 ||
- strcmp(op, "<<") == 0) {
- return 8;
- } else if (strcmp(op, ">=") == 0 ||
- strcmp(op, "<=") == 0) {
- return 9;
- } else if (strcmp(op, "==") == 0 ||
- strcmp(op, "!=") == 0) {
- return 10;
- } else if (strcmp(op, "&&") == 0) {
- return 14;
- } else if (strcmp(op, "||") == 0) {
- return 15;
- } else {
- do_warning("unknown op '%s'", op);
- return -1;
- }
- }
-}
-
-static int set_op_prio(struct tep_print_arg *arg)
-{
-
- /* single ops are the greatest */
- if (!arg->op.left || arg->op.left->type == TEP_PRINT_NULL)
- arg->op.prio = 0;
- else
- arg->op.prio = get_op_prio(arg->op.op);
-
- return arg->op.prio;
-}
-
-/* Note, *tok does not get freed, but will most likely be saved */
-static enum tep_event_type
-process_op(struct tep_event *event, struct tep_print_arg *arg, char **tok)
-{
- struct tep_print_arg *left, *right = NULL;
- enum tep_event_type type;
- char *token;
-
- /* the op is passed in via tok */
- token = *tok;
-
- if (arg->type == TEP_PRINT_OP && !arg->op.left) {
- /* handle single op */
- if (token[1]) {
- do_warning_event(event, "bad op token %s", token);
- goto out_free;
- }
- switch (token[0]) {
- case '~':
- case '!':
- case '+':
- case '-':
- break;
- default:
- do_warning_event(event, "bad op token %s", token);
- goto out_free;
-
- }
-
- /* make an empty left */
- left = alloc_arg();
- if (!left)
- goto out_warn_free;
-
- left->type = TEP_PRINT_NULL;
- arg->op.left = left;
-
- right = alloc_arg();
- if (!right)
- goto out_warn_free;
-
- arg->op.right = right;
-
- /* do not free the token, it belongs to an op */
- *tok = NULL;
- type = process_arg(event, right, tok);
-
- } else if (strcmp(token, "?") == 0) {
-
- left = alloc_arg();
- if (!left)
- goto out_warn_free;
-
- /* copy the top arg to the left */
- *left = *arg;
-
- arg->type = TEP_PRINT_OP;
- arg->op.op = token;
- arg->op.left = left;
- arg->op.prio = 0;
-
- /* it will set arg->op.right */
- type = process_cond(event, arg, tok);
-
- } else if (strcmp(token, ">>") == 0 ||
- strcmp(token, "<<") == 0 ||
- strcmp(token, "&") == 0 ||
- strcmp(token, "|") == 0 ||
- strcmp(token, "&&") == 0 ||
- strcmp(token, "||") == 0 ||
- strcmp(token, "-") == 0 ||
- strcmp(token, "+") == 0 ||
- strcmp(token, "*") == 0 ||
- strcmp(token, "^") == 0 ||
- strcmp(token, "/") == 0 ||
- strcmp(token, "%") == 0 ||
- strcmp(token, "<") == 0 ||
- strcmp(token, ">") == 0 ||
- strcmp(token, "<=") == 0 ||
- strcmp(token, ">=") == 0 ||
- strcmp(token, "==") == 0 ||
- strcmp(token, "!=") == 0) {
-
- left = alloc_arg();
- if (!left)
- goto out_warn_free;
-
- /* copy the top arg to the left */
- *left = *arg;
-
- arg->type = TEP_PRINT_OP;
- arg->op.op = token;
- arg->op.left = left;
- arg->op.right = NULL;
-
- if (set_op_prio(arg) == -1) {
- event->flags |= TEP_EVENT_FL_FAILED;
- /* arg->op.op (= token) will be freed at out_free */
- arg->op.op = NULL;
- goto out_free;
- }
-
- type = read_token_item(&token);
- *tok = token;
-
- /* could just be a type pointer */
- if ((strcmp(arg->op.op, "*") == 0) &&
- type == TEP_EVENT_DELIM && (strcmp(token, ")") == 0)) {
- int ret;
-
- if (left->type != TEP_PRINT_ATOM) {
- do_warning_event(event, "bad pointer type");
- goto out_free;
- }
- ret = append(&left->atom.atom, " ", "*");
- if (ret < 0)
- goto out_warn_free;
-
- free(arg->op.op);
- *arg = *left;
- free(left);
-
- return type;
- }
-
- right = alloc_arg();
- if (!right)
- goto out_warn_free;
-
- type = process_arg_token(event, right, tok, type);
- if (type == TEP_EVENT_ERROR) {
- free_arg(right);
- /* token was freed in process_arg_token() via *tok */
- token = NULL;
- goto out_free;
- }
-
- if (right->type == TEP_PRINT_OP &&
- get_op_prio(arg->op.op) < get_op_prio(right->op.op)) {
- struct tep_print_arg tmp;
-
- /* rotate ops according to the priority */
- arg->op.right = right->op.left;
-
- tmp = *arg;
- *arg = *right;
- *right = tmp;
-
- arg->op.left = right;
- } else {
- arg->op.right = right;
- }
-
- } else if (strcmp(token, "[") == 0) {
-
- left = alloc_arg();
- if (!left)
- goto out_warn_free;
-
- *left = *arg;
-
- arg->type = TEP_PRINT_OP;
- arg->op.op = token;
- arg->op.left = left;
-
- arg->op.prio = 0;
-
- /* it will set arg->op.right */
- type = process_array(event, arg, tok);
-
- } else {
- do_warning_event(event, "unknown op '%s'", token);
- event->flags |= TEP_EVENT_FL_FAILED;
- /* the arg is now the left side */
- goto out_free;
- }
-
- if (type == TEP_EVENT_OP && strcmp(*tok, ":") != 0) {
- int prio;
-
- /* higher prios need to be closer to the root */
- prio = get_op_prio(*tok);
-
- if (prio > arg->op.prio)
- return process_op(event, arg, tok);
-
- return process_op(event, right, tok);
- }
-
- return type;
-
-out_warn_free:
- do_warning_event(event, "%s: not enough memory!", __func__);
-out_free:
- free_token(token);
- *tok = NULL;
- return TEP_EVENT_ERROR;
-}
-
-static enum tep_event_type
-process_entry(struct tep_event *event __maybe_unused, struct tep_print_arg *arg,
- char **tok)
-{
- enum tep_event_type type;
- char *field;
- char *token;
-
- if (read_expected(TEP_EVENT_OP, "->") < 0)
- goto out_err;
-
- if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
- goto out_free;
- field = token;
-
- arg->type = TEP_PRINT_FIELD;
- arg->field.name = field;
-
- if (is_flag_field) {
- arg->field.field = tep_find_any_field(event, arg->field.name);
- arg->field.field->flags |= TEP_FIELD_IS_FLAG;
- is_flag_field = 0;
- } else if (is_symbolic_field) {
- arg->field.field = tep_find_any_field(event, arg->field.name);
- arg->field.field->flags |= TEP_FIELD_IS_SYMBOLIC;
- is_symbolic_field = 0;
- }
-
- type = read_token(&token);
- *tok = token;
-
- return type;
-
- out_free:
- free_token(token);
- out_err:
- *tok = NULL;
- return TEP_EVENT_ERROR;
-}
-
-static int alloc_and_process_delim(struct tep_event *event, char *next_token,
- struct tep_print_arg **print_arg)
-{
- struct tep_print_arg *field;
- enum tep_event_type type;
- char *token;
- int ret = 0;
-
- field = alloc_arg();
- if (!field) {
- do_warning_event(event, "%s: not enough memory!", __func__);
- errno = ENOMEM;
- return -1;
- }
-
- type = process_arg(event, field, &token);
-
- if (test_type_token(type, token, TEP_EVENT_DELIM, next_token)) {
- errno = EINVAL;
- ret = -1;
- free_arg(field);
- goto out_free_token;
- }
-
- *print_arg = field;
-
-out_free_token:
- free_token(token);
-
- return ret;
-}
-
-static char *arg_eval (struct tep_print_arg *arg);
-
-static unsigned long long
-eval_type_str(unsigned long long val, const char *type, int pointer)
-{
- int sign = 0;
- char *ref;
- int len;
-
- len = strlen(type);
-
- if (pointer) {
-
- if (type[len-1] != '*') {
- do_warning("pointer expected with non pointer type");
- return val;
- }
-
- ref = malloc(len);
- if (!ref) {
- do_warning("%s: not enough memory!", __func__);
- return val;
- }
- memcpy(ref, type, len);
-
- /* chop off the " *" */
- ref[len - 2] = 0;
-
- val = eval_type_str(val, ref, 0);
- free(ref);
- return val;
- }
-
- /* check if this is a pointer */
- if (type[len - 1] == '*')
- return val;
-
- /* Try to figure out the arg size*/
- if (strncmp(type, "struct", 6) == 0)
- /* all bets off */
- return val;
-
- if (strcmp(type, "u8") == 0)
- return val & 0xff;
-
- if (strcmp(type, "u16") == 0)
- return val & 0xffff;
-
- if (strcmp(type, "u32") == 0)
- return val & 0xffffffff;
-
- if (strcmp(type, "u64") == 0 ||
- strcmp(type, "s64") == 0)
- return val;
-
- if (strcmp(type, "s8") == 0)
- return (unsigned long long)(char)val & 0xff;
-
- if (strcmp(type, "s16") == 0)
- return (unsigned long long)(short)val & 0xffff;
-
- if (strcmp(type, "s32") == 0)
- return (unsigned long long)(int)val & 0xffffffff;
-
- if (strncmp(type, "unsigned ", 9) == 0) {
- sign = 0;
- type += 9;
- }
-
- if (strcmp(type, "char") == 0) {
- if (sign)
- return (unsigned long long)(char)val & 0xff;
- else
- return val & 0xff;
- }
-
- if (strcmp(type, "short") == 0) {
- if (sign)
- return (unsigned long long)(short)val & 0xffff;
- else
- return val & 0xffff;
- }
-
- if (strcmp(type, "int") == 0) {
- if (sign)
- return (unsigned long long)(int)val & 0xffffffff;
- else
- return val & 0xffffffff;
- }
-
- return val;
-}
-
-/*
- * Try to figure out the type.
- */
-static unsigned long long
-eval_type(unsigned long long val, struct tep_print_arg *arg, int pointer)
-{
- if (arg->type != TEP_PRINT_TYPE) {
- do_warning("expected type argument");
- return 0;
- }
-
- return eval_type_str(val, arg->typecast.type, pointer);
-}
-
-static int arg_num_eval(struct tep_print_arg *arg, long long *val)
-{
- long long left, right;
- int ret = 1;
-
- switch (arg->type) {
- case TEP_PRINT_ATOM:
- *val = strtoll(arg->atom.atom, NULL, 0);
- break;
- case TEP_PRINT_TYPE:
- ret = arg_num_eval(arg->typecast.item, val);
- if (!ret)
- break;
- *val = eval_type(*val, arg, 0);
- break;
- case TEP_PRINT_OP:
- switch (arg->op.op[0]) {
- case '|':
- ret = arg_num_eval(arg->op.left, &left);
- if (!ret)
- break;
- ret = arg_num_eval(arg->op.right, &right);
- if (!ret)
- break;
- if (arg->op.op[1])
- *val = left || right;
- else
- *val = left | right;
- break;
- case '&':
- ret = arg_num_eval(arg->op.left, &left);
- if (!ret)
- break;
- ret = arg_num_eval(arg->op.right, &right);
- if (!ret)
- break;
- if (arg->op.op[1])
- *val = left && right;
- else
- *val = left & right;
- break;
- case '<':
- ret = arg_num_eval(arg->op.left, &left);
- if (!ret)
- break;
- ret = arg_num_eval(arg->op.right, &right);
- if (!ret)
- break;
- switch (arg->op.op[1]) {
- case 0:
- *val = left < right;
- break;
- case '<':
- *val = left << right;
- break;
- case '=':
- *val = left <= right;
- break;
- default:
- do_warning("unknown op '%s'", arg->op.op);
- ret = 0;
- }
- break;
- case '>':
- ret = arg_num_eval(arg->op.left, &left);
- if (!ret)
- break;
- ret = arg_num_eval(arg->op.right, &right);
- if (!ret)
- break;
- switch (arg->op.op[1]) {
- case 0:
- *val = left > right;
- break;
- case '>':
- *val = left >> right;
- break;
- case '=':
- *val = left >= right;
- break;
- default:
- do_warning("unknown op '%s'", arg->op.op);
- ret = 0;
- }
- break;
- case '=':
- ret = arg_num_eval(arg->op.left, &left);
- if (!ret)
- break;
- ret = arg_num_eval(arg->op.right, &right);
- if (!ret)
- break;
-
- if (arg->op.op[1] != '=') {
- do_warning("unknown op '%s'", arg->op.op);
- ret = 0;
- } else
- *val = left == right;
- break;
- case '!':
- ret = arg_num_eval(arg->op.left, &left);
- if (!ret)
- break;
- ret = arg_num_eval(arg->op.right, &right);
- if (!ret)
- break;
-
- switch (arg->op.op[1]) {
- case '=':
- *val = left != right;
- break;
- default:
- do_warning("unknown op '%s'", arg->op.op);
- ret = 0;
- }
- break;
- case '-':
- /* check for negative */
- if (arg->op.left->type == TEP_PRINT_NULL)
- left = 0;
- else
- ret = arg_num_eval(arg->op.left, &left);
- if (!ret)
- break;
- ret = arg_num_eval(arg->op.right, &right);
- if (!ret)
- break;
- *val = left - right;
- break;
- case '+':
- if (arg->op.left->type == TEP_PRINT_NULL)
- left = 0;
- else
- ret = arg_num_eval(arg->op.left, &left);
- if (!ret)
- break;
- ret = arg_num_eval(arg->op.right, &right);
- if (!ret)
- break;
- *val = left + right;
- break;
- case '~':
- ret = arg_num_eval(arg->op.right, &right);
- if (!ret)
- break;
- *val = ~right;
- break;
- default:
- do_warning("unknown op '%s'", arg->op.op);
- ret = 0;
- }
- break;
-
- case TEP_PRINT_NULL:
- case TEP_PRINT_FIELD ... TEP_PRINT_SYMBOL:
- case TEP_PRINT_STRING:
- case TEP_PRINT_BSTRING:
- case TEP_PRINT_BITMASK:
- default:
- do_warning("invalid eval type %d", arg->type);
- ret = 0;
-
- }
- return ret;
-}
-
-static char *arg_eval (struct tep_print_arg *arg)
-{
- long long val;
- static char buf[24];
-
- switch (arg->type) {
- case TEP_PRINT_ATOM:
- return arg->atom.atom;
- case TEP_PRINT_TYPE:
- return arg_eval(arg->typecast.item);
- case TEP_PRINT_OP:
- if (!arg_num_eval(arg, &val))
- break;
- sprintf(buf, "%lld", val);
- return buf;
-
- case TEP_PRINT_NULL:
- case TEP_PRINT_FIELD ... TEP_PRINT_SYMBOL:
- case TEP_PRINT_STRING:
- case TEP_PRINT_BSTRING:
- case TEP_PRINT_BITMASK:
- default:
- do_warning("invalid eval type %d", arg->type);
- break;
- }
-
- return NULL;
-}
-
-static enum tep_event_type
-process_fields(struct tep_event *event, struct tep_print_flag_sym **list, char **tok)
-{
- enum tep_event_type type;
- struct tep_print_arg *arg = NULL;
- struct tep_print_flag_sym *field;
- char *token = *tok;
- char *value;
-
- do {
- free_token(token);
- type = read_token_item(&token);
- if (test_type_token(type, token, TEP_EVENT_OP, "{"))
- break;
-
- arg = alloc_arg();
- if (!arg)
- goto out_free;
-
- free_token(token);
- type = process_arg(event, arg, &token);
-
- if (type == TEP_EVENT_OP)
- type = process_op(event, arg, &token);
-
- if (type == TEP_EVENT_ERROR)
- goto out_free;
-
- if (test_type_token(type, token, TEP_EVENT_DELIM, ","))
- goto out_free;
-
- field = calloc(1, sizeof(*field));
- if (!field)
- goto out_free;
-
- value = arg_eval(arg);
- if (value == NULL)
- goto out_free_field;
- field->value = strdup(value);
- if (field->value == NULL)
- goto out_free_field;
-
- free_arg(arg);
- arg = alloc_arg();
- if (!arg)
- goto out_free;
-
- free_token(token);
- type = process_arg(event, arg, &token);
- if (test_type_token(type, token, TEP_EVENT_OP, "}"))
- goto out_free_field;
-
- value = arg_eval(arg);
- if (value == NULL)
- goto out_free_field;
- field->str = strdup(value);
- if (field->str == NULL)
- goto out_free_field;
- free_arg(arg);
- arg = NULL;
-
- *list = field;
- list = &field->next;
-
- free_token(token);
- type = read_token_item(&token);
- } while (type == TEP_EVENT_DELIM && strcmp(token, ",") == 0);
-
- *tok = token;
- return type;
-
-out_free_field:
- free_flag_sym(field);
-out_free:
- free_arg(arg);
- free_token(token);
- *tok = NULL;
-
- return TEP_EVENT_ERROR;
-}
-
-static enum tep_event_type
-process_flags(struct tep_event *event, struct tep_print_arg *arg, char **tok)
-{
- struct tep_print_arg *field;
- enum tep_event_type type;
- char *token = NULL;
-
- memset(arg, 0, sizeof(*arg));
- arg->type = TEP_PRINT_FLAGS;
-
- field = alloc_arg();
- if (!field) {
- do_warning_event(event, "%s: not enough memory!", __func__);
- goto out_free;
- }
-
- type = process_field_arg(event, field, &token);
-
- /* Handle operations in the first argument */
- while (type == TEP_EVENT_OP)
- type = process_op(event, field, &token);
-
- if (test_type_token(type, token, TEP_EVENT_DELIM, ","))
- goto out_free_field;
- free_token(token);
-
- arg->flags.field = field;
-
- type = read_token_item(&token);
- if (event_item_type(type)) {
- arg->flags.delim = token;
- type = read_token_item(&token);
- }
-
- if (test_type_token(type, token, TEP_EVENT_DELIM, ","))
- goto out_free;
-
- type = process_fields(event, &arg->flags.flags, &token);
- if (test_type_token(type, token, TEP_EVENT_DELIM, ")"))
- goto out_free;
-
- free_token(token);
- type = read_token_item(tok);
- return type;
-
-out_free_field:
- free_arg(field);
-out_free:
- free_token(token);
- *tok = NULL;
- return TEP_EVENT_ERROR;
-}
-
-static enum tep_event_type
-process_symbols(struct tep_event *event, struct tep_print_arg *arg, char **tok)
-{
- struct tep_print_arg *field;
- enum tep_event_type type;
- char *token = NULL;
-
- memset(arg, 0, sizeof(*arg));
- arg->type = TEP_PRINT_SYMBOL;
-
- field = alloc_arg();
- if (!field) {
- do_warning_event(event, "%s: not enough memory!", __func__);
- goto out_free;
- }
-
- type = process_field_arg(event, field, &token);
-
- if (test_type_token(type, token, TEP_EVENT_DELIM, ","))
- goto out_free_field;
-
- arg->symbol.field = field;
-
- type = process_fields(event, &arg->symbol.symbols, &token);
- if (test_type_token(type, token, TEP_EVENT_DELIM, ")"))
- goto out_free;
-
- free_token(token);
- type = read_token_item(tok);
- return type;
-
-out_free_field:
- free_arg(field);
-out_free:
- free_token(token);
- *tok = NULL;
- return TEP_EVENT_ERROR;
-}
-
-static enum tep_event_type
-process_hex_common(struct tep_event *event, struct tep_print_arg *arg,
- char **tok, enum tep_print_arg_type type)
-{
- memset(arg, 0, sizeof(*arg));
- arg->type = type;
-
- if (alloc_and_process_delim(event, ",", &arg->hex.field))
- goto out;
-
- if (alloc_and_process_delim(event, ")", &arg->hex.size))
- goto free_field;
-
- return read_token_item(tok);
-
-free_field:
- free_arg(arg->hex.field);
- arg->hex.field = NULL;
-out:
- *tok = NULL;
- return TEP_EVENT_ERROR;
-}
-
-static enum tep_event_type
-process_hex(struct tep_event *event, struct tep_print_arg *arg, char **tok)
-{
- return process_hex_common(event, arg, tok, TEP_PRINT_HEX);
-}
-
-static enum tep_event_type
-process_hex_str(struct tep_event *event, struct tep_print_arg *arg,
- char **tok)
-{
- return process_hex_common(event, arg, tok, TEP_PRINT_HEX_STR);
-}
-
-static enum tep_event_type
-process_int_array(struct tep_event *event, struct tep_print_arg *arg, char **tok)
-{
- memset(arg, 0, sizeof(*arg));
- arg->type = TEP_PRINT_INT_ARRAY;
-
- if (alloc_and_process_delim(event, ",", &arg->int_array.field))
- goto out;
-
- if (alloc_and_process_delim(event, ",", &arg->int_array.count))
- goto free_field;
-
- if (alloc_and_process_delim(event, ")", &arg->int_array.el_size))
- goto free_size;
-
- return read_token_item(tok);
-
-free_size:
- free_arg(arg->int_array.count);
- arg->int_array.count = NULL;
-free_field:
- free_arg(arg->int_array.field);
- arg->int_array.field = NULL;
-out:
- *tok = NULL;
- return TEP_EVENT_ERROR;
-}
-
-static enum tep_event_type
-process_dynamic_array(struct tep_event *event, struct tep_print_arg *arg, char **tok)
-{
- struct tep_format_field *field;
- enum tep_event_type type;
- char *token;
-
- memset(arg, 0, sizeof(*arg));
- arg->type = TEP_PRINT_DYNAMIC_ARRAY;
-
- /*
- * The item within the parenthesis is another field that holds
- * the index into where the array starts.
- */
- type = read_token(&token);
- *tok = token;
- if (type != TEP_EVENT_ITEM)
- goto out_free;
-
- /* Find the field */
-
- field = tep_find_field(event, token);
- if (!field)
- goto out_free;
-
- arg->dynarray.field = field;
- arg->dynarray.index = 0;
-
- if (read_expected(TEP_EVENT_DELIM, ")") < 0)
- goto out_free;
-
- free_token(token);
- type = read_token_item(&token);
- *tok = token;
- if (type != TEP_EVENT_OP || strcmp(token, "[") != 0)
- return type;
-
- free_token(token);
- arg = alloc_arg();
- if (!arg) {
- do_warning_event(event, "%s: not enough memory!", __func__);
- *tok = NULL;
- return TEP_EVENT_ERROR;
- }
-
- type = process_arg(event, arg, &token);
- if (type == TEP_EVENT_ERROR)
- goto out_free_arg;
-
- if (!test_type_token(type, token, TEP_EVENT_OP, "]"))
- goto out_free_arg;
-
- free_token(token);
- type = read_token_item(tok);
- return type;
-
- out_free_arg:
- free_arg(arg);
- out_free:
- free_token(token);
- *tok = NULL;
- return TEP_EVENT_ERROR;
-}
-
-static enum tep_event_type
-process_dynamic_array_len(struct tep_event *event, struct tep_print_arg *arg,
- char **tok)
-{
- struct tep_format_field *field;
- enum tep_event_type type;
- char *token;
-
- if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
- goto out_free;
-
- arg->type = TEP_PRINT_DYNAMIC_ARRAY_LEN;
-
- /* Find the field */
- field = tep_find_field(event, token);
- if (!field)
- goto out_free;
-
- arg->dynarray.field = field;
- arg->dynarray.index = 0;
-
- if (read_expected(TEP_EVENT_DELIM, ")") < 0)
- goto out_err;
-
- free_token(token);
- type = read_token(&token);
- *tok = token;
-
- return type;
-
- out_free:
- free_token(token);
- out_err:
- *tok = NULL;
- return TEP_EVENT_ERROR;
-}
-
-static enum tep_event_type
-process_paren(struct tep_event *event, struct tep_print_arg *arg, char **tok)
-{
- struct tep_print_arg *item_arg;
- enum tep_event_type type;
- char *token;
-
- type = process_arg(event, arg, &token);
-
- if (type == TEP_EVENT_ERROR)
- goto out_free;
-
- if (type == TEP_EVENT_OP)
- type = process_op(event, arg, &token);
-
- if (type == TEP_EVENT_ERROR)
- goto out_free;
-
- if (test_type_token(type, token, TEP_EVENT_DELIM, ")"))
- goto out_free;
-
- free_token(token);
- type = read_token_item(&token);
-
- /*
- * If the next token is an item or another open paren, then
- * this was a typecast.
- */
- if (event_item_type(type) ||
- (type == TEP_EVENT_DELIM && strcmp(token, "(") == 0)) {
-
- /* make this a typecast and contine */
-
- /* prevous must be an atom */
- if (arg->type != TEP_PRINT_ATOM) {
- do_warning_event(event, "previous needed to be TEP_PRINT_ATOM");
- goto out_free;
- }
-
- item_arg = alloc_arg();
- if (!item_arg) {
- do_warning_event(event, "%s: not enough memory!",
- __func__);
- goto out_free;
- }
-
- arg->type = TEP_PRINT_TYPE;
- arg->typecast.type = arg->atom.atom;
- arg->typecast.item = item_arg;
- type = process_arg_token(event, item_arg, &token, type);
-
- }
-
- *tok = token;
- return type;
-
- out_free:
- free_token(token);
- *tok = NULL;
- return TEP_EVENT_ERROR;
-}
-
-
-static enum tep_event_type
-process_str(struct tep_event *event __maybe_unused, struct tep_print_arg *arg,
- char **tok)
-{
- enum tep_event_type type;
- char *token;
-
- if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
- goto out_free;
-
- arg->type = TEP_PRINT_STRING;
- arg->string.string = token;
- arg->string.offset = -1;
-
- if (read_expected(TEP_EVENT_DELIM, ")") < 0)
- goto out_err;
-
- type = read_token(&token);
- *tok = token;
-
- return type;
-
- out_free:
- free_token(token);
- out_err:
- *tok = NULL;
- return TEP_EVENT_ERROR;
-}
-
-static enum tep_event_type
-process_bitmask(struct tep_event *event __maybe_unused, struct tep_print_arg *arg,
- char **tok)
-{
- enum tep_event_type type;
- char *token;
-
- if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
- goto out_free;
-
- arg->type = TEP_PRINT_BITMASK;
- arg->bitmask.bitmask = token;
- arg->bitmask.offset = -1;
-
- if (read_expected(TEP_EVENT_DELIM, ")") < 0)
- goto out_err;
-
- type = read_token(&token);
- *tok = token;
-
- return type;
-
- out_free:
- free_token(token);
- out_err:
- *tok = NULL;
- return TEP_EVENT_ERROR;
-}
-
-static struct tep_function_handler *
-find_func_handler(struct tep_handle *tep, char *func_name)
-{
- struct tep_function_handler *func;
-
- if (!tep)
- return NULL;
-
- for (func = tep->func_handlers; func; func = func->next) {
- if (strcmp(func->name, func_name) == 0)
- break;
- }
-
- return func;
-}
-
-static void remove_func_handler(struct tep_handle *tep, char *func_name)
-{
- struct tep_function_handler *func;
- struct tep_function_handler **next;
-
- next = &tep->func_handlers;
- while ((func = *next)) {
- if (strcmp(func->name, func_name) == 0) {
- *next = func->next;
- free_func_handle(func);
- break;
- }
- next = &func->next;
- }
-}
-
-static enum tep_event_type
-process_func_handler(struct tep_event *event, struct tep_function_handler *func,
- struct tep_print_arg *arg, char **tok)
-{
- struct tep_print_arg **next_arg;
- struct tep_print_arg *farg;
- enum tep_event_type type;
- char *token;
- int i;
-
- arg->type = TEP_PRINT_FUNC;
- arg->func.func = func;
-
- *tok = NULL;
-
- next_arg = &(arg->func.args);
- for (i = 0; i < func->nr_args; i++) {
- farg = alloc_arg();
- if (!farg) {
- do_warning_event(event, "%s: not enough memory!",
- __func__);
- return TEP_EVENT_ERROR;
- }
-
- type = process_arg(event, farg, &token);
- if (i < (func->nr_args - 1)) {
- if (type != TEP_EVENT_DELIM || strcmp(token, ",") != 0) {
- do_warning_event(event,
- "Error: function '%s()' expects %d arguments but event %s only uses %d",
- func->name, func->nr_args,
- event->name, i + 1);
- goto err;
- }
- } else {
- if (type != TEP_EVENT_DELIM || strcmp(token, ")") != 0) {
- do_warning_event(event,
- "Error: function '%s()' only expects %d arguments but event %s has more",
- func->name, func->nr_args, event->name);
- goto err;
- }
- }
-
- *next_arg = farg;
- next_arg = &(farg->next);
- free_token(token);
- }
-
- type = read_token(&token);
- *tok = token;
-
- return type;
-
-err:
- free_arg(farg);
- free_token(token);
- return TEP_EVENT_ERROR;
-}
-
-static enum tep_event_type
-process_builtin_expect(struct tep_event *event, struct tep_print_arg *arg, char **tok)
-{
- enum tep_event_type type;
- char *token = NULL;
-
- /* Handle __builtin_expect( cond, #) */
- type = process_arg(event, arg, &token);
-
- if (type != TEP_EVENT_DELIM || token[0] != ',')
- goto out_free;
-
- free_token(token);
-
- /* We don't care what the second parameter is of the __builtin_expect() */
- if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
- goto out_free;
-
- if (read_expected(TEP_EVENT_DELIM, ")") < 0)
- goto out_free;
-
- free_token(token);
- type = read_token_item(tok);
- return type;
-
-out_free:
- free_token(token);
- *tok = NULL;
- return TEP_EVENT_ERROR;
-}
-
-static enum tep_event_type
-process_function(struct tep_event *event, struct tep_print_arg *arg,
- char *token, char **tok)
-{
- struct tep_function_handler *func;
-
- if (strcmp(token, "__print_flags") == 0) {
- free_token(token);
- is_flag_field = 1;
- return process_flags(event, arg, tok);
- }
- if (strcmp(token, "__print_symbolic") == 0) {
- free_token(token);
- is_symbolic_field = 1;
- return process_symbols(event, arg, tok);
- }
- if (strcmp(token, "__print_hex") == 0) {
- free_token(token);
- return process_hex(event, arg, tok);
- }
- if (strcmp(token, "__print_hex_str") == 0) {
- free_token(token);
- return process_hex_str(event, arg, tok);
- }
- if (strcmp(token, "__print_array") == 0) {
- free_token(token);
- return process_int_array(event, arg, tok);
- }
- if (strcmp(token, "__get_str") == 0) {
- free_token(token);
- return process_str(event, arg, tok);
- }
- if (strcmp(token, "__get_bitmask") == 0) {
- free_token(token);
- return process_bitmask(event, arg, tok);
- }
- if (strcmp(token, "__get_dynamic_array") == 0) {
- free_token(token);
- return process_dynamic_array(event, arg, tok);
- }
- if (strcmp(token, "__get_dynamic_array_len") == 0) {
- free_token(token);
- return process_dynamic_array_len(event, arg, tok);
- }
- if (strcmp(token, "__builtin_expect") == 0) {
- free_token(token);
- return process_builtin_expect(event, arg, tok);
- }
-
- func = find_func_handler(event->tep, token);
- if (func) {
- free_token(token);
- return process_func_handler(event, func, arg, tok);
- }
-
- do_warning_event(event, "function %s not defined", token);
- free_token(token);
- return TEP_EVENT_ERROR;
-}
-
-static enum tep_event_type
-process_arg_token(struct tep_event *event, struct tep_print_arg *arg,
- char **tok, enum tep_event_type type)
-{
- char *token;
- char *atom;
-
- token = *tok;
-
- switch (type) {
- case TEP_EVENT_ITEM:
- if (strcmp(token, "REC") == 0) {
- free_token(token);
- type = process_entry(event, arg, &token);
- break;
- }
- atom = token;
- /* test the next token */
- type = read_token_item(&token);
-
- /*
- * If the next token is a parenthesis, then this
- * is a function.
- */
- if (type == TEP_EVENT_DELIM && strcmp(token, "(") == 0) {
- free_token(token);
- token = NULL;
- /* this will free atom. */
- type = process_function(event, arg, atom, &token);
- break;
- }
- /* atoms can be more than one token long */
- while (type == TEP_EVENT_ITEM) {
- int ret;
-
- ret = append(&atom, " ", token);
- if (ret < 0) {
- free(atom);
- *tok = NULL;
- free_token(token);
- return TEP_EVENT_ERROR;
- }
- free_token(token);
- type = read_token_item(&token);
- }
-
- arg->type = TEP_PRINT_ATOM;
- arg->atom.atom = atom;
- break;
-
- case TEP_EVENT_DQUOTE:
- case TEP_EVENT_SQUOTE:
- arg->type = TEP_PRINT_ATOM;
- arg->atom.atom = token;
- type = read_token_item(&token);
- break;
- case TEP_EVENT_DELIM:
- if (strcmp(token, "(") == 0) {
- free_token(token);
- type = process_paren(event, arg, &token);
- break;
- }
- case TEP_EVENT_OP:
- /* handle single ops */
- arg->type = TEP_PRINT_OP;
- arg->op.op = token;
- arg->op.left = NULL;
- type = process_op(event, arg, &token);
-
- /* On error, the op is freed */
- if (type == TEP_EVENT_ERROR)
- arg->op.op = NULL;
-
- /* return error type if errored */
- break;
-
- case TEP_EVENT_ERROR ... TEP_EVENT_NEWLINE:
- default:
- do_warning_event(event, "unexpected type %d", type);
- return TEP_EVENT_ERROR;
- }
- *tok = token;
-
- return type;
-}
-
-static int event_read_print_args(struct tep_event *event, struct tep_print_arg **list)
-{
- enum tep_event_type type = TEP_EVENT_ERROR;
- struct tep_print_arg *arg;
- char *token;
- int args = 0;
-
- do {
- if (type == TEP_EVENT_NEWLINE) {
- type = read_token_item(&token);
- continue;
- }
-
- arg = alloc_arg();
- if (!arg) {
- do_warning_event(event, "%s: not enough memory!",
- __func__);
- return -1;
- }
-
- type = process_arg(event, arg, &token);
-
- if (type == TEP_EVENT_ERROR) {
- free_token(token);
- free_arg(arg);
- return -1;
- }
-
- *list = arg;
- args++;
-
- if (type == TEP_EVENT_OP) {
- type = process_op(event, arg, &token);
- free_token(token);
- if (type == TEP_EVENT_ERROR) {
- *list = NULL;
- free_arg(arg);
- return -1;
- }
- list = &arg->next;
- continue;
- }
-
- if (type == TEP_EVENT_DELIM && strcmp(token, ",") == 0) {
- free_token(token);
- *list = arg;
- list = &arg->next;
- continue;
- }
- break;
- } while (type != TEP_EVENT_NONE);
-
- if (type != TEP_EVENT_NONE && type != TEP_EVENT_ERROR)
- free_token(token);
-
- return args;
-}
-
-static int event_read_print(struct tep_event *event)
-{
- enum tep_event_type type;
- char *token;
- int ret;
-
- if (read_expected_item(TEP_EVENT_ITEM, "print") < 0)
- return -1;
-
- if (read_expected(TEP_EVENT_ITEM, "fmt") < 0)
- return -1;
-
- if (read_expected(TEP_EVENT_OP, ":") < 0)
- return -1;
-
- if (read_expect_type(TEP_EVENT_DQUOTE, &token) < 0)
- goto fail;
-
- concat:
- event->print_fmt.format = token;
- event->print_fmt.args = NULL;
-
- /* ok to have no arg */
- type = read_token_item(&token);
-
- if (type == TEP_EVENT_NONE)
- return 0;
-
- /* Handle concatenation of print lines */
- if (type == TEP_EVENT_DQUOTE) {
- char *cat;
-
- if (asprintf(&cat, "%s%s", event->print_fmt.format, token) < 0)
- goto fail;
- free_token(token);
- free_token(event->print_fmt.format);
- event->print_fmt.format = NULL;
- token = cat;
- goto concat;
- }
-
- if (test_type_token(type, token, TEP_EVENT_DELIM, ","))
- goto fail;
-
- free_token(token);
-
- ret = event_read_print_args(event, &event->print_fmt.args);
- if (ret < 0)
- return -1;
-
- return ret;
-
- fail:
- free_token(token);
- return -1;
-}
-
-/**
- * tep_find_common_field - return a common field by event
- * @event: handle for the event
- * @name: the name of the common field to return
- *
- * Returns a common field from the event by the given @name.
- * This only searches the common fields and not all field.
- */
-struct tep_format_field *
-tep_find_common_field(struct tep_event *event, const char *name)
-{
- struct tep_format_field *format;
-
- for (format = event->format.common_fields;
- format; format = format->next) {
- if (strcmp(format->name, name) == 0)
- break;
- }
-
- return format;
-}
-
-/**
- * tep_find_field - find a non-common field
- * @event: handle for the event
- * @name: the name of the non-common field
- *
- * Returns a non-common field by the given @name.
- * This does not search common fields.
- */
-struct tep_format_field *
-tep_find_field(struct tep_event *event, const char *name)
-{
- struct tep_format_field *format;
-
- for (format = event->format.fields;
- format; format = format->next) {
- if (strcmp(format->name, name) == 0)
- break;
- }
-
- return format;
-}
-
-/**
- * tep_find_any_field - find any field by name
- * @event: handle for the event
- * @name: the name of the field
- *
- * Returns a field by the given @name.
- * This searches the common field names first, then
- * the non-common ones if a common one was not found.
- */
-struct tep_format_field *
-tep_find_any_field(struct tep_event *event, const char *name)
-{
- struct tep_format_field *format;
-
- format = tep_find_common_field(event, name);
- if (format)
- return format;
- return tep_find_field(event, name);
-}
-
-/**
- * tep_read_number - read a number from data
- * @tep: a handle to the trace event parser context
- * @ptr: the raw data
- * @size: the size of the data that holds the number
- *
- * Returns the number (converted to host) from the
- * raw data.
- */
-unsigned long long tep_read_number(struct tep_handle *tep,
- const void *ptr, int size)
-{
- unsigned long long val;
-
- switch (size) {
- case 1:
- return *(unsigned char *)ptr;
- case 2:
- return data2host2(tep, *(unsigned short *)ptr);
- case 4:
- return data2host4(tep, *(unsigned int *)ptr);
- case 8:
- memcpy(&val, (ptr), sizeof(unsigned long long));
- return data2host8(tep, val);
- default:
- /* BUG! */
- return 0;
- }
-}
-
-/**
- * tep_read_number_field - read a number from data
- * @field: a handle to the field
- * @data: the raw data to read
- * @value: the value to place the number in
- *
- * Reads raw data according to a field offset and size,
- * and translates it into @value.
- *
- * Returns 0 on success, -1 otherwise.
- */
-int tep_read_number_field(struct tep_format_field *field, const void *data,
- unsigned long long *value)
-{
- if (!field)
- return -1;
- switch (field->size) {
- case 1:
- case 2:
- case 4:
- case 8:
- *value = tep_read_number(field->event->tep,
- data + field->offset, field->size);
- return 0;
- default:
- return -1;
- }
-}
-
-static int get_common_info(struct tep_handle *tep,
- const char *type, int *offset, int *size)
-{
- struct tep_event *event;
- struct tep_format_field *field;
-
- /*
- * All events should have the same common elements.
- * Pick any event to find where the type is;
- */
- if (!tep->events) {
- do_warning("no event_list!");
- return -1;
- }
-
- event = tep->events[0];
- field = tep_find_common_field(event, type);
- if (!field)
- return -1;
-
- *offset = field->offset;
- *size = field->size;
-
- return 0;
-}
-
-static int __parse_common(struct tep_handle *tep, void *data,
- int *size, int *offset, const char *name)
-{
- int ret;
-
- if (!*size) {
- ret = get_common_info(tep, name, offset, size);
- if (ret < 0)
- return ret;
- }
- return tep_read_number(tep, data + *offset, *size);
-}
-
-static int trace_parse_common_type(struct tep_handle *tep, void *data)
-{
- return __parse_common(tep, data,
- &tep->type_size, &tep->type_offset,
- "common_type");
-}
-
-static int parse_common_pid(struct tep_handle *tep, void *data)
-{
- return __parse_common(tep, data,
- &tep->pid_size, &tep->pid_offset,
- "common_pid");
-}
-
-static int parse_common_pc(struct tep_handle *tep, void *data)
-{
- return __parse_common(tep, data,
- &tep->pc_size, &tep->pc_offset,
- "common_preempt_count");
-}
-
-static int parse_common_flags(struct tep_handle *tep, void *data)
-{
- return __parse_common(tep, data,
- &tep->flags_size, &tep->flags_offset,
- "common_flags");
-}
-
-static int parse_common_lock_depth(struct tep_handle *tep, void *data)
-{
- return __parse_common(tep, data,
- &tep->ld_size, &tep->ld_offset,
- "common_lock_depth");
-}
-
-static int parse_common_migrate_disable(struct tep_handle *tep, void *data)
-{
- return __parse_common(tep, data,
- &tep->ld_size, &tep->ld_offset,
- "common_migrate_disable");
-}
-
-static int events_id_cmp(const void *a, const void *b);
-
-/**
- * tep_find_event - find an event by given id
- * @tep: a handle to the trace event parser context
- * @id: the id of the event
- *
- * Returns an event that has a given @id.
- */
-struct tep_event *tep_find_event(struct tep_handle *tep, int id)
-{
- struct tep_event **eventptr;
- struct tep_event key;
- struct tep_event *pkey = &key;
-
- /* Check cache first */
- if (tep->last_event && tep->last_event->id == id)
- return tep->last_event;
-
- key.id = id;
-
- eventptr = bsearch(&pkey, tep->events, tep->nr_events,
- sizeof(*tep->events), events_id_cmp);
-
- if (eventptr) {
- tep->last_event = *eventptr;
- return *eventptr;
- }
-
- return NULL;
-}
-
-/**
- * tep_find_event_by_name - find an event by given name
- * @tep: a handle to the trace event parser context
- * @sys: the system name to search for
- * @name: the name of the event to search for
- *
- * This returns an event with a given @name and under the system
- * @sys. If @sys is NULL the first event with @name is returned.
- */
-struct tep_event *
-tep_find_event_by_name(struct tep_handle *tep,
- const char *sys, const char *name)
-{
- struct tep_event *event = NULL;
- int i;
-
- if (tep->last_event &&
- strcmp(tep->last_event->name, name) == 0 &&
- (!sys || strcmp(tep->last_event->system, sys) == 0))
- return tep->last_event;
-
- for (i = 0; i < tep->nr_events; i++) {
- event = tep->events[i];
- if (strcmp(event->name, name) == 0) {
- if (!sys)
- break;
- if (strcmp(event->system, sys) == 0)
- break;
- }
- }
- if (i == tep->nr_events)
- event = NULL;
-
- tep->last_event = event;
- return event;
-}
-
-static unsigned long long
-eval_num_arg(void *data, int size, struct tep_event *event, struct tep_print_arg *arg)
-{
- struct tep_handle *tep = event->tep;
- unsigned long long val = 0;
- unsigned long long left, right;
- struct tep_print_arg *typearg = NULL;
- struct tep_print_arg *larg;
- unsigned long offset;
- unsigned int field_size;
-
- switch (arg->type) {
- case TEP_PRINT_NULL:
- /* ?? */
- return 0;
- case TEP_PRINT_ATOM:
- return strtoull(arg->atom.atom, NULL, 0);
- case TEP_PRINT_FIELD:
- if (!arg->field.field) {
- arg->field.field = tep_find_any_field(event, arg->field.name);
- if (!arg->field.field)
- goto out_warning_field;
-
- }
- /* must be a number */
- val = tep_read_number(tep, data + arg->field.field->offset,
- arg->field.field->size);
- break;
- case TEP_PRINT_FLAGS:
- case TEP_PRINT_SYMBOL:
- case TEP_PRINT_INT_ARRAY:
- case TEP_PRINT_HEX:
- case TEP_PRINT_HEX_STR:
- break;
- case TEP_PRINT_TYPE:
- val = eval_num_arg(data, size, event, arg->typecast.item);
- return eval_type(val, arg, 0);
- case TEP_PRINT_STRING:
- case TEP_PRINT_BSTRING:
- case TEP_PRINT_BITMASK:
- return 0;
- case TEP_PRINT_FUNC: {
- struct trace_seq s;
- trace_seq_init(&s);
- val = process_defined_func(&s, data, size, event, arg);
- trace_seq_destroy(&s);
- return val;
- }
- case TEP_PRINT_OP:
- if (strcmp(arg->op.op, "[") == 0) {
- /*
- * Arrays are special, since we don't want
- * to read the arg as is.
- */
- right = eval_num_arg(data, size, event, arg->op.right);
-
- /* handle typecasts */
- larg = arg->op.left;
- while (larg->type == TEP_PRINT_TYPE) {
- if (!typearg)
- typearg = larg;
- larg = larg->typecast.item;
- }
-
- /* Default to long size */
- field_size = tep->long_size;
-
- switch (larg->type) {
- case TEP_PRINT_DYNAMIC_ARRAY:
- offset = tep_read_number(tep,
- data + larg->dynarray.field->offset,
- larg->dynarray.field->size);
- if (larg->dynarray.field->elementsize)
- field_size = larg->dynarray.field->elementsize;
- /*
- * The actual length of the dynamic array is stored
- * in the top half of the field, and the offset
- * is in the bottom half of the 32 bit field.
- */
- offset &= 0xffff;
- offset += right;
- break;
- case TEP_PRINT_FIELD:
- if (!larg->field.field) {
- larg->field.field =
- tep_find_any_field(event, larg->field.name);
- if (!larg->field.field) {
- arg = larg;
- goto out_warning_field;
- }
- }
- field_size = larg->field.field->elementsize;
- offset = larg->field.field->offset +
- right * larg->field.field->elementsize;
- break;
- default:
- goto default_op; /* oops, all bets off */
- }
- val = tep_read_number(tep,
- data + offset, field_size);
- if (typearg)
- val = eval_type(val, typearg, 1);
- break;
- } else if (strcmp(arg->op.op, "?") == 0) {
- left = eval_num_arg(data, size, event, arg->op.left);
- arg = arg->op.right;
- if (left)
- val = eval_num_arg(data, size, event, arg->op.left);
- else
- val = eval_num_arg(data, size, event, arg->op.right);
- break;
- }
- default_op:
- left = eval_num_arg(data, size, event, arg->op.left);
- right = eval_num_arg(data, size, event, arg->op.right);
- switch (arg->op.op[0]) {
- case '!':
- switch (arg->op.op[1]) {
- case 0:
- val = !right;
- break;
- case '=':
- val = left != right;
- break;
- default:
- goto out_warning_op;
- }
- break;
- case '~':
- val = ~right;
- break;
- case '|':
- if (arg->op.op[1])
- val = left || right;
- else
- val = left | right;
- break;
- case '&':
- if (arg->op.op[1])
- val = left && right;
- else
- val = left & right;
- break;
- case '<':
- switch (arg->op.op[1]) {
- case 0:
- val = left < right;
- break;
- case '<':
- val = left << right;
- break;
- case '=':
- val = left <= right;
- break;
- default:
- goto out_warning_op;
- }
- break;
- case '>':
- switch (arg->op.op[1]) {
- case 0:
- val = left > right;
- break;
- case '>':
- val = left >> right;
- break;
- case '=':
- val = left >= right;
- break;
- default:
- goto out_warning_op;
- }
- break;
- case '=':
- if (arg->op.op[1] != '=')
- goto out_warning_op;
-
- val = left == right;
- break;
- case '-':
- val = left - right;
- break;
- case '+':
- val = left + right;
- break;
- case '/':
- val = left / right;
- break;
- case '%':
- val = left % right;
- break;
- case '*':
- val = left * right;
- break;
- default:
- goto out_warning_op;
- }
- break;
- case TEP_PRINT_DYNAMIC_ARRAY_LEN:
- offset = tep_read_number(tep,
- data + arg->dynarray.field->offset,
- arg->dynarray.field->size);
- /*
- * The total allocated length of the dynamic array is
- * stored in the top half of the field, and the offset
- * is in the bottom half of the 32 bit field.
- */
- val = (unsigned long long)(offset >> 16);
- break;
- case TEP_PRINT_DYNAMIC_ARRAY:
- /* Without [], we pass the address to the dynamic data */
- offset = tep_read_number(tep,
- data + arg->dynarray.field->offset,
- arg->dynarray.field->size);
- /*
- * The total allocated length of the dynamic array is
- * stored in the top half of the field, and the offset
- * is in the bottom half of the 32 bit field.
- */
- offset &= 0xffff;
- val = (unsigned long long)((unsigned long)data + offset);
- break;
- default: /* not sure what to do there */
- return 0;
- }
- return val;
-
-out_warning_op:
- do_warning_event(event, "%s: unknown op '%s'", __func__, arg->op.op);
- return 0;
-
-out_warning_field:
- do_warning_event(event, "%s: field %s not found",
- __func__, arg->field.name);
- return 0;
-}
-
-struct flag {
- const char *name;
- unsigned long long value;
-};
-
-static const struct flag flags[] = {
- { "HI_SOFTIRQ", 0 },
- { "TIMER_SOFTIRQ", 1 },
- { "NET_TX_SOFTIRQ", 2 },
- { "NET_RX_SOFTIRQ", 3 },
- { "BLOCK_SOFTIRQ", 4 },
- { "IRQ_POLL_SOFTIRQ", 5 },
- { "TASKLET_SOFTIRQ", 6 },
- { "SCHED_SOFTIRQ", 7 },
- { "HRTIMER_SOFTIRQ", 8 },
- { "RCU_SOFTIRQ", 9 },
-
- { "HRTIMER_NORESTART", 0 },
- { "HRTIMER_RESTART", 1 },
-};
-
-static long long eval_flag(const char *flag)
-{
- int i;
-
- /*
- * Some flags in the format files do not get converted.
- * If the flag is not numeric, see if it is something that
- * we already know about.
- */
- if (isdigit(flag[0]))
- return strtoull(flag, NULL, 0);
-
- for (i = 0; i < (int)(sizeof(flags)/sizeof(flags[0])); i++)
- if (strcmp(flags[i].name, flag) == 0)
- return flags[i].value;
-
- return -1LL;
-}
-
-static void print_str_to_seq(struct trace_seq *s, const char *format,
- int len_arg, const char *str)
-{
- if (len_arg >= 0)
- trace_seq_printf(s, format, len_arg, str);
- else
- trace_seq_printf(s, format, str);
-}
-
-static void print_bitmask_to_seq(struct tep_handle *tep,
- struct trace_seq *s, const char *format,
- int len_arg, const void *data, int size)
-{
- int nr_bits = size * 8;
- int str_size = (nr_bits + 3) / 4;
- int len = 0;
- char buf[3];
- char *str;
- int index;
- int i;
-
- /*
- * The kernel likes to put in commas every 32 bits, we
- * can do the same.
- */
- str_size += (nr_bits - 1) / 32;
-
- str = malloc(str_size + 1);
- if (!str) {
- do_warning("%s: not enough memory!", __func__);
- return;
- }
- str[str_size] = 0;
-
- /* Start out with -2 for the two chars per byte */
- for (i = str_size - 2; i >= 0; i -= 2) {
- /*
- * data points to a bit mask of size bytes.
- * In the kernel, this is an array of long words, thus
- * endianness is very important.
- */
- if (tep->file_bigendian)
- index = size - (len + 1);
- else
- index = len;
-
- snprintf(buf, 3, "%02x", *((unsigned char *)data + index));
- memcpy(str + i, buf, 2);
- len++;
- if (!(len & 3) && i > 0) {
- i--;
- str[i] = ',';
- }
- }
-
- if (len_arg >= 0)
- trace_seq_printf(s, format, len_arg, str);
- else
- trace_seq_printf(s, format, str);
-
- free(str);
-}
-
-static void print_str_arg(struct trace_seq *s, void *data, int size,
- struct tep_event *event, const char *format,
- int len_arg, struct tep_print_arg *arg)
-{
- struct tep_handle *tep = event->tep;
- struct tep_print_flag_sym *flag;
- struct tep_format_field *field;
- struct printk_map *printk;
- long long val, fval;
- unsigned long long addr;
- char *str;
- unsigned char *hex;
- int print;
- int i, len;
-
- switch (arg->type) {
- case TEP_PRINT_NULL:
- /* ?? */
- return;
- case TEP_PRINT_ATOM:
- print_str_to_seq(s, format, len_arg, arg->atom.atom);
- return;
- case TEP_PRINT_FIELD:
- field = arg->field.field;
- if (!field) {
- field = tep_find_any_field(event, arg->field.name);
- if (!field) {
- str = arg->field.name;
- goto out_warning_field;
- }
- arg->field.field = field;
- }
- /* Zero sized fields, mean the rest of the data */
- len = field->size ? : size - field->offset;
-
- /*
- * Some events pass in pointers. If this is not an array
- * and the size is the same as long_size, assume that it
- * is a pointer.
- */
- if (!(field->flags & TEP_FIELD_IS_ARRAY) &&
- field->size == tep->long_size) {
-
- /* Handle heterogeneous recording and processing
- * architectures
- *
- * CASE I:
- * Traces recorded on 32-bit devices (32-bit
- * addressing) and processed on 64-bit devices:
- * In this case, only 32 bits should be read.
- *
- * CASE II:
- * Traces recorded on 64 bit devices and processed
- * on 32-bit devices:
- * In this case, 64 bits must be read.
- */
- addr = (tep->long_size == 8) ?
- *(unsigned long long *)(data + field->offset) :
- (unsigned long long)*(unsigned int *)(data + field->offset);
-
- /* Check if it matches a print format */
- printk = find_printk(tep, addr);
- if (printk)
- trace_seq_puts(s, printk->printk);
- else
- trace_seq_printf(s, "%llx", addr);
- break;
- }
- str = malloc(len + 1);
- if (!str) {
- do_warning_event(event, "%s: not enough memory!",
- __func__);
- return;
- }
- memcpy(str, data + field->offset, len);
- str[len] = 0;
- print_str_to_seq(s, format, len_arg, str);
- free(str);
- break;
- case TEP_PRINT_FLAGS:
- val = eval_num_arg(data, size, event, arg->flags.field);
- print = 0;
- for (flag = arg->flags.flags; flag; flag = flag->next) {
- fval = eval_flag(flag->value);
- if (!val && fval < 0) {
- print_str_to_seq(s, format, len_arg, flag->str);
- break;
- }
- if (fval > 0 && (val & fval) == fval) {
- if (print && arg->flags.delim)
- trace_seq_puts(s, arg->flags.delim);
- print_str_to_seq(s, format, len_arg, flag->str);
- print = 1;
- val &= ~fval;
- }
- }
- if (val) {
- if (print && arg->flags.delim)
- trace_seq_puts(s, arg->flags.delim);
- trace_seq_printf(s, "0x%llx", val);
- }
- break;
- case TEP_PRINT_SYMBOL:
- val = eval_num_arg(data, size, event, arg->symbol.field);
- for (flag = arg->symbol.symbols; flag; flag = flag->next) {
- fval = eval_flag(flag->value);
- if (val == fval) {
- print_str_to_seq(s, format, len_arg, flag->str);
- break;
- }
- }
- if (!flag)
- trace_seq_printf(s, "0x%llx", val);
- break;
- case TEP_PRINT_HEX:
- case TEP_PRINT_HEX_STR:
- if (arg->hex.field->type == TEP_PRINT_DYNAMIC_ARRAY) {
- unsigned long offset;
- offset = tep_read_number(tep,
- data + arg->hex.field->dynarray.field->offset,
- arg->hex.field->dynarray.field->size);
- hex = data + (offset & 0xffff);
- } else {
- field = arg->hex.field->field.field;
- if (!field) {
- str = arg->hex.field->field.name;
- field = tep_find_any_field(event, str);
- if (!field)
- goto out_warning_field;
- arg->hex.field->field.field = field;
- }
- hex = data + field->offset;
- }
- len = eval_num_arg(data, size, event, arg->hex.size);
- for (i = 0; i < len; i++) {
- if (i && arg->type == TEP_PRINT_HEX)
- trace_seq_putc(s, ' ');
- trace_seq_printf(s, "%02x", hex[i]);
- }
- break;
-
- case TEP_PRINT_INT_ARRAY: {
- void *num;
- int el_size;
-
- if (arg->int_array.field->type == TEP_PRINT_DYNAMIC_ARRAY) {
- unsigned long offset;
- struct tep_format_field *field =
- arg->int_array.field->dynarray.field;
- offset = tep_read_number(tep,
- data + field->offset,
- field->size);
- num = data + (offset & 0xffff);
- } else {
- field = arg->int_array.field->field.field;
- if (!field) {
- str = arg->int_array.field->field.name;
- field = tep_find_any_field(event, str);
- if (!field)
- goto out_warning_field;
- arg->int_array.field->field.field = field;
- }
- num = data + field->offset;
- }
- len = eval_num_arg(data, size, event, arg->int_array.count);
- el_size = eval_num_arg(data, size, event,
- arg->int_array.el_size);
- for (i = 0; i < len; i++) {
- if (i)
- trace_seq_putc(s, ' ');
-
- if (el_size == 1) {
- trace_seq_printf(s, "%u", *(uint8_t *)num);
- } else if (el_size == 2) {
- trace_seq_printf(s, "%u", *(uint16_t *)num);
- } else if (el_size == 4) {
- trace_seq_printf(s, "%u", *(uint32_t *)num);
- } else if (el_size == 8) {
- trace_seq_printf(s, "%"PRIu64, *(uint64_t *)num);
- } else {
- trace_seq_printf(s, "BAD SIZE:%d 0x%x",
- el_size, *(uint8_t *)num);
- el_size = 1;
- }
-
- num += el_size;
- }
- break;
- }
- case TEP_PRINT_TYPE:
- break;
- case TEP_PRINT_STRING: {
- int str_offset;
-
- if (arg->string.offset == -1) {
- struct tep_format_field *f;
-
- f = tep_find_any_field(event, arg->string.string);
- arg->string.offset = f->offset;
- }
- str_offset = data2host4(tep, *(unsigned int *)(data + arg->string.offset));
- str_offset &= 0xffff;
- print_str_to_seq(s, format, len_arg, ((char *)data) + str_offset);
- break;
- }
- case TEP_PRINT_BSTRING:
- print_str_to_seq(s, format, len_arg, arg->string.string);
- break;
- case TEP_PRINT_BITMASK: {
- int bitmask_offset;
- int bitmask_size;
-
- if (arg->bitmask.offset == -1) {
- struct tep_format_field *f;
-
- f = tep_find_any_field(event, arg->bitmask.bitmask);
- arg->bitmask.offset = f->offset;
- }
- bitmask_offset = data2host4(tep, *(unsigned int *)(data + arg->bitmask.offset));
- bitmask_size = bitmask_offset >> 16;
- bitmask_offset &= 0xffff;
- print_bitmask_to_seq(tep, s, format, len_arg,
- data + bitmask_offset, bitmask_size);
- break;
- }
- case TEP_PRINT_OP:
- /*
- * The only op for string should be ? :
- */
- if (arg->op.op[0] != '?')
- return;
- val = eval_num_arg(data, size, event, arg->op.left);
- if (val)
- print_str_arg(s, data, size, event,
- format, len_arg, arg->op.right->op.left);
- else
- print_str_arg(s, data, size, event,
- format, len_arg, arg->op.right->op.right);
- break;
- case TEP_PRINT_FUNC:
- process_defined_func(s, data, size, event, arg);
- break;
- default:
- /* well... */
- break;
- }
-
- return;
-
-out_warning_field:
- do_warning_event(event, "%s: field %s not found",
- __func__, arg->field.name);
-}
-
-static unsigned long long
-process_defined_func(struct trace_seq *s, void *data, int size,
- struct tep_event *event, struct tep_print_arg *arg)
-{
- struct tep_function_handler *func_handle = arg->func.func;
- struct func_params *param;
- unsigned long long *args;
- unsigned long long ret;
- struct tep_print_arg *farg;
- struct trace_seq str;
- struct save_str {
- struct save_str *next;
- char *str;
- } *strings = NULL, *string;
- int i;
-
- if (!func_handle->nr_args) {
- ret = (*func_handle->func)(s, NULL);
- goto out;
- }
-
- farg = arg->func.args;
- param = func_handle->params;
-
- ret = ULLONG_MAX;
- args = malloc(sizeof(*args) * func_handle->nr_args);
- if (!args)
- goto out;
-
- for (i = 0; i < func_handle->nr_args; i++) {
- switch (param->type) {
- case TEP_FUNC_ARG_INT:
- case TEP_FUNC_ARG_LONG:
- case TEP_FUNC_ARG_PTR:
- args[i] = eval_num_arg(data, size, event, farg);
- break;
- case TEP_FUNC_ARG_STRING:
- trace_seq_init(&str);
- print_str_arg(&str, data, size, event, "%s", -1, farg);
- trace_seq_terminate(&str);
- string = malloc(sizeof(*string));
- if (!string) {
- do_warning_event(event, "%s(%d): malloc str",
- __func__, __LINE__);
- goto out_free;
- }
- string->next = strings;
- string->str = strdup(str.buffer);
- if (!string->str) {
- free(string);
- do_warning_event(event, "%s(%d): malloc str",
- __func__, __LINE__);
- goto out_free;
- }
- args[i] = (uintptr_t)string->str;
- strings = string;
- trace_seq_destroy(&str);
- break;
- default:
- /*
- * Something went totally wrong, this is not
- * an input error, something in this code broke.
- */
- do_warning_event(event, "Unexpected end of arguments\n");
- goto out_free;
- }
- farg = farg->next;
- param = param->next;
- }
-
- ret = (*func_handle->func)(s, args);
-out_free:
- free(args);
- while (strings) {
- string = strings;
- strings = string->next;
- free(string->str);
- free(string);
- }
-
- out:
- /* TBD : handle return type here */
- return ret;
-}
-
-static void free_args(struct tep_print_arg *args)
-{
- struct tep_print_arg *next;
-
- while (args) {
- next = args->next;
-
- free_arg(args);
- args = next;
- }
-}
-
-static struct tep_print_arg *make_bprint_args(char *fmt, void *data, int size, struct tep_event *event)
-{
- struct tep_handle *tep = event->tep;
- struct tep_format_field *field, *ip_field;
- struct tep_print_arg *args, *arg, **next;
- unsigned long long ip, val;
- char *ptr;
- void *bptr;
- int vsize = 0;
-
- field = tep->bprint_buf_field;
- ip_field = tep->bprint_ip_field;
-
- if (!field) {
- field = tep_find_field(event, "buf");
- if (!field) {
- do_warning_event(event, "can't find buffer field for binary printk");
- return NULL;
- }
- ip_field = tep_find_field(event, "ip");
- if (!ip_field) {
- do_warning_event(event, "can't find ip field for binary printk");
- return NULL;
- }
- tep->bprint_buf_field = field;
- tep->bprint_ip_field = ip_field;
- }
-
- ip = tep_read_number(tep, data + ip_field->offset, ip_field->size);
-
- /*
- * The first arg is the IP pointer.
- */
- args = alloc_arg();
- if (!args) {
- do_warning_event(event, "%s(%d): not enough memory!",
- __func__, __LINE__);
- return NULL;
- }
- arg = args;
- arg->next = NULL;
- next = &arg->next;
-
- arg->type = TEP_PRINT_ATOM;
-
- if (asprintf(&arg->atom.atom, "%lld", ip) < 0)
- goto out_free;
-
- /* skip the first "%ps: " */
- for (ptr = fmt + 5, bptr = data + field->offset;
- bptr < data + size && *ptr; ptr++) {
- int ls = 0;
-
- if (*ptr == '%') {
- process_again:
- ptr++;
- switch (*ptr) {
- case '%':
- break;
- case 'l':
- ls++;
- goto process_again;
- case 'L':
- ls = 2;
- goto process_again;
- case '0' ... '9':
- goto process_again;
- case '.':
- goto process_again;
- case 'z':
- case 'Z':
- ls = 1;
- goto process_again;
- case 'p':
- ls = 1;
- if (isalnum(ptr[1])) {
- ptr++;
- /* Check for special pointers */
- switch (*ptr) {
- case 's':
- case 'S':
- case 'x':
- break;
- case 'f':
- case 'F':
- /*
- * Pre-5.5 kernels use %pf and
- * %pF for printing symbols
- * while kernels since 5.5 use
- * %pfw for fwnodes. So check
- * %p[fF] isn't followed by 'w'.
- */
- if (ptr[1] != 'w')
- break;
- /* fall through */
- default:
- /*
- * Older kernels do not process
- * dereferenced pointers.
- * Only process if the pointer
- * value is a printable.
- */
- if (isprint(*(char *)bptr))
- goto process_string;
- }
- }
- /* fall through */
- case 'd':
- case 'u':
- case 'i':
- case 'x':
- case 'X':
- case 'o':
- switch (ls) {
- case 0:
- vsize = 4;
- break;
- case 1:
- vsize = tep->long_size;
- break;
- case 2:
- vsize = 8;
- break;
- default:
- vsize = ls; /* ? */
- break;
- }
- /* fall through */
- case '*':
- if (*ptr == '*')
- vsize = 4;
-
- /* the pointers are always 4 bytes aligned */
- bptr = (void *)(((unsigned long)bptr + 3) &
- ~3);
- val = tep_read_number(tep, bptr, vsize);
- bptr += vsize;
- arg = alloc_arg();
- if (!arg) {
- do_warning_event(event, "%s(%d): not enough memory!",
- __func__, __LINE__);
- goto out_free;
- }
- arg->next = NULL;
- arg->type = TEP_PRINT_ATOM;
- if (asprintf(&arg->atom.atom, "%lld", val) < 0) {
- free(arg);
- goto out_free;
- }
- *next = arg;
- next = &arg->next;
- /*
- * The '*' case means that an arg is used as the length.
- * We need to continue to figure out for what.
- */
- if (*ptr == '*')
- goto process_again;
-
- break;
- case 's':
- process_string:
- arg = alloc_arg();
- if (!arg) {
- do_warning_event(event, "%s(%d): not enough memory!",
- __func__, __LINE__);
- goto out_free;
- }
- arg->next = NULL;
- arg->type = TEP_PRINT_BSTRING;
- arg->string.string = strdup(bptr);
- if (!arg->string.string)
- goto out_free;
- bptr += strlen(bptr) + 1;
- *next = arg;
- next = &arg->next;
- default:
- break;
- }
- }
- }
-
- return args;
-
-out_free:
- free_args(args);
- return NULL;
-}
-
-static char *
-get_bprint_format(void *data, int size __maybe_unused,
- struct tep_event *event)
-{
- struct tep_handle *tep = event->tep;
- unsigned long long addr;
- struct tep_format_field *field;
- struct printk_map *printk;
- char *format;
-
- field = tep->bprint_fmt_field;
-
- if (!field) {
- field = tep_find_field(event, "fmt");
- if (!field) {
- do_warning_event(event, "can't find format field for binary printk");
- return NULL;
- }
- tep->bprint_fmt_field = field;
- }
-
- addr = tep_read_number(tep, data + field->offset, field->size);
-
- printk = find_printk(tep, addr);
- if (!printk) {
- if (asprintf(&format, "%%ps: (NO FORMAT FOUND at %llx)\n", addr) < 0)
- return NULL;
- return format;
- }
-
- if (asprintf(&format, "%s: %s", "%ps", printk->printk) < 0)
- return NULL;
-
- return format;
-}
-
-static int print_mac_arg(struct trace_seq *s, const char *format,
- void *data, int size, struct tep_event *event,
- struct tep_print_arg *arg)
-{
- const char *fmt = "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x";
- bool reverse = false;
- unsigned char *buf;
- int ret = 0;
-
- if (arg->type == TEP_PRINT_FUNC) {
- process_defined_func(s, data, size, event, arg);
- return 0;
- }
-
- if (arg->type != TEP_PRINT_FIELD) {
- trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d",
- arg->type);
- return 0;
- }
-
- if (format[0] == 'm') {
- fmt = "%.2x%.2x%.2x%.2x%.2x%.2x";
- } else if (format[0] == 'M' && format[1] == 'F') {
- fmt = "%.2x-%.2x-%.2x-%.2x-%.2x-%.2x";
- ret++;
- }
- if (format[1] == 'R') {
- reverse = true;
- ret++;
- }
-
- if (!arg->field.field) {
- arg->field.field =
- tep_find_any_field(event, arg->field.name);
- if (!arg->field.field) {
- do_warning_event(event, "%s: field %s not found",
- __func__, arg->field.name);
- return ret;
- }
- }
- if (arg->field.field->size != 6) {
- trace_seq_printf(s, "INVALIDMAC");
- return ret;
- }
-
- buf = data + arg->field.field->offset;
- if (reverse)
- trace_seq_printf(s, fmt, buf[5], buf[4], buf[3], buf[2], buf[1], buf[0]);
- else
- trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
-
- return ret;
-}
-
-static int parse_ip4_print_args(struct tep_handle *tep,
- const char *ptr, bool *reverse)
-{
- int ret = 0;
-
- *reverse = false;
-
- /* hnbl */
- switch (*ptr) {
- case 'h':
- if (tep->file_bigendian)
- *reverse = false;
- else
- *reverse = true;
- ret++;
- break;
- case 'l':
- *reverse = true;
- ret++;
- break;
- case 'n':
- case 'b':
- ret++;
- /* fall through */
- default:
- *reverse = false;
- break;
- }
-
- return ret;
-}
-
-static void print_ip4_addr(struct trace_seq *s, char i, bool reverse, unsigned char *buf)
-{
- const char *fmt;
-
- if (i == 'i')
- fmt = "%03d.%03d.%03d.%03d";
- else
- fmt = "%d.%d.%d.%d";
-
- if (reverse)
- trace_seq_printf(s, fmt, buf[3], buf[2], buf[1], buf[0]);
- else
- trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3]);
-
-}
-
-static inline bool ipv6_addr_v4mapped(const struct in6_addr *a)
-{
- return ((unsigned long)(a->s6_addr32[0] | a->s6_addr32[1]) |
- (unsigned long)(a->s6_addr32[2] ^ htonl(0x0000ffff))) == 0UL;
-}
-
-static inline bool ipv6_addr_is_isatap(const struct in6_addr *addr)
-{
- return (addr->s6_addr32[2] | htonl(0x02000000)) == htonl(0x02005EFE);
-}
-
-static void print_ip6c_addr(struct trace_seq *s, unsigned char *addr)
-{
- int i, j, range;
- unsigned char zerolength[8];
- int longest = 1;
- int colonpos = -1;
- uint16_t word;
- uint8_t hi, lo;
- bool needcolon = false;
- bool useIPv4;
- struct in6_addr in6;
-
- memcpy(&in6, addr, sizeof(struct in6_addr));
-
- useIPv4 = ipv6_addr_v4mapped(&in6) || ipv6_addr_is_isatap(&in6);
-
- memset(zerolength, 0, sizeof(zerolength));
-
- if (useIPv4)
- range = 6;
- else
- range = 8;
-
- /* find position of longest 0 run */
- for (i = 0; i < range; i++) {
- for (j = i; j < range; j++) {
- if (in6.s6_addr16[j] != 0)
- break;
- zerolength[i]++;
- }
- }
- for (i = 0; i < range; i++) {
- if (zerolength[i] > longest) {
- longest = zerolength[i];
- colonpos = i;
- }
- }
- if (longest == 1) /* don't compress a single 0 */
- colonpos = -1;
-
- /* emit address */
- for (i = 0; i < range; i++) {
- if (i == colonpos) {
- if (needcolon || i == 0)
- trace_seq_printf(s, ":");
- trace_seq_printf(s, ":");
- needcolon = false;
- i += longest - 1;
- continue;
- }
- if (needcolon) {
- trace_seq_printf(s, ":");
- needcolon = false;
- }
- /* hex u16 without leading 0s */
- word = ntohs(in6.s6_addr16[i]);
- hi = word >> 8;
- lo = word & 0xff;
- if (hi)
- trace_seq_printf(s, "%x%02x", hi, lo);
- else
- trace_seq_printf(s, "%x", lo);
-
- needcolon = true;
- }
-
- if (useIPv4) {
- if (needcolon)
- trace_seq_printf(s, ":");
- print_ip4_addr(s, 'I', false, &in6.s6_addr[12]);
- }
-
- return;
-}
-
-static void print_ip6_addr(struct trace_seq *s, char i, unsigned char *buf)
-{
- int j;
-
- for (j = 0; j < 16; j += 2) {
- trace_seq_printf(s, "%02x%02x", buf[j], buf[j+1]);
- if (i == 'I' && j < 14)
- trace_seq_printf(s, ":");
- }
-}
-
-/*
- * %pi4 print an IPv4 address with leading zeros
- * %pI4 print an IPv4 address without leading zeros
- * %pi6 print an IPv6 address without colons
- * %pI6 print an IPv6 address with colons
- * %pI6c print an IPv6 address in compressed form with colons
- * %pISpc print an IP address based on sockaddr; p adds port.
- */
-static int print_ipv4_arg(struct trace_seq *s, const char *ptr, char i,
- void *data, int size, struct tep_event *event,
- struct tep_print_arg *arg)
-{
- bool reverse = false;
- unsigned char *buf;
- int ret;
-
- ret = parse_ip4_print_args(event->tep, ptr, &reverse);
-
- if (arg->type == TEP_PRINT_FUNC) {
- process_defined_func(s, data, size, event, arg);
- return ret;
- }
-
- if (arg->type != TEP_PRINT_FIELD) {
- trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type);
- return ret;
- }
-
- if (!arg->field.field) {
- arg->field.field =
- tep_find_any_field(event, arg->field.name);
- if (!arg->field.field) {
- do_warning("%s: field %s not found",
- __func__, arg->field.name);
- return ret;
- }
- }
-
- buf = data + arg->field.field->offset;
-
- if (arg->field.field->size != 4) {
- trace_seq_printf(s, "INVALIDIPv4");
- return ret;
- }
-
- print_ip4_addr(s, i, reverse, buf);
- return ret;
-
-}
-
-static int print_ipv6_arg(struct trace_seq *s, const char *ptr, char i,
- void *data, int size, struct tep_event *event,
- struct tep_print_arg *arg)
-{
- char have_c = 0;
- unsigned char *buf;
- int rc = 0;
-
- /* pI6c */
- if (i == 'I' && *ptr == 'c') {
- have_c = 1;
- ptr++;
- rc++;
- }
-
- if (arg->type == TEP_PRINT_FUNC) {
- process_defined_func(s, data, size, event, arg);
- return rc;
- }
-
- if (arg->type != TEP_PRINT_FIELD) {
- trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type);
- return rc;
- }
-
- if (!arg->field.field) {
- arg->field.field =
- tep_find_any_field(event, arg->field.name);
- if (!arg->field.field) {
- do_warning("%s: field %s not found",
- __func__, arg->field.name);
- return rc;
- }
- }
-
- buf = data + arg->field.field->offset;
-
- if (arg->field.field->size != 16) {
- trace_seq_printf(s, "INVALIDIPv6");
- return rc;
- }
-
- if (have_c)
- print_ip6c_addr(s, buf);
- else
- print_ip6_addr(s, i, buf);
-
- return rc;
-}
-
-static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i,
- void *data, int size, struct tep_event *event,
- struct tep_print_arg *arg)
-{
- char have_c = 0, have_p = 0;
- unsigned char *buf;
- struct sockaddr_storage *sa;
- bool reverse = false;
- int rc = 0;
- int ret;
-
- /* pISpc */
- if (i == 'I') {
- if (*ptr == 'p') {
- have_p = 1;
- ptr++;
- rc++;
- }
- if (*ptr == 'c') {
- have_c = 1;
- ptr++;
- rc++;
- }
- }
- ret = parse_ip4_print_args(event->tep, ptr, &reverse);
- ptr += ret;
- rc += ret;
-
- if (arg->type == TEP_PRINT_FUNC) {
- process_defined_func(s, data, size, event, arg);
- return rc;
- }
-
- if (arg->type != TEP_PRINT_FIELD) {
- trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type);
- return rc;
- }
-
- if (!arg->field.field) {
- arg->field.field =
- tep_find_any_field(event, arg->field.name);
- if (!arg->field.field) {
- do_warning("%s: field %s not found",
- __func__, arg->field.name);
- return rc;
- }
- }
-
- sa = (struct sockaddr_storage *) (data + arg->field.field->offset);
-
- if (sa->ss_family == AF_INET) {
- struct sockaddr_in *sa4 = (struct sockaddr_in *) sa;
-
- if (arg->field.field->size < sizeof(struct sockaddr_in)) {
- trace_seq_printf(s, "INVALIDIPv4");
- return rc;
- }
-
- print_ip4_addr(s, i, reverse, (unsigned char *) &sa4->sin_addr);
- if (have_p)
- trace_seq_printf(s, ":%d", ntohs(sa4->sin_port));
-
-
- } else if (sa->ss_family == AF_INET6) {
- struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *) sa;
-
- if (arg->field.field->size < sizeof(struct sockaddr_in6)) {
- trace_seq_printf(s, "INVALIDIPv6");
- return rc;
- }
-
- if (have_p)
- trace_seq_printf(s, "[");
-
- buf = (unsigned char *) &sa6->sin6_addr;
- if (have_c)
- print_ip6c_addr(s, buf);
- else
- print_ip6_addr(s, i, buf);
-
- if (have_p)
- trace_seq_printf(s, "]:%d", ntohs(sa6->sin6_port));
- }
-
- return rc;
-}
-
-static int print_ip_arg(struct trace_seq *s, const char *ptr,
- void *data, int size, struct tep_event *event,
- struct tep_print_arg *arg)
-{
- char i = *ptr; /* 'i' or 'I' */
- int rc = 1;
-
- /* IP version */
- ptr++;
-
- switch (*ptr) {
- case '4':
- rc += print_ipv4_arg(s, ptr + 1, i, data, size, event, arg);
- break;
- case '6':
- rc += print_ipv6_arg(s, ptr + 1, i, data, size, event, arg);
- break;
- case 'S':
- rc += print_ipsa_arg(s, ptr + 1, i, data, size, event, arg);
- break;
- default:
- return 0;
- }
-
- return rc;
-}
-
-static const int guid_index[16] = {3, 2, 1, 0, 5, 4, 7, 6, 8, 9, 10, 11, 12, 13, 14, 15};
-static const int uuid_index[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
-
-static int print_uuid_arg(struct trace_seq *s, const char *ptr,
- void *data, int size, struct tep_event *event,
- struct tep_print_arg *arg)
-{
- const int *index = uuid_index;
- char *format = "%02x";
- int ret = 0;
- char *buf;
- int i;
-
- switch (*(ptr + 1)) {
- case 'L':
- format = "%02X";
- /* fall through */
- case 'l':
- index = guid_index;
- ret++;
- break;
- case 'B':
- format = "%02X";
- /* fall through */
- case 'b':
- ret++;
- break;
- }
-
- if (arg->type == TEP_PRINT_FUNC) {
- process_defined_func(s, data, size, event, arg);
- return ret;
- }
-
- if (arg->type != TEP_PRINT_FIELD) {
- trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type);
- return ret;
- }
-
- if (!arg->field.field) {
- arg->field.field =
- tep_find_any_field(event, arg->field.name);
- if (!arg->field.field) {
- do_warning("%s: field %s not found",
- __func__, arg->field.name);
- return ret;
- }
- }
-
- if (arg->field.field->size != 16) {
- trace_seq_printf(s, "INVALIDUUID");
- return ret;
- }
-
- buf = data + arg->field.field->offset;
-
- for (i = 0; i < 16; i++) {
- trace_seq_printf(s, format, buf[index[i]] & 0xff);
- switch (i) {
- case 3:
- case 5:
- case 7:
- case 9:
- trace_seq_printf(s, "-");
- break;
- }
- }
-
- return ret;
-}
-
-static int print_raw_buff_arg(struct trace_seq *s, const char *ptr,
- void *data, int size, struct tep_event *event,
- struct tep_print_arg *arg, int print_len)
-{
- int plen = print_len;
- char *delim = " ";
- int ret = 0;
- char *buf;
- int i;
- unsigned long offset;
- int arr_len;
-
- switch (*(ptr + 1)) {
- case 'C':
- delim = ":";
- ret++;
- break;
- case 'D':
- delim = "-";
- ret++;
- break;
- case 'N':
- delim = "";
- ret++;
- break;
- }
-
- if (arg->type == TEP_PRINT_FUNC) {
- process_defined_func(s, data, size, event, arg);
- return ret;
- }
-
- if (arg->type != TEP_PRINT_DYNAMIC_ARRAY) {
- trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type);
- return ret;
- }
-
- offset = tep_read_number(event->tep,
- data + arg->dynarray.field->offset,
- arg->dynarray.field->size);
- arr_len = (unsigned long long)(offset >> 16);
- buf = data + (offset & 0xffff);
-
- if (arr_len < plen)
- plen = arr_len;
-
- if (plen < 1)
- return ret;
-
- trace_seq_printf(s, "%02x", buf[0] & 0xff);
- for (i = 1; i < plen; i++)
- trace_seq_printf(s, "%s%02x", delim, buf[i] & 0xff);
-
- return ret;
-}
-
-static int is_printable_array(char *p, unsigned int len)
-{
- unsigned int i;
-
- for (i = 0; i < len && p[i]; i++)
- if (!isprint(p[i]) && !isspace(p[i]))
- return 0;
- return 1;
-}
-
-void tep_print_field(struct trace_seq *s, void *data,
- struct tep_format_field *field)
-{
- unsigned long long val;
- unsigned int offset, len, i;
- struct tep_handle *tep = field->event->tep;
-
- if (field->flags & TEP_FIELD_IS_ARRAY) {
- offset = field->offset;
- len = field->size;
- if (field->flags & TEP_FIELD_IS_DYNAMIC) {
- val = tep_read_number(tep, data + offset, len);
- offset = val;
- len = offset >> 16;
- offset &= 0xffff;
- }
- if (field->flags & TEP_FIELD_IS_STRING &&
- is_printable_array(data + offset, len)) {
- trace_seq_printf(s, "%s", (char *)data + offset);
- } else {
- trace_seq_puts(s, "ARRAY[");
- for (i = 0; i < len; i++) {
- if (i)
- trace_seq_puts(s, ", ");
- trace_seq_printf(s, "%02x",
- *((unsigned char *)data + offset + i));
- }
- trace_seq_putc(s, ']');
- field->flags &= ~TEP_FIELD_IS_STRING;
- }
- } else {
- val = tep_read_number(tep, data + field->offset,
- field->size);
- if (field->flags & TEP_FIELD_IS_POINTER) {
- trace_seq_printf(s, "0x%llx", val);
- } else if (field->flags & TEP_FIELD_IS_SIGNED) {
- switch (field->size) {
- case 4:
- /*
- * If field is long then print it in hex.
- * A long usually stores pointers.
- */
- if (field->flags & TEP_FIELD_IS_LONG)
- trace_seq_printf(s, "0x%x", (int)val);
- else
- trace_seq_printf(s, "%d", (int)val);
- break;
- case 2:
- trace_seq_printf(s, "%2d", (short)val);
- break;
- case 1:
- trace_seq_printf(s, "%1d", (char)val);
- break;
- default:
- trace_seq_printf(s, "%lld", val);
- }
- } else {
- if (field->flags & TEP_FIELD_IS_LONG)
- trace_seq_printf(s, "0x%llx", val);
- else
- trace_seq_printf(s, "%llu", val);
- }
- }
-}
-
-void tep_print_fields(struct trace_seq *s, void *data,
- int size __maybe_unused, struct tep_event *event)
-{
- struct tep_format_field *field;
-
- field = event->format.fields;
- while (field) {
- trace_seq_printf(s, " %s=", field->name);
- tep_print_field(s, data, field);
- field = field->next;
- }
-}
-
-static int print_function(struct trace_seq *s, const char *format,
- void *data, int size, struct tep_event *event,
- struct tep_print_arg *arg)
-{
- struct func_map *func;
- unsigned long long val;
-
- val = eval_num_arg(data, size, event, arg);
- func = find_func(event->tep, val);
- if (func) {
- trace_seq_puts(s, func->func);
- if (*format == 'F' || *format == 'S')
- trace_seq_printf(s, "+0x%llx", val - func->addr);
- } else {
- if (event->tep->long_size == 4)
- trace_seq_printf(s, "0x%lx", (long)val);
- else
- trace_seq_printf(s, "0x%llx", (long long)val);
- }
-
- return 0;
-}
-
-static int print_arg_pointer(struct trace_seq *s, const char *format, int plen,
- void *data, int size,
- struct tep_event *event, struct tep_print_arg *arg)
-{
- unsigned long long val;
- int ret = 1;
-
- if (arg->type == TEP_PRINT_BSTRING) {
- trace_seq_puts(s, arg->string.string);
- return 0;
- }
- while (*format) {
- if (*format == 'p') {
- format++;
- break;
- }
- format++;
- }
-
- switch (*format) {
- case 'F':
- case 'f':
- case 'S':
- case 's':
- ret += print_function(s, format, data, size, event, arg);
- break;
- case 'M':
- case 'm':
- ret += print_mac_arg(s, format, data, size, event, arg);
- break;
- case 'I':
- case 'i':
- ret += print_ip_arg(s, format, data, size, event, arg);
- break;
- case 'U':
- ret += print_uuid_arg(s, format, data, size, event, arg);
- break;
- case 'h':
- ret += print_raw_buff_arg(s, format, data, size, event, arg, plen);
- break;
- default:
- ret = 0;
- val = eval_num_arg(data, size, event, arg);
- trace_seq_printf(s, "%p", (void *)(intptr_t)val);
- break;
- }
-
- return ret;
-
-}
-
-static int print_arg_number(struct trace_seq *s, const char *format, int plen,
- void *data, int size, int ls,
- struct tep_event *event, struct tep_print_arg *arg)
-{
- unsigned long long val;
-
- val = eval_num_arg(data, size, event, arg);
-
- switch (ls) {
- case -2:
- if (plen >= 0)
- trace_seq_printf(s, format, plen, (char)val);
- else
- trace_seq_printf(s, format, (char)val);
- break;
- case -1:
- if (plen >= 0)
- trace_seq_printf(s, format, plen, (short)val);
- else
- trace_seq_printf(s, format, (short)val);
- break;
- case 0:
- if (plen >= 0)
- trace_seq_printf(s, format, plen, (int)val);
- else
- trace_seq_printf(s, format, (int)val);
- break;
- case 1:
- if (plen >= 0)
- trace_seq_printf(s, format, plen, (long)val);
- else
- trace_seq_printf(s, format, (long)val);
- break;
- case 2:
- if (plen >= 0)
- trace_seq_printf(s, format, plen, (long long)val);
- else
- trace_seq_printf(s, format, (long long)val);
- break;
- default:
- do_warning_event(event, "bad count (%d)", ls);
- event->flags |= TEP_EVENT_FL_FAILED;
- }
- return 0;
-}
-
-
-static void print_arg_string(struct trace_seq *s, const char *format, int plen,
- void *data, int size,
- struct tep_event *event, struct tep_print_arg *arg)
-{
- struct trace_seq p;
-
- /* Use helper trace_seq */
- trace_seq_init(&p);
- print_str_arg(&p, data, size, event,
- format, plen, arg);
- trace_seq_terminate(&p);
- trace_seq_puts(s, p.buffer);
- trace_seq_destroy(&p);
-}
-
-static int parse_arg_format_pointer(const char *format)
-{
- int ret = 0;
- int index;
- int loop;
-
- switch (*format) {
- case 'F':
- case 'S':
- case 'f':
- case 's':
- ret++;
- break;
- case 'M':
- case 'm':
- /* [mM]R , [mM]F */
- switch (format[1]) {
- case 'R':
- case 'F':
- ret++;
- break;
- }
- ret++;
- break;
- case 'I':
- case 'i':
- index = 2;
- loop = 1;
- switch (format[1]) {
- case 'S':
- /*[S][pfs]*/
- while (loop) {
- switch (format[index]) {
- case 'p':
- case 'f':
- case 's':
- ret++;
- index++;
- break;
- default:
- loop = 0;
- break;
- }
- }
- /* fall through */
- case '4':
- /* [4S][hnbl] */
- switch (format[index]) {
- case 'h':
- case 'n':
- case 'l':
- case 'b':
- ret++;
- index++;
- break;
- }
- if (format[1] == '4') {
- ret++;
- break;
- }
- /* fall through */
- case '6':
- /* [6S]c */
- if (format[index] == 'c')
- ret++;
- ret++;
- break;
- }
- ret++;
- break;
- case 'U':
- switch (format[1]) {
- case 'L':
- case 'l':
- case 'B':
- case 'b':
- ret++;
- break;
- }
- ret++;
- break;
- case 'h':
- switch (format[1]) {
- case 'C':
- case 'D':
- case 'N':
- ret++;
- break;
- }
- ret++;
- break;
- default:
- break;
- }
-
- return ret;
-}
-
-static void free_parse_args(struct tep_print_parse *arg)
-{
- struct tep_print_parse *del;
-
- while (arg) {
- del = arg;
- arg = del->next;
- free(del->format);
- free(del);
- }
-}
-
-static int parse_arg_add(struct tep_print_parse **parse, char *format,
- enum tep_print_parse_type type,
- struct tep_print_arg *arg,
- struct tep_print_arg *len_as_arg,
- int ls)
-{
- struct tep_print_parse *parg = NULL;
-
- parg = calloc(1, sizeof(*parg));
- if (!parg)
- goto error;
- parg->format = strdup(format);
- if (!parg->format)
- goto error;
- parg->type = type;
- parg->arg = arg;
- parg->len_as_arg = len_as_arg;
- parg->ls = ls;
- *parse = parg;
- return 0;
-error:
- if (parg) {
- free(parg->format);
- free(parg);
- }
- return -1;
-}
-
-static int parse_arg_format(struct tep_print_parse **parse,
- struct tep_event *event,
- const char *format, struct tep_print_arg **arg)
-{
- struct tep_print_arg *len_arg = NULL;
- char print_format[32];
- const char *start = format;
- int ret = 0;
- int ls = 0;
- int res;
- int len;
-
- format++;
- ret++;
- for (; *format; format++) {
- switch (*format) {
- case '#':
- /* FIXME: need to handle properly */
- break;
- case 'h':
- ls--;
- break;
- case 'l':
- ls++;
- break;
- case 'L':
- ls = 2;
- break;
- case '.':
- case 'z':
- case 'Z':
- case '0' ... '9':
- case '-':
- break;
- case '*':
- /* The argument is the length. */
- if (!*arg) {
- do_warning_event(event, "no argument match");
- event->flags |= TEP_EVENT_FL_FAILED;
- goto out_failed;
- }
- if (len_arg) {
- do_warning_event(event, "argument already matched");
- event->flags |= TEP_EVENT_FL_FAILED;
- goto out_failed;
- }
- len_arg = *arg;
- *arg = (*arg)->next;
- break;
- case 'p':
- if (!*arg) {
- do_warning_event(event, "no argument match");
- event->flags |= TEP_EVENT_FL_FAILED;
- goto out_failed;
- }
- res = parse_arg_format_pointer(format + 1);
- if (res > 0) {
- format += res;
- ret += res;
- }
- len = ((unsigned long)format + 1) -
- (unsigned long)start;
- /* should never happen */
- if (len > 31) {
- do_warning_event(event, "bad format!");
- event->flags |= TEP_EVENT_FL_FAILED;
- len = 31;
- }
- memcpy(print_format, start, len);
- print_format[len] = 0;
-
- parse_arg_add(parse, print_format,
- PRINT_FMT_ARG_POINTER, *arg, len_arg, ls);
- *arg = (*arg)->next;
- ret++;
- return ret;
- case 'd':
- case 'u':
- case 'i':
- case 'x':
- case 'X':
- case 'o':
- if (!*arg) {
- do_warning_event(event, "no argument match");
- event->flags |= TEP_EVENT_FL_FAILED;
- goto out_failed;
- }
-
- len = ((unsigned long)format + 1) -
- (unsigned long)start;
-
- /* should never happen */
- if (len > 30) {
- do_warning_event(event, "bad format!");
- event->flags |= TEP_EVENT_FL_FAILED;
- len = 31;
- }
- memcpy(print_format, start, len);
- print_format[len] = 0;
-
- if (event->tep->long_size == 8 && ls == 1 &&
- sizeof(long) != 8) {
- char *p;
-
- /* make %l into %ll */
- if (ls == 1 && (p = strchr(print_format, 'l')))
- memmove(p+1, p, strlen(p)+1);
- ls = 2;
- }
- if (ls < -2 || ls > 2) {
- do_warning_event(event, "bad count (%d)", ls);
- event->flags |= TEP_EVENT_FL_FAILED;
- }
- parse_arg_add(parse, print_format,
- PRINT_FMT_ARG_DIGIT, *arg, len_arg, ls);
- *arg = (*arg)->next;
- ret++;
- return ret;
- case 's':
- if (!*arg) {
- do_warning_event(event, "no matching argument");
- event->flags |= TEP_EVENT_FL_FAILED;
- goto out_failed;
- }
-
- len = ((unsigned long)format + 1) -
- (unsigned long)start;
-
- /* should never happen */
- if (len > 31) {
- do_warning_event(event, "bad format!");
- event->flags |= TEP_EVENT_FL_FAILED;
- len = 31;
- }
-
- memcpy(print_format, start, len);
- print_format[len] = 0;
-
- parse_arg_add(parse, print_format,
- PRINT_FMT_ARG_STRING, *arg, len_arg, 0);
- *arg = (*arg)->next;
- ret++;
- return ret;
- default:
- snprintf(print_format, 32, ">%c<", *format);
- parse_arg_add(parse, print_format,
- PRINT_FMT_STRING, NULL, NULL, 0);
- ret++;
- return ret;
- }
- ret++;
- }
-
-out_failed:
- return ret;
-
-}
-
-static int parse_arg_string(struct tep_print_parse **parse, const char *format)
-{
- struct trace_seq s;
- int ret = 0;
-
- trace_seq_init(&s);
- for (; *format; format++) {
- if (*format == '\\') {
- format++;
- ret++;
- switch (*format) {
- case 'n':
- trace_seq_putc(&s, '\n');
- break;
- case 't':
- trace_seq_putc(&s, '\t');
- break;
- case 'r':
- trace_seq_putc(&s, '\r');
- break;
- case '\\':
- trace_seq_putc(&s, '\\');
- break;
- default:
- trace_seq_putc(&s, *format);
- break;
- }
- } else if (*format == '%') {
- if (*(format + 1) == '%') {
- trace_seq_putc(&s, '%');
- format++;
- ret++;
- } else
- break;
- } else
- trace_seq_putc(&s, *format);
-
- ret++;
- }
- trace_seq_terminate(&s);
- parse_arg_add(parse, s.buffer, PRINT_FMT_STRING, NULL, NULL, 0);
- trace_seq_destroy(&s);
-
- return ret;
-}
-
-static struct tep_print_parse *
-parse_args(struct tep_event *event, const char *format, struct tep_print_arg *arg)
-{
- struct tep_print_parse *parse_ret = NULL;
- struct tep_print_parse **parse = NULL;
- int ret;
- int len;
-
- len = strlen(format);
- while (*format) {
- if (!parse_ret)
- parse = &parse_ret;
- if (*format == '%' && *(format + 1) != '%')
- ret = parse_arg_format(parse, event, format, &arg);
- else
- ret = parse_arg_string(parse, format);
- if (*parse)
- parse = &((*parse)->next);
-
- len -= ret;
- if (len > 0)
- format += ret;
- else
- break;
- }
- return parse_ret;
-}
-
-static void print_event_cache(struct tep_print_parse *parse, struct trace_seq *s,
- void *data, int size, struct tep_event *event)
-{
- int len_arg;
-
- while (parse) {
- if (parse->len_as_arg)
- len_arg = eval_num_arg(data, size, event, parse->len_as_arg);
- switch (parse->type) {
- case PRINT_FMT_ARG_DIGIT:
- print_arg_number(s, parse->format,
- parse->len_as_arg ? len_arg : -1, data,
- size, parse->ls, event, parse->arg);
- break;
- case PRINT_FMT_ARG_POINTER:
- print_arg_pointer(s, parse->format,
- parse->len_as_arg ? len_arg : 1,
- data, size, event, parse->arg);
- break;
- case PRINT_FMT_ARG_STRING:
- print_arg_string(s, parse->format,
- parse->len_as_arg ? len_arg : -1,
- data, size, event, parse->arg);
- break;
- case PRINT_FMT_STRING:
- default:
- trace_seq_printf(s, "%s", parse->format);
- break;
- }
- parse = parse->next;
- }
-}
-
-static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_event *event)
-{
- struct tep_print_parse *parse = event->print_fmt.print_cache;
- struct tep_print_arg *args = NULL;
- char *bprint_fmt = NULL;
-
- if (event->flags & TEP_EVENT_FL_FAILED) {
- trace_seq_printf(s, "[FAILED TO PARSE]");
- tep_print_fields(s, data, size, event);
- return;
- }
-
- if (event->flags & TEP_EVENT_FL_ISBPRINT) {
- bprint_fmt = get_bprint_format(data, size, event);
- args = make_bprint_args(bprint_fmt, data, size, event);
- parse = parse_args(event, bprint_fmt, args);
- }
-
- print_event_cache(parse, s, data, size, event);
-
- if (event->flags & TEP_EVENT_FL_ISBPRINT) {
- free_parse_args(parse);
- free_args(args);
- free(bprint_fmt);
- }
-}
-
-/*
- * This parses out the Latency format (interrupts disabled,
- * need rescheduling, in hard/soft interrupt, preempt count
- * and lock depth) and places it into the trace_seq.
- */
-static void data_latency_format(struct tep_handle *tep, struct trace_seq *s,
- char *format, struct tep_record *record)
-{
- static int check_lock_depth = 1;
- static int check_migrate_disable = 1;
- static int lock_depth_exists;
- static int migrate_disable_exists;
- unsigned int lat_flags;
- struct trace_seq sq;
- unsigned int pc;
- int lock_depth = 0;
- int migrate_disable = 0;
- int hardirq;
- int softirq;
- void *data = record->data;
-
- trace_seq_init(&sq);
- lat_flags = parse_common_flags(tep, data);
- pc = parse_common_pc(tep, data);
- /* lock_depth may not always exist */
- if (lock_depth_exists)
- lock_depth = parse_common_lock_depth(tep, data);
- else if (check_lock_depth) {
- lock_depth = parse_common_lock_depth(tep, data);
- if (lock_depth < 0)
- check_lock_depth = 0;
- else
- lock_depth_exists = 1;
- }
-
- /* migrate_disable may not always exist */
- if (migrate_disable_exists)
- migrate_disable = parse_common_migrate_disable(tep, data);
- else if (check_migrate_disable) {
- migrate_disable = parse_common_migrate_disable(tep, data);
- if (migrate_disable < 0)
- check_migrate_disable = 0;
- else
- migrate_disable_exists = 1;
- }
-
- hardirq = lat_flags & TRACE_FLAG_HARDIRQ;
- softirq = lat_flags & TRACE_FLAG_SOFTIRQ;
-
- trace_seq_printf(&sq, "%c%c%c",
- (lat_flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
- (lat_flags & TRACE_FLAG_IRQS_NOSUPPORT) ?
- 'X' : '.',
- (lat_flags & TRACE_FLAG_NEED_RESCHED) ?
- 'N' : '.',
- (hardirq && softirq) ? 'H' :
- hardirq ? 'h' : softirq ? 's' : '.');
-
- if (pc)
- trace_seq_printf(&sq, "%x", pc);
- else
- trace_seq_printf(&sq, ".");
-
- if (migrate_disable_exists) {
- if (migrate_disable < 0)
- trace_seq_printf(&sq, ".");
- else
- trace_seq_printf(&sq, "%d", migrate_disable);
- }
-
- if (lock_depth_exists) {
- if (lock_depth < 0)
- trace_seq_printf(&sq, ".");
- else
- trace_seq_printf(&sq, "%d", lock_depth);
- }
-
- if (sq.state == TRACE_SEQ__MEM_ALLOC_FAILED) {
- s->state = TRACE_SEQ__MEM_ALLOC_FAILED;
- return;
- }
-
- trace_seq_terminate(&sq);
- trace_seq_puts(s, sq.buffer);
- trace_seq_destroy(&sq);
- trace_seq_terminate(s);
-}
-
-/**
- * tep_data_type - parse out the given event type
- * @tep: a handle to the trace event parser context
- * @rec: the record to read from
- *
- * This returns the event id from the @rec.
- */
-int tep_data_type(struct tep_handle *tep, struct tep_record *rec)
-{
- return trace_parse_common_type(tep, rec->data);
-}
-
-/**
- * tep_data_pid - parse the PID from record
- * @tep: a handle to the trace event parser context
- * @rec: the record to parse
- *
- * This returns the PID from a record.
- */
-int tep_data_pid(struct tep_handle *tep, struct tep_record *rec)
-{
- return parse_common_pid(tep, rec->data);
-}
-
-/**
- * tep_data_preempt_count - parse the preempt count from the record
- * @tep: a handle to the trace event parser context
- * @rec: the record to parse
- *
- * This returns the preempt count from a record.
- */
-int tep_data_preempt_count(struct tep_handle *tep, struct tep_record *rec)
-{
- return parse_common_pc(tep, rec->data);
-}
-
-/**
- * tep_data_flags - parse the latency flags from the record
- * @tep: a handle to the trace event parser context
- * @rec: the record to parse
- *
- * This returns the latency flags from a record.
- *
- * Use trace_flag_type enum for the flags (see event-parse.h).
- */
-int tep_data_flags(struct tep_handle *tep, struct tep_record *rec)
-{
- return parse_common_flags(tep, rec->data);
-}
-
-/**
- * tep_data_comm_from_pid - return the command line from PID
- * @tep: a handle to the trace event parser context
- * @pid: the PID of the task to search for
- *
- * This returns a pointer to the command line that has the given
- * @pid.
- */
-const char *tep_data_comm_from_pid(struct tep_handle *tep, int pid)
-{
- const char *comm;
-
- comm = find_cmdline(tep, pid);
- return comm;
-}
-
-static struct tep_cmdline *
-pid_from_cmdlist(struct tep_handle *tep, const char *comm, struct tep_cmdline *next)
-{
- struct cmdline_list *cmdlist = (struct cmdline_list *)next;
-
- if (cmdlist)
- cmdlist = cmdlist->next;
- else
- cmdlist = tep->cmdlist;
-
- while (cmdlist && strcmp(cmdlist->comm, comm) != 0)
- cmdlist = cmdlist->next;
-
- return (struct tep_cmdline *)cmdlist;
-}
-
-/**
- * tep_data_pid_from_comm - return the pid from a given comm
- * @tep: a handle to the trace event parser context
- * @comm: the cmdline to find the pid from
- * @next: the cmdline structure to find the next comm
- *
- * This returns the cmdline structure that holds a pid for a given
- * comm, or NULL if none found. As there may be more than one pid for
- * a given comm, the result of this call can be passed back into
- * a recurring call in the @next parameter, and then it will find the
- * next pid.
- * Also, it does a linear search, so it may be slow.
- */
-struct tep_cmdline *tep_data_pid_from_comm(struct tep_handle *tep, const char *comm,
- struct tep_cmdline *next)
-{
- struct tep_cmdline *cmdline;
-
- /*
- * If the cmdlines have not been converted yet, then use
- * the list.
- */
- if (!tep->cmdlines)
- return pid_from_cmdlist(tep, comm, next);
-
- if (next) {
- /*
- * The next pointer could have been still from
- * a previous call before cmdlines were created
- */
- if (next < tep->cmdlines ||
- next >= tep->cmdlines + tep->cmdline_count)
- next = NULL;
- else
- cmdline = next++;
- }
-
- if (!next)
- cmdline = tep->cmdlines;
-
- while (cmdline < tep->cmdlines + tep->cmdline_count) {
- if (strcmp(cmdline->comm, comm) == 0)
- return cmdline;
- cmdline++;
- }
- return NULL;
-}
-
-/**
- * tep_cmdline_pid - return the pid associated to a given cmdline
- * @tep: a handle to the trace event parser context
- * @cmdline: The cmdline structure to get the pid from
- *
- * Returns the pid for a give cmdline. If @cmdline is NULL, then
- * -1 is returned.
- */
-int tep_cmdline_pid(struct tep_handle *tep, struct tep_cmdline *cmdline)
-{
- struct cmdline_list *cmdlist = (struct cmdline_list *)cmdline;
-
- if (!cmdline)
- return -1;
-
- /*
- * If cmdlines have not been created yet, or cmdline is
- * not part of the array, then treat it as a cmdlist instead.
- */
- if (!tep->cmdlines ||
- cmdline < tep->cmdlines ||
- cmdline >= tep->cmdlines + tep->cmdline_count)
- return cmdlist->pid;
-
- return cmdline->pid;
-}
-
-/*
- * This parses the raw @data using the given @event information and
- * writes the print format into the trace_seq.
- */
-static void print_event_info(struct trace_seq *s, char *format, bool raw,
- struct tep_event *event, struct tep_record *record)
-{
- int print_pretty = 1;
-
- if (raw || (event->flags & TEP_EVENT_FL_PRINTRAW))
- tep_print_fields(s, record->data, record->size, event);
- else {
-
- if (event->handler && !(event->flags & TEP_EVENT_FL_NOHANDLE))
- print_pretty = event->handler(s, record, event,
- event->context);
-
- if (print_pretty)
- pretty_print(s, record->data, record->size, event);
- }
-
- trace_seq_terminate(s);
-}
-
-/**
- * tep_find_event_by_record - return the event from a given record
- * @tep: a handle to the trace event parser context
- * @record: The record to get the event from
- *
- * Returns the associated event for a given record, or NULL if non is
- * is found.
- */
-struct tep_event *
-tep_find_event_by_record(struct tep_handle *tep, struct tep_record *record)
-{
- int type;
-
- if (record->size < 0) {
- do_warning("ug! negative record size %d", record->size);
- return NULL;
- }
-
- type = trace_parse_common_type(tep, record->data);
-
- return tep_find_event(tep, type);
-}
-
-/*
- * Writes the timestamp of the record into @s. Time divisor and precision can be
- * specified as part of printf @format string. Example:
- * "%3.1000d" - divide the time by 1000 and print the first 3 digits
- * before the dot. Thus, the timestamp "123456000" will be printed as
- * "123.456"
- */
-static void print_event_time(struct tep_handle *tep, struct trace_seq *s,
- char *format, struct tep_event *event,
- struct tep_record *record)
-{
- unsigned long long time;
- char *divstr;
- int prec = 0, pr;
- int div = 0;
- int p10 = 1;
-
- if (isdigit(*(format + 1)))
- prec = atoi(format + 1);
- divstr = strchr(format, '.');
- if (divstr && isdigit(*(divstr + 1)))
- div = atoi(divstr + 1);
- time = record->ts;
- if (div) {
- time += div / 2;
- time /= div;
- }
- pr = prec;
- while (pr--)
- p10 *= 10;
-
- if (p10 > 1 && p10 < time)
- trace_seq_printf(s, "%5llu.%0*llu", time / p10, prec, time % p10);
- else
- trace_seq_printf(s, "%12llu", time);
-}
-
-struct print_event_type {
- enum {
- EVENT_TYPE_INT = 1,
- EVENT_TYPE_STRING,
- EVENT_TYPE_UNKNOWN,
- } type;
- char format[32];
-};
-
-static void print_string(struct tep_handle *tep, struct trace_seq *s,
- struct tep_record *record, struct tep_event *event,
- const char *arg, struct print_event_type *type)
-{
- const char *comm;
- int pid;
-
- if (strncmp(arg, TEP_PRINT_LATENCY, strlen(TEP_PRINT_LATENCY)) == 0) {
- data_latency_format(tep, s, type->format, record);
- } else if (strncmp(arg, TEP_PRINT_COMM, strlen(TEP_PRINT_COMM)) == 0) {
- pid = parse_common_pid(tep, record->data);
- comm = find_cmdline(tep, pid);
- trace_seq_printf(s, type->format, comm);
- } else if (strncmp(arg, TEP_PRINT_INFO_RAW, strlen(TEP_PRINT_INFO_RAW)) == 0) {
- print_event_info(s, type->format, true, event, record);
- } else if (strncmp(arg, TEP_PRINT_INFO, strlen(TEP_PRINT_INFO)) == 0) {
- print_event_info(s, type->format, false, event, record);
- } else if (strncmp(arg, TEP_PRINT_NAME, strlen(TEP_PRINT_NAME)) == 0) {
- trace_seq_printf(s, type->format, event->name);
- } else {
- trace_seq_printf(s, "[UNKNOWN TEP TYPE %s]", arg);
- }
-
-}
-
-static void print_int(struct tep_handle *tep, struct trace_seq *s,
- struct tep_record *record, struct tep_event *event,
- int arg, struct print_event_type *type)
-{
- int param;
-
- switch (arg) {
- case TEP_PRINT_CPU:
- param = record->cpu;
- break;
- case TEP_PRINT_PID:
- param = parse_common_pid(tep, record->data);
- break;
- case TEP_PRINT_TIME:
- return print_event_time(tep, s, type->format, event, record);
- default:
- return;
- }
- trace_seq_printf(s, type->format, param);
-}
-
-static int tep_print_event_param_type(char *format,
- struct print_event_type *type)
-{
- char *str = format + 1;
- int i = 1;
-
- type->type = EVENT_TYPE_UNKNOWN;
- while (*str) {
- switch (*str) {
- case 'd':
- case 'u':
- case 'i':
- case 'x':
- case 'X':
- case 'o':
- type->type = EVENT_TYPE_INT;
- break;
- case 's':
- type->type = EVENT_TYPE_STRING;
- break;
- }
- str++;
- i++;
- if (type->type != EVENT_TYPE_UNKNOWN)
- break;
- }
- memset(type->format, 0, 32);
- memcpy(type->format, format, i < 32 ? i : 31);
- return i;
-}
-
-/**
- * tep_print_event - Write various event information
- * @tep: a handle to the trace event parser context
- * @s: the trace_seq to write to
- * @record: The record to get the event from
- * @format: a printf format string. Supported event fileds:
- * TEP_PRINT_PID, "%d" - event PID
- * TEP_PRINT_CPU, "%d" - event CPU
- * TEP_PRINT_COMM, "%s" - event command string
- * TEP_PRINT_NAME, "%s" - event name
- * TEP_PRINT_LATENCY, "%s" - event latency
- * TEP_PRINT_TIME, %d - event time stamp. A divisor and precision
- * can be specified as part of this format string:
- * "%precision.divisord". Example:
- * "%3.1000d" - divide the time by 1000 and print the first
- * 3 digits before the dot. Thus, the time stamp
- * "123456000" will be printed as "123.456"
- * TEP_PRINT_INFO, "%s" - event information. If any width is specified in
- * the format string, the event information will be printed
- * in raw format.
- * Writes the specified event information into @s.
- */
-void tep_print_event(struct tep_handle *tep, struct trace_seq *s,
- struct tep_record *record, const char *fmt, ...)
-{
- struct print_event_type type;
- char *format = strdup(fmt);
- char *current = format;
- char *str = format;
- int offset;
- va_list args;
- struct tep_event *event;
-
- if (!format)
- return;
-
- event = tep_find_event_by_record(tep, record);
- va_start(args, fmt);
- while (*current) {
- current = strchr(str, '%');
- if (!current) {
- trace_seq_puts(s, str);
- break;
- }
- memset(&type, 0, sizeof(type));
- offset = tep_print_event_param_type(current, &type);
- *current = '\0';
- trace_seq_puts(s, str);
- current += offset;
- switch (type.type) {
- case EVENT_TYPE_STRING:
- print_string(tep, s, record, event,
- va_arg(args, char*), &type);
- break;
- case EVENT_TYPE_INT:
- print_int(tep, s, record, event,
- va_arg(args, int), &type);
- break;
- case EVENT_TYPE_UNKNOWN:
- default:
- trace_seq_printf(s, "[UNKNOWN TYPE]");
- break;
- }
- str = current;
-
- }
- va_end(args);
- free(format);
-}
-
-static int events_id_cmp(const void *a, const void *b)
-{
- struct tep_event * const * ea = a;
- struct tep_event * const * eb = b;
-
- if ((*ea)->id < (*eb)->id)
- return -1;
-
- if ((*ea)->id > (*eb)->id)
- return 1;
-
- return 0;
-}
-
-static int events_name_cmp(const void *a, const void *b)
-{
- struct tep_event * const * ea = a;
- struct tep_event * const * eb = b;
- int res;
-
- res = strcmp((*ea)->name, (*eb)->name);
- if (res)
- return res;
-
- res = strcmp((*ea)->system, (*eb)->system);
- if (res)
- return res;
-
- return events_id_cmp(a, b);
-}
-
-static int events_system_cmp(const void *a, const void *b)
-{
- struct tep_event * const * ea = a;
- struct tep_event * const * eb = b;
- int res;
-
- res = strcmp((*ea)->system, (*eb)->system);
- if (res)
- return res;
-
- res = strcmp((*ea)->name, (*eb)->name);
- if (res)
- return res;
-
- return events_id_cmp(a, b);
-}
-
-static struct tep_event **list_events_copy(struct tep_handle *tep)
-{
- struct tep_event **events;
-
- if (!tep)
- return NULL;
-
- events = malloc(sizeof(*events) * (tep->nr_events + 1));
- if (!events)
- return NULL;
-
- memcpy(events, tep->events, sizeof(*events) * tep->nr_events);
- events[tep->nr_events] = NULL;
- return events;
-}
-
-static void list_events_sort(struct tep_event **events, int nr_events,
- enum tep_event_sort_type sort_type)
-{
- int (*sort)(const void *a, const void *b);
-
- switch (sort_type) {
- case TEP_EVENT_SORT_ID:
- sort = events_id_cmp;
- break;
- case TEP_EVENT_SORT_NAME:
- sort = events_name_cmp;
- break;
- case TEP_EVENT_SORT_SYSTEM:
- sort = events_system_cmp;
- break;
- default:
- sort = NULL;
- }
-
- if (sort)
- qsort(events, nr_events, sizeof(*events), sort);
-}
-
-/**
- * tep_list_events - Get events, sorted by given criteria.
- * @tep: a handle to the tep context
- * @sort_type: desired sort order of the events in the array
- *
- * Returns an array of pointers to all events, sorted by the given
- * @sort_type criteria. The last element of the array is NULL. The returned
- * memory must not be freed, it is managed by the library.
- * The function is not thread safe.
- */
-struct tep_event **tep_list_events(struct tep_handle *tep,
- enum tep_event_sort_type sort_type)
-{
- struct tep_event **events;
-
- if (!tep)
- return NULL;
-
- events = tep->sort_events;
- if (events && tep->last_type == sort_type)
- return events;
-
- if (!events) {
- events = list_events_copy(tep);
- if (!events)
- return NULL;
-
- tep->sort_events = events;
-
- /* the internal events are sorted by id */
- if (sort_type == TEP_EVENT_SORT_ID) {
- tep->last_type = sort_type;
- return events;
- }
- }
-
- list_events_sort(events, tep->nr_events, sort_type);
- tep->last_type = sort_type;
-
- return events;
-}
-
-
-/**
- * tep_list_events_copy - Thread safe version of tep_list_events()
- * @tep: a handle to the tep context
- * @sort_type: desired sort order of the events in the array
- *
- * Returns an array of pointers to all events, sorted by the given
- * @sort_type criteria. The last element of the array is NULL. The returned
- * array is newly allocated inside the function and must be freed by the caller
- */
-struct tep_event **tep_list_events_copy(struct tep_handle *tep,
- enum tep_event_sort_type sort_type)
-{
- struct tep_event **events;
-
- if (!tep)
- return NULL;
-
- events = list_events_copy(tep);
- if (!events)
- return NULL;
-
- /* the internal events are sorted by id */
- if (sort_type == TEP_EVENT_SORT_ID)
- return events;
-
- list_events_sort(events, tep->nr_events, sort_type);
-
- return events;
-}
-
-static struct tep_format_field **
-get_event_fields(const char *type, const char *name,
- int count, struct tep_format_field *list)
-{
- struct tep_format_field **fields;
- struct tep_format_field *field;
- int i = 0;
-
- fields = malloc(sizeof(*fields) * (count + 1));
- if (!fields)
- return NULL;
-
- for (field = list; field; field = field->next) {
- fields[i++] = field;
- if (i == count + 1) {
- do_warning("event %s has more %s fields than specified",
- name, type);
- i--;
- break;
- }
- }
-
- if (i != count)
- do_warning("event %s has less %s fields than specified",
- name, type);
-
- fields[i] = NULL;
-
- return fields;
-}
-
-/**
- * tep_event_common_fields - return a list of common fields for an event
- * @event: the event to return the common fields of.
- *
- * Returns an allocated array of fields. The last item in the array is NULL.
- * The array must be freed with free().
- */
-struct tep_format_field **tep_event_common_fields(struct tep_event *event)
-{
- return get_event_fields("common", event->name,
- event->format.nr_common,
- event->format.common_fields);
-}
-
-/**
- * tep_event_fields - return a list of event specific fields for an event
- * @event: the event to return the fields of.
- *
- * Returns an allocated array of fields. The last item in the array is NULL.
- * The array must be freed with free().
- */
-struct tep_format_field **tep_event_fields(struct tep_event *event)
-{
- return get_event_fields("event", event->name,
- event->format.nr_fields,
- event->format.fields);
-}
-
-static void print_fields(struct trace_seq *s, struct tep_print_flag_sym *field)
-{
- trace_seq_printf(s, "{ %s, %s }", field->value, field->str);
- if (field->next) {
- trace_seq_puts(s, ", ");
- print_fields(s, field->next);
- }
-}
-
-/* for debugging */
-static void print_args(struct tep_print_arg *args)
-{
- int print_paren = 1;
- struct trace_seq s;
-
- switch (args->type) {
- case TEP_PRINT_NULL:
- printf("null");
- break;
- case TEP_PRINT_ATOM:
- printf("%s", args->atom.atom);
- break;
- case TEP_PRINT_FIELD:
- printf("REC->%s", args->field.name);
- break;
- case TEP_PRINT_FLAGS:
- printf("__print_flags(");
- print_args(args->flags.field);
- printf(", %s, ", args->flags.delim);
- trace_seq_init(&s);
- print_fields(&s, args->flags.flags);
- trace_seq_do_printf(&s);
- trace_seq_destroy(&s);
- printf(")");
- break;
- case TEP_PRINT_SYMBOL:
- printf("__print_symbolic(");
- print_args(args->symbol.field);
- printf(", ");
- trace_seq_init(&s);
- print_fields(&s, args->symbol.symbols);
- trace_seq_do_printf(&s);
- trace_seq_destroy(&s);
- printf(")");
- break;
- case TEP_PRINT_HEX:
- printf("__print_hex(");
- print_args(args->hex.field);
- printf(", ");
- print_args(args->hex.size);
- printf(")");
- break;
- case TEP_PRINT_HEX_STR:
- printf("__print_hex_str(");
- print_args(args->hex.field);
- printf(", ");
- print_args(args->hex.size);
- printf(")");
- break;
- case TEP_PRINT_INT_ARRAY:
- printf("__print_array(");
- print_args(args->int_array.field);
- printf(", ");
- print_args(args->int_array.count);
- printf(", ");
- print_args(args->int_array.el_size);
- printf(")");
- break;
- case TEP_PRINT_STRING:
- case TEP_PRINT_BSTRING:
- printf("__get_str(%s)", args->string.string);
- break;
- case TEP_PRINT_BITMASK:
- printf("__get_bitmask(%s)", args->bitmask.bitmask);
- break;
- case TEP_PRINT_TYPE:
- printf("(%s)", args->typecast.type);
- print_args(args->typecast.item);
- break;
- case TEP_PRINT_OP:
- if (strcmp(args->op.op, ":") == 0)
- print_paren = 0;
- if (print_paren)
- printf("(");
- print_args(args->op.left);
- printf(" %s ", args->op.op);
- print_args(args->op.right);
- if (print_paren)
- printf(")");
- break;
- default:
- /* we should warn... */
- return;
- }
- if (args->next) {
- printf("\n");
- print_args(args->next);
- }
-}
-
-static void parse_header_field(const char *field,
- int *offset, int *size, int mandatory)
-{
- unsigned long long save_input_buf_ptr;
- unsigned long long save_input_buf_siz;
- char *token;
- int type;
-
- save_input_buf_ptr = input_buf_ptr;
- save_input_buf_siz = input_buf_siz;
-
- if (read_expected(TEP_EVENT_ITEM, "field") < 0)
- return;
- if (read_expected(TEP_EVENT_OP, ":") < 0)
- return;
-
- /* type */
- if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
- goto fail;
- free_token(token);
-
- /*
- * If this is not a mandatory field, then test it first.
- */
- if (mandatory) {
- if (read_expected(TEP_EVENT_ITEM, field) < 0)
- return;
- } else {
- if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
- goto fail;
- if (strcmp(token, field) != 0)
- goto discard;
- free_token(token);
- }
-
- if (read_expected(TEP_EVENT_OP, ";") < 0)
- return;
- if (read_expected(TEP_EVENT_ITEM, "offset") < 0)
- return;
- if (read_expected(TEP_EVENT_OP, ":") < 0)
- return;
- if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
- goto fail;
- *offset = atoi(token);
- free_token(token);
- if (read_expected(TEP_EVENT_OP, ";") < 0)
- return;
- if (read_expected(TEP_EVENT_ITEM, "size") < 0)
- return;
- if (read_expected(TEP_EVENT_OP, ":") < 0)
- return;
- if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
- goto fail;
- *size = atoi(token);
- free_token(token);
- if (read_expected(TEP_EVENT_OP, ";") < 0)
- return;
- type = read_token(&token);
- if (type != TEP_EVENT_NEWLINE) {
- /* newer versions of the kernel have a "signed" type */
- if (type != TEP_EVENT_ITEM)
- goto fail;
-
- if (strcmp(token, "signed") != 0)
- goto fail;
-
- free_token(token);
-
- if (read_expected(TEP_EVENT_OP, ":") < 0)
- return;
-
- if (read_expect_type(TEP_EVENT_ITEM, &token))
- goto fail;
-
- free_token(token);
- if (read_expected(TEP_EVENT_OP, ";") < 0)
- return;
-
- if (read_expect_type(TEP_EVENT_NEWLINE, &token))
- goto fail;
- }
- fail:
- free_token(token);
- return;
-
- discard:
- input_buf_ptr = save_input_buf_ptr;
- input_buf_siz = save_input_buf_siz;
- *offset = 0;
- *size = 0;
- free_token(token);
-}
-
-/**
- * tep_parse_header_page - parse the data stored in the header page
- * @tep: a handle to the trace event parser context
- * @buf: the buffer storing the header page format string
- * @size: the size of @buf
- * @long_size: the long size to use if there is no header
- *
- * This parses the header page format for information on the
- * ring buffer used. The @buf should be copied from
- *
- * /sys/kernel/debug/tracing/events/header_page
- */
-int tep_parse_header_page(struct tep_handle *tep, char *buf, unsigned long size,
- int long_size)
-{
- int ignore;
-
- if (!size) {
- /*
- * Old kernels did not have header page info.
- * Sorry but we just use what we find here in user space.
- */
- tep->header_page_ts_size = sizeof(long long);
- tep->header_page_size_size = long_size;
- tep->header_page_data_offset = sizeof(long long) + long_size;
- tep->old_format = 1;
- return -1;
- }
- init_input_buf(buf, size);
-
- parse_header_field("timestamp", &tep->header_page_ts_offset,
- &tep->header_page_ts_size, 1);
- parse_header_field("commit", &tep->header_page_size_offset,
- &tep->header_page_size_size, 1);
- parse_header_field("overwrite", &tep->header_page_overwrite,
- &ignore, 0);
- parse_header_field("data", &tep->header_page_data_offset,
- &tep->header_page_data_size, 1);
-
- return 0;
-}
-
-static int event_matches(struct tep_event *event,
- int id, const char *sys_name,
- const char *event_name)
-{
- if (id >= 0 && id != event->id)
- return 0;
-
- if (event_name && (strcmp(event_name, event->name) != 0))
- return 0;
-
- if (sys_name && (strcmp(sys_name, event->system) != 0))
- return 0;
-
- return 1;
-}
-
-static void free_handler(struct event_handler *handle)
-{
- free((void *)handle->sys_name);
- free((void *)handle->event_name);
- free(handle);
-}
-
-static int find_event_handle(struct tep_handle *tep, struct tep_event *event)
-{
- struct event_handler *handle, **next;
-
- for (next = &tep->handlers; *next;
- next = &(*next)->next) {
- handle = *next;
- if (event_matches(event, handle->id,
- handle->sys_name,
- handle->event_name))
- break;
- }
-
- if (!(*next))
- return 0;
-
- pr_stat("overriding event (%d) %s:%s with new print handler",
- event->id, event->system, event->name);
-
- event->handler = handle->func;
- event->context = handle->context;
-
- *next = handle->next;
- free_handler(handle);
-
- return 1;
-}
-
-/**
- * parse_format - parse the event format
- * @buf: the buffer storing the event format string
- * @size: the size of @buf
- * @sys: the system the event belongs to
- *
- * This parses the event format and creates an event structure
- * to quickly parse raw data for a given event.
- *
- * These files currently come from:
- *
- * /sys/kernel/debug/tracing/events/.../.../format
- */
-static enum tep_errno parse_format(struct tep_event **eventp,
- struct tep_handle *tep, const char *buf,
- unsigned long size, const char *sys)
-{
- struct tep_event *event;
- int ret;
-
- init_input_buf(buf, size);
-
- *eventp = event = alloc_event();
- if (!event)
- return TEP_ERRNO__MEM_ALLOC_FAILED;
-
- event->name = event_read_name();
- if (!event->name) {
- /* Bad event? */
- ret = TEP_ERRNO__MEM_ALLOC_FAILED;
- goto event_alloc_failed;
- }
-
- if (strcmp(sys, "ftrace") == 0) {
- event->flags |= TEP_EVENT_FL_ISFTRACE;
-
- if (strcmp(event->name, "bprint") == 0)
- event->flags |= TEP_EVENT_FL_ISBPRINT;
- }
-
- event->id = event_read_id();
- if (event->id < 0) {
- ret = TEP_ERRNO__READ_ID_FAILED;
- /*
- * This isn't an allocation error actually.
- * But as the ID is critical, just bail out.
- */
- goto event_alloc_failed;
- }
-
- event->system = strdup(sys);
- if (!event->system) {
- ret = TEP_ERRNO__MEM_ALLOC_FAILED;
- goto event_alloc_failed;
- }
-
- /* Add tep to event so that it can be referenced */
- event->tep = tep;
-
- ret = event_read_format(event);
- if (ret < 0) {
- ret = TEP_ERRNO__READ_FORMAT_FAILED;
- goto event_parse_failed;
- }
-
- /*
- * If the event has an override, don't print warnings if the event
- * print format fails to parse.
- */
- if (tep && find_event_handle(tep, event))
- show_warning = 0;
-
- ret = event_read_print(event);
- show_warning = 1;
-
- if (ret < 0) {
- ret = TEP_ERRNO__READ_PRINT_FAILED;
- goto event_parse_failed;
- }
-
- if (!ret && (event->flags & TEP_EVENT_FL_ISFTRACE)) {
- struct tep_format_field *field;
- struct tep_print_arg *arg, **list;
-
- /* old ftrace had no args */
- list = &event->print_fmt.args;
- for (field = event->format.fields; field; field = field->next) {
- arg = alloc_arg();
- if (!arg) {
- event->flags |= TEP_EVENT_FL_FAILED;
- return TEP_ERRNO__OLD_FTRACE_ARG_FAILED;
- }
- arg->type = TEP_PRINT_FIELD;
- arg->field.name = strdup(field->name);
- if (!arg->field.name) {
- event->flags |= TEP_EVENT_FL_FAILED;
- free_arg(arg);
- return TEP_ERRNO__OLD_FTRACE_ARG_FAILED;
- }
- arg->field.field = field;
- *list = arg;
- list = &arg->next;
- }
- }
-
- if (!(event->flags & TEP_EVENT_FL_ISBPRINT))
- event->print_fmt.print_cache = parse_args(event,
- event->print_fmt.format,
- event->print_fmt.args);
-
- return 0;
-
- event_parse_failed:
- event->flags |= TEP_EVENT_FL_FAILED;
- return ret;
-
- event_alloc_failed:
- free(event->system);
- free(event->name);
- free(event);
- *eventp = NULL;
- return ret;
-}
-
-static enum tep_errno
-__parse_event(struct tep_handle *tep,
- struct tep_event **eventp,
- const char *buf, unsigned long size,
- const char *sys)
-{
- int ret = parse_format(eventp, tep, buf, size, sys);
- struct tep_event *event = *eventp;
-
- if (event == NULL)
- return ret;
-
- if (tep && add_event(tep, event)) {
- ret = TEP_ERRNO__MEM_ALLOC_FAILED;
- goto event_add_failed;
- }
-
-#define PRINT_ARGS 0
- if (PRINT_ARGS && event->print_fmt.args)
- print_args(event->print_fmt.args);
-
- return 0;
-
-event_add_failed:
- free_tep_event(event);
- return ret;
-}
-
-/**
- * tep_parse_format - parse the event format
- * @tep: a handle to the trace event parser context
- * @eventp: returned format
- * @buf: the buffer storing the event format string
- * @size: the size of @buf
- * @sys: the system the event belongs to
- *
- * This parses the event format and creates an event structure
- * to quickly parse raw data for a given event.
- *
- * These files currently come from:
- *
- * /sys/kernel/debug/tracing/events/.../.../format
- */
-enum tep_errno tep_parse_format(struct tep_handle *tep,
- struct tep_event **eventp,
- const char *buf,
- unsigned long size, const char *sys)
-{
- return __parse_event(tep, eventp, buf, size, sys);
-}
-
-/**
- * tep_parse_event - parse the event format
- * @tep: a handle to the trace event parser context
- * @buf: the buffer storing the event format string
- * @size: the size of @buf
- * @sys: the system the event belongs to
- *
- * This parses the event format and creates an event structure
- * to quickly parse raw data for a given event.
- *
- * These files currently come from:
- *
- * /sys/kernel/debug/tracing/events/.../.../format
- */
-enum tep_errno tep_parse_event(struct tep_handle *tep, const char *buf,
- unsigned long size, const char *sys)
-{
- struct tep_event *event = NULL;
- return __parse_event(tep, &event, buf, size, sys);
-}
-
-int get_field_val(struct trace_seq *s, struct tep_format_field *field,
- const char *name, struct tep_record *record,
- unsigned long long *val, int err)
-{
- if (!field) {
- if (err)
- trace_seq_printf(s, "<CANT FIND FIELD %s>", name);
- return -1;
- }
-
- if (tep_read_number_field(field, record->data, val)) {
- if (err)
- trace_seq_printf(s, " %s=INVALID", name);
- return -1;
- }
-
- return 0;
-}
-
-/**
- * tep_get_field_raw - return the raw pointer into the data field
- * @s: The seq to print to on error
- * @event: the event that the field is for
- * @name: The name of the field
- * @record: The record with the field name.
- * @len: place to store the field length.
- * @err: print default error if failed.
- *
- * Returns a pointer into record->data of the field and places
- * the length of the field in @len.
- *
- * On failure, it returns NULL.
- */
-void *tep_get_field_raw(struct trace_seq *s, struct tep_event *event,
- const char *name, struct tep_record *record,
- int *len, int err)
-{
- struct tep_format_field *field;
- void *data = record->data;
- unsigned offset;
- int dummy;
-
- if (!event)
- return NULL;
-
- field = tep_find_field(event, name);
-
- if (!field) {
- if (err)
- trace_seq_printf(s, "<CANT FIND FIELD %s>", name);
- return NULL;
- }
-
- /* Allow @len to be NULL */
- if (!len)
- len = &dummy;
-
- offset = field->offset;
- if (field->flags & TEP_FIELD_IS_DYNAMIC) {
- offset = tep_read_number(event->tep,
- data + offset, field->size);
- *len = offset >> 16;
- offset &= 0xffff;
- } else
- *len = field->size;
-
- return data + offset;
-}
-
-/**
- * tep_get_field_val - find a field and return its value
- * @s: The seq to print to on error
- * @event: the event that the field is for
- * @name: The name of the field
- * @record: The record with the field name.
- * @val: place to store the value of the field.
- * @err: print default error if failed.
- *
- * Returns 0 on success -1 on field not found.
- */
-int tep_get_field_val(struct trace_seq *s, struct tep_event *event,
- const char *name, struct tep_record *record,
- unsigned long long *val, int err)
-{
- struct tep_format_field *field;
-
- if (!event)
- return -1;
-
- field = tep_find_field(event, name);
-
- return get_field_val(s, field, name, record, val, err);
-}
-
-/**
- * tep_get_common_field_val - find a common field and return its value
- * @s: The seq to print to on error
- * @event: the event that the field is for
- * @name: The name of the field
- * @record: The record with the field name.
- * @val: place to store the value of the field.
- * @err: print default error if failed.
- *
- * Returns 0 on success -1 on field not found.
- */
-int tep_get_common_field_val(struct trace_seq *s, struct tep_event *event,
- const char *name, struct tep_record *record,
- unsigned long long *val, int err)
-{
- struct tep_format_field *field;
-
- if (!event)
- return -1;
-
- field = tep_find_common_field(event, name);
-
- return get_field_val(s, field, name, record, val, err);
-}
-
-/**
- * tep_get_any_field_val - find a any field and return its value
- * @s: The seq to print to on error
- * @event: the event that the field is for
- * @name: The name of the field
- * @record: The record with the field name.
- * @val: place to store the value of the field.
- * @err: print default error if failed.
- *
- * Returns 0 on success -1 on field not found.
- */
-int tep_get_any_field_val(struct trace_seq *s, struct tep_event *event,
- const char *name, struct tep_record *record,
- unsigned long long *val, int err)
-{
- struct tep_format_field *field;
-
- if (!event)
- return -1;
-
- field = tep_find_any_field(event, name);
-
- return get_field_val(s, field, name, record, val, err);
-}
-
-/**
- * tep_print_num_field - print a field and a format
- * @s: The seq to print to
- * @fmt: The printf format to print the field with.
- * @event: the event that the field is for
- * @name: The name of the field
- * @record: The record with the field name.
- * @err: print default error if failed.
- *
- * Returns positive value on success, negative in case of an error,
- * or 0 if buffer is full.
- */
-int tep_print_num_field(struct trace_seq *s, const char *fmt,
- struct tep_event *event, const char *name,
- struct tep_record *record, int err)
-{
- struct tep_format_field *field = tep_find_field(event, name);
- unsigned long long val;
-
- if (!field)
- goto failed;
-
- if (tep_read_number_field(field, record->data, &val))
- goto failed;
-
- return trace_seq_printf(s, fmt, val);
-
- failed:
- if (err)
- trace_seq_printf(s, "CAN'T FIND FIELD \"%s\"", name);
- return -1;
-}
-
-/**
- * tep_print_func_field - print a field and a format for function pointers
- * @s: The seq to print to
- * @fmt: The printf format to print the field with.
- * @event: the event that the field is for
- * @name: The name of the field
- * @record: The record with the field name.
- * @err: print default error if failed.
- *
- * Returns positive value on success, negative in case of an error,
- * or 0 if buffer is full.
- */
-int tep_print_func_field(struct trace_seq *s, const char *fmt,
- struct tep_event *event, const char *name,
- struct tep_record *record, int err)
-{
- struct tep_format_field *field = tep_find_field(event, name);
- struct tep_handle *tep = event->tep;
- unsigned long long val;
- struct func_map *func;
- char tmp[128];
-
- if (!field)
- goto failed;
-
- if (tep_read_number_field(field, record->data, &val))
- goto failed;
-
- func = find_func(tep, val);
-
- if (func)
- snprintf(tmp, 128, "%s/0x%llx", func->func, func->addr - val);
- else
- sprintf(tmp, "0x%08llx", val);
-
- return trace_seq_printf(s, fmt, tmp);
-
- failed:
- if (err)
- trace_seq_printf(s, "CAN'T FIND FIELD \"%s\"", name);
- return -1;
-}
-
-static void free_func_handle(struct tep_function_handler *func)
-{
- struct func_params *params;
-
- free(func->name);
-
- while (func->params) {
- params = func->params;
- func->params = params->next;
- free(params);
- }
-
- free(func);
-}
-
-/**
- * tep_register_print_function - register a helper function
- * @tep: a handle to the trace event parser context
- * @func: the function to process the helper function
- * @ret_type: the return type of the helper function
- * @name: the name of the helper function
- * @parameters: A list of enum tep_func_arg_type
- *
- * Some events may have helper functions in the print format arguments.
- * This allows a plugin to dynamically create a way to process one
- * of these functions.
- *
- * The @parameters is a variable list of tep_func_arg_type enums that
- * must end with TEP_FUNC_ARG_VOID.
- */
-int tep_register_print_function(struct tep_handle *tep,
- tep_func_handler func,
- enum tep_func_arg_type ret_type,
- char *name, ...)
-{
- struct tep_function_handler *func_handle;
- struct func_params **next_param;
- struct func_params *param;
- enum tep_func_arg_type type;
- va_list ap;
- int ret;
-
- func_handle = find_func_handler(tep, name);
- if (func_handle) {
- /*
- * This is most like caused by the users own
- * plugins updating the function. This overrides the
- * system defaults.
- */
- pr_stat("override of function helper '%s'", name);
- remove_func_handler(tep, name);
- }
-
- func_handle = calloc(1, sizeof(*func_handle));
- if (!func_handle) {
- do_warning("Failed to allocate function handler");
- return TEP_ERRNO__MEM_ALLOC_FAILED;
- }
-
- func_handle->ret_type = ret_type;
- func_handle->name = strdup(name);
- func_handle->func = func;
- if (!func_handle->name) {
- do_warning("Failed to allocate function name");
- free(func_handle);
- return TEP_ERRNO__MEM_ALLOC_FAILED;
- }
-
- next_param = &(func_handle->params);
- va_start(ap, name);
- for (;;) {
- type = va_arg(ap, enum tep_func_arg_type);
- if (type == TEP_FUNC_ARG_VOID)
- break;
-
- if (type >= TEP_FUNC_ARG_MAX_TYPES) {
- do_warning("Invalid argument type %d", type);
- ret = TEP_ERRNO__INVALID_ARG_TYPE;
- goto out_free;
- }
-
- param = malloc(sizeof(*param));
- if (!param) {
- do_warning("Failed to allocate function param");
- ret = TEP_ERRNO__MEM_ALLOC_FAILED;
- goto out_free;
- }
- param->type = type;
- param->next = NULL;
-
- *next_param = param;
- next_param = &(param->next);
-
- func_handle->nr_args++;
- }
- va_end(ap);
-
- func_handle->next = tep->func_handlers;
- tep->func_handlers = func_handle;
-
- return 0;
- out_free:
- va_end(ap);
- free_func_handle(func_handle);
- return ret;
-}
-
-/**
- * tep_unregister_print_function - unregister a helper function
- * @tep: a handle to the trace event parser context
- * @func: the function to process the helper function
- * @name: the name of the helper function
- *
- * This function removes existing print handler for function @name.
- *
- * Returns 0 if the handler was removed successully, -1 otherwise.
- */
-int tep_unregister_print_function(struct tep_handle *tep,
- tep_func_handler func, char *name)
-{
- struct tep_function_handler *func_handle;
-
- func_handle = find_func_handler(tep, name);
- if (func_handle && func_handle->func == func) {
- remove_func_handler(tep, name);
- return 0;
- }
- return -1;
-}
-
-static struct tep_event *search_event(struct tep_handle *tep, int id,
- const char *sys_name,
- const char *event_name)
-{
- struct tep_event *event;
-
- if (id >= 0) {
- /* search by id */
- event = tep_find_event(tep, id);
- if (!event)
- return NULL;
- if (event_name && (strcmp(event_name, event->name) != 0))
- return NULL;
- if (sys_name && (strcmp(sys_name, event->system) != 0))
- return NULL;
- } else {
- event = tep_find_event_by_name(tep, sys_name, event_name);
- if (!event)
- return NULL;
- }
- return event;
-}
-
-/**
- * tep_register_event_handler - register a way to parse an event
- * @tep: a handle to the trace event parser context
- * @id: the id of the event to register
- * @sys_name: the system name the event belongs to
- * @event_name: the name of the event
- * @func: the function to call to parse the event information
- * @context: the data to be passed to @func
- *
- * This function allows a developer to override the parsing of
- * a given event. If for some reason the default print format
- * is not sufficient, this function will register a function
- * for an event to be used to parse the data instead.
- *
- * If @id is >= 0, then it is used to find the event.
- * else @sys_name and @event_name are used.
- *
- * Returns:
- * TEP_REGISTER_SUCCESS_OVERWRITE if an existing handler is overwritten
- * TEP_REGISTER_SUCCESS if a new handler is registered successfully
- * negative TEP_ERRNO_... in case of an error
- *
- */
-int tep_register_event_handler(struct tep_handle *tep, int id,
- const char *sys_name, const char *event_name,
- tep_event_handler_func func, void *context)
-{
- struct tep_event *event;
- struct event_handler *handle;
-
- event = search_event(tep, id, sys_name, event_name);
- if (event == NULL)
- goto not_found;
-
- pr_stat("overriding event (%d) %s:%s with new print handler",
- event->id, event->system, event->name);
-
- event->handler = func;
- event->context = context;
- return TEP_REGISTER_SUCCESS_OVERWRITE;
-
- not_found:
- /* Save for later use. */
- handle = calloc(1, sizeof(*handle));
- if (!handle) {
- do_warning("Failed to allocate event handler");
- return TEP_ERRNO__MEM_ALLOC_FAILED;
- }
-
- handle->id = id;
- if (event_name)
- handle->event_name = strdup(event_name);
- if (sys_name)
- handle->sys_name = strdup(sys_name);
-
- if ((event_name && !handle->event_name) ||
- (sys_name && !handle->sys_name)) {
- do_warning("Failed to allocate event/sys name");
- free((void *)handle->event_name);
- free((void *)handle->sys_name);
- free(handle);
- return TEP_ERRNO__MEM_ALLOC_FAILED;
- }
-
- handle->func = func;
- handle->next = tep->handlers;
- tep->handlers = handle;
- handle->context = context;
-
- return TEP_REGISTER_SUCCESS;
-}
-
-static int handle_matches(struct event_handler *handler, int id,
- const char *sys_name, const char *event_name,
- tep_event_handler_func func, void *context)
-{
- if (id >= 0 && id != handler->id)
- return 0;
-
- if (event_name && (strcmp(event_name, handler->event_name) != 0))
- return 0;
-
- if (sys_name && (strcmp(sys_name, handler->sys_name) != 0))
- return 0;
-
- if (func != handler->func || context != handler->context)
- return 0;
-
- return 1;
-}
-
-/**
- * tep_unregister_event_handler - unregister an existing event handler
- * @tep: a handle to the trace event parser context
- * @id: the id of the event to unregister
- * @sys_name: the system name the handler belongs to
- * @event_name: the name of the event handler
- * @func: the function to call to parse the event information
- * @context: the data to be passed to @func
- *
- * This function removes existing event handler (parser).
- *
- * If @id is >= 0, then it is used to find the event.
- * else @sys_name and @event_name are used.
- *
- * Returns 0 if handler was removed successfully, -1 if event was not found.
- */
-int tep_unregister_event_handler(struct tep_handle *tep, int id,
- const char *sys_name, const char *event_name,
- tep_event_handler_func func, void *context)
-{
- struct tep_event *event;
- struct event_handler *handle;
- struct event_handler **next;
-
- event = search_event(tep, id, sys_name, event_name);
- if (event == NULL)
- goto not_found;
-
- if (event->handler == func && event->context == context) {
- pr_stat("removing override handler for event (%d) %s:%s. Going back to default handler.",
- event->id, event->system, event->name);
-
- event->handler = NULL;
- event->context = NULL;
- return 0;
- }
-
-not_found:
- for (next = &tep->handlers; *next; next = &(*next)->next) {
- handle = *next;
- if (handle_matches(handle, id, sys_name, event_name,
- func, context))
- break;
- }
-
- if (!(*next))
- return -1;
-
- *next = handle->next;
- free_handler(handle);
-
- return 0;
-}
-
-/**
- * tep_alloc - create a tep handle
- */
-struct tep_handle *tep_alloc(void)
-{
- struct tep_handle *tep = calloc(1, sizeof(*tep));
-
- if (tep) {
- tep->ref_count = 1;
- tep->host_bigendian = tep_is_bigendian();
- }
-
- return tep;
-}
-
-void tep_ref(struct tep_handle *tep)
-{
- tep->ref_count++;
-}
-
-int tep_get_ref(struct tep_handle *tep)
-{
- if (tep)
- return tep->ref_count;
- return 0;
-}
-
-__hidden void free_tep_format_field(struct tep_format_field *field)
-{
- free(field->type);
- if (field->alias != field->name)
- free(field->alias);
- free(field->name);
- free(field);
-}
-
-static void free_format_fields(struct tep_format_field *field)
-{
- struct tep_format_field *next;
-
- while (field) {
- next = field->next;
- free_tep_format_field(field);
- field = next;
- }
-}
-
-static void free_formats(struct tep_format *format)
-{
- free_format_fields(format->common_fields);
- free_format_fields(format->fields);
-}
-
-__hidden void free_tep_event(struct tep_event *event)
-{
- free(event->name);
- free(event->system);
-
- free_formats(&event->format);
-
- free(event->print_fmt.format);
- free_args(event->print_fmt.args);
- free_parse_args(event->print_fmt.print_cache);
- free(event);
-}
-
-/**
- * tep_free - free a tep handle
- * @tep: the tep handle to free
- */
-void tep_free(struct tep_handle *tep)
-{
- struct cmdline_list *cmdlist, *cmdnext;
- struct func_list *funclist, *funcnext;
- struct printk_list *printklist, *printknext;
- struct tep_function_handler *func_handler;
- struct event_handler *handle;
- int i;
-
- if (!tep)
- return;
-
- cmdlist = tep->cmdlist;
- funclist = tep->funclist;
- printklist = tep->printklist;
-
- tep->ref_count--;
- if (tep->ref_count)
- return;
-
- if (tep->cmdlines) {
- for (i = 0; i < tep->cmdline_count; i++)
- free(tep->cmdlines[i].comm);
- free(tep->cmdlines);
- }
-
- while (cmdlist) {
- cmdnext = cmdlist->next;
- free(cmdlist->comm);
- free(cmdlist);
- cmdlist = cmdnext;
- }
-
- if (tep->func_map) {
- for (i = 0; i < (int)tep->func_count; i++) {
- free(tep->func_map[i].func);
- free(tep->func_map[i].mod);
- }
- free(tep->func_map);
- }
-
- while (funclist) {
- funcnext = funclist->next;
- free(funclist->func);
- free(funclist->mod);
- free(funclist);
- funclist = funcnext;
- }
-
- while (tep->func_handlers) {
- func_handler = tep->func_handlers;
- tep->func_handlers = func_handler->next;
- free_func_handle(func_handler);
- }
-
- if (tep->printk_map) {
- for (i = 0; i < (int)tep->printk_count; i++)
- free(tep->printk_map[i].printk);
- free(tep->printk_map);
- }
-
- while (printklist) {
- printknext = printklist->next;
- free(printklist->printk);
- free(printklist);
- printklist = printknext;
- }
-
- for (i = 0; i < tep->nr_events; i++)
- free_tep_event(tep->events[i]);
-
- while (tep->handlers) {
- handle = tep->handlers;
- tep->handlers = handle->next;
- free_handler(handle);
- }
-
- free(tep->events);
- free(tep->sort_events);
- free(tep->func_resolver);
- free_tep_plugin_paths(tep);
-
- free(tep);
-}
-
-void tep_unref(struct tep_handle *tep)
-{
- tep_free(tep);
-}
diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h
deleted file mode 100644
index a67ad9a5b835..000000000000
--- a/tools/lib/traceevent/event-parse.h
+++ /dev/null
@@ -1,749 +0,0 @@
-/* SPDX-License-Identifier: LGPL-2.1 */
-/*
- * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- */
-#ifndef _PARSE_EVENTS_H
-#define _PARSE_EVENTS_H
-
-#include <stdbool.h>
-#include <stdarg.h>
-#include <stdio.h>
-#include <regex.h>
-#include <string.h>
-
-#include "trace-seq.h"
-
-#ifndef __maybe_unused
-#define __maybe_unused __attribute__((unused))
-#endif
-
-#ifndef DEBUG_RECORD
-#define DEBUG_RECORD 0
-#endif
-
-struct tep_record {
- unsigned long long ts;
- unsigned long long offset;
- long long missed_events; /* buffer dropped events before */
- int record_size; /* size of binary record */
- int size; /* size of data */
- void *data;
- int cpu;
- int ref_count;
- int locked; /* Do not free, even if ref_count is zero */
- void *priv;
-#if DEBUG_RECORD
- struct tep_record *prev;
- struct tep_record *next;
- long alloc_addr;
-#endif
-};
-
-/* ----------------------- tep ----------------------- */
-
-struct tep_handle;
-struct tep_event;
-
-typedef int (*tep_event_handler_func)(struct trace_seq *s,
- struct tep_record *record,
- struct tep_event *event,
- void *context);
-
-typedef int (*tep_plugin_load_func)(struct tep_handle *tep);
-typedef int (*tep_plugin_unload_func)(struct tep_handle *tep);
-
-struct tep_plugin_option {
- struct tep_plugin_option *next;
- void *handle;
- char *file;
- char *name;
- char *plugin_alias;
- char *description;
- const char *value;
- void *priv;
- int set;
-};
-
-/*
- * Plugin hooks that can be called:
- *
- * TEP_PLUGIN_LOADER: (required)
- * The function name to initialized the plugin.
- *
- * int TEP_PLUGIN_LOADER(struct tep_handle *tep)
- *
- * TEP_PLUGIN_UNLOADER: (optional)
- * The function called just before unloading
- *
- * int TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
- *
- * TEP_PLUGIN_OPTIONS: (optional)
- * Plugin options that can be set before loading
- *
- * struct tep_plugin_option TEP_PLUGIN_OPTIONS[] = {
- * {
- * .name = "option-name",
- * .plugin_alias = "override-file-name", (optional)
- * .description = "description of option to show users",
- * },
- * {
- * .name = NULL,
- * },
- * };
- *
- * Array must end with .name = NULL;
- *
- *
- * .plugin_alias is used to give a shorter name to access
- * the vairable. Useful if a plugin handles more than one event.
- *
- * If .value is not set, then it is considered a boolean and only
- * .set will be processed. If .value is defined, then it is considered
- * a string option and .set will be ignored.
- *
- * TEP_PLUGIN_ALIAS: (optional)
- * The name to use for finding options (uses filename if not defined)
- */
-#define TEP_PLUGIN_LOADER tep_plugin_loader
-#define TEP_PLUGIN_UNLOADER tep_plugin_unloader
-#define TEP_PLUGIN_OPTIONS tep_plugin_options
-#define TEP_PLUGIN_ALIAS tep_plugin_alias
-#define _MAKE_STR(x) #x
-#define MAKE_STR(x) _MAKE_STR(x)
-#define TEP_PLUGIN_LOADER_NAME MAKE_STR(TEP_PLUGIN_LOADER)
-#define TEP_PLUGIN_UNLOADER_NAME MAKE_STR(TEP_PLUGIN_UNLOADER)
-#define TEP_PLUGIN_OPTIONS_NAME MAKE_STR(TEP_PLUGIN_OPTIONS)
-#define TEP_PLUGIN_ALIAS_NAME MAKE_STR(TEP_PLUGIN_ALIAS)
-
-enum tep_format_flags {
- TEP_FIELD_IS_ARRAY = 1,
- TEP_FIELD_IS_POINTER = 2,
- TEP_FIELD_IS_SIGNED = 4,
- TEP_FIELD_IS_STRING = 8,
- TEP_FIELD_IS_DYNAMIC = 16,
- TEP_FIELD_IS_LONG = 32,
- TEP_FIELD_IS_FLAG = 64,
- TEP_FIELD_IS_SYMBOLIC = 128,
-};
-
-struct tep_format_field {
- struct tep_format_field *next;
- struct tep_event *event;
- char *type;
- char *name;
- char *alias;
- int offset;
- int size;
- unsigned int arraylen;
- unsigned int elementsize;
- unsigned long flags;
-};
-
-struct tep_format {
- int nr_common;
- int nr_fields;
- struct tep_format_field *common_fields;
- struct tep_format_field *fields;
-};
-
-struct tep_print_arg_atom {
- char *atom;
-};
-
-struct tep_print_arg_string {
- char *string;
- int offset;
-};
-
-struct tep_print_arg_bitmask {
- char *bitmask;
- int offset;
-};
-
-struct tep_print_arg_field {
- char *name;
- struct tep_format_field *field;
-};
-
-struct tep_print_flag_sym {
- struct tep_print_flag_sym *next;
- char *value;
- char *str;
-};
-
-struct tep_print_arg_typecast {
- char *type;
- struct tep_print_arg *item;
-};
-
-struct tep_print_arg_flags {
- struct tep_print_arg *field;
- char *delim;
- struct tep_print_flag_sym *flags;
-};
-
-struct tep_print_arg_symbol {
- struct tep_print_arg *field;
- struct tep_print_flag_sym *symbols;
-};
-
-struct tep_print_arg_hex {
- struct tep_print_arg *field;
- struct tep_print_arg *size;
-};
-
-struct tep_print_arg_int_array {
- struct tep_print_arg *field;
- struct tep_print_arg *count;
- struct tep_print_arg *el_size;
-};
-
-struct tep_print_arg_dynarray {
- struct tep_format_field *field;
- struct tep_print_arg *index;
-};
-
-struct tep_print_arg;
-
-struct tep_print_arg_op {
- char *op;
- int prio;
- struct tep_print_arg *left;
- struct tep_print_arg *right;
-};
-
-struct tep_function_handler;
-
-struct tep_print_arg_func {
- struct tep_function_handler *func;
- struct tep_print_arg *args;
-};
-
-enum tep_print_arg_type {
- TEP_PRINT_NULL,
- TEP_PRINT_ATOM,
- TEP_PRINT_FIELD,
- TEP_PRINT_FLAGS,
- TEP_PRINT_SYMBOL,
- TEP_PRINT_HEX,
- TEP_PRINT_INT_ARRAY,
- TEP_PRINT_TYPE,
- TEP_PRINT_STRING,
- TEP_PRINT_BSTRING,
- TEP_PRINT_DYNAMIC_ARRAY,
- TEP_PRINT_OP,
- TEP_PRINT_FUNC,
- TEP_PRINT_BITMASK,
- TEP_PRINT_DYNAMIC_ARRAY_LEN,
- TEP_PRINT_HEX_STR,
-};
-
-struct tep_print_arg {
- struct tep_print_arg *next;
- enum tep_print_arg_type type;
- union {
- struct tep_print_arg_atom atom;
- struct tep_print_arg_field field;
- struct tep_print_arg_typecast typecast;
- struct tep_print_arg_flags flags;
- struct tep_print_arg_symbol symbol;
- struct tep_print_arg_hex hex;
- struct tep_print_arg_int_array int_array;
- struct tep_print_arg_func func;
- struct tep_print_arg_string string;
- struct tep_print_arg_bitmask bitmask;
- struct tep_print_arg_op op;
- struct tep_print_arg_dynarray dynarray;
- };
-};
-
-struct tep_print_parse;
-
-struct tep_print_fmt {
- char *format;
- struct tep_print_arg *args;
- struct tep_print_parse *print_cache;
-};
-
-struct tep_event {
- struct tep_handle *tep;
- char *name;
- int id;
- int flags;
- struct tep_format format;
- struct tep_print_fmt print_fmt;
- char *system;
- tep_event_handler_func handler;
- void *context;
-};
-
-enum {
- TEP_EVENT_FL_ISFTRACE = 0x01,
- TEP_EVENT_FL_ISPRINT = 0x02,
- TEP_EVENT_FL_ISBPRINT = 0x04,
- TEP_EVENT_FL_ISFUNCENT = 0x10,
- TEP_EVENT_FL_ISFUNCRET = 0x20,
- TEP_EVENT_FL_NOHANDLE = 0x40,
- TEP_EVENT_FL_PRINTRAW = 0x80,
-
- TEP_EVENT_FL_FAILED = 0x80000000
-};
-
-enum tep_event_sort_type {
- TEP_EVENT_SORT_ID,
- TEP_EVENT_SORT_NAME,
- TEP_EVENT_SORT_SYSTEM,
-};
-
-enum tep_event_type {
- TEP_EVENT_ERROR,
- TEP_EVENT_NONE,
- TEP_EVENT_SPACE,
- TEP_EVENT_NEWLINE,
- TEP_EVENT_OP,
- TEP_EVENT_DELIM,
- TEP_EVENT_ITEM,
- TEP_EVENT_DQUOTE,
- TEP_EVENT_SQUOTE,
-};
-
-typedef unsigned long long (*tep_func_handler)(struct trace_seq *s,
- unsigned long long *args);
-
-enum tep_func_arg_type {
- TEP_FUNC_ARG_VOID,
- TEP_FUNC_ARG_INT,
- TEP_FUNC_ARG_LONG,
- TEP_FUNC_ARG_STRING,
- TEP_FUNC_ARG_PTR,
- TEP_FUNC_ARG_MAX_TYPES
-};
-
-enum tep_flag {
- TEP_NSEC_OUTPUT = 1, /* output in NSECS */
- TEP_DISABLE_SYS_PLUGINS = 1 << 1,
- TEP_DISABLE_PLUGINS = 1 << 2,
-};
-
-#define TEP_ERRORS \
- _PE(MEM_ALLOC_FAILED, "failed to allocate memory"), \
- _PE(PARSE_EVENT_FAILED, "failed to parse event"), \
- _PE(READ_ID_FAILED, "failed to read event id"), \
- _PE(READ_FORMAT_FAILED, "failed to read event format"), \
- _PE(READ_PRINT_FAILED, "failed to read event print fmt"), \
- _PE(OLD_FTRACE_ARG_FAILED,"failed to allocate field name for ftrace"),\
- _PE(INVALID_ARG_TYPE, "invalid argument type"), \
- _PE(INVALID_EXP_TYPE, "invalid expression type"), \
- _PE(INVALID_OP_TYPE, "invalid operator type"), \
- _PE(INVALID_EVENT_NAME, "invalid event name"), \
- _PE(EVENT_NOT_FOUND, "no event found"), \
- _PE(SYNTAX_ERROR, "syntax error"), \
- _PE(ILLEGAL_RVALUE, "illegal rvalue"), \
- _PE(ILLEGAL_LVALUE, "illegal lvalue for string comparison"), \
- _PE(INVALID_REGEX, "regex did not compute"), \
- _PE(ILLEGAL_STRING_CMP, "illegal comparison for string"), \
- _PE(ILLEGAL_INTEGER_CMP,"illegal comparison for integer"), \
- _PE(REPARENT_NOT_OP, "cannot reparent other than OP"), \
- _PE(REPARENT_FAILED, "failed to reparent filter OP"), \
- _PE(BAD_FILTER_ARG, "bad arg in filter tree"), \
- _PE(UNEXPECTED_TYPE, "unexpected type (not a value)"), \
- _PE(ILLEGAL_TOKEN, "illegal token"), \
- _PE(INVALID_PAREN, "open parenthesis cannot come here"), \
- _PE(UNBALANCED_PAREN, "unbalanced number of parenthesis"), \
- _PE(UNKNOWN_TOKEN, "unknown token"), \
- _PE(FILTER_NOT_FOUND, "no filter found"), \
- _PE(NOT_A_NUMBER, "must have number field"), \
- _PE(NO_FILTER, "no filters exists"), \
- _PE(FILTER_MISS, "record does not match to filter")
-
-#undef _PE
-#define _PE(__code, __str) TEP_ERRNO__ ## __code
-enum tep_errno {
- TEP_ERRNO__SUCCESS = 0,
- TEP_ERRNO__FILTER_MATCH = TEP_ERRNO__SUCCESS,
-
- /*
- * Choose an arbitrary negative big number not to clash with standard
- * errno since SUS requires the errno has distinct positive values.
- * See 'Issue 6' in the link below.
- *
- * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html
- */
- __TEP_ERRNO__START = -100000,
-
- TEP_ERRORS,
-
- __TEP_ERRNO__END,
-};
-#undef _PE
-
-struct tep_plugin_list;
-
-#define INVALID_PLUGIN_LIST_OPTION ((char **)((unsigned long)-1))
-
-enum tep_plugin_load_priority {
- TEP_PLUGIN_FIRST,
- TEP_PLUGIN_LAST,
-};
-
-int tep_add_plugin_path(struct tep_handle *tep, char *path,
- enum tep_plugin_load_priority prio);
-struct tep_plugin_list *tep_load_plugins(struct tep_handle *tep);
-void tep_unload_plugins(struct tep_plugin_list *plugin_list,
- struct tep_handle *tep);
-void tep_load_plugins_hook(struct tep_handle *tep, const char *suffix,
- void (*load_plugin)(struct tep_handle *tep,
- const char *path,
- const char *name,
- void *data),
- void *data);
-char **tep_plugin_list_options(void);
-void tep_plugin_free_options_list(char **list);
-int tep_plugin_add_options(const char *name,
- struct tep_plugin_option *options);
-int tep_plugin_add_option(const char *name, const char *val);
-void tep_plugin_remove_options(struct tep_plugin_option *options);
-void tep_plugin_print_options(struct trace_seq *s);
-void tep_print_plugins(struct trace_seq *s,
- const char *prefix, const char *suffix,
- const struct tep_plugin_list *list);
-
-/* tep_handle */
-typedef char *(tep_func_resolver_t)(void *priv,
- unsigned long long *addrp, char **modp);
-void tep_set_flag(struct tep_handle *tep, int flag);
-void tep_clear_flag(struct tep_handle *tep, enum tep_flag flag);
-bool tep_test_flag(struct tep_handle *tep, enum tep_flag flags);
-
-static inline int tep_is_bigendian(void)
-{
- unsigned char str[] = { 0x1, 0x2, 0x3, 0x4 };
- unsigned int val;
-
- memcpy(&val, str, 4);
- return val == 0x01020304;
-}
-
-/* taken from kernel/trace/trace.h */
-enum trace_flag_type {
- TRACE_FLAG_IRQS_OFF = 0x01,
- TRACE_FLAG_IRQS_NOSUPPORT = 0x02,
- TRACE_FLAG_NEED_RESCHED = 0x04,
- TRACE_FLAG_HARDIRQ = 0x08,
- TRACE_FLAG_SOFTIRQ = 0x10,
-};
-
-int tep_set_function_resolver(struct tep_handle *tep,
- tep_func_resolver_t *func, void *priv);
-void tep_reset_function_resolver(struct tep_handle *tep);
-int tep_register_comm(struct tep_handle *tep, const char *comm, int pid);
-int tep_override_comm(struct tep_handle *tep, const char *comm, int pid);
-int tep_register_function(struct tep_handle *tep, char *name,
- unsigned long long addr, char *mod);
-int tep_register_print_string(struct tep_handle *tep, const char *fmt,
- unsigned long long addr);
-bool tep_is_pid_registered(struct tep_handle *tep, int pid);
-
-struct tep_event *tep_get_event(struct tep_handle *tep, int index);
-
-#define TEP_PRINT_INFO "INFO"
-#define TEP_PRINT_INFO_RAW "INFO_RAW"
-#define TEP_PRINT_COMM "COMM"
-#define TEP_PRINT_LATENCY "LATENCY"
-#define TEP_PRINT_NAME "NAME"
-#define TEP_PRINT_PID 1U
-#define TEP_PRINT_TIME 2U
-#define TEP_PRINT_CPU 3U
-
-void tep_print_event(struct tep_handle *tep, struct trace_seq *s,
- struct tep_record *record, const char *fmt, ...)
- __attribute__ ((format (printf, 4, 5)));
-
-int tep_parse_header_page(struct tep_handle *tep, char *buf, unsigned long size,
- int long_size);
-
-enum tep_errno tep_parse_event(struct tep_handle *tep, const char *buf,
- unsigned long size, const char *sys);
-enum tep_errno tep_parse_format(struct tep_handle *tep,
- struct tep_event **eventp,
- const char *buf,
- unsigned long size, const char *sys);
-
-void *tep_get_field_raw(struct trace_seq *s, struct tep_event *event,
- const char *name, struct tep_record *record,
- int *len, int err);
-
-int tep_get_field_val(struct trace_seq *s, struct tep_event *event,
- const char *name, struct tep_record *record,
- unsigned long long *val, int err);
-int tep_get_common_field_val(struct trace_seq *s, struct tep_event *event,
- const char *name, struct tep_record *record,
- unsigned long long *val, int err);
-int tep_get_any_field_val(struct trace_seq *s, struct tep_event *event,
- const char *name, struct tep_record *record,
- unsigned long long *val, int err);
-
-int tep_print_num_field(struct trace_seq *s, const char *fmt,
- struct tep_event *event, const char *name,
- struct tep_record *record, int err);
-
-int tep_print_func_field(struct trace_seq *s, const char *fmt,
- struct tep_event *event, const char *name,
- struct tep_record *record, int err);
-
-enum tep_reg_handler {
- TEP_REGISTER_SUCCESS = 0,
- TEP_REGISTER_SUCCESS_OVERWRITE,
-};
-
-int tep_register_event_handler(struct tep_handle *tep, int id,
- const char *sys_name, const char *event_name,
- tep_event_handler_func func, void *context);
-int tep_unregister_event_handler(struct tep_handle *tep, int id,
- const char *sys_name, const char *event_name,
- tep_event_handler_func func, void *context);
-int tep_register_print_function(struct tep_handle *tep,
- tep_func_handler func,
- enum tep_func_arg_type ret_type,
- char *name, ...);
-int tep_unregister_print_function(struct tep_handle *tep,
- tep_func_handler func, char *name);
-
-struct tep_format_field *tep_find_common_field(struct tep_event *event, const char *name);
-struct tep_format_field *tep_find_field(struct tep_event *event, const char *name);
-struct tep_format_field *tep_find_any_field(struct tep_event *event, const char *name);
-
-const char *tep_find_function(struct tep_handle *tep, unsigned long long addr);
-unsigned long long
-tep_find_function_address(struct tep_handle *tep, unsigned long long addr);
-unsigned long long tep_read_number(struct tep_handle *tep, const void *ptr, int size);
-int tep_read_number_field(struct tep_format_field *field, const void *data,
- unsigned long long *value);
-
-struct tep_event *tep_get_first_event(struct tep_handle *tep);
-int tep_get_events_count(struct tep_handle *tep);
-struct tep_event *tep_find_event(struct tep_handle *tep, int id);
-
-struct tep_event *
-tep_find_event_by_name(struct tep_handle *tep, const char *sys, const char *name);
-struct tep_event *
-tep_find_event_by_record(struct tep_handle *tep, struct tep_record *record);
-
-int tep_data_type(struct tep_handle *tep, struct tep_record *rec);
-int tep_data_pid(struct tep_handle *tep, struct tep_record *rec);
-int tep_data_preempt_count(struct tep_handle *tep, struct tep_record *rec);
-int tep_data_flags(struct tep_handle *tep, struct tep_record *rec);
-const char *tep_data_comm_from_pid(struct tep_handle *tep, int pid);
-struct tep_cmdline;
-struct tep_cmdline *tep_data_pid_from_comm(struct tep_handle *tep, const char *comm,
- struct tep_cmdline *next);
-int tep_cmdline_pid(struct tep_handle *tep, struct tep_cmdline *cmdline);
-
-void tep_print_field(struct trace_seq *s, void *data,
- struct tep_format_field *field);
-void tep_print_fields(struct trace_seq *s, void *data,
- int size __maybe_unused, struct tep_event *event);
-int tep_strerror(struct tep_handle *tep, enum tep_errno errnum,
- char *buf, size_t buflen);
-
-struct tep_event **tep_list_events(struct tep_handle *tep, enum tep_event_sort_type);
-struct tep_event **tep_list_events_copy(struct tep_handle *tep,
- enum tep_event_sort_type);
-struct tep_format_field **tep_event_common_fields(struct tep_event *event);
-struct tep_format_field **tep_event_fields(struct tep_event *event);
-
-enum tep_endian {
- TEP_LITTLE_ENDIAN = 0,
- TEP_BIG_ENDIAN
-};
-int tep_get_cpus(struct tep_handle *tep);
-void tep_set_cpus(struct tep_handle *tep, int cpus);
-int tep_get_long_size(struct tep_handle *tep);
-void tep_set_long_size(struct tep_handle *tep, int long_size);
-int tep_get_page_size(struct tep_handle *tep);
-void tep_set_page_size(struct tep_handle *tep, int _page_size);
-bool tep_is_file_bigendian(struct tep_handle *tep);
-void tep_set_file_bigendian(struct tep_handle *tep, enum tep_endian endian);
-bool tep_is_local_bigendian(struct tep_handle *tep);
-void tep_set_local_bigendian(struct tep_handle *tep, enum tep_endian endian);
-int tep_get_header_page_size(struct tep_handle *tep);
-int tep_get_header_timestamp_size(struct tep_handle *tep);
-bool tep_is_old_format(struct tep_handle *tep);
-void tep_set_test_filters(struct tep_handle *tep, int test_filters);
-
-struct tep_handle *tep_alloc(void);
-void tep_free(struct tep_handle *tep);
-void tep_ref(struct tep_handle *tep);
-void tep_unref(struct tep_handle *tep);
-int tep_get_ref(struct tep_handle *tep);
-
-/* for debugging */
-void tep_print_funcs(struct tep_handle *tep);
-void tep_print_printk(struct tep_handle *tep);
-
-/* ----------------------- filtering ----------------------- */
-
-enum tep_filter_boolean_type {
- TEP_FILTER_FALSE,
- TEP_FILTER_TRUE,
-};
-
-enum tep_filter_op_type {
- TEP_FILTER_OP_AND = 1,
- TEP_FILTER_OP_OR,
- TEP_FILTER_OP_NOT,
-};
-
-enum tep_filter_cmp_type {
- TEP_FILTER_CMP_NONE,
- TEP_FILTER_CMP_EQ,
- TEP_FILTER_CMP_NE,
- TEP_FILTER_CMP_GT,
- TEP_FILTER_CMP_LT,
- TEP_FILTER_CMP_GE,
- TEP_FILTER_CMP_LE,
- TEP_FILTER_CMP_MATCH,
- TEP_FILTER_CMP_NOT_MATCH,
- TEP_FILTER_CMP_REGEX,
- TEP_FILTER_CMP_NOT_REGEX,
-};
-
-enum tep_filter_exp_type {
- TEP_FILTER_EXP_NONE,
- TEP_FILTER_EXP_ADD,
- TEP_FILTER_EXP_SUB,
- TEP_FILTER_EXP_MUL,
- TEP_FILTER_EXP_DIV,
- TEP_FILTER_EXP_MOD,
- TEP_FILTER_EXP_RSHIFT,
- TEP_FILTER_EXP_LSHIFT,
- TEP_FILTER_EXP_AND,
- TEP_FILTER_EXP_OR,
- TEP_FILTER_EXP_XOR,
- TEP_FILTER_EXP_NOT,
-};
-
-enum tep_filter_arg_type {
- TEP_FILTER_ARG_NONE,
- TEP_FILTER_ARG_BOOLEAN,
- TEP_FILTER_ARG_VALUE,
- TEP_FILTER_ARG_FIELD,
- TEP_FILTER_ARG_EXP,
- TEP_FILTER_ARG_OP,
- TEP_FILTER_ARG_NUM,
- TEP_FILTER_ARG_STR,
-};
-
-enum tep_filter_value_type {
- TEP_FILTER_NUMBER,
- TEP_FILTER_STRING,
- TEP_FILTER_CHAR
-};
-
-struct tep_filter_arg;
-
-struct tep_filter_arg_boolean {
- enum tep_filter_boolean_type value;
-};
-
-struct tep_filter_arg_field {
- struct tep_format_field *field;
-};
-
-struct tep_filter_arg_value {
- enum tep_filter_value_type type;
- union {
- char *str;
- unsigned long long val;
- };
-};
-
-struct tep_filter_arg_op {
- enum tep_filter_op_type type;
- struct tep_filter_arg *left;
- struct tep_filter_arg *right;
-};
-
-struct tep_filter_arg_exp {
- enum tep_filter_exp_type type;
- struct tep_filter_arg *left;
- struct tep_filter_arg *right;
-};
-
-struct tep_filter_arg_num {
- enum tep_filter_cmp_type type;
- struct tep_filter_arg *left;
- struct tep_filter_arg *right;
-};
-
-struct tep_filter_arg_str {
- enum tep_filter_cmp_type type;
- struct tep_format_field *field;
- char *val;
- char *buffer;
- regex_t reg;
-};
-
-struct tep_filter_arg {
- enum tep_filter_arg_type type;
- union {
- struct tep_filter_arg_boolean boolean;
- struct tep_filter_arg_field field;
- struct tep_filter_arg_value value;
- struct tep_filter_arg_op op;
- struct tep_filter_arg_exp exp;
- struct tep_filter_arg_num num;
- struct tep_filter_arg_str str;
- };
-};
-
-struct tep_filter_type {
- int event_id;
- struct tep_event *event;
- struct tep_filter_arg *filter;
-};
-
-#define TEP_FILTER_ERROR_BUFSZ 1024
-
-struct tep_event_filter {
- struct tep_handle *tep;
- int filters;
- struct tep_filter_type *event_filters;
- char error_buffer[TEP_FILTER_ERROR_BUFSZ];
-};
-
-struct tep_event_filter *tep_filter_alloc(struct tep_handle *tep);
-
-/* for backward compatibility */
-#define FILTER_NONE TEP_ERRNO__NO_FILTER
-#define FILTER_NOEXIST TEP_ERRNO__FILTER_NOT_FOUND
-#define FILTER_MISS TEP_ERRNO__FILTER_MISS
-#define FILTER_MATCH TEP_ERRNO__FILTER_MATCH
-
-enum tep_errno tep_filter_add_filter_str(struct tep_event_filter *filter,
- const char *filter_str);
-
-enum tep_errno tep_filter_match(struct tep_event_filter *filter,
- struct tep_record *record);
-
-int tep_filter_strerror(struct tep_event_filter *filter, enum tep_errno err,
- char *buf, size_t buflen);
-
-int tep_event_filtered(struct tep_event_filter *filter,
- int event_id);
-
-void tep_filter_reset(struct tep_event_filter *filter);
-
-void tep_filter_free(struct tep_event_filter *filter);
-
-char *tep_filter_make_string(struct tep_event_filter *filter, int event_id);
-
-int tep_filter_remove_event(struct tep_event_filter *filter,
- int event_id);
-
-int tep_filter_copy(struct tep_event_filter *dest, struct tep_event_filter *source);
-
-int tep_filter_compare(struct tep_event_filter *filter1, struct tep_event_filter *filter2);
-
-#endif /* _PARSE_EVENTS_H */
diff --git a/tools/lib/traceevent/event-plugin.c b/tools/lib/traceevent/event-plugin.c
deleted file mode 100644
index e7f93d5fe4fd..000000000000
--- a/tools/lib/traceevent/event-plugin.c
+++ /dev/null
@@ -1,711 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- */
-
-#include <ctype.h>
-#include <stdio.h>
-#include <string.h>
-#include <dlfcn.h>
-#include <stdlib.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <unistd.h>
-#include <dirent.h>
-#include <errno.h>
-#include "event-parse.h"
-#include "event-parse-local.h"
-#include "event-utils.h"
-#include "trace-seq.h"
-
-#define LOCAL_PLUGIN_DIR ".local/lib/traceevent/plugins/"
-
-static struct registered_plugin_options {
- struct registered_plugin_options *next;
- struct tep_plugin_option *options;
-} *registered_options;
-
-static struct trace_plugin_options {
- struct trace_plugin_options *next;
- char *plugin;
- char *option;
- char *value;
-} *trace_plugin_options;
-
-struct tep_plugin_list {
- struct tep_plugin_list *next;
- char *name;
- void *handle;
-};
-
-struct tep_plugins_dir {
- struct tep_plugins_dir *next;
- char *path;
- enum tep_plugin_load_priority prio;
-};
-
-static void lower_case(char *str)
-{
- if (!str)
- return;
- for (; *str; str++)
- *str = tolower(*str);
-}
-
-static int update_option_value(struct tep_plugin_option *op, const char *val)
-{
- char *op_val;
-
- if (!val) {
- /* toggle, only if option is boolean */
- if (op->value)
- /* Warn? */
- return 0;
- op->set ^= 1;
- return 0;
- }
-
- /*
- * If the option has a value then it takes a string
- * otherwise the option is a boolean.
- */
- if (op->value) {
- op->value = val;
- return 0;
- }
-
- /* Option is boolean, must be either "1", "0", "true" or "false" */
-
- op_val = strdup(val);
- if (!op_val)
- return -1;
- lower_case(op_val);
-
- if (strcmp(val, "1") == 0 || strcmp(val, "true") == 0)
- op->set = 1;
- else if (strcmp(val, "0") == 0 || strcmp(val, "false") == 0)
- op->set = 0;
- free(op_val);
-
- return 0;
-}
-
-/**
- * tep_plugin_list_options - get list of plugin options
- *
- * Returns an array of char strings that list the currently registered
- * plugin options in the format of <plugin>:<option>. This list can be
- * used by toggling the option.
- *
- * Returns NULL if there's no options registered. On error it returns
- * INVALID_PLUGIN_LIST_OPTION
- *
- * Must be freed with tep_plugin_free_options_list().
- */
-char **tep_plugin_list_options(void)
-{
- struct registered_plugin_options *reg;
- struct tep_plugin_option *op;
- char **list = NULL;
- char *name;
- int count = 0;
-
- for (reg = registered_options; reg; reg = reg->next) {
- for (op = reg->options; op->name; op++) {
- char *alias = op->plugin_alias ? op->plugin_alias : op->file;
- char **temp = list;
- int ret;
-
- ret = asprintf(&name, "%s:%s", alias, op->name);
- if (ret < 0)
- goto err;
-
- list = realloc(list, count + 2);
- if (!list) {
- list = temp;
- free(name);
- goto err;
- }
- list[count++] = name;
- list[count] = NULL;
- }
- }
- return list;
-
- err:
- while (--count >= 0)
- free(list[count]);
- free(list);
-
- return INVALID_PLUGIN_LIST_OPTION;
-}
-
-void tep_plugin_free_options_list(char **list)
-{
- int i;
-
- if (!list)
- return;
-
- if (list == INVALID_PLUGIN_LIST_OPTION)
- return;
-
- for (i = 0; list[i]; i++)
- free(list[i]);
-
- free(list);
-}
-
-static int
-update_option(const char *file, struct tep_plugin_option *option)
-{
- struct trace_plugin_options *op;
- char *plugin;
- int ret = 0;
-
- if (option->plugin_alias) {
- plugin = strdup(option->plugin_alias);
- if (!plugin)
- return -1;
- } else {
- char *p;
- plugin = strdup(file);
- if (!plugin)
- return -1;
- p = strstr(plugin, ".");
- if (p)
- *p = '\0';
- }
-
- /* first look for named options */
- for (op = trace_plugin_options; op; op = op->next) {
- if (!op->plugin)
- continue;
- if (strcmp(op->plugin, plugin) != 0)
- continue;
- if (strcmp(op->option, option->name) != 0)
- continue;
-
- ret = update_option_value(option, op->value);
- if (ret)
- goto out;
- break;
- }
-
- /* first look for unnamed options */
- for (op = trace_plugin_options; op; op = op->next) {
- if (op->plugin)
- continue;
- if (strcmp(op->option, option->name) != 0)
- continue;
-
- ret = update_option_value(option, op->value);
- break;
- }
-
- out:
- free(plugin);
- return ret;
-}
-
-/**
- * tep_plugin_add_options - Add a set of options by a plugin
- * @name: The name of the plugin adding the options
- * @options: The set of options being loaded
- *
- * Sets the options with the values that have been added by user.
- */
-int tep_plugin_add_options(const char *name,
- struct tep_plugin_option *options)
-{
- struct registered_plugin_options *reg;
-
- reg = malloc(sizeof(*reg));
- if (!reg)
- return -1;
- reg->next = registered_options;
- reg->options = options;
- registered_options = reg;
-
- while (options->name) {
- update_option(name, options);
- options++;
- }
- return 0;
-}
-
-/**
- * tep_plugin_remove_options - remove plugin options that were registered
- * @options: Options to removed that were registered with tep_plugin_add_options
- */
-void tep_plugin_remove_options(struct tep_plugin_option *options)
-{
- struct registered_plugin_options **last;
- struct registered_plugin_options *reg;
-
- for (last = &registered_options; *last; last = &(*last)->next) {
- if ((*last)->options == options) {
- reg = *last;
- *last = reg->next;
- free(reg);
- return;
- }
- }
-}
-
-static int parse_option_name(char **option, char **plugin)
-{
- char *p;
-
- *plugin = NULL;
-
- if ((p = strstr(*option, ":"))) {
- *plugin = *option;
- *p = '\0';
- *option = strdup(p + 1);
- if (!*option)
- return -1;
- }
- return 0;
-}
-
-static struct tep_plugin_option *
-find_registered_option(const char *plugin, const char *option)
-{
- struct registered_plugin_options *reg;
- struct tep_plugin_option *op;
- const char *op_plugin;
-
- for (reg = registered_options; reg; reg = reg->next) {
- for (op = reg->options; op->name; op++) {
- if (op->plugin_alias)
- op_plugin = op->plugin_alias;
- else
- op_plugin = op->file;
-
- if (plugin && strcmp(plugin, op_plugin) != 0)
- continue;
- if (strcmp(option, op->name) != 0)
- continue;
-
- return op;
- }
- }
-
- return NULL;
-}
-
-static int process_option(const char *plugin, const char *option, const char *val)
-{
- struct tep_plugin_option *op;
-
- op = find_registered_option(plugin, option);
- if (!op)
- return 0;
-
- return update_option_value(op, val);
-}
-
-/**
- * tep_plugin_add_option - add an option/val pair to set plugin options
- * @name: The name of the option (format: <plugin>:<option> or just <option>)
- * @val: (optional) the value for the option
- *
- * Modify a plugin option. If @val is given than the value of the option
- * is set (note, some options just take a boolean, so @val must be either
- * "1" or "0" or "true" or "false").
- */
-int tep_plugin_add_option(const char *name, const char *val)
-{
- struct trace_plugin_options *op;
- char *option_str;
- char *plugin;
-
- option_str = strdup(name);
- if (!option_str)
- return -ENOMEM;
-
- if (parse_option_name(&option_str, &plugin) < 0)
- return -ENOMEM;
-
- /* If the option exists, update the val */
- for (op = trace_plugin_options; op; op = op->next) {
- /* Both must be NULL or not NULL */
- if ((!plugin || !op->plugin) && plugin != op->plugin)
- continue;
- if (plugin && strcmp(plugin, op->plugin) != 0)
- continue;
- if (strcmp(op->option, option_str) != 0)
- continue;
-
- /* update option */
- free(op->value);
- if (val) {
- op->value = strdup(val);
- if (!op->value)
- goto out_free;
- } else
- op->value = NULL;
-
- /* plugin and option_str don't get freed at the end */
- free(plugin);
- free(option_str);
-
- plugin = op->plugin;
- option_str = op->option;
- break;
- }
-
- /* If not found, create */
- if (!op) {
- op = malloc(sizeof(*op));
- if (!op)
- goto out_free;
- memset(op, 0, sizeof(*op));
- op->plugin = plugin;
- op->option = option_str;
- if (val) {
- op->value = strdup(val);
- if (!op->value) {
- free(op);
- goto out_free;
- }
- }
- op->next = trace_plugin_options;
- trace_plugin_options = op;
- }
-
- return process_option(plugin, option_str, val);
-
-out_free:
- free(plugin);
- free(option_str);
- return -ENOMEM;
-}
-
-static void print_op_data(struct trace_seq *s, const char *name,
- const char *op)
-{
- if (op)
- trace_seq_printf(s, "%8s:\t%s\n", name, op);
-}
-
-/**
- * tep_plugin_print_options - print out the registered plugin options
- * @s: The trace_seq descriptor to write the plugin options into
- *
- * Writes a list of options into trace_seq @s.
- */
-void tep_plugin_print_options(struct trace_seq *s)
-{
- struct registered_plugin_options *reg;
- struct tep_plugin_option *op;
-
- for (reg = registered_options; reg; reg = reg->next) {
- if (reg != registered_options)
- trace_seq_printf(s, "============\n");
- for (op = reg->options; op->name; op++) {
- if (op != reg->options)
- trace_seq_printf(s, "------------\n");
- print_op_data(s, "file", op->file);
- print_op_data(s, "plugin", op->plugin_alias);
- print_op_data(s, "option", op->name);
- print_op_data(s, "desc", op->description);
- print_op_data(s, "value", op->value);
- trace_seq_printf(s, "%8s:\t%d\n", "set", op->set);
- }
- }
-}
-
-/**
- * tep_print_plugins - print out the list of plugins loaded
- * @s: the trace_seq descripter to write to
- * @prefix: The prefix string to add before listing the option name
- * @suffix: The suffix string ot append after the option name
- * @list: The list of plugins (usually returned by tep_load_plugins()
- *
- * Writes to the trace_seq @s the list of plugins (files) that is
- * returned by tep_load_plugins(). Use @prefix and @suffix for formating:
- * @prefix = " ", @suffix = "\n".
- */
-void tep_print_plugins(struct trace_seq *s,
- const char *prefix, const char *suffix,
- const struct tep_plugin_list *list)
-{
- while (list) {
- trace_seq_printf(s, "%s%s%s", prefix, list->name, suffix);
- list = list->next;
- }
-}
-
-static void
-load_plugin(struct tep_handle *tep, const char *path,
- const char *file, void *data)
-{
- struct tep_plugin_list **plugin_list = data;
- struct tep_plugin_option *options;
- tep_plugin_load_func func;
- struct tep_plugin_list *list;
- const char *alias;
- char *plugin;
- void *handle;
- int ret;
-
- ret = asprintf(&plugin, "%s/%s", path, file);
- if (ret < 0) {
- warning("could not allocate plugin memory\n");
- return;
- }
-
- handle = dlopen(plugin, RTLD_NOW | RTLD_GLOBAL);
- if (!handle) {
- warning("could not load plugin '%s'\n%s\n",
- plugin, dlerror());
- goto out_free;
- }
-
- alias = dlsym(handle, TEP_PLUGIN_ALIAS_NAME);
- if (!alias)
- alias = file;
-
- options = dlsym(handle, TEP_PLUGIN_OPTIONS_NAME);
- if (options) {
- while (options->name) {
- ret = update_option(alias, options);
- if (ret < 0)
- goto out_free;
- options++;
- }
- }
-
- func = dlsym(handle, TEP_PLUGIN_LOADER_NAME);
- if (!func) {
- warning("could not find func '%s' in plugin '%s'\n%s\n",
- TEP_PLUGIN_LOADER_NAME, plugin, dlerror());
- goto out_free;
- }
-
- list = malloc(sizeof(*list));
- if (!list) {
- warning("could not allocate plugin memory\n");
- goto out_free;
- }
-
- list->next = *plugin_list;
- list->handle = handle;
- list->name = plugin;
- *plugin_list = list;
-
- pr_stat("registering plugin: %s", plugin);
- func(tep);
- return;
-
- out_free:
- free(plugin);
-}
-
-static void
-load_plugins_dir(struct tep_handle *tep, const char *suffix,
- const char *path,
- void (*load_plugin)(struct tep_handle *tep,
- const char *path,
- const char *name,
- void *data),
- void *data)
-{
- struct dirent *dent;
- struct stat st;
- DIR *dir;
- int ret;
-
- ret = stat(path, &st);
- if (ret < 0)
- return;
-
- if (!S_ISDIR(st.st_mode))
- return;
-
- dir = opendir(path);
- if (!dir)
- return;
-
- while ((dent = readdir(dir))) {
- const char *name = dent->d_name;
-
- if (strcmp(name, ".") == 0 ||
- strcmp(name, "..") == 0)
- continue;
-
- /* Only load plugins that end in suffix */
- if (strcmp(name + (strlen(name) - strlen(suffix)), suffix) != 0)
- continue;
-
- load_plugin(tep, path, name, data);
- }
-
- closedir(dir);
-}
-
-/**
- * tep_load_plugins_hook - call a user specified callback to load a plugin
- * @tep: handler to traceevent context
- * @suffix: filter only plugin files with given suffix
- * @load_plugin: user specified callback, called for each plugin file
- * @data: custom context, passed to @load_plugin
- *
- * Searches for traceevent plugin files and calls @load_plugin for each
- * The order of plugins search is:
- * - Directories, specified in @tep->plugins_dir and priority TEP_PLUGIN_FIRST
- * - Directory, specified at compile time with PLUGIN_TRACEEVENT_DIR
- * - Directory, specified by environment variable TRACEEVENT_PLUGIN_DIR
- * - In user's home: ~/.local/lib/traceevent/plugins/
- * - Directories, specified in @tep->plugins_dir and priority TEP_PLUGIN_LAST
- *
- */
-void tep_load_plugins_hook(struct tep_handle *tep, const char *suffix,
- void (*load_plugin)(struct tep_handle *tep,
- const char *path,
- const char *name,
- void *data),
- void *data)
-{
- struct tep_plugins_dir *dir = NULL;
- char *home;
- char *path;
- char *envdir;
- int ret;
-
- if (tep && tep->flags & TEP_DISABLE_PLUGINS)
- return;
-
- if (tep)
- dir = tep->plugins_dir;
- while (dir) {
- if (dir->prio == TEP_PLUGIN_FIRST)
- load_plugins_dir(tep, suffix, dir->path,
- load_plugin, data);
- dir = dir->next;
- }
-
- /*
- * If a system plugin directory was defined,
- * check that first.
- */
-#ifdef PLUGIN_DIR
- if (!tep || !(tep->flags & TEP_DISABLE_SYS_PLUGINS))
- load_plugins_dir(tep, suffix, PLUGIN_DIR,
- load_plugin, data);
-#endif
-
- /*
- * Next let the environment-set plugin directory
- * override the system defaults.
- */
- envdir = getenv("TRACEEVENT_PLUGIN_DIR");
- if (envdir)
- load_plugins_dir(tep, suffix, envdir, load_plugin, data);
-
- /*
- * Now let the home directory override the environment
- * or system defaults.
- */
- home = getenv("HOME");
- if (!home)
- return;
-
- ret = asprintf(&path, "%s/%s", home, LOCAL_PLUGIN_DIR);
- if (ret < 0) {
- warning("could not allocate plugin memory\n");
- return;
- }
-
- load_plugins_dir(tep, suffix, path, load_plugin, data);
-
- if (tep)
- dir = tep->plugins_dir;
- while (dir) {
- if (dir->prio == TEP_PLUGIN_LAST)
- load_plugins_dir(tep, suffix, dir->path,
- load_plugin, data);
- dir = dir->next;
- }
-
- free(path);
-}
-
-struct tep_plugin_list*
-tep_load_plugins(struct tep_handle *tep)
-{
- struct tep_plugin_list *list = NULL;
-
- tep_load_plugins_hook(tep, ".so", load_plugin, &list);
- return list;
-}
-
-/**
- * tep_add_plugin_path - Add a new plugin directory.
- * @tep: Trace event handler.
- * @path: Path to a directory. All plugin files in that
- * directory will be loaded.
- *@prio: Load priority of the plugins in that directory.
- *
- * Returns -1 in case of an error, 0 otherwise.
- */
-int tep_add_plugin_path(struct tep_handle *tep, char *path,
- enum tep_plugin_load_priority prio)
-{
- struct tep_plugins_dir *dir;
-
- if (!tep || !path)
- return -1;
-
- dir = calloc(1, sizeof(*dir));
- if (!dir)
- return -1;
-
- dir->path = strdup(path);
- if (!dir->path) {
- free(dir);
- return -1;
- }
- dir->prio = prio;
- dir->next = tep->plugins_dir;
- tep->plugins_dir = dir;
-
- return 0;
-}
-
-__hidden void free_tep_plugin_paths(struct tep_handle *tep)
-{
- struct tep_plugins_dir *dir;
-
- if (!tep)
- return;
-
- dir = tep->plugins_dir;
- while (dir) {
- tep->plugins_dir = tep->plugins_dir->next;
- free(dir->path);
- free(dir);
- dir = tep->plugins_dir;
- }
-}
-
-void
-tep_unload_plugins(struct tep_plugin_list *plugin_list, struct tep_handle *tep)
-{
- tep_plugin_unload_func func;
- struct tep_plugin_list *list;
-
- while (plugin_list) {
- list = plugin_list;
- plugin_list = list->next;
- func = dlsym(list->handle, TEP_PLUGIN_UNLOADER_NAME);
- if (func)
- func(tep);
- dlclose(list->handle);
- free(list->name);
- free(list);
- }
-}
diff --git a/tools/lib/traceevent/event-utils.h b/tools/lib/traceevent/event-utils.h
deleted file mode 100644
index 0560b96a31d1..000000000000
--- a/tools/lib/traceevent/event-utils.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/* SPDX-License-Identifier: LGPL-2.1 */
-/*
- * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- */
-#ifndef __UTIL_H
-#define __UTIL_H
-
-#include <ctype.h>
-
-/* Can be overridden */
-void warning(const char *fmt, ...);
-void pr_stat(const char *fmt, ...);
-void vpr_stat(const char *fmt, va_list ap);
-
-/* Always available */
-void __warning(const char *fmt, ...);
-void __pr_stat(const char *fmt, ...);
-
-void __vwarning(const char *fmt, ...);
-void __vpr_stat(const char *fmt, ...);
-
-#define min(x, y) ({ \
- typeof(x) _min1 = (x); \
- typeof(y) _min2 = (y); \
- (void) (&_min1 == &_min2); \
- _min1 < _min2 ? _min1 : _min2; })
-
-static inline char *strim(char *string)
-{
- char *ret;
-
- if (!string)
- return NULL;
- while (*string) {
- if (!isspace(*string))
- break;
- string++;
- }
- ret = string;
-
- string = ret + strlen(ret) - 1;
- while (string > ret) {
- if (!isspace(*string))
- break;
- string--;
- }
- string[1] = 0;
-
- return ret;
-}
-
-static inline int has_text(const char *text)
-{
- if (!text)
- return 0;
-
- while (*text) {
- if (!isspace(*text))
- return 1;
- text++;
- }
-
- return 0;
-}
-
-#endif
diff --git a/tools/lib/traceevent/kbuffer-parse.c b/tools/lib/traceevent/kbuffer-parse.c
deleted file mode 100644
index f1640d651c8a..000000000000
--- a/tools/lib/traceevent/kbuffer-parse.c
+++ /dev/null
@@ -1,809 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "kbuffer.h"
-
-#define MISSING_EVENTS (1UL << 31)
-#define MISSING_STORED (1UL << 30)
-
-#define COMMIT_MASK ((1 << 27) - 1)
-
-enum {
- KBUFFER_FL_HOST_BIG_ENDIAN = (1<<0),
- KBUFFER_FL_BIG_ENDIAN = (1<<1),
- KBUFFER_FL_LONG_8 = (1<<2),
- KBUFFER_FL_OLD_FORMAT = (1<<3),
-};
-
-#define ENDIAN_MASK (KBUFFER_FL_HOST_BIG_ENDIAN | KBUFFER_FL_BIG_ENDIAN)
-
-/** kbuffer
- * @timestamp - timestamp of current event
- * @lost_events - # of lost events between this subbuffer and previous
- * @flags - special flags of the kbuffer
- * @subbuffer - pointer to the sub-buffer page
- * @data - pointer to the start of data on the sub-buffer page
- * @index - index from @data to the @curr event data
- * @curr - offset from @data to the start of current event
- * (includes metadata)
- * @next - offset from @data to the start of next event
- * @size - The size of data on @data
- * @start - The offset from @subbuffer where @data lives
- *
- * @read_4 - Function to read 4 raw bytes (may swap)
- * @read_8 - Function to read 8 raw bytes (may swap)
- * @read_long - Function to read a long word (4 or 8 bytes with needed swap)
- */
-struct kbuffer {
- unsigned long long timestamp;
- long long lost_events;
- unsigned long flags;
- void *subbuffer;
- void *data;
- unsigned int index;
- unsigned int curr;
- unsigned int next;
- unsigned int size;
- unsigned int start;
-
- unsigned int (*read_4)(void *ptr);
- unsigned long long (*read_8)(void *ptr);
- unsigned long long (*read_long)(struct kbuffer *kbuf, void *ptr);
- int (*next_event)(struct kbuffer *kbuf);
-};
-
-static void *zmalloc(size_t size)
-{
- return calloc(1, size);
-}
-
-static int host_is_bigendian(void)
-{
- unsigned char str[] = { 0x1, 0x2, 0x3, 0x4 };
- unsigned int *ptr;
-
- ptr = (unsigned int *)str;
- return *ptr == 0x01020304;
-}
-
-static int do_swap(struct kbuffer *kbuf)
-{
- return ((kbuf->flags & KBUFFER_FL_HOST_BIG_ENDIAN) + kbuf->flags) &
- ENDIAN_MASK;
-}
-
-static unsigned long long __read_8(void *ptr)
-{
- unsigned long long data = *(unsigned long long *)ptr;
-
- return data;
-}
-
-static unsigned long long __read_8_sw(void *ptr)
-{
- unsigned long long data = *(unsigned long long *)ptr;
- unsigned long long swap;
-
- swap = ((data & 0xffULL) << 56) |
- ((data & (0xffULL << 8)) << 40) |
- ((data & (0xffULL << 16)) << 24) |
- ((data & (0xffULL << 24)) << 8) |
- ((data & (0xffULL << 32)) >> 8) |
- ((data & (0xffULL << 40)) >> 24) |
- ((data & (0xffULL << 48)) >> 40) |
- ((data & (0xffULL << 56)) >> 56);
-
- return swap;
-}
-
-static unsigned int __read_4(void *ptr)
-{
- unsigned int data = *(unsigned int *)ptr;
-
- return data;
-}
-
-static unsigned int __read_4_sw(void *ptr)
-{
- unsigned int data = *(unsigned int *)ptr;
- unsigned int swap;
-
- swap = ((data & 0xffULL) << 24) |
- ((data & (0xffULL << 8)) << 8) |
- ((data & (0xffULL << 16)) >> 8) |
- ((data & (0xffULL << 24)) >> 24);
-
- return swap;
-}
-
-static unsigned long long read_8(struct kbuffer *kbuf, void *ptr)
-{
- return kbuf->read_8(ptr);
-}
-
-static unsigned int read_4(struct kbuffer *kbuf, void *ptr)
-{
- return kbuf->read_4(ptr);
-}
-
-static unsigned long long __read_long_8(struct kbuffer *kbuf, void *ptr)
-{
- return kbuf->read_8(ptr);
-}
-
-static unsigned long long __read_long_4(struct kbuffer *kbuf, void *ptr)
-{
- return kbuf->read_4(ptr);
-}
-
-static unsigned long long read_long(struct kbuffer *kbuf, void *ptr)
-{
- return kbuf->read_long(kbuf, ptr);
-}
-
-static int calc_index(struct kbuffer *kbuf, void *ptr)
-{
- return (unsigned long)ptr - (unsigned long)kbuf->data;
-}
-
-static int __next_event(struct kbuffer *kbuf);
-
-/**
- * kbuffer_alloc - allocat a new kbuffer
- * @size; enum to denote size of word
- * @endian: enum to denote endianness
- *
- * Allocates and returns a new kbuffer.
- */
-struct kbuffer *
-kbuffer_alloc(enum kbuffer_long_size size, enum kbuffer_endian endian)
-{
- struct kbuffer *kbuf;
- int flags = 0;
-
- switch (size) {
- case KBUFFER_LSIZE_4:
- break;
- case KBUFFER_LSIZE_8:
- flags |= KBUFFER_FL_LONG_8;
- break;
- default:
- return NULL;
- }
-
- switch (endian) {
- case KBUFFER_ENDIAN_LITTLE:
- break;
- case KBUFFER_ENDIAN_BIG:
- flags |= KBUFFER_FL_BIG_ENDIAN;
- break;
- default:
- return NULL;
- }
-
- kbuf = zmalloc(sizeof(*kbuf));
- if (!kbuf)
- return NULL;
-
- kbuf->flags = flags;
-
- if (host_is_bigendian())
- kbuf->flags |= KBUFFER_FL_HOST_BIG_ENDIAN;
-
- if (do_swap(kbuf)) {
- kbuf->read_8 = __read_8_sw;
- kbuf->read_4 = __read_4_sw;
- } else {
- kbuf->read_8 = __read_8;
- kbuf->read_4 = __read_4;
- }
-
- if (kbuf->flags & KBUFFER_FL_LONG_8)
- kbuf->read_long = __read_long_8;
- else
- kbuf->read_long = __read_long_4;
-
- /* May be changed by kbuffer_set_old_format() */
- kbuf->next_event = __next_event;
-
- return kbuf;
-}
-
-/** kbuffer_free - free an allocated kbuffer
- * @kbuf: The kbuffer to free
- *
- * Can take NULL as a parameter.
- */
-void kbuffer_free(struct kbuffer *kbuf)
-{
- free(kbuf);
-}
-
-static unsigned int type4host(struct kbuffer *kbuf,
- unsigned int type_len_ts)
-{
- if (kbuf->flags & KBUFFER_FL_BIG_ENDIAN)
- return (type_len_ts >> 29) & 3;
- else
- return type_len_ts & 3;
-}
-
-static unsigned int len4host(struct kbuffer *kbuf,
- unsigned int type_len_ts)
-{
- if (kbuf->flags & KBUFFER_FL_BIG_ENDIAN)
- return (type_len_ts >> 27) & 7;
- else
- return (type_len_ts >> 2) & 7;
-}
-
-static unsigned int type_len4host(struct kbuffer *kbuf,
- unsigned int type_len_ts)
-{
- if (kbuf->flags & KBUFFER_FL_BIG_ENDIAN)
- return (type_len_ts >> 27) & ((1 << 5) - 1);
- else
- return type_len_ts & ((1 << 5) - 1);
-}
-
-static unsigned int ts4host(struct kbuffer *kbuf,
- unsigned int type_len_ts)
-{
- if (kbuf->flags & KBUFFER_FL_BIG_ENDIAN)
- return type_len_ts & ((1 << 27) - 1);
- else
- return type_len_ts >> 5;
-}
-
-/*
- * Linux 2.6.30 and earlier (not much ealier) had a different
- * ring buffer format. It should be obsolete, but we handle it anyway.
- */
-enum old_ring_buffer_type {
- OLD_RINGBUF_TYPE_PADDING,
- OLD_RINGBUF_TYPE_TIME_EXTEND,
- OLD_RINGBUF_TYPE_TIME_STAMP,
- OLD_RINGBUF_TYPE_DATA,
-};
-
-static unsigned int old_update_pointers(struct kbuffer *kbuf)
-{
- unsigned long long extend;
- unsigned int type_len_ts;
- unsigned int type;
- unsigned int len;
- unsigned int delta;
- unsigned int length;
- void *ptr = kbuf->data + kbuf->curr;
-
- type_len_ts = read_4(kbuf, ptr);
- ptr += 4;
-
- type = type4host(kbuf, type_len_ts);
- len = len4host(kbuf, type_len_ts);
- delta = ts4host(kbuf, type_len_ts);
-
- switch (type) {
- case OLD_RINGBUF_TYPE_PADDING:
- kbuf->next = kbuf->size;
- return 0;
-
- case OLD_RINGBUF_TYPE_TIME_EXTEND:
- extend = read_4(kbuf, ptr);
- extend <<= TS_SHIFT;
- extend += delta;
- delta = extend;
- ptr += 4;
- length = 0;
- break;
-
- case OLD_RINGBUF_TYPE_TIME_STAMP:
- /* should never happen! */
- kbuf->curr = kbuf->size;
- kbuf->next = kbuf->size;
- kbuf->index = kbuf->size;
- return -1;
- default:
- if (len)
- length = len * 4;
- else {
- length = read_4(kbuf, ptr);
- length -= 4;
- ptr += 4;
- }
- break;
- }
-
- kbuf->timestamp += delta;
- kbuf->index = calc_index(kbuf, ptr);
- kbuf->next = kbuf->index + length;
-
- return type;
-}
-
-static int __old_next_event(struct kbuffer *kbuf)
-{
- int type;
-
- do {
- kbuf->curr = kbuf->next;
- if (kbuf->next >= kbuf->size)
- return -1;
- type = old_update_pointers(kbuf);
- } while (type == OLD_RINGBUF_TYPE_TIME_EXTEND || type == OLD_RINGBUF_TYPE_PADDING);
-
- return 0;
-}
-
-static unsigned int
-translate_data(struct kbuffer *kbuf, void *data, void **rptr,
- unsigned long long *delta, int *length)
-{
- unsigned long long extend;
- unsigned int type_len_ts;
- unsigned int type_len;
-
- type_len_ts = read_4(kbuf, data);
- data += 4;
-
- type_len = type_len4host(kbuf, type_len_ts);
- *delta = ts4host(kbuf, type_len_ts);
-
- switch (type_len) {
- case KBUFFER_TYPE_PADDING:
- *length = read_4(kbuf, data);
- break;
-
- case KBUFFER_TYPE_TIME_EXTEND:
- case KBUFFER_TYPE_TIME_STAMP:
- extend = read_4(kbuf, data);
- data += 4;
- extend <<= TS_SHIFT;
- extend += *delta;
- *delta = extend;
- *length = 0;
- break;
-
- case 0:
- *length = read_4(kbuf, data) - 4;
- *length = (*length + 3) & ~3;
- data += 4;
- break;
- default:
- *length = type_len * 4;
- break;
- }
-
- *rptr = data;
-
- return type_len;
-}
-
-static unsigned int update_pointers(struct kbuffer *kbuf)
-{
- unsigned long long delta;
- unsigned int type_len;
- int length;
- void *ptr = kbuf->data + kbuf->curr;
-
- type_len = translate_data(kbuf, ptr, &ptr, &delta, &length);
-
- if (type_len == KBUFFER_TYPE_TIME_STAMP)
- kbuf->timestamp = delta;
- else
- kbuf->timestamp += delta;
-
- kbuf->index = calc_index(kbuf, ptr);
- kbuf->next = kbuf->index + length;
-
- return type_len;
-}
-
-/**
- * kbuffer_translate_data - read raw data to get a record
- * @swap: Set to 1 if bytes in words need to be swapped when read
- * @data: The raw data to read
- * @size: Address to store the size of the event data.
- *
- * Returns a pointer to the event data. To determine the entire
- * record size (record metadata + data) just add the difference between
- * @data and the returned value to @size.
- */
-void *kbuffer_translate_data(int swap, void *data, unsigned int *size)
-{
- unsigned long long delta;
- struct kbuffer kbuf;
- int type_len;
- int length;
- void *ptr;
-
- if (swap) {
- kbuf.read_8 = __read_8_sw;
- kbuf.read_4 = __read_4_sw;
- kbuf.flags = host_is_bigendian() ? 0 : KBUFFER_FL_BIG_ENDIAN;
- } else {
- kbuf.read_8 = __read_8;
- kbuf.read_4 = __read_4;
- kbuf.flags = host_is_bigendian() ? KBUFFER_FL_BIG_ENDIAN: 0;
- }
-
- type_len = translate_data(&kbuf, data, &ptr, &delta, &length);
- switch (type_len) {
- case KBUFFER_TYPE_PADDING:
- case KBUFFER_TYPE_TIME_EXTEND:
- case KBUFFER_TYPE_TIME_STAMP:
- return NULL;
- }
-
- *size = length;
-
- return ptr;
-}
-
-static int __next_event(struct kbuffer *kbuf)
-{
- int type;
-
- do {
- kbuf->curr = kbuf->next;
- if (kbuf->next >= kbuf->size)
- return -1;
- type = update_pointers(kbuf);
- } while (type == KBUFFER_TYPE_TIME_EXTEND ||
- type == KBUFFER_TYPE_TIME_STAMP ||
- type == KBUFFER_TYPE_PADDING);
-
- return 0;
-}
-
-static int next_event(struct kbuffer *kbuf)
-{
- return kbuf->next_event(kbuf);
-}
-
-/**
- * kbuffer_next_event - increment the current pointer
- * @kbuf: The kbuffer to read
- * @ts: Address to store the next record's timestamp (may be NULL to ignore)
- *
- * Increments the pointers into the subbuffer of the kbuffer to point to the
- * next event so that the next kbuffer_read_event() will return a
- * new event.
- *
- * Returns the data of the next event if a new event exists on the subbuffer,
- * NULL otherwise.
- */
-void *kbuffer_next_event(struct kbuffer *kbuf, unsigned long long *ts)
-{
- int ret;
-
- if (!kbuf || !kbuf->subbuffer)
- return NULL;
-
- ret = next_event(kbuf);
- if (ret < 0)
- return NULL;
-
- if (ts)
- *ts = kbuf->timestamp;
-
- return kbuf->data + kbuf->index;
-}
-
-/**
- * kbuffer_load_subbuffer - load a new subbuffer into the kbuffer
- * @kbuf: The kbuffer to load
- * @subbuffer: The subbuffer to load into @kbuf.
- *
- * Load a new subbuffer (page) into @kbuf. This will reset all
- * the pointers and update the @kbuf timestamp. The next read will
- * return the first event on @subbuffer.
- *
- * Returns 0 on succes, -1 otherwise.
- */
-int kbuffer_load_subbuffer(struct kbuffer *kbuf, void *subbuffer)
-{
- unsigned long long flags;
- void *ptr = subbuffer;
-
- if (!kbuf || !subbuffer)
- return -1;
-
- kbuf->subbuffer = subbuffer;
-
- kbuf->timestamp = read_8(kbuf, ptr);
- ptr += 8;
-
- kbuf->curr = 0;
-
- if (kbuf->flags & KBUFFER_FL_LONG_8)
- kbuf->start = 16;
- else
- kbuf->start = 12;
-
- kbuf->data = subbuffer + kbuf->start;
-
- flags = read_long(kbuf, ptr);
- kbuf->size = (unsigned int)flags & COMMIT_MASK;
-
- if (flags & MISSING_EVENTS) {
- if (flags & MISSING_STORED) {
- ptr = kbuf->data + kbuf->size;
- kbuf->lost_events = read_long(kbuf, ptr);
- } else
- kbuf->lost_events = -1;
- } else
- kbuf->lost_events = 0;
-
- kbuf->index = 0;
- kbuf->next = 0;
-
- next_event(kbuf);
-
- return 0;
-}
-
-/**
- * kbuffer_subbuf_timestamp - read the timestamp from a sub buffer
- * @kbuf: The kbuffer to load
- * @subbuf: The subbuffer to read from.
- *
- * Return the timestamp from a subbuffer.
- */
-unsigned long long kbuffer_subbuf_timestamp(struct kbuffer *kbuf, void *subbuf)
-{
- return kbuf->read_8(subbuf);
-}
-
-/**
- * kbuffer_ptr_delta - read the delta field from a record
- * @kbuf: The kbuffer to load
- * @ptr: The record in the buffe.
- *
- * Return the timestamp delta from a record
- */
-unsigned int kbuffer_ptr_delta(struct kbuffer *kbuf, void *ptr)
-{
- unsigned int type_len_ts;
-
- type_len_ts = read_4(kbuf, ptr);
- return ts4host(kbuf, type_len_ts);
-}
-
-
-/**
- * kbuffer_read_event - read the next event in the kbuffer subbuffer
- * @kbuf: The kbuffer to read from
- * @ts: The address to store the timestamp of the event (may be NULL to ignore)
- *
- * Returns a pointer to the data part of the current event.
- * NULL if no event is left on the subbuffer.
- */
-void *kbuffer_read_event(struct kbuffer *kbuf, unsigned long long *ts)
-{
- if (!kbuf || !kbuf->subbuffer)
- return NULL;
-
- if (kbuf->curr >= kbuf->size)
- return NULL;
-
- if (ts)
- *ts = kbuf->timestamp;
- return kbuf->data + kbuf->index;
-}
-
-/**
- * kbuffer_timestamp - Return the timestamp of the current event
- * @kbuf: The kbuffer to read from
- *
- * Returns the timestamp of the current (next) event.
- */
-unsigned long long kbuffer_timestamp(struct kbuffer *kbuf)
-{
- return kbuf->timestamp;
-}
-
-/**
- * kbuffer_read_at_offset - read the event that is at offset
- * @kbuf: The kbuffer to read from
- * @offset: The offset into the subbuffer
- * @ts: The address to store the timestamp of the event (may be NULL to ignore)
- *
- * The @offset must be an index from the @kbuf subbuffer beginning.
- * If @offset is bigger than the stored subbuffer, NULL will be returned.
- *
- * Returns the data of the record that is at @offset. Note, @offset does
- * not need to be the start of the record, the offset just needs to be
- * in the record (or beginning of it).
- *
- * Note, the kbuf timestamp and pointers are updated to the
- * returned record. That is, kbuffer_read_event() will return the same
- * data and timestamp, and kbuffer_next_event() will increment from
- * this record.
- */
-void *kbuffer_read_at_offset(struct kbuffer *kbuf, int offset,
- unsigned long long *ts)
-{
- void *data;
-
- if (offset < kbuf->start)
- offset = 0;
- else
- offset -= kbuf->start;
-
- /* Reset the buffer */
- kbuffer_load_subbuffer(kbuf, kbuf->subbuffer);
- data = kbuffer_read_event(kbuf, ts);
-
- while (kbuf->curr < offset) {
- data = kbuffer_next_event(kbuf, ts);
- if (!data)
- break;
- }
-
- return data;
-}
-
-/**
- * kbuffer_subbuffer_size - the size of the loaded subbuffer
- * @kbuf: The kbuffer to read from
- *
- * Returns the size of the subbuffer. Note, this size is
- * where the last event resides. The stored subbuffer may actually be
- * bigger due to padding and such.
- */
-int kbuffer_subbuffer_size(struct kbuffer *kbuf)
-{
- return kbuf->size;
-}
-
-/**
- * kbuffer_curr_index - Return the index of the record
- * @kbuf: The kbuffer to read from
- *
- * Returns the index from the start of the data part of
- * the subbuffer to the current location. Note this is not
- * from the start of the subbuffer. An index of zero will
- * point to the first record. Use kbuffer_curr_offset() for
- * the actually offset (that can be used by kbuffer_read_at_offset())
- */
-int kbuffer_curr_index(struct kbuffer *kbuf)
-{
- return kbuf->curr;
-}
-
-/**
- * kbuffer_curr_offset - Return the offset of the record
- * @kbuf: The kbuffer to read from
- *
- * Returns the offset from the start of the subbuffer to the
- * current location.
- */
-int kbuffer_curr_offset(struct kbuffer *kbuf)
-{
- return kbuf->curr + kbuf->start;
-}
-
-/**
- * kbuffer_event_size - return the size of the event data
- * @kbuf: The kbuffer to read
- *
- * Returns the size of the event data (the payload not counting
- * the meta data of the record) of the current event.
- */
-int kbuffer_event_size(struct kbuffer *kbuf)
-{
- return kbuf->next - kbuf->index;
-}
-
-/**
- * kbuffer_curr_size - return the size of the entire record
- * @kbuf: The kbuffer to read
- *
- * Returns the size of the entire record (meta data and payload)
- * of the current event.
- */
-int kbuffer_curr_size(struct kbuffer *kbuf)
-{
- return kbuf->next - kbuf->curr;
-}
-
-/**
- * kbuffer_missed_events - return the # of missed events from last event.
- * @kbuf: The kbuffer to read from
- *
- * Returns the # of missed events (if recorded) before the current
- * event. Note, only events on the beginning of a subbuffer can
- * have missed events, all other events within the buffer will be
- * zero.
- */
-int kbuffer_missed_events(struct kbuffer *kbuf)
-{
- /* Only the first event can have missed events */
- if (kbuf->curr)
- return 0;
-
- return kbuf->lost_events;
-}
-
-/**
- * kbuffer_set_old_forma - set the kbuffer to use the old format parsing
- * @kbuf: The kbuffer to set
- *
- * This is obsolete (or should be). The first kernels to use the
- * new ring buffer had a slightly different ring buffer format
- * (2.6.30 and earlier). It is still somewhat supported by kbuffer,
- * but should not be counted on in the future.
- */
-void kbuffer_set_old_format(struct kbuffer *kbuf)
-{
- kbuf->flags |= KBUFFER_FL_OLD_FORMAT;
-
- kbuf->next_event = __old_next_event;
-}
-
-/**
- * kbuffer_start_of_data - return offset of where data starts on subbuffer
- * @kbuf: The kbuffer
- *
- * Returns the location on the subbuffer where the data starts.
- */
-int kbuffer_start_of_data(struct kbuffer *kbuf)
-{
- return kbuf->start;
-}
-
-/**
- * kbuffer_raw_get - get raw buffer info
- * @kbuf: The kbuffer
- * @subbuf: Start of mapped subbuffer
- * @info: Info descriptor to fill in
- *
- * For debugging. This can return internals of the ring buffer.
- * Expects to have info->next set to what it will read.
- * The type, length and timestamp delta will be filled in, and
- * @info->next will be updated to the next element.
- * The @subbuf is used to know if the info is passed the end of
- * data and NULL will be returned if it is.
- */
-struct kbuffer_raw_info *
-kbuffer_raw_get(struct kbuffer *kbuf, void *subbuf, struct kbuffer_raw_info *info)
-{
- unsigned long long flags;
- unsigned long long delta;
- unsigned int type_len;
- unsigned int size;
- int start;
- int length;
- void *ptr = info->next;
-
- if (!kbuf || !subbuf)
- return NULL;
-
- if (kbuf->flags & KBUFFER_FL_LONG_8)
- start = 16;
- else
- start = 12;
-
- flags = read_long(kbuf, subbuf + 8);
- size = (unsigned int)flags & COMMIT_MASK;
-
- if (ptr < subbuf || ptr >= subbuf + start + size)
- return NULL;
-
- type_len = translate_data(kbuf, ptr, &ptr, &delta, &length);
-
- info->next = ptr + length;
-
- info->type = type_len;
- info->delta = delta;
- info->length = length;
-
- return info;
-}
diff --git a/tools/lib/traceevent/kbuffer.h b/tools/lib/traceevent/kbuffer.h
deleted file mode 100644
index a2b522093cfd..000000000000
--- a/tools/lib/traceevent/kbuffer.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/* SPDX-License-Identifier: LGPL-2.1 */
-/*
- * Copyright (C) 2012 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- */
-#ifndef _KBUFFER_H
-#define _KBUFFER_H
-
-#ifndef TS_SHIFT
-#define TS_SHIFT 27
-#endif
-
-enum kbuffer_endian {
- KBUFFER_ENDIAN_BIG,
- KBUFFER_ENDIAN_LITTLE,
-};
-
-enum kbuffer_long_size {
- KBUFFER_LSIZE_4,
- KBUFFER_LSIZE_8,
-};
-
-enum {
- KBUFFER_TYPE_PADDING = 29,
- KBUFFER_TYPE_TIME_EXTEND = 30,
- KBUFFER_TYPE_TIME_STAMP = 31,
-};
-
-struct kbuffer;
-
-struct kbuffer *kbuffer_alloc(enum kbuffer_long_size size, enum kbuffer_endian endian);
-void kbuffer_free(struct kbuffer *kbuf);
-int kbuffer_load_subbuffer(struct kbuffer *kbuf, void *subbuffer);
-void *kbuffer_read_event(struct kbuffer *kbuf, unsigned long long *ts);
-void *kbuffer_next_event(struct kbuffer *kbuf, unsigned long long *ts);
-unsigned long long kbuffer_timestamp(struct kbuffer *kbuf);
-unsigned long long kbuffer_subbuf_timestamp(struct kbuffer *kbuf, void *subbuf);
-unsigned int kbuffer_ptr_delta(struct kbuffer *kbuf, void *ptr);
-
-void *kbuffer_translate_data(int swap, void *data, unsigned int *size);
-
-void *kbuffer_read_at_offset(struct kbuffer *kbuf, int offset, unsigned long long *ts);
-
-int kbuffer_curr_index(struct kbuffer *kbuf);
-
-int kbuffer_curr_offset(struct kbuffer *kbuf);
-int kbuffer_curr_size(struct kbuffer *kbuf);
-int kbuffer_event_size(struct kbuffer *kbuf);
-int kbuffer_missed_events(struct kbuffer *kbuf);
-int kbuffer_subbuffer_size(struct kbuffer *kbuf);
-
-void kbuffer_set_old_format(struct kbuffer *kbuf);
-int kbuffer_start_of_data(struct kbuffer *kbuf);
-
-/* Debugging */
-
-struct kbuffer_raw_info {
- int type;
- int length;
- unsigned long long delta;
- void *next;
-};
-
-/* Read raw data */
-struct kbuffer_raw_info *kbuffer_raw_get(struct kbuffer *kbuf, void *subbuf,
- struct kbuffer_raw_info *info);
-
-#endif /* _K_BUFFER_H */
diff --git a/tools/lib/traceevent/libtraceevent.pc.template b/tools/lib/traceevent/libtraceevent.pc.template
deleted file mode 100644
index 86384fcd57f1..000000000000
--- a/tools/lib/traceevent/libtraceevent.pc.template
+++ /dev/null
@@ -1,10 +0,0 @@
-prefix=INSTALL_PREFIX
-libdir=LIB_DIR
-includedir=HEADER_DIR
-
-Name: libtraceevent
-URL: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
-Description: Linux kernel trace event library
-Version: LIB_VERSION
-Cflags: -I${includedir}
-Libs: -L${libdir} -ltraceevent
diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c
deleted file mode 100644
index 368826bb5a57..000000000000
--- a/tools/lib/traceevent/parse-filter.c
+++ /dev/null
@@ -1,2278 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdarg.h>
-#include <errno.h>
-#include <sys/types.h>
-
-#include "event-parse.h"
-#include "event-parse-local.h"
-#include "event-utils.h"
-
-#define COMM "COMM"
-#define CPU "CPU"
-
-static struct tep_format_field comm = {
- .name = "COMM",
-};
-
-static struct tep_format_field cpu = {
- .name = "CPU",
-};
-
-struct event_list {
- struct event_list *next;
- struct tep_event *event;
-};
-
-static void show_error(char *error_buf, const char *fmt, ...)
-{
- unsigned long long index;
- const char *input;
- va_list ap;
- int len;
- int i;
-
- input = get_input_buf();
- index = get_input_buf_ptr();
- len = input ? strlen(input) : 0;
-
- if (len) {
- strcpy(error_buf, input);
- error_buf[len] = '\n';
- for (i = 1; i < len && i < index; i++)
- error_buf[len+i] = ' ';
- error_buf[len + i] = '^';
- error_buf[len + i + 1] = '\n';
- len += i+2;
- }
-
- va_start(ap, fmt);
- vsnprintf(error_buf + len, TEP_FILTER_ERROR_BUFSZ - len, fmt, ap);
- va_end(ap);
-}
-
-static enum tep_event_type filter_read_token(char **tok)
-{
- enum tep_event_type type;
- char *token = NULL;
-
- do {
- free_token(token);
- type = read_token(&token);
- } while (type == TEP_EVENT_NEWLINE || type == TEP_EVENT_SPACE);
-
- /* If token is = or ! check to see if the next char is ~ */
- if (token &&
- (strcmp(token, "=") == 0 || strcmp(token, "!") == 0) &&
- peek_char() == '~') {
- /* append it */
- *tok = malloc(3);
- if (*tok == NULL) {
- free_token(token);
- return TEP_EVENT_ERROR;
- }
- sprintf(*tok, "%c%c", *token, '~');
- free_token(token);
- /* Now remove the '~' from the buffer */
- read_token(&token);
- free_token(token);
- } else
- *tok = token;
-
- return type;
-}
-
-static int filter_cmp(const void *a, const void *b)
-{
- const struct tep_filter_type *ea = a;
- const struct tep_filter_type *eb = b;
-
- if (ea->event_id < eb->event_id)
- return -1;
-
- if (ea->event_id > eb->event_id)
- return 1;
-
- return 0;
-}
-
-static struct tep_filter_type *
-find_filter_type(struct tep_event_filter *filter, int id)
-{
- struct tep_filter_type *filter_type;
- struct tep_filter_type key;
-
- key.event_id = id;
-
- filter_type = bsearch(&key, filter->event_filters,
- filter->filters,
- sizeof(*filter->event_filters),
- filter_cmp);
-
- return filter_type;
-}
-
-static struct tep_filter_type *
-add_filter_type(struct tep_event_filter *filter, int id)
-{
- struct tep_filter_type *filter_type;
- int i;
-
- filter_type = find_filter_type(filter, id);
- if (filter_type)
- return filter_type;
-
- filter_type = realloc(filter->event_filters,
- sizeof(*filter->event_filters) *
- (filter->filters + 1));
- if (!filter_type)
- return NULL;
-
- filter->event_filters = filter_type;
-
- for (i = 0; i < filter->filters; i++) {
- if (filter->event_filters[i].event_id > id)
- break;
- }
-
- if (i < filter->filters)
- memmove(&filter->event_filters[i+1],
- &filter->event_filters[i],
- sizeof(*filter->event_filters) *
- (filter->filters - i));
-
- filter_type = &filter->event_filters[i];
- filter_type->event_id = id;
- filter_type->event = tep_find_event(filter->tep, id);
- filter_type->filter = NULL;
-
- filter->filters++;
-
- return filter_type;
-}
-
-/**
- * tep_filter_alloc - create a new event filter
- * @tep: The tep that this filter is associated with
- */
-struct tep_event_filter *tep_filter_alloc(struct tep_handle *tep)
-{
- struct tep_event_filter *filter;
-
- filter = malloc(sizeof(*filter));
- if (filter == NULL)
- return NULL;
-
- memset(filter, 0, sizeof(*filter));
- filter->tep = tep;
- tep_ref(tep);
-
- return filter;
-}
-
-static struct tep_filter_arg *allocate_arg(void)
-{
- return calloc(1, sizeof(struct tep_filter_arg));
-}
-
-static void free_arg(struct tep_filter_arg *arg)
-{
- if (!arg)
- return;
-
- switch (arg->type) {
- case TEP_FILTER_ARG_NONE:
- case TEP_FILTER_ARG_BOOLEAN:
- break;
-
- case TEP_FILTER_ARG_NUM:
- free_arg(arg->num.left);
- free_arg(arg->num.right);
- break;
-
- case TEP_FILTER_ARG_EXP:
- free_arg(arg->exp.left);
- free_arg(arg->exp.right);
- break;
-
- case TEP_FILTER_ARG_STR:
- free(arg->str.val);
- regfree(&arg->str.reg);
- free(arg->str.buffer);
- break;
-
- case TEP_FILTER_ARG_VALUE:
- if (arg->value.type == TEP_FILTER_STRING ||
- arg->value.type == TEP_FILTER_CHAR)
- free(arg->value.str);
- break;
-
- case TEP_FILTER_ARG_OP:
- free_arg(arg->op.left);
- free_arg(arg->op.right);
- default:
- break;
- }
-
- free(arg);
-}
-
-static int add_event(struct event_list **events,
- struct tep_event *event)
-{
- struct event_list *list;
-
- list = malloc(sizeof(*list));
- if (list == NULL)
- return -1;
-
- list->next = *events;
- *events = list;
- list->event = event;
- return 0;
-}
-
-static int event_match(struct tep_event *event,
- regex_t *sreg, regex_t *ereg)
-{
- if (sreg) {
- return !regexec(sreg, event->system, 0, NULL, 0) &&
- !regexec(ereg, event->name, 0, NULL, 0);
- }
-
- return !regexec(ereg, event->system, 0, NULL, 0) ||
- !regexec(ereg, event->name, 0, NULL, 0);
-}
-
-static enum tep_errno
-find_event(struct tep_handle *tep, struct event_list **events,
- char *sys_name, char *event_name)
-{
- struct tep_event *event;
- regex_t ereg;
- regex_t sreg;
- int match = 0;
- int fail = 0;
- char *reg;
- int ret;
- int i;
-
- if (!event_name) {
- /* if no name is given, then swap sys and name */
- event_name = sys_name;
- sys_name = NULL;
- }
-
- ret = asprintf(&reg, "^%s$", event_name);
- if (ret < 0)
- return TEP_ERRNO__MEM_ALLOC_FAILED;
-
- ret = regcomp(&ereg, reg, REG_ICASE|REG_NOSUB);
- free(reg);
-
- if (ret)
- return TEP_ERRNO__INVALID_EVENT_NAME;
-
- if (sys_name) {
- ret = asprintf(&reg, "^%s$", sys_name);
- if (ret < 0) {
- regfree(&ereg);
- return TEP_ERRNO__MEM_ALLOC_FAILED;
- }
-
- ret = regcomp(&sreg, reg, REG_ICASE|REG_NOSUB);
- free(reg);
- if (ret) {
- regfree(&ereg);
- return TEP_ERRNO__INVALID_EVENT_NAME;
- }
- }
-
- for (i = 0; i < tep->nr_events; i++) {
- event = tep->events[i];
- if (event_match(event, sys_name ? &sreg : NULL, &ereg)) {
- match = 1;
- if (add_event(events, event) < 0) {
- fail = 1;
- break;
- }
- }
- }
-
- regfree(&ereg);
- if (sys_name)
- regfree(&sreg);
-
- if (!match)
- return TEP_ERRNO__EVENT_NOT_FOUND;
- if (fail)
- return TEP_ERRNO__MEM_ALLOC_FAILED;
-
- return 0;
-}
-
-static void free_events(struct event_list *events)
-{
- struct event_list *event;
-
- while (events) {
- event = events;
- events = events->next;
- free(event);
- }
-}
-
-static enum tep_errno
-create_arg_item(struct tep_event *event, const char *token,
- enum tep_event_type type, struct tep_filter_arg **parg, char *error_str)
-{
- struct tep_format_field *field;
- struct tep_filter_arg *arg;
-
- arg = allocate_arg();
- if (arg == NULL) {
- show_error(error_str, "failed to allocate filter arg");
- return TEP_ERRNO__MEM_ALLOC_FAILED;
- }
-
- switch (type) {
-
- case TEP_EVENT_SQUOTE:
- case TEP_EVENT_DQUOTE:
- arg->type = TEP_FILTER_ARG_VALUE;
- arg->value.type =
- type == TEP_EVENT_DQUOTE ? TEP_FILTER_STRING : TEP_FILTER_CHAR;
- arg->value.str = strdup(token);
- if (!arg->value.str) {
- free_arg(arg);
- show_error(error_str, "failed to allocate string filter arg");
- return TEP_ERRNO__MEM_ALLOC_FAILED;
- }
- break;
- case TEP_EVENT_ITEM:
- /* if it is a number, then convert it */
- if (isdigit(token[0])) {
- arg->type = TEP_FILTER_ARG_VALUE;
- arg->value.type = TEP_FILTER_NUMBER;
- arg->value.val = strtoull(token, NULL, 0);
- break;
- }
- /* Consider this a field */
- field = tep_find_any_field(event, token);
- if (!field) {
- /* If token is 'COMM' or 'CPU' then it is special */
- if (strcmp(token, COMM) == 0) {
- field = &comm;
- } else if (strcmp(token, CPU) == 0) {
- field = &cpu;
- } else {
- /* not a field, Make it false */
- arg->type = TEP_FILTER_ARG_BOOLEAN;
- arg->boolean.value = TEP_FILTER_FALSE;
- break;
- }
- }
- arg->type = TEP_FILTER_ARG_FIELD;
- arg->field.field = field;
- break;
- default:
- free_arg(arg);
- show_error(error_str, "expected a value but found %s", token);
- return TEP_ERRNO__UNEXPECTED_TYPE;
- }
- *parg = arg;
- return 0;
-}
-
-static struct tep_filter_arg *
-create_arg_op(enum tep_filter_op_type btype)
-{
- struct tep_filter_arg *arg;
-
- arg = allocate_arg();
- if (!arg)
- return NULL;
-
- arg->type = TEP_FILTER_ARG_OP;
- arg->op.type = btype;
-
- return arg;
-}
-
-static struct tep_filter_arg *
-create_arg_exp(enum tep_filter_exp_type etype)
-{
- struct tep_filter_arg *arg;
-
- arg = allocate_arg();
- if (!arg)
- return NULL;
-
- arg->type = TEP_FILTER_ARG_EXP;
- arg->exp.type = etype;
-
- return arg;
-}
-
-static struct tep_filter_arg *
-create_arg_cmp(enum tep_filter_cmp_type ctype)
-{
- struct tep_filter_arg *arg;
-
- arg = allocate_arg();
- if (!arg)
- return NULL;
-
- /* Use NUM and change if necessary */
- arg->type = TEP_FILTER_ARG_NUM;
- arg->num.type = ctype;
-
- return arg;
-}
-
-static enum tep_errno
-add_right(struct tep_filter_arg *op, struct tep_filter_arg *arg, char *error_str)
-{
- struct tep_filter_arg *left;
- char *str;
- int op_type;
- int ret;
-
- switch (op->type) {
- case TEP_FILTER_ARG_EXP:
- if (op->exp.right)
- goto out_fail;
- op->exp.right = arg;
- break;
-
- case TEP_FILTER_ARG_OP:
- if (op->op.right)
- goto out_fail;
- op->op.right = arg;
- break;
-
- case TEP_FILTER_ARG_NUM:
- if (op->op.right)
- goto out_fail;
- /*
- * The arg must be num, str, or field
- */
- switch (arg->type) {
- case TEP_FILTER_ARG_VALUE:
- case TEP_FILTER_ARG_FIELD:
- break;
- default:
- show_error(error_str, "Illegal rvalue");
- return TEP_ERRNO__ILLEGAL_RVALUE;
- }
-
- /*
- * Depending on the type, we may need to
- * convert this to a string or regex.
- */
- switch (arg->value.type) {
- case TEP_FILTER_CHAR:
- /*
- * A char should be converted to number if
- * the string is 1 byte, and the compare
- * is not a REGEX.
- */
- if (strlen(arg->value.str) == 1 &&
- op->num.type != TEP_FILTER_CMP_REGEX &&
- op->num.type != TEP_FILTER_CMP_NOT_REGEX) {
- arg->value.type = TEP_FILTER_NUMBER;
- goto do_int;
- }
- /* fall through */
- case TEP_FILTER_STRING:
-
- /* convert op to a string arg */
- op_type = op->num.type;
- left = op->num.left;
- str = arg->value.str;
-
- /* reset the op for the new field */
- memset(op, 0, sizeof(*op));
-
- /*
- * If left arg was a field not found then
- * NULL the entire op.
- */
- if (left->type == TEP_FILTER_ARG_BOOLEAN) {
- free_arg(left);
- free_arg(arg);
- op->type = TEP_FILTER_ARG_BOOLEAN;
- op->boolean.value = TEP_FILTER_FALSE;
- break;
- }
-
- /* Left arg must be a field */
- if (left->type != TEP_FILTER_ARG_FIELD) {
- show_error(error_str,
- "Illegal lvalue for string comparison");
- return TEP_ERRNO__ILLEGAL_LVALUE;
- }
-
- /* Make sure this is a valid string compare */
- switch (op_type) {
- case TEP_FILTER_CMP_EQ:
- op_type = TEP_FILTER_CMP_MATCH;
- break;
- case TEP_FILTER_CMP_NE:
- op_type = TEP_FILTER_CMP_NOT_MATCH;
- break;
-
- case TEP_FILTER_CMP_REGEX:
- case TEP_FILTER_CMP_NOT_REGEX:
- ret = regcomp(&op->str.reg, str, REG_ICASE|REG_NOSUB);
- if (ret) {
- show_error(error_str,
- "RegEx '%s' did not compute",
- str);
- return TEP_ERRNO__INVALID_REGEX;
- }
- break;
- default:
- show_error(error_str,
- "Illegal comparison for string");
- return TEP_ERRNO__ILLEGAL_STRING_CMP;
- }
-
- op->type = TEP_FILTER_ARG_STR;
- op->str.type = op_type;
- op->str.field = left->field.field;
- op->str.val = strdup(str);
- if (!op->str.val) {
- show_error(error_str, "Failed to allocate string filter");
- return TEP_ERRNO__MEM_ALLOC_FAILED;
- }
- /*
- * Need a buffer to copy data for tests
- */
- op->str.buffer = malloc(op->str.field->size + 1);
- if (!op->str.buffer) {
- show_error(error_str, "Failed to allocate string filter");
- return TEP_ERRNO__MEM_ALLOC_FAILED;
- }
- /* Null terminate this buffer */
- op->str.buffer[op->str.field->size] = 0;
-
- /* We no longer have left or right args */
- free_arg(arg);
- free_arg(left);
-
- break;
-
- case TEP_FILTER_NUMBER:
-
- do_int:
- switch (op->num.type) {
- case TEP_FILTER_CMP_REGEX:
- case TEP_FILTER_CMP_NOT_REGEX:
- show_error(error_str,
- "Op not allowed with integers");
- return TEP_ERRNO__ILLEGAL_INTEGER_CMP;
-
- default:
- break;
- }
-
- /* numeric compare */
- op->num.right = arg;
- break;
- default:
- goto out_fail;
- }
- break;
- default:
- goto out_fail;
- }
-
- return 0;
-
- out_fail:
- show_error(error_str, "Syntax error");
- return TEP_ERRNO__SYNTAX_ERROR;
-}
-
-static struct tep_filter_arg *
-rotate_op_right(struct tep_filter_arg *a, struct tep_filter_arg *b)
-{
- struct tep_filter_arg *arg;
-
- arg = a->op.right;
- a->op.right = b;
- return arg;
-}
-
-static enum tep_errno add_left(struct tep_filter_arg *op, struct tep_filter_arg *arg)
-{
- switch (op->type) {
- case TEP_FILTER_ARG_EXP:
- if (arg->type == TEP_FILTER_ARG_OP)
- arg = rotate_op_right(arg, op);
- op->exp.left = arg;
- break;
-
- case TEP_FILTER_ARG_OP:
- op->op.left = arg;
- break;
- case TEP_FILTER_ARG_NUM:
- if (arg->type == TEP_FILTER_ARG_OP)
- arg = rotate_op_right(arg, op);
-
- /* left arg of compares must be a field */
- if (arg->type != TEP_FILTER_ARG_FIELD &&
- arg->type != TEP_FILTER_ARG_BOOLEAN)
- return TEP_ERRNO__INVALID_ARG_TYPE;
- op->num.left = arg;
- break;
- default:
- return TEP_ERRNO__INVALID_ARG_TYPE;
- }
- return 0;
-}
-
-enum op_type {
- OP_NONE,
- OP_BOOL,
- OP_NOT,
- OP_EXP,
- OP_CMP,
-};
-
-static enum op_type process_op(const char *token,
- enum tep_filter_op_type *btype,
- enum tep_filter_cmp_type *ctype,
- enum tep_filter_exp_type *etype)
-{
- *btype = TEP_FILTER_OP_NOT;
- *etype = TEP_FILTER_EXP_NONE;
- *ctype = TEP_FILTER_CMP_NONE;
-
- if (strcmp(token, "&&") == 0)
- *btype = TEP_FILTER_OP_AND;
- else if (strcmp(token, "||") == 0)
- *btype = TEP_FILTER_OP_OR;
- else if (strcmp(token, "!") == 0)
- return OP_NOT;
-
- if (*btype != TEP_FILTER_OP_NOT)
- return OP_BOOL;
-
- /* Check for value expressions */
- if (strcmp(token, "+") == 0) {
- *etype = TEP_FILTER_EXP_ADD;
- } else if (strcmp(token, "-") == 0) {
- *etype = TEP_FILTER_EXP_SUB;
- } else if (strcmp(token, "*") == 0) {
- *etype = TEP_FILTER_EXP_MUL;
- } else if (strcmp(token, "/") == 0) {
- *etype = TEP_FILTER_EXP_DIV;
- } else if (strcmp(token, "%") == 0) {
- *etype = TEP_FILTER_EXP_MOD;
- } else if (strcmp(token, ">>") == 0) {
- *etype = TEP_FILTER_EXP_RSHIFT;
- } else if (strcmp(token, "<<") == 0) {
- *etype = TEP_FILTER_EXP_LSHIFT;
- } else if (strcmp(token, "&") == 0) {
- *etype = TEP_FILTER_EXP_AND;
- } else if (strcmp(token, "|") == 0) {
- *etype = TEP_FILTER_EXP_OR;
- } else if (strcmp(token, "^") == 0) {
- *etype = TEP_FILTER_EXP_XOR;
- } else if (strcmp(token, "~") == 0)
- *etype = TEP_FILTER_EXP_NOT;
-
- if (*etype != TEP_FILTER_EXP_NONE)
- return OP_EXP;
-
- /* Check for compares */
- if (strcmp(token, "==") == 0)
- *ctype = TEP_FILTER_CMP_EQ;
- else if (strcmp(token, "!=") == 0)
- *ctype = TEP_FILTER_CMP_NE;
- else if (strcmp(token, "<") == 0)
- *ctype = TEP_FILTER_CMP_LT;
- else if (strcmp(token, ">") == 0)
- *ctype = TEP_FILTER_CMP_GT;
- else if (strcmp(token, "<=") == 0)
- *ctype = TEP_FILTER_CMP_LE;
- else if (strcmp(token, ">=") == 0)
- *ctype = TEP_FILTER_CMP_GE;
- else if (strcmp(token, "=~") == 0)
- *ctype = TEP_FILTER_CMP_REGEX;
- else if (strcmp(token, "!~") == 0)
- *ctype = TEP_FILTER_CMP_NOT_REGEX;
- else
- return OP_NONE;
-
- return OP_CMP;
-}
-
-static int check_op_done(struct tep_filter_arg *arg)
-{
- switch (arg->type) {
- case TEP_FILTER_ARG_EXP:
- return arg->exp.right != NULL;
-
- case TEP_FILTER_ARG_OP:
- return arg->op.right != NULL;
-
- case TEP_FILTER_ARG_NUM:
- return arg->num.right != NULL;
-
- case TEP_FILTER_ARG_STR:
- /* A string conversion is always done */
- return 1;
-
- case TEP_FILTER_ARG_BOOLEAN:
- /* field not found, is ok */
- return 1;
-
- default:
- return 0;
- }
-}
-
-enum filter_vals {
- FILTER_VAL_NORM,
- FILTER_VAL_FALSE,
- FILTER_VAL_TRUE,
-};
-
-static enum tep_errno
-reparent_op_arg(struct tep_filter_arg *parent, struct tep_filter_arg *old_child,
- struct tep_filter_arg *arg, char *error_str)
-{
- struct tep_filter_arg *other_child;
- struct tep_filter_arg **ptr;
-
- if (parent->type != TEP_FILTER_ARG_OP &&
- arg->type != TEP_FILTER_ARG_OP) {
- show_error(error_str, "can not reparent other than OP");
- return TEP_ERRNO__REPARENT_NOT_OP;
- }
-
- /* Get the sibling */
- if (old_child->op.right == arg) {
- ptr = &old_child->op.right;
- other_child = old_child->op.left;
- } else if (old_child->op.left == arg) {
- ptr = &old_child->op.left;
- other_child = old_child->op.right;
- } else {
- show_error(error_str, "Error in reparent op, find other child");
- return TEP_ERRNO__REPARENT_FAILED;
- }
-
- /* Detach arg from old_child */
- *ptr = NULL;
-
- /* Check for root */
- if (parent == old_child) {
- free_arg(other_child);
- *parent = *arg;
- /* Free arg without recussion */
- free(arg);
- return 0;
- }
-
- if (parent->op.right == old_child)
- ptr = &parent->op.right;
- else if (parent->op.left == old_child)
- ptr = &parent->op.left;
- else {
- show_error(error_str, "Error in reparent op");
- return TEP_ERRNO__REPARENT_FAILED;
- }
-
- *ptr = arg;
-
- free_arg(old_child);
- return 0;
-}
-
-/* Returns either filter_vals (success) or tep_errno (failfure) */
-static int test_arg(struct tep_filter_arg *parent, struct tep_filter_arg *arg,
- char *error_str)
-{
- int lval, rval;
-
- switch (arg->type) {
-
- /* bad case */
- case TEP_FILTER_ARG_BOOLEAN:
- return FILTER_VAL_FALSE + arg->boolean.value;
-
- /* good cases: */
- case TEP_FILTER_ARG_STR:
- case TEP_FILTER_ARG_VALUE:
- case TEP_FILTER_ARG_FIELD:
- return FILTER_VAL_NORM;
-
- case TEP_FILTER_ARG_EXP:
- lval = test_arg(arg, arg->exp.left, error_str);
- if (lval != FILTER_VAL_NORM)
- return lval;
- rval = test_arg(arg, arg->exp.right, error_str);
- if (rval != FILTER_VAL_NORM)
- return rval;
- return FILTER_VAL_NORM;
-
- case TEP_FILTER_ARG_NUM:
- lval = test_arg(arg, arg->num.left, error_str);
- if (lval != FILTER_VAL_NORM)
- return lval;
- rval = test_arg(arg, arg->num.right, error_str);
- if (rval != FILTER_VAL_NORM)
- return rval;
- return FILTER_VAL_NORM;
-
- case TEP_FILTER_ARG_OP:
- if (arg->op.type != TEP_FILTER_OP_NOT) {
- lval = test_arg(arg, arg->op.left, error_str);
- switch (lval) {
- case FILTER_VAL_NORM:
- break;
- case FILTER_VAL_TRUE:
- if (arg->op.type == TEP_FILTER_OP_OR)
- return FILTER_VAL_TRUE;
- rval = test_arg(arg, arg->op.right, error_str);
- if (rval != FILTER_VAL_NORM)
- return rval;
-
- return reparent_op_arg(parent, arg, arg->op.right,
- error_str);
-
- case FILTER_VAL_FALSE:
- if (arg->op.type == TEP_FILTER_OP_AND)
- return FILTER_VAL_FALSE;
- rval = test_arg(arg, arg->op.right, error_str);
- if (rval != FILTER_VAL_NORM)
- return rval;
-
- return reparent_op_arg(parent, arg, arg->op.right,
- error_str);
-
- default:
- return lval;
- }
- }
-
- rval = test_arg(arg, arg->op.right, error_str);
- switch (rval) {
- case FILTER_VAL_NORM:
- default:
- break;
-
- case FILTER_VAL_TRUE:
- if (arg->op.type == TEP_FILTER_OP_OR)
- return FILTER_VAL_TRUE;
- if (arg->op.type == TEP_FILTER_OP_NOT)
- return FILTER_VAL_FALSE;
-
- return reparent_op_arg(parent, arg, arg->op.left,
- error_str);
-
- case FILTER_VAL_FALSE:
- if (arg->op.type == TEP_FILTER_OP_AND)
- return FILTER_VAL_FALSE;
- if (arg->op.type == TEP_FILTER_OP_NOT)
- return FILTER_VAL_TRUE;
-
- return reparent_op_arg(parent, arg, arg->op.left,
- error_str);
- }
-
- return rval;
- default:
- show_error(error_str, "bad arg in filter tree");
- return TEP_ERRNO__BAD_FILTER_ARG;
- }
- return FILTER_VAL_NORM;
-}
-
-/* Remove any unknown event fields */
-static int collapse_tree(struct tep_filter_arg *arg,
- struct tep_filter_arg **arg_collapsed, char *error_str)
-{
- int ret;
-
- ret = test_arg(arg, arg, error_str);
- switch (ret) {
- case FILTER_VAL_NORM:
- break;
-
- case FILTER_VAL_TRUE:
- case FILTER_VAL_FALSE:
- free_arg(arg);
- arg = allocate_arg();
- if (arg) {
- arg->type = TEP_FILTER_ARG_BOOLEAN;
- arg->boolean.value = ret == FILTER_VAL_TRUE;
- } else {
- show_error(error_str, "Failed to allocate filter arg");
- ret = TEP_ERRNO__MEM_ALLOC_FAILED;
- }
- break;
-
- default:
- /* test_arg() already set the error_str */
- free_arg(arg);
- arg = NULL;
- break;
- }
-
- *arg_collapsed = arg;
- return ret;
-}
-
-static enum tep_errno
-process_filter(struct tep_event *event, struct tep_filter_arg **parg,
- char *error_str, int not)
-{
- enum tep_event_type type;
- char *token = NULL;
- struct tep_filter_arg *current_op = NULL;
- struct tep_filter_arg *current_exp = NULL;
- struct tep_filter_arg *left_item = NULL;
- struct tep_filter_arg *arg = NULL;
- enum op_type op_type;
- enum tep_filter_op_type btype;
- enum tep_filter_exp_type etype;
- enum tep_filter_cmp_type ctype;
- enum tep_errno ret;
-
- *parg = NULL;
-
- do {
- free(token);
- type = filter_read_token(&token);
- switch (type) {
- case TEP_EVENT_SQUOTE:
- case TEP_EVENT_DQUOTE:
- case TEP_EVENT_ITEM:
- ret = create_arg_item(event, token, type, &arg, error_str);
- if (ret < 0)
- goto fail;
- if (!left_item)
- left_item = arg;
- else if (current_exp) {
- ret = add_right(current_exp, arg, error_str);
- if (ret < 0)
- goto fail;
- left_item = NULL;
- /* Not's only one one expression */
- if (not) {
- arg = NULL;
- if (current_op)
- goto fail_syntax;
- free(token);
- *parg = current_exp;
- return 0;
- }
- } else
- goto fail_syntax;
- arg = NULL;
- break;
-
- case TEP_EVENT_DELIM:
- if (*token == ',') {
- show_error(error_str, "Illegal token ','");
- ret = TEP_ERRNO__ILLEGAL_TOKEN;
- goto fail;
- }
-
- if (*token == '(') {
- if (left_item) {
- show_error(error_str,
- "Open paren can not come after item");
- ret = TEP_ERRNO__INVALID_PAREN;
- goto fail;
- }
- if (current_exp) {
- show_error(error_str,
- "Open paren can not come after expression");
- ret = TEP_ERRNO__INVALID_PAREN;
- goto fail;
- }
-
- ret = process_filter(event, &arg, error_str, 0);
- if (ret != TEP_ERRNO__UNBALANCED_PAREN) {
- if (ret == 0) {
- show_error(error_str,
- "Unbalanced number of '('");
- ret = TEP_ERRNO__UNBALANCED_PAREN;
- }
- goto fail;
- }
- ret = 0;
-
- /* A not wants just one expression */
- if (not) {
- if (current_op)
- goto fail_syntax;
- *parg = arg;
- return 0;
- }
-
- if (current_op)
- ret = add_right(current_op, arg, error_str);
- else
- current_exp = arg;
-
- if (ret < 0)
- goto fail;
-
- } else { /* ')' */
- if (!current_op && !current_exp)
- goto fail_syntax;
-
- /* Make sure everything is finished at this level */
- if (current_exp && !check_op_done(current_exp))
- goto fail_syntax;
- if (current_op && !check_op_done(current_op))
- goto fail_syntax;
-
- if (current_op)
- *parg = current_op;
- else
- *parg = current_exp;
- free(token);
- return TEP_ERRNO__UNBALANCED_PAREN;
- }
- break;
-
- case TEP_EVENT_OP:
- op_type = process_op(token, &btype, &ctype, &etype);
-
- /* All expect a left arg except for NOT */
- switch (op_type) {
- case OP_BOOL:
- /* Logic ops need a left expression */
- if (!current_exp && !current_op)
- goto fail_syntax;
- /* fall through */
- case OP_NOT:
- /* logic only processes ops and exp */
- if (left_item)
- goto fail_syntax;
- break;
- case OP_EXP:
- case OP_CMP:
- if (!left_item)
- goto fail_syntax;
- break;
- case OP_NONE:
- show_error(error_str,
- "Unknown op token %s", token);
- ret = TEP_ERRNO__UNKNOWN_TOKEN;
- goto fail;
- }
-
- ret = 0;
- switch (op_type) {
- case OP_BOOL:
- arg = create_arg_op(btype);
- if (arg == NULL)
- goto fail_alloc;
- if (current_op)
- ret = add_left(arg, current_op);
- else
- ret = add_left(arg, current_exp);
- current_op = arg;
- current_exp = NULL;
- break;
-
- case OP_NOT:
- arg = create_arg_op(btype);
- if (arg == NULL)
- goto fail_alloc;
- if (current_op)
- ret = add_right(current_op, arg, error_str);
- if (ret < 0)
- goto fail;
- current_exp = arg;
- ret = process_filter(event, &arg, error_str, 1);
- if (ret < 0)
- goto fail;
- ret = add_right(current_exp, arg, error_str);
- if (ret < 0)
- goto fail;
- break;
-
- case OP_EXP:
- case OP_CMP:
- if (op_type == OP_EXP)
- arg = create_arg_exp(etype);
- else
- arg = create_arg_cmp(ctype);
- if (arg == NULL)
- goto fail_alloc;
-
- if (current_op)
- ret = add_right(current_op, arg, error_str);
- if (ret < 0)
- goto fail;
- ret = add_left(arg, left_item);
- if (ret < 0) {
- arg = NULL;
- goto fail_syntax;
- }
- current_exp = arg;
- break;
- default:
- break;
- }
- arg = NULL;
- if (ret < 0)
- goto fail_syntax;
- break;
- case TEP_EVENT_NONE:
- break;
- case TEP_EVENT_ERROR:
- goto fail_alloc;
- default:
- goto fail_syntax;
- }
- } while (type != TEP_EVENT_NONE);
-
- if (!current_op && !current_exp)
- goto fail_syntax;
-
- if (!current_op)
- current_op = current_exp;
-
- ret = collapse_tree(current_op, parg, error_str);
- /* collapse_tree() may free current_op, and updates parg accordingly */
- current_op = NULL;
- if (ret < 0)
- goto fail;
-
- free(token);
- return 0;
-
- fail_alloc:
- show_error(error_str, "failed to allocate filter arg");
- ret = TEP_ERRNO__MEM_ALLOC_FAILED;
- goto fail;
- fail_syntax:
- show_error(error_str, "Syntax error");
- ret = TEP_ERRNO__SYNTAX_ERROR;
- fail:
- free_arg(current_op);
- free_arg(current_exp);
- free_arg(arg);
- free(token);
- return ret;
-}
-
-static enum tep_errno
-process_event(struct tep_event *event, const char *filter_str,
- struct tep_filter_arg **parg, char *error_str)
-{
- int ret;
-
- init_input_buf(filter_str, strlen(filter_str));
-
- ret = process_filter(event, parg, error_str, 0);
- if (ret < 0)
- return ret;
-
- /* If parg is NULL, then make it into FALSE */
- if (!*parg) {
- *parg = allocate_arg();
- if (*parg == NULL)
- return TEP_ERRNO__MEM_ALLOC_FAILED;
-
- (*parg)->type = TEP_FILTER_ARG_BOOLEAN;
- (*parg)->boolean.value = TEP_FILTER_FALSE;
- }
-
- return 0;
-}
-
-static enum tep_errno
-filter_event(struct tep_event_filter *filter, struct tep_event *event,
- const char *filter_str, char *error_str)
-{
- struct tep_filter_type *filter_type;
- struct tep_filter_arg *arg;
- enum tep_errno ret;
-
- if (filter_str) {
- ret = process_event(event, filter_str, &arg, error_str);
- if (ret < 0)
- return ret;
-
- } else {
- /* just add a TRUE arg */
- arg = allocate_arg();
- if (arg == NULL)
- return TEP_ERRNO__MEM_ALLOC_FAILED;
-
- arg->type = TEP_FILTER_ARG_BOOLEAN;
- arg->boolean.value = TEP_FILTER_TRUE;
- }
-
- filter_type = add_filter_type(filter, event->id);
- if (filter_type == NULL) {
- free_arg(arg);
- return TEP_ERRNO__MEM_ALLOC_FAILED;
- }
-
- if (filter_type->filter)
- free_arg(filter_type->filter);
- filter_type->filter = arg;
-
- return 0;
-}
-
-static void filter_init_error_buf(struct tep_event_filter *filter)
-{
- /* clear buffer to reset show error */
- init_input_buf("", 0);
- filter->error_buffer[0] = '\0';
-}
-
-/**
- * tep_filter_add_filter_str - add a new filter
- * @filter: the event filter to add to
- * @filter_str: the filter string that contains the filter
- *
- * Returns 0 if the filter was successfully added or a
- * negative error code. Use tep_filter_strerror() to see
- * actual error message in case of error.
- */
-enum tep_errno tep_filter_add_filter_str(struct tep_event_filter *filter,
- const char *filter_str)
-{
- struct tep_handle *tep = filter->tep;
- struct event_list *event;
- struct event_list *events = NULL;
- const char *filter_start;
- const char *next_event;
- char *this_event;
- char *event_name = NULL;
- char *sys_name = NULL;
- char *sp;
- enum tep_errno rtn = 0; /* TEP_ERRNO__SUCCESS */
- int len;
- int ret;
-
- filter_init_error_buf(filter);
-
- filter_start = strchr(filter_str, ':');
- if (filter_start)
- len = filter_start - filter_str;
- else
- len = strlen(filter_str);
-
- do {
- next_event = strchr(filter_str, ',');
- if (next_event &&
- (!filter_start || next_event < filter_start))
- len = next_event - filter_str;
- else if (filter_start)
- len = filter_start - filter_str;
- else
- len = strlen(filter_str);
-
- this_event = malloc(len + 1);
- if (this_event == NULL) {
- /* This can only happen when events is NULL, but still */
- free_events(events);
- return TEP_ERRNO__MEM_ALLOC_FAILED;
- }
- memcpy(this_event, filter_str, len);
- this_event[len] = 0;
-
- if (next_event)
- next_event++;
-
- filter_str = next_event;
-
- sys_name = strtok_r(this_event, "/", &sp);
- event_name = strtok_r(NULL, "/", &sp);
-
- if (!sys_name) {
- /* This can only happen when events is NULL, but still */
- free_events(events);
- free(this_event);
- return TEP_ERRNO__FILTER_NOT_FOUND;
- }
-
- /* Find this event */
- ret = find_event(tep, &events, strim(sys_name), strim(event_name));
- if (ret < 0) {
- free_events(events);
- free(this_event);
- return ret;
- }
- free(this_event);
- } while (filter_str);
-
- /* Skip the ':' */
- if (filter_start)
- filter_start++;
-
- /* filter starts here */
- for (event = events; event; event = event->next) {
- ret = filter_event(filter, event->event, filter_start,
- filter->error_buffer);
- /* Failures are returned if a parse error happened */
- if (ret < 0)
- rtn = ret;
-
- if (ret >= 0 && tep->test_filters) {
- char *test;
- test = tep_filter_make_string(filter, event->event->id);
- if (test) {
- printf(" '%s: %s'\n", event->event->name, test);
- free(test);
- }
- }
- }
-
- free_events(events);
-
- return rtn;
-}
-
-static void free_filter_type(struct tep_filter_type *filter_type)
-{
- free_arg(filter_type->filter);
-}
-
-/**
- * tep_filter_strerror - fill error message in a buffer
- * @filter: the event filter contains error
- * @err: the error code
- * @buf: the buffer to be filled in
- * @buflen: the size of the buffer
- *
- * Returns 0 if message was filled successfully, -1 if error
- */
-int tep_filter_strerror(struct tep_event_filter *filter, enum tep_errno err,
- char *buf, size_t buflen)
-{
- if (err <= __TEP_ERRNO__START || err >= __TEP_ERRNO__END)
- return -1;
-
- if (strlen(filter->error_buffer) > 0) {
- size_t len = snprintf(buf, buflen, "%s", filter->error_buffer);
-
- if (len > buflen)
- return -1;
- return 0;
- }
-
- return tep_strerror(filter->tep, err, buf, buflen);
-}
-
-/**
- * tep_filter_remove_event - remove a filter for an event
- * @filter: the event filter to remove from
- * @event_id: the event to remove a filter for
- *
- * Removes the filter saved for an event defined by @event_id
- * from the @filter.
- *
- * Returns 1: if an event was removed
- * 0: if the event was not found
- */
-int tep_filter_remove_event(struct tep_event_filter *filter,
- int event_id)
-{
- struct tep_filter_type *filter_type;
- unsigned long len;
-
- if (!filter->filters)
- return 0;
-
- filter_type = find_filter_type(filter, event_id);
-
- if (!filter_type)
- return 0;
-
- free_filter_type(filter_type);
-
- /* The filter_type points into the event_filters array */
- len = (unsigned long)(filter->event_filters + filter->filters) -
- (unsigned long)(filter_type + 1);
-
- memmove(filter_type, filter_type + 1, len);
- filter->filters--;
-
- memset(&filter->event_filters[filter->filters], 0,
- sizeof(*filter_type));
-
- return 1;
-}
-
-/**
- * tep_filter_reset - clear all filters in a filter
- * @filter: the event filter to reset
- *
- * Removes all filters from a filter and resets it.
- */
-void tep_filter_reset(struct tep_event_filter *filter)
-{
- int i;
-
- for (i = 0; i < filter->filters; i++)
- free_filter_type(&filter->event_filters[i]);
-
- free(filter->event_filters);
- filter->filters = 0;
- filter->event_filters = NULL;
-}
-
-void tep_filter_free(struct tep_event_filter *filter)
-{
- tep_unref(filter->tep);
-
- tep_filter_reset(filter);
-
- free(filter);
-}
-
-static char *arg_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg);
-
-static int copy_filter_type(struct tep_event_filter *filter,
- struct tep_event_filter *source,
- struct tep_filter_type *filter_type)
-{
- struct tep_filter_arg *arg;
- struct tep_event *event;
- const char *sys;
- const char *name;
- char *str;
-
- /* Can't assume that the tep's are the same */
- sys = filter_type->event->system;
- name = filter_type->event->name;
- event = tep_find_event_by_name(filter->tep, sys, name);
- if (!event)
- return -1;
-
- str = arg_to_str(source, filter_type->filter);
- if (!str)
- return -1;
-
- if (strcmp(str, "TRUE") == 0 || strcmp(str, "FALSE") == 0) {
- /* Add trivial event */
- arg = allocate_arg();
- if (arg == NULL) {
- free(str);
- return -1;
- }
-
- arg->type = TEP_FILTER_ARG_BOOLEAN;
- if (strcmp(str, "TRUE") == 0)
- arg->boolean.value = 1;
- else
- arg->boolean.value = 0;
-
- filter_type = add_filter_type(filter, event->id);
- if (filter_type == NULL) {
- free(str);
- free_arg(arg);
- return -1;
- }
-
- filter_type->filter = arg;
-
- free(str);
- return 0;
- }
-
- filter_event(filter, event, str, NULL);
- free(str);
-
- return 0;
-}
-
-/**
- * tep_filter_copy - copy a filter using another filter
- * @dest - the filter to copy to
- * @source - the filter to copy from
- *
- * Returns 0 on success and -1 if not all filters were copied
- */
-int tep_filter_copy(struct tep_event_filter *dest, struct tep_event_filter *source)
-{
- int ret = 0;
- int i;
-
- tep_filter_reset(dest);
-
- for (i = 0; i < source->filters; i++) {
- if (copy_filter_type(dest, source, &source->event_filters[i]))
- ret = -1;
- }
- return ret;
-}
-
-static int test_filter(struct tep_event *event, struct tep_filter_arg *arg,
- struct tep_record *record, enum tep_errno *err);
-
-static const char *
-get_comm(struct tep_event *event, struct tep_record *record)
-{
- const char *comm;
- int pid;
-
- pid = tep_data_pid(event->tep, record);
- comm = tep_data_comm_from_pid(event->tep, pid);
- return comm;
-}
-
-static unsigned long long
-get_value(struct tep_event *event,
- struct tep_format_field *field, struct tep_record *record)
-{
- unsigned long long val;
-
- /* Handle our dummy "comm" field */
- if (field == &comm) {
- const char *name;
-
- name = get_comm(event, record);
- return (unsigned long)name;
- }
-
- /* Handle our dummy "cpu" field */
- if (field == &cpu)
- return record->cpu;
-
- tep_read_number_field(field, record->data, &val);
-
- if (!(field->flags & TEP_FIELD_IS_SIGNED))
- return val;
-
- switch (field->size) {
- case 1:
- return (char)val;
- case 2:
- return (short)val;
- case 4:
- return (int)val;
- case 8:
- return (long long)val;
- }
- return val;
-}
-
-static unsigned long long
-get_arg_value(struct tep_event *event, struct tep_filter_arg *arg,
- struct tep_record *record, enum tep_errno *err);
-
-static unsigned long long
-get_exp_value(struct tep_event *event, struct tep_filter_arg *arg,
- struct tep_record *record, enum tep_errno *err)
-{
- unsigned long long lval, rval;
-
- lval = get_arg_value(event, arg->exp.left, record, err);
- rval = get_arg_value(event, arg->exp.right, record, err);
-
- if (*err) {
- /*
- * There was an error, no need to process anymore.
- */
- return 0;
- }
-
- switch (arg->exp.type) {
- case TEP_FILTER_EXP_ADD:
- return lval + rval;
-
- case TEP_FILTER_EXP_SUB:
- return lval - rval;
-
- case TEP_FILTER_EXP_MUL:
- return lval * rval;
-
- case TEP_FILTER_EXP_DIV:
- return lval / rval;
-
- case TEP_FILTER_EXP_MOD:
- return lval % rval;
-
- case TEP_FILTER_EXP_RSHIFT:
- return lval >> rval;
-
- case TEP_FILTER_EXP_LSHIFT:
- return lval << rval;
-
- case TEP_FILTER_EXP_AND:
- return lval & rval;
-
- case TEP_FILTER_EXP_OR:
- return lval | rval;
-
- case TEP_FILTER_EXP_XOR:
- return lval ^ rval;
-
- case TEP_FILTER_EXP_NOT:
- default:
- if (!*err)
- *err = TEP_ERRNO__INVALID_EXP_TYPE;
- }
- return 0;
-}
-
-static unsigned long long
-get_arg_value(struct tep_event *event, struct tep_filter_arg *arg,
- struct tep_record *record, enum tep_errno *err)
-{
- switch (arg->type) {
- case TEP_FILTER_ARG_FIELD:
- return get_value(event, arg->field.field, record);
-
- case TEP_FILTER_ARG_VALUE:
- if (arg->value.type != TEP_FILTER_NUMBER) {
- if (!*err)
- *err = TEP_ERRNO__NOT_A_NUMBER;
- }
- return arg->value.val;
-
- case TEP_FILTER_ARG_EXP:
- return get_exp_value(event, arg, record, err);
-
- default:
- if (!*err)
- *err = TEP_ERRNO__INVALID_ARG_TYPE;
- }
- return 0;
-}
-
-static int test_num(struct tep_event *event, struct tep_filter_arg *arg,
- struct tep_record *record, enum tep_errno *err)
-{
- unsigned long long lval, rval;
-
- lval = get_arg_value(event, arg->num.left, record, err);
- rval = get_arg_value(event, arg->num.right, record, err);
-
- if (*err) {
- /*
- * There was an error, no need to process anymore.
- */
- return 0;
- }
-
- switch (arg->num.type) {
- case TEP_FILTER_CMP_EQ:
- return lval == rval;
-
- case TEP_FILTER_CMP_NE:
- return lval != rval;
-
- case TEP_FILTER_CMP_GT:
- return lval > rval;
-
- case TEP_FILTER_CMP_LT:
- return lval < rval;
-
- case TEP_FILTER_CMP_GE:
- return lval >= rval;
-
- case TEP_FILTER_CMP_LE:
- return lval <= rval;
-
- default:
- if (!*err)
- *err = TEP_ERRNO__ILLEGAL_INTEGER_CMP;
- return 0;
- }
-}
-
-static const char *get_field_str(struct tep_filter_arg *arg, struct tep_record *record)
-{
- struct tep_event *event;
- struct tep_handle *tep;
- unsigned long long addr;
- const char *val = NULL;
- unsigned int size;
- char hex[64];
-
- /* If the field is not a string convert it */
- if (arg->str.field->flags & TEP_FIELD_IS_STRING) {
- val = record->data + arg->str.field->offset;
- size = arg->str.field->size;
-
- if (arg->str.field->flags & TEP_FIELD_IS_DYNAMIC) {
- addr = *(unsigned int *)val;
- val = record->data + (addr & 0xffff);
- size = addr >> 16;
- }
-
- /*
- * We need to copy the data since we can't be sure the field
- * is null terminated.
- */
- if (*(val + size - 1)) {
- /* copy it */
- memcpy(arg->str.buffer, val, arg->str.field->size);
- /* the buffer is already NULL terminated */
- val = arg->str.buffer;
- }
-
- } else {
- event = arg->str.field->event;
- tep = event->tep;
- addr = get_value(event, arg->str.field, record);
-
- if (arg->str.field->flags & (TEP_FIELD_IS_POINTER | TEP_FIELD_IS_LONG))
- /* convert to a kernel symbol */
- val = tep_find_function(tep, addr);
-
- if (val == NULL) {
- /* just use the hex of the string name */
- snprintf(hex, 64, "0x%llx", addr);
- val = hex;
- }
- }
-
- return val;
-}
-
-static int test_str(struct tep_event *event, struct tep_filter_arg *arg,
- struct tep_record *record, enum tep_errno *err)
-{
- const char *val;
-
- if (arg->str.field == &comm)
- val = get_comm(event, record);
- else
- val = get_field_str(arg, record);
-
- switch (arg->str.type) {
- case TEP_FILTER_CMP_MATCH:
- return strcmp(val, arg->str.val) == 0;
-
- case TEP_FILTER_CMP_NOT_MATCH:
- return strcmp(val, arg->str.val) != 0;
-
- case TEP_FILTER_CMP_REGEX:
- /* Returns zero on match */
- return !regexec(&arg->str.reg, val, 0, NULL, 0);
-
- case TEP_FILTER_CMP_NOT_REGEX:
- return regexec(&arg->str.reg, val, 0, NULL, 0);
-
- default:
- if (!*err)
- *err = TEP_ERRNO__ILLEGAL_STRING_CMP;
- return 0;
- }
-}
-
-static int test_op(struct tep_event *event, struct tep_filter_arg *arg,
- struct tep_record *record, enum tep_errno *err)
-{
- switch (arg->op.type) {
- case TEP_FILTER_OP_AND:
- return test_filter(event, arg->op.left, record, err) &&
- test_filter(event, arg->op.right, record, err);
-
- case TEP_FILTER_OP_OR:
- return test_filter(event, arg->op.left, record, err) ||
- test_filter(event, arg->op.right, record, err);
-
- case TEP_FILTER_OP_NOT:
- return !test_filter(event, arg->op.right, record, err);
-
- default:
- if (!*err)
- *err = TEP_ERRNO__INVALID_OP_TYPE;
- return 0;
- }
-}
-
-static int test_filter(struct tep_event *event, struct tep_filter_arg *arg,
- struct tep_record *record, enum tep_errno *err)
-{
- if (*err) {
- /*
- * There was an error, no need to process anymore.
- */
- return 0;
- }
-
- switch (arg->type) {
- case TEP_FILTER_ARG_BOOLEAN:
- /* easy case */
- return arg->boolean.value;
-
- case TEP_FILTER_ARG_OP:
- return test_op(event, arg, record, err);
-
- case TEP_FILTER_ARG_NUM:
- return test_num(event, arg, record, err);
-
- case TEP_FILTER_ARG_STR:
- return test_str(event, arg, record, err);
-
- case TEP_FILTER_ARG_EXP:
- case TEP_FILTER_ARG_VALUE:
- case TEP_FILTER_ARG_FIELD:
- /*
- * Expressions, fields and values evaluate
- * to true if they return non zero
- */
- return !!get_arg_value(event, arg, record, err);
-
- default:
- if (!*err)
- *err = TEP_ERRNO__INVALID_ARG_TYPE;
- return 0;
- }
-}
-
-/**
- * tep_event_filtered - return true if event has filter
- * @filter: filter struct with filter information
- * @event_id: event id to test if filter exists
- *
- * Returns 1 if filter found for @event_id
- * otherwise 0;
- */
-int tep_event_filtered(struct tep_event_filter *filter, int event_id)
-{
- struct tep_filter_type *filter_type;
-
- if (!filter->filters)
- return 0;
-
- filter_type = find_filter_type(filter, event_id);
-
- return filter_type ? 1 : 0;
-}
-
-/**
- * tep_filter_match - test if a record matches a filter
- * @filter: filter struct with filter information
- * @record: the record to test against the filter
- *
- * Returns: match result or error code (prefixed with TEP_ERRNO__)
- * FILTER_MATCH - filter found for event and @record matches
- * FILTER_MISS - filter found for event and @record does not match
- * FILTER_NOT_FOUND - no filter found for @record's event
- * NO_FILTER - if no filters exist
- * otherwise - error occurred during test
- */
-enum tep_errno tep_filter_match(struct tep_event_filter *filter,
- struct tep_record *record)
-{
- struct tep_handle *tep = filter->tep;
- struct tep_filter_type *filter_type;
- int event_id;
- int ret;
- enum tep_errno err = 0;
-
- filter_init_error_buf(filter);
-
- if (!filter->filters)
- return TEP_ERRNO__NO_FILTER;
-
- event_id = tep_data_type(tep, record);
-
- filter_type = find_filter_type(filter, event_id);
- if (!filter_type)
- return TEP_ERRNO__FILTER_NOT_FOUND;
-
- ret = test_filter(filter_type->event, filter_type->filter, record, &err);
- if (err)
- return err;
-
- return ret ? TEP_ERRNO__FILTER_MATCH : TEP_ERRNO__FILTER_MISS;
-}
-
-static char *op_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg)
-{
- char *str = NULL;
- char *left = NULL;
- char *right = NULL;
- char *op = NULL;
- int left_val = -1;
- int right_val = -1;
- int val;
-
- switch (arg->op.type) {
- case TEP_FILTER_OP_AND:
- op = "&&";
- /* fall through */
- case TEP_FILTER_OP_OR:
- if (!op)
- op = "||";
-
- left = arg_to_str(filter, arg->op.left);
- right = arg_to_str(filter, arg->op.right);
- if (!left || !right)
- break;
-
- /* Try to consolidate boolean values */
- if (strcmp(left, "TRUE") == 0)
- left_val = 1;
- else if (strcmp(left, "FALSE") == 0)
- left_val = 0;
-
- if (strcmp(right, "TRUE") == 0)
- right_val = 1;
- else if (strcmp(right, "FALSE") == 0)
- right_val = 0;
-
- if (left_val >= 0) {
- if ((arg->op.type == TEP_FILTER_OP_AND && !left_val) ||
- (arg->op.type == TEP_FILTER_OP_OR && left_val)) {
- /* Just return left value */
- str = left;
- left = NULL;
- break;
- }
- if (right_val >= 0) {
- /* just evaluate this. */
- val = 0;
- switch (arg->op.type) {
- case TEP_FILTER_OP_AND:
- val = left_val && right_val;
- break;
- case TEP_FILTER_OP_OR:
- val = left_val || right_val;
- break;
- default:
- break;
- }
- if (asprintf(&str, val ? "TRUE" : "FALSE") < 0)
- str = NULL;
- break;
- }
- }
- if (right_val >= 0) {
- if ((arg->op.type == TEP_FILTER_OP_AND && !right_val) ||
- (arg->op.type == TEP_FILTER_OP_OR && right_val)) {
- /* Just return right value */
- str = right;
- right = NULL;
- break;
- }
- /* The right value is meaningless */
- str = left;
- left = NULL;
- break;
- }
-
- if (asprintf(&str, "(%s) %s (%s)", left, op, right) < 0)
- str = NULL;
- break;
-
- case TEP_FILTER_OP_NOT:
- op = "!";
- right = arg_to_str(filter, arg->op.right);
- if (!right)
- break;
-
- /* See if we can consolidate */
- if (strcmp(right, "TRUE") == 0)
- right_val = 1;
- else if (strcmp(right, "FALSE") == 0)
- right_val = 0;
- if (right_val >= 0) {
- /* just return the opposite */
- if (asprintf(&str, right_val ? "FALSE" : "TRUE") < 0)
- str = NULL;
- break;
- }
- if (asprintf(&str, "%s(%s)", op, right) < 0)
- str = NULL;
- break;
-
- default:
- /* ?? */
- break;
- }
- free(left);
- free(right);
- return str;
-}
-
-static char *val_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg)
-{
- char *str = NULL;
-
- if (asprintf(&str, "%lld", arg->value.val) < 0)
- str = NULL;
-
- return str;
-}
-
-static char *field_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg)
-{
- return strdup(arg->field.field->name);
-}
-
-static char *exp_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg)
-{
- char *lstr;
- char *rstr;
- char *op;
- char *str = NULL;
-
- lstr = arg_to_str(filter, arg->exp.left);
- rstr = arg_to_str(filter, arg->exp.right);
- if (!lstr || !rstr)
- goto out;
-
- switch (arg->exp.type) {
- case TEP_FILTER_EXP_ADD:
- op = "+";
- break;
- case TEP_FILTER_EXP_SUB:
- op = "-";
- break;
- case TEP_FILTER_EXP_MUL:
- op = "*";
- break;
- case TEP_FILTER_EXP_DIV:
- op = "/";
- break;
- case TEP_FILTER_EXP_MOD:
- op = "%";
- break;
- case TEP_FILTER_EXP_RSHIFT:
- op = ">>";
- break;
- case TEP_FILTER_EXP_LSHIFT:
- op = "<<";
- break;
- case TEP_FILTER_EXP_AND:
- op = "&";
- break;
- case TEP_FILTER_EXP_OR:
- op = "|";
- break;
- case TEP_FILTER_EXP_XOR:
- op = "^";
- break;
- default:
- op = "[ERROR IN EXPRESSION TYPE]";
- break;
- }
-
- if (asprintf(&str, "%s %s %s", lstr, op, rstr) < 0)
- str = NULL;
-out:
- free(lstr);
- free(rstr);
-
- return str;
-}
-
-static char *num_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg)
-{
- char *lstr;
- char *rstr;
- char *str = NULL;
- char *op = NULL;
-
- lstr = arg_to_str(filter, arg->num.left);
- rstr = arg_to_str(filter, arg->num.right);
- if (!lstr || !rstr)
- goto out;
-
- switch (arg->num.type) {
- case TEP_FILTER_CMP_EQ:
- op = "==";
- /* fall through */
- case TEP_FILTER_CMP_NE:
- if (!op)
- op = "!=";
- /* fall through */
- case TEP_FILTER_CMP_GT:
- if (!op)
- op = ">";
- /* fall through */
- case TEP_FILTER_CMP_LT:
- if (!op)
- op = "<";
- /* fall through */
- case TEP_FILTER_CMP_GE:
- if (!op)
- op = ">=";
- /* fall through */
- case TEP_FILTER_CMP_LE:
- if (!op)
- op = "<=";
-
- if (asprintf(&str, "%s %s %s", lstr, op, rstr) < 0)
- str = NULL;
- break;
-
- default:
- /* ?? */
- break;
- }
-
-out:
- free(lstr);
- free(rstr);
- return str;
-}
-
-static char *str_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg)
-{
- char *str = NULL;
- char *op = NULL;
-
- switch (arg->str.type) {
- case TEP_FILTER_CMP_MATCH:
- op = "==";
- /* fall through */
- case TEP_FILTER_CMP_NOT_MATCH:
- if (!op)
- op = "!=";
- /* fall through */
- case TEP_FILTER_CMP_REGEX:
- if (!op)
- op = "=~";
- /* fall through */
- case TEP_FILTER_CMP_NOT_REGEX:
- if (!op)
- op = "!~";
-
- if (asprintf(&str, "%s %s \"%s\"",
- arg->str.field->name, op, arg->str.val) < 0)
- str = NULL;
- break;
-
- default:
- /* ?? */
- break;
- }
- return str;
-}
-
-static char *arg_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg)
-{
- char *str = NULL;
-
- switch (arg->type) {
- case TEP_FILTER_ARG_BOOLEAN:
- if (asprintf(&str, arg->boolean.value ? "TRUE" : "FALSE") < 0)
- str = NULL;
- return str;
-
- case TEP_FILTER_ARG_OP:
- return op_to_str(filter, arg);
-
- case TEP_FILTER_ARG_NUM:
- return num_to_str(filter, arg);
-
- case TEP_FILTER_ARG_STR:
- return str_to_str(filter, arg);
-
- case TEP_FILTER_ARG_VALUE:
- return val_to_str(filter, arg);
-
- case TEP_FILTER_ARG_FIELD:
- return field_to_str(filter, arg);
-
- case TEP_FILTER_ARG_EXP:
- return exp_to_str(filter, arg);
-
- default:
- /* ?? */
- return NULL;
- }
-
-}
-
-/**
- * tep_filter_make_string - return a string showing the filter
- * @filter: filter struct with filter information
- * @event_id: the event id to return the filter string with
- *
- * Returns a string that displays the filter contents.
- * This string must be freed with free(str).
- * NULL is returned if no filter is found or allocation failed.
- */
-char *
-tep_filter_make_string(struct tep_event_filter *filter, int event_id)
-{
- struct tep_filter_type *filter_type;
-
- if (!filter->filters)
- return NULL;
-
- filter_type = find_filter_type(filter, event_id);
-
- if (!filter_type)
- return NULL;
-
- return arg_to_str(filter, filter_type->filter);
-}
-
-/**
- * tep_filter_compare - compare two filters and return if they are the same
- * @filter1: Filter to compare with @filter2
- * @filter2: Filter to compare with @filter1
- *
- * Returns:
- * 1 if the two filters hold the same content.
- * 0 if they do not.
- */
-int tep_filter_compare(struct tep_event_filter *filter1, struct tep_event_filter *filter2)
-{
- struct tep_filter_type *filter_type1;
- struct tep_filter_type *filter_type2;
- char *str1, *str2;
- int result;
- int i;
-
- /* Do the easy checks first */
- if (filter1->filters != filter2->filters)
- return 0;
- if (!filter1->filters && !filter2->filters)
- return 1;
-
- /*
- * Now take a look at each of the events to see if they have the same
- * filters to them.
- */
- for (i = 0; i < filter1->filters; i++) {
- filter_type1 = &filter1->event_filters[i];
- filter_type2 = find_filter_type(filter2, filter_type1->event_id);
- if (!filter_type2)
- break;
- if (filter_type1->filter->type != filter_type2->filter->type)
- break;
- /* The best way to compare complex filters is with strings */
- str1 = arg_to_str(filter1, filter_type1->filter);
- str2 = arg_to_str(filter2, filter_type2->filter);
- if (str1 && str2)
- result = strcmp(str1, str2) != 0;
- else
- /* bail out if allocation fails */
- result = 1;
-
- free(str1);
- free(str2);
- if (result)
- break;
- }
-
- if (i < filter1->filters)
- return 0;
- return 1;
-}
-
diff --git a/tools/lib/traceevent/parse-utils.c b/tools/lib/traceevent/parse-utils.c
deleted file mode 100644
index e99867111387..000000000000
--- a/tools/lib/traceevent/parse-utils.c
+++ /dev/null
@@ -1,71 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdarg.h>
-#include <errno.h>
-
-#define __weak __attribute__((weak))
-
-void __vwarning(const char *fmt, va_list ap)
-{
- if (errno)
- perror("libtraceevent");
- errno = 0;
-
- fprintf(stderr, " ");
- vfprintf(stderr, fmt, ap);
-
- fprintf(stderr, "\n");
-}
-
-void __warning(const char *fmt, ...)
-{
- va_list ap;
-
- va_start(ap, fmt);
- __vwarning(fmt, ap);
- va_end(ap);
-}
-
-void __weak warning(const char *fmt, ...)
-{
- va_list ap;
-
- va_start(ap, fmt);
- __vwarning(fmt, ap);
- va_end(ap);
-}
-
-void __vpr_stat(const char *fmt, va_list ap)
-{
- vprintf(fmt, ap);
- printf("\n");
-}
-
-void __pr_stat(const char *fmt, ...)
-{
- va_list ap;
-
- va_start(ap, fmt);
- __vpr_stat(fmt, ap);
- va_end(ap);
-}
-
-void __weak vpr_stat(const char *fmt, va_list ap)
-{
- __vpr_stat(fmt, ap);
-}
-
-void __weak pr_stat(const char *fmt, ...)
-{
- va_list ap;
-
- va_start(ap, fmt);
- __vpr_stat(fmt, ap);
- va_end(ap);
-}
diff --git a/tools/lib/traceevent/plugins/Build b/tools/lib/traceevent/plugins/Build
deleted file mode 100644
index dd4da823c38f..000000000000
--- a/tools/lib/traceevent/plugins/Build
+++ /dev/null
@@ -1,12 +0,0 @@
-plugin_jbd2-y += plugin_jbd2.o
-plugin_hrtimer-y += plugin_hrtimer.o
-plugin_kmem-y += plugin_kmem.o
-plugin_kvm-y += plugin_kvm.o
-plugin_mac80211-y += plugin_mac80211.o
-plugin_sched_switch-y += plugin_sched_switch.o
-plugin_function-y += plugin_function.o
-plugin_futex-y += plugin_futex.o
-plugin_xen-y += plugin_xen.o
-plugin_scsi-y += plugin_scsi.o
-plugin_cfg80211-y += plugin_cfg80211.o
-plugin_tlb-y += plugin_tlb.o \ No newline at end of file
diff --git a/tools/lib/traceevent/plugins/Makefile b/tools/lib/traceevent/plugins/Makefile
deleted file mode 100644
index 47e802553250..000000000000
--- a/tools/lib/traceevent/plugins/Makefile
+++ /dev/null
@@ -1,225 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-
-#MAKEFLAGS += --no-print-directory
-
-
-# Makefiles suck: This macro sets a default value of $(2) for the
-# variable named by $(1), unless the variable has been set by
-# environment or command line. This is necessary for CC and AR
-# because make sets default values, so the simpler ?= approach
-# won't work as expected.
-define allow-override
- $(if $(or $(findstring environment,$(origin $(1))),\
- $(findstring command line,$(origin $(1)))),,\
- $(eval $(1) = $(2)))
-endef
-
-# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix.
-$(call allow-override,CC,$(CROSS_COMPILE)gcc)
-$(call allow-override,AR,$(CROSS_COMPILE)ar)
-$(call allow-override,NM,$(CROSS_COMPILE)nm)
-$(call allow-override,PKG_CONFIG,pkg-config)
-
-EXT = -std=gnu99
-INSTALL = install
-
-# Use DESTDIR for installing into a different root directory.
-# This is useful for building a package. The program will be
-# installed in this directory as if it was the root directory.
-# Then the build tool can move it later.
-DESTDIR ?=
-DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
-
-LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1)
-ifeq ($(LP64), 1)
- libdir_relative_tmp = lib64
-else
- libdir_relative_tmp = lib
-endif
-
-libdir_relative ?= $(libdir_relative_tmp)
-prefix ?= /usr/local
-libdir = $(prefix)/$(libdir_relative)
-
-set_plugin_dir := 1
-
-# Set plugin_dir to preffered global plugin location
-# If we install under $HOME directory we go under
-# $(HOME)/.local/lib/traceevent/plugins
-#
-# We dont set PLUGIN_DIR in case we install under $HOME
-# directory, because by default the code looks under:
-# $(HOME)/.local/lib/traceevent/plugins by default.
-#
-ifeq ($(plugin_dir),)
-ifeq ($(prefix),$(HOME))
-override plugin_dir = $(HOME)/.local/lib/traceevent/plugins
-set_plugin_dir := 0
-else
-override plugin_dir = $(libdir)/traceevent/plugins
-endif
-endif
-
-ifeq ($(set_plugin_dir),1)
-PLUGIN_DIR = -DPLUGIN_DIR="$(plugin_dir)"
-PLUGIN_DIR_SQ = '$(subst ','\'',$(PLUGIN_DIR))'
-endif
-
-include ../../../scripts/Makefile.include
-
-# copy a bit from Linux kbuild
-
-ifeq ("$(origin V)", "command line")
- VERBOSE = $(V)
-endif
-ifndef VERBOSE
- VERBOSE = 0
-endif
-
-ifeq ($(srctree),)
-srctree := $(patsubst %/,%,$(dir $(CURDIR)))
-srctree := $(patsubst %/,%,$(dir $(srctree)))
-srctree := $(patsubst %/,%,$(dir $(srctree)))
-srctree := $(patsubst %/,%,$(dir $(srctree)))
-#$(info Determined 'srctree' to be $(srctree))
-endif
-
-export prefix libdir src obj
-
-# Shell quotes
-plugin_dir_SQ = $(subst ','\'',$(plugin_dir))
-
-CONFIG_INCLUDES =
-CONFIG_LIBS =
-CONFIG_FLAGS =
-
-OBJ = $@
-N =
-
-INCLUDES = -I. -I.. -I $(srctree)/tools/include $(CONFIG_INCLUDES)
-
-# Set compile option CFLAGS
-ifdef EXTRA_CFLAGS
- CFLAGS := $(EXTRA_CFLAGS)
-else
- CFLAGS := -g -Wall
-endif
-
-# Append required CFLAGS
-override CFLAGS += -fPIC
-override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ)
-override CFLAGS += $(udis86-flags) -D_GNU_SOURCE
-
-ifeq ($(VERBOSE),1)
- Q =
-else
- Q = @
-endif
-
-# Disable command line variables (CFLAGS) override from top
-# level Makefile (perf), otherwise build Makefile will get
-# the same command line setup.
-MAKEOVERRIDES=
-
-export srctree OUTPUT CC LD CFLAGS V
-
-build := -f $(srctree)/tools/build/Makefile.build dir=. obj
-
-DYNAMIC_LIST_FILE := $(OUTPUT)libtraceevent-dynamic-list
-
-PLUGINS = plugin_jbd2.so
-PLUGINS += plugin_hrtimer.so
-PLUGINS += plugin_kmem.so
-PLUGINS += plugin_kvm.so
-PLUGINS += plugin_mac80211.so
-PLUGINS += plugin_sched_switch.so
-PLUGINS += plugin_function.so
-PLUGINS += plugin_futex.so
-PLUGINS += plugin_xen.so
-PLUGINS += plugin_scsi.so
-PLUGINS += plugin_cfg80211.so
-PLUGINS += plugin_tlb.so
-
-PLUGINS := $(addprefix $(OUTPUT),$(PLUGINS))
-PLUGINS_IN := $(PLUGINS:.so=-in.o)
-
-plugins: $(PLUGINS) $(DYNAMIC_LIST_FILE)
-
-__plugin_obj = $(notdir $@)
- plugin_obj = $(__plugin_obj:-in.o=)
-
-$(PLUGINS_IN): force
- $(Q)$(MAKE) $(build)=$(plugin_obj)
-
-$(OUTPUT)libtraceevent-dynamic-list: $(PLUGINS)
- $(QUIET_GEN)$(call do_generate_dynamic_list_file, $(PLUGINS), $@)
-
-$(OUTPUT)%.so: $(OUTPUT)%-in.o
- $(QUIET_LINK)$(CC) $(CFLAGS) -shared $(LDFLAGS) -nostartfiles -o $@ $^
-
-define update_dir
- (echo $1 > $@.tmp; \
- if [ -r $@ ] && cmp -s $@ $@.tmp; then \
- rm -f $@.tmp; \
- else \
- echo ' UPDATE $@'; \
- mv -f $@.tmp $@; \
- fi);
-endef
-
-tags: force
- $(RM) tags
- find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \
- --regex-c++='/_PE\(([^,)]*).*/TEP_ERRNO__\1/'
-
-TAGS: force
- $(RM) TAGS
- find . -name '*.[ch]' | xargs etags \
- --regex='/_PE(\([^,)]*\).*/TEP_ERRNO__\1/'
-
-define do_install_mkdir
- if [ ! -d '$(DESTDIR_SQ)$1' ]; then \
- $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \
- fi
-endef
-
-define do_install
- $(call do_install_mkdir,$2); \
- $(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2'
-endef
-
-define do_install_plugins
- for plugin in $1; do \
- $(call do_install,$$plugin,$(plugin_dir_SQ)); \
- done
-endef
-
-define do_generate_dynamic_list_file
- symbol_type=`$(NM) -u -D $1 | awk 'NF>1 {print $$1}' | \
- xargs echo "U w W" | tr 'w ' 'W\n' | sort -u | xargs echo`;\
- if [ "$$symbol_type" = "U W" ];then \
- (echo '{'; \
- $(NM) -u -D $1 | awk 'NF>1 {sub("@.*", "", $$2); print "\t"$$2";"}' | sort -u;\
- echo '};'; \
- ) > $2; \
- else \
- (echo Either missing one of [$1] or bad version of $(NM)) 1>&2;\
- fi
-endef
-
-install: $(PLUGINS)
- $(call QUIET_INSTALL, trace_plugins) \
- $(call do_install_plugins, $(PLUGINS))
-
-clean:
- $(call QUIET_CLEAN, trace_plugins) \
- $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d .*.cmd; \
- $(RM) $(OUTPUT)libtraceevent-dynamic-list \
- $(RM) TRACEEVENT-CFLAGS tags TAGS;
-
-PHONY += force plugins
-force:
-
-# Declare the contents of the .PHONY variable as phony. We keep that
-# information in a variable so we can use it in if_changed and friends.
-.PHONY: $(PHONY)
diff --git a/tools/lib/traceevent/plugins/plugin_cfg80211.c b/tools/lib/traceevent/plugins/plugin_cfg80211.c
deleted file mode 100644
index 3d43b56a6c98..000000000000
--- a/tools/lib/traceevent/plugins/plugin_cfg80211.c
+++ /dev/null
@@ -1,43 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <stdio.h>
-#include <string.h>
-#include <inttypes.h>
-#include <endian.h>
-#include "event-parse.h"
-
-/*
- * From glibc endian.h, for older systems where it is not present, e.g.: RHEL5,
- * Fedora6.
- */
-#ifndef le16toh
-# if __BYTE_ORDER == __LITTLE_ENDIAN
-# define le16toh(x) (x)
-# else
-# define le16toh(x) __bswap_16 (x)
-# endif
-#endif
-
-
-static unsigned long long
-process___le16_to_cpup(struct trace_seq *s, unsigned long long *args)
-{
- uint16_t *val = (uint16_t *) (unsigned long) args[0];
- return val ? (long long) le16toh(*val) : 0;
-}
-
-int TEP_PLUGIN_LOADER(struct tep_handle *tep)
-{
- tep_register_print_function(tep,
- process___le16_to_cpup,
- TEP_FUNC_ARG_INT,
- "__le16_to_cpup",
- TEP_FUNC_ARG_PTR,
- TEP_FUNC_ARG_VOID);
- return 0;
-}
-
-void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
-{
- tep_unregister_print_function(tep, process___le16_to_cpup,
- "__le16_to_cpup");
-}
diff --git a/tools/lib/traceevent/plugins/plugin_function.c b/tools/lib/traceevent/plugins/plugin_function.c
deleted file mode 100644
index 807b16e1bf0f..000000000000
--- a/tools/lib/traceevent/plugins/plugin_function.c
+++ /dev/null
@@ -1,282 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "event-parse.h"
-#include "event-utils.h"
-#include "trace-seq.h"
-
-static struct func_stack {
- int size;
- char **stack;
-} *fstack;
-
-static int cpus = -1;
-
-#define STK_BLK 10
-
-struct tep_plugin_option plugin_options[] =
-{
- {
- .name = "parent",
- .plugin_alias = "ftrace",
- .description =
- "Print parent of functions for function events",
- },
- {
- .name = "indent",
- .plugin_alias = "ftrace",
- .description =
- "Try to show function call indents, based on parents",
- .set = 1,
- },
- {
- .name = "offset",
- .plugin_alias = "ftrace",
- .description =
- "Show function names as well as their offsets",
- .set = 0,
- },
- {
- .name = NULL,
- }
-};
-
-static struct tep_plugin_option *ftrace_parent = &plugin_options[0];
-static struct tep_plugin_option *ftrace_indent = &plugin_options[1];
-static struct tep_plugin_option *ftrace_offset = &plugin_options[2];
-
-static void add_child(struct func_stack *stack, const char *child, int pos)
-{
- int i;
-
- if (!child)
- return;
-
- if (pos < stack->size)
- free(stack->stack[pos]);
- else {
- char **ptr;
-
- ptr = realloc(stack->stack, sizeof(char *) *
- (stack->size + STK_BLK));
- if (!ptr) {
- warning("could not allocate plugin memory\n");
- return;
- }
-
- stack->stack = ptr;
-
- for (i = stack->size; i < stack->size + STK_BLK; i++)
- stack->stack[i] = NULL;
- stack->size += STK_BLK;
- }
-
- stack->stack[pos] = strdup(child);
-}
-
-static int add_and_get_index(const char *parent, const char *child, int cpu)
-{
- int i;
-
- if (cpu < 0)
- return 0;
-
- if (cpu > cpus) {
- struct func_stack *ptr;
-
- ptr = realloc(fstack, sizeof(*fstack) * (cpu + 1));
- if (!ptr) {
- warning("could not allocate plugin memory\n");
- return 0;
- }
-
- fstack = ptr;
-
- /* Account for holes in the cpu count */
- for (i = cpus + 1; i <= cpu; i++)
- memset(&fstack[i], 0, sizeof(fstack[i]));
- cpus = cpu;
- }
-
- for (i = 0; i < fstack[cpu].size && fstack[cpu].stack[i]; i++) {
- if (strcmp(parent, fstack[cpu].stack[i]) == 0) {
- add_child(&fstack[cpu], child, i+1);
- return i;
- }
- }
-
- /* Not found */
- add_child(&fstack[cpu], parent, 0);
- add_child(&fstack[cpu], child, 1);
- return 0;
-}
-
-static void show_function(struct trace_seq *s, struct tep_handle *tep,
- const char *func, unsigned long long function)
-{
- unsigned long long offset;
-
- trace_seq_printf(s, "%s", func);
- if (ftrace_offset->set) {
- offset = tep_find_function_address(tep, function);
- trace_seq_printf(s, "+0x%x ", (int)(function - offset));
- }
-}
-
-static int function_handler(struct trace_seq *s, struct tep_record *record,
- struct tep_event *event, void *context)
-{
- struct tep_handle *tep = event->tep;
- unsigned long long function;
- unsigned long long pfunction;
- const char *func;
- const char *parent;
- int index = 0;
-
- if (tep_get_field_val(s, event, "ip", record, &function, 1))
- return trace_seq_putc(s, '!');
-
- func = tep_find_function(tep, function);
-
- if (tep_get_field_val(s, event, "parent_ip", record, &pfunction, 1))
- return trace_seq_putc(s, '!');
-
- parent = tep_find_function(tep, pfunction);
-
- if (parent && ftrace_indent->set)
- index = add_and_get_index(parent, func, record->cpu);
-
- trace_seq_printf(s, "%*s", index*3, "");
-
- if (func)
- show_function(s, tep, func, function);
- else
- trace_seq_printf(s, "0x%llx", function);
-
- if (ftrace_parent->set) {
- trace_seq_printf(s, " <-- ");
- if (parent)
- show_function(s, tep, parent, pfunction);
- else
- trace_seq_printf(s, "0x%llx", pfunction);
- }
-
- return 0;
-}
-
-static int
-trace_stack_handler(struct trace_seq *s, struct tep_record *record,
- struct tep_event *event, void *context)
-{
- struct tep_format_field *field;
- unsigned long long addr;
- const char *func;
- int long_size;
- void *data = record->data;
-
- field = tep_find_any_field(event, "caller");
- if (!field) {
- trace_seq_printf(s, "<CANT FIND FIELD %s>", "caller");
- return 0;
- }
-
- trace_seq_puts(s, "<stack trace >\n");
-
- long_size = tep_get_long_size(event->tep);
-
- for (data += field->offset; data < record->data + record->size;
- data += long_size) {
- addr = tep_read_number(event->tep, data, long_size);
-
- if ((long_size == 8 && addr == (unsigned long long)-1) ||
- ((int)addr == -1))
- break;
-
- func = tep_find_function(event->tep, addr);
- if (func)
- trace_seq_printf(s, "=> %s (%llx)\n", func, addr);
- else
- trace_seq_printf(s, "=> %llx\n", addr);
- }
-
- return 0;
-}
-
-static int
-trace_raw_data_handler(struct trace_seq *s, struct tep_record *record,
- struct tep_event *event, void *context)
-{
- struct tep_format_field *field;
- unsigned long long id;
- int long_size;
- void *data = record->data;
-
- if (tep_get_field_val(s, event, "id", record, &id, 1))
- return trace_seq_putc(s, '!');
-
- trace_seq_printf(s, "# %llx", id);
-
- field = tep_find_any_field(event, "buf");
- if (!field) {
- trace_seq_printf(s, "<CANT FIND FIELD %s>", "buf");
- return 0;
- }
-
- long_size = tep_get_long_size(event->tep);
-
- for (data += field->offset; data < record->data + record->size;
- data += long_size) {
- int size = sizeof(long);
- int left = (record->data + record->size) - data;
- int i;
-
- if (size > left)
- size = left;
-
- for (i = 0; i < size; i++)
- trace_seq_printf(s, " %02x", *(unsigned char *)(data + i));
- }
-
- return 0;
-}
-
-int TEP_PLUGIN_LOADER(struct tep_handle *tep)
-{
- tep_register_event_handler(tep, -1, "ftrace", "function",
- function_handler, NULL);
-
- tep_register_event_handler(tep, -1, "ftrace", "kernel_stack",
- trace_stack_handler, NULL);
-
- tep_register_event_handler(tep, -1, "ftrace", "raw_data",
- trace_raw_data_handler, NULL);
-
- tep_plugin_add_options("ftrace", plugin_options);
-
- return 0;
-}
-
-void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
-{
- int i, x;
-
- tep_unregister_event_handler(tep, -1, "ftrace", "function",
- function_handler, NULL);
-
- for (i = 0; i <= cpus; i++) {
- for (x = 0; x < fstack[i].size && fstack[i].stack[x]; x++)
- free(fstack[i].stack[x]);
- free(fstack[i].stack);
- }
-
- tep_plugin_remove_options(plugin_options);
-
- free(fstack);
- fstack = NULL;
- cpus = -1;
-}
diff --git a/tools/lib/traceevent/plugins/plugin_futex.c b/tools/lib/traceevent/plugins/plugin_futex.c
deleted file mode 100644
index eb7c9f8a850a..000000000000
--- a/tools/lib/traceevent/plugins/plugin_futex.c
+++ /dev/null
@@ -1,123 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * Copyright (C) 2017 National Instruments Corp.
- *
- * Author: Julia Cartwright <julia@ni.com>
- *
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <linux/futex.h>
-
-#include "event-parse.h"
-
-#define ARRAY_SIZE(_a) (sizeof(_a) / sizeof((_a)[0]))
-
-struct futex_args {
- unsigned long long uaddr;
- unsigned long long op;
- unsigned long long val;
- unsigned long long utime; /* or val2 */
- unsigned long long uaddr2;
- unsigned long long val3;
-};
-
-struct futex_op {
- const char *name;
- const char *fmt_val;
- const char *fmt_utime;
- const char *fmt_uaddr2;
- const char *fmt_val3;
-};
-
-static const struct futex_op futex_op_tbl[] = {
- { "FUTEX_WAIT", " val=0x%08llx", " utime=0x%08llx", NULL, NULL },
- { "FUTEX_WAKE", " val=%llu", NULL, NULL, NULL },
- { "FUTEX_FD", " val=%llu", NULL, NULL, NULL },
- { "FUTEX_REQUEUE", " val=%llu", " val2=%llu", " uaddr2=0x%08llx", NULL },
- { "FUTEX_CMP_REQUEUE", " val=%llu", " val2=%llu", " uaddr2=0x%08llx", " val3=0x%08llx" },
- { "FUTEX_WAKE_OP", " val=%llu", " val2=%llu", " uaddr2=0x%08llx", " val3=0x%08llx" },
- { "FUTEX_LOCK_PI", NULL, " utime=0x%08llx", NULL, NULL },
- { "FUTEX_UNLOCK_PI", NULL, NULL, NULL, NULL },
- { "FUTEX_TRYLOCK_PI", NULL, NULL, NULL, NULL },
- { "FUTEX_WAIT_BITSET", " val=0x%08llx", " utime=0x%08llx", NULL, " val3=0x%08llx" },
- { "FUTEX_WAKE_BITSET", " val=%llu", NULL, NULL, " val3=0x%08llx" },
- { "FUTEX_WAIT_REQUEUE_PI", " val=0x%08llx", " utime=0x%08llx", " uaddr2=0x%08llx", " val3=0x%08llx" },
- { "FUTEX_CMP_REQUEUE_PI", " val=%llu", " val2=%llu", " uaddr2=0x%08llx", " val3=0x%08llx" },
-};
-
-
-static void futex_print(struct trace_seq *s, const struct futex_args *args,
- const struct futex_op *fop)
-{
- trace_seq_printf(s, " uaddr=0x%08llx", args->uaddr);
-
- if (fop->fmt_val)
- trace_seq_printf(s, fop->fmt_val, args->val);
-
- if (fop->fmt_utime)
- trace_seq_printf(s,fop->fmt_utime, args->utime);
-
- if (fop->fmt_uaddr2)
- trace_seq_printf(s, fop->fmt_uaddr2, args->uaddr2);
-
- if (fop->fmt_val3)
- trace_seq_printf(s, fop->fmt_val3, args->val3);
-}
-
-static int futex_handler(struct trace_seq *s, struct tep_record *record,
- struct tep_event *event, void *context)
-{
- const struct futex_op *fop;
- struct futex_args args;
- unsigned long long cmd;
-
- if (tep_get_field_val(s, event, "uaddr", record, &args.uaddr, 1))
- return 1;
-
- if (tep_get_field_val(s, event, "op", record, &args.op, 1))
- return 1;
-
- if (tep_get_field_val(s, event, "val", record, &args.val, 1))
- return 1;
-
- if (tep_get_field_val(s, event, "utime", record, &args.utime, 1))
- return 1;
-
- if (tep_get_field_val(s, event, "uaddr2", record, &args.uaddr2, 1))
- return 1;
-
- if (tep_get_field_val(s, event, "val3", record, &args.val3, 1))
- return 1;
-
- cmd = args.op & FUTEX_CMD_MASK;
- if (cmd >= ARRAY_SIZE(futex_op_tbl))
- return 1;
-
- fop = &futex_op_tbl[cmd];
-
- trace_seq_printf(s, "op=%s", fop->name);
-
- if (args.op & FUTEX_PRIVATE_FLAG)
- trace_seq_puts(s, "|FUTEX_PRIVATE_FLAG");
-
- if (args.op & FUTEX_CLOCK_REALTIME)
- trace_seq_puts(s, "|FUTEX_CLOCK_REALTIME");
-
- futex_print(s, &args, fop);
- return 0;
-}
-
-int TEP_PLUGIN_LOADER(struct tep_handle *tep)
-{
- tep_register_event_handler(tep, -1, "syscalls", "sys_enter_futex",
- futex_handler, NULL);
- return 0;
-}
-
-void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
-{
- tep_unregister_event_handler(tep, -1, "syscalls", "sys_enter_futex",
- futex_handler, NULL);
-}
diff --git a/tools/lib/traceevent/plugins/plugin_hrtimer.c b/tools/lib/traceevent/plugins/plugin_hrtimer.c
deleted file mode 100644
index d98466788f14..000000000000
--- a/tools/lib/traceevent/plugins/plugin_hrtimer.c
+++ /dev/null
@@ -1,74 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- * Copyright (C) 2009 Johannes Berg <johannes@sipsolutions.net>
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "event-parse.h"
-#include "trace-seq.h"
-
-static int timer_expire_handler(struct trace_seq *s,
- struct tep_record *record,
- struct tep_event *event, void *context)
-{
- trace_seq_printf(s, "hrtimer=");
-
- if (tep_print_num_field(s, "0x%llx", event, "timer",
- record, 0) == -1)
- tep_print_num_field(s, "0x%llx", event, "hrtimer",
- record, 1);
-
- trace_seq_printf(s, " now=");
-
- tep_print_num_field(s, "%llu", event, "now", record, 1);
-
- tep_print_func_field(s, " function=%s", event, "function",
- record, 0);
- return 0;
-}
-
-static int timer_start_handler(struct trace_seq *s,
- struct tep_record *record,
- struct tep_event *event, void *context)
-{
- trace_seq_printf(s, "hrtimer=");
-
- if (tep_print_num_field(s, "0x%llx", event, "timer",
- record, 0) == -1)
- tep_print_num_field(s, "0x%llx", event, "hrtimer",
- record, 1);
-
- tep_print_func_field(s, " function=%s", event, "function",
- record, 0);
-
- trace_seq_printf(s, " expires=");
- tep_print_num_field(s, "%llu", event, "expires", record, 1);
-
- trace_seq_printf(s, " softexpires=");
- tep_print_num_field(s, "%llu", event, "softexpires", record, 1);
- return 0;
-}
-
-int TEP_PLUGIN_LOADER(struct tep_handle *tep)
-{
- tep_register_event_handler(tep, -1,
- "timer", "hrtimer_expire_entry",
- timer_expire_handler, NULL);
-
- tep_register_event_handler(tep, -1, "timer", "hrtimer_start",
- timer_start_handler, NULL);
- return 0;
-}
-
-void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
-{
- tep_unregister_event_handler(tep, -1,
- "timer", "hrtimer_expire_entry",
- timer_expire_handler, NULL);
-
- tep_unregister_event_handler(tep, -1, "timer", "hrtimer_start",
- timer_start_handler, NULL);
-}
diff --git a/tools/lib/traceevent/plugins/plugin_jbd2.c b/tools/lib/traceevent/plugins/plugin_jbd2.c
deleted file mode 100644
index 69111a68d3cf..000000000000
--- a/tools/lib/traceevent/plugins/plugin_jbd2.c
+++ /dev/null
@@ -1,61 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "event-parse.h"
-#include "trace-seq.h"
-
-#define MINORBITS 20
-#define MINORMASK ((1U << MINORBITS) - 1)
-
-#define MAJOR(dev) ((unsigned int) ((dev) >> MINORBITS))
-#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK))
-
-static unsigned long long
-process_jbd2_dev_to_name(struct trace_seq *s, unsigned long long *args)
-{
- unsigned int dev = args[0];
-
- trace_seq_printf(s, "%d:%d", MAJOR(dev), MINOR(dev));
- return 0;
-}
-
-static unsigned long long
-process_jiffies_to_msecs(struct trace_seq *s, unsigned long long *args)
-{
- unsigned long long jiffies = args[0];
-
- trace_seq_printf(s, "%lld", jiffies);
- return jiffies;
-}
-
-int TEP_PLUGIN_LOADER(struct tep_handle *tep)
-{
- tep_register_print_function(tep,
- process_jbd2_dev_to_name,
- TEP_FUNC_ARG_STRING,
- "jbd2_dev_to_name",
- TEP_FUNC_ARG_INT,
- TEP_FUNC_ARG_VOID);
-
- tep_register_print_function(tep,
- process_jiffies_to_msecs,
- TEP_FUNC_ARG_LONG,
- "jiffies_to_msecs",
- TEP_FUNC_ARG_LONG,
- TEP_FUNC_ARG_VOID);
- return 0;
-}
-
-void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
-{
- tep_unregister_print_function(tep, process_jbd2_dev_to_name,
- "jbd2_dev_to_name");
-
- tep_unregister_print_function(tep, process_jiffies_to_msecs,
- "jiffies_to_msecs");
-}
diff --git a/tools/lib/traceevent/plugins/plugin_kmem.c b/tools/lib/traceevent/plugins/plugin_kmem.c
deleted file mode 100644
index 4b4f7f9616e3..000000000000
--- a/tools/lib/traceevent/plugins/plugin_kmem.c
+++ /dev/null
@@ -1,80 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "event-parse.h"
-#include "trace-seq.h"
-
-static int call_site_handler(struct trace_seq *s, struct tep_record *record,
- struct tep_event *event, void *context)
-{
- struct tep_format_field *field;
- unsigned long long val, addr;
- void *data = record->data;
- const char *func;
-
- field = tep_find_field(event, "call_site");
- if (!field)
- return 1;
-
- if (tep_read_number_field(field, data, &val))
- return 1;
-
- func = tep_find_function(event->tep, val);
- if (!func)
- return 1;
-
- addr = tep_find_function_address(event->tep, val);
-
- trace_seq_printf(s, "(%s+0x%x) ", func, (int)(val - addr));
- return 1;
-}
-
-int TEP_PLUGIN_LOADER(struct tep_handle *tep)
-{
- tep_register_event_handler(tep, -1, "kmem", "kfree",
- call_site_handler, NULL);
-
- tep_register_event_handler(tep, -1, "kmem", "kmalloc",
- call_site_handler, NULL);
-
- tep_register_event_handler(tep, -1, "kmem", "kmalloc_node",
- call_site_handler, NULL);
-
- tep_register_event_handler(tep, -1, "kmem", "kmem_cache_alloc",
- call_site_handler, NULL);
-
- tep_register_event_handler(tep, -1, "kmem",
- "kmem_cache_alloc_node",
- call_site_handler, NULL);
-
- tep_register_event_handler(tep, -1, "kmem", "kmem_cache_free",
- call_site_handler, NULL);
- return 0;
-}
-
-void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
-{
- tep_unregister_event_handler(tep, -1, "kmem", "kfree",
- call_site_handler, NULL);
-
- tep_unregister_event_handler(tep, -1, "kmem", "kmalloc",
- call_site_handler, NULL);
-
- tep_unregister_event_handler(tep, -1, "kmem", "kmalloc_node",
- call_site_handler, NULL);
-
- tep_unregister_event_handler(tep, -1, "kmem", "kmem_cache_alloc",
- call_site_handler, NULL);
-
- tep_unregister_event_handler(tep, -1, "kmem",
- "kmem_cache_alloc_node",
- call_site_handler, NULL);
-
- tep_unregister_event_handler(tep, -1, "kmem", "kmem_cache_free",
- call_site_handler, NULL);
-}
diff --git a/tools/lib/traceevent/plugins/plugin_kvm.c b/tools/lib/traceevent/plugins/plugin_kvm.c
deleted file mode 100644
index 51ceeb9147eb..000000000000
--- a/tools/lib/traceevent/plugins/plugin_kvm.c
+++ /dev/null
@@ -1,527 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdint.h>
-
-#include "event-parse.h"
-#include "trace-seq.h"
-
-#ifdef HAVE_UDIS86
-
-#include <udis86.h>
-
-static ud_t ud;
-
-static void init_disassembler(void)
-{
- ud_init(&ud);
- ud_set_syntax(&ud, UD_SYN_ATT);
-}
-
-static const char *disassemble(unsigned char *insn, int len, uint64_t rip,
- int cr0_pe, int eflags_vm,
- int cs_d, int cs_l)
-{
- int mode;
-
- if (!cr0_pe)
- mode = 16;
- else if (eflags_vm)
- mode = 16;
- else if (cs_l)
- mode = 64;
- else if (cs_d)
- mode = 32;
- else
- mode = 16;
-
- ud_set_pc(&ud, rip);
- ud_set_mode(&ud, mode);
- ud_set_input_buffer(&ud, insn, len);
- ud_disassemble(&ud);
- return ud_insn_asm(&ud);
-}
-
-#else
-
-static void init_disassembler(void)
-{
-}
-
-static const char *disassemble(unsigned char *insn, int len, uint64_t rip,
- int cr0_pe, int eflags_vm,
- int cs_d, int cs_l)
-{
- static char out[15*3+1];
- int i;
-
- for (i = 0; i < len; ++i)
- sprintf(out + i * 3, "%02x ", insn[i]);
- out[len*3-1] = '\0';
- return out;
-}
-
-#endif
-
-
-#define VMX_EXIT_REASONS \
- _ER(EXCEPTION_NMI, 0) \
- _ER(EXTERNAL_INTERRUPT, 1) \
- _ER(TRIPLE_FAULT, 2) \
- _ER(PENDING_INTERRUPT, 7) \
- _ER(NMI_WINDOW, 8) \
- _ER(TASK_SWITCH, 9) \
- _ER(CPUID, 10) \
- _ER(HLT, 12) \
- _ER(INVD, 13) \
- _ER(INVLPG, 14) \
- _ER(RDPMC, 15) \
- _ER(RDTSC, 16) \
- _ER(VMCALL, 18) \
- _ER(VMCLEAR, 19) \
- _ER(VMLAUNCH, 20) \
- _ER(VMPTRLD, 21) \
- _ER(VMPTRST, 22) \
- _ER(VMREAD, 23) \
- _ER(VMRESUME, 24) \
- _ER(VMWRITE, 25) \
- _ER(VMOFF, 26) \
- _ER(VMON, 27) \
- _ER(CR_ACCESS, 28) \
- _ER(DR_ACCESS, 29) \
- _ER(IO_INSTRUCTION, 30) \
- _ER(MSR_READ, 31) \
- _ER(MSR_WRITE, 32) \
- _ER(MWAIT_INSTRUCTION, 36) \
- _ER(MONITOR_INSTRUCTION, 39) \
- _ER(PAUSE_INSTRUCTION, 40) \
- _ER(MCE_DURING_VMENTRY, 41) \
- _ER(TPR_BELOW_THRESHOLD, 43) \
- _ER(APIC_ACCESS, 44) \
- _ER(EOI_INDUCED, 45) \
- _ER(EPT_VIOLATION, 48) \
- _ER(EPT_MISCONFIG, 49) \
- _ER(INVEPT, 50) \
- _ER(PREEMPTION_TIMER, 52) \
- _ER(WBINVD, 54) \
- _ER(XSETBV, 55) \
- _ER(APIC_WRITE, 56) \
- _ER(INVPCID, 58) \
- _ER(PML_FULL, 62) \
- _ER(XSAVES, 63) \
- _ER(XRSTORS, 64)
-
-#define SVM_EXIT_REASONS \
- _ER(EXIT_READ_CR0, 0x000) \
- _ER(EXIT_READ_CR3, 0x003) \
- _ER(EXIT_READ_CR4, 0x004) \
- _ER(EXIT_READ_CR8, 0x008) \
- _ER(EXIT_WRITE_CR0, 0x010) \
- _ER(EXIT_WRITE_CR3, 0x013) \
- _ER(EXIT_WRITE_CR4, 0x014) \
- _ER(EXIT_WRITE_CR8, 0x018) \
- _ER(EXIT_READ_DR0, 0x020) \
- _ER(EXIT_READ_DR1, 0x021) \
- _ER(EXIT_READ_DR2, 0x022) \
- _ER(EXIT_READ_DR3, 0x023) \
- _ER(EXIT_READ_DR4, 0x024) \
- _ER(EXIT_READ_DR5, 0x025) \
- _ER(EXIT_READ_DR6, 0x026) \
- _ER(EXIT_READ_DR7, 0x027) \
- _ER(EXIT_WRITE_DR0, 0x030) \
- _ER(EXIT_WRITE_DR1, 0x031) \
- _ER(EXIT_WRITE_DR2, 0x032) \
- _ER(EXIT_WRITE_DR3, 0x033) \
- _ER(EXIT_WRITE_DR4, 0x034) \
- _ER(EXIT_WRITE_DR5, 0x035) \
- _ER(EXIT_WRITE_DR6, 0x036) \
- _ER(EXIT_WRITE_DR7, 0x037) \
- _ER(EXIT_EXCP_DE, 0x040) \
- _ER(EXIT_EXCP_DB, 0x041) \
- _ER(EXIT_EXCP_BP, 0x043) \
- _ER(EXIT_EXCP_OF, 0x044) \
- _ER(EXIT_EXCP_BR, 0x045) \
- _ER(EXIT_EXCP_UD, 0x046) \
- _ER(EXIT_EXCP_NM, 0x047) \
- _ER(EXIT_EXCP_DF, 0x048) \
- _ER(EXIT_EXCP_TS, 0x04a) \
- _ER(EXIT_EXCP_NP, 0x04b) \
- _ER(EXIT_EXCP_SS, 0x04c) \
- _ER(EXIT_EXCP_GP, 0x04d) \
- _ER(EXIT_EXCP_PF, 0x04e) \
- _ER(EXIT_EXCP_MF, 0x050) \
- _ER(EXIT_EXCP_AC, 0x051) \
- _ER(EXIT_EXCP_MC, 0x052) \
- _ER(EXIT_EXCP_XF, 0x053) \
- _ER(EXIT_INTR, 0x060) \
- _ER(EXIT_NMI, 0x061) \
- _ER(EXIT_SMI, 0x062) \
- _ER(EXIT_INIT, 0x063) \
- _ER(EXIT_VINTR, 0x064) \
- _ER(EXIT_CR0_SEL_WRITE, 0x065) \
- _ER(EXIT_IDTR_READ, 0x066) \
- _ER(EXIT_GDTR_READ, 0x067) \
- _ER(EXIT_LDTR_READ, 0x068) \
- _ER(EXIT_TR_READ, 0x069) \
- _ER(EXIT_IDTR_WRITE, 0x06a) \
- _ER(EXIT_GDTR_WRITE, 0x06b) \
- _ER(EXIT_LDTR_WRITE, 0x06c) \
- _ER(EXIT_TR_WRITE, 0x06d) \
- _ER(EXIT_RDTSC, 0x06e) \
- _ER(EXIT_RDPMC, 0x06f) \
- _ER(EXIT_PUSHF, 0x070) \
- _ER(EXIT_POPF, 0x071) \
- _ER(EXIT_CPUID, 0x072) \
- _ER(EXIT_RSM, 0x073) \
- _ER(EXIT_IRET, 0x074) \
- _ER(EXIT_SWINT, 0x075) \
- _ER(EXIT_INVD, 0x076) \
- _ER(EXIT_PAUSE, 0x077) \
- _ER(EXIT_HLT, 0x078) \
- _ER(EXIT_INVLPG, 0x079) \
- _ER(EXIT_INVLPGA, 0x07a) \
- _ER(EXIT_IOIO, 0x07b) \
- _ER(EXIT_MSR, 0x07c) \
- _ER(EXIT_TASK_SWITCH, 0x07d) \
- _ER(EXIT_FERR_FREEZE, 0x07e) \
- _ER(EXIT_SHUTDOWN, 0x07f) \
- _ER(EXIT_VMRUN, 0x080) \
- _ER(EXIT_VMMCALL, 0x081) \
- _ER(EXIT_VMLOAD, 0x082) \
- _ER(EXIT_VMSAVE, 0x083) \
- _ER(EXIT_STGI, 0x084) \
- _ER(EXIT_CLGI, 0x085) \
- _ER(EXIT_SKINIT, 0x086) \
- _ER(EXIT_RDTSCP, 0x087) \
- _ER(EXIT_ICEBP, 0x088) \
- _ER(EXIT_WBINVD, 0x089) \
- _ER(EXIT_MONITOR, 0x08a) \
- _ER(EXIT_MWAIT, 0x08b) \
- _ER(EXIT_MWAIT_COND, 0x08c) \
- _ER(EXIT_XSETBV, 0x08d) \
- _ER(EXIT_NPF, 0x400) \
- _ER(EXIT_AVIC_INCOMPLETE_IPI, 0x401) \
- _ER(EXIT_AVIC_UNACCELERATED_ACCESS, 0x402) \
- _ER(EXIT_ERR, -1)
-
-#define _ER(reason, val) { #reason, val },
-struct str_values {
- const char *str;
- int val;
-};
-
-static struct str_values vmx_exit_reasons[] = {
- VMX_EXIT_REASONS
- { NULL, -1}
-};
-
-static struct str_values svm_exit_reasons[] = {
- SVM_EXIT_REASONS
- { NULL, -1}
-};
-
-static struct isa_exit_reasons {
- unsigned isa;
- struct str_values *strings;
-} isa_exit_reasons[] = {
- { .isa = 1, .strings = vmx_exit_reasons },
- { .isa = 2, .strings = svm_exit_reasons },
- { }
-};
-
-static const char *find_exit_reason(unsigned isa, int val)
-{
- struct str_values *strings = NULL;
- int i;
-
- for (i = 0; isa_exit_reasons[i].strings; ++i)
- if (isa_exit_reasons[i].isa == isa) {
- strings = isa_exit_reasons[i].strings;
- break;
- }
- if (!strings)
- return "UNKNOWN-ISA";
- for (i = 0; strings[i].str; i++)
- if (strings[i].val == val)
- break;
-
- return strings[i].str;
-}
-
-static int print_exit_reason(struct trace_seq *s, struct tep_record *record,
- struct tep_event *event, const char *field)
-{
- unsigned long long isa;
- unsigned long long val;
- const char *reason;
-
- if (tep_get_field_val(s, event, field, record, &val, 1) < 0)
- return -1;
-
- if (tep_get_field_val(s, event, "isa", record, &isa, 0) < 0)
- isa = 1;
-
- reason = find_exit_reason(isa, val);
- if (reason)
- trace_seq_printf(s, "reason %s", reason);
- else
- trace_seq_printf(s, "reason UNKNOWN (%llu)", val);
- return 0;
-}
-
-static int kvm_exit_handler(struct trace_seq *s, struct tep_record *record,
- struct tep_event *event, void *context)
-{
- unsigned long long info1 = 0, info2 = 0;
-
- if (print_exit_reason(s, record, event, "exit_reason") < 0)
- return -1;
-
- tep_print_num_field(s, " rip 0x%lx", event, "guest_rip", record, 1);
-
- if (tep_get_field_val(s, event, "info1", record, &info1, 0) >= 0
- && tep_get_field_val(s, event, "info2", record, &info2, 0) >= 0)
- trace_seq_printf(s, " info %llx %llx", info1, info2);
-
- return 0;
-}
-
-#define KVM_EMUL_INSN_F_CR0_PE (1 << 0)
-#define KVM_EMUL_INSN_F_EFL_VM (1 << 1)
-#define KVM_EMUL_INSN_F_CS_D (1 << 2)
-#define KVM_EMUL_INSN_F_CS_L (1 << 3)
-
-static int kvm_emulate_insn_handler(struct trace_seq *s,
- struct tep_record *record,
- struct tep_event *event, void *context)
-{
- unsigned long long rip, csbase, len, flags, failed;
- int llen;
- uint8_t *insn;
- const char *disasm;
-
- if (tep_get_field_val(s, event, "rip", record, &rip, 1) < 0)
- return -1;
-
- if (tep_get_field_val(s, event, "csbase", record, &csbase, 1) < 0)
- return -1;
-
- if (tep_get_field_val(s, event, "len", record, &len, 1) < 0)
- return -1;
-
- if (tep_get_field_val(s, event, "flags", record, &flags, 1) < 0)
- return -1;
-
- if (tep_get_field_val(s, event, "failed", record, &failed, 1) < 0)
- return -1;
-
- insn = tep_get_field_raw(s, event, "insn", record, &llen, 1);
- if (!insn)
- return -1;
-
- disasm = disassemble(insn, len, rip,
- flags & KVM_EMUL_INSN_F_CR0_PE,
- flags & KVM_EMUL_INSN_F_EFL_VM,
- flags & KVM_EMUL_INSN_F_CS_D,
- flags & KVM_EMUL_INSN_F_CS_L);
-
- trace_seq_printf(s, "%llx:%llx: %s%s", csbase, rip, disasm,
- failed ? " FAIL" : "");
- return 0;
-}
-
-
-static int kvm_nested_vmexit_inject_handler(struct trace_seq *s, struct tep_record *record,
- struct tep_event *event, void *context)
-{
- if (print_exit_reason(s, record, event, "exit_code") < 0)
- return -1;
-
- tep_print_num_field(s, " info1 %llx", event, "exit_info1", record, 1);
- tep_print_num_field(s, " info2 %llx", event, "exit_info2", record, 1);
- tep_print_num_field(s, " int_info %llx", event, "exit_int_info", record, 1);
- tep_print_num_field(s, " int_info_err %llx", event, "exit_int_info_err", record, 1);
-
- return 0;
-}
-
-static int kvm_nested_vmexit_handler(struct trace_seq *s, struct tep_record *record,
- struct tep_event *event, void *context)
-{
- tep_print_num_field(s, "rip %llx ", event, "rip", record, 1);
-
- return kvm_nested_vmexit_inject_handler(s, record, event, context);
-}
-
-union kvm_mmu_page_role {
- unsigned word;
- struct {
- unsigned level:4;
- unsigned cr4_pae:1;
- unsigned quadrant:2;
- unsigned direct:1;
- unsigned access:3;
- unsigned invalid:1;
- unsigned nxe:1;
- unsigned cr0_wp:1;
- unsigned smep_and_not_wp:1;
- unsigned smap_and_not_wp:1;
- unsigned pad_for_nice_hex_output:8;
- unsigned smm:8;
- };
-};
-
-static int kvm_mmu_print_role(struct trace_seq *s, struct tep_record *record,
- struct tep_event *event, void *context)
-{
- unsigned long long val;
- static const char *access_str[] = {
- "---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux"
- };
- union kvm_mmu_page_role role;
-
- if (tep_get_field_val(s, event, "role", record, &val, 1) < 0)
- return -1;
-
- role.word = (int)val;
-
- /*
- * We can only use the structure if file is of the same
- * endianness.
- */
- if (tep_is_file_bigendian(event->tep) ==
- tep_is_local_bigendian(event->tep)) {
-
- trace_seq_printf(s, "%u q%u%s %s%s %spae %snxe %swp%s%s%s",
- role.level,
- role.quadrant,
- role.direct ? " direct" : "",
- access_str[role.access],
- role.invalid ? " invalid" : "",
- role.cr4_pae ? "" : "!",
- role.nxe ? "" : "!",
- role.cr0_wp ? "" : "!",
- role.smep_and_not_wp ? " smep" : "",
- role.smap_and_not_wp ? " smap" : "",
- role.smm ? " smm" : "");
- } else
- trace_seq_printf(s, "WORD: %08x", role.word);
-
- tep_print_num_field(s, " root %u ", event,
- "root_count", record, 1);
-
- if (tep_get_field_val(s, event, "unsync", record, &val, 1) < 0)
- return -1;
-
- trace_seq_printf(s, "%s%c", val ? "unsync" : "sync", 0);
- return 0;
-}
-
-static int kvm_mmu_get_page_handler(struct trace_seq *s,
- struct tep_record *record,
- struct tep_event *event, void *context)
-{
- unsigned long long val;
-
- if (tep_get_field_val(s, event, "created", record, &val, 1) < 0)
- return -1;
-
- trace_seq_printf(s, "%s ", val ? "new" : "existing");
-
- if (tep_get_field_val(s, event, "gfn", record, &val, 1) < 0)
- return -1;
-
- trace_seq_printf(s, "sp gfn %llx ", val);
- return kvm_mmu_print_role(s, record, event, context);
-}
-
-#define PT_WRITABLE_SHIFT 1
-#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
-
-static unsigned long long
-process_is_writable_pte(struct trace_seq *s, unsigned long long *args)
-{
- unsigned long pte = args[0];
- return pte & PT_WRITABLE_MASK;
-}
-
-int TEP_PLUGIN_LOADER(struct tep_handle *tep)
-{
- init_disassembler();
-
- tep_register_event_handler(tep, -1, "kvm", "kvm_exit",
- kvm_exit_handler, NULL);
-
- tep_register_event_handler(tep, -1, "kvm", "kvm_emulate_insn",
- kvm_emulate_insn_handler, NULL);
-
- tep_register_event_handler(tep, -1, "kvm", "kvm_nested_vmexit",
- kvm_nested_vmexit_handler, NULL);
-
- tep_register_event_handler(tep, -1, "kvm", "kvm_nested_vmexit_inject",
- kvm_nested_vmexit_inject_handler, NULL);
-
- tep_register_event_handler(tep, -1, "kvmmmu", "kvm_mmu_get_page",
- kvm_mmu_get_page_handler, NULL);
-
- tep_register_event_handler(tep, -1, "kvmmmu", "kvm_mmu_sync_page",
- kvm_mmu_print_role, NULL);
-
- tep_register_event_handler(tep, -1,
- "kvmmmu", "kvm_mmu_unsync_page",
- kvm_mmu_print_role, NULL);
-
- tep_register_event_handler(tep, -1, "kvmmmu", "kvm_mmu_zap_page",
- kvm_mmu_print_role, NULL);
-
- tep_register_event_handler(tep, -1, "kvmmmu",
- "kvm_mmu_prepare_zap_page", kvm_mmu_print_role,
- NULL);
-
- tep_register_print_function(tep,
- process_is_writable_pte,
- TEP_FUNC_ARG_INT,
- "is_writable_pte",
- TEP_FUNC_ARG_LONG,
- TEP_FUNC_ARG_VOID);
- return 0;
-}
-
-void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
-{
- tep_unregister_event_handler(tep, -1, "kvm", "kvm_exit",
- kvm_exit_handler, NULL);
-
- tep_unregister_event_handler(tep, -1, "kvm", "kvm_emulate_insn",
- kvm_emulate_insn_handler, NULL);
-
- tep_unregister_event_handler(tep, -1, "kvm", "kvm_nested_vmexit",
- kvm_nested_vmexit_handler, NULL);
-
- tep_unregister_event_handler(tep, -1, "kvm", "kvm_nested_vmexit_inject",
- kvm_nested_vmexit_inject_handler, NULL);
-
- tep_unregister_event_handler(tep, -1, "kvmmmu", "kvm_mmu_get_page",
- kvm_mmu_get_page_handler, NULL);
-
- tep_unregister_event_handler(tep, -1, "kvmmmu", "kvm_mmu_sync_page",
- kvm_mmu_print_role, NULL);
-
- tep_unregister_event_handler(tep, -1,
- "kvmmmu", "kvm_mmu_unsync_page",
- kvm_mmu_print_role, NULL);
-
- tep_unregister_event_handler(tep, -1, "kvmmmu", "kvm_mmu_zap_page",
- kvm_mmu_print_role, NULL);
-
- tep_unregister_event_handler(tep, -1, "kvmmmu",
- "kvm_mmu_prepare_zap_page", kvm_mmu_print_role,
- NULL);
-
- tep_unregister_print_function(tep, process_is_writable_pte,
- "is_writable_pte");
-}
diff --git a/tools/lib/traceevent/plugins/plugin_mac80211.c b/tools/lib/traceevent/plugins/plugin_mac80211.c
deleted file mode 100644
index f48071e3cfb8..000000000000
--- a/tools/lib/traceevent/plugins/plugin_mac80211.c
+++ /dev/null
@@ -1,88 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * Copyright (C) 2009 Johannes Berg <johannes@sipsolutions.net>
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "event-parse.h"
-#include "trace-seq.h"
-
-#define INDENT 65
-
-static void print_string(struct trace_seq *s, struct tep_event *event,
- const char *name, const void *data)
-{
- struct tep_format_field *f = tep_find_field(event, name);
- int offset;
- int length;
-
- if (!f) {
- trace_seq_printf(s, "NOTFOUND:%s", name);
- return;
- }
-
- offset = f->offset;
- length = f->size;
-
- if (!strncmp(f->type, "__data_loc", 10)) {
- unsigned long long v;
- if (tep_read_number_field(f, data, &v)) {
- trace_seq_printf(s, "invalid_data_loc");
- return;
- }
- offset = v & 0xffff;
- length = v >> 16;
- }
-
- trace_seq_printf(s, "%.*s", length, (char *)data + offset);
-}
-
-#define SF(fn) tep_print_num_field(s, fn ":%d", event, fn, record, 0)
-#define SFX(fn) tep_print_num_field(s, fn ":%#x", event, fn, record, 0)
-#define SP() trace_seq_putc(s, ' ')
-
-static int drv_bss_info_changed(struct trace_seq *s,
- struct tep_record *record,
- struct tep_event *event, void *context)
-{
- void *data = record->data;
-
- print_string(s, event, "wiphy_name", data);
- trace_seq_printf(s, " vif:");
- print_string(s, event, "vif_name", data);
- tep_print_num_field(s, "(%d)", event, "vif_type", record, 1);
-
- trace_seq_printf(s, "\n%*s", INDENT, "");
- SF("assoc"); SP();
- SF("aid"); SP();
- SF("cts"); SP();
- SF("shortpre"); SP();
- SF("shortslot"); SP();
- SF("dtimper"); SP();
- trace_seq_printf(s, "\n%*s", INDENT, "");
- SF("bcnint"); SP();
- SFX("assoc_cap"); SP();
- SFX("basic_rates"); SP();
- SF("enable_beacon");
- trace_seq_printf(s, "\n%*s", INDENT, "");
- SF("ht_operation_mode");
-
- return 0;
-}
-
-int TEP_PLUGIN_LOADER(struct tep_handle *tep)
-{
- tep_register_event_handler(tep, -1, "mac80211",
- "drv_bss_info_changed",
- drv_bss_info_changed, NULL);
- return 0;
-}
-
-void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
-{
- tep_unregister_event_handler(tep, -1, "mac80211",
- "drv_bss_info_changed",
- drv_bss_info_changed, NULL);
-}
diff --git a/tools/lib/traceevent/plugins/plugin_sched_switch.c b/tools/lib/traceevent/plugins/plugin_sched_switch.c
deleted file mode 100644
index e12fa103820a..000000000000
--- a/tools/lib/traceevent/plugins/plugin_sched_switch.c
+++ /dev/null
@@ -1,146 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "event-parse.h"
-#include "trace-seq.h"
-
-static void write_state(struct trace_seq *s, int val)
-{
- const char states[] = "SDTtZXxW";
- int found = 0;
- int i;
-
- for (i = 0; i < (sizeof(states) - 1); i++) {
- if (!(val & (1 << i)))
- continue;
-
- if (found)
- trace_seq_putc(s, '|');
-
- found = 1;
- trace_seq_putc(s, states[i]);
- }
-
- if (!found)
- trace_seq_putc(s, 'R');
-}
-
-static void write_and_save_comm(struct tep_format_field *field,
- struct tep_record *record,
- struct trace_seq *s, int pid)
-{
- const char *comm;
- int len;
-
- comm = (char *)(record->data + field->offset);
- len = s->len;
- trace_seq_printf(s, "%.*s",
- field->size, comm);
-
- /* make sure the comm has a \0 at the end. */
- trace_seq_terminate(s);
- comm = &s->buffer[len];
-
- /* Help out the comm to ids. This will handle dups */
- tep_register_comm(field->event->tep, comm, pid);
-}
-
-static int sched_wakeup_handler(struct trace_seq *s,
- struct tep_record *record,
- struct tep_event *event, void *context)
-{
- struct tep_format_field *field;
- unsigned long long val;
-
- if (tep_get_field_val(s, event, "pid", record, &val, 1))
- return trace_seq_putc(s, '!');
-
- field = tep_find_any_field(event, "comm");
- if (field) {
- write_and_save_comm(field, record, s, val);
- trace_seq_putc(s, ':');
- }
- trace_seq_printf(s, "%lld", val);
-
- if (tep_get_field_val(s, event, "prio", record, &val, 0) == 0)
- trace_seq_printf(s, " [%lld]", val);
-
- if (tep_get_field_val(s, event, "success", record, &val, 1) == 0)
- trace_seq_printf(s, " success=%lld", val);
-
- if (tep_get_field_val(s, event, "target_cpu", record, &val, 0) == 0)
- trace_seq_printf(s, " CPU:%03llu", val);
-
- return 0;
-}
-
-static int sched_switch_handler(struct trace_seq *s,
- struct tep_record *record,
- struct tep_event *event, void *context)
-{
- struct tep_format_field *field;
- unsigned long long val;
-
- if (tep_get_field_val(s, event, "prev_pid", record, &val, 1))
- return trace_seq_putc(s, '!');
-
- field = tep_find_any_field(event, "prev_comm");
- if (field) {
- write_and_save_comm(field, record, s, val);
- trace_seq_putc(s, ':');
- }
- trace_seq_printf(s, "%lld ", val);
-
- if (tep_get_field_val(s, event, "prev_prio", record, &val, 0) == 0)
- trace_seq_printf(s, "[%d] ", (int) val);
-
- if (tep_get_field_val(s, event, "prev_state", record, &val, 0) == 0)
- write_state(s, val);
-
- trace_seq_puts(s, " ==> ");
-
- if (tep_get_field_val(s, event, "next_pid", record, &val, 1))
- return trace_seq_putc(s, '!');
-
- field = tep_find_any_field(event, "next_comm");
- if (field) {
- write_and_save_comm(field, record, s, val);
- trace_seq_putc(s, ':');
- }
- trace_seq_printf(s, "%lld", val);
-
- if (tep_get_field_val(s, event, "next_prio", record, &val, 0) == 0)
- trace_seq_printf(s, " [%d]", (int) val);
-
- return 0;
-}
-
-int TEP_PLUGIN_LOADER(struct tep_handle *tep)
-{
- tep_register_event_handler(tep, -1, "sched", "sched_switch",
- sched_switch_handler, NULL);
-
- tep_register_event_handler(tep, -1, "sched", "sched_wakeup",
- sched_wakeup_handler, NULL);
-
- tep_register_event_handler(tep, -1, "sched", "sched_wakeup_new",
- sched_wakeup_handler, NULL);
- return 0;
-}
-
-void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
-{
- tep_unregister_event_handler(tep, -1, "sched", "sched_switch",
- sched_switch_handler, NULL);
-
- tep_unregister_event_handler(tep, -1, "sched", "sched_wakeup",
- sched_wakeup_handler, NULL);
-
- tep_unregister_event_handler(tep, -1, "sched", "sched_wakeup_new",
- sched_wakeup_handler, NULL);
-}
diff --git a/tools/lib/traceevent/plugins/plugin_scsi.c b/tools/lib/traceevent/plugins/plugin_scsi.c
deleted file mode 100644
index 5d0387a4b65a..000000000000
--- a/tools/lib/traceevent/plugins/plugin_scsi.c
+++ /dev/null
@@ -1,434 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <stdio.h>
-#include <string.h>
-#include <inttypes.h>
-#include "event-parse.h"
-#include "trace-seq.h"
-
-typedef unsigned long sector_t;
-typedef uint64_t u64;
-typedef unsigned int u32;
-
-/*
- * SCSI opcodes
- */
-#define TEST_UNIT_READY 0x00
-#define REZERO_UNIT 0x01
-#define REQUEST_SENSE 0x03
-#define FORMAT_UNIT 0x04
-#define READ_BLOCK_LIMITS 0x05
-#define REASSIGN_BLOCKS 0x07
-#define INITIALIZE_ELEMENT_STATUS 0x07
-#define READ_6 0x08
-#define WRITE_6 0x0a
-#define SEEK_6 0x0b
-#define READ_REVERSE 0x0f
-#define WRITE_FILEMARKS 0x10
-#define SPACE 0x11
-#define INQUIRY 0x12
-#define RECOVER_BUFFERED_DATA 0x14
-#define MODE_SELECT 0x15
-#define RESERVE 0x16
-#define RELEASE 0x17
-#define COPY 0x18
-#define ERASE 0x19
-#define MODE_SENSE 0x1a
-#define START_STOP 0x1b
-#define RECEIVE_DIAGNOSTIC 0x1c
-#define SEND_DIAGNOSTIC 0x1d
-#define ALLOW_MEDIUM_REMOVAL 0x1e
-
-#define READ_FORMAT_CAPACITIES 0x23
-#define SET_WINDOW 0x24
-#define READ_CAPACITY 0x25
-#define READ_10 0x28
-#define WRITE_10 0x2a
-#define SEEK_10 0x2b
-#define POSITION_TO_ELEMENT 0x2b
-#define WRITE_VERIFY 0x2e
-#define VERIFY 0x2f
-#define SEARCH_HIGH 0x30
-#define SEARCH_EQUAL 0x31
-#define SEARCH_LOW 0x32
-#define SET_LIMITS 0x33
-#define PRE_FETCH 0x34
-#define READ_POSITION 0x34
-#define SYNCHRONIZE_CACHE 0x35
-#define LOCK_UNLOCK_CACHE 0x36
-#define READ_DEFECT_DATA 0x37
-#define MEDIUM_SCAN 0x38
-#define COMPARE 0x39
-#define COPY_VERIFY 0x3a
-#define WRITE_BUFFER 0x3b
-#define READ_BUFFER 0x3c
-#define UPDATE_BLOCK 0x3d
-#define READ_LONG 0x3e
-#define WRITE_LONG 0x3f
-#define CHANGE_DEFINITION 0x40
-#define WRITE_SAME 0x41
-#define UNMAP 0x42
-#define READ_TOC 0x43
-#define READ_HEADER 0x44
-#define GET_EVENT_STATUS_NOTIFICATION 0x4a
-#define LOG_SELECT 0x4c
-#define LOG_SENSE 0x4d
-#define XDWRITEREAD_10 0x53
-#define MODE_SELECT_10 0x55
-#define RESERVE_10 0x56
-#define RELEASE_10 0x57
-#define MODE_SENSE_10 0x5a
-#define PERSISTENT_RESERVE_IN 0x5e
-#define PERSISTENT_RESERVE_OUT 0x5f
-#define VARIABLE_LENGTH_CMD 0x7f
-#define REPORT_LUNS 0xa0
-#define SECURITY_PROTOCOL_IN 0xa2
-#define MAINTENANCE_IN 0xa3
-#define MAINTENANCE_OUT 0xa4
-#define MOVE_MEDIUM 0xa5
-#define EXCHANGE_MEDIUM 0xa6
-#define READ_12 0xa8
-#define SERVICE_ACTION_OUT_12 0xa9
-#define WRITE_12 0xaa
-#define SERVICE_ACTION_IN_12 0xab
-#define WRITE_VERIFY_12 0xae
-#define VERIFY_12 0xaf
-#define SEARCH_HIGH_12 0xb0
-#define SEARCH_EQUAL_12 0xb1
-#define SEARCH_LOW_12 0xb2
-#define SECURITY_PROTOCOL_OUT 0xb5
-#define READ_ELEMENT_STATUS 0xb8
-#define SEND_VOLUME_TAG 0xb6
-#define WRITE_LONG_2 0xea
-#define EXTENDED_COPY 0x83
-#define RECEIVE_COPY_RESULTS 0x84
-#define ACCESS_CONTROL_IN 0x86
-#define ACCESS_CONTROL_OUT 0x87
-#define READ_16 0x88
-#define WRITE_16 0x8a
-#define READ_ATTRIBUTE 0x8c
-#define WRITE_ATTRIBUTE 0x8d
-#define VERIFY_16 0x8f
-#define SYNCHRONIZE_CACHE_16 0x91
-#define WRITE_SAME_16 0x93
-#define SERVICE_ACTION_BIDIRECTIONAL 0x9d
-#define SERVICE_ACTION_IN_16 0x9e
-#define SERVICE_ACTION_OUT_16 0x9f
-/* values for service action in */
-#define SAI_READ_CAPACITY_16 0x10
-#define SAI_GET_LBA_STATUS 0x12
-/* values for VARIABLE_LENGTH_CMD service action codes
- * see spc4r17 Section D.3.5, table D.7 and D.8 */
-#define VLC_SA_RECEIVE_CREDENTIAL 0x1800
-/* values for maintenance in */
-#define MI_REPORT_IDENTIFYING_INFORMATION 0x05
-#define MI_REPORT_TARGET_PGS 0x0a
-#define MI_REPORT_ALIASES 0x0b
-#define MI_REPORT_SUPPORTED_OPERATION_CODES 0x0c
-#define MI_REPORT_SUPPORTED_TASK_MANAGEMENT_FUNCTIONS 0x0d
-#define MI_REPORT_PRIORITY 0x0e
-#define MI_REPORT_TIMESTAMP 0x0f
-#define MI_MANAGEMENT_PROTOCOL_IN 0x10
-/* value for MI_REPORT_TARGET_PGS ext header */
-#define MI_EXT_HDR_PARAM_FMT 0x20
-/* values for maintenance out */
-#define MO_SET_IDENTIFYING_INFORMATION 0x06
-#define MO_SET_TARGET_PGS 0x0a
-#define MO_CHANGE_ALIASES 0x0b
-#define MO_SET_PRIORITY 0x0e
-#define MO_SET_TIMESTAMP 0x0f
-#define MO_MANAGEMENT_PROTOCOL_OUT 0x10
-/* values for variable length command */
-#define XDREAD_32 0x03
-#define XDWRITE_32 0x04
-#define XPWRITE_32 0x06
-#define XDWRITEREAD_32 0x07
-#define READ_32 0x09
-#define VERIFY_32 0x0a
-#define WRITE_32 0x0b
-#define WRITE_SAME_32 0x0d
-
-#define SERVICE_ACTION16(cdb) (cdb[1] & 0x1f)
-#define SERVICE_ACTION32(cdb) ((cdb[8] << 8) | cdb[9])
-
-static const char *
-scsi_trace_misc(struct trace_seq *, unsigned char *, int);
-
-static const char *
-scsi_trace_rw6(struct trace_seq *p, unsigned char *cdb, int len)
-{
- const char *ret = p->buffer + p->len;
- sector_t lba = 0, txlen = 0;
-
- lba |= ((cdb[1] & 0x1F) << 16);
- lba |= (cdb[2] << 8);
- lba |= cdb[3];
- txlen = cdb[4];
-
- trace_seq_printf(p, "lba=%llu txlen=%llu",
- (unsigned long long)lba, (unsigned long long)txlen);
- trace_seq_putc(p, 0);
- return ret;
-}
-
-static const char *
-scsi_trace_rw10(struct trace_seq *p, unsigned char *cdb, int len)
-{
- const char *ret = p->buffer + p->len;
- sector_t lba = 0, txlen = 0;
-
- lba |= (cdb[2] << 24);
- lba |= (cdb[3] << 16);
- lba |= (cdb[4] << 8);
- lba |= cdb[5];
- txlen |= (cdb[7] << 8);
- txlen |= cdb[8];
-
- trace_seq_printf(p, "lba=%llu txlen=%llu protect=%u",
- (unsigned long long)lba, (unsigned long long)txlen,
- cdb[1] >> 5);
-
- if (cdb[0] == WRITE_SAME)
- trace_seq_printf(p, " unmap=%u", cdb[1] >> 3 & 1);
-
- trace_seq_putc(p, 0);
- return ret;
-}
-
-static const char *
-scsi_trace_rw12(struct trace_seq *p, unsigned char *cdb, int len)
-{
- const char *ret = p->buffer + p->len;
- sector_t lba = 0, txlen = 0;
-
- lba |= (cdb[2] << 24);
- lba |= (cdb[3] << 16);
- lba |= (cdb[4] << 8);
- lba |= cdb[5];
- txlen |= (cdb[6] << 24);
- txlen |= (cdb[7] << 16);
- txlen |= (cdb[8] << 8);
- txlen |= cdb[9];
-
- trace_seq_printf(p, "lba=%llu txlen=%llu protect=%u",
- (unsigned long long)lba, (unsigned long long)txlen,
- cdb[1] >> 5);
- trace_seq_putc(p, 0);
- return ret;
-}
-
-static const char *
-scsi_trace_rw16(struct trace_seq *p, unsigned char *cdb, int len)
-{
- const char *ret = p->buffer + p->len;
- sector_t lba = 0, txlen = 0;
-
- lba |= ((u64)cdb[2] << 56);
- lba |= ((u64)cdb[3] << 48);
- lba |= ((u64)cdb[4] << 40);
- lba |= ((u64)cdb[5] << 32);
- lba |= (cdb[6] << 24);
- lba |= (cdb[7] << 16);
- lba |= (cdb[8] << 8);
- lba |= cdb[9];
- txlen |= (cdb[10] << 24);
- txlen |= (cdb[11] << 16);
- txlen |= (cdb[12] << 8);
- txlen |= cdb[13];
-
- trace_seq_printf(p, "lba=%llu txlen=%llu protect=%u",
- (unsigned long long)lba, (unsigned long long)txlen,
- cdb[1] >> 5);
-
- if (cdb[0] == WRITE_SAME_16)
- trace_seq_printf(p, " unmap=%u", cdb[1] >> 3 & 1);
-
- trace_seq_putc(p, 0);
- return ret;
-}
-
-static const char *
-scsi_trace_rw32(struct trace_seq *p, unsigned char *cdb, int len)
-{
- const char *ret = p->buffer + p->len, *cmd;
- sector_t lba = 0, txlen = 0;
- u32 ei_lbrt = 0;
-
- switch (SERVICE_ACTION32(cdb)) {
- case READ_32:
- cmd = "READ";
- break;
- case VERIFY_32:
- cmd = "VERIFY";
- break;
- case WRITE_32:
- cmd = "WRITE";
- break;
- case WRITE_SAME_32:
- cmd = "WRITE_SAME";
- break;
- default:
- trace_seq_printf(p, "UNKNOWN");
- goto out;
- }
-
- lba |= ((u64)cdb[12] << 56);
- lba |= ((u64)cdb[13] << 48);
- lba |= ((u64)cdb[14] << 40);
- lba |= ((u64)cdb[15] << 32);
- lba |= (cdb[16] << 24);
- lba |= (cdb[17] << 16);
- lba |= (cdb[18] << 8);
- lba |= cdb[19];
- ei_lbrt |= (cdb[20] << 24);
- ei_lbrt |= (cdb[21] << 16);
- ei_lbrt |= (cdb[22] << 8);
- ei_lbrt |= cdb[23];
- txlen |= (cdb[28] << 24);
- txlen |= (cdb[29] << 16);
- txlen |= (cdb[30] << 8);
- txlen |= cdb[31];
-
- trace_seq_printf(p, "%s_32 lba=%llu txlen=%llu protect=%u ei_lbrt=%u",
- cmd, (unsigned long long)lba,
- (unsigned long long)txlen, cdb[10] >> 5, ei_lbrt);
-
- if (SERVICE_ACTION32(cdb) == WRITE_SAME_32)
- trace_seq_printf(p, " unmap=%u", cdb[10] >> 3 & 1);
-
-out:
- trace_seq_putc(p, 0);
- return ret;
-}
-
-static const char *
-scsi_trace_unmap(struct trace_seq *p, unsigned char *cdb, int len)
-{
- const char *ret = p->buffer + p->len;
- unsigned int regions = cdb[7] << 8 | cdb[8];
-
- trace_seq_printf(p, "regions=%u", (regions - 8) / 16);
- trace_seq_putc(p, 0);
- return ret;
-}
-
-static const char *
-scsi_trace_service_action_in(struct trace_seq *p, unsigned char *cdb, int len)
-{
- const char *ret = p->buffer + p->len, *cmd;
- sector_t lba = 0;
- u32 alloc_len = 0;
-
- switch (SERVICE_ACTION16(cdb)) {
- case SAI_READ_CAPACITY_16:
- cmd = "READ_CAPACITY_16";
- break;
- case SAI_GET_LBA_STATUS:
- cmd = "GET_LBA_STATUS";
- break;
- default:
- trace_seq_printf(p, "UNKNOWN");
- goto out;
- }
-
- lba |= ((u64)cdb[2] << 56);
- lba |= ((u64)cdb[3] << 48);
- lba |= ((u64)cdb[4] << 40);
- lba |= ((u64)cdb[5] << 32);
- lba |= (cdb[6] << 24);
- lba |= (cdb[7] << 16);
- lba |= (cdb[8] << 8);
- lba |= cdb[9];
- alloc_len |= (cdb[10] << 24);
- alloc_len |= (cdb[11] << 16);
- alloc_len |= (cdb[12] << 8);
- alloc_len |= cdb[13];
-
- trace_seq_printf(p, "%s lba=%llu alloc_len=%u", cmd,
- (unsigned long long)lba, alloc_len);
-
-out:
- trace_seq_putc(p, 0);
- return ret;
-}
-
-static const char *
-scsi_trace_varlen(struct trace_seq *p, unsigned char *cdb, int len)
-{
- switch (SERVICE_ACTION32(cdb)) {
- case READ_32:
- case VERIFY_32:
- case WRITE_32:
- case WRITE_SAME_32:
- return scsi_trace_rw32(p, cdb, len);
- default:
- return scsi_trace_misc(p, cdb, len);
- }
-}
-
-static const char *
-scsi_trace_misc(struct trace_seq *p, unsigned char *cdb, int len)
-{
- const char *ret = p->buffer + p->len;
-
- trace_seq_printf(p, "-");
- trace_seq_putc(p, 0);
- return ret;
-}
-
-const char *
-scsi_trace_parse_cdb(struct trace_seq *p, unsigned char *cdb, int len)
-{
- switch (cdb[0]) {
- case READ_6:
- case WRITE_6:
- return scsi_trace_rw6(p, cdb, len);
- case READ_10:
- case VERIFY:
- case WRITE_10:
- case WRITE_SAME:
- return scsi_trace_rw10(p, cdb, len);
- case READ_12:
- case VERIFY_12:
- case WRITE_12:
- return scsi_trace_rw12(p, cdb, len);
- case READ_16:
- case VERIFY_16:
- case WRITE_16:
- case WRITE_SAME_16:
- return scsi_trace_rw16(p, cdb, len);
- case UNMAP:
- return scsi_trace_unmap(p, cdb, len);
- case SERVICE_ACTION_IN_16:
- return scsi_trace_service_action_in(p, cdb, len);
- case VARIABLE_LENGTH_CMD:
- return scsi_trace_varlen(p, cdb, len);
- default:
- return scsi_trace_misc(p, cdb, len);
- }
-}
-
-unsigned long long process_scsi_trace_parse_cdb(struct trace_seq *s,
- unsigned long long *args)
-{
- scsi_trace_parse_cdb(s, (unsigned char *) (unsigned long) args[1], args[2]);
- return 0;
-}
-
-int TEP_PLUGIN_LOADER(struct tep_handle *tep)
-{
- tep_register_print_function(tep,
- process_scsi_trace_parse_cdb,
- TEP_FUNC_ARG_STRING,
- "scsi_trace_parse_cdb",
- TEP_FUNC_ARG_PTR,
- TEP_FUNC_ARG_PTR,
- TEP_FUNC_ARG_INT,
- TEP_FUNC_ARG_VOID);
- return 0;
-}
-
-void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
-{
- tep_unregister_print_function(tep, process_scsi_trace_parse_cdb,
- "scsi_trace_parse_cdb");
-}
diff --git a/tools/lib/traceevent/plugins/plugin_tlb.c b/tools/lib/traceevent/plugins/plugin_tlb.c
deleted file mode 100644
index 43657fb60504..000000000000
--- a/tools/lib/traceevent/plugins/plugin_tlb.c
+++ /dev/null
@@ -1,66 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * Copyright (C) 2015 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "event-parse.h"
-
-enum tlb_flush_reason {
- TLB_FLUSH_ON_TASK_SWITCH,
- TLB_REMOTE_SHOOTDOWN,
- TLB_LOCAL_SHOOTDOWN,
- TLB_LOCAL_MM_SHOOTDOWN,
- NR_TLB_FLUSH_REASONS,
-};
-
-static int tlb_flush_handler(struct trace_seq *s, struct tep_record *record,
- struct tep_event *event, void *context)
-{
- unsigned long long val;
-
- trace_seq_printf(s, "pages=");
-
- tep_print_num_field(s, "%ld", event, "pages", record, 1);
-
- if (tep_get_field_val(s, event, "reason", record, &val, 1) < 0)
- return -1;
-
- trace_seq_puts(s, " reason=");
-
- switch (val) {
- case TLB_FLUSH_ON_TASK_SWITCH:
- trace_seq_puts(s, "flush on task switch");
- break;
- case TLB_REMOTE_SHOOTDOWN:
- trace_seq_puts(s, "remote shootdown");
- break;
- case TLB_LOCAL_SHOOTDOWN:
- trace_seq_puts(s, "local shootdown");
- break;
- case TLB_LOCAL_MM_SHOOTDOWN:
- trace_seq_puts(s, "local mm shootdown");
- break;
- }
-
- trace_seq_printf(s, " (%lld)", val);
-
- return 0;
-}
-
-int TEP_PLUGIN_LOADER(struct tep_handle *tep)
-{
- tep_register_event_handler(tep, -1, "tlb", "tlb_flush",
- tlb_flush_handler, NULL);
-
- return 0;
-}
-
-void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
-{
- tep_unregister_event_handler(tep, -1,
- "tlb", "tlb_flush",
- tlb_flush_handler, NULL);
-}
diff --git a/tools/lib/traceevent/plugins/plugin_xen.c b/tools/lib/traceevent/plugins/plugin_xen.c
deleted file mode 100644
index 993b208d0323..000000000000
--- a/tools/lib/traceevent/plugins/plugin_xen.c
+++ /dev/null
@@ -1,138 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "event-parse.h"
-#include "trace-seq.h"
-
-#define __HYPERVISOR_set_trap_table 0
-#define __HYPERVISOR_mmu_update 1
-#define __HYPERVISOR_set_gdt 2
-#define __HYPERVISOR_stack_switch 3
-#define __HYPERVISOR_set_callbacks 4
-#define __HYPERVISOR_fpu_taskswitch 5
-#define __HYPERVISOR_sched_op_compat 6
-#define __HYPERVISOR_dom0_op 7
-#define __HYPERVISOR_set_debugreg 8
-#define __HYPERVISOR_get_debugreg 9
-#define __HYPERVISOR_update_descriptor 10
-#define __HYPERVISOR_memory_op 12
-#define __HYPERVISOR_multicall 13
-#define __HYPERVISOR_update_va_mapping 14
-#define __HYPERVISOR_set_timer_op 15
-#define __HYPERVISOR_event_channel_op_compat 16
-#define __HYPERVISOR_xen_version 17
-#define __HYPERVISOR_console_io 18
-#define __HYPERVISOR_physdev_op_compat 19
-#define __HYPERVISOR_grant_table_op 20
-#define __HYPERVISOR_vm_assist 21
-#define __HYPERVISOR_update_va_mapping_otherdomain 22
-#define __HYPERVISOR_iret 23 /* x86 only */
-#define __HYPERVISOR_vcpu_op 24
-#define __HYPERVISOR_set_segment_base 25 /* x86/64 only */
-#define __HYPERVISOR_mmuext_op 26
-#define __HYPERVISOR_acm_op 27
-#define __HYPERVISOR_nmi_op 28
-#define __HYPERVISOR_sched_op 29
-#define __HYPERVISOR_callback_op 30
-#define __HYPERVISOR_xenoprof_op 31
-#define __HYPERVISOR_event_channel_op 32
-#define __HYPERVISOR_physdev_op 33
-#define __HYPERVISOR_hvm_op 34
-#define __HYPERVISOR_tmem_op 38
-
-/* Architecture-specific hypercall definitions. */
-#define __HYPERVISOR_arch_0 48
-#define __HYPERVISOR_arch_1 49
-#define __HYPERVISOR_arch_2 50
-#define __HYPERVISOR_arch_3 51
-#define __HYPERVISOR_arch_4 52
-#define __HYPERVISOR_arch_5 53
-#define __HYPERVISOR_arch_6 54
-#define __HYPERVISOR_arch_7 55
-
-#define N(x) [__HYPERVISOR_##x] = "("#x")"
-static const char *xen_hypercall_names[] = {
- N(set_trap_table),
- N(mmu_update),
- N(set_gdt),
- N(stack_switch),
- N(set_callbacks),
- N(fpu_taskswitch),
- N(sched_op_compat),
- N(dom0_op),
- N(set_debugreg),
- N(get_debugreg),
- N(update_descriptor),
- N(memory_op),
- N(multicall),
- N(update_va_mapping),
- N(set_timer_op),
- N(event_channel_op_compat),
- N(xen_version),
- N(console_io),
- N(physdev_op_compat),
- N(grant_table_op),
- N(vm_assist),
- N(update_va_mapping_otherdomain),
- N(iret),
- N(vcpu_op),
- N(set_segment_base),
- N(mmuext_op),
- N(acm_op),
- N(nmi_op),
- N(sched_op),
- N(callback_op),
- N(xenoprof_op),
- N(event_channel_op),
- N(physdev_op),
- N(hvm_op),
-
-/* Architecture-specific hypercall definitions. */
- N(arch_0),
- N(arch_1),
- N(arch_2),
- N(arch_3),
- N(arch_4),
- N(arch_5),
- N(arch_6),
- N(arch_7),
-};
-#undef N
-
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
-
-static const char *xen_hypercall_name(unsigned op)
-{
- if (op < ARRAY_SIZE(xen_hypercall_names) &&
- xen_hypercall_names[op] != NULL)
- return xen_hypercall_names[op];
-
- return "";
-}
-
-unsigned long long process_xen_hypercall_name(struct trace_seq *s,
- unsigned long long *args)
-{
- unsigned int op = args[0];
-
- trace_seq_printf(s, "%s", xen_hypercall_name(op));
- return 0;
-}
-
-int TEP_PLUGIN_LOADER(struct tep_handle *tep)
-{
- tep_register_print_function(tep,
- process_xen_hypercall_name,
- TEP_FUNC_ARG_STRING,
- "xen_hypercall_name",
- TEP_FUNC_ARG_INT,
- TEP_FUNC_ARG_VOID);
- return 0;
-}
-
-void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
-{
- tep_unregister_print_function(tep, process_xen_hypercall_name,
- "xen_hypercall_name");
-}
diff --git a/tools/lib/traceevent/tep_strerror.c b/tools/lib/traceevent/tep_strerror.c
deleted file mode 100644
index 4ac26445b2f6..000000000000
--- a/tools/lib/traceevent/tep_strerror.c
+++ /dev/null
@@ -1,53 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-#undef _GNU_SOURCE
-#include <string.h>
-#include <stdio.h>
-
-#include "event-parse.h"
-
-#undef _PE
-#define _PE(code, str) str
-static const char * const tep_error_str[] = {
- TEP_ERRORS
-};
-#undef _PE
-
-/*
- * The tools so far have been using the strerror_r() GNU variant, that returns
- * a string, be it the buffer passed or something else.
- *
- * But that, besides being tricky in cases where we expect that the function
- * using strerror_r() returns the error formatted in a provided buffer (we have
- * to check if it returned something else and copy that instead), breaks the
- * build on systems not using glibc, like Alpine Linux, where musl libc is
- * used.
- *
- * So, introduce yet another wrapper, str_error_r(), that has the GNU
- * interface, but uses the portable XSI variant of strerror_r(), so that users
- * rest asured that the provided buffer is used and it is what is returned.
- */
-int tep_strerror(struct tep_handle *tep __maybe_unused,
- enum tep_errno errnum, char *buf, size_t buflen)
-{
- const char *msg;
- int idx;
-
- if (!buflen)
- return 0;
-
- if (errnum >= 0) {
- int err = strerror_r(errnum, buf, buflen);
- buf[buflen - 1] = 0;
- return err;
- }
-
- if (errnum <= __TEP_ERRNO__START ||
- errnum >= __TEP_ERRNO__END)
- return -1;
-
- idx = errnum - __TEP_ERRNO__START - 1;
- msg = tep_error_str[idx];
- snprintf(buf, buflen, "%s", msg);
-
- return 0;
-}
diff --git a/tools/lib/traceevent/trace-seq.c b/tools/lib/traceevent/trace-seq.c
deleted file mode 100644
index 8d5ecd2bf877..000000000000
--- a/tools/lib/traceevent/trace-seq.c
+++ /dev/null
@@ -1,249 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- */
-#include "trace-seq.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdarg.h>
-
-#include <asm/bug.h>
-#include "event-parse.h"
-#include "event-utils.h"
-
-/*
- * The TRACE_SEQ_POISON is to catch the use of using
- * a trace_seq structure after it was destroyed.
- */
-#define TRACE_SEQ_POISON ((void *)0xdeadbeef)
-#define TRACE_SEQ_CHECK(s) \
-do { \
- if (WARN_ONCE((s)->buffer == TRACE_SEQ_POISON, \
- "Usage of trace_seq after it was destroyed")) \
- (s)->state = TRACE_SEQ__BUFFER_POISONED; \
-} while (0)
-
-#define TRACE_SEQ_CHECK_RET_N(s, n) \
-do { \
- TRACE_SEQ_CHECK(s); \
- if ((s)->state != TRACE_SEQ__GOOD) \
- return n; \
-} while (0)
-
-#define TRACE_SEQ_CHECK_RET(s) TRACE_SEQ_CHECK_RET_N(s, )
-#define TRACE_SEQ_CHECK_RET0(s) TRACE_SEQ_CHECK_RET_N(s, 0)
-
-/**
- * trace_seq_init - initialize the trace_seq structure
- * @s: a pointer to the trace_seq structure to initialize
- */
-void trace_seq_init(struct trace_seq *s)
-{
- s->len = 0;
- s->readpos = 0;
- s->buffer_size = TRACE_SEQ_BUF_SIZE;
- s->buffer = malloc(s->buffer_size);
- if (s->buffer != NULL)
- s->state = TRACE_SEQ__GOOD;
- else
- s->state = TRACE_SEQ__MEM_ALLOC_FAILED;
-}
-
-/**
- * trace_seq_reset - re-initialize the trace_seq structure
- * @s: a pointer to the trace_seq structure to reset
- */
-void trace_seq_reset(struct trace_seq *s)
-{
- if (!s)
- return;
- TRACE_SEQ_CHECK(s);
- s->len = 0;
- s->readpos = 0;
-}
-
-/**
- * trace_seq_destroy - free up memory of a trace_seq
- * @s: a pointer to the trace_seq to free the buffer
- *
- * Only frees the buffer, not the trace_seq struct itself.
- */
-void trace_seq_destroy(struct trace_seq *s)
-{
- if (!s)
- return;
- TRACE_SEQ_CHECK_RET(s);
- free(s->buffer);
- s->buffer = TRACE_SEQ_POISON;
-}
-
-static void expand_buffer(struct trace_seq *s)
-{
- char *buf;
-
- buf = realloc(s->buffer, s->buffer_size + TRACE_SEQ_BUF_SIZE);
- if (WARN_ONCE(!buf, "Can't allocate trace_seq buffer memory")) {
- s->state = TRACE_SEQ__MEM_ALLOC_FAILED;
- return;
- }
-
- s->buffer = buf;
- s->buffer_size += TRACE_SEQ_BUF_SIZE;
-}
-
-/**
- * trace_seq_printf - sequence printing of trace information
- * @s: trace sequence descriptor
- * @fmt: printf format string
- *
- * It returns 0 if the trace oversizes the buffer's free
- * space, the number of characters printed, or a negative
- * value in case of an error.
- *
- * The tracer may use either sequence operations or its own
- * copy to user routines. To simplify formating of a trace
- * trace_seq_printf is used to store strings into a special
- * buffer (@s). Then the output may be either used by
- * the sequencer or pulled into another buffer.
- */
-int
-trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
-{
- va_list ap;
- int len;
- int ret;
-
- try_again:
- TRACE_SEQ_CHECK_RET0(s);
-
- len = (s->buffer_size - 1) - s->len;
-
- va_start(ap, fmt);
- ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
- va_end(ap);
-
- if (ret >= len) {
- expand_buffer(s);
- goto try_again;
- }
-
- if (ret > 0)
- s->len += ret;
-
- return ret;
-}
-
-/**
- * trace_seq_vprintf - sequence printing of trace information
- * @s: trace sequence descriptor
- * @fmt: printf format string
- *
- * It returns 0 if the trace oversizes the buffer's free
- * space, the number of characters printed, or a negative
- * value in case of an error.
- * *
- * The tracer may use either sequence operations or its own
- * copy to user routines. To simplify formating of a trace
- * trace_seq_printf is used to store strings into a special
- * buffer (@s). Then the output may be either used by
- * the sequencer or pulled into another buffer.
- */
-int
-trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
-{
- int len;
- int ret;
-
- try_again:
- TRACE_SEQ_CHECK_RET0(s);
-
- len = (s->buffer_size - 1) - s->len;
-
- ret = vsnprintf(s->buffer + s->len, len, fmt, args);
-
- if (ret >= len) {
- expand_buffer(s);
- goto try_again;
- }
-
- if (ret > 0)
- s->len += ret;
-
- return ret;
-}
-
-/**
- * trace_seq_puts - trace sequence printing of simple string
- * @s: trace sequence descriptor
- * @str: simple string to record
- *
- * The tracer may use either the sequence operations or its own
- * copy to user routines. This function records a simple string
- * into a special buffer (@s) for later retrieval by a sequencer
- * or other mechanism.
- */
-int trace_seq_puts(struct trace_seq *s, const char *str)
-{
- int len;
-
- TRACE_SEQ_CHECK_RET0(s);
-
- len = strlen(str);
-
- while (len > ((s->buffer_size - 1) - s->len))
- expand_buffer(s);
-
- TRACE_SEQ_CHECK_RET0(s);
-
- memcpy(s->buffer + s->len, str, len);
- s->len += len;
-
- return len;
-}
-
-int trace_seq_putc(struct trace_seq *s, unsigned char c)
-{
- TRACE_SEQ_CHECK_RET0(s);
-
- while (s->len >= (s->buffer_size - 1))
- expand_buffer(s);
-
- TRACE_SEQ_CHECK_RET0(s);
-
- s->buffer[s->len++] = c;
-
- return 1;
-}
-
-void trace_seq_terminate(struct trace_seq *s)
-{
- TRACE_SEQ_CHECK_RET(s);
-
- /* There's always one character left on the buffer */
- s->buffer[s->len] = 0;
-}
-
-int trace_seq_do_fprintf(struct trace_seq *s, FILE *fp)
-{
- TRACE_SEQ_CHECK(s);
-
- switch (s->state) {
- case TRACE_SEQ__GOOD:
- return fprintf(fp, "%.*s", s->len, s->buffer);
- case TRACE_SEQ__BUFFER_POISONED:
- fprintf(fp, "%s\n", "Usage of trace_seq after it was destroyed");
- break;
- case TRACE_SEQ__MEM_ALLOC_FAILED:
- fprintf(fp, "%s\n", "Can't allocate trace_seq buffer memory");
- break;
- }
- return -1;
-}
-
-int trace_seq_do_printf(struct trace_seq *s)
-{
- return trace_seq_do_fprintf(s, stdout);
-}
diff --git a/tools/lib/traceevent/trace-seq.h b/tools/lib/traceevent/trace-seq.h
deleted file mode 100644
index d68ec69f8d1a..000000000000
--- a/tools/lib/traceevent/trace-seq.h
+++ /dev/null
@@ -1,55 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- */
-
-#ifndef _TRACE_SEQ_H
-#define _TRACE_SEQ_H
-
-#include <stdarg.h>
-#include <stdio.h>
-
-/* ----------------------- trace_seq ----------------------- */
-
-#ifndef TRACE_SEQ_BUF_SIZE
-#define TRACE_SEQ_BUF_SIZE 4096
-#endif
-
-enum trace_seq_fail {
- TRACE_SEQ__GOOD,
- TRACE_SEQ__BUFFER_POISONED,
- TRACE_SEQ__MEM_ALLOC_FAILED,
-};
-
-/*
- * Trace sequences are used to allow a function to call several other functions
- * to create a string of data to use (up to a max of PAGE_SIZE).
- */
-
-struct trace_seq {
- char *buffer;
- unsigned int buffer_size;
- unsigned int len;
- unsigned int readpos;
- enum trace_seq_fail state;
-};
-
-void trace_seq_init(struct trace_seq *s);
-void trace_seq_reset(struct trace_seq *s);
-void trace_seq_destroy(struct trace_seq *s);
-
-extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
- __attribute__ ((format (printf, 2, 3)));
-extern int trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
- __attribute__ ((format (printf, 2, 0)));
-
-extern int trace_seq_puts(struct trace_seq *s, const char *str);
-extern int trace_seq_putc(struct trace_seq *s, unsigned char c);
-
-extern void trace_seq_terminate(struct trace_seq *s);
-
-extern int trace_seq_do_fprintf(struct trace_seq *s, FILE *fp);
-extern int trace_seq_do_printf(struct trace_seq *s);
-
-#endif /* _TRACE_SEQ_H */