 tools/bpf/bpftool/common.c | 396 ++++++++++++++++++++++++++++++++++++---------
 1 file changed, 313 insertions(+), 83 deletions(-)
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index 65303664417e..958e92acca8e 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -1,7 +1,9 @@
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (C) 2017-2018 Netronome Systems, Inc. */
+#ifndef _GNU_SOURCE
#define _GNU_SOURCE
+#endif
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
@@ -13,16 +15,21 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
-#include <linux/limits.h>
-#include <linux/magic.h>
#include <net/if.h>
#include <sys/mount.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/vfs.h>
+#include <linux/filter.h>
+#include <linux/limits.h>
+#include <linux/magic.h>
+#include <linux/unistd.h>
+
#include <bpf/bpf.h>
+#include <bpf/hashmap.h>
#include <bpf/libbpf.h> /* libbpf_num_possible_cpus */
+#include <bpf/btf.h>
#include "main.h"
@@ -30,44 +37,6 @@
#define BPF_FS_MAGIC 0xcafe4a11
#endif
-const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
- [BPF_CGROUP_INET_INGRESS] = "ingress",
- [BPF_CGROUP_INET_EGRESS] = "egress",
- [BPF_CGROUP_INET_SOCK_CREATE] = "sock_create",
- [BPF_CGROUP_INET_SOCK_RELEASE] = "sock_release",
- [BPF_CGROUP_SOCK_OPS] = "sock_ops",
- [BPF_CGROUP_DEVICE] = "device",
- [BPF_CGROUP_INET4_BIND] = "bind4",
- [BPF_CGROUP_INET6_BIND] = "bind6",
- [BPF_CGROUP_INET4_CONNECT] = "connect4",
- [BPF_CGROUP_INET6_CONNECT] = "connect6",
- [BPF_CGROUP_INET4_POST_BIND] = "post_bind4",
- [BPF_CGROUP_INET6_POST_BIND] = "post_bind6",
- [BPF_CGROUP_INET4_GETPEERNAME] = "getpeername4",
- [BPF_CGROUP_INET6_GETPEERNAME] = "getpeername6",
- [BPF_CGROUP_INET4_GETSOCKNAME] = "getsockname4",
- [BPF_CGROUP_INET6_GETSOCKNAME] = "getsockname6",
- [BPF_CGROUP_UDP4_SENDMSG] = "sendmsg4",
- [BPF_CGROUP_UDP6_SENDMSG] = "sendmsg6",
- [BPF_CGROUP_SYSCTL] = "sysctl",
- [BPF_CGROUP_UDP4_RECVMSG] = "recvmsg4",
- [BPF_CGROUP_UDP6_RECVMSG] = "recvmsg6",
- [BPF_CGROUP_GETSOCKOPT] = "getsockopt",
- [BPF_CGROUP_SETSOCKOPT] = "setsockopt",
-
- [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser",
- [BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict",
- [BPF_SK_MSG_VERDICT] = "sk_msg_verdict",
- [BPF_LIRC_MODE2] = "lirc_mode2",
- [BPF_FLOW_DISSECTOR] = "flow_dissector",
- [BPF_TRACE_RAW_TP] = "raw_tp",
- [BPF_TRACE_FENTRY] = "fentry",
- [BPF_TRACE_FEXIT] = "fexit",
- [BPF_MODIFY_RETURN] = "mod_ret",
- [BPF_LSM_MAC] = "lsm_mac",
- [BPF_SK_LOOKUP] = "sk_lookup",
-};
-
void p_err(const char *fmt, ...)
{
va_list ap;
@@ -99,7 +68,7 @@ void p_info(const char *fmt, ...)
va_end(ap);
}
-static bool is_bpffs(char *path)
+static bool is_bpffs(const char *path)
{
struct statfs st_fs;
@@ -109,11 +78,73 @@ static bool is_bpffs(char *path)
return (unsigned long)st_fs.f_type == BPF_FS_MAGIC;
}
+/* Probe whether kernel switched from memlock-based (RLIMIT_MEMLOCK) to
+ * memcg-based memory accounting for BPF maps and programs. This was done in
+ * commit 97306be45fbe ("Merge branch 'switch to memcg-based memory
+ * accounting'"), in Linux 5.11.
+ *
+ * Libbpf also offers to probe for memcg-based accounting vs rlimit, but does
+ * so by checking for the availability of a given BPF helper and this has
+ * failed on some kernels with backports in the past, see commit 6b4384ff1088
+ * ("Revert "bpftool: Use libbpf 1.0 API mode instead of RLIMIT_MEMLOCK"").
+ * Instead, we can probe by lowering the process-based rlimit to 0, trying to
+ * load a BPF object, and resetting the rlimit. If the load succeeds then
+ * memcg-based accounting is supported.
+ *
+ * This would be too dangerous to do in the library, because multithreaded
+ * applications might attempt to load items while the rlimit is at 0. Given
+ * that bpftool is single-threaded, this is fine to do here.
+ */
+static bool known_to_need_rlimit(void)
+{
+ struct rlimit rlim_init, rlim_cur_zero = {};
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ size_t insn_cnt = ARRAY_SIZE(insns);
+ union bpf_attr attr;
+ int prog_fd, err;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ attr.insns = ptr_to_u64(insns);
+ attr.insn_cnt = insn_cnt;
+ attr.license = ptr_to_u64("GPL");
+
+ if (getrlimit(RLIMIT_MEMLOCK, &rlim_init))
+ return false;
+
+ /* Drop the soft limit to zero. We maintain the hard limit to its
+ * current value, because lowering it would be a permanent operation
+ * for unprivileged users.
+ */
+ rlim_cur_zero.rlim_max = rlim_init.rlim_max;
+ if (setrlimit(RLIMIT_MEMLOCK, &rlim_cur_zero))
+ return false;
+
+ /* Do not use bpf_prog_load() from libbpf here, because it calls
+ * bump_rlimit_memlock(), interfering with the current probe.
+ */
+ prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
+ err = errno;
+
+ /* reset soft rlimit to its initial value */
+ setrlimit(RLIMIT_MEMLOCK, &rlim_init);
+
+ if (prog_fd < 0)
+ return err == EPERM;
+
+ close(prog_fd);
+ return false;
+}
+
void set_max_rlimit(void)
{
struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
- setrlimit(RLIMIT_MEMLOCK, &rinf);
+ if (known_to_need_rlimit())
+ setrlimit(RLIMIT_MEMLOCK, &rinf);
}
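
For context, a caller-side sketch of how this helper is now expected to behave (illustrative only, not part of the patch; the object path and error handling are placeholders): bpftool's load paths call set_max_rlimit() before creating BPF objects, and with this change the limit is only raised when the probe reports that the running kernel still charges BPF memory against RLIMIT_MEMLOCK.

    #include <bpf/libbpf.h>

    static int load_object(const char *path)
    {
            struct bpf_object *obj;

            /* No-op on memcg-accounting kernels, bumps RLIMIT_MEMLOCK otherwise. */
            set_max_rlimit();

            obj = bpf_object__open_file(path, NULL);
            if (!obj)
                    return -1;
            if (bpf_object__load(obj)) {
                    bpf_object__close(obj);
                    return -1;
            }
            bpf_object__close(obj);
            return 0;
    }
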
static int
@@ -213,21 +244,101 @@ int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type)
return fd;
}
-int mount_bpffs_for_pin(const char *name)
+int create_and_mount_bpffs_dir(const char *dir_name)
+{
+ char err_str[ERR_MAX_LEN];
+ bool dir_exists;
+ int err = 0;
+
+ if (is_bpffs(dir_name))
+ return err;
+
+ dir_exists = access(dir_name, F_OK) == 0;
+
+ if (!dir_exists) {
+ char *temp_name;
+ char *parent_name;
+
+ temp_name = strdup(dir_name);
+ if (!temp_name) {
+ p_err("mem alloc failed");
+ return -1;
+ }
+
+ parent_name = dirname(temp_name);
+
+ if (is_bpffs(parent_name)) {
+ /* nothing to do if already mounted */
+ free(temp_name);
+ return err;
+ }
+
+ if (access(parent_name, F_OK) == -1) {
+ p_err("can't create dir '%s' to pin BPF object: parent dir '%s' doesn't exist",
+ dir_name, parent_name);
+ free(temp_name);
+ return -1;
+ }
+
+ free(temp_name);
+ }
+
+ if (block_mount) {
+ p_err("no BPF file system found, not mounting it due to --nomount option");
+ return -1;
+ }
+
+ if (!dir_exists) {
+ err = mkdir(dir_name, S_IRWXU);
+ if (err) {
+ p_err("failed to create dir '%s': %s", dir_name, strerror(errno));
+ return err;
+ }
+ }
+
+ err = mnt_fs(dir_name, "bpf", err_str, ERR_MAX_LEN);
+ if (err) {
+ err_str[ERR_MAX_LEN - 1] = '\0';
+ p_err("can't mount BPF file system on given dir '%s': %s",
+ dir_name, err_str);
+
+ if (!dir_exists)
+ rmdir(dir_name);
+ }
+
+ return err;
+}
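
Illustrative usage, not part of the patch (the directory, pin path, and link_fd are placeholders): ensuring that the target directory exists and is backed by bpffs before pinning objects into it, which is what the callers of this helper do.

    if (create_and_mount_bpffs_dir("/sys/fs/bpf/myapp"))
            return -1;

    /* The directory is now guaranteed to be on a BPF file system. */
    if (bpf_obj_pin(link_fd, "/sys/fs/bpf/myapp/link0"))
            return -1;
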
+
+int mount_bpffs_for_file(const char *file_name)
{
char err_str[ERR_MAX_LEN];
- char *file;
+ char *temp_name;
char *dir;
int err = 0;
- file = malloc(strlen(name) + 1);
- strcpy(file, name);
- dir = dirname(file);
+ if (access(file_name, F_OK) != -1) {
+ p_err("can't pin BPF object: path '%s' already exists", file_name);
+ return -1;
+ }
+
+ temp_name = strdup(file_name);
+ if (!temp_name) {
+ p_err("mem alloc failed");
+ return -1;
+ }
+
+ dir = dirname(temp_name);
if (is_bpffs(dir))
/* nothing to do if already mounted */
goto out_free;
+ if (access(dir, F_OK) == -1) {
+ p_err("can't pin BPF object: dir '%s' doesn't exist", dir);
+ err = -1;
+ goto out_free;
+ }
+
if (block_mount) {
p_err("no BPF file system found, not mounting it due to --nomount option");
err = -1;
@@ -237,12 +348,12 @@ int mount_bpffs_for_pin(const char *name)
err = mnt_fs(dir, "bpf", err_str, ERR_MAX_LEN);
if (err) {
err_str[ERR_MAX_LEN - 1] = '\0';
- p_err("can't mount BPF file system to pin the object (%s): %s",
- name, err_str);
+ p_err("can't mount BPF file system to pin the object '%s': %s",
+ file_name, err_str);
}
out_free:
- free(file);
+ free(temp_name);
return err;
}
@@ -250,7 +361,7 @@ int do_pin_fd(int fd, const char *name)
{
int err;
- err = mount_bpffs_for_pin(name);
+ err = mount_bpffs_for_file(name);
if (err)
return err;
@@ -266,6 +377,9 @@ int do_pin_any(int argc, char **argv, int (*get_fd)(int *, char ***))
int err;
int fd;
+ if (!REQ_ARGS(3))
+ return -EINVAL;
+
fd = get_fd(&argc, &argv);
if (fd < 0)
return fd;
@@ -282,6 +396,7 @@ const char *get_fd_type_name(enum bpf_obj_type type)
[BPF_OBJ_UNKNOWN] = "unknown",
[BPF_OBJ_PROG] = "prog",
[BPF_OBJ_MAP] = "map",
+ [BPF_OBJ_LINK] = "link",
};
if (type < 0 || type >= ARRAY_SIZE(names) || !names[type])
@@ -290,6 +405,49 @@ const char *get_fd_type_name(enum bpf_obj_type type)
return names[type];
}
+void get_prog_full_name(const struct bpf_prog_info *prog_info, int prog_fd,
+ char *name_buff, size_t buff_len)
+{
+ const char *prog_name = prog_info->name;
+ const struct btf_type *func_type;
+ const struct bpf_func_info finfo = {};
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ struct btf *prog_btf = NULL;
+
+ if (buff_len <= BPF_OBJ_NAME_LEN ||
+ strlen(prog_info->name) < BPF_OBJ_NAME_LEN - 1)
+ goto copy_name;
+
+ if (!prog_info->btf_id || prog_info->nr_func_info == 0)
+ goto copy_name;
+
+ info.nr_func_info = 1;
+ info.func_info_rec_size = prog_info->func_info_rec_size;
+ if (info.func_info_rec_size > sizeof(finfo))
+ info.func_info_rec_size = sizeof(finfo);
+ info.func_info = ptr_to_u64(&finfo);
+
+ if (bpf_prog_get_info_by_fd(prog_fd, &info, &info_len))
+ goto copy_name;
+
+ prog_btf = btf__load_from_kernel_by_id(info.btf_id);
+ if (!prog_btf)
+ goto copy_name;
+
+ func_type = btf__type_by_id(prog_btf, finfo.type_id);
+ if (!func_type || !btf_is_func(func_type))
+ goto copy_name;
+
+ prog_name = btf__name_by_offset(prog_btf, func_type->name_off);
+
+copy_name:
+ snprintf(name_buff, buff_len, "%s", prog_name);
+
+ if (prog_btf)
+ btf__free(prog_btf);
+}
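
Illustrative usage, not part of the patch (prog_fd and the printing are placeholders; MAX_PROG_FULL_NAME comes from bpftool's main.h): retrieving the untruncated name of a loaded program, falling back to the 16-byte info.name when no BTF function info is available.

    struct bpf_prog_info info = {};
    __u32 info_len = sizeof(info);
    char full_name[MAX_PROG_FULL_NAME];

    if (bpf_prog_get_info_by_fd(prog_fd, &info, &info_len))
            return -1;

    get_prog_full_name(&info, prog_fd, full_name, sizeof(full_name));
    printf("%s\n", full_name);
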
+
int get_fd_type(int fd)
{
char path[PATH_MAX];
@@ -381,7 +539,7 @@ void print_hex_data_json(uint8_t *data, size_t len)
}
/* extra params for nftw cb */
-static struct pinned_obj_table *build_fn_table;
+static struct hashmap *build_fn_table;
static enum bpf_obj_type build_fn_type;
static int do_build_table_cb(const char *fpath, const struct stat *sb,
@@ -389,9 +547,9 @@ static int do_build_table_cb(const char *fpath, const struct stat *sb,
{
struct bpf_prog_info pinned_info;
__u32 len = sizeof(pinned_info);
- struct pinned_obj *obj_node;
enum bpf_obj_type objtype;
int fd, err = 0;
+ char *path;
if (typeflag != FTW_F)
goto out_ret;
@@ -405,31 +563,30 @@ static int do_build_table_cb(const char *fpath, const struct stat *sb,
goto out_close;
memset(&pinned_info, 0, sizeof(pinned_info));
- if (bpf_obj_get_info_by_fd(fd, &pinned_info, &len))
+ if (bpf_prog_get_info_by_fd(fd, &pinned_info, &len))
goto out_close;
- obj_node = calloc(1, sizeof(*obj_node));
- if (!obj_node) {
+ path = strdup(fpath);
+ if (!path) {
err = -1;
goto out_close;
}
- obj_node->id = pinned_info.id;
- obj_node->path = strdup(fpath);
- if (!obj_node->path) {
- err = -1;
- free(obj_node);
+ err = hashmap__append(build_fn_table, pinned_info.id, path);
+ if (err) {
+ p_err("failed to append entry to hashmap for ID %u, path '%s': %s",
+ pinned_info.id, path, strerror(errno));
+ free(path);
goto out_close;
}
- hash_add(build_fn_table->table, &obj_node->hash, obj_node->id);
out_close:
close(fd);
out_ret:
return err;
}
-int build_pinned_obj_table(struct pinned_obj_table *tab,
+int build_pinned_obj_table(struct hashmap *tab,
enum bpf_obj_type type)
{
struct mntent *mntent = NULL;
@@ -458,17 +615,18 @@ int build_pinned_obj_table(struct pinned_obj_table *tab,
return err;
}
-void delete_pinned_obj_table(struct pinned_obj_table *tab)
+void delete_pinned_obj_table(struct hashmap *map)
{
- struct pinned_obj *obj;
- struct hlist_node *tmp;
- unsigned int bkt;
+ struct hashmap_entry *entry;
+ size_t bkt;
- hash_for_each_safe(tab->table, bkt, tmp, obj, hash) {
- hash_del(&obj->hash);
- free(obj->path);
- free(obj);
- }
+ if (!map)
+ return;
+
+ hashmap__for_each_entry(map, entry, bkt)
+ free(entry->pvalue);
+
+ hashmap__free(map);
}
unsigned int get_page_size(void)
@@ -548,12 +706,11 @@ static int read_sysfs_netdev_hex_int(char *devname, const char *entry_name)
}
const char *
-ifindex_to_bfd_params(__u32 ifindex, __u64 ns_dev, __u64 ns_ino,
- const char **opt)
+ifindex_to_arch(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, const char **opt)
{
+ __maybe_unused int device_id;
char devname[IF_NAMESIZE];
int vendor_id;
- int device_id;
if (!ifindex_to_name_ns(ifindex, ns_dev, ns_ino, devname)) {
p_err("Can't get net device name for ifindex %d: %s", ifindex,
@@ -568,6 +725,7 @@ ifindex_to_bfd_params(__u32 ifindex, __u64 ns_dev, __u64 ns_ino,
}
switch (vendor_id) {
+#ifdef HAVE_LIBBFD_SUPPORT
case 0x19ee:
device_id = read_sysfs_netdev_hex_int(devname, "device");
if (device_id != 0x4000 &&
@@ -576,8 +734,10 @@ ifindex_to_bfd_params(__u32 ifindex, __u64 ns_dev, __u64 ns_ino,
p_info("Unknown NFP device ID, assuming it is NFP-6xxx arch");
*opt = "ctx4";
return "NFP-6xxx";
+#endif /* HAVE_LIBBFD_SUPPORT */
+ /* No NFP support in LLVM, we have no valid triple to return. */
default:
- p_err("Can't get bfd arch name for device vendor id 0x%04x",
+ p_err("Can't get arch name for device vendor id 0x%04x",
vendor_id);
return NULL;
}
@@ -645,6 +805,7 @@ print_all_levels(__maybe_unused enum libbpf_print_level level,
static int prog_fd_by_nametag(void *nametag, int **fds, bool tag)
{
+ char prog_name[MAX_PROG_FULL_NAME];
unsigned int id = 0;
int fd, nb_fds = 0;
void *tmp;
@@ -670,19 +831,27 @@ static int prog_fd_by_nametag(void *nametag, int **fds, bool tag)
goto err_close_fds;
}
- err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ err = bpf_prog_get_info_by_fd(fd, &info, &len);
if (err) {
p_err("can't get prog info (%u): %s",
id, strerror(errno));
goto err_close_fd;
}
- if ((tag && memcmp(nametag, info.tag, BPF_TAG_SIZE)) ||
- (!tag && strncmp(nametag, info.name, BPF_OBJ_NAME_LEN))) {
+ if (tag && memcmp(nametag, info.tag, BPF_TAG_SIZE)) {
close(fd);
continue;
}
+ if (!tag) {
+ get_prog_full_name(&info, fd, prog_name,
+ sizeof(prog_name));
+ if (strncmp(nametag, prog_name, sizeof(prog_name))) {
+ close(fd);
+ continue;
+ }
+ }
+
if (nb_fds > 0) {
tmp = realloc(*fds, (nb_fds + 1) * sizeof(int));
if (!tmp) {
@@ -743,7 +912,7 @@ int prog_parse_fds(int *argc, char ***argv, int **fds)
NEXT_ARGP();
name = **argv;
- if (strlen(name) > BPF_OBJ_NAME_LEN - 1) {
+ if (strlen(name) > MAX_PROG_FULL_NAME - 1) {
p_err("can't parse name");
return -1;
}
@@ -822,7 +991,7 @@ static int map_fd_by_name(char *name, int **fds)
goto err_close_fds;
}
- err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ err = bpf_map_get_info_by_fd(fd, &info, &len);
if (err) {
p_err("can't get map info (%u): %s",
id, strerror(errno));
@@ -932,7 +1101,8 @@ exit_free:
return fd;
}
-int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len)
+int map_parse_fd_and_info(int *argc, char ***argv, struct bpf_map_info *info,
+ __u32 *info_len)
{
int err;
int fd;
@@ -941,7 +1111,7 @@ int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len)
if (fd < 0)
return -1;
- err = bpf_obj_get_info_by_fd(fd, info, info_len);
+ err = bpf_map_get_info_by_fd(fd, info, info_len);
if (err) {
p_err("can't get map info: %s", strerror(errno));
close(fd);
@@ -950,3 +1120,63 @@ int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len)
return fd;
}
+
+size_t hash_fn_for_key_as_id(long key, void *ctx)
+{
+ return key;
+}
+
+bool equal_fn_for_key_as_id(long k1, long k2, void *ctx)
+{
+ return k1 == k2;
+}
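
These two callbacks simply use the BPF object ID as its own hash, matching how build_pinned_obj_table() keys the map. An illustrative consumer, loosely following what the prog/map show paths do (not part of the patch; prog_id is a placeholder):

    struct hashmap *prog_table;
    struct hashmap_entry *entry;

    prog_table = hashmap__new(hash_fn_for_key_as_id,
                              equal_fn_for_key_as_id, NULL);
    if (IS_ERR(prog_table))
            return -1;
    if (build_pinned_obj_table(prog_table, BPF_OBJ_PROG))
            return -1;

    /* Print every pinned path recorded for a given program ID. */
    hashmap__for_each_key_entry(prog_table, entry, prog_id)
            printf("pinned %s\n", (char *)entry->pvalue);

    delete_pinned_obj_table(prog_table);
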
+
+const char *bpf_attach_type_input_str(enum bpf_attach_type t)
+{
+ switch (t) {
+ case BPF_CGROUP_INET_INGRESS: return "ingress";
+ case BPF_CGROUP_INET_EGRESS: return "egress";
+ case BPF_CGROUP_INET_SOCK_CREATE: return "sock_create";
+ case BPF_CGROUP_INET_SOCK_RELEASE: return "sock_release";
+ case BPF_CGROUP_SOCK_OPS: return "sock_ops";
+ case BPF_CGROUP_DEVICE: return "device";
+ case BPF_CGROUP_INET4_BIND: return "bind4";
+ case BPF_CGROUP_INET6_BIND: return "bind6";
+ case BPF_CGROUP_INET4_CONNECT: return "connect4";
+ case BPF_CGROUP_INET6_CONNECT: return "connect6";
+ case BPF_CGROUP_INET4_POST_BIND: return "post_bind4";
+ case BPF_CGROUP_INET6_POST_BIND: return "post_bind6";
+ case BPF_CGROUP_INET4_GETPEERNAME: return "getpeername4";
+ case BPF_CGROUP_INET6_GETPEERNAME: return "getpeername6";
+ case BPF_CGROUP_INET4_GETSOCKNAME: return "getsockname4";
+ case BPF_CGROUP_INET6_GETSOCKNAME: return "getsockname6";
+ case BPF_CGROUP_UDP4_SENDMSG: return "sendmsg4";
+ case BPF_CGROUP_UDP6_SENDMSG: return "sendmsg6";
+ case BPF_CGROUP_SYSCTL: return "sysctl";
+ case BPF_CGROUP_UDP4_RECVMSG: return "recvmsg4";
+ case BPF_CGROUP_UDP6_RECVMSG: return "recvmsg6";
+ case BPF_CGROUP_GETSOCKOPT: return "getsockopt";
+ case BPF_CGROUP_SETSOCKOPT: return "setsockopt";
+ case BPF_TRACE_RAW_TP: return "raw_tp";
+ case BPF_TRACE_FENTRY: return "fentry";
+ case BPF_TRACE_FEXIT: return "fexit";
+ case BPF_MODIFY_RETURN: return "mod_ret";
+ case BPF_SK_REUSEPORT_SELECT: return "sk_skb_reuseport_select";
+ case BPF_SK_REUSEPORT_SELECT_OR_MIGRATE: return "sk_skb_reuseport_select_or_migrate";
+ default: return libbpf_bpf_attach_type_str(t);
+ }
+}
+
+int pathname_concat(char *buf, int buf_sz, const char *path,
+ const char *name)
+{
+ int len;
+
+ len = snprintf(buf, buf_sz, "%s/%s", path, name);
+ if (len < 0)
+ return -EINVAL;
+ if (len >= buf_sz)
+ return -ENAMETOOLONG;
+
+ return 0;
+}
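
Illustrative usage, not part of the patch (the base directory, object name, and prog_fd are placeholders): building a pin path with overflow checking before pinning an object.

    char pin_path[PATH_MAX];
    int err;

    err = pathname_concat(pin_path, sizeof(pin_path), "/sys/fs/bpf", "my_prog");
    if (!err)
            err = bpf_obj_pin(prog_fd, pin_path);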