diff options
Diffstat (limited to '')
-rw-r--r-- | tools/bpf/bpftool/common.c | 396 |
1 files changed, 313 insertions, 83 deletions
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 65303664417e..958e92acca8e 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -1,7 +1,9 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* Copyright (C) 2017-2018 Netronome Systems, Inc. */ +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif #include <ctype.h> #include <errno.h> #include <fcntl.h> @@ -13,16 +15,21 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> -#include <linux/limits.h> -#include <linux/magic.h> #include <net/if.h> #include <sys/mount.h> #include <sys/resource.h> #include <sys/stat.h> #include <sys/vfs.h> +#include <linux/filter.h> +#include <linux/limits.h> +#include <linux/magic.h> +#include <linux/unistd.h> + #include <bpf/bpf.h> +#include <bpf/hashmap.h> #include <bpf/libbpf.h> /* libbpf_num_possible_cpus */ +#include <bpf/btf.h> #include "main.h" @@ -30,44 +37,6 @@ #define BPF_FS_MAGIC 0xcafe4a11 #endif -const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = { - [BPF_CGROUP_INET_INGRESS] = "ingress", - [BPF_CGROUP_INET_EGRESS] = "egress", - [BPF_CGROUP_INET_SOCK_CREATE] = "sock_create", - [BPF_CGROUP_INET_SOCK_RELEASE] = "sock_release", - [BPF_CGROUP_SOCK_OPS] = "sock_ops", - [BPF_CGROUP_DEVICE] = "device", - [BPF_CGROUP_INET4_BIND] = "bind4", - [BPF_CGROUP_INET6_BIND] = "bind6", - [BPF_CGROUP_INET4_CONNECT] = "connect4", - [BPF_CGROUP_INET6_CONNECT] = "connect6", - [BPF_CGROUP_INET4_POST_BIND] = "post_bind4", - [BPF_CGROUP_INET6_POST_BIND] = "post_bind6", - [BPF_CGROUP_INET4_GETPEERNAME] = "getpeername4", - [BPF_CGROUP_INET6_GETPEERNAME] = "getpeername6", - [BPF_CGROUP_INET4_GETSOCKNAME] = "getsockname4", - [BPF_CGROUP_INET6_GETSOCKNAME] = "getsockname6", - [BPF_CGROUP_UDP4_SENDMSG] = "sendmsg4", - [BPF_CGROUP_UDP6_SENDMSG] = "sendmsg6", - [BPF_CGROUP_SYSCTL] = "sysctl", - [BPF_CGROUP_UDP4_RECVMSG] = "recvmsg4", - [BPF_CGROUP_UDP6_RECVMSG] = "recvmsg6", - [BPF_CGROUP_GETSOCKOPT] = "getsockopt", - [BPF_CGROUP_SETSOCKOPT] = "setsockopt", - - [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser", - [BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict", - [BPF_SK_MSG_VERDICT] = "sk_msg_verdict", - [BPF_LIRC_MODE2] = "lirc_mode2", - [BPF_FLOW_DISSECTOR] = "flow_dissector", - [BPF_TRACE_RAW_TP] = "raw_tp", - [BPF_TRACE_FENTRY] = "fentry", - [BPF_TRACE_FEXIT] = "fexit", - [BPF_MODIFY_RETURN] = "mod_ret", - [BPF_LSM_MAC] = "lsm_mac", - [BPF_SK_LOOKUP] = "sk_lookup", -}; - void p_err(const char *fmt, ...) { va_list ap; @@ -99,7 +68,7 @@ void p_info(const char *fmt, ...) va_end(ap); } -static bool is_bpffs(char *path) +static bool is_bpffs(const char *path) { struct statfs st_fs; @@ -109,11 +78,73 @@ static bool is_bpffs(char *path) return (unsigned long)st_fs.f_type == BPF_FS_MAGIC; } +/* Probe whether kernel switched from memlock-based (RLIMIT_MEMLOCK) to + * memcg-based memory accounting for BPF maps and programs. This was done in + * commit 97306be45fbe ("Merge branch 'switch to memcg-based memory + * accounting'"), in Linux 5.11. + * + * Libbpf also offers to probe for memcg-based accounting vs rlimit, but does + * so by checking for the availability of a given BPF helper and this has + * failed on some kernels with backports in the past, see commit 6b4384ff1088 + * ("Revert "bpftool: Use libbpf 1.0 API mode instead of RLIMIT_MEMLOCK""). + * Instead, we can probe by lowering the process-based rlimit to 0, trying to + * load a BPF object, and resetting the rlimit. If the load succeeds then + * memcg-based accounting is supported. + * + * This would be too dangerous to do in the library, because multithreaded + * applications might attempt to load items while the rlimit is at 0. Given + * that bpftool is single-threaded, this is fine to do here. + */ +static bool known_to_need_rlimit(void) +{ + struct rlimit rlim_init, rlim_cur_zero = {}; + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + size_t insn_cnt = ARRAY_SIZE(insns); + union bpf_attr attr; + int prog_fd, err; + + memset(&attr, 0, sizeof(attr)); + attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + attr.insns = ptr_to_u64(insns); + attr.insn_cnt = insn_cnt; + attr.license = ptr_to_u64("GPL"); + + if (getrlimit(RLIMIT_MEMLOCK, &rlim_init)) + return false; + + /* Drop the soft limit to zero. We maintain the hard limit to its + * current value, because lowering it would be a permanent operation + * for unprivileged users. + */ + rlim_cur_zero.rlim_max = rlim_init.rlim_max; + if (setrlimit(RLIMIT_MEMLOCK, &rlim_cur_zero)) + return false; + + /* Do not use bpf_prog_load() from libbpf here, because it calls + * bump_rlimit_memlock(), interfering with the current probe. + */ + prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); + err = errno; + + /* reset soft rlimit to its initial value */ + setrlimit(RLIMIT_MEMLOCK, &rlim_init); + + if (prog_fd < 0) + return err == EPERM; + + close(prog_fd); + return false; +} + void set_max_rlimit(void) { struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY }; - setrlimit(RLIMIT_MEMLOCK, &rinf); + if (known_to_need_rlimit()) + setrlimit(RLIMIT_MEMLOCK, &rinf); } static int @@ -213,21 +244,101 @@ int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type) return fd; } -int mount_bpffs_for_pin(const char *name) +int create_and_mount_bpffs_dir(const char *dir_name) +{ + char err_str[ERR_MAX_LEN]; + bool dir_exists; + int err = 0; + + if (is_bpffs(dir_name)) + return err; + + dir_exists = access(dir_name, F_OK) == 0; + + if (!dir_exists) { + char *temp_name; + char *parent_name; + + temp_name = strdup(dir_name); + if (!temp_name) { + p_err("mem alloc failed"); + return -1; + } + + parent_name = dirname(temp_name); + + if (is_bpffs(parent_name)) { + /* nothing to do if already mounted */ + free(temp_name); + return err; + } + + if (access(parent_name, F_OK) == -1) { + p_err("can't create dir '%s' to pin BPF object: parent dir '%s' doesn't exist", + dir_name, parent_name); + free(temp_name); + return -1; + } + + free(temp_name); + } + + if (block_mount) { + p_err("no BPF file system found, not mounting it due to --nomount option"); + return -1; + } + + if (!dir_exists) { + err = mkdir(dir_name, S_IRWXU); + if (err) { + p_err("failed to create dir '%s': %s", dir_name, strerror(errno)); + return err; + } + } + + err = mnt_fs(dir_name, "bpf", err_str, ERR_MAX_LEN); + if (err) { + err_str[ERR_MAX_LEN - 1] = '\0'; + p_err("can't mount BPF file system on given dir '%s': %s", + dir_name, err_str); + + if (!dir_exists) + rmdir(dir_name); + } + + return err; +} + +int mount_bpffs_for_file(const char *file_name) { char err_str[ERR_MAX_LEN]; - char *file; + char *temp_name; char *dir; int err = 0; - file = malloc(strlen(name) + 1); - strcpy(file, name); - dir = dirname(file); + if (access(file_name, F_OK) != -1) { + p_err("can't pin BPF object: path '%s' already exists", file_name); + return -1; + } + + temp_name = strdup(file_name); + if (!temp_name) { + p_err("mem alloc failed"); + return -1; + } + + dir = dirname(temp_name); if (is_bpffs(dir)) /* nothing to do if already mounted */ goto out_free; + if (access(dir, F_OK) == -1) { + p_err("can't pin BPF object: dir '%s' doesn't exist", dir); + err = -1; + goto out_free; + } + if (block_mount) { p_err("no BPF file system found, not mounting it due to --nomount option"); err = -1; @@ -237,12 +348,12 @@ int mount_bpffs_for_pin(const char *name) err = mnt_fs(dir, "bpf", err_str, ERR_MAX_LEN); if (err) { err_str[ERR_MAX_LEN - 1] = '\0'; - p_err("can't mount BPF file system to pin the object (%s): %s", - name, err_str); + p_err("can't mount BPF file system to pin the object '%s': %s", + file_name, err_str); } out_free: - free(file); + free(temp_name); return err; } @@ -250,7 +361,7 @@ int do_pin_fd(int fd, const char *name) { int err; - err = mount_bpffs_for_pin(name); + err = mount_bpffs_for_file(name); if (err) return err; @@ -266,6 +377,9 @@ int do_pin_any(int argc, char **argv, int (*get_fd)(int *, char ***)) int err; int fd; + if (!REQ_ARGS(3)) + return -EINVAL; + fd = get_fd(&argc, &argv); if (fd < 0) return fd; @@ -282,6 +396,7 @@ const char *get_fd_type_name(enum bpf_obj_type type) [BPF_OBJ_UNKNOWN] = "unknown", [BPF_OBJ_PROG] = "prog", [BPF_OBJ_MAP] = "map", + [BPF_OBJ_LINK] = "link", }; if (type < 0 || type >= ARRAY_SIZE(names) || !names[type]) @@ -290,6 +405,49 @@ const char *get_fd_type_name(enum bpf_obj_type type) return names[type]; } +void get_prog_full_name(const struct bpf_prog_info *prog_info, int prog_fd, + char *name_buff, size_t buff_len) +{ + const char *prog_name = prog_info->name; + const struct btf_type *func_type; + const struct bpf_func_info finfo = {}; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + struct btf *prog_btf = NULL; + + if (buff_len <= BPF_OBJ_NAME_LEN || + strlen(prog_info->name) < BPF_OBJ_NAME_LEN - 1) + goto copy_name; + + if (!prog_info->btf_id || prog_info->nr_func_info == 0) + goto copy_name; + + info.nr_func_info = 1; + info.func_info_rec_size = prog_info->func_info_rec_size; + if (info.func_info_rec_size > sizeof(finfo)) + info.func_info_rec_size = sizeof(finfo); + info.func_info = ptr_to_u64(&finfo); + + if (bpf_prog_get_info_by_fd(prog_fd, &info, &info_len)) + goto copy_name; + + prog_btf = btf__load_from_kernel_by_id(info.btf_id); + if (!prog_btf) + goto copy_name; + + func_type = btf__type_by_id(prog_btf, finfo.type_id); + if (!func_type || !btf_is_func(func_type)) + goto copy_name; + + prog_name = btf__name_by_offset(prog_btf, func_type->name_off); + +copy_name: + snprintf(name_buff, buff_len, "%s", prog_name); + + if (prog_btf) + btf__free(prog_btf); +} + int get_fd_type(int fd) { char path[PATH_MAX]; @@ -381,7 +539,7 @@ void print_hex_data_json(uint8_t *data, size_t len) } /* extra params for nftw cb */ -static struct pinned_obj_table *build_fn_table; +static struct hashmap *build_fn_table; static enum bpf_obj_type build_fn_type; static int do_build_table_cb(const char *fpath, const struct stat *sb, @@ -389,9 +547,9 @@ static int do_build_table_cb(const char *fpath, const struct stat *sb, { struct bpf_prog_info pinned_info; __u32 len = sizeof(pinned_info); - struct pinned_obj *obj_node; enum bpf_obj_type objtype; int fd, err = 0; + char *path; if (typeflag != FTW_F) goto out_ret; @@ -405,31 +563,30 @@ static int do_build_table_cb(const char *fpath, const struct stat *sb, goto out_close; memset(&pinned_info, 0, sizeof(pinned_info)); - if (bpf_obj_get_info_by_fd(fd, &pinned_info, &len)) + if (bpf_prog_get_info_by_fd(fd, &pinned_info, &len)) goto out_close; - obj_node = calloc(1, sizeof(*obj_node)); - if (!obj_node) { + path = strdup(fpath); + if (!path) { err = -1; goto out_close; } - obj_node->id = pinned_info.id; - obj_node->path = strdup(fpath); - if (!obj_node->path) { - err = -1; - free(obj_node); + err = hashmap__append(build_fn_table, pinned_info.id, path); + if (err) { + p_err("failed to append entry to hashmap for ID %u, path '%s': %s", + pinned_info.id, path, strerror(errno)); + free(path); goto out_close; } - hash_add(build_fn_table->table, &obj_node->hash, obj_node->id); out_close: close(fd); out_ret: return err; } -int build_pinned_obj_table(struct pinned_obj_table *tab, +int build_pinned_obj_table(struct hashmap *tab, enum bpf_obj_type type) { struct mntent *mntent = NULL; @@ -458,17 +615,18 @@ int build_pinned_obj_table(struct pinned_obj_table *tab, return err; } -void delete_pinned_obj_table(struct pinned_obj_table *tab) +void delete_pinned_obj_table(struct hashmap *map) { - struct pinned_obj *obj; - struct hlist_node *tmp; - unsigned int bkt; + struct hashmap_entry *entry; + size_t bkt; - hash_for_each_safe(tab->table, bkt, tmp, obj, hash) { - hash_del(&obj->hash); - free(obj->path); - free(obj); - } + if (!map) + return; + + hashmap__for_each_entry(map, entry, bkt) + free(entry->pvalue); + + hashmap__free(map); } unsigned int get_page_size(void) @@ -548,12 +706,11 @@ static int read_sysfs_netdev_hex_int(char *devname, const char *entry_name) } const char * -ifindex_to_bfd_params(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, - const char **opt) +ifindex_to_arch(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, const char **opt) { + __maybe_unused int device_id; char devname[IF_NAMESIZE]; int vendor_id; - int device_id; if (!ifindex_to_name_ns(ifindex, ns_dev, ns_ino, devname)) { p_err("Can't get net device name for ifindex %d: %s", ifindex, @@ -568,6 +725,7 @@ ifindex_to_bfd_params(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, } switch (vendor_id) { +#ifdef HAVE_LIBBFD_SUPPORT case 0x19ee: device_id = read_sysfs_netdev_hex_int(devname, "device"); if (device_id != 0x4000 && @@ -576,8 +734,10 @@ ifindex_to_bfd_params(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, p_info("Unknown NFP device ID, assuming it is NFP-6xxx arch"); *opt = "ctx4"; return "NFP-6xxx"; +#endif /* HAVE_LIBBFD_SUPPORT */ + /* No NFP support in LLVM, we have no valid triple to return. */ default: - p_err("Can't get bfd arch name for device vendor id 0x%04x", + p_err("Can't get arch name for device vendor id 0x%04x", vendor_id); return NULL; } @@ -645,6 +805,7 @@ print_all_levels(__maybe_unused enum libbpf_print_level level, static int prog_fd_by_nametag(void *nametag, int **fds, bool tag) { + char prog_name[MAX_PROG_FULL_NAME]; unsigned int id = 0; int fd, nb_fds = 0; void *tmp; @@ -670,19 +831,27 @@ static int prog_fd_by_nametag(void *nametag, int **fds, bool tag) goto err_close_fds; } - err = bpf_obj_get_info_by_fd(fd, &info, &len); + err = bpf_prog_get_info_by_fd(fd, &info, &len); if (err) { p_err("can't get prog info (%u): %s", id, strerror(errno)); goto err_close_fd; } - if ((tag && memcmp(nametag, info.tag, BPF_TAG_SIZE)) || - (!tag && strncmp(nametag, info.name, BPF_OBJ_NAME_LEN))) { + if (tag && memcmp(nametag, info.tag, BPF_TAG_SIZE)) { close(fd); continue; } + if (!tag) { + get_prog_full_name(&info, fd, prog_name, + sizeof(prog_name)); + if (strncmp(nametag, prog_name, sizeof(prog_name))) { + close(fd); + continue; + } + } + if (nb_fds > 0) { tmp = realloc(*fds, (nb_fds + 1) * sizeof(int)); if (!tmp) { @@ -743,7 +912,7 @@ int prog_parse_fds(int *argc, char ***argv, int **fds) NEXT_ARGP(); name = **argv; - if (strlen(name) > BPF_OBJ_NAME_LEN - 1) { + if (strlen(name) > MAX_PROG_FULL_NAME - 1) { p_err("can't parse name"); return -1; } @@ -822,7 +991,7 @@ static int map_fd_by_name(char *name, int **fds) goto err_close_fds; } - err = bpf_obj_get_info_by_fd(fd, &info, &len); + err = bpf_map_get_info_by_fd(fd, &info, &len); if (err) { p_err("can't get map info (%u): %s", id, strerror(errno)); @@ -932,7 +1101,8 @@ exit_free: return fd; } -int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len) +int map_parse_fd_and_info(int *argc, char ***argv, struct bpf_map_info *info, + __u32 *info_len) { int err; int fd; @@ -941,7 +1111,7 @@ int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len) if (fd < 0) return -1; - err = bpf_obj_get_info_by_fd(fd, info, info_len); + err = bpf_map_get_info_by_fd(fd, info, info_len); if (err) { p_err("can't get map info: %s", strerror(errno)); close(fd); @@ -950,3 +1120,63 @@ int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len) return fd; } + +size_t hash_fn_for_key_as_id(long key, void *ctx) +{ + return key; +} + +bool equal_fn_for_key_as_id(long k1, long k2, void *ctx) +{ + return k1 == k2; +} + +const char *bpf_attach_type_input_str(enum bpf_attach_type t) +{ + switch (t) { + case BPF_CGROUP_INET_INGRESS: return "ingress"; + case BPF_CGROUP_INET_EGRESS: return "egress"; + case BPF_CGROUP_INET_SOCK_CREATE: return "sock_create"; + case BPF_CGROUP_INET_SOCK_RELEASE: return "sock_release"; + case BPF_CGROUP_SOCK_OPS: return "sock_ops"; + case BPF_CGROUP_DEVICE: return "device"; + case BPF_CGROUP_INET4_BIND: return "bind4"; + case BPF_CGROUP_INET6_BIND: return "bind6"; + case BPF_CGROUP_INET4_CONNECT: return "connect4"; + case BPF_CGROUP_INET6_CONNECT: return "connect6"; + case BPF_CGROUP_INET4_POST_BIND: return "post_bind4"; + case BPF_CGROUP_INET6_POST_BIND: return "post_bind6"; + case BPF_CGROUP_INET4_GETPEERNAME: return "getpeername4"; + case BPF_CGROUP_INET6_GETPEERNAME: return "getpeername6"; + case BPF_CGROUP_INET4_GETSOCKNAME: return "getsockname4"; + case BPF_CGROUP_INET6_GETSOCKNAME: return "getsockname6"; + case BPF_CGROUP_UDP4_SENDMSG: return "sendmsg4"; + case BPF_CGROUP_UDP6_SENDMSG: return "sendmsg6"; + case BPF_CGROUP_SYSCTL: return "sysctl"; + case BPF_CGROUP_UDP4_RECVMSG: return "recvmsg4"; + case BPF_CGROUP_UDP6_RECVMSG: return "recvmsg6"; + case BPF_CGROUP_GETSOCKOPT: return "getsockopt"; + case BPF_CGROUP_SETSOCKOPT: return "setsockopt"; + case BPF_TRACE_RAW_TP: return "raw_tp"; + case BPF_TRACE_FENTRY: return "fentry"; + case BPF_TRACE_FEXIT: return "fexit"; + case BPF_MODIFY_RETURN: return "mod_ret"; + case BPF_SK_REUSEPORT_SELECT: return "sk_skb_reuseport_select"; + case BPF_SK_REUSEPORT_SELECT_OR_MIGRATE: return "sk_skb_reuseport_select_or_migrate"; + default: return libbpf_bpf_attach_type_str(t); + } +} + +int pathname_concat(char *buf, int buf_sz, const char *path, + const char *name) +{ + int len; + + len = snprintf(buf, buf_sz, "%s/%s", path, name); + if (len < 0) + return -EINVAL; + if (len >= buf_sz) + return -ENAMETOOLONG; + + return 0; +} |