aboutsummaryrefslogtreecommitdiffstats
path: root/tools/lib/bpf/libbpf.c
diff options
context:
space:
mode:
Diffstat (limited to 'tools/lib/bpf/libbpf.c')
-rw-r--r--tools/lib/bpf/libbpf.c1741
1 files changed, 1454 insertions, 287 deletions
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 3f09772192f1..7513165b104f 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -18,6 +18,7 @@
#include <stdarg.h>
#include <libgen.h>
#include <inttypes.h>
+#include <limits.h>
#include <string.h>
#include <unistd.h>
#include <endian.h>
@@ -41,9 +42,11 @@
#include <sys/types.h>
#include <sys/vfs.h>
#include <sys/utsname.h>
+#include <sys/resource.h>
#include <tools/libc_compat.h>
#include <libelf.h>
#include <gelf.h>
+#include <zlib.h>
#include "libbpf.h"
#include "bpf.h"
@@ -99,14 +102,33 @@ void libbpf_print(enum libbpf_print_level level, const char *format, ...)
va_end(args);
}
-#define STRERR_BUFSIZE 128
+static void pr_perm_msg(int err)
+{
+ struct rlimit limit;
+ char buf[100];
+
+ if (err != -EPERM || geteuid() != 0)
+ return;
+
+ err = getrlimit(RLIMIT_MEMLOCK, &limit);
+ if (err)
+ return;
+
+ if (limit.rlim_cur == RLIM_INFINITY)
+ return;
+
+ if (limit.rlim_cur < 1024)
+ snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur);
+ else if (limit.rlim_cur < 1024*1024)
+ snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024);
+ else
+ snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024));
-#define CHECK_ERR(action, err, out) do { \
- err = action; \
- if (err) \
- goto out; \
-} while (0)
+ pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n",
+ buf);
+}
+#define STRERR_BUFSIZE 128
/* Copied from tools/perf/util/util.h */
#ifndef zfree
@@ -146,6 +168,20 @@ struct bpf_capabilities {
__u32 array_mmap:1;
};
+enum reloc_type {
+ RELO_LD64,
+ RELO_CALL,
+ RELO_DATA,
+ RELO_EXTERN,
+};
+
+struct reloc_desc {
+ enum reloc_type type;
+ int insn_idx;
+ int map_idx;
+ int sym_off;
+};
+
/*
* bpf_prog should be a better name but it has been used in
* linux/filter.h.
@@ -164,16 +200,7 @@ struct bpf_program {
size_t insns_cnt, main_prog_cnt;
enum bpf_prog_type type;
- struct reloc_desc {
- enum {
- RELO_LD64,
- RELO_CALL,
- RELO_DATA,
- } type;
- int insn_idx;
- int map_idx;
- int sym_off;
- } *reloc_desc;
+ struct reloc_desc *reloc_desc;
int nr_reloc;
int log_level;
@@ -202,22 +229,29 @@ struct bpf_program {
__u32 prog_flags;
};
+#define DATA_SEC ".data"
+#define BSS_SEC ".bss"
+#define RODATA_SEC ".rodata"
+#define KCONFIG_SEC ".kconfig"
+
enum libbpf_map_type {
LIBBPF_MAP_UNSPEC,
LIBBPF_MAP_DATA,
LIBBPF_MAP_BSS,
LIBBPF_MAP_RODATA,
+ LIBBPF_MAP_KCONFIG,
};
static const char * const libbpf_type_to_btf_name[] = {
- [LIBBPF_MAP_DATA] = ".data",
- [LIBBPF_MAP_BSS] = ".bss",
- [LIBBPF_MAP_RODATA] = ".rodata",
+ [LIBBPF_MAP_DATA] = DATA_SEC,
+ [LIBBPF_MAP_BSS] = BSS_SEC,
+ [LIBBPF_MAP_RODATA] = RODATA_SEC,
+ [LIBBPF_MAP_KCONFIG] = KCONFIG_SEC,
};
struct bpf_map {
- int fd;
char *name;
+ int fd;
int sec_idx;
size_t sec_offset;
int map_ifindex;
@@ -228,14 +262,32 @@ struct bpf_map {
void *priv;
bpf_map_clear_priv_t clear_priv;
enum libbpf_map_type libbpf_type;
+ void *mmaped;
char *pin_path;
bool pinned;
bool reused;
};
-struct bpf_secdata {
- void *rodata;
- void *data;
+enum extern_type {
+ EXT_UNKNOWN,
+ EXT_CHAR,
+ EXT_BOOL,
+ EXT_INT,
+ EXT_TRISTATE,
+ EXT_CHAR_ARR,
+};
+
+struct extern_desc {
+ const char *name;
+ int sym_idx;
+ int btf_id;
+ enum extern_type type;
+ int sz;
+ int align;
+ int data_off;
+ bool is_signed;
+ bool is_weak;
+ bool is_set;
};
static LIST_HEAD(bpf_objects_list);
@@ -250,7 +302,11 @@ struct bpf_object {
struct bpf_map *maps;
size_t nr_maps;
size_t maps_cap;
- struct bpf_secdata sections;
+
+ char *kconfig;
+ struct extern_desc *externs;
+ int nr_extern;
+ int kconfig_map_idx;
bool loaded;
bool has_pseudo_calls;
@@ -279,6 +335,7 @@ struct bpf_object {
int maps_shndx;
int btf_maps_shndx;
int text_shndx;
+ int symbols_shndx;
int data_shndx;
int rodata_shndx;
int bss_shndx;
@@ -550,6 +607,7 @@ static struct bpf_object *bpf_object__new(const char *path,
obj->efile.data_shndx = -1;
obj->efile.rodata_shndx = -1;
obj->efile.bss_shndx = -1;
+ obj->kconfig_map_idx = -1;
obj->kern_version = get_kernel_version();
obj->loaded = false;
@@ -748,13 +806,13 @@ int bpf_object__section_size(const struct bpf_object *obj, const char *name,
*size = 0;
if (!name) {
return -EINVAL;
- } else if (!strcmp(name, ".data")) {
+ } else if (!strcmp(name, DATA_SEC)) {
if (obj->efile.data)
*size = obj->efile.data->d_size;
- } else if (!strcmp(name, ".bss")) {
+ } else if (!strcmp(name, BSS_SEC)) {
if (obj->efile.bss)
*size = obj->efile.bss->d_size;
- } else if (!strcmp(name, ".rodata")) {
+ } else if (!strcmp(name, RODATA_SEC)) {
if (obj->efile.rodata)
*size = obj->efile.rodata->d_size;
} else {
@@ -835,13 +893,38 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
return &obj->maps[obj->nr_maps++];
}
+static size_t bpf_map_mmap_sz(const struct bpf_map *map)
+{
+ long page_sz = sysconf(_SC_PAGE_SIZE);
+ size_t map_sz;
+
+ map_sz = roundup(map->def.value_size, 8) * map->def.max_entries;
+ map_sz = roundup(map_sz, page_sz);
+ return map_sz;
+}
+
+static char *internal_map_name(struct bpf_object *obj,
+ enum libbpf_map_type type)
+{
+ char map_name[BPF_OBJ_NAME_LEN];
+ const char *sfx = libbpf_type_to_btf_name[type];
+ int sfx_len = max((size_t)7, strlen(sfx));
+ int pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1,
+ strlen(obj->name));
+
+ snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
+ sfx_len, libbpf_type_to_btf_name[type]);
+
+ return strdup(map_name);
+}
+
static int
bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
- int sec_idx, Elf_Data *data, void **data_buff)
+ int sec_idx, void *data, size_t data_sz)
{
- char map_name[BPF_OBJ_NAME_LEN];
struct bpf_map_def *def;
struct bpf_map *map;
+ int err;
map = bpf_object__add_map(obj);
if (IS_ERR(map))
@@ -850,9 +933,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
map->libbpf_type = type;
map->sec_idx = sec_idx;
map->sec_offset = 0;
- snprintf(map_name, sizeof(map_name), "%.8s%.7s", obj->name,
- libbpf_type_to_btf_name[type]);
- map->name = strdup(map_name);
+ map->name = internal_map_name(obj, type);
if (!map->name) {
pr_warn("failed to alloc map name\n");
return -ENOMEM;
@@ -861,25 +942,29 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
def = &map->def;
def->type = BPF_MAP_TYPE_ARRAY;
def->key_size = sizeof(int);
- def->value_size = data->d_size;
+ def->value_size = data_sz;
def->max_entries = 1;
- def->map_flags = type == LIBBPF_MAP_RODATA ? BPF_F_RDONLY_PROG : 0;
- if (obj->caps.array_mmap)
- def->map_flags |= BPF_F_MMAPABLE;
+ def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
+ ? BPF_F_RDONLY_PROG : 0;
+ def->map_flags |= BPF_F_MMAPABLE;
pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
- map_name, map->sec_idx, map->sec_offset, def->map_flags);
+ map->name, map->sec_idx, map->sec_offset, def->map_flags);
- if (data_buff) {
- *data_buff = malloc(data->d_size);
- if (!*data_buff) {
- zfree(&map->name);
- pr_warn("failed to alloc map content buffer\n");
- return -ENOMEM;
- }
- memcpy(*data_buff, data->d_buf, data->d_size);
+ map->mmaped = mmap(NULL, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ if (map->mmaped == MAP_FAILED) {
+ err = -errno;
+ map->mmaped = NULL;
+ pr_warn("failed to alloc map '%s' content buffer: %d\n",
+ map->name, err);
+ zfree(&map->name);
+ return err;
}
+ if (data)
+ memcpy(map->mmaped, data, data_sz);
+
pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
return 0;
}
@@ -888,37 +973,332 @@ static int bpf_object__init_global_data_maps(struct bpf_object *obj)
{
int err;
- if (!obj->caps.global_data)
- return 0;
/*
* Populate obj->maps with libbpf internal maps.
*/
if (obj->efile.data_shndx >= 0) {
err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
obj->efile.data_shndx,
- obj->efile.data,
- &obj->sections.data);
+ obj->efile.data->d_buf,
+ obj->efile.data->d_size);
if (err)
return err;
}
if (obj->efile.rodata_shndx >= 0) {
err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
obj->efile.rodata_shndx,
- obj->efile.rodata,
- &obj->sections.rodata);
+ obj->efile.rodata->d_buf,
+ obj->efile.rodata->d_size);
if (err)
return err;
}
if (obj->efile.bss_shndx >= 0) {
err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
obj->efile.bss_shndx,
- obj->efile.bss, NULL);
+ NULL,
+ obj->efile.bss->d_size);
if (err)
return err;
}
return 0;
}
+
+static struct extern_desc *find_extern_by_name(const struct bpf_object *obj,
+ const void *name)
+{
+ int i;
+
+ for (i = 0; i < obj->nr_extern; i++) {
+ if (strcmp(obj->externs[i].name, name) == 0)
+ return &obj->externs[i];
+ }
+ return NULL;
+}
+
+static int set_ext_value_tri(struct extern_desc *ext, void *ext_val,
+ char value)
+{
+ switch (ext->type) {
+ case EXT_BOOL:
+ if (value == 'm') {
+ pr_warn("extern %s=%c should be tristate or char\n",
+ ext->name, value);
+ return -EINVAL;
+ }
+ *(bool *)ext_val = value == 'y' ? true : false;
+ break;
+ case EXT_TRISTATE:
+ if (value == 'y')
+ *(enum libbpf_tristate *)ext_val = TRI_YES;
+ else if (value == 'm')
+ *(enum libbpf_tristate *)ext_val = TRI_MODULE;
+ else /* value == 'n' */
+ *(enum libbpf_tristate *)ext_val = TRI_NO;
+ break;
+ case EXT_CHAR:
+ *(char *)ext_val = value;
+ break;
+ case EXT_UNKNOWN:
+ case EXT_INT:
+ case EXT_CHAR_ARR:
+ default:
+ pr_warn("extern %s=%c should be bool, tristate, or char\n",
+ ext->name, value);
+ return -EINVAL;
+ }
+ ext->is_set = true;
+ return 0;
+}
+
+static int set_ext_value_str(struct extern_desc *ext, char *ext_val,
+ const char *value)
+{
+ size_t len;
+
+ if (ext->type != EXT_CHAR_ARR) {
+ pr_warn("extern %s=%s should char array\n", ext->name, value);
+ return -EINVAL;
+ }
+
+ len = strlen(value);
+ if (value[len - 1] != '"') {
+ pr_warn("extern '%s': invalid string config '%s'\n",
+ ext->name, value);
+ return -EINVAL;
+ }
+
+ /* strip quotes */
+ len -= 2;
+ if (len >= ext->sz) {
+ pr_warn("extern '%s': long string config %s of (%zu bytes) truncated to %d bytes\n",
+ ext->name, value, len, ext->sz - 1);
+ len = ext->sz - 1;
+ }
+ memcpy(ext_val, value + 1, len);
+ ext_val[len] = '\0';
+ ext->is_set = true;
+ return 0;
+}
+
+static int parse_u64(const char *value, __u64 *res)
+{
+ char *value_end;
+ int err;
+
+ errno = 0;
+ *res = strtoull(value, &value_end, 0);
+ if (errno) {
+ err = -errno;
+ pr_warn("failed to parse '%s' as integer: %d\n", value, err);
+ return err;
+ }
+ if (*value_end) {
+ pr_warn("failed to parse '%s' as integer completely\n", value);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static bool is_ext_value_in_range(const struct extern_desc *ext, __u64 v)
+{
+ int bit_sz = ext->sz * 8;
+
+ if (ext->sz == 8)
+ return true;
+
+ /* Validate that value stored in u64 fits in integer of `ext->sz`
+ * bytes size without any loss of information. If the target integer
+ * is signed, we rely on the following limits of integer type of
+ * Y bits and subsequent transformation:
+ *
+ * -2^(Y-1) <= X <= 2^(Y-1) - 1
+ * 0 <= X + 2^(Y-1) <= 2^Y - 1
+ * 0 <= X + 2^(Y-1) < 2^Y
+ *
+ * For unsigned target integer, check that all the (64 - Y) bits are
+ * zero.
+ */
+ if (ext->is_signed)
+ return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz);
+ else
+ return (v >> bit_sz) == 0;
+}
+
+static int set_ext_value_num(struct extern_desc *ext, void *ext_val,
+ __u64 value)
+{
+ if (ext->type != EXT_INT && ext->type != EXT_CHAR) {
+ pr_warn("extern %s=%llu should be integer\n",
+ ext->name, (unsigned long long)value);
+ return -EINVAL;
+ }
+ if (!is_ext_value_in_range(ext, value)) {
+ pr_warn("extern %s=%llu value doesn't fit in %d bytes\n",
+ ext->name, (unsigned long long)value, ext->sz);
+ return -ERANGE;
+ }
+ switch (ext->sz) {
+ case 1: *(__u8 *)ext_val = value; break;
+ case 2: *(__u16 *)ext_val = value; break;
+ case 4: *(__u32 *)ext_val = value; break;
+ case 8: *(__u64 *)ext_val = value; break;
+ default:
+ return -EINVAL;
+ }
+ ext->is_set = true;
+ return 0;
+}
+
+static int bpf_object__process_kconfig_line(struct bpf_object *obj,
+ char *buf, void *data)
+{
+ struct extern_desc *ext;
+ char *sep, *value;
+ int len, err = 0;
+ void *ext_val;
+ __u64 num;
+
+ if (strncmp(buf, "CONFIG_", 7))
+ return 0;
+
+ sep = strchr(buf, '=');
+ if (!sep) {
+ pr_warn("failed to parse '%s': no separator\n", buf);
+ return -EINVAL;
+ }
+
+ /* Trim ending '\n' */
+ len = strlen(buf);
+ if (buf[len - 1] == '\n')
+ buf[len - 1] = '\0';
+ /* Split on '=' and ensure that a value is present. */
+ *sep = '\0';
+ if (!sep[1]) {
+ *sep = '=';
+ pr_warn("failed to parse '%s': no value\n", buf);
+ return -EINVAL;
+ }
+
+ ext = find_extern_by_name(obj, buf);
+ if (!ext || ext->is_set)
+ return 0;
+
+ ext_val = data + ext->data_off;
+ value = sep + 1;
+
+ switch (*value) {
+ case 'y': case 'n': case 'm':
+ err = set_ext_value_tri(ext, ext_val, *value);
+ break;
+ case '"':
+ err = set_ext_value_str(ext, ext_val, value);
+ break;
+ default:
+ /* assume integer */
+ err = parse_u64(value, &num);
+ if (err) {
+ pr_warn("extern %s=%s should be integer\n",
+ ext->name, value);
+ return err;
+ }
+ err = set_ext_value_num(ext, ext_val, num);
+ break;
+ }
+ if (err)
+ return err;
+ pr_debug("extern %s=%s\n", ext->name, value);
+ return 0;
+}
+
+static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data)
+{
+ char buf[PATH_MAX];
+ struct utsname uts;
+ int len, err = 0;
+ gzFile file;
+
+ uname(&uts);
+ len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release);
+ if (len < 0)
+ return -EINVAL;
+ else if (len >= PATH_MAX)
+ return -ENAMETOOLONG;
+
+ /* gzopen also accepts uncompressed files. */
+ file = gzopen(buf, "r");
+ if (!file)
+ file = gzopen("/proc/config.gz", "r");
+
+ if (!file) {
+ pr_warn("failed to open system Kconfig\n");
+ return -ENOENT;
+ }
+
+ while (gzgets(file, buf, sizeof(buf))) {
+ err = bpf_object__process_kconfig_line(obj, buf, data);
+ if (err) {
+ pr_warn("error parsing system Kconfig line '%s': %d\n",
+ buf, err);
+ goto out;
+ }
+ }
+
+out:
+ gzclose(file);
+ return err;
+}
+
+static int bpf_object__read_kconfig_mem(struct bpf_object *obj,
+ const char *config, void *data)
+{
+ char buf[PATH_MAX];
+ int err = 0;
+ FILE *file;
+
+ file = fmemopen((void *)config, strlen(config), "r");
+ if (!file) {
+ err = -errno;
+ pr_warn("failed to open in-memory Kconfig: %d\n", err);
+ return err;
+ }
+
+ while (fgets(buf, sizeof(buf), file)) {
+ err = bpf_object__process_kconfig_line(obj, buf, data);
+ if (err) {
+ pr_warn("error parsing in-memory Kconfig line '%s': %d\n",
+ buf, err);
+ break;
+ }
+ }
+
+ fclose(file);
+ return err;
+}
+
+static int bpf_object__init_kconfig_map(struct bpf_object *obj)
+{
+ struct extern_desc *last_ext;
+ size_t map_sz;
+ int err;
+
+ if (obj->nr_extern == 0)
+ return 0;
+
+ last_ext = &obj->externs[obj->nr_extern - 1];
+ map_sz = last_ext->data_off + last_ext->sz;
+
+ err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG,
+ obj->efile.symbols_shndx,
+ NULL, map_sz);
+ if (err)
+ return err;
+
+ obj->kconfig_map_idx = obj->nr_maps - 1;
+
+ return 0;
+}
+
static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
{
Elf_Data *symbols = obj->efile.symbols;
@@ -1242,15 +1622,15 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
}
sz = btf__resolve_size(obj->btf, t->type);
if (sz < 0) {
- pr_warn("map '%s': can't determine key size for type [%u]: %lld.\n",
- map_name, t->type, sz);
+ pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
+ map_name, t->type, (ssize_t)sz);
return sz;
}
- pr_debug("map '%s': found key [%u], sz = %lld.\n",
- map_name, t->type, sz);
+ pr_debug("map '%s': found key [%u], sz = %zd.\n",
+ map_name, t->type, (ssize_t)sz);
if (map->def.key_size && map->def.key_size != sz) {
- pr_warn("map '%s': conflicting key size %u != %lld.\n",
- map_name, map->def.key_size, sz);
+ pr_warn("map '%s': conflicting key size %u != %zd.\n",
+ map_name, map->def.key_size, (ssize_t)sz);
return -EINVAL;
}
map->def.key_size = sz;
@@ -1285,15 +1665,15 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
}
sz = btf__resolve_size(obj->btf, t->type);
if (sz < 0) {
- pr_warn("map '%s': can't determine value size for type [%u]: %lld.\n",
- map_name, t->type, sz);
+ pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
+ map_name, t->type, (ssize_t)sz);
return sz;
}
- pr_debug("map '%s': found value [%u], sz = %lld.\n",
- map_name, t->type, sz);
+ pr_debug("map '%s': found value [%u], sz = %zd.\n",
+ map_name, t->type, (ssize_t)sz);
if (map->def.value_size && map->def.value_size != sz) {
- pr_warn("map '%s': conflicting value size %u != %lld.\n",
- map_name, map->def.value_size, sz);
+ pr_warn("map '%s': conflicting value size %u != %zd.\n",
+ map_name, map->def.value_size, (ssize_t)sz);
return -EINVAL;
}
map->def.value_size = sz;
@@ -1393,21 +1773,20 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
return 0;
}
-static int bpf_object__init_maps(struct bpf_object *obj, bool relaxed_maps,
- const char *pin_root_path)
+static int bpf_object__init_maps(struct bpf_object *obj,
+ const struct bpf_object_open_opts *opts)
{
- bool strict = !relaxed_maps;
+ const char *pin_root_path;
+ bool strict;
int err;
- err = bpf_object__init_user_maps(obj, strict);
- if (err)
- return err;
-
- err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
- if (err)
- return err;
+ strict = !OPTS_GET(opts, relaxed_maps, false);
+ pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
- err = bpf_object__init_global_data_maps(obj);
+ err = bpf_object__init_user_maps(obj, strict);
+ err = err ?: bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
+ err = err ?: bpf_object__init_global_data_maps(obj);
+ err = err ?: bpf_object__init_kconfig_map(obj);
if (err)
return err;
@@ -1509,7 +1888,8 @@ static void bpf_object__sanitize_btf_ext(struct bpf_object *obj)
static bool bpf_object__is_btf_mandatory(const struct bpf_object *obj)
{
- return obj->efile.btf_maps_shndx >= 0;
+ return obj->efile.btf_maps_shndx >= 0 ||
+ obj->nr_extern > 0;
}
static int bpf_object__init_btf(struct bpf_object *obj,
@@ -1526,11 +1906,6 @@ static int bpf_object__init_btf(struct bpf_object *obj,
BTF_ELF_SEC, err);
goto out;
}
- err = btf__finalize_data(obj, obj->btf);
- if (err) {
- pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err);
- goto out;
- }
}
if (btf_ext_data) {
if (!obj->btf) {
@@ -1564,6 +1939,30 @@ out:
return 0;
}
+static int bpf_object__finalize_btf(struct bpf_object *obj)
+{
+ int err;
+
+ if (!obj->btf)
+ return 0;
+
+ err = btf__finalize_data(obj, obj->btf);
+ if (!err)
+ return 0;
+
+ pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err);
+ btf__free(obj->btf);
+ obj->btf = NULL;
+ btf_ext__free(obj->btf_ext);
+ obj->btf_ext = NULL;
+
+ if (bpf_object__is_btf_mandatory(obj)) {
+ pr_warn("BTF is required, but is missing or corrupted.\n");
+ return -ENOENT;
+ }
+ return 0;
+}
+
static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
{
int err = 0;
@@ -1592,8 +1991,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
return 0;
}
-static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps,
- const char *pin_root_path)
+static int bpf_object__elf_collect(struct bpf_object *obj)
{
Elf *elf = obj->efile.elf;
GElf_Ehdr *ep = &obj->efile.ehdr;
@@ -1665,6 +2063,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps,
return -LIBBPF_ERRNO__FORMAT;
}
obj->efile.symbols = data;
+ obj->efile.symbols_shndx = idx;
obj->efile.strtabidx = sh.sh_link;
} else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) {
if (sh.sh_flags & SHF_EXECINSTR) {
@@ -1683,10 +2082,10 @@ static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps,
name, obj->path, cp);
return err;
}
- } else if (strcmp(name, ".data") == 0) {
+ } else if (strcmp(name, DATA_SEC) == 0) {
obj->efile.data = data;
obj->efile.data_shndx = idx;
- } else if (strcmp(name, ".rodata") == 0) {
+ } else if (strcmp(name, RODATA_SEC) == 0) {
obj->efile.rodata = data;
obj->efile.rodata_shndx = idx;
} else {
@@ -1716,7 +2115,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps,
obj->efile.reloc_sects[nr_sects].shdr = sh;
obj->efile.reloc_sects[nr_sects].data = data;
- } else if (sh.sh_type == SHT_NOBITS && strcmp(name, ".bss") == 0) {
+ } else if (sh.sh_type == SHT_NOBITS &&
+ strcmp(name, BSS_SEC) == 0) {
obj->efile.bss = data;
obj->efile.bss_shndx = idx;
} else {
@@ -1728,14 +2128,217 @@ static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps,
pr_warn("Corrupted ELF file: index of strtab invalid\n");
return -LIBBPF_ERRNO__FORMAT;
}
- err = bpf_object__init_btf(obj, btf_data, btf_ext_data);
- if (!err)
- err = bpf_object__init_maps(obj, relaxed_maps, pin_root_path);
- if (!err)
- err = bpf_object__sanitize_and_load_btf(obj);
- if (!err)
- err = bpf_object__init_prog_names(obj);
- return err;
+ return bpf_object__init_btf(obj, btf_data, btf_ext_data);
+}
+
+static bool sym_is_extern(const GElf_Sym *sym)
+{
+ int bind = GELF_ST_BIND(sym->st_info);
+ /* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */
+ return sym->st_shndx == SHN_UNDEF &&
+ (bind == STB_GLOBAL || bind == STB_WEAK) &&
+ GELF_ST_TYPE(sym->st_info) == STT_NOTYPE;
+}
+
+static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
+{
+ const struct btf_type *t;
+ const char *var_name;
+ int i, n;
+
+ if (!btf)
+ return -ESRCH;
+
+ n = btf__get_nr_types(btf);
+ for (i = 1; i <= n; i++) {
+ t = btf__type_by_id(btf, i);
+
+ if (!btf_is_var(t))
+ continue;
+
+ var_name = btf__name_by_offset(btf, t->name_off);
+ if (strcmp(var_name, ext_name))
+ continue;
+
+ if (btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
+ return -EINVAL;
+
+ return i;
+ }
+
+ return -ENOENT;
+}
+
+static enum extern_type find_extern_type(const struct btf *btf, int id,
+ bool *is_signed)
+{
+ const struct btf_type *t;
+ const char *name;
+
+ t = skip_mods_and_typedefs(btf, id, NULL);
+ name = btf__name_by_offset(btf, t->name_off);
+
+ if (is_signed)
+ *is_signed = false;
+ switch (btf_kind(t)) {
+ case BTF_KIND_INT: {
+ int enc = btf_int_encoding(t);
+
+ if (enc & BTF_INT_BOOL)
+ return t->size == 1 ? EXT_BOOL : EXT_UNKNOWN;
+ if (is_signed)
+ *is_signed = enc & BTF_INT_SIGNED;
+ if (t->size == 1)
+ return EXT_CHAR;
+ if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1)))
+ return EXT_UNKNOWN;
+ return EXT_INT;
+ }
+ case BTF_KIND_ENUM:
+ if (t->size != 4)
+ return EXT_UNKNOWN;
+ if (strcmp(name, "libbpf_tristate"))
+ return EXT_UNKNOWN;
+ return EXT_TRISTATE;
+ case BTF_KIND_ARRAY:
+ if (btf_array(t)->nelems == 0)
+ return EXT_UNKNOWN;
+ if (find_extern_type(btf, btf_array(t)->type, NULL) != EXT_CHAR)
+ return EXT_UNKNOWN;
+ return EXT_CHAR_ARR;
+ default:
+ return EXT_UNKNOWN;
+ }
+}
+
+static int cmp_externs(const void *_a, const void *_b)
+{
+ const struct extern_desc *a = _a;
+ const struct extern_desc *b = _b;
+
+ /* descending order by alignment requirements */
+ if (a->align != b->align)
+ return a->align > b->align ? -1 : 1;
+ /* ascending order by size, within same alignment class */
+ if (a->sz != b->sz)
+ return a->sz < b->sz ? -1 : 1;
+ /* resolve ties by name */
+ return strcmp(a->name, b->name);
+}
+
+static int bpf_object__collect_externs(struct bpf_object *obj)
+{
+ const struct btf_type *t;
+ struct extern_desc *ext;
+ int i, n, off, btf_id;
+ struct btf_type *sec;
+ const char *ext_name;
+ Elf_Scn *scn;
+ GElf_Shdr sh;
+
+ if (!obj->efile.symbols)
+ return 0;
+
+ scn = elf_getscn(obj->efile.elf, obj->efile.symbols_shndx);
+ if (!scn)
+ return -LIBBPF_ERRNO__FORMAT;
+ if (gelf_getshdr(scn, &sh) != &sh)
+ return -LIBBPF_ERRNO__FORMAT;
+ n = sh.sh_size / sh.sh_entsize;
+
+ pr_debug("looking for externs among %d symbols...\n", n);
+ for (i = 0; i < n; i++) {
+ GElf_Sym sym;
+
+ if (!gelf_getsym(obj->efile.symbols, i, &sym))
+ return -LIBBPF_ERRNO__FORMAT;
+ if (!sym_is_extern(&sym))
+ continue;
+ ext_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
+ sym.st_name);
+ if (!ext_name || !ext_name[0])
+ continue;
+
+ ext = obj->externs;
+ ext = reallocarray(ext, obj->nr_extern + 1, sizeof(*ext));
+ if (!ext)
+ return -ENOMEM;
+ obj->externs = ext;
+ ext = &ext[obj->nr_extern];
+ memset(ext, 0, sizeof(*ext));
+ obj->nr_extern++;
+
+ ext->btf_id = find_extern_btf_id(obj->btf, ext_name);
+ if (ext->btf_id <= 0) {
+ pr_warn("failed to find BTF for extern '%s': %d\n",
+ ext_name, ext->btf_id);
+ return ext->btf_id;
+ }
+ t = btf__type_by_id(obj->btf, ext->btf_id);
+ ext->name = btf__name_by_offset(obj->btf, t->name_off);
+ ext->sym_idx = i;
+ ext->is_weak = GELF_ST_BIND(sym.st_info) == STB_WEAK;
+ ext->sz = btf__resolve_size(obj->btf, t->type);
+ if (ext->sz <= 0) {
+ pr_warn("failed to resolve size of extern '%s': %d\n",
+ ext_name, ext->sz);
+ return ext->sz;
+ }
+ ext->align = btf__align_of(obj->btf, t->type);
+ if (ext->align <= 0) {
+ pr_warn("failed to determine alignment of extern '%s': %d\n",
+ ext_name, ext->align);
+ return -EINVAL;
+ }
+ ext->type = find_extern_type(obj->btf, t->type,
+ &ext->is_signed);
+ if (ext->type == EXT_UNKNOWN) {
+ pr_warn("extern '%s' type is unsupported\n", ext_name);
+ return -ENOTSUP;
+ }
+ }
+ pr_debug("collected %d externs total\n", obj->nr_extern);
+
+ if (!obj->nr_extern)
+ return 0;
+
+ /* sort externs by (alignment, size, name) and calculate their offsets
+ * within a map */
+ qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
+ off = 0;
+ for (i = 0; i < obj->nr_extern; i++) {
+ ext = &obj->externs[i];
+ ext->data_off = roundup(off, ext->align);
+ off = ext->data_off + ext->sz;
+ pr_debug("extern #%d: symbol %d, off %u, name %s\n",
+ i, ext->sym_idx, ext->data_off, ext->name);
+ }
+
+ btf_id = btf__find_by_name(obj->btf, KCONFIG_SEC);
+ if (btf_id <= 0) {
+ pr_warn("no BTF info found for '%s' datasec\n", KCONFIG_SEC);
+ return -ESRCH;
+ }
+
+ sec = (struct btf_type *)btf__type_by_id(obj->btf, btf_id);
+ sec->size = off;
+ n = btf_vlen(sec);
+ for (i = 0; i < n; i++) {
+ struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
+
+ t = btf__type_by_id(obj->btf, vs->type);
+ ext_name = btf__name_by_offset(obj->btf, t->name_off);
+ ext = find_extern_by_name(obj, ext_name);
+ if (!ext) {
+ pr_warn("failed to find extern definition for BTF var '%s'\n",
+ ext_name);
+ return -ESRCH;
+ }
+ vs->offset = ext->data_off;
+ btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
+ }
+
+ return 0;
}
static struct bpf_program *
@@ -1765,6 +2368,19 @@ bpf_object__find_program_by_title(const struct bpf_object *obj,
return NULL;
}
+struct bpf_program *
+bpf_object__find_program_by_name(const struct bpf_object *obj,
+ const char *name)
+{
+ struct bpf_program *prog;
+
+ bpf_object__for_each_program(prog, obj) {
+ if (!strcmp(prog->name, name))
+ return prog;
+ }
+ return NULL;
+}
+
static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
int shndx)
{
@@ -1789,6 +2405,8 @@ bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
return LIBBPF_MAP_BSS;
else if (shndx == obj->efile.rodata_shndx)
return LIBBPF_MAP_RODATA;
+ else if (shndx == obj->efile.symbols_shndx)
+ return LIBBPF_MAP_KCONFIG;
else
return LIBBPF_MAP_UNSPEC;
}
@@ -1817,7 +2435,8 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
return -LIBBPF_ERRNO__RELOC;
}
if (sym->st_value % 8) {
- pr_warn("bad call relo offset: %llu\n", (__u64)sym->st_value);
+ pr_warn("bad call relo offset: %zu\n",
+ (size_t)sym->st_value);
return -LIBBPF_ERRNO__RELOC;
}
reloc_desc->type = RELO_CALL;
@@ -1832,6 +2451,30 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
insn_idx, insn->code);
return -LIBBPF_ERRNO__RELOC;
}
+
+ if (sym_is_extern(sym)) {
+ int sym_idx = GELF_R_SYM(rel->r_info);
+ int i, n = obj->nr_extern;
+ struct extern_desc *ext;
+
+ for (i = 0; i < n; i++) {
+ ext = &obj->externs[i];
+ if (ext->sym_idx == sym_idx)
+ break;
+ }
+ if (i >= n) {
+ pr_warn("extern relo failed to find extern for sym %d\n",
+ sym_idx);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ pr_debug("found extern #%d '%s' (sym %d, off %u) for insn %u\n",
+ i, ext->name, ext->sym_idx, ext->data_off, insn_idx);
+ reloc_desc->type = RELO_EXTERN;
+ reloc_desc->insn_idx = insn_idx;
+ reloc_desc->sym_off = ext->data_off;
+ return 0;
+ }
+
if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
pr_warn("invalid relo for \'%s\' in special section 0x%x; forgot to initialize global var?..\n",
name, shdr_idx);
@@ -1859,8 +2502,8 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
break;
}
if (map_idx >= nr_maps) {
- pr_warn("map relo failed to find map for sec %u, off %llu\n",
- shdr_idx, (__u64)sym->st_value);
+ pr_warn("map relo failed to find map for sec %u, off %zu\n",
+ shdr_idx, (size_t)sym->st_value);
return -LIBBPF_ERRNO__RELOC;
}
reloc_desc->type = RELO_LD64;
@@ -1875,11 +2518,6 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
pr_warn("bad data relo against section %u\n", shdr_idx);
return -LIBBPF_ERRNO__RELOC;
}
- if (!obj->caps.global_data) {
- pr_warn("relocation: kernel does not support global \'%s\' variable access in insns[%d]\n",
- name, insn_idx);
- return -LIBBPF_ERRNO__RELOC;
- }
for (map_idx = 0; map_idx < nr_maps; map_idx++) {
map = &obj->maps[map_idx];
if (map->libbpf_type != type)
@@ -1941,9 +2579,9 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
sym.st_name) ? : "<?>";
- pr_debug("relo for shdr %u, symb %llu, value %llu, type %d, bind %d, name %d (\'%s\'), insn %u\n",
- (__u32)sym.st_shndx, (__u64)GELF_R_SYM(rel.r_info),
- (__u64)sym.st_value, GELF_ST_TYPE(sym.st_info),
+ pr_debug("relo for shdr %u, symb %zu, value %zu, type %d, bind %d, name %d (\'%s\'), insn %u\n",
+ (__u32)sym.st_shndx, (size_t)GELF_R_SYM(rel.r_info),
+ (size_t)sym.st_value, GELF_ST_TYPE(sym.st_info),
GELF_ST_BIND(sym.st_info), sym.st_name, name,
insn_idx);
@@ -2298,29 +2936,35 @@ bpf_object__reuse_map(struct bpf_map *map)
static int
bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
{
+ enum libbpf_map_type map_type = map->libbpf_type;
char *cp, errmsg[STRERR_BUFSIZE];
int err, zero = 0;
- __u8 *data;
- /* Nothing to do here since kernel already zero-initializes .bss map. */
- if (map->libbpf_type == LIBBPF_MAP_BSS)
+ /* kernel already zero-initializes .bss map. */
+ if (map_type == LIBBPF_MAP_BSS)
return 0;
- data = map->libbpf_type == LIBBPF_MAP_DATA ?
- obj->sections.data : obj->sections.rodata;
+ err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
+ if (err) {
+ err = -errno;
+ cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
+ pr_warn("Error setting initial map(%s) contents: %s\n",
+ map->name, cp);
+ return err;
+ }
- err = bpf_map_update_elem(map->fd, &zero, data, 0);
- /* Freeze .rodata map as read-only from syscall side. */
- if (!err && map->libbpf_type == LIBBPF_MAP_RODATA) {
+ /* Freeze .rodata and .kconfig map as read-only from syscall side. */
+ if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) {
err = bpf_map_freeze(map->fd);
if (err) {
- cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+ err = -errno;
+ cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
pr_warn("Error freezing map(%s) as read-only: %s\n",
map->name, cp);
- err = 0;
+ return err;
}
}
- return err;
+ return 0;
}
static int
@@ -2411,6 +3055,7 @@ err_out:
cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
pr_warn("failed to create map (name: '%s'): %s(%d)\n",
map->name, cp, err);
+ pr_perm_msg(err);
for (j = 0; j < i; j++)
zclose(obj->maps[j].fd);
return err;
@@ -2536,6 +3181,21 @@ static bool str_is_empty(const char *s)
return !s || !s[0];
}
+static bool is_flex_arr(const struct btf *btf,
+ const struct bpf_core_accessor *acc,
+ const struct btf_array *arr)
+{
+ const struct btf_type *t;
+
+ /* not a flexible array, if not inside a struct or has non-zero size */
+ if (!acc->name || arr->nelems > 0)
+ return false;
+
+ /* has to be the last member of enclosing struct */
+ t = btf__type_by_id(btf, acc->type_id);
+ return acc->idx == btf_vlen(t) - 1;
+}
+
/*
* Turn bpf_field_reloc into a low- and high-level spec representation,
* validating correctness along the way, as well as calculating resulting
@@ -2573,6 +3233,7 @@ static int bpf_core_spec_parse(const struct btf *btf,
struct bpf_core_spec *spec)
{
int access_idx, parsed_len, i;
+ struct bpf_core_accessor *acc;
const struct btf_type *t;
const char *name;
__u32 id;
@@ -2620,6 +3281,7 @@ static int bpf_core_spec_parse(const struct btf *btf,
return -EINVAL;
access_idx = spec->raw_spec[i];
+ acc = &spec->spec[spec->len];
if (btf_is_composite(t)) {
const struct btf_member *m;
@@ -2637,18 +3299,23 @@ static int bpf_core_spec_parse(const struct btf *btf,
if (str_is_empty(name))
return -EINVAL;
- spec->spec[spec->len].type_id = id;
- spec->spec[spec->len].idx = access_idx;
- spec->spec[spec->len].name = name;
+ acc->type_id = id;
+ acc->idx = access_idx;
+ acc->name = name;
spec->len++;
}
id = m->type;
} else if (btf_is_array(t)) {
const struct btf_array *a = btf_array(t);
+ bool flex;
t = skip_mods_and_typedefs(btf, a->type, &id);
- if (!t || access_idx >= a->nelems)
+ if (!t)
+ return -EINVAL;
+
+ flex = is_flex_arr(btf, acc - 1, a);
+ if (!flex && access_idx >= a->nelems)
return -EINVAL;
spec->spec[spec->len].type_id = id;
@@ -2953,12 +3620,14 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
*/
if (i > 0) {
const struct btf_array *a;
+ bool flex;
if (!btf_is_array(targ_type))
return 0;
a = btf_array(targ_type);
- if (local_acc->idx >= a->nelems)
+ flex = is_flex_arr(targ_btf, targ_acc - 1, a);
+ if (!flex && local_acc->idx >= a->nelems)
return 0;
if (!skip_mods_and_typedefs(targ_btf, a->type,
&targ_id))
@@ -3142,11 +3811,13 @@ static int bpf_core_reloc_insn(struct bpf_program *prog,
insn = &prog->insns[insn_idx];
class = BPF_CLASS(insn->code);
- if (class == BPF_ALU || class == BPF_ALU64) {
+ switch (class) {
+ case BPF_ALU:
+ case BPF_ALU64:
if (BPF_SRC(insn->code) != BPF_K)
return -EINVAL;
if (!failed && validate && insn->imm != orig_val) {
- pr_warn("prog '%s': unexpected insn #%d value: got %u, exp %u -> %u\n",
+ pr_warn("prog '%s': unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n",
bpf_program__title(prog, false), insn_idx,
insn->imm, orig_val, new_val);
return -EINVAL;
@@ -3156,7 +3827,29 @@ static int bpf_core_reloc_insn(struct bpf_program *prog,
pr_debug("prog '%s': patched insn #%d (ALU/ALU64)%s imm %u -> %u\n",
bpf_program__title(prog, false), insn_idx,
failed ? " w/ failed reloc" : "", orig_val, new_val);
- } else {
+ break;
+ case BPF_LDX:
+ case BPF_ST:
+ case BPF_STX:
+ if (!failed && validate && insn->off != orig_val) {
+ pr_warn("prog '%s': unexpected insn #%d (LD/LDX/ST/STX) value: got %u, exp %u -> %u\n",
+ bpf_program__title(prog, false), insn_idx,
+ insn->off, orig_val, new_val);
+ return -EINVAL;
+ }
+ if (new_val > SHRT_MAX) {
+ pr_warn("prog '%s': insn #%d (LD/LDX/ST/STX) value too big: %u\n",
+ bpf_program__title(prog, false), insn_idx,
+ new_val);
+ return -ERANGE;
+ }
+ orig_val = insn->off;
+ insn->off = new_val;
+ pr_debug("prog '%s': patched insn #%d (LD/LDX/ST/STX)%s off %u -> %u\n",
+ bpf_program__title(prog, false), insn_idx,
+ failed ? " w/ failed reloc" : "", orig_val, new_val);
+ break;
+ default:
pr_warn("prog '%s': trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n",
bpf_program__title(prog, false),
insn_idx, insn->code, insn->src_reg, insn->dst_reg,
@@ -3559,9 +4252,6 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj,
size_t new_cnt;
int err;
- if (relo->type != RELO_CALL)
- return -LIBBPF_ERRNO__RELOC;
-
if (prog->idx == obj->efile.text_shndx) {
pr_warn("relo in .text insn %d into off %d (insn #%d)\n",
relo->insn_idx, relo->sym_off, relo->sym_off / 8);
@@ -3623,27 +4313,37 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
for (i = 0; i < prog->nr_reloc; i++) {
struct reloc_desc *relo = &prog->reloc_desc[i];
+ struct bpf_insn *insn = &prog->insns[relo->insn_idx];
- if (relo->type == RELO_LD64 || relo->type == RELO_DATA) {
- struct bpf_insn *insn = &prog->insns[relo->insn_idx];
-
- if (relo->insn_idx + 1 >= (int)prog->insns_cnt) {
- pr_warn("relocation out of range: '%s'\n",
- prog->section_name);
- return -LIBBPF_ERRNO__RELOC;
- }
+ if (relo->insn_idx + 1 >= (int)prog->insns_cnt) {
+ pr_warn("relocation out of range: '%s'\n",
+ prog->section_name);
+ return -LIBBPF_ERRNO__RELOC;
+ }
- if (relo->type != RELO_DATA) {
- insn[0].src_reg = BPF_PSEUDO_MAP_FD;
- } else {
- insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
- insn[1].imm = insn[0].imm + relo->sym_off;
- }
+ switch (relo->type) {
+ case RELO_LD64:
+ insn[0].src_reg = BPF_PSEUDO_MAP_FD;
+ insn[0].imm = obj->maps[relo->map_idx].fd;
+ break;
+ case RELO_DATA:
+ insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
+ insn[1].imm = insn[0].imm + relo->sym_off;
insn[0].imm = obj->maps[relo->map_idx].fd;
- } else if (relo->type == RELO_CALL) {
+ break;
+ case RELO_EXTERN:
+ insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
+ insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
+ insn[1].imm = relo->sym_off;
+ break;
+ case RELO_CALL:
err = bpf_program__reloc_text(prog, obj, relo);
if (err)
return err;
+ break;
+ default:
+ pr_warn("relo #%d: bad relo type %d\n", i, relo->type);
+ return -EINVAL;
}
}
@@ -3778,6 +4478,7 @@ retry_load:
ret = -errno;
cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
pr_warn("load bpf program failed: %s\n", cp);
+ pr_perm_msg(ret);
if (log_buf && log_buf[0] != '\0') {
ret = -LIBBPF_ERRNO__VERIFY;
@@ -3807,11 +4508,22 @@ out:
return ret;
}
-int
-bpf_program__load(struct bpf_program *prog,
- char *license, __u32 kern_version)
+static int libbpf_find_attach_btf_id(const char *name,
+ enum bpf_attach_type attach_type,
+ __u32 attach_prog_fd);
+
+int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
{
- int err = 0, fd, i;
+ int err = 0, fd, i, btf_id;
+
+ if (prog->type == BPF_PROG_TYPE_TRACING) {
+ btf_id = libbpf_find_attach_btf_id(prog->section_name,
+ prog->expected_attach_type,
+ prog->attach_prog_fd);
+ if (btf_id <= 0)
+ return btf_id;
+ prog->attach_btf_id = btf_id;
+ }
if (prog->instances.nr < 0 || !prog->instances.fds) {
if (prog->preprocessor) {
@@ -3835,7 +4547,7 @@ bpf_program__load(struct bpf_program *prog,
prog->section_name, prog->instances.nr);
}
err = load_program(prog, prog->insns, prog->insns_cnt,
- license, kern_version, &fd);
+ license, kern_ver, &fd);
if (!err)
prog->instances.fds[0] = fd;
goto out;
@@ -3864,9 +4576,7 @@ bpf_program__load(struct bpf_program *prog,
}
err = load_program(prog, result.new_insn_ptr,
- result.new_insn_cnt,
- license, kern_version, &fd);
-
+ result.new_insn_cnt, license, kern_ver, &fd);
if (err) {
pr_warn("Loading the %dth instance of program '%s' failed\n",
i, prog->section_name);
@@ -3910,20 +4620,14 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level)
return 0;
}
-static int libbpf_find_attach_btf_id(const char *name,
- enum bpf_attach_type attach_type,
- __u32 attach_prog_fd);
static struct bpf_object *
__bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
- struct bpf_object_open_opts *opts)
+ const struct bpf_object_open_opts *opts)
{
- const char *pin_root_path;
+ const char *obj_name, *kconfig;
struct bpf_program *prog;
struct bpf_object *obj;
- const char *obj_name;
char tmp_name[64];
- bool relaxed_maps;
- __u32 attach_prog_fd;
int err;
if (elf_version(EV_CURRENT) == EV_NONE) {
@@ -3952,16 +4656,23 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
return obj;
obj->relaxed_core_relocs = OPTS_GET(opts, relaxed_core_relocs, false);
- relaxed_maps = OPTS_GET(opts, relaxed_maps, false);
- pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
- attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0);
-
- CHECK_ERR(bpf_object__elf_init(obj), err, out);
- CHECK_ERR(bpf_object__check_endianness(obj), err, out);
- CHECK_ERR(bpf_object__probe_caps(obj), err, out);
- CHECK_ERR(bpf_object__elf_collect(obj, relaxed_maps, pin_root_path),
- err, out);
- CHECK_ERR(bpf_object__collect_reloc(obj), err, out);
+ kconfig = OPTS_GET(opts, kconfig, NULL);
+ if (kconfig) {
+ obj->kconfig = strdup(kconfig);
+ if (!obj->kconfig)
+ return ERR_PTR(-ENOMEM);
+ }
+
+ err = bpf_object__elf_init(obj);
+ err = err ? : bpf_object__check_endianness(obj);
+ err = err ? : bpf_object__elf_collect(obj);
+ err = err ? : bpf_object__collect_externs(obj);
+ err = err ? : bpf_object__finalize_btf(obj);
+ err = err ? : bpf_object__init_maps(obj, opts);
+ err = err ? : bpf_object__init_prog_names(obj);
+ err = err ? : bpf_object__collect_reloc(obj);
+ if (err)
+ goto out;
bpf_object__elf_finish(obj);
bpf_object__for_each_program(prog, obj) {
@@ -3978,15 +4689,8 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
bpf_program__set_type(prog, prog_type);
bpf_program__set_expected_attach_type(prog, attach_type);
- if (prog_type == BPF_PROG_TYPE_TRACING) {
- err = libbpf_find_attach_btf_id(prog->section_name,
- attach_type,
- attach_prog_fd);
- if (err <= 0)
- goto out;
- prog->attach_btf_id = err;
- prog->attach_prog_fd = attach_prog_fd;
- }
+ if (prog_type == BPF_PROG_TYPE_TRACING)
+ prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0);
}
return obj;
@@ -4026,7 +4730,7 @@ struct bpf_object *bpf_object__open(const char *path)
}
struct bpf_object *
-bpf_object__open_file(const char *path, struct bpf_object_open_opts *opts)
+bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
{
if (!path)
return ERR_PTR(-EINVAL);
@@ -4038,7 +4742,7 @@ bpf_object__open_file(const char *path, struct bpf_object_open_opts *opts)
struct bpf_object *
bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
- struct bpf_object_open_opts *opts)
+ const struct bpf_object_open_opts *opts)
{
if (!obj_buf || obj_buf_sz == 0)
return ERR_PTR(-EINVAL);
@@ -4079,6 +4783,92 @@ int bpf_object__unload(struct bpf_object *obj)
return 0;
}
+static int bpf_object__sanitize_maps(struct bpf_object *obj)
+{
+ struct bpf_map *m;
+
+ bpf_object__for_each_map(m, obj) {
+ if (!bpf_map__is_internal(m))
+ continue;
+ if (!obj->caps.global_data) {
+ pr_warn("kernel doesn't support global data\n");
+ return -ENOTSUP;
+ }
+ if (!obj->caps.array_mmap)
+ m->def.map_flags ^= BPF_F_MMAPABLE;
+ }
+
+ return 0;
+}
+
+static int bpf_object__resolve_externs(struct bpf_object *obj,
+ const char *extra_kconfig)
+{
+ bool need_config = false;
+ struct extern_desc *ext;
+ int err, i;
+ void *data;
+
+ if (obj->nr_extern == 0)
+ return 0;
+
+ data = obj->maps[obj->kconfig_map_idx].mmaped;
+
+ for (i = 0; i < obj->nr_extern; i++) {
+ ext = &obj->externs[i];
+
+ if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
+ void *ext_val = data + ext->data_off;
+ __u32 kver = get_kernel_version();
+
+ if (!kver) {
+ pr_warn("failed to get kernel version\n");
+ return -EINVAL;
+ }
+ err = set_ext_value_num(ext, ext_val, kver);
+ if (err)
+ return err;
+ pr_debug("extern %s=0x%x\n", ext->name, kver);
+ } else if (strncmp(ext->name, "CONFIG_", 7) == 0) {
+ need_config = true;
+ } else {
+ pr_warn("unrecognized extern '%s'\n", ext->name);
+ return -EINVAL;
+ }
+ }
+ if (need_config && extra_kconfig) {
+ err = bpf_object__read_kconfig_mem(obj, extra_kconfig, data);
+ if (err)
+ return -EINVAL;
+ need_config = false;
+ for (i = 0; i < obj->nr_extern; i++) {
+ ext = &obj->externs[i];
+ if (!ext->is_set) {
+ need_config = true;
+ break;
+ }
+ }
+ }
+ if (need_config) {
+ err = bpf_object__read_kconfig_file(obj, data);
+ if (err)
+ return -EINVAL;
+ }
+ for (i = 0; i < obj->nr_extern; i++) {
+ ext = &obj->externs[i];
+
+ if (!ext->is_set && !ext->is_weak) {
+ pr_warn("extern %s (strong) not resolved\n", ext->name);
+ return -ESRCH;
+ } else if (!ext->is_set) {
+ pr_debug("extern %s (weak) not resolved, defaulting to zero\n",
+ ext->name);
+ }
+ }
+
+ return 0;
+}
+
int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
{
struct bpf_object *obj;
@@ -4097,9 +4887,15 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
obj->loaded = true;
- CHECK_ERR(bpf_object__create_maps(obj), err, out);
- CHECK_ERR(bpf_object__relocate(obj, attr->target_btf_path), err, out);
- CHECK_ERR(bpf_object__load_progs(obj, attr->log_level), err, out);
+ err = bpf_object__probe_caps(obj);
+ err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
+ err = err ? : bpf_object__sanitize_and_load_btf(obj);
+ err = err ? : bpf_object__sanitize_maps(obj);
+ err = err ? : bpf_object__create_maps(obj);
+ err = err ? : bpf_object__relocate(obj, attr->target_btf_path);
+ err = err ? : bpf_object__load_progs(obj, attr->log_level);
+ if (err)
+ goto out;
return 0;
out:
@@ -4670,17 +5466,26 @@ void bpf_object__close(struct bpf_object *obj)
btf_ext__free(obj->btf_ext);
for (i = 0; i < obj->nr_maps; i++) {
- zfree(&obj->maps[i].name);
- zfree(&obj->maps[i].pin_path);
- if (obj->maps[i].clear_priv)
- obj->maps[i].clear_priv(&obj->maps[i],
- obj->maps[i].priv);
- obj->maps[i].priv = NULL;
- obj->maps[i].clear_priv = NULL;
+ struct bpf_map *map = &obj->maps[i];
+
+ if (map->clear_priv)
+ map->clear_priv(map, map->priv);
+ map->priv = NULL;
+ map->clear_priv = NULL;
+
+ if (map->mmaped) {
+ munmap(map->mmaped, bpf_map_mmap_sz(map));
+ map->mmaped = NULL;
+ }
+
+ zfree(&map->name);
+ zfree(&map->pin_path);
}
- zfree(&obj->sections.rodata);
- zfree(&obj->sections.data);
+ zfree(&obj->kconfig);
+ zfree(&obj->externs);
+ obj->nr_extern = 0;
+
zfree(&obj->maps);
obj->nr_maps = 0;
@@ -4820,6 +5625,11 @@ void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
prog->prog_ifindex = ifindex;
}
+const char *bpf_program__name(const struct bpf_program *prog)
+{
+ return prog->name;
+}
+
const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
{
const char *title;
@@ -4972,7 +5782,28 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog,
*/
#define BPF_APROG_COMPAT(string, ptype) BPF_PROG_SEC(string, ptype)
-static const struct {
+#define SEC_DEF(sec_pfx, ptype, ...) { \
+ .sec = sec_pfx, \
+ .len = sizeof(sec_pfx) - 1, \
+ .prog_type = BPF_PROG_TYPE_##ptype, \
+ __VA_ARGS__ \
+}
+
+struct bpf_sec_def;
+
+typedef struct bpf_link *(*attach_fn_t)(const struct bpf_sec_def *sec,
+ struct bpf_program *prog);
+
+static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
+ struct bpf_program *prog);
+static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
+ struct bpf_program *prog);
+static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
+ struct bpf_program *prog);
+static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
+ struct bpf_program *prog);
+
+struct bpf_sec_def {
const char *sec;
size_t len;
enum bpf_prog_type prog_type;
@@ -4980,24 +5811,40 @@ static const struct {
bool is_attachable;
bool is_attach_btf;
enum bpf_attach_type attach_type;
-} section_names[] = {
+ attach_fn_t attach_fn;
+};
+
+static const struct bpf_sec_def section_defs[] = {
BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER),
- BPF_PROG_SEC("kprobe/", BPF_PROG_TYPE_KPROBE),
+ BPF_PROG_SEC("sk_reuseport", BPF_PROG_TYPE_SK_REUSEPORT),
+ SEC_DEF("kprobe/", KPROBE,
+ .attach_fn = attach_kprobe),
BPF_PROG_SEC("uprobe/", BPF_PROG_TYPE_KPROBE),
- BPF_PROG_SEC("kretprobe/", BPF_PROG_TYPE_KPROBE),
+ SEC_DEF("kretprobe/", KPROBE,
+ .attach_fn = attach_kprobe),
BPF_PROG_SEC("uretprobe/", BPF_PROG_TYPE_KPROBE),
BPF_PROG_SEC("classifier", BPF_PROG_TYPE_SCHED_CLS),
BPF_PROG_SEC("action", BPF_PROG_TYPE_SCHED_ACT),
- BPF_PROG_SEC("tracepoint/", BPF_PROG_TYPE_TRACEPOINT),
- BPF_PROG_SEC("tp/", BPF_PROG_TYPE_TRACEPOINT),
- BPF_PROG_SEC("raw_tracepoint/", BPF_PROG_TYPE_RAW_TRACEPOINT),
- BPF_PROG_SEC("raw_tp/", BPF_PROG_TYPE_RAW_TRACEPOINT),
- BPF_PROG_BTF("tp_btf/", BPF_PROG_TYPE_TRACING,
- BPF_TRACE_RAW_TP),
- BPF_PROG_BTF("fentry/", BPF_PROG_TYPE_TRACING,
- BPF_TRACE_FENTRY),
- BPF_PROG_BTF("fexit/", BPF_PROG_TYPE_TRACING,
- BPF_TRACE_FEXIT),
+ SEC_DEF("tracepoint/", TRACEPOINT,
+ .attach_fn = attach_tp),
+ SEC_DEF("tp/", TRACEPOINT,
+ .attach_fn = attach_tp),
+ SEC_DEF("raw_tracepoint/", RAW_TRACEPOINT,
+ .attach_fn = attach_raw_tp),
+ SEC_DEF("raw_tp/", RAW_TRACEPOINT,
+ .attach_fn = attach_raw_tp),
+ SEC_DEF("tp_btf/", TRACING,
+ .expected_attach_type = BPF_TRACE_RAW_TP,
+ .is_attach_btf = true,
+ .attach_fn = attach_trace),
+ SEC_DEF("fentry/", TRACING,
+ .expected_attach_type = BPF_TRACE_FENTRY,
+ .is_attach_btf = true,
+ .attach_fn = attach_trace),
+ SEC_DEF("fexit/", TRACING,
+ .expected_attach_type = BPF_TRACE_FEXIT,
+ .is_attach_btf = true,
+ .attach_fn = attach_trace),
BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP),
BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT),
BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN),
@@ -5059,12 +5906,26 @@ static const struct {
#undef BPF_APROG_SEC
#undef BPF_EAPROG_SEC
#undef BPF_APROG_COMPAT
+#undef SEC_DEF
#define MAX_TYPE_NAME_SIZE 32
+static const struct bpf_sec_def *find_sec_def(const char *sec_name)
+{
+ int i, n = ARRAY_SIZE(section_defs);
+
+ for (i = 0; i < n; i++) {
+ if (strncmp(sec_name,
+ section_defs[i].sec, section_defs[i].len))
+ continue;
+ return &section_defs[i];
+ }
+ return NULL;
+}
+
static char *libbpf_get_type_names(bool attach_type)
{
- int i, len = ARRAY_SIZE(section_names) * MAX_TYPE_NAME_SIZE;
+ int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE;
char *buf;
buf = malloc(len);
@@ -5073,16 +5934,16 @@ static char *libbpf_get_type_names(bool attach_type)
buf[0] = '\0';
/* Forge string buf with all available names */
- for (i = 0; i < ARRAY_SIZE(section_names); i++) {
- if (attach_type && !section_names[i].is_attachable)
+ for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
+ if (attach_type && !section_defs[i].is_attachable)
continue;
- if (strlen(buf) + strlen(section_names[i].sec) + 2 > len) {
+ if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) {
free(buf);
return NULL;
}
strcat(buf, " ");
- strcat(buf, section_names[i].sec);
+ strcat(buf, section_defs[i].sec);
}
return buf;
@@ -5091,23 +5952,23 @@ static char *libbpf_get_type_names(bool attach_type)
int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
enum bpf_attach_type *expected_attach_type)
{
+ const struct bpf_sec_def *sec_def;
char *type_names;
- int i;
if (!name)
return -EINVAL;
- for (i = 0; i < ARRAY_SIZE(section_names); i++) {
- if (strncmp(name, section_names[i].sec, section_names[i].len))
- continue;
- *prog_type = section_names[i].prog_type;
- *expected_attach_type = section_names[i].expected_attach_type;
+ sec_def = find_sec_def(name);
+ if (sec_def) {
+ *prog_type = sec_def->prog_type;
+ *expected_attach_type = sec_def->expected_attach_type;
return 0;
}
- pr_warn("failed to guess program type from ELF section '%s'\n", name);
+
+ pr_debug("failed to guess program type from ELF section '%s'\n", name);
type_names = libbpf_get_type_names(false);
if (type_names != NULL) {
- pr_info("supported section(type) names are:%s\n", type_names);
+ pr_debug("supported section(type) names are:%s\n", type_names);
free(type_names);
}
@@ -5186,16 +6047,16 @@ static int libbpf_find_attach_btf_id(const char *name,
if (!name)
return -EINVAL;
- for (i = 0; i < ARRAY_SIZE(section_names); i++) {
- if (!section_names[i].is_attach_btf)
+ for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
+ if (!section_defs[i].is_attach_btf)
continue;
- if (strncmp(name, section_names[i].sec, section_names[i].len))
+ if (strncmp(name, section_defs[i].sec, section_defs[i].len))
continue;
if (attach_prog_fd)
- err = libbpf_find_prog_btf_id(name + section_names[i].len,
+ err = libbpf_find_prog_btf_id(name + section_defs[i].len,
attach_prog_fd);
else
- err = libbpf_find_vmlinux_btf_id(name + section_names[i].len,
+ err = libbpf_find_vmlinux_btf_id(name + section_defs[i].len,
attach_type);
if (err <= 0)
pr_warn("%s is not found in vmlinux BTF\n", name);
@@ -5214,18 +6075,18 @@ int libbpf_attach_type_by_name(const char *name,
if (!name)
return -EINVAL;
- for (i = 0; i < ARRAY_SIZE(section_names); i++) {
- if (strncmp(name, section_names[i].sec, section_names[i].len))
+ for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
+ if (strncmp(name, section_defs[i].sec, section_defs[i].len))
continue;
- if (!section_names[i].is_attachable)
+ if (!section_defs[i].is_attachable)
return -EINVAL;
- *attach_type = section_names[i].attach_type;
+ *attach_type = section_defs[i].attach_type;
return 0;
}
- pr_warn("failed to guess attach type based on ELF section name '%s'\n", name);
+ pr_debug("failed to guess attach type based on ELF section name '%s'\n", name);
type_names = libbpf_get_type_names(true);
if (type_names != NULL) {
- pr_info("attachable section(type) names are:%s\n", type_names);
+ pr_debug("attachable section(type) names are:%s\n", type_names);
free(type_names);
}
@@ -5466,17 +6327,37 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
}
struct bpf_link {
+ int (*detach)(struct bpf_link *link);
int (*destroy)(struct bpf_link *link);
+ bool disconnected;
};
+/* Release "ownership" of underlying BPF resource (typically, BPF program
+ * attached to some BPF hook, e.g., tracepoint, kprobe, etc). Disconnected
+ * link, when destructed through bpf_link__destroy() call won't attempt to
+ * detach/unregisted that BPF resource. This is useful in situations where,
+ * say, attached BPF program has to outlive userspace program that attached it
+ * in the system. Depending on type of BPF program, though, there might be
+ * additional steps (like pinning BPF program in BPF FS) necessary to ensure
+ * exit of userspace program doesn't trigger automatic detachment and clean up
+ * inside the kernel.
+ */
+void bpf_link__disconnect(struct bpf_link *link)
+{
+ link->disconnected = true;
+}
+
int bpf_link__destroy(struct bpf_link *link)
{
- int err;
+ int err = 0;
if (!link)
return 0;
- err = link->destroy(link);
+ if (!link->disconnected && link->detach)
+ err = link->detach(link);
+ if (link->destroy)
+ link->destroy(link);
free(link);
return err;
@@ -5487,7 +6368,7 @@ struct bpf_link_fd {
int fd; /* hook FD */
};
-static int bpf_link__destroy_perf_event(struct bpf_link *link)
+static int bpf_link__detach_perf_event(struct bpf_link *link)
{
struct bpf_link_fd *l = (void *)link;
int err;
@@ -5519,10 +6400,10 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
return ERR_PTR(-EINVAL);
}
- link = malloc(sizeof(*link));
+ link = calloc(1, sizeof(*link));
if (!link)
return ERR_PTR(-ENOMEM);
- link->link.destroy = &bpf_link__destroy_perf_event;
+ link->link.detach = &bpf_link__detach_perf_event;
link->fd = pfd;
if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
@@ -5679,6 +6560,18 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
return link;
}
+static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
+ struct bpf_program *prog)
+{
+ const char *func_name;
+ bool retprobe;
+
+ func_name = bpf_program__title(prog, false) + sec->len;
+ retprobe = strcmp(sec->sec, "kretprobe/") == 0;
+
+ return bpf_program__attach_kprobe(prog, retprobe, func_name);
+}
+
struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
bool retprobe, pid_t pid,
const char *binary_path,
@@ -5791,7 +6684,33 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
return link;
}
-static int bpf_link__destroy_fd(struct bpf_link *link)
+static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
+ struct bpf_program *prog)
+{
+ char *sec_name, *tp_cat, *tp_name;
+ struct bpf_link *link;
+
+ sec_name = strdup(bpf_program__title(prog, false));
+ if (!sec_name)
+ return ERR_PTR(-ENOMEM);
+
+ /* extract "tp/<category>/<name>" */
+ tp_cat = sec_name + sec->len;
+ tp_name = strchr(tp_cat, '/');
+ if (!tp_name) {
+ link = ERR_PTR(-EINVAL);
+ goto out;
+ }
+ *tp_name = '\0';
+ tp_name++;
+
+ link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name);
+out:
+ free(sec_name);
+ return link;
+}
+
+static int bpf_link__detach_fd(struct bpf_link *link)
{
struct bpf_link_fd *l = (void *)link;
@@ -5812,10 +6731,10 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
return ERR_PTR(-EINVAL);
}
- link = malloc(sizeof(*link));
+ link = calloc(1, sizeof(*link));
if (!link)
return ERR_PTR(-ENOMEM);
- link->link.destroy = &bpf_link__destroy_fd;
+ link->link.detach = &bpf_link__detach_fd;
pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
if (pfd < 0) {
@@ -5830,6 +6749,14 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
return (struct bpf_link *)link;
}
+static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
+ struct bpf_program *prog)
+{
+ const char *tp_name = bpf_program__title(prog, false) + sec->len;
+
+ return bpf_program__attach_raw_tracepoint(prog, tp_name);
+}
+
struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog)
{
char errmsg[STRERR_BUFSIZE];
@@ -5843,10 +6770,10 @@ struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog)
return ERR_PTR(-EINVAL);
}
- link = malloc(sizeof(*link));
+ link = calloc(1, sizeof(*link));
if (!link)
return ERR_PTR(-ENOMEM);
- link->link.destroy = &bpf_link__destroy_fd;
+ link->link.detach = &bpf_link__detach_fd;
pfd = bpf_raw_tracepoint_open(NULL, prog_fd);
if (pfd < 0) {
@@ -5861,6 +6788,23 @@ struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog)
return (struct bpf_link *)link;
}
+static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
+ struct bpf_program *prog)
+{
+ return bpf_program__attach_trace(prog);
+}
+
+struct bpf_link *bpf_program__attach(struct bpf_program *prog)
+{
+ const struct bpf_sec_def *sec_def;
+
+ sec_def = find_sec_def(bpf_program__title(prog, false));
+ if (!sec_def || !sec_def->attach_fn)
+ return ERR_PTR(-ESRCH);
+
+ return sec_def->attach_fn(sec_def, prog);
+}
+
enum bpf_perf_event_ret
bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
void **copy_mem, size_t *copy_size,
@@ -5944,7 +6888,7 @@ struct perf_buffer {
size_t mmap_size;
struct perf_cpu_buf **cpu_bufs;
struct epoll_event *events;
- int cpu_cnt;
+ int cpu_cnt; /* number of allocated CPU buffers */
int epoll_fd; /* perf event FD */
int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
};
@@ -6078,11 +7022,13 @@ perf_buffer__new_raw(int map_fd, size_t page_cnt,
static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
struct perf_buffer_params *p)
{
+ const char *online_cpus_file = "/sys/devices/system/cpu/online";
struct bpf_map_info map = {};
char msg[STRERR_BUFSIZE];
struct perf_buffer *pb;
+ bool *online = NULL;
__u32 map_info_len;
- int err, i;
+ int err, i, j, n;
if (page_cnt & (page_cnt - 1)) {
pr_warn("page count should be power of two, but is %zu\n",
@@ -6151,20 +7097,32 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
goto error;
}
- for (i = 0; i < pb->cpu_cnt; i++) {
+ err = parse_cpu_mask_file(online_cpus_file, &online, &n);
+ if (err) {
+ pr_warn("failed to get online CPU mask: %d\n", err);
+ goto error;
+ }
+
+ for (i = 0, j = 0; i < pb->cpu_cnt; i++) {
struct perf_cpu_buf *cpu_buf;
int cpu, map_key;
cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;
+ /* in case user didn't explicitly requested particular CPUs to
+ * be attached to, skip offline/not present CPUs
+ */
+ if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu]))
+ continue;
+
cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
if (IS_ERR(cpu_buf)) {
err = PTR_ERR(cpu_buf);
goto error;
}
- pb->cpu_bufs[i] = cpu_buf;
+ pb->cpu_bufs[j] = cpu_buf;
err = bpf_map_update_elem(pb->map_fd, &map_key,
&cpu_buf->fd, 0);
@@ -6176,21 +7134,25 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
goto error;
}
- pb->events[i].events = EPOLLIN;
- pb->events[i].data.ptr = cpu_buf;
+ pb->events[j].events = EPOLLIN;
+ pb->events[j].data.ptr = cpu_buf;
if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
- &pb->events[i]) < 0) {
+ &pb->events[j]) < 0) {
err = -errno;
pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
cpu, cpu_buf->fd,
libbpf_strerror_r(err, msg, sizeof(msg)));
goto error;
}
+ j++;
}
+ pb->cpu_cnt = j;
+ free(online);
return pb;
error:
+ free(online);
if (pb)
perf_buffer__free(pb);
return ERR_PTR(err);
@@ -6521,62 +7483,267 @@ void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear)
}
}
-int libbpf_num_possible_cpus(void)
+int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
{
- static const char *fcpu = "/sys/devices/system/cpu/possible";
- int len = 0, n = 0, il = 0, ir = 0;
- unsigned int start = 0, end = 0;
- int tmp_cpus = 0;
- static int cpus;
- char buf[128];
- int error = 0;
- int fd = -1;
+ int err = 0, n, len, start, end = -1;
+ bool *tmp;
- tmp_cpus = READ_ONCE(cpus);
- if (tmp_cpus > 0)
- return tmp_cpus;
+ *mask = NULL;
+ *mask_sz = 0;
+
+ /* Each sub string separated by ',' has format \d+-\d+ or \d+ */
+ while (*s) {
+ if (*s == ',' || *s == '\n') {
+ s++;
+ continue;
+ }
+ n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len);
+ if (n <= 0 || n > 2) {
+ pr_warn("Failed to get CPU range %s: %d\n", s, n);
+ err = -EINVAL;
+ goto cleanup;
+ } else if (n == 1) {
+ end = start;
+ }
+ if (start < 0 || start > end) {
+ pr_warn("Invalid CPU range [%d,%d] in %s\n",
+ start, end, s);
+ err = -EINVAL;
+ goto cleanup;
+ }
+ tmp = realloc(*mask, end + 1);
+ if (!tmp) {
+ err = -ENOMEM;
+ goto cleanup;
+ }
+ *mask = tmp;
+ memset(tmp + *mask_sz, 0, start - *mask_sz);
+ memset(tmp + start, 1, end - start + 1);
+ *mask_sz = end + 1;
+ s += len;
+ }
+ if (!*mask_sz) {
+ pr_warn("Empty CPU range\n");
+ return -EINVAL;
+ }
+ return 0;
+cleanup:
+ free(*mask);
+ *mask = NULL;
+ return err;
+}
+
+int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz)
+{
+ int fd, err = 0, len;
+ char buf[128];
fd = open(fcpu, O_RDONLY);
if (fd < 0) {
- error = errno;
- pr_warn("Failed to open file %s: %s\n", fcpu, strerror(error));
- return -error;
+ err = -errno;
+ pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err);
+ return err;
}
len = read(fd, buf, sizeof(buf));
close(fd);
if (len <= 0) {
- error = len ? errno : EINVAL;
- pr_warn("Failed to read # of possible cpus from %s: %s\n",
- fcpu, strerror(error));
- return -error;
+ err = len ? -errno : -EINVAL;
+ pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err);
+ return err;
}
- if (len == sizeof(buf)) {
- pr_warn("File %s size overflow\n", fcpu);
- return -EOVERFLOW;
+ if (len >= sizeof(buf)) {
+ pr_warn("CPU mask is too big in file %s\n", fcpu);
+ return -E2BIG;
}
buf[len] = '\0';
- for (ir = 0, tmp_cpus = 0; ir <= len; ir++) {
- /* Each sub string separated by ',' has format \d+-\d+ or \d+ */
- if (buf[ir] == ',' || buf[ir] == '\0') {
- buf[ir] = '\0';
- n = sscanf(&buf[il], "%u-%u", &start, &end);
- if (n <= 0) {
- pr_warn("Failed to get # CPUs from %s\n",
- &buf[il]);
- return -EINVAL;
- } else if (n == 1) {
- end = start;
- }
- tmp_cpus += end - start + 1;
- il = ir + 1;
- }
- }
- if (tmp_cpus <= 0) {
- pr_warn("Invalid #CPUs %d from %s\n", tmp_cpus, fcpu);
- return -EINVAL;
+ return parse_cpu_mask_str(buf, mask, mask_sz);
+}
+
+int libbpf_num_possible_cpus(void)
+{
+ static const char *fcpu = "/sys/devices/system/cpu/possible";
+ static int cpus;
+ int err, n, i, tmp_cpus;
+ bool *mask;
+
+ tmp_cpus = READ_ONCE(cpus);
+ if (tmp_cpus > 0)
+ return tmp_cpus;
+
+ err = parse_cpu_mask_file(fcpu, &mask, &n);
+ if (err)
+ return err;
+
+ tmp_cpus = 0;
+ for (i = 0; i < n; i++) {
+ if (mask[i])
+ tmp_cpus++;
}
+ free(mask);
WRITE_ONCE(cpus, tmp_cpus);
return tmp_cpus;
}
+
+int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
+ const struct bpf_object_open_opts *opts)
+{
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts,
+ .object_name = s->name,
+ );
+ struct bpf_object *obj;
+ int i;
+
+ /* Attempt to preserve opts->object_name, unless overriden by user
+ * explicitly. Overwriting object name for skeletons is discouraged,
+ * as it breaks global data maps, because they contain object name
+ * prefix as their own map name prefix. When skeleton is generated,
+ * bpftool is making an assumption that this name will stay the same.
+ */
+ if (opts) {
+ memcpy(&skel_opts, opts, sizeof(*opts));
+ if (!opts->object_name)
+ skel_opts.object_name = s->name;
+ }
+
+ obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts);
+ if (IS_ERR(obj)) {
+ pr_warn("failed to initialize skeleton BPF object '%s': %ld\n",
+ s->name, PTR_ERR(obj));
+ return PTR_ERR(obj);
+ }
+
+ *s->obj = obj;
+
+ for (i = 0; i < s->map_cnt; i++) {
+ struct bpf_map **map = s->maps[i].map;
+ const char *name = s->maps[i].name;
+ void **mmaped = s->maps[i].mmaped;
+
+ *map = bpf_object__find_map_by_name(obj, name);
+ if (!*map) {
+ pr_warn("failed to find skeleton map '%s'\n", name);
+ return -ESRCH;
+ }
+
+ /* externs shouldn't be pre-setup from user code */
+ if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
+ *mmaped = (*map)->mmaped;
+ }
+
+ for (i = 0; i < s->prog_cnt; i++) {
+ struct bpf_program **prog = s->progs[i].prog;
+ const char *name = s->progs[i].name;
+
+ *prog = bpf_object__find_program_by_name(obj, name);
+ if (!*prog) {
+ pr_warn("failed to find skeleton program '%s'\n", name);
+ return -ESRCH;
+ }
+ }
+
+ return 0;
+}
+
+int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
+{
+ int i, err;
+
+ err = bpf_object__load(*s->obj);
+ if (err) {
+ pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err);
+ return err;
+ }
+
+ for (i = 0; i < s->map_cnt; i++) {
+ struct bpf_map *map = *s->maps[i].map;
+ size_t mmap_sz = bpf_map_mmap_sz(map);
+ int prot, map_fd = bpf_map__fd(map);
+ void **mmaped = s->maps[i].mmaped;
+
+ if (!mmaped)
+ continue;
+
+ if (!(map->def.map_flags & BPF_F_MMAPABLE)) {
+ *mmaped = NULL;
+ continue;
+ }
+
+ if (map->def.map_flags & BPF_F_RDONLY_PROG)
+ prot = PROT_READ;
+ else
+ prot = PROT_READ | PROT_WRITE;
+
+ /* Remap anonymous mmap()-ed "map initialization image" as
+ * a BPF map-backed mmap()-ed memory, but preserving the same
+ * memory address. This will cause kernel to change process'
+ * page table to point to a different piece of kernel memory,
+ * but from userspace point of view memory address (and its
+ * contents, being identical at this point) will stay the
+ * same. This mapping will be released by bpf_object__close()
+ * as per normal clean up procedure, so we don't need to worry
+ * about it from skeleton's clean up perspective.
+ */
+ *mmaped = mmap(map->mmaped, mmap_sz, prot,
+ MAP_SHARED | MAP_FIXED, map_fd, 0);
+ if (*mmaped == MAP_FAILED) {
+ err = -errno;
+ *mmaped = NULL;
+ pr_warn("failed to re-mmap() map '%s': %d\n",
+ bpf_map__name(map), err);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
+{
+ int i;
+
+ for (i = 0; i < s->prog_cnt; i++) {
+ struct bpf_program *prog = *s->progs[i].prog;
+ struct bpf_link **link = s->progs[i].link;
+ const struct bpf_sec_def *sec_def;
+ const char *sec_name = bpf_program__title(prog, false);
+
+ sec_def = find_sec_def(sec_name);
+ if (!sec_def || !sec_def->attach_fn)
+ continue;
+
+ *link = sec_def->attach_fn(sec_def, prog);
+ if (IS_ERR(*link)) {
+ pr_warn("failed to auto-attach program '%s': %ld\n",
+ bpf_program__name(prog), PTR_ERR(*link));
+ return PTR_ERR(*link);
+ }
+ }
+
+ return 0;
+}
+
+void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
+{
+ int i;
+
+ for (i = 0; i < s->prog_cnt; i++) {
+ struct bpf_link **link = s->progs[i].link;
+
+ if (!IS_ERR_OR_NULL(*link))
+ bpf_link__destroy(*link);
+ *link = NULL;
+ }
+}
+
+void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
+{
+ if (s->progs)
+ bpf_object__detach_skeleton(s);
+ if (s->obj)
+ bpf_object__close(*s->obj);
+ free(s->maps);
+ free(s->progs);
+ free(s);
+}