aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/bpf/btf.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/bpf/btf.c')
-rw-r--r--kernel/bpf/btf.c2147
1 files changed, 1893 insertions, 254 deletions
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 9bdb03767db5..35c07afac924 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018 Facebook */
#include <uapi/linux/btf.h>
@@ -25,6 +25,7 @@
#include <linux/kobject.h>
#include <linux/sysfs.h>
#include <net/sock.h>
+#include "../tools/lib/bpf/relo_core.h"
/* BTF (BPF Type Format) is the meta data format which describes
* the data types of BPF program/map. Hence, it basically focuses
@@ -197,6 +198,29 @@
DEFINE_IDR(btf_idr);
DEFINE_SPINLOCK(btf_idr_lock);
+enum btf_kfunc_hook { /* one value per slot of btf_kfunc_set_tab.sets[] */
+ BTF_KFUNC_HOOK_XDP,
+ BTF_KFUNC_HOOK_TC,
+ BTF_KFUNC_HOOK_STRUCT_OPS,
+ BTF_KFUNC_HOOK_TRACING,
+ BTF_KFUNC_HOOK_SYSCALL,
+ BTF_KFUNC_HOOK_MAX, /* number of hooks; used as the sets[] array length */
+};
+/* Capacity limits. Their users are outside this excerpt; presumably they
+ * cap the number of entries in a kfunc set and in the destructor kfunc
+ * table. NOTE(review): confirm against the registration code.
+ */
+enum {
+ BTF_KFUNC_SET_MAX_CNT = 256,
+ BTF_DTOR_KFUNC_MAX_CNT = 256,
+};
+/* Table of kfunc ID sets, one pointer per enum btf_kfunc_hook value.
+ * struct btf points at it through its kfunc_set_tab member.
+ */
+struct btf_kfunc_set_tab {
+ struct btf_id_set8 *sets[BTF_KFUNC_HOOK_MAX];
+};
+/* Table of destructor kfunc records: a count followed by a flexible array.
+ * struct btf points at it through its dtor_kfunc_tab member.
+ */
+struct btf_id_dtor_kfunc_tab {
+ u32 cnt; /* presumably the number of valid dtors[] entries - TODO confirm */
+ struct btf_id_dtor_kfunc dtors[];
+};
+
struct btf {
void *data;
struct btf_type **types;
@@ -211,6 +235,8 @@ struct btf {
refcount_t refcnt;
u32 id;
struct rcu_head rcu;
+ struct btf_kfunc_set_tab *kfunc_set_tab;
+ struct btf_id_dtor_kfunc_tab *dtor_kfunc_tab;
/* split BTF support */
struct btf *base_btf;
@@ -282,6 +308,8 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
[BTF_KIND_DATASEC] = "DATASEC",
[BTF_KIND_FLOAT] = "FLOAT",
[BTF_KIND_DECL_TAG] = "DECL_TAG",
+ [BTF_KIND_TYPE_TAG] = "TYPE_TAG",
+ [BTF_KIND_ENUM64] = "ENUM64",
};
const char *btf_type_str(const struct btf_type *t)
@@ -401,6 +429,9 @@ static struct btf_type btf_void;
static int btf_resolve(struct btf_verifier_env *env,
const struct btf_type *t, u32 type_id);
+static int btf_func_check(struct btf_verifier_env *env,
+ const struct btf_type *t);
+
static bool btf_type_is_modifier(const struct btf_type *t)
{
/* Some of them is not strictly a C modifier
@@ -418,6 +449,7 @@ static bool btf_type_is_modifier(const struct btf_type *t)
case BTF_KIND_VOLATILE:
case BTF_KIND_CONST:
case BTF_KIND_RESTRICT:
+ case BTF_KIND_TYPE_TAG:
return true;
}
@@ -503,6 +535,50 @@ s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind)
return -ENOENT;
}
+/* Look up a type by name and kind, first in vmlinux BTF and then in each
+ * module BTF found in btf_idr.
+ *
+ * On success the positive type id is returned and *btf_p is set to the BTF
+ * object containing the type, with a reference held on behalf of the
+ * caller. On failure a negative error is returned and *btf_p is not
+ * written.
+ */
+static s32 bpf_find_btf_id(const char *name, u32 kind, struct btf **btf_p)
+{
+	struct btf *btf;
+	s32 ret;
+	int id;
+
+	btf = bpf_get_btf_vmlinux();
+	if (IS_ERR(btf))
+		return PTR_ERR(btf);
+	if (!btf)
+		return -EINVAL;
+
+	ret = btf_find_by_name_kind(btf, name, kind);
+	/* ret is never zero, since btf_find_by_name_kind returns
+	 * positive btf_id or negative error.
+	 */
+	if (ret > 0) {
+		btf_get(btf);
+		*btf_p = btf;
+		return ret;
+	}
+
+	/* If name is not found in vmlinux's BTF then search in module's BTFs */
+	spin_lock_bh(&btf_idr_lock);
+	idr_for_each_entry(&btf_idr, btf, id) {
+		if (!btf_is_module(btf))
+			continue;
+		/* linear search could be slow hence unlock/lock
+		 * the IDR to avoid holding it for too long
+		 */
+		btf_get(btf);
+		spin_unlock_bh(&btf_idr_lock);
+		ret = btf_find_by_name_kind(btf, name, kind);
+		if (ret > 0) {
+			/* The reference taken above is handed to the caller. */
+			*btf_p = btf;
+			return ret;
+		}
+		/* Drop the reference before re-taking btf_idr_lock: if this
+		 * was the last reference, the release path ends up in
+		 * btf_free_id(), which acquires btf_idr_lock itself, so
+		 * putting with the lock held could deadlock.
+		 */
+		btf_put(btf);
+		spin_lock_bh(&btf_idr_lock);
+	}
+	spin_unlock_bh(&btf_idr_lock);
+	return ret;
+}
+
const struct btf_type *btf_type_skip_modifiers(const struct btf *btf,
u32 id, u32 *res_id)
{
@@ -576,6 +652,7 @@ static bool btf_type_needs_resolve(const struct btf_type *t)
btf_type_is_struct(t) ||
btf_type_is_array(t) ||
btf_type_is_var(t) ||
+ btf_type_is_func(t) ||
btf_type_is_decl_tag(t) ||
btf_type_is_datasec(t);
}
@@ -590,6 +667,7 @@ static bool btf_type_has_size(const struct btf_type *t)
case BTF_KIND_ENUM:
case BTF_KIND_DATASEC:
case BTF_KIND_FLOAT:
+ case BTF_KIND_ENUM64:
return true;
}
@@ -635,6 +713,11 @@ static const struct btf_decl_tag *btf_type_decl_tag(const struct btf_type *t)
return (const struct btf_decl_tag *)(t + 1);
}
+/* Return the btf_enum64 array that immediately follows the type header. */
+static const struct btf_enum64 *btf_type_enum64(const struct btf_type *t)
+{
+	const struct btf_type *past_hdr = t + 1;
+
+	return (const struct btf_enum64 *)past_hdr;
+}
+
static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
{
return kind_ops[BTF_INFO_KIND(t->info)];
@@ -735,6 +818,7 @@ const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id)
return NULL;
return btf->types[type_id];
}
+EXPORT_SYMBOL_GPL(btf_type_by_id);
/*
* Regular int is not a bit field and it must be either
@@ -834,7 +918,7 @@ static const char *btf_show_name(struct btf_show *show)
const char *ptr_suffix = &ptr_suffixes[strlen(ptr_suffixes)];
const char *name = NULL, *prefix = "", *parens = "";
const struct btf_member *m = show->state.member;
- const struct btf_type *t = show->state.type;
+ const struct btf_type *t;
const struct btf_array *array;
u32 id = show->state.type_id;
const char *member = NULL;
@@ -943,6 +1027,7 @@ static const char *btf_show_name(struct btf_show *show)
parens = "{";
break;
case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
prefix = "enum";
break;
default:
@@ -1032,7 +1117,8 @@ __printf(2, 3) static void btf_show(struct btf_show *show, const char *fmt, ...)
*/
#define btf_show_type_value(show, fmt, value) \
do { \
- if ((value) != 0 || (show->flags & BTF_SHOW_ZERO) || \
+ if ((value) != (__typeof__(value))0 || \
+ (show->flags & BTF_SHOW_ZERO) || \
show->state.depth == 0) { \
btf_show(show, "%s%s" fmt "%s%s", \
btf_show_indent(show), \
@@ -1311,7 +1397,6 @@ __printf(4, 5) static void __btf_verifier_log_type(struct btf_verifier_env *env,
const char *fmt, ...)
{
struct bpf_verifier_log *log = &env->log;
- u8 kind = BTF_INFO_KIND(t->info);
struct btf *btf = env->btf;
va_list args;
@@ -1327,7 +1412,7 @@ __printf(4, 5) static void __btf_verifier_log_type(struct btf_verifier_env *env,
__btf_verifier_log(log, "[%u] %s %s%s",
env->log_type_id,
- btf_kind_str[kind],
+ btf_type_str(t),
__btf_name_by_offset(btf, t->name_off),
log_details ? " " : "");
@@ -1528,8 +1613,39 @@ static void btf_free_id(struct btf *btf)
spin_unlock_irqrestore(&btf_idr_lock, flags);
}
+/* Release the kfunc set table hanging off @btf, if there is one, and clear
+ * the pointer.
+ */
+static void btf_free_kfunc_set_tab(struct btf *btf)
+{
+	struct btf_kfunc_set_tab *set_tab = btf->kfunc_set_tab;
+	int i;
+
+	if (!set_tab)
+		return;
+
+	/* For module BTF, we directly assign the sets being registered, so
+	 * there is nothing to free except kfunc_set_tab; the per-hook sets
+	 * are freed only for non-module BTF.
+	 */
+	if (!btf_is_module(btf)) {
+		for (i = 0; i < ARRAY_SIZE(set_tab->sets); i++)
+			kfree(set_tab->sets[i]);
+	}
+
+	kfree(set_tab);
+	btf->kfunc_set_tab = NULL;
+}
+
+/* Release the destructor kfunc table of @btf, if present, and clear the
+ * pointer.
+ */
+static void btf_free_dtor_kfunc_tab(struct btf *btf)
+{
+	if (btf->dtor_kfunc_tab) {
+		kfree(btf->dtor_kfunc_tab);
+		btf->dtor_kfunc_tab = NULL;
+	}
+}
+
static void btf_free(struct btf *btf)
{
+ btf_free_dtor_kfunc_tab(btf);
+ btf_free_kfunc_set_tab(btf);
kvfree(btf->types);
kvfree(btf->resolved_sizes);
kvfree(btf->resolved_ids);
@@ -1725,6 +1841,7 @@ __btf_resolve_size(const struct btf *btf, const struct btf_type *type,
case BTF_KIND_UNION:
case BTF_KIND_ENUM:
case BTF_KIND_FLOAT:
+ case BTF_KIND_ENUM64:
size = type->size;
goto resolved;
@@ -1737,6 +1854,7 @@ __btf_resolve_size(const struct btf *btf, const struct btf_type *type,
case BTF_KIND_VOLATILE:
case BTF_KIND_CONST:
case BTF_KIND_RESTRICT:
+ case BTF_KIND_TYPE_TAG:
id = type->type;
type = btf_type_by_id(btf, type->type);
break;
@@ -2345,6 +2463,8 @@ static int btf_ref_type_check_meta(struct btf_verifier_env *env,
const struct btf_type *t,
u32 meta_left)
{
+ const char *value;
+
if (btf_type_vlen(t)) {
btf_verifier_log_type(env, t, "vlen != 0");
return -EINVAL;
@@ -2360,7 +2480,7 @@ static int btf_ref_type_check_meta(struct btf_verifier_env *env,
return -EINVAL;
}
- /* typedef type must have a valid name, and other ref types,
+ /* typedef/type_tag type must have a valid name, and other ref types,
* volatile, const, restrict, should have a null name.
*/
if (BTF_INFO_KIND(t->info) == BTF_KIND_TYPEDEF) {
@@ -2369,6 +2489,12 @@ static int btf_ref_type_check_meta(struct btf_verifier_env *env,
btf_verifier_log_type(env, t, "Invalid name");
return -EINVAL;
}
+ } else if (BTF_INFO_KIND(t->info) == BTF_KIND_TYPE_TAG) {
+ value = btf_name_by_offset(env->btf, t->name_off);
+ if (!value || !value[0]) {
+ btf_verifier_log_type(env, t, "Invalid name");
+ return -EINVAL;
+ }
} else {
if (t->name_off) {
btf_verifier_log_type(env, t, "Invalid name");
@@ -2493,7 +2619,7 @@ static int btf_ptr_resolve(struct btf_verifier_env *env,
*
* We now need to continue from the last-resolved-ptr to
* ensure the last-resolved-ptr will not referring back to
- * the currenct ptr (t).
+ * the current ptr (t).
*/
if (btf_type_is_modifier(next_type)) {
const struct btf_type *resolved_type;
@@ -2958,7 +3084,7 @@ static s32 btf_struct_check_meta(struct btf_verifier_env *env,
return -EINVAL;
}
- offset = btf_member_bit_offset(t, member);
+ offset = __btf_member_bit_offset(t, member);
if (is_union && offset) {
btf_verifier_log_member(env, t, member,
"Invalid member bits_offset");
@@ -3002,7 +3128,7 @@ static int btf_struct_resolve(struct btf_verifier_env *env,
if (v->next_member) {
const struct btf_type *last_member_type;
const struct btf_member *last_member;
- u16 last_member_type_id;
+ u32 last_member_type_id;
last_member = btf_type_member(v->t) + v->next_member - 1;
last_member_type_id = last_member->type;
@@ -3065,71 +3191,202 @@ static void btf_struct_log(struct btf_verifier_env *env,
btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t));
}
+enum btf_field_type { /* which kind of special field btf_find_field() looks for */
+ BTF_FIELD_SPIN_LOCK, /* a struct named "bpf_spin_lock" */
+ BTF_FIELD_TIMER, /* a struct named "bpf_timer" */
+ BTF_FIELD_KPTR, /* a pointer whose pointee is type-tagged "kptr" or "kptr_ref" */
+};
+/* Non-error results of the per-member matchers btf_find_struct() and
+ * btf_find_kptr().
+ */
+enum {
+ BTF_FIELD_IGNORE = 0, /* member is not the requested kind; caller skips it */
+ BTF_FIELD_FOUND = 1, /* member matched and the info record was filled in */
+};
+/* Description of one matched field, filled in by the matchers. */
+struct btf_field_info {
+ u32 type_id; /* id of the pointed-to struct type; written only by btf_find_kptr() */
+ u32 off; /* offset of the field as passed to the matcher */
+ enum bpf_kptr_type type; /* BPF_KPTR_UNREF or BPF_KPTR_REF; written only by btf_find_kptr() */
+};
+
+/* Match a member whose type is a struct of exactly @sz bytes.
+ *
+ * Returns BTF_FIELD_FOUND and records @off in @info on a match,
+ * BTF_FIELD_IGNORE otherwise. @btf is unused here.
+ */
+static int btf_find_struct(const struct btf *btf, const struct btf_type *t,
+			   u32 off, int sz, struct btf_field_info *info)
+{
+	if (__btf_type_is_struct(t) && t->size == sz) {
+		info->off = off;
+		return BTF_FIELD_FOUND;
+	}
+	return BTF_FIELD_IGNORE;
+}
+
+/* Match a member of the form "pointer -> type tag -> ... -> struct" where
+ * the tag is "kptr" or "kptr_ref".
+ *
+ * Returns BTF_FIELD_IGNORE when @t is not a pointer or its pointee is not a
+ * type tag, -EINVAL when the tag is stacked on another tag, has any other
+ * name, or does not lead to a struct, and BTF_FIELD_FOUND after filling
+ * @info otherwise.
+ */
+static int btf_find_kptr(const struct btf *btf, const struct btf_type *t,
+			 u32 off, int sz, struct btf_field_info *info)
+{
+	enum bpf_kptr_type kptr_type;
+	const char *tag_name;
+	u32 struct_id;
+
+	/* For PTR, sz is always == 8 */
+	if (!btf_type_is_ptr(t))
+		return BTF_FIELD_IGNORE;
+
+	t = btf_type_by_id(btf, t->type);
+	if (!btf_type_is_type_tag(t))
+		return BTF_FIELD_IGNORE;
+
+	/* Reject extra tags */
+	if (btf_type_is_type_tag(btf_type_by_id(btf, t->type)))
+		return -EINVAL;
+
+	tag_name = __btf_name_by_offset(btf, t->name_off);
+	if (strcmp("kptr", tag_name) == 0)
+		kptr_type = BPF_KPTR_UNREF;
+	else if (strcmp("kptr_ref", tag_name) == 0)
+		kptr_type = BPF_KPTR_REF;
+	else
+		return -EINVAL;
+
+	/* Get the base type; only pointer to struct is allowed */
+	t = btf_type_skip_modifiers(btf, t->type, &struct_id);
+	if (!__btf_type_is_struct(t))
+		return -EINVAL;
+
+	info->type_id = struct_id;
+	info->off = off;
+	info->type = kptr_type;
+	return BTF_FIELD_FOUND;
+}
+
static int btf_find_struct_field(const struct btf *btf, const struct btf_type *t,
- const char *name, int sz, int align)
+ const char *name, int sz, int align,
+ enum btf_field_type field_type,
+ struct btf_field_info *info, int info_cnt)
{
const struct btf_member *member;
- u32 i, off = -ENOENT;
+ struct btf_field_info tmp;
+ int ret, idx = 0;
+ u32 i, off;
for_each_member(i, t, member) {
const struct btf_type *member_type = btf_type_by_id(btf,
member->type);
- if (!__btf_type_is_struct(member_type))
- continue;
- if (member_type->size != sz)
- continue;
- if (strcmp(__btf_name_by_offset(btf, member_type->name_off), name))
+
+ if (name && strcmp(__btf_name_by_offset(btf, member_type->name_off), name))
continue;
- if (off != -ENOENT)
- /* only one such field is allowed */
- return -E2BIG;
- off = btf_member_bit_offset(t, member);
+
+ off = __btf_member_bit_offset(t, member);
if (off % 8)
/* valid C code cannot generate such BTF */
return -EINVAL;
off /= 8;
if (off % align)
return -EINVAL;
+
+ switch (field_type) {
+ case BTF_FIELD_SPIN_LOCK:
+ case BTF_FIELD_TIMER:
+ ret = btf_find_struct(btf, member_type, off, sz,
+ idx < info_cnt ? &info[idx] : &tmp);
+ if (ret < 0)
+ return ret;
+ break;
+ case BTF_FIELD_KPTR:
+ ret = btf_find_kptr(btf, member_type, off, sz,
+ idx < info_cnt ? &info[idx] : &tmp);
+ if (ret < 0)
+ return ret;
+ break;
+ default:
+ return -EFAULT;
+ }
+
+ if (ret == BTF_FIELD_IGNORE)
+ continue;
+ if (idx >= info_cnt)
+ return -E2BIG;
+ ++idx;
}
- return off;
+ return idx;
}
static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
- const char *name, int sz, int align)
+ const char *name, int sz, int align,
+ enum btf_field_type field_type,
+ struct btf_field_info *info, int info_cnt)
{
const struct btf_var_secinfo *vsi;
- u32 i, off = -ENOENT;
+ struct btf_field_info tmp;
+ int ret, idx = 0;
+ u32 i, off;
for_each_vsi(i, t, vsi) {
const struct btf_type *var = btf_type_by_id(btf, vsi->type);
const struct btf_type *var_type = btf_type_by_id(btf, var->type);
- if (!__btf_type_is_struct(var_type))
- continue;
- if (var_type->size != sz)
+ off = vsi->offset;
+
+ if (name && strcmp(__btf_name_by_offset(btf, var_type->name_off), name))
continue;
if (vsi->size != sz)
continue;
- if (strcmp(__btf_name_by_offset(btf, var_type->name_off), name))
- continue;
- if (off != -ENOENT)
- /* only one such field is allowed */
- return -E2BIG;
- off = vsi->offset;
if (off % align)
return -EINVAL;
+
+ switch (field_type) {
+ case BTF_FIELD_SPIN_LOCK:
+ case BTF_FIELD_TIMER:
+ ret = btf_find_struct(btf, var_type, off, sz,
+ idx < info_cnt ? &info[idx] : &tmp);
+ if (ret < 0)
+ return ret;
+ break;
+ case BTF_FIELD_KPTR:
+ ret = btf_find_kptr(btf, var_type, off, sz,
+ idx < info_cnt ? &info[idx] : &tmp);
+ if (ret < 0)
+ return ret;
+ break;
+ default:
+ return -EFAULT;
+ }
+
+ if (ret == BTF_FIELD_IGNORE)
+ continue;
+ if (idx >= info_cnt)
+ return -E2BIG;
+ ++idx;
}
- return off;
+ return idx;
}
static int btf_find_field(const struct btf *btf, const struct btf_type *t,
- const char *name, int sz, int align)
+ enum btf_field_type field_type,
+ struct btf_field_info *info, int info_cnt)
{
+ const char *name;
+ int sz, align;
+
+ switch (field_type) {
+ case BTF_FIELD_SPIN_LOCK:
+ name = "bpf_spin_lock";
+ sz = sizeof(struct bpf_spin_lock);
+ align = __alignof__(struct bpf_spin_lock);
+ break;
+ case BTF_FIELD_TIMER:
+ name = "bpf_timer";
+ sz = sizeof(struct bpf_timer);
+ align = __alignof__(struct bpf_timer);
+ break;
+ case BTF_FIELD_KPTR:
+ name = NULL;
+ sz = sizeof(u64);
+ align = 8;
+ break;
+ default:
+ return -EFAULT;
+ }
if (__btf_type_is_struct(t))
- return btf_find_struct_field(btf, t, name, sz, align);
+ return btf_find_struct_field(btf, t, name, sz, align, field_type, info, info_cnt);
else if (btf_type_is_datasec(t))
- return btf_find_datasec_var(btf, t, name, sz, align);
+ return btf_find_datasec_var(btf, t, name, sz, align, field_type, info, info_cnt);
return -EINVAL;
}
@@ -3139,16 +3396,130 @@ static int btf_find_field(const struct btf *btf, const struct btf_type *t,
*/
int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t)
{
- return btf_find_field(btf, t, "bpf_spin_lock",
- sizeof(struct bpf_spin_lock),
- __alignof__(struct bpf_spin_lock));
+ struct btf_field_info info;
+ int ret;
+
+ ret = btf_find_field(btf, t, BTF_FIELD_SPIN_LOCK, &info, 1);
+ if (ret < 0)
+ return ret;
+ if (!ret)
+ return -ENOENT;
+ return info.off;
}
int btf_find_timer(const struct btf *btf, const struct btf_type *t)
{
- return btf_find_field(btf, t, "bpf_timer",
- sizeof(struct bpf_timer),
- __alignof__(struct bpf_timer));
+ struct btf_field_info info;
+ int ret;
+
+ ret = btf_find_field(btf, t, BTF_FIELD_TIMER, &info, 1);
+ if (ret < 0)
+ return ret;
+ if (!ret)
+ return -ENOENT;
+ return info.off;
+}
+
+/* Build the table of kptr fields found in type @t of map BTF @btf.
+ *
+ * Returns NULL when @t contains no kptr field, an ERR_PTR() on failure, or
+ * a kzalloc'ed table with one entry per kptr. Every entry holds a reference
+ * on the kernel BTF its target type was found in; entries for referenced
+ * kptrs whose target lives in a module BTF additionally hold a module
+ * reference. On failure all references taken so far are dropped.
+ */
+struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf,
+					  const struct btf_type *t)
+{
+	struct btf_field_info info_arr[BPF_MAP_VALUE_OFF_MAX];
+	struct bpf_map_value_off *tab;
+	struct btf *kernel_btf = NULL;
+	struct module *mod = NULL;
+	int ret, i, nr_off;
+
+	ret = btf_find_field(btf, t, BTF_FIELD_KPTR, info_arr, ARRAY_SIZE(info_arr));
+	if (ret < 0)
+		return ERR_PTR(ret);
+	if (!ret)
+		return NULL;
+
+	nr_off = ret;
+	tab = kzalloc(offsetof(struct bpf_map_value_off, off[nr_off]), GFP_KERNEL | __GFP_NOWARN);
+	if (!tab)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < nr_off; i++) {
+		const struct btf_type *t;
+		s32 id;
+
+		/* Each entry starts without a module reference. Without this
+		 * reset, an entry following a module-backed referenced kptr
+		 * would record (and later put) a module reference it never
+		 * took.
+		 */
+		mod = NULL;
+
+		/* Find type in map BTF, and use it to look up the matching type
+		 * in vmlinux or module BTFs, by name and kind.
+		 */
+		t = btf_type_by_id(btf, info_arr[i].type_id);
+		id = bpf_find_btf_id(__btf_name_by_offset(btf, t->name_off), BTF_INFO_KIND(t->info),
+				     &kernel_btf);
+		if (id < 0) {
+			ret = id;
+			goto end;
+		}
+
+		/* Find and stash the function pointer for the destruction function that
+		 * needs to be eventually invoked from the map free path.
+		 */
+		if (info_arr[i].type == BPF_KPTR_REF) {
+			const struct btf_type *dtor_func;
+			const char *dtor_func_name;
+			unsigned long addr;
+			s32 dtor_btf_id;
+
+			/* This call also serves as a whitelist of allowed objects that
+			 * can be used as a referenced pointer and be stored in a map at
+			 * the same time.
+			 */
+			dtor_btf_id = btf_find_dtor_kfunc(kernel_btf, id);
+			if (dtor_btf_id < 0) {
+				ret = dtor_btf_id;
+				goto end_btf;
+			}
+
+			dtor_func = btf_type_by_id(kernel_btf, dtor_btf_id);
+			if (!dtor_func) {
+				ret = -ENOENT;
+				goto end_btf;
+			}
+
+			if (btf_is_module(kernel_btf)) {
+				mod = btf_try_get_module(kernel_btf);
+				if (!mod) {
+					ret = -ENXIO;
+					goto end_btf;
+				}
+			}
+
+			/* We already verified dtor_func to be btf_type_is_func
+			 * in register_btf_id_dtor_kfuncs.
+			 */
+			dtor_func_name = __btf_name_by_offset(kernel_btf, dtor_func->name_off);
+			addr = kallsyms_lookup_name(dtor_func_name);
+			if (!addr) {
+				ret = -EINVAL;
+				goto end_mod;
+			}
+			tab->off[i].kptr.dtor = (void *)addr;
+		}
+
+		tab->off[i].offset = info_arr[i].off;
+		tab->off[i].type = info_arr[i].type;
+		tab->off[i].kptr.btf_id = id;
+		tab->off[i].kptr.btf = kernel_btf;
+		tab->off[i].kptr.module = mod;
+	}
+	tab->nr_off = nr_off;
+	return tab;
+end_mod:
+	/* mod is NULL here unless this iteration took a module reference */
+	module_put(mod);
+end_btf:
+	btf_put(kernel_btf);
+end:
+	/* Undo the references recorded by the entries completed so far */
+	while (i--) {
+		btf_put(tab->off[i].kptr.btf);
+		if (tab->off[i].kptr.module)
+			module_put(tab->off[i].kptr.module);
+	}
+	kfree(tab);
+	return ERR_PTR(ret);
+}
static void __btf_struct_show(const struct btf *btf, const struct btf_type *t,
@@ -3173,8 +3544,8 @@ static void __btf_struct_show(const struct btf *btf, const struct btf_type *t,
btf_show_start_member(show, member);
- member_offset = btf_member_bit_offset(t, member);
- bitfield_size = btf_member_bitfield_size(t, member);
+ member_offset = __btf_member_bit_offset(t, member);
+ bitfield_size = __btf_member_bitfield_size(t, member);
bytes_offset = BITS_ROUNDDOWN_BYTES(member_offset);
bits8_offset = BITS_PER_BYTE_MASKED(member_offset);
if (bitfield_size) {
@@ -3307,6 +3678,7 @@ static s32 btf_enum_check_meta(struct btf_verifier_env *env,
{
const struct btf_enum *enums = btf_type_enum(t);
struct btf *btf = env->btf;
+ const char *fmt_str;
u16 i, nr_enums;
u32 meta_needed;
@@ -3320,11 +3692,6 @@ static s32 btf_enum_check_meta(struct btf_verifier_env *env,
return -EINVAL;
}
- if (btf_type_kflag(t)) {
- btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
- return -EINVAL;
- }
-
if (t->size > 8 || !is_power_of_2(t->size)) {
btf_verifier_log_type(env, t, "Unexpected size");
return -EINVAL;
@@ -3355,7 +3722,8 @@ static s32 btf_enum_check_meta(struct btf_verifier_env *env,
if (env->log.level == BPF_LOG_KERNEL)
continue;
- btf_verifier_log(env, "\t%s val=%d\n",
+ fmt_str = btf_type_kflag(t) ? "\t%s val=%d\n" : "\t%s val=%u\n";
+ btf_verifier_log(env, fmt_str,
__btf_name_by_offset(btf, enums[i].name_off),
enums[i].val);
}
@@ -3396,7 +3764,10 @@ static void btf_enum_show(const struct btf *btf, const struct btf_type *t,
return;
}
- btf_show_type_value(show, "%d", v);
+ if (btf_type_kflag(t))
+ btf_show_type_value(show, "%d", v);
+ else
+ btf_show_type_value(show, "%u", v);
btf_show_end_type(show);
}
@@ -3409,6 +3780,109 @@ static struct btf_kind_operations enum_ops = {
.show = btf_enum_show,
};
+/* Validate the metadata of a BTF_KIND_ENUM64 type: there must be room for
+ * all vlen enumerators, the size must be a power of two no larger than 8,
+ * and the type name (if any) and every enumerator name must be valid
+ * identifiers.
+ *
+ * Returns the number of metadata bytes consumed by the enumerators, or
+ * -EINVAL.
+ */
+static s32 btf_enum64_check_meta(struct btf_verifier_env *env,
+				 const struct btf_type *t,
+				 u32 meta_left)
+{
+	const struct btf_enum64 *entry = btf_type_enum64(t);
+	u16 nr_enums = btf_type_vlen(t);
+	u32 meta_needed = nr_enums * sizeof(*entry);
+	struct btf *btf = env->btf;
+	const char *fmt_str;
+	u16 idx;
+
+	if (meta_left < meta_needed) {
+		btf_verifier_log_basic(env, t,
+				       "meta_left:%u meta_needed:%u",
+				       meta_left, meta_needed);
+		return -EINVAL;
+	}
+
+	if (t->size > 8 || !is_power_of_2(t->size)) {
+		btf_verifier_log_type(env, t, "Unexpected size");
+		return -EINVAL;
+	}
+
+	/* The enum type itself has either no name or a valid one */
+	if (t->name_off && !btf_name_valid_identifier(btf, t->name_off)) {
+		btf_verifier_log_type(env, t, "Invalid name");
+		return -EINVAL;
+	}
+
+	btf_verifier_log_type(env, t, NULL);
+
+	for (idx = 0; idx < nr_enums; idx++, entry++) {
+		if (!btf_name_offset_valid(btf, entry->name_off)) {
+			btf_verifier_log(env, "\tInvalid name_offset:%u",
+					 entry->name_off);
+			return -EINVAL;
+		}
+
+		/* enum member must have a valid name */
+		if (!entry->name_off ||
+		    !btf_name_valid_identifier(btf, entry->name_off)) {
+			btf_verifier_log_type(env, t, "Invalid name");
+			return -EINVAL;
+		}
+
+		if (env->log.level == BPF_LOG_KERNEL)
+			continue;
+
+		/* kind_flag selects signed vs unsigned printing */
+		if (btf_type_kflag(t))
+			fmt_str = "\t%s val=%lld\n";
+		else
+			fmt_str = "\t%s val=%llu\n";
+		btf_verifier_log(env, fmt_str,
+				 __btf_name_by_offset(btf, entry->name_off),
+				 btf_enum64_value(entry));
+	}
+
+	return meta_needed;
+}
+
+/* Show a 64-bit enum value: print the name of the first enumerator whose
+ * value matches, or the raw number (signed when kind_flag is set, unsigned
+ * otherwise) when none does.
+ */
+static void btf_enum64_show(const struct btf *btf, const struct btf_type *t,
+			    u32 type_id, void *data, u8 bits_offset,
+			    struct btf_show *show)
+{
+	const struct btf_enum64 *entry = btf_type_enum64(t);
+	const struct btf_enum64 *end = entry + btf_type_vlen(t);
+	void *safe_data;
+	s64 v;
+
+	safe_data = btf_show_start_type(show, t, type_id, data);
+	if (!safe_data)
+		return;
+
+	v = *(u64 *)safe_data;
+
+	for (; entry < end; entry++) {
+		if (v == btf_enum64_value(entry)) {
+			btf_show_type_value(show, "%s",
+					    __btf_name_by_offset(btf,
+								 entry->name_off));
+			btf_show_end_type(show);
+			return;
+		}
+	}
+
+	if (btf_type_kflag(t))
+		btf_show_type_value(show, "%lld", v);
+	else
+		btf_show_type_value(show, "%llu", v);
+	btf_show_end_type(show);
+}
+/* Kind operations for BTF_KIND_ENUM64. Only check_meta and show are
+ * enum64-specific; the member checks and log_details reuse the btf_enum_*
+ * callbacks and resolve is btf_df_resolve.
+ */
+static struct btf_kind_operations enum64_ops = {
+ .check_meta = btf_enum64_check_meta,
+ .resolve = btf_df_resolve,
+ .check_member = btf_enum_check_member,
+ .check_kflag_member = btf_enum_check_kflag_member,
+ .log_details = btf_enum_log,
+ .show = btf_enum64_show,
+};
+
static s32 btf_func_proto_check_meta(struct btf_verifier_env *env,
const struct btf_type *t,
u32 meta_left)
@@ -3521,9 +3995,24 @@ static s32 btf_func_check_meta(struct btf_verifier_env *env,
return 0;
}
+/* Resolve a FUNC type: run btf_func_check() on it and, if that succeeds,
+ * pop it from the resolve stack recording its target type id with a
+ * resolved size of 0.
+ */
+static int btf_func_resolve(struct btf_verifier_env *env,
+			    const struct resolve_vertex *v)
+{
+	const struct btf_type *func = v->t;
+	int err = btf_func_check(env, func);
+
+	if (err)
+		return err;
+
+	env_stack_pop_resolved(env, func->type, 0);
+	return 0;
+}
+
static struct btf_kind_operations func_ops = {
.check_meta = btf_func_check_meta,
- .resolve = btf_df_resolve,
+ .resolve = btf_func_resolve,
.check_member = btf_df_check_member,
.check_kflag_member = btf_df_check_kflag_member,
.log_details = btf_ref_type_log,
@@ -3947,6 +4436,11 @@ static int btf_func_proto_check(struct btf_verifier_env *env,
return -EINVAL;
}
+ if (btf_type_is_resolve_source_only(ret_type)) {
+ btf_verifier_log_type(env, t, "Invalid return type");
+ return -EINVAL;
+ }
+
if (btf_type_needs_resolve(ret_type) &&
!env_type_is_resolved(env, ret_type_id)) {
err = btf_resolve(env, ret_type, ret_type_id);
@@ -4059,6 +4553,8 @@ static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS] = {
[BTF_KIND_DATASEC] = &datasec_ops,
[BTF_KIND_FLOAT] = &float_ops,
[BTF_KIND_DECL_TAG] = &decl_tag_ops,
+ [BTF_KIND_TYPE_TAG] = &modifier_ops,
+ [BTF_KIND_ENUM64] = &enum64_ops,
};
static s32 btf_check_meta(struct btf_verifier_env *env,
@@ -4143,7 +4639,7 @@ static bool btf_resolve_valid(struct btf_verifier_env *env,
return !btf_resolved_type_id(btf, type_id) &&
!btf_resolved_type_size(btf, type_id);
- if (btf_type_is_decl_tag(t))
+ if (btf_type_is_decl_tag(t) || btf_type_is_func(t))
return btf_resolved_type_id(btf, type_id) &&
!btf_resolved_type_size(btf, type_id);
@@ -4233,12 +4729,6 @@ static int btf_check_all_types(struct btf_verifier_env *env)
if (err)
return err;
}
-
- if (btf_type_is_func(t)) {
- err = btf_func_check(env, t);
- if (err)
- return err;
- }
}
return 0;
@@ -4369,13 +4859,11 @@ static int btf_parse_hdr(struct btf_verifier_env *env)
u32 hdr_len, hdr_copy, btf_data_size;
const struct btf_header *hdr;
struct btf *btf;
- int err;
btf = env->btf;
btf_data_size = btf->data_size;
- if (btf_data_size <
- offsetof(struct btf_header, hdr_len) + sizeof(hdr->hdr_len)) {
+ if (btf_data_size < offsetofend(struct btf_header, hdr_len)) {
btf_verifier_log(env, "hdr_len not found");
return -EINVAL;
}
@@ -4427,10 +4915,53 @@ static int btf_parse_hdr(struct btf_verifier_env *env)
return -EINVAL;
}
- err = btf_check_sec_info(env, btf_data_size);
- if (err)
- return err;
+ return btf_check_sec_info(env, btf_data_size);
+}
+/* Check type tag placement in every modifier chain that starts at an id in
+ * [start_id, btf_nr_types(btf)): once a non-tag modifier has been seen in a
+ * chain, a later type tag in that chain is rejected.
+ *
+ * Returns 0 on success, -EINVAL for a misplaced tag or an id that
+ * btf_type_by_id() cannot resolve, and -ELOOP when a chain is longer than
+ * chain_limit modifiers.
+ */
+static int btf_check_type_tags(struct btf_verifier_env *env,
+ struct btf *btf, int start_id)
+{
+ int i, n, good_id = start_id - 1; /* a walk stops once it reaches an id <= good_id */
+ bool in_tags;
+
+ n = btf_nr_types(btf);
+ for (i = start_id; i < n; i++) {
+ const struct btf_type *t;
+ int chain_limit = 32;
+ u32 cur_id = i;
+ t = btf_type_by_id(btf, i);
+ if (!t)
+ return -EINVAL;
+ if (!btf_type_is_modifier(t))
+ continue;
+
+ cond_resched();
+
+ in_tags = btf_type_is_type_tag(t); /* true while only type tags have been seen in this chain */
+ while (btf_type_is_modifier(t)) {
+ if (!chain_limit--) {
+ btf_verifier_log(env, "Max chain length or cycle detected");
+ return -ELOOP;
+ }
+ if (btf_type_is_type_tag(t)) {
+ if (!in_tags) {
+ btf_verifier_log(env, "Type tags don't precede modifiers");
+ return -EINVAL;
+ }
+ } else if (in_tags) {
+ in_tags = false;
+ }
+ if (cur_id <= good_id) /* the current type was still checked above; only the rest of the chain is skipped */
+ break;
+ /* Move to next type */
+ cur_id = t->type;
+ t = btf_type_by_id(btf, cur_id);
+ if (!t)
+ return -EINVAL;
+ }
+ good_id = i; /* the chain starting at i passed */
+ }
return 0;
}
@@ -4460,8 +4991,7 @@ static struct btf *btf_parse(bpfptr_t btf_data, u32 btf_data_size,
log->len_total = log_size;
/* log attributes have to be sane */
- if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 ||
- !log->level || !log->ubuf) {
+ if (!bpf_verifier_log_attr_valid(log)) {
err = -EINVAL;
goto errout;
}
@@ -4502,6 +5032,10 @@ static struct btf *btf_parse(bpfptr_t btf_data, u32 btf_data_size,
if (err)
goto errout;
+ err = btf_check_type_tags(env, btf, 1);
+ if (err)
+ goto errout;
+
if (log->level && bpf_verifier_log_full(log)) {
err = -ENOSPC;
goto errout;
@@ -4610,41 +5144,6 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
return ctx_type;
}
-static const struct bpf_map_ops * const btf_vmlinux_map_ops[] = {
-#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
-#define BPF_LINK_TYPE(_id, _name)
-#define BPF_MAP_TYPE(_id, _ops) \
- [_id] = &_ops,
-#include <linux/bpf_types.h>
-#undef BPF_PROG_TYPE
-#undef BPF_LINK_TYPE
-#undef BPF_MAP_TYPE
-};
-
-static int btf_vmlinux_map_ids_init(const struct btf *btf,
- struct bpf_verifier_log *log)
-{
- const struct bpf_map_ops *ops;
- int i, btf_id;
-
- for (i = 0; i < ARRAY_SIZE(btf_vmlinux_map_ops); ++i) {
- ops = btf_vmlinux_map_ops[i];
- if (!ops || (!ops->map_btf_name && !ops->map_btf_id))
- continue;
- if (!ops->map_btf_name || !ops->map_btf_id) {
- bpf_log(log, "map type %d is misconfigured\n", i);
- return -EINVAL;
- }
- btf_id = btf_find_by_name_kind(btf, ops->map_btf_name,
- BTF_KIND_STRUCT);
- if (btf_id < 0)
- return btf_id;
- *ops->map_btf_id = btf_id;
- }
-
- return 0;
-}
-
static int btf_translate_to_vmlinux(struct bpf_verifier_log *log,
struct btf *btf,
const struct btf_type *t,
@@ -4703,14 +5202,13 @@ struct btf *btf_parse_vmlinux(void)
if (err)
goto errout;
+ err = btf_check_type_tags(env, btf, 1);
+ if (err)
+ goto errout;
+
/* btf_parse_vmlinux() runs under bpf_verifier_lock */
bpf_ctx_convert.t = btf_type_by_id(btf, bpf_ctx_convert_btf_id[0]);
- /* find bpf map structs for map_ptr access checking */
- err = btf_vmlinux_map_ids_init(btf, log);
- if (err < 0)
- goto errout;
-
bpf_struct_ops_init(btf, log);
refcount_set(&btf->refcnt, 1);
@@ -4788,6 +5286,10 @@ static struct btf *btf_parse_module(const char *module_name, const void *data, u
if (err)
goto errout;
+ err = btf_check_type_tags(env, btf, btf_nr_types(base_btf));
+ if (err)
+ goto errout;
+
btf_verifier_env_free(env);
refcount_set(&btf->refcnt, 1);
return btf;
@@ -4814,7 +5316,7 @@ struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog)
return prog->aux->attach_btf;
}
-static bool is_string_ptr(struct btf *btf, const struct btf_type *t)
+static bool is_int_ptr(struct btf *btf, const struct btf_type *t)
{
/* t comes in already as a pointer */
t = btf_type_by_id(btf, t->type);
@@ -4823,8 +5325,35 @@ static bool is_string_ptr(struct btf *btf, const struct btf_type *t)
if (BTF_INFO_KIND(t->info) == BTF_KIND_CONST)
t = btf_type_by_id(btf, t->type);
- /* char, signed char, unsigned char */
- return btf_type_is_int(t) && t->size == 1;
+ return btf_type_is_int(t);
+}
+
+/* Translate a byte offset into the program context into an argument index.
+ *
+ * Without a function prototype every slot is 8 bytes wide. With one, each
+ * argument occupies 8 bytes if it is a pointer and its size rounded up to
+ * 8 otherwise; the slot after the last argument belongs to the return
+ * value (index nr_args). Offsets past that yield nr_args + 1.
+ */
+static u32 get_ctx_arg_idx(struct btf *btf, const struct btf_type *func_proto,
+			   int off)
+{
+	const struct btf_param *args;
+	const struct btf_type *t;
+	u32 end = 0, nr_args, idx;
+
+	if (!func_proto)
+		return off / 8;
+
+	nr_args = btf_type_vlen(func_proto);
+	args = (const struct btf_param *)(func_proto + 1);
+	for (idx = 0; idx <= nr_args; idx++) {
+		/* index nr_args stands for the return value slot */
+		u32 type_id = idx < nr_args ? args[idx].type : func_proto->type;
+
+		t = btf_type_skip_modifiers(btf, type_id, NULL);
+		end += btf_type_is_ptr(t) ? 8 : roundup(t->size, 8);
+		if (off < end)
+			return idx;
+	}
+
+	return nr_args + 1;
+}
bool btf_ctx_access(int off, int size, enum bpf_access_type type,
@@ -4837,6 +5366,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
const char *tname = prog->aux->attach_func_name;
struct bpf_verifier_log *log = info->log;
const struct btf_param *args;
+ const char *tag_value;
u32 nr_args, arg;
int i, ret;
@@ -4845,7 +5375,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
tname, off);
return false;
}
- arg = off / 8;
+ arg = get_ctx_arg_idx(btf, t, off);
args = (const struct btf_param *)(t + 1);
/* if (t == NULL) Fall back to default BPF prog with
* MAX_BPF_FUNC_REG_ARGS u64 arguments.
@@ -4865,6 +5395,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
if (arg == nr_args) {
switch (prog->expected_attach_type) {
+ case BPF_LSM_CGROUP:
case BPF_LSM_MAC:
case BPF_TRACE_FEXIT:
/* When LSM programs are attached to void LSM hooks
@@ -4895,7 +5426,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
if (!btf_type_is_small_int(t)) {
bpf_log(log,
"ret type %s not allowed for fmod_ret\n",
- btf_kind_str[BTF_INFO_KIND(t->info)]);
+ btf_type_str(t));
return false;
}
break;
@@ -4914,7 +5445,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
/* skip modifiers */
while (btf_type_is_modifier(t))
t = btf_type_by_id(btf, t->type);
- if (btf_type_is_small_int(t) || btf_type_is_enum(t))
+ if (btf_type_is_small_int(t) || btf_is_any_enum(t) || __btf_type_is_struct(t))
/* accessing a scalar */
return true;
if (!btf_type_is_ptr(t)) {
@@ -4922,17 +5453,19 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
"func '%s' arg%d '%s' has type %s. Only pointer access is allowed\n",
tname, arg,
__btf_name_by_offset(btf, t->name_off),
- btf_kind_str[BTF_INFO_KIND(t->info)]);
+ btf_type_str(t));
return false;
}
/* check for PTR_TO_RDONLY_BUF_OR_NULL or PTR_TO_RDWR_BUF_OR_NULL */
for (i = 0; i < prog->aux->ctx_arg_info_size; i++) {
const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i];
+ u32 type, flag;
- if (ctx_arg_info->offset == off &&
- (ctx_arg_info->reg_type == PTR_TO_RDONLY_BUF_OR_NULL ||
- ctx_arg_info->reg_type == PTR_TO_RDWR_BUF_OR_NULL)) {
+ type = base_type(ctx_arg_info->reg_type);
+ flag = type_flag(ctx_arg_info->reg_type);
+ if (ctx_arg_info->offset == off && type == PTR_TO_BUF &&
+ (flag & PTR_MAYBE_NULL)) {
info->reg_type = ctx_arg_info->reg_type;
return true;
}
@@ -4945,7 +5478,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
*/
return true;
- if (is_string_ptr(btf, t))
+ if (is_int_ptr(btf, t))
return true;
/* this is a pointer to another type */
@@ -4987,6 +5520,15 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
info->btf = btf;
info->btf_id = t->type;
t = btf_type_by_id(btf, t->type);
+
+ if (btf_type_is_type_tag(t)) {
+ tag_value = __btf_name_by_offset(btf, t->name_off);
+ if (strcmp(tag_value, "user") == 0)
+ info->reg_type |= MEM_USER;
+ if (strcmp(tag_value, "percpu") == 0)
+ info->reg_type |= MEM_PERCPU;
+ }
+
/* skip modifiers */
while (btf_type_is_modifier(t)) {
info->btf_id = t->type;
@@ -4995,11 +5537,11 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
if (!btf_type_is_struct(t)) {
bpf_log(log,
"func '%s' arg%d type %s is not a struct\n",
- tname, arg, btf_kind_str[BTF_INFO_KIND(t->info)]);
+ tname, arg, btf_type_str(t));
return false;
}
bpf_log(log, "func '%s' arg%d has btf_id %d type %s '%s'\n",
- tname, arg, info->btf_id, btf_kind_str[BTF_INFO_KIND(t->info)],
+ tname, arg, info->btf_id, btf_type_str(t),
__btf_name_by_offset(btf, t->name_off));
return true;
}
@@ -5013,12 +5555,12 @@ enum bpf_struct_walk_result {
static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf,
const struct btf_type *t, int off, int size,
- u32 *next_btf_id)
+ u32 *next_btf_id, enum bpf_type_flag *flag)
{
u32 i, moff, mtrue_end, msize = 0, total_nelems = 0;
const struct btf_type *mtype, *elem_type = NULL;
const struct btf_member *member;
- const char *tname, *mname;
+ const char *tname, *mname, *tag_value;
u32 vlen, elem_id, mid;
again:
@@ -5048,7 +5590,7 @@ again:
if (array_elem->nelems != 0)
goto error;
- moff = btf_member_bit_offset(t, member) / 8;
+ moff = __btf_member_bit_offset(t, member) / 8;
if (off < moff)
goto error;
@@ -5071,14 +5613,14 @@ error:
for_each_member(i, t, member) {
/* offset of the field in bytes */
- moff = btf_member_bit_offset(t, member) / 8;
+ moff = __btf_member_bit_offset(t, member) / 8;
if (off + size <= moff)
/* won't find anything, field is already too far */
break;
- if (btf_member_bitfield_size(t, member)) {
- u32 end_bit = btf_member_bit_offset(t, member) +
- btf_member_bitfield_size(t, member);
+ if (__btf_member_bitfield_size(t, member)) {
+ u32 end_bit = __btf_member_bit_offset(t, member) +
+ __btf_member_bitfield_size(t, member);
/* off <= moff instead of off == moff because clang
* does not generate a BTF member for anonymous
@@ -5202,7 +5744,8 @@ error:
}
if (btf_type_is_ptr(mtype)) {
- const struct btf_type *stype;
+ const struct btf_type *stype, *t;
+ enum bpf_type_flag tmp_flag = 0;
u32 id;
if (msize != size || off != moff) {
@@ -5211,9 +5754,23 @@ error:
mname, moff, tname, off, size);
return -EACCES;
}
+
+ /* check type tag */
+ t = btf_type_by_id(btf, mtype->type);
+ if (btf_type_is_type_tag(t)) {
+ tag_value = __btf_name_by_offset(btf, t->name_off);
+ /* check __user tag */
+ if (strcmp(tag_value, "user") == 0)
+ tmp_flag = MEM_USER;
+ /* check __percpu tag */
+ if (strcmp(tag_value, "percpu") == 0)
+ tmp_flag = MEM_PERCPU;
+ }
+
stype = btf_type_skip_modifiers(btf, mtype->type, &id);
if (btf_type_is_struct(stype)) {
*next_btf_id = id;
+ *flag = tmp_flag;
return WALK_PTR;
}
}
@@ -5240,13 +5797,14 @@ error:
int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf,
const struct btf_type *t, int off, int size,
enum bpf_access_type atype __maybe_unused,
- u32 *next_btf_id)
+ u32 *next_btf_id, enum bpf_type_flag *flag)
{
+ enum bpf_type_flag tmp_flag = 0;
int err;
u32 id;
do {
- err = btf_struct_walk(log, btf, t, off, size, &id);
+ err = btf_struct_walk(log, btf, t, off, size, &id, &tmp_flag);
switch (err) {
case WALK_PTR:
@@ -5254,6 +5812,7 @@ int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf,
* we're done.
*/
*next_btf_id = id;
+ *flag = tmp_flag;
return PTR_TO_BTF_ID;
case WALK_SCALAR:
return SCALAR_VALUE;
@@ -5295,20 +5854,27 @@ static bool btf_types_are_same(const struct btf *btf1, u32 id1,
bool btf_struct_ids_match(struct bpf_verifier_log *log,
const struct btf *btf, u32 id, int off,
- const struct btf *need_btf, u32 need_type_id)
+ const struct btf *need_btf, u32 need_type_id,
+ bool strict)
{
const struct btf_type *type;
+ enum bpf_type_flag flag;
int err;
/* Are we already done? */
if (off == 0 && btf_types_are_same(btf, id, need_btf, need_type_id))
return true;
-
+ /* In case of strict type match, we do not walk struct, the top level
+ * type match must succeed. When strict is true, off should have already
+ * been 0.
+ */
+ if (strict)
+ return false;
again:
type = btf_type_by_id(btf, id);
if (!type)
return false;
- err = btf_struct_walk(log, btf, type, off, 1, &id);
+ err = btf_struct_walk(log, btf, type, off, 1, &id, &flag);
if (err != WALK_STRUCT)
return false;
@@ -5326,26 +5892,25 @@ again:
}
static int __get_type_size(struct btf *btf, u32 btf_id,
- const struct btf_type **bad_type)
+ const struct btf_type **ret_type)
{
const struct btf_type *t;
+ *ret_type = btf_type_by_id(btf, 0);
if (!btf_id)
/* void */
return 0;
t = btf_type_by_id(btf, btf_id);
while (t && btf_type_is_modifier(t))
t = btf_type_by_id(btf, t->type);
- if (!t) {
- *bad_type = btf_type_by_id(btf, 0);
+ if (!t)
return -EINVAL;
- }
+ *ret_type = t;
if (btf_type_is_ptr(t))
/* kernel size of pointer. Not BPF's size of pointer*/
return sizeof(void *);
- if (btf_type_is_int(t) || btf_type_is_enum(t))
+ if (btf_type_is_int(t) || btf_is_any_enum(t) || __btf_type_is_struct(t))
return t->size;
- *bad_type = t;
return -EINVAL;
}
@@ -5364,25 +5929,27 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
/* BTF function prototype doesn't match the verifier types.
* Fall back to MAX_BPF_FUNC_REG_ARGS u64 args.
*/
- for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++)
+ for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
m->arg_size[i] = 8;
+ m->arg_flags[i] = 0;
+ }
m->ret_size = 8;
m->nr_args = MAX_BPF_FUNC_REG_ARGS;
return 0;
}
args = (const struct btf_param *)(func + 1);
nargs = btf_type_vlen(func);
- if (nargs >= MAX_BPF_FUNC_ARGS) {
+ if (nargs > MAX_BPF_FUNC_ARGS) {
bpf_log(log,
"The function %s has %d arguments. Too many.\n",
tname, nargs);
return -EINVAL;
}
ret = __get_type_size(btf, func->type, &t);
- if (ret < 0) {
+ if (ret < 0 || __btf_type_is_struct(t)) {
bpf_log(log,
"The function %s return type %s is unsupported.\n",
- tname, btf_kind_str[BTF_INFO_KIND(t->info)]);
+ tname, btf_type_str(t));
return -EINVAL;
}
m->ret_size = ret;
@@ -5395,10 +5962,12 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
return -EINVAL;
}
ret = __get_type_size(btf, args[i].type, &t);
- if (ret < 0) {
+
+ /* No support of struct argument size greater than 16 bytes */
+ if (ret < 0 || ret > 16) {
bpf_log(log,
"The function %s arg%d type %s is unsupported.\n",
- tname, i, btf_kind_str[BTF_INFO_KIND(t->info)]);
+ tname, i, btf_type_str(t));
return -EINVAL;
}
if (ret == 0) {
@@ -5408,6 +5977,7 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
return -EINVAL;
}
m->arg_size[i] = ret;
+ m->arg_flags[i] = __btf_type_is_struct(t) ? BTF_FMODEL_STRUCT_ARG : 0;
}
m->nr_args = nargs;
return 0;
@@ -5491,7 +6061,7 @@ static int btf_check_func_type_match(struct bpf_verifier_log *log,
* to context only. And only global functions can be replaced.
* Hence type check only those types.
*/
- if (btf_type_is_int(t1) || btf_type_is_enum(t1))
+ if (btf_type_is_int(t1) || btf_is_any_enum(t1))
continue;
if (!btf_type_is_ptr(t1)) {
bpf_log(log,
@@ -5563,16 +6133,114 @@ static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
#endif
};
+/* Returns true if struct is composed of scalars, 4 levels of nesting allowed.
+ *
+ * @log is only used to report that the nesting limit was exceeded.
+ * @t is the type to check; anything btf_type_is_struct() rejects yields false.
+ * @rec is the current nesting depth. The caller in this file starts at 0 and
+ * every nested struct member recurses with rec + 1, so a struct member that
+ * is met while rec is already 3 fails the check.
+ *
+ * After skipping modifiers, a member is accepted when it is:
+ *  - a scalar (per btf_type_is_scalar()),
+ *  - a struct that itself passes this check, or
+ *  - an array with a non-zero element count whose element type, after
+ *    skipping modifiers, is a scalar.
+ * Any other member makes the whole struct fail.
+ */
+static bool __btf_type_is_scalar_struct(struct bpf_verifier_log *log,
+ const struct btf *btf,
+ const struct btf_type *t, int rec)
+{
+ const struct btf_type *member_type;
+ const struct btf_member *member;
+ u32 i;
+
+ if (!btf_type_is_struct(t))
+ return false;
+
+ for_each_member(i, t, member) {
+ const struct btf_array *array;
+
+ member_type = btf_type_skip_modifiers(btf, member->type, NULL);
+ if (btf_type_is_struct(member_type)) {
+ /* Depth is checked before recursing, so the limit is
+ * reported once, at the member that would exceed it.
+ */
+ if (rec >= 3) {
+ bpf_log(log, "max struct nesting depth exceeded\n");
+ return false;
+ }
+ if (!__btf_type_is_scalar_struct(log, btf, member_type, rec + 1))
+ return false;
+ continue;
+ }
+ if (btf_type_is_array(member_type)) {
+ array = btf_type_array(member_type);
+ /* Zero-length arrays are rejected. */
+ if (!array->nelems)
+ return false;
+ /* Only the element type is inspected; it must be a scalar,
+ * there is no recursion into the element type here.
+ */
+ member_type = btf_type_skip_modifiers(btf, array->type, NULL);
+ if (!btf_type_is_scalar(member_type))
+ return false;
+ continue;
+ }
+ if (!btf_type_is_scalar(member_type))
+ return false;
+ }
+ return true;
+}
+
+/* Returns true if @arg can serve as the length half of a (mem, len) kfunc
+ * argument pair. All of the following must hold:
+ *  - the BTF type of @arg is a scalar after skipping modifiers,
+ *  - the register state @reg is SCALAR_VALUE,
+ *  - the parameter has a non-empty name that ends in "__sz".
+ */
+static bool is_kfunc_arg_mem_size(const struct btf *btf,
+ const struct btf_param *arg,
+ const struct bpf_reg_state *reg)
+{
+ int len, sfx_len = sizeof("__sz") - 1;
+ const struct btf_type *t;
+ const char *param_name;
+
+ t = btf_type_skip_modifiers(btf, arg->type, NULL);
+ if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
+ return false;
+
+ /* In the future, this can be ported to use BTF tagging */
+ param_name = btf_name_by_offset(btf, arg->name_off);
+ if (str_is_empty(param_name))
+ return false;
+ len = strlen(param_name);
+ if (len < sfx_len)
+ return false;
+ /* Advance to the last sfx_len characters so only the suffix is compared. */
+ param_name += len - sfx_len;
+ if (strncmp(param_name, "__sz", sfx_len))
+ return false;
+
+ return true;
+}
+
+/* Returns true if @arg is a scalar parameter (after skipping modifiers), the
+ * register state @reg is SCALAR_VALUE, and the parameter's name is exactly
+ * @name. A parameter with an empty name never matches.
+ *
+ * Despite the similar name, this differs from is_kfunc_arg_mem_size(): that
+ * helper matches a "__sz" name suffix, this one compares the whole name.
+ */
+static bool btf_is_kfunc_arg_mem_size(const struct btf *btf,
+ const struct btf_param *arg,
+ const struct bpf_reg_state *reg,
+ const char *name)
+{
+ int len, target_len = strlen(name);
+ const struct btf_type *t;
+ const char *param_name;
+
+ t = btf_type_skip_modifiers(btf, arg->type, NULL);
+ if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
+ return false;
+
+ param_name = btf_name_by_offset(btf, arg->name_off);
+ if (str_is_empty(param_name))
+ return false;
+ /* The length comparison is only an early exit: strcmp() below already
+ * reports a mismatch for names of different length.
+ */
+ len = strlen(param_name);
+ if (len != target_len)
+ return false;
+ if (strcmp(param_name, name))
+ return false;
+
+ return true;
+}
+
static int btf_check_func_arg_match(struct bpf_verifier_env *env,
const struct btf *btf, u32 func_id,
struct bpf_reg_state *regs,
- bool ptr_to_mem_ok)
+ bool ptr_to_mem_ok,
+ struct bpf_kfunc_arg_meta *kfunc_meta,
+ bool processing_call)
{
+ enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
+ bool rel = false, kptr_get = false, trusted_args = false;
+ bool sleepable = false;
struct bpf_verifier_log *log = &env->log;
+ u32 i, nargs, ref_id, ref_obj_id = 0;
+ bool is_kfunc = btf_is_kernel(btf);
const char *func_name, *ref_tname;
const struct btf_type *t, *ref_t;
const struct btf_param *args;
- u32 i, nargs, ref_id;
+ int ref_regno = 0, ret;
t = btf_type_by_id(btf, func_id);
if (!t || !btf_type_is_func(t)) {
@@ -5598,15 +6266,57 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
return -EINVAL;
}
+ if (is_kfunc && kfunc_meta) {
+ /* Only kfunc can be release func */
+ rel = kfunc_meta->flags & KF_RELEASE;
+ kptr_get = kfunc_meta->flags & KF_KPTR_GET;
+ trusted_args = kfunc_meta->flags & KF_TRUSTED_ARGS;
+ sleepable = kfunc_meta->flags & KF_SLEEPABLE;
+ }
+
/* check that BTF function arguments match actual types that the
* verifier sees.
*/
for (i = 0; i < nargs; i++) {
+ enum bpf_arg_type arg_type = ARG_DONTCARE;
u32 regno = i + 1;
struct bpf_reg_state *reg = &regs[regno];
+ bool obj_ptr = false;
t = btf_type_skip_modifiers(btf, args[i].type, NULL);
if (btf_type_is_scalar(t)) {
+ if (is_kfunc && kfunc_meta) {
+ bool is_buf_size = false;
+
+ /* check for any const scalar parameter of name "rdonly_buf_size"
+ * or "rdwr_buf_size"
+ */
+ if (btf_is_kfunc_arg_mem_size(btf, &args[i], reg,
+ "rdonly_buf_size")) {
+ kfunc_meta->r0_rdonly = true;
+ is_buf_size = true;
+ } else if (btf_is_kfunc_arg_mem_size(btf, &args[i], reg,
+ "rdwr_buf_size"))
+ is_buf_size = true;
+
+ if (is_buf_size) {
+ if (kfunc_meta->r0_size) {
+ bpf_log(log, "2 or more rdonly/rdwr_buf_size parameters for kfunc");
+ return -EINVAL;
+ }
+
+ if (!tnum_is_const(reg->var_off)) {
+ bpf_log(log, "R%d is not a const\n", regno);
+ return -EINVAL;
+ }
+
+ kfunc_meta->r0_size = reg->var_off.value;
+ ret = mark_chain_precision(env, regno);
+ if (ret)
+ return ret;
+ }
+ }
+
if (reg->type == SCALAR_VALUE)
continue;
bpf_log(log, "R%d is not a scalar\n", regno);
@@ -5619,9 +6329,99 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
return -EINVAL;
}
+ /* These register types have special constraints wrt ref_obj_id
+ * and offset checks. The rest of trusted args don't.
+ */
+ obj_ptr = reg->type == PTR_TO_CTX || reg->type == PTR_TO_BTF_ID ||
+ reg2btf_ids[base_type(reg->type)];
+
+ /* Check if argument must be a referenced pointer, args + i has
+ * been verified to be a pointer (after skipping modifiers).
+ * PTR_TO_CTX is ok without having non-zero ref_obj_id.
+ */
+ if (is_kfunc && trusted_args && (obj_ptr && reg->type != PTR_TO_CTX) && !reg->ref_obj_id) {
+ bpf_log(log, "R%d must be referenced\n", regno);
+ return -EINVAL;
+ }
+
ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id);
ref_tname = btf_name_by_offset(btf, ref_t->name_off);
- if (btf_is_kernel(btf)) {
+
+ /* Trusted args have the same offset checks as release arguments */
+ if ((trusted_args && obj_ptr) || (rel && reg->ref_obj_id))
+ arg_type |= OBJ_RELEASE;
+ ret = check_func_arg_reg_off(env, reg, regno, arg_type);
+ if (ret < 0)
+ return ret;
+
+ if (is_kfunc && reg->ref_obj_id) {
+ /* Ensure only one argument is referenced PTR_TO_BTF_ID */
+ if (ref_obj_id) {
+ bpf_log(log, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
+ regno, reg->ref_obj_id, ref_obj_id);
+ return -EFAULT;
+ }
+ ref_regno = regno;
+ ref_obj_id = reg->ref_obj_id;
+ }
+
+ /* kptr_get is only true for kfunc */
+ if (i == 0 && kptr_get) {
+ struct bpf_map_value_off_desc *off_desc;
+
+ if (reg->type != PTR_TO_MAP_VALUE) {
+ bpf_log(log, "arg#0 expected pointer to map value\n");
+ return -EINVAL;
+ }
+
+ /* check_func_arg_reg_off allows var_off for
+ * PTR_TO_MAP_VALUE, but we need fixed offset to find
+ * off_desc.
+ */
+ if (!tnum_is_const(reg->var_off)) {
+ bpf_log(log, "arg#0 must have constant offset\n");
+ return -EINVAL;
+ }
+
+ off_desc = bpf_map_kptr_off_contains(reg->map_ptr, reg->off + reg->var_off.value);
+ if (!off_desc || off_desc->type != BPF_KPTR_REF) {
+ bpf_log(log, "arg#0 no referenced kptr at map value offset=%llu\n",
+ reg->off + reg->var_off.value);
+ return -EINVAL;
+ }
+
+ if (!btf_type_is_ptr(ref_t)) {
+ bpf_log(log, "arg#0 BTF type must be a double pointer\n");
+ return -EINVAL;
+ }
+
+ ref_t = btf_type_skip_modifiers(btf, ref_t->type, &ref_id);
+ ref_tname = btf_name_by_offset(btf, ref_t->name_off);
+
+ if (!btf_type_is_struct(ref_t)) {
+ bpf_log(log, "kernel function %s args#%d pointer type %s %s is not supported\n",
+ func_name, i, btf_type_str(ref_t), ref_tname);
+ return -EINVAL;
+ }
+ if (!btf_struct_ids_match(log, btf, ref_id, 0, off_desc->kptr.btf,
+ off_desc->kptr.btf_id, true)) {
+ bpf_log(log, "kernel function %s args#%d expected pointer to %s %s\n",
+ func_name, i, btf_type_str(ref_t), ref_tname);
+ return -EINVAL;
+ }
+ /* rest of the arguments can be anything, like normal kfunc */
+ } else if (btf_get_prog_ctx_type(log, btf, t, prog_type, i)) {
+ /* If function expects ctx type in BTF check that caller
+ * is passing PTR_TO_CTX.
+ */
+ if (reg->type != PTR_TO_CTX) {
+ bpf_log(log,
+ "arg#%d expected pointer to ctx, but got %s\n",
+ i, btf_type_str(t));
+ return -EINVAL;
+ }
+ } else if (is_kfunc && (reg->type == PTR_TO_BTF_ID ||
+ (reg2btf_ids[base_type(reg->type)] && !type_flag(reg->type)))) {
const struct btf_type *reg_ref_t;
const struct btf *reg_btf;
const char *reg_ref_tname;
@@ -5637,14 +6437,9 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
if (reg->type == PTR_TO_BTF_ID) {
reg_btf = reg->btf;
reg_ref_id = reg->btf_id;
- } else if (reg2btf_ids[reg->type]) {
- reg_btf = btf_vmlinux;
- reg_ref_id = *reg2btf_ids[reg->type];
} else {
- bpf_log(log, "kernel function %s args#%d expected pointer to %s %s but R%d is not a pointer to btf_id\n",
- func_name, i,
- btf_type_str(ref_t), ref_tname, regno);
- return -EINVAL;
+ reg_btf = btf_vmlinux;
+ reg_ref_id = *reg2btf_ids[base_type(reg->type)];
}
reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id,
@@ -5652,7 +6447,8 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
reg_ref_tname = btf_name_by_offset(reg_btf,
reg_ref_t->name_off);
if (!btf_struct_ids_match(log, reg_btf, reg_ref_id,
- reg->off, btf, ref_id)) {
+ reg->off, btf, ref_id,
+ trusted_args || (rel && reg->ref_obj_id))) {
bpf_log(log, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n",
func_name, i,
btf_type_str(ref_t), ref_tname,
@@ -5660,23 +6456,73 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
reg_ref_tname);
return -EINVAL;
}
- } else if (btf_get_prog_ctx_type(log, btf, t,
- env->prog->type, i)) {
- /* If function expects ctx type in BTF check that caller
- * is passing PTR_TO_CTX.
- */
- if (reg->type != PTR_TO_CTX) {
- bpf_log(log,
- "arg#%d expected pointer to ctx, but got %s\n",
- i, btf_type_str(t));
- return -EINVAL;
- }
- if (check_ctx_reg(env, reg, regno))
- return -EINVAL;
- } else if (ptr_to_mem_ok) {
+ } else if (ptr_to_mem_ok && processing_call) {
const struct btf_type *resolve_ret;
u32 type_size;
+ if (is_kfunc) {
+ bool arg_mem_size = i + 1 < nargs && is_kfunc_arg_mem_size(btf, &args[i + 1], &regs[regno + 1]);
+ bool arg_dynptr = btf_type_is_struct(ref_t) &&
+ !strcmp(ref_tname,
+ stringify_struct(bpf_dynptr_kern));
+
+ /* Permit pointer to mem, but only when argument
+ * type is pointer to scalar, or struct composed
+ * (recursively) of scalars.
+ * When arg_mem_size is true, the pointer can be
+ * void *.
+ * Also permit initialized local dynamic pointers.
+ */
+ if (!btf_type_is_scalar(ref_t) &&
+ !__btf_type_is_scalar_struct(log, btf, ref_t, 0) &&
+ !arg_dynptr &&
+ (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) {
+ bpf_log(log,
+ "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n",
+ i, btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : "");
+ return -EINVAL;
+ }
+
+ if (arg_dynptr) {
+ if (reg->type != PTR_TO_STACK) {
+ bpf_log(log, "arg#%d pointer type %s %s not to stack\n",
+ i, btf_type_str(ref_t),
+ ref_tname);
+ return -EINVAL;
+ }
+
+ if (!is_dynptr_reg_valid_init(env, reg)) {
+ bpf_log(log,
+ "arg#%d pointer type %s %s must be valid and initialized\n",
+ i, btf_type_str(ref_t),
+ ref_tname);
+ return -EINVAL;
+ }
+
+ if (!is_dynptr_type_expected(env, reg,
+ ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL)) {
+ bpf_log(log,
+ "arg#%d pointer type %s %s points to unsupported dynamic pointer type\n",
+ i, btf_type_str(ref_t),
+ ref_tname);
+ return -EINVAL;
+ }
+
+ continue;
+ }
+
+ /* Check for mem, len pair */
+ if (arg_mem_size) {
+ if (check_kfunc_mem_size_reg(env, &regs[regno + 1], regno + 1)) {
+ bpf_log(log, "arg#%d arg#%d memory, len pair leads to invalid memory access\n",
+ i, i + 1);
+ return -EINVAL;
+ }
+ i++;
+ continue;
+ }
+ }
+
resolve_ret = btf_resolve_size(btf, ref_t, &type_size);
if (IS_ERR(resolve_ret)) {
bpf_log(log,
@@ -5689,14 +6535,39 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
if (check_mem_reg(env, reg, regno, type_size))
return -EINVAL;
} else {
+ bpf_log(log, "reg type unsupported for arg#%d %sfunction %s#%d\n", i,
+ is_kfunc ? "kernel " : "", func_name, func_id);
return -EINVAL;
}
}
- return 0;
+ /* Either both are set, or neither */
+ WARN_ON_ONCE((ref_obj_id && !ref_regno) || (!ref_obj_id && ref_regno));
+ /* We already made sure ref_obj_id is set only for one argument. We do
+ * allow (!rel && ref_obj_id), so that passing such referenced
+ * PTR_TO_BTF_ID to other kfuncs works. Note that rel is only true when
+ * is_kfunc is true.
+ */
+ if (rel && !ref_obj_id) {
+ bpf_log(log, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n",
+ func_name);
+ return -EINVAL;
+ }
+
+ if (sleepable && !env->prog->aux->sleepable) {
+ bpf_log(log, "kernel function %s is sleepable but the program is not\n",
+ func_name);
+ return -EINVAL;
+ }
+
+ if (kfunc_meta && ref_obj_id)
+ kfunc_meta->ref_obj_id = ref_obj_id;
+
+ /* returns argument register number > 0 in case of reference release kfunc */
+ return rel ? ref_regno : 0;
}
-/* Compare BTF of a function with given bpf_reg_state.
+/* Compare BTF of a function declaration with given bpf_reg_state.
* Returns:
* EFAULT - there is a verifier bug. Abort verification.
* EINVAL - there is a type mismatch or BTF is not available.
@@ -5723,7 +6594,50 @@ int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog,
return -EINVAL;
is_global = prog->aux->func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
- err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global);
+ err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global, NULL, false);
+
+ /* Compiler optimizations can remove arguments from static functions
+ * or mismatched type can be passed into a global function.
+ * In such cases mark the function as unreliable from BTF point of view.
+ */
+ if (err)
+ prog->aux->func_info_aux[subprog].unreliable = true;
+ return err;
+}
+
+/* Compare BTF of a function call with given bpf_reg_state.
+ * Returns:
+ * EFAULT - there is a verifier bug. Abort verification.
+ * EINVAL - there is a type mismatch or BTF is not available.
+ * 0 - BTF matches with what bpf_reg_state expects.
+ * Only PTR_TO_CTX and SCALAR_VALUE states are recognized.
+ *
+ * NOTE: the code is duplicated from btf_check_subprog_arg_match()
+ * because btf_check_func_arg_match() is still doing both. Once that
+ * function is split in 2, we can call from here btf_check_subprog_arg_match()
+ * first, and then treat the calling part in a new code path.
+ */
+int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog,
+ struct bpf_reg_state *regs)
+{
+ struct bpf_prog *prog = env->prog;
+ struct btf *btf = prog->aux->btf;
+ bool is_global;
+ u32 btf_id;
+ int err;
+
+ if (!prog->aux->func_info)
+ return -EINVAL;
+
+ btf_id = prog->aux->func_info[subprog].type_id;
+ if (!btf_id)
+ return -EFAULT;
+
+ if (prog->aux->func_info_aux[subprog].unreliable)
+ return -EINVAL;
+
+ is_global = prog->aux->func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
+ err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global, NULL, true);
/* Compiler optimizations can remove arguments from static functions
* or mismatched type can be passed into a global function.
@@ -5736,9 +6650,10 @@ int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog,
int btf_check_kfunc_arg_match(struct bpf_verifier_env *env,
const struct btf *btf, u32 func_id,
- struct bpf_reg_state *regs)
+ struct bpf_reg_state *regs,
+ struct bpf_kfunc_arg_meta *meta)
{
- return btf_check_func_arg_match(env, btf, func_id, regs, false);
+ return btf_check_func_arg_match(env, btf, func_id, regs, true, meta, true);
}
/* Convert BTF of a function into bpf_reg_state if possible
@@ -5810,7 +6725,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
t = btf_type_by_id(btf, t->type);
while (btf_type_is_modifier(t))
t = btf_type_by_id(btf, t->type);
- if (!btf_type_is_int(t) && !btf_type_is_enum(t)) {
+ if (!btf_type_is_int(t) && !btf_is_any_enum(t)) {
bpf_log(log,
"Global function %s() doesn't return scalar. Only those are supported.\n",
tname);
@@ -5825,7 +6740,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
t = btf_type_by_id(btf, args[i].type);
while (btf_type_is_modifier(t))
t = btf_type_by_id(btf, t->type);
- if (btf_type_is_int(t) || btf_type_is_enum(t)) {
+ if (btf_type_is_int(t) || btf_is_any_enum(t)) {
reg->type = SCALAR_VALUE;
continue;
}
@@ -5846,13 +6761,13 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
return -EINVAL;
}
- reg->type = PTR_TO_MEM_OR_NULL;
+ reg->type = PTR_TO_MEM | PTR_MAYBE_NULL;
reg->id = ++env->id_gen;
continue;
}
bpf_log(log, "Arg#%d type %s in %s() is not supported yet.\n",
- i, btf_kind_str[BTF_INFO_KIND(t->info)], tname);
+ i, btf_type_str(t), tname);
return -EINVAL;
}
return 0;
@@ -5915,7 +6830,7 @@ static void btf_snprintf_show(struct btf_show *show, const char *fmt,
if (len < 0) {
ssnprintf->len_left = 0;
ssnprintf->len = len;
- } else if (len > ssnprintf->len_left) {
+ } else if (len >= ssnprintf->len_left) {
/* no space, drive on to get length we would have written */
ssnprintf->len_left = 0;
ssnprintf->len += len;
@@ -5939,7 +6854,7 @@ int btf_type_snprintf_show(const struct btf *btf, u32 type_id, void *obj,
btf_type_show(btf, type_id, obj, (struct btf_show *)&ssnprintf);
- /* If we encontered an error, return it. */
+ /* If we encountered an error, return it. */
if (ssnprintf.show.state.status)
return ssnprintf.show.state.status;
@@ -6135,12 +7050,22 @@ bool btf_id_set_contains(const struct btf_id_set *set, u32 id)
return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL;
}
+/* Looks up @id in @set->pairs with bsearch() and btf_id_cmp_func.
+ *
+ * Returns the bsearch() result as-is: a pointer to the matching element of
+ * set->pairs, or NULL when there is no match. This differs from
+ * btf_id_set_contains(), which reduces the result to a bool.
+ *
+ * bsearch() requires the pairs to be sorted in the order btf_id_cmp_func
+ * defines. The comparator receives a pointer to a u32 key and a pointer to
+ * a whole pair - NOTE(review): this presumably relies on the id being the
+ * first member of each pair; confirm against struct btf_id_set8.
+ */
+static void *btf_id_set8_contains(const struct btf_id_set8 *set, u32 id)
+{
+ return bsearch(&id, set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func);
+}
+
+/* Bits for the flags member of struct btf_module.
+ * BTF_MODULE_F_LIVE is set by the module notifier on MODULE_STATE_LIVE and
+ * is tested by btf_try_get_module() before it takes a module reference.
+ */
+enum {
+ BTF_MODULE_F_LIVE = (1 << 0),
+};
+
#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
struct btf_module {
struct list_head list;
struct module *module;
struct btf *btf;
struct bin_attribute *sysfs_attr;
+ int flags;
};
static LIST_HEAD(btf_modules);
@@ -6157,6 +7082,8 @@ btf_module_read(struct file *file, struct kobject *kobj,
return len;
}
+static void purge_cand_cache(struct btf *btf);
+
static int btf_module_notify(struct notifier_block *nb, unsigned long op,
void *module)
{
@@ -6166,7 +7093,8 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op,
int err = 0;
if (mod->btf_data_size == 0 ||
- (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING))
+ (op != MODULE_STATE_COMING && op != MODULE_STATE_LIVE &&
+ op != MODULE_STATE_GOING))
goto out;
switch (op) {
@@ -6181,7 +7109,8 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op,
pr_warn("failed to validate module [%s] BTF: %ld\n",
mod->name, PTR_ERR(btf));
kfree(btf_mod);
- err = PTR_ERR(btf);
+ if (!IS_ENABLED(CONFIG_MODULE_ALLOW_BTF_MISMATCH))
+ err = PTR_ERR(btf);
goto out;
}
err = btf_alloc_id(btf);
@@ -6191,6 +7120,7 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op,
goto out;
}
+ purge_cand_cache(NULL);
mutex_lock(&btf_module_mutex);
btf_mod->module = module;
btf_mod->btf = btf;
@@ -6224,6 +7154,17 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op,
}
break;
+ case MODULE_STATE_LIVE:
+ mutex_lock(&btf_module_mutex);
+ list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) {
+ if (btf_mod->module != module)
+ continue;
+
+ btf_mod->flags |= BTF_MODULE_F_LIVE;
+ break;
+ }
+ mutex_unlock(&btf_module_mutex);
+ break;
case MODULE_STATE_GOING:
mutex_lock(&btf_module_mutex);
list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) {
@@ -6233,6 +7174,7 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op,
list_del(&btf_mod->list);
if (btf_mod->sysfs_attr)
sysfs_remove_bin_file(btf_kobj, btf_mod->sysfs_attr);
+ purge_cand_cache(btf_mod->btf);
btf_put(btf_mod->btf);
kfree(btf_mod->sysfs_attr);
kfree(btf_mod);
@@ -6269,7 +7211,12 @@ struct module *btf_try_get_module(const struct btf *btf)
if (btf_mod->btf != btf)
continue;
- if (try_module_get(btf_mod->module))
+ /* We must only consider module whose __init routine has
+ * finished, hence we must check for BTF_MODULE_F_LIVE flag,
+ * which is set from the notifier callback for
+ * MODULE_STATE_LIVE.
+ */
+ if ((btf_mod->flags & BTF_MODULE_F_LIVE) && try_module_get(btf_mod->module))
res = btf_mod->module;
break;
@@ -6280,9 +7227,43 @@ struct module *btf_try_get_module(const struct btf *btf)
return res;
}
+/* Returns struct btf corresponding to the struct module.
+ * This function can return NULL or ERR_PTR.
+ *
+ * A NULL @module selects the BTF returned by bpf_get_btf_vmlinux(); that
+ * result is handed back unchanged, so an ERR_PTR or NULL from it reaches the
+ * caller. For a non-NULL @module the btf_modules list is searched under
+ * btf_module_mutex; NULL is returned when no entry matches, and always when
+ * CONFIG_DEBUG_INFO_BTF_MODULES is not set.
+ *
+ * Whenever a valid btf is returned, a reference has been taken on it with
+ * btf_get(). NOTE(review): dropping that reference (btf_put()) is presumably
+ * the caller's responsibility - confirm at the call sites.
+ */
+static struct btf *btf_get_module_btf(const struct module *module)
+{
+#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+ struct btf_module *btf_mod, *tmp;
+#endif
+ struct btf *btf = NULL;
+
+ if (!module) {
+ btf = bpf_get_btf_vmlinux();
+ /* Only take a reference on a real object, never on NULL/ERR_PTR. */
+ if (!IS_ERR_OR_NULL(btf))
+ btf_get(btf);
+ return btf;
+ }
+
+#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+ mutex_lock(&btf_module_mutex);
+ /* The _safe iterator is used although no entry is removed in this loop. */
+ list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) {
+ if (btf_mod->module != module)
+ continue;
+
+ /* Reference is taken while the mutex is still held. */
+ btf_get(btf_mod->btf);
+ btf = btf_mod->btf;
+ break;
+ }
+ mutex_unlock(&btf_module_mutex);
+#endif
+
+ return btf;
+}
+
BPF_CALL_4(bpf_btf_find_by_name_kind, char *, name, int, name_sz, u32, kind, int, flags)
{
- struct btf *btf;
+ struct btf *btf = NULL;
+ int btf_obj_fd = 0;
long ret;
if (flags)
@@ -6291,44 +7272,17 @@ BPF_CALL_4(bpf_btf_find_by_name_kind, char *, name, int, name_sz, u32, kind, int
if (name_sz <= 1 || name[name_sz - 1])
return -EINVAL;
- btf = bpf_get_btf_vmlinux();
- if (IS_ERR(btf))
- return PTR_ERR(btf);
-
- ret = btf_find_by_name_kind(btf, name, kind);
- /* ret is never zero, since btf_find_by_name_kind returns
- * positive btf_id or negative error.
- */
- if (ret < 0) {
- struct btf *mod_btf;
- int id;
-
- /* If name is not found in vmlinux's BTF then search in module's BTFs */
- spin_lock_bh(&btf_idr_lock);
- idr_for_each_entry(&btf_idr, mod_btf, id) {
- if (!btf_is_module(mod_btf))
- continue;
- /* linear search could be slow hence unlock/lock
- * the IDR to avoiding holding it for too long
- */
- btf_get(mod_btf);
- spin_unlock_bh(&btf_idr_lock);
- ret = btf_find_by_name_kind(mod_btf, name, kind);
- if (ret > 0) {
- int btf_obj_fd;
-
- btf_obj_fd = __btf_new_fd(mod_btf);
- if (btf_obj_fd < 0) {
- btf_put(mod_btf);
- return btf_obj_fd;
- }
- return ret | (((u64)btf_obj_fd) << 32);
- }
- spin_lock_bh(&btf_idr_lock);
- btf_put(mod_btf);
+ ret = bpf_find_btf_id(name, kind, &btf);
+ if (ret > 0 && btf_is_module(btf)) {
+ btf_obj_fd = __btf_new_fd(btf);
+ if (btf_obj_fd < 0) {
+ btf_put(btf);
+ return btf_obj_fd;
}
- spin_unlock_bh(&btf_idr_lock);
+ return ret | (((u64)btf_obj_fd) << 32);
}
+ if (ret > 0)
+ btf_put(btf);
return ret;
}
@@ -6336,58 +7290,743 @@ const struct bpf_func_proto bpf_btf_find_by_name_kind_proto = {
.func = bpf_btf_find_by_name_kind,
.gpl_only = false,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_MEM,
+ .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg2_type = ARG_CONST_SIZE,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_ANYTHING,
};
-BTF_ID_LIST_GLOBAL_SINGLE(btf_task_struct_ids, struct, task_struct)
+BTF_ID_LIST_GLOBAL(btf_tracing_ids, MAX_BTF_TRACING_TYPE) /* list is filled by the BTF_ID entries generated below */
+#define BTF_TRACING_TYPE(name, type) BTF_ID(struct, type) /* 'name' is ignored here; each item yields one struct BTF_ID */
+BTF_TRACING_TYPE_xxx
+#undef BTF_TRACING_TYPE
-/* BTF ID set registration API for modules */
+/* Kernel Function (kfunc) BTF ID set registration API */
-#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
+ struct btf_id_set8 *add_set)
+{ /* Add @add_set to the kfunc ID set that @btf keeps for @hook.
+ * For a module BTF the @add_set pointer itself is stored (one set per
+ * hook); for vmlinux BTF the pairs are appended to a private array that
+ * is re-sorted afterwards. Returns 0 on success or a negative errno.
+ * Every failure that jumps to 'end' calls btf_free_kfunc_set_tab(btf).
+ */
+ bool vmlinux_set = !btf_is_module(btf);
+ struct btf_kfunc_set_tab *tab;
+ struct btf_id_set8 *set;
+ u32 set_cnt;
+ int ret;
+
+ if (hook >= BTF_KFUNC_HOOK_MAX) {
+ ret = -EINVAL;
+ goto end;
+ }
+
+ if (!add_set->cnt) /* nothing to register */
+ return 0;
+
+ tab = btf->kfunc_set_tab;
+ if (!tab) { /* first registration for this BTF: allocate the zeroed per-hook table */
+ tab = kzalloc(sizeof(*tab), GFP_KERNEL | __GFP_NOWARN);
+ if (!tab)
+ return -ENOMEM;
+ btf->kfunc_set_tab = tab;
+ }
+
+ set = tab->sets[hook];
+ /* Warn when register_btf_kfunc_id_set is called twice for the same hook
+ * for module sets.
+ */
+ if (WARN_ON_ONCE(set && !vmlinux_set)) {
+ ret = -EINVAL;
+ goto end;
+ }
-void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
- struct kfunc_btf_id_set *s)
+ /* We don't need to allocate, concatenate, and sort module sets, because
+ * only one is allowed per hook. Hence, we can directly assign the
+ * pointer and return.
+ */
+ if (!vmlinux_set) {
+ tab->sets[hook] = add_set;
+ return 0;
+ }
+
+ /* In case of vmlinux sets, there may be more than one set being
+ * registered per hook. To create a unified set, we allocate a new set
+ * and concatenate all individual sets being registered. While each set
+ * is individually sorted, they may become unsorted when concatenated,
+ * hence re-sorting the final set again is required to make binary
+ * searching the set using btf_id_set8_contains function work.
+ */
+ set_cnt = set ? set->cnt : 0;
+
+ if (set_cnt > U32_MAX - add_set->cnt) { /* the sum below would wrap around u32 */
+ ret = -EOVERFLOW;
+ goto end;
+ }
+
+ if (set_cnt + add_set->cnt > BTF_KFUNC_SET_MAX_CNT) {
+ ret = -E2BIG;
+ goto end;
+ }
+
+ /* Grow set */
+ set = krealloc(tab->sets[hook],
+ offsetof(struct btf_id_set8, pairs[set_cnt + add_set->cnt]),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!set) { /* tab->sets[hook] still holds the old array at this point */
+ ret = -ENOMEM;
+ goto end;
+ }
+
+ /* For newly allocated set, initialize set->cnt to 0 */
+ if (!tab->sets[hook])
+ set->cnt = 0;
+ tab->sets[hook] = set;
+
+ /* Concatenate the two sets */
+ memcpy(set->pairs + set->cnt, add_set->pairs, add_set->cnt * sizeof(set->pairs[0]));
+ set->cnt += add_set->cnt;
+
+ sort(set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func, NULL);
+
+ return 0;
+end:
+ btf_free_kfunc_set_tab(btf);
+ return ret;
+}
+
+static u32 *__btf_kfunc_id_set_contains(const struct btf *btf,
+ enum btf_kfunc_hook hook,
+ u32 kfunc_btf_id)
{
- mutex_lock(&l->mutex);
- list_add(&s->list, &l->list);
- mutex_unlock(&l->mutex);
+ struct btf_id_set8 *set; /* set registered in @btf for @hook, if any */
+ u32 *id;
+
+ if (hook >= BTF_KFUNC_HOOK_MAX) /* also rejects program types that have no hook */
+ return NULL;
+ if (!btf->kfunc_set_tab)
+ return NULL;
+ set = btf->kfunc_set_tab->sets[hook];
+ if (!set)
+ return NULL;
+ id = btf_id_set8_contains(set, kfunc_btf_id);
+ if (!id)
+ return NULL;
+ /* The flags for BTF ID are located next to it, so the pointer returned
+ * to the caller addresses the flags word of the matching entry. NULL
+ * (returned above) means @kfunc_btf_id is not registered for @hook.
+ */
+ return id + 1;
+}
+
+/* Translate a BPF program type into the kfunc hook its kfunc sets are
+ * registered under. Program types without a dedicated hook yield
+ * BTF_KFUNC_HOOK_MAX, which callers treat as "no hook".
+ */
+static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
+{
+	enum btf_kfunc_hook hook;
+
+	switch (prog_type) {
+	case BPF_PROG_TYPE_XDP:
+		hook = BTF_KFUNC_HOOK_XDP;
+		break;
+	case BPF_PROG_TYPE_SCHED_CLS:
+		hook = BTF_KFUNC_HOOK_TC;
+		break;
+	case BPF_PROG_TYPE_STRUCT_OPS:
+		hook = BTF_KFUNC_HOOK_STRUCT_OPS;
+		break;
+	case BPF_PROG_TYPE_TRACING:
+	case BPF_PROG_TYPE_LSM:
+		/* LSM programs share the tracing hook */
+		hook = BTF_KFUNC_HOOK_TRACING;
+		break;
+	case BPF_PROG_TYPE_SYSCALL:
+		hook = BTF_KFUNC_HOOK_SYSCALL;
+		break;
+	default:
+		hook = BTF_KFUNC_HOOK_MAX;
+		break;
+	}
+
+	return hook;
}
-EXPORT_SYMBOL_GPL(register_kfunc_btf_id_set);
-void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
- struct kfunc_btf_id_set *s)
+/* Look up @kfunc_btf_id in the kfunc set that @btf holds for the hook
+ * @prog_type maps to. Returns the result of __btf_kfunc_id_set_contains():
+ * a pointer to the flags stored next to the matching BTF ID, or NULL when
+ * the ID is not registered (program types without a hook map to
+ * BTF_KFUNC_HOOK_MAX and therefore always give NULL).
+ *
+ * Caution:
+ * Reference to the module (obtained using btf_try_get_module) corresponding to
+ * the struct btf *MUST* be held when calling this function from verifier
+ * context. This is usually true as we stash references in prog's kfunc_btf_tab;
+ * keeping the reference for the duration of the call provides the necessary
+ * protection for looking up a well-formed btf->kfunc_set_tab.
+ */
+u32 *btf_kfunc_id_set_contains(const struct btf *btf,
+ enum bpf_prog_type prog_type,
+ u32 kfunc_btf_id)
{
- mutex_lock(&l->mutex);
- list_del_init(&s->list);
- mutex_unlock(&l->mutex);
+ enum btf_kfunc_hook hook;
+
+ hook = bpf_prog_type_to_kfunc_hook(prog_type);
+ return __btf_kfunc_id_set_contains(btf, hook, kfunc_btf_id);
}
-EXPORT_SYMBOL_GPL(unregister_kfunc_btf_id_set);
-bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id,
- struct module *owner)
+/* Register the kfunc ID set kset->set for the hook that @prog_type maps to,
+ * in the BTF of kset->owner (vmlinux BTF when the owner is NULL).
+ * Returns 0 on success, and also when no BTF is available because the
+ * corresponding CONFIG_DEBUG_INFO_BTF* option is disabled; otherwise a
+ * negative errno. The BTF reference taken by btf_get_module_btf() is
+ * dropped before returning.
+ *
+ * This function must be invoked only from initcalls/module init functions
+ */
+int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
+ const struct btf_kfunc_id_set *kset)
{
- struct kfunc_btf_id_set *s;
+ enum btf_kfunc_hook hook;
+ struct btf *btf;
+ int ret;
- mutex_lock(&klist->mutex);
- list_for_each_entry(s, &klist->list, list) {
- if (s->owner == owner && btf_id_set_contains(s->set, kfunc_id)) {
- mutex_unlock(&klist->mutex);
- return true;
+ btf = btf_get_module_btf(kset->owner);
+ if (!btf) { /* no BTF found: an error only if BTF for this owner is configured in */
+ if (!kset->owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
+ pr_err("missing vmlinux BTF, cannot register kfuncs\n");
+ return -ENOENT;
+ }
+ if (kset->owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) {
+ pr_err("missing module BTF, cannot register kfuncs\n");
+ return -ENOENT;
}
+ return 0;
}
- mutex_unlock(&klist->mutex);
- return false;
+ if (IS_ERR(btf))
+ return PTR_ERR(btf);
+
+ hook = bpf_prog_type_to_kfunc_hook(prog_type);
+ ret = btf_populate_kfunc_set(btf, hook, kset->set);
+ btf_put(btf); /* pairs with the reference taken by btf_get_module_btf() */
+ return ret;
+}
+EXPORT_SYMBOL_GPL(register_btf_kfunc_id_set);
+
+/* Find the destructor kfunc registered in @btf for the type @btf_id.
+ * Returns the BTF ID of the destructor kfunc, or -ENOENT when @btf has no
+ * destructor table or the table has no entry for @btf_id.
+ */
+s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id)
+{
+	struct btf_id_dtor_kfunc_tab *dtor_tab = btf->dtor_kfunc_tab;
+	struct btf_id_dtor_kfunc *match;
+
+	/* btf_id_cmp_func looks only at the leading u32 of each element. An
+	 * entry of dtors[] is wider than that, so the key field must be the
+	 * first member for the comparison to be reusable here.
+	 */
+	BUILD_BUG_ON(offsetof(struct btf_id_dtor_kfunc, btf_id) != 0);
+
+	if (!dtor_tab)
+		return -ENOENT;
+
+	match = bsearch(&btf_id, dtor_tab->dtors, dtor_tab->cnt,
+			sizeof(dtor_tab->dtors[0]), btf_id_cmp_func);
+	if (match)
+		return match->kfunc_btf_id;
+
+	return -ENOENT;
+}
+
+static int btf_check_dtor_kfuncs(struct btf *btf, const struct btf_id_dtor_kfunc *dtors, u32 cnt)
+{ /* Validate the @cnt entries of @dtors against @btf: each kfunc_btf_id
+ * must name a FUNC whose prototype returns void and takes exactly one
+ * pointer argument. Returns 0 if all entries pass, -EINVAL on the first
+ * entry that does not.
+ */
+ const struct btf_type *dtor_func, *dtor_func_proto, *t;
+ const struct btf_param *args;
+ s32 dtor_btf_id;
+ u32 nr_args, i;
+
+ for (i = 0; i < cnt; i++) {
+ dtor_btf_id = dtors[i].kfunc_btf_id;
+
+ dtor_func = btf_type_by_id(btf, dtor_btf_id);
+ if (!dtor_func || !btf_type_is_func(dtor_func))
+ return -EINVAL;
+
+ dtor_func_proto = btf_type_by_id(btf, dtor_func->type); /* a FUNC's 'type' refers to its prototype */
+ if (!dtor_func_proto || !btf_type_is_func_proto(dtor_func_proto))
+ return -EINVAL;
+
+ /* Make sure the prototype of the destructor kfunc is 'void func(type *)' */
+ t = btf_type_by_id(btf, dtor_func_proto->type); /* return type of the prototype */
+ if (!t || !btf_type_is_void(t))
+ return -EINVAL;
+
+ nr_args = btf_type_vlen(dtor_func_proto);
+ if (nr_args != 1)
+ return -EINVAL;
+ args = btf_params(dtor_func_proto);
+ t = btf_type_by_id(btf, args[0].type);
+ /* Allow any pointer type, as width on targets Linux supports
+ * will be same for all pointer types (i.e. sizeof(void *))
+ */
+ if (!t || !btf_type_is_ptr(t))
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/* Register destructor kfuncs in the BTF that belongs to @owner.
+ *
+ * @dtors:   array of (btf_id, kfunc_btf_id) pairs to add
+ * @add_cnt: number of entries in @dtors
+ * @owner:   module whose BTF receives the entries; NULL selects vmlinux BTF
+ *
+ * Every destructor must have the prototype 'void func(type *)'; this is
+ * checked by btf_check_dtor_kfuncs() before anything is stored. The entries
+ * are appended to btf->dtor_kfunc_tab, which is then sorted by btf_id so that
+ * btf_find_dtor_kfunc() can bsearch() it. A module may register only once.
+ *
+ * Returns 0 on success, and also when no BTF is available because the
+ * corresponding CONFIG_DEBUG_INFO_BTF* option is disabled; otherwise a
+ * negative errno. On failure btf_free_dtor_kfunc_tab() is called for the
+ * BTF. The BTF reference taken by btf_get_module_btf() is dropped on every
+ * path, success included.
+ *
+ * This function must be invoked only from initcalls/module init functions.
+ */
+int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt,
+				struct module *owner)
+{
+	struct btf_id_dtor_kfunc_tab *tab;
+	struct btf *btf;
+	u32 tab_cnt;
+	int ret;
+
+	btf = btf_get_module_btf(owner);
+	if (!btf) {
+		/* No BTF found: an error only if BTF for this owner is configured in */
+		if (!owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
+			pr_err("missing vmlinux BTF, cannot register dtor kfuncs\n");
+			return -ENOENT;
+		}
+		if (owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) {
+			pr_err("missing module BTF, cannot register dtor kfuncs\n");
+			return -ENOENT;
+		}
+		return 0;
+	}
+	if (IS_ERR(btf))
+		return PTR_ERR(btf);
+
+	if (add_cnt >= BTF_DTOR_KFUNC_MAX_CNT) {
+		pr_err("cannot register more than %d kfunc destructors\n", BTF_DTOR_KFUNC_MAX_CNT);
+		ret = -E2BIG;
+		goto end;
+	}
+
+	/* Ensure that the prototype of dtor kfuncs being registered is sane */
+	ret = btf_check_dtor_kfuncs(btf, dtors, add_cnt);
+	if (ret < 0)
+		goto end;
+
+	tab = btf->dtor_kfunc_tab;
+	/* Only one call allowed for modules */
+	if (WARN_ON_ONCE(tab && btf_is_module(btf))) {
+		ret = -EINVAL;
+		goto end;
+	}
+
+	tab_cnt = tab ? tab->cnt : 0;
+	/* Guard the u32 sum below against wrap-around */
+	if (tab_cnt > U32_MAX - add_cnt) {
+		ret = -EOVERFLOW;
+		goto end;
+	}
+	if (tab_cnt + add_cnt >= BTF_DTOR_KFUNC_MAX_CNT) {
+		pr_err("cannot register more than %d kfunc destructors\n", BTF_DTOR_KFUNC_MAX_CNT);
+		ret = -E2BIG;
+		goto end;
+	}
+
+	/* On failure btf->dtor_kfunc_tab still points at the old table, which
+	 * the error path below releases.
+	 */
+	tab = krealloc(btf->dtor_kfunc_tab,
+		       offsetof(struct btf_id_dtor_kfunc_tab, dtors[tab_cnt + add_cnt]),
+		       GFP_KERNEL | __GFP_NOWARN);
+	if (!tab) {
+		ret = -ENOMEM;
+		goto end;
+	}
+
+	/* A freshly allocated table has an uninitialized count */
+	if (!btf->dtor_kfunc_tab)
+		tab->cnt = 0;
+	btf->dtor_kfunc_tab = tab;
+
+	memcpy(tab->dtors + tab->cnt, dtors, add_cnt * sizeof(tab->dtors[0]));
+	tab->cnt += add_cnt;
+
+	/* Keep the table ordered by btf_id for btf_find_dtor_kfunc() */
+	sort(tab->dtors, tab->cnt, sizeof(tab->dtors[0]), btf_id_cmp_func, NULL);
+
+	ret = 0;
+end:
+	/* The table is torn down only when registration failed; on success
+	 * it must stay attached to the BTF.
+	 */
+	if (ret)
+		btf_free_dtor_kfunc_tab(btf);
+	/* Drop the reference taken by btf_get_module_btf(). The success path
+	 * previously returned without doing so and leaked the reference.
+	 */
+	btf_put(btf);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(register_btf_id_dtor_kfuncs);
+
+#define MAX_TYPES_ARE_COMPAT_DEPTH 2
+
+/* Check local and target types for compatibility. This check is used for
+ * type-based CO-RE relocations and follows slightly different rules than
+ * field-based relocations. This function assumes that root types were already
+ * checked for name match. Beyond that initial root-level name check, names
+ * are completely ignored. Compatibility rules are as follows:
+ * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs/ENUM64s are considered compatible, but
+ * kind should match for local and target types (i.e., STRUCT is not
+ * compatible with UNION);
+ * - for ENUMs/ENUM64s, the size is ignored;
+ * - for INT, size and signedness are ignored;
+ * - for ARRAY, dimensionality is ignored, element types are checked for
+ * compatibility recursively;
+ * - CONST/VOLATILE/RESTRICT modifiers are ignored;
+ * - TYPEDEFs/PTRs are compatible if the types they point to are compatible;
+ * - FUNC_PROTOs are compatible if they have compatible signature: same
+ * number of input args and compatible return and argument types.
+ * These rules are not set in stone and probably will be adjusted as we get
+ * more experience with using BPF CO-RE relocations.
+ *
+ * The work is done by __bpf_core_types_are_compat(), which is called with
+ * MAX_TYPES_ARE_COMPAT_DEPTH as its last argument (presumably the recursion
+ * limit; confirm against that helper). Its return value is passed through.
+ */
+int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
+ const struct btf *targ_btf, __u32 targ_id)
+{
+ return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id,
+ MAX_TYPES_ARE_COMPAT_DEPTH);
+}
+
+#define MAX_TYPES_MATCH_DEPTH 2
+
+int bpf_core_types_match(const struct btf *local_btf, u32 local_id,
+ const struct btf *targ_btf, u32 targ_id)
+{ /* Thin wrapper: forwards to __bpf_core_types_match() with 'false' as the
+ * fifth argument and MAX_TYPES_MATCH_DEPTH as the last one, and returns
+ * its result unchanged. NOTE(review): the meaning of both extra
+ * arguments is defined by that helper, which is not visible here.
+ */
+ return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false,
+ MAX_TYPES_MATCH_DEPTH);
}
-#define DEFINE_KFUNC_BTF_ID_LIST(name) \
- struct kfunc_btf_id_list name = { LIST_HEAD_INIT(name.list), \
- __MUTEX_INITIALIZER(name.mutex) }; \
- EXPORT_SYMBOL_GPL(name)
+/* Tell whether @s begins an "X___Y" flavor separator: one character that is
+ * not an underscore, exactly three underscores, then another character that
+ * is not an underscore. Reads s[0]..s[4], stopping at the first mismatch.
+ */
+static bool bpf_core_is_flavor_sep(const char *s)
+{
+	/* X must not be an underscore */
+	if (s[0] == '_')
+		return false;
+	/* the "___" in the middle */
+	if (s[1] != '_' || s[2] != '_' || s[3] != '_')
+		return false;
+	/* Y must not be an underscore either */
+	return s[4] != '_';
+}
+
+size_t bpf_core_essential_name_len(const char *name)
+{ /* Return the length of @name without a trailing "___flavor" suffix, or
+ * strlen(@name) when no flavor separator is found. The scan runs from
+ * the end, so the right-most separator decides; for example
+ * "task_struct___v2" gives 11.
+ */
+ size_t n = strlen(name);
+ int i;
-DEFINE_KFUNC_BTF_ID_LIST(bpf_tcp_ca_kfunc_list);
-DEFINE_KFUNC_BTF_ID_LIST(prog_test_kfunc_list);
+ for (i = n - 5; i >= 0; i--) { /* names shorter than 5 chars start below 0 and skip the loop */
+ if (bpf_core_is_flavor_sep(name + i))
+ return i + 1; /* keep the 'X' that precedes the "___" */
+ }
+ return n;
+}
+
+struct bpf_cand_cache {
+ const char *name; /* type name; only the first name_len bytes are hashed and compared */
+ u32 name_len; /* essential length of name (see bpf_core_essential_name_len) */
+ u16 kind; /* BTF kind that candidates must have */
+ u16 cnt; /* number of valid entries in cands[] */
+ struct {
+ const struct btf *btf; /* BTF object the candidate type lives in */
+ u32 id; /* type ID of the candidate within that BTF */
+ } cands[];
+};
+
+/* Release a candidate list that is still being built. A list with no
+ * entries is left alone, anything else is handed to kfree().
+ */
+static void bpf_free_cands(struct bpf_cand_cache *cands)
+{
+	/* empty candidate array was allocated on stack */
+	if (cands->cnt)
+		kfree(cands);
+}
+
+static void bpf_free_cands_from_cache(struct bpf_cand_cache *cands)
+{ /* Free an entry owned by a cache: entries stored by populate_cand_cache() carry their own copy of the name */
+ kfree(cands->name);
+ kfree(cands);
+}
+
+#define VMLINUX_CAND_CACHE_SIZE 31
+static struct bpf_cand_cache *vmlinux_cand_cache[VMLINUX_CAND_CACHE_SIZE]; /* one entry per slot, indexed by hash_cands() % size */
+
+#define MODULE_CAND_CACHE_SIZE 31
+static struct bpf_cand_cache *module_cand_cache[MODULE_CAND_CACHE_SIZE]; /* same layout, for candidates found in module BTFs */
+
+static DEFINE_MUTEX(cand_cache_mutex); /* taken by bpf_core_apply(), print_cand_cache() and purge_cand_cache() */
+
+static void __print_cand_cache(struct bpf_verifier_log *log,
+ struct bpf_cand_cache **cache,
+ int cache_size)
+{ /* Log every occupied slot of @cache as "[slot]name(id id ...), " */
+ struct bpf_cand_cache *cc;
+ int i, j;
+
+ for (i = 0; i < cache_size; i++) {
+ cc = cache[i];
+ if (!cc) /* empty slot */
+ continue;
+ bpf_log(log, "[%d]%s(", i, cc->name);
+ for (j = 0; j < cc->cnt; j++) {
+ bpf_log(log, "%d", cc->cands[j].id);
+ if (j < cc->cnt - 1) /* separator between IDs, none after the last */
+ bpf_log(log, " ");
+ }
+ bpf_log(log, "), ");
+ }
+}
+
+static void print_cand_cache(struct bpf_verifier_log *log)
+{ /* Dump both candidate caches to @log while holding cand_cache_mutex */
+ mutex_lock(&cand_cache_mutex);
+ bpf_log(log, "vmlinux_cand_cache:");
+ __print_cand_cache(log, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE);
+ bpf_log(log, "\nmodule_cand_cache:");
+ __print_cand_cache(log, module_cand_cache, MODULE_CAND_CACHE_SIZE);
+ bpf_log(log, "\n");
+ mutex_unlock(&cand_cache_mutex);
+}
+
+static u32 hash_cands(struct bpf_cand_cache *cands)
+{
+ return jhash(cands->name, cands->name_len, 0); /* hash over the first name_len bytes of the name, initval 0 */
+}
+
+/* Look up @cands in @cache. The slot is chosen by the name hash; the entry
+ * found there is returned only if its name length and the first name_len
+ * bytes of its name equal those of @cands. Returns NULL otherwise.
+ */
+static struct bpf_cand_cache *check_cand_cache(struct bpf_cand_cache *cands,
+					       struct bpf_cand_cache **cache,
+					       int cache_size)
+{
+	struct bpf_cand_cache *hit = cache[hash_cands(cands) % cache_size];
+
+	if (!hit)
+		return NULL;
+	if (hit->name_len != cands->name_len)
+		return NULL;
+	if (strncmp(hit->name, cands->name, cands->name_len))
+		return NULL;
+
+	return hit;
+}
+
+static size_t sizeof_cands(int cnt)
+{
+ return offsetof(struct bpf_cand_cache, cands[cnt]); /* bytes for the header plus cnt entries of cands[] */
+}
+
+static struct bpf_cand_cache *populate_cand_cache(struct bpf_cand_cache *cands,
+ struct bpf_cand_cache **cache,
+ int cache_size)
+{ /* Store a copy of @cands in the slot of @cache selected by its name hash
+ * and return that copy, or ERR_PTR(-ENOMEM). Any entry already in the
+ * slot is freed first, so on failure the slot is left empty. @cands is
+ * passed to bpf_free_cands() on every path; callers must not use it
+ * afterwards unless it was the empty on-stack list.
+ */
+ struct bpf_cand_cache **cc = &cache[hash_cands(cands) % cache_size], *new_cands;
+
+ if (*cc) { /* evict the current occupant of the slot */
+ bpf_free_cands_from_cache(*cc);
+ *cc = NULL;
+ }
+ new_cands = kmemdup(cands, sizeof_cands(cands->cnt), GFP_KERNEL);
+ if (!new_cands) {
+ bpf_free_cands(cands);
+ return ERR_PTR(-ENOMEM);
+ }
+ /* strdup the name, since it will stay in cache.
+ * the cands->name points to strings in prog's BTF and the prog can be unloaded.
+ */
+ new_cands->name = kmemdup_nul(cands->name, cands->name_len, GFP_KERNEL);
+ bpf_free_cands(cands); /* the name was read above; the input list is no longer needed */
+ if (!new_cands->name) {
+ kfree(new_cands);
+ return ERR_PTR(-ENOMEM);
+ }
+ *cc = new_cands;
+ return new_cands;
+}
+
+#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+static void __purge_cand_cache(struct btf *btf, struct bpf_cand_cache **cache,
+ int cache_size)
+{ /* Drop entries from @cache: all of them when @btf is NULL, otherwise
+ * only those that list at least one candidate from @btf.
+ */
+ struct bpf_cand_cache *cc;
+ int i, j;
+
+ for (i = 0; i < cache_size; i++) {
+ cc = cache[i];
+ if (!cc)
+ continue;
+ if (!btf) {
+ /* when new module is loaded purge all of module_cand_cache,
+ * since new module might have candidates with the name
+ * that matches cached cands.
+ */
+ bpf_free_cands_from_cache(cc);
+ cache[i] = NULL;
+ continue;
+ }
+ /* when module is unloaded purge cache entries
+ * that match module's btf
+ */
+ for (j = 0; j < cc->cnt; j++)
+ if (cc->cands[j].btf == btf) {
+ bpf_free_cands_from_cache(cc);
+ cache[i] = NULL;
+ break; /* cc is freed; stop scanning its candidates */
+ }
+ }
+
+}
+
+static void purge_cand_cache(struct btf *btf)
+{ /* Purge module_cand_cache under cand_cache_mutex; see __purge_cand_cache() for the meaning of a NULL @btf */
+ mutex_lock(&cand_cache_mutex);
+ __purge_cand_cache(btf, module_cand_cache, MODULE_CAND_CACHE_SIZE);
+ mutex_unlock(&cand_cache_mutex);
+}
#endif
+
+static struct bpf_cand_cache *
+bpf_core_add_cands(struct bpf_cand_cache *cands, const struct btf *targ_btf,
+ int targ_start_id)
+{ /* Scan the types of @targ_btf from ID @targ_start_id up to
+ * btf_nr_types(@targ_btf) and append every type whose kind equals
+ * cands->kind and whose essential name equals the first name_len bytes
+ * of cands->name. Each match allocates a list one entry larger and
+ * releases the previous one. Returns the resulting list (the input
+ * pointer itself when nothing matched) or ERR_PTR(-ENOMEM), in which
+ * case the list built so far has been passed to bpf_free_cands().
+ */
+ struct bpf_cand_cache *new_cands;
+ const struct btf_type *t;
+ const char *targ_name;
+ size_t targ_essent_len;
+ int n, i;
+
+ n = btf_nr_types(targ_btf);
+ for (i = targ_start_id; i < n; i++) {
+ t = btf_type_by_id(targ_btf, i);
+ if (btf_kind(t) != cands->kind)
+ continue;
+
+ targ_name = btf_name_by_offset(targ_btf, t->name_off);
+ if (!targ_name)
+ continue;
+
+ /* the resched point is before strncmp to make sure that search
+ * for non-existing name will have a chance to schedule().
+ */
+ cond_resched();
+
+ if (strncmp(cands->name, targ_name, cands->name_len) != 0)
+ continue;
+
+ targ_essent_len = bpf_core_essential_name_len(targ_name);
+ if (targ_essent_len != cands->name_len) /* prefix matched, but the target's essential name is longer */
+ continue;
+
+ /* most of the time there is only one candidate for a given kind+name pair */
+ new_cands = kmalloc(sizeof_cands(cands->cnt + 1), GFP_KERNEL);
+ if (!new_cands) {
+ bpf_free_cands(cands);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ memcpy(new_cands, cands, sizeof_cands(cands->cnt)); /* header and existing entries */
+ bpf_free_cands(cands);
+ cands = new_cands;
+ cands->cands[cands->cnt].btf = targ_btf;
+ cands->cands[cands->cnt].id = i;
+ cands->cnt++;
+ }
+ return cands;
+}
+
+static struct bpf_cand_cache *
+bpf_core_find_cands(struct bpf_core_ctx *ctx, u32 local_type_id)
+{ /* Find target candidates for the local type @local_type_id of ctx->btf.
+ * vmlinux BTF is consulted first (through vmlinux_cand_cache); module
+ * BTFs are searched only when vmlinux has no candidate (through
+ * module_cand_cache). Returns a cache entry, which may have cnt == 0,
+ * or an ERR_PTR. The returned entry belongs to the cache; the visible
+ * caller, bpf_core_apply(), invokes this with cand_cache_mutex held.
+ */
+ struct bpf_cand_cache *cands, *cc, local_cand = {};
+ const struct btf *local_btf = ctx->btf;
+ const struct btf_type *local_type;
+ const struct btf *main_btf;
+ size_t local_essent_len;
+ struct btf *mod_btf;
+ const char *name;
+ int id;
+
+ main_btf = bpf_get_btf_vmlinux();
+ if (IS_ERR(main_btf))
+ return ERR_CAST(main_btf);
+ if (!main_btf)
+ return ERR_PTR(-EINVAL);
+
+ local_type = btf_type_by_id(local_btf, local_type_id);
+ if (!local_type)
+ return ERR_PTR(-EINVAL);
+
+ name = btf_name_by_offset(local_btf, local_type->name_off);
+ if (str_is_empty(name))
+ return ERR_PTR(-EINVAL);
+ local_essent_len = bpf_core_essential_name_len(name);
+
+ cands = &local_cand; /* search key: name, kind and essential length; cnt stays 0 */
+ cands->name = name;
+ cands->kind = btf_kind(local_type);
+ cands->name_len = local_essent_len;
+
+ cc = check_cand_cache(cands, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE);
+ /* cands is a pointer to stack here */
+ if (cc) {
+ if (cc->cnt)
+ return cc;
+ goto check_modules; /* cached "not in vmlinux": go straight to modules */
+ }
+
+ /* Attempt to find target candidates in vmlinux BTF first */
+ cands = bpf_core_add_cands(cands, main_btf, 1);
+ if (IS_ERR(cands))
+ return ERR_CAST(cands);
+
+ /* cands is a pointer to kmalloced memory here if cands->cnt > 0 */
+
+ /* populate cache even when cands->cnt == 0 */
+ cc = populate_cand_cache(cands, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE);
+ if (IS_ERR(cc))
+ return ERR_CAST(cc);
+
+ /* if vmlinux BTF has any candidate, don't go for module BTFs */
+ if (cc->cnt)
+ return cc;
+
+check_modules:
+ /* cands is a pointer to stack here and cands->cnt == 0 */
+ cc = check_cand_cache(cands, module_cand_cache, MODULE_CAND_CACHE_SIZE);
+ if (cc)
+ /* if cache has it return it even if cc->cnt == 0 */
+ return cc;
+
+ /* If candidate is not found in vmlinux's BTF then search in module's BTFs */
+ spin_lock_bh(&btf_idr_lock);
+ idr_for_each_entry(&btf_idr, mod_btf, id) {
+ if (!btf_is_module(mod_btf))
+ continue;
+ /* linear search could be slow hence unlock/lock
+ * the IDR to avoiding holding it for too long
+ */
+ btf_get(mod_btf);
+ spin_unlock_bh(&btf_idr_lock);
+ cands = bpf_core_add_cands(cands, mod_btf, btf_nr_types(main_btf)); /* start after the IDs taken by vmlinux BTF */
+ if (IS_ERR(cands)) {
+ btf_put(mod_btf);
+ return ERR_CAST(cands);
+ }
+ spin_lock_bh(&btf_idr_lock);
+ btf_put(mod_btf);
+ }
+ spin_unlock_bh(&btf_idr_lock);
+ /* cands is a pointer to kmalloced memory here if cands->cnt > 0
+ * or pointer to stack if cands->cnt == 0.
+ * Copy it into the cache even when cands->cnt == 0 and
+ * return the result.
+ */
+ return populate_cand_cache(cands, module_cand_cache, MODULE_CAND_CACHE_SIZE);
+}
+
+int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo,
+ int relo_idx, void *insn)
+{ /* Apply CO-RE relocation @relo (number @relo_idx) to the instructions at
+ * @insn: collect target candidates unless the relocation kind is
+ * BPF_CORE_TYPE_ID_LOCAL, compute the relocation result and patch the
+ * instruction. Returns 0 on success or a negative errno.
+ */
+ bool need_cands = relo->kind != BPF_CORE_TYPE_ID_LOCAL;
+ struct bpf_core_cand_list cands = {};
+ struct bpf_core_relo_res targ_res;
+ struct bpf_core_spec *specs;
+ int err;
+
+ /* ~4k of temp memory necessary to convert LLVM spec like "0:1:0:5"
+ * into arrays of btf_ids of struct fields and array indices.
+ */
+ specs = kcalloc(3, sizeof(*specs), GFP_KERNEL);
+ if (!specs)
+ return -ENOMEM;
+
+ if (need_cands) {
+ struct bpf_cand_cache *cc;
+ int i;
+
+ mutex_lock(&cand_cache_mutex); /* released at 'out', after the relocation has been computed */
+ cc = bpf_core_find_cands(ctx, relo->type_id);
+ if (IS_ERR(cc)) {
+ bpf_log(ctx->log, "target candidate search failed for %d\n",
+ relo->type_id);
+ err = PTR_ERR(cc);
+ goto out;
+ }
+ if (cc->cnt) {
+ cands.cands = kcalloc(cc->cnt, sizeof(*cands.cands), GFP_KERNEL);
+ if (!cands.cands) {
+ err = -ENOMEM;
+ goto out;
+ }
+ }
+ for (i = 0; i < cc->cnt; i++) { /* copy the cached candidates into the private list */
+ bpf_log(ctx->log,
+ "CO-RE relocating %s %s: found target candidate [%d]\n",
+ btf_kind_str[cc->kind], cc->name, cc->cands[i].id);
+ cands.cands[i].btf = cc->cands[i].btf;
+ cands.cands[i].id = cc->cands[i].id;
+ }
+ cands.len = cc->cnt;
+ /* cand_cache_mutex needs to span the cache lookup and
+ * copy of btf pointer into bpf_core_cand_list,
+ * since module can be unloaded while bpf_core_calc_relo_insn
+ * is working with module's btf.
+ */
+ }
+
+ err = bpf_core_calc_relo_insn((void *)ctx->log, relo, relo_idx, ctx->btf, &cands, specs,
+ &targ_res);
+ if (err)
+ goto out;
+
+ err = bpf_core_patch_insn((void *)ctx->log, insn, relo->insn_off / 8, relo, relo_idx,
+ &targ_res); /* NOTE(review): insn_off / 8 presumably converts a byte offset to an instruction index - confirm */
+
+out:
+ kfree(specs);
+ if (need_cands) {
+ kfree(cands.cands); /* NULL when no candidate array was allocated */
+ mutex_unlock(&cand_cache_mutex);
+ if (ctx->log->level & BPF_LOG_LEVEL2) /* print_cand_cache() takes the mutex itself, hence after the unlock */
+ print_cand_cache(ctx->log);
+ }
+ return err;
+}