aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2020-09-23 13:11:11 -0700
committerDavid S. Miller <davem@davemloft.net>2020-09-23 13:11:11 -0700
commit6d772f328d6ad3e4fb64385784571be4be25e63d (patch)
treede6e9d5b1aac58a7e1fd9502f0baa45b5c12b296 /kernel
parentMerge tag 'linux-can-next-for-5.10-20200923' of git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can-next (diff)
parenttools resolve_btfids: Always force HOSTARCH (diff)
downloadlinux-dev-6d772f328d6ad3e4fb64385784571be4be25e63d.tar.xz
linux-dev-6d772f328d6ad3e4fb64385784571be4be25e63d.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says: ==================== pull-request: bpf-next 2020-09-23 The following pull-request contains BPF updates for your *net-next* tree. We've added 95 non-merge commits during the last 22 day(s) which contain a total of 124 files changed, 4211 insertions(+), 2040 deletions(-). The main changes are: 1) Full multi function support in libbpf, from Andrii. 2) Refactoring of function argument checks, from Lorenz. 3) Make bpf_tail_call compatible with functions (subprograms), from Maciej. 4) Program metadata support, from YiFei. 5) bpf iterator optimizations, from Yonghong. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/arraymap.c55
-rw-r--r--kernel/bpf/bpf_inode_storage.c8
-rw-r--r--kernel/bpf/bpf_local_storage.c2
-rw-r--r--kernel/bpf/btf.c15
-rw-r--r--kernel/bpf/core.c18
-rw-r--r--kernel/bpf/stackmap.c5
-rw-r--r--kernel/bpf/syscall.c87
-rw-r--r--kernel/bpf/task_iter.c15
-rw-r--r--kernel/bpf/verifier.c540
-rw-r--r--kernel/trace/bpf_trace.c23
10 files changed, 523 insertions, 245 deletions
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index e046fb7d17cd..e5fd31268ae0 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -898,6 +898,7 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
struct bpf_prog *old,
struct bpf_prog *new)
{
+ u8 *old_addr, *new_addr, *old_bypass_addr;
struct prog_poke_elem *elem;
struct bpf_array_aux *aux;
@@ -918,12 +919,13 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
* there could be danger of use after free otherwise.
* 2) Initially when we start tracking aux, the program
* is not JITed yet and also does not have a kallsyms
- * entry. We skip these as poke->ip_stable is not
- * active yet. The JIT will do the final fixup before
- * setting it stable. The various poke->ip_stable are
- * successively activated, so tail call updates can
- * arrive from here while JIT is still finishing its
- * final fixup for non-activated poke entries.
+ * entry. We skip these as poke->tailcall_target_stable
+ * is not active yet. The JIT will do the final fixup
+ * before setting it stable. The various
+ * poke->tailcall_target_stable are successively
+ * activated, so tail call updates can arrive from here
+ * while JIT is still finishing its final fixup for
+ * non-activated poke entries.
* 3) On program teardown, the program's kallsym entry gets
* removed out of RCU callback, but we can only untrack
* from sleepable context, therefore bpf_arch_text_poke()
@@ -940,7 +942,7 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
* 5) Any other error happening below from bpf_arch_text_poke()
* is a unexpected bug.
*/
- if (!READ_ONCE(poke->ip_stable))
+ if (!READ_ONCE(poke->tailcall_target_stable))
continue;
if (poke->reason != BPF_POKE_REASON_TAIL_CALL)
continue;
@@ -948,12 +950,39 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
poke->tail_call.key != key)
continue;
- ret = bpf_arch_text_poke(poke->ip, BPF_MOD_JUMP,
- old ? (u8 *)old->bpf_func +
- poke->adj_off : NULL,
- new ? (u8 *)new->bpf_func +
- poke->adj_off : NULL);
- BUG_ON(ret < 0 && ret != -EINVAL);
+ old_bypass_addr = old ? NULL : poke->bypass_addr;
+ old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL;
+ new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL;
+
+ if (new) {
+ ret = bpf_arch_text_poke(poke->tailcall_target,
+ BPF_MOD_JUMP,
+ old_addr, new_addr);
+ BUG_ON(ret < 0 && ret != -EINVAL);
+ if (!old) {
+ ret = bpf_arch_text_poke(poke->tailcall_bypass,
+ BPF_MOD_JUMP,
+ poke->bypass_addr,
+ NULL);
+ BUG_ON(ret < 0 && ret != -EINVAL);
+ }
+ } else {
+ ret = bpf_arch_text_poke(poke->tailcall_bypass,
+ BPF_MOD_JUMP,
+ old_bypass_addr,
+ poke->bypass_addr);
+ BUG_ON(ret < 0 && ret != -EINVAL);
+ /* let other CPUs finish the execution of program
+ * so that it will not possible to expose them
+ * to invalid nop, stack unwind, nop state
+ */
+ if (!ret)
+ synchronize_rcu();
+ ret = bpf_arch_text_poke(poke->tailcall_target,
+ BPF_MOD_JUMP,
+ old_addr, NULL);
+ BUG_ON(ret < 0 && ret != -EINVAL);
+ }
}
}
}
diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c
index 75be02799c0f..6edff97ad594 100644
--- a/kernel/bpf/bpf_inode_storage.c
+++ b/kernel/bpf/bpf_inode_storage.c
@@ -249,9 +249,7 @@ const struct bpf_map_ops inode_storage_map_ops = {
.map_owner_storage_ptr = inode_storage_ptr,
};
-BTF_ID_LIST(bpf_inode_storage_btf_ids)
-BTF_ID_UNUSED
-BTF_ID(struct, inode)
+BTF_ID_LIST_SINGLE(bpf_inode_storage_btf_ids, struct, inode)
const struct bpf_func_proto bpf_inode_storage_get_proto = {
.func = bpf_inode_storage_get,
@@ -259,9 +257,9 @@ const struct bpf_func_proto bpf_inode_storage_get_proto = {
.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_BTF_ID,
+ .arg2_btf_id = &bpf_inode_storage_btf_ids[0],
.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
.arg4_type = ARG_ANYTHING,
- .btf_id = bpf_inode_storage_btf_ids,
};
const struct bpf_func_proto bpf_inode_storage_delete_proto = {
@@ -270,5 +268,5 @@ const struct bpf_func_proto bpf_inode_storage_delete_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_BTF_ID,
- .btf_id = bpf_inode_storage_btf_ids,
+ .arg2_btf_id = &bpf_inode_storage_btf_ids[0],
};
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index ffa7d11fc2bd..5d3a7af9ba9b 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -159,7 +159,7 @@ void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
struct bpf_local_storage_elem *selem)
{
RCU_INIT_POINTER(selem->local_storage, local_storage);
- hlist_add_head(&selem->snode, &local_storage->list);
+ hlist_add_head_rcu(&selem->snode, &local_storage->list);
}
void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index f9ac6935ab3c..5d3c36e13139 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -4193,19 +4193,6 @@ again:
return true;
}
-int btf_resolve_helper_id(struct bpf_verifier_log *log,
- const struct bpf_func_proto *fn, int arg)
-{
- int id;
-
- if (fn->arg_type[arg] != ARG_PTR_TO_BTF_ID || !btf_vmlinux)
- return -EINVAL;
- id = fn->btf_id[arg];
- if (!id || id > btf_vmlinux->nr_types)
- return -EINVAL;
- return id;
-}
-
static int __get_type_size(struct btf *btf, u32 btf_id,
const struct btf_type **bad_type)
{
@@ -4772,7 +4759,7 @@ static int btf_id_cmp_func(const void *a, const void *b)
return *pa - *pb;
}
-bool btf_id_set_contains(struct btf_id_set *set, u32 id)
+bool btf_id_set_contains(const struct btf_id_set *set, u32 id)
{
return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL;
}
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index ed0b3578867c..c4811b139caa 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -98,6 +98,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
fp->jit_requested = ebpf_jit_enabled();
INIT_LIST_HEAD_RCU(&fp->aux->ksym.lnode);
+ mutex_init(&fp->aux->used_maps_mutex);
return fp;
}
@@ -253,6 +254,7 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
void __bpf_prog_free(struct bpf_prog *fp)
{
if (fp->aux) {
+ mutex_destroy(&fp->aux->used_maps_mutex);
free_percpu(fp->aux->stats);
kfree(fp->aux->poke_tab);
kfree(fp->aux);
@@ -773,7 +775,8 @@ int bpf_jit_add_poke_descriptor(struct bpf_prog *prog,
if (size > poke_tab_max)
return -ENOSPC;
- if (poke->ip || poke->ip_stable || poke->adj_off)
+ if (poke->tailcall_target || poke->tailcall_target_stable ||
+ poke->tailcall_bypass || poke->adj_off || poke->bypass_addr)
return -EINVAL;
switch (poke->reason) {
@@ -1747,8 +1750,9 @@ bool bpf_prog_array_compatible(struct bpf_array *array,
static int bpf_check_tail_call(const struct bpf_prog *fp)
{
struct bpf_prog_aux *aux = fp->aux;
- int i;
+ int i, ret = 0;
+ mutex_lock(&aux->used_maps_mutex);
for (i = 0; i < aux->used_map_cnt; i++) {
struct bpf_map *map = aux->used_maps[i];
struct bpf_array *array;
@@ -1757,11 +1761,15 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
continue;
array = container_of(map, struct bpf_array, map);
- if (!bpf_prog_array_compatible(array, fp))
- return -EINVAL;
+ if (!bpf_prog_array_compatible(array, fp)) {
+ ret = -EINVAL;
+ goto out;
+ }
}
- return 0;
+out:
+ mutex_unlock(&aux->used_maps_mutex);
+ return ret;
}
static void bpf_prog_select_func(struct bpf_prog *fp)
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index a2fa006f430e..06065fa27124 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -665,18 +665,17 @@ BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf,
return __bpf_get_stack(regs, task, NULL, buf, size, flags);
}
-BTF_ID_LIST(bpf_get_task_stack_btf_ids)
-BTF_ID(struct, task_struct)
+BTF_ID_LIST_SINGLE(bpf_get_task_stack_btf_ids, struct, task_struct)
const struct bpf_func_proto bpf_get_task_stack_proto = {
.func = bpf_get_task_stack,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg1_btf_id = &bpf_get_task_stack_btf_ids[0],
.arg2_type = ARG_PTR_TO_UNINIT_MEM,
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
- .btf_id = bpf_get_task_stack_btf_ids,
};
BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 178c147350f5..34268491d2de 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2345,12 +2345,8 @@ void bpf_link_put(struct bpf_link *link)
if (!atomic64_dec_and_test(&link->refcnt))
return;
- if (in_atomic()) {
- INIT_WORK(&link->work, bpf_link_put_deferred);
- schedule_work(&link->work);
- } else {
- bpf_link_free(link);
- }
+ INIT_WORK(&link->work, bpf_link_put_deferred);
+ schedule_work(&link->work);
}
static int bpf_link_release(struct inode *inode, struct file *filp)
@@ -3162,21 +3158,25 @@ static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog,
const struct bpf_map *map;
int i;
+ mutex_lock(&prog->aux->used_maps_mutex);
for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) {
map = prog->aux->used_maps[i];
if (map == (void *)addr) {
*type = BPF_PSEUDO_MAP_FD;
- return map;
+ goto out;
}
if (!map->ops->map_direct_value_meta)
continue;
if (!map->ops->map_direct_value_meta(map, addr, off)) {
*type = BPF_PSEUDO_MAP_VALUE;
- return map;
+ goto out;
}
}
+ map = NULL;
- return NULL;
+out:
+ mutex_unlock(&prog->aux->used_maps_mutex);
+ return map;
}
static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog,
@@ -3294,6 +3294,7 @@ static int bpf_prog_get_info_by_fd(struct file *file,
memcpy(info.tag, prog->tag, sizeof(prog->tag));
memcpy(info.name, prog->aux->name, sizeof(prog->aux->name));
+ mutex_lock(&prog->aux->used_maps_mutex);
ulen = info.nr_map_ids;
info.nr_map_ids = prog->aux->used_map_cnt;
ulen = min_t(u32, info.nr_map_ids, ulen);
@@ -3303,9 +3304,12 @@ static int bpf_prog_get_info_by_fd(struct file *file,
for (i = 0; i < ulen; i++)
if (put_user(prog->aux->used_maps[i]->id,
- &user_map_ids[i]))
+ &user_map_ids[i])) {
+ mutex_unlock(&prog->aux->used_maps_mutex);
return -EFAULT;
+ }
}
+ mutex_unlock(&prog->aux->used_maps_mutex);
err = set_info_rec_size(&info);
if (err)
@@ -4153,6 +4157,66 @@ static int bpf_iter_create(union bpf_attr *attr)
return err;
}
+#define BPF_PROG_BIND_MAP_LAST_FIELD prog_bind_map.flags
+
+static int bpf_prog_bind_map(union bpf_attr *attr)
+{
+ struct bpf_prog *prog;
+ struct bpf_map *map;
+ struct bpf_map **used_maps_old, **used_maps_new;
+ int i, ret = 0;
+
+ if (CHECK_ATTR(BPF_PROG_BIND_MAP))
+ return -EINVAL;
+
+ if (attr->prog_bind_map.flags)
+ return -EINVAL;
+
+ prog = bpf_prog_get(attr->prog_bind_map.prog_fd);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+ map = bpf_map_get(attr->prog_bind_map.map_fd);
+ if (IS_ERR(map)) {
+ ret = PTR_ERR(map);
+ goto out_prog_put;
+ }
+
+ mutex_lock(&prog->aux->used_maps_mutex);
+
+ used_maps_old = prog->aux->used_maps;
+
+ for (i = 0; i < prog->aux->used_map_cnt; i++)
+ if (used_maps_old[i] == map)
+ goto out_unlock;
+
+ used_maps_new = kmalloc_array(prog->aux->used_map_cnt + 1,
+ sizeof(used_maps_new[0]),
+ GFP_KERNEL);
+ if (!used_maps_new) {
+ ret = -ENOMEM;
+ goto out_unlock;
+ }
+
+ memcpy(used_maps_new, used_maps_old,
+ sizeof(used_maps_old[0]) * prog->aux->used_map_cnt);
+ used_maps_new[prog->aux->used_map_cnt] = map;
+
+ prog->aux->used_map_cnt++;
+ prog->aux->used_maps = used_maps_new;
+
+ kfree(used_maps_old);
+
+out_unlock:
+ mutex_unlock(&prog->aux->used_maps_mutex);
+
+ if (ret)
+ bpf_map_put(map);
+out_prog_put:
+ bpf_prog_put(prog);
+ return ret;
+}
+
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
union bpf_attr attr;
@@ -4286,6 +4350,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_LINK_DETACH:
err = link_detach(&attr);
break;
+ case BPF_PROG_BIND_MAP:
+ err = bpf_prog_bind_map(&attr);
+ break;
default:
err = -EINVAL;
break;
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index 99af4cea1102..5b6af30bfbcd 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -22,7 +22,8 @@ struct bpf_iter_seq_task_info {
};
static struct task_struct *task_seq_get_next(struct pid_namespace *ns,
- u32 *tid)
+ u32 *tid,
+ bool skip_if_dup_files)
{
struct task_struct *task = NULL;
struct pid *pid;
@@ -36,6 +37,12 @@ retry:
if (!task) {
++*tid;
goto retry;
+ } else if (skip_if_dup_files && task->tgid != task->pid &&
+ task->files == task->group_leader->files) {
+ put_task_struct(task);
+ task = NULL;
+ ++*tid;
+ goto retry;
}
}
rcu_read_unlock();
@@ -48,7 +55,7 @@ static void *task_seq_start(struct seq_file *seq, loff_t *pos)
struct bpf_iter_seq_task_info *info = seq->private;
struct task_struct *task;
- task = task_seq_get_next(info->common.ns, &info->tid);
+ task = task_seq_get_next(info->common.ns, &info->tid, false);
if (!task)
return NULL;
@@ -65,7 +72,7 @@ static void *task_seq_next(struct seq_file *seq, void *v, loff_t *pos)
++*pos;
++info->tid;
put_task_struct((struct task_struct *)v);
- task = task_seq_get_next(info->common.ns, &info->tid);
+ task = task_seq_get_next(info->common.ns, &info->tid, false);
if (!task)
return NULL;
@@ -148,7 +155,7 @@ again:
curr_files = *fstruct;
curr_fd = info->fd;
} else {
- curr_task = task_seq_get_next(ns, &curr_tid);
+ curr_task = task_seq_get_next(ns, &curr_tid, true);
if (!curr_task)
return NULL;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 86fdebb5ffd8..42dee5dcbc74 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -238,7 +238,6 @@ struct bpf_call_arg_meta {
u64 msize_max_value;
int ref_obj_id;
int func_id;
- u32 btf_id;
};
struct btf *btf_vmlinux;
@@ -436,6 +435,15 @@ static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
return type == ARG_PTR_TO_SOCK_COMMON;
}
+static bool arg_type_may_be_null(enum bpf_arg_type type)
+{
+ return type == ARG_PTR_TO_MAP_VALUE_OR_NULL ||
+ type == ARG_PTR_TO_MEM_OR_NULL ||
+ type == ARG_PTR_TO_CTX_OR_NULL ||
+ type == ARG_PTR_TO_SOCKET_OR_NULL ||
+ type == ARG_PTR_TO_ALLOC_MEM_OR_NULL;
+}
+
/* Determine whether the function releases some resources allocated by another
* function call. The first reference type argument will be assumed to be
* released by release_reference().
@@ -1490,6 +1498,13 @@ static int check_subprogs(struct bpf_verifier_env *env)
for (i = 0; i < insn_cnt; i++) {
u8 code = insn[i].code;
+ if (code == (BPF_JMP | BPF_CALL) &&
+ insn[i].imm == BPF_FUNC_tail_call &&
+ insn[i].src_reg != BPF_PSEUDO_CALL)
+ subprog[cur_subprog].has_tail_call = true;
+ if (BPF_CLASS(code) == BPF_LD &&
+ (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
+ subprog[cur_subprog].has_ld_abs = true;
if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
goto next;
if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
@@ -2979,10 +2994,37 @@ static int check_max_stack_depth(struct bpf_verifier_env *env)
int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
struct bpf_subprog_info *subprog = env->subprog_info;
struct bpf_insn *insn = env->prog->insnsi;
+ bool tail_call_reachable = false;
int ret_insn[MAX_CALL_FRAMES];
int ret_prog[MAX_CALL_FRAMES];
+ int j;
process_func:
+ /* protect against potential stack overflow that might happen when
+ * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
+ * depth for such case down to 256 so that the worst case scenario
+ * would result in 8k stack size (32 which is tailcall limit * 256 =
+ * 8k).
+ *
+ * To get the idea what might happen, see an example:
+ * func1 -> sub rsp, 128
+ * subfunc1 -> sub rsp, 256
+ * tailcall1 -> add rsp, 256
+ * func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
+ * subfunc2 -> sub rsp, 64
+ * subfunc22 -> sub rsp, 128
+ * tailcall2 -> add rsp, 128
+ * func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
+ *
+ * tailcall will unwind the current stack frame but it will not get rid
+ * of caller's stack as shown on the example above.
+ */
+ if (idx && subprog[idx].has_tail_call && depth >= 256) {
+ verbose(env,
+ "tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
+ depth);
+ return -EACCES;
+ }
/* round up to 32-bytes, since this is granularity
* of interpreter stack size
*/
@@ -3011,6 +3053,10 @@ continue_func:
i);
return -EFAULT;
}
+
+ if (subprog[idx].has_tail_call)
+ tail_call_reachable = true;
+
frame++;
if (frame >= MAX_CALL_FRAMES) {
verbose(env, "the call stack of %d frames is too deep !\n",
@@ -3019,6 +3065,15 @@ continue_func:
}
goto process_func;
}
+ /* if tail call got detected across bpf2bpf calls then mark each of the
+ * currently present subprog frames as tail call reachable subprogs;
+ * this info will be utilized by JIT so that we will be preserving the
+ * tail call counter throughout bpf2bpf calls combined with tailcalls
+ */
+ if (tail_call_reachable)
+ for (j = 0; j < frame; j++)
+ subprog[ret_prog[j]].tail_call_reachable = true;
+
/* end of for() loop means the last insn of the 'subprog'
* was reached. Doesn't matter whether it was JA or EXIT
*/
@@ -3594,18 +3649,6 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
struct bpf_func_state *state = func(env, reg);
int err, min_off, max_off, i, j, slot, spi;
- if (reg->type != PTR_TO_STACK) {
- /* Allow zero-byte read from NULL, regardless of pointer type */
- if (zero_size_allowed && access_size == 0 &&
- register_is_null(reg))
- return 0;
-
- verbose(env, "R%d type=%s expected=%s\n", regno,
- reg_type_str[reg->type],
- reg_type_str[PTR_TO_STACK]);
- return -EACCES;
- }
-
if (tnum_is_const(reg->var_off)) {
min_off = max_off = reg->var_off.value + reg->off;
err = __check_stack_boundary(env, regno, min_off, access_size,
@@ -3750,9 +3793,19 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
access_size, zero_size_allowed,
"rdwr",
&env->prog->aux->max_rdwr_access);
- default: /* scalar_value|ptr_to_stack or invalid ptr */
+ case PTR_TO_STACK:
return check_stack_boundary(env, regno, access_size,
zero_size_allowed, meta);
+ default: /* scalar_value or invalid ptr */
+ /* Allow zero-byte read from NULL, regardless of pointer type */
+ if (zero_size_allowed && access_size == 0 &&
+ register_is_null(reg))
+ return 0;
+
+ verbose(env, "R%d type=%s expected=%s\n", regno,
+ reg_type_str[reg->type],
+ reg_type_str[PTR_TO_STACK]);
+ return -EACCES;
}
}
@@ -3784,10 +3837,6 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno,
struct bpf_map *map = reg->map_ptr;
u64 val = reg->var_off.value;
- if (reg->type != PTR_TO_MAP_VALUE) {
- verbose(env, "R%d is not a pointer to map_value\n", regno);
- return -EINVAL;
- }
if (!is_const) {
verbose(env,
"R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
@@ -3854,12 +3903,6 @@ static bool arg_type_is_mem_size(enum bpf_arg_type type)
type == ARG_CONST_SIZE_OR_ZERO;
}
-static bool arg_type_is_alloc_mem_ptr(enum bpf_arg_type type)
-{
- return type == ARG_PTR_TO_ALLOC_MEM ||
- type == ARG_PTR_TO_ALLOC_MEM_OR_NULL;
-}
-
static bool arg_type_is_alloc_size(enum bpf_arg_type type)
{
return type == ARG_CONST_ALLOC_SIZE_OR_ZERO;
@@ -3908,14 +3951,114 @@ static int resolve_map_arg_type(struct bpf_verifier_env *env,
return 0;
}
+struct bpf_reg_types {
+ const enum bpf_reg_type types[10];
+};
+
+static const struct bpf_reg_types map_key_value_types = {
+ .types = {
+ PTR_TO_STACK,
+ PTR_TO_PACKET,
+ PTR_TO_PACKET_META,
+ PTR_TO_MAP_VALUE,
+ },
+};
+
+static const struct bpf_reg_types sock_types = {
+ .types = {
+ PTR_TO_SOCK_COMMON,
+ PTR_TO_SOCKET,
+ PTR_TO_TCP_SOCK,
+ PTR_TO_XDP_SOCK,
+ },
+};
+
+static const struct bpf_reg_types mem_types = {
+ .types = {
+ PTR_TO_STACK,
+ PTR_TO_PACKET,
+ PTR_TO_PACKET_META,
+ PTR_TO_MAP_VALUE,
+ PTR_TO_MEM,
+ PTR_TO_RDONLY_BUF,
+ PTR_TO_RDWR_BUF,
+ },
+};
+
+static const struct bpf_reg_types int_ptr_types = {
+ .types = {
+ PTR_TO_STACK,
+ PTR_TO_PACKET,
+ PTR_TO_PACKET_META,
+ PTR_TO_MAP_VALUE,
+ },
+};
+
+static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
+static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
+static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
+static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM } };
+static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
+static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
+static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
+
+static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
+ [ARG_PTR_TO_MAP_KEY] = &map_key_value_types,
+ [ARG_PTR_TO_MAP_VALUE] = &map_key_value_types,
+ [ARG_PTR_TO_UNINIT_MAP_VALUE] = &map_key_value_types,
+ [ARG_PTR_TO_MAP_VALUE_OR_NULL] = &map_key_value_types,
+ [ARG_CONST_SIZE] = &scalar_types,
+ [ARG_CONST_SIZE_OR_ZERO] = &scalar_types,
+ [ARG_CONST_ALLOC_SIZE_OR_ZERO] = &scalar_types,
+ [ARG_CONST_MAP_PTR] = &const_map_ptr_types,
+ [ARG_PTR_TO_CTX] = &context_types,
+ [ARG_PTR_TO_CTX_OR_NULL] = &context_types,
+ [ARG_PTR_TO_SOCK_COMMON] = &sock_types,
+ [ARG_PTR_TO_SOCKET] = &fullsock_types,
+ [ARG_PTR_TO_SOCKET_OR_NULL] = &fullsock_types,
+ [ARG_PTR_TO_BTF_ID] = &btf_ptr_types,
+ [ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types,
+ [ARG_PTR_TO_MEM] = &mem_types,
+ [ARG_PTR_TO_MEM_OR_NULL] = &mem_types,
+ [ARG_PTR_TO_UNINIT_MEM] = &mem_types,
+ [ARG_PTR_TO_ALLOC_MEM] = &alloc_mem_types,
+ [ARG_PTR_TO_ALLOC_MEM_OR_NULL] = &alloc_mem_types,
+ [ARG_PTR_TO_INT] = &int_ptr_types,
+ [ARG_PTR_TO_LONG] = &int_ptr_types,
+};
+
+static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
+ const struct bpf_reg_types *compatible)
+{
+ struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
+ enum bpf_reg_type expected, type = reg->type;
+ int i, j;
+
+ for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
+ expected = compatible->types[i];
+ if (expected == NOT_INIT)
+ break;
+
+ if (type == expected)
+ return 0;
+ }
+
+ verbose(env, "R%d type=%s expected=", regno, reg_type_str[type]);
+ for (j = 0; j + 1 < i; j++)
+ verbose(env, "%s, ", reg_type_str[compatible->types[j]]);
+ verbose(env, "%s\n", reg_type_str[compatible->types[j]]);
+ return -EACCES;
+}
+
static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
struct bpf_call_arg_meta *meta,
const struct bpf_func_proto *fn)
{
u32 regno = BPF_REG_1 + arg;
struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
- enum bpf_reg_type expected_type, type = reg->type;
enum bpf_arg_type arg_type = fn->arg_type[arg];
+ const struct bpf_reg_types *compatible;
+ enum bpf_reg_type type = reg->type;
int err = 0;
if (arg_type == ARG_DONTCARE)
@@ -3948,125 +4091,48 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
return err;
}
- if (arg_type == ARG_PTR_TO_MAP_KEY ||
- arg_type == ARG_PTR_TO_MAP_VALUE ||
- arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE ||
- arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) {
- expected_type = PTR_TO_STACK;
- if (register_is_null(reg) &&
- arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL)
- /* final test in check_stack_boundary() */;
- else if (!type_is_pkt_pointer(type) &&
- type != PTR_TO_MAP_VALUE &&
- type != expected_type)
- goto err_type;
- } else if (arg_type == ARG_CONST_SIZE ||
- arg_type == ARG_CONST_SIZE_OR_ZERO ||
- arg_type == ARG_CONST_ALLOC_SIZE_OR_ZERO) {
- expected_type = SCALAR_VALUE;
- if (type != expected_type)
- goto err_type;
- } else if (arg_type == ARG_CONST_MAP_PTR) {
- expected_type = CONST_PTR_TO_MAP;
- if (type != expected_type)
- goto err_type;
- } else if (arg_type == ARG_PTR_TO_CTX ||
- arg_type == ARG_PTR_TO_CTX_OR_NULL) {
- expected_type = PTR_TO_CTX;
- if (!(register_is_null(reg) &&
- arg_type == ARG_PTR_TO_CTX_OR_NULL)) {
- if (type != expected_type)
- goto err_type;
- err = check_ctx_reg(env, reg, regno);
- if (err < 0)
- return err;
+ if (register_is_null(reg) && arg_type_may_be_null(arg_type))
+ /* A NULL register has a SCALAR_VALUE type, so skip
+ * type checking.
+ */
+ goto skip_type_check;
+
+ compatible = compatible_reg_types[arg_type];
+ if (!compatible) {
+ verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
+ return -EFAULT;
+ }
+
+ err = check_reg_type(env, regno, compatible);
+ if (err)
+ return err;
+
+ if (type == PTR_TO_BTF_ID) {
+ const u32 *btf_id = fn->arg_btf_id[arg];
+
+ if (!btf_id) {
+ verbose(env, "verifier internal error: missing BTF ID\n");
+ return -EFAULT;
}
- } else if (arg_type == ARG_PTR_TO_SOCK_COMMON) {
- expected_type = PTR_TO_SOCK_COMMON;
- /* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */
- if (!type_is_sk_pointer(type))
- goto err_type;
- if (reg->ref_obj_id) {
- if (meta->ref_obj_id) {
- verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
- regno, reg->ref_obj_id,
- meta->ref_obj_id);
- return -EFAULT;
- }
- meta->ref_obj_id = reg->ref_obj_id;
- }
- } else if (arg_type == ARG_PTR_TO_SOCKET ||
- arg_type == ARG_PTR_TO_SOCKET_OR_NULL) {
- expected_type = PTR_TO_SOCKET;
- if (!(register_is_null(reg) &&
- arg_type == ARG_PTR_TO_SOCKET_OR_NULL)) {
- if (type != expected_type)
- goto err_type;
- }
- } else if (arg_type == ARG_PTR_TO_BTF_ID) {
- bool ids_match = false;
-
- expected_type = PTR_TO_BTF_ID;
- if (type != expected_type)
- goto err_type;
- if (!fn->check_btf_id) {
- if (reg->btf_id != meta->btf_id) {
- ids_match = btf_struct_ids_match(&env->log, reg->off, reg->btf_id,
- meta->btf_id);
- if (!ids_match) {
- verbose(env, "Helper has type %s got %s in R%d\n",
- kernel_type_name(meta->btf_id),
- kernel_type_name(reg->btf_id), regno);
- return -EACCES;
- }
- }
- } else if (!fn->check_btf_id(reg->btf_id, arg)) {
- verbose(env, "Helper does not support %s in R%d\n",
- kernel_type_name(reg->btf_id), regno);
+ if (!btf_struct_ids_match(&env->log, reg->off, reg->btf_id, *btf_id)) {
+ verbose(env, "R%d is of type %s but %s is expected\n",
+ regno, kernel_type_name(reg->btf_id), kernel_type_name(*btf_id));
return -EACCES;
}
- if ((reg->off && !ids_match) || !tnum_is_const(reg->var_off) || reg->var_off.value) {
+ if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n",
regno);
return -EACCES;
}
- } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
- if (meta->func_id == BPF_FUNC_spin_lock) {
- if (process_spin_lock(env, regno, true))
- return -EACCES;
- } else if (meta->func_id == BPF_FUNC_spin_unlock) {
- if (process_spin_lock(env, regno, false))
- return -EACCES;
- } else {
- verbose(env, "verifier internal error\n");
- return -EFAULT;
- }
- } else if (arg_type_is_mem_ptr(arg_type)) {
- expected_type = PTR_TO_STACK;
- /* One exception here. In case function allows for NULL to be
- * passed in as argument, it's a SCALAR_VALUE type. Final test
- * happens during stack boundary checking.
- */
- if (register_is_null(reg) &&
- (arg_type == ARG_PTR_TO_MEM_OR_NULL ||
- arg_type == ARG_PTR_TO_ALLOC_MEM_OR_NULL))
- /* final test in check_stack_boundary() */;
- else if (!type_is_pkt_pointer(type) &&
- type != PTR_TO_MAP_VALUE &&
- type != PTR_TO_MEM &&
- type != PTR_TO_RDONLY_BUF &&
- type != PTR_TO_RDWR_BUF &&
- type != expected_type)
- goto err_type;
- meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM;
- } else if (arg_type_is_alloc_mem_ptr(arg_type)) {
- expected_type = PTR_TO_MEM;
- if (register_is_null(reg) &&
- arg_type == ARG_PTR_TO_ALLOC_MEM_OR_NULL)
- /* final test in check_stack_boundary() */;
- else if (type != expected_type)
- goto err_type;
+ } else if (type == PTR_TO_CTX) {
+ err = check_ctx_reg(env, reg, regno);
+ if (err < 0)
+ return err;
+ }
+
+skip_type_check:
+ if (reg->ref_obj_id) {
if (meta->ref_obj_id) {
verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
regno, reg->ref_obj_id,
@@ -4074,15 +4140,6 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
return -EFAULT;
}
meta->ref_obj_id = reg->ref_obj_id;
- } else if (arg_type_is_int_ptr(arg_type)) {
- expected_type = PTR_TO_STACK;
- if (!type_is_pkt_pointer(type) &&
- type != PTR_TO_MAP_VALUE &&
- type != expected_type)
- goto err_type;
- } else {
- verbose(env, "unsupported arg_type %d\n", arg_type);
- return -EFAULT;
}
if (arg_type == ARG_CONST_MAP_PTR) {
@@ -4121,6 +4178,22 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
err = check_helper_mem_access(env, regno,
meta->map_ptr->value_size, false,
meta);
+ } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
+ if (meta->func_id == BPF_FUNC_spin_lock) {
+ if (process_spin_lock(env, regno, true))
+ return -EACCES;
+ } else if (meta->func_id == BPF_FUNC_spin_unlock) {
+ if (process_spin_lock(env, regno, false))
+ return -EACCES;
+ } else {
+ verbose(env, "verifier internal error\n");
+ return -EFAULT;
+ }
+ } else if (arg_type_is_mem_ptr(arg_type)) {
+ /* The access to this pointer is only checked when we hit the
+ * next is_mem_size argument below.
+ */
+ meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MEM);
} else if (arg_type_is_mem_size(arg_type)) {
bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
@@ -4186,10 +4259,6 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
}
return err;
-err_type:
- verbose(env, "R%d type=%s expected=%s\n", regno,
- reg_type_str[type], reg_type_str[expected_type]);
- return -EACCES;
}
static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
@@ -4224,6 +4293,11 @@ static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
return false;
}
+static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
+{
+ return env->prog->jit_requested && IS_ENABLED(CONFIG_X86_64);
+}
+
static int check_map_func_compatibility(struct bpf_verifier_env *env,
struct bpf_map *map, int func_id)
{
@@ -4339,8 +4413,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
case BPF_FUNC_tail_call:
if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
goto error;
- if (env->subprog_cnt > 1) {
- verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n");
+ if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) {
+ verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
return -EINVAL;
}
break;
@@ -4495,10 +4569,22 @@ static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
return count <= 1;
}
+static bool check_btf_id_ok(const struct bpf_func_proto *fn)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++)
+ if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i])
+ return false;
+
+ return true;
+}
+
static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
{
return check_raw_mode_ok(fn) &&
check_arg_pair_ok(fn) &&
+ check_btf_id_ok(fn) &&
check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
}
@@ -4894,11 +4980,6 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
meta.func_id = func_id;
/* check args */
for (i = 0; i < 5; i++) {
- if (!fn->check_btf_id) {
- err = btf_resolve_helper_id(&env->log, fn, i);
- if (err > 0)
- meta.btf_id = err;
- }
err = check_func_arg(env, i, &meta, fn);
if (err)
return err;
@@ -5317,6 +5398,10 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
dst, reg_type_str[ptr_reg->type]);
return -EACCES;
case CONST_PTR_TO_MAP:
+ /* smin_val represents the known value */
+ if (known && smin_val == 0 && opcode == BPF_ADD)
+ break;
+ /* fall-through */
case PTR_TO_PACKET_END:
case PTR_TO_SOCKET:
case PTR_TO_SOCKET_OR_NULL:
@@ -7461,18 +7546,6 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
return -EINVAL;
}
- if (env->subprog_cnt > 1) {
- /* when program has LD_ABS insn JITs and interpreter assume
- * that r1 == ctx == skb which is not the case for callees
- * that can have arbitrary arguments. It's problematic
- * for main prog as well since JITs would need to analyze
- * all functions in order to make proper register save/restore
- * decisions in the main prog. Hence disallow LD_ABS with calls
- */
- verbose(env, "BPF_LD_[ABS|IND] instructions cannot be mixed with bpf-to-bpf calls\n");
- return -EINVAL;
- }
-
if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
BPF_SIZE(insn->code) == BPF_DW ||
(mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
@@ -7883,6 +7956,23 @@ err_free:
return ret;
}
+static int check_abnormal_return(struct bpf_verifier_env *env)
+{
+ int i;
+
+ for (i = 1; i < env->subprog_cnt; i++) {
+ if (env->subprog_info[i].has_ld_abs) {
+ verbose(env, "LD_ABS is not allowed in subprogs without BTF\n");
+ return -EINVAL;
+ }
+ if (env->subprog_info[i].has_tail_call) {
+ verbose(env, "tail_call is not allowed in subprogs without BTF\n");
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
/* The minimum supported BTF func info size */
#define MIN_BPF_FUNCINFO_SIZE 8
#define MAX_FUNCINFO_REC_SIZE 252
@@ -7891,20 +7981,24 @@ static int check_btf_func(struct bpf_verifier_env *env,
const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
+ const struct btf_type *type, *func_proto, *ret_type;
u32 i, nfuncs, urec_size, min_size;
u32 krec_size = sizeof(struct bpf_func_info);
struct bpf_func_info *krecord;
struct bpf_func_info_aux *info_aux = NULL;
- const struct btf_type *type;
struct bpf_prog *prog;
const struct btf *btf;
void __user *urecord;
u32 prev_offset = 0;
+ bool scalar_return;
int ret = -ENOMEM;
nfuncs = attr->func_info_cnt;
- if (!nfuncs)
+ if (!nfuncs) {
+ if (check_abnormal_return(env))
+ return -EINVAL;
return 0;
+ }
if (nfuncs != env->subprog_cnt) {
verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
@@ -7952,25 +8046,23 @@ static int check_btf_func(struct bpf_verifier_env *env,
}
/* check insn_off */
+ ret = -EINVAL;
if (i == 0) {
if (krecord[i].insn_off) {
verbose(env,
"nonzero insn_off %u for the first func info record",
krecord[i].insn_off);
- ret = -EINVAL;
goto err_free;
}
} else if (krecord[i].insn_off <= prev_offset) {
verbose(env,
"same or smaller insn offset (%u) than previous func info record (%u)",
krecord[i].insn_off, prev_offset);
- ret = -EINVAL;
goto err_free;
}
if (env->subprog_info[i].start != krecord[i].insn_off) {
verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
- ret = -EINVAL;
goto err_free;
}
@@ -7979,10 +8071,26 @@ static int check_btf_func(struct bpf_verifier_env *env,
if (!type || !btf_type_is_func(type)) {
verbose(env, "invalid type id %d in func info",
krecord[i].type_id);
- ret = -EINVAL;
goto err_free;
}
info_aux[i].linkage = BTF_INFO_VLEN(type->info);
+
+ func_proto = btf_type_by_id(btf, type->type);
+ if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto)))
+ /* btf_func_check() already verified it during BTF load */
+ goto err_free;
+ ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
+ scalar_return =
+ btf_type_is_small_int(ret_type) || btf_type_is_enum(ret_type);
+ if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
+ verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
+ goto err_free;
+ }
+ if (i && !scalar_return && env->subprog_info[i].has_tail_call) {
+ verbose(env, "tail_call is only allowed in functions that return 'int'.\n");
+ goto err_free;
+ }
+
prev_offset = krecord[i].insn_off;
urecord += urec_size;
}
@@ -8143,8 +8251,11 @@ static int check_btf_info(struct bpf_verifier_env *env,
struct btf *btf;
int err;
- if (!attr->func_info_cnt && !attr->line_info_cnt)
+ if (!attr->func_info_cnt && !attr->line_info_cnt) {
+ if (check_abnormal_return(env))
+ return -EINVAL;
return 0;
+ }
btf = btf_get_by_fd(attr->prog_btf_fd);
if (IS_ERR(btf))
@@ -9619,6 +9730,18 @@ static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len
}
}
+static void adjust_poke_descs(struct bpf_prog *prog, u32 len)
+{
+ struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
+ int i, sz = prog->aux->size_poke_tab;
+ struct bpf_jit_poke_descriptor *desc;
+
+ for (i = 0; i < sz; i++) {
+ desc = &tab[i];
+ desc->insn_idx += len - 1;
+ }
+}
+
static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
const struct bpf_insn *patch, u32 len)
{
@@ -9635,6 +9758,7 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
if (adjust_insn_aux_data(env, new_prog, off, len))
return NULL;
adjust_subprog_starts(env, off, len);
+ adjust_poke_descs(new_prog, len);
return new_prog;
}
@@ -10165,6 +10289,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
{
struct bpf_prog *prog = env->prog, **func, *tmp;
int i, j, subprog_start, subprog_end = 0, len, subprog;
+ struct bpf_map *map_ptr;
struct bpf_insn *insn;
void *old_bpf_func;
int err, num_exentries;
@@ -10232,6 +10357,31 @@ static int jit_subprogs(struct bpf_verifier_env *env)
func[i]->aux->btf = prog->aux->btf;
func[i]->aux->func_info = prog->aux->func_info;
+ for (j = 0; j < prog->aux->size_poke_tab; j++) {
+ u32 insn_idx = prog->aux->poke_tab[j].insn_idx;
+ int ret;
+
+ if (!(insn_idx >= subprog_start &&
+ insn_idx <= subprog_end))
+ continue;
+
+ ret = bpf_jit_add_poke_descriptor(func[i],
+ &prog->aux->poke_tab[j]);
+ if (ret < 0) {
+ verbose(env, "adding tail call poke descriptor failed\n");
+ goto out_free;
+ }
+
+ func[i]->insnsi[insn_idx - subprog_start].imm = ret + 1;
+
+ map_ptr = func[i]->aux->poke_tab[ret].tail_call.map;
+ ret = map_ptr->ops->map_poke_track(map_ptr, func[i]->aux);
+ if (ret < 0) {
+ verbose(env, "tracking tail call prog failed\n");
+ goto out_free;
+ }
+ }
+
/* Use bpf_prog_F_tag to indicate functions in stack traces.
* Long term would need debug info to populate names
*/
@@ -10250,6 +10400,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
num_exentries++;
}
func[i]->aux->num_exentries = num_exentries;
+ func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
func[i] = bpf_int_jit_compile(func[i]);
if (!func[i]->jited) {
err = -ENOTSUPP;
@@ -10257,6 +10408,19 @@ static int jit_subprogs(struct bpf_verifier_env *env)
}
cond_resched();
}
+
+ /* Untrack main program's aux structs so that during map_poke_run()
+ * we will not stumble upon the unfilled poke descriptors; each
+ * of the main program's poke descs got distributed across subprogs
+ * and got tracked onto map, so we are sure that none of them will
+ * be missed after the operation below
+ */
+ for (i = 0; i < prog->aux->size_poke_tab; i++) {
+ map_ptr = prog->aux->poke_tab[i].tail_call.map;
+
+ map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
+ }
+
/* at this point all bpf functions were successfully JITed
* now populate all bpf_calls with correct addresses and
* run last pass of JIT
@@ -10325,9 +10489,16 @@ static int jit_subprogs(struct bpf_verifier_env *env)
bpf_prog_free_unused_jited_linfo(prog);
return 0;
out_free:
- for (i = 0; i < env->subprog_cnt; i++)
- if (func[i])
- bpf_jit_free(func[i]);
+ for (i = 0; i < env->subprog_cnt; i++) {
+ if (!func[i])
+ continue;
+
+ for (j = 0; j < func[i]->aux->size_poke_tab; j++) {
+ map_ptr = func[i]->aux->poke_tab[j].tail_call.map;
+ map_ptr->ops->map_poke_untrack(map_ptr, func[i]->aux);
+ }
+ bpf_jit_free(func[i]);
+ }
kfree(func);
out_undo_insn:
/* cleanup main prog to be interpreted */
@@ -10361,6 +10532,13 @@ static int fixup_call_args(struct bpf_verifier_env *env)
return err;
}
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
+ if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
+ /* When JIT fails the progs with bpf2bpf calls and tail_calls
+ * have to be rejected, since interpreter doesn't support them yet.
+ */
+ verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
+ return -EINVAL;
+ }
for (i = 0; i < prog->len; i++, insn++) {
if (insn->code != (BPF_JMP | BPF_CALL) ||
insn->src_reg != BPF_PSEUDO_CALL)
@@ -10524,8 +10702,9 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
* the program array.
*/
prog->cb_access = 1;
- env->prog->aux->stack_depth = MAX_BPF_STACK;
- env->prog->aux->max_pkt_offset = MAX_PACKET_OFF;
+ if (!allow_tail_call_in_subprogs(env))
+ prog->aux->stack_depth = MAX_BPF_STACK;
+ prog->aux->max_pkt_offset = MAX_PACKET_OFF;
/* mark bpf_tail_call as different opcode to avoid
* conditional branch in the interpeter for every normal
@@ -10545,6 +10724,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
.reason = BPF_POKE_REASON_TAIL_CALL,
.tail_call.map = BPF_MAP_PTR(aux->map_ptr_state),
.tail_call.key = bpf_map_key_immediate(aux),
+ .insn_idx = i + delta,
};
ret = bpf_jit_add_poke_descriptor(prog, &desc);
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index b2a5380eb187..36508f46a8db 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -743,19 +743,18 @@ out:
return err;
}
-BTF_ID_LIST(bpf_seq_printf_btf_ids)
-BTF_ID(struct, seq_file)
+BTF_ID_LIST_SINGLE(btf_seq_file_ids, struct, seq_file)
static const struct bpf_func_proto bpf_seq_printf_proto = {
.func = bpf_seq_printf,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg1_btf_id = &btf_seq_file_ids[0],
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE,
.arg4_type = ARG_PTR_TO_MEM_OR_NULL,
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
- .btf_id = bpf_seq_printf_btf_ids,
};
BPF_CALL_3(bpf_seq_write, struct seq_file *, m, const void *, data, u32, len)
@@ -763,17 +762,14 @@ BPF_CALL_3(bpf_seq_write, struct seq_file *, m, const void *, data, u32, len)
return seq_write(m, data, len) ? -EOVERFLOW : 0;
}
-BTF_ID_LIST(bpf_seq_write_btf_ids)
-BTF_ID(struct, seq_file)
-
static const struct bpf_func_proto bpf_seq_write_proto = {
.func = bpf_seq_write,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg1_btf_id = &btf_seq_file_ids[0],
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
- .btf_id = bpf_seq_write_btf_ids,
};
static __always_inline int
@@ -1118,6 +1114,14 @@ BPF_CALL_3(bpf_d_path, struct path *, path, char *, buf, u32, sz)
}
BTF_SET_START(btf_allowlist_d_path)
+#ifdef CONFIG_SECURITY
+BTF_ID(func, security_file_permission)
+BTF_ID(func, security_inode_getattr)
+BTF_ID(func, security_file_open)
+#endif
+#ifdef CONFIG_SECURITY_PATH
+BTF_ID(func, security_path_truncate)
+#endif
BTF_ID(func, vfs_truncate)
BTF_ID(func, vfs_fallocate)
BTF_ID(func, dentry_open)
@@ -1130,17 +1134,16 @@ static bool bpf_d_path_allowed(const struct bpf_prog *prog)
return btf_id_set_contains(&btf_allowlist_d_path, prog->aux->attach_btf_id);
}
-BTF_ID_LIST(bpf_d_path_btf_ids)
-BTF_ID(struct, path)
+BTF_ID_LIST_SINGLE(bpf_d_path_btf_ids, struct, path)
static const struct bpf_func_proto bpf_d_path_proto = {
.func = bpf_d_path,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg1_btf_id = &bpf_d_path_btf_ids[0],
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
- .btf_id = bpf_d_path_btf_ids,
.allowed = bpf_d_path_allowed,
};