aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/bpf/syscall.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/bpf/syscall.c')
-rw-r--r--kernel/bpf/syscall.c336
1 files changed, 280 insertions, 56 deletions
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 86299a292214..8f50c9c19f1b 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -4,6 +4,7 @@
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/bpf_lirc.h>
+#include <linux/bpf_verifier.h>
#include <linux/btf.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
@@ -29,6 +30,7 @@
#include <linux/bpf_lsm.h>
#include <linux/poll.h>
#include <linux/bpf-netns.h>
+#include <linux/rcupdate_trace.h>
#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
(map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
@@ -90,6 +92,7 @@ int bpf_check_uarg_tail_zero(void __user *uaddr,
}
const struct bpf_map_ops bpf_map_offload_ops = {
+ .map_meta_equal = bpf_map_meta_equal,
.map_alloc = bpf_map_offload_map_alloc,
.map_free = bpf_map_offload_map_free,
.map_check_btf = map_check_no_btf,
@@ -157,10 +160,11 @@ static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key,
if (bpf_map_is_dev_bound(map)) {
return bpf_map_offload_update_elem(map, key, value, flags);
} else if (map->map_type == BPF_MAP_TYPE_CPUMAP ||
- map->map_type == BPF_MAP_TYPE_SOCKHASH ||
- map->map_type == BPF_MAP_TYPE_SOCKMAP ||
map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
return map->ops->map_update_elem(map, key, value, flags);
+ } else if (map->map_type == BPF_MAP_TYPE_SOCKHASH ||
+ map->map_type == BPF_MAP_TYPE_SOCKMAP) {
+ return sock_map_update_elem_sys(map, key, value, flags);
} else if (IS_FD_PROG_ARRAY(map)) {
return bpf_fd_array_map_update_elem(map, f.file, key, value,
flags);
@@ -768,7 +772,8 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
if (map->map_type != BPF_MAP_TYPE_HASH &&
map->map_type != BPF_MAP_TYPE_ARRAY &&
map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
- map->map_type != BPF_MAP_TYPE_SK_STORAGE)
+ map->map_type != BPF_MAP_TYPE_SK_STORAGE &&
+ map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
return -ENOTSUPP;
if (map->spin_lock_off + sizeof(struct bpf_spin_lock) >
map->value_size) {
@@ -1728,10 +1733,14 @@ static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred)
btf_put(prog->aux->btf);
bpf_prog_free_linfo(prog);
- if (deferred)
- call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
- else
+ if (deferred) {
+ if (prog->aux->sleepable)
+ call_rcu_tasks_trace(&prog->aux->rcu, __bpf_prog_put_rcu);
+ else
+ call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
+ } else {
__bpf_prog_put_rcu(&prog->aux->rcu);
+ }
}
static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
@@ -2029,7 +2038,7 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
case BPF_PROG_TYPE_EXT:
if (expected_attach_type)
return -EINVAL;
- /* fallthrough */
+ fallthrough;
default:
return 0;
}
@@ -2101,6 +2110,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT |
BPF_F_ANY_ALIGNMENT |
BPF_F_TEST_STATE_FREQ |
+ BPF_F_SLEEPABLE |
BPF_F_TEST_RND_HI32))
return -EINVAL;
@@ -2145,17 +2155,18 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
prog->expected_attach_type = attr->expected_attach_type;
prog->aux->attach_btf_id = attr->attach_btf_id;
if (attr->attach_prog_fd) {
- struct bpf_prog *tgt_prog;
+ struct bpf_prog *dst_prog;
- tgt_prog = bpf_prog_get(attr->attach_prog_fd);
- if (IS_ERR(tgt_prog)) {
- err = PTR_ERR(tgt_prog);
+ dst_prog = bpf_prog_get(attr->attach_prog_fd);
+ if (IS_ERR(dst_prog)) {
+ err = PTR_ERR(dst_prog);
goto free_prog_nouncharge;
}
- prog->aux->linked_prog = tgt_prog;
+ prog->aux->dst_prog = dst_prog;
}
prog->aux->offload_requested = !!attr->prog_ifindex;
+ prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE;
err = security_bpf_prog_alloc(prog->aux);
if (err)
@@ -2488,11 +2499,23 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd)
struct bpf_tracing_link {
struct bpf_link link;
enum bpf_attach_type attach_type;
+ struct bpf_trampoline *trampoline;
+ struct bpf_prog *tgt_prog;
};
static void bpf_tracing_link_release(struct bpf_link *link)
{
- WARN_ON_ONCE(bpf_trampoline_unlink_prog(link->prog));
+ struct bpf_tracing_link *tr_link =
+ container_of(link, struct bpf_tracing_link, link);
+
+ WARN_ON_ONCE(bpf_trampoline_unlink_prog(link->prog,
+ tr_link->trampoline));
+
+ bpf_trampoline_put(tr_link->trampoline);
+
+ /* tgt_prog is NULL if target is a kernel function */
+ if (tr_link->tgt_prog)
+ bpf_prog_put(tr_link->tgt_prog);
}
static void bpf_tracing_link_dealloc(struct bpf_link *link)
@@ -2532,10 +2555,15 @@ static const struct bpf_link_ops bpf_tracing_link_lops = {
.fill_link_info = bpf_tracing_link_fill_link_info,
};
-static int bpf_tracing_prog_attach(struct bpf_prog *prog)
+static int bpf_tracing_prog_attach(struct bpf_prog *prog,
+ int tgt_prog_fd,
+ u32 btf_id)
{
struct bpf_link_primer link_primer;
+ struct bpf_prog *tgt_prog = NULL;
+ struct bpf_trampoline *tr = NULL;
struct bpf_tracing_link *link;
+ u64 key = 0;
int err;
switch (prog->type) {
@@ -2564,6 +2592,28 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog)
goto out_put_prog;
}
+ if (!!tgt_prog_fd != !!btf_id) {
+ err = -EINVAL;
+ goto out_put_prog;
+ }
+
+ if (tgt_prog_fd) {
+ /* For now we only allow new targets for BPF_PROG_TYPE_EXT */
+ if (prog->type != BPF_PROG_TYPE_EXT) {
+ err = -EINVAL;
+ goto out_put_prog;
+ }
+
+ tgt_prog = bpf_prog_get(tgt_prog_fd);
+ if (IS_ERR(tgt_prog)) {
+ err = PTR_ERR(tgt_prog);
+ tgt_prog = NULL;
+ goto out_put_prog;
+ }
+
+ key = bpf_trampoline_compute_key(tgt_prog, btf_id);
+ }
+
link = kzalloc(sizeof(*link), GFP_USER);
if (!link) {
err = -ENOMEM;
@@ -2573,20 +2623,100 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog)
&bpf_tracing_link_lops, prog);
link->attach_type = prog->expected_attach_type;
- err = bpf_link_prime(&link->link, &link_primer);
- if (err) {
- kfree(link);
- goto out_put_prog;
+ mutex_lock(&prog->aux->dst_mutex);
+
+ /* There are a few possible cases here:
+ *
+ * - if prog->aux->dst_trampoline is set, the program was just loaded
+ * and not yet attached to anything, so we can use the values stored
+ * in prog->aux
+ *
+ * - if prog->aux->dst_trampoline is NULL, the program has already been
+ * attached to a target and its initial target was cleared (below)
+ *
+ * - if tgt_prog != NULL, the caller specified tgt_prog_fd +
+ * target_btf_id using the link_create API.
+ *
+ * - if tgt_prog == NULL when this function was called using the old
+ * raw_tracepoint_open API, and we need a target from prog->aux
+ *
+ * The combination of no saved target in prog->aux, and no target
+ * specified on load is illegal, and we reject that here.
+ */
+ if (!prog->aux->dst_trampoline && !tgt_prog) {
+ err = -ENOENT;
+ goto out_unlock;
+ }
+
+ if (!prog->aux->dst_trampoline ||
+ (key && key != prog->aux->dst_trampoline->key)) {
+ /* If there is no saved target, or the specified target is
+ * different from the destination specified at load time, we
+ * need a new trampoline and a check for compatibility
+ */
+ struct bpf_attach_target_info tgt_info = {};
+
+ err = bpf_check_attach_target(NULL, prog, tgt_prog, btf_id,
+ &tgt_info);
+ if (err)
+ goto out_unlock;
+
+ tr = bpf_trampoline_get(key, &tgt_info);
+ if (!tr) {
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+ } else {
+ /* The caller didn't specify a target, or the target was the
+ * same as the destination supplied during program load. This
+ * means we can reuse the trampoline and reference from program
+ * load time, and there is no need to allocate a new one. This
+ * can only happen once for any program, as the saved values in
+ * prog->aux are cleared below.
+ */
+ tr = prog->aux->dst_trampoline;
+ tgt_prog = prog->aux->dst_prog;
}
- err = bpf_trampoline_link_prog(prog);
+ err = bpf_link_prime(&link->link, &link_primer);
+ if (err)
+ goto out_unlock;
+
+ err = bpf_trampoline_link_prog(prog, tr);
if (err) {
bpf_link_cleanup(&link_primer);
- goto out_put_prog;
+ link = NULL;
+ goto out_unlock;
}
+ link->tgt_prog = tgt_prog;
+ link->trampoline = tr;
+
+ /* Always clear the trampoline and target prog from prog->aux to make
+ * sure the original attach destination is not kept alive after a
+ * program is (re-)attached to another target.
+ */
+ if (prog->aux->dst_prog &&
+ (tgt_prog_fd || tr != prog->aux->dst_trampoline))
+ /* got extra prog ref from syscall, or attaching to different prog */
+ bpf_prog_put(prog->aux->dst_prog);
+ if (prog->aux->dst_trampoline && tr != prog->aux->dst_trampoline)
+ /* we allocated a new trampoline, so free the old one */
+ bpf_trampoline_put(prog->aux->dst_trampoline);
+
+ prog->aux->dst_prog = NULL;
+ prog->aux->dst_trampoline = NULL;
+ mutex_unlock(&prog->aux->dst_mutex);
+
return bpf_link_settle(&link_primer);
+out_unlock:
+ if (tr && tr != prog->aux->dst_trampoline)
+ bpf_trampoline_put(tr);
+ mutex_unlock(&prog->aux->dst_mutex);
+ kfree(link);
out_put_prog:
+ if (tgt_prog_fd && tgt_prog)
+ bpf_prog_put(tgt_prog);
bpf_prog_put(prog);
return err;
}
@@ -2634,7 +2764,7 @@ static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link,
u32 ulen = info->raw_tracepoint.tp_name_len;
size_t tp_len = strlen(tp_name);
- if (ulen && !ubuf)
+ if (!ulen ^ !ubuf)
return -EINVAL;
info->raw_tracepoint.tp_name_len = tp_len + 1;
@@ -2700,7 +2830,7 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
tp_name = prog->aux->attach_func_name;
break;
}
- return bpf_tracing_prog_attach(prog);
+ return bpf_tracing_prog_attach(prog, 0, 0);
case BPF_PROG_TYPE_RAW_TRACEPOINT:
case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
if (strncpy_from_user(buf,
@@ -2783,7 +2913,6 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
case BPF_CGROUP_INET_INGRESS:
case BPF_CGROUP_INET_EGRESS:
return BPF_PROG_TYPE_CGROUP_SKB;
- break;
case BPF_CGROUP_INET_SOCK_CREATE:
case BPF_CGROUP_INET_SOCK_RELEASE:
case BPF_CGROUP_INET4_POST_BIND:
@@ -2969,7 +3098,7 @@ static int bpf_prog_query(const union bpf_attr *attr,
}
}
-#define BPF_PROG_TEST_RUN_LAST_FIELD test.ctx_out
+#define BPF_PROG_TEST_RUN_LAST_FIELD test.cpu
static int bpf_prog_test_run(const union bpf_attr *attr,
union bpf_attr __user *uattr)
@@ -3152,21 +3281,25 @@ static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog,
const struct bpf_map *map;
int i;
+ mutex_lock(&prog->aux->used_maps_mutex);
for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) {
map = prog->aux->used_maps[i];
if (map == (void *)addr) {
*type = BPF_PSEUDO_MAP_FD;
- return map;
+ goto out;
}
if (!map->ops->map_direct_value_meta)
continue;
if (!map->ops->map_direct_value_meta(map, addr, off)) {
*type = BPF_PSEUDO_MAP_VALUE;
- return map;
+ goto out;
}
}
+ map = NULL;
- return NULL;
+out:
+ mutex_unlock(&prog->aux->used_maps_mutex);
+ return map;
}
static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog,
@@ -3284,6 +3417,7 @@ static int bpf_prog_get_info_by_fd(struct file *file,
memcpy(info.tag, prog->tag, sizeof(prog->tag));
memcpy(info.name, prog->aux->name, sizeof(prog->aux->name));
+ mutex_lock(&prog->aux->used_maps_mutex);
ulen = info.nr_map_ids;
info.nr_map_ids = prog->aux->used_map_cnt;
ulen = min_t(u32, info.nr_map_ids, ulen);
@@ -3293,9 +3427,12 @@ static int bpf_prog_get_info_by_fd(struct file *file,
for (i = 0; i < ulen; i++)
if (put_user(prog->aux->used_maps[i]->id,
- &user_map_ids[i]))
+ &user_map_ids[i])) {
+ mutex_unlock(&prog->aux->used_maps_mutex);
return -EFAULT;
+ }
}
+ mutex_unlock(&prog->aux->used_maps_mutex);
err = set_info_rec_size(&info);
if (err)
@@ -3876,10 +4013,15 @@ err_put:
static int tracing_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
- if (attr->link_create.attach_type == BPF_TRACE_ITER &&
- prog->expected_attach_type == BPF_TRACE_ITER)
- return bpf_iter_link_attach(attr, prog);
+ if (attr->link_create.attach_type != prog->expected_attach_type)
+ return -EINVAL;
+ if (prog->expected_attach_type == BPF_TRACE_ITER)
+ return bpf_iter_link_attach(attr, prog);
+ else if (prog->type == BPF_PROG_TYPE_EXT)
+ return bpf_tracing_prog_attach(prog,
+ attr->link_create.target_fd,
+ attr->link_create.target_btf_id);
return -EINVAL;
}
@@ -3893,18 +4035,25 @@ static int link_create(union bpf_attr *attr)
if (CHECK_ATTR(BPF_LINK_CREATE))
return -EINVAL;
- ptype = attach_type_to_prog_type(attr->link_create.attach_type);
- if (ptype == BPF_PROG_TYPE_UNSPEC)
- return -EINVAL;
-
- prog = bpf_prog_get_type(attr->link_create.prog_fd, ptype);
+ prog = bpf_prog_get(attr->link_create.prog_fd);
if (IS_ERR(prog))
return PTR_ERR(prog);
ret = bpf_prog_attach_check_attach_type(prog,
attr->link_create.attach_type);
if (ret)
- goto err_out;
+ goto out;
+
+ if (prog->type == BPF_PROG_TYPE_EXT) {
+ ret = tracing_bpf_link_attach(attr, prog);
+ goto out;
+ }
+
+ ptype = attach_type_to_prog_type(attr->link_create.attach_type);
+ if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) {
+ ret = -EINVAL;
+ goto out;
+ }
switch (ptype) {
case BPF_PROG_TYPE_CGROUP_SKB:
@@ -3932,7 +4081,7 @@ static int link_create(union bpf_attr *attr)
ret = -EINVAL;
}
-err_out:
+out:
if (ret < 0)
bpf_prog_put(prog);
return ret;
@@ -4014,40 +4163,50 @@ static int link_detach(union bpf_attr *attr)
return ret;
}
-static int bpf_link_inc_not_zero(struct bpf_link *link)
+static struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link)
{
- return atomic64_fetch_add_unless(&link->refcnt, 1, 0) ? 0 : -ENOENT;
+ return atomic64_fetch_add_unless(&link->refcnt, 1, 0) ? link : ERR_PTR(-ENOENT);
}
-#define BPF_LINK_GET_FD_BY_ID_LAST_FIELD link_id
-
-static int bpf_link_get_fd_by_id(const union bpf_attr *attr)
+struct bpf_link *bpf_link_by_id(u32 id)
{
struct bpf_link *link;
- u32 id = attr->link_id;
- int fd, err;
-
- if (CHECK_ATTR(BPF_LINK_GET_FD_BY_ID))
- return -EINVAL;
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
+ if (!id)
+ return ERR_PTR(-ENOENT);
spin_lock_bh(&link_idr_lock);
- link = idr_find(&link_idr, id);
/* before link is "settled", ID is 0, pretend it doesn't exist yet */
+ link = idr_find(&link_idr, id);
if (link) {
if (link->id)
- err = bpf_link_inc_not_zero(link);
+ link = bpf_link_inc_not_zero(link);
else
- err = -EAGAIN;
+ link = ERR_PTR(-EAGAIN);
} else {
- err = -ENOENT;
+ link = ERR_PTR(-ENOENT);
}
spin_unlock_bh(&link_idr_lock);
+ return link;
+}
- if (err)
- return err;
+#define BPF_LINK_GET_FD_BY_ID_LAST_FIELD link_id
+
+static int bpf_link_get_fd_by_id(const union bpf_attr *attr)
+{
+ struct bpf_link *link;
+ u32 id = attr->link_id;
+ int fd;
+
+ if (CHECK_ATTR(BPF_LINK_GET_FD_BY_ID))
+ return -EINVAL;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ link = bpf_link_by_id(id);
+ if (IS_ERR(link))
+ return PTR_ERR(link);
fd = bpf_link_new_fd(link);
if (fd < 0)
@@ -4133,6 +4292,68 @@ static int bpf_iter_create(union bpf_attr *attr)
return err;
}
+#define BPF_PROG_BIND_MAP_LAST_FIELD prog_bind_map.flags
+
+static int bpf_prog_bind_map(union bpf_attr *attr)
+{
+ struct bpf_prog *prog;
+ struct bpf_map *map;
+ struct bpf_map **used_maps_old, **used_maps_new;
+ int i, ret = 0;
+
+ if (CHECK_ATTR(BPF_PROG_BIND_MAP))
+ return -EINVAL;
+
+ if (attr->prog_bind_map.flags)
+ return -EINVAL;
+
+ prog = bpf_prog_get(attr->prog_bind_map.prog_fd);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+ map = bpf_map_get(attr->prog_bind_map.map_fd);
+ if (IS_ERR(map)) {
+ ret = PTR_ERR(map);
+ goto out_prog_put;
+ }
+
+ mutex_lock(&prog->aux->used_maps_mutex);
+
+ used_maps_old = prog->aux->used_maps;
+
+ for (i = 0; i < prog->aux->used_map_cnt; i++)
+ if (used_maps_old[i] == map) {
+ bpf_map_put(map);
+ goto out_unlock;
+ }
+
+ used_maps_new = kmalloc_array(prog->aux->used_map_cnt + 1,
+ sizeof(used_maps_new[0]),
+ GFP_KERNEL);
+ if (!used_maps_new) {
+ ret = -ENOMEM;
+ goto out_unlock;
+ }
+
+ memcpy(used_maps_new, used_maps_old,
+ sizeof(used_maps_old[0]) * prog->aux->used_map_cnt);
+ used_maps_new[prog->aux->used_map_cnt] = map;
+
+ prog->aux->used_map_cnt++;
+ prog->aux->used_maps = used_maps_new;
+
+ kfree(used_maps_old);
+
+out_unlock:
+ mutex_unlock(&prog->aux->used_maps_mutex);
+
+ if (ret)
+ bpf_map_put(map);
+out_prog_put:
+ bpf_prog_put(prog);
+ return ret;
+}
+
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
union bpf_attr attr;
@@ -4266,6 +4487,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_LINK_DETACH:
err = link_detach(&attr);
break;
+ case BPF_PROG_BIND_MAP:
+ err = bpf_prog_bind_map(&attr);
+ break;
default:
err = -EINVAL;
break;