aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@kernel.org>2023-11-09 10:45:45 -0800
committerAlexei Starovoitov <ast@kernel.org>2023-11-09 19:07:52 -0800
commit3f6d04d742d9fbd492a79e28e7cfe4e2a97c66e5 (patch)
treec234716003d73512a51a9e4e43b0a3cecf0c38ef /kernel
parentbpf: replace register_is_const() with is_reg_const() (diff)
parentselftests/bpf: Test bpf_refcount_acquire of node obtained via direct ld (diff)
downloadlinux-rng-3f6d04d742d9fbd492a79e28e7cfe4e2a97c66e5.tar.xz
linux-rng-3f6d04d742d9fbd492a79e28e7cfe4e2a97c66e5.zip
Merge branch 'allow-bpf_refcount_acquire-of-mapval-obtained-via-direct-ld'
Dave Marchevsky says: ==================== Allow bpf_refcount_acquire of mapval obtained via direct LD Consider this BPF program: struct cgv_node { int d; struct bpf_refcount r; struct bpf_rb_node rb; }; struct val_stash { struct cgv_node __kptr *v; }; struct { __uint(type, BPF_MAP_TYPE_ARRAY); __type(key, int); __type(value, struct val_stash); __uint(max_entries, 10); } array_map SEC(".maps"); long bpf_program(void *ctx) { struct val_stash *mapval; struct cgv_node *p; int idx = 0; mapval = bpf_map_lookup_elem(&array_map, &idx); if (!mapval || !mapval->v) { /* omitted */ } p = bpf_refcount_acquire(mapval->v); /* Verification FAILs here */ /* Add p to some tree */ return 0; } Verification fails on the refcount_acquire: 160: (79) r1 = *(u64 *)(r8 +8) ; R1_w=untrusted_ptr_or_null_cgv_node(id=11,off=0,imm=0) R8_w=map_value(id=10,off=0,ks=8,vs=16,imm=0) refs=6 161: (b7) r2 = 0 ; R2_w=0 refs=6 162: (85) call bpf_refcount_acquire_impl#117824 arg#0 is neither owning or non-owning ref The above verifier dump is actually from sched_ext's scx_flatcg [0], which is the motivating usecase for this series' changes. Specifically, scx_flatcg stashes a rb_node type w/ cgroup-specific info (struct cgv_node) in a map when the cgroup is created, then later puts that cgroup's node in a rbtree in .enqueue . Making struct cgv_node refcounted would simplify the code a bit by virtue of allowing us to remove the kptr_xchg's, but "later puts that cgroups node in a rbtree" is not possible without a refcount_acquire, which suffers from the above verification failure. If we get rid of PTR_UNTRUSTED flag, and add MEM_ALLOC | NON_OWN_REF, mapval->v would be a non-owning ref and verification would succeed. Due to the most recent set of refcount changes [1], which modified bpf_obj_drop behavior to not reuse refcounted graph node's underlying memory until after RCU grace period, this is safe to do. Once mapval->v has the correct flags it _is_ a non-owning reference and verification of the motivating example will succeed. [0]: https://github.com/sched-ext/sched_ext/blob/52911e1040a0f94b9c426dddcc00be5364a7ae9f/tools/sched_ext/scx_flatcg.bpf.c#L275 [1]: https://lore.kernel.org/bpf/20230821193311.3290257-1-davemarchevsky@fb.com/ Summary of patches: * Patch 1 fixes an issue with bpf_refcount_acquire verification letting MAYBE_NULL ptrs through * Patch 2 tests Patch 1's fix * Patch 3 broadens the use of "free only after RCU GP" to all user-allocated types * Patch 4 is a small nonfunctional refactoring * Patch 5 changes verifier to mark direct LD of stashed graph node kptr as non-owning ref * Patch 6 tests Patch 5's verifier changes Changelog: v1 -> v2: https://lore.kernel.org/bpf/20231025214007.2920506-1-davemarchevsky@fb.com/ Series title changed to "Allow bpf_refcount_acquire of mapval obtained via direct LD". V1's title was mistakenly truncated. * Patch 5 ("bpf: Mark direct ld of stashed bpf_{rb,list}_node as non-owning ref") * Direct LD of percpu kptr should not have MEM_ALLOC flag (Yonghong) * Patch 6 ("selftests/bpf: Test bpf_refcount_acquire of node obtained via direct ld") * Test read from stashed value as well ==================== Link: https://lore.kernel.org/r/20231107085639.3016113-1-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/btf.c11
-rw-r--r--kernel/bpf/helpers.c7
-rw-r--r--kernel/bpf/verifier.c36
3 files changed, 37 insertions, 17 deletions
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 15d71d2986d3..63cf4128fc05 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3840,9 +3840,6 @@ end:
return ERR_PTR(ret);
}
-#define GRAPH_ROOT_MASK (BPF_LIST_HEAD | BPF_RB_ROOT)
-#define GRAPH_NODE_MASK (BPF_LIST_NODE | BPF_RB_NODE)
-
int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec)
{
int i;
@@ -3855,13 +3852,13 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec)
* Hence we only need to ensure that bpf_{list_head,rb_root} ownership
* does not form cycles.
*/
- if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & GRAPH_ROOT_MASK))
+ if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & BPF_GRAPH_ROOT))
return 0;
for (i = 0; i < rec->cnt; i++) {
struct btf_struct_meta *meta;
u32 btf_id;
- if (!(rec->fields[i].type & GRAPH_ROOT_MASK))
+ if (!(rec->fields[i].type & BPF_GRAPH_ROOT))
continue;
btf_id = rec->fields[i].graph_root.value_btf_id;
meta = btf_find_struct_meta(btf, btf_id);
@@ -3873,7 +3870,7 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec)
* to check ownership cycle for a type unless it's also a
* node type.
*/
- if (!(rec->field_mask & GRAPH_NODE_MASK))
+ if (!(rec->field_mask & BPF_GRAPH_NODE))
continue;
/* We need to ensure ownership acyclicity among all types. The
@@ -3909,7 +3906,7 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec)
* - A is both an root and node.
* - B is only an node.
*/
- if (meta->record->field_mask & GRAPH_ROOT_MASK)
+ if (meta->record->field_mask & BPF_GRAPH_ROOT)
return -ELOOP;
}
return 0;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 174f02a9e703..03517db5cfb3 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1937,10 +1937,7 @@ void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu)
ma = &bpf_global_percpu_ma;
else
ma = &bpf_global_ma;
- if (rec && rec->refcount_off >= 0)
- bpf_mem_free_rcu(ma, p);
- else
- bpf_mem_free(ma, p);
+ bpf_mem_free_rcu(ma, p);
}
__bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign)
@@ -2520,7 +2517,7 @@ BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_percpu_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_percpu_obj_drop_impl, KF_RELEASE)
-BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE | KF_RET_NULL | KF_RCU)
BTF_ID_FLAGS(func, bpf_list_push_front_impl)
BTF_ID_FLAGS(func, bpf_list_push_back_impl)
BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 993e4677bbe9..9ae6eae13471 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5557,10 +5557,23 @@ BTF_SET_END(rcu_protected_types)
static bool rcu_protected_object(const struct btf *btf, u32 btf_id)
{
if (!btf_is_kernel(btf))
- return false;
+ return true;
return btf_id_set_contains(&rcu_protected_types, btf_id);
}
+static struct btf_record *kptr_pointee_btf_record(struct btf_field *kptr_field)
+{
+ struct btf_struct_meta *meta;
+
+ if (btf_is_kernel(kptr_field->kptr.btf))
+ return NULL;
+
+ meta = btf_find_struct_meta(kptr_field->kptr.btf,
+ kptr_field->kptr.btf_id);
+
+ return meta ? meta->record : NULL;
+}
+
static bool rcu_safe_kptr(const struct btf_field *field)
{
const struct btf_field_kptr *kptr = &field->kptr;
@@ -5571,12 +5584,25 @@ static bool rcu_safe_kptr(const struct btf_field *field)
static u32 btf_ld_kptr_type(struct bpf_verifier_env *env, struct btf_field *kptr_field)
{
+ struct btf_record *rec;
+ u32 ret;
+
+ ret = PTR_MAYBE_NULL;
if (rcu_safe_kptr(kptr_field) && in_rcu_cs(env)) {
- if (kptr_field->type != BPF_KPTR_PERCPU)
- return PTR_MAYBE_NULL | MEM_RCU;
- return PTR_MAYBE_NULL | MEM_RCU | MEM_PERCPU;
+ ret |= MEM_RCU;
+ if (kptr_field->type == BPF_KPTR_PERCPU)
+ ret |= MEM_PERCPU;
+ else if (!btf_is_kernel(kptr_field->kptr.btf))
+ ret |= MEM_ALLOC;
+
+ rec = kptr_pointee_btf_record(kptr_field);
+ if (rec && btf_record_has_field(rec, BPF_GRAPH_NODE))
+ ret |= NON_OWN_REF;
+ } else {
+ ret |= PTR_UNTRUSTED;
}
- return PTR_MAYBE_NULL | PTR_UNTRUSTED;
+
+ return ret;
}
static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,