diff options
author | 2024-12-04 08:38:30 -0800 | |
---|---|---|
committer | 2024-12-04 08:38:30 -0800 | |
commit | 932fc2f19b740da117cfbabcbfe5985f16f79abb (patch) | |
tree | b67f6c5e56b31a5fce3869f1ea8c0bf29f42e505 | |
parent | selftests/bpf: ensure proper root namespace cleanup when test fail (diff) | |
parent | selftests/bpf: Add IRQ save/restore tests (diff) | |
download | wireguard-linux-932fc2f19b740da117cfbabcbfe5985f16f79abb.tar.xz wireguard-linux-932fc2f19b740da117cfbabcbfe5985f16f79abb.zip |
Merge branch 'irq-save-restore'
Kumar Kartikeya Dwivedi says:
====================
IRQ save/restore
This set introduces support for managing IRQ state from BPF programs.
Two new kfuncs, bpf_local_irq_save, and bpf_local_irq_restore are
introduced to enable this functionality.
Intended use cases are writing IRQ safe data structures (e.g. memory
allocator) in BPF programs natively, and use in new spin locking
primitives intended to be introduced in the next few weeks.
The set begins with some refactoring patches before the actual
functionality is introduced. Patch 1 consolidates all resource related
state in bpf_verifier_state, and moves it out from bpf_func_state.
Patch 2 refactor acquire and release functions for reference state to
make them reusable without duplication for other resource types.
After this, patch 3 refactors stack slot liveness marking logic to be
shared between dynptr, and iterators, in preparation for introducing
same logic for irq flag object on stack.
Finally, patch 4 and 7 introduce the new kfuncs and their selftests. For
more details, please inspect the patch commit logs. Patch 5 makes the
error message in case of resource leaks under BPF_EXIT a bit clearer.
Patch 6 expands coverage of existing preempt-disable selftest to cover
sleepable kfuncs.
See individual patches for more details.
Changelog:
----------
v5 -> v6
v5: https://lore.kernel.org/bpf/20241129001632.3828611-1-memxor@gmail.com
* Add Eduard's Acked-by on patch 2
* Remove gen_id parameter to acquire_reference_state (Alexei)
* Remove space before REF_TYPE_LOCK (Alexei)
* Fix link to v4 in changelog
v4 -> v5
v4: https://lore.kernel.org/bpf/20241127213535.3657472-1-memxor@gmail.com
* Do regno - 1 when printing argument
* Pass verifier state explicitly into print_{insn,verifier}_state (Eduard)
* Pass frameno instead of bpf_func_state (Eduard)
* Move bpf_reference_state *refs after parent to fill two holes in
bpf_verifier_state (Eduard). The hunk fixing that bug is in the
commit adding IRQ save/restore kfuncs, as it is only needed then.
* Fix bug in release_reference_state breaking stack property (Eduard)
* Add selftest for triggering and reproducing bug found by Eduard
irq_ooo_refs_array in final patch
* Print insn_idx and active_irq_id on error (Eduard)
* Add more acks
v3 -> v4
v3: https://lore.kernel.org/bpf/20241127165846.2001009-1-memxor@gmail.com
* Add yet another missing kfunc declaration to silence s390 CI
v2 -> v3
v2: https://lore.kernel.org/bpf/20241127153306.1484562-1-memxor@gmail.com
* Drop REF_TYPE_LOCK_MASK
* Add kfunc declarations to selftest to silence s390 CI errors
v1 -> v2
v1: https://lore.kernel.org/bpf/20241121005329.408873-1-memxor@gmail.com
* Drop reference -> resource renaming in the verifier (Eduard, Alexei)
* Change verifier log for check_resource_leak for BPF_EXIT (Eduard)
* Remove id parameter from acquire_resource_state, read s->id (Eduard)
* Rename erase to release for reference state (Eduard)
* Move resource state to bpf_verifier_state (Eduard, Alexei)
* Drop unnecessary casting to/from u64 in helpers (Eduard)
* Add test for arg != PTR_TO_STACK (Eduard)
* Drop now redundant tests (Eduard)
* Address some other misc nits
* Add Reviewed-by and Acked-by from Eduard
====================
Link: https://patch.msgid.link/20241204030400.208005-1-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-rw-r--r-- | include/linux/bpf_verifier.h | 26 | ||||
-rw-r--r-- | kernel/bpf/helpers.c | 17 | ||||
-rw-r--r-- | kernel/bpf/log.c | 21 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 573 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/prog_tests/verifier.c | 2 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/progs/exceptions_fail.c | 4 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/progs/irq.c | 444 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/progs/preempt_lock.c | 28 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/progs/verifier_spin_lock.c | 2 |
9 files changed, 949 insertions, 168 deletions
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index f4290c179bee..de09ac3067ae 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -233,6 +233,7 @@ enum bpf_stack_slot_type { */ STACK_DYNPTR, STACK_ITER, + STACK_IRQ_FLAG, }; #define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ @@ -254,8 +255,9 @@ struct bpf_reference_state { * default to pointer reference on zero initialization of a state. */ enum ref_state_type { - REF_TYPE_PTR = 0, - REF_TYPE_LOCK, + REF_TYPE_PTR = 1, + REF_TYPE_IRQ = 2, + REF_TYPE_LOCK = 3, } type; /* Track each reference created with a unique id, even if the same * instruction creates the reference multiple times (eg, via CALL). @@ -315,9 +317,6 @@ struct bpf_func_state { u32 callback_depth; /* The following fields should be last. See copy_func_state() */ - int acquired_refs; - int active_locks; - struct bpf_reference_state *refs; /* The state of the stack. Each element of the array describes BPF_REG_SIZE * (i.e. 8) bytes worth of stack memory. * stack[0] represents bytes [*(r10-8)..*(r10-1)] @@ -370,6 +369,8 @@ struct bpf_verifier_state { /* call stack tracking */ struct bpf_func_state *frame[MAX_CALL_FRAMES]; struct bpf_verifier_state *parent; + /* Acquired reference states */ + struct bpf_reference_state *refs; /* * 'branches' field is the number of branches left to explore: * 0 - all possible paths from this state reached bpf_exit or @@ -419,9 +420,13 @@ struct bpf_verifier_state { u32 insn_idx; u32 curframe; - bool speculative; + u32 acquired_refs; + u32 active_locks; + u32 active_preempt_locks; + u32 active_irq_id; bool active_rcu_lock; - u32 active_preempt_lock; + + bool speculative; /* If this state was ever pointed-to by other state's loop_entry field * this flag would be set to true. Used to avoid freeing such states * while they are still in use. @@ -979,8 +984,9 @@ const char *dynptr_type_str(enum bpf_dynptr_type type); const char *iter_type_str(const struct btf *btf, u32 btf_id); const char *iter_state_str(enum bpf_iter_state state); -void print_verifier_state(struct bpf_verifier_env *env, - const struct bpf_func_state *state, bool print_all); -void print_insn_state(struct bpf_verifier_env *env, const struct bpf_func_state *state); +void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_verifier_state *vstate, + u32 frameno, bool print_all); +void print_insn_state(struct bpf_verifier_env *env, const struct bpf_verifier_state *vstate, + u32 frameno); #endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 751c150f9e1c..532ea74d4850 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -3057,6 +3057,21 @@ __bpf_kfunc int bpf_copy_from_user_str(void *dst, u32 dst__sz, const void __user return ret + 1; } +/* Keep unsinged long in prototype so that kfunc is usable when emitted to + * vmlinux.h in BPF programs directly, but note that while in BPF prog, the + * unsigned long always points to 8-byte region on stack, the kernel may only + * read and write the 4-bytes on 32-bit. + */ +__bpf_kfunc void bpf_local_irq_save(unsigned long *flags__irq_flag) +{ + local_irq_save(*flags__irq_flag); +} + +__bpf_kfunc void bpf_local_irq_restore(unsigned long *flags__irq_flag) +{ + local_irq_restore(*flags__irq_flag); +} + __bpf_kfunc_end_defs(); BTF_KFUNCS_START(generic_btf_ids) @@ -3149,6 +3164,8 @@ BTF_ID_FLAGS(func, bpf_get_kmem_cache) BTF_ID_FLAGS(func, bpf_iter_kmem_cache_new, KF_ITER_NEW | KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_iter_kmem_cache_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_iter_kmem_cache_destroy, KF_ITER_DESTROY | KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_local_irq_save) +BTF_ID_FLAGS(func, bpf_local_irq_restore) BTF_KFUNCS_END(common_btf_ids) static const struct btf_kfunc_id_set common_kfunc_set = { diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index 4a858fdb6476..38050f4ee400 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -537,6 +537,7 @@ static char slot_type_char[] = { [STACK_ZERO] = '0', [STACK_DYNPTR] = 'd', [STACK_ITER] = 'i', + [STACK_IRQ_FLAG] = 'f' }; static void print_liveness(struct bpf_verifier_env *env, @@ -753,9 +754,10 @@ static void print_reg_state(struct bpf_verifier_env *env, verbose(env, ")"); } -void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_func_state *state, - bool print_all) +void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_verifier_state *vstate, + u32 frameno, bool print_all) { + const struct bpf_func_state *state = vstate->frame[frameno]; const struct bpf_reg_state *reg; int i; @@ -843,11 +845,11 @@ void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_func_st break; } } - if (state->acquired_refs && state->refs[0].id) { - verbose(env, " refs=%d", state->refs[0].id); - for (i = 1; i < state->acquired_refs; i++) - if (state->refs[i].id) - verbose(env, ",%d", state->refs[i].id); + if (vstate->acquired_refs && vstate->refs[0].id) { + verbose(env, " refs=%d", vstate->refs[0].id); + for (i = 1; i < vstate->acquired_refs; i++) + if (vstate->refs[i].id) + verbose(env, ",%d", vstate->refs[i].id); } if (state->in_callback_fn) verbose(env, " cb"); @@ -864,7 +866,8 @@ static inline u32 vlog_alignment(u32 pos) BPF_LOG_MIN_ALIGNMENT) - pos - 1; } -void print_insn_state(struct bpf_verifier_env *env, const struct bpf_func_state *state) +void print_insn_state(struct bpf_verifier_env *env, const struct bpf_verifier_state *vstate, + u32 frameno) { if (env->prev_log_pos && env->prev_log_pos == env->log.end_pos) { /* remove new line character */ @@ -873,5 +876,5 @@ void print_insn_state(struct bpf_verifier_env *env, const struct bpf_func_state } else { verbose(env, "%d:", env->insn_idx); } - print_verifier_state(env, state, false); + print_verifier_state(env, vstate, frameno, false); } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1c4ebb326785..31e0d33498ac 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -196,7 +196,8 @@ struct bpf_verifier_stack_elem { #define BPF_PRIV_STACK_MIN_SIZE 64 -static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx); +static int acquire_reference(struct bpf_verifier_env *env, int insn_idx); +static int release_reference_nomark(struct bpf_verifier_state *state, int ref_obj_id); static int release_reference(struct bpf_verifier_env *env, int ref_obj_id); static void invalidate_non_owning_refs(struct bpf_verifier_env *env); static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env); @@ -660,6 +661,11 @@ static int iter_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg, return stack_slot_obj_get_spi(env, reg, "iter", nr_slots); } +static int irq_flag_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + return stack_slot_obj_get_spi(env, reg, "irq_flag", 1); +} + static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type) { switch (arg_type & DYNPTR_TYPE_FLAG_MASK) { @@ -771,7 +777,7 @@ static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_ if (clone_ref_obj_id) id = clone_ref_obj_id; else - id = acquire_reference_state(env, insn_idx); + id = acquire_reference(env, insn_idx); if (id < 0) return id; @@ -1033,7 +1039,7 @@ static int mark_stack_slots_iter(struct bpf_verifier_env *env, if (spi < 0) return spi; - id = acquire_reference_state(env, insn_idx); + id = acquire_reference(env, insn_idx); if (id < 0) return id; @@ -1155,10 +1161,136 @@ static int is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_s return 0; } +static int acquire_irq_state(struct bpf_verifier_env *env, int insn_idx); +static int release_irq_state(struct bpf_verifier_state *state, int id); + +static int mark_stack_slot_irq_flag(struct bpf_verifier_env *env, + struct bpf_kfunc_call_arg_meta *meta, + struct bpf_reg_state *reg, int insn_idx) +{ + struct bpf_func_state *state = func(env, reg); + struct bpf_stack_state *slot; + struct bpf_reg_state *st; + int spi, i, id; + + spi = irq_flag_get_spi(env, reg); + if (spi < 0) + return spi; + + id = acquire_irq_state(env, insn_idx); + if (id < 0) + return id; + + slot = &state->stack[spi]; + st = &slot->spilled_ptr; + + __mark_reg_known_zero(st); + st->type = PTR_TO_STACK; /* we don't have dedicated reg type */ + st->live |= REG_LIVE_WRITTEN; + st->ref_obj_id = id; + + for (i = 0; i < BPF_REG_SIZE; i++) + slot->slot_type[i] = STACK_IRQ_FLAG; + + mark_stack_slot_scratched(env, spi); + return 0; +} + +static int unmark_stack_slot_irq_flag(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + struct bpf_func_state *state = func(env, reg); + struct bpf_stack_state *slot; + struct bpf_reg_state *st; + int spi, i, err; + + spi = irq_flag_get_spi(env, reg); + if (spi < 0) + return spi; + + slot = &state->stack[spi]; + st = &slot->spilled_ptr; + + err = release_irq_state(env->cur_state, st->ref_obj_id); + WARN_ON_ONCE(err && err != -EACCES); + if (err) { + int insn_idx = 0; + + for (int i = 0; i < env->cur_state->acquired_refs; i++) { + if (env->cur_state->refs[i].id == env->cur_state->active_irq_id) { + insn_idx = env->cur_state->refs[i].insn_idx; + break; + } + } + + verbose(env, "cannot restore irq state out of order, expected id=%d acquired at insn_idx=%d\n", + env->cur_state->active_irq_id, insn_idx); + return err; + } + + __mark_reg_not_init(env, st); + + /* see unmark_stack_slots_dynptr() for why we need to set REG_LIVE_WRITTEN */ + st->live |= REG_LIVE_WRITTEN; + + for (i = 0; i < BPF_REG_SIZE; i++) + slot->slot_type[i] = STACK_INVALID; + + mark_stack_slot_scratched(env, spi); + return 0; +} + +static bool is_irq_flag_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + struct bpf_func_state *state = func(env, reg); + struct bpf_stack_state *slot; + int spi, i; + + /* For -ERANGE (i.e. spi not falling into allocated stack slots), we + * will do check_mem_access to check and update stack bounds later, so + * return true for that case. + */ + spi = irq_flag_get_spi(env, reg); + if (spi == -ERANGE) + return true; + if (spi < 0) + return false; + + slot = &state->stack[spi]; + + for (i = 0; i < BPF_REG_SIZE; i++) + if (slot->slot_type[i] == STACK_IRQ_FLAG) + return false; + return true; +} + +static int is_irq_flag_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + struct bpf_func_state *state = func(env, reg); + struct bpf_stack_state *slot; + struct bpf_reg_state *st; + int spi, i; + + spi = irq_flag_get_spi(env, reg); + if (spi < 0) + return -EINVAL; + + slot = &state->stack[spi]; + st = &slot->spilled_ptr; + + if (!st->ref_obj_id) + return -EINVAL; + + for (i = 0; i < BPF_REG_SIZE; i++) + if (slot->slot_type[i] != STACK_IRQ_FLAG) + return -EINVAL; + return 0; +} + /* Check if given stack slot is "special": * - spilled register state (STACK_SPILL); * - dynptr state (STACK_DYNPTR); * - iter state (STACK_ITER). + * - irq flag state (STACK_IRQ_FLAG) */ static bool is_stack_slot_special(const struct bpf_stack_state *stack) { @@ -1168,6 +1300,7 @@ static bool is_stack_slot_special(const struct bpf_stack_state *stack) case STACK_SPILL: case STACK_DYNPTR: case STACK_ITER: + case STACK_IRQ_FLAG: return true; case STACK_INVALID: case STACK_MISC: @@ -1279,15 +1412,18 @@ out: return arr ? arr : ZERO_SIZE_PTR; } -static int copy_reference_state(struct bpf_func_state *dst, const struct bpf_func_state *src) +static int copy_reference_state(struct bpf_verifier_state *dst, const struct bpf_verifier_state *src) { dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs, sizeof(struct bpf_reference_state), GFP_KERNEL); if (!dst->refs) return -ENOMEM; - dst->active_locks = src->active_locks; dst->acquired_refs = src->acquired_refs; + dst->active_locks = src->active_locks; + dst->active_preempt_locks = src->active_preempt_locks; + dst->active_rcu_lock = src->active_rcu_lock; + dst->active_irq_id = src->active_irq_id; return 0; } @@ -1304,7 +1440,7 @@ static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_st return 0; } -static int resize_reference_state(struct bpf_func_state *state, size_t n) +static int resize_reference_state(struct bpf_verifier_state *state, size_t n) { state->refs = realloc_array(state->refs, state->acquired_refs, n, sizeof(struct bpf_reference_state)); @@ -1347,94 +1483,128 @@ static int grow_stack_state(struct bpf_verifier_env *env, struct bpf_func_state * On success, returns a valid pointer id to associate with the register * On failure, returns a negative errno. */ -static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx) +static struct bpf_reference_state *acquire_reference_state(struct bpf_verifier_env *env, int insn_idx) { - struct bpf_func_state *state = cur_func(env); + struct bpf_verifier_state *state = env->cur_state; int new_ofs = state->acquired_refs; - int id, err; + int err; err = resize_reference_state(state, state->acquired_refs + 1); if (err) - return err; - id = ++env->id_gen; - state->refs[new_ofs].type = REF_TYPE_PTR; - state->refs[new_ofs].id = id; + return NULL; state->refs[new_ofs].insn_idx = insn_idx; - return id; + return &state->refs[new_ofs]; +} + +static int acquire_reference(struct bpf_verifier_env *env, int insn_idx) +{ + struct bpf_reference_state *s; + + s = acquire_reference_state(env, insn_idx); + if (!s) + return -ENOMEM; + s->type = REF_TYPE_PTR; + s->id = ++env->id_gen; + return s->id; } static int acquire_lock_state(struct bpf_verifier_env *env, int insn_idx, enum ref_state_type type, int id, void *ptr) { - struct bpf_func_state *state = cur_func(env); - int new_ofs = state->acquired_refs; - int err; + struct bpf_verifier_state *state = env->cur_state; + struct bpf_reference_state *s; - err = resize_reference_state(state, state->acquired_refs + 1); - if (err) - return err; - state->refs[new_ofs].type = type; - state->refs[new_ofs].id = id; - state->refs[new_ofs].insn_idx = insn_idx; - state->refs[new_ofs].ptr = ptr; + s = acquire_reference_state(env, insn_idx); + s->type = type; + s->id = id; + s->ptr = ptr; state->active_locks++; return 0; } -/* release function corresponding to acquire_reference_state(). Idempotent. */ -static int release_reference_state(struct bpf_func_state *state, int ptr_id) +static int acquire_irq_state(struct bpf_verifier_env *env, int insn_idx) +{ + struct bpf_verifier_state *state = env->cur_state; + struct bpf_reference_state *s; + + s = acquire_reference_state(env, insn_idx); + if (!s) + return -ENOMEM; + s->type = REF_TYPE_IRQ; + s->id = ++env->id_gen; + + state->active_irq_id = s->id; + return s->id; +} + +static void release_reference_state(struct bpf_verifier_state *state, int idx) { - int i, last_idx; + int last_idx; + size_t rem; + /* IRQ state requires the relative ordering of elements remaining the + * same, since it relies on the refs array to behave as a stack, so that + * it can detect out-of-order IRQ restore. Hence use memmove to shift + * the array instead of swapping the final element into the deleted idx. + */ last_idx = state->acquired_refs - 1; + rem = state->acquired_refs - idx - 1; + if (last_idx && idx != last_idx) + memmove(&state->refs[idx], &state->refs[idx + 1], sizeof(*state->refs) * rem); + memset(&state->refs[last_idx], 0, sizeof(*state->refs)); + state->acquired_refs--; + return; +} + +static int release_lock_state(struct bpf_verifier_state *state, int type, int id, void *ptr) +{ + int i; + for (i = 0; i < state->acquired_refs; i++) { - if (state->refs[i].type != REF_TYPE_PTR) + if (state->refs[i].type != type) continue; - if (state->refs[i].id == ptr_id) { - if (last_idx && i != last_idx) - memcpy(&state->refs[i], &state->refs[last_idx], - sizeof(*state->refs)); - memset(&state->refs[last_idx], 0, sizeof(*state->refs)); - state->acquired_refs--; + if (state->refs[i].id == id && state->refs[i].ptr == ptr) { + release_reference_state(state, i); + state->active_locks--; return 0; } } return -EINVAL; } -static int release_lock_state(struct bpf_func_state *state, int type, int id, void *ptr) +static int release_irq_state(struct bpf_verifier_state *state, int id) { - int i, last_idx; + u32 prev_id = 0; + int i; + + if (id != state->active_irq_id) + return -EACCES; - last_idx = state->acquired_refs - 1; for (i = 0; i < state->acquired_refs; i++) { - if (state->refs[i].type != type) + if (state->refs[i].type != REF_TYPE_IRQ) continue; - if (state->refs[i].id == id && state->refs[i].ptr == ptr) { - if (last_idx && i != last_idx) - memcpy(&state->refs[i], &state->refs[last_idx], - sizeof(*state->refs)); - memset(&state->refs[last_idx], 0, sizeof(*state->refs)); - state->acquired_refs--; - state->active_locks--; + if (state->refs[i].id == id) { + release_reference_state(state, i); + state->active_irq_id = prev_id; return 0; + } else { + prev_id = state->refs[i].id; } } return -EINVAL; } -static struct bpf_reference_state *find_lock_state(struct bpf_verifier_env *env, enum ref_state_type type, +static struct bpf_reference_state *find_lock_state(struct bpf_verifier_state *state, enum ref_state_type type, int id, void *ptr) { - struct bpf_func_state *state = cur_func(env); int i; for (i = 0; i < state->acquired_refs; i++) { struct bpf_reference_state *s = &state->refs[i]; - if (s->type == REF_TYPE_PTR || s->type != type) + if (s->type != type) continue; if (s->id == id && s->ptr == ptr) @@ -1447,7 +1617,6 @@ static void free_func_state(struct bpf_func_state *state) { if (!state) return; - kfree(state->refs); kfree(state->stack); kfree(state); } @@ -1461,6 +1630,7 @@ static void free_verifier_state(struct bpf_verifier_state *state, free_func_state(state->frame[i]); state->frame[i] = NULL; } + kfree(state->refs); if (free_self) kfree(state); } @@ -1471,12 +1641,7 @@ static void free_verifier_state(struct bpf_verifier_state *state, static int copy_func_state(struct bpf_func_state *dst, const struct bpf_func_state *src) { - int err; - - memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs)); - err = copy_reference_state(dst, src); - if (err) - return err; + memcpy(dst, src, offsetof(struct bpf_func_state, stack)); return copy_stack_state(dst, src); } @@ -1493,9 +1658,10 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, free_func_state(dst_state->frame[i]); dst_state->frame[i] = NULL; } + err = copy_reference_state(dst_state, src); + if (err) + return err; dst_state->speculative = src->speculative; - dst_state->active_rcu_lock = src->active_rcu_lock; - dst_state->active_preempt_lock = src->active_preempt_lock; dst_state->in_sleepable = src->in_sleepable; dst_state->curframe = src->curframe; dst_state->branches = src->branches; @@ -3202,10 +3368,27 @@ static int mark_reg_read(struct bpf_verifier_env *env, return 0; } -static int mark_dynptr_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +static int mark_stack_slot_obj_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg, + int spi, int nr_slots) { struct bpf_func_state *state = func(env, reg); - int spi, ret; + int err, i; + + for (i = 0; i < nr_slots; i++) { + struct bpf_reg_state *st = &state->stack[spi - i].spilled_ptr; + + err = mark_reg_read(env, st, st->parent, REG_LIVE_READ64); + if (err) + return err; + + mark_stack_slot_scratched(env, spi - i); + } + return 0; +} + +static int mark_dynptr_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + int spi; /* For CONST_PTR_TO_DYNPTR, it must have already been done by * check_reg_arg in check_helper_call and mark_btf_func_reg_size in @@ -3220,31 +3403,23 @@ static int mark_dynptr_read(struct bpf_verifier_env *env, struct bpf_reg_state * * bounds and spi is the first dynptr slot. Simply mark stack slot as * read. */ - ret = mark_reg_read(env, &state->stack[spi].spilled_ptr, - state->stack[spi].spilled_ptr.parent, REG_LIVE_READ64); - if (ret) - return ret; - return mark_reg_read(env, &state->stack[spi - 1].spilled_ptr, - state->stack[spi - 1].spilled_ptr.parent, REG_LIVE_READ64); + return mark_stack_slot_obj_read(env, reg, spi, BPF_DYNPTR_NR_SLOTS); } static int mark_iter_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int spi, int nr_slots) { - struct bpf_func_state *state = func(env, reg); - int err, i; - - for (i = 0; i < nr_slots; i++) { - struct bpf_reg_state *st = &state->stack[spi - i].spilled_ptr; - - err = mark_reg_read(env, st, st->parent, REG_LIVE_READ64); - if (err) - return err; + return mark_stack_slot_obj_read(env, reg, spi, nr_slots); +} - mark_stack_slot_scratched(env, spi - i); - } +static int mark_irq_flag_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + int spi; - return 0; + spi = irq_flag_get_spi(env, reg); + if (spi < 0) + return spi; + return mark_stack_slot_obj_read(env, reg, spi, 1); } /* This function is supposed to be used by the following 32-bit optimization @@ -4499,7 +4674,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno) fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_frame_stack_mask(bt, fr)); verbose(env, "stack=%s: ", env->tmp_str_buf); - print_verifier_state(env, func, true); + print_verifier_state(env, st, fr, true); } } @@ -5496,7 +5671,7 @@ static bool in_sleepable(struct bpf_verifier_env *env) static bool in_rcu_cs(struct bpf_verifier_env *env) { return env->cur_state->active_rcu_lock || - cur_func(env)->active_locks || + env->cur_state->active_locks || !in_sleepable(env); } @@ -7850,15 +8025,15 @@ static int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg * Since only one bpf_spin_lock is allowed the checks are simpler than * reg_is_refcounted() logic. The verifier needs to remember only * one spin_lock instead of array of acquired_refs. - * cur_func(env)->active_locks remembers which map value element or allocated + * env->cur_state->active_locks remembers which map value element or allocated * object got locked and clears it after bpf_spin_unlock. */ static int process_spin_lock(struct bpf_verifier_env *env, int regno, bool is_lock) { struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; + struct bpf_verifier_state *cur = env->cur_state; bool is_const = tnum_is_const(reg->var_off); - struct bpf_func_state *cur = cur_func(env); u64 val = reg->var_off.value; struct bpf_map *map = NULL; struct btf *btf = NULL; @@ -7925,7 +8100,7 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno, return -EINVAL; } - if (release_lock_state(cur_func(env), REF_TYPE_LOCK, reg->id, ptr)) { + if (release_lock_state(env->cur_state, REF_TYPE_LOCK, reg->id, ptr)) { verbose(env, "bpf_spin_unlock of different lock\n"); return -EINVAL; } @@ -9669,21 +9844,38 @@ static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range reg->range = AT_PKT_END; } +static int release_reference_nomark(struct bpf_verifier_state *state, int ref_obj_id) +{ + int i; + + for (i = 0; i < state->acquired_refs; i++) { + if (state->refs[i].type != REF_TYPE_PTR) + continue; + if (state->refs[i].id == ref_obj_id) { + release_reference_state(state, i); + return 0; + } + } + return -EINVAL; +} + /* The pointer with the specified id has released its reference to kernel * resources. Identify all copies of the same pointer and clear the reference. + * + * This is the release function corresponding to acquire_reference(). Idempotent. */ -static int release_reference(struct bpf_verifier_env *env, - int ref_obj_id) +static int release_reference(struct bpf_verifier_env *env, int ref_obj_id) { + struct bpf_verifier_state *vstate = env->cur_state; struct bpf_func_state *state; struct bpf_reg_state *reg; int err; - err = release_reference_state(cur_func(env), ref_obj_id); + err = release_reference_nomark(vstate, ref_obj_id); if (err) return err; - bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ + bpf_for_each_reg_in_vstate(vstate, state, reg, ({ if (reg->ref_obj_id == ref_obj_id) mark_reg_invalid(env, reg); })); @@ -9757,9 +9949,7 @@ static int setup_func_entry(struct bpf_verifier_env *env, int subprog, int calls callsite, state->curframe + 1 /* frameno within this callchain */, subprog /* subprog number within this prog */); - /* Transfer references to the callee */ - err = copy_reference_state(callee, caller); - err = err ?: set_callee_state_cb(env, caller, callee, callsite); + err = set_callee_state_cb(env, caller, callee, callsite); if (err) goto err_out; @@ -9992,19 +10182,25 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, const char *sub_name = subprog_name(env, subprog); /* Only global subprogs cannot be called with a lock held. */ - if (cur_func(env)->active_locks) { + if (env->cur_state->active_locks) { verbose(env, "global function calls are not allowed while holding a lock,\n" "use static function instead\n"); return -EINVAL; } /* Only global subprogs cannot be called with preemption disabled. */ - if (env->cur_state->active_preempt_lock) { + if (env->cur_state->active_preempt_locks) { verbose(env, "global function calls are not allowed with preemption disabled,\n" "use static function instead\n"); return -EINVAL; } + if (env->cur_state->active_irq_id) { + verbose(env, "global function calls are not allowed with IRQs disabled,\n" + "use static function instead\n"); + return -EINVAL; + } + if (err) { verbose(env, "Caller passes invalid args into func#%d ('%s')\n", subprog, sub_name); @@ -10039,9 +10235,9 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, if (env->log.level & BPF_LOG_LEVEL) { verbose(env, "caller:\n"); - print_verifier_state(env, caller, true); + print_verifier_state(env, state, caller->frameno, true); verbose(env, "callee:\n"); - print_verifier_state(env, state->frame[state->curframe], true); + print_verifier_state(env, state, state->curframe, true); } return 0; @@ -10333,11 +10529,6 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) caller->regs[BPF_REG_0] = *r0; } - /* Transfer references to the caller */ - err = copy_reference_state(caller, callee); - if (err) - return err; - /* for callbacks like bpf_loop or bpf_for_each_map_elem go back to callsite, * there function call logic would reschedule callback visit. If iteration * converges is_state_visited() would prune that visit eventually. @@ -10350,9 +10541,9 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) if (env->log.level & BPF_LOG_LEVEL) { verbose(env, "returning from callee:\n"); - print_verifier_state(env, callee, true); + print_verifier_state(env, state, callee->frameno, true); verbose(env, "to caller at %d:\n", *insn_idx); - print_verifier_state(env, caller, true); + print_verifier_state(env, state, caller->frameno, true); } /* clear everything in the callee. In case of exceptional exits using * bpf_throw, this will be done by copy_verifier_state for extra frames. */ @@ -10502,11 +10693,11 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, static int check_reference_leak(struct bpf_verifier_env *env, bool exception_exit) { - struct bpf_func_state *state = cur_func(env); + struct bpf_verifier_state *state = env->cur_state; bool refs_lingering = false; int i; - if (!exception_exit && state->frameno) + if (!exception_exit && cur_func(env)->frameno) return 0; for (i = 0; i < state->acquired_refs; i++) { @@ -10523,7 +10714,7 @@ static int check_resource_leak(struct bpf_verifier_env *env, bool exception_exit { int err; - if (check_lock && cur_func(env)->active_locks) { + if (check_lock && env->cur_state->active_locks) { verbose(env, "%s cannot be used inside bpf_spin_lock-ed region\n", prefix); return -EINVAL; } @@ -10534,12 +10725,17 @@ static int check_resource_leak(struct bpf_verifier_env *env, bool exception_exit return err; } + if (check_lock && env->cur_state->active_irq_id) { + verbose(env, "%s cannot be used inside bpf_local_irq_save-ed region\n", prefix); + return -EINVAL; + } + if (check_lock && env->cur_state->active_rcu_lock) { verbose(env, "%s cannot be used inside bpf_rcu_read_lock-ed region\n", prefix); return -EINVAL; } - if (check_lock && env->cur_state->active_preempt_lock) { + if (check_lock && env->cur_state->active_preempt_locks) { verbose(env, "%s cannot be used inside bpf_preempt_disable-ed region\n", prefix); return -EINVAL; } @@ -10727,7 +10923,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn env->insn_aux_data[insn_idx].storage_get_func_atomic = true; } - if (env->cur_state->active_preempt_lock) { + if (env->cur_state->active_preempt_locks) { if (fn->might_sleep) { verbose(env, "sleepable helper %s#%d in non-preemptible region\n", func_id_name(func_id), func_id); @@ -10738,6 +10934,17 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn env->insn_aux_data[insn_idx].storage_get_func_atomic = true; } + if (env->cur_state->active_irq_id) { + if (fn->might_sleep) { + verbose(env, "sleepable helper %s#%d in IRQ-disabled region\n", + func_id_name(func_id), func_id); + return -EINVAL; + } + + if (in_sleepable(env) && is_storage_get_function(func_id)) + env->insn_aux_data[insn_idx].storage_get_func_atomic = true; + } + meta.func_id = func_id; /* check args */ for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) { @@ -10784,7 +10991,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn struct bpf_func_state *state; struct bpf_reg_state *reg; - err = release_reference_state(cur_func(env), ref_obj_id); + err = release_reference_nomark(env->cur_state, ref_obj_id); if (!err) { bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ if (reg->ref_obj_id == ref_obj_id) { @@ -11117,7 +11324,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn /* For release_reference() */ regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id; } else if (is_acquire_function(func_id, meta.map_ptr)) { - int id = acquire_reference_state(env, insn_idx); + int id = acquire_reference(env, insn_idx); if (id < 0) return id; @@ -11299,6 +11506,11 @@ static bool is_kfunc_arg_const_str(const struct btf *btf, const struct btf_param return btf_param_match_suffix(btf, arg, "__str"); } +static bool is_kfunc_arg_irq_flag(const struct btf *btf, const struct btf_param *arg) +{ + return btf_param_match_suffix(btf, arg, "__irq_flag"); +} + static bool is_kfunc_arg_scalar_with_name(const struct btf *btf, const struct btf_param *arg, const char *name) @@ -11452,6 +11664,7 @@ enum kfunc_ptr_arg_type { KF_ARG_PTR_TO_CONST_STR, KF_ARG_PTR_TO_MAP, KF_ARG_PTR_TO_WORKQUEUE, + KF_ARG_PTR_TO_IRQ_FLAG, }; enum special_kfunc_type { @@ -11483,6 +11696,8 @@ enum special_kfunc_type { KF_bpf_iter_css_task_new, KF_bpf_session_cookie, KF_bpf_get_kmem_cache, + KF_bpf_local_irq_save, + KF_bpf_local_irq_restore, }; BTF_SET_START(special_kfunc_set) @@ -11549,6 +11764,8 @@ BTF_ID(func, bpf_session_cookie) BTF_ID_UNUSED #endif BTF_ID(func, bpf_get_kmem_cache) +BTF_ID(func, bpf_local_irq_save) +BTF_ID(func, bpf_local_irq_restore) static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta) { @@ -11639,6 +11856,9 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, if (is_kfunc_arg_wq(meta->btf, &args[argno])) return KF_ARG_PTR_TO_WORKQUEUE; + if (is_kfunc_arg_irq_flag(meta->btf, &args[argno])) + return KF_ARG_PTR_TO_IRQ_FLAG; + if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) { if (!btf_type_is_struct(ref_t)) { verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n", @@ -11742,11 +11962,59 @@ static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env, return 0; } +static int process_irq_flag(struct bpf_verifier_env *env, int regno, + struct bpf_kfunc_call_arg_meta *meta) +{ + struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; + bool irq_save; + int err; + + if (meta->func_id == special_kfunc_list[KF_bpf_local_irq_save]) { + irq_save = true; + } else if (meta->func_id == special_kfunc_list[KF_bpf_local_irq_restore]) { + irq_save = false; + } else { + verbose(env, "verifier internal error: unknown irq flags kfunc\n"); + return -EFAULT; + } + + if (irq_save) { + if (!is_irq_flag_reg_valid_uninit(env, reg)) { + verbose(env, "expected uninitialized irq flag as arg#%d\n", regno - 1); + return -EINVAL; + } + + err = check_mem_access(env, env->insn_idx, regno, 0, BPF_DW, BPF_WRITE, -1, false, false); + if (err) + return err; + + err = mark_stack_slot_irq_flag(env, meta, reg, env->insn_idx); + if (err) + return err; + } else { + err = is_irq_flag_reg_valid_init(env, reg); + if (err) { + verbose(env, "expected an initialized irq flag as arg#%d\n", regno - 1); + return err; + } + + err = mark_irq_flag_read(env, reg); + if (err) + return err; + + err = unmark_stack_slot_irq_flag(env, reg); + if (err) + return err; + } + return 0; +} + + static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg) { struct btf_record *rec = reg_btf_record(reg); - if (!cur_func(env)->active_locks) { + if (!env->cur_state->active_locks) { verbose(env, "verifier internal error: ref_set_non_owning w/o active lock\n"); return -EFAULT; } @@ -11765,12 +12033,11 @@ static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state static int ref_convert_owning_non_owning(struct bpf_verifier_env *env, u32 ref_obj_id) { - struct bpf_func_state *state, *unused; + struct bpf_verifier_state *state = env->cur_state; + struct bpf_func_state *unused; struct bpf_reg_state *reg; int i; - state = cur_func(env); - if (!ref_obj_id) { verbose(env, "verifier internal error: ref_obj_id is zero for " "owning -> non-owning conversion\n"); @@ -11860,9 +12127,9 @@ static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_ } id = reg->id; - if (!cur_func(env)->active_locks) + if (!env->cur_state->active_locks) return -EINVAL; - s = find_lock_state(env, REF_TYPE_LOCK, id, ptr); + s = find_lock_state(env->cur_state, REF_TYPE_LOCK, id, ptr); if (!s) { verbose(env, "held lock and object are not in the same allocation\n"); return -EINVAL; @@ -12331,6 +12598,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ case KF_ARG_PTR_TO_REFCOUNTED_KPTR: case KF_ARG_PTR_TO_CONST_STR: case KF_ARG_PTR_TO_WORKQUEUE: + case KF_ARG_PTR_TO_IRQ_FLAG: break; default: WARN_ON_ONCE(1); @@ -12625,6 +12893,15 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ if (ret < 0) return ret; break; + case KF_ARG_PTR_TO_IRQ_FLAG: + if (reg->type != PTR_TO_STACK) { + verbose(env, "arg#%d doesn't point to an irq flag on stack\n", i); + return -EINVAL; + } + ret = process_irq_flag(env, regno, meta); + if (ret < 0) + return ret; + break; } } @@ -12789,22 +13066,27 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, return -EINVAL; } - if (env->cur_state->active_preempt_lock) { + if (env->cur_state->active_preempt_locks) { if (preempt_disable) { - env->cur_state->active_preempt_lock++; + env->cur_state->active_preempt_locks++; } else if (preempt_enable) { - env->cur_state->active_preempt_lock--; + env->cur_state->active_preempt_locks--; } else if (sleepable) { verbose(env, "kernel func %s is sleepable within non-preemptible region\n", func_name); return -EACCES; } } else if (preempt_disable) { - env->cur_state->active_preempt_lock++; + env->cur_state->active_preempt_locks++; } else if (preempt_enable) { verbose(env, "unmatched attempt to enable preemption (kernel function %s)\n", func_name); return -EINVAL; } + if (env->cur_state->active_irq_id && sleepable) { + verbose(env, "kernel func %s is sleepable within IRQ-disabled region\n", func_name); + return -EACCES; + } + /* In case of release function, we get register number of refcounted * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now. */ @@ -13098,7 +13380,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, } mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *)); if (is_kfunc_acquire(&meta)) { - int id = acquire_reference_state(env, insn_idx); + int id = acquire_reference(env, insn_idx); if (id < 0) return id; @@ -14495,12 +14777,12 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, /* Got here implies adding two SCALAR_VALUEs */ if (WARN_ON_ONCE(ptr_reg)) { - print_verifier_state(env, state, true); + print_verifier_state(env, vstate, vstate->curframe, true); verbose(env, "verifier internal error: unexpected ptr_reg\n"); return -EINVAL; } if (WARN_ON(!src_reg)) { - print_verifier_state(env, state, true); + print_verifier_state(env, vstate, vstate->curframe, true); verbose(env, "verifier internal error: no src_reg\n"); return -EINVAL; } @@ -15398,7 +15680,7 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno, * No one could have freed the reference state before * doing the NULL check. */ - WARN_ON_ONCE(release_reference_state(state, id)); + WARN_ON_ONCE(release_reference_nomark(vstate, id)); bpf_for_each_reg_in_vstate(vstate, state, reg, ({ mark_ptr_or_null_reg(state, reg, id, is_null); @@ -15708,7 +15990,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, *insn_idx)) return -EFAULT; if (env->log.level & BPF_LOG_LEVEL) - print_insn_state(env, this_branch->frame[this_branch->curframe]); + print_insn_state(env, this_branch, this_branch->curframe); *insn_idx += insn->off; return 0; } else if (pred == 0) { @@ -15722,7 +16004,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, *insn_idx)) return -EFAULT; if (env->log.level & BPF_LOG_LEVEL) - print_insn_state(env, this_branch->frame[this_branch->curframe]); + print_insn_state(env, this_branch, this_branch->curframe); return 0; } @@ -15839,7 +16121,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, return -EACCES; } if (env->log.level & BPF_LOG_LEVEL) - print_insn_state(env, this_branch->frame[this_branch->curframe]); + print_insn_state(env, this_branch, this_branch->curframe); return 0; } @@ -17738,6 +18020,12 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap)) return false; break; + case STACK_IRQ_FLAG: + old_reg = &old->stack[spi].spilled_ptr; + cur_reg = &cur->stack[spi].spilled_ptr; + if (!check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap)) + return false; + break; case STACK_MISC: case STACK_ZERO: case STACK_INVALID: @@ -17750,7 +18038,7 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, return true; } -static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur, +static bool refsafe(struct bpf_verifier_state *old, struct bpf_verifier_state *cur, struct bpf_idmap *idmap) { int i; @@ -17758,12 +18046,25 @@ static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur, if (old->acquired_refs != cur->acquired_refs) return false; + if (old->active_locks != cur->active_locks) + return false; + + if (old->active_preempt_locks != cur->active_preempt_locks) + return false; + + if (old->active_rcu_lock != cur->active_rcu_lock) + return false; + + if (!check_ids(old->active_irq_id, cur->active_irq_id, idmap)) + return false; + for (i = 0; i < old->acquired_refs; i++) { if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap) || old->refs[i].type != cur->refs[i].type) return false; switch (old->refs[i].type) { case REF_TYPE_PTR: + case REF_TYPE_IRQ: break; case REF_TYPE_LOCK: if (old->refs[i].ptr != cur->refs[i].ptr) @@ -17820,9 +18121,6 @@ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_stat if (!stacksafe(env, old, cur, &env->idmap_scratch, exact)) return false; - if (!refsafe(old, cur, &env->idmap_scratch)) - return false; - return true; } @@ -17850,13 +18148,10 @@ static bool states_equal(struct bpf_verifier_env *env, if (old->speculative && !cur->speculative) return false; - if (old->active_rcu_lock != cur->active_rcu_lock) - return false; - - if (old->active_preempt_lock != cur->active_preempt_lock) + if (old->in_sleepable != cur->in_sleepable) return false; - if (old->in_sleepable != cur->in_sleepable) + if (!refsafe(old, cur, &env->idmap_scratch)) return false; /* for states to be equal callsites have to be the same @@ -18249,9 +18544,9 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) verbose_linfo(env, insn_idx, "; "); verbose(env, "infinite loop detected at insn %d\n", insn_idx); verbose(env, "cur state:"); - print_verifier_state(env, cur->frame[cur->curframe], true); + print_verifier_state(env, cur, cur->curframe, true); verbose(env, "old state:"); - print_verifier_state(env, sl->state.frame[cur->curframe], true); + print_verifier_state(env, &sl->state, cur->curframe, true); return -EINVAL; } /* if the verifier is processing a loop, avoid adding new state @@ -18607,7 +18902,7 @@ static int do_check(struct bpf_verifier_env *env) env->prev_insn_idx, env->insn_idx, env->cur_state->speculative ? " (speculative execution)" : ""); - print_verifier_state(env, state->frame[state->curframe], true); + print_verifier_state(env, state, state->curframe, true); do_print_state = false; } @@ -18619,7 +18914,7 @@ static int do_check(struct bpf_verifier_env *env) }; if (verifier_state_scratched(env)) - print_insn_state(env, state->frame[state->curframe]); + print_insn_state(env, state, state->curframe); verbose_linfo(env, env->insn_idx, "; "); env->prev_log_pos = env->log.end_pos; @@ -18751,7 +19046,7 @@ static int do_check(struct bpf_verifier_env *env) return -EINVAL; } - if (cur_func(env)->active_locks) { + if (env->cur_state->active_locks) { if ((insn->src_reg == BPF_REG_0 && insn->imm != BPF_FUNC_spin_unlock) || (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && (insn->off != 0 || !is_bpf_graph_api_kfunc(insn->imm)))) { @@ -18807,7 +19102,7 @@ process_bpf_exit_full: * match caller reference state when it exits. */ err = check_resource_leak(env, exception_exit, !env->cur_state->curframe, - "BPF_EXIT instruction"); + "BPF_EXIT instruction in main prog"); if (err) return err; diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index d9f65adb456b..b1b4d69c407a 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -98,6 +98,7 @@ #include "verifier_xdp_direct_packet_access.skel.h" #include "verifier_bits_iter.skel.h" #include "verifier_lsm.skel.h" +#include "irq.skel.h" #define MAX_ENTRIES 11 @@ -225,6 +226,7 @@ void test_verifier_xdp(void) { RUN(verifier_xdp); } void test_verifier_xdp_direct_packet_access(void) { RUN(verifier_xdp_direct_packet_access); } void test_verifier_bits_iter(void) { RUN(verifier_bits_iter); } void test_verifier_lsm(void) { RUN(verifier_lsm); } +void test_irq(void) { RUN(irq); } void test_verifier_mtu(void) { diff --git a/tools/testing/selftests/bpf/progs/exceptions_fail.c b/tools/testing/selftests/bpf/progs/exceptions_fail.c index fe0f3fa5aab6..8a0fdff89927 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_fail.c +++ b/tools/testing/selftests/bpf/progs/exceptions_fail.c @@ -131,7 +131,7 @@ int reject_subprog_with_lock(void *ctx) } SEC("?tc") -__failure __msg("BPF_EXIT instruction cannot be used inside bpf_rcu_read_lock-ed region") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_rcu_read_lock-ed region") int reject_with_rcu_read_lock(void *ctx) { bpf_rcu_read_lock(); @@ -147,7 +147,7 @@ __noinline static int throwing_subprog(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("BPF_EXIT instruction cannot be used inside bpf_rcu_read_lock-ed region") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_rcu_read_lock-ed region") int reject_subprog_with_rcu_read_lock(void *ctx) { bpf_rcu_read_lock(); diff --git a/tools/testing/selftests/bpf/progs/irq.c b/tools/testing/selftests/bpf/progs/irq.c new file mode 100644 index 000000000000..b0b53d980964 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/irq.c @@ -0,0 +1,444 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" +#include "bpf_experimental.h" + +unsigned long global_flags; + +extern void bpf_local_irq_save(unsigned long *) __weak __ksym; +extern void bpf_local_irq_restore(unsigned long *) __weak __ksym; +extern int bpf_copy_from_user_str(void *dst, u32 dst__sz, const void *unsafe_ptr__ign, u64 flags) __weak __ksym; + +SEC("?tc") +__failure __msg("arg#0 doesn't point to an irq flag on stack") +int irq_save_bad_arg(struct __sk_buff *ctx) +{ + bpf_local_irq_save(&global_flags); + return 0; +} + +SEC("?tc") +__failure __msg("arg#0 doesn't point to an irq flag on stack") +int irq_restore_bad_arg(struct __sk_buff *ctx) +{ + bpf_local_irq_restore(&global_flags); + return 0; +} + +SEC("?tc") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region") +int irq_restore_missing_2(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags2; + + bpf_local_irq_save(&flags1); + bpf_local_irq_save(&flags2); + return 0; +} + +SEC("?tc") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region") +int irq_restore_missing_3(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags2; + unsigned long flags3; + + bpf_local_irq_save(&flags1); + bpf_local_irq_save(&flags2); + bpf_local_irq_save(&flags3); + return 0; +} + +SEC("?tc") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region") +int irq_restore_missing_3_minus_2(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags2; + unsigned long flags3; + + bpf_local_irq_save(&flags1); + bpf_local_irq_save(&flags2); + bpf_local_irq_save(&flags3); + bpf_local_irq_restore(&flags3); + bpf_local_irq_restore(&flags2); + return 0; +} + +static __noinline void local_irq_save(unsigned long *flags) +{ + bpf_local_irq_save(flags); +} + +static __noinline void local_irq_restore(unsigned long *flags) +{ + bpf_local_irq_restore(flags); +} + +SEC("?tc") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region") +int irq_restore_missing_1_subprog(struct __sk_buff *ctx) +{ + unsigned long flags; + + local_irq_save(&flags); + return 0; +} + +SEC("?tc") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region") +int irq_restore_missing_2_subprog(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags2; + + local_irq_save(&flags1); + local_irq_save(&flags2); + return 0; +} + +SEC("?tc") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region") +int irq_restore_missing_3_subprog(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags2; + unsigned long flags3; + + local_irq_save(&flags1); + local_irq_save(&flags2); + local_irq_save(&flags3); + return 0; +} + +SEC("?tc") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region") +int irq_restore_missing_3_minus_2_subprog(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags2; + unsigned long flags3; + + local_irq_save(&flags1); + local_irq_save(&flags2); + local_irq_save(&flags3); + local_irq_restore(&flags3); + local_irq_restore(&flags2); + return 0; +} + +SEC("?tc") +__success +int irq_balance(struct __sk_buff *ctx) +{ + unsigned long flags; + + local_irq_save(&flags); + local_irq_restore(&flags); + return 0; +} + +SEC("?tc") +__success +int irq_balance_n(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags2; + unsigned long flags3; + + local_irq_save(&flags1); + local_irq_save(&flags2); + local_irq_save(&flags3); + local_irq_restore(&flags3); + local_irq_restore(&flags2); + local_irq_restore(&flags1); + return 0; +} + +static __noinline void local_irq_balance(void) +{ + unsigned long flags; + + local_irq_save(&flags); + local_irq_restore(&flags); +} + +static __noinline void local_irq_balance_n(void) +{ + unsigned long flags1; + unsigned long flags2; + unsigned long flags3; + + local_irq_save(&flags1); + local_irq_save(&flags2); + local_irq_save(&flags3); + local_irq_restore(&flags3); + local_irq_restore(&flags2); + local_irq_restore(&flags1); +} + +SEC("?tc") +__success +int irq_balance_subprog(struct __sk_buff *ctx) +{ + local_irq_balance(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +__failure __msg("sleepable helper bpf_copy_from_user#") +int irq_sleepable_helper(void *ctx) +{ + unsigned long flags; + u32 data; + + local_irq_save(&flags); + bpf_copy_from_user(&data, sizeof(data), NULL); + local_irq_restore(&flags); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +__failure __msg("kernel func bpf_copy_from_user_str is sleepable within IRQ-disabled region") +int irq_sleepable_kfunc(void *ctx) +{ + unsigned long flags; + u32 data; + + local_irq_save(&flags); + bpf_copy_from_user_str(&data, sizeof(data), NULL, 0); + local_irq_restore(&flags); + return 0; +} + +int __noinline global_local_irq_balance(void) +{ + local_irq_balance_n(); + return 0; +} + +SEC("?tc") +__failure __msg("global function calls are not allowed with IRQs disabled") +int irq_global_subprog(struct __sk_buff *ctx) +{ + unsigned long flags; + + bpf_local_irq_save(&flags); + global_local_irq_balance(); + bpf_local_irq_restore(&flags); + return 0; +} + +SEC("?tc") +__failure __msg("cannot restore irq state out of order") +int irq_restore_ooo(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags2; + + bpf_local_irq_save(&flags1); + bpf_local_irq_save(&flags2); + bpf_local_irq_restore(&flags1); + bpf_local_irq_restore(&flags2); + return 0; +} + +SEC("?tc") +__failure __msg("cannot restore irq state out of order") +int irq_restore_ooo_3(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags2; + unsigned long flags3; + + bpf_local_irq_save(&flags1); + bpf_local_irq_save(&flags2); + bpf_local_irq_restore(&flags2); + bpf_local_irq_save(&flags3); + bpf_local_irq_restore(&flags1); + bpf_local_irq_restore(&flags3); + return 0; +} + +static __noinline void local_irq_save_3(unsigned long *flags1, unsigned long *flags2, + unsigned long *flags3) +{ + local_irq_save(flags1); + local_irq_save(flags2); + local_irq_save(flags3); +} + +SEC("?tc") +__success +int irq_restore_3_subprog(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags2; + unsigned long flags3; + + local_irq_save_3(&flags1, &flags2, &flags3); + bpf_local_irq_restore(&flags3); + bpf_local_irq_restore(&flags2); + bpf_local_irq_restore(&flags1); + return 0; +} + +SEC("?tc") +__failure __msg("cannot restore irq state out of order") +int irq_restore_4_subprog(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags2; + unsigned long flags3; + unsigned long flags4; + + local_irq_save_3(&flags1, &flags2, &flags3); + bpf_local_irq_restore(&flags3); + bpf_local_irq_save(&flags4); + bpf_local_irq_restore(&flags4); + bpf_local_irq_restore(&flags1); + return 0; +} + +SEC("?tc") +__failure __msg("cannot restore irq state out of order") +int irq_restore_ooo_3_subprog(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags2; + unsigned long flags3; + + local_irq_save_3(&flags1, &flags2, &flags3); + bpf_local_irq_restore(&flags3); + bpf_local_irq_restore(&flags2); + bpf_local_irq_save(&flags3); + bpf_local_irq_restore(&flags1); + return 0; +} + +SEC("?tc") +__failure __msg("expected an initialized") +int irq_restore_invalid(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags = 0xfaceb00c; + + bpf_local_irq_save(&flags1); + bpf_local_irq_restore(&flags); + return 0; +} + +SEC("?tc") +__failure __msg("expected uninitialized") +int irq_save_invalid(struct __sk_buff *ctx) +{ + unsigned long flags1; + + bpf_local_irq_save(&flags1); + bpf_local_irq_save(&flags1); + return 0; +} + +SEC("?tc") +__failure __msg("expected an initialized") +int irq_restore_iter(struct __sk_buff *ctx) +{ + struct bpf_iter_num it; + + bpf_iter_num_new(&it, 0, 42); + bpf_local_irq_restore((unsigned long *)&it); + return 0; +} + +SEC("?tc") +__failure __msg("Unreleased reference id=1") +int irq_save_iter(struct __sk_buff *ctx) +{ + struct bpf_iter_num it; + + /* Ensure same sized slot has st->ref_obj_id set, so we reject based on + * slot_type != STACK_IRQ_FLAG... + */ + _Static_assert(sizeof(it) == sizeof(unsigned long), "broken iterator size"); + + bpf_iter_num_new(&it, 0, 42); + bpf_local_irq_save((unsigned long *)&it); + bpf_local_irq_restore((unsigned long *)&it); + return 0; +} + +SEC("?tc") +__failure __msg("expected an initialized") +int irq_flag_overwrite(struct __sk_buff *ctx) +{ + unsigned long flags; + + bpf_local_irq_save(&flags); + flags = 0xdeadbeef; + bpf_local_irq_restore(&flags); + return 0; +} + +SEC("?tc") +__failure __msg("expected an initialized") +int irq_flag_overwrite_partial(struct __sk_buff *ctx) +{ + unsigned long flags; + + bpf_local_irq_save(&flags); + *(((char *)&flags) + 1) = 0xff; + bpf_local_irq_restore(&flags); + return 0; +} + +SEC("?tc") +__failure __msg("cannot restore irq state out of order") +int irq_ooo_refs_array(struct __sk_buff *ctx) +{ + unsigned long flags[4]; + struct { int i; } *p; + + /* refs=1 */ + bpf_local_irq_save(&flags[0]); + + /* refs=1,2 */ + p = bpf_obj_new(typeof(*p)); + if (!p) { + bpf_local_irq_restore(&flags[0]); + return 0; + } + + /* refs=1,2,3 */ + bpf_local_irq_save(&flags[1]); + + /* refs=1,2,3,4 */ + bpf_local_irq_save(&flags[2]); + + /* Now when we remove ref=2, the verifier must not break the ordering in + * the refs array between 1,3,4. With an older implementation, the + * verifier would swap the last element with the removed element, but to + * maintain the stack property we need to use memmove. + */ + bpf_obj_drop(p); + + /* Save and restore to reset active_irq_id to 3, as the ordering is now + * refs=1,4,3. When restoring the linear scan will find prev_id in order + * as 3 instead of 4. + */ + bpf_local_irq_save(&flags[3]); + bpf_local_irq_restore(&flags[3]); + + /* With the incorrect implementation, we can release flags[1], flags[2], + * and flags[0], i.e. in the wrong order. + */ + bpf_local_irq_restore(&flags[1]); + bpf_local_irq_restore(&flags[2]); + bpf_local_irq_restore(&flags[0]); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/preempt_lock.c b/tools/testing/selftests/bpf/progs/preempt_lock.c index 885377e83607..6c5797bf0ead 100644 --- a/tools/testing/selftests/bpf/progs/preempt_lock.c +++ b/tools/testing/selftests/bpf/progs/preempt_lock.c @@ -5,8 +5,10 @@ #include "bpf_misc.h" #include "bpf_experimental.h" +extern int bpf_copy_from_user_str(void *dst, u32 dst__sz, const void *unsafe_ptr__ign, u64 flags) __weak __ksym; + SEC("?tc") -__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region") int preempt_lock_missing_1(struct __sk_buff *ctx) { bpf_preempt_disable(); @@ -14,7 +16,7 @@ int preempt_lock_missing_1(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region") int preempt_lock_missing_2(struct __sk_buff *ctx) { bpf_preempt_disable(); @@ -23,7 +25,7 @@ int preempt_lock_missing_2(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region") int preempt_lock_missing_3(struct __sk_buff *ctx) { bpf_preempt_disable(); @@ -33,7 +35,7 @@ int preempt_lock_missing_3(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region") int preempt_lock_missing_3_minus_2(struct __sk_buff *ctx) { bpf_preempt_disable(); @@ -55,7 +57,7 @@ static __noinline void preempt_enable(void) } SEC("?tc") -__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region") int preempt_lock_missing_1_subprog(struct __sk_buff *ctx) { preempt_disable(); @@ -63,7 +65,7 @@ int preempt_lock_missing_1_subprog(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region") int preempt_lock_missing_2_subprog(struct __sk_buff *ctx) { preempt_disable(); @@ -72,7 +74,7 @@ int preempt_lock_missing_2_subprog(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region") int preempt_lock_missing_2_minus_1_subprog(struct __sk_buff *ctx) { preempt_disable(); @@ -113,6 +115,18 @@ int preempt_sleepable_helper(void *ctx) return 0; } +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +__failure __msg("kernel func bpf_copy_from_user_str is sleepable within non-preemptible region") +int preempt_sleepable_kfunc(void *ctx) +{ + u32 data; + + bpf_preempt_disable(); + bpf_copy_from_user_str(&data, sizeof(data), NULL, 0); + bpf_preempt_enable(); + return 0; +} + int __noinline preempt_global_subprog(void) { preempt_balance_subprog(); diff --git a/tools/testing/selftests/bpf/progs/verifier_spin_lock.c b/tools/testing/selftests/bpf/progs/verifier_spin_lock.c index 3f679de73229..25599eac9a70 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spin_lock.c +++ b/tools/testing/selftests/bpf/progs/verifier_spin_lock.c @@ -187,7 +187,7 @@ l0_%=: r6 = r0; \ SEC("cgroup/skb") __description("spin_lock: test6 missing unlock") -__failure __msg("BPF_EXIT instruction cannot be used inside bpf_spin_lock-ed region") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_spin_lock-ed region") __failure_unpriv __msg_unpriv("") __naked void spin_lock_test6_missing_unlock(void) { |