diff options
Diffstat (limited to 'kernel/bpf/verifier.c')
-rw-r--r-- | kernel/bpf/verifier.c | 525 |
1 files changed, 312 insertions, 213 deletions
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 757476c91c98..be38bb930bf1 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -47,7 +47,7 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = { * - unreachable insns exist (shouldn't be a forest. program = one function) * - out of bounds or malformed jumps * The second pass is all possible path descent from the 1st insn. - * Since it's analyzing all pathes through the program, the length of the + * Since it's analyzing all paths through the program, the length of the * analysis is limited to 64k insn, which may be hit even if total number of * insn is less then 4K, but there are too many branches that change stack/regs. * Number of 'branches to be analyzed' is limited to 1k @@ -132,7 +132,7 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = { * If it's ok, then verifier allows this BPF_CALL insn and looks at * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function - * returns ether pointer to map value or NULL. + * returns either pointer to map value or NULL. * * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off' * insn, the register holding that pointer in the true branch changes state to @@ -737,81 +737,104 @@ static void print_verifier_state(struct bpf_verifier_env *env, verbose(env, "\n"); } -#define COPY_STATE_FN(NAME, COUNT, FIELD, SIZE) \ -static int copy_##NAME##_state(struct bpf_func_state *dst, \ - const struct bpf_func_state *src) \ -{ \ - if (!src->FIELD) \ - return 0; \ - if (WARN_ON_ONCE(dst->COUNT < src->COUNT)) { \ - /* internal bug, make state invalid to reject the program */ \ - memset(dst, 0, sizeof(*dst)); \ - return -EFAULT; \ - } \ - memcpy(dst->FIELD, src->FIELD, \ - sizeof(*src->FIELD) * (src->COUNT / SIZE)); \ - return 0; \ -} -/* copy_reference_state() */ -COPY_STATE_FN(reference, acquired_refs, refs, 1) -/* copy_stack_state() */ -COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE) -#undef COPY_STATE_FN - -#define REALLOC_STATE_FN(NAME, COUNT, FIELD, SIZE) \ -static int realloc_##NAME##_state(struct bpf_func_state *state, int size, \ - bool copy_old) \ -{ \ - u32 old_size = state->COUNT; \ - struct bpf_##NAME##_state *new_##FIELD; \ - int slot = size / SIZE; \ - \ - if (size <= old_size || !size) { \ - if (copy_old) \ - return 0; \ - state->COUNT = slot * SIZE; \ - if (!size && old_size) { \ - kfree(state->FIELD); \ - state->FIELD = NULL; \ - } \ - return 0; \ - } \ - new_##FIELD = kmalloc_array(slot, sizeof(struct bpf_##NAME##_state), \ - GFP_KERNEL); \ - if (!new_##FIELD) \ - return -ENOMEM; \ - if (copy_old) { \ - if (state->FIELD) \ - memcpy(new_##FIELD, state->FIELD, \ - sizeof(*new_##FIELD) * (old_size / SIZE)); \ - memset(new_##FIELD + old_size / SIZE, 0, \ - sizeof(*new_##FIELD) * (size - old_size) / SIZE); \ - } \ - state->COUNT = slot * SIZE; \ - kfree(state->FIELD); \ - state->FIELD = new_##FIELD; \ - return 0; \ -} -/* realloc_reference_state() */ -REALLOC_STATE_FN(reference, acquired_refs, refs, 1) -/* realloc_stack_state() */ -REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE) -#undef REALLOC_STATE_FN - -/* do_check() starts with zero-sized stack in struct bpf_verifier_state to - * make it consume minimal amount of memory. check_stack_write() access from - * the program calls into realloc_func_state() to grow the stack size. - * Note there is a non-zero 'parent' pointer inside bpf_verifier_state - * which realloc_stack_state() copies over. It points to previous - * bpf_verifier_state which is never reallocated. +/* copy array src of length n * size bytes to dst. dst is reallocated if it's too + * small to hold src. This is different from krealloc since we don't want to preserve + * the contents of dst. + * + * Leaves dst untouched if src is NULL or length is zero. Returns NULL if memory could + * not be allocated. */ -static int realloc_func_state(struct bpf_func_state *state, int stack_size, - int refs_size, bool copy_old) +static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags) { - int err = realloc_reference_state(state, refs_size, copy_old); - if (err) - return err; - return realloc_stack_state(state, stack_size, copy_old); + size_t bytes; + + if (ZERO_OR_NULL_PTR(src)) + goto out; + + if (unlikely(check_mul_overflow(n, size, &bytes))) + return NULL; + + if (ksize(dst) < bytes) { + kfree(dst); + dst = kmalloc_track_caller(bytes, flags); + if (!dst) + return NULL; + } + + memcpy(dst, src, bytes); +out: + return dst ? dst : ZERO_SIZE_PTR; +} + +/* resize an array from old_n items to new_n items. the array is reallocated if it's too + * small to hold new_n items. new items are zeroed out if the array grows. + * + * Contrary to krealloc_array, does not free arr if new_n is zero. + */ +static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size) +{ + if (!new_n || old_n == new_n) + goto out; + + arr = krealloc_array(arr, new_n, size, GFP_KERNEL); + if (!arr) + return NULL; + + if (new_n > old_n) + memset(arr + old_n * size, 0, (new_n - old_n) * size); + +out: + return arr ? arr : ZERO_SIZE_PTR; +} + +static int copy_reference_state(struct bpf_func_state *dst, const struct bpf_func_state *src) +{ + dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs, + sizeof(struct bpf_reference_state), GFP_KERNEL); + if (!dst->refs) + return -ENOMEM; + + dst->acquired_refs = src->acquired_refs; + return 0; +} + +static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_state *src) +{ + size_t n = src->allocated_stack / BPF_REG_SIZE; + + dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state), + GFP_KERNEL); + if (!dst->stack) + return -ENOMEM; + + dst->allocated_stack = src->allocated_stack; + return 0; +} + +static int resize_reference_state(struct bpf_func_state *state, size_t n) +{ + state->refs = realloc_array(state->refs, state->acquired_refs, n, + sizeof(struct bpf_reference_state)); + if (!state->refs) + return -ENOMEM; + + state->acquired_refs = n; + return 0; +} + +static int grow_stack_state(struct bpf_func_state *state, int size) +{ + size_t old_n = state->allocated_stack / BPF_REG_SIZE, n = size / BPF_REG_SIZE; + + if (old_n >= n) + return 0; + + state->stack = realloc_array(state->stack, old_n, n, sizeof(struct bpf_stack_state)); + if (!state->stack) + return -ENOMEM; + + state->allocated_stack = size; + return 0; } /* Acquire a pointer id from the env and update the state->refs to include @@ -825,7 +848,7 @@ static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx) int new_ofs = state->acquired_refs; int id, err; - err = realloc_reference_state(state, state->acquired_refs + 1, true); + err = resize_reference_state(state, state->acquired_refs + 1); if (err) return err; id = ++env->id_gen; @@ -854,18 +877,6 @@ static int release_reference_state(struct bpf_func_state *state, int ptr_id) return -EINVAL; } -static int transfer_reference_state(struct bpf_func_state *dst, - struct bpf_func_state *src) -{ - int err = realloc_reference_state(dst, src->acquired_refs, false); - if (err) - return err; - err = copy_reference_state(dst, src); - if (err) - return err; - return 0; -} - static void free_func_state(struct bpf_func_state *state) { if (!state) @@ -904,10 +915,6 @@ static int copy_func_state(struct bpf_func_state *dst, { int err; - err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs, - false); - if (err) - return err; memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs)); err = copy_reference_state(dst, src); if (err) @@ -919,16 +926,13 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, const struct bpf_verifier_state *src) { struct bpf_func_state *dst; - u32 jmp_sz = sizeof(struct bpf_idx_pair) * src->jmp_history_cnt; int i, err; - if (dst_state->jmp_history_cnt < src->jmp_history_cnt) { - kfree(dst_state->jmp_history); - dst_state->jmp_history = kmalloc(jmp_sz, GFP_USER); - if (!dst_state->jmp_history) - return -ENOMEM; - } - memcpy(dst_state->jmp_history, src->jmp_history, jmp_sz); + dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history, + src->jmp_history_cnt, sizeof(struct bpf_idx_pair), + GFP_USER); + if (!dst_state->jmp_history) + return -ENOMEM; dst_state->jmp_history_cnt = src->jmp_history_cnt; /* if dst has more stack frames then src frame, free them */ @@ -2590,8 +2594,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg; struct bpf_reg_state *reg = NULL; - err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE), - state->acquired_refs, true); + err = grow_stack_state(state, round_up(slot + 1, BPF_REG_SIZE)); if (err) return err; /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0, @@ -2613,7 +2616,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, if (dst_reg != BPF_REG_FP) { /* The backtracking logic can only recognize explicit * stack slot address like [fp - 8]. Other spill of - * scalar via different register has to be conervative. + * scalar via different register has to be conservative. * Backtrack from here and mark all registers as precise * that contributed into 'reg' being a constant. */ @@ -2753,8 +2756,7 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env, if (value_reg && register_is_null(value_reg)) writing_zero = true; - err = realloc_func_state(state, round_up(-min_off, BPF_REG_SIZE), - state->acquired_refs, true); + err = grow_stack_state(state, round_up(-min_off, BPF_REG_SIZE)); if (err) return err; @@ -5629,7 +5631,7 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn subprog /* subprog number within this prog */); /* Transfer references to the callee */ - err = transfer_reference_state(callee, caller); + err = copy_reference_state(callee, caller); if (err) return err; @@ -5780,7 +5782,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) } /* Transfer references to the caller */ - err = transfer_reference_state(caller, callee); + err = copy_reference_state(caller, callee); if (err) return err; @@ -6409,18 +6411,10 @@ enum { }; static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, - const struct bpf_reg_state *off_reg, - u32 *alu_limit, u8 opcode) + u32 *alu_limit, bool mask_to_left) { - bool off_is_neg = off_reg->smin_value < 0; - bool mask_to_left = (opcode == BPF_ADD && off_is_neg) || - (opcode == BPF_SUB && !off_is_neg); u32 max = 0, ptr_limit = 0; - if (!tnum_is_const(off_reg->var_off) && - (off_reg->smin_value < 0) != (off_reg->smax_value < 0)) - return REASON_BOUNDS; - switch (ptr_reg->type) { case PTR_TO_STACK: /* Offset 0 is out-of-bounds, but acceptable start for the @@ -6486,15 +6480,41 @@ static bool sanitize_needed(u8 opcode) return opcode == BPF_ADD || opcode == BPF_SUB; } +struct bpf_sanitize_info { + struct bpf_insn_aux_data aux; + bool mask_to_left; +}; + +static struct bpf_verifier_state * +sanitize_speculative_path(struct bpf_verifier_env *env, + const struct bpf_insn *insn, + u32 next_idx, u32 curr_idx) +{ + struct bpf_verifier_state *branch; + struct bpf_reg_state *regs; + + branch = push_stack(env, next_idx, curr_idx, true); + if (branch && insn) { + regs = branch->frame[branch->curframe]->regs; + if (BPF_SRC(insn->code) == BPF_K) { + mark_reg_unknown(env, regs, insn->dst_reg); + } else if (BPF_SRC(insn->code) == BPF_X) { + mark_reg_unknown(env, regs, insn->dst_reg); + mark_reg_unknown(env, regs, insn->src_reg); + } + } + return branch; +} + static int sanitize_ptr_alu(struct bpf_verifier_env *env, struct bpf_insn *insn, const struct bpf_reg_state *ptr_reg, const struct bpf_reg_state *off_reg, struct bpf_reg_state *dst_reg, - struct bpf_insn_aux_data *tmp_aux, + struct bpf_sanitize_info *info, const bool commit_window) { - struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : tmp_aux; + struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux; struct bpf_verifier_state *vstate = env->cur_state; bool off_is_imm = tnum_is_const(off_reg->var_off); bool off_is_neg = off_reg->smin_value < 0; @@ -6515,7 +6535,16 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, if (vstate->speculative) goto do_sim; - err = retrieve_ptr_limit(ptr_reg, off_reg, &alu_limit, opcode); + if (!commit_window) { + if (!tnum_is_const(off_reg->var_off) && + (off_reg->smin_value < 0) != (off_reg->smax_value < 0)) + return REASON_BOUNDS; + + info->mask_to_left = (opcode == BPF_ADD && off_is_neg) || + (opcode == BPF_SUB && !off_is_neg); + } + + err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left); if (err < 0) return err; @@ -6523,8 +6552,8 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, /* In commit phase we narrow the masking window based on * the observed pointer move after the simulated operation. */ - alu_state = tmp_aux->alu_state; - alu_limit = abs(tmp_aux->alu_limit - alu_limit); + alu_state = info->aux.alu_state; + alu_limit = abs(info->aux.alu_limit - alu_limit); } else { alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0; alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0; @@ -6539,8 +6568,12 @@ do_sim: /* If we're in commit phase, we're done here given we already * pushed the truncated dst_reg into the speculative verification * stack. + * + * Also, when register is a known constant, we rewrite register-based + * operation to immediate-based, and thus do not need masking (and as + * a consequence, do not need to simulate the zero-truncation either). */ - if (commit_window) + if (commit_window || off_is_imm) return 0; /* Simulate and find potential out-of-bounds access under @@ -6556,12 +6589,26 @@ do_sim: tmp = *dst_reg; *dst_reg = *ptr_reg; } - ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true); + ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1, + env->insn_idx); if (!ptr_is_dst_reg && ret) *dst_reg = tmp; return !ret ? REASON_STACK : 0; } +static void sanitize_mark_insn_seen(struct bpf_verifier_env *env) +{ + struct bpf_verifier_state *vstate = env->cur_state; + + /* If we simulate paths under speculation, we don't update the + * insn as 'seen' such that when we verify unreachable paths in + * the non-speculative domain, sanitize_dead_code() can still + * rewrite/sanitize them. + */ + if (!vstate->speculative) + env->insn_aux_data[env->insn_idx].seen = env->pass_cnt; +} + static int sanitize_err(struct bpf_verifier_env *env, const struct bpf_insn *insn, int reason, const struct bpf_reg_state *off_reg, @@ -6685,7 +6732,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value; u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value, umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value; - struct bpf_insn_aux_data tmp_aux = {}; + struct bpf_sanitize_info info = {}; u8 opcode = BPF_OP(insn->code); u32 dst = insn->dst_reg; int ret; @@ -6754,7 +6801,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, if (sanitize_needed(opcode)) { ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg, - &tmp_aux, false); + &info, false); if (ret < 0) return sanitize_err(env, insn, ret, off_reg, dst_reg); } @@ -6895,7 +6942,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, return -EACCES; if (sanitize_needed(opcode)) { ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg, - &tmp_aux, true); + &info, true); if (ret < 0) return sanitize_err(env, insn, ret, off_reg, dst_reg); } @@ -7084,11 +7131,10 @@ static void scalar32_min_max_and(struct bpf_reg_state *dst_reg, s32 smin_val = src_reg->s32_min_value; u32 umax_val = src_reg->u32_max_value; - /* Assuming scalar64_min_max_and will be called so its safe - * to skip updating register for known 32-bit case. - */ - if (src_known && dst_known) + if (src_known && dst_known) { + __mark_reg32_known(dst_reg, var32_off.value); return; + } /* We get our minimum from the var_off, since that's inherently * bitwise. Our maximum is the minimum of the operands' maxima. @@ -7108,7 +7154,6 @@ static void scalar32_min_max_and(struct bpf_reg_state *dst_reg, dst_reg->s32_min_value = dst_reg->u32_min_value; dst_reg->s32_max_value = dst_reg->u32_max_value; } - } static void scalar_min_max_and(struct bpf_reg_state *dst_reg, @@ -7155,11 +7200,10 @@ static void scalar32_min_max_or(struct bpf_reg_state *dst_reg, s32 smin_val = src_reg->s32_min_value; u32 umin_val = src_reg->u32_min_value; - /* Assuming scalar64_min_max_or will be called so it is safe - * to skip updating register for known case. - */ - if (src_known && dst_known) + if (src_known && dst_known) { + __mark_reg32_known(dst_reg, var32_off.value); return; + } /* We get our maximum from the var_off, and our minimum is the * maximum of the operands' minima @@ -7224,11 +7268,10 @@ static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg, struct tnum var32_off = tnum_subreg(dst_reg->var_off); s32 smin_val = src_reg->s32_min_value; - /* Assuming scalar64_min_max_xor will be called so it is safe - * to skip updating register for known case. - */ - if (src_known && dst_known) + if (src_known && dst_known) { + __mark_reg32_known(dst_reg, var32_off.value); return; + } /* We get both minimum and maximum from the var32_off. */ dst_reg->u32_min_value = var32_off.value; @@ -8744,14 +8787,28 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, if (err) return err; } + if (pred == 1) { - /* only follow the goto, ignore fall-through */ + /* Only follow the goto, ignore fall-through. If needed, push + * the fall-through branch for simulation under speculative + * execution. + */ + if (!env->bypass_spec_v1 && + !sanitize_speculative_path(env, insn, *insn_idx + 1, + *insn_idx)) + return -EFAULT; *insn_idx += insn->off; return 0; } else if (pred == 0) { - /* only follow fall-through branch, since - * that's where the program will go + /* Only follow the fall-through branch, since that's where the + * program will go. If needed, push the goto branch for + * simulation under speculative execution. */ + if (!env->bypass_spec_v1 && + !sanitize_speculative_path(env, insn, + *insn_idx + insn->off + 1, + *insn_idx)) + return -EFAULT; return 0; } @@ -8913,12 +8970,14 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) mark_reg_known_zero(env, regs, insn->dst_reg); dst_reg->map_ptr = map; - if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) { + if (insn->src_reg == BPF_PSEUDO_MAP_VALUE || + insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) { dst_reg->type = PTR_TO_MAP_VALUE; dst_reg->off = aux->map_off; if (map_value_has_spin_lock(map)) dst_reg->id = ++env->id_gen; - } else if (insn->src_reg == BPF_PSEUDO_MAP_FD) { + } else if (insn->src_reg == BPF_PSEUDO_MAP_FD || + insn->src_reg == BPF_PSEUDO_MAP_IDX) { dst_reg->type = CONST_PTR_TO_MAP; } else { verbose(env, "bpf verifier is misconfigured\n"); @@ -9049,7 +9108,7 @@ static int check_return_code(struct bpf_verifier_env *env) !prog->aux->attach_func_proto->type) return 0; - /* eBPF calling convetion is such that R0 is used + /* eBPF calling convention is such that R0 is used * to return the value from eBPF program. * Make sure that it's readable at this time * of bpf_exit, which means that program wrote @@ -9434,7 +9493,7 @@ static int check_abnormal_return(struct bpf_verifier_env *env) static int check_btf_func(struct bpf_verifier_env *env, const union bpf_attr *attr, - union bpf_attr __user *uattr) + bpfptr_t uattr) { const struct btf_type *type, *func_proto, *ret_type; u32 i, nfuncs, urec_size, min_size; @@ -9443,7 +9502,7 @@ static int check_btf_func(struct bpf_verifier_env *env, struct bpf_func_info_aux *info_aux = NULL; struct bpf_prog *prog; const struct btf *btf; - void __user *urecord; + bpfptr_t urecord; u32 prev_offset = 0; bool scalar_return; int ret = -ENOMEM; @@ -9471,7 +9530,7 @@ static int check_btf_func(struct bpf_verifier_env *env, prog = env->prog; btf = prog->aux->btf; - urecord = u64_to_user_ptr(attr->func_info); + urecord = make_bpfptr(attr->func_info, uattr.is_kernel); min_size = min_t(u32, krec_size, urec_size); krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN); @@ -9489,13 +9548,15 @@ static int check_btf_func(struct bpf_verifier_env *env, /* set the size kernel expects so loader can zero * out the rest of the record. */ - if (put_user(min_size, &uattr->func_info_rec_size)) + if (copy_to_bpfptr_offset(uattr, + offsetof(union bpf_attr, func_info_rec_size), + &min_size, sizeof(min_size))) ret = -EFAULT; } goto err_free; } - if (copy_from_user(&krecord[i], urecord, min_size)) { + if (copy_from_bpfptr(&krecord[i], urecord, min_size)) { ret = -EFAULT; goto err_free; } @@ -9547,7 +9608,7 @@ static int check_btf_func(struct bpf_verifier_env *env, } prev_offset = krecord[i].insn_off; - urecord += urec_size; + bpfptr_add(&urecord, urec_size); } prog->aux->func_info = krecord; @@ -9579,14 +9640,14 @@ static void adjust_btf_func(struct bpf_verifier_env *env) static int check_btf_line(struct bpf_verifier_env *env, const union bpf_attr *attr, - union bpf_attr __user *uattr) + bpfptr_t uattr) { u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0; struct bpf_subprog_info *sub; struct bpf_line_info *linfo; struct bpf_prog *prog; const struct btf *btf; - void __user *ulinfo; + bpfptr_t ulinfo; int err; nr_linfo = attr->line_info_cnt; @@ -9612,7 +9673,7 @@ static int check_btf_line(struct bpf_verifier_env *env, s = 0; sub = env->subprog_info; - ulinfo = u64_to_user_ptr(attr->line_info); + ulinfo = make_bpfptr(attr->line_info, uattr.is_kernel); expected_size = sizeof(struct bpf_line_info); ncopy = min_t(u32, expected_size, rec_size); for (i = 0; i < nr_linfo; i++) { @@ -9620,14 +9681,15 @@ static int check_btf_line(struct bpf_verifier_env *env, if (err) { if (err == -E2BIG) { verbose(env, "nonzero tailing record in line_info"); - if (put_user(expected_size, - &uattr->line_info_rec_size)) + if (copy_to_bpfptr_offset(uattr, + offsetof(union bpf_attr, line_info_rec_size), + &expected_size, sizeof(expected_size))) err = -EFAULT; } goto err_free; } - if (copy_from_user(&linfo[i], ulinfo, ncopy)) { + if (copy_from_bpfptr(&linfo[i], ulinfo, ncopy)) { err = -EFAULT; goto err_free; } @@ -9679,7 +9741,7 @@ static int check_btf_line(struct bpf_verifier_env *env, } prev_offset = linfo[i].insn_off; - ulinfo += rec_size; + bpfptr_add(&ulinfo, rec_size); } if (s != env->subprog_cnt) { @@ -9701,7 +9763,7 @@ err_free: static int check_btf_info(struct bpf_verifier_env *env, const union bpf_attr *attr, - union bpf_attr __user *uattr) + bpfptr_t uattr) { struct btf *btf; int err; @@ -9746,13 +9808,6 @@ static bool range_within(struct bpf_reg_state *old, old->s32_max_value >= cur->s32_max_value; } -/* Maximum number of register states that can exist at once */ -#define ID_MAP_SIZE (MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE) -struct idpair { - u32 old; - u32 cur; -}; - /* If in the old state two registers had the same id, then they need to have * the same id in the new state as well. But that id could be different from * the old state, so we need to track the mapping from old to new ids. @@ -9763,11 +9818,11 @@ struct idpair { * So we look through our idmap to see if this old id has been seen before. If * so, we require the new id to match; otherwise, we add the id pair to the map. */ -static bool check_ids(u32 old_id, u32 cur_id, struct idpair *idmap) +static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap) { unsigned int i; - for (i = 0; i < ID_MAP_SIZE; i++) { + for (i = 0; i < BPF_ID_MAP_SIZE; i++) { if (!idmap[i].old) { /* Reached an empty slot; haven't seen this id before */ idmap[i].old = old_id; @@ -9844,7 +9899,7 @@ static void clean_verifier_state(struct bpf_verifier_env *env, * Since the verifier pushes the branch states as it sees them while exploring * the program the condition of walking the branch instruction for the second * time means that all states below this branch were already explored and - * their final liveness markes are already propagated. + * their final liveness marks are already propagated. * Hence when the verifier completes the search of state list in is_state_visited() * we can call this clean_live_states() function to mark all liveness states * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state' @@ -9880,7 +9935,7 @@ next: /* Returns true if (rold safe implies rcur safe) */ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, - struct idpair *idmap) + struct bpf_id_pair *idmap) { bool equal; @@ -9998,7 +10053,7 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, static bool stacksafe(struct bpf_func_state *old, struct bpf_func_state *cur, - struct idpair *idmap) + struct bpf_id_pair *idmap) { int i, spi; @@ -10095,32 +10150,23 @@ static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur) * whereas register type in current state is meaningful, it means that * the current state will reach 'bpf_exit' instruction safely */ -static bool func_states_equal(struct bpf_func_state *old, +static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old, struct bpf_func_state *cur) { - struct idpair *idmap; - bool ret = false; int i; - idmap = kcalloc(ID_MAP_SIZE, sizeof(struct idpair), GFP_KERNEL); - /* If we failed to allocate the idmap, just say it's not safe */ - if (!idmap) - return false; - - for (i = 0; i < MAX_BPF_REG; i++) { - if (!regsafe(&old->regs[i], &cur->regs[i], idmap)) - goto out_free; - } + memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch)); + for (i = 0; i < MAX_BPF_REG; i++) + if (!regsafe(&old->regs[i], &cur->regs[i], env->idmap_scratch)) + return false; - if (!stacksafe(old, cur, idmap)) - goto out_free; + if (!stacksafe(old, cur, env->idmap_scratch)) + return false; if (!refsafe(old, cur)) - goto out_free; - ret = true; -out_free: - kfree(idmap); - return ret; + return false; + + return true; } static bool states_equal(struct bpf_verifier_env *env, @@ -10147,7 +10193,7 @@ static bool states_equal(struct bpf_verifier_env *env, for (i = 0; i <= old->curframe; i++) { if (old->frame[i]->callsite != cur->frame[i]->callsite) return false; - if (!func_states_equal(old->frame[i], cur->frame[i])) + if (!func_states_equal(env, old->frame[i], cur->frame[i])) return false; } return true; @@ -10624,7 +10670,7 @@ static int do_check(struct bpf_verifier_env *env) } regs = cur_regs(env); - env->insn_aux_data[env->insn_idx].seen = env->pass_cnt; + sanitize_mark_insn_seen(env); prev_insn_idx = env->insn_idx; if (class == BPF_ALU || class == BPF_ALU64) { @@ -10851,7 +10897,7 @@ process_bpf_exit: return err; env->insn_idx++; - env->insn_aux_data[env->insn_idx].seen = env->pass_cnt; + sanitize_mark_insn_seen(env); } else { verbose(env, "invalid BPF_LD mode\n"); return -EINVAL; @@ -11184,6 +11230,7 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env) struct bpf_map *map; struct fd f; u64 addr; + u32 fd; if (i == insn_cnt - 1 || insn[1].code != 0 || insn[1].dst_reg != 0 || insn[1].src_reg != 0 || @@ -11213,16 +11260,38 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env) /* In final convert_pseudo_ld_imm64() step, this is * converted into regular 64-bit imm load insn. */ - if ((insn[0].src_reg != BPF_PSEUDO_MAP_FD && - insn[0].src_reg != BPF_PSEUDO_MAP_VALUE) || - (insn[0].src_reg == BPF_PSEUDO_MAP_FD && - insn[1].imm != 0)) { - verbose(env, - "unrecognized bpf_ld_imm64 insn\n"); + switch (insn[0].src_reg) { + case BPF_PSEUDO_MAP_VALUE: + case BPF_PSEUDO_MAP_IDX_VALUE: + break; + case BPF_PSEUDO_MAP_FD: + case BPF_PSEUDO_MAP_IDX: + if (insn[1].imm == 0) + break; + fallthrough; + default: + verbose(env, "unrecognized bpf_ld_imm64 insn\n"); return -EINVAL; } - f = fdget(insn[0].imm); + switch (insn[0].src_reg) { + case BPF_PSEUDO_MAP_IDX_VALUE: + case BPF_PSEUDO_MAP_IDX: + if (bpfptr_is_null(env->fd_array)) { + verbose(env, "fd_idx without fd_array is invalid\n"); + return -EPROTO; + } + if (copy_from_bpfptr_offset(&fd, env->fd_array, + insn[0].imm * sizeof(fd), + sizeof(fd))) + return -EFAULT; + break; + default: + fd = insn[0].imm; + break; + } + + f = fdget(fd); map = __bpf_map_get(f); if (IS_ERR(map)) { verbose(env, "fd %d is not pointing to valid bpf_map\n", @@ -11237,7 +11306,8 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env) } aux = &env->insn_aux_data[i]; - if (insn->src_reg == BPF_PSEUDO_MAP_FD) { + if (insn[0].src_reg == BPF_PSEUDO_MAP_FD || + insn[0].src_reg == BPF_PSEUDO_MAP_IDX) { addr = (unsigned long)map; } else { u32 off = insn[1].imm; @@ -11360,6 +11430,7 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env, { struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data; struct bpf_insn *insn = new_prog->insnsi; + u32 old_seen = old_data[off].seen; u32 prog_len; int i; @@ -11380,7 +11451,8 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env, memcpy(new_data + off + cnt - 1, old_data + off, sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1)); for (i = off; i < off + cnt - 1; i++) { - new_data[i].seen = env->pass_cnt; + /* Expand insni[off]'s seen count to the patched range. */ + new_data[i].seen = old_seen; new_data[i].zext_dst = insn_has_def32(env, insn + i); } env->insn_aux_data = new_data; @@ -11402,7 +11474,7 @@ static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len } } -static void adjust_poke_descs(struct bpf_prog *prog, u32 len) +static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len) { struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab; int i, sz = prog->aux->size_poke_tab; @@ -11410,6 +11482,8 @@ static void adjust_poke_descs(struct bpf_prog *prog, u32 len) for (i = 0; i < sz; i++) { desc = &tab[i]; + if (desc->insn_idx <= off) + continue; desc->insn_idx += len - 1; } } @@ -11430,7 +11504,7 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of if (adjust_insn_aux_data(env, new_prog, off, len)) return NULL; adjust_subprog_starts(env, off, len); - adjust_poke_descs(new_prog, len); + adjust_poke_descs(new_prog, off, len); return new_prog; } @@ -12449,7 +12523,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env) prog->aux->max_pkt_offset = MAX_PACKET_OFF; /* mark bpf_tail_call as different opcode to avoid - * conditional branch in the interpeter for every normal + * conditional branch in the interpreter for every normal * call and to prevent accidental JITing by JIT compiler * that doesn't support bpf_tail_call yet */ @@ -12704,6 +12778,9 @@ static void free_states(struct bpf_verifier_env *env) * insn_aux_data was touched. These variables are compared to clear temporary * data from failed pass. For testing and experiments do_check_common() can be * run multiple times even when prior attempt to verify is unsuccessful. + * + * Note that special handling is needed on !env->bypass_spec_v1 if this is + * ever called outside of error path with subsequent program rejection. */ static void sanitize_insn_aux_data(struct bpf_verifier_env *env) { @@ -13200,6 +13277,17 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, return 0; } +BTF_SET_START(btf_id_deny) +BTF_ID_UNUSED +#ifdef CONFIG_SMP +BTF_ID(func, migrate_disable) +BTF_ID(func, migrate_enable) +#endif +#if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU +BTF_ID(func, rcu_read_unlock_strict) +#endif +BTF_SET_END(btf_id_deny) + static int check_attach_btf_id(struct bpf_verifier_env *env) { struct bpf_prog *prog = env->prog; @@ -13210,6 +13298,14 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) int ret; u64 key; + if (prog->type == BPF_PROG_TYPE_SYSCALL) { + if (prog->aux->sleepable) + /* attach_btf_id checked to be zero already */ + return 0; + verbose(env, "Syscall programs can only be sleepable\n"); + return -EINVAL; + } + if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING && prog->type != BPF_PROG_TYPE_LSM) { verbose(env, "Only fentry/fexit/fmod_ret and lsm programs can be sleepable\n"); @@ -13259,6 +13355,9 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) ret = bpf_lsm_verify_prog(&env->log, prog); if (ret < 0) return ret; + } else if (prog->type == BPF_PROG_TYPE_TRACING && + btf_id_set_contains(&btf_id_deny, btf_id)) { + return -EINVAL; } key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id); @@ -13281,8 +13380,7 @@ struct btf *bpf_get_btf_vmlinux(void) return btf_vmlinux; } -int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, - union bpf_attr __user *uattr) +int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr) { u64 start_time = ktime_get_ns(); struct bpf_verifier_env *env; @@ -13312,6 +13410,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, env->insn_aux_data[i].orig_idx = i; env->prog = *prog; env->ops = bpf_verifier_ops[env->prog->type]; + env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel); is_priv = bpf_capable(); bpf_get_btf_vmlinux(); @@ -13358,12 +13457,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, if (is_priv) env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ; - if (bpf_prog_is_dev_bound(env->prog->aux)) { - ret = bpf_prog_offload_verifier_prep(env->prog); - if (ret) - goto skip_full_check; - } - env->explored_states = kvcalloc(state_htab_size(env), sizeof(struct bpf_verifier_state_list *), GFP_USER); @@ -13391,6 +13484,12 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, if (ret < 0) goto skip_full_check; + if (bpf_prog_is_dev_bound(env->prog->aux)) { + ret = bpf_prog_offload_verifier_prep(env->prog); + if (ret) + goto skip_full_check; + } + ret = check_cfg(env); if (ret < 0) goto skip_full_check; |