diff options
Diffstat (limited to 'kernel/bpf/verifier.c')
-rw-r--r-- | kernel/bpf/verifier.c | 3054 |
1 files changed, 2165 insertions, 889 deletions
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f3001937bbb9..225666307bba 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4,6 +4,7 @@ * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io */ #include <uapi/linux/btf.h> +#include <linux/bpf-cgroup.h> #include <linux/kernel.h> #include <linux/types.h> #include <linux/slab.h> @@ -22,6 +23,7 @@ #include <linux/error-injection.h> #include <linux/bpf_lsm.h> #include <linux/btf_ids.h> +#include <linux/poison.h> #include "disasm.h" @@ -186,6 +188,9 @@ struct bpf_verifier_stack_elem { POISON_POINTER_DELTA)) #define BPF_MAP_PTR(X) ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV)) +static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx); +static int release_reference(struct bpf_verifier_env *env, int ref_obj_id); + static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux) { return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON; @@ -244,6 +249,7 @@ struct bpf_call_arg_meta { struct bpf_map *map_ptr; bool raw_mode; bool pkt_access; + u8 release_regno; int regno; int access_size; int mem_size; @@ -256,6 +262,8 @@ struct bpf_call_arg_meta { struct btf *ret_btf; u32 ret_btf_id; u32 subprogno; + struct bpf_map_value_off_desc *kptr_off_desc; + u8 uninit_dynptr_regno; }; struct btf *btf_vmlinux; @@ -293,13 +301,15 @@ void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt, WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1, "verifier log line truncated - local buffer too short\n"); - n = min(log->len_total - log->len_used - 1, n); - log->kbuf[n] = '\0'; - if (log->level == BPF_LOG_KERNEL) { - pr_err("BPF:%s\n", log->kbuf); + bool newline = n > 0 && log->kbuf[n - 1] == '\n'; + + pr_err("BPF: %s%s", log->kbuf, newline ? "" : "\n"); return; } + + n = min(log->len_total - log->len_used - 1, n); + log->kbuf[n] = '\0'; if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1)) log->len_used += n; else @@ -361,6 +371,7 @@ __printf(2, 3) void bpf_log(struct bpf_verifier_log *log, bpf_verifier_vlog(log, fmt, args); va_end(args); } +EXPORT_SYMBOL_GPL(bpf_log); static const char *ltrim(const char *s) { @@ -418,6 +429,7 @@ static void verbose_invalid_scalar(struct bpf_verifier_env *env, static bool type_is_pkt_pointer(enum bpf_reg_type type) { + type = base_type(type); return type == PTR_TO_PACKET || type == PTR_TO_PACKET_META; } @@ -439,18 +451,6 @@ static bool reg_type_not_null(enum bpf_reg_type type) type == PTR_TO_SOCK_COMMON; } -static bool reg_type_may_be_null(enum bpf_reg_type type) -{ - return type == PTR_TO_MAP_VALUE_OR_NULL || - type == PTR_TO_SOCKET_OR_NULL || - type == PTR_TO_SOCK_COMMON_OR_NULL || - type == PTR_TO_TCP_SOCK_OR_NULL || - type == PTR_TO_BTF_ID_OR_NULL || - type == PTR_TO_MEM_OR_NULL || - type == PTR_TO_RDONLY_BUF_OR_NULL || - type == PTR_TO_RDWR_BUF_OR_NULL; -} - static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) { return reg->type == PTR_TO_MAP_VALUE && @@ -459,47 +459,19 @@ static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type) { - return type == PTR_TO_SOCKET || - type == PTR_TO_SOCKET_OR_NULL || - type == PTR_TO_TCP_SOCK || - type == PTR_TO_TCP_SOCK_OR_NULL || - type == PTR_TO_MEM || - type == PTR_TO_MEM_OR_NULL; -} - -static bool arg_type_may_be_refcounted(enum bpf_arg_type type) -{ - return type == ARG_PTR_TO_SOCK_COMMON; -} - -static bool arg_type_may_be_null(enum bpf_arg_type type) -{ - return type == ARG_PTR_TO_MAP_VALUE_OR_NULL || - type == ARG_PTR_TO_MEM_OR_NULL || - type == ARG_PTR_TO_CTX_OR_NULL || - type == ARG_PTR_TO_SOCKET_OR_NULL || - type == ARG_PTR_TO_ALLOC_MEM_OR_NULL || - type == ARG_PTR_TO_STACK_OR_NULL; + type = base_type(type); + return type == PTR_TO_SOCKET || type == PTR_TO_TCP_SOCK || + type == PTR_TO_MEM || type == PTR_TO_BTF_ID; } -/* Determine whether the function releases some resources allocated by another - * function call. The first reference type argument will be assumed to be - * released by release_reference(). - */ -static bool is_release_function(enum bpf_func_id func_id) +static bool type_is_rdonly_mem(u32 type) { - return func_id == BPF_FUNC_sk_release || - func_id == BPF_FUNC_ringbuf_submit || - func_id == BPF_FUNC_ringbuf_discard; + return type & MEM_RDONLY; } -static bool may_be_acquire_function(enum bpf_func_id func_id) +static bool type_may_be_null(u32 type) { - return func_id == BPF_FUNC_sk_lookup_tcp || - func_id == BPF_FUNC_sk_lookup_udp || - func_id == BPF_FUNC_skc_lookup_tcp || - func_id == BPF_FUNC_map_lookup_elem || - func_id == BPF_FUNC_ringbuf_reserve; + return type & PTR_MAYBE_NULL; } static bool is_acquire_function(enum bpf_func_id func_id, @@ -510,7 +482,8 @@ static bool is_acquire_function(enum bpf_func_id func_id, if (func_id == BPF_FUNC_sk_lookup_tcp || func_id == BPF_FUNC_sk_lookup_udp || func_id == BPF_FUNC_skc_lookup_tcp || - func_id == BPF_FUNC_ringbuf_reserve) + func_id == BPF_FUNC_ringbuf_reserve || + func_id == BPF_FUNC_kptr_xchg) return true; if (func_id == BPF_FUNC_map_lookup_elem && @@ -528,10 +501,31 @@ static bool is_ptr_cast_function(enum bpf_func_id func_id) func_id == BPF_FUNC_skc_to_tcp_sock || func_id == BPF_FUNC_skc_to_tcp6_sock || func_id == BPF_FUNC_skc_to_udp6_sock || + func_id == BPF_FUNC_skc_to_mptcp_sock || func_id == BPF_FUNC_skc_to_tcp_timewait_sock || func_id == BPF_FUNC_skc_to_tcp_request_sock; } +static bool is_dynptr_ref_function(enum bpf_func_id func_id) +{ + return func_id == BPF_FUNC_dynptr_data; +} + +static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id, + const struct bpf_map *map) +{ + int ref_obj_uses = 0; + + if (is_ptr_cast_function(func_id)) + ref_obj_uses++; + if (is_acquire_function(func_id, map)) + ref_obj_uses++; + if (is_dynptr_ref_function(func_id)) + ref_obj_uses++; + + return ref_obj_uses > 1; +} + static bool is_cmpxchg_insn(const struct bpf_insn *insn) { return BPF_CLASS(insn->code) == BPF_STX && @@ -539,45 +533,68 @@ static bool is_cmpxchg_insn(const struct bpf_insn *insn) insn->imm == BPF_CMPXCHG; } -/* string representation of 'enum bpf_reg_type' */ -static const char * const reg_type_str[] = { - [NOT_INIT] = "?", - [SCALAR_VALUE] = "inv", - [PTR_TO_CTX] = "ctx", - [CONST_PTR_TO_MAP] = "map_ptr", - [PTR_TO_MAP_VALUE] = "map_value", - [PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null", - [PTR_TO_STACK] = "fp", - [PTR_TO_PACKET] = "pkt", - [PTR_TO_PACKET_META] = "pkt_meta", - [PTR_TO_PACKET_END] = "pkt_end", - [PTR_TO_FLOW_KEYS] = "flow_keys", - [PTR_TO_SOCKET] = "sock", - [PTR_TO_SOCKET_OR_NULL] = "sock_or_null", - [PTR_TO_SOCK_COMMON] = "sock_common", - [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null", - [PTR_TO_TCP_SOCK] = "tcp_sock", - [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null", - [PTR_TO_TP_BUFFER] = "tp_buffer", - [PTR_TO_XDP_SOCK] = "xdp_sock", - [PTR_TO_BTF_ID] = "ptr_", - [PTR_TO_BTF_ID_OR_NULL] = "ptr_or_null_", - [PTR_TO_PERCPU_BTF_ID] = "percpu_ptr_", - [PTR_TO_MEM] = "mem", - [PTR_TO_MEM_OR_NULL] = "mem_or_null", - [PTR_TO_RDONLY_BUF] = "rdonly_buf", - [PTR_TO_RDONLY_BUF_OR_NULL] = "rdonly_buf_or_null", - [PTR_TO_RDWR_BUF] = "rdwr_buf", - [PTR_TO_RDWR_BUF_OR_NULL] = "rdwr_buf_or_null", - [PTR_TO_FUNC] = "func", - [PTR_TO_MAP_KEY] = "map_key", -}; +/* string representation of 'enum bpf_reg_type' + * + * Note that reg_type_str() can not appear more than once in a single verbose() + * statement. + */ +static const char *reg_type_str(struct bpf_verifier_env *env, + enum bpf_reg_type type) +{ + char postfix[16] = {0}, prefix[32] = {0}; + static const char * const str[] = { + [NOT_INIT] = "?", + [SCALAR_VALUE] = "scalar", + [PTR_TO_CTX] = "ctx", + [CONST_PTR_TO_MAP] = "map_ptr", + [PTR_TO_MAP_VALUE] = "map_value", + [PTR_TO_STACK] = "fp", + [PTR_TO_PACKET] = "pkt", + [PTR_TO_PACKET_META] = "pkt_meta", + [PTR_TO_PACKET_END] = "pkt_end", + [PTR_TO_FLOW_KEYS] = "flow_keys", + [PTR_TO_SOCKET] = "sock", + [PTR_TO_SOCK_COMMON] = "sock_common", + [PTR_TO_TCP_SOCK] = "tcp_sock", + [PTR_TO_TP_BUFFER] = "tp_buffer", + [PTR_TO_XDP_SOCK] = "xdp_sock", + [PTR_TO_BTF_ID] = "ptr_", + [PTR_TO_MEM] = "mem", + [PTR_TO_BUF] = "buf", + [PTR_TO_FUNC] = "func", + [PTR_TO_MAP_KEY] = "map_key", + [PTR_TO_DYNPTR] = "dynptr_ptr", + }; + + if (type & PTR_MAYBE_NULL) { + if (base_type(type) == PTR_TO_BTF_ID) + strncpy(postfix, "or_null_", 16); + else + strncpy(postfix, "_or_null", 16); + } + + if (type & MEM_RDONLY) + strncpy(prefix, "rdonly_", 32); + if (type & MEM_ALLOC) + strncpy(prefix, "alloc_", 32); + if (type & MEM_USER) + strncpy(prefix, "user_", 32); + if (type & MEM_PERCPU) + strncpy(prefix, "percpu_", 32); + if (type & PTR_UNTRUSTED) + strncpy(prefix, "untrusted_", 32); + + snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s", + prefix, str[base_type(type)], postfix); + return env->type_str_buf; +} static char slot_type_char[] = { [STACK_INVALID] = '?', [STACK_SPILL] = 'r', [STACK_MISC] = 'm', [STACK_ZERO] = '0', + [STACK_DYNPTR] = 'd', }; static void print_liveness(struct bpf_verifier_env *env, @@ -593,6 +610,25 @@ static void print_liveness(struct bpf_verifier_env *env, verbose(env, "D"); } +static int get_spi(s32 off) +{ + return (-off - 1) / BPF_REG_SIZE; +} + +static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots) +{ + int allocated_slots = state->allocated_stack / BPF_REG_SIZE; + + /* We need to check that slots between [spi - nr_slots + 1, spi] are + * within [0, allocated_stack). + * + * Please note that the spi grows downwards. For example, a dynptr + * takes the size of two stack slots; the first slot will be at + * spi and the second slot will be at spi - 1. + */ + return spi - nr_slots + 1 >= 0 && spi < allocated_slots; +} + static struct bpf_func_state *func(struct bpf_verifier_env *env, const struct bpf_reg_state *reg) { @@ -606,6 +642,183 @@ static const char *kernel_type_name(const struct btf* btf, u32 id) return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off); } +static void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno) +{ + env->scratched_regs |= 1U << regno; +} + +static void mark_stack_slot_scratched(struct bpf_verifier_env *env, u32 spi) +{ + env->scratched_stack_slots |= 1ULL << spi; +} + +static bool reg_scratched(const struct bpf_verifier_env *env, u32 regno) +{ + return (env->scratched_regs >> regno) & 1; +} + +static bool stack_slot_scratched(const struct bpf_verifier_env *env, u64 regno) +{ + return (env->scratched_stack_slots >> regno) & 1; +} + +static bool verifier_state_scratched(const struct bpf_verifier_env *env) +{ + return env->scratched_regs || env->scratched_stack_slots; +} + +static void mark_verifier_state_clean(struct bpf_verifier_env *env) +{ + env->scratched_regs = 0U; + env->scratched_stack_slots = 0ULL; +} + +/* Used for printing the entire verifier state. */ +static void mark_verifier_state_scratched(struct bpf_verifier_env *env) +{ + env->scratched_regs = ~0U; + env->scratched_stack_slots = ~0ULL; +} + +static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type) +{ + switch (arg_type & DYNPTR_TYPE_FLAG_MASK) { + case DYNPTR_TYPE_LOCAL: + return BPF_DYNPTR_TYPE_LOCAL; + case DYNPTR_TYPE_RINGBUF: + return BPF_DYNPTR_TYPE_RINGBUF; + default: + return BPF_DYNPTR_TYPE_INVALID; + } +} + +static bool dynptr_type_refcounted(enum bpf_dynptr_type type) +{ + return type == BPF_DYNPTR_TYPE_RINGBUF; +} + +static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg, + enum bpf_arg_type arg_type, int insn_idx) +{ + struct bpf_func_state *state = func(env, reg); + enum bpf_dynptr_type type; + int spi, i, id; + + spi = get_spi(reg->off); + + if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS)) + return -EINVAL; + + for (i = 0; i < BPF_REG_SIZE; i++) { + state->stack[spi].slot_type[i] = STACK_DYNPTR; + state->stack[spi - 1].slot_type[i] = STACK_DYNPTR; + } + + type = arg_to_dynptr_type(arg_type); + if (type == BPF_DYNPTR_TYPE_INVALID) + return -EINVAL; + + state->stack[spi].spilled_ptr.dynptr.first_slot = true; + state->stack[spi].spilled_ptr.dynptr.type = type; + state->stack[spi - 1].spilled_ptr.dynptr.type = type; + + if (dynptr_type_refcounted(type)) { + /* The id is used to track proper releasing */ + id = acquire_reference_state(env, insn_idx); + if (id < 0) + return id; + + state->stack[spi].spilled_ptr.id = id; + state->stack[spi - 1].spilled_ptr.id = id; + } + + return 0; +} + +static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + struct bpf_func_state *state = func(env, reg); + int spi, i; + + spi = get_spi(reg->off); + + if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS)) + return -EINVAL; + + for (i = 0; i < BPF_REG_SIZE; i++) { + state->stack[spi].slot_type[i] = STACK_INVALID; + state->stack[spi - 1].slot_type[i] = STACK_INVALID; + } + + /* Invalidate any slices associated with this dynptr */ + if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) { + release_reference(env, state->stack[spi].spilled_ptr.id); + state->stack[spi].spilled_ptr.id = 0; + state->stack[spi - 1].spilled_ptr.id = 0; + } + + state->stack[spi].spilled_ptr.dynptr.first_slot = false; + state->stack[spi].spilled_ptr.dynptr.type = 0; + state->stack[spi - 1].spilled_ptr.dynptr.type = 0; + + return 0; +} + +static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + struct bpf_func_state *state = func(env, reg); + int spi = get_spi(reg->off); + int i; + + if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS)) + return true; + + for (i = 0; i < BPF_REG_SIZE; i++) { + if (state->stack[spi].slot_type[i] == STACK_DYNPTR || + state->stack[spi - 1].slot_type[i] == STACK_DYNPTR) + return false; + } + + return true; +} + +bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, + struct bpf_reg_state *reg) +{ + struct bpf_func_state *state = func(env, reg); + int spi = get_spi(reg->off); + int i; + + if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) || + !state->stack[spi].spilled_ptr.dynptr.first_slot) + return false; + + for (i = 0; i < BPF_REG_SIZE; i++) { + if (state->stack[spi].slot_type[i] != STACK_DYNPTR || + state->stack[spi - 1].slot_type[i] != STACK_DYNPTR) + return false; + } + + return true; +} + +bool is_dynptr_type_expected(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, + enum bpf_arg_type arg_type) +{ + struct bpf_func_state *state = func(env, reg); + enum bpf_dynptr_type dynptr_type; + int spi = get_spi(reg->off); + + /* ARG_PTR_TO_DYNPTR takes any type of dynptr */ + if (arg_type == ARG_PTR_TO_DYNPTR) + return true; + + dynptr_type = arg_to_dynptr_type(arg_type); + + return state->stack[spi].spilled_ptr.dynptr.type == dynptr_type; +} + /* The reg state of a pointer or a bounded scalar was saved when * it was spilled to the stack. */ @@ -621,7 +834,8 @@ static void scrub_spilled_slot(u8 *stype) } static void print_verifier_state(struct bpf_verifier_env *env, - const struct bpf_func_state *state) + const struct bpf_func_state *state, + bool print_all) { const struct bpf_reg_state *reg; enum bpf_reg_type t; @@ -634,78 +848,83 @@ static void print_verifier_state(struct bpf_verifier_env *env, t = reg->type; if (t == NOT_INIT) continue; + if (!print_all && !reg_scratched(env, i)) + continue; verbose(env, " R%d", i); print_liveness(env, reg->live); - verbose(env, "=%s", reg_type_str[t]); + verbose(env, "="); if (t == SCALAR_VALUE && reg->precise) verbose(env, "P"); if ((t == SCALAR_VALUE || t == PTR_TO_STACK) && tnum_is_const(reg->var_off)) { /* reg->off should be 0 for SCALAR_VALUE */ + verbose(env, "%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t)); verbose(env, "%lld", reg->var_off.value + reg->off); } else { - if (t == PTR_TO_BTF_ID || - t == PTR_TO_BTF_ID_OR_NULL || - t == PTR_TO_PERCPU_BTF_ID) + const char *sep = ""; + + verbose(env, "%s", reg_type_str(env, t)); + if (base_type(t) == PTR_TO_BTF_ID) verbose(env, "%s", kernel_type_name(reg->btf, reg->btf_id)); - verbose(env, "(id=%d", reg->id); - if (reg_type_may_be_refcounted_or_null(t)) - verbose(env, ",ref_obj_id=%d", reg->ref_obj_id); + verbose(env, "("); +/* + * _a stands for append, was shortened to avoid multiline statements below. + * This macro is used to output a comma separated list of attributes. + */ +#define verbose_a(fmt, ...) ({ verbose(env, "%s" fmt, sep, __VA_ARGS__); sep = ","; }) + + if (reg->id) + verbose_a("id=%d", reg->id); + if (reg_type_may_be_refcounted_or_null(t) && reg->ref_obj_id) + verbose_a("ref_obj_id=%d", reg->ref_obj_id); if (t != SCALAR_VALUE) - verbose(env, ",off=%d", reg->off); + verbose_a("off=%d", reg->off); if (type_is_pkt_pointer(t)) - verbose(env, ",r=%d", reg->range); - else if (t == CONST_PTR_TO_MAP || - t == PTR_TO_MAP_KEY || - t == PTR_TO_MAP_VALUE || - t == PTR_TO_MAP_VALUE_OR_NULL) - verbose(env, ",ks=%d,vs=%d", - reg->map_ptr->key_size, - reg->map_ptr->value_size); + verbose_a("r=%d", reg->range); + else if (base_type(t) == CONST_PTR_TO_MAP || + base_type(t) == PTR_TO_MAP_KEY || + base_type(t) == PTR_TO_MAP_VALUE) + verbose_a("ks=%d,vs=%d", + reg->map_ptr->key_size, + reg->map_ptr->value_size); if (tnum_is_const(reg->var_off)) { /* Typically an immediate SCALAR_VALUE, but * could be a pointer whose offset is too big * for reg->off */ - verbose(env, ",imm=%llx", reg->var_off.value); + verbose_a("imm=%llx", reg->var_off.value); } else { if (reg->smin_value != reg->umin_value && reg->smin_value != S64_MIN) - verbose(env, ",smin_value=%lld", - (long long)reg->smin_value); + verbose_a("smin=%lld", (long long)reg->smin_value); if (reg->smax_value != reg->umax_value && reg->smax_value != S64_MAX) - verbose(env, ",smax_value=%lld", - (long long)reg->smax_value); + verbose_a("smax=%lld", (long long)reg->smax_value); if (reg->umin_value != 0) - verbose(env, ",umin_value=%llu", - (unsigned long long)reg->umin_value); + verbose_a("umin=%llu", (unsigned long long)reg->umin_value); if (reg->umax_value != U64_MAX) - verbose(env, ",umax_value=%llu", - (unsigned long long)reg->umax_value); + verbose_a("umax=%llu", (unsigned long long)reg->umax_value); if (!tnum_is_unknown(reg->var_off)) { char tn_buf[48]; tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose(env, ",var_off=%s", tn_buf); + verbose_a("var_off=%s", tn_buf); } if (reg->s32_min_value != reg->smin_value && reg->s32_min_value != S32_MIN) - verbose(env, ",s32_min_value=%d", - (int)(reg->s32_min_value)); + verbose_a("s32_min=%d", (int)(reg->s32_min_value)); if (reg->s32_max_value != reg->smax_value && reg->s32_max_value != S32_MAX) - verbose(env, ",s32_max_value=%d", - (int)(reg->s32_max_value)); + verbose_a("s32_max=%d", (int)(reg->s32_max_value)); if (reg->u32_min_value != reg->umin_value && reg->u32_min_value != U32_MIN) - verbose(env, ",u32_min_value=%d", - (int)(reg->u32_min_value)); + verbose_a("u32_min=%d", (int)(reg->u32_min_value)); if (reg->u32_max_value != reg->umax_value && reg->u32_max_value != U32_MAX) - verbose(env, ",u32_max_value=%d", - (int)(reg->u32_max_value)); + verbose_a("u32_max=%d", (int)(reg->u32_max_value)); } +#undef verbose_a + verbose(env, ")"); } } @@ -723,12 +942,14 @@ static void print_verifier_state(struct bpf_verifier_env *env, types_buf[BPF_REG_SIZE] = 0; if (!valid) continue; + if (!print_all && !stack_slot_scratched(env, i)) + continue; verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); print_liveness(env, state->stack[i].spilled_ptr.live); if (is_spilled_reg(&state->stack[i])) { reg = &state->stack[i].spilled_ptr; t = reg->type; - verbose(env, "=%s", reg_type_str[t]); + verbose(env, "=%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t)); if (t == SCALAR_VALUE && reg->precise) verbose(env, "P"); if (t == SCALAR_VALUE && tnum_is_const(reg->var_off)) @@ -748,6 +969,26 @@ static void print_verifier_state(struct bpf_verifier_env *env, if (state->in_async_callback_fn) verbose(env, " async_cb"); verbose(env, "\n"); + mark_verifier_state_clean(env); +} + +static inline u32 vlog_alignment(u32 pos) +{ + return round_up(max(pos + BPF_LOG_MIN_ALIGNMENT / 2, BPF_LOG_ALIGNMENT), + BPF_LOG_MIN_ALIGNMENT) - pos - 1; +} + +static void print_insn_state(struct bpf_verifier_env *env, + const struct bpf_func_state *state) +{ + if (env->prev_log_len && env->prev_log_len == env->log.len_used) { + /* remove new line character */ + bpf_vlog_reset(&env->log, env->prev_log_len - 1); + verbose(env, "%*c;", vlog_alignment(env->prev_insn_print_len), ' '); + } else { + verbose(env, "%d:", env->insn_idx); + } + print_verifier_state(env, state, false); } /* copy array src of length n * size bytes to dst. dst is reallocated if it's too @@ -786,12 +1027,17 @@ out: */ static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size) { + void *new_arr; + if (!new_n || old_n == new_n) goto out; - arr = krealloc_array(arr, new_n, size, GFP_KERNEL); - if (!arr) + new_arr = krealloc_array(arr, new_n, size, GFP_KERNEL); + if (!new_arr) { + kfree(arr); return NULL; + } + arr = new_arr; if (new_n > old_n) memset(arr + old_n * size, 0, (new_n - old_n) * size); @@ -867,6 +1113,7 @@ static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx) id = ++env->id_gen; state->refs[new_ofs].id = id; state->refs[new_ofs].insn_idx = insn_idx; + state->refs[new_ofs].callback_ref = state->in_callback_fn ? state->frameno : 0; return id; } @@ -879,6 +1126,9 @@ static int release_reference_state(struct bpf_func_state *state, int ptr_id) last_idx = state->acquired_refs - 1; for (i = 0; i < state->acquired_refs; i++) { if (state->refs[i].id == ptr_id) { + /* Cannot release caller references in callbacks */ + if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno) + return -EINVAL; if (last_idx && i != last_idx) memcpy(&state->refs[i], &state->refs[last_idx], sizeof(*state->refs)); @@ -1141,8 +1391,7 @@ static void mark_reg_known_zero(struct bpf_verifier_env *env, static void mark_ptr_not_null_reg(struct bpf_reg_state *reg) { - switch (reg->type) { - case PTR_TO_MAP_VALUE_OR_NULL: { + if (base_type(reg->type) == PTR_TO_MAP_VALUE) { const struct bpf_map *map = reg->map_ptr; if (map->inner_map_meta) { @@ -1161,32 +1410,10 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg) } else { reg->type = PTR_TO_MAP_VALUE; } - break; - } - case PTR_TO_SOCKET_OR_NULL: - reg->type = PTR_TO_SOCKET; - break; - case PTR_TO_SOCK_COMMON_OR_NULL: - reg->type = PTR_TO_SOCK_COMMON; - break; - case PTR_TO_TCP_SOCK_OR_NULL: - reg->type = PTR_TO_TCP_SOCK; - break; - case PTR_TO_BTF_ID_OR_NULL: - reg->type = PTR_TO_BTF_ID; - break; - case PTR_TO_MEM_OR_NULL: - reg->type = PTR_TO_MEM; - break; - case PTR_TO_RDONLY_BUF_OR_NULL: - reg->type = PTR_TO_RDONLY_BUF; - break; - case PTR_TO_RDWR_BUF_OR_NULL: - reg->type = PTR_TO_RDWR_BUF; - break; - default: - WARN_ONCE(1, "unknown nullable register type"); + return; } + + reg->type &= ~PTR_MAYBE_NULL; } static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg) @@ -1366,22 +1593,43 @@ static void __reg_bound_offset(struct bpf_reg_state *reg) reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off); } +static void reg_bounds_sync(struct bpf_reg_state *reg) +{ + /* We might have learned new bounds from the var_off. */ + __update_reg_bounds(reg); + /* We might have learned something about the sign bit. */ + __reg_deduce_bounds(reg); + /* We might have learned some bits from the bounds. */ + __reg_bound_offset(reg); + /* Intersecting with the old var_off might have improved our bounds + * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), + * then new var_off is (0; 0x7f...fc) which improves our umax. + */ + __update_reg_bounds(reg); +} + +static bool __reg32_bound_s64(s32 a) +{ + return a >= 0 && a <= S32_MAX; +} + static void __reg_assign_32_into_64(struct bpf_reg_state *reg) { reg->umin_value = reg->u32_min_value; reg->umax_value = reg->u32_max_value; - /* Attempt to pull 32-bit signed bounds into 64-bit bounds - * but must be positive otherwise set to worse case bounds - * and refine later from tnum. + + /* Attempt to pull 32-bit signed bounds into 64-bit bounds but must + * be positive otherwise set to worse case bounds and refine later + * from tnum. */ - if (reg->s32_min_value >= 0 && reg->s32_max_value >= 0) - reg->smax_value = reg->s32_max_value; - else - reg->smax_value = U32_MAX; - if (reg->s32_min_value >= 0) + if (__reg32_bound_s64(reg->s32_min_value) && + __reg32_bound_s64(reg->s32_max_value)) { reg->smin_value = reg->s32_min_value; - else + reg->smax_value = reg->s32_max_value; + } else { reg->smin_value = 0; + reg->smax_value = U32_MAX; + } } static void __reg_combine_32_into_64(struct bpf_reg_state *reg) @@ -1401,16 +1649,8 @@ static void __reg_combine_32_into_64(struct bpf_reg_state *reg) * so they do not impact tnum bounds calculation. */ __mark_reg64_unbounded(reg); - __update_reg_bounds(reg); } - - /* Intersecting with the old var_off might have improved our bounds - * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), - * then new var_off is (0; 0x7f...fc) which improves our umax. - */ - __reg_deduce_bounds(reg); - __reg_bound_offset(reg); - __update_reg_bounds(reg); + reg_bounds_sync(reg); } static bool __reg64_bound_s32(s64 a) @@ -1426,7 +1666,6 @@ static bool __reg64_bound_u32(u64 a) static void __reg_combine_64_into_32(struct bpf_reg_state *reg) { __mark_reg32_unbounded(reg); - if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) { reg->s32_min_value = (s32)reg->smin_value; reg->s32_max_value = (s32)reg->smax_value; @@ -1435,14 +1674,7 @@ static void __reg_combine_64_into_32(struct bpf_reg_state *reg) reg->u32_min_value = (u32)reg->umin_value; reg->u32_max_value = (u32)reg->umax_value; } - - /* Intersecting with the old var_off might have improved our bounds - * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), - * then new var_off is (0; 0x7f...fc) which improves our umax. - */ - __reg_deduce_bounds(reg); - __reg_bound_offset(reg); - __update_reg_bounds(reg); + reg_bounds_sync(reg); } /* Mark a register as having a completely unknown (scalar) value. */ @@ -1497,14 +1729,15 @@ static void mark_reg_not_init(struct bpf_verifier_env *env, static void mark_btf_ld_reg(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno, enum bpf_reg_type reg_type, - struct btf *btf, u32 btf_id) + struct btf *btf, u32 btf_id, + enum bpf_type_flag flag) { if (reg_type == SCALAR_VALUE) { mark_reg_unknown(env, regs, regno); return; } mark_reg_known_zero(env, regs, regno); - regs[regno].type = PTR_TO_BTF_ID; + regs[regno].type = PTR_TO_BTF_ID | flag; regs[regno].btf = btf; regs[regno].btf_id = btf_id; } @@ -1537,7 +1770,9 @@ static void init_func_state(struct bpf_verifier_env *env, state->callsite = callsite; state->frameno = frameno; state->subprogno = subprogno; + state->callback_ret_range = tnum_range(0, 0); init_reg_state(env, state); + mark_verifier_state_scratched(env); } /* Similar to push_stack(), but for async callbacks */ @@ -1693,7 +1928,7 @@ find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset) } static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env, - s16 offset, struct module **btf_modp) + s16 offset) { struct bpf_kfunc_btf kf_btf = { .offset = offset }; struct bpf_kfunc_btf_tab *tab; @@ -1747,8 +1982,6 @@ static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env, sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), kfunc_btf_cmp_by_off, NULL); } - if (btf_modp) - *btf_modp = b->module; return b->btf; } @@ -1764,9 +1997,7 @@ void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab) kfree(tab); } -static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, - u32 func_id, s16 offset, - struct module **btf_modp) +static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, s16 offset) { if (offset) { if (offset < 0) { @@ -1777,7 +2008,7 @@ static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, return ERR_PTR(-EINVAL); } - return __find_kfunc_desc_btf(env, offset, btf_modp); + return __find_kfunc_desc_btf(env, offset); } return btf_vmlinux ?: ERR_PTR(-ENOENT); } @@ -1791,6 +2022,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) struct bpf_kfunc_desc *desc; const char *func_name; struct btf *desc_btf; + unsigned long call_imm; unsigned long addr; int err; @@ -1840,7 +2072,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) prog_aux->kfunc_btf_tab = btf_tab; } - desc_btf = find_kfunc_desc_btf(env, func_id, offset, NULL); + desc_btf = find_kfunc_desc_btf(env, offset); if (IS_ERR(desc_btf)) { verbose(env, "failed to find BTF for kernel function\n"); return PTR_ERR(desc_btf); @@ -1875,9 +2107,17 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) return -EINVAL; } + call_imm = BPF_CALL_IMM(addr); + /* Check whether or not the relative offset overflows desc->imm */ + if ((unsigned long)(s32)call_imm != call_imm) { + verbose(env, "address of kernel function %s is out of range\n", + func_name); + return -EINVAL; + } + desc = &tab->descs[tab->nr_descs++]; desc->func_id = func_id; - desc->imm = BPF_CALL_IMM(addr); + desc->imm = call_imm; desc->offset = offset; err = btf_distill_func_proto(&env->log, desc_btf, func_proto, func_name, @@ -2041,7 +2281,7 @@ static int mark_reg_read(struct bpf_verifier_env *env, break; if (parent->live & REG_LIVE_DONE) { verbose(env, "verifier BUG type %s var_off %lld off %d\n", - reg_type_str[parent->type], + reg_type_str(env, parent->type), parent->var_off.value, parent->off); return -EFAULT; } @@ -2225,6 +2465,8 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, return -EINVAL; } + mark_reg_scratched(env, regno); + reg = ®s[regno]; rw64 = is_reg64(env, insn, regno, reg, t); if (t == SRC_OP) { @@ -2299,7 +2541,7 @@ static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn) if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL) return NULL; - desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off, NULL); + desc_btf = find_kfunc_desc_btf(data, insn->off); if (IS_ERR(desc_btf)) return "<error>"; @@ -2329,7 +2571,7 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, if (insn->code == 0) return 0; - if (env->log.level & BPF_LOG_LEVEL) { + if (env->log.level & BPF_LOG_LEVEL2) { verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask); verbose(env, "%d: ", idx); print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); @@ -2379,8 +2621,6 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, */ if (insn->src_reg != BPF_REG_FP) return 0; - if (BPF_SIZE(insn->code) != BPF_DW) - return 0; /* dreg = *(u64 *)[fp - off] was a fill from the stack. * that [fp - off] slot contains scalar that needs to be @@ -2403,8 +2643,6 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, /* scalars can only be spilled into stack */ if (insn->dst_reg != BPF_REG_FP) return 0; - if (BPF_SIZE(insn->code) != BPF_DW) - return 0; spi = (-insn->off - 1) / BPF_REG_SIZE; if (spi >= 64) { verbose(env, "BUG spi %d\n", spi); @@ -2587,7 +2825,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno, DECLARE_BITMAP(mask, 64); u32 history = st->jmp_history_cnt; - if (env->log.level & BPF_LOG_LEVEL) + if (env->log.level & BPF_LOG_LEVEL2) verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx); for (i = last_idx;;) { if (skip_first) { @@ -2674,11 +2912,11 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno, new_marks = true; reg->precise = true; } - if (env->log.level & BPF_LOG_LEVEL) { - print_verifier_state(env, func); - verbose(env, "parent %s regs=%x stack=%llx marks\n", + if (env->log.level & BPF_LOG_LEVEL2) { + verbose(env, "parent %s regs=%x stack=%llx marks:", new_marks ? "didn't have" : "already had", reg_mask, stack_mask); + print_verifier_state(env, func, true); } if (!reg_mask && !stack_mask) @@ -2692,7 +2930,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno, return 0; } -static int mark_chain_precision(struct bpf_verifier_env *env, int regno) +int mark_chain_precision(struct bpf_verifier_env *env, int regno) { return __mark_chain_precision(env, regno, -1); } @@ -2704,9 +2942,8 @@ static int mark_chain_precision_stack(struct bpf_verifier_env *env, int spi) static bool is_spillable_regtype(enum bpf_reg_type type) { - switch (type) { + switch (base_type(type)) { case PTR_TO_MAP_VALUE: - case PTR_TO_MAP_VALUE_OR_NULL: case PTR_TO_STACK: case PTR_TO_CTX: case PTR_TO_PACKET: @@ -2715,21 +2952,12 @@ static bool is_spillable_regtype(enum bpf_reg_type type) case PTR_TO_FLOW_KEYS: case CONST_PTR_TO_MAP: case PTR_TO_SOCKET: - case PTR_TO_SOCKET_OR_NULL: case PTR_TO_SOCK_COMMON: - case PTR_TO_SOCK_COMMON_OR_NULL: case PTR_TO_TCP_SOCK: - case PTR_TO_TCP_SOCK_OR_NULL: case PTR_TO_XDP_SOCK: case PTR_TO_BTF_ID: - case PTR_TO_BTF_ID_OR_NULL: - case PTR_TO_RDONLY_BUF: - case PTR_TO_RDONLY_BUF_OR_NULL: - case PTR_TO_RDWR_BUF: - case PTR_TO_RDWR_BUF_OR_NULL: - case PTR_TO_PERCPU_BTF_ID: + case PTR_TO_BUF: case PTR_TO_MEM: - case PTR_TO_MEM_OR_NULL: case PTR_TO_FUNC: case PTR_TO_MAP_KEY: return true; @@ -2834,6 +3062,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, env->insn_aux_data[insn_idx].sanitize_stack_spill = true; } + mark_stack_slot_scratched(env, spi); if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) && !register_is_null(reg) && env->bpf_capable) { if (dst_reg != BPF_REG_FP) { @@ -2955,6 +3184,7 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env, slot = -i - 1; spi = slot / BPF_REG_SIZE; stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE]; + mark_stack_slot_scratched(env, spi); if (!env->allow_ptr_leaks && *stype != NOT_INIT @@ -3156,7 +3386,7 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, return 0; } -enum stack_access_src { +enum bpf_access_src { ACCESS_DIRECT = 1, /* the access is performed by an instruction */ ACCESS_HELPER = 2, /* the access is performed by a helper */ }; @@ -3164,7 +3394,7 @@ enum stack_access_src { static int check_stack_range_initialized(struct bpf_verifier_env *env, int regno, int off, int access_size, bool zero_size_allowed, - enum stack_access_src type, + enum bpf_access_src type, struct bpf_call_arg_meta *meta); static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno) @@ -3371,11 +3601,8 @@ static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno, /* We may have adjusted the register pointing to memory region, so we * need to try adding each of min_value and max_value to off * to make sure our theoretical access will be safe. - */ - if (env->log.level & BPF_LOG_LEVEL) - print_verifier_state(env, state); - - /* The minimum value is only important with signed + * + * The minimum value is only important with signed * comparisons where we can't assume the floor of a * value is 0. If we are using signed variables for our * index'es we need to make sure that whatever we use @@ -3417,9 +3644,175 @@ static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno, return 0; } +static int __check_ptr_off_reg(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, int regno, + bool fixed_off_ok) +{ + /* Access to this pointer-typed register or passing it to a helper + * is only allowed in its original, unmodified form. + */ + + if (reg->off < 0) { + verbose(env, "negative offset %s ptr R%d off=%d disallowed\n", + reg_type_str(env, reg->type), regno, reg->off); + return -EACCES; + } + + if (!fixed_off_ok && reg->off) { + verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n", + reg_type_str(env, reg->type), regno, reg->off); + return -EACCES; + } + + if (!tnum_is_const(reg->var_off) || reg->var_off.value) { + char tn_buf[48]; + + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); + verbose(env, "variable %s access var_off=%s disallowed\n", + reg_type_str(env, reg->type), tn_buf); + return -EACCES; + } + + return 0; +} + +int check_ptr_off_reg(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, int regno) +{ + return __check_ptr_off_reg(env, reg, regno, false); +} + +static int map_kptr_match_type(struct bpf_verifier_env *env, + struct bpf_map_value_off_desc *off_desc, + struct bpf_reg_state *reg, u32 regno) +{ + const char *targ_name = kernel_type_name(off_desc->kptr.btf, off_desc->kptr.btf_id); + int perm_flags = PTR_MAYBE_NULL; + const char *reg_name = ""; + + /* Only unreferenced case accepts untrusted pointers */ + if (off_desc->type == BPF_KPTR_UNREF) + perm_flags |= PTR_UNTRUSTED; + + if (base_type(reg->type) != PTR_TO_BTF_ID || (type_flag(reg->type) & ~perm_flags)) + goto bad_type; + + if (!btf_is_kernel(reg->btf)) { + verbose(env, "R%d must point to kernel BTF\n", regno); + return -EINVAL; + } + /* We need to verify reg->type and reg->btf, before accessing reg->btf */ + reg_name = kernel_type_name(reg->btf, reg->btf_id); + + /* For ref_ptr case, release function check should ensure we get one + * referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the + * normal store of unreferenced kptr, we must ensure var_off is zero. + * Since ref_ptr cannot be accessed directly by BPF insns, checks for + * reg->off and reg->ref_obj_id are not needed here. + */ + if (__check_ptr_off_reg(env, reg, regno, true)) + return -EACCES; + + /* A full type match is needed, as BTF can be vmlinux or module BTF, and + * we also need to take into account the reg->off. + * + * We want to support cases like: + * + * struct foo { + * struct bar br; + * struct baz bz; + * }; + * + * struct foo *v; + * v = func(); // PTR_TO_BTF_ID + * val->foo = v; // reg->off is zero, btf and btf_id match type + * val->bar = &v->br; // reg->off is still zero, but we need to retry with + * // first member type of struct after comparison fails + * val->baz = &v->bz; // reg->off is non-zero, so struct needs to be walked + * // to match type + * + * In the kptr_ref case, check_func_arg_reg_off already ensures reg->off + * is zero. We must also ensure that btf_struct_ids_match does not walk + * the struct to match type against first member of struct, i.e. reject + * second case from above. Hence, when type is BPF_KPTR_REF, we set + * strict mode to true for type match. + */ + if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off, + off_desc->kptr.btf, off_desc->kptr.btf_id, + off_desc->type == BPF_KPTR_REF)) + goto bad_type; + return 0; +bad_type: + verbose(env, "invalid kptr access, R%d type=%s%s ", regno, + reg_type_str(env, reg->type), reg_name); + verbose(env, "expected=%s%s", reg_type_str(env, PTR_TO_BTF_ID), targ_name); + if (off_desc->type == BPF_KPTR_UNREF) + verbose(env, " or %s%s\n", reg_type_str(env, PTR_TO_BTF_ID | PTR_UNTRUSTED), + targ_name); + else + verbose(env, "\n"); + return -EINVAL; +} + +static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno, + int value_regno, int insn_idx, + struct bpf_map_value_off_desc *off_desc) +{ + struct bpf_insn *insn = &env->prog->insnsi[insn_idx]; + int class = BPF_CLASS(insn->code); + struct bpf_reg_state *val_reg; + + /* Things we already checked for in check_map_access and caller: + * - Reject cases where variable offset may touch kptr + * - size of access (must be BPF_DW) + * - tnum_is_const(reg->var_off) + * - off_desc->offset == off + reg->var_off.value + */ + /* Only BPF_[LDX,STX,ST] | BPF_MEM | BPF_DW is supported */ + if (BPF_MODE(insn->code) != BPF_MEM) { + verbose(env, "kptr in map can only be accessed using BPF_MEM instruction mode\n"); + return -EACCES; + } + + /* We only allow loading referenced kptr, since it will be marked as + * untrusted, similar to unreferenced kptr. + */ + if (class != BPF_LDX && off_desc->type == BPF_KPTR_REF) { + verbose(env, "store to referenced kptr disallowed\n"); + return -EACCES; + } + + if (class == BPF_LDX) { + val_reg = reg_state(env, value_regno); + /* We can simply mark the value_regno receiving the pointer + * value from map as PTR_TO_BTF_ID, with the correct type. + */ + mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID, off_desc->kptr.btf, + off_desc->kptr.btf_id, PTR_MAYBE_NULL | PTR_UNTRUSTED); + /* For mark_ptr_or_null_reg */ + val_reg->id = ++env->id_gen; + } else if (class == BPF_STX) { + val_reg = reg_state(env, value_regno); + if (!register_is_null(val_reg) && + map_kptr_match_type(env, off_desc, val_reg, value_regno)) + return -EACCES; + } else if (class == BPF_ST) { + if (insn->imm) { + verbose(env, "BPF_ST imm must be 0 when storing to kptr at off=%u\n", + off_desc->offset); + return -EACCES; + } + } else { + verbose(env, "kptr in map can only be accessed using BPF_LDX/BPF_STX/BPF_ST\n"); + return -EACCES; + } + return 0; +} + /* check read/write into a map element with possible variable offset */ static int check_map_access(struct bpf_verifier_env *env, u32 regno, - int off, int size, bool zero_size_allowed) + int off, int size, bool zero_size_allowed, + enum bpf_access_src src) { struct bpf_verifier_state *vstate = env->cur_state; struct bpf_func_state *state = vstate->frame[vstate->curframe]; @@ -3455,16 +3848,41 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, return -EACCES; } } + if (map_value_has_kptrs(map)) { + struct bpf_map_value_off *tab = map->kptr_off_tab; + int i; + + for (i = 0; i < tab->nr_off; i++) { + u32 p = tab->off[i].offset; + + if (reg->smin_value + off < p + sizeof(u64) && + p < reg->umax_value + off + size) { + if (src != ACCESS_DIRECT) { + verbose(env, "kptr cannot be accessed indirectly by helper\n"); + return -EACCES; + } + if (!tnum_is_const(reg->var_off)) { + verbose(env, "kptr access cannot have variable offset\n"); + return -EACCES; + } + if (p != off + reg->var_off.value) { + verbose(env, "kptr access misaligned expected=%u off=%llu\n", + p, off + reg->var_off.value); + return -EACCES; + } + if (size != bpf_size_to_bytes(BPF_DW)) { + verbose(env, "kptr access size must be BPF_DW\n"); + return -EACCES; + } + break; + } + } + } return err; } #define MAX_PACKET_OFF 0xffff -static enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog) -{ - return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type; -} - static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, const struct bpf_call_arg_meta *meta, enum bpf_access_type t) @@ -3570,7 +3988,7 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, */ *reg_type = info.reg_type; - if (*reg_type == PTR_TO_BTF_ID || *reg_type == PTR_TO_BTF_ID_OR_NULL) { + if (base_type(*reg_type) == PTR_TO_BTF_ID) { *btf = info.btf; *btf_id = info.btf_id; } else { @@ -3638,7 +4056,7 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx, } verbose(env, "R%d invalid %s access off=%d size=%d\n", - regno, reg_type_str[reg->type], off, size); + regno, reg_type_str(env, reg->type), off, size); return -EACCES; } @@ -3933,30 +4351,6 @@ static int get_callee_stack_depth(struct bpf_verifier_env *env, } #endif -int check_ctx_reg(struct bpf_verifier_env *env, - const struct bpf_reg_state *reg, int regno) -{ - /* Access to ctx or passing it to a helper is only allowed in - * its original, unmodified form. - */ - - if (reg->off) { - verbose(env, "dereference of modified ctx ptr R%d off=%d disallowed\n", - regno, reg->off); - return -EACCES; - } - - if (!tnum_is_const(reg->var_off) || reg->var_off.value) { - char tn_buf[48]; - - tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose(env, "variable ctx access var_off=%s disallowed\n", tn_buf); - return -EACCES; - } - - return 0; -} - static int __check_buffer_access(struct bpf_verifier_env *env, const char *buf_info, const struct bpf_reg_state *reg, @@ -4001,9 +4395,9 @@ static int check_buffer_access(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int regno, int off, int size, bool zero_size_allowed, - const char *buf_info, u32 *max_access) { + const char *buf_info = type_is_rdonly_mem(reg->type) ? "rdonly" : "rdwr"; int err; err = __check_buffer_access(env, buf_info, reg, regno, off, size); @@ -4113,6 +4507,7 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg = regs + regno; const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id); const char *tname = btf_name_by_offset(reg->btf, t->name_off); + enum bpf_type_flag flag = 0; u32 btf_id; int ret; @@ -4132,9 +4527,23 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, return -EACCES; } + if (reg->type & MEM_USER) { + verbose(env, + "R%d is ptr_%s access user memory: off=%d\n", + regno, tname, off); + return -EACCES; + } + + if (reg->type & MEM_PERCPU) { + verbose(env, + "R%d is ptr_%s access percpu memory: off=%d\n", + regno, tname, off); + return -EACCES; + } + if (env->ops->btf_struct_access) { ret = env->ops->btf_struct_access(&env->log, reg->btf, t, - off, size, atype, &btf_id); + off, size, atype, &btf_id, &flag); } else { if (atype != BPF_READ) { verbose(env, "only read is supported\n"); @@ -4142,14 +4551,20 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, } ret = btf_struct_access(&env->log, reg->btf, t, off, size, - atype, &btf_id); + atype, &btf_id, &flag); } if (ret < 0) return ret; + /* If this is an untrusted pointer, all pointers formed by walking it + * also inherit the untrusted flag. + */ + if (type_flag(reg->type) & PTR_UNTRUSTED) + flag |= PTR_UNTRUSTED; + if (atype == BPF_READ && value_regno >= 0) - mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id); + mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag); return 0; } @@ -4162,6 +4577,7 @@ static int check_ptr_to_map_access(struct bpf_verifier_env *env, { struct bpf_reg_state *reg = regs + regno; struct bpf_map *map = reg->map_ptr; + enum bpf_type_flag flag = 0; const struct btf_type *t; const char *tname; u32 btf_id; @@ -4199,12 +4615,12 @@ static int check_ptr_to_map_access(struct bpf_verifier_env *env, return -EACCES; } - ret = btf_struct_access(&env->log, btf_vmlinux, t, off, size, atype, &btf_id); + ret = btf_struct_access(&env->log, btf_vmlinux, t, off, size, atype, &btf_id, &flag); if (ret < 0) return ret; if (value_regno >= 0) - mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id); + mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id, flag); return 0; } @@ -4239,7 +4655,7 @@ static int check_stack_slot_within_bounds(int off, static int check_stack_access_within_bounds( struct bpf_verifier_env *env, int regno, int off, int access_size, - enum stack_access_src src, enum bpf_access_type type) + enum bpf_access_src src, enum bpf_access_type type) { struct bpf_reg_state *regs = cur_regs(env); struct bpf_reg_state *reg = regs + regno; @@ -4335,6 +4751,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn if (value_regno >= 0) mark_reg_unknown(env, regs, value_regno); } else if (reg->type == PTR_TO_MAP_VALUE) { + struct bpf_map_value_off_desc *kptr_off_desc = NULL; + if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) { verbose(env, "R%d leaks addr into map\n", value_regno); @@ -4343,8 +4761,16 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn err = check_map_access_type(env, regno, off, size, t); if (err) return err; - err = check_map_access(env, regno, off, size, false); - if (!err && t == BPF_READ && value_regno >= 0) { + err = check_map_access(env, regno, off, size, false, ACCESS_DIRECT); + if (err) + return err; + if (tnum_is_const(reg->var_off)) + kptr_off_desc = bpf_map_kptr_off_contains(reg->map_ptr, + off + reg->var_off.value); + if (kptr_off_desc) { + err = check_map_kptr_access(env, regno, value_regno, insn_idx, + kptr_off_desc); + } else if (t == BPF_READ && value_regno >= 0) { struct bpf_map *map = reg->map_ptr; /* if map is read-only, track its contents as scalars */ @@ -4365,15 +4791,30 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn mark_reg_unknown(env, regs, value_regno); } } - } else if (reg->type == PTR_TO_MEM) { + } else if (base_type(reg->type) == PTR_TO_MEM) { + bool rdonly_mem = type_is_rdonly_mem(reg->type); + + if (type_may_be_null(reg->type)) { + verbose(env, "R%d invalid mem access '%s'\n", regno, + reg_type_str(env, reg->type)); + return -EACCES; + } + + if (t == BPF_WRITE && rdonly_mem) { + verbose(env, "R%d cannot write into %s\n", + regno, reg_type_str(env, reg->type)); + return -EACCES; + } + if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) { verbose(env, "R%d leaks addr into mem\n", value_regno); return -EACCES; } + err = check_mem_region_access(env, regno, off, size, reg->mem_size, false); - if (!err && t == BPF_READ && value_regno >= 0) + if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem)) mark_reg_unknown(env, regs, value_regno); } else if (reg->type == PTR_TO_CTX) { enum bpf_reg_type reg_type = SCALAR_VALUE; @@ -4386,11 +4827,12 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn return -EACCES; } - err = check_ctx_reg(env, reg, regno); + err = check_ptr_off_reg(env, reg, regno); if (err < 0) return err; - err = check_ctx_access(env, insn_idx, off, size, t, ®_type, &btf, &btf_id); + err = check_ctx_access(env, insn_idx, off, size, t, ®_type, &btf, + &btf_id); if (err) verbose_linfo(env, insn_idx, "; "); if (!err && t == BPF_READ && value_regno >= 0) { @@ -4403,7 +4845,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn } else { mark_reg_known_zero(env, regs, value_regno); - if (reg_type_may_be_null(reg_type)) + if (type_may_be_null(reg_type)) regs[value_regno].id = ++env->id_gen; /* A load of ctx field could have different * actual load size with the one encoded in the @@ -4411,8 +4853,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn * a sub-register. */ regs[value_regno].subreg_def = DEF_NOT_SUBREG; - if (reg_type == PTR_TO_BTF_ID || - reg_type == PTR_TO_BTF_ID_OR_NULL) { + if (base_type(reg_type) == PTR_TO_BTF_ID) { regs[value_regno].btf = btf; regs[value_regno].btf_id = btf_id; } @@ -4465,7 +4906,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn } else if (type_is_sk_pointer(reg->type)) { if (t == BPF_WRITE) { verbose(env, "R%d cannot write into %s\n", - regno, reg_type_str[reg->type]); + regno, reg_type_str(env, reg->type)); return -EACCES; } err = check_sock_access(env, insn_idx, regno, off, size, t); @@ -4475,32 +4916,36 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn err = check_tp_buffer_access(env, reg, regno, off, size); if (!err && t == BPF_READ && value_regno >= 0) mark_reg_unknown(env, regs, value_regno); - } else if (reg->type == PTR_TO_BTF_ID) { + } else if (base_type(reg->type) == PTR_TO_BTF_ID && + !type_may_be_null(reg->type)) { err = check_ptr_to_btf_access(env, regs, regno, off, size, t, value_regno); } else if (reg->type == CONST_PTR_TO_MAP) { err = check_ptr_to_map_access(env, regs, regno, off, size, t, value_regno); - } else if (reg->type == PTR_TO_RDONLY_BUF) { - if (t == BPF_WRITE) { - verbose(env, "R%d cannot write into %s\n", - regno, reg_type_str[reg->type]); - return -EACCES; + } else if (base_type(reg->type) == PTR_TO_BUF) { + bool rdonly_mem = type_is_rdonly_mem(reg->type); + u32 *max_access; + + if (rdonly_mem) { + if (t == BPF_WRITE) { + verbose(env, "R%d cannot write into %s\n", + regno, reg_type_str(env, reg->type)); + return -EACCES; + } + max_access = &env->prog->aux->max_rdonly_access; + } else { + max_access = &env->prog->aux->max_rdwr_access; } + err = check_buffer_access(env, reg, regno, off, size, false, - "rdonly", - &env->prog->aux->max_rdonly_access); - if (!err && value_regno >= 0) - mark_reg_unknown(env, regs, value_regno); - } else if (reg->type == PTR_TO_RDWR_BUF) { - err = check_buffer_access(env, reg, regno, off, size, false, - "rdwr", - &env->prog->aux->max_rdwr_access); - if (!err && t == BPF_READ && value_regno >= 0) + max_access); + + if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ)) mark_reg_unknown(env, regs, value_regno); } else { verbose(env, "R%d invalid mem access '%s'\n", regno, - reg_type_str[reg->type]); + reg_type_str(env, reg->type)); return -EACCES; } @@ -4551,9 +4996,16 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i if (insn->imm == BPF_CMPXCHG) { /* Check comparison of R0 with memory location */ - err = check_reg_arg(env, BPF_REG_0, SRC_OP); + const u32 aux_reg = BPF_REG_0; + + err = check_reg_arg(env, aux_reg, SRC_OP); if (err) return err; + + if (is_pointer_value(env, aux_reg)) { + verbose(env, "R%d leaks addr into mem\n", aux_reg); + return -EACCES; + } } if (is_pointer_value(env, insn->src_reg)) { @@ -4567,7 +5019,7 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i is_sk_reg(env, insn->dst_reg)) { verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n", insn->dst_reg, - reg_type_str[reg_state(env, insn->dst_reg)->type]); + reg_type_str(env, reg_state(env, insn->dst_reg)->type)); return -EACCES; } @@ -4588,13 +5040,19 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i load_reg = -1; } - /* check whether we can read the memory */ + /* Check whether we can read the memory, with second call for fetch + * case to simulate the register fill. + */ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, - BPF_SIZE(insn->code), BPF_READ, load_reg, true); + BPF_SIZE(insn->code), BPF_READ, -1, true); + if (!err && load_reg >= 0) + err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, + BPF_SIZE(insn->code), BPF_READ, load_reg, + true); if (err) return err; - /* check whether we can write into the same memory */ + /* Check whether we can write into the same memory. */ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, -1, true); if (err) @@ -4615,7 +5073,7 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i static int check_stack_range_initialized( struct bpf_verifier_env *env, int regno, int off, int access_size, bool zero_size_allowed, - enum stack_access_src type, struct bpf_call_arg_meta *meta) + enum bpf_access_src type, struct bpf_call_arg_meta *meta) { struct bpf_reg_state *reg = reg_state(env, regno); struct bpf_func_state *state = func(env, reg); @@ -4702,7 +5160,7 @@ static int check_stack_range_initialized( } if (is_spilled_reg(&state->stack[spi]) && - state->stack[spi].spilled_ptr.type == PTR_TO_BTF_ID) + base_type(state->stack[spi].spilled_ptr.type) == PTR_TO_BTF_ID) goto mark; if (is_spilled_reg(&state->stack[spi]) && @@ -4744,13 +5202,19 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, struct bpf_call_arg_meta *meta) { struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; + u32 *max_access; - switch (reg->type) { + switch (base_type(reg->type)) { case PTR_TO_PACKET: case PTR_TO_PACKET_META: return check_packet_access(env, regno, reg->off, access_size, zero_size_allowed); case PTR_TO_MAP_KEY: + if (meta && meta->raw_mode) { + verbose(env, "R%d cannot write into %s\n", regno, + reg_type_str(env, reg->type)); + return -EACCES; + } return check_mem_region_access(env, regno, reg->off, access_size, reg->map_ptr->key_size, false); case PTR_TO_MAP_VALUE: @@ -4759,62 +5223,183 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, BPF_READ)) return -EACCES; return check_map_access(env, regno, reg->off, access_size, - zero_size_allowed); + zero_size_allowed, ACCESS_HELPER); case PTR_TO_MEM: + if (type_is_rdonly_mem(reg->type)) { + if (meta && meta->raw_mode) { + verbose(env, "R%d cannot write into %s\n", regno, + reg_type_str(env, reg->type)); + return -EACCES; + } + } return check_mem_region_access(env, regno, reg->off, access_size, reg->mem_size, zero_size_allowed); - case PTR_TO_RDONLY_BUF: - if (meta && meta->raw_mode) - return -EACCES; - return check_buffer_access(env, reg, regno, reg->off, - access_size, zero_size_allowed, - "rdonly", - &env->prog->aux->max_rdonly_access); - case PTR_TO_RDWR_BUF: + case PTR_TO_BUF: + if (type_is_rdonly_mem(reg->type)) { + if (meta && meta->raw_mode) { + verbose(env, "R%d cannot write into %s\n", regno, + reg_type_str(env, reg->type)); + return -EACCES; + } + + max_access = &env->prog->aux->max_rdonly_access; + } else { + max_access = &env->prog->aux->max_rdwr_access; + } return check_buffer_access(env, reg, regno, reg->off, access_size, zero_size_allowed, - "rdwr", - &env->prog->aux->max_rdwr_access); + max_access); case PTR_TO_STACK: return check_stack_range_initialized( env, regno, reg->off, access_size, zero_size_allowed, ACCESS_HELPER, meta); + case PTR_TO_CTX: + /* in case the function doesn't know how to access the context, + * (because we are in a program of type SYSCALL for example), we + * can not statically check its size. + * Dynamically check it now. + */ + if (!env->ops->convert_ctx_access) { + enum bpf_access_type atype = meta && meta->raw_mode ? BPF_WRITE : BPF_READ; + int offset = access_size - 1; + + /* Allow zero-byte read from PTR_TO_CTX */ + if (access_size == 0) + return zero_size_allowed ? 0 : -EACCES; + + return check_mem_access(env, env->insn_idx, regno, offset, BPF_B, + atype, -1, false); + } + + fallthrough; default: /* scalar_value or invalid ptr */ /* Allow zero-byte read from NULL, regardless of pointer type */ if (zero_size_allowed && access_size == 0 && register_is_null(reg)) return 0; - verbose(env, "R%d type=%s expected=%s\n", regno, - reg_type_str[reg->type], - reg_type_str[PTR_TO_STACK]); + verbose(env, "R%d type=%s ", regno, + reg_type_str(env, reg->type)); + verbose(env, "expected=%s\n", reg_type_str(env, PTR_TO_STACK)); return -EACCES; } } +static int check_mem_size_reg(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, u32 regno, + bool zero_size_allowed, + struct bpf_call_arg_meta *meta) +{ + int err; + + /* This is used to refine r0 return value bounds for helpers + * that enforce this value as an upper bound on return values. + * See do_refine_retval_range() for helpers that can refine + * the return value. C type of helper is u32 so we pull register + * bound from umax_value however, if negative verifier errors + * out. Only upper bounds can be learned because retval is an + * int type and negative retvals are allowed. + */ + meta->msize_max_value = reg->umax_value; + + /* The register is SCALAR_VALUE; the access check + * happens using its boundaries. + */ + if (!tnum_is_const(reg->var_off)) + /* For unprivileged variable accesses, disable raw + * mode so that the program is required to + * initialize all the memory that the helper could + * just partially fill up. + */ + meta = NULL; + + if (reg->smin_value < 0) { + verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n", + regno); + return -EACCES; + } + + if (reg->umin_value == 0) { + err = check_helper_mem_access(env, regno - 1, 0, + zero_size_allowed, + meta); + if (err) + return err; + } + + if (reg->umax_value >= BPF_MAX_VAR_SIZ) { + verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n", + regno); + return -EACCES; + } + err = check_helper_mem_access(env, regno - 1, + reg->umax_value, + zero_size_allowed, meta); + if (!err) + err = mark_chain_precision(env, regno); + return err; +} + int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno, u32 mem_size) { + bool may_be_null = type_may_be_null(reg->type); + struct bpf_reg_state saved_reg; + struct bpf_call_arg_meta meta; + int err; + if (register_is_null(reg)) return 0; - if (reg_type_may_be_null(reg->type)) { - /* Assuming that the register contains a value check if the memory - * access is safe. Temporarily save and restore the register's state as - * the conversion shouldn't be visible to a caller. - */ - const struct bpf_reg_state saved_reg = *reg; - int rv; - + memset(&meta, 0, sizeof(meta)); + /* Assuming that the register contains a value check if the memory + * access is safe. Temporarily save and restore the register's state as + * the conversion shouldn't be visible to a caller. + */ + if (may_be_null) { + saved_reg = *reg; mark_ptr_not_null_reg(reg); - rv = check_helper_mem_access(env, regno, mem_size, true, NULL); + } + + err = check_helper_mem_access(env, regno, mem_size, true, &meta); + /* Check access for BPF_WRITE */ + meta.raw_mode = true; + err = err ?: check_helper_mem_access(env, regno, mem_size, true, &meta); + + if (may_be_null) *reg = saved_reg; - return rv; + + return err; +} + +int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, + u32 regno) +{ + struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1]; + bool may_be_null = type_may_be_null(mem_reg->type); + struct bpf_reg_state saved_reg; + struct bpf_call_arg_meta meta; + int err; + + WARN_ON_ONCE(regno < BPF_REG_2 || regno > BPF_REG_5); + + memset(&meta, 0, sizeof(meta)); + + if (may_be_null) { + saved_reg = *mem_reg; + mark_ptr_not_null_reg(mem_reg); } - return check_helper_mem_access(env, regno, mem_size, true, NULL); + err = check_mem_size_reg(env, reg, regno, true, &meta); + /* Check access for BPF_WRITE */ + meta.raw_mode = true; + err = err ?: check_mem_size_reg(env, reg, regno, true, &meta); + + if (may_be_null) + *mem_reg = saved_reg; + return err; } /* Implementation details: @@ -4946,11 +5531,51 @@ static int process_timer_func(struct bpf_verifier_env *env, int regno, return 0; } -static bool arg_type_is_mem_ptr(enum bpf_arg_type type) +static int process_kptr_func(struct bpf_verifier_env *env, int regno, + struct bpf_call_arg_meta *meta) { - return type == ARG_PTR_TO_MEM || - type == ARG_PTR_TO_MEM_OR_NULL || - type == ARG_PTR_TO_UNINIT_MEM; + struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; + struct bpf_map_value_off_desc *off_desc; + struct bpf_map *map_ptr = reg->map_ptr; + u32 kptr_off; + int ret; + + if (!tnum_is_const(reg->var_off)) { + verbose(env, + "R%d doesn't have constant offset. kptr has to be at the constant offset\n", + regno); + return -EINVAL; + } + if (!map_ptr->btf) { + verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n", + map_ptr->name); + return -EINVAL; + } + if (!map_value_has_kptrs(map_ptr)) { + ret = PTR_ERR_OR_ZERO(map_ptr->kptr_off_tab); + if (ret == -E2BIG) + verbose(env, "map '%s' has more than %d kptr\n", map_ptr->name, + BPF_MAP_VALUE_OFF_MAX); + else if (ret == -EEXIST) + verbose(env, "map '%s' has repeating kptr BTF tags\n", map_ptr->name); + else + verbose(env, "map '%s' has no valid kptr\n", map_ptr->name); + return -EINVAL; + } + + meta->map_ptr = map_ptr; + kptr_off = reg->off + reg->var_off.value; + off_desc = bpf_map_kptr_off_contains(map_ptr, kptr_off); + if (!off_desc) { + verbose(env, "off=%d doesn't point to kptr\n", kptr_off); + return -EACCES; + } + if (off_desc->type != BPF_KPTR_REF) { + verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off); + return -EACCES; + } + meta->kptr_off_desc = off_desc; + return 0; } static bool arg_type_is_mem_size(enum bpf_arg_type type) @@ -4959,15 +5584,14 @@ static bool arg_type_is_mem_size(enum bpf_arg_type type) type == ARG_CONST_SIZE_OR_ZERO; } -static bool arg_type_is_alloc_size(enum bpf_arg_type type) +static bool arg_type_is_release(enum bpf_arg_type type) { - return type == ARG_CONST_ALLOC_SIZE_OR_ZERO; + return type & OBJ_RELEASE; } -static bool arg_type_is_int_ptr(enum bpf_arg_type type) +static bool arg_type_is_dynptr(enum bpf_arg_type type) { - return type == ARG_PTR_TO_INT || - type == ARG_PTR_TO_LONG; + return base_type(type) == ARG_PTR_TO_DYNPTR; } static int int_ptr_type_to_size(enum bpf_arg_type type) @@ -5055,8 +5679,8 @@ static const struct bpf_reg_types mem_types = { PTR_TO_MAP_KEY, PTR_TO_MAP_VALUE, PTR_TO_MEM, - PTR_TO_RDONLY_BUF, - PTR_TO_RDWR_BUF, + PTR_TO_MEM | MEM_ALLOC, + PTR_TO_BUF, }, }; @@ -5073,64 +5697,82 @@ static const struct bpf_reg_types int_ptr_types = { static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } }; static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } }; static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } }; -static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM } }; +static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM | MEM_ALLOC } }; static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } }; static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } }; static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } }; -static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } }; +static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_BTF_ID | MEM_PERCPU } }; static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } }; static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } }; static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } }; static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } }; +static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } }; +static const struct bpf_reg_types dynptr_types = { + .types = { + PTR_TO_STACK, + PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL, + } +}; static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_MAP_KEY] = &map_key_value_types, [ARG_PTR_TO_MAP_VALUE] = &map_key_value_types, - [ARG_PTR_TO_UNINIT_MAP_VALUE] = &map_key_value_types, - [ARG_PTR_TO_MAP_VALUE_OR_NULL] = &map_key_value_types, [ARG_CONST_SIZE] = &scalar_types, [ARG_CONST_SIZE_OR_ZERO] = &scalar_types, [ARG_CONST_ALLOC_SIZE_OR_ZERO] = &scalar_types, [ARG_CONST_MAP_PTR] = &const_map_ptr_types, [ARG_PTR_TO_CTX] = &context_types, - [ARG_PTR_TO_CTX_OR_NULL] = &context_types, [ARG_PTR_TO_SOCK_COMMON] = &sock_types, #ifdef CONFIG_NET [ARG_PTR_TO_BTF_ID_SOCK_COMMON] = &btf_id_sock_common_types, #endif [ARG_PTR_TO_SOCKET] = &fullsock_types, - [ARG_PTR_TO_SOCKET_OR_NULL] = &fullsock_types, [ARG_PTR_TO_BTF_ID] = &btf_ptr_types, [ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types, [ARG_PTR_TO_MEM] = &mem_types, - [ARG_PTR_TO_MEM_OR_NULL] = &mem_types, - [ARG_PTR_TO_UNINIT_MEM] = &mem_types, [ARG_PTR_TO_ALLOC_MEM] = &alloc_mem_types, - [ARG_PTR_TO_ALLOC_MEM_OR_NULL] = &alloc_mem_types, [ARG_PTR_TO_INT] = &int_ptr_types, [ARG_PTR_TO_LONG] = &int_ptr_types, [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types, [ARG_PTR_TO_FUNC] = &func_ptr_types, - [ARG_PTR_TO_STACK_OR_NULL] = &stack_ptr_types, + [ARG_PTR_TO_STACK] = &stack_ptr_types, [ARG_PTR_TO_CONST_STR] = &const_str_ptr_types, [ARG_PTR_TO_TIMER] = &timer_types, + [ARG_PTR_TO_KPTR] = &kptr_types, + [ARG_PTR_TO_DYNPTR] = &dynptr_types, }; static int check_reg_type(struct bpf_verifier_env *env, u32 regno, enum bpf_arg_type arg_type, - const u32 *arg_btf_id) + const u32 *arg_btf_id, + struct bpf_call_arg_meta *meta) { struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; enum bpf_reg_type expected, type = reg->type; const struct bpf_reg_types *compatible; int i, j; - compatible = compatible_reg_types[arg_type]; + compatible = compatible_reg_types[base_type(arg_type)]; if (!compatible) { verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type); return -EFAULT; } + /* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY, + * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY + * + * Same for MAYBE_NULL: + * + * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL, + * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL + * + * Therefore we fold these flags depending on the arg_type before comparison. + */ + if (arg_type & MEM_RDONLY) + type &= ~MEM_RDONLY; + if (arg_type & PTR_MAYBE_NULL) + type &= ~PTR_MAYBE_NULL; + for (i = 0; i < ARRAY_SIZE(compatible->types); i++) { expected = compatible->types[i]; if (expected == NOT_INIT) @@ -5140,14 +5782,21 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno, goto found; } - verbose(env, "R%d type=%s expected=", regno, reg_type_str[type]); + verbose(env, "R%d type=%s expected=", regno, reg_type_str(env, reg->type)); for (j = 0; j + 1 < i; j++) - verbose(env, "%s, ", reg_type_str[compatible->types[j]]); - verbose(env, "%s\n", reg_type_str[compatible->types[j]]); + verbose(env, "%s, ", reg_type_str(env, compatible->types[j])); + verbose(env, "%s\n", reg_type_str(env, compatible->types[j])); return -EACCES; found: - if (type == PTR_TO_BTF_ID) { + if (reg->type == PTR_TO_BTF_ID) { + /* For bpf_sk_release, it needs to match against first member + * 'struct sock_common', hence make an exception for it. This + * allows bpf_sk_release to work for multiple socket types. + */ + bool strict_type_match = arg_type_is_release(arg_type) && + meta->func_id != BPF_FUNC_sk_release; + if (!arg_btf_id) { if (!compatible->btf_id) { verbose(env, "verifier internal error: missing arg compatible BTF ID\n"); @@ -5156,22 +5805,93 @@ found: arg_btf_id = compatible->btf_id; } - if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off, - btf_vmlinux, *arg_btf_id)) { - verbose(env, "R%d is of type %s but %s is expected\n", - regno, kernel_type_name(reg->btf, reg->btf_id), - kernel_type_name(btf_vmlinux, *arg_btf_id)); - return -EACCES; + if (meta->func_id == BPF_FUNC_kptr_xchg) { + if (map_kptr_match_type(env, meta->kptr_off_desc, reg, regno)) + return -EACCES; + } else { + if (arg_btf_id == BPF_PTR_POISON) { + verbose(env, "verifier internal error:"); + verbose(env, "R%d has non-overwritten BPF_PTR_POISON type\n", + regno); + return -EACCES; + } + + if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off, + btf_vmlinux, *arg_btf_id, + strict_type_match)) { + verbose(env, "R%d is of type %s but %s is expected\n", + regno, kernel_type_name(reg->btf, reg->btf_id), + kernel_type_name(btf_vmlinux, *arg_btf_id)); + return -EACCES; + } } + } + + return 0; +} + +int check_func_arg_reg_off(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, int regno, + enum bpf_arg_type arg_type) +{ + enum bpf_reg_type type = reg->type; + bool fixed_off_ok = false; - if (!tnum_is_const(reg->var_off) || reg->var_off.value) { - verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n", + switch ((u32)type) { + /* Pointer types where reg offset is explicitly allowed: */ + case PTR_TO_STACK: + if (arg_type_is_dynptr(arg_type) && reg->off % BPF_REG_SIZE) { + verbose(env, "cannot pass in dynptr at an offset\n"); + return -EINVAL; + } + fallthrough; + case PTR_TO_PACKET: + case PTR_TO_PACKET_META: + case PTR_TO_MAP_KEY: + case PTR_TO_MAP_VALUE: + case PTR_TO_MEM: + case PTR_TO_MEM | MEM_RDONLY: + case PTR_TO_MEM | MEM_ALLOC: + case PTR_TO_BUF: + case PTR_TO_BUF | MEM_RDONLY: + case SCALAR_VALUE: + /* Some of the argument types nevertheless require a + * zero register offset. + */ + if (base_type(arg_type) != ARG_PTR_TO_ALLOC_MEM) + return 0; + break; + /* All the rest must be rejected, except PTR_TO_BTF_ID which allows + * fixed offset. + */ + case PTR_TO_BTF_ID: + /* When referenced PTR_TO_BTF_ID is passed to release function, + * it's fixed offset must be 0. In the other cases, fixed offset + * can be non-zero. + */ + if (arg_type_is_release(arg_type) && reg->off) { + verbose(env, "R%d must have zero offset when passed to release func\n", regno); - return -EACCES; + return -EINVAL; } + /* For arg is release pointer, fixed_off_ok must be false, but + * we already checked and rejected reg->off != 0 above, so set + * to true to allow fixed offset for all other cases. + */ + fixed_off_ok = true; + break; + default: + break; } + return __check_ptr_off_reg(env, reg, regno, fixed_off_ok); +} - return 0; +static u32 stack_slot_get_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + struct bpf_func_state *state = func(env, reg); + int spi = get_spi(reg->off); + + return state->stack[spi].spilled_ptr.id; } static int check_func_arg(struct bpf_verifier_env *env, u32 arg, @@ -5182,6 +5902,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; enum bpf_arg_type arg_type = fn->arg_type[arg]; enum bpf_reg_type type = reg->type; + u32 *arg_btf_id = NULL; int err = 0; if (arg_type == ARG_DONTCARE) @@ -5206,31 +5927,53 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, return -EACCES; } - if (arg_type == ARG_PTR_TO_MAP_VALUE || - arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE || - arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) { + if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE) { err = resolve_map_arg_type(env, meta, &arg_type); if (err) return err; } - if (register_is_null(reg) && arg_type_may_be_null(arg_type)) + if (register_is_null(reg) && type_may_be_null(arg_type)) /* A NULL register has a SCALAR_VALUE type, so skip * type checking. */ goto skip_type_check; - err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg]); + /* arg_btf_id and arg_size are in a union. */ + if (base_type(arg_type) == ARG_PTR_TO_BTF_ID) + arg_btf_id = fn->arg_btf_id[arg]; + + err = check_reg_type(env, regno, arg_type, arg_btf_id, meta); if (err) return err; - if (type == PTR_TO_CTX) { - err = check_ctx_reg(env, reg, regno); - if (err < 0) - return err; - } + err = check_func_arg_reg_off(env, reg, regno, arg_type); + if (err) + return err; skip_type_check: + if (arg_type_is_release(arg_type)) { + if (arg_type_is_dynptr(arg_type)) { + struct bpf_func_state *state = func(env, reg); + int spi = get_spi(reg->off); + + if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) || + !state->stack[spi].spilled_ptr.id) { + verbose(env, "arg %d is an unacquired reference\n", regno); + return -EINVAL; + } + } else if (!reg->ref_obj_id && !register_is_null(reg)) { + verbose(env, "R%d must be referenced when passed to release function\n", + regno); + return -EINVAL; + } + if (meta->release_regno) { + verbose(env, "verifier internal error: more than one release argument\n"); + return -EFAULT; + } + meta->release_regno = regno; + } + if (reg->ref_obj_id) { if (meta->ref_obj_id) { verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n", @@ -5241,7 +5984,8 @@ skip_type_check: meta->ref_obj_id = reg->ref_obj_id; } - if (arg_type == ARG_CONST_MAP_PTR) { + switch (base_type(arg_type)) { + case ARG_CONST_MAP_PTR: /* bpf_map_xxx(map_ptr) call: remember that map_ptr */ if (meta->map_ptr) { /* Use map_uid (which is unique id of inner map) to reject: @@ -5266,7 +6010,8 @@ skip_type_check: } meta->map_ptr = reg->map_ptr; meta->map_uid = reg->map_uid; - } else if (arg_type == ARG_PTR_TO_MAP_KEY) { + break; + case ARG_PTR_TO_MAP_KEY: /* bpf_map_xxx(..., map_ptr, ..., key) call: * check that [key, key + map->key_size) are within * stack limits and initialized @@ -5283,10 +6028,11 @@ skip_type_check: err = check_helper_mem_access(env, regno, meta->map_ptr->key_size, false, NULL); - } else if (arg_type == ARG_PTR_TO_MAP_VALUE || - (arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL && - !register_is_null(reg)) || - arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) { + break; + case ARG_PTR_TO_MAP_VALUE: + if (type_may_be_null(arg_type) && register_is_null(reg)) + return 0; + /* bpf_map_xxx(..., map_ptr, ..., value) call: * check [value, value + map->value_size) validity */ @@ -5295,18 +6041,20 @@ skip_type_check: verbose(env, "invalid map_ptr to access map->value\n"); return -EACCES; } - meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE); + meta->raw_mode = arg_type & MEM_UNINIT; err = check_helper_mem_access(env, regno, meta->map_ptr->value_size, false, meta); - } else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) { + break; + case ARG_PTR_TO_PERCPU_BTF_ID: if (!reg->btf_id) { verbose(env, "Helper has invalid btf_id in R%d\n", regno); return -EACCES; } meta->ret_btf = reg->btf; meta->ret_btf_id = reg->btf_id; - } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) { + break; + case ARG_PTR_TO_SPIN_LOCK: if (meta->func_id == BPF_FUNC_spin_lock) { if (process_spin_lock(env, regno, true)) return -EACCES; @@ -5317,79 +6065,103 @@ skip_type_check: verbose(env, "verifier internal error\n"); return -EFAULT; } - } else if (arg_type == ARG_PTR_TO_TIMER) { + break; + case ARG_PTR_TO_TIMER: if (process_timer_func(env, regno, meta)) return -EACCES; - } else if (arg_type == ARG_PTR_TO_FUNC) { + break; + case ARG_PTR_TO_FUNC: meta->subprogno = reg->subprogno; - } else if (arg_type_is_mem_ptr(arg_type)) { + break; + case ARG_PTR_TO_MEM: /* The access to this pointer is only checked when we hit the * next is_mem_size argument below. */ - meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MEM); - } else if (arg_type_is_mem_size(arg_type)) { - bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO); - - /* This is used to refine r0 return value bounds for helpers - * that enforce this value as an upper bound on return values. - * See do_refine_retval_range() for helpers that can refine - * the return value. C type of helper is u32 so we pull register - * bound from umax_value however, if negative verifier errors - * out. Only upper bounds can be learned because retval is an - * int type and negative retvals are allowed. + meta->raw_mode = arg_type & MEM_UNINIT; + if (arg_type & MEM_FIXED_SIZE) { + err = check_helper_mem_access(env, regno, + fn->arg_size[arg], false, + meta); + } + break; + case ARG_CONST_SIZE: + err = check_mem_size_reg(env, reg, regno, false, meta); + break; + case ARG_CONST_SIZE_OR_ZERO: + err = check_mem_size_reg(env, reg, regno, true, meta); + break; + case ARG_PTR_TO_DYNPTR: + /* We only need to check for initialized / uninitialized helper + * dynptr args if the dynptr is not PTR_TO_DYNPTR, as the + * assumption is that if it is, that a helper function + * initialized the dynptr on behalf of the BPF program. */ - meta->msize_max_value = reg->umax_value; + if (base_type(reg->type) == PTR_TO_DYNPTR) + break; + if (arg_type & MEM_UNINIT) { + if (!is_dynptr_reg_valid_uninit(env, reg)) { + verbose(env, "Dynptr has to be an uninitialized dynptr\n"); + return -EINVAL; + } - /* The register is SCALAR_VALUE; the access check - * happens using its boundaries. - */ - if (!tnum_is_const(reg->var_off)) - /* For unprivileged variable accesses, disable raw - * mode so that the program is required to - * initialize all the memory that the helper could - * just partially fill up. + /* We only support one dynptr being uninitialized at the moment, + * which is sufficient for the helper functions we have right now. */ - meta = NULL; - - if (reg->smin_value < 0) { - verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n", - regno); - return -EACCES; - } + if (meta->uninit_dynptr_regno) { + verbose(env, "verifier internal error: multiple uninitialized dynptr args\n"); + return -EFAULT; + } - if (reg->umin_value == 0) { - err = check_helper_mem_access(env, regno - 1, 0, - zero_size_allowed, - meta); - if (err) - return err; - } + meta->uninit_dynptr_regno = regno; + } else if (!is_dynptr_reg_valid_init(env, reg)) { + verbose(env, + "Expected an initialized dynptr as arg #%d\n", + arg + 1); + return -EINVAL; + } else if (!is_dynptr_type_expected(env, reg, arg_type)) { + const char *err_extra = ""; - if (reg->umax_value >= BPF_MAX_VAR_SIZ) { - verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n", - regno); - return -EACCES; + switch (arg_type & DYNPTR_TYPE_FLAG_MASK) { + case DYNPTR_TYPE_LOCAL: + err_extra = "local"; + break; + case DYNPTR_TYPE_RINGBUF: + err_extra = "ringbuf"; + break; + default: + err_extra = "<unknown>"; + break; + } + verbose(env, + "Expected a dynptr of type %s as arg #%d\n", + err_extra, arg + 1); + return -EINVAL; } - err = check_helper_mem_access(env, regno - 1, - reg->umax_value, - zero_size_allowed, meta); - if (!err) - err = mark_chain_precision(env, regno); - } else if (arg_type_is_alloc_size(arg_type)) { + break; + case ARG_CONST_ALLOC_SIZE_OR_ZERO: if (!tnum_is_const(reg->var_off)) { verbose(env, "R%d is not a known constant'\n", regno); return -EACCES; } meta->mem_size = reg->var_off.value; - } else if (arg_type_is_int_ptr(arg_type)) { + err = mark_chain_precision(env, regno); + if (err) + return err; + break; + case ARG_PTR_TO_INT: + case ARG_PTR_TO_LONG: + { int size = int_ptr_type_to_size(arg_type); err = check_helper_mem_access(env, regno, size, false, meta); if (err) return err; err = check_ptr_alignment(env, reg, 0, size, true); - } else if (arg_type == ARG_PTR_TO_CONST_STR) { + break; + } + case ARG_PTR_TO_CONST_STR: + { struct bpf_map *map = reg->map_ptr; int map_off; u64 map_addr; @@ -5411,7 +6183,8 @@ skip_type_check: } err = check_map_access(env, regno, reg->off, - map->value_size - reg->off, false); + map->value_size - reg->off, false, + ACCESS_HELPER); if (err) return err; @@ -5427,6 +6200,12 @@ skip_type_check: verbose(env, "string is not zero-terminated\n"); return -EINVAL; } + break; + } + case ARG_PTR_TO_KPTR: + if (process_kptr_func(env, regno, meta)) + return -EACCES; + break; } return err; @@ -5466,7 +6245,8 @@ static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id) static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env) { - return env->prog->jit_requested && IS_ENABLED(CONFIG_X86_64); + return env->prog->jit_requested && + bpf_jit_supports_subprog_tailcalls(); } static int check_map_func_compatibility(struct bpf_verifier_env *env, @@ -5492,7 +6272,14 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, case BPF_MAP_TYPE_RINGBUF: if (func_id != BPF_FUNC_ringbuf_output && func_id != BPF_FUNC_ringbuf_reserve && - func_id != BPF_FUNC_ringbuf_query) + func_id != BPF_FUNC_ringbuf_query && + func_id != BPF_FUNC_ringbuf_reserve_dynptr && + func_id != BPF_FUNC_ringbuf_submit_dynptr && + func_id != BPF_FUNC_ringbuf_discard_dynptr) + goto error; + break; + case BPF_MAP_TYPE_USER_RINGBUF: + if (func_id != BPF_FUNC_user_ringbuf_drain) goto error; break; case BPF_MAP_TYPE_STACK_TRACE: @@ -5608,9 +6395,16 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, case BPF_FUNC_ringbuf_output: case BPF_FUNC_ringbuf_reserve: case BPF_FUNC_ringbuf_query: + case BPF_FUNC_ringbuf_reserve_dynptr: + case BPF_FUNC_ringbuf_submit_dynptr: + case BPF_FUNC_ringbuf_discard_dynptr: if (map->map_type != BPF_MAP_TYPE_RINGBUF) goto error; break; + case BPF_FUNC_user_ringbuf_drain: + if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF) + goto error; + break; case BPF_FUNC_get_stackid: if (map->map_type != BPF_MAP_TYPE_STACK_TRACE) goto error; @@ -5662,6 +6456,12 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, map->map_type != BPF_MAP_TYPE_BLOOM_FILTER) goto error; break; + case BPF_FUNC_map_lookup_percpu_elem: + if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY && + map->map_type != BPF_MAP_TYPE_PERCPU_HASH && + map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH) + goto error; + break; case BPF_FUNC_sk_storage_get: case BPF_FUNC_sk_storage_delete: if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) @@ -5710,13 +6510,19 @@ static bool check_raw_mode_ok(const struct bpf_func_proto *fn) return count <= 1; } -static bool check_args_pair_invalid(enum bpf_arg_type arg_curr, - enum bpf_arg_type arg_next) +static bool check_args_pair_invalid(const struct bpf_func_proto *fn, int arg) { - return (arg_type_is_mem_ptr(arg_curr) && - !arg_type_is_mem_size(arg_next)) || - (!arg_type_is_mem_ptr(arg_curr) && - arg_type_is_mem_size(arg_next)); + bool is_fixed = fn->arg_type[arg] & MEM_FIXED_SIZE; + bool has_size = fn->arg_size[arg] != 0; + bool is_next_size = false; + + if (arg + 1 < ARRAY_SIZE(fn->arg_type)) + is_next_size = arg_type_is_mem_size(fn->arg_type[arg + 1]); + + if (base_type(fn->arg_type[arg]) != ARG_PTR_TO_MEM) + return is_next_size; + + return has_size == is_next_size || is_next_size == is_fixed; } static bool check_arg_pair_ok(const struct bpf_func_proto *fn) @@ -5727,52 +6533,28 @@ static bool check_arg_pair_ok(const struct bpf_func_proto *fn) * helper function specification. */ if (arg_type_is_mem_size(fn->arg1_type) || - arg_type_is_mem_ptr(fn->arg5_type) || - check_args_pair_invalid(fn->arg1_type, fn->arg2_type) || - check_args_pair_invalid(fn->arg2_type, fn->arg3_type) || - check_args_pair_invalid(fn->arg3_type, fn->arg4_type) || - check_args_pair_invalid(fn->arg4_type, fn->arg5_type)) + check_args_pair_invalid(fn, 0) || + check_args_pair_invalid(fn, 1) || + check_args_pair_invalid(fn, 2) || + check_args_pair_invalid(fn, 3) || + check_args_pair_invalid(fn, 4)) return false; return true; } -static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id) -{ - int count = 0; - - if (arg_type_may_be_refcounted(fn->arg1_type)) - count++; - if (arg_type_may_be_refcounted(fn->arg2_type)) - count++; - if (arg_type_may_be_refcounted(fn->arg3_type)) - count++; - if (arg_type_may_be_refcounted(fn->arg4_type)) - count++; - if (arg_type_may_be_refcounted(fn->arg5_type)) - count++; - - /* A reference acquiring function cannot acquire - * another refcounted ptr. - */ - if (may_be_acquire_function(func_id) && count) - return false; - - /* We only support one arg being unreferenced at the moment, - * which is sufficient for the helper functions we have right now. - */ - return count <= 1; -} - static bool check_btf_id_ok(const struct bpf_func_proto *fn) { int i; for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) { - if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i]) + if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i]) return false; - if (fn->arg_type[i] != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i]) + if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i] && + /* arg_btf_id and arg_size are in a union. */ + (base_type(fn->arg_type[i]) != ARG_PTR_TO_MEM || + !(fn->arg_type[i] & MEM_FIXED_SIZE))) return false; } @@ -5783,38 +6565,21 @@ static int check_func_proto(const struct bpf_func_proto *fn, int func_id) { return check_raw_mode_ok(fn) && check_arg_pair_ok(fn) && - check_btf_id_ok(fn) && - check_refcount_ok(fn, func_id) ? 0 : -EINVAL; + check_btf_id_ok(fn) ? 0 : -EINVAL; } /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END] * are now invalid, so turn them into unknown SCALAR_VALUE. */ -static void __clear_all_pkt_pointers(struct bpf_verifier_env *env, - struct bpf_func_state *state) +static void clear_all_pkt_pointers(struct bpf_verifier_env *env) { - struct bpf_reg_state *regs = state->regs, *reg; - int i; - - for (i = 0; i < MAX_BPF_REG; i++) - if (reg_is_pkt_pointer_any(®s[i])) - mark_reg_unknown(env, regs, i); + struct bpf_func_state *state; + struct bpf_reg_state *reg; - bpf_for_each_spilled_reg(i, state, reg) { - if (!reg) - continue; + bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ if (reg_is_pkt_pointer_any(reg)) __mark_reg_unknown(env, reg); - } -} - -static void clear_all_pkt_pointers(struct bpf_verifier_env *env) -{ - struct bpf_verifier_state *vstate = env->cur_state; - int i; - - for (i = 0; i <= vstate->curframe; i++) - __clear_all_pkt_pointers(env, vstate->frame[i]); + })); } enum { @@ -5843,41 +6608,28 @@ static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range reg->range = AT_PKT_END; } -static void release_reg_references(struct bpf_verifier_env *env, - struct bpf_func_state *state, - int ref_obj_id) -{ - struct bpf_reg_state *regs = state->regs, *reg; - int i; - - for (i = 0; i < MAX_BPF_REG; i++) - if (regs[i].ref_obj_id == ref_obj_id) - mark_reg_unknown(env, regs, i); - - bpf_for_each_spilled_reg(i, state, reg) { - if (!reg) - continue; - if (reg->ref_obj_id == ref_obj_id) - __mark_reg_unknown(env, reg); - } -} - /* The pointer with the specified id has released its reference to kernel * resources. Identify all copies of the same pointer and clear the reference. */ static int release_reference(struct bpf_verifier_env *env, int ref_obj_id) { - struct bpf_verifier_state *vstate = env->cur_state; + struct bpf_func_state *state; + struct bpf_reg_state *reg; int err; - int i; err = release_reference_state(cur_func(env), ref_obj_id); if (err) return err; - for (i = 0; i <= vstate->curframe; i++) - release_reg_references(env, vstate->frame[i], ref_obj_id); + bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ + if (reg->ref_obj_id == ref_obj_id) { + if (!env->allow_ptr_leaks) + __mark_reg_not_init(env, reg); + else + __mark_reg_unknown(env, reg); + } + })); return 0; } @@ -5925,7 +6677,7 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn func_info_aux = env->prog->aux->func_info_aux; if (func_info_aux) is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL; - err = btf_check_subprog_arg_match(env, subprog, caller->regs); + err = btf_check_subprog_call(env, subprog, caller->regs); if (err == -EFAULT) return err; if (is_global) { @@ -5950,6 +6702,7 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn } if (insn->code == (BPF_JMP | BPF_CALL) && + insn->src_reg == 0 && insn->imm == BPF_FUNC_timer_set_callback) { struct bpf_verifier_state *async_cb; @@ -6008,9 +6761,9 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn if (env->log.level & BPF_LOG_LEVEL) { verbose(env, "caller:\n"); - print_verifier_state(env, caller); + print_verifier_state(env, caller, true); verbose(env, "callee:\n"); - print_verifier_state(env, callee); + print_verifier_state(env, callee, true); } return 0; } @@ -6098,6 +6851,29 @@ static int set_map_elem_callback_state(struct bpf_verifier_env *env, return err; callee->in_callback_fn = true; + callee->callback_ret_range = tnum_range(0, 1); + return 0; +} + +static int set_loop_callback_state(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee, + int insn_idx) +{ + /* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, + * u64 flags); + * callback_fn(u32 index, void *callback_ctx); + */ + callee->regs[BPF_REG_1].type = SCALAR_VALUE; + callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3]; + + /* unused */ + __mark_reg_not_init(env, &callee->regs[BPF_REG_3]); + __mark_reg_not_init(env, &callee->regs[BPF_REG_4]); + __mark_reg_not_init(env, &callee->regs[BPF_REG_5]); + + callee->in_callback_fn = true; + callee->callback_ret_range = tnum_range(0, 1); return 0; } @@ -6127,6 +6903,59 @@ static int set_timer_callback_state(struct bpf_verifier_env *env, __mark_reg_not_init(env, &callee->regs[BPF_REG_4]); __mark_reg_not_init(env, &callee->regs[BPF_REG_5]); callee->in_async_callback_fn = true; + callee->callback_ret_range = tnum_range(0, 1); + return 0; +} + +static int set_find_vma_callback_state(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee, + int insn_idx) +{ + /* bpf_find_vma(struct task_struct *task, u64 addr, + * void *callback_fn, void *callback_ctx, u64 flags) + * (callback_fn)(struct task_struct *task, + * struct vm_area_struct *vma, void *callback_ctx); + */ + callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1]; + + callee->regs[BPF_REG_2].type = PTR_TO_BTF_ID; + __mark_reg_known_zero(&callee->regs[BPF_REG_2]); + callee->regs[BPF_REG_2].btf = btf_vmlinux; + callee->regs[BPF_REG_2].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA], + + /* pointer to stack or null */ + callee->regs[BPF_REG_3] = caller->regs[BPF_REG_4]; + + /* unused */ + __mark_reg_not_init(env, &callee->regs[BPF_REG_4]); + __mark_reg_not_init(env, &callee->regs[BPF_REG_5]); + callee->in_callback_fn = true; + callee->callback_ret_range = tnum_range(0, 1); + return 0; +} + +static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee, + int insn_idx) +{ + /* bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn, void + * callback_ctx, u64 flags); + * callback_fn(struct bpf_dynptr_t* dynptr, void *callback_ctx); + */ + __mark_reg_not_init(env, &callee->regs[BPF_REG_0]); + callee->regs[BPF_REG_1].type = PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL; + __mark_reg_known_zero(&callee->regs[BPF_REG_1]); + callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3]; + + /* unused */ + __mark_reg_not_init(env, &callee->regs[BPF_REG_3]); + __mark_reg_not_init(env, &callee->regs[BPF_REG_4]); + __mark_reg_not_init(env, &callee->regs[BPF_REG_5]); + + callee->in_callback_fn = true; + callee->callback_ret_range = tnum_range(0, 1); return 0; } @@ -6154,7 +6983,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) caller = state->frame[state->curframe]; if (callee->in_callback_fn) { /* enforce R0 return value range [0, 1]. */ - struct tnum range = tnum_range(0, 1); + struct tnum range = callee->callback_ret_range; if (r0->type != SCALAR_VALUE) { verbose(env, "R0 not a scalar value\n"); @@ -6169,17 +6998,24 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) caller->regs[BPF_REG_0] = *r0; } - /* Transfer references to the caller */ - err = copy_reference_state(caller, callee); - if (err) - return err; + /* callback_fn frame should have released its own additions to parent's + * reference state at this point, or check_reference_leak would + * complain, hence it must be the same as the caller. There is no need + * to copy it back. + */ + if (!callee->in_callback_fn) { + /* Transfer references to the caller */ + err = copy_reference_state(caller, callee); + if (err) + return err; + } *insn_idx = callee->callsite + 1; if (env->log.level & BPF_LOG_LEVEL) { verbose(env, "returning from callee:\n"); - print_verifier_state(env, callee); + print_verifier_state(env, callee, true); verbose(env, "to caller at %d:\n", *insn_idx); - print_verifier_state(env, caller); + print_verifier_state(env, caller, true); } /* clear everything in the callee */ free_func_state(callee); @@ -6205,9 +7041,7 @@ static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type, ret_reg->s32_max_value = meta->msize_max_value; ret_reg->smin_value = -MAX_ERRNO; ret_reg->s32_min_value = -MAX_ERRNO; - __reg_deduce_bounds(ret_reg); - __reg_bound_offset(ret_reg); - __update_reg_bounds(ret_reg); + reg_bounds_sync(ret_reg); } static int @@ -6225,7 +7059,8 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, func_id != BPF_FUNC_map_pop_elem && func_id != BPF_FUNC_map_peek_elem && func_id != BPF_FUNC_for_each_map_elem && - func_id != BPF_FUNC_redirect_map) + func_id != BPF_FUNC_redirect_map && + func_id != BPF_FUNC_map_lookup_percpu_elem) return 0; if (map == NULL) { @@ -6262,8 +7097,7 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx]; struct bpf_reg_state *regs = cur_regs(env), *reg; struct bpf_map *map = meta->map_ptr; - struct tnum range; - u64 val; + u64 val, max; int err; if (func_id != BPF_FUNC_tail_call) @@ -6273,10 +7107,11 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, return -EINVAL; } - range = tnum_range(0, map->max_entries - 1); reg = ®s[BPF_REG_3]; + val = reg->var_off.value; + max = map->max_entries; - if (!register_is_const(reg) || !tnum_in(range, reg->var_off)) { + if (!(register_is_const(reg) && val < max)) { bpf_map_key_store(aux, BPF_MAP_KEY_POISON); return 0; } @@ -6284,8 +7119,6 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, err = mark_chain_precision(env, BPF_REG_3); if (err) return err; - - val = reg->var_off.value; if (bpf_map_key_unseen(aux)) bpf_map_key_store(aux, val); else if (!bpf_map_key_poisoned(aux) && @@ -6297,13 +7130,20 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, static int check_reference_leak(struct bpf_verifier_env *env) { struct bpf_func_state *state = cur_func(env); + bool refs_lingering = false; int i; + if (state->frameno && !state->in_callback_fn) + return 0; + for (i = 0; i < state->acquired_refs; i++) { + if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno) + continue; verbose(env, "Unreleased reference id=%d alloc_insn=%d\n", state->refs[i].id, state->refs[i].insn_idx); + refs_lingering = true; } - return state->acquired_refs ? -EINVAL : 0; + return refs_lingering ? -EINVAL : 0; } static int check_bpf_snprintf_call(struct bpf_verifier_env *env, @@ -6345,13 +7185,11 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env, static int check_get_func_ip(struct bpf_verifier_env *env) { - enum bpf_attach_type eatype = env->prog->expected_attach_type; enum bpf_prog_type type = resolve_prog_type(env->prog); int func_id = BPF_FUNC_get_func_ip; if (type == BPF_PROG_TYPE_TRACING) { - if (eatype != BPF_TRACE_FENTRY && eatype != BPF_TRACE_FEXIT && - eatype != BPF_MODIFY_RETURN) { + if (!bpf_prog_has_trampoline(env->prog)) { verbose(env, "func %s#%d supported only for fentry/fexit/fmod_ret programs\n", func_id_name(func_id), func_id); return -ENOTSUPP; @@ -6366,10 +7204,48 @@ static int check_get_func_ip(struct bpf_verifier_env *env) return -ENOTSUPP; } +static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env) +{ + return &env->insn_aux_data[env->insn_idx]; +} + +static bool loop_flag_is_zero(struct bpf_verifier_env *env) +{ + struct bpf_reg_state *regs = cur_regs(env); + struct bpf_reg_state *reg = ®s[BPF_REG_4]; + bool reg_is_null = register_is_null(reg); + + if (reg_is_null) + mark_chain_precision(env, BPF_REG_4); + + return reg_is_null; +} + +static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno) +{ + struct bpf_loop_inline_state *state = &cur_aux(env)->loop_inline_state; + + if (!state->initialized) { + state->initialized = 1; + state->fit_for_inline = loop_flag_is_zero(env); + state->callback_subprogno = subprogno; + return; + } + + if (!state->fit_for_inline) + return; + + state->fit_for_inline = (loop_flag_is_zero(env) && + state->callback_subprogno == subprogno); +} + static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx_p) { + enum bpf_prog_type prog_type = resolve_prog_type(env->prog); const struct bpf_func_proto *fn = NULL; + enum bpf_return_type ret_type; + enum bpf_type_flag ret_flag; struct bpf_reg_state *regs; struct bpf_call_arg_meta meta; int insn_idx = *insn_idx_p; @@ -6447,14 +7323,35 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn return err; } - if (func_id == BPF_FUNC_tail_call) { - err = check_reference_leak(env); - if (err) { - verbose(env, "tail_call would lead to reference leak\n"); - return err; + regs = cur_regs(env); + + if (meta.uninit_dynptr_regno) { + /* we write BPF_DW bits (8 bytes) at a time */ + for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) { + err = check_mem_access(env, insn_idx, meta.uninit_dynptr_regno, + i, BPF_DW, BPF_WRITE, -1, false); + if (err) + return err; } - } else if (is_release_function(func_id)) { - err = release_reference(env, meta.ref_obj_id); + + err = mark_stack_slots_dynptr(env, ®s[meta.uninit_dynptr_regno], + fn->arg_type[meta.uninit_dynptr_regno - BPF_REG_1], + insn_idx); + if (err) + return err; + } + + if (meta.release_regno) { + err = -EINVAL; + if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1])) + err = unmark_stack_slots_dynptr(env, ®s[meta.release_regno]); + else if (meta.ref_obj_id) + err = release_reference(env, meta.ref_obj_id); + /* meta.ref_obj_id can only be 0 if register that is meant to be + * released is NULL, which must be > R0. + */ + else if (register_is_null(®s[meta.release_regno])) + err = 0; if (err) { verbose(env, "func %s#%d reference has not been acquired before\n", func_id_name(func_id), func_id); @@ -6462,37 +7359,94 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn } } - regs = cur_regs(env); - - /* check that flags argument in get_local_storage(map, flags) is 0, - * this is required because get_local_storage() can't return an error. - */ - if (func_id == BPF_FUNC_get_local_storage && - !register_is_null(®s[BPF_REG_2])) { - verbose(env, "get_local_storage() doesn't support non-zero flags\n"); - return -EINVAL; - } - - if (func_id == BPF_FUNC_for_each_map_elem) { + switch (func_id) { + case BPF_FUNC_tail_call: + err = check_reference_leak(env); + if (err) { + verbose(env, "tail_call would lead to reference leak\n"); + return err; + } + break; + case BPF_FUNC_get_local_storage: + /* check that flags argument in get_local_storage(map, flags) is 0, + * this is required because get_local_storage() can't return an error. + */ + if (!register_is_null(®s[BPF_REG_2])) { + verbose(env, "get_local_storage() doesn't support non-zero flags\n"); + return -EINVAL; + } + break; + case BPF_FUNC_for_each_map_elem: err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, set_map_elem_callback_state); - if (err < 0) - return -EINVAL; - } - - if (func_id == BPF_FUNC_timer_set_callback) { + break; + case BPF_FUNC_timer_set_callback: err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, set_timer_callback_state); - if (err < 0) - return -EINVAL; - } - - if (func_id == BPF_FUNC_snprintf) { + break; + case BPF_FUNC_find_vma: + err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, + set_find_vma_callback_state); + break; + case BPF_FUNC_snprintf: err = check_bpf_snprintf_call(env, regs); - if (err < 0) - return err; + break; + case BPF_FUNC_loop: + update_loop_inline_state(env, meta.subprogno); + err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, + set_loop_callback_state); + break; + case BPF_FUNC_dynptr_from_mem: + if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) { + verbose(env, "Unsupported reg type %s for bpf_dynptr_from_mem data\n", + reg_type_str(env, regs[BPF_REG_1].type)); + return -EACCES; + } + break; + case BPF_FUNC_set_retval: + if (prog_type == BPF_PROG_TYPE_LSM && + env->prog->expected_attach_type == BPF_LSM_CGROUP) { + if (!env->prog->aux->attach_func_proto->type) { + /* Make sure programs that attach to void + * hooks don't try to modify return value. + */ + verbose(env, "BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n"); + return -EINVAL; + } + } + break; + case BPF_FUNC_dynptr_data: + for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) { + if (arg_type_is_dynptr(fn->arg_type[i])) { + struct bpf_reg_state *reg = ®s[BPF_REG_1 + i]; + + if (meta.ref_obj_id) { + verbose(env, "verifier internal error: meta.ref_obj_id already set\n"); + return -EFAULT; + } + + if (base_type(reg->type) != PTR_TO_DYNPTR) + /* Find the id of the dynptr we're + * tracking the reference of + */ + meta.ref_obj_id = stack_slot_get_id(env, reg); + break; + } + } + if (i == MAX_BPF_FUNC_REG_ARGS) { + verbose(env, "verifier internal error: no dynptr in bpf_dynptr_data()\n"); + return -EFAULT; + } + break; + case BPF_FUNC_user_ringbuf_drain: + err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, + set_user_ringbuf_callback_state); + break; } + if (err) + return err; + /* reset caller saved regs */ for (i = 0; i < CALLER_SAVED_REGS; i++) { mark_reg_not_init(env, regs, caller_saved[i]); @@ -6503,13 +7457,18 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG; /* update return register (already marked as written above) */ - if (fn->ret_type == RET_INTEGER) { + ret_type = fn->ret_type; + ret_flag = type_flag(ret_type); + + switch (base_type(ret_type)) { + case RET_INTEGER: /* sets type to SCALAR_VALUE */ mark_reg_unknown(env, regs, BPF_REG_0); - } else if (fn->ret_type == RET_VOID) { + break; + case RET_VOID: regs[BPF_REG_0].type = NOT_INIT; - } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL || - fn->ret_type == RET_PTR_TO_MAP_VALUE) { + break; + case RET_PTR_TO_MAP_VALUE: /* There is no offset yet applied, variable or fixed */ mark_reg_known_zero(env, regs, BPF_REG_0); /* remember map_ptr, so that check_map_access() @@ -6523,28 +7482,31 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn } regs[BPF_REG_0].map_ptr = meta.map_ptr; regs[BPF_REG_0].map_uid = meta.map_uid; - if (fn->ret_type == RET_PTR_TO_MAP_VALUE) { - regs[BPF_REG_0].type = PTR_TO_MAP_VALUE; - if (map_value_has_spin_lock(meta.map_ptr)) - regs[BPF_REG_0].id = ++env->id_gen; - } else { - regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL; + regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag; + if (!type_may_be_null(ret_type) && + map_value_has_spin_lock(meta.map_ptr)) { + regs[BPF_REG_0].id = ++env->id_gen; } - } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) { + break; + case RET_PTR_TO_SOCKET: mark_reg_known_zero(env, regs, BPF_REG_0); - regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL; - } else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) { + regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag; + break; + case RET_PTR_TO_SOCK_COMMON: mark_reg_known_zero(env, regs, BPF_REG_0); - regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL; - } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) { + regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag; + break; + case RET_PTR_TO_TCP_SOCK: mark_reg_known_zero(env, regs, BPF_REG_0); - regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL; - } else if (fn->ret_type == RET_PTR_TO_ALLOC_MEM_OR_NULL) { + regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag; + break; + case RET_PTR_TO_ALLOC_MEM: mark_reg_known_zero(env, regs, BPF_REG_0); - regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL; + regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag; regs[BPF_REG_0].mem_size = meta.mem_size; - } else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL || - fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID) { + break; + case RET_PTR_TO_MEM_OR_BTF_ID: + { const struct btf_type *t; mark_reg_known_zero(env, regs, BPF_REG_0); @@ -6562,46 +7524,68 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn tname, PTR_ERR(ret)); return -EINVAL; } - regs[BPF_REG_0].type = - fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ? - PTR_TO_MEM : PTR_TO_MEM_OR_NULL; + regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag; regs[BPF_REG_0].mem_size = tsize; } else { - regs[BPF_REG_0].type = - fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ? - PTR_TO_BTF_ID : PTR_TO_BTF_ID_OR_NULL; + /* MEM_RDONLY may be carried from ret_flag, but it + * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise + * it will confuse the check of PTR_TO_BTF_ID in + * check_mem_access(). + */ + ret_flag &= ~MEM_RDONLY; + + regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag; regs[BPF_REG_0].btf = meta.ret_btf; regs[BPF_REG_0].btf_id = meta.ret_btf_id; } - } else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL || - fn->ret_type == RET_PTR_TO_BTF_ID) { + break; + } + case RET_PTR_TO_BTF_ID: + { + struct btf *ret_btf; int ret_btf_id; mark_reg_known_zero(env, regs, BPF_REG_0); - regs[BPF_REG_0].type = fn->ret_type == RET_PTR_TO_BTF_ID ? - PTR_TO_BTF_ID : - PTR_TO_BTF_ID_OR_NULL; - ret_btf_id = *fn->ret_btf_id; + regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag; + if (func_id == BPF_FUNC_kptr_xchg) { + ret_btf = meta.kptr_off_desc->kptr.btf; + ret_btf_id = meta.kptr_off_desc->kptr.btf_id; + } else { + if (fn->ret_btf_id == BPF_PTR_POISON) { + verbose(env, "verifier internal error:"); + verbose(env, "func %s has non-overwritten BPF_PTR_POISON return type\n", + func_id_name(func_id)); + return -EINVAL; + } + ret_btf = btf_vmlinux; + ret_btf_id = *fn->ret_btf_id; + } if (ret_btf_id == 0) { - verbose(env, "invalid return type %d of func %s#%d\n", - fn->ret_type, func_id_name(func_id), func_id); + verbose(env, "invalid return type %u of func %s#%d\n", + base_type(ret_type), func_id_name(func_id), + func_id); return -EINVAL; } - /* current BPF helper definitions are only coming from - * built-in code with type IDs from vmlinux BTF - */ - regs[BPF_REG_0].btf = btf_vmlinux; + regs[BPF_REG_0].btf = ret_btf; regs[BPF_REG_0].btf_id = ret_btf_id; - } else { - verbose(env, "unknown return type %d of func %s#%d\n", - fn->ret_type, func_id_name(func_id), func_id); + break; + } + default: + verbose(env, "unknown return type %u of func %s#%d\n", + base_type(ret_type), func_id_name(func_id), func_id); return -EINVAL; } - if (reg_type_may_be_null(regs[BPF_REG_0].type)) + if (type_may_be_null(regs[BPF_REG_0].type)) regs[BPF_REG_0].id = ++env->id_gen; - if (is_ptr_cast_function(func_id)) { + if (helper_multiple_ref_obj_use(func_id, meta.map_ptr)) { + verbose(env, "verifier internal error: func %s#%d sets ref_obj_id more than once\n", + func_id_name(func_id), func_id); + return -EFAULT; + } + + if (is_ptr_cast_function(func_id) || is_dynptr_ref_function(func_id)) { /* For release_reference() */ regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id; } else if (is_acquire_function(func_id, meta.map_ptr)) { @@ -6679,22 +7663,25 @@ static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno, } } -static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) +static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, + int *insn_idx_p) { const struct btf_type *t, *func, *func_proto, *ptr_type; struct bpf_reg_state *regs = cur_regs(env); + struct bpf_kfunc_arg_meta meta = { 0 }; const char *func_name, *ptr_type_name; u32 i, nargs, func_id, ptr_type_id; - struct module *btf_mod = NULL; + int err, insn_idx = *insn_idx_p; const struct btf_param *args; struct btf *desc_btf; - int err; + u32 *kfunc_flags; + bool acq; /* skip for now, but return error when we find this in fixup_kfunc_call */ if (!insn->imm) return 0; - desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off, &btf_mod); + desc_btf = find_kfunc_desc_btf(env, insn->off); if (IS_ERR(desc_btf)) return PTR_ERR(desc_btf); @@ -6703,23 +7690,48 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) func_name = btf_name_by_offset(desc_btf, func->name_off); func_proto = btf_type_by_id(desc_btf, func->type); - if (!env->ops->check_kfunc_call || - !env->ops->check_kfunc_call(func_id, btf_mod)) { + kfunc_flags = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), func_id); + if (!kfunc_flags) { verbose(env, "calling kernel function %s is not allowed\n", func_name); return -EACCES; } + if (*kfunc_flags & KF_DESTRUCTIVE && !capable(CAP_SYS_BOOT)) { + verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capabilities\n"); + return -EACCES; + } + + acq = *kfunc_flags & KF_ACQUIRE; + + meta.flags = *kfunc_flags; /* Check the arguments */ - err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs); - if (err) + err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs, &meta); + if (err < 0) return err; + /* In case of release function, we get register number of refcounted + * PTR_TO_BTF_ID back from btf_check_kfunc_arg_match, do the release now + */ + if (err) { + err = release_reference(env, regs[err].ref_obj_id); + if (err) { + verbose(env, "kfunc %s#%d reference has not been acquired before\n", + func_name, func_id); + return err; + } + } for (i = 0; i < CALLER_SAVED_REGS; i++) mark_reg_not_init(env, regs, caller_saved[i]); /* Check return type */ t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL); + + if (acq && !btf_type_is_struct_ptr(desc_btf, t)) { + verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n"); + return -EINVAL; + } + if (btf_type_is_scalar(t)) { mark_reg_unknown(env, regs, BPF_REG_0); mark_btf_func_reg_size(env, BPF_REG_0, t->size); @@ -6727,18 +7739,47 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id); if (!btf_type_is_struct(ptr_type)) { - ptr_type_name = btf_name_by_offset(desc_btf, - ptr_type->name_off); - verbose(env, "kernel function %s returns pointer type %s %s is not supported\n", - func_name, btf_type_str(ptr_type), - ptr_type_name); - return -EINVAL; + if (!meta.r0_size) { + ptr_type_name = btf_name_by_offset(desc_btf, + ptr_type->name_off); + verbose(env, + "kernel function %s returns pointer type %s %s is not supported\n", + func_name, + btf_type_str(ptr_type), + ptr_type_name); + return -EINVAL; + } + + mark_reg_known_zero(env, regs, BPF_REG_0); + regs[BPF_REG_0].type = PTR_TO_MEM; + regs[BPF_REG_0].mem_size = meta.r0_size; + + if (meta.r0_rdonly) + regs[BPF_REG_0].type |= MEM_RDONLY; + + /* Ensures we don't access the memory after a release_reference() */ + if (meta.ref_obj_id) + regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id; + } else { + mark_reg_known_zero(env, regs, BPF_REG_0); + regs[BPF_REG_0].btf = desc_btf; + regs[BPF_REG_0].type = PTR_TO_BTF_ID; + regs[BPF_REG_0].btf_id = ptr_type_id; + } + if (*kfunc_flags & KF_RET_NULL) { + regs[BPF_REG_0].type |= PTR_MAYBE_NULL; + /* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */ + regs[BPF_REG_0].id = ++env->id_gen; } - mark_reg_known_zero(env, regs, BPF_REG_0); - regs[BPF_REG_0].btf = desc_btf; - regs[BPF_REG_0].type = PTR_TO_BTF_ID; - regs[BPF_REG_0].btf_id = ptr_type_id; mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *)); + if (acq) { + int id = acquire_reference_state(env, insn_idx); + + if (id < 0) + return id; + regs[BPF_REG_0].id = id; + regs[BPF_REG_0].ref_obj_id = id; + } } /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */ nargs = btf_type_vlen(func_proto); @@ -6807,36 +7848,31 @@ static bool check_reg_sane_offset(struct bpf_verifier_env *env, if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) { verbose(env, "math between %s pointer and %lld is not allowed\n", - reg_type_str[type], val); + reg_type_str(env, type), val); return false; } if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) { verbose(env, "%s pointer offset %d is not allowed\n", - reg_type_str[type], reg->off); + reg_type_str(env, type), reg->off); return false; } if (smin == S64_MIN) { verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n", - reg_type_str[type]); + reg_type_str(env, type)); return false; } if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) { verbose(env, "value %lld makes %s pointer be out of bounds\n", - smin, reg_type_str[type]); + smin, reg_type_str(env, type)); return false; } return true; } -static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env) -{ - return &env->insn_aux_data[env->insn_idx]; -} - enum { REASON_BOUNDS = -1, REASON_TYPE = -2, @@ -7142,7 +8178,7 @@ static int sanitize_check_bounds(struct bpf_verifier_env *env, return -EACCES; break; case PTR_TO_MAP_VALUE: - if (check_map_access(env, dst, dst_reg->off, 1, false)) { + if (check_map_access(env, dst, dst_reg->off, 1, false, ACCESS_HELPER)) { verbose(env, "R%d pointer arithmetic of map value goes out of range, " "prohibited for !root\n", dst); return -EACCES; @@ -7202,11 +8238,13 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, return -EACCES; } - switch (ptr_reg->type) { - case PTR_TO_MAP_VALUE_OR_NULL: + if (ptr_reg->type & PTR_MAYBE_NULL) { verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n", - dst, reg_type_str[ptr_reg->type]); + dst, reg_type_str(env, ptr_reg->type)); return -EACCES; + } + + switch (base_type(ptr_reg->type)) { case CONST_PTR_TO_MAP: /* smin_val represents the known value */ if (known && smin_val == 0 && opcode == BPF_ADD) @@ -7214,14 +8252,11 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, fallthrough; case PTR_TO_PACKET_END: case PTR_TO_SOCKET: - case PTR_TO_SOCKET_OR_NULL: case PTR_TO_SOCK_COMMON: - case PTR_TO_SOCK_COMMON_OR_NULL: case PTR_TO_TCP_SOCK: - case PTR_TO_TCP_SOCK_OR_NULL: case PTR_TO_XDP_SOCK: verbose(env, "R%d pointer arithmetic on %s prohibited\n", - dst, reg_type_str[ptr_reg->type]); + dst, reg_type_str(env, ptr_reg->type)); return -EACCES; default: break; @@ -7374,11 +8409,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type)) return -EINVAL; - - __update_reg_bounds(dst_reg); - __reg_deduce_bounds(dst_reg); - __reg_bound_offset(dst_reg); - + reg_bounds_sync(dst_reg); if (sanitize_check_bounds(env, insn, dst_reg) < 0) return -EACCES; if (sanitize_needed(opcode)) { @@ -8116,10 +9147,7 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, /* ALU32 ops are zero extended into 64bit register */ if (alu32) zext_32_to_64(dst_reg); - - __update_reg_bounds(dst_reg); - __reg_deduce_bounds(dst_reg); - __reg_bound_offset(dst_reg); + reg_bounds_sync(dst_reg); return 0; } @@ -8194,12 +9222,12 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, /* Got here implies adding two SCALAR_VALUEs */ if (WARN_ON_ONCE(ptr_reg)) { - print_verifier_state(env, state); + print_verifier_state(env, state, true); verbose(env, "verifier internal error: unexpected ptr_reg\n"); return -EINVAL; } if (WARN_ON(!src_reg)) { - print_verifier_state(env, state); + print_verifier_state(env, state, true); verbose(env, "verifier internal error: no src_reg\n"); return -EINVAL; } @@ -8215,7 +9243,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) if (opcode == BPF_END || opcode == BPF_NEG) { if (opcode == BPF_NEG) { - if (BPF_SRC(insn->code) != 0 || + if (BPF_SRC(insn->code) != BPF_K || insn->src_reg != BPF_REG_0 || insn->off != 0 || insn->imm != 0) { verbose(env, "BPF_NEG uses reserved fields\n"); @@ -8308,6 +9336,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) insn->dst_reg); } zext_32_to_64(dst_reg); + reg_bounds_sync(dst_reg); } } else { /* case: R = imm @@ -8379,34 +9408,14 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) return 0; } -static void __find_good_pkt_pointers(struct bpf_func_state *state, - struct bpf_reg_state *dst_reg, - enum bpf_reg_type type, int new_range) -{ - struct bpf_reg_state *reg; - int i; - - for (i = 0; i < MAX_BPF_REG; i++) { - reg = &state->regs[i]; - if (reg->type == type && reg->id == dst_reg->id) - /* keep the maximum range already checked */ - reg->range = max(reg->range, new_range); - } - - bpf_for_each_spilled_reg(i, state, reg) { - if (!reg) - continue; - if (reg->type == type && reg->id == dst_reg->id) - reg->range = max(reg->range, new_range); - } -} - static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, struct bpf_reg_state *dst_reg, enum bpf_reg_type type, bool range_right_open) { - int new_range, i; + struct bpf_func_state *state; + struct bpf_reg_state *reg; + int new_range; if (dst_reg->off < 0 || (dst_reg->off == 0 && range_right_open)) @@ -8471,9 +9480,11 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, * the range won't allow anything. * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16. */ - for (i = 0; i <= vstate->curframe; i++) - __find_good_pkt_pointers(vstate->frame[i], dst_reg, type, - new_range); + bpf_for_each_reg_in_vstate(vstate, state, reg, ({ + if (reg->type == type && reg->id == dst_reg->id) + /* keep the maximum range already checked */ + reg->range = max(reg->range, new_range); + })); } static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode) @@ -8745,26 +9756,33 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, return; switch (opcode) { + /* JEQ/JNE comparison doesn't change the register equivalence. + * + * r1 = r2; + * if (r1 == 42) goto label; + * ... + * label: // here both r1 and r2 are known to be 42. + * + * Hence when marking register as known preserve it's ID. + */ case BPF_JEQ: + if (is_jmp32) { + __mark_reg32_known(true_reg, val32); + true_32off = tnum_subreg(true_reg->var_off); + } else { + ___mark_reg_known(true_reg, val); + true_64off = true_reg->var_off; + } + break; case BPF_JNE: - { - struct bpf_reg_state *reg = - opcode == BPF_JEQ ? true_reg : false_reg; - - /* JEQ/JNE comparison doesn't change the register equivalence. - * r1 = r2; - * if (r1 == 42) goto label; - * ... - * label: // here both r1 and r2 are known to be 42. - * - * Hence when marking register as known preserve it's ID. - */ - if (is_jmp32) - __mark_reg32_known(reg, val32); - else - ___mark_reg_known(reg, val); + if (is_jmp32) { + __mark_reg32_known(false_reg, val32); + false_32off = tnum_subreg(false_reg->var_off); + } else { + ___mark_reg_known(false_reg, val); + false_64off = false_reg->var_off; + } break; - } case BPF_JSET: if (is_jmp32) { false_32off = tnum_and(false_32off, tnum_const(~val32)); @@ -8903,21 +9921,8 @@ static void __reg_combine_min_max(struct bpf_reg_state *src_reg, dst_reg->smax_value); src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off, dst_reg->var_off); - /* We might have learned new bounds from the var_off. */ - __update_reg_bounds(src_reg); - __update_reg_bounds(dst_reg); - /* We might have learned something about the sign bit. */ - __reg_deduce_bounds(src_reg); - __reg_deduce_bounds(dst_reg); - /* We might have learned some bits from the bounds. */ - __reg_bound_offset(src_reg); - __reg_bound_offset(dst_reg); - /* Intersecting with the old var_off might have improved our bounds - * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), - * then new var_off is (0; 0x7f...fc) which improves our umax. - */ - __update_reg_bounds(src_reg); - __update_reg_bounds(dst_reg); + reg_bounds_sync(src_reg); + reg_bounds_sync(dst_reg); } static void reg_combine_min_max(struct bpf_reg_state *true_src, @@ -8940,17 +9945,17 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, struct bpf_reg_state *reg, u32 id, bool is_null) { - if (reg_type_may_be_null(reg->type) && reg->id == id && + if (type_may_be_null(reg->type) && reg->id == id && !WARN_ON_ONCE(!reg->id)) { - /* Old offset (both fixed and variable parts) should - * have been known-zero, because we don't allow pointer - * arithmetic on pointers that might be NULL. - */ if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || !tnum_equals_const(reg->var_off, 0) || reg->off)) { - __mark_reg_known_zero(reg); - reg->off = 0; + /* Old offset (both fixed and variable parts) should + * have been known-zero, because we don't allow pointer + * arithmetic on pointers that might be NULL. If we + * see this happening, don't convert the register. + */ + return; } if (is_null) { reg->type = SCALAR_VALUE; @@ -8968,7 +9973,7 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, if (!reg_may_point_to_spin_lock(reg)) { /* For not-NULL ptr, reg->ref_obj_id will be reset - * in release_reg_references(). + * in release_reference(). * * reg->id is still used by spin_lock ptr. Other * than spin_lock ptr type, reg->id can be reset. @@ -8978,22 +9983,6 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, } } -static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id, - bool is_null) -{ - struct bpf_reg_state *reg; - int i; - - for (i = 0; i < MAX_BPF_REG; i++) - mark_ptr_or_null_reg(state, &state->regs[i], id, is_null); - - bpf_for_each_spilled_reg(i, state, reg) { - if (!reg) - continue; - mark_ptr_or_null_reg(state, reg, id, is_null); - } -} - /* The logic is similar to find_good_pkt_pointers(), both could eventually * be folded together at some point. */ @@ -9001,10 +9990,9 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno, bool is_null) { struct bpf_func_state *state = vstate->frame[vstate->curframe]; - struct bpf_reg_state *regs = state->regs; + struct bpf_reg_state *regs = state->regs, *reg; u32 ref_obj_id = regs[regno].ref_obj_id; u32 id = regs[regno].id; - int i; if (ref_obj_id && ref_obj_id == id && is_null) /* regs[regno] is in the " == NULL" branch. @@ -9013,8 +10001,9 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno, */ WARN_ON_ONCE(release_reference_state(state, id)); - for (i = 0; i <= vstate->curframe; i++) - __mark_ptr_or_null_regs(vstate->frame[i], id, is_null); + bpf_for_each_reg_in_vstate(vstate, state, reg, ({ + mark_ptr_or_null_reg(state, reg, id, is_null); + })); } static bool try_match_pkt_pointers(const struct bpf_insn *insn, @@ -9127,23 +10116,11 @@ static void find_equal_scalars(struct bpf_verifier_state *vstate, { struct bpf_func_state *state; struct bpf_reg_state *reg; - int i, j; - for (i = 0; i <= vstate->curframe; i++) { - state = vstate->frame[i]; - for (j = 0; j < MAX_BPF_REG; j++) { - reg = &state->regs[j]; - if (reg->type == SCALAR_VALUE && reg->id == known_reg->id) - *reg = *known_reg; - } - - bpf_for_each_spilled_reg(j, state, reg) { - if (!reg) - continue; - if (reg->type == SCALAR_VALUE && reg->id == known_reg->id) - *reg = *known_reg; - } - } + bpf_for_each_reg_in_vstate(vstate, state, reg, ({ + if (reg->type == SCALAR_VALUE && reg->id == known_reg->id) + *reg = *known_reg; + })); } static int check_cond_jmp_op(struct bpf_verifier_env *env, @@ -9318,7 +10295,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, */ if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K && insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) && - reg_type_may_be_null(dst_reg->type)) { + type_may_be_null(dst_reg->type)) { /* Mark all identical registers in each branch as either * safe or unknown depending R == 0 or R != 0 conditional. */ @@ -9334,7 +10311,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, return -EACCES; } if (env->log.level & BPF_LOG_LEVEL) - print_verifier_state(env, this_branch->frame[this_branch->curframe]); + print_insn_state(env, this_branch->frame[this_branch->curframe]); return 0; } @@ -9369,16 +10346,19 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) return 0; } - if (insn->src_reg == BPF_PSEUDO_BTF_ID) { - mark_reg_known_zero(env, regs, insn->dst_reg); + /* All special src_reg cases are listed below. From this point onwards + * we either succeed and assign a corresponding dst_reg->type after + * zeroing the offset, or fail and reject the program. + */ + mark_reg_known_zero(env, regs, insn->dst_reg); + if (insn->src_reg == BPF_PSEUDO_BTF_ID) { dst_reg->type = aux->btf_var.reg_type; - switch (dst_reg->type) { + switch (base_type(dst_reg->type)) { case PTR_TO_MEM: dst_reg->mem_size = aux->btf_var.mem_size; break; case PTR_TO_BTF_ID: - case PTR_TO_PERCPU_BTF_ID: dst_reg->btf = aux->btf_var.btf; dst_reg->btf_id = aux->btf_var.btf_id; break; @@ -9409,7 +10389,6 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) } map = env->used_maps[aux->map_index]; - mark_reg_known_zero(env, regs, insn->dst_reg); dst_reg->map_ptr = map; if (insn->src_reg == BPF_PSEUDO_MAP_VALUE || @@ -9513,7 +10492,7 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) return err; } - err = check_ctx_reg(env, ®s[ctx_reg], ctx_reg); + err = check_ptr_off_reg(env, ®s[ctx_reg], ctx_reg); if (err < 0) return err; @@ -9545,11 +10524,21 @@ static int check_return_code(struct bpf_verifier_env *env) const bool is_subprog = frame->subprogno; /* LSM and struct_ops func-ptr's return type could be "void" */ - if (!is_subprog && - (prog_type == BPF_PROG_TYPE_STRUCT_OPS || - prog_type == BPF_PROG_TYPE_LSM) && - !prog->aux->attach_func_proto->type) - return 0; + if (!is_subprog) { + switch (prog_type) { + case BPF_PROG_TYPE_LSM: + if (prog->expected_attach_type == BPF_LSM_CGROUP) + /* See below, can be 0 or 0-1 depending on hook. */ + break; + fallthrough; + case BPF_PROG_TYPE_STRUCT_OPS: + if (!prog->aux->attach_func_proto->type) + return 0; + break; + default: + break; + } + } /* eBPF calling convention is such that R0 is used * to return the value from eBPF program. @@ -9572,7 +10561,7 @@ static int check_return_code(struct bpf_verifier_env *env) /* enforce return zero from async callbacks like timer */ if (reg->type != SCALAR_VALUE) { verbose(env, "In async callback the register R0 is not a known value (%s)\n", - reg_type_str[reg->type]); + reg_type_str(env, reg->type)); return -EINVAL; } @@ -9586,7 +10575,7 @@ static int check_return_code(struct bpf_verifier_env *env) if (is_subprog) { if (reg->type != SCALAR_VALUE) { verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n", - reg_type_str[reg->type]); + reg_type_str(env, reg->type)); return -EINVAL; } return 0; @@ -9640,6 +10629,22 @@ static int check_return_code(struct bpf_verifier_env *env) case BPF_PROG_TYPE_SK_LOOKUP: range = tnum_range(SK_DROP, SK_PASS); break; + + case BPF_PROG_TYPE_LSM: + if (env->prog->expected_attach_type != BPF_LSM_CGROUP) { + /* Regular BPF_PROG_TYPE_LSM programs can return + * any value. + */ + return 0; + } + if (!env->prog->aux->attach_func_proto->type) { + /* Make sure programs that attach to void + * hooks don't try to modify return value. + */ + range = tnum_range(1, 1); + } + break; + case BPF_PROG_TYPE_EXT: /* freplace program can return anything as its return value * depends on the to-be-replaced kernel func or bpf program. @@ -9650,12 +10655,16 @@ static int check_return_code(struct bpf_verifier_env *env) if (reg->type != SCALAR_VALUE) { verbose(env, "At program exit the register R0 is not a known value (%s)\n", - reg_type_str[reg->type]); + reg_type_str(env, reg->type)); return -EINVAL; } if (!tnum_in(range, reg->var_off)) { verbose_invalid_scalar(env, reg, &range, "program exit", "R0"); + if (prog->expected_attach_type == BPF_LSM_CGROUP && + prog_type == BPF_PROG_TYPE_LSM && + !prog->aux->attach_func_proto->type) + verbose(env, "Note, BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n"); return -EINVAL; } @@ -10067,7 +11076,7 @@ static int check_btf_func(struct bpf_verifier_env *env, goto err_free; ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL); scalar_return = - btf_type_is_small_int(ret_type) || btf_type_is_enum(ret_type); + btf_type_is_small_int(ret_type) || btf_is_any_enum(ret_type); if (i && !scalar_return && env->subprog_info[i].has_ld_abs) { verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n"); goto err_free; @@ -10104,8 +11113,7 @@ static void adjust_btf_func(struct bpf_verifier_env *env) aux->func_info[i].insn_off = env->subprog_info[i].start; } -#define MIN_BPF_LINEINFO_SIZE (offsetof(struct bpf_line_info, line_col) + \ - sizeof(((struct bpf_line_info *)(0))->line_col)) +#define MIN_BPF_LINEINFO_SIZE offsetofend(struct bpf_line_info, line_col) #define MAX_LINEINFO_REC_SIZE MAX_FUNCINFO_REC_SIZE static int check_btf_line(struct bpf_verifier_env *env, @@ -10233,6 +11241,78 @@ err_free: return err; } +#define MIN_CORE_RELO_SIZE sizeof(struct bpf_core_relo) +#define MAX_CORE_RELO_SIZE MAX_FUNCINFO_REC_SIZE + +static int check_core_relo(struct bpf_verifier_env *env, + const union bpf_attr *attr, + bpfptr_t uattr) +{ + u32 i, nr_core_relo, ncopy, expected_size, rec_size; + struct bpf_core_relo core_relo = {}; + struct bpf_prog *prog = env->prog; + const struct btf *btf = prog->aux->btf; + struct bpf_core_ctx ctx = { + .log = &env->log, + .btf = btf, + }; + bpfptr_t u_core_relo; + int err; + + nr_core_relo = attr->core_relo_cnt; + if (!nr_core_relo) + return 0; + if (nr_core_relo > INT_MAX / sizeof(struct bpf_core_relo)) + return -EINVAL; + + rec_size = attr->core_relo_rec_size; + if (rec_size < MIN_CORE_RELO_SIZE || + rec_size > MAX_CORE_RELO_SIZE || + rec_size % sizeof(u32)) + return -EINVAL; + + u_core_relo = make_bpfptr(attr->core_relos, uattr.is_kernel); + expected_size = sizeof(struct bpf_core_relo); + ncopy = min_t(u32, expected_size, rec_size); + + /* Unlike func_info and line_info, copy and apply each CO-RE + * relocation record one at a time. + */ + for (i = 0; i < nr_core_relo; i++) { + /* future proofing when sizeof(bpf_core_relo) changes */ + err = bpf_check_uarg_tail_zero(u_core_relo, expected_size, rec_size); + if (err) { + if (err == -E2BIG) { + verbose(env, "nonzero tailing record in core_relo"); + if (copy_to_bpfptr_offset(uattr, + offsetof(union bpf_attr, core_relo_rec_size), + &expected_size, sizeof(expected_size))) + err = -EFAULT; + } + break; + } + + if (copy_from_bpfptr(&core_relo, u_core_relo, ncopy)) { + err = -EFAULT; + break; + } + + if (core_relo.insn_off % 8 || core_relo.insn_off / 8 >= prog->len) { + verbose(env, "Invalid core_relo[%u].insn_off:%u prog->len:%u\n", + i, core_relo.insn_off, prog->len); + err = -EINVAL; + break; + } + + err = bpf_core_apply(&ctx, &core_relo, i, + &prog->insnsi[core_relo.insn_off / 8]); + if (err) + break; + bpfptr_add(&u_core_relo, rec_size); + } + return err; +} + static int check_btf_info(struct bpf_verifier_env *env, const union bpf_attr *attr, bpfptr_t uattr) @@ -10263,6 +11343,10 @@ static int check_btf_info(struct bpf_verifier_env *env, if (err) return err; + err = check_core_relo(env, attr, uattr); + if (err) + return err; + return 0; } @@ -10431,7 +11515,7 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, return true; if (rcur->type == NOT_INIT) return false; - switch (rold->type) { + switch (base_type(rold->type)) { case SCALAR_VALUE: if (env->explore_alu_limits) return false; @@ -10453,6 +11537,22 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, } case PTR_TO_MAP_KEY: case PTR_TO_MAP_VALUE: + /* a PTR_TO_MAP_VALUE could be safe to use as a + * PTR_TO_MAP_VALUE_OR_NULL into the same map. + * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL- + * checked, doing so could have affected others with the same + * id, and we can't check for that because we lost the id when + * we converted to a PTR_TO_MAP_VALUE. + */ + if (type_may_be_null(rold->type)) { + if (!type_may_be_null(rcur->type)) + return false; + if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id))) + return false; + /* Check our ids match any regs they're supposed to */ + return check_ids(rold->id, rcur->id, idmap); + } + /* If the new min/max/var_off satisfy the old ones and * everything else matches, we are OK. * 'id' is not compared, since it's only used for maps with @@ -10464,20 +11564,6 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off); - case PTR_TO_MAP_VALUE_OR_NULL: - /* a PTR_TO_MAP_VALUE could be safe to use as a - * PTR_TO_MAP_VALUE_OR_NULL into the same map. - * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL- - * checked, doing so could have affected others with the same - * id, and we can't check for that because we lost the id when - * we converted to a PTR_TO_MAP_VALUE. - */ - if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL) - return false; - if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id))) - return false; - /* Check our ids match any regs they're supposed to */ - return check_ids(rold->id, rcur->id, idmap); case PTR_TO_PACKET_META: case PTR_TO_PACKET: if (rcur->type != rold->type) @@ -10506,11 +11592,8 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, case PTR_TO_PACKET_END: case PTR_TO_FLOW_KEYS: case PTR_TO_SOCKET: - case PTR_TO_SOCKET_OR_NULL: case PTR_TO_SOCK_COMMON: - case PTR_TO_SOCK_COMMON_OR_NULL: case PTR_TO_TCP_SOCK: - case PTR_TO_TCP_SOCK_OR_NULL: case PTR_TO_XDP_SOCK: /* Only valid matches are exact, which memcmp() above * would have accepted @@ -11036,17 +12119,13 @@ next: /* Return true if it's OK to have the same insn return a different type. */ static bool reg_type_mismatch_ok(enum bpf_reg_type type) { - switch (type) { + switch (base_type(type)) { case PTR_TO_CTX: case PTR_TO_SOCKET: - case PTR_TO_SOCKET_OR_NULL: case PTR_TO_SOCK_COMMON: - case PTR_TO_SOCK_COMMON_OR_NULL: case PTR_TO_TCP_SOCK: - case PTR_TO_TCP_SOCK_OR_NULL: case PTR_TO_XDP_SOCK: case PTR_TO_BTF_ID: - case PTR_TO_BTF_ID_OR_NULL: return false; default: return true; @@ -11126,16 +12205,12 @@ static int do_check(struct bpf_verifier_env *env) if (need_resched()) cond_resched(); - if (env->log.level & BPF_LOG_LEVEL2 || - (env->log.level & BPF_LOG_LEVEL && do_print_state)) { - if (env->log.level & BPF_LOG_LEVEL2) - verbose(env, "%d:", env->insn_idx); - else - verbose(env, "\nfrom %d to %d%s:", - env->prev_insn_idx, env->insn_idx, - env->cur_state->speculative ? - " (speculative execution)" : ""); - print_verifier_state(env, state->frame[state->curframe]); + if (env->log.level & BPF_LOG_LEVEL2 && do_print_state) { + verbose(env, "\nfrom %d to %d%s:", + env->prev_insn_idx, env->insn_idx, + env->cur_state->speculative ? + " (speculative execution)" : ""); + print_verifier_state(env, state->frame[state->curframe], true); do_print_state = false; } @@ -11146,9 +12221,15 @@ static int do_check(struct bpf_verifier_env *env) .private_data = env, }; + if (verifier_state_scratched(env)) + print_insn_state(env, state->frame[state->curframe]); + verbose_linfo(env, env->insn_idx, "; "); + env->prev_log_len = env->log.len_used; verbose(env, "%d: ", env->insn_idx); print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); + env->prev_insn_print_len = env->log.len_used - env->prev_log_len; + env->prev_log_len = env->log.len_used; } if (bpf_prog_is_dev_bound(env->prog->aux)) { @@ -11270,7 +12351,7 @@ static int do_check(struct bpf_verifier_env *env) if (is_ctx_reg(env, insn->dst_reg)) { verbose(env, "BPF_ST stores into R%d %s is not allowed\n", insn->dst_reg, - reg_type_str[reg_state(env, insn->dst_reg)->type]); + reg_type_str(env, reg_state(env, insn->dst_reg)->type)); return -EACCES; } @@ -11307,7 +12388,7 @@ static int do_check(struct bpf_verifier_env *env) if (insn->src_reg == BPF_PSEUDO_CALL) err = check_func_call(env, insn, &env->insn_idx); else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) - err = check_kfunc_call(env, insn); + err = check_kfunc_call(env, insn, &env->insn_idx); else err = check_helper_call(env, insn, &env->insn_idx); if (err) @@ -11340,6 +12421,16 @@ static int do_check(struct bpf_verifier_env *env) return -EINVAL; } + /* We must do check_reference_leak here before + * prepare_func_exit to handle the case when + * state->curframe > 0, it may be a callback + * function, for which reference_state must + * match caller reference state when it exits. + */ + err = check_reference_leak(env); + if (err) + return err; + if (state->curframe) { /* exit from nested function */ err = prepare_func_exit(env, &env->insn_idx); @@ -11349,14 +12440,11 @@ static int do_check(struct bpf_verifier_env *env) continue; } - err = check_reference_leak(env); - if (err) - return err; - err = check_return_code(env); if (err) return err; process_bpf_exit: + mark_verifier_state_scratched(env); update_branch_counts(env, env->cur_state); err = pop_stack(env, &prev_insn_idx, &env->insn_idx, pop_log); @@ -11505,7 +12593,7 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env, type = t->type; t = btf_type_skip_modifiers(btf, type, NULL); if (percpu) { - aux->btf_var.reg_type = PTR_TO_PERCPU_BTF_ID; + aux->btf_var.reg_type = PTR_TO_BTF_ID | MEM_PERCPU; aux->btf_var.btf = btf; aux->btf_var.btf_id = type; } else if (!btf_type_is_struct(t)) { @@ -11522,7 +12610,7 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env, err = -EINVAL; goto err_put; } - aux->btf_var.reg_type = PTR_TO_MEM; + aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY; aux->btf_var.mem_size = tsize; } else { aux->btf_var.reg_type = PTR_TO_BTF_ID; @@ -11564,14 +12652,6 @@ err_put: return err; } -static int check_map_prealloc(struct bpf_map *map) -{ - return (map->map_type != BPF_MAP_TYPE_HASH && - map->map_type != BPF_MAP_TYPE_PERCPU_HASH && - map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) || - !(map->map_flags & BPF_F_NO_PREALLOC); -} - static bool is_tracing_prog_type(enum bpf_prog_type type) { switch (type) { @@ -11579,56 +12659,19 @@ static bool is_tracing_prog_type(enum bpf_prog_type type) case BPF_PROG_TYPE_TRACEPOINT: case BPF_PROG_TYPE_PERF_EVENT: case BPF_PROG_TYPE_RAW_TRACEPOINT: + case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: return true; default: return false; } } -static bool is_preallocated_map(struct bpf_map *map) -{ - if (!check_map_prealloc(map)) - return false; - if (map->inner_map_meta && !check_map_prealloc(map->inner_map_meta)) - return false; - return true; -} - static int check_map_prog_compatibility(struct bpf_verifier_env *env, struct bpf_map *map, struct bpf_prog *prog) { enum bpf_prog_type prog_type = resolve_prog_type(prog); - /* - * Validate that trace type programs use preallocated hash maps. - * - * For programs attached to PERF events this is mandatory as the - * perf NMI can hit any arbitrary code sequence. - * - * All other trace types using preallocated hash maps are unsafe as - * well because tracepoint or kprobes can be inside locked regions - * of the memory allocator or at a place where a recursion into the - * memory allocator would see inconsistent state. - * - * On RT enabled kernels run-time allocation of all trace type - * programs is strictly prohibited due to lock type constraints. On - * !RT kernels it is allowed for backwards compatibility reasons for - * now, but warnings are emitted so developers are made aware of - * the unsafety and can fix their programs before this is enforced. - */ - if (is_tracing_prog_type(prog_type) && !is_preallocated_map(map)) { - if (prog_type == BPF_PROG_TYPE_PERF_EVENT) { - verbose(env, "perf_event programs can only use preallocated hash map\n"); - return -EINVAL; - } - if (IS_ENABLED(CONFIG_PREEMPT_RT)) { - verbose(env, "trace type programs can only use preallocated hash map\n"); - return -EINVAL; - } - WARN_ONCE(1, "trace type BPF program uses run-time allocation\n"); - verbose(env, "trace type programs with run-time allocated hash maps are unsafe. Switch to preallocated hash maps.\n"); - } if (map_value_has_spin_lock(map)) { if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) { @@ -11675,13 +12718,11 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, case BPF_MAP_TYPE_LRU_PERCPU_HASH: case BPF_MAP_TYPE_ARRAY_OF_MAPS: case BPF_MAP_TYPE_HASH_OF_MAPS: - if (!is_preallocated_map(map)) { - verbose(env, - "Sleepable programs can only use preallocated maps\n"); - return -EINVAL; - } - break; case BPF_MAP_TYPE_RINGBUF: + case BPF_MAP_TYPE_USER_RINGBUF: + case BPF_MAP_TYPE_INODE_STORAGE: + case BPF_MAP_TYPE_SK_STORAGE: + case BPF_MAP_TYPE_TASK_STORAGE: break; default: verbose(env, @@ -12319,7 +13360,7 @@ static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env, aux[adj_idx].ptr_type == PTR_TO_CTX) continue; - imm_rnd = get_random_int(); + imm_rnd = get_random_u32(); rnd_hi32_patch[0] = insn; rnd_hi32_patch[1].imm = imm_rnd; rnd_hi32_patch[3].dst_reg = load_reg; @@ -12450,7 +13491,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) if (!ctx_access) continue; - switch (env->insn_aux_data[i + delta].ptr_type) { + switch ((int)env->insn_aux_data[i + delta].ptr_type) { case PTR_TO_CTX: if (!ops->convert_ctx_access) continue; @@ -12467,13 +13508,11 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) convert_ctx_access = bpf_xdp_sock_convert_ctx_access; break; case PTR_TO_BTF_ID: + case PTR_TO_BTF_ID | PTR_UNTRUSTED: if (type == BPF_READ) { insn->code = BPF_LDX | BPF_PROBE_MEM | BPF_SIZE((insn)->code); env->prog->aux->num_exentries++; - } else if (resolve_prog_type(env->prog) != BPF_PROG_TYPE_STRUCT_OPS) { - verbose(env, "Writes through BTF pointers are not allowed\n"); - return -EINVAL; } continue; default: @@ -12633,6 +13672,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) /* Below members will be freed only at prog->aux */ func[i]->aux->btf = prog->aux->btf; func[i]->aux->func_info = prog->aux->func_info; + func[i]->aux->func_info_cnt = prog->aux->func_info_cnt; func[i]->aux->poke_tab = prog->aux->poke_tab; func[i]->aux->size_poke_tab = prog->aux->size_poke_tab; @@ -12645,12 +13685,10 @@ static int jit_subprogs(struct bpf_verifier_env *env) poke->aux = func[i]->aux; } - /* Use bpf_prog_F_tag to indicate functions in stack traces. - * Long term would need debug info to populate names - */ func[i]->aux->name[0] = 'F'; func[i]->aux->stack_depth = env->subprog_info[i].stack_depth; func[i]->jit_requested = 1; + func[i]->blinding_requested = prog->blinding_requested; func[i]->aux->kfunc_tab = prog->aux->kfunc_tab; func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab; func[i]->aux->linfo = prog->aux->linfo; @@ -12746,6 +13784,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) prog->jited = 1; prog->bpf_func = func[0]->bpf_func; + prog->jited_len = func[0]->jited_len; prog->aux->func = func; prog->aux->func_cnt = env->subprog_cnt; bpf_prog_jit_attempt_done(prog); @@ -12773,6 +13812,7 @@ out_free: out_undo_insn: /* cleanup main prog to be interpreted */ prog->jit_requested = 0; + prog->blinding_requested = 0; for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { if (!bpf_pseudo_call(insn)) continue; @@ -12865,7 +13905,7 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, static int do_misc_fixups(struct bpf_verifier_env *env) { struct bpf_prog *prog = env->prog; - bool expect_blinding = bpf_jit_blinding_enabled(prog); + enum bpf_attach_type eatype = prog->expected_attach_type; enum bpf_prog_type prog_type = resolve_prog_type(prog); struct bpf_insn *insn = prog->insnsi; const struct bpf_func_proto *fn; @@ -13029,7 +14069,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env) insn->code = BPF_JMP | BPF_TAIL_CALL; aux = &env->insn_aux_data[i + delta]; - if (env->bpf_capable && !expect_blinding && + if (env->bpf_capable && !prog->blinding_requested && prog->jit_requested && !bpf_map_key_poisoned(aux) && !bpf_map_ptr_poisoned(aux) && @@ -13117,6 +14157,26 @@ static int do_misc_fixups(struct bpf_verifier_env *env) goto patch_call_imm; } + if (insn->imm == BPF_FUNC_task_storage_get || + insn->imm == BPF_FUNC_sk_storage_get || + insn->imm == BPF_FUNC_inode_storage_get) { + if (env->prog->aux->sleepable) + insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL); + else + insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC); + insn_buf[1] = *insn; + cnt = 2; + + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + goto patch_call_imm; + } + /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup * and other inlining handlers are currently limited to 64 bit * only. @@ -13129,7 +14189,8 @@ static int do_misc_fixups(struct bpf_verifier_env *env) insn->imm == BPF_FUNC_map_pop_elem || insn->imm == BPF_FUNC_map_peek_elem || insn->imm == BPF_FUNC_redirect_map || - insn->imm == BPF_FUNC_for_each_map_elem)) { + insn->imm == BPF_FUNC_for_each_map_elem || + insn->imm == BPF_FUNC_map_lookup_percpu_elem)) { aux = &env->insn_aux_data[i + delta]; if (bpf_map_ptr_poisoned(aux)) goto patch_call_imm; @@ -13178,6 +14239,8 @@ static int do_misc_fixups(struct bpf_verifier_env *env) bpf_callback_t callback_fn, void *callback_ctx, u64 flags))NULL)); + BUILD_BUG_ON(!__same_type(ops->map_lookup_percpu_elem, + (void *(*)(struct bpf_map *map, void *key, u32 cpu))NULL)); patch_map_ops_generic: switch (insn->imm) { @@ -13205,6 +14268,9 @@ patch_map_ops_generic: case BPF_FUNC_for_each_map_elem: insn->imm = BPF_CALL_IMM(ops->map_for_each_callback); continue; + case BPF_FUNC_map_lookup_percpu_elem: + insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem); + continue; } goto patch_call_imm; @@ -13235,11 +14301,79 @@ patch_map_ops_generic: continue; } + /* Implement bpf_get_func_arg inline. */ + if (prog_type == BPF_PROG_TYPE_TRACING && + insn->imm == BPF_FUNC_get_func_arg) { + /* Load nr_args from ctx - 8 */ + insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); + insn_buf[1] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6); + insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3); + insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1); + insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0); + insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0); + insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0); + insn_buf[7] = BPF_JMP_A(1); + insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL); + cnt = 9; + + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + continue; + } + + /* Implement bpf_get_func_ret inline. */ + if (prog_type == BPF_PROG_TYPE_TRACING && + insn->imm == BPF_FUNC_get_func_ret) { + if (eatype == BPF_TRACE_FEXIT || + eatype == BPF_MODIFY_RETURN) { + /* Load nr_args from ctx - 8 */ + insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); + insn_buf[1] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3); + insn_buf[2] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1); + insn_buf[3] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0); + insn_buf[4] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0); + insn_buf[5] = BPF_MOV64_IMM(BPF_REG_0, 0); + cnt = 6; + } else { + insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, -EOPNOTSUPP); + cnt = 1; + } + + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + continue; + } + + /* Implement get_func_arg_cnt inline. */ + if (prog_type == BPF_PROG_TYPE_TRACING && + insn->imm == BPF_FUNC_get_func_arg_cnt) { + /* Load nr_args from ctx - 8 */ + insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); + + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1); + if (!new_prog) + return -ENOMEM; + + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + continue; + } + /* Implement bpf_get_func_ip inline. */ if (prog_type == BPF_PROG_TYPE_TRACING && insn->imm == BPF_FUNC_get_func_ip) { - /* Load IP address from ctx - 8 */ - insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); + /* Load IP address from ctx - 16 */ + insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -16); new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1); if (!new_prog) @@ -13286,6 +14420,142 @@ patch_call_imm: return 0; } +static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env, + int position, + s32 stack_base, + u32 callback_subprogno, + u32 *cnt) +{ + s32 r6_offset = stack_base + 0 * BPF_REG_SIZE; + s32 r7_offset = stack_base + 1 * BPF_REG_SIZE; + s32 r8_offset = stack_base + 2 * BPF_REG_SIZE; + int reg_loop_max = BPF_REG_6; + int reg_loop_cnt = BPF_REG_7; + int reg_loop_ctx = BPF_REG_8; + + struct bpf_prog *new_prog; + u32 callback_start; + u32 call_insn_offset; + s32 callback_offset; + + /* This represents an inlined version of bpf_iter.c:bpf_loop, + * be careful to modify this code in sync. + */ + struct bpf_insn insn_buf[] = { + /* Return error and jump to the end of the patch if + * expected number of iterations is too big. + */ + BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2), + BPF_MOV32_IMM(BPF_REG_0, -E2BIG), + BPF_JMP_IMM(BPF_JA, 0, 0, 16), + /* spill R6, R7, R8 to use these as loop vars */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset), + /* initialize loop vars */ + BPF_MOV64_REG(reg_loop_max, BPF_REG_1), + BPF_MOV32_IMM(reg_loop_cnt, 0), + BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3), + /* loop header, + * if reg_loop_cnt >= reg_loop_max skip the loop body + */ + BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5), + /* callback call, + * correct callback offset would be set after patching + */ + BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt), + BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx), + BPF_CALL_REL(0), + /* increment loop counter */ + BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1), + /* jump to loop header if callback returned 0 */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6), + /* return value of bpf_loop, + * set R0 to the number of iterations + */ + BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt), + /* restore original values of R6, R7, R8 */ + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset), + BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset), + }; + + *cnt = ARRAY_SIZE(insn_buf); + new_prog = bpf_patch_insn_data(env, position, insn_buf, *cnt); + if (!new_prog) + return new_prog; + + /* callback start is known only after patching */ + callback_start = env->subprog_info[callback_subprogno].start; + /* Note: insn_buf[12] is an offset of BPF_CALL_REL instruction */ + call_insn_offset = position + 12; + callback_offset = callback_start - call_insn_offset - 1; + new_prog->insnsi[call_insn_offset].imm = callback_offset; + + return new_prog; +} + +static bool is_bpf_loop_call(struct bpf_insn *insn) +{ + return insn->code == (BPF_JMP | BPF_CALL) && + insn->src_reg == 0 && + insn->imm == BPF_FUNC_loop; +} + +/* For all sub-programs in the program (including main) check + * insn_aux_data to see if there are bpf_loop calls that require + * inlining. If such calls are found the calls are replaced with a + * sequence of instructions produced by `inline_bpf_loop` function and + * subprog stack_depth is increased by the size of 3 registers. + * This stack space is used to spill values of the R6, R7, R8. These + * registers are used to store the loop bound, counter and context + * variables. + */ +static int optimize_bpf_loop(struct bpf_verifier_env *env) +{ + struct bpf_subprog_info *subprogs = env->subprog_info; + int i, cur_subprog = 0, cnt, delta = 0; + struct bpf_insn *insn = env->prog->insnsi; + int insn_cnt = env->prog->len; + u16 stack_depth = subprogs[cur_subprog].stack_depth; + u16 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth; + u16 stack_depth_extra = 0; + + for (i = 0; i < insn_cnt; i++, insn++) { + struct bpf_loop_inline_state *inline_state = + &env->insn_aux_data[i + delta].loop_inline_state; + + if (is_bpf_loop_call(insn) && inline_state->fit_for_inline) { + struct bpf_prog *new_prog; + + stack_depth_extra = BPF_REG_SIZE * 3 + stack_depth_roundup; + new_prog = inline_bpf_loop(env, + i + delta, + -(stack_depth + stack_depth_extra), + inline_state->callback_subprogno, + &cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = new_prog; + insn = new_prog->insnsi + i + delta; + } + + if (subprogs[cur_subprog + 1].start == i + delta + 1) { + subprogs[cur_subprog].stack_depth += stack_depth_extra; + cur_subprog++; + stack_depth = subprogs[cur_subprog].stack_depth; + stack_depth_roundup = round_up(stack_depth, 8) - stack_depth; + stack_depth_extra = 0; + } + } + + env->prog->aux->stack_depth = env->subprog_info[0].stack_depth; + + return 0; +} + static void free_states(struct bpf_verifier_env *env) { struct bpf_verifier_state_list *sl, *sln; @@ -13353,7 +14623,7 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog) mark_reg_known_zero(env, regs, i); else if (regs[i].type == SCALAR_VALUE) mark_reg_unknown(env, regs, i); - else if (regs[i].type == PTR_TO_MEM_OR_NULL) { + else if (base_type(regs[i].type) == PTR_TO_MEM) { const u32 mem_size = regs[i].mem_size; mark_reg_known_zero(env, regs, i); @@ -13705,6 +14975,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, fallthrough; case BPF_MODIFY_RETURN: case BPF_LSM_MAC: + case BPF_LSM_CGROUP: case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: if (!btf_type_is_func(t)) { @@ -13821,8 +15092,8 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) } if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING && - prog->type != BPF_PROG_TYPE_LSM) { - verbose(env, "Only fentry/fexit/fmod_ret and lsm programs can be sleepable\n"); + prog->type != BPF_PROG_TYPE_LSM && prog->type != BPF_PROG_TYPE_KPROBE) { + verbose(env, "Only fentry/fexit/fmod_ret, lsm, and kprobe/uprobe programs can be sleepable\n"); return -EINVAL; } @@ -13941,13 +15212,15 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr) log->ubuf = (char __user *) (unsigned long) attr->log_buf; log->len_total = attr->log_size; - ret = -EINVAL; /* log attributes have to be sane */ - if (log->len_total < 128 || log->len_total > UINT_MAX >> 2 || - !log->level || !log->ubuf || log->level & ~BPF_LOG_MASK) + if (!bpf_verifier_log_attr_valid(log)) { + ret = -EINVAL; goto err_unlock; + } } + mark_verifier_state_clean(env); + if (IS_ERR(btf_vmlinux)) { /* Either gcc or pahole or kernel are broken. */ verbose(env, "in-kernel BTF is malformed\n"); @@ -14021,6 +15294,9 @@ skip_full_check: ret = check_max_stack_depth(env); /* instruction rewrites happen after this point */ + if (ret == 0) + ret = optimize_bpf_loop(env); + if (is_priv) { if (ret == 0) opt_hard_wire_dead_code_branches(env); |