aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorDaniel Borkmann <daniel@iogearbox.net>2018-10-03 02:53:49 +0200
committerDaniel Borkmann <daniel@iogearbox.net>2018-10-03 02:53:50 +0200
commit33d9a7fd675678e7ea739ccb104abc1139caf19e (patch)
tree09c60a0021239cf3718da9d91bbe59b2e2e6361b /kernel
parentMerge branch 'bpf-big-map-entries' (diff)
parentDocumentation: Describe bpf reference tracking (diff)
downloadlinux-dev-33d9a7fd675678e7ea739ccb104abc1139caf19e.tar.xz
linux-dev-33d9a7fd675678e7ea739ccb104abc1139caf19e.zip
Merge branch 'bpf-sk-lookup'
Joe Stringer says: ==================== This series proposes a new helper for the BPF API which allows BPF programs to perform lookups for sockets in a network namespace. This would allow programs to determine early on in processing whether the stack is expecting to receive the packet, and perform some action (eg drop, forward somewhere) based on this information. The series is structured roughly into: * Misc refactor * Add the socket pointer type * Add reference tracking to ensure that socket references are freed * Extend the BPF API to add sk_lookup_xxx() / sk_release() functions * Add tests/documentation The helper proposed in this series includes a parameter for a tuple which must be filled in by the caller to determine the socket to look up. The simplest case would be filling with the contents of the packet, ie mapping the packet's 5-tuple into the parameter. In common cases, it may alternatively be useful to reverse the direction of the tuple and perform a lookup, to find the socket that initiates this connection; and if the BPF program ever performs a form of IP address translation, it may further be useful to be able to look up arbitrary tuples that are not based upon the packet, but instead based on state held in BPF maps or hardcoded in the BPF program. Currently, access into the socket's fields are limited to those which are otherwise already accessible, and are restricted to read-only access. Changes since v3: * New patch: "bpf: Reuse canonical string formatter for ctx errs" * Add PTR_TO_SOCKET to is_ctx_reg(). * Add a few new checks to prevent mixing of socket/non-socket pointers. * Swap order of checks in sock_filter_is_valid_access(). * Prefix register spill macros with "bpf_". * Add acks from previous round * Rebase Changes since v2: * New patch: "selftests/bpf: Generalize dummy program types". This enables adding verifier tests for socket lookup with tail calls. * Define the semantics of the new helpers more clearly in uAPI header. * Fix release of caller_net when netns is not specified. * Use skb->sk to find caller net when skb->dev is unavailable. * Fix build with !CONFIG_NET. * Replace ptr_id defensive coding when releasing reference state with an internal error (-EFAULT). * Remove flags argument to sk_release(). * Add several new assembly tests suggested by Daniel. * Add a few new C tests. * Fix typo in verifier error message. Changes since v1: * Limit netns_id field to 32 bits * Reuse reg_type_mismatch() in more places * Reduce the number of passes at convert_ctx_access() * Replace ptr_id defensive coding when releasing reference state with an internal error (-EFAULT) * Rework 'struct bpf_sock_tuple' to allow passing a packet pointer * Allow direct packet access from helper * Fix compile error with CONFIG_IPV6 enabled * Improve commit messages Changes since RFC: * Split up sk_lookup() into sk_lookup_tcp(), sk_lookup_udp(). * Only take references on the socket when necessary. * Make sk_release() only free the socket reference in this case. * Fix some runtime reference leaks: * Disallow BPF_LD_[ABS|IND] instructions while holding a reference. * Disallow bpf_tail_call() while holding a reference. * Prevent the same instruction being used for reference and other pointer type. * Simplify locating copies of a reference during helper calls by caching the pointer id from the caller. * Fix kbuild compilation warnings with particular configs. * Improve code comments describing the new verifier pieces. * Tested by Nitin ==================== Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/verifier.c604
1 files changed, 493 insertions, 111 deletions
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a8cc83a970d1..73c81bef6ae8 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1,5 +1,6 @@
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
* Copyright (c) 2016 Facebook
+ * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -80,8 +81,8 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
* (like pointer plus pointer becomes SCALAR_VALUE type)
*
* When verifier sees load or store instructions the type of base register
- * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK. These are three pointer
- * types recognized by check_mem_access() function.
+ * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
+ * four pointer types recognized by check_mem_access() function.
*
* PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
* and the range of [ptr, ptr + map's value_size) is accessible.
@@ -140,6 +141,24 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
*
* After the call R0 is set to return type of the function and registers R1-R5
* are set to NOT_INIT to indicate that they are no longer readable.
+ *
+ * The following reference types represent a potential reference to a kernel
+ * resource which, after first being allocated, must be checked and freed by
+ * the BPF program:
+ * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
+ *
+ * When the verifier sees a helper call return a reference type, it allocates a
+ * pointer id for the reference and stores it in the current function state.
+ * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
+ * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
+ * passes through a NULL-check conditional. For the branch wherein the state is
+ * changed to CONST_IMM, the verifier releases the reference.
+ *
+ * For each helper function that allocates a reference, such as
+ * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
+ * bpf_sk_release(). When a reference type passes into the release function,
+ * the verifier also releases the reference. If any unchecked or unreleased
+ * reference remains at the end of the program, the verifier rejects it.
*/
/* verifier_state + insn_idx are pushed to stack when branch is encountered */
@@ -189,6 +208,7 @@ struct bpf_call_arg_meta {
int access_size;
s64 msize_smax_value;
u64 msize_umax_value;
+ int ptr_id;
};
static DEFINE_MUTEX(bpf_verifier_lock);
@@ -249,6 +269,46 @@ static bool type_is_pkt_pointer(enum bpf_reg_type type)
type == PTR_TO_PACKET_META;
}
+static bool reg_type_may_be_null(enum bpf_reg_type type)
+{
+ return type == PTR_TO_MAP_VALUE_OR_NULL ||
+ type == PTR_TO_SOCKET_OR_NULL;
+}
+
+static bool type_is_refcounted(enum bpf_reg_type type)
+{
+ return type == PTR_TO_SOCKET;
+}
+
+static bool type_is_refcounted_or_null(enum bpf_reg_type type)
+{
+ return type == PTR_TO_SOCKET || type == PTR_TO_SOCKET_OR_NULL;
+}
+
+static bool reg_is_refcounted(const struct bpf_reg_state *reg)
+{
+ return type_is_refcounted(reg->type);
+}
+
+static bool reg_is_refcounted_or_null(const struct bpf_reg_state *reg)
+{
+ return type_is_refcounted_or_null(reg->type);
+}
+
+static bool arg_type_is_refcounted(enum bpf_arg_type type)
+{
+ return type == ARG_PTR_TO_SOCKET;
+}
+
+/* Determine whether the function releases some resources allocated by another
+ * function call. The first reference type argument will be assumed to be
+ * released by release_reference().
+ */
+static bool is_release_function(enum bpf_func_id func_id)
+{
+ return func_id == BPF_FUNC_sk_release;
+}
+
/* string representation of 'enum bpf_reg_type' */
static const char * const reg_type_str[] = {
[NOT_INIT] = "?",
@@ -262,6 +322,8 @@ static const char * const reg_type_str[] = {
[PTR_TO_PACKET_META] = "pkt_meta",
[PTR_TO_PACKET_END] = "pkt_end",
[PTR_TO_FLOW_KEYS] = "flow_keys",
+ [PTR_TO_SOCKET] = "sock",
+ [PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
};
static char slot_type_char[] = {
@@ -378,62 +440,158 @@ static void print_verifier_state(struct bpf_verifier_env *env,
else
verbose(env, "=%s", types_buf);
}
+ if (state->acquired_refs && state->refs[0].id) {
+ verbose(env, " refs=%d", state->refs[0].id);
+ for (i = 1; i < state->acquired_refs; i++)
+ if (state->refs[i].id)
+ verbose(env, ",%d", state->refs[i].id);
+ }
verbose(env, "\n");
}
-static int copy_stack_state(struct bpf_func_state *dst,
- const struct bpf_func_state *src)
-{
- if (!src->stack)
- return 0;
- if (WARN_ON_ONCE(dst->allocated_stack < src->allocated_stack)) {
- /* internal bug, make state invalid to reject the program */
- memset(dst, 0, sizeof(*dst));
- return -EFAULT;
- }
- memcpy(dst->stack, src->stack,
- sizeof(*src->stack) * (src->allocated_stack / BPF_REG_SIZE));
- return 0;
-}
+#define COPY_STATE_FN(NAME, COUNT, FIELD, SIZE) \
+static int copy_##NAME##_state(struct bpf_func_state *dst, \
+ const struct bpf_func_state *src) \
+{ \
+ if (!src->FIELD) \
+ return 0; \
+ if (WARN_ON_ONCE(dst->COUNT < src->COUNT)) { \
+ /* internal bug, make state invalid to reject the program */ \
+ memset(dst, 0, sizeof(*dst)); \
+ return -EFAULT; \
+ } \
+ memcpy(dst->FIELD, src->FIELD, \
+ sizeof(*src->FIELD) * (src->COUNT / SIZE)); \
+ return 0; \
+}
+/* copy_reference_state() */
+COPY_STATE_FN(reference, acquired_refs, refs, 1)
+/* copy_stack_state() */
+COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
+#undef COPY_STATE_FN
+
+#define REALLOC_STATE_FN(NAME, COUNT, FIELD, SIZE) \
+static int realloc_##NAME##_state(struct bpf_func_state *state, int size, \
+ bool copy_old) \
+{ \
+ u32 old_size = state->COUNT; \
+ struct bpf_##NAME##_state *new_##FIELD; \
+ int slot = size / SIZE; \
+ \
+ if (size <= old_size || !size) { \
+ if (copy_old) \
+ return 0; \
+ state->COUNT = slot * SIZE; \
+ if (!size && old_size) { \
+ kfree(state->FIELD); \
+ state->FIELD = NULL; \
+ } \
+ return 0; \
+ } \
+ new_##FIELD = kmalloc_array(slot, sizeof(struct bpf_##NAME##_state), \
+ GFP_KERNEL); \
+ if (!new_##FIELD) \
+ return -ENOMEM; \
+ if (copy_old) { \
+ if (state->FIELD) \
+ memcpy(new_##FIELD, state->FIELD, \
+ sizeof(*new_##FIELD) * (old_size / SIZE)); \
+ memset(new_##FIELD + old_size / SIZE, 0, \
+ sizeof(*new_##FIELD) * (size - old_size) / SIZE); \
+ } \
+ state->COUNT = slot * SIZE; \
+ kfree(state->FIELD); \
+ state->FIELD = new_##FIELD; \
+ return 0; \
+}
+/* realloc_reference_state() */
+REALLOC_STATE_FN(reference, acquired_refs, refs, 1)
+/* realloc_stack_state() */
+REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
+#undef REALLOC_STATE_FN
/* do_check() starts with zero-sized stack in struct bpf_verifier_state to
* make it consume minimal amount of memory. check_stack_write() access from
* the program calls into realloc_func_state() to grow the stack size.
- * Note there is a non-zero parent pointer inside each reg of bpf_verifier_state
- * which this function copies over. It points to corresponding reg in previous
- * bpf_verifier_state which is never reallocated
+ * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
+ * which realloc_stack_state() copies over. It points to previous
+ * bpf_verifier_state which is never reallocated.
*/
-static int realloc_func_state(struct bpf_func_state *state, int size,
- bool copy_old)
+static int realloc_func_state(struct bpf_func_state *state, int stack_size,
+ int refs_size, bool copy_old)
{
- u32 old_size = state->allocated_stack;
- struct bpf_stack_state *new_stack;
- int slot = size / BPF_REG_SIZE;
+ int err = realloc_reference_state(state, refs_size, copy_old);
+ if (err)
+ return err;
+ return realloc_stack_state(state, stack_size, copy_old);
+}
+
+/* Acquire a pointer id from the env and update the state->refs to include
+ * this new pointer reference.
+ * On success, returns a valid pointer id to associate with the register
+ * On failure, returns a negative errno.
+ */
+static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
+{
+ struct bpf_func_state *state = cur_func(env);
+ int new_ofs = state->acquired_refs;
+ int id, err;
- if (size <= old_size || !size) {
- if (copy_old)
+ err = realloc_reference_state(state, state->acquired_refs + 1, true);
+ if (err)
+ return err;
+ id = ++env->id_gen;
+ state->refs[new_ofs].id = id;
+ state->refs[new_ofs].insn_idx = insn_idx;
+
+ return id;
+}
+
+/* release function corresponding to acquire_reference_state(). Idempotent. */
+static int __release_reference_state(struct bpf_func_state *state, int ptr_id)
+{
+ int i, last_idx;
+
+ if (!ptr_id)
+ return -EFAULT;
+
+ last_idx = state->acquired_refs - 1;
+ for (i = 0; i < state->acquired_refs; i++) {
+ if (state->refs[i].id == ptr_id) {
+ if (last_idx && i != last_idx)
+ memcpy(&state->refs[i], &state->refs[last_idx],
+ sizeof(*state->refs));
+ memset(&state->refs[last_idx], 0, sizeof(*state->refs));
+ state->acquired_refs--;
return 0;
- state->allocated_stack = slot * BPF_REG_SIZE;
- if (!size && old_size) {
- kfree(state->stack);
- state->stack = NULL;
}
- return 0;
}
- new_stack = kmalloc_array(slot, sizeof(struct bpf_stack_state),
- GFP_KERNEL);
- if (!new_stack)
- return -ENOMEM;
- if (copy_old) {
- if (state->stack)
- memcpy(new_stack, state->stack,
- sizeof(*new_stack) * (old_size / BPF_REG_SIZE));
- memset(new_stack + old_size / BPF_REG_SIZE, 0,
- sizeof(*new_stack) * (size - old_size) / BPF_REG_SIZE);
- }
- state->allocated_stack = slot * BPF_REG_SIZE;
- kfree(state->stack);
- state->stack = new_stack;
+ return -EFAULT;
+}
+
+/* variation on the above for cases where we expect that there must be an
+ * outstanding reference for the specified ptr_id.
+ */
+static int release_reference_state(struct bpf_verifier_env *env, int ptr_id)
+{
+ struct bpf_func_state *state = cur_func(env);
+ int err;
+
+ err = __release_reference_state(state, ptr_id);
+ if (WARN_ON_ONCE(err != 0))
+ verbose(env, "verifier internal error: can't release reference\n");
+ return err;
+}
+
+static int transfer_reference_state(struct bpf_func_state *dst,
+ struct bpf_func_state *src)
+{
+ int err = realloc_reference_state(dst, src->acquired_refs, false);
+ if (err)
+ return err;
+ err = copy_reference_state(dst, src);
+ if (err)
+ return err;
return 0;
}
@@ -441,6 +599,7 @@ static void free_func_state(struct bpf_func_state *state)
{
if (!state)
return;
+ kfree(state->refs);
kfree(state->stack);
kfree(state);
}
@@ -466,10 +625,14 @@ static int copy_func_state(struct bpf_func_state *dst,
{
int err;
- err = realloc_func_state(dst, src->allocated_stack, false);
+ err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs,
+ false);
+ if (err)
+ return err;
+ memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
+ err = copy_reference_state(dst, src);
if (err)
return err;
- memcpy(dst, src, offsetof(struct bpf_func_state, allocated_stack));
return copy_stack_state(dst, src);
}
@@ -968,6 +1131,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
case PTR_TO_PACKET_END:
case PTR_TO_FLOW_KEYS:
case CONST_PTR_TO_MAP:
+ case PTR_TO_SOCKET:
+ case PTR_TO_SOCKET_OR_NULL:
return true;
default:
return false;
@@ -992,7 +1157,7 @@ static int check_stack_write(struct bpf_verifier_env *env,
enum bpf_reg_type type;
err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
- true);
+ state->acquired_refs, true);
if (err)
return err;
/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
@@ -1336,6 +1501,28 @@ static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
return 0;
}
+static int check_sock_access(struct bpf_verifier_env *env, u32 regno, int off,
+ int size, enum bpf_access_type t)
+{
+ struct bpf_reg_state *regs = cur_regs(env);
+ struct bpf_reg_state *reg = &regs[regno];
+ struct bpf_insn_access_aux info;
+
+ if (reg->smin_value < 0) {
+ verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
+ regno);
+ return -EACCES;
+ }
+
+ if (!bpf_sock_is_valid_access(off, size, t, &info)) {
+ verbose(env, "invalid bpf_sock access off=%d size=%d\n",
+ off, size);
+ return -EACCES;
+ }
+
+ return 0;
+}
+
static bool __is_pointer_value(bool allow_ptr_leaks,
const struct bpf_reg_state *reg)
{
@@ -1354,7 +1541,8 @@ static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
{
const struct bpf_reg_state *reg = cur_regs(env) + regno;
- return reg->type == PTR_TO_CTX;
+ return reg->type == PTR_TO_CTX ||
+ reg->type == PTR_TO_SOCKET;
}
static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
@@ -1454,6 +1642,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
*/
strict = true;
break;
+ case PTR_TO_SOCKET:
+ pointer_desc = "sock ";
+ break;
default:
break;
}
@@ -1721,6 +1912,14 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
err = check_flow_keys_access(env, off, size);
if (!err && t == BPF_READ && value_regno >= 0)
mark_reg_unknown(env, regs, value_regno);
+ } else if (reg->type == PTR_TO_SOCKET) {
+ if (t == BPF_WRITE) {
+ verbose(env, "cannot write into socket\n");
+ return -EACCES;
+ }
+ err = check_sock_access(env, regno, off, size, t);
+ if (!err && value_regno >= 0)
+ mark_reg_unknown(env, regs, value_regno);
} else {
verbose(env, "R%d invalid mem access '%s'\n", regno,
reg_type_str[reg->type]);
@@ -1763,8 +1962,7 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins
if (is_ctx_reg(env, insn->dst_reg) ||
is_pkt_reg(env, insn->dst_reg)) {
verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
- insn->dst_reg, is_ctx_reg(env, insn->dst_reg) ?
- "context" : "packet");
+ insn->dst_reg, reg_type_str[insn->dst_reg]);
return -EACCES;
}
@@ -1944,6 +2142,16 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
err = check_ctx_reg(env, reg, regno);
if (err < 0)
return err;
+ } else if (arg_type == ARG_PTR_TO_SOCKET) {
+ expected_type = PTR_TO_SOCKET;
+ if (type != expected_type)
+ goto err_type;
+ if (meta->ptr_id || !reg->id) {
+ verbose(env, "verifier internal error: mismatched references meta=%d, reg=%d\n",
+ meta->ptr_id, reg->id);
+ return -EFAULT;
+ }
+ meta->ptr_id = reg->id;
} else if (arg_type_is_mem_ptr(arg_type)) {
expected_type = PTR_TO_STACK;
/* One exception here. In case function allows for NULL to be
@@ -2233,10 +2441,32 @@ static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
return true;
}
+static bool check_refcount_ok(const struct bpf_func_proto *fn)
+{
+ int count = 0;
+
+ if (arg_type_is_refcounted(fn->arg1_type))
+ count++;
+ if (arg_type_is_refcounted(fn->arg2_type))
+ count++;
+ if (arg_type_is_refcounted(fn->arg3_type))
+ count++;
+ if (arg_type_is_refcounted(fn->arg4_type))
+ count++;
+ if (arg_type_is_refcounted(fn->arg5_type))
+ count++;
+
+ /* We only support one arg being unreferenced at the moment,
+ * which is sufficient for the helper functions we have right now.
+ */
+ return count <= 1;
+}
+
static int check_func_proto(const struct bpf_func_proto *fn)
{
return check_raw_mode_ok(fn) &&
- check_arg_pair_ok(fn) ? 0 : -EINVAL;
+ check_arg_pair_ok(fn) &&
+ check_refcount_ok(fn) ? 0 : -EINVAL;
}
/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
@@ -2252,10 +2482,9 @@ static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
if (reg_is_pkt_pointer_any(&regs[i]))
mark_reg_unknown(env, regs, i);
- for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
- if (state->stack[i].slot_type[0] != STACK_SPILL)
+ bpf_for_each_spilled_reg(i, state, reg) {
+ if (!reg)
continue;
- reg = &state->stack[i].spilled_ptr;
if (reg_is_pkt_pointer_any(reg))
__mark_reg_unknown(reg);
}
@@ -2270,12 +2499,45 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
__clear_all_pkt_pointers(env, vstate->frame[i]);
}
+static void release_reg_references(struct bpf_verifier_env *env,
+ struct bpf_func_state *state, int id)
+{
+ struct bpf_reg_state *regs = state->regs, *reg;
+ int i;
+
+ for (i = 0; i < MAX_BPF_REG; i++)
+ if (regs[i].id == id)
+ mark_reg_unknown(env, regs, i);
+
+ bpf_for_each_spilled_reg(i, state, reg) {
+ if (!reg)
+ continue;
+ if (reg_is_refcounted(reg) && reg->id == id)
+ __mark_reg_unknown(reg);
+ }
+}
+
+/* The pointer with the specified id has released its reference to kernel
+ * resources. Identify all copies of the same pointer and clear the reference.
+ */
+static int release_reference(struct bpf_verifier_env *env,
+ struct bpf_call_arg_meta *meta)
+{
+ struct bpf_verifier_state *vstate = env->cur_state;
+ int i;
+
+ for (i = 0; i <= vstate->curframe; i++)
+ release_reg_references(env, vstate->frame[i], meta->ptr_id);
+
+ return release_reference_state(env, meta->ptr_id);
+}
+
static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx)
{
struct bpf_verifier_state *state = env->cur_state;
struct bpf_func_state *caller, *callee;
- int i, subprog, target_insn;
+ int i, err, subprog, target_insn;
if (state->curframe + 1 >= MAX_CALL_FRAMES) {
verbose(env, "the call stack of %d frames is too deep\n",
@@ -2313,6 +2575,11 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
state->curframe + 1 /* frameno within this callchain */,
subprog /* subprog number within this prog */);
+ /* Transfer references to the callee */
+ err = transfer_reference_state(callee, caller);
+ if (err)
+ return err;
+
/* copy r1 - r5 args that callee can access. The copy includes parent
* pointers, which connects us up to the liveness chain
*/
@@ -2345,6 +2612,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
struct bpf_verifier_state *state = env->cur_state;
struct bpf_func_state *caller, *callee;
struct bpf_reg_state *r0;
+ int err;
callee = state->frame[state->curframe];
r0 = &callee->regs[BPF_REG_0];
@@ -2364,6 +2632,11 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
/* return to the caller whatever r0 had in the callee */
caller->regs[BPF_REG_0] = *r0;
+ /* Transfer references to the caller */
+ err = transfer_reference_state(caller, callee);
+ if (err)
+ return err;
+
*insn_idx = callee->callsite + 1;
if (env->log.level) {
verbose(env, "returning from callee:\n");
@@ -2420,6 +2693,18 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
return 0;
}
+static int check_reference_leak(struct bpf_verifier_env *env)
+{
+ struct bpf_func_state *state = cur_func(env);
+ int i;
+
+ for (i = 0; i < state->acquired_refs; i++) {
+ verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
+ state->refs[i].id, state->refs[i].insn_idx);
+ }
+ return state->acquired_refs ? -EINVAL : 0;
+}
+
static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
{
const struct bpf_func_proto *fn = NULL;
@@ -2498,6 +2783,18 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
return err;
}
+ if (func_id == BPF_FUNC_tail_call) {
+ err = check_reference_leak(env);
+ if (err) {
+ verbose(env, "tail_call would lead to reference leak\n");
+ return err;
+ }
+ } else if (is_release_function(func_id)) {
+ err = release_reference(env, &meta);
+ if (err)
+ return err;
+ }
+
regs = cur_regs(env);
/* check that flags argument in get_local_storage(map, flags) is 0,
@@ -2540,6 +2837,13 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
}
regs[BPF_REG_0].map_ptr = meta.map_ptr;
regs[BPF_REG_0].id = ++env->id_gen;
+ } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
+ int id = acquire_reference_state(env, insn_idx);
+ if (id < 0)
+ return id;
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+ regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
+ regs[BPF_REG_0].id = id;
} else {
verbose(env, "unknown return type %d of func %s#%d\n",
fn->ret_type, func_id_name(func_id), func_id);
@@ -2670,20 +2974,20 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
return -EACCES;
}
- if (ptr_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
- verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n",
- dst);
- return -EACCES;
- }
- if (ptr_reg->type == CONST_PTR_TO_MAP) {
- verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n",
- dst);
+ switch (ptr_reg->type) {
+ case PTR_TO_MAP_VALUE_OR_NULL:
+ verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
+ dst, reg_type_str[ptr_reg->type]);
return -EACCES;
- }
- if (ptr_reg->type == PTR_TO_PACKET_END) {
- verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n",
- dst);
+ case CONST_PTR_TO_MAP:
+ case PTR_TO_PACKET_END:
+ case PTR_TO_SOCKET:
+ case PTR_TO_SOCKET_OR_NULL:
+ verbose(env, "R%d pointer arithmetic on %s prohibited\n",
+ dst, reg_type_str[ptr_reg->type]);
return -EACCES;
+ default:
+ break;
}
/* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
@@ -3395,10 +3699,9 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
for (j = 0; j <= vstate->curframe; j++) {
state = vstate->frame[j];
- for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
- if (state->stack[i].slot_type[0] != STACK_SPILL)
+ bpf_for_each_spilled_reg(i, state, reg) {
+ if (!reg)
continue;
- reg = &state->stack[i].spilled_ptr;
if (reg->type == type && reg->id == dst_reg->id)
reg->range = max(reg->range, new_range);
}
@@ -3604,12 +3907,11 @@ static void reg_combine_min_max(struct bpf_reg_state *true_src,
}
}
-static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id,
- bool is_null)
+static void mark_ptr_or_null_reg(struct bpf_func_state *state,
+ struct bpf_reg_state *reg, u32 id,
+ bool is_null)
{
- struct bpf_reg_state *reg = &regs[regno];
-
- if (reg->type == PTR_TO_MAP_VALUE_OR_NULL && reg->id == id) {
+ if (reg_type_may_be_null(reg->type) && reg->id == id) {
/* Old offset (both fixed and variable parts) should
* have been known-zero, because we don't allow pointer
* arithmetic on pointers that might be NULL.
@@ -3622,40 +3924,49 @@ static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id,
}
if (is_null) {
reg->type = SCALAR_VALUE;
- } else if (reg->map_ptr->inner_map_meta) {
- reg->type = CONST_PTR_TO_MAP;
- reg->map_ptr = reg->map_ptr->inner_map_meta;
- } else {
- reg->type = PTR_TO_MAP_VALUE;
+ } else if (reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
+ if (reg->map_ptr->inner_map_meta) {
+ reg->type = CONST_PTR_TO_MAP;
+ reg->map_ptr = reg->map_ptr->inner_map_meta;
+ } else {
+ reg->type = PTR_TO_MAP_VALUE;
+ }
+ } else if (reg->type == PTR_TO_SOCKET_OR_NULL) {
+ reg->type = PTR_TO_SOCKET;
+ }
+ if (is_null || !reg_is_refcounted(reg)) {
+ /* We don't need id from this point onwards anymore,
+ * thus we should better reset it, so that state
+ * pruning has chances to take effect.
+ */
+ reg->id = 0;
}
- /* We don't need id from this point onwards anymore, thus we
- * should better reset it, so that state pruning has chances
- * to take effect.
- */
- reg->id = 0;
}
}
/* The logic is similar to find_good_pkt_pointers(), both could eventually
* be folded together at some point.
*/
-static void mark_map_regs(struct bpf_verifier_state *vstate, u32 regno,
- bool is_null)
+static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
+ bool is_null)
{
struct bpf_func_state *state = vstate->frame[vstate->curframe];
- struct bpf_reg_state *regs = state->regs;
+ struct bpf_reg_state *reg, *regs = state->regs;
u32 id = regs[regno].id;
int i, j;
+ if (reg_is_refcounted_or_null(&regs[regno]) && is_null)
+ __release_reference_state(state, id);
+
for (i = 0; i < MAX_BPF_REG; i++)
- mark_map_reg(regs, i, id, is_null);
+ mark_ptr_or_null_reg(state, &regs[i], id, is_null);
for (j = 0; j <= vstate->curframe; j++) {
state = vstate->frame[j];
- for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
- if (state->stack[i].slot_type[0] != STACK_SPILL)
+ bpf_for_each_spilled_reg(i, state, reg) {
+ if (!reg)
continue;
- mark_map_reg(&state->stack[i].spilled_ptr, 0, id, is_null);
+ mark_ptr_or_null_reg(state, reg, id, is_null);
}
}
}
@@ -3857,12 +4168,14 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
/* detect if R == 0 where R is returned from bpf_map_lookup_elem() */
if (BPF_SRC(insn->code) == BPF_K &&
insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
- dst_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
- /* Mark all identical map registers in each branch as either
+ reg_type_may_be_null(dst_reg->type)) {
+ /* Mark all identical registers in each branch as either
* safe or unknown depending R == 0 or R != 0 conditional.
*/
- mark_map_regs(this_branch, insn->dst_reg, opcode == BPF_JNE);
- mark_map_regs(other_branch, insn->dst_reg, opcode == BPF_JEQ);
+ mark_ptr_or_null_regs(this_branch, insn->dst_reg,
+ opcode == BPF_JNE);
+ mark_ptr_or_null_regs(other_branch, insn->dst_reg,
+ opcode == BPF_JEQ);
} else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
this_branch, other_branch) &&
is_pointer_value(env, insn->dst_reg)) {
@@ -3985,6 +4298,16 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
if (err)
return err;
+ /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
+ * gen_ld_abs() may terminate the program at runtime, leading to
+ * reference leak.
+ */
+ err = check_reference_leak(env);
+ if (err) {
+ verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
+ return err;
+ }
+
if (regs[BPF_REG_6].type != PTR_TO_CTX) {
verbose(env,
"at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
@@ -4400,6 +4723,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
case CONST_PTR_TO_MAP:
case PTR_TO_PACKET_END:
case PTR_TO_FLOW_KEYS:
+ case PTR_TO_SOCKET:
+ case PTR_TO_SOCKET_OR_NULL:
/* Only valid matches are exact, which memcmp() above
* would have accepted
*/
@@ -4475,6 +4800,14 @@ static bool stacksafe(struct bpf_func_state *old,
return true;
}
+static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
+{
+ if (old->acquired_refs != cur->acquired_refs)
+ return false;
+ return !memcmp(old->refs, cur->refs,
+ sizeof(*old->refs) * old->acquired_refs);
+}
+
/* compare two verifier states
*
* all states stored in state_list are known to be valid, since
@@ -4520,6 +4853,9 @@ static bool func_states_equal(struct bpf_func_state *old,
if (!stacksafe(old, cur, idmap))
goto out_free;
+
+ if (!refsafe(old, cur))
+ goto out_free;
ret = true;
out_free:
kfree(idmap);
@@ -4677,6 +5013,37 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
return 0;
}
+/* Return true if it's OK to have the same insn return a different type. */
+static bool reg_type_mismatch_ok(enum bpf_reg_type type)
+{
+ switch (type) {
+ case PTR_TO_CTX:
+ case PTR_TO_SOCKET:
+ case PTR_TO_SOCKET_OR_NULL:
+ return false;
+ default:
+ return true;
+ }
+}
+
+/* If an instruction was previously used with particular pointer types, then we
+ * need to be careful to avoid cases such as the below, where it may be ok
+ * for one branch accessing the pointer, but not ok for the other branch:
+ *
+ * R1 = sock_ptr
+ * goto X;
+ * ...
+ * R1 = some_other_valid_ptr;
+ * goto X;
+ * ...
+ * R2 = *(u32 *)(R1 + 0);
+ */
+static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
+{
+ return src != prev && (!reg_type_mismatch_ok(src) ||
+ !reg_type_mismatch_ok(prev));
+}
+
static int do_check(struct bpf_verifier_env *env)
{
struct bpf_verifier_state *state;
@@ -4770,6 +5137,7 @@ static int do_check(struct bpf_verifier_env *env)
regs = cur_regs(env);
env->insn_aux_data[insn_idx].seen = true;
+
if (class == BPF_ALU || class == BPF_ALU64) {
err = check_alu_op(env, insn);
if (err)
@@ -4809,9 +5177,7 @@ static int do_check(struct bpf_verifier_env *env)
*/
*prev_src_type = src_reg_type;
- } else if (src_reg_type != *prev_src_type &&
- (src_reg_type == PTR_TO_CTX ||
- *prev_src_type == PTR_TO_CTX)) {
+ } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
/* ABuser program is trying to use the same insn
* dst_reg = *(u32*) (src_reg + off)
* with different pointer types:
@@ -4856,9 +5222,7 @@ static int do_check(struct bpf_verifier_env *env)
if (*prev_dst_type == NOT_INIT) {
*prev_dst_type = dst_reg_type;
- } else if (dst_reg_type != *prev_dst_type &&
- (dst_reg_type == PTR_TO_CTX ||
- *prev_dst_type == PTR_TO_CTX)) {
+ } else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) {
verbose(env, "same insn cannot be used with different pointers\n");
return -EINVAL;
}
@@ -4875,8 +5239,8 @@ static int do_check(struct bpf_verifier_env *env)
return err;
if (is_ctx_reg(env, insn->dst_reg)) {
- verbose(env, "BPF_ST stores into R%d context is not allowed\n",
- insn->dst_reg);
+ verbose(env, "BPF_ST stores into R%d %s is not allowed\n",
+ insn->dst_reg, reg_type_str[insn->dst_reg]);
return -EACCES;
}
@@ -4938,6 +5302,10 @@ static int do_check(struct bpf_verifier_env *env)
continue;
}
+ err = check_reference_leak(env);
+ if (err)
+ return err;
+
/* eBPF calling convetion is such that R0 is used
* to return the value from eBPF program.
* Make sure that it's readable at this time
@@ -5284,8 +5652,10 @@ static void sanitize_dead_code(struct bpf_verifier_env *env)
}
}
-/* convert load instructions that access fields of 'struct __sk_buff'
- * into sequence of instructions that access fields of 'struct sk_buff'
+/* convert load instructions that access fields of a context type into a
+ * sequence of instructions that access fields of the underlying structure:
+ * struct __sk_buff -> struct sk_buff
+ * struct bpf_sock_ops -> struct sock
*/
static int convert_ctx_accesses(struct bpf_verifier_env *env)
{
@@ -5314,12 +5684,14 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
}
}
- if (!ops->convert_ctx_access || bpf_prog_is_dev_bound(env->prog->aux))
+ if (bpf_prog_is_dev_bound(env->prog->aux))
return 0;
insn = env->prog->insnsi + delta;
for (i = 0; i < insn_cnt; i++, insn++) {
+ bpf_convert_ctx_access_t convert_ctx_access;
+
if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
@@ -5361,8 +5733,18 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
continue;
}
- if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX)
+ switch (env->insn_aux_data[i + delta].ptr_type) {
+ case PTR_TO_CTX:
+ if (!ops->convert_ctx_access)
+ continue;
+ convert_ctx_access = ops->convert_ctx_access;
+ break;
+ case PTR_TO_SOCKET:
+ convert_ctx_access = bpf_sock_convert_ctx_access;
+ break;
+ default:
continue;
+ }
ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
size = BPF_LDST_BYTES(insn);
@@ -5394,8 +5776,8 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
}
target_size = 0;
- cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog,
- &target_size);
+ cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
+ &target_size);
if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
(ctx_field_size && !target_size)) {
verbose(env, "bpf verifier is misconfigured\n");