From d893dc26e3f42e12ae75703c52cc6de5578ff1f5 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Wed, 23 Aug 2017 15:09:46 +0100
Subject: selftests/bpf: add a test for a bug in liveness-based pruning

Writes in straight-line code should not prevent reads from propagating
 along jumps.  With current verifier code, the jump from 3 to 5 does not
 add a read mark on 3:R0 (because 5:R0 has a write mark), meaning that
 the jump from 1 to 3 gets pruned as safe even though R0 is NOT_INIT.

Verifier output:
0: (61) r2 = *(u32 *)(r1 +0)
1: (35) if r2 >= 0x0 goto pc+1
 R1=ctx(id=0,off=0,imm=0) R2=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R10=fp0
2: (b7) r0 = 0
3: (35) if r2 >= 0x0 goto pc+1
 R0=inv0 R1=ctx(id=0,off=0,imm=0) R2=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R10=fp0
4: (b7) r0 = 0
5: (95) exit

from 3 to 5: safe

from 1 to 3: safe
processed 8 insns, stack depth 0

Signed-off-by: Edward Cree <ecree@solarflare.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/test_verifier.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index c03542c417db..c912734d2364 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -6487,6 +6487,22 @@ static struct bpf_test tests[] = {
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_LWT_IN,
 	},
+	{
+		"liveness pruning and write screening",
+		.insns = {
+			/* Get an unknown value */
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+			/* branch conditions teach us nothing about R2 */
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R0 !read_ok",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_LWT_IN,
+	},
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)
-- 
cgit v1.2.3-59-g8ed1b


From 63f45f840634ab5fd71bbc07acff915277764068 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Wed, 23 Aug 2017 15:10:03 +0100
Subject: bpf/verifier: when pruning a branch, ignore its write marks

The fact that writes occurred in reaching the continuation state does
 not screen off its reads from us, because we're not really its parent.
So detect 'not really the parent' in do_propagate_liveness, and ignore
 write marks in that case.

Fixes: dc503a8ad984 ("bpf/verifier: track liveness for pruning")
Signed-off-by: Edward Cree <ecree@solarflare.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/verifier.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e42c096ba20d..fdbaa6086559 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3436,6 +3436,7 @@ out_free:
 static bool do_propagate_liveness(const struct bpf_verifier_state *state,
 				  struct bpf_verifier_state *parent)
 {
+	bool writes = parent == state->parent; /* Observe write marks */
 	bool touched = false; /* any changes made? */
 	int i;
 
@@ -3447,7 +3448,9 @@ static bool do_propagate_liveness(const struct bpf_verifier_state *state,
 	for (i = 0; i < BPF_REG_FP; i++) {
 		if (parent->regs[i].live & REG_LIVE_READ)
 			continue;
-		if (state->regs[i].live == REG_LIVE_READ) {
+		if (writes && (state->regs[i].live & REG_LIVE_WRITTEN))
+			continue;
+		if (state->regs[i].live & REG_LIVE_READ) {
 			parent->regs[i].live |= REG_LIVE_READ;
 			touched = true;
 		}
@@ -3460,7 +3463,9 @@ static bool do_propagate_liveness(const struct bpf_verifier_state *state,
 			continue;
 		if (parent->spilled_regs[i].live & REG_LIVE_READ)
 			continue;
-		if (state->spilled_regs[i].live == REG_LIVE_READ) {
+		if (writes && (state->spilled_regs[i].live & REG_LIVE_WRITTEN))
+			continue;
+		if (state->spilled_regs[i].live & REG_LIVE_READ) {
 			parent->spilled_regs[i].live |= REG_LIVE_READ;
 			touched = true;
 		}
-- 
cgit v1.2.3-59-g8ed1b


From df20cb7ec17577c94ef93fa86c7c80958046a01e Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Wed, 23 Aug 2017 15:10:26 +0100
Subject: selftests/bpf: add a test for a pruning bug in the verifier

The test makes a read through a map value pointer, then considers pruning
 a branch where the register holds an adjusted map value pointer.  It
 should not prune, but currently it does.

Signed-off-by: Alexei Starovoitov <ast@fb.com>
[ecree@solarflare.com: added test-name and patch description]
Signed-off-by: Edward Cree <ecree@solarflare.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/test_verifier.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index c912734d2364..353d17015641 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -6503,6 +6503,34 @@ static struct bpf_test tests[] = {
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_LWT_IN,
 	},
+	{
+		"varlen_map_value_access pruning",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES),
+			BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1),
+			BPF_MOV32_IMM(BPF_REG_1, 0),
+			BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+				   offsetof(struct test_val, foo)),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr_unpriv = "R0 leaks addr",
+		.errstr = "R0 unbounded memory access",
+		.result_unpriv = REJECT,
+		.result = REJECT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)
-- 
cgit v1.2.3-59-g8ed1b


From 1b688a19a92223cf2d1892c9d05d64dc397b33e3 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Wed, 23 Aug 2017 15:10:50 +0100
Subject: bpf/verifier: remove varlen_map_value_access flag

The optimisation it does is broken when the 'new' register value has a
 variable offset and the 'old' was constant.  I broke it with my pointer
 types unification (see Fixes tag below), before which the 'new' value
 would have type PTR_TO_MAP_VALUE_ADJ and would thus not compare equal;
 other changes in that patch mean that its original behaviour (ignore
 min/max values) cannot be restored.
Tests on a sample set of cilium programs show no change in count of
 processed instructions.

Fixes: f1174f77b50c ("bpf/verifier: rework value tracking")
Signed-off-by: Edward Cree <ecree@solarflare.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf_verifier.h |  1 -
 kernel/bpf/verifier.c        | 41 ++++++++++++-----------------------------
 2 files changed, 12 insertions(+), 30 deletions(-)

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 91d07efed2ba..d8f131a36fd0 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -125,7 +125,6 @@ struct bpf_verifier_env {
 	u32 id_gen;			/* used to generate unique reg IDs */
 	bool allow_ptr_leaks;
 	bool seen_direct_write;
-	bool varlen_map_value_access;
 	struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
 };
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index fdbaa6086559..711bdbd22cea 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -832,11 +832,6 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
 	 */
 	if (log_level)
 		print_verifier_state(state);
-	/* If the offset is variable, we will need to be stricter in state
-	 * pruning from now on.
-	 */
-	if (!tnum_is_const(reg->var_off))
-		env->varlen_map_value_access = true;
 	/* The minimum value is only important with signed
 	 * comparisons where we can't assume the floor of a
 	 * value is 0.  If we are using signed variables for our
@@ -3247,9 +3242,8 @@ static bool check_ids(u32 old_id, u32 cur_id, struct idpair *idmap)
 }
 
 /* Returns true if (rold safe implies rcur safe) */
-static bool regsafe(struct bpf_reg_state *rold,
-		    struct bpf_reg_state *rcur,
-		    bool varlen_map_access, struct idpair *idmap)
+static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
+		    struct idpair *idmap)
 {
 	if (!(rold->live & REG_LIVE_READ))
 		/* explored state didn't use this */
@@ -3281,22 +3275,14 @@ static bool regsafe(struct bpf_reg_state *rold,
 			       tnum_is_unknown(rold->var_off);
 		}
 	case PTR_TO_MAP_VALUE:
-		if (varlen_map_access) {
-			/* If the new min/max/var_off satisfy the old ones and
-			 * everything else matches, we are OK.
-			 * We don't care about the 'id' value, because nothing
-			 * uses it for PTR_TO_MAP_VALUE (only for ..._OR_NULL)
-			 */
-			return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
-			       range_within(rold, rcur) &&
-			       tnum_in(rold->var_off, rcur->var_off);
-		} else {
-			/* If the ranges/var_off were not the same, but
-			 * everything else was and we didn't do a variable
-			 * access into a map then we are a-ok.
-			 */
-			return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0;
-		}
+		/* If the new min/max/var_off satisfy the old ones and
+		 * everything else matches, we are OK.
+		 * We don't care about the 'id' value, because nothing
+		 * uses it for PTR_TO_MAP_VALUE (only for ..._OR_NULL)
+		 */
+		return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
+		       range_within(rold, rcur) &&
+		       tnum_in(rold->var_off, rcur->var_off);
 	case PTR_TO_MAP_VALUE_OR_NULL:
 		/* a PTR_TO_MAP_VALUE could be safe to use as a
 		 * PTR_TO_MAP_VALUE_OR_NULL into the same map.
@@ -3380,7 +3366,6 @@ static bool states_equal(struct bpf_verifier_env *env,
 			 struct bpf_verifier_state *old,
 			 struct bpf_verifier_state *cur)
 {
-	bool varlen_map_access = env->varlen_map_value_access;
 	struct idpair *idmap;
 	bool ret = false;
 	int i;
@@ -3391,8 +3376,7 @@ static bool states_equal(struct bpf_verifier_env *env,
 		return false;
 
 	for (i = 0; i < MAX_BPF_REG; i++) {
-		if (!regsafe(&old->regs[i], &cur->regs[i], varlen_map_access,
-			     idmap))
+		if (!regsafe(&old->regs[i], &cur->regs[i], idmap))
 			goto out_free;
 	}
 
@@ -3412,7 +3396,7 @@ static bool states_equal(struct bpf_verifier_env *env,
 			continue;
 		if (!regsafe(&old->spilled_regs[i / BPF_REG_SIZE],
 			     &cur->spilled_regs[i / BPF_REG_SIZE],
-			     varlen_map_access, idmap))
+			     idmap))
 			/* when explored and current stack slot are both storing
 			 * spilled registers, check that stored pointers types
 			 * are the same as well.
@@ -3555,7 +3539,6 @@ static int do_check(struct bpf_verifier_env *env)
 	init_reg_state(regs);
 	state->parent = NULL;
 	insn_idx = 0;
-	env->varlen_map_value_access = false;
 	for (;;) {
 		struct bpf_insn *insn;
 		u8 class;
-- 
cgit v1.2.3-59-g8ed1b


From 8e9cd9ce90d48369b2c5ddd79fe3d4a4cb1ccb56 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Wed, 23 Aug 2017 15:11:21 +0100
Subject: bpf/verifier: document liveness analysis

The liveness tracking algorithm is quite subtle; add comments to explain it.

Signed-off-by: Edward Cree <ecree@solarflare.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf_verifier.h | 13 +++++++++++++
 kernel/bpf/verifier.c        | 28 +++++++++++++++++++++++++++-
 2 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index d8f131a36fd0..b8d200f60a40 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -21,6 +21,19 @@
  */
 #define BPF_MAX_VAR_SIZ	INT_MAX
 
+/* Liveness marks, used for registers and spilled-regs (in stack slots).
+ * Read marks propagate upwards until they find a write mark; they record that
+ * "one of this state's descendants read this reg" (and therefore the reg is
+ * relevant for states_equal() checks).
+ * Write marks collect downwards and do not propagate; they record that "the
+ * straight-line code that reached this state (from its parent) wrote this reg"
+ * (and therefore that reads propagated from this state or its descendants
+ * should not propagate to its parent).
+ * A state with a write mark can receive read marks; it just won't propagate
+ * them to its parent, since the write mark is a property, not of the state,
+ * but of the link between it and its parent.  See mark_reg_read() and
+ * mark_stack_slot_read() in kernel/bpf/verifier.c.
+ */
 enum bpf_reg_liveness {
 	REG_LIVE_NONE = 0, /* reg hasn't been read or written this branch */
 	REG_LIVE_READ, /* reg was read, so we're sensitive to initial value */
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 711bdbd22cea..d690c7dd1f1a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3417,6 +3417,12 @@ out_free:
 	return ret;
 }
 
+/* A write screens off any subsequent reads; but write marks come from the
+ * straight-line code between a state and its parent.  When we arrive at a
+ * jump target (in the first iteration of the propagate_liveness() loop),
+ * we didn't arrive by the straight-line code, so read marks in state must
+ * propagate to parent regardless of state's write marks.
+ */
 static bool do_propagate_liveness(const struct bpf_verifier_state *state,
 				  struct bpf_verifier_state *parent)
 {
@@ -3457,6 +3463,15 @@ static bool do_propagate_liveness(const struct bpf_verifier_state *state,
 	return touched;
 }
 
+/* "parent" is "a state from which we reach the current state", but initially
+ * it is not the state->parent (i.e. "the state whose straight-line code leads
+ * to the current state"), instead it is the state that happened to arrive at
+ * a (prunable) equivalent of the current state.  See comment above
+ * do_propagate_liveness() for consequences of this.
+ * This function is just a more efficient way of calling mark_reg_read() or
+ * mark_stack_slot_read() on each reg in "parent" that is read in "state",
+ * though it requires that parent != state->parent in the call arguments.
+ */
 static void propagate_liveness(const struct bpf_verifier_state *state,
 			       struct bpf_verifier_state *parent)
 {
@@ -3485,6 +3500,12 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 			/* reached equivalent register/stack state,
 			 * prune the search.
 			 * Registers read by the continuation are read by us.
+			 * If we have any write marks in env->cur_state, they
+			 * will prevent corresponding reads in the continuation
+			 * from reaching our parent (an explored_state).  Our
+			 * own state will get the read marks recorded, but
+			 * they'll be immediately forgotten as we're pruning
+			 * this state and will pop a new one.
 			 */
 			propagate_liveness(&sl->state, &env->cur_state);
 			return 1;
@@ -3508,7 +3529,12 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 	env->explored_states[insn_idx] = new_sl;
 	/* connect new state to parentage chain */
 	env->cur_state.parent = &new_sl->state;
-	/* clear liveness marks in current state */
+	/* clear write marks in current state: the writes we did are not writes
+	 * our child did, so they don't screen off its reads from us.
+	 * (There are no read marks in current state, because reads always mark
+	 * their parent and current state never has children yet.  Only
+	 * explored_states can get read marks.)
+	 */
 	for (i = 0; i < BPF_REG_FP; i++)
 		env->cur_state.regs[i].live = REG_LIVE_NONE;
 	for (i = 0; i < MAX_BPF_STACK / BPF_REG_SIZE; i++)
-- 
cgit v1.2.3-59-g8ed1b