aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/filter.txt2
-rw-r--r--arch/arm/net/bpf_jit_32.c8
-rw-r--r--arch/arm64/net/bpf_jit_comp.c13
-rw-r--r--arch/mips/net/ebpf_jit.c29
-rw-r--r--arch/powerpc/net/bpf_jit_comp64.c8
-rw-r--r--arch/s390/net/bpf_jit_comp.c10
-rw-r--r--arch/sparc/net/bpf_jit_comp_64.c18
-rw-r--r--arch/x86/net/bpf_jit_comp.c20
-rw-r--r--include/linux/filter.h12
-rw-r--r--include/linux/tcp.h11
-rw-r--r--include/net/tcp.h42
-rw-r--r--include/uapi/linux/bpf.h84
-rw-r--r--kernel/bpf/core.c258
-rw-r--r--kernel/bpf/lpm_trie.c28
-rw-r--r--kernel/bpf/syscall.c5
-rw-r--r--kernel/bpf/verifier.c62
-rw-r--r--lib/test_bpf.c8
-rw-r--r--net/core/filter.c303
-rw-r--r--net/ipv4/tcp.c26
-rw-r--r--net/ipv4/tcp_nv.c2
-rw-r--r--net/ipv4/tcp_output.c6
-rw-r--r--net/ipv4/tcp_timer.c7
-rw-r--r--samples/bpf/Makefile5
-rw-r--r--samples/sockmap/sockmap_user.c392
-rw-r--r--tools/include/uapi/linux/bpf.h86
-rw-r--r--tools/testing/selftests/bpf/Makefile6
-rw-r--r--tools/testing/selftests/bpf/bpf_helpers.h2
-rwxr-xr-xtools/testing/selftests/bpf/tcp_client.py51
-rwxr-xr-xtools/testing/selftests/bpf/tcp_server.py83
-rw-r--r--tools/testing/selftests/bpf/test_align.c30
-rw-r--r--tools/testing/selftests/bpf/test_dev_cgroup.c2
-rw-r--r--tools/testing/selftests/bpf/test_lpm_map.c95
-rw-r--r--tools/testing/selftests/bpf/test_maps.c32
-rw-r--r--tools/testing/selftests/bpf/test_tcpbpf.h16
-rw-r--r--tools/testing/selftests/bpf/test_tcpbpf_kern.c115
-rw-r--r--tools/testing/selftests/bpf/test_tcpbpf_user.c126
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c344
37 files changed, 1969 insertions, 378 deletions
diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
index 87814859cfc2..a4508ec1816b 100644
--- a/Documentation/networking/filter.txt
+++ b/Documentation/networking/filter.txt
@@ -1134,7 +1134,7 @@ The verifier's knowledge about the variable offset consists of:
mask and value; no bit should ever be 1 in both. For example, if a byte is read
into a register from memory, the register's top 56 bits are known zero, while
the low 8 are unknown - which is represented as the tnum (0x0; 0xff). If we
-then OR this with 0x40, we get (0x40; 0xcf), then if we add 1 we get (0x0;
+then OR this with 0x40, we get (0x40; 0xbf), then if we add 1 we get (0x0;
0x1ff), because of potential carries.
Besides arithmetic, the register state can also be updated by conditional
branches. For instance, if a SCALAR_VALUE is compared > 8, in the 'true' branch
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 41e2feb0cf4f..b5030e1a41d8 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -363,15 +363,7 @@ static inline int epilogue_offset(const struct jit_ctx *ctx)
static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
{
const u8 *tmp = bpf2a32[TMP_REG_1];
- s32 jmp_offset;
- /* checks if divisor is zero or not. If it is, then
- * exit directly.
- */
- emit(ARM_CMP_I(rn, 0), ctx);
- _emit(ARM_COND_EQ, ARM_MOV_I(ARM_R0, 0), ctx);
- jmp_offset = epilogue_offset(ctx);
- _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);
#if __LINUX_ARM_ARCH__ == 7
if (elf_hwcap & HWCAP_IDIVA) {
if (op == BPF_DIV)
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 0775d5ab8ee9..1d4f1da7c58f 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -390,18 +390,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
case BPF_ALU64 | BPF_DIV | BPF_X:
case BPF_ALU | BPF_MOD | BPF_X:
case BPF_ALU64 | BPF_MOD | BPF_X:
- {
- const u8 r0 = bpf2a64[BPF_REG_0];
-
- /* if (src == 0) return 0 */
- jmp_offset = 3; /* skip ahead to else path */
- check_imm19(jmp_offset);
- emit(A64_CBNZ(is64, src, jmp_offset), ctx);
- emit(A64_MOVZ(1, r0, 0, 0), ctx);
- jmp_offset = epilogue_offset(ctx);
- check_imm26(jmp_offset);
- emit(A64_B(jmp_offset), ctx);
- /* else */
switch (BPF_OP(code)) {
case BPF_DIV:
emit(A64_UDIV(is64, dst, dst, src), ctx);
@@ -413,7 +401,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
break;
}
break;
- }
case BPF_ALU | BPF_LSH | BPF_X:
case BPF_ALU64 | BPF_LSH | BPF_X:
emit(A64_LSLV(is64, dst, dst, src), ctx);
diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c
index 4e347030ed2c..3e2798bfea4f 100644
--- a/arch/mips/net/ebpf_jit.c
+++ b/arch/mips/net/ebpf_jit.c
@@ -741,16 +741,11 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
break;
case BPF_ALU | BPF_DIV | BPF_K: /* ALU_IMM */
case BPF_ALU | BPF_MOD | BPF_K: /* ALU_IMM */
+ if (insn->imm == 0)
+ return -EINVAL;
dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
if (dst < 0)
return dst;
- if (insn->imm == 0) { /* Div by zero */
- b_off = b_imm(exit_idx, ctx);
- if (is_bad_offset(b_off))
- return -E2BIG;
- emit_instr(ctx, beq, MIPS_R_ZERO, MIPS_R_ZERO, b_off);
- emit_instr(ctx, addu, MIPS_R_V0, MIPS_R_ZERO, MIPS_R_ZERO);
- }
td = get_reg_val_type(ctx, this_idx, insn->dst_reg);
if (td == REG_64BIT || td == REG_32BIT_ZERO_EX)
/* sign extend */
@@ -770,19 +765,13 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
break;
case BPF_ALU64 | BPF_DIV | BPF_K: /* ALU_IMM */
case BPF_ALU64 | BPF_MOD | BPF_K: /* ALU_IMM */
+ if (insn->imm == 0)
+ return -EINVAL;
dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
if (dst < 0)
return dst;
- if (insn->imm == 0) { /* Div by zero */
- b_off = b_imm(exit_idx, ctx);
- if (is_bad_offset(b_off))
- return -E2BIG;
- emit_instr(ctx, beq, MIPS_R_ZERO, MIPS_R_ZERO, b_off);
- emit_instr(ctx, addu, MIPS_R_V0, MIPS_R_ZERO, MIPS_R_ZERO);
- }
if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT)
emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
-
if (insn->imm == 1) {
/* div by 1 is a nop, mod by 1 is zero */
if (bpf_op == BPF_MOD)
@@ -860,11 +849,6 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
break;
case BPF_DIV:
case BPF_MOD:
- b_off = b_imm(exit_idx, ctx);
- if (is_bad_offset(b_off))
- return -E2BIG;
- emit_instr(ctx, beq, src, MIPS_R_ZERO, b_off);
- emit_instr(ctx, movz, MIPS_R_V0, MIPS_R_ZERO, src);
emit_instr(ctx, ddivu, dst, src);
if (bpf_op == BPF_DIV)
emit_instr(ctx, mflo, dst);
@@ -943,11 +927,6 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
break;
case BPF_DIV:
case BPF_MOD:
- b_off = b_imm(exit_idx, ctx);
- if (is_bad_offset(b_off))
- return -E2BIG;
- emit_instr(ctx, beq, src, MIPS_R_ZERO, b_off);
- emit_instr(ctx, movz, MIPS_R_V0, MIPS_R_ZERO, src);
emit_instr(ctx, divu, dst, src);
if (bpf_op == BPF_DIV)
emit_instr(ctx, mflo, dst);
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 217a78e84865..0a34b0cec7b7 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -381,10 +381,6 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
goto bpf_alu32_trunc;
case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */
case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */
- PPC_CMPWI(src_reg, 0);
- PPC_BCC_SHORT(COND_NE, (ctx->idx * 4) + 12);
- PPC_LI(b2p[BPF_REG_0], 0);
- PPC_JMP(exit_addr);
if (BPF_OP(code) == BPF_MOD) {
PPC_DIVWU(b2p[TMP_REG_1], dst_reg, src_reg);
PPC_MULW(b2p[TMP_REG_1], src_reg,
@@ -395,10 +391,6 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
goto bpf_alu32_trunc;
case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */
case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */
- PPC_CMPDI(src_reg, 0);
- PPC_BCC_SHORT(COND_NE, (ctx->idx * 4) + 12);
- PPC_LI(b2p[BPF_REG_0], 0);
- PPC_JMP(exit_addr);
if (BPF_OP(code) == BPF_MOD) {
PPC_DIVD(b2p[TMP_REG_1], dst_reg, src_reg);
PPC_MULD(b2p[TMP_REG_1], src_reg,
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index e50188773ff3..78a19c93b380 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -610,11 +610,6 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
{
int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
- jit->seen |= SEEN_RET0;
- /* ltr %src,%src (if src == 0 goto fail) */
- EMIT2(0x1200, src_reg, src_reg);
- /* jz <ret0> */
- EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg);
/* lhi %w0,0 */
EMIT4_IMM(0xa7080000, REG_W0, 0);
/* lr %w1,%dst */
@@ -630,11 +625,6 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
{
int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
- jit->seen |= SEEN_RET0;
- /* ltgr %src,%src (if src == 0 goto fail) */
- EMIT4(0xb9020000, src_reg, src_reg);
- /* jz <ret0> */
- EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg);
/* lghi %w0,0 */
EMIT4_IMM(0xa7090000, REG_W0, 0);
/* lgr %w1,%dst */
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index 50a24d7bd4c5..48a25869349b 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -967,31 +967,17 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
emit_alu(MULX, src, dst, ctx);
break;
case BPF_ALU | BPF_DIV | BPF_X:
- emit_cmp(src, G0, ctx);
- emit_branch(BE|ANNUL, ctx->idx, ctx->epilogue_offset, ctx);
- emit_loadimm(0, bpf2sparc[BPF_REG_0], ctx);
-
emit_write_y(G0, ctx);
emit_alu(DIV, src, dst, ctx);
break;
-
case BPF_ALU64 | BPF_DIV | BPF_X:
- emit_cmp(src, G0, ctx);
- emit_branch(BE|ANNUL, ctx->idx, ctx->epilogue_offset, ctx);
- emit_loadimm(0, bpf2sparc[BPF_REG_0], ctx);
-
emit_alu(UDIVX, src, dst, ctx);
break;
-
case BPF_ALU | BPF_MOD | BPF_X: {
const u8 tmp = bpf2sparc[TMP_REG_1];
ctx->tmp_1_used = true;
- emit_cmp(src, G0, ctx);
- emit_branch(BE|ANNUL, ctx->idx, ctx->epilogue_offset, ctx);
- emit_loadimm(0, bpf2sparc[BPF_REG_0], ctx);
-
emit_write_y(G0, ctx);
emit_alu3(DIV, dst, src, tmp, ctx);
emit_alu3(MULX, tmp, src, tmp, ctx);
@@ -1003,10 +989,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
ctx->tmp_1_used = true;
- emit_cmp(src, G0, ctx);
- emit_branch(BE|ANNUL, ctx->idx, ctx->epilogue_offset, ctx);
- emit_loadimm(0, bpf2sparc[BPF_REG_0], ctx);
-
emit_alu3(UDIVX, dst, src, tmp, ctx);
emit_alu3(MULX, tmp, src, tmp, ctx);
emit_alu3(SUB, dst, tmp, dst, ctx);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 5acee5139e28..4923d92f918d 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -568,26 +568,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
*/
EMIT2(0x31, 0xd2);
- if (BPF_SRC(insn->code) == BPF_X) {
- /* if (src_reg == 0) return 0 */
-
- /* cmp r11, 0 */
- EMIT4(0x49, 0x83, 0xFB, 0x00);
-
- /* jne .+9 (skip over pop, pop, xor and jmp) */
- EMIT2(X86_JNE, 1 + 1 + 2 + 5);
- EMIT1(0x5A); /* pop rdx */
- EMIT1(0x58); /* pop rax */
- EMIT2(0x31, 0xc0); /* xor eax, eax */
-
- /* jmp cleanup_addr
- * addrs[i] - 11, because there are 11 bytes
- * after this insn: div, mov, pop, pop, mov
- */
- jmp_offset = ctx->cleanup_addr - (addrs[i] - 11);
- EMIT1_off32(0xE9, jmp_offset);
- }
-
if (BPF_CLASS(insn->code) == BPF_ALU64)
/* div r11 */
EMIT3(0x49, 0xF7, 0xF3);
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 425056c7f96c..276932d75975 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -688,6 +688,8 @@ static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err);
void bpf_prog_free(struct bpf_prog *fp);
+bool bpf_opcode_in_insntable(u8 code);
+
struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags);
struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
gfp_t gfp_extra_flags);
@@ -1003,10 +1005,20 @@ struct bpf_sock_ops_kern {
struct sock *sk;
u32 op;
union {
+ u32 args[4];
u32 reply;
u32 replylong[4];
};
u32 is_fullsock;
+ u64 temp; /* temp and everything after is not
+ * initialized to 0 before calling
+ * the BPF program. New fields that
+ * should be initialized to 0 should
+ * be inserted before temp.
+ * temp is scratch storage used by
+ * sock_ops_convert_ctx_access
+ * as temporary storage of a register.
+ */
};
#endif /* __LINUX_FILTER_H__ */
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 4f93f0953c41..8f4c54986f97 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -335,6 +335,17 @@ struct tcp_sock {
int linger2;
+
+/* Sock_ops bpf program related variables */
+#ifdef CONFIG_BPF
+ u8 bpf_sock_ops_cb_flags; /* Control calling BPF programs
+ * values defined in uapi/linux/tcp.h
+ */
+#define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) (TP->bpf_sock_ops_cb_flags & ARG)
+#else
+#define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) 0
+#endif
+
/* Receiver side RTT estimation */
struct {
u32 rtt_us;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 5a1d26a18599..093e967a2960 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2006,12 +2006,12 @@ void tcp_cleanup_ulp(struct sock *sk);
* program loaded).
*/
#ifdef CONFIG_BPF
-static inline int tcp_call_bpf(struct sock *sk, int op)
+static inline int tcp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args)
{
struct bpf_sock_ops_kern sock_ops;
int ret;
- memset(&sock_ops, 0, sizeof(sock_ops));
+ memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
if (sk_fullsock(sk)) {
sock_ops.is_fullsock = 1;
sock_owned_by_me(sk);
@@ -2019,6 +2019,8 @@ static inline int tcp_call_bpf(struct sock *sk, int op)
sock_ops.sk = sk;
sock_ops.op = op;
+ if (nargs > 0)
+ memcpy(sock_ops.args, args, nargs * sizeof(*args));
ret = BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
if (ret == 0)
@@ -2027,18 +2029,46 @@ static inline int tcp_call_bpf(struct sock *sk, int op)
ret = -1;
return ret;
}
+
+static inline int tcp_call_bpf_2arg(struct sock *sk, int op, u32 arg1, u32 arg2)
+{
+ u32 args[2] = {arg1, arg2};
+
+ return tcp_call_bpf(sk, op, 2, args);
+}
+
+static inline int tcp_call_bpf_3arg(struct sock *sk, int op, u32 arg1, u32 arg2,
+ u32 arg3)
+{
+ u32 args[3] = {arg1, arg2, arg3};
+
+ return tcp_call_bpf(sk, op, 3, args);
+}
+
#else
-static inline int tcp_call_bpf(struct sock *sk, int op)
+static inline int tcp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args)
{
return -EPERM;
}
+
+static inline int tcp_call_bpf_2arg(struct sock *sk, int op, u32 arg1, u32 arg2)
+{
+ return -EPERM;
+}
+
+static inline int tcp_call_bpf_3arg(struct sock *sk, int op, u32 arg1, u32 arg2,
+ u32 arg3)
+{
+ return -EPERM;
+}
+
#endif
static inline u32 tcp_timeout_init(struct sock *sk)
{
int timeout;
- timeout = tcp_call_bpf(sk, BPF_SOCK_OPS_TIMEOUT_INIT);
+ timeout = tcp_call_bpf(sk, BPF_SOCK_OPS_TIMEOUT_INIT, 0, NULL);
if (timeout <= 0)
timeout = TCP_TIMEOUT_INIT;
@@ -2049,7 +2079,7 @@ static inline u32 tcp_rwnd_init_bpf(struct sock *sk)
{
int rwnd;
- rwnd = tcp_call_bpf(sk, BPF_SOCK_OPS_RWND_INIT);
+ rwnd = tcp_call_bpf(sk, BPF_SOCK_OPS_RWND_INIT, 0, NULL);
if (rwnd < 0)
rwnd = 0;
@@ -2058,7 +2088,7 @@ static inline u32 tcp_rwnd_init_bpf(struct sock *sk)
static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
{
- return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN) == 1);
+ return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN, 0, NULL) == 1);
}
#if IS_ENABLED(CONFIG_SMC)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 406c19d6016b..db6bdc375126 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -642,6 +642,14 @@ union bpf_attr {
* @optlen: length of optval in bytes
* Return: 0 or negative error
*
+ * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags)
+ * Set callback flags for sock_ops
+ * @bpf_sock_ops: pointer to bpf_sock_ops_kern struct
+ * @flags: flags value
+ * Return: 0 for no error
+ * -EINVAL if there is no full tcp socket
+ * bits in flags that are not supported by current kernel
+ *
* int bpf_skb_adjust_room(skb, len_diff, mode, flags)
* Grow or shrink room in sk_buff.
* @skb: pointer to skb
@@ -748,7 +756,8 @@ union bpf_attr {
FN(perf_event_read_value), \
FN(perf_prog_read_value), \
FN(getsockopt), \
- FN(override_return),
+ FN(override_return), \
+ FN(sock_ops_cb_flags_set),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -952,8 +961,9 @@ struct bpf_map_info {
struct bpf_sock_ops {
__u32 op;
union {
- __u32 reply;
- __u32 replylong[4];
+ __u32 args[4]; /* Optionally passed to bpf program */
+ __u32 reply; /* Returned by bpf program */
+ __u32 replylong[4]; /* Optionally returned by bpf prog */
};
__u32 family;
__u32 remote_ip4; /* Stored in network byte order */
@@ -968,8 +978,39 @@ struct bpf_sock_ops {
*/
__u32 snd_cwnd;
__u32 srtt_us; /* Averaged RTT << 3 in usecs */
+ __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */
+ __u32 state;
+ __u32 rtt_min;
+ __u32 snd_ssthresh;
+ __u32 rcv_nxt;
+ __u32 snd_nxt;
+ __u32 snd_una;
+ __u32 mss_cache;
+ __u32 ecn_flags;
+ __u32 rate_delivered;
+ __u32 rate_interval_us;
+ __u32 packets_out;
+ __u32 retrans_out;
+ __u32 total_retrans;
+ __u32 segs_in;
+ __u32 data_segs_in;
+ __u32 segs_out;
+ __u32 data_segs_out;
+ __u32 lost_out;
+ __u32 sacked_out;
+ __u32 sk_txhash;
+ __u64 bytes_received;
+ __u64 bytes_acked;
};
+/* Definitions for bpf_sock_ops_cb_flags */
+#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0)
+#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1)
+#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2)
+#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x7 /* Mask of all currently
+ * supported cb flags
+ */
+
/* List of known BPF sock_ops operators.
* New entries can only be added at the end
*/
@@ -1003,6 +1044,43 @@ enum {
* a congestion threshold. RTTs above
* this indicate congestion
*/
+ BPF_SOCK_OPS_RTO_CB, /* Called when an RTO has triggered.
+ * Arg1: value of icsk_retransmits
+ * Arg2: value of icsk_rto
+ * Arg3: whether RTO has expired
+ */
+ BPF_SOCK_OPS_RETRANS_CB, /* Called when skb is retransmitted.
+ * Arg1: sequence number of 1st byte
+ * Arg2: # segments
+ * Arg3: return value of
+ * tcp_transmit_skb (0 => success)
+ */
+ BPF_SOCK_OPS_STATE_CB, /* Called when TCP changes state.
+ * Arg1: old_state
+ * Arg2: new_state
+ */
+};
+
+/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
+ * changes between the TCP and BPF versions. Ideally this should never happen.
+ * If it does, we need to add code to convert them before calling
+ * the BPF sock_ops function.
+ */
+enum {
+ BPF_TCP_ESTABLISHED = 1,
+ BPF_TCP_SYN_SENT,
+ BPF_TCP_SYN_RECV,
+ BPF_TCP_FIN_WAIT1,
+ BPF_TCP_FIN_WAIT2,
+ BPF_TCP_TIME_WAIT,
+ BPF_TCP_CLOSE,
+ BPF_TCP_CLOSE_WAIT,
+ BPF_TCP_LAST_ACK,
+ BPF_TCP_LISTEN,
+ BPF_TCP_CLOSING, /* Now a valid state */
+ BPF_TCP_NEW_SYN_RECV,
+
+ BPF_TCP_MAX_STATES /* Leave at the end! */
};
#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 3aa0658add76..5f35f93dcab2 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -782,6 +782,137 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
}
EXPORT_SYMBOL_GPL(__bpf_call_base);
+/* All UAPI available opcodes. */
+#define BPF_INSN_MAP(INSN_2, INSN_3) \
+ /* 32 bit ALU operations. */ \
+ /* Register based. */ \
+ INSN_3(ALU, ADD, X), \
+ INSN_3(ALU, SUB, X), \
+ INSN_3(ALU, AND, X), \
+ INSN_3(ALU, OR, X), \
+ INSN_3(ALU, LSH, X), \
+ INSN_3(ALU, RSH, X), \
+ INSN_3(ALU, XOR, X), \
+ INSN_3(ALU, MUL, X), \
+ INSN_3(ALU, MOV, X), \
+ INSN_3(ALU, DIV, X), \
+ INSN_3(ALU, MOD, X), \
+ INSN_2(ALU, NEG), \
+ INSN_3(ALU, END, TO_BE), \
+ INSN_3(ALU, END, TO_LE), \
+ /* Immediate based. */ \
+ INSN_3(ALU, ADD, K), \
+ INSN_3(ALU, SUB, K), \
+ INSN_3(ALU, AND, K), \
+ INSN_3(ALU, OR, K), \
+ INSN_3(ALU, LSH, K), \
+ INSN_3(ALU, RSH, K), \
+ INSN_3(ALU, XOR, K), \
+ INSN_3(ALU, MUL, K), \
+ INSN_3(ALU, MOV, K), \
+ INSN_3(ALU, DIV, K), \
+ INSN_3(ALU, MOD, K), \
+ /* 64 bit ALU operations. */ \
+ /* Register based. */ \
+ INSN_3(ALU64, ADD, X), \
+ INSN_3(ALU64, SUB, X), \
+ INSN_3(ALU64, AND, X), \
+ INSN_3(ALU64, OR, X), \
+ INSN_3(ALU64, LSH, X), \
+ INSN_3(ALU64, RSH, X), \
+ INSN_3(ALU64, XOR, X), \
+ INSN_3(ALU64, MUL, X), \
+ INSN_3(ALU64, MOV, X), \
+ INSN_3(ALU64, ARSH, X), \
+ INSN_3(ALU64, DIV, X), \
+ INSN_3(ALU64, MOD, X), \
+ INSN_2(ALU64, NEG), \
+ /* Immediate based. */ \
+ INSN_3(ALU64, ADD, K), \
+ INSN_3(ALU64, SUB, K), \
+ INSN_3(ALU64, AND, K), \
+ INSN_3(ALU64, OR, K), \
+ INSN_3(ALU64, LSH, K), \
+ INSN_3(ALU64, RSH, K), \
+ INSN_3(ALU64, XOR, K), \
+ INSN_3(ALU64, MUL, K), \
+ INSN_3(ALU64, MOV, K), \
+ INSN_3(ALU64, ARSH, K), \
+ INSN_3(ALU64, DIV, K), \
+ INSN_3(ALU64, MOD, K), \
+ /* Call instruction. */ \
+ INSN_2(JMP, CALL), \
+ /* Exit instruction. */ \
+ INSN_2(JMP, EXIT), \
+ /* Jump instructions. */ \
+ /* Register based. */ \
+ INSN_3(JMP, JEQ, X), \
+ INSN_3(JMP, JNE, X), \
+ INSN_3(JMP, JGT, X), \
+ INSN_3(JMP, JLT, X), \
+ INSN_3(JMP, JGE, X), \
+ INSN_3(JMP, JLE, X), \
+ INSN_3(JMP, JSGT, X), \
+ INSN_3(JMP, JSLT, X), \
+ INSN_3(JMP, JSGE, X), \
+ INSN_3(JMP, JSLE, X), \
+ INSN_3(JMP, JSET, X), \
+ /* Immediate based. */ \
+ INSN_3(JMP, JEQ, K), \
+ INSN_3(JMP, JNE, K), \
+ INSN_3(JMP, JGT, K), \
+ INSN_3(JMP, JLT, K), \
+ INSN_3(JMP, JGE, K), \
+ INSN_3(JMP, JLE, K), \
+ INSN_3(JMP, JSGT, K), \
+ INSN_3(JMP, JSLT, K), \
+ INSN_3(JMP, JSGE, K), \
+ INSN_3(JMP, JSLE, K), \
+ INSN_3(JMP, JSET, K), \
+ INSN_2(JMP, JA), \
+ /* Store instructions. */ \
+ /* Register based. */ \
+ INSN_3(STX, MEM, B), \
+ INSN_3(STX, MEM, H), \
+ INSN_3(STX, MEM, W), \
+ INSN_3(STX, MEM, DW), \
+ INSN_3(STX, XADD, W), \
+ INSN_3(STX, XADD, DW), \
+ /* Immediate based. */ \
+ INSN_3(ST, MEM, B), \
+ INSN_3(ST, MEM, H), \
+ INSN_3(ST, MEM, W), \
+ INSN_3(ST, MEM, DW), \
+ /* Load instructions. */ \
+ /* Register based. */ \
+ INSN_3(LDX, MEM, B), \
+ INSN_3(LDX, MEM, H), \
+ INSN_3(LDX, MEM, W), \
+ INSN_3(LDX, MEM, DW), \
+ /* Immediate based. */ \
+ INSN_3(LD, IMM, DW), \
+ /* Misc (old cBPF carry-over). */ \
+ INSN_3(LD, ABS, B), \
+ INSN_3(LD, ABS, H), \
+ INSN_3(LD, ABS, W), \
+ INSN_3(LD, IND, B), \
+ INSN_3(LD, IND, H), \
+ INSN_3(LD, IND, W)
+
+bool bpf_opcode_in_insntable(u8 code)
+{
+#define BPF_INSN_2_TBL(x, y) [BPF_##x | BPF_##y] = true
+#define BPF_INSN_3_TBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = true
+ static const bool public_insntable[256] = {
+ [0 ... 255] = false,
+ /* Now overwrite non-defaults ... */
+ BPF_INSN_MAP(BPF_INSN_2_TBL, BPF_INSN_3_TBL),
+ };
+#undef BPF_INSN_3_TBL
+#undef BPF_INSN_2_TBL
+ return public_insntable[code];
+}
+
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
/**
* __bpf_prog_run - run eBPF program on a given context
@@ -793,115 +924,18 @@ EXPORT_SYMBOL_GPL(__bpf_call_base);
static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
{
u64 tmp;
+#define BPF_INSN_2_LBL(x, y) [BPF_##x | BPF_##y] = &&x##_##y
+#define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z
static const void *jumptable[256] = {
[0 ... 255] = &&default_label,
/* Now overwrite non-defaults ... */
- /* 32 bit ALU operations */
- [BPF_ALU | BPF_ADD | BPF_X] = &&ALU_ADD_X,
- [BPF_ALU | BPF_ADD | BPF_K] = &&ALU_ADD_K,
- [BPF_ALU | BPF_SUB | BPF_X] = &&ALU_SUB_X,
- [BPF_ALU | BPF_SUB | BPF_K] = &&ALU_SUB_K,
- [BPF_ALU | BPF_AND | BPF_X] = &&ALU_AND_X,
- [BPF_ALU | BPF_AND | BPF_K] = &&ALU_AND_K,
- [BPF_ALU | BPF_OR | BPF_X] = &&ALU_OR_X,
- [BPF_ALU | BPF_OR | BPF_K] = &&ALU_OR_K,
- [BPF_ALU | BPF_LSH | BPF_X] = &&ALU_LSH_X,
- [BPF_ALU | BPF_LSH | BPF_K] = &&ALU_LSH_K,
- [BPF_ALU | BPF_RSH | BPF_X] = &&ALU_RSH_X,
- [BPF_ALU | BPF_RSH | BPF_K] = &&ALU_RSH_K,
- [BPF_ALU | BPF_XOR | BPF_X] = &&ALU_XOR_X,
- [BPF_ALU | BPF_XOR | BPF_K] = &&ALU_XOR_K,
- [BPF_ALU | BPF_MUL | BPF_X] = &&ALU_MUL_X,
- [BPF_ALU | BPF_MUL | BPF_K] = &&ALU_MUL_K,
- [BPF_ALU | BPF_MOV | BPF_X] = &&ALU_MOV_X,
- [BPF_ALU | BPF_MOV | BPF_K] = &&ALU_MOV_K,
- [BPF_ALU | BPF_DIV | BPF_X] = &&ALU_DIV_X,
- [BPF_ALU | BPF_DIV | BPF_K] = &&ALU_DIV_K,
- [BPF_ALU | BPF_MOD | BPF_X] = &&ALU_MOD_X,
- [BPF_ALU | BPF_MOD | BPF_K] = &&ALU_MOD_K,
- [BPF_ALU | BPF_NEG] = &&ALU_NEG,
- [BPF_ALU | BPF_END | BPF_TO_BE] = &&ALU_END_TO_BE,
- [BPF_ALU | BPF_END | BPF_TO_LE] = &&ALU_END_TO_LE,
- /* 64 bit ALU operations */
- [BPF_ALU64 | BPF_ADD | BPF_X] = &&ALU64_ADD_X,
- [BPF_ALU64 | BPF_ADD | BPF_K] = &&ALU64_ADD_K,
- [BPF_ALU64 | BPF_SUB | BPF_X] = &&ALU64_SUB_X,
- [BPF_ALU64 | BPF_SUB | BPF_K] = &&ALU64_SUB_K,
- [BPF_ALU64 | BPF_AND | BPF_X] = &&ALU64_AND_X,
- [BPF_ALU64 | BPF_AND | BPF_K] = &&ALU64_AND_K,
- [BPF_ALU64 | BPF_OR | BPF_X] = &&ALU64_OR_X,
- [BPF_ALU64 | BPF_OR | BPF_K] = &&ALU64_OR_K,
- [BPF_ALU64 | BPF_LSH | BPF_X] = &&ALU64_LSH_X,
- [BPF_ALU64 | BPF_LSH | BPF_K] = &&ALU64_LSH_K,
- [BPF_ALU64 | BPF_RSH | BPF_X] = &&ALU64_RSH_X,
- [BPF_ALU64 | BPF_RSH | BPF_K] = &&ALU64_RSH_K,
- [BPF_ALU64 | BPF_XOR | BPF_X] = &&ALU64_XOR_X,
- [BPF_ALU64 | BPF_XOR | BPF_K] = &&ALU64_XOR_K,
- [BPF_ALU64 | BPF_MUL | BPF_X] = &&ALU64_MUL_X,
- [BPF_ALU64 | BPF_MUL | BPF_K] = &&ALU64_MUL_K,
- [BPF_ALU64 | BPF_MOV | BPF_X] = &&ALU64_MOV_X,
- [BPF_ALU64 | BPF_MOV | BPF_K] = &&ALU64_MOV_K,
- [BPF_ALU64 | BPF_ARSH | BPF_X] = &&ALU64_ARSH_X,
- [BPF_ALU64 | BPF_ARSH | BPF_K] = &&ALU64_ARSH_K,
- [BPF_ALU64 | BPF_DIV | BPF_X] = &&ALU64_DIV_X,
- [BPF_ALU64 | BPF_DIV | BPF_K] = &&ALU64_DIV_K,
- [BPF_ALU64 | BPF_MOD | BPF_X] = &&ALU64_MOD_X,
- [BPF_ALU64 | BPF_MOD | BPF_K] = &&ALU64_MOD_K,
- [BPF_ALU64 | BPF_NEG] = &&ALU64_NEG,
- /* Call instruction */
- [BPF_JMP | BPF_CALL] = &&JMP_CALL,
+ BPF_INSN_MAP(BPF_INSN_2_LBL, BPF_INSN_3_LBL),
+ /* Non-UAPI available opcodes. */
[BPF_JMP | BPF_CALL_ARGS] = &&JMP_CALL_ARGS,
[BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL,
- /* Jumps */
- [BPF_JMP | BPF_JA] = &&JMP_JA,
- [BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X,
- [BPF_JMP | BPF_JEQ | BPF_K] = &&JMP_JEQ_K,
- [BPF_JMP | BPF_JNE | BPF_X] = &&JMP_JNE_X,
- [BPF_JMP | BPF_JNE | BPF_K] = &&JMP_JNE_K,
- [BPF_JMP | BPF_JGT | BPF_X] = &&JMP_JGT_X,
- [BPF_JMP | BPF_JGT | BPF_K] = &&JMP_JGT_K,
- [BPF_JMP | BPF_JLT | BPF_X] = &&JMP_JLT_X,
- [BPF_JMP | BPF_JLT | BPF_K] = &&JMP_JLT_K,
- [BPF_JMP | BPF_JGE | BPF_X] = &&JMP_JGE_X,
- [BPF_JMP | BPF_JGE | BPF_K] = &&JMP_JGE_K,
- [BPF_JMP | BPF_JLE | BPF_X] = &&JMP_JLE_X,
- [BPF_JMP | BPF_JLE | BPF_K] = &&JMP_JLE_K,
- [BPF_JMP | BPF_JSGT | BPF_X] = &&JMP_JSGT_X,
- [BPF_JMP | BPF_JSGT | BPF_K] = &&JMP_JSGT_K,
- [BPF_JMP | BPF_JSLT | BPF_X] = &&JMP_JSLT_X,
- [BPF_JMP | BPF_JSLT | BPF_K] = &&JMP_JSLT_K,
- [BPF_JMP | BPF_JSGE | BPF_X] = &&JMP_JSGE_X,
- [BPF_JMP | BPF_JSGE | BPF_K] = &&JMP_JSGE_K,
- [BPF_JMP | BPF_JSLE | BPF_X] = &&JMP_JSLE_X,
- [BPF_JMP | BPF_JSLE | BPF_K] = &&JMP_JSLE_K,
- [BPF_JMP | BPF_JSET | BPF_X] = &&JMP_JSET_X,
- [BPF_JMP | BPF_JSET | BPF_K] = &&JMP_JSET_K,
- /* Program return */
- [BPF_JMP | BPF_EXIT] = &&JMP_EXIT,
- /* Store instructions */
- [BPF_STX | BPF_MEM | BPF_B] = &&STX_MEM_B,
- [BPF_STX | BPF_MEM | BPF_H] = &&STX_MEM_H,
- [BPF_STX | BPF_MEM | BPF_W] = &&STX_MEM_W,
- [BPF_STX | BPF_MEM | BPF_DW] = &&STX_MEM_DW,
- [BPF_STX | BPF_XADD | BPF_W] = &&STX_XADD_W,
- [BPF_STX | BPF_XADD | BPF_DW] = &&STX_XADD_DW,
- [BPF_ST | BPF_MEM | BPF_B] = &&ST_MEM_B,
- [BPF_ST | BPF_MEM | BPF_H] = &&ST_MEM_H,
- [BPF_ST | BPF_MEM | BPF_W] = &&ST_MEM_W,
- [BPF_ST | BPF_MEM | BPF_DW] = &&ST_MEM_DW,
- /* Load instructions */
- [BPF_LDX | BPF_MEM | BPF_B] = &&LDX_MEM_B,
- [BPF_LDX | BPF_MEM | BPF_H] = &&LDX_MEM_H,
- [BPF_LDX | BPF_MEM | BPF_W] = &&LDX_MEM_W,
- [BPF_LDX | BPF_MEM | BPF_DW] = &&LDX_MEM_DW,
- [BPF_LD | BPF_ABS | BPF_W] = &&LD_ABS_W,
- [BPF_LD | BPF_ABS | BPF_H] = &&LD_ABS_H,
- [BPF_LD | BPF_ABS | BPF_B] = &&LD_ABS_B,
- [BPF_LD | BPF_IND | BPF_W] = &&LD_IND_W,
- [BPF_LD | BPF_IND | BPF_H] = &&LD_IND_H,
- [BPF_LD | BPF_IND | BPF_B] = &&LD_IND_B,
- [BPF_LD | BPF_IMM | BPF_DW] = &&LD_IMM_DW,
};
+#undef BPF_INSN_3_LBL
+#undef BPF_INSN_2_LBL
u32 tail_call_cnt = 0;
void *ptr;
int off;
@@ -965,14 +999,10 @@ select_insn:
(*(s64 *) &DST) >>= IMM;
CONT;
ALU64_MOD_X:
- if (unlikely(SRC == 0))
- return 0;
div64_u64_rem(DST, SRC, &tmp);
DST = tmp;
CONT;
ALU_MOD_X:
- if (unlikely((u32)SRC == 0))
- return 0;
tmp = (u32) DST;
DST = do_div(tmp, (u32) SRC);
CONT;
@@ -985,13 +1015,9 @@ select_insn:
DST = do_div(tmp, (u32) IMM);
CONT;
ALU64_DIV_X:
- if (unlikely(SRC == 0))
- return 0;
DST = div64_u64(DST, SRC);
CONT;
ALU_DIV_X:
- if (unlikely((u32)SRC == 0))
- return 0;
tmp = (u32) DST;
do_div(tmp, (u32) SRC);
DST = (u32) tmp;
@@ -1302,8 +1328,14 @@ load_byte:
goto load_byte;
default_label:
- /* If we ever reach this, we have a bug somewhere. */
- WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code);
+ /* If we ever reach this, we have a bug somewhere. Die hard here
+ * instead of just returning 0; we could be somewhere in a subprog,
+ * so execution could continue otherwise which we do /not/ want.
+ *
+ * Note, verifier whitelists all opcodes in bpf_opcode_in_insntable().
+ */
+ pr_warn("BPF interpreter: unknown opcode %02x\n", insn->code);
+ BUG_ON(1);
return 0;
}
STACK_FRAME_NON_STANDARD(___bpf_prog_run); /* jump table */
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index d7ea96218516..7b469d10d0e9 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -593,11 +593,10 @@ unlock:
static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
{
+ struct lpm_trie_node *node, *next_node = NULL, *parent, *search_root;
struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
struct bpf_lpm_trie_key *key = _key, *next_key = _next_key;
- struct lpm_trie_node *node, *next_node = NULL, *parent;
struct lpm_trie_node **node_stack = NULL;
- struct lpm_trie_node __rcu **root;
int err = 0, stack_ptr = -1;
unsigned int next_bit;
size_t matchlen;
@@ -614,22 +613,21 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
*/
/* Empty trie */
- if (!rcu_dereference(trie->root))
+ search_root = rcu_dereference(trie->root);
+ if (!search_root)
return -ENOENT;
/* For invalid key, find the leftmost node in the trie */
- if (!key || key->prefixlen > trie->max_prefixlen) {
- root = &trie->root;
+ if (!key || key->prefixlen > trie->max_prefixlen)
goto find_leftmost;
- }
node_stack = kmalloc(trie->max_prefixlen * sizeof(struct lpm_trie_node *),
- GFP_USER | __GFP_NOWARN);
+ GFP_ATOMIC | __GFP_NOWARN);
if (!node_stack)
return -ENOMEM;
/* Try to find the exact node for the given key */
- for (node = rcu_dereference(trie->root); node;) {
+ for (node = search_root; node;) {
node_stack[++stack_ptr] = node;
matchlen = longest_prefix_match(trie, node, key);
if (node->prefixlen != matchlen ||
@@ -640,10 +638,8 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
node = rcu_dereference(node->child[next_bit]);
}
if (!node || node->prefixlen != key->prefixlen ||
- (node->flags & LPM_TREE_NODE_FLAG_IM)) {
- root = &trie->root;
+ (node->flags & LPM_TREE_NODE_FLAG_IM))
goto find_leftmost;
- }
/* The node with the exactly-matching key has been found,
* find the first node in postorder after the matched node.
@@ -651,10 +647,10 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
node = node_stack[stack_ptr];
while (stack_ptr > 0) {
parent = node_stack[stack_ptr - 1];
- if (rcu_dereference(parent->child[0]) == node &&
- rcu_dereference(parent->child[1])) {
- root = &parent->child[1];
- goto find_leftmost;
+ if (rcu_dereference(parent->child[0]) == node) {
+ search_root = rcu_dereference(parent->child[1]);
+ if (search_root)
+ goto find_leftmost;
}
if (!(parent->flags & LPM_TREE_NODE_FLAG_IM)) {
next_node = parent;
@@ -673,7 +669,7 @@ find_leftmost:
/* Find the leftmost non-intermediate node, all intermediate nodes
* have exact two children, so this function will never return NULL.
*/
- for (node = rcu_dereference(*root); node;) {
+ for (node = search_root; node;) {
if (!(node->flags & LPM_TREE_NODE_FLAG_IM))
next_node = node;
node = rcu_dereference(node->child[0]);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 5bdb0cc84ad2..e24aa3241387 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -709,10 +709,7 @@ static int map_update_elem(union bpf_attr *attr)
err = bpf_percpu_hash_update(map, key, value, attr->flags);
} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
err = bpf_percpu_array_update(map, key, value, attr->flags);
- } else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
- map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
- map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY ||
- map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
+ } else if (IS_FD_ARRAY(map)) {
rcu_read_lock();
err = bpf_fd_array_map_update_elem(map, f.file, key, value,
attr->flags);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index dfb138b46488..5fb69a85d967 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4981,6 +4981,13 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
next_insn:
insn++;
i++;
+ continue;
+ }
+
+ /* Basic sanity check before we invest more work here. */
+ if (!bpf_opcode_in_insntable(insn->code)) {
+ verbose(env, "unknown opcode %02x\n", insn->code);
+ return -EINVAL;
}
}
@@ -5064,14 +5071,21 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
return new_prog;
}
-/* The verifier does more data flow analysis than llvm and will not explore
- * branches that are dead at run time. Malicious programs can have dead code
- * too. Therefore replace all dead at-run-time code with nops.
+/* The verifier does more data flow analysis than llvm and will not
+ * explore branches that are dead at run time. Malicious programs can
+ * have dead code too. Therefore replace all dead at-run-time code
+ * with 'ja -1'.
+ *
+ * Just nops are not optimal, e.g. if they would sit at the end of the
+ * program and through another bug we would manage to jump there, then
+ * we'd execute beyond program memory otherwise. Returning exception
+ * code also wouldn't work since we can have subprogs where the dead
+ * code could be located.
*/
static void sanitize_dead_code(struct bpf_verifier_env *env)
{
struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
- struct bpf_insn nop = BPF_MOV64_REG(BPF_REG_0, BPF_REG_0);
+ struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
struct bpf_insn *insn = env->prog->insnsi;
const int insn_cnt = env->prog->len;
int i;
@@ -5079,7 +5093,7 @@ static void sanitize_dead_code(struct bpf_verifier_env *env)
for (i = 0; i < insn_cnt; i++) {
if (aux_data[i].seen)
continue;
- memcpy(insn + i, &nop, sizeof(nop));
+ memcpy(insn + i, &trap, sizeof(trap));
}
}
@@ -5386,15 +5400,37 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
int i, cnt, delta = 0;
for (i = 0; i < insn_cnt; i++, insn++) {
- if (insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
+ if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
+ insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
+ insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
- /* due to JIT bugs clear upper 32-bits of src register
- * before div/mod operation
- */
- insn_buf[0] = BPF_MOV32_REG(insn->src_reg, insn->src_reg);
- insn_buf[1] = *insn;
- cnt = 2;
- new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
+ struct bpf_insn mask_and_div[] = {
+ BPF_MOV32_REG(insn->src_reg, insn->src_reg),
+ /* Rx div 0 -> 0 */
+ BPF_JMP_IMM(BPF_JNE, insn->src_reg, 0, 2),
+ BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ *insn,
+ };
+ struct bpf_insn mask_and_mod[] = {
+ BPF_MOV32_REG(insn->src_reg, insn->src_reg),
+ /* Rx mod 0 -> Rx */
+ BPF_JMP_IMM(BPF_JEQ, insn->src_reg, 0, 1),
+ *insn,
+ };
+ struct bpf_insn *patchlet;
+
+ if (insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
+ insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
+ patchlet = mask_and_div + (is64 ? 1 : 0);
+ cnt = ARRAY_SIZE(mask_and_div) - (is64 ? 1 : 0);
+ } else {
+ patchlet = mask_and_mod + (is64 ? 1 : 0);
+ cnt = ARRAY_SIZE(mask_and_mod) - (is64 ? 1 : 0);
+ }
+
+ new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
if (!new_prog)
return -ENOMEM;
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index e3938e395cba..4cd9ea9b3449 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -2003,10 +2003,14 @@ static struct bpf_test tests[] = {
{ { 4, 0 }, { 5, 10 } }
},
{
- "INT: DIV by zero",
+ /* This one doesn't go through verifier, but is just raw insn
+ * as opposed to cBPF tests from here. Thus div by 0 tests are
+ * done in test_verifier in BPF kselftests.
+ */
+ "INT: DIV by -1",
.u.insns_int = {
BPF_ALU64_REG(BPF_MOV, R6, R1),
- BPF_ALU64_IMM(BPF_MOV, R7, 0),
+ BPF_ALU64_IMM(BPF_MOV, R7, -1),
BPF_LD_ABS(BPF_B, 3),
BPF_ALU32_REG(BPF_DIV, R0, R7),
BPF_EXIT_INSN(),
diff --git a/net/core/filter.c b/net/core/filter.c
index 18da42a81d0c..08ab4c65a998 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -401,8 +401,8 @@ do_pass:
/* Classic BPF expects A and X to be reset first. These need
* to be guaranteed to be the first two instructions.
*/
- *new_insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
- *new_insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_X, BPF_REG_X);
+ *new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
+ *new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_X, BPF_REG_X);
/* All programs must keep CTX in callee saved BPF_REG_CTX.
* In eBPF case it's done by the compiler, here we need to
@@ -459,8 +459,15 @@ do_pass:
break;
if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) ||
- fp->code == (BPF_ALU | BPF_MOD | BPF_X))
+ fp->code == (BPF_ALU | BPF_MOD | BPF_X)) {
*insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X);
+ /* Error with exception code on div/mod by 0.
+ * For cBPF programs, this was always return 0.
+ */
+ *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_X, 0, 2);
+ *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
+ *insn++ = BPF_EXIT_INSN();
+ }
*insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
break;
@@ -3232,6 +3239,29 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
ret = -EINVAL;
}
#ifdef CONFIG_INET
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (level == SOL_IPV6) {
+ if (optlen != sizeof(int) || sk->sk_family != AF_INET6)
+ return -EINVAL;
+
+ val = *((int *)optval);
+ /* Only some options are supported */
+ switch (optname) {
+ case IPV6_TCLASS:
+ if (val < -1 || val > 0xff) {
+ ret = -EINVAL;
+ } else {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ if (val == -1)
+ val = 0;
+ np->tclass = val;
+ }
+ break;
+ default:
+ ret = -EINVAL;
+ }
+#endif
} else if (level == SOL_TCP &&
sk->sk_prot->setsockopt == tcp_setsockopt) {
if (optname == TCP_CONGESTION) {
@@ -3241,7 +3271,8 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
strncpy(name, optval, min_t(long, optlen,
TCP_CA_NAME_MAX-1));
name[TCP_CA_NAME_MAX-1] = 0;
- ret = tcp_set_congestion_control(sk, name, false, reinit);
+ ret = tcp_set_congestion_control(sk, name, false,
+ reinit);
} else {
struct tcp_sock *tp = tcp_sk(sk);
@@ -3307,6 +3338,22 @@ BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
} else {
goto err_clear;
}
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (level == SOL_IPV6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ if (optlen != sizeof(int) || sk->sk_family != AF_INET6)
+ goto err_clear;
+
+ /* Only some options are supported */
+ switch (optname) {
+ case IPV6_TCLASS:
+ *((int *)optval) = (int)np->tclass;
+ break;
+ default:
+ goto err_clear;
+ }
+#endif
} else {
goto err_clear;
}
@@ -3328,6 +3375,33 @@ static const struct bpf_func_proto bpf_getsockopt_proto = {
.arg5_type = ARG_CONST_SIZE,
};
+BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock,
+ int, argval)
+{
+ struct sock *sk = bpf_sock->sk;
+ int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS;
+
+ if (!sk_fullsock(sk))
+ return -EINVAL;
+
+#ifdef CONFIG_INET
+ if (val)
+ tcp_sk(sk)->bpf_sock_ops_cb_flags = val;
+
+ return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS);
+#else
+ return -EINVAL;
+#endif
+}
+
+static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = {
+ .func = bpf_sock_ops_cb_flags_set,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+
static const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id)
{
@@ -3510,6 +3584,8 @@ static const struct bpf_func_proto *
return &bpf_setsockopt_proto;
case BPF_FUNC_getsockopt:
return &bpf_getsockopt_proto;
+ case BPF_FUNC_sock_ops_cb_flags_set:
+ return &bpf_sock_ops_cb_flags_set_proto;
case BPF_FUNC_sock_map_update:
return &bpf_sock_map_update_proto;
default:
@@ -3826,34 +3902,44 @@ void bpf_warn_invalid_xdp_action(u32 act)
}
EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
-static bool __is_valid_sock_ops_access(int off, int size)
+static bool sock_ops_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ struct bpf_insn_access_aux *info)
{
+ const int size_default = sizeof(__u32);
+
if (off < 0 || off >= sizeof(struct bpf_sock_ops))
return false;
+
/* The verifier guarantees that size > 0. */
if (off % size != 0)
return false;
- if (size != sizeof(__u32))
- return false;
-
- return true;
-}
-static bool sock_ops_is_valid_access(int off, int size,
- enum bpf_access_type type,
- struct bpf_insn_access_aux *info)
-{
if (type == BPF_WRITE) {
switch (off) {
- case offsetof(struct bpf_sock_ops, op) ...
- offsetof(struct bpf_sock_ops, replylong[3]):
+ case offsetof(struct bpf_sock_ops, reply):
+ case offsetof(struct bpf_sock_ops, sk_txhash):
+ if (size != size_default)
+ return false;
break;
default:
return false;
}
+ } else {
+ switch (off) {
+ case bpf_ctx_range_till(struct bpf_sock_ops, bytes_received,
+ bytes_acked):
+ if (size != sizeof(__u64))
+ return false;
+ break;
+ default:
+ if (size != size_default)
+ return false;
+ break;
+ }
}
- return __is_valid_sock_ops_access(off, size);
+ return true;
}
static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write,
@@ -4470,10 +4556,37 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
is_fullsock));
break;
-/* Helper macro for adding read access to tcp_sock fields. */
-#define SOCK_OPS_GET_TCP32(FIELD_NAME) \
+ case offsetof(struct bpf_sock_ops, state):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_state) != 1);
+
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+ struct bpf_sock_ops_kern, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sock_ops_kern, sk));
+ *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->dst_reg,
+ offsetof(struct sock_common, skc_state));
+ break;
+
+ case offsetof(struct bpf_sock_ops, rtt_min):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) !=
+ sizeof(struct minmax));
+ BUILD_BUG_ON(sizeof(struct minmax) <
+ sizeof(struct minmax_sample));
+
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+ struct bpf_sock_ops_kern, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sock_ops_kern, sk));
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+ offsetof(struct tcp_sock, rtt_min) +
+ FIELD_SIZEOF(struct minmax_sample, t));
+ break;
+
+/* Helper macro for adding read access to tcp_sock or sock fields. */
+#define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
do { \
- BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, FIELD_NAME) != 4); \
+ BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \
+ FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
struct bpf_sock_ops_kern, \
is_fullsock), \
@@ -4485,17 +4598,159 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
struct bpf_sock_ops_kern, sk),\
si->dst_reg, si->src_reg, \
offsetof(struct bpf_sock_ops_kern, sk));\
- *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, \
- offsetof(struct tcp_sock, FIELD_NAME)); \
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ, \
+ OBJ_FIELD), \
+ si->dst_reg, si->dst_reg, \
+ offsetof(OBJ, OBJ_FIELD)); \
+ } while (0)
+
+/* Helper macro for adding write access to tcp_sock or sock fields.
+ * The macro is called with two registers, dst_reg which contains a pointer
+ * to ctx (context) and src_reg which contains the value that should be
+ * stored. However, we need an additional register since we cannot overwrite
+ * dst_reg because it may be used later in the program.
+ * Instead we "borrow" one of the other register. We first save its value
+ * into a new (temp) field in bpf_sock_ops_kern, use it, and then restore
+ * it at the end of the macro.
+ */
+#define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
+ do { \
+ int reg = BPF_REG_9; \
+ BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \
+ FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \
+ if (si->dst_reg == reg || si->src_reg == reg) \
+ reg--; \
+ if (si->dst_reg == reg || si->src_reg == reg) \
+ reg--; \
+ *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg, \
+ offsetof(struct bpf_sock_ops_kern, \
+ temp)); \
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
+ struct bpf_sock_ops_kern, \
+ is_fullsock), \
+ reg, si->dst_reg, \
+ offsetof(struct bpf_sock_ops_kern, \
+ is_fullsock)); \
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2); \
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
+ struct bpf_sock_ops_kern, sk),\
+ reg, si->dst_reg, \
+ offsetof(struct bpf_sock_ops_kern, sk));\
+ *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD), \
+ reg, si->src_reg, \
+ offsetof(OBJ, OBJ_FIELD)); \
+ *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg, \
+ offsetof(struct bpf_sock_ops_kern, \
+ temp)); \
+ } while (0)
+
+#define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE) \
+ do { \
+ if (TYPE == BPF_WRITE) \
+ SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
+ else \
+ SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
} while (0)
case offsetof(struct bpf_sock_ops, snd_cwnd):
- SOCK_OPS_GET_TCP32(snd_cwnd);
+ SOCK_OPS_GET_FIELD(snd_cwnd, snd_cwnd, struct tcp_sock);
break;
case offsetof(struct bpf_sock_ops, srtt_us):
- SOCK_OPS_GET_TCP32(srtt_us);
+ SOCK_OPS_GET_FIELD(srtt_us, srtt_us, struct tcp_sock);
break;
+
+ case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags):
+ SOCK_OPS_GET_FIELD(bpf_sock_ops_cb_flags, bpf_sock_ops_cb_flags,
+ struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, snd_ssthresh):
+ SOCK_OPS_GET_FIELD(snd_ssthresh, snd_ssthresh, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, rcv_nxt):
+ SOCK_OPS_GET_FIELD(rcv_nxt, rcv_nxt, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, snd_nxt):
+ SOCK_OPS_GET_FIELD(snd_nxt, snd_nxt, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, snd_una):
+ SOCK_OPS_GET_FIELD(snd_una, snd_una, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, mss_cache):
+ SOCK_OPS_GET_FIELD(mss_cache, mss_cache, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, ecn_flags):
+ SOCK_OPS_GET_FIELD(ecn_flags, ecn_flags, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, rate_delivered):
+ SOCK_OPS_GET_FIELD(rate_delivered, rate_delivered,
+ struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, rate_interval_us):
+ SOCK_OPS_GET_FIELD(rate_interval_us, rate_interval_us,
+ struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, packets_out):
+ SOCK_OPS_GET_FIELD(packets_out, packets_out, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, retrans_out):
+ SOCK_OPS_GET_FIELD(retrans_out, retrans_out, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, total_retrans):
+ SOCK_OPS_GET_FIELD(total_retrans, total_retrans,
+ struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, segs_in):
+ SOCK_OPS_GET_FIELD(segs_in, segs_in, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, data_segs_in):
+ SOCK_OPS_GET_FIELD(data_segs_in, data_segs_in, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, segs_out):
+ SOCK_OPS_GET_FIELD(segs_out, segs_out, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, data_segs_out):
+ SOCK_OPS_GET_FIELD(data_segs_out, data_segs_out,
+ struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, lost_out):
+ SOCK_OPS_GET_FIELD(lost_out, lost_out, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, sacked_out):
+ SOCK_OPS_GET_FIELD(sacked_out, sacked_out, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, sk_txhash):
+ SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash,
+ struct sock, type);
+ break;
+
+ case offsetof(struct bpf_sock_ops, bytes_received):
+ SOCK_OPS_GET_FIELD(bytes_received, bytes_received,
+ struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, bytes_acked):
+ SOCK_OPS_GET_FIELD(bytes_acked, bytes_acked, struct tcp_sock);
+ break;
+
}
return insn - insn_buf;
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index d7cf861bf699..f013ddc191e0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -463,7 +463,7 @@ void tcp_init_transfer(struct sock *sk, int bpf_op)
tcp_mtup_init(sk);
icsk->icsk_af_ops->rebuild_header(sk);
tcp_init_metrics(sk);
- tcp_call_bpf(sk, bpf_op);
+ tcp_call_bpf(sk, bpf_op, 0, NULL);
tcp_init_congestion_control(sk);
tcp_init_buffer_space(sk);
}
@@ -2042,6 +2042,30 @@ void tcp_set_state(struct sock *sk, int state)
{
int oldstate = sk->sk_state;
+ /* We defined a new enum for TCP states that are exported in BPF
+ * so as not force the internal TCP states to be frozen. The
+ * following checks will detect if an internal state value ever
+ * differs from the BPF value. If this ever happens, then we will
+ * need to remap the internal value to the BPF value before calling
+ * tcp_call_bpf_2arg.
+ */
+ BUILD_BUG_ON((int)BPF_TCP_ESTABLISHED != (int)TCP_ESTABLISHED);
+ BUILD_BUG_ON((int)BPF_TCP_SYN_SENT != (int)TCP_SYN_SENT);
+ BUILD_BUG_ON((int)BPF_TCP_SYN_RECV != (int)TCP_SYN_RECV);
+ BUILD_BUG_ON((int)BPF_TCP_FIN_WAIT1 != (int)TCP_FIN_WAIT1);
+ BUILD_BUG_ON((int)BPF_TCP_FIN_WAIT2 != (int)TCP_FIN_WAIT2);
+ BUILD_BUG_ON((int)BPF_TCP_TIME_WAIT != (int)TCP_TIME_WAIT);
+ BUILD_BUG_ON((int)BPF_TCP_CLOSE != (int)TCP_CLOSE);
+ BUILD_BUG_ON((int)BPF_TCP_CLOSE_WAIT != (int)TCP_CLOSE_WAIT);
+ BUILD_BUG_ON((int)BPF_TCP_LAST_ACK != (int)TCP_LAST_ACK);
+ BUILD_BUG_ON((int)BPF_TCP_LISTEN != (int)TCP_LISTEN);
+ BUILD_BUG_ON((int)BPF_TCP_CLOSING != (int)TCP_CLOSING);
+ BUILD_BUG_ON((int)BPF_TCP_NEW_SYN_RECV != (int)TCP_NEW_SYN_RECV);
+ BUILD_BUG_ON((int)BPF_TCP_MAX_STATES != (int)TCP_MAX_STATES);
+
+ if (BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), BPF_SOCK_OPS_STATE_CB_FLAG))
+ tcp_call_bpf_2arg(sk, BPF_SOCK_OPS_STATE_CB, oldstate, state);
+
switch (state) {
case TCP_ESTABLISHED:
if (oldstate != TCP_ESTABLISHED)
diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c
index 0b5a05bd82e3..ddbce73edae8 100644
--- a/net/ipv4/tcp_nv.c
+++ b/net/ipv4/tcp_nv.c
@@ -146,7 +146,7 @@ static void tcpnv_init(struct sock *sk)
* within a datacenter, where we have reasonable estimates of
* RTTs
*/
- base_rtt = tcp_call_bpf(sk, BPF_SOCK_OPS_BASE_RTT);
+ base_rtt = tcp_call_bpf(sk, BPF_SOCK_OPS_BASE_RTT, 0, NULL);
if (base_rtt > 0) {
ca->nv_base_rtt = base_rtt;
ca->nv_lower_bound_rtt = (base_rtt * 205) >> 8; /* 80% */
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 95461f02ac9a..e9f985e42405 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2905,6 +2905,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
}
+ if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RETRANS_CB_FLAG))
+ tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RETRANS_CB,
+ TCP_SKB_CB(skb)->seq, segs, err);
+
if (likely(!err)) {
TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
trace_tcp_retransmit_skb(sk, skb);
@@ -3469,7 +3473,7 @@ int tcp_connect(struct sock *sk)
struct sk_buff *buff;
int err;
- tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB);
+ tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB, 0, NULL);
if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
return -EHOSTUNREACH; /* Routing failure or similar. */
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 6db3124cdbda..257abdde23b0 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -213,11 +213,18 @@ static int tcp_write_timeout(struct sock *sk)
icsk->icsk_user_timeout);
}
tcp_fastopen_active_detect_blackhole(sk, expired);
+
+ if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTO_CB_FLAG))
+ tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RTO_CB,
+ icsk->icsk_retransmits,
+ icsk->icsk_rto, (int)expired);
+
if (expired) {
/* Has it gone just too far? */
tcp_write_err(sk);
return 1;
}
+
return 0;
}
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 7f61a3d57fa7..64335bb94f9f 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -201,13 +201,16 @@ CLANG_ARCH_ARGS = -target $(ARCH)
endif
# Trick to allow make to be run from this directory
-all:
+all: $(LIBBPF)
$(MAKE) -C ../../ $(CURDIR)/
clean:
$(MAKE) -C ../../ M=$(CURDIR) clean
@rm -f *~
+$(LIBBPF): FORCE
+ $(MAKE) -C $(dir $@) $(notdir $@)
+
$(obj)/syscall_nrs.s: $(src)/syscall_nrs.c
$(call if_changed_dep,cc_s_c)
diff --git a/samples/sockmap/sockmap_user.c b/samples/sockmap/sockmap_user.c
index 7cc9d228216f..7c25c0c112bc 100644
--- a/samples/sockmap/sockmap_user.c
+++ b/samples/sockmap/sockmap_user.c
@@ -23,8 +23,11 @@
#include <stdbool.h>
#include <signal.h>
#include <fcntl.h>
+#include <sys/wait.h>
+#include <time.h>
#include <sys/time.h>
+#include <sys/resource.h>
#include <sys/types.h>
#include <linux/netlink.h>
@@ -35,6 +38,8 @@
#include <assert.h>
#include <libgen.h>
+#include <getopt.h>
+
#include "../bpf/bpf_load.h"
#include "../bpf/bpf_util.h"
#include "../bpf/libbpf.h"
@@ -46,15 +51,42 @@ void running_handler(int a);
#define S1_PORT 10000
#define S2_PORT 10001
-static int sockmap_test_sockets(int rate, int dot)
+/* global sockets */
+int s1, s2, c1, c2, p1, p2;
+
+static const struct option long_options[] = {
+ {"help", no_argument, NULL, 'h' },
+ {"cgroup", required_argument, NULL, 'c' },
+ {"rate", required_argument, NULL, 'r' },
+ {"verbose", no_argument, NULL, 'v' },
+ {"iov_count", required_argument, NULL, 'i' },
+ {"length", required_argument, NULL, 'l' },
+ {"test", required_argument, NULL, 't' },
+ {0, 0, NULL, 0 }
+};
+
+static void usage(char *argv[])
+{
+ int i;
+
+ printf(" Usage: %s --cgroup <cgroup_path>\n", argv[0]);
+ printf(" options:\n");
+ for (i = 0; long_options[i].name != 0; i++) {
+ printf(" --%-12s", long_options[i].name);
+ if (long_options[i].flag != NULL)
+ printf(" flag (internal value:%d)\n",
+ *long_options[i].flag);
+ else
+ printf(" -%c\n", long_options[i].val);
+ }
+ printf("\n");
+}
+
+static int sockmap_init_sockets(void)
{
- int i, sc, err, max_fd, one = 1;
- int s1, s2, c1, c2, p1, p2;
+ int i, err, one = 1;
struct sockaddr_in addr;
- struct timeval timeout;
- char buf[1024] = {0};
int *fds[4] = {&s1, &s2, &c1, &c2};
- fd_set w;
s1 = s2 = p1 = p2 = c1 = c2 = 0;
@@ -63,8 +95,7 @@ static int sockmap_test_sockets(int rate, int dot)
*fds[i] = socket(AF_INET, SOCK_STREAM, 0);
if (*fds[i] < 0) {
perror("socket s1 failed()");
- err = *fds[i];
- goto out;
+ return errno;
}
}
@@ -74,16 +105,16 @@ static int sockmap_test_sockets(int rate, int dot)
(char *)&one, sizeof(one));
if (err) {
perror("setsockopt failed()");
- goto out;
+ return errno;
}
}
/* Non-blocking sockets */
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < 2; i++) {
err = ioctl(*fds[i], FIONBIO, (char *)&one);
if (err < 0) {
perror("ioctl s1 failed()");
- goto out;
+ return errno;
}
}
@@ -96,14 +127,14 @@ static int sockmap_test_sockets(int rate, int dot)
err = bind(s1, (struct sockaddr *)&addr, sizeof(addr));
if (err < 0) {
perror("bind s1 failed()\n");
- goto out;
+ return errno;
}
addr.sin_port = htons(S2_PORT);
err = bind(s2, (struct sockaddr *)&addr, sizeof(addr));
if (err < 0) {
perror("bind s2 failed()\n");
- goto out;
+ return errno;
}
/* Listen server sockets */
@@ -111,14 +142,14 @@ static int sockmap_test_sockets(int rate, int dot)
err = listen(s1, 32);
if (err < 0) {
perror("listen s1 failed()\n");
- goto out;
+ return errno;
}
addr.sin_port = htons(S2_PORT);
err = listen(s2, 32);
if (err < 0) {
perror("listen s1 failed()\n");
- goto out;
+ return errno;
}
/* Initiate Connect */
@@ -126,46 +157,232 @@ static int sockmap_test_sockets(int rate, int dot)
err = connect(c1, (struct sockaddr *)&addr, sizeof(addr));
if (err < 0 && errno != EINPROGRESS) {
perror("connect c1 failed()\n");
- goto out;
+ return errno;
}
addr.sin_port = htons(S2_PORT);
err = connect(c2, (struct sockaddr *)&addr, sizeof(addr));
if (err < 0 && errno != EINPROGRESS) {
perror("connect c2 failed()\n");
- goto out;
+ return errno;
+ } else if (err < 0) {
+ err = 0;
}
/* Accept Connecrtions */
p1 = accept(s1, NULL, NULL);
if (p1 < 0) {
perror("accept s1 failed()\n");
- goto out;
+ return errno;
}
p2 = accept(s2, NULL, NULL);
if (p2 < 0) {
perror("accept s1 failed()\n");
- goto out;
+ return errno;
}
- max_fd = p2;
- timeout.tv_sec = 10;
- timeout.tv_usec = 0;
-
printf("connected sockets: c1 <-> p1, c2 <-> p2\n");
printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n",
c1, s1, c2, s2);
+ return 0;
+}
+
+struct msg_stats {
+ size_t bytes_sent;
+ size_t bytes_recvd;
+ struct timespec start;
+ struct timespec end;
+};
+
+static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
+ struct msg_stats *s, bool tx)
+{
+ struct msghdr msg = {0};
+ int err, i, flags = MSG_NOSIGNAL;
+ struct iovec *iov;
+
+ iov = calloc(iov_count, sizeof(struct iovec));
+ if (!iov)
+ return errno;
+
+ for (i = 0; i < iov_count; i++) {
+ char *d = calloc(iov_length, sizeof(char));
+
+ if (!d) {
+ fprintf(stderr, "iov_count %i/%i OOM\n", i, iov_count);
+ goto out_errno;
+ }
+ iov[i].iov_base = d;
+ iov[i].iov_len = iov_length;
+ }
+
+ msg.msg_iov = iov;
+ msg.msg_iovlen = iov_count;
+
+ if (tx) {
+ clock_gettime(CLOCK_MONOTONIC, &s->start);
+ for (i = 0; i < cnt; i++) {
+ int sent = sendmsg(fd, &msg, flags);
+
+ if (sent < 0) {
+ perror("send loop error:");
+ goto out_errno;
+ }
+ s->bytes_sent += sent;
+ }
+ clock_gettime(CLOCK_MONOTONIC, &s->end);
+ } else {
+ int slct, recv, max_fd = fd;
+ struct timeval timeout;
+ float total_bytes;
+ fd_set w;
+
+ total_bytes = (float)iov_count * (float)iov_length * (float)cnt;
+ err = clock_gettime(CLOCK_MONOTONIC, &s->start);
+ if (err < 0)
+ perror("recv start time: ");
+ while (s->bytes_recvd < total_bytes) {
+ timeout.tv_sec = 1;
+ timeout.tv_usec = 0;
+
+ /* FD sets */
+ FD_ZERO(&w);
+ FD_SET(fd, &w);
+
+ slct = select(max_fd + 1, &w, NULL, NULL, &timeout);
+ if (slct == -1) {
+ perror("select()");
+ clock_gettime(CLOCK_MONOTONIC, &s->end);
+ goto out_errno;
+ } else if (!slct) {
+ fprintf(stderr, "unexpected timeout\n");
+ errno = -EIO;
+ clock_gettime(CLOCK_MONOTONIC, &s->end);
+ goto out_errno;
+ }
+
+ recv = recvmsg(fd, &msg, flags);
+ if (recv < 0) {
+ if (errno != EWOULDBLOCK) {
+ clock_gettime(CLOCK_MONOTONIC, &s->end);
+ perror("recv failed()\n");
+ goto out_errno;
+ }
+ }
+
+ s->bytes_recvd += recv;
+ }
+ clock_gettime(CLOCK_MONOTONIC, &s->end);
+ }
+
+ for (i = 0; i < iov_count; i++)
+ free(iov[i].iov_base);
+ free(iov);
+ return 0;
+out_errno:
+ for (i = 0; i < iov_count; i++)
+ free(iov[i].iov_base);
+ free(iov);
+ return errno;
+}
+
+static float giga = 1000000000;
+
+static inline float sentBps(struct msg_stats s)
+{
+ return s.bytes_sent / (s.end.tv_sec - s.start.tv_sec);
+}
+
+static inline float recvdBps(struct msg_stats s)
+{
+ return s.bytes_recvd / (s.end.tv_sec - s.start.tv_sec);
+}
+
+static int sendmsg_test(int iov_count, int iov_buf, int cnt,
+ int verbose, bool base)
+{
+ float sent_Bps = 0, recvd_Bps = 0;
+ int rx_fd, txpid, rxpid, err = 0;
+ struct msg_stats s = {0};
+ int status;
+
+ errno = 0;
+
+ if (base)
+ rx_fd = p1;
+ else
+ rx_fd = p2;
+
+ rxpid = fork();
+ if (rxpid == 0) {
+ err = msg_loop(rx_fd, iov_count, iov_buf, cnt, &s, false);
+ if (err)
+ fprintf(stderr,
+ "msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
+ iov_count, iov_buf, cnt, err);
+ shutdown(p2, SHUT_RDWR);
+ shutdown(p1, SHUT_RDWR);
+ if (s.end.tv_sec - s.start.tv_sec) {
+ sent_Bps = sentBps(s);
+ recvd_Bps = recvdBps(s);
+ }
+ fprintf(stdout,
+ "rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s\n",
+ s.bytes_sent, sent_Bps, sent_Bps/giga,
+ s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
+ exit(1);
+ } else if (rxpid == -1) {
+ perror("msg_loop_rx: ");
+ return errno;
+ }
+
+ txpid = fork();
+ if (txpid == 0) {
+ err = msg_loop(c1, iov_count, iov_buf, cnt, &s, true);
+ if (err)
+ fprintf(stderr,
+ "msg_loop_tx: iov_count %i iov_buf %i cnt %i err %i\n",
+ iov_count, iov_buf, cnt, err);
+ shutdown(c1, SHUT_RDWR);
+ if (s.end.tv_sec - s.start.tv_sec) {
+ sent_Bps = sentBps(s);
+ recvd_Bps = recvdBps(s);
+ }
+ fprintf(stdout,
+ "tx_sendmsg: TX: %zuB %fB/s %f GB/s RX: %zuB %fB/s %fGB/s\n",
+ s.bytes_sent, sent_Bps, sent_Bps/giga,
+ s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
+ exit(1);
+ } else if (txpid == -1) {
+ perror("msg_loop_tx: ");
+ return errno;
+ }
+
+ assert(waitpid(rxpid, &status, 0) == rxpid);
+ assert(waitpid(txpid, &status, 0) == txpid);
+ return err;
+}
+
+static int forever_ping_pong(int rate, int verbose)
+{
+ struct timeval timeout;
+ char buf[1024] = {0};
+ int sc;
+
+ timeout.tv_sec = 10;
+ timeout.tv_usec = 0;
/* Ping/Pong data from client to server */
sc = send(c1, buf, sizeof(buf), 0);
if (sc < 0) {
perror("send failed()\n");
- goto out;
+ return sc;
}
do {
- int s, rc, i;
+ int s, rc, i, max_fd = p2;
+ fd_set w;
/* FD sets */
FD_ZERO(&w);
@@ -193,7 +410,7 @@ static int sockmap_test_sockets(int rate, int dot)
if (rc < 0) {
if (errno != EWOULDBLOCK) {
perror("recv failed()\n");
- break;
+ return rc;
}
}
@@ -205,35 +422,92 @@ static int sockmap_test_sockets(int rate, int dot)
sc = send(i, buf, rc, 0);
if (sc < 0) {
perror("send failed()\n");
- break;
+ return sc;
}
}
- sleep(rate);
- if (dot) {
+
+ if (rate)
+ sleep(rate);
+
+ if (verbose) {
printf(".");
fflush(stdout);
}
} while (running);
-out:
- close(s1);
- close(s2);
- close(p1);
- close(p2);
- close(c1);
- close(c2);
- return err;
+ return 0;
}
+enum {
+ PING_PONG,
+ SENDMSG,
+ BASE,
+};
+
int main(int argc, char **argv)
{
- int rate = 1, dot = 1;
+ int iov_count = 1, length = 1024, rate = 1, verbose = 0;
+ struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
+ int opt, longindex, err, cg_fd = 0;
+ int test = PING_PONG;
char filename[256];
- int err, cg_fd;
- char *cg_path;
- cg_path = argv[argc - 1];
+ while ((opt = getopt_long(argc, argv, "hvc:r:i:l:t:",
+ long_options, &longindex)) != -1) {
+ switch (opt) {
+ /* Cgroup configuration */
+ case 'c':
+ cg_fd = open(optarg, O_DIRECTORY, O_RDONLY);
+ if (cg_fd < 0) {
+ fprintf(stderr,
+ "ERROR: (%i) open cg path failed: %s\n",
+ cg_fd, optarg);
+ return cg_fd;
+ }
+ break;
+ case 'r':
+ rate = atoi(optarg);
+ break;
+ case 'v':
+ verbose = 1;
+ break;
+ case 'i':
+ iov_count = atoi(optarg);
+ break;
+ case 'l':
+ length = atoi(optarg);
+ break;
+ case 't':
+ if (strcmp(optarg, "ping") == 0) {
+ test = PING_PONG;
+ } else if (strcmp(optarg, "sendmsg") == 0) {
+ test = SENDMSG;
+ } else if (strcmp(optarg, "base") == 0) {
+ test = BASE;
+ } else {
+ usage(argv);
+ return -1;
+ }
+ break;
+ case 'h':
+ default:
+ usage(argv);
+ return -1;
+ }
+ }
+
+ if (!cg_fd) {
+ fprintf(stderr, "%s requires cgroup option: --cgroup <path>\n",
+ argv[0]);
+ return -1;
+ }
+
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK)");
+ return 1;
+ }
+
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
running = 1;
@@ -241,20 +515,16 @@ int main(int argc, char **argv)
/* catch SIGINT */
signal(SIGINT, running_handler);
+ /* If base test skip BPF setup */
+ if (test == BASE)
+ goto run;
+
if (load_bpf_file(filename)) {
fprintf(stderr, "load_bpf_file: (%s) %s\n",
filename, strerror(errno));
return 1;
}
- /* Cgroup configuration */
- cg_fd = open(cg_path, O_DIRECTORY, O_RDONLY);
- if (cg_fd < 0) {
- fprintf(stderr, "ERROR: (%i) open cg path failed: %s\n",
- cg_fd, cg_path);
- return cg_fd;
- }
-
/* Attach programs to sockmap */
err = bpf_prog_attach(prog_fd[0], map_fd[0],
BPF_SK_SKB_STREAM_PARSER, 0);
@@ -280,12 +550,30 @@ int main(int argc, char **argv)
return err;
}
- err = sockmap_test_sockets(rate, dot);
+run:
+ err = sockmap_init_sockets();
if (err) {
fprintf(stderr, "ERROR: test socket failed: %d\n", err);
- return err;
+ goto out;
}
- return 0;
+
+ if (test == PING_PONG)
+ err = forever_ping_pong(rate, verbose);
+ else if (test == SENDMSG)
+ err = sendmsg_test(iov_count, length, rate, verbose, false);
+ else if (test == BASE)
+ err = sendmsg_test(iov_count, length, rate, verbose, true);
+ else
+ fprintf(stderr, "unknown test\n");
+out:
+ close(s1);
+ close(s2);
+ close(p1);
+ close(p2);
+ close(c1);
+ close(c2);
+ close(cg_fd);
+ return err;
}
void running_handler(int a)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index af1f49ad8b88..db6bdc375126 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -17,7 +17,7 @@
#define BPF_ALU64 0x07 /* alu mode in double word width */
/* ld/ldx fields */
-#define BPF_DW 0x18 /* double word */
+#define BPF_DW 0x18 /* double word (64-bit) */
#define BPF_XADD 0xc0 /* exclusive add */
/* alu/jmp fields */
@@ -642,6 +642,14 @@ union bpf_attr {
* @optlen: length of optval in bytes
* Return: 0 or negative error
*
+ * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags)
+ * Set callback flags for sock_ops
+ * @bpf_sock_ops: pointer to bpf_sock_ops_kern struct
+ * @flags: flags value
+ * Return: 0 for no error
+ * -EINVAL if there is no full tcp socket
+ * bits in flags that are not supported by current kernel
+ *
* int bpf_skb_adjust_room(skb, len_diff, mode, flags)
* Grow or shrink room in sk_buff.
* @skb: pointer to skb
@@ -748,7 +756,8 @@ union bpf_attr {
FN(perf_event_read_value), \
FN(perf_prog_read_value), \
FN(getsockopt), \
- FN(override_return),
+ FN(override_return), \
+ FN(sock_ops_cb_flags_set),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -952,8 +961,9 @@ struct bpf_map_info {
struct bpf_sock_ops {
__u32 op;
union {
- __u32 reply;
- __u32 replylong[4];
+ __u32 args[4]; /* Optionally passed to bpf program */
+ __u32 reply; /* Returned by bpf program */
+ __u32 replylong[4]; /* Optionally returned by bpf prog */
};
__u32 family;
__u32 remote_ip4; /* Stored in network byte order */
@@ -968,8 +978,39 @@ struct bpf_sock_ops {
*/
__u32 snd_cwnd;
__u32 srtt_us; /* Averaged RTT << 3 in usecs */
+ __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */
+ __u32 state;
+ __u32 rtt_min;
+ __u32 snd_ssthresh;
+ __u32 rcv_nxt;
+ __u32 snd_nxt;
+ __u32 snd_una;
+ __u32 mss_cache;
+ __u32 ecn_flags;
+ __u32 rate_delivered;
+ __u32 rate_interval_us;
+ __u32 packets_out;
+ __u32 retrans_out;
+ __u32 total_retrans;
+ __u32 segs_in;
+ __u32 data_segs_in;
+ __u32 segs_out;
+ __u32 data_segs_out;
+ __u32 lost_out;
+ __u32 sacked_out;
+ __u32 sk_txhash;
+ __u64 bytes_received;
+ __u64 bytes_acked;
};
+/* Definitions for bpf_sock_ops_cb_flags */
+#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0)
+#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1)
+#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2)
+#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x7 /* Mask of all currently
+ * supported cb flags
+ */
+
/* List of known BPF sock_ops operators.
* New entries can only be added at the end
*/
@@ -1003,6 +1044,43 @@ enum {
* a congestion threshold. RTTs above
* this indicate congestion
*/
+ BPF_SOCK_OPS_RTO_CB, /* Called when an RTO has triggered.
+ * Arg1: value of icsk_retransmits
+ * Arg2: value of icsk_rto
+ * Arg3: whether RTO has expired
+ */
+ BPF_SOCK_OPS_RETRANS_CB, /* Called when skb is retransmitted.
+ * Arg1: sequence number of 1st byte
+ * Arg2: # segments
+ * Arg3: return value of
+ * tcp_transmit_skb (0 => success)
+ */
+ BPF_SOCK_OPS_STATE_CB, /* Called when TCP changes state.
+ * Arg1: old_state
+ * Arg2: new_state
+ */
+};
+
+/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
+ * changes between the TCP and BPF versions. Ideally this should never happen.
+ * If it does, we need to add code to convert them before calling
+ * the BPF sock_ops function.
+ */
+enum {
+ BPF_TCP_ESTABLISHED = 1,
+ BPF_TCP_SYN_SENT,
+ BPF_TCP_SYN_RECV,
+ BPF_TCP_FIN_WAIT1,
+ BPF_TCP_FIN_WAIT2,
+ BPF_TCP_TIME_WAIT,
+ BPF_TCP_CLOSE,
+ BPF_TCP_CLOSE_WAIT,
+ BPF_TCP_LAST_ACK,
+ BPF_TCP_LISTEN,
+ BPF_TCP_CLOSING, /* Now a valid state */
+ BPF_TCP_NEW_SYN_RECV,
+
+ BPF_TCP_MAX_STATES /* Leave at the end! */
};
#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 3a44b655d852..bf05bc5e36e5 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -11,16 +11,16 @@ ifneq ($(wildcard $(GENHDR)),)
endif
CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
-LDLIBS += -lcap -lelf -lrt
+LDLIBS += -lcap -lelf -lrt -lpthread
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
- test_align test_verifier_log test_dev_cgroup
+ test_align test_verifier_log test_dev_cgroup test_tcpbpf_user
TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
- sample_map_ret0.o
+ sample_map_ret0.o test_tcpbpf_kern.o
TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \
test_offload.py
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 33cb00e46c49..dde2c11d7771 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -71,6 +71,8 @@ static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval,
static int (*bpf_getsockopt)(void *ctx, int level, int optname, void *optval,
int optlen) =
(void *) BPF_FUNC_getsockopt;
+static int (*bpf_sock_ops_cb_flags_set)(void *ctx, int flags) =
+ (void *) BPF_FUNC_sock_ops_cb_flags_set;
static int (*bpf_sk_redirect_map)(void *ctx, void *map, int key, int flags) =
(void *) BPF_FUNC_sk_redirect_map;
static int (*bpf_sock_map_update)(void *map, void *key, void *value,
diff --git a/tools/testing/selftests/bpf/tcp_client.py b/tools/testing/selftests/bpf/tcp_client.py
new file mode 100755
index 000000000000..481dccdf140c
--- /dev/null
+++ b/tools/testing/selftests/bpf/tcp_client.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python2
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+
+import sys, os, os.path, getopt
+import socket, time
+import subprocess
+import select
+
+def read(sock, n):
+ buf = ''
+ while len(buf) < n:
+ rem = n - len(buf)
+ try: s = sock.recv(rem)
+ except (socket.error), e: return ''
+ buf += s
+ return buf
+
+def send(sock, s):
+ total = len(s)
+ count = 0
+ while count < total:
+ try: n = sock.send(s)
+ except (socket.error), e: n = 0
+ if n == 0:
+ return count;
+ count += n
+ return count
+
+
+serverPort = int(sys.argv[1])
+HostName = socket.gethostname()
+
+# create active socket
+sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
+try:
+ sock.connect((HostName, serverPort))
+except socket.error as e:
+ sys.exit(1)
+
+buf = ''
+n = 0
+while n < 1000:
+ buf += '+'
+ n += 1
+
+sock.settimeout(1);
+n = send(sock, buf)
+n = read(sock, 500)
+sys.exit(0)
diff --git a/tools/testing/selftests/bpf/tcp_server.py b/tools/testing/selftests/bpf/tcp_server.py
new file mode 100755
index 000000000000..bc454d7d0be2
--- /dev/null
+++ b/tools/testing/selftests/bpf/tcp_server.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python2
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+
+import sys, os, os.path, getopt
+import socket, time
+import subprocess
+import select
+
+def read(sock, n):
+ buf = ''
+ while len(buf) < n:
+ rem = n - len(buf)
+ try: s = sock.recv(rem)
+ except (socket.error), e: return ''
+ buf += s
+ return buf
+
+def send(sock, s):
+ total = len(s)
+ count = 0
+ while count < total:
+ try: n = sock.send(s)
+ except (socket.error), e: n = 0
+ if n == 0:
+ return count;
+ count += n
+ return count
+
+
+SERVER_PORT = 12877
+MAX_PORTS = 2
+
+serverPort = SERVER_PORT
+serverSocket = None
+
+HostName = socket.gethostname()
+
+# create passive socket
+serverSocket = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
+host = socket.gethostname()
+
+try: serverSocket.bind((host, 0))
+except socket.error as msg:
+ print 'bind fails: ', msg
+
+sn = serverSocket.getsockname()
+serverPort = sn[1]
+
+cmdStr = ("./tcp_client.py %d &") % (serverPort)
+os.system(cmdStr)
+
+buf = ''
+n = 0
+while n < 500:
+ buf += '.'
+ n += 1
+
+serverSocket.listen(MAX_PORTS)
+readList = [serverSocket]
+
+while True:
+ readyRead, readyWrite, inError = \
+ select.select(readList, [], [], 2)
+
+ if len(readyRead) > 0:
+ waitCount = 0
+ for sock in readyRead:
+ if sock == serverSocket:
+ (clientSocket, address) = serverSocket.accept()
+ address = str(address[0])
+ readList.append(clientSocket)
+ else:
+ sock.settimeout(1);
+ s = read(sock, 1000)
+ n = send(sock, buf)
+ sock.close()
+ serverSocket.close()
+ sys.exit(0)
+ else:
+ print 'Select timeout!'
+ sys.exit(1)
diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c
index e19b410125eb..ff8bd7e3e50c 100644
--- a/tools/testing/selftests/bpf/test_align.c
+++ b/tools/testing/selftests/bpf/test_align.c
@@ -446,11 +446,9 @@ static struct bpf_align_test tests[] = {
.insns = {
PREP_PKT_POINTERS,
BPF_MOV64_IMM(BPF_REG_0, 0),
- /* ptr & const => unknown & const */
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_5, 0x40),
- /* ptr << const => unknown << const */
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+ /* (ptr - ptr) << 2 */
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_5, BPF_REG_2),
BPF_ALU64_IMM(BPF_LSH, BPF_REG_5, 2),
/* We have a (4n) value. Let's make a packet offset
* out of it. First add 14, to make it a (4n+2)
@@ -473,8 +471,26 @@ static struct bpf_align_test tests[] = {
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
.matches = {
- {4, "R5_w=pkt(id=0,off=0,r=0,imm=0)"},
- /* R5 bitwise operator &= on pointer prohibited */
+ {4, "R5_w=pkt_end(id=0,off=0,imm=0)"},
+ /* (ptr - ptr) << 2 == unknown, (4n) */
+ {6, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc))"},
+ /* (4n) + 14 == (4n+2). We blow our bounds, because
+ * the add could overflow.
+ */
+ {7, "R5=inv(id=0,var_off=(0x2; 0xfffffffffffffffc))"},
+ /* Checked s>=0 */
+ {9, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+ /* packet pointer + nonnegative (4n+2) */
+ {11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+ {13, "R4=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+ /* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
+ * We checked the bounds, but it might have been able
+ * to overflow if the packet pointer started in the
+ * upper half of the address space.
+ * So we did not get a 'range' on R6, and the access
+ * attempt will fail.
+ */
+ {15, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
}
},
{
diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c
index c1535b34f14f..3489cc283433 100644
--- a/tools/testing/selftests/bpf/test_dev_cgroup.c
+++ b/tools/testing/selftests/bpf/test_dev_cgroup.c
@@ -21,7 +21,7 @@
#define DEV_CGROUP_PROG "./dev_cgroup.o"
-#define TEST_CGROUP "test-bpf-based-device-cgroup/"
+#define TEST_CGROUP "/test-bpf-based-device-cgroup/"
int main(int argc, char **argv)
{
diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c
index 081510853c6d..2be87e9ee28d 100644
--- a/tools/testing/selftests/bpf/test_lpm_map.c
+++ b/tools/testing/selftests/bpf/test_lpm_map.c
@@ -14,6 +14,7 @@
#include <errno.h>
#include <inttypes.h>
#include <linux/bpf.h>
+#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -641,6 +642,98 @@ static void test_lpm_get_next_key(void)
close(map_fd);
}
+#define MAX_TEST_KEYS 4
+struct lpm_mt_test_info {
+ int cmd; /* 0: update, 1: delete, 2: lookup, 3: get_next_key */
+ int iter;
+ int map_fd;
+ struct {
+ __u32 prefixlen;
+ __u32 data;
+ } key[MAX_TEST_KEYS];
+};
+
+static void *lpm_test_command(void *arg)
+{
+ int i, j, ret, iter, key_size;
+ struct lpm_mt_test_info *info = arg;
+ struct bpf_lpm_trie_key *key_p;
+
+ key_size = sizeof(struct bpf_lpm_trie_key) + sizeof(__u32);
+ key_p = alloca(key_size);
+ for (iter = 0; iter < info->iter; iter++)
+ for (i = 0; i < MAX_TEST_KEYS; i++) {
+ /* first half of iterations in forward order,
+ * and second half in backward order.
+ */
+ j = (iter < (info->iter / 2)) ? i : MAX_TEST_KEYS - i - 1;
+ key_p->prefixlen = info->key[j].prefixlen;
+ memcpy(key_p->data, &info->key[j].data, sizeof(__u32));
+ if (info->cmd == 0) {
+ __u32 value = j;
+ /* update must succeed */
+ assert(bpf_map_update_elem(info->map_fd, key_p, &value, 0) == 0);
+ } else if (info->cmd == 1) {
+ ret = bpf_map_delete_elem(info->map_fd, key_p);
+ assert(ret == 0 || errno == ENOENT);
+ } else if (info->cmd == 2) {
+ __u32 value;
+ ret = bpf_map_lookup_elem(info->map_fd, key_p, &value);
+ assert(ret == 0 || errno == ENOENT);
+ } else {
+ struct bpf_lpm_trie_key *next_key_p = alloca(key_size);
+ ret = bpf_map_get_next_key(info->map_fd, key_p, next_key_p);
+ assert(ret == 0 || errno == ENOENT || errno == ENOMEM);
+ }
+ }
+
+ // Pass successful exit info back to the main thread
+ pthread_exit((void *)info);
+}
+
+static void setup_lpm_mt_test_info(struct lpm_mt_test_info *info, int map_fd)
+{
+ info->iter = 2000;
+ info->map_fd = map_fd;
+ info->key[0].prefixlen = 16;
+ inet_pton(AF_INET, "192.168.0.0", &info->key[0].data);
+ info->key[1].prefixlen = 24;
+ inet_pton(AF_INET, "192.168.0.0", &info->key[1].data);
+ info->key[2].prefixlen = 24;
+ inet_pton(AF_INET, "192.168.128.0", &info->key[2].data);
+ info->key[3].prefixlen = 24;
+ inet_pton(AF_INET, "192.168.1.0", &info->key[3].data);
+}
+
+static void test_lpm_multi_thread(void)
+{
+ struct lpm_mt_test_info info[4];
+ size_t key_size, value_size;
+ pthread_t thread_id[4];
+ int i, map_fd;
+ void *ret;
+
+ /* create a trie */
+ value_size = sizeof(__u32);
+ key_size = sizeof(struct bpf_lpm_trie_key) + value_size;
+ map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, value_size,
+ 100, BPF_F_NO_PREALLOC);
+
+ /* create 4 threads to test update, delete, lookup and get_next_key */
+ setup_lpm_mt_test_info(&info[0], map_fd);
+ for (i = 0; i < 4; i++) {
+ if (i != 0)
+ memcpy(&info[i], &info[0], sizeof(info[i]));
+ info[i].cmd = i;
+ assert(pthread_create(&thread_id[i], NULL, &lpm_test_command, &info[i]) == 0);
+ }
+
+ for (i = 0; i < 4; i++)
+ assert(pthread_join(thread_id[i], &ret) == 0 && ret == (void *)&info[i]);
+
+ close(map_fd);
+}
+
int main(void)
{
struct rlimit limit = { RLIM_INFINITY, RLIM_INFINITY };
@@ -667,6 +760,8 @@ int main(void)
test_lpm_get_next_key();
+ test_lpm_multi_thread();
+
printf("test_lpm: OK\n");
return 0;
}
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 040356ecc862..436c4c72414f 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -242,7 +242,7 @@ static void test_hashmap_percpu(int task, void *data)
static void test_hashmap_walk(int task, void *data)
{
- int fd, i, max_entries = 100000;
+ int fd, i, max_entries = 1000;
long long key, value, next_key;
bool next_key_valid = true;
@@ -463,7 +463,7 @@ static void test_devmap(int task, void *data)
#define SOCKMAP_VERDICT_PROG "./sockmap_verdict_prog.o"
static void test_sockmap(int tasks, void *data)
{
- int one = 1, map_fd_rx, map_fd_tx, map_fd_break, s, sc, rc;
+ int one = 1, map_fd_rx = 0, map_fd_tx = 0, map_fd_break, s, sc, rc;
struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_break;
int ports[] = {50200, 50201, 50202, 50204};
int err, i, fd, udp, sfd[6] = {0xdeadbeef};
@@ -868,9 +868,12 @@ static void test_sockmap(int tasks, void *data)
goto out_sockmap;
}
- /* Test map close sockets */
- for (i = 0; i < 6; i++)
+ /* Test map close sockets and empty maps */
+ for (i = 0; i < 6; i++) {
+ bpf_map_delete_elem(map_fd_tx, &i);
+ bpf_map_delete_elem(map_fd_rx, &i);
close(sfd[i]);
+ }
close(fd);
close(map_fd_rx);
bpf_object__close(obj);
@@ -881,8 +884,13 @@ out:
printf("Failed to create sockmap '%i:%s'!\n", i, strerror(errno));
exit(1);
out_sockmap:
- for (i = 0; i < 6; i++)
+ for (i = 0; i < 6; i++) {
+ if (map_fd_tx)
+ bpf_map_delete_elem(map_fd_tx, &i);
+ if (map_fd_rx)
+ bpf_map_delete_elem(map_fd_rx, &i);
close(sfd[i]);
+ }
close(fd);
exit(1);
}
@@ -931,8 +939,12 @@ static void test_map_large(void)
close(fd);
}
-static void run_parallel(int tasks, void (*fn)(int task, void *data),
- void *data)
+#define run_parallel(N, FN, DATA) \
+ printf("Fork %d tasks to '" #FN "'\n", N); \
+ __run_parallel(N, FN, DATA)
+
+static void __run_parallel(int tasks, void (*fn)(int task, void *data),
+ void *data)
{
pid_t pid[tasks];
int i;
@@ -972,7 +984,7 @@ static void test_map_stress(void)
#define DO_UPDATE 1
#define DO_DELETE 0
-static void do_work(int fn, void *data)
+static void test_update_delete(int fn, void *data)
{
int do_update = ((int *)data)[1];
int fd = ((int *)data)[0];
@@ -1012,7 +1024,7 @@ static void test_map_parallel(void)
*/
data[0] = fd;
data[1] = DO_UPDATE;
- run_parallel(TASKS, do_work, data);
+ run_parallel(TASKS, test_update_delete, data);
/* Check that key=0 is already there. */
assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
@@ -1035,7 +1047,7 @@ static void test_map_parallel(void)
/* Now let's delete all elemenets in parallel. */
data[1] = DO_DELETE;
- run_parallel(TASKS, do_work, data);
+ run_parallel(TASKS, test_update_delete, data);
/* Nothing should be left. */
key = -1;
diff --git a/tools/testing/selftests/bpf/test_tcpbpf.h b/tools/testing/selftests/bpf/test_tcpbpf.h
new file mode 100644
index 000000000000..2fe43289943c
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcpbpf.h
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef _TEST_TCPBPF_H
+#define _TEST_TCPBPF_H
+
+struct tcpbpf_globals {
+ __u32 event_map;
+ __u32 total_retrans;
+ __u32 data_segs_in;
+ __u32 data_segs_out;
+ __u32 bad_cb_test_rv;
+ __u32 good_cb_test_rv;
+ __u64 bytes_received;
+ __u64 bytes_acked;
+};
+#endif
diff --git a/tools/testing/selftests/bpf/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/test_tcpbpf_kern.c
new file mode 100644
index 000000000000..57119ad57a3f
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcpbpf_kern.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/in6.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/tcp.h>
+#include <netinet/in.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+#include "test_tcpbpf.h"
+
+struct bpf_map_def SEC("maps") global_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct tcpbpf_globals),
+ .max_entries = 2,
+};
+
+static inline void update_event_map(int event)
+{
+ __u32 key = 0;
+ struct tcpbpf_globals g, *gp;
+
+ gp = bpf_map_lookup_elem(&global_map, &key);
+ if (gp == NULL) {
+ struct tcpbpf_globals g = {0};
+
+ g.event_map |= (1 << event);
+ bpf_map_update_elem(&global_map, &key, &g,
+ BPF_ANY);
+ } else {
+ g = *gp;
+ g.event_map |= (1 << event);
+ bpf_map_update_elem(&global_map, &key, &g,
+ BPF_ANY);
+ }
+}
+
+int _version SEC("version") = 1;
+
+SEC("sockops")
+int bpf_testcb(struct bpf_sock_ops *skops)
+{
+ int rv = -1;
+ int bad_call_rv = 0;
+ int good_call_rv = 0;
+ int op;
+ int v = 0;
+
+ op = (int) skops->op;
+
+ update_event_map(op);
+
+ switch (op) {
+ case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+ /* Test failure to set largest cb flag (assumes not defined) */
+ bad_call_rv = bpf_sock_ops_cb_flags_set(skops, 0x80);
+ /* Set callback */
+ good_call_rv = bpf_sock_ops_cb_flags_set(skops,
+ BPF_SOCK_OPS_STATE_CB_FLAG);
+ /* Update results */
+ {
+ __u32 key = 0;
+ struct tcpbpf_globals g, *gp;
+
+ gp = bpf_map_lookup_elem(&global_map, &key);
+ if (!gp)
+ break;
+ g = *gp;
+ g.bad_cb_test_rv = bad_call_rv;
+ g.good_cb_test_rv = good_call_rv;
+ bpf_map_update_elem(&global_map, &key, &g,
+ BPF_ANY);
+ }
+ break;
+ case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+ skops->sk_txhash = 0x12345f;
+ v = 0xff;
+ rv = bpf_setsockopt(skops, SOL_IPV6, IPV6_TCLASS, &v,
+ sizeof(v));
+ break;
+ case BPF_SOCK_OPS_RTO_CB:
+ break;
+ case BPF_SOCK_OPS_RETRANS_CB:
+ break;
+ case BPF_SOCK_OPS_STATE_CB:
+ if (skops->args[1] == BPF_TCP_CLOSE) {
+ __u32 key = 0;
+ struct tcpbpf_globals g, *gp;
+
+ gp = bpf_map_lookup_elem(&global_map, &key);
+ if (!gp)
+ break;
+ g = *gp;
+ g.total_retrans = skops->total_retrans;
+ g.data_segs_in = skops->data_segs_in;
+ g.data_segs_out = skops->data_segs_out;
+ g.bytes_received = skops->bytes_received;
+ g.bytes_acked = skops->bytes_acked;
+ bpf_map_update_elem(&global_map, &key, &g,
+ BPF_ANY);
+ }
+ break;
+ default:
+ rv = -1;
+ }
+ skops->reply = rv;
+ return 1;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c
new file mode 100644
index 000000000000..95a370f3d378
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcpbpf_user.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <signal.h>
+#include <string.h>
+#include <assert.h>
+#include <linux/perf_event.h>
+#include <linux/ptrace.h>
+#include <linux/bpf.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include "bpf_util.h"
+#include <linux/perf_event.h>
+#include "test_tcpbpf.h"
+
+static int bpf_find_map(const char *test, struct bpf_object *obj,
+ const char *name)
+{
+ struct bpf_map *map;
+
+ map = bpf_object__find_map_by_name(obj, name);
+ if (!map) {
+ printf("%s:FAIL:map '%s' not found\n", test, name);
+ return -1;
+ }
+ return bpf_map__fd(map);
+}
+
+#define SYSTEM(CMD) \
+ do { \
+ if (system(CMD)) { \
+ printf("system(%s) FAILS!\n", CMD); \
+ } \
+ } while (0)
+
+int main(int argc, char **argv)
+{
+ const char *file = "test_tcpbpf_kern.o";
+ struct tcpbpf_globals g = {0};
+ int cg_fd, prog_fd, map_fd;
+ bool debug_flag = false;
+ int error = EXIT_FAILURE;
+ struct bpf_object *obj;
+ char cmd[100], *dir;
+ struct stat buffer;
+ __u32 key = 0;
+ int pid;
+ int rv;
+
+ if (argc > 1 && strcmp(argv[1], "-d") == 0)
+ debug_flag = true;
+
+ dir = "/tmp/cgroupv2/foo";
+
+ if (stat(dir, &buffer) != 0) {
+ SYSTEM("mkdir -p /tmp/cgroupv2");
+ SYSTEM("mount -t cgroup2 none /tmp/cgroupv2");
+ SYSTEM("mkdir -p /tmp/cgroupv2/foo");
+ }
+ pid = (int) getpid();
+ sprintf(cmd, "echo %d >> /tmp/cgroupv2/foo/cgroup.procs", pid);
+ SYSTEM(cmd);
+
+ cg_fd = open(dir, O_DIRECTORY, O_RDONLY);
+ if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) {
+ printf("FAILED: load_bpf_file failed for: %s\n", file);
+ goto err;
+ }
+
+ rv = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_SOCK_OPS, 0);
+ if (rv) {
+ printf("FAILED: bpf_prog_attach: %d (%s)\n",
+ error, strerror(errno));
+ goto err;
+ }
+
+ SYSTEM("./tcp_server.py");
+
+ map_fd = bpf_find_map(__func__, obj, "global_map");
+ if (map_fd < 0)
+ goto err;
+
+ rv = bpf_map_lookup_elem(map_fd, &key, &g);
+ if (rv != 0) {
+ printf("FAILED: bpf_map_lookup_elem returns %d\n", rv);
+ goto err;
+ }
+
+ if (g.bytes_received != 501 || g.bytes_acked != 1002 ||
+ g.data_segs_in != 1 || g.data_segs_out != 1 ||
+ (g.event_map ^ 0x47e) != 0 || g.bad_cb_test_rv != 0x80 ||
+ g.good_cb_test_rv != 0) {
+ printf("FAILED: Wrong stats\n");
+ if (debug_flag) {
+ printf("\n");
+ printf("bytes_received: %d (expecting 501)\n",
+ (int)g.bytes_received);
+ printf("bytes_acked: %d (expecting 1002)\n",
+ (int)g.bytes_acked);
+ printf("data_segs_in: %d (expecting 1)\n",
+ g.data_segs_in);
+ printf("data_segs_out: %d (expecting 1)\n",
+ g.data_segs_out);
+ printf("event_map: 0x%x (at least 0x47e)\n",
+ g.event_map);
+ printf("bad_cb_test_rv: 0x%x (expecting 0x80)\n",
+ g.bad_cb_test_rv);
+ printf("good_cb_test_rv:0x%x (expecting 0)\n",
+ g.good_cb_test_rv);
+ }
+ goto err;
+ }
+ printf("PASSED!\n");
+ error = 0;
+err:
+ bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS);
+ return error;
+
+}
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index fb82d29ee863..697bd83de295 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -21,6 +21,7 @@
#include <stddef.h>
#include <stdbool.h>
#include <sched.h>
+#include <limits.h>
#include <sys/capability.h>
#include <sys/resource.h>
@@ -111,7 +112,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.result = ACCEPT,
- .retval = 0,
+ .retval = 42,
},
{
"DIV32 by 0, zero check 2",
@@ -123,7 +124,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.result = ACCEPT,
- .retval = 0,
+ .retval = 42,
},
{
"DIV64 by 0, zero check",
@@ -135,7 +136,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.result = ACCEPT,
- .retval = 0,
+ .retval = 42,
},
{
"MOD32 by 0, zero check 1",
@@ -147,7 +148,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.result = ACCEPT,
- .retval = 0,
+ .retval = 42,
},
{
"MOD32 by 0, zero check 2",
@@ -159,7 +160,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.result = ACCEPT,
- .retval = 0,
+ .retval = 42,
},
{
"MOD64 by 0, zero check",
@@ -171,13 +172,245 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.result = ACCEPT,
+ .retval = 42,
+ },
+ {
+ "DIV32 by 0, zero check ok, cls",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, 42),
+ BPF_MOV32_IMM(BPF_REG_1, 2),
+ BPF_MOV32_IMM(BPF_REG_2, 16),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 8,
+ },
+ {
+ "DIV32 by 0, zero check 1, cls",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 1),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "DIV32 by 0, zero check 2, cls",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL),
+ BPF_MOV32_IMM(BPF_REG_0, 1),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "DIV64 by 0, zero check, cls",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 1),
+ BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
.retval = 0,
},
{
+ "MOD32 by 0, zero check ok, cls",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, 42),
+ BPF_MOV32_IMM(BPF_REG_1, 3),
+ BPF_MOV32_IMM(BPF_REG_2, 5),
+ BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
+ "MOD32 by 0, zero check 1, cls",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 1),
+ BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "MOD32 by 0, zero check 2, cls",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL),
+ BPF_MOV32_IMM(BPF_REG_0, 1),
+ BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "MOD64 by 0, zero check 1, cls",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 2),
+ BPF_ALU64_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
+ "MOD64 by 0, zero check 2, cls",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_MOV32_IMM(BPF_REG_0, -1),
+ BPF_ALU64_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = -1,
+ },
+ /* Just make sure that JITs used udiv/umod as otherwise we get
+ * an exception from INT_MIN/-1 overflow similarly as with div
+ * by zero.
+ */
+ {
+ "DIV32 overflow, check 1",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_1, -1),
+ BPF_MOV32_IMM(BPF_REG_0, INT_MIN),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "DIV32 overflow, check 2",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, INT_MIN),
+ BPF_ALU32_IMM(BPF_DIV, BPF_REG_0, -1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "DIV64 overflow, check 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, -1),
+ BPF_LD_IMM64(BPF_REG_0, LLONG_MIN),
+ BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "DIV64 overflow, check 2",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_0, LLONG_MIN),
+ BPF_ALU64_IMM(BPF_DIV, BPF_REG_0, -1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "MOD32 overflow, check 1",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_1, -1),
+ BPF_MOV32_IMM(BPF_REG_0, INT_MIN),
+ BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = INT_MIN,
+ },
+ {
+ "MOD32 overflow, check 2",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, INT_MIN),
+ BPF_ALU32_IMM(BPF_MOD, BPF_REG_0, -1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = INT_MIN,
+ },
+ {
+ "MOD64 overflow, check 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, -1),
+ BPF_LD_IMM64(BPF_REG_2, LLONG_MIN),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+ BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_3, BPF_REG_2, 1),
+ BPF_MOV32_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "MOD64 overflow, check 2",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_2, LLONG_MIN),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_MOD, BPF_REG_2, -1),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_3, BPF_REG_2, 1),
+ BPF_MOV32_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "xor32 zero extend check",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_2, -1),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 32),
+ BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 0xffff),
+ BPF_ALU32_REG(BPF_XOR, BPF_REG_2, BPF_REG_2),
+ BPF_MOV32_IMM(BPF_REG_0, 2),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 1),
+ BPF_MOV32_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
"empty prog",
.insns = {
},
- .errstr = "last insn is not an exit or jmp",
+ .errstr = "unknown opcode 00",
.result = REJECT,
},
{
@@ -374,7 +607,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.result = REJECT,
- .errstr = "BPF_ARSH not supported for 32 bit ALU",
+ .errstr = "unknown opcode c4",
},
{
"arsh32 on reg",
@@ -385,7 +618,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.result = REJECT,
- .errstr = "BPF_ARSH not supported for 32 bit ALU",
+ .errstr = "unknown opcode cc",
},
{
"arsh64 on imm",
@@ -501,7 +734,7 @@ static struct bpf_test tests[] = {
BPF_RAW_INSN(BPF_JMP | BPF_CALL | BPF_X, 0, 0, 0, 0),
BPF_EXIT_INSN(),
},
- .errstr = "BPF_CALL uses reserved",
+ .errstr = "unknown opcode 8d",
.result = REJECT,
},
{
@@ -691,7 +924,7 @@ static struct bpf_test tests[] = {
BPF_RAW_INSN(0, 0, 0, 0, 0),
BPF_EXIT_INSN(),
},
- .errstr = "invalid BPF_LD_IMM",
+ .errstr = "unknown opcode 00",
.result = REJECT,
},
{
@@ -709,7 +942,7 @@ static struct bpf_test tests[] = {
BPF_RAW_INSN(-1, 0, 0, 0, 0),
BPF_EXIT_INSN(),
},
- .errstr = "invalid BPF_ALU opcode f0",
+ .errstr = "unknown opcode ff",
.result = REJECT,
},
{
@@ -718,7 +951,7 @@ static struct bpf_test tests[] = {
BPF_RAW_INSN(-1, -1, -1, -1, -1),
BPF_EXIT_INSN(),
},
- .errstr = "invalid BPF_ALU opcode f0",
+ .errstr = "unknown opcode ff",
.result = REJECT,
},
{
@@ -7543,7 +7776,7 @@ static struct bpf_test tests[] = {
},
BPF_EXIT_INSN(),
},
- .errstr = "BPF_END uses reserved fields",
+ .errstr = "unknown opcode d7",
.result = REJECT,
},
{
@@ -8766,6 +8999,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.result = ACCEPT,
+ .retval = 1,
},
{
"check deducing bounds from const, 3",
@@ -8963,6 +9197,90 @@ static struct bpf_test tests[] = {
.retval = 1,
},
{
+ "calls: div by 0 in subprog",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_IMM(BPF_REG_2, 0),
+ BPF_MOV32_IMM(BPF_REG_3, 1),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_3, BPF_REG_2),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "calls: multiple ret types in subprog 1",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_MOV32_IMM(BPF_REG_0, 42),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "R0 invalid mem access 'inv'",
+ },
+ {
+ "calls: multiple ret types in subprog 2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 9),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6,
+ offsetof(struct __sk_buff, data)),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 64),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map1 = { 16 },
+ .result = REJECT,
+ .errstr = "R0 min value is outside of the array range",
+ },
+ {
"calls: overlapping caller/callee",
.insns = {
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 0),