From c4675f935399cbdd3ba3869b0bf6c60528c8111a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 13 Jul 2015 20:49:32 +0200 Subject: ebpf: remove self-assignment in interpreter's tail call ARG1 = BPF_R1 as it stands, evaluates to regs[BPF_REG_1] = regs[BPF_REG_1] and thus has no effect. Add a comment instead, explaining what happens and why it's okay to just remove it. Since from user space side, a tail call is invoked as a pseudo helper function via bpf_tail_call_proto, the verifier checks the arguments just like with any other helper function and makes sure that the first argument (regs[BPF_REG_1])'s type is ARG_PTR_TO_CTX. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index c5bedc82bc1c..bf38f5e8196c 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -453,7 +453,11 @@ select_insn: if (unlikely(!prog)) goto out; - ARG1 = BPF_R1; + /* ARG1 at this point is guaranteed to point to CTX from + * the verifier side due to the fact that the tail call is + * handeled like a helper, that is, bpf_tail_call_proto, + * where arg1_type is ARG_PTR_TO_CTX. + */ insn = prog->insnsi; goto select_insn; out: -- cgit v1.3-6-gb490 From 4d9c5c53ac99e4cb5d031897863203d7817b36e0 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 20 Jul 2015 20:34:19 -0700 Subject: test_bpf: add bpf_skb_vlan_push/pop() tests improve accuracy of timing in test_bpf and add two stress tests: - {skb->data[0], get_smp_processor_id} repeated 2k times - {skb->data[0], vlan_push} x 68 followed by {skb->data[0], vlan_pop} x 68 1st test is useful to test performance of JIT implementation of BPF_LD_ABS together with BPF_CALL instructions. 2nd test is stressing skb_vlan_push/pop logic together with skb->data access via BPF_LD_ABS insn which checks that re-caching of skb->data is done correctly. In order to call bpf_skb_vlan_push() from test_bpf.ko have to add three export_symbol_gpl. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/core.c | 1 + lib/test_bpf.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- net/core/filter.c | 2 ++ 3 files changed, 98 insertions(+), 3 deletions(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index bf38f5e8196c..fafa74161445 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -177,6 +177,7 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) { return 0; } +EXPORT_SYMBOL_GPL(__bpf_call_base); /** * __bpf_prog_run - run eBPF program on a given context diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 9198f28a5528..8b5e66f008b0 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -355,6 +356,81 @@ static int bpf_fill_ja(struct bpf_test *self) return __bpf_fill_ja(self, 12, 9); } +static int bpf_fill_ld_abs_get_processor_id(struct bpf_test *self) +{ + unsigned int len = BPF_MAXINSNS; + struct sock_filter *insn; + int i; + + insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); + if (!insn) + return -ENOMEM; + + for (i = 0; i < len - 1; i += 2) { + insn[i] = __BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 0); + insn[i + 1] = __BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_CPU); + } + + insn[len - 1] = __BPF_STMT(BPF_RET | BPF_K, 0xbee); + + self->u.ptr.insns = insn; + self->u.ptr.len = len; + + return 0; +} + +#define PUSH_CNT 68 +/* test: {skb->data[0], vlan_push} x 68 + {skb->data[0], vlan_pop} x 68 */ +static int bpf_fill_ld_abs_vlan_push_pop(struct bpf_test *self) +{ + unsigned int len = BPF_MAXINSNS; + struct bpf_insn *insn; + int i = 0, j, k = 0; + + insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); + if (!insn) + return -ENOMEM; + + insn[i++] = BPF_MOV64_REG(R6, R1); +loop: + for (j = 0; j < PUSH_CNT; j++) { + insn[i++] = BPF_LD_ABS(BPF_B, 0); + insn[i] = BPF_JMP_IMM(BPF_JNE, R0, 0x34, len - i - 2); + i++; + insn[i++] = BPF_MOV64_REG(R1, R6); + insn[i++] = BPF_MOV64_IMM(R2, 1); + insn[i++] = BPF_MOV64_IMM(R3, 2); + insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + bpf_skb_vlan_push_proto.func - __bpf_call_base); + insn[i] = BPF_JMP_IMM(BPF_JNE, R0, 0, len - i - 2); + i++; + } + + for (j = 0; j < PUSH_CNT; j++) { + insn[i++] = BPF_LD_ABS(BPF_B, 0); + insn[i] = BPF_JMP_IMM(BPF_JNE, R0, 0x34, len - i - 2); + i++; + insn[i++] = BPF_MOV64_REG(R1, R6); + insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + bpf_skb_vlan_pop_proto.func - __bpf_call_base); + insn[i] = BPF_JMP_IMM(BPF_JNE, R0, 0, len - i - 2); + i++; + } + if (++k < 5) + goto loop; + + for (; i < len - 1; i++) + insn[i] = BPF_ALU32_IMM(BPF_MOV, R0, 0xbef); + + insn[len - 1] = BPF_EXIT_INSN(); + + self->u.ptr.insns = insn; + self->u.ptr.len = len; + + return 0; +} + static struct bpf_test tests[] = { { "TAX", @@ -4398,6 +4474,22 @@ static struct bpf_test tests[] = { { { 0, 0xababcbac } }, .fill_helper = bpf_fill_maxinsns11, }, + { + "BPF_MAXINSNS: ld_abs+get_processor_id", + { }, + CLASSIC, + { }, + { { 1, 0xbee } }, + .fill_helper = bpf_fill_ld_abs_get_processor_id, + }, + { + "BPF_MAXINSNS: ld_abs+vlan_push/pop", + { }, + INTERNAL, + { 0x34 }, + { { 1, 0xbef } }, + .fill_helper = bpf_fill_ld_abs_vlan_push_pop, + }, }; static struct net_device dev; @@ -4551,14 +4643,14 @@ static int __run_one(const struct bpf_prog *fp, const void *data, u64 start, finish; int ret = 0, i; - start = ktime_to_us(ktime_get()); + start = ktime_get_ns(); for (i = 0; i < runs; i++) ret = BPF_PROG_RUN(fp, data); - finish = ktime_to_us(ktime_get()); + finish = ktime_get_ns(); - *duration = (finish - start) * 1000ULL; + *duration = finish - start; do_div(*duration, runs); return ret; diff --git a/net/core/filter.c b/net/core/filter.c index 50338071fac4..786722a9c6f2 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1457,6 +1457,7 @@ const struct bpf_func_proto bpf_skb_vlan_push_proto = { .arg2_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING, }; +EXPORT_SYMBOL_GPL(bpf_skb_vlan_push_proto); static u64 bpf_skb_vlan_pop(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) { @@ -1471,6 +1472,7 @@ const struct bpf_func_proto bpf_skb_vlan_pop_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, }; +EXPORT_SYMBOL_GPL(bpf_skb_vlan_pop_proto); bool bpf_helper_changes_skb_data(void *func) { -- cgit v1.3-6-gb490 From 24b4d2abd0bd628f396dada3e915d395cbf459eb Mon Sep 17 00:00:00 2001 From: Alex Gartrell Date: Thu, 23 Jul 2015 14:24:40 -0700 Subject: ebpf: Allow dereferences of PTR_TO_STACK registers mov %rsp, %r1 ; r1 = rsp add $-8, %r1 ; r1 = rsp - 8 store_q $123, -8(%rsp) ; *(u64*)r1 = 123 <- valid store_q $123, (%r1) ; *(u64*)r1 = 123 <- previously invalid mov $0, %r0 exit ; Always need to exit And we'd get the following error: 0: (bf) r1 = r10 1: (07) r1 += -8 2: (7a) *(u64 *)(r10 -8) = 999 3: (7a) *(u64 *)(r1 +0) = 999 R1 invalid mem access 'fp' Unable to load program We already know that a register is a stack address and the appropriate offset, so we should be able to validate those references as well. Signed-off-by: Alex Gartrell Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 6 ++++- samples/bpf/test_verifier.c | 59 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 1 deletion(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 039d866fd36a..cd307df98cb3 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -648,6 +648,9 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off, struct verifier_state *state = &env->cur_state; int size, err = 0; + if (state->regs[regno].type == PTR_TO_STACK) + off += state->regs[regno].imm; + size = bpf_size_to_bytes(bpf_size); if (size < 0) return size; @@ -667,7 +670,8 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off, if (!err && t == BPF_READ && value_regno >= 0) mark_reg_unknown_value(state->regs, value_regno); - } else if (state->regs[regno].type == FRAME_PTR) { + } else if (state->regs[regno].type == FRAME_PTR || + state->regs[regno].type == PTR_TO_STACK) { if (off >= 0 || off < -MAX_BPF_STACK) { verbose("invalid stack off=%d size=%d\n", off, size); return -EACCES; diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c index 693605997abc..ee0f110c9c54 100644 --- a/samples/bpf/test_verifier.c +++ b/samples/bpf/test_verifier.c @@ -822,6 +822,65 @@ static struct bpf_test tests[] = { .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, + { + "PTR_TO_STACK store/load", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -10), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 2, 0xfaceb00c), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "PTR_TO_STACK store/load - bad alignment on off", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 2, 0xfaceb00c), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 2), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "misaligned access off -6 size 8", + }, + { + "PTR_TO_STACK store/load - bad alignment on reg", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -10), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "misaligned access off -2 size 8", + }, + { + "PTR_TO_STACK store/load - out of bounds low", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -80000), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack off=-79992 size=8", + }, + { + "PTR_TO_STACK store/load - out of bounds high", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack off=0 size=8", + }, }; static int probe_filter_length(struct bpf_insn *fp) -- cgit v1.3-6-gb490 From 2a36f0b92eb638dd023870574eb471b1c56be9ad Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Thu, 6 Aug 2015 07:02:33 +0000 Subject: bpf: Make the bpf_prog_array_map more generic All the map backends are of generic nature. In order to avoid adding much special code into the eBPF core, rewrite part of the bpf_prog_array map code and make it more generic. So the new perf_event_array map type can reuse most of code with bpf_prog_array map and add fewer lines of special code. Signed-off-by: Wang Nan Signed-off-by: Kaixu Xia Signed-off-by: David S. Miller --- arch/x86/net/bpf_jit_comp.c | 6 ++-- include/linux/bpf.h | 8 +++-- kernel/bpf/arraymap.c | 80 +++++++++++++++++++++++++++------------------ kernel/bpf/core.c | 2 +- kernel/bpf/syscall.c | 2 +- 5 files changed, 60 insertions(+), 38 deletions(-) (limited to 'kernel/bpf') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index ec5214f39aa8..70efcd0940f9 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -246,7 +246,7 @@ static void emit_prologue(u8 **pprog) * goto out; * if (++tail_call_cnt > MAX_TAIL_CALL_CNT) * goto out; - * prog = array->prog[index]; + * prog = array->ptrs[index]; * if (prog == NULL) * goto out; * goto *(prog->bpf_func + prologue_size); @@ -284,9 +284,9 @@ static void emit_bpf_tail_call(u8 **pprog) EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ EMIT2_off32(0x89, 0x85, -STACKSIZE + 36); /* mov dword ptr [rbp - 516], eax */ - /* prog = array->prog[index]; */ + /* prog = array->ptrs[index]; */ EMIT4_off32(0x48, 0x8D, 0x84, 0xD6, /* lea rax, [rsi + rdx * 8 + offsetof(...)] */ - offsetof(struct bpf_array, prog)); + offsetof(struct bpf_array, ptrs)); EMIT3(0x48, 0x8B, 0x00); /* mov rax, qword ptr [rax] */ /* if (prog == NULL) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 139d6d2e123f..d495211d63d1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -24,6 +24,10 @@ struct bpf_map_ops { void *(*map_lookup_elem)(struct bpf_map *map, void *key); int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags); int (*map_delete_elem)(struct bpf_map *map, void *key); + + /* funcs called by prog_array and perf_event_array map */ + void *(*map_fd_get_ptr) (struct bpf_map *map, int fd); + void (*map_fd_put_ptr) (void *ptr); }; struct bpf_map { @@ -142,13 +146,13 @@ struct bpf_array { bool owner_jited; union { char value[0] __aligned(8); - struct bpf_prog *prog[0] __aligned(8); + void *ptrs[0] __aligned(8); }; }; #define MAX_TAIL_CALL_CNT 32 u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); -void bpf_prog_array_map_clear(struct bpf_map *map); +void bpf_fd_array_map_clear(struct bpf_map *map); bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); const struct bpf_func_proto *bpf_get_trace_printk_proto(void); diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index cb31229a6fa4..45df6572ecfd 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -150,15 +150,15 @@ static int __init register_array_map(void) } late_initcall(register_array_map); -static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr) +static struct bpf_map *fd_array_map_alloc(union bpf_attr *attr) { - /* only bpf_prog file descriptors can be stored in prog_array map */ + /* only file descriptors can be stored in this type of map */ if (attr->value_size != sizeof(u32)) return ERR_PTR(-EINVAL); return array_map_alloc(attr); } -static void prog_array_map_free(struct bpf_map *map) +static void fd_array_map_free(struct bpf_map *map) { struct bpf_array *array = container_of(map, struct bpf_array, map); int i; @@ -167,21 +167,21 @@ static void prog_array_map_free(struct bpf_map *map) /* make sure it's empty */ for (i = 0; i < array->map.max_entries; i++) - BUG_ON(array->prog[i] != NULL); + BUG_ON(array->ptrs[i] != NULL); kvfree(array); } -static void *prog_array_map_lookup_elem(struct bpf_map *map, void *key) +static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key) { return NULL; } /* only called from syscall */ -static int prog_array_map_update_elem(struct bpf_map *map, void *key, - void *value, u64 map_flags) +static int fd_array_map_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags) { struct bpf_array *array = container_of(map, struct bpf_array, map); - struct bpf_prog *prog, *old_prog; + void *new_ptr, *old_ptr; u32 index = *(u32 *)key, ufd; if (map_flags != BPF_ANY) @@ -191,57 +191,75 @@ static int prog_array_map_update_elem(struct bpf_map *map, void *key, return -E2BIG; ufd = *(u32 *)value; - prog = bpf_prog_get(ufd); - if (IS_ERR(prog)) - return PTR_ERR(prog); - - if (!bpf_prog_array_compatible(array, prog)) { - bpf_prog_put(prog); - return -EINVAL; - } + new_ptr = map->ops->map_fd_get_ptr(map, ufd); + if (IS_ERR(new_ptr)) + return PTR_ERR(new_ptr); - old_prog = xchg(array->prog + index, prog); - if (old_prog) - bpf_prog_put_rcu(old_prog); + old_ptr = xchg(array->ptrs + index, new_ptr); + if (old_ptr) + map->ops->map_fd_put_ptr(old_ptr); return 0; } -static int prog_array_map_delete_elem(struct bpf_map *map, void *key) +static int fd_array_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_array *array = container_of(map, struct bpf_array, map); - struct bpf_prog *old_prog; + void *old_ptr; u32 index = *(u32 *)key; if (index >= array->map.max_entries) return -E2BIG; - old_prog = xchg(array->prog + index, NULL); - if (old_prog) { - bpf_prog_put_rcu(old_prog); + old_ptr = xchg(array->ptrs + index, NULL); + if (old_ptr) { + map->ops->map_fd_put_ptr(old_ptr); return 0; } else { return -ENOENT; } } +static void *prog_fd_array_get_ptr(struct bpf_map *map, int fd) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + struct bpf_prog *prog = bpf_prog_get(fd); + if (IS_ERR(prog)) + return prog; + + if (!bpf_prog_array_compatible(array, prog)) { + bpf_prog_put(prog); + return ERR_PTR(-EINVAL); + } + return prog; +} + +static void prog_fd_array_put_ptr(void *ptr) +{ + struct bpf_prog *prog = ptr; + + bpf_prog_put_rcu(prog); +} + /* decrement refcnt of all bpf_progs that are stored in this map */ -void bpf_prog_array_map_clear(struct bpf_map *map) +void bpf_fd_array_map_clear(struct bpf_map *map) { struct bpf_array *array = container_of(map, struct bpf_array, map); int i; for (i = 0; i < array->map.max_entries; i++) - prog_array_map_delete_elem(map, &i); + fd_array_map_delete_elem(map, &i); } static const struct bpf_map_ops prog_array_ops = { - .map_alloc = prog_array_map_alloc, - .map_free = prog_array_map_free, + .map_alloc = fd_array_map_alloc, + .map_free = fd_array_map_free, .map_get_next_key = array_map_get_next_key, - .map_lookup_elem = prog_array_map_lookup_elem, - .map_update_elem = prog_array_map_update_elem, - .map_delete_elem = prog_array_map_delete_elem, + .map_lookup_elem = fd_array_map_lookup_elem, + .map_update_elem = fd_array_map_update_elem, + .map_delete_elem = fd_array_map_delete_elem, + .map_fd_get_ptr = prog_fd_array_get_ptr, + .map_fd_put_ptr = prog_fd_array_put_ptr, }; static struct bpf_map_type_list prog_array_type __read_mostly = { diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index fafa74161445..67c380cfa9ca 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -450,7 +450,7 @@ select_insn: tail_call_cnt++; - prog = READ_ONCE(array->prog[index]); + prog = READ_ONCE(array->ptrs[index]); if (unlikely(!prog)) goto out; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index a1b14d197a4f..dc9b464fefa9 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -72,7 +72,7 @@ static int bpf_map_release(struct inode *inode, struct file *filp) /* prog_array stores refcnt-ed bpf_prog pointers * release them all when user space closes prog_array_fd */ - bpf_prog_array_map_clear(map); + bpf_fd_array_map_clear(map); bpf_map_put(map); return 0; -- cgit v1.3-6-gb490 From ea317b267e9d03a8241893aa176fba7661d07579 Mon Sep 17 00:00:00 2001 From: Kaixu Xia Date: Thu, 6 Aug 2015 07:02:34 +0000 Subject: bpf: Add new bpf map type to store the pointer to struct perf_event Introduce a new bpf map type 'BPF_MAP_TYPE_PERF_EVENT_ARRAY'. This map only stores the pointer to struct perf_event. The user space event FDs from perf_event_open() syscall are converted to the pointer to struct perf_event and stored in map. Signed-off-by: Kaixu Xia Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 1 + kernel/bpf/arraymap.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+) (limited to 'kernel/bpf') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d495211d63d1..4fc1f4070789 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -10,6 +10,7 @@ #include #include #include +#include struct bpf_map; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2ce13c109b00..a1814e8e53a7 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -114,6 +114,7 @@ enum bpf_map_type { BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PROG_ARRAY, + BPF_MAP_TYPE_PERF_EVENT_ARRAY, }; enum bpf_prog_type { diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 45df6572ecfd..29ace107f236 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -273,3 +273,60 @@ static int __init register_prog_array_map(void) return 0; } late_initcall(register_prog_array_map); + +static void perf_event_array_map_free(struct bpf_map *map) +{ + bpf_fd_array_map_clear(map); + fd_array_map_free(map); +} + +static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd) +{ + struct perf_event *event; + const struct perf_event_attr *attr; + + event = perf_event_get(fd); + if (IS_ERR(event)) + return event; + + attr = perf_event_attrs(event); + if (IS_ERR(attr)) + return (void *)attr; + + if (attr->type != PERF_TYPE_RAW && + attr->type != PERF_TYPE_HARDWARE) { + perf_event_release_kernel(event); + return ERR_PTR(-EINVAL); + } + return event; +} + +static void perf_event_fd_array_put_ptr(void *ptr) +{ + struct perf_event *event = ptr; + + perf_event_release_kernel(event); +} + +static const struct bpf_map_ops perf_event_array_ops = { + .map_alloc = fd_array_map_alloc, + .map_free = perf_event_array_map_free, + .map_get_next_key = array_map_get_next_key, + .map_lookup_elem = fd_array_map_lookup_elem, + .map_update_elem = fd_array_map_update_elem, + .map_delete_elem = fd_array_map_delete_elem, + .map_fd_get_ptr = perf_event_fd_array_get_ptr, + .map_fd_put_ptr = perf_event_fd_array_put_ptr, +}; + +static struct bpf_map_type_list perf_event_array_type __read_mostly = { + .ops = &perf_event_array_ops, + .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, +}; + +static int __init register_perf_event_array_map(void) +{ + bpf_register_map_type(&perf_event_array_type); + return 0; +} +late_initcall(register_perf_event_array_map); -- cgit v1.3-6-gb490 From 35578d7984003097af2b1e34502bc943d40c1804 Mon Sep 17 00:00:00 2001 From: Kaixu Xia Date: Thu, 6 Aug 2015 07:02:35 +0000 Subject: bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU conuter According to the perf_event_map_fd and index, the function bpf_perf_event_read() can convert the corresponding map value to the pointer to struct perf_event and return the Hardware PMU counter value. Signed-off-by: Kaixu Xia Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 1 + kernel/bpf/verifier.c | 48 +++++++++++++++++++++++++++++++++--------------- kernel/trace/bpf_trace.c | 31 +++++++++++++++++++++++++++++++ 4 files changed, 66 insertions(+), 15 deletions(-) (limited to 'kernel/bpf') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 4fc1f4070789..f57d7fed9ec3 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -190,6 +190,7 @@ extern const struct bpf_func_proto bpf_map_lookup_elem_proto; extern const struct bpf_func_proto bpf_map_update_elem_proto; extern const struct bpf_func_proto bpf_map_delete_elem_proto; +extern const struct bpf_func_proto bpf_perf_event_read_proto; extern const struct bpf_func_proto bpf_get_prandom_u32_proto; extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; extern const struct bpf_func_proto bpf_tail_call_proto; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a1814e8e53a7..92a48e2d5461 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -271,6 +271,7 @@ enum bpf_func_id { */ BPF_FUNC_skb_get_tunnel_key, BPF_FUNC_skb_set_tunnel_key, + BPF_FUNC_perf_event_read, /* u64 bpf_perf_event_read(&map, index) */ __BPF_FUNC_MAX_ID, }; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index cd307df98cb3..48e1c7192560 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -238,6 +238,14 @@ static const char * const reg_type_str[] = { [CONST_IMM] = "imm", }; +static const struct { + int map_type; + int func_id; +} func_limit[] = { + {BPF_MAP_TYPE_PROG_ARRAY, BPF_FUNC_tail_call}, + {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_read}, +}; + static void print_verifier_state(struct verifier_env *env) { enum bpf_reg_type t; @@ -837,6 +845,28 @@ static int check_func_arg(struct verifier_env *env, u32 regno, return err; } +static int check_map_func_compatibility(struct bpf_map *map, int func_id) +{ + bool bool_map, bool_func; + int i; + + if (!map) + return 0; + + for (i = 0; i <= ARRAY_SIZE(func_limit); i++) { + bool_map = (map->map_type == func_limit[i].map_type); + bool_func = (func_id == func_limit[i].func_id); + /* only when map & func pair match it can continue. + * don't allow any other map type to be passed into + * the special func; + */ + if (bool_map != bool_func) + return -EINVAL; + } + + return 0; +} + static int check_call(struct verifier_env *env, int func_id) { struct verifier_state *state = &env->cur_state; @@ -912,21 +942,9 @@ static int check_call(struct verifier_env *env, int func_id) return -EINVAL; } - if (map && map->map_type == BPF_MAP_TYPE_PROG_ARRAY && - func_id != BPF_FUNC_tail_call) - /* prog_array map type needs extra care: - * only allow to pass it into bpf_tail_call() for now. - * bpf_map_delete_elem() can be allowed in the future, - * while bpf_map_update_elem() must only be done via syscall - */ - return -EINVAL; - - if (func_id == BPF_FUNC_tail_call && - map->map_type != BPF_MAP_TYPE_PROG_ARRAY) - /* don't allow any other map type to be passed into - * bpf_tail_call() - */ - return -EINVAL; + err = check_map_func_compatibility(map, func_id); + if (err) + return err; return 0; } diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 88a041adee90..ef9936df1b04 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -158,6 +158,35 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void) return &bpf_trace_printk_proto; } +static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5) +{ + struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; + struct bpf_array *array = container_of(map, struct bpf_array, map); + struct perf_event *event; + + if (unlikely(index >= array->map.max_entries)) + return -E2BIG; + + event = (struct perf_event *)array->ptrs[index]; + if (!event) + return -ENOENT; + + /* + * we don't know if the function is run successfully by the + * return value. It can be judged in other places, such as + * eBPF programs. + */ + return perf_event_read_local(event); +} + +const struct bpf_func_proto bpf_perf_event_read_proto = { + .func = bpf_perf_event_read, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_ANYTHING, +}; + static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id) { switch (func_id) { @@ -183,6 +212,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func return bpf_get_trace_printk_proto(); case BPF_FUNC_get_smp_processor_id: return &bpf_get_smp_processor_id_proto; + case BPF_FUNC_perf_event_read: + return &bpf_perf_event_read_proto; default: return NULL; } -- cgit v1.3-6-gb490 From 140d8b335a9beb234fd0ed9a15aa6a47f47fd771 Mon Sep 17 00:00:00 2001 From: Wei-Chun Chao Date: Wed, 12 Aug 2015 07:57:12 -0700 Subject: bpf: fix bpf_perf_event_read() loop upper bound Verifier rejects programs incorrectly. Fixes: 35578d798400 ("bpf: Implement function bpf_perf_event_read()") Cc: Kaixu Xia Cc: Alexei Starovoitov Signed-off-by: Wei-Chun Chao Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 48e1c7192560..ed12e385fb75 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -853,7 +853,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) if (!map) return 0; - for (i = 0; i <= ARRAY_SIZE(func_limit); i++) { + for (i = 0; i < ARRAY_SIZE(func_limit); i++) { bool_map = (map->map_type == func_limit[i].map_type); bool_func = (func_id == func_limit[i].func_id); /* only when map & func pair match it can continue. -- cgit v1.3-6-gb490 From 592867bfabe2fcb449393ba7eb0de4f972a08c63 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 8 Sep 2015 18:00:09 +0200 Subject: ebpf: fix fd refcount leaks related to maps in bpf syscall We may already have gotten a proper fd struct through fdget(), so whenever we return at the end of an map operation, we need to call fdput(). However, each map operation from syscall side first probes CHECK_ATTR() to verify that unused fields in the bpf_attr union are zero. In case of malformed input, we return with error, but the lookup to the map_fd was already performed at that time, so that we return without an corresponding fdput(). Fix it by performing an fdget() only right before bpf_map_get(). The fdget() invocation on maps in the verifier is not affected. Fixes: db20fd2b0108 ("bpf: add lookup/update/delete/iterate methods to BPF maps") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/syscall.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index dc9b464fefa9..35bac8e8b071 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -155,14 +155,15 @@ static int map_lookup_elem(union bpf_attr *attr) void __user *ukey = u64_to_ptr(attr->key); void __user *uvalue = u64_to_ptr(attr->value); int ufd = attr->map_fd; - struct fd f = fdget(ufd); struct bpf_map *map; void *key, *value, *ptr; + struct fd f; int err; if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) return -EINVAL; + f = fdget(ufd); map = bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); @@ -213,14 +214,15 @@ static int map_update_elem(union bpf_attr *attr) void __user *ukey = u64_to_ptr(attr->key); void __user *uvalue = u64_to_ptr(attr->value); int ufd = attr->map_fd; - struct fd f = fdget(ufd); struct bpf_map *map; void *key, *value; + struct fd f; int err; if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM)) return -EINVAL; + f = fdget(ufd); map = bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); @@ -265,14 +267,15 @@ static int map_delete_elem(union bpf_attr *attr) { void __user *ukey = u64_to_ptr(attr->key); int ufd = attr->map_fd; - struct fd f = fdget(ufd); struct bpf_map *map; + struct fd f; void *key; int err; if (CHECK_ATTR(BPF_MAP_DELETE_ELEM)) return -EINVAL; + f = fdget(ufd); map = bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); @@ -305,14 +308,15 @@ static int map_get_next_key(union bpf_attr *attr) void __user *ukey = u64_to_ptr(attr->key); void __user *unext_key = u64_to_ptr(attr->next_key); int ufd = attr->map_fd; - struct fd f = fdget(ufd); struct bpf_map *map; void *key, *next_key; + struct fd f; int err; if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY)) return -EINVAL; + f = fdget(ufd); map = bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); -- cgit v1.3-6-gb490 From 687f07156b0c99205c21aa4e2986564046d342fe Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 8 Sep 2015 13:40:01 -0700 Subject: bpf: fix out of bounds access in verifier log when the verifier log is enabled the print_bpf_insn() is doing bpf_alu_string[BPF_OP(insn->code) >> 4] and bpf_jmp_string[BPF_OP(insn->code) >> 4] where BPF_OP is a 4-bit instruction opcode. Malformed insns can cause out of bounds access. Fix it by sizing arrays appropriately. The bug was found by clang address sanitizer with libfuzzer. Reported-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ed12e385fb75..b074b23000d6 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -283,7 +283,7 @@ static const char *const bpf_class_string[] = { [BPF_ALU64] = "alu64", }; -static const char *const bpf_alu_string[] = { +static const char *const bpf_alu_string[16] = { [BPF_ADD >> 4] = "+=", [BPF_SUB >> 4] = "-=", [BPF_MUL >> 4] = "*=", @@ -307,7 +307,7 @@ static const char *const bpf_ldst_string[] = { [BPF_DW >> 3] = "u64", }; -static const char *const bpf_jmp_string[] = { +static const char *const bpf_jmp_string[16] = { [BPF_JA >> 4] = "jmp", [BPF_JEQ >> 4] = "==", [BPF_JGT >> 4] = ">", -- cgit v1.3-6-gb490