From faeb2dce084aff92d466c6ce68481989b815435b Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 14 Nov 2019 10:57:08 -0800 Subject: bpf: Add kernel test functions for fentry testing Add few kernel functions with various number of arguments, their types and sizes for BPF trampoline testing to cover different calling conventions. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20191114185720.1641606-9-ast@kernel.org --- net/bpf/test_run.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'net') diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 0be4497cb832..62933279fbba 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -105,6 +105,40 @@ out: return err; } +/* Integer types of various sizes and pointer combinations cover variety of + * architecture dependent calling conventions. 7+ can be supported in the + * future. + */ +int noinline bpf_fentry_test1(int a) +{ + return a + 1; +} + +int noinline bpf_fentry_test2(int a, u64 b) +{ + return a + b; +} + +int noinline bpf_fentry_test3(char a, int b, u64 c) +{ + return a + b + c; +} + +int noinline bpf_fentry_test4(void *a, char b, int c, u64 d) +{ + return (long)a + b + c + d; +} + +int noinline bpf_fentry_test5(u64 a, void *b, short c, int d, u64 e) +{ + return a + (long)b + c + d + e; +} + +int noinline bpf_fentry_test6(u64 a, void *b, short c, int d, void *e, u64 f) +{ + return a + (long)b + c + d + (long)e + f; +} + static void *bpf_test_init(const union bpf_attr *kattr, u32 size, u32 headroom, u32 tailroom) { @@ -122,6 +156,13 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 size, kfree(data); return ERR_PTR(-EFAULT); } + if (bpf_fentry_test1(1) != 2 || + bpf_fentry_test2(2, 3) != 5 || + bpf_fentry_test3(4, 5, 6) != 15 || + bpf_fentry_test4((void *)7, 8, 9, 10) != 34 || + bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 || + bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111) + return ERR_PTR(-EFAULT); return data; } -- cgit v1.2.3-59-g8ed1b From 9cc31b3a092d9bf2a18f09ad77e727ddb42a5b1e Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 14 Nov 2019 10:57:14 -0800 Subject: bpf: Fix race in btf_resolve_helper_id() btf_resolve_helper_id() caching logic is a bit racy, since under root the verifier can verify several programs in parallel. Fix it with READ/WRITE_ONCE. Fix the type as well, since error is also recorded. Fixes: a7658e1a4164 ("bpf: Check types of arguments passed into helpers") Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Song Liu Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20191114185720.1641606-15-ast@kernel.org --- include/linux/bpf.h | 5 +++-- kernel/bpf/btf.c | 26 +++++++++++++++++++++++++- kernel/bpf/verifier.c | 8 +++----- net/core/filter.c | 2 +- 4 files changed, 32 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 0d4c5c224d79..cb5a356381f5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -248,7 +248,7 @@ struct bpf_func_proto { }; enum bpf_arg_type arg_type[5]; }; - u32 *btf_id; /* BTF ids of arguments */ + int *btf_id; /* BTF ids of arguments */ }; /* bpf_context is intentionally undefined structure. Pointer to bpf_context is @@ -881,7 +881,8 @@ int btf_struct_access(struct bpf_verifier_log *log, const struct btf_type *t, int off, int size, enum bpf_access_type atype, u32 *next_btf_id); -u32 btf_resolve_helper_id(struct bpf_verifier_log *log, void *, int); +int btf_resolve_helper_id(struct bpf_verifier_log *log, + const struct bpf_func_proto *fn, int); int btf_distill_func_proto(struct bpf_verifier_log *log, struct btf *btf, diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 9e1164e5b429..033d071eb59c 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3721,7 +3721,8 @@ again: return -EINVAL; } -u32 btf_resolve_helper_id(struct bpf_verifier_log *log, void *fn, int arg) +static int __btf_resolve_helper_id(struct bpf_verifier_log *log, void *fn, + int arg) { char fnname[KSYM_SYMBOL_LEN + 4] = "btf_"; const struct btf_param *args; @@ -3789,6 +3790,29 @@ u32 btf_resolve_helper_id(struct bpf_verifier_log *log, void *fn, int arg) return btf_id; } +int btf_resolve_helper_id(struct bpf_verifier_log *log, + const struct bpf_func_proto *fn, int arg) +{ + int *btf_id = &fn->btf_id[arg]; + int ret; + + if (fn->arg_type[arg] != ARG_PTR_TO_BTF_ID) + return -EINVAL; + + ret = READ_ONCE(*btf_id); + if (ret) + return ret; + /* ok to race the search. The result is the same */ + ret = __btf_resolve_helper_id(log, fn->func, arg); + if (!ret) { + /* Function argument cannot be type 'void' */ + bpf_log(log, "BTF resolution bug\n"); + return -EFAULT; + } + WRITE_ONCE(*btf_id, ret); + return ret; +} + static int __get_type_size(struct btf *btf, u32 btf_id, const struct btf_type **bad_type) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8f89cfa93e88..e78ec7990767 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4147,11 +4147,9 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn meta.func_id = func_id; /* check args */ for (i = 0; i < 5; i++) { - if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID) { - if (!fn->btf_id[i]) - fn->btf_id[i] = btf_resolve_helper_id(&env->log, fn->func, i); - meta.btf_id = fn->btf_id[i]; - } + err = btf_resolve_helper_id(&env->log, fn, i); + if (err > 0) + meta.btf_id = err; err = check_func_arg(env, BPF_REG_1 + i, fn->arg_type[i], &meta); if (err) return err; diff --git a/net/core/filter.c b/net/core/filter.c index fc303abec8fa..f72face90659 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3816,7 +3816,7 @@ static const struct bpf_func_proto bpf_skb_event_output_proto = { .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; -static u32 bpf_skb_output_btf_ids[5]; +static int bpf_skb_output_btf_ids[5]; const struct bpf_func_proto bpf_skb_output_proto = { .func = bpf_skb_event_output, .gpl_only = true, -- cgit v1.2.3-59-g8ed1b From 91cc1a99740e2ed1d903b5906afb470cc5a07379 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 14 Nov 2019 10:57:15 -0800 Subject: bpf: Annotate context types Annotate BPF program context types with program-side type and kernel-side type. This type information is used by the verifier. btf_get_prog_ctx_type() is used in the later patches to verify that BTF type of ctx in BPF program matches to kernel expected ctx type. For example, the XDP program type is: BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp, struct xdp_md, struct xdp_buff) That means that XDP program should be written as: int xdp_prog(struct xdp_md *ctx) { ... } Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20191114185720.1641606-16-ast@kernel.org --- include/linux/bpf.h | 11 ++++- include/linux/bpf_types.h | 78 ++++++++++++++++++++----------- kernel/bpf/btf.c | 114 ++++++++++++++++++++++++++++++++++++++++++++-- kernel/bpf/syscall.c | 4 +- kernel/bpf/verifier.c | 2 +- net/core/filter.c | 10 ---- 6 files changed, 176 insertions(+), 43 deletions(-) (limited to 'net') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index cb5a356381f5..9c48f11fe56e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -747,7 +747,7 @@ DECLARE_PER_CPU(int, bpf_prog_active); extern const struct file_operations bpf_map_fops; extern const struct file_operations bpf_prog_fops; -#define BPF_PROG_TYPE(_id, _name) \ +#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ extern const struct bpf_prog_ops _name ## _prog_ops; \ extern const struct bpf_verifier_ops _name ## _verifier_ops; #define BPF_MAP_TYPE(_id, _ops) \ @@ -1213,6 +1213,15 @@ static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, #endif #ifdef CONFIG_INET +struct sk_reuseport_kern { + struct sk_buff *skb; + struct sock *sk; + struct sock *selected_sk; + void *data_end; + u32 hash; + u32 reuseport_id; + bool bind_inany; +}; bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info); diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index de14872b01ba..93740b3614d7 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -2,42 +2,68 @@ /* internal file - do not include directly */ #ifdef CONFIG_NET -BPF_PROG_TYPE(BPF_PROG_TYPE_SOCKET_FILTER, sk_filter) -BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_CLS, tc_cls_act) -BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act) -BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp) +BPF_PROG_TYPE(BPF_PROG_TYPE_SOCKET_FILTER, sk_filter, + struct __sk_buff, struct sk_buff) +BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_CLS, tc_cls_act, + struct __sk_buff, struct sk_buff) +BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act, + struct __sk_buff, struct sk_buff) +BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp, + struct xdp_md, struct xdp_buff) #ifdef CONFIG_CGROUP_BPF -BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb) -BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock) -BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr) +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb, + struct __sk_buff, struct sk_buff) +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock, + struct bpf_sock, struct sock) +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr, + struct bpf_sock_addr, struct bpf_sock_addr_kern) #endif -BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_in) -BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_out) -BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit) -BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_SEG6LOCAL, lwt_seg6local) -BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb) -BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg) -BPF_PROG_TYPE(BPF_PROG_TYPE_FLOW_DISSECTOR, flow_dissector) +BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_in, + struct __sk_buff, struct sk_buff) +BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_out, + struct __sk_buff, struct sk_buff) +BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit, + struct __sk_buff, struct sk_buff) +BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_SEG6LOCAL, lwt_seg6local, + struct __sk_buff, struct sk_buff) +BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops, + struct bpf_sock_ops, struct bpf_sock_ops_kern) +BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb, + struct __sk_buff, struct sk_buff) +BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg, + struct sk_msg_md, struct sk_msg) +BPF_PROG_TYPE(BPF_PROG_TYPE_FLOW_DISSECTOR, flow_dissector, + struct __sk_buff, struct bpf_flow_dissector) #endif #ifdef CONFIG_BPF_EVENTS -BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe) -BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint) -BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event) -BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint) -BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable) -BPF_PROG_TYPE(BPF_PROG_TYPE_TRACING, tracing) +BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe, + bpf_user_pt_regs_t, struct pt_regs) +BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint, + __u64, u64) +BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event, + struct bpf_perf_event_data, struct bpf_perf_event_data_kern) +BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint, + struct bpf_raw_tracepoint_args, u64) +BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable, + struct bpf_raw_tracepoint_args, u64) +BPF_PROG_TYPE(BPF_PROG_TYPE_TRACING, tracing, + void *, void *) #endif #ifdef CONFIG_CGROUP_BPF -BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev) -BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SYSCTL, cg_sysctl) -BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCKOPT, cg_sockopt) +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev, + struct bpf_cgroup_dev_ctx, struct bpf_cgroup_dev_ctx) +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SYSCTL, cg_sysctl, + struct bpf_sysctl, struct bpf_sysctl_kern) +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCKOPT, cg_sockopt, + struct bpf_sockopt, struct bpf_sockopt_kern) #endif #ifdef CONFIG_BPF_LIRC_MODE2 -BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2) +BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2, + __u32, u32) #endif #ifdef CONFIG_INET -BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport) +BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport, + struct sk_reuseport_md, struct sk_reuseport_kern) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 033d071eb59c..4b7c8bd423d6 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -2,6 +2,8 @@ /* Copyright (c) 2018 Facebook */ #include +#include +#include #include #include #include @@ -16,6 +18,9 @@ #include #include #include +#include +#include +#include /* BTF (BPF Type Format) is the meta data format which describes * the data types of BPF program/map. Hence, it basically focus @@ -3439,13 +3444,98 @@ errout: extern char __weak _binary__btf_vmlinux_bin_start[]; extern char __weak _binary__btf_vmlinux_bin_end[]; +extern struct btf *btf_vmlinux; + +#define BPF_MAP_TYPE(_id, _ops) +static union { + struct bpf_ctx_convert { +#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ + prog_ctx_type _id##_prog; \ + kern_ctx_type _id##_kern; +#include +#undef BPF_PROG_TYPE + } *__t; + /* 't' is written once under lock. Read many times. */ + const struct btf_type *t; +} bpf_ctx_convert; +enum { +#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ + __ctx_convert##_id, +#include +#undef BPF_PROG_TYPE +}; +static u8 bpf_ctx_convert_map[] = { +#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ + [_id] = __ctx_convert##_id, +#include +#undef BPF_PROG_TYPE +}; +#undef BPF_MAP_TYPE + +static const struct btf_member * +btf_get_prog_ctx_type(struct bpf_verifier_log *log, struct btf *btf, + const struct btf_type *t, enum bpf_prog_type prog_type) +{ + const struct btf_type *conv_struct; + const struct btf_type *ctx_struct; + const struct btf_member *ctx_type; + const char *tname, *ctx_tname; + + conv_struct = bpf_ctx_convert.t; + if (!conv_struct) { + bpf_log(log, "btf_vmlinux is malformed\n"); + return NULL; + } + t = btf_type_by_id(btf, t->type); + while (btf_type_is_modifier(t)) + t = btf_type_by_id(btf, t->type); + if (!btf_type_is_struct(t)) { + /* Only pointer to struct is supported for now. + * That means that BPF_PROG_TYPE_TRACEPOINT with BTF + * is not supported yet. + * BPF_PROG_TYPE_RAW_TRACEPOINT is fine. + */ + bpf_log(log, "BPF program ctx type is not a struct\n"); + return NULL; + } + tname = btf_name_by_offset(btf, t->name_off); + if (!tname) { + bpf_log(log, "BPF program ctx struct doesn't have a name\n"); + return NULL; + } + /* prog_type is valid bpf program type. No need for bounds check. */ + ctx_type = btf_type_member(conv_struct) + bpf_ctx_convert_map[prog_type] * 2; + /* ctx_struct is a pointer to prog_ctx_type in vmlinux. + * Like 'struct __sk_buff' + */ + ctx_struct = btf_type_by_id(btf_vmlinux, ctx_type->type); + if (!ctx_struct) + /* should not happen */ + return NULL; + ctx_tname = btf_name_by_offset(btf_vmlinux, ctx_struct->name_off); + if (!ctx_tname) { + /* should not happen */ + bpf_log(log, "Please fix kernel include/linux/bpf_types.h\n"); + return NULL; + } + /* only compare that prog's ctx type name is the same as + * kernel expects. No need to compare field by field. + * It's ok for bpf prog to do: + * struct __sk_buff {}; + * int socket_filter_bpf_prog(struct __sk_buff *skb) + * { // no fields of skb are ever used } + */ + if (strcmp(ctx_tname, tname)) + return NULL; + return ctx_type; +} struct btf *btf_parse_vmlinux(void) { struct btf_verifier_env *env = NULL; struct bpf_verifier_log *log; struct btf *btf = NULL; - int err; + int err, i; env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN); if (!env) @@ -3479,6 +3569,26 @@ struct btf *btf_parse_vmlinux(void) if (err) goto errout; + /* find struct bpf_ctx_convert for type checking later */ + for (i = 1; i <= btf->nr_types; i++) { + const struct btf_type *t; + const char *tname; + + t = btf_type_by_id(btf, i); + if (!__btf_type_is_struct(t)) + continue; + tname = __btf_name_by_offset(btf, t->name_off); + if (!strcmp(tname, "bpf_ctx_convert")) { + /* btf_parse_vmlinux() runs under bpf_verifier_lock */ + bpf_ctx_convert.t = t; + break; + } + } + if (i > btf->nr_types) { + err = -ENOENT; + goto errout; + } + btf_verifier_env_free(env); refcount_set(&btf->refcnt, 1); return btf; @@ -3492,8 +3602,6 @@ errout: return ERR_PTR(err); } -extern struct btf *btf_vmlinux; - bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index e2e37bea86bc..05a0ee75eca0 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -43,7 +43,7 @@ static DEFINE_SPINLOCK(map_idr_lock); int sysctl_unprivileged_bpf_disabled __read_mostly; static const struct bpf_map_ops * const bpf_map_types[] = { -#define BPF_PROG_TYPE(_id, _ops) +#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) #define BPF_MAP_TYPE(_id, _ops) \ [_id] = &_ops, #include @@ -1189,7 +1189,7 @@ err_put: } static const struct bpf_prog_ops * const bpf_prog_types[] = { -#define BPF_PROG_TYPE(_id, _name) \ +#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ [_id] = & _name ## _prog_ops, #define BPF_MAP_TYPE(_id, _ops) #include diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e78ec7990767..7395d6bebefd 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -23,7 +23,7 @@ #include "disasm.h" static const struct bpf_verifier_ops * const bpf_verifier_ops[] = { -#define BPF_PROG_TYPE(_id, _name) \ +#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ [_id] = & _name ## _verifier_ops, #define BPF_MAP_TYPE(_id, _ops) #include diff --git a/net/core/filter.c b/net/core/filter.c index f72face90659..49ded4a7588a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -8684,16 +8684,6 @@ out: } #ifdef CONFIG_INET -struct sk_reuseport_kern { - struct sk_buff *skb; - struct sock *sk; - struct sock *selected_sk; - void *data_end; - u32 hash; - u32 reuseport_id; - bool bind_inany; -}; - static void bpf_init_reuseport_kern(struct sk_reuseport_kern *reuse_kern, struct sock_reuseport *reuse, struct sock *sk, struct sk_buff *skb, -- cgit v1.2.3-59-g8ed1b From 1e0bd5a091e5d9e0f1d5b0e6329b87bb1792f784 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 17 Nov 2019 09:28:02 -0800 Subject: bpf: Switch bpf_map ref counter to atomic64_t so bpf_map_inc() never fails 92117d8443bc ("bpf: fix refcnt overflow") turned refcounting of bpf_map into potentially failing operation, when refcount reaches BPF_MAX_REFCNT limit (32k). Due to using 32-bit counter, it's possible in practice to overflow refcounter and make it wrap around to 0, causing erroneous map free, while there are still references to it, causing use-after-free problems. But having a failing refcounting operations are problematic in some cases. One example is mmap() interface. After establishing initial memory-mapping, user is allowed to arbitrarily map/remap/unmap parts of mapped memory, arbitrarily splitting it into multiple non-contiguous regions. All this happening without any control from the users of mmap subsystem. Rather mmap subsystem sends notifications to original creator of memory mapping through open/close callbacks, which are optionally specified during initial memory mapping creation. These callbacks are used to maintain accurate refcount for bpf_map (see next patch in this series). The problem is that open() callback is not supposed to fail, because memory-mapped resource is set up and properly referenced. This is posing a problem for using memory-mapping with BPF maps. One solution to this is to maintain separate refcount for just memory-mappings and do single bpf_map_inc/bpf_map_put when it goes from/to zero, respectively. There are similar use cases in current work on tcp-bpf, necessitating extra counter as well. This seems like a rather unfortunate and ugly solution that doesn't scale well to various new use cases. Another approach to solve this is to use non-failing refcount_t type, which uses 32-bit counter internally, but, once reaching overflow state at UINT_MAX, stays there. This utlimately causes memory leak, but prevents use after free. But given refcounting is not the most performance-critical operation with BPF maps (it's not used from running BPF program code), we can also just switch to 64-bit counter that can't overflow in practice, potentially disadvantaging 32-bit platforms a tiny bit. This simplifies semantics and allows above described scenarios to not worry about failing refcount increment operation. In terms of struct bpf_map size, we are still good and use the same amount of space: BEFORE (3 cache lines, 8 bytes of padding at the end): struct bpf_map { const struct bpf_map_ops * ops __attribute__((__aligned__(64))); /* 0 8 */ struct bpf_map * inner_map_meta; /* 8 8 */ void * security; /* 16 8 */ enum bpf_map_type map_type; /* 24 4 */ u32 key_size; /* 28 4 */ u32 value_size; /* 32 4 */ u32 max_entries; /* 36 4 */ u32 map_flags; /* 40 4 */ int spin_lock_off; /* 44 4 */ u32 id; /* 48 4 */ int numa_node; /* 52 4 */ u32 btf_key_type_id; /* 56 4 */ u32 btf_value_type_id; /* 60 4 */ /* --- cacheline 1 boundary (64 bytes) --- */ struct btf * btf; /* 64 8 */ struct bpf_map_memory memory; /* 72 16 */ bool unpriv_array; /* 88 1 */ bool frozen; /* 89 1 */ /* XXX 38 bytes hole, try to pack */ /* --- cacheline 2 boundary (128 bytes) --- */ atomic_t refcnt __attribute__((__aligned__(64))); /* 128 4 */ atomic_t usercnt; /* 132 4 */ struct work_struct work; /* 136 32 */ char name[16]; /* 168 16 */ /* size: 192, cachelines: 3, members: 21 */ /* sum members: 146, holes: 1, sum holes: 38 */ /* padding: 8 */ /* forced alignments: 2, forced holes: 1, sum forced holes: 38 */ } __attribute__((__aligned__(64))); AFTER (same 3 cache lines, no extra padding now): struct bpf_map { const struct bpf_map_ops * ops __attribute__((__aligned__(64))); /* 0 8 */ struct bpf_map * inner_map_meta; /* 8 8 */ void * security; /* 16 8 */ enum bpf_map_type map_type; /* 24 4 */ u32 key_size; /* 28 4 */ u32 value_size; /* 32 4 */ u32 max_entries; /* 36 4 */ u32 map_flags; /* 40 4 */ int spin_lock_off; /* 44 4 */ u32 id; /* 48 4 */ int numa_node; /* 52 4 */ u32 btf_key_type_id; /* 56 4 */ u32 btf_value_type_id; /* 60 4 */ /* --- cacheline 1 boundary (64 bytes) --- */ struct btf * btf; /* 64 8 */ struct bpf_map_memory memory; /* 72 16 */ bool unpriv_array; /* 88 1 */ bool frozen; /* 89 1 */ /* XXX 38 bytes hole, try to pack */ /* --- cacheline 2 boundary (128 bytes) --- */ atomic64_t refcnt __attribute__((__aligned__(64))); /* 128 8 */ atomic64_t usercnt; /* 136 8 */ struct work_struct work; /* 144 32 */ char name[16]; /* 176 16 */ /* size: 192, cachelines: 3, members: 21 */ /* sum members: 154, holes: 1, sum holes: 38 */ /* forced alignments: 2, forced holes: 1, sum forced holes: 38 */ } __attribute__((__aligned__(64))); This patch, while modifying all users of bpf_map_inc, also cleans up its interface to match bpf_map_put with separate operations for bpf_map_inc and bpf_map_inc_with_uref (to match bpf_map_put and bpf_map_put_with_uref, respectively). Also, given there are no users of bpf_map_inc_not_zero specifying uref=true, remove uref flag and default to uref=false internally. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20191117172806.2195367-2-andriin@fb.com --- drivers/net/ethernet/netronome/nfp/bpf/offload.c | 4 +- include/linux/bpf.h | 10 ++--- kernel/bpf/inode.c | 2 +- kernel/bpf/map_in_map.c | 2 +- kernel/bpf/syscall.c | 51 ++++++++++-------------- kernel/bpf/verifier.c | 6 +-- kernel/bpf/xskmap.c | 6 +-- net/core/bpf_sk_storage.c | 2 +- 8 files changed, 34 insertions(+), 49 deletions(-) (limited to 'net') diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 88fab6a82acf..06927ba5a3ae 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -46,9 +46,7 @@ nfp_map_ptr_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog, /* Grab a single ref to the map for our record. The prog destroy ndo * happens after free_used_maps(). */ - map = bpf_map_inc(map, false); - if (IS_ERR(map)) - return PTR_ERR(map); + bpf_map_inc(map); record = kmalloc(sizeof(*record), GFP_KERNEL); if (!record) { diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5b81cde47314..34a34445c009 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -103,8 +103,8 @@ struct bpf_map { /* The 3rd and 4th cacheline with misc members to avoid false sharing * particularly with refcounting. */ - atomic_t refcnt ____cacheline_aligned; - atomic_t usercnt; + atomic64_t refcnt ____cacheline_aligned; + atomic64_t usercnt; struct work_struct work; char name[BPF_OBJ_NAME_LEN]; }; @@ -783,9 +783,9 @@ void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock); struct bpf_map *bpf_map_get_with_uref(u32 ufd); struct bpf_map *__bpf_map_get(struct fd f); -struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref); -struct bpf_map * __must_check bpf_map_inc_not_zero(struct bpf_map *map, - bool uref); +void bpf_map_inc(struct bpf_map *map); +void bpf_map_inc_with_uref(struct bpf_map *map); +struct bpf_map * __must_check bpf_map_inc_not_zero(struct bpf_map *map); void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); int bpf_map_charge_memlock(struct bpf_map *map, u32 pages); diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index a70f7209cda3..2f17f24258dc 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -34,7 +34,7 @@ static void *bpf_any_get(void *raw, enum bpf_type type) raw = bpf_prog_inc(raw); break; case BPF_TYPE_MAP: - raw = bpf_map_inc(raw, true); + bpf_map_inc_with_uref(raw); break; default: WARN_ON_ONCE(1); diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c index fab4fb134547..4cbe987be35b 100644 --- a/kernel/bpf/map_in_map.c +++ b/kernel/bpf/map_in_map.c @@ -98,7 +98,7 @@ void *bpf_map_fd_get_ptr(struct bpf_map *map, return inner_map; if (bpf_map_meta_equal(map->inner_map_meta, inner_map)) - inner_map = bpf_map_inc(inner_map, false); + bpf_map_inc(inner_map); else inner_map = ERR_PTR(-EINVAL); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index c88c815c2154..20030751b7a2 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -311,7 +311,7 @@ static void bpf_map_free_deferred(struct work_struct *work) static void bpf_map_put_uref(struct bpf_map *map) { - if (atomic_dec_and_test(&map->usercnt)) { + if (atomic64_dec_and_test(&map->usercnt)) { if (map->ops->map_release_uref) map->ops->map_release_uref(map); } @@ -322,7 +322,7 @@ static void bpf_map_put_uref(struct bpf_map *map) */ static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock) { - if (atomic_dec_and_test(&map->refcnt)) { + if (atomic64_dec_and_test(&map->refcnt)) { /* bpf_map_free_id() must be called first */ bpf_map_free_id(map, do_idr_lock); btf_put(map->btf); @@ -575,8 +575,8 @@ static int map_create(union bpf_attr *attr) if (err) goto free_map; - atomic_set(&map->refcnt, 1); - atomic_set(&map->usercnt, 1); + atomic64_set(&map->refcnt, 1); + atomic64_set(&map->usercnt, 1); if (attr->btf_key_type_id || attr->btf_value_type_id) { struct btf *btf; @@ -653,21 +653,19 @@ struct bpf_map *__bpf_map_get(struct fd f) return f.file->private_data; } -/* prog's and map's refcnt limit */ -#define BPF_MAX_REFCNT 32768 - -struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref) +void bpf_map_inc(struct bpf_map *map) { - if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) { - atomic_dec(&map->refcnt); - return ERR_PTR(-EBUSY); - } - if (uref) - atomic_inc(&map->usercnt); - return map; + atomic64_inc(&map->refcnt); } EXPORT_SYMBOL_GPL(bpf_map_inc); +void bpf_map_inc_with_uref(struct bpf_map *map) +{ + atomic64_inc(&map->refcnt); + atomic64_inc(&map->usercnt); +} +EXPORT_SYMBOL_GPL(bpf_map_inc_with_uref); + struct bpf_map *bpf_map_get_with_uref(u32 ufd) { struct fd f = fdget(ufd); @@ -677,38 +675,30 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd) if (IS_ERR(map)) return map; - map = bpf_map_inc(map, true); + bpf_map_inc_with_uref(map); fdput(f); return map; } /* map_idr_lock should have been held */ -static struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, - bool uref) +static struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref) { int refold; - refold = atomic_fetch_add_unless(&map->refcnt, 1, 0); - - if (refold >= BPF_MAX_REFCNT) { - __bpf_map_put(map, false); - return ERR_PTR(-EBUSY); - } - + refold = atomic64_fetch_add_unless(&map->refcnt, 1, 0); if (!refold) return ERR_PTR(-ENOENT); - if (uref) - atomic_inc(&map->usercnt); + atomic64_inc(&map->usercnt); return map; } -struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map, bool uref) +struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map) { spin_lock_bh(&map_idr_lock); - map = __bpf_map_inc_not_zero(map, uref); + map = __bpf_map_inc_not_zero(map, false); spin_unlock_bh(&map_idr_lock); return map; @@ -1455,6 +1445,9 @@ static struct bpf_prog *____bpf_prog_get(struct fd f) return f.file->private_data; } +/* prog's refcnt limit */ +#define BPF_MAX_REFCNT 32768 + struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i) { if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e9dc95a18d44..9f59f7a19dd0 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -8179,11 +8179,7 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env) * will be used by the valid program until it's unloaded * and all maps are released in free_used_maps() */ - map = bpf_map_inc(map, false); - if (IS_ERR(map)) { - fdput(f); - return PTR_ERR(map); - } + bpf_map_inc(map); aux->map_index = env->used_map_cnt; env->used_maps[env->used_map_cnt++] = map; diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c index da16c30868f3..90c4fce1c981 100644 --- a/kernel/bpf/xskmap.c +++ b/kernel/bpf/xskmap.c @@ -11,10 +11,8 @@ int xsk_map_inc(struct xsk_map *map) { - struct bpf_map *m = &map->map; - - m = bpf_map_inc(m, false); - return PTR_ERR_OR_ZERO(m); + bpf_map_inc(&map->map); + return 0; } void xsk_map_put(struct xsk_map *map) diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index da5639a5bd3b..458be6b3eda9 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -798,7 +798,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk) * Try to grab map refcnt to make sure that it's still * alive and prevent concurrent removal. */ - map = bpf_map_inc_not_zero(&smap->map, false); + map = bpf_map_inc_not_zero(&smap->map); if (IS_ERR(map)) continue; -- cgit v1.2.3-59-g8ed1b From a25ecd9d1e60241df905b29fb84765eb74545c4f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 18 Nov 2019 11:40:59 +0000 Subject: bpf: Fix memory leak on object 'data' The error return path on when bpf_fentry_test* tests fail does not kfree 'data'. Fix this by adding the missing kfree. Addresses-Coverity: ("Resource leak") Fixes: faeb2dce084a ("bpf: Add kernel test functions for fentry testing") Signed-off-by: Colin Ian King Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191118114059.37287-1-colin.king@canonical.com --- net/bpf/test_run.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 62933279fbba..915c2d6f7fb9 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -161,8 +161,10 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 size, bpf_fentry_test3(4, 5, 6) != 15 || bpf_fentry_test4((void *)7, 8, 9, 10) != 34 || bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 || - bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111) + bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111) { + kfree(data); return ERR_PTR(-EFAULT); + } return data; } -- cgit v1.2.3-59-g8ed1b