From e647815a4d3b3be9d85b5750ed0f2947fd78fac7 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Thu, 8 Nov 2018 04:08:42 -0500 Subject: bpf: let verifier to calculate and record max_pkt_offset In check_packet_access, update max_pkt_offset after the offset has passed __check_packet_access. It should be safe to use u32 for max_pkt_offset as explained in code comment. Also, when there is tail call, the max_pkt_offset of the called program is unknown, so conservatively set max_pkt_offset to MAX_PACKET_OFF for such case. Reviewed-by: Jakub Kicinski Signed-off-by: Jiong Wang Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 33014ae73103..b6a296e01f6a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -293,6 +293,7 @@ struct bpf_prog_aux { atomic_t refcnt; u32 used_map_cnt; u32 max_ctx_offset; + u32 max_pkt_offset; u32 stack_depth; u32 id; u32 func_cnt; -- cgit v1.2.3-59-g8ed1b From c8123ead13a5c92dc5fd15c0fdfe88eef41e6ac1 Mon Sep 17 00:00:00 2001 From: Nitin Hande Date: Sun, 28 Oct 2018 21:02:45 -0700 Subject: bpf: Extend the sk_lookup() helper to XDP hookpoint. This patch proposes to extend the sk_lookup() BPF API to the XDP hookpoint. The sk_lookup() helper supports a lookup on incoming packet to find the corresponding socket that will receive this packet. Current support for this BPF API is at the tc hookpoint. This patch will extend this API at XDP hookpoint. A XDP program can map the incoming packet to the 5-tuple parameter and invoke the API to find the corresponding socket structure. Signed-off-by: Nitin Hande Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 4 ++ net/core/filter.c | 105 ++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 90 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 852dc17ab47a..47d606d744cc 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2201,6 +2201,8 @@ union bpf_attr { * **CONFIG_NET** configuration option. * Return * Pointer to *struct bpf_sock*, or NULL in case of failure. + * For sockets with reuseport option, *struct bpf_sock* + * return is from reuse->socks[] using hash of the packet. * * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) * Description @@ -2233,6 +2235,8 @@ union bpf_attr { * **CONFIG_NET** configuration option. * Return * Pointer to *struct bpf_sock*, or NULL in case of failure. + * For sockets with reuseport option, *struct bpf_sock* + * return is from reuse->socks[] using hash of the packet. * * int bpf_sk_release(struct bpf_sock *sk) * Description diff --git a/net/core/filter.c b/net/core/filter.c index ba97a6bee6f9..53d50fb75ea1 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4845,38 +4845,32 @@ static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = { #ifdef CONFIG_INET static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, - struct sk_buff *skb, u8 family, u8 proto) + int dif, int sdif, u8 family, u8 proto) { bool refcounted = false; struct sock *sk = NULL; - int dif = 0; - - if (skb->dev) - dif = skb->dev->ifindex; if (family == AF_INET) { __be32 src4 = tuple->ipv4.saddr; __be32 dst4 = tuple->ipv4.daddr; - int sdif = inet_sdif(skb); if (proto == IPPROTO_TCP) - sk = __inet_lookup(net, &tcp_hashinfo, skb, 0, + sk = __inet_lookup(net, &tcp_hashinfo, NULL, 0, src4, tuple->ipv4.sport, dst4, tuple->ipv4.dport, dif, sdif, &refcounted); else sk = __udp4_lib_lookup(net, src4, tuple->ipv4.sport, dst4, tuple->ipv4.dport, - dif, sdif, &udp_table, skb); + dif, sdif, &udp_table, NULL); #if IS_ENABLED(CONFIG_IPV6) } else { struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr; struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr; u16 hnum = ntohs(tuple->ipv6.dport); - int sdif = inet6_sdif(skb); if (proto == IPPROTO_TCP) - sk = __inet6_lookup(net, &tcp_hashinfo, skb, 0, + sk = __inet6_lookup(net, &tcp_hashinfo, NULL, 0, src6, tuple->ipv6.sport, dst6, hnum, dif, sdif, &refcounted); @@ -4885,7 +4879,7 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, src6, tuple->ipv6.sport, dst6, hnum, dif, sdif, - &udp_table, skb); + &udp_table, NULL); #endif } @@ -4902,31 +4896,33 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, * callers to satisfy BPF_CALL declarations. */ static unsigned long -bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, - u8 proto, u64 netns_id, u64 flags) +__bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, + struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id, + u64 flags) { - struct net *caller_net; struct sock *sk = NULL; u8 family = AF_UNSPEC; struct net *net; + int sdif; family = len == sizeof(tuple->ipv4) ? AF_INET : AF_INET6; if (unlikely(family == AF_UNSPEC || netns_id > U32_MAX || flags)) goto out; - if (skb->dev) - caller_net = dev_net(skb->dev); + if (family == AF_INET) + sdif = inet_sdif(skb); else - caller_net = sock_net(skb->sk); + sdif = inet6_sdif(skb); + if (netns_id) { net = get_net_ns_by_id(caller_net, netns_id); if (unlikely(!net)) goto out; - sk = sk_lookup(net, tuple, skb, family, proto); + sk = sk_lookup(net, tuple, ifindex, sdif, family, proto); put_net(net); } else { net = caller_net; - sk = sk_lookup(net, tuple, skb, family, proto); + sk = sk_lookup(net, tuple, ifindex, sdif, family, proto); } if (sk) @@ -4935,6 +4931,25 @@ out: return (unsigned long) sk; } +static unsigned long +bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, + u8 proto, u64 netns_id, u64 flags) +{ + struct net *caller_net; + int ifindex; + + if (skb->dev) { + caller_net = dev_net(skb->dev); + ifindex = skb->dev->ifindex; + } else { + caller_net = sock_net(skb->sk); + ifindex = 0; + } + + return __bpf_sk_lookup(skb, tuple, len, caller_net, ifindex, + proto, netns_id, flags); +} + BPF_CALL_5(bpf_sk_lookup_tcp, struct sk_buff *, skb, struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) { @@ -4984,6 +4999,50 @@ static const struct bpf_func_proto bpf_sk_release_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_SOCKET, }; + +BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx, + struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags) +{ + struct net *caller_net = dev_net(ctx->rxq->dev); + int ifindex = ctx->rxq->dev->ifindex; + + return __bpf_sk_lookup(NULL, tuple, len, caller_net, ifindex, + IPPROTO_UDP, netns_id, flags); +} + +static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = { + .func = bpf_xdp_sk_lookup_udp, + .gpl_only = false, + .pkt_access = true, + .ret_type = RET_PTR_TO_SOCKET_OR_NULL, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +}; + +BPF_CALL_5(bpf_xdp_sk_lookup_tcp, struct xdp_buff *, ctx, + struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags) +{ + struct net *caller_net = dev_net(ctx->rxq->dev); + int ifindex = ctx->rxq->dev->ifindex; + + return __bpf_sk_lookup(NULL, tuple, len, caller_net, ifindex, + IPPROTO_TCP, netns_id, flags); +} + +static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = { + .func = bpf_xdp_sk_lookup_tcp, + .gpl_only = false, + .pkt_access = true, + .ret_type = RET_PTR_TO_SOCKET_OR_NULL, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +}; #endif /* CONFIG_INET */ bool bpf_helper_changes_pkt_data(void *func) @@ -5234,6 +5293,14 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_xdp_adjust_tail_proto; case BPF_FUNC_fib_lookup: return &bpf_xdp_fib_lookup_proto; +#ifdef CONFIG_INET + case BPF_FUNC_sk_lookup_udp: + return &bpf_xdp_sk_lookup_udp_proto; + case BPF_FUNC_sk_lookup_tcp: + return &bpf_xdp_sk_lookup_tcp_proto; + case BPF_FUNC_sk_release: + return &bpf_sk_release_proto; +#endif default: return bpf_base_func_proto(func_id); } -- cgit v1.2.3-59-g8ed1b From 1385d755cfb42f596ef1cf9f5c761010ff3b34e7 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Fri, 9 Nov 2018 13:03:25 +0000 Subject: bpf: pass a struct with offload callbacks to bpf_offload_dev_create() For passing device functions for offloaded eBPF programs, there used to be no place where to store the pointer without making the non-offloaded programs pay a memory price. As a consequence, three functions were called with ndo_bpf() through specific commands. Now that we have struct bpf_offload_dev, and since none of those operations rely on RTNL, we can turn these three commands into hooks inside the struct bpf_prog_offload_ops, and pass them as part of bpf_offload_dev_create(). This commit effectively passes a pointer to the struct to bpf_offload_dev_create(). We temporarily have two struct bpf_prog_offload_ops instances, one under offdev->ops and one under offload->dev_ops. The next patches will make the transition towards the former, so that offload->dev_ops can be removed, and callbacks relying on ndo_bpf() added to offdev->ops as well. While at it, rename "nfp_bpf_analyzer_ops" as "nfp_bpf_dev_ops" (and similarly for netdevsim). Suggested-by: Jakub Kicinski Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- drivers/net/ethernet/netronome/nfp/bpf/main.c | 2 +- drivers/net/ethernet/netronome/nfp/bpf/main.h | 2 +- drivers/net/ethernet/netronome/nfp/bpf/offload.c | 4 ++-- drivers/net/netdevsim/bpf.c | 6 +++--- include/linux/bpf.h | 3 ++- kernel/bpf/offload.c | 5 ++++- 6 files changed, 13 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index 6243af0ab025..dccae0319204 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -465,7 +465,7 @@ static int nfp_bpf_init(struct nfp_app *app) app->ctrl_mtu = nfp_bpf_ctrl_cmsg_mtu(bpf); } - bpf->bpf_dev = bpf_offload_dev_create(); + bpf->bpf_dev = bpf_offload_dev_create(&nfp_bpf_dev_ops); err = PTR_ERR_OR_ZERO(bpf->bpf_dev); if (err) goto err_free_neutral_maps; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index abdd93d14439..941277936475 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -513,7 +513,7 @@ int nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx); int nfp_bpf_finalize(struct bpf_verifier_env *env); -extern const struct bpf_prog_offload_ops nfp_bpf_analyzer_ops; +extern const struct bpf_prog_offload_ops nfp_bpf_dev_ops; struct netdev_bpf; struct nfp_app; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index dc548bb4089e..2fca996a7e77 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -209,7 +209,7 @@ nfp_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn, goto err_free; nfp_prog->verifier_meta = nfp_prog_first_meta(nfp_prog); - bpf->verifier.ops = &nfp_bpf_analyzer_ops; + bpf->verifier.ops = &nfp_bpf_dev_ops; return 0; @@ -602,7 +602,7 @@ int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog, return 0; } -const struct bpf_prog_offload_ops nfp_bpf_analyzer_ops = { +const struct bpf_prog_offload_ops nfp_bpf_dev_ops = { .insn_hook = nfp_verify_insn, .finalize = nfp_bpf_finalize, }; diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index cb3518474f0e..135aee864162 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -91,7 +91,7 @@ static int nsim_bpf_finalize(struct bpf_verifier_env *env) return 0; } -static const struct bpf_prog_offload_ops nsim_bpf_analyzer_ops = { +static const struct bpf_prog_offload_ops nsim_bpf_dev_ops = { .insn_hook = nsim_bpf_verify_insn, .finalize = nsim_bpf_finalize, }; @@ -547,7 +547,7 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf) if (err) return err; - bpf->verifier.ops = &nsim_bpf_analyzer_ops; + bpf->verifier.ops = &nsim_bpf_dev_ops; return 0; case BPF_OFFLOAD_TRANSLATE: state = bpf->offload.prog->aux->offload->dev_priv; @@ -599,7 +599,7 @@ int nsim_bpf_init(struct netdevsim *ns) if (IS_ERR_OR_NULL(ns->sdev->ddir_bpf_bound_progs)) return -ENOMEM; - ns->sdev->bpf_dev = bpf_offload_dev_create(); + ns->sdev->bpf_dev = bpf_offload_dev_create(&nsim_bpf_dev_ops); err = PTR_ERR_OR_ZERO(ns->sdev->bpf_dev); if (err) return err; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b6a296e01f6a..c0197c37b2b2 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -692,7 +692,8 @@ int bpf_map_offload_get_next_key(struct bpf_map *map, bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map); -struct bpf_offload_dev *bpf_offload_dev_create(void); +struct bpf_offload_dev * +bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops); void bpf_offload_dev_destroy(struct bpf_offload_dev *offdev); int bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev, struct net_device *netdev); diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 8e93c47f0779..d513fbf9ca53 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -33,6 +33,7 @@ static DECLARE_RWSEM(bpf_devs_lock); struct bpf_offload_dev { + const struct bpf_prog_offload_ops *ops; struct list_head netdevs; }; @@ -655,7 +656,8 @@ unlock: } EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_unregister); -struct bpf_offload_dev *bpf_offload_dev_create(void) +struct bpf_offload_dev * +bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops) { struct bpf_offload_dev *offdev; int err; @@ -673,6 +675,7 @@ struct bpf_offload_dev *bpf_offload_dev_create(void) if (!offdev) return ERR_PTR(-ENOMEM); + offdev->ops = ops; INIT_LIST_HEAD(&offdev->netdevs); return offdev; -- cgit v1.2.3-59-g8ed1b From 341b3e7b7b89315c43d262da3199098bcf9bbe57 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Fri, 9 Nov 2018 13:03:26 +0000 Subject: bpf: call verify_insn from its callback in struct bpf_offload_dev We intend to remove the dev_ops in struct bpf_prog_offload, and to only keep the ops in struct bpf_offload_dev instead, which is accessible from more locations for passing function pointers. But dev_ops is used for calling the verify_insn hook. Switch to the newly added ops in struct bpf_prog_offload instead. To avoid table lookups for each eBPF instruction to verify, we remember the offdev attached to a netdev and modify bpf_offload_find_netdev() to avoid performing more than once a lookup for a given offload object. Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + kernel/bpf/offload.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c0197c37b2b2..672714cd904f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -273,6 +273,7 @@ struct bpf_prog_offload_ops { struct bpf_prog_offload { struct bpf_prog *prog; struct net_device *netdev; + struct bpf_offload_dev *offdev; void *dev_priv; struct list_head offloads; bool dev_state; diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index d513fbf9ca53..2cd3c0d0417b 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -107,6 +107,7 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr) err = -EINVAL; goto err_unlock; } + offload->offdev = ondev->offdev; prog->aux->offload = offload; list_add_tail(&offload->offloads, &ondev->progs); dev_put(offload->netdev); @@ -167,7 +168,8 @@ int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env, down_read(&bpf_devs_lock); offload = env->prog->aux->offload; if (offload) - ret = offload->dev_ops->insn_hook(env, insn_idx, prev_insn_idx); + ret = offload->offdev->ops->insn_hook(env, insn_idx, + prev_insn_idx); up_read(&bpf_devs_lock); return ret; -- cgit v1.2.3-59-g8ed1b From 00db12c3d141356a4d1e6b6f688e0d5ed3b1f757 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Fri, 9 Nov 2018 13:03:28 +0000 Subject: bpf: call verifier_prep from its callback in struct bpf_offload_dev In a way similar to the change previously brought to the verify_insn hook and to the finalize callback, switch to the newly added ops in struct bpf_prog_offload for calling the functions used to prepare driver verifiers. Since the dev_ops pointer in struct bpf_prog_offload is no longer used by any callback, we can now remove it from struct bpf_prog_offload. Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- drivers/net/ethernet/netronome/nfp/bpf/offload.c | 11 ++++---- drivers/net/netdevsim/bpf.c | 32 +++++++++++++----------- include/linux/bpf.h | 2 +- include/linux/netdevice.h | 6 ----- kernel/bpf/offload.c | 22 +++++++--------- 5 files changed, 32 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 2fca996a7e77..16a3a9c55852 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -188,10 +188,11 @@ static void nfp_prog_free(struct nfp_prog *nfp_prog) } static int -nfp_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn, - struct netdev_bpf *bpf) +nfp_bpf_verifier_prep(struct net_device *netdev, struct bpf_verifier_env *env) { - struct bpf_prog *prog = bpf->verifier.prog; + struct nfp_net *nn = netdev_priv(netdev); + struct bpf_prog *prog = env->prog; + struct nfp_app *app = nn->app; struct nfp_prog *nfp_prog; int ret; @@ -209,7 +210,6 @@ nfp_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn, goto err_free; nfp_prog->verifier_meta = nfp_prog_first_meta(nfp_prog); - bpf->verifier.ops = &nfp_bpf_dev_ops; return 0; @@ -422,8 +422,6 @@ nfp_bpf_map_free(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap) int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn, struct netdev_bpf *bpf) { switch (bpf->command) { - case BPF_OFFLOAD_VERIFIER_PREP: - return nfp_bpf_verifier_prep(app, nn, bpf); case BPF_OFFLOAD_TRANSLATE: return nfp_bpf_translate(nn, bpf->offload.prog); case BPF_OFFLOAD_DESTROY: @@ -605,4 +603,5 @@ int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog, const struct bpf_prog_offload_ops nfp_bpf_dev_ops = { .insn_hook = nfp_verify_insn, .finalize = nfp_bpf_finalize, + .prepare = nfp_bpf_verifier_prep, }; diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index 135aee864162..d045b7d666d9 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -91,11 +91,6 @@ static int nsim_bpf_finalize(struct bpf_verifier_env *env) return 0; } -static const struct bpf_prog_offload_ops nsim_bpf_dev_ops = { - .insn_hook = nsim_bpf_verify_insn, - .finalize = nsim_bpf_finalize, -}; - static bool nsim_xdp_offload_active(struct netdevsim *ns) { return ns->xdp_hw.prog; @@ -263,6 +258,17 @@ static int nsim_bpf_create_prog(struct netdevsim *ns, struct bpf_prog *prog) return 0; } +static int +nsim_bpf_verifier_prep(struct net_device *dev, struct bpf_verifier_env *env) +{ + struct netdevsim *ns = netdev_priv(dev); + + if (!ns->bpf_bind_accept) + return -EOPNOTSUPP; + + return nsim_bpf_create_prog(ns, env->prog); +} + static void nsim_bpf_destroy_prog(struct bpf_prog *prog) { struct nsim_bpf_bound_prog *state; @@ -275,6 +281,12 @@ static void nsim_bpf_destroy_prog(struct bpf_prog *prog) kfree(state); } +static const struct bpf_prog_offload_ops nsim_bpf_dev_ops = { + .insn_hook = nsim_bpf_verify_insn, + .finalize = nsim_bpf_finalize, + .prepare = nsim_bpf_verifier_prep, +}; + static int nsim_setup_prog_checks(struct netdevsim *ns, struct netdev_bpf *bpf) { if (bpf->prog && bpf->prog->aux->offload) { @@ -539,16 +551,6 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf) ASSERT_RTNL(); switch (bpf->command) { - case BPF_OFFLOAD_VERIFIER_PREP: - if (!ns->bpf_bind_accept) - return -EOPNOTSUPP; - - err = nsim_bpf_create_prog(ns, bpf->verifier.prog); - if (err) - return err; - - bpf->verifier.ops = &nsim_bpf_dev_ops; - return 0; case BPF_OFFLOAD_TRANSLATE: state = bpf->offload.prog->aux->offload->dev_priv; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 672714cd904f..f250494a4f56 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -268,6 +268,7 @@ struct bpf_prog_offload_ops { int (*insn_hook)(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx); int (*finalize)(struct bpf_verifier_env *env); + int (*prepare)(struct net_device *netdev, struct bpf_verifier_env *env); }; struct bpf_prog_offload { @@ -277,7 +278,6 @@ struct bpf_prog_offload { void *dev_priv; struct list_head offloads; bool dev_state; - const struct bpf_prog_offload_ops *dev_ops; void *jited_image; u32 jited_len; }; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 857f8abf7b91..0fa2c2744928 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -863,7 +863,6 @@ enum bpf_netdev_command { XDP_QUERY_PROG, XDP_QUERY_PROG_HW, /* BPF program for offload callbacks, invoked at program load time. */ - BPF_OFFLOAD_VERIFIER_PREP, BPF_OFFLOAD_TRANSLATE, BPF_OFFLOAD_DESTROY, BPF_OFFLOAD_MAP_ALLOC, @@ -891,11 +890,6 @@ struct netdev_bpf { /* flags with which program was installed */ u32 prog_flags; }; - /* BPF_OFFLOAD_VERIFIER_PREP */ - struct { - struct bpf_prog *prog; - const struct bpf_prog_offload_ops *ops; /* callee set */ - } verifier; /* BPF_OFFLOAD_TRANSLATE, BPF_OFFLOAD_DESTROY */ struct { struct bpf_prog *prog; diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 2c88cb4ddfd8..1f7ac00a494d 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -142,21 +142,17 @@ static int __bpf_offload_ndo(struct bpf_prog *prog, enum bpf_netdev_command cmd, int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env) { - struct netdev_bpf data = {}; - int err; - - data.verifier.prog = env->prog; + struct bpf_prog_offload *offload; + int ret = -ENODEV; - rtnl_lock(); - err = __bpf_offload_ndo(env->prog, BPF_OFFLOAD_VERIFIER_PREP, &data); - if (err) - goto exit_unlock; + down_read(&bpf_devs_lock); + offload = env->prog->aux->offload; + if (offload) + ret = offload->offdev->ops->prepare(offload->netdev, env); + offload->dev_state = !ret; + up_read(&bpf_devs_lock); - env->prog->aux->offload->dev_ops = data.verifier.ops; - env->prog->aux->offload->dev_state = true; -exit_unlock: - rtnl_unlock(); - return err; + return ret; } int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env, -- cgit v1.2.3-59-g8ed1b From b07ade27e93360197e453e5ca80eebdc9099dcb5 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Fri, 9 Nov 2018 13:03:29 +0000 Subject: bpf: pass translate() as a callback and remove its ndo_bpf subcommand As part of the transition from ndo_bpf() to callbacks attached to struct bpf_offload_dev for some of the eBPF offload operations, move the functions related to code translation to the struct and remove the subcommand that was used to call them through the NDO. Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- drivers/net/ethernet/netronome/nfp/bpf/offload.c | 11 +++-------- drivers/net/netdevsim/bpf.c | 14 +++++++++----- include/linux/bpf.h | 1 + include/linux/netdevice.h | 3 +-- kernel/bpf/offload.c | 14 +++++++------- 5 files changed, 21 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 16a3a9c55852..8653a2189c19 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -33,9 +33,6 @@ nfp_map_ptr_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog, struct nfp_bpf_neutral_map *record; int err; - /* Map record paths are entered via ndo, update side is protected. */ - ASSERT_RTNL(); - /* Reuse path - other offloaded program is already tracking this map. */ record = rhashtable_lookup_fast(&bpf->maps_neutral, &map->id, nfp_bpf_maps_neutral_params); @@ -84,8 +81,6 @@ nfp_map_ptrs_forget(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog) bool freed = false; int i; - ASSERT_RTNL(); - for (i = 0; i < nfp_prog->map_records_cnt; i++) { if (--nfp_prog->map_records[i]->count) { nfp_prog->map_records[i] = NULL; @@ -219,9 +214,10 @@ err_free: return ret; } -static int nfp_bpf_translate(struct nfp_net *nn, struct bpf_prog *prog) +static int nfp_bpf_translate(struct net_device *netdev, struct bpf_prog *prog) { struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; + struct nfp_net *nn = netdev_priv(netdev); unsigned int max_instr; int err; @@ -422,8 +418,6 @@ nfp_bpf_map_free(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap) int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn, struct netdev_bpf *bpf) { switch (bpf->command) { - case BPF_OFFLOAD_TRANSLATE: - return nfp_bpf_translate(nn, bpf->offload.prog); case BPF_OFFLOAD_DESTROY: return nfp_bpf_destroy(nn, bpf->offload.prog); case BPF_OFFLOAD_MAP_ALLOC: @@ -604,4 +598,5 @@ const struct bpf_prog_offload_ops nfp_bpf_dev_ops = { .insn_hook = nfp_verify_insn, .finalize = nfp_bpf_finalize, .prepare = nfp_bpf_verifier_prep, + .translate = nfp_bpf_translate, }; diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index d045b7d666d9..30c2cd516d1c 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -269,6 +269,14 @@ nsim_bpf_verifier_prep(struct net_device *dev, struct bpf_verifier_env *env) return nsim_bpf_create_prog(ns, env->prog); } +static int nsim_bpf_translate(struct net_device *dev, struct bpf_prog *prog) +{ + struct nsim_bpf_bound_prog *state = prog->aux->offload->dev_priv; + + state->state = "xlated"; + return 0; +} + static void nsim_bpf_destroy_prog(struct bpf_prog *prog) { struct nsim_bpf_bound_prog *state; @@ -285,6 +293,7 @@ static const struct bpf_prog_offload_ops nsim_bpf_dev_ops = { .insn_hook = nsim_bpf_verify_insn, .finalize = nsim_bpf_finalize, .prepare = nsim_bpf_verifier_prep, + .translate = nsim_bpf_translate, }; static int nsim_setup_prog_checks(struct netdevsim *ns, struct netdev_bpf *bpf) @@ -551,11 +560,6 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf) ASSERT_RTNL(); switch (bpf->command) { - case BPF_OFFLOAD_TRANSLATE: - state = bpf->offload.prog->aux->offload->dev_priv; - - state->state = "xlated"; - return 0; case BPF_OFFLOAD_DESTROY: nsim_bpf_destroy_prog(bpf->offload.prog); return 0; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f250494a4f56..d1eb3c8a3fa9 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -269,6 +269,7 @@ struct bpf_prog_offload_ops { int insn_idx, int prev_insn_idx); int (*finalize)(struct bpf_verifier_env *env); int (*prepare)(struct net_device *netdev, struct bpf_verifier_env *env); + int (*translate)(struct net_device *netdev, struct bpf_prog *prog); }; struct bpf_prog_offload { diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0fa2c2744928..27499127e038 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -863,7 +863,6 @@ enum bpf_netdev_command { XDP_QUERY_PROG, XDP_QUERY_PROG_HW, /* BPF program for offload callbacks, invoked at program load time. */ - BPF_OFFLOAD_TRANSLATE, BPF_OFFLOAD_DESTROY, BPF_OFFLOAD_MAP_ALLOC, BPF_OFFLOAD_MAP_FREE, @@ -890,7 +889,7 @@ struct netdev_bpf { /* flags with which program was installed */ u32 prog_flags; }; - /* BPF_OFFLOAD_TRANSLATE, BPF_OFFLOAD_DESTROY */ + /* BPF_OFFLOAD_DESTROY */ struct { struct bpf_prog *prog; } offload; diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 1f7ac00a494d..ae0167366c12 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -219,14 +219,14 @@ void bpf_prog_offload_destroy(struct bpf_prog *prog) static int bpf_prog_offload_translate(struct bpf_prog *prog) { - struct netdev_bpf data = {}; - int ret; - - data.offload.prog = prog; + struct bpf_prog_offload *offload; + int ret = -ENODEV; - rtnl_lock(); - ret = __bpf_offload_ndo(prog, BPF_OFFLOAD_TRANSLATE, &data); - rtnl_unlock(); + down_read(&bpf_devs_lock); + offload = prog->aux->offload; + if (offload) + ret = offload->offdev->ops->translate(offload->netdev, prog); + up_read(&bpf_devs_lock); return ret; } -- cgit v1.2.3-59-g8ed1b From eb9119471efbf730c8f830f706026b486eb701dd Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Fri, 9 Nov 2018 13:03:30 +0000 Subject: bpf: pass destroy() as a callback and remove its ndo_bpf subcommand As part of the transition from ndo_bpf() to callbacks attached to struct bpf_offload_dev for some of the eBPF offload operations, move the functions related to program destruction to the struct and remove the subcommand that was used to call them through the NDO. Remove function __bpf_offload_ndo(), which is no longer used. Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- drivers/net/ethernet/netronome/nfp/bpf/offload.c | 7 ++----- drivers/net/netdevsim/bpf.c | 4 +--- include/linux/bpf.h | 1 + include/linux/netdevice.h | 5 ----- kernel/bpf/offload.c | 24 +----------------------- 5 files changed, 5 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 8653a2189c19..91085cc3c843 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -238,15 +238,13 @@ static int nfp_bpf_translate(struct net_device *netdev, struct bpf_prog *prog) return nfp_map_ptrs_record(nfp_prog->bpf, nfp_prog, prog); } -static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog) +static void nfp_bpf_destroy(struct bpf_prog *prog) { struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; kvfree(nfp_prog->prog); nfp_map_ptrs_forget(nfp_prog->bpf, nfp_prog); nfp_prog_free(nfp_prog); - - return 0; } /* Atomic engine requires values to be in big endian, we need to byte swap @@ -418,8 +416,6 @@ nfp_bpf_map_free(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap) int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn, struct netdev_bpf *bpf) { switch (bpf->command) { - case BPF_OFFLOAD_DESTROY: - return nfp_bpf_destroy(nn, bpf->offload.prog); case BPF_OFFLOAD_MAP_ALLOC: return nfp_bpf_map_alloc(app->priv, bpf->offmap); case BPF_OFFLOAD_MAP_FREE: @@ -599,4 +595,5 @@ const struct bpf_prog_offload_ops nfp_bpf_dev_ops = { .finalize = nfp_bpf_finalize, .prepare = nfp_bpf_verifier_prep, .translate = nfp_bpf_translate, + .destroy = nfp_bpf_destroy, }; diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index 30c2cd516d1c..33e3d54c3a0a 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -294,6 +294,7 @@ static const struct bpf_prog_offload_ops nsim_bpf_dev_ops = { .finalize = nsim_bpf_finalize, .prepare = nsim_bpf_verifier_prep, .translate = nsim_bpf_translate, + .destroy = nsim_bpf_destroy_prog, }; static int nsim_setup_prog_checks(struct netdevsim *ns, struct netdev_bpf *bpf) @@ -560,9 +561,6 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf) ASSERT_RTNL(); switch (bpf->command) { - case BPF_OFFLOAD_DESTROY: - nsim_bpf_destroy_prog(bpf->offload.prog); - return 0; case XDP_QUERY_PROG: return xdp_attachment_query(&ns->xdp, bpf); case XDP_QUERY_PROG_HW: diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d1eb3c8a3fa9..867d2801db64 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -270,6 +270,7 @@ struct bpf_prog_offload_ops { int (*finalize)(struct bpf_verifier_env *env); int (*prepare)(struct net_device *netdev, struct bpf_verifier_env *env); int (*translate)(struct net_device *netdev, struct bpf_prog *prog); + void (*destroy)(struct bpf_prog *prog); }; struct bpf_prog_offload { diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 27499127e038..17d52a647fe5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -863,7 +863,6 @@ enum bpf_netdev_command { XDP_QUERY_PROG, XDP_QUERY_PROG_HW, /* BPF program for offload callbacks, invoked at program load time. */ - BPF_OFFLOAD_DESTROY, BPF_OFFLOAD_MAP_ALLOC, BPF_OFFLOAD_MAP_FREE, XDP_QUERY_XSK_UMEM, @@ -889,10 +888,6 @@ struct netdev_bpf { /* flags with which program was installed */ u32 prog_flags; }; - /* BPF_OFFLOAD_DESTROY */ - struct { - struct bpf_prog *prog; - } offload; /* BPF_OFFLOAD_MAP_ALLOC, BPF_OFFLOAD_MAP_FREE */ struct { struct bpf_offloaded_map *offmap; diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index ae0167366c12..d665e75a0ac3 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -123,23 +123,6 @@ err_maybe_put: return err; } -static int __bpf_offload_ndo(struct bpf_prog *prog, enum bpf_netdev_command cmd, - struct netdev_bpf *data) -{ - struct bpf_prog_offload *offload = prog->aux->offload; - struct net_device *netdev; - - ASSERT_RTNL(); - - if (!offload) - return -ENODEV; - netdev = offload->netdev; - - data->command = cmd; - - return netdev->netdev_ops->ndo_bpf(netdev, data); -} - int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env) { struct bpf_prog_offload *offload; @@ -192,12 +175,9 @@ int bpf_prog_offload_finalize(struct bpf_verifier_env *env) static void __bpf_prog_offload_destroy(struct bpf_prog *prog) { struct bpf_prog_offload *offload = prog->aux->offload; - struct netdev_bpf data = {}; - - data.offload.prog = prog; if (offload->dev_state) - WARN_ON(__bpf_offload_ndo(prog, BPF_OFFLOAD_DESTROY, &data)); + offload->offdev->ops->destroy(prog); /* Make sure BPF_PROG_GET_NEXT_ID can't find this dead program */ bpf_prog_free_id(prog, true); @@ -209,12 +189,10 @@ static void __bpf_prog_offload_destroy(struct bpf_prog *prog) void bpf_prog_offload_destroy(struct bpf_prog *prog) { - rtnl_lock(); down_write(&bpf_devs_lock); if (prog->aux->offload) __bpf_prog_offload_destroy(prog); up_write(&bpf_devs_lock); - rtnl_unlock(); } static int bpf_prog_offload_translate(struct bpf_prog *prog) -- cgit v1.2.3-59-g8ed1b From a40a26322a83d4a26a99ad2616cbd77394c19587 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Fri, 9 Nov 2018 13:03:31 +0000 Subject: bpf: pass prog instead of env to bpf_prog_offload_verifier_prep() Function bpf_prog_offload_verifier_prep(), called from the kernel BPF verifier to run a driver-specific callback for preparing for the verification step for offloaded programs, takes a pointer to a struct bpf_verifier_env object. However, no driver callback needs the whole structure at this time: the two drivers supporting this, nfp and netdevsim, only need a pointer to the struct bpf_prog instance held by env. Update the callback accordingly, on kernel side and in these two drivers. Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- drivers/net/ethernet/netronome/nfp/bpf/offload.c | 3 +-- drivers/net/netdevsim/bpf.c | 4 ++-- include/linux/bpf.h | 2 +- include/linux/bpf_verifier.h | 2 +- kernel/bpf/offload.c | 6 +++--- kernel/bpf/verifier.c | 2 +- 6 files changed, 9 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 91085cc3c843..e6b26d2f651d 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -183,10 +183,9 @@ static void nfp_prog_free(struct nfp_prog *nfp_prog) } static int -nfp_bpf_verifier_prep(struct net_device *netdev, struct bpf_verifier_env *env) +nfp_bpf_verifier_prep(struct net_device *netdev, struct bpf_prog *prog) { struct nfp_net *nn = netdev_priv(netdev); - struct bpf_prog *prog = env->prog; struct nfp_app *app = nn->app; struct nfp_prog *nfp_prog; int ret; diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index 33e3d54c3a0a..560bdaf1c98b 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -259,14 +259,14 @@ static int nsim_bpf_create_prog(struct netdevsim *ns, struct bpf_prog *prog) } static int -nsim_bpf_verifier_prep(struct net_device *dev, struct bpf_verifier_env *env) +nsim_bpf_verifier_prep(struct net_device *dev, struct bpf_prog *prog) { struct netdevsim *ns = netdev_priv(dev); if (!ns->bpf_bind_accept) return -EOPNOTSUPP; - return nsim_bpf_create_prog(ns, env->prog); + return nsim_bpf_create_prog(ns, prog); } static int nsim_bpf_translate(struct net_device *dev, struct bpf_prog *prog) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 867d2801db64..888111350d0e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -268,7 +268,7 @@ struct bpf_prog_offload_ops { int (*insn_hook)(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx); int (*finalize)(struct bpf_verifier_env *env); - int (*prepare)(struct net_device *netdev, struct bpf_verifier_env *env); + int (*prepare)(struct net_device *netdev, struct bpf_prog *prog); int (*translate)(struct net_device *netdev, struct bpf_prog *prog); void (*destroy)(struct bpf_prog *prog); }; diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index d93e89761a8b..11f5df1092d9 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -245,7 +245,7 @@ static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env) return cur_func(env)->regs; } -int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env); +int bpf_prog_offload_verifier_prep(struct bpf_prog *prog); int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx); int bpf_prog_offload_finalize(struct bpf_verifier_env *env); diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index d665e75a0ac3..397d206e184b 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -123,15 +123,15 @@ err_maybe_put: return err; } -int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env) +int bpf_prog_offload_verifier_prep(struct bpf_prog *prog) { struct bpf_prog_offload *offload; int ret = -ENODEV; down_read(&bpf_devs_lock); - offload = env->prog->aux->offload; + offload = prog->aux->offload; if (offload) - ret = offload->offdev->ops->prepare(offload->netdev, env); + ret = offload->offdev->ops->prepare(offload->netdev, prog); offload->dev_state = !ret; up_read(&bpf_devs_lock); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 75dab40b19a3..8d0977980cfa 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6368,7 +6368,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) goto skip_full_check; if (bpf_prog_is_dev_bound(env->prog->aux)) { - ret = bpf_prog_offload_verifier_prep(env); + ret = bpf_prog_offload_verifier_prep(env->prog); if (ret) goto skip_full_check; } -- cgit v1.2.3-59-g8ed1b From 16a8cb5cffd0a2929ae97bc258d2d9c92a4e7f6d Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Fri, 9 Nov 2018 13:03:32 +0000 Subject: bpf: do not pass netdev to translate() and prepare() offload callbacks The kernel functions to prepare verifier and translate for offloaded program retrieve "offload" from "prog", and "netdev" from "offload". Then both "prog" and "netdev" are passed to the callbacks. Simplify this by letting the drivers retrieve the net device themselves from the offload object attached to prog - if they need it at all. There is currently no need to pass the netdev as an argument to those functions. Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- drivers/net/ethernet/netronome/nfp/bpf/offload.c | 9 ++++----- drivers/net/netdevsim/bpf.c | 7 +++---- include/linux/bpf.h | 4 ++-- kernel/bpf/offload.c | 4 ++-- 4 files changed, 11 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index e6b26d2f651d..f0283854fade 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -182,10 +182,9 @@ static void nfp_prog_free(struct nfp_prog *nfp_prog) kfree(nfp_prog); } -static int -nfp_bpf_verifier_prep(struct net_device *netdev, struct bpf_prog *prog) +static int nfp_bpf_verifier_prep(struct bpf_prog *prog) { - struct nfp_net *nn = netdev_priv(netdev); + struct nfp_net *nn = netdev_priv(prog->aux->offload->netdev); struct nfp_app *app = nn->app; struct nfp_prog *nfp_prog; int ret; @@ -213,10 +212,10 @@ err_free: return ret; } -static int nfp_bpf_translate(struct net_device *netdev, struct bpf_prog *prog) +static int nfp_bpf_translate(struct bpf_prog *prog) { + struct nfp_net *nn = netdev_priv(prog->aux->offload->netdev); struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; - struct nfp_net *nn = netdev_priv(netdev); unsigned int max_instr; int err; diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index 560bdaf1c98b..6a5b7bd9a1f9 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -258,10 +258,9 @@ static int nsim_bpf_create_prog(struct netdevsim *ns, struct bpf_prog *prog) return 0; } -static int -nsim_bpf_verifier_prep(struct net_device *dev, struct bpf_prog *prog) +static int nsim_bpf_verifier_prep(struct bpf_prog *prog) { - struct netdevsim *ns = netdev_priv(dev); + struct netdevsim *ns = netdev_priv(prog->aux->offload->netdev); if (!ns->bpf_bind_accept) return -EOPNOTSUPP; @@ -269,7 +268,7 @@ nsim_bpf_verifier_prep(struct net_device *dev, struct bpf_prog *prog) return nsim_bpf_create_prog(ns, prog); } -static int nsim_bpf_translate(struct net_device *dev, struct bpf_prog *prog) +static int nsim_bpf_translate(struct bpf_prog *prog) { struct nsim_bpf_bound_prog *state = prog->aux->offload->dev_priv; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 888111350d0e..987815152629 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -268,8 +268,8 @@ struct bpf_prog_offload_ops { int (*insn_hook)(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx); int (*finalize)(struct bpf_verifier_env *env); - int (*prepare)(struct net_device *netdev, struct bpf_prog *prog); - int (*translate)(struct net_device *netdev, struct bpf_prog *prog); + int (*prepare)(struct bpf_prog *prog); + int (*translate)(struct bpf_prog *prog); void (*destroy)(struct bpf_prog *prog); }; diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 397d206e184b..52c5617e3716 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -131,7 +131,7 @@ int bpf_prog_offload_verifier_prep(struct bpf_prog *prog) down_read(&bpf_devs_lock); offload = prog->aux->offload; if (offload) - ret = offload->offdev->ops->prepare(offload->netdev, prog); + ret = offload->offdev->ops->prepare(prog); offload->dev_state = !ret; up_read(&bpf_devs_lock); @@ -203,7 +203,7 @@ static int bpf_prog_offload_translate(struct bpf_prog *prog) down_read(&bpf_devs_lock); offload = prog->aux->offload; if (offload) - ret = offload->offdev->ops->translate(offload->netdev, prog); + ret = offload->offdev->ops->translate(prog); up_read(&bpf_devs_lock); return ret; -- cgit v1.2.3-59-g8ed1b From 46f53a65d2de3e1591636c22b626b09d8684fd71 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Sat, 10 Nov 2018 22:15:13 -0800 Subject: bpf: Allow narrow loads with offset > 0 Currently BPF verifier allows narrow loads for a context field only with offset zero. E.g. if there is a __u32 field then only the following loads are permitted: * off=0, size=1 (narrow); * off=0, size=2 (narrow); * off=0, size=4 (full). On the other hand LLVM can generate a load with offset different than zero that make sense from program logic point of view, but verifier doesn't accept it. E.g. tools/testing/selftests/bpf/sendmsg4_prog.c has code: #define DST_IP4 0xC0A801FEU /* 192.168.1.254 */ ... if ((ctx->user_ip4 >> 24) == (bpf_htonl(DST_IP4) >> 24) && where ctx is struct bpf_sock_addr. Some versions of LLVM can produce the following byte code for it: 8: 71 12 07 00 00 00 00 00 r2 = *(u8 *)(r1 + 7) 9: 67 02 00 00 18 00 00 00 r2 <<= 24 10: 18 03 00 00 00 00 00 fe 00 00 00 00 00 00 00 00 r3 = 4261412864 ll 12: 5d 32 07 00 00 00 00 00 if r2 != r3 goto +7 where `*(u8 *)(r1 + 7)` means narrow load for ctx->user_ip4 with size=1 and offset=3 (7 - sizeof(ctx->user_family) = 3). This load is currently rejected by verifier. Verifier code that rejects such loads is in bpf_ctx_narrow_access_ok() what means any is_valid_access implementation, that uses the function, works this way, e.g. bpf_skb_is_valid_access() for __sk_buff or sock_addr_is_valid_access() for bpf_sock_addr. The patch makes such loads supported. Offset can be in [0; size_default) but has to be multiple of load size. E.g. for __u32 field the following loads are supported now: * off=0, size=1 (narrow); * off=1, size=1 (narrow); * off=2, size=1 (narrow); * off=3, size=1 (narrow); * off=0, size=2 (narrow); * off=2, size=2 (narrow); * off=0, size=4 (full). Reported-by: Yonghong Song Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 16 +--------------- kernel/bpf/verifier.c | 21 ++++++++++++++++----- 2 files changed, 17 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index de629b706d1d..cc17f5f32fbb 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -668,24 +668,10 @@ static inline u32 bpf_ctx_off_adjust_machine(u32 size) return size; } -static inline bool bpf_ctx_narrow_align_ok(u32 off, u32 size_access, - u32 size_default) -{ - size_default = bpf_ctx_off_adjust_machine(size_default); - size_access = bpf_ctx_off_adjust_machine(size_access); - -#ifdef __LITTLE_ENDIAN - return (off & (size_default - 1)) == 0; -#else - return (off & (size_default - 1)) + size_access == size_default; -#endif -} - static inline bool bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default) { - return bpf_ctx_narrow_align_ok(off, size, size_default) && - size <= size_default && (size & (size - 1)) == 0; + return size <= size_default && (size & (size - 1)) == 0; } #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0])) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8d0977980cfa..b5222aa61d54 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5718,10 +5718,10 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) int i, cnt, size, ctx_field_size, delta = 0; const int insn_cnt = env->prog->len; struct bpf_insn insn_buf[16], *insn; + u32 target_size, size_default, off; struct bpf_prog *new_prog; enum bpf_access_type type; bool is_narrower_load; - u32 target_size; if (ops->gen_prologue || env->seen_direct_write) { if (!ops->gen_prologue) { @@ -5814,9 +5814,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) * we will apply proper mask to the result. */ is_narrower_load = size < ctx_field_size; + size_default = bpf_ctx_off_adjust_machine(ctx_field_size); + off = insn->off; if (is_narrower_load) { - u32 size_default = bpf_ctx_off_adjust_machine(ctx_field_size); - u32 off = insn->off; u8 size_code; if (type == BPF_WRITE) { @@ -5844,12 +5844,23 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) } if (is_narrower_load && size < target_size) { - if (ctx_field_size <= 4) + u8 shift = (off & (size_default - 1)) * 8; + + if (ctx_field_size <= 4) { + if (shift) + insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH, + insn->dst_reg, + shift); insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg, (1 << size * 8) - 1); - else + } else { + if (shift) + insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH, + insn->dst_reg, + shift); insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg, (1 << size * 8) - 1); + } } new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); -- cgit v1.2.3-59-g8ed1b From 96b3b6c9091d23289721350e32c63cc8749686be Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Fri, 16 Nov 2018 11:41:08 +0000 Subject: bpf: allow zero-initializing hash map seed Add a new flag BPF_F_ZERO_SEED, which forces a hash map to initialize the seed to zero. This is useful when doing performance analysis both on individual BPF programs, as well as the kernel's hash table implementation. Signed-off-by: Lorenz Bauer Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 3 +++ kernel/bpf/hashtab.c | 13 +++++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 47d606d744cc..8c01b89a4cb4 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -269,6 +269,9 @@ enum bpf_attach_type { /* Flag for stack_map, store build_id+offset instead of pointer */ #define BPF_F_STACK_BUILD_ID (1U << 5) +/* Zero-initialize hash function seed. This should only be used for testing. */ +#define BPF_F_ZERO_SEED (1U << 6) + enum bpf_stack_build_id_status { /* user space need an empty entry to identify end of a trace */ BPF_STACK_BUILD_ID_EMPTY = 0, diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 2c1790288138..4b7c76765d9d 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -23,7 +23,7 @@ #define HTAB_CREATE_FLAG_MASK \ (BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE | \ - BPF_F_RDONLY | BPF_F_WRONLY) + BPF_F_RDONLY | BPF_F_WRONLY | BPF_F_ZERO_SEED) struct bucket { struct hlist_nulls_head head; @@ -244,6 +244,7 @@ static int htab_map_alloc_check(union bpf_attr *attr) */ bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU); bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC); + bool zero_seed = (attr->map_flags & BPF_F_ZERO_SEED); int numa_node = bpf_map_attr_numa_node(attr); BUILD_BUG_ON(offsetof(struct htab_elem, htab) != @@ -257,6 +258,10 @@ static int htab_map_alloc_check(union bpf_attr *attr) */ return -EPERM; + if (zero_seed && !capable(CAP_SYS_ADMIN)) + /* Guard against local DoS, and discourage production use. */ + return -EPERM; + if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK) /* reserved bits should not be used */ return -EINVAL; @@ -373,7 +378,11 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) if (!htab->buckets) goto free_htab; - htab->hashrnd = get_random_int(); + if (htab->map.map_flags & BPF_F_ZERO_SEED) + htab->hashrnd = 0; + else + htab->hashrnd = get_random_int(); + for (i = 0; i < htab->n_buckets; i++) { INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i); raw_spin_lock_init(&htab->buckets[i].lock); -- cgit v1.2.3-59-g8ed1b From 2f1833607aed6a9c1e1729bf0e2588c341ceb409 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Fri, 16 Nov 2018 11:41:09 +0000 Subject: bpf: move BPF_F_QUERY_EFFECTIVE after map flags BPF_F_QUERY_EFFECTIVE is in the middle of the flags valid for BPF_MAP_CREATE. Move it to its own section to reduce confusion. Signed-off-by: Lorenz Bauer Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 8c01b89a4cb4..05d95290b848 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -257,9 +257,6 @@ enum bpf_attach_type { /* Specify numa node during map creation */ #define BPF_F_NUMA_NODE (1U << 2) -/* flags for BPF_PROG_QUERY */ -#define BPF_F_QUERY_EFFECTIVE (1U << 0) - #define BPF_OBJ_NAME_LEN 16U /* Flags for accessing BPF object */ @@ -272,6 +269,9 @@ enum bpf_attach_type { /* Zero-initialize hash function seed. This should only be used for testing. */ #define BPF_F_ZERO_SEED (1U << 6) +/* flags for BPF_PROG_QUERY */ +#define BPF_F_QUERY_EFFECTIVE (1U << 0) + enum bpf_stack_build_id_status { /* user space need an empty entry to identify end of a trace */ BPF_STACK_BUILD_ID_EMPTY = 0, -- cgit v1.2.3-59-g8ed1b From 2667a2626f4da370409c2830552f6e8c8b8c41e2 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 19 Nov 2018 15:29:08 -0800 Subject: bpf: btf: Add BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO This patch adds BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO to support the function debug info. BTF_KIND_FUNC_PROTO must not have a name (i.e. !t->name_off) and it is followed by >= 0 'struct bpf_param' objects to describe the function arguments. The BTF_KIND_FUNC must have a valid name and it must refer back to a BTF_KIND_FUNC_PROTO. The above is the conclusion after the discussion between Edward Cree, Alexei, Daniel, Yonghong and Martin. By combining BTF_KIND_FUNC and BTF_LIND_FUNC_PROTO, a complete function signature can be obtained. It will be used in the later patches to learn the function signature of a running bpf program. Signed-off-by: Martin KaFai Lau Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- include/uapi/linux/btf.h | 18 ++- kernel/bpf/btf.c | 389 +++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 354 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h index 972265f32871..14f66948fc95 100644 --- a/include/uapi/linux/btf.h +++ b/include/uapi/linux/btf.h @@ -40,7 +40,8 @@ struct btf_type { /* "size" is used by INT, ENUM, STRUCT and UNION. * "size" tells the size of the type it is describing. * - * "type" is used by PTR, TYPEDEF, VOLATILE, CONST and RESTRICT. + * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, + * FUNC and FUNC_PROTO. * "type" is a type_id referring to another type. */ union { @@ -64,8 +65,10 @@ struct btf_type { #define BTF_KIND_VOLATILE 9 /* Volatile */ #define BTF_KIND_CONST 10 /* Const */ #define BTF_KIND_RESTRICT 11 /* Restrict */ -#define BTF_KIND_MAX 11 -#define NR_BTF_KINDS 12 +#define BTF_KIND_FUNC 12 /* Function */ +#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */ +#define BTF_KIND_MAX 13 +#define NR_BTF_KINDS 14 /* For some specific BTF_KIND, "struct btf_type" is immediately * followed by extra data. @@ -110,4 +113,13 @@ struct btf_member { __u32 offset; /* offset in bits */ }; +/* BTF_KIND_FUNC_PROTO is followed by multiple "struct btf_param". + * The exact number of btf_param is stored in the vlen (of the + * info in "struct btf_type"). + */ +struct btf_param { + __u32 name_off; + __u32 type; +}; + #endif /* _UAPI__LINUX_BTF_H__ */ diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 2a50d87de485..6a2be79b73fc 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -259,6 +260,8 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_VOLATILE] = "VOLATILE", [BTF_KIND_CONST] = "CONST", [BTF_KIND_RESTRICT] = "RESTRICT", + [BTF_KIND_FUNC] = "FUNC", + [BTF_KIND_FUNC_PROTO] = "FUNC_PROTO", }; struct btf_kind_operations { @@ -281,6 +284,9 @@ struct btf_kind_operations { static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS]; static struct btf_type btf_void; +static int btf_resolve(struct btf_verifier_env *env, + const struct btf_type *t, u32 type_id); + static bool btf_type_is_modifier(const struct btf_type *t) { /* Some of them is not strictly a C modifier @@ -314,9 +320,20 @@ static bool btf_type_is_fwd(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_FWD; } +static bool btf_type_is_func(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC; +} + +static bool btf_type_is_func_proto(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC_PROTO; +} + static bool btf_type_nosize(const struct btf_type *t) { - return btf_type_is_void(t) || btf_type_is_fwd(t); + return btf_type_is_void(t) || btf_type_is_fwd(t) || + btf_type_is_func(t) || btf_type_is_func_proto(t); } static bool btf_type_nosize_or_null(const struct btf_type *t) @@ -433,6 +450,30 @@ static bool btf_name_offset_valid(const struct btf *btf, u32 offset) offset < btf->hdr.str_len; } +/* Only C-style identifier is permitted. This can be relaxed if + * necessary. + */ +static bool btf_name_valid_identifier(const struct btf *btf, u32 offset) +{ + /* offset must be valid */ + const char *src = &btf->strings[offset]; + const char *src_limit; + + if (!isalpha(*src) && *src != '_') + return false; + + /* set a limit on identifier length */ + src_limit = src + KSYM_NAME_LEN; + src++; + while (*src && src < src_limit) { + if (!isalnum(*src) && *src != '_') + return false; + src++; + } + + return !*src; +} + static const char *btf_name_by_offset(const struct btf *btf, u32 offset) { if (!offset) @@ -747,11 +788,15 @@ static bool env_type_is_resolve_sink(const struct btf_verifier_env *env, /* int, enum or void is a sink */ return !btf_type_needs_resolve(next_type); case RESOLVE_PTR: - /* int, enum, void, struct or array is a sink for ptr */ + /* int, enum, void, struct, array, func or func_proto is a sink + * for ptr + */ return !btf_type_is_modifier(next_type) && !btf_type_is_ptr(next_type); case RESOLVE_STRUCT_OR_ARRAY: - /* int, enum, void or ptr is a sink for struct and array */ + /* int, enum, void, ptr, func or func_proto is a sink + * for struct and array + */ return !btf_type_is_modifier(next_type) && !btf_type_is_array(next_type) && !btf_type_is_struct(next_type); @@ -1170,10 +1215,6 @@ static int btf_modifier_resolve(struct btf_verifier_env *env, return -EINVAL; } - /* "typedef void new_void", "const void"...etc */ - if (btf_type_is_void(next_type) || btf_type_is_fwd(next_type)) - goto resolved; - if (!env_type_is_resolve_sink(env, next_type) && !env_type_is_resolved(env, next_type_id)) return env_stack_push(env, next_type, next_type_id); @@ -1184,13 +1225,18 @@ static int btf_modifier_resolve(struct btf_verifier_env *env, * save us a few type-following when we use it later (e.g. in * pretty print). */ - if (!btf_type_id_size(btf, &next_type_id, &next_type_size) && - !btf_type_nosize(btf_type_id_resolve(btf, &next_type_id))) { - btf_verifier_log_type(env, v->t, "Invalid type_id"); - return -EINVAL; + if (!btf_type_id_size(btf, &next_type_id, &next_type_size)) { + if (env_type_is_resolved(env, next_type_id)) + next_type = btf_type_id_resolve(btf, &next_type_id); + + /* "typedef void new_void", "const void"...etc */ + if (!btf_type_is_void(next_type) && + !btf_type_is_fwd(next_type)) { + btf_verifier_log_type(env, v->t, "Invalid type_id"); + return -EINVAL; + } } -resolved: env_stack_pop_resolved(env, next_type_id, next_type_size); return 0; @@ -1203,7 +1249,6 @@ static int btf_ptr_resolve(struct btf_verifier_env *env, const struct btf_type *t = v->t; u32 next_type_id = t->type; struct btf *btf = env->btf; - u32 next_type_size = 0; next_type = btf_type_by_id(btf, next_type_id); if (!next_type) { @@ -1211,10 +1256,6 @@ static int btf_ptr_resolve(struct btf_verifier_env *env, return -EINVAL; } - /* "void *" */ - if (btf_type_is_void(next_type) || btf_type_is_fwd(next_type)) - goto resolved; - if (!env_type_is_resolve_sink(env, next_type) && !env_type_is_resolved(env, next_type_id)) return env_stack_push(env, next_type, next_type_id); @@ -1241,13 +1282,18 @@ static int btf_ptr_resolve(struct btf_verifier_env *env, resolved_type_id); } - if (!btf_type_id_size(btf, &next_type_id, &next_type_size) && - !btf_type_nosize(btf_type_id_resolve(btf, &next_type_id))) { - btf_verifier_log_type(env, v->t, "Invalid type_id"); - return -EINVAL; + if (!btf_type_id_size(btf, &next_type_id, NULL)) { + if (env_type_is_resolved(env, next_type_id)) + next_type = btf_type_id_resolve(btf, &next_type_id); + + if (!btf_type_is_void(next_type) && + !btf_type_is_fwd(next_type) && + !btf_type_is_func_proto(next_type)) { + btf_verifier_log_type(env, v->t, "Invalid type_id"); + return -EINVAL; + } } -resolved: env_stack_pop_resolved(env, next_type_id, 0); return 0; @@ -1787,6 +1833,232 @@ static struct btf_kind_operations enum_ops = { .seq_show = btf_enum_seq_show, }; +static s32 btf_func_proto_check_meta(struct btf_verifier_env *env, + const struct btf_type *t, + u32 meta_left) +{ + u32 meta_needed = btf_type_vlen(t) * sizeof(struct btf_param); + + if (meta_left < meta_needed) { + btf_verifier_log_basic(env, t, + "meta_left:%u meta_needed:%u", + meta_left, meta_needed); + return -EINVAL; + } + + if (t->name_off) { + btf_verifier_log_type(env, t, "Invalid name"); + return -EINVAL; + } + + btf_verifier_log_type(env, t, NULL); + + return meta_needed; +} + +static void btf_func_proto_log(struct btf_verifier_env *env, + const struct btf_type *t) +{ + const struct btf_param *args = (const struct btf_param *)(t + 1); + u16 nr_args = btf_type_vlen(t), i; + + btf_verifier_log(env, "return=%u args=(", t->type); + if (!nr_args) { + btf_verifier_log(env, "void"); + goto done; + } + + if (nr_args == 1 && !args[0].type) { + /* Only one vararg */ + btf_verifier_log(env, "vararg"); + goto done; + } + + btf_verifier_log(env, "%u %s", args[0].type, + btf_name_by_offset(env->btf, + args[0].name_off)); + for (i = 1; i < nr_args - 1; i++) + btf_verifier_log(env, ", %u %s", args[i].type, + btf_name_by_offset(env->btf, + args[i].name_off)); + + if (nr_args > 1) { + const struct btf_param *last_arg = &args[nr_args - 1]; + + if (last_arg->type) + btf_verifier_log(env, ", %u %s", last_arg->type, + btf_name_by_offset(env->btf, + last_arg->name_off)); + else + btf_verifier_log(env, ", vararg"); + } + +done: + btf_verifier_log(env, ")"); +} + +static struct btf_kind_operations func_proto_ops = { + .check_meta = btf_func_proto_check_meta, + .resolve = btf_df_resolve, + /* + * BTF_KIND_FUNC_PROTO cannot be directly referred by + * a struct's member. + * + * It should be a funciton pointer instead. + * (i.e. struct's member -> BTF_KIND_PTR -> BTF_KIND_FUNC_PROTO) + * + * Hence, there is no btf_func_check_member(). + */ + .check_member = btf_df_check_member, + .log_details = btf_func_proto_log, + .seq_show = btf_df_seq_show, +}; + +static s32 btf_func_check_meta(struct btf_verifier_env *env, + const struct btf_type *t, + u32 meta_left) +{ + if (!t->name_off || + !btf_name_valid_identifier(env->btf, t->name_off)) { + btf_verifier_log_type(env, t, "Invalid name"); + return -EINVAL; + } + + if (btf_type_vlen(t)) { + btf_verifier_log_type(env, t, "vlen != 0"); + return -EINVAL; + } + + btf_verifier_log_type(env, t, NULL); + + return 0; +} + +static struct btf_kind_operations func_ops = { + .check_meta = btf_func_check_meta, + .resolve = btf_df_resolve, + .check_member = btf_df_check_member, + .log_details = btf_ref_type_log, + .seq_show = btf_df_seq_show, +}; + +static int btf_func_proto_check(struct btf_verifier_env *env, + const struct btf_type *t) +{ + const struct btf_type *ret_type; + const struct btf_param *args; + const struct btf *btf; + u16 nr_args, i; + int err; + + btf = env->btf; + args = (const struct btf_param *)(t + 1); + nr_args = btf_type_vlen(t); + + /* Check func return type which could be "void" (t->type == 0) */ + if (t->type) { + u32 ret_type_id = t->type; + + ret_type = btf_type_by_id(btf, ret_type_id); + if (!ret_type) { + btf_verifier_log_type(env, t, "Invalid return type"); + return -EINVAL; + } + + if (btf_type_needs_resolve(ret_type) && + !env_type_is_resolved(env, ret_type_id)) { + err = btf_resolve(env, ret_type, ret_type_id); + if (err) + return err; + } + + /* Ensure the return type is a type that has a size */ + if (!btf_type_id_size(btf, &ret_type_id, NULL)) { + btf_verifier_log_type(env, t, "Invalid return type"); + return -EINVAL; + } + } + + if (!nr_args) + return 0; + + /* Last func arg type_id could be 0 if it is a vararg */ + if (!args[nr_args - 1].type) { + if (args[nr_args - 1].name_off) { + btf_verifier_log_type(env, t, "Invalid arg#%u", + nr_args); + return -EINVAL; + } + nr_args--; + } + + err = 0; + for (i = 0; i < nr_args; i++) { + const struct btf_type *arg_type; + u32 arg_type_id; + + arg_type_id = args[i].type; + arg_type = btf_type_by_id(btf, arg_type_id); + if (!arg_type) { + btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1); + err = -EINVAL; + break; + } + + if (args[i].name_off && + (!btf_name_offset_valid(btf, args[i].name_off) || + !btf_name_valid_identifier(btf, args[i].name_off))) { + btf_verifier_log_type(env, t, + "Invalid arg#%u", i + 1); + err = -EINVAL; + break; + } + + if (btf_type_needs_resolve(arg_type) && + !env_type_is_resolved(env, arg_type_id)) { + err = btf_resolve(env, arg_type, arg_type_id); + if (err) + break; + } + + if (!btf_type_id_size(btf, &arg_type_id, NULL)) { + btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1); + err = -EINVAL; + break; + } + } + + return err; +} + +static int btf_func_check(struct btf_verifier_env *env, + const struct btf_type *t) +{ + const struct btf_type *proto_type; + const struct btf_param *args; + const struct btf *btf; + u16 nr_args, i; + + btf = env->btf; + proto_type = btf_type_by_id(btf, t->type); + + if (!proto_type || !btf_type_is_func_proto(proto_type)) { + btf_verifier_log_type(env, t, "Invalid type_id"); + return -EINVAL; + } + + args = (const struct btf_param *)(proto_type + 1); + nr_args = btf_type_vlen(proto_type); + for (i = 0; i < nr_args; i++) { + if (!args[i].name_off && args[i].type) { + btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1); + return -EINVAL; + } + } + + return 0; +} + static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS] = { [BTF_KIND_INT] = &int_ops, [BTF_KIND_PTR] = &ptr_ops, @@ -1799,6 +2071,8 @@ static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS] = { [BTF_KIND_VOLATILE] = &modifier_ops, [BTF_KIND_CONST] = &modifier_ops, [BTF_KIND_RESTRICT] = &modifier_ops, + [BTF_KIND_FUNC] = &func_ops, + [BTF_KIND_FUNC_PROTO] = &func_proto_ops, }; static s32 btf_check_meta(struct btf_verifier_env *env, @@ -1870,30 +2144,6 @@ static int btf_check_all_metas(struct btf_verifier_env *env) return 0; } -static int btf_resolve(struct btf_verifier_env *env, - const struct btf_type *t, u32 type_id) -{ - const struct resolve_vertex *v; - int err = 0; - - env->resolve_mode = RESOLVE_TBD; - env_stack_push(env, t, type_id); - while (!err && (v = env_stack_peak(env))) { - env->log_type_id = v->type_id; - err = btf_type_ops(v->t)->resolve(env, v); - } - - env->log_type_id = type_id; - if (err == -E2BIG) - btf_verifier_log_type(env, t, - "Exceeded max resolving depth:%u", - MAX_RESOLVE_DEPTH); - else if (err == -EEXIST) - btf_verifier_log_type(env, t, "Loop detected"); - - return err; -} - static bool btf_resolve_valid(struct btf_verifier_env *env, const struct btf_type *t, u32 type_id) @@ -1927,6 +2177,39 @@ static bool btf_resolve_valid(struct btf_verifier_env *env, return false; } +static int btf_resolve(struct btf_verifier_env *env, + const struct btf_type *t, u32 type_id) +{ + u32 save_log_type_id = env->log_type_id; + const struct resolve_vertex *v; + int err = 0; + + env->resolve_mode = RESOLVE_TBD; + env_stack_push(env, t, type_id); + while (!err && (v = env_stack_peak(env))) { + env->log_type_id = v->type_id; + err = btf_type_ops(v->t)->resolve(env, v); + } + + env->log_type_id = type_id; + if (err == -E2BIG) { + btf_verifier_log_type(env, t, + "Exceeded max resolving depth:%u", + MAX_RESOLVE_DEPTH); + } else if (err == -EEXIST) { + btf_verifier_log_type(env, t, "Loop detected"); + } + + /* Final sanity check */ + if (!err && !btf_resolve_valid(env, t, type_id)) { + btf_verifier_log_type(env, t, "Invalid resolve state"); + err = -EINVAL; + } + + env->log_type_id = save_log_type_id; + return err; +} + static int btf_check_all_types(struct btf_verifier_env *env) { struct btf *btf = env->btf; @@ -1949,10 +2232,16 @@ static int btf_check_all_types(struct btf_verifier_env *env) return err; } - if (btf_type_needs_resolve(t) && - !btf_resolve_valid(env, t, type_id)) { - btf_verifier_log_type(env, t, "Invalid resolve state"); - return -EINVAL; + if (btf_type_is_func_proto(t)) { + err = btf_func_proto_check(env, t); + if (err) + return err; + } + + if (btf_type_is_func(t)) { + err = btf_func_check(env, t); + if (err) + return err; } } -- cgit v1.2.3-59-g8ed1b From 838e96904ff3fc6c30e5ebbc611474669856e3c0 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 19 Nov 2018 15:29:11 -0800 Subject: bpf: Introduce bpf_func_info This patch added interface to load a program with the following additional information: . prog_btf_fd . func_info, func_info_rec_size and func_info_cnt where func_info will provide function range and type_id corresponding to each function. The func_info_rec_size is introduced in the UAPI to specify struct bpf_func_info size passed from user space. This intends to make bpf_func_info structure growable in the future. If the kernel gets a different bpf_func_info size from userspace, it will try to handle user request with part of bpf_func_info it can understand. In this patch, kernel can understand struct bpf_func_info { __u32 insn_offset; __u32 type_id; }; If user passed a bpf func_info record size of 16 bytes, the kernel can still handle part of records with the above definition. If verifier agrees with function range provided by the user, the bpf_prog ksym for each function will use the func name provided in the type_id, which is supposed to provide better encoding as it is not limited by 16 bytes program name limitation and this is better for bpf program which contains multiple subprograms. The bpf_prog_info interface is also extended to return btf_id, func_info, func_info_rec_size and func_info_cnt to userspace, so userspace can print out the function prototype for each xlated function. The insn_offset in the returned func_info corresponds to the insn offset for xlated functions. With other jit related fields in bpf_prog_info, userspace can also print out function prototypes for each jited function. Signed-off-by: Yonghong Song Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 5 +- include/linux/bpf_verifier.h | 1 + include/linux/btf.h | 2 + include/uapi/linux/bpf.h | 13 +++++ kernel/bpf/btf.c | 4 +- kernel/bpf/core.c | 13 +++++ kernel/bpf/syscall.c | 59 +++++++++++++++++++-- kernel/bpf/verifier.c | 120 ++++++++++++++++++++++++++++++++++++++++++- 8 files changed, 209 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 987815152629..7f0e225bf630 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -316,6 +316,8 @@ struct bpf_prog_aux { void *security; #endif struct bpf_prog_offload *offload; + struct btf *btf; + u32 type_id; /* type id for this prog/func */ union { struct work_struct work; struct rcu_head rcu; @@ -527,7 +529,8 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) } /* verify correctness of eBPF program */ -int bpf_check(struct bpf_prog **fp, union bpf_attr *attr); +int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, + union bpf_attr __user *uattr); void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth); /* Map specifics */ diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 11f5df1092d9..204382f46fd8 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -204,6 +204,7 @@ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log) struct bpf_subprog_info { u32 start; /* insn idx of function entry point */ u16 stack_depth; /* max. stack depth used by this function */ + u32 type_id; /* btf type_id for this subprog */ }; /* single container for all structs diff --git a/include/linux/btf.h b/include/linux/btf.h index e076c4697049..7f2c0a4a45ea 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -46,5 +46,7 @@ void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj, struct seq_file *m); int btf_get_fd_by_id(u32 id); u32 btf_id(const struct btf *btf); +const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); +const char *btf_name_by_offset(const struct btf *btf, u32 offset); #endif diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 05d95290b848..c1554aa07465 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -338,6 +338,10 @@ union bpf_attr { * (context accesses, allowed helpers, etc). */ __u32 expected_attach_type; + __u32 prog_btf_fd; /* fd pointing to BTF type data */ + __u32 func_info_rec_size; /* userspace bpf_func_info size */ + __aligned_u64 func_info; /* func info */ + __u32 func_info_cnt; /* number of bpf_func_info records */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -2638,6 +2642,10 @@ struct bpf_prog_info { __u32 nr_jited_func_lens; __aligned_u64 jited_ksyms; __aligned_u64 jited_func_lens; + __u32 btf_id; + __u32 func_info_rec_size; + __aligned_u64 func_info; + __u32 func_info_cnt; } __attribute__((aligned(8))); struct bpf_map_info { @@ -2949,4 +2957,9 @@ struct bpf_flow_keys { }; }; +struct bpf_func_info { + __u32 insn_offset; + __u32 type_id; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 6a2be79b73fc..69da9169819a 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -474,7 +474,7 @@ static bool btf_name_valid_identifier(const struct btf *btf, u32 offset) return !*src; } -static const char *btf_name_by_offset(const struct btf *btf, u32 offset) +const char *btf_name_by_offset(const struct btf *btf, u32 offset) { if (!offset) return "(anon)"; @@ -484,7 +484,7 @@ static const char *btf_name_by_offset(const struct btf *btf, u32 offset) return "(invalid-name-offset)"; } -static const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) +const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) { if (type_id > btf->nr_types) return NULL; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 1a796e0799ec..16d77012ad3e 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -21,12 +21,14 @@ * Kris Katterjohn - Added many additional checks in bpf_check_classic() */ +#include #include #include #include #include #include #include +#include #include #include #include @@ -390,6 +392,8 @@ bpf_get_prog_addr_region(const struct bpf_prog *prog, static void bpf_get_prog_name(const struct bpf_prog *prog, char *sym) { const char *end = sym + KSYM_NAME_LEN; + const struct btf_type *type; + const char *func_name; BUILD_BUG_ON(sizeof("bpf_prog_") + sizeof(prog->tag) * 2 + @@ -404,6 +408,15 @@ static void bpf_get_prog_name(const struct bpf_prog *prog, char *sym) sym += snprintf(sym, KSYM_NAME_LEN, "bpf_prog_"); sym = bin2hex(sym, prog->tag, sizeof(prog->tag)); + + /* prog->aux->name will be ignored if full btf name is available */ + if (prog->aux->btf) { + type = btf_type_by_id(prog->aux->btf, prog->aux->type_id); + func_name = btf_name_by_offset(prog->aux->btf, type->name_off); + snprintf(sym, (size_t)(end - sym), "_%s", func_name); + return; + } + if (prog->aux->name[0]) snprintf(sym, (size_t)(end - sym), "_%s", prog->aux->name); else diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index cf5040fd5434..998377808102 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1213,6 +1213,7 @@ static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) /* bpf_prog_free_id() must be called first */ bpf_prog_free_id(prog, do_idr_lock); bpf_prog_kallsyms_del_all(prog); + btf_put(prog->aux->btf); call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); } @@ -1437,9 +1438,9 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type, } /* last field in 'union bpf_attr' used by this command */ -#define BPF_PROG_LOAD_LAST_FIELD expected_attach_type +#define BPF_PROG_LOAD_LAST_FIELD func_info_cnt -static int bpf_prog_load(union bpf_attr *attr) +static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) { enum bpf_prog_type type = attr->prog_type; struct bpf_prog *prog; @@ -1525,7 +1526,7 @@ static int bpf_prog_load(union bpf_attr *attr) goto free_prog; /* run eBPF verifier */ - err = bpf_check(&prog, attr); + err = bpf_check(&prog, attr, uattr); if (err < 0) goto free_used_maps; @@ -2079,6 +2080,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, info.xlated_prog_len = 0; info.nr_jited_ksyms = 0; info.nr_jited_func_lens = 0; + info.func_info_cnt = 0; goto done; } @@ -2216,6 +2218,55 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, } } + if (prog->aux->btf) { + u32 ucnt, urec_size; + + info.btf_id = btf_id(prog->aux->btf); + + ucnt = info.func_info_cnt; + info.func_info_cnt = prog->aux->func_cnt ? : 1; + urec_size = info.func_info_rec_size; + info.func_info_rec_size = sizeof(struct bpf_func_info); + if (ucnt) { + /* expect passed-in urec_size is what the kernel expects */ + if (urec_size != info.func_info_rec_size) + return -EINVAL; + + if (bpf_dump_raw_ok()) { + struct bpf_func_info kern_finfo; + char __user *user_finfo; + u32 i, insn_offset; + + user_finfo = u64_to_user_ptr(info.func_info); + if (prog->aux->func_cnt) { + ucnt = min_t(u32, info.func_info_cnt, ucnt); + insn_offset = 0; + for (i = 0; i < ucnt; i++) { + kern_finfo.insn_offset = insn_offset; + kern_finfo.type_id = prog->aux->func[i]->aux->type_id; + if (copy_to_user(user_finfo, &kern_finfo, + sizeof(kern_finfo))) + return -EFAULT; + + /* func[i]->len holds the prog len */ + insn_offset += prog->aux->func[i]->len; + user_finfo += urec_size; + } + } else { + kern_finfo.insn_offset = 0; + kern_finfo.type_id = prog->aux->type_id; + if (copy_to_user(user_finfo, &kern_finfo, + sizeof(kern_finfo))) + return -EFAULT; + } + } else { + info.func_info_cnt = 0; + } + } + } else { + info.func_info_cnt = 0; + } + done: if (copy_to_user(uinfo, &info, info_len) || put_user(info_len, &uattr->info.info_len)) @@ -2501,7 +2552,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz err = map_get_next_key(&attr); break; case BPF_PROG_LOAD: - err = bpf_prog_load(&attr); + err = bpf_prog_load(&attr, uattr); break; case BPF_OBJ_PIN: err = bpf_obj_pin(&attr); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b5222aa61d54..f102c4fd0c5a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -11,10 +11,12 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. */ +#include #include #include #include #include +#include #include #include #include @@ -4639,6 +4641,114 @@ err_free: return ret; } +/* The minimum supported BTF func info size */ +#define MIN_BPF_FUNCINFO_SIZE 8 +#define MAX_FUNCINFO_REC_SIZE 252 + +static int check_btf_func(struct bpf_prog *prog, struct bpf_verifier_env *env, + union bpf_attr *attr, union bpf_attr __user *uattr) +{ + u32 i, nfuncs, urec_size, min_size, prev_offset; + u32 krec_size = sizeof(struct bpf_func_info); + struct bpf_func_info krecord = {}; + const struct btf_type *type; + void __user *urecord; + struct btf *btf; + int ret = 0; + + nfuncs = attr->func_info_cnt; + if (!nfuncs) + return 0; + + if (nfuncs != env->subprog_cnt) { + verbose(env, "number of funcs in func_info doesn't match number of subprogs\n"); + return -EINVAL; + } + + urec_size = attr->func_info_rec_size; + if (urec_size < MIN_BPF_FUNCINFO_SIZE || + urec_size > MAX_FUNCINFO_REC_SIZE || + urec_size % sizeof(u32)) { + verbose(env, "invalid func info rec size %u\n", urec_size); + return -EINVAL; + } + + btf = btf_get_by_fd(attr->prog_btf_fd); + if (IS_ERR(btf)) { + verbose(env, "unable to get btf from fd\n"); + return PTR_ERR(btf); + } + + urecord = u64_to_user_ptr(attr->func_info); + min_size = min_t(u32, krec_size, urec_size); + + for (i = 0; i < nfuncs; i++) { + ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size); + if (ret) { + if (ret == -E2BIG) { + verbose(env, "nonzero tailing record in func info"); + /* set the size kernel expects so loader can zero + * out the rest of the record. + */ + if (put_user(min_size, &uattr->func_info_rec_size)) + ret = -EFAULT; + } + goto free_btf; + } + + if (copy_from_user(&krecord, urecord, min_size)) { + ret = -EFAULT; + goto free_btf; + } + + /* check insn_offset */ + if (i == 0) { + if (krecord.insn_offset) { + verbose(env, + "nonzero insn_offset %u for the first func info record", + krecord.insn_offset); + ret = -EINVAL; + goto free_btf; + } + } else if (krecord.insn_offset <= prev_offset) { + verbose(env, + "same or smaller insn offset (%u) than previous func info record (%u)", + krecord.insn_offset, prev_offset); + ret = -EINVAL; + goto free_btf; + } + + if (env->subprog_info[i].start != krecord.insn_offset) { + verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n"); + ret = -EINVAL; + goto free_btf; + } + + /* check type_id */ + type = btf_type_by_id(btf, krecord.type_id); + if (!type || BTF_INFO_KIND(type->info) != BTF_KIND_FUNC) { + verbose(env, "invalid type id %d in func info", + krecord.type_id); + ret = -EINVAL; + goto free_btf; + } + + if (i == 0) + prog->aux->type_id = krecord.type_id; + env->subprog_info[i].type_id = krecord.type_id; + + prev_offset = krecord.insn_offset; + urecord += urec_size; + } + + prog->aux->btf = btf; + return 0; + +free_btf: + btf_put(btf); + return ret; +} + /* check %cur's range satisfies %old's */ static bool range_within(struct bpf_reg_state *old, struct bpf_reg_state *cur) @@ -5939,6 +6049,9 @@ static int jit_subprogs(struct bpf_verifier_env *env) func[i]->aux->name[0] = 'F'; func[i]->aux->stack_depth = env->subprog_info[i].stack_depth; func[i]->jit_requested = 1; + /* the btf will be freed only at prog->aux */ + func[i]->aux->btf = prog->aux->btf; + func[i]->aux->type_id = env->subprog_info[i].type_id; func[i] = bpf_int_jit_compile(func[i]); if (!func[i]->jited) { err = -ENOTSUPP; @@ -6325,7 +6438,8 @@ static void free_states(struct bpf_verifier_env *env) kfree(env->explored_states); } -int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) +int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, + union bpf_attr __user *uattr) { struct bpf_verifier_env *env; struct bpf_verifier_log *log; @@ -6397,6 +6511,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) if (ret < 0) goto skip_full_check; + ret = check_btf_func(env->prog, env, attr, uattr); + if (ret < 0) + goto skip_full_check; + ret = do_check(env); if (env->cur_state) { free_verifier_state(env->cur_state, true); -- cgit v1.2.3-59-g8ed1b From f6161a8f3036caa45f225486be39783e99e0fa29 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 20 Nov 2018 14:08:20 -0800 Subject: bpf: fix a compilation error when CONFIG_BPF_SYSCALL is not defined Kernel test robot (lkp@intel.com) reports a compilation error at https://www.spinics.net/lists/netdev/msg534913.html introduced by commit 838e96904ff3 ("bpf: Introduce bpf_func_info"). If CONFIG_BPF is defined and CONFIG_BPF_SYSCALL is not defined, the following error will appear: kernel/bpf/core.c:414: undefined reference to `btf_type_by_id' kernel/bpf/core.c:415: undefined reference to `btf_name_by_offset' When CONFIG_BPF_SYSCALL is not defined, let us define stub inline functions for btf_type_by_id() and btf_name_by_offset() in include/linux/btf.h. This way, the compilation failure can be avoided. Fixes: 838e96904ff3 ("bpf: Introduce bpf_func_info") Reported-by: kbuild test robot Cc: Martin KaFai Lau Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- include/linux/btf.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/btf.h b/include/linux/btf.h index 7f2c0a4a45ea..8c2199b5d250 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -46,7 +46,21 @@ void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj, struct seq_file *m); int btf_get_fd_by_id(u32 id); u32 btf_id(const struct btf *btf); + +#ifdef CONFIG_BPF_SYSCALL const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); const char *btf_name_by_offset(const struct btf *btf, u32 offset); +#else +static inline const struct btf_type *btf_type_by_id(const struct btf *btf, + u32 type_id) +{ + return NULL; +} +static inline const char *btf_name_by_offset(const struct btf *btf, + u32 offset) +{ + return NULL; +} +#endif #endif -- cgit v1.2.3-59-g8ed1b From f11216b24219ab26d8d159fbfa12dff886b16e32 Mon Sep 17 00:00:00 2001 From: Vlad Dumitrescu Date: Thu, 22 Nov 2018 14:39:16 -0500 Subject: bpf: add skb->tstamp r/w access from tc clsact and cg skb progs This could be used to rate limit egress traffic in concert with a qdisc which supports Earliest Departure Time, such as FQ. Write access from cg skb progs only with CAP_SYS_ADMIN, since the value will be used by downstream qdiscs. It might make sense to relax this. Changes v1 -> v2: - allow access from cg skb, write only with CAP_SYS_ADMIN Signed-off-by: Vlad Dumitrescu Acked-by: Eric Dumazet Acked-by: Willem de Bruijn Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 1 + net/core/filter.c | 29 +++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 1 + tools/testing/selftests/bpf/test_verifier.c | 29 +++++++++++++++++++++++++++++ 4 files changed, 60 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c1554aa07465..23e2031a43d4 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2468,6 +2468,7 @@ struct __sk_buff { __u32 data_meta; struct bpf_flow_keys *flow_keys; + __u64 tstamp; }; struct bpf_tunnel_key { diff --git a/net/core/filter.c b/net/core/filter.c index f6ca38a7d433..65dc13aeca7c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5573,6 +5573,10 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type if (size != sizeof(struct bpf_flow_keys *)) return false; break; + case bpf_ctx_range(struct __sk_buff, tstamp): + if (size != sizeof(__u64)) + return false; + break; default: /* Only narrow read access allowed for now. */ if (type == BPF_WRITE) { @@ -5600,6 +5604,7 @@ static bool sk_filter_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, data_end): case bpf_ctx_range(struct __sk_buff, flow_keys): case bpf_ctx_range_till(struct __sk_buff, family, local_port): + case bpf_ctx_range(struct __sk_buff, tstamp): return false; } @@ -5638,6 +5643,10 @@ static bool cg_skb_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, priority): case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): break; + case bpf_ctx_range(struct __sk_buff, tstamp): + if (!capable(CAP_SYS_ADMIN)) + return false; + break; default: return false; } @@ -5665,6 +5674,7 @@ static bool lwt_is_valid_access(int off, int size, case bpf_ctx_range_till(struct __sk_buff, family, local_port): case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, flow_keys): + case bpf_ctx_range(struct __sk_buff, tstamp): return false; } @@ -5874,6 +5884,7 @@ static bool tc_cls_act_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, priority): case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): + case bpf_ctx_range(struct __sk_buff, tstamp): break; default: return false; @@ -6093,6 +6104,7 @@ static bool sk_skb_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, flow_keys): + case bpf_ctx_range(struct __sk_buff, tstamp): return false; } @@ -6179,6 +6191,7 @@ static bool flow_dissector_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range_till(struct __sk_buff, family, local_port): + case bpf_ctx_range(struct __sk_buff, tstamp): return false; } @@ -6488,6 +6501,22 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg, off); break; + + case offsetof(struct __sk_buff, tstamp): + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tstamp) != 8); + + if (type == BPF_WRITE) + *insn++ = BPF_STX_MEM(BPF_DW, + si->dst_reg, si->src_reg, + bpf_target_off(struct sk_buff, + tstamp, 8, + target_size)); + else + *insn++ = BPF_LDX_MEM(BPF_DW, + si->dst_reg, si->src_reg, + bpf_target_off(struct sk_buff, + tstamp, 8, + target_size)); } return insn - insn_buf; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index c1554aa07465..23e2031a43d4 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2468,6 +2468,7 @@ struct __sk_buff { __u32 data_meta; struct bpf_flow_keys *flow_keys; + __u64 tstamp; }; struct bpf_tunnel_key { diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 54d16fbdef8b..537a8f91af02 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -2446,6 +2446,10 @@ static struct bpf_test tests[] = { offsetof(struct __sk_buff, tc_index)), BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, offsetof(struct __sk_buff, cb[3])), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, tstamp)), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, tstamp)), BPF_EXIT_INSN(), }, .errstr_unpriv = "", @@ -5297,6 +5301,31 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R2 leaks addr into helper function", .prog_type = BPF_PROG_TYPE_CGROUP_SKB, }, + { + "write tstamp from CGROUP_SKB", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, tstamp)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "invalid bpf_context access off=152 size=8", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "read tstamp from CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, tstamp)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, { "multiple registers share map_lookup_elem result", .insns = { -- cgit v1.2.3-59-g8ed1b