bpf: Add helper to retrieve socket in BPF

This patch adds new BPF helper functions, bpf_sk_lookup_tcp() and bpf_sk_lookup_udp() which allows BPF programs to find out if there is a socket listening on this host, and returns a socket pointer which the BPF program can then access to determine, for instance, whether to forward or drop traffic. bpf_sk_lookup_xxx() may take a reference on the socket, so when a BPF program makes use of this function, it must subsequently pass the returned pointer into the newly added sk_release() to return the reference. By way of example, the following pseudocode would filter inbound connections at XDP if there is no corresponding service listening for the traffic: struct bpf_sock_tuple tuple; struct bpf_sock_ops *sk; populate_tuple(ctx, &tuple); // Extract the 5tuple from the packet sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof tuple, netns, 0); if (!sk) { // Couldn't find a socket listening for this traffic. Drop. return TC_ACT_SHOT; } bpf_sk_release(sk, 0); return TC_ACT_OK; Signed-off-by: Joe Stringer <joe@wand.net.nz> Acked-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
author: Joe Stringer <joe@wand.net.nz> 2018-10-02 13:35:36 -0700
committer: Daniel Borkmann <daniel@iogearbox.net> 2018-10-03 02:53:47 +0200
commit: 6acc9b432e6714d72d7d77ec7c27f6f8358d0c71 (patch)
tree: 1b5ac6b607e8d88a815c8b88d795e71a18260f31 /net
parent: bpf: Add reference tracking to verifier (diff)
download: linux-dev-6acc9b432e6714d72d7d77ec7c27f6f8358d0c71.tar.xz
linux-dev-6acc9b432e6714d72d7d77ec7c27f6f8358d0c71.zip
1 files changed, 151 insertions, 0 deletions
diff --git a/net/core/filter.c b/net/core/filter.c
index b2cb186252e4..591c698bc517 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -58,13 +58,17 @@
 #include <net/busy_poll.h>
 #include <net/tcp.h>
 #include <net/xfrm.h>
+#include <net/udp.h>
 #include <linux/bpf_trace.h>
 #include <net/xdp_sock.h>
 #include <linux/inetdevice.h>
+#include <net/inet_hashtables.h>
+#include <net/inet6_hashtables.h>
 #include <net/ip_fib.h>
 #include <net/flow.h>
 #include <net/arp.h>
 #include <net/ipv6.h>
+#include <net/net_namespace.h>
 #include <linux/seg6_local.h>
 #include <net/seg6.h>
 #include <net/seg6_local.h>
@@ -4813,6 +4817,141 @@ static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
 };
 #endif /* CONFIG_IPV6_SEG6_BPF */
 
+struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
+		       struct sk_buff *skb, u8 family, u8 proto)
+{
+	int dif = skb->dev->ifindex;
+	bool refcounted = false;
+	struct sock *sk = NULL;
+
+	if (family == AF_INET) {
+		__be32 src4 = tuple->ipv4.saddr;
+		__be32 dst4 = tuple->ipv4.daddr;
+		int sdif = inet_sdif(skb);
+
+		if (proto == IPPROTO_TCP)
+			sk = __inet_lookup(net, &tcp_hashinfo, skb, 0,
+					   src4, tuple->ipv4.sport,
+					   dst4, tuple->ipv4.dport,
+					   dif, sdif, &refcounted);
+		else
+			sk = __udp4_lib_lookup(net, src4, tuple->ipv4.sport,
+					       dst4, tuple->ipv4.dport,
+					       dif, sdif, &udp_table, skb);
+#if IS_ENABLED(CONFIG_IPV6)
+	} else {
+		struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr;
+		struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr;
+		int sdif = inet6_sdif(skb);
+
+		if (proto == IPPROTO_TCP)
+			sk = __inet6_lookup(net, &tcp_hashinfo, skb, 0,
+					    src6, tuple->ipv6.sport,
+					    dst6, tuple->ipv6.dport,
+					    dif, sdif, &refcounted);
+		else
+			sk = __udp6_lib_lookup(net, src6, tuple->ipv6.sport,
+					       dst6, tuple->ipv6.dport,
+					       dif, sdif, &udp_table, skb);
+#endif
+	}
+
+	if (unlikely(sk && !refcounted && !sock_flag(sk, SOCK_RCU_FREE))) {
+		WARN_ONCE(1, "Found non-RCU, unreferenced socket!");
+		sk = NULL;
+	}
+	return sk;
+}
+
+/* bpf_sk_lookup performs the core lookup for different types of sockets,
+ * taking a reference on the socket if it doesn't have the flag SOCK_RCU_FREE.
+ * Returns the socket as an 'unsigned long' to simplify the casting in the
+ * callers to satisfy BPF_CALL declarations.
+ */
+static unsigned long
+bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
+	      u8 proto, u64 netns_id, u64 flags)
+{
+	struct net *caller_net;
+	struct sock *sk = NULL;
+	u8 family = AF_UNSPEC;
+	struct net *net;
+
+	family = len == sizeof(tuple->ipv4) ? AF_INET : AF_INET6;
+	if (unlikely(family == AF_UNSPEC || netns_id > U32_MAX || flags))
+		goto out;
+
+	if (skb->dev)
+		caller_net = dev_net(skb->dev);
+	else
+		caller_net = sock_net(skb->sk);
+	if (netns_id) {
+		net = get_net_ns_by_id(caller_net, netns_id);
+		if (unlikely(!net))
+			goto out;
+		sk = sk_lookup(net, tuple, skb, family, proto);
+		put_net(net);
+	} else {
+		net = caller_net;
+		sk = sk_lookup(net, tuple, skb, family, proto);
+	}
+
+	if (sk)
+		sk = sk_to_full_sk(sk);
+out:
+	return (unsigned long) sk;
+}
+
+BPF_CALL_5(bpf_sk_lookup_tcp, struct sk_buff *, skb,
+	   struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
+{
+	return bpf_sk_lookup(skb, tuple, len, IPPROTO_TCP, netns_id, flags);
+}
+
+static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = {
+	.func		= bpf_sk_lookup_tcp,
+	.gpl_only	= false,
+	.pkt_access	= true,
+	.ret_type	= RET_PTR_TO_SOCKET_OR_NULL,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+	.arg4_type	= ARG_ANYTHING,
+	.arg5_type	= ARG_ANYTHING,
+};
+
+BPF_CALL_5(bpf_sk_lookup_udp, struct sk_buff *, skb,
+	   struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
+{
+	return bpf_sk_lookup(skb, tuple, len, IPPROTO_UDP, netns_id, flags);
+}
+
+static const struct bpf_func_proto bpf_sk_lookup_udp_proto = {
+	.func		= bpf_sk_lookup_udp,
+	.gpl_only	= false,
+	.pkt_access	= true,
+	.ret_type	= RET_PTR_TO_SOCKET_OR_NULL,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+	.arg4_type	= ARG_ANYTHING,
+	.arg5_type	= ARG_ANYTHING,
+};
+
+BPF_CALL_1(bpf_sk_release, struct sock *, sk)
+{
+	if (!sock_flag(sk, SOCK_RCU_FREE))
+		sock_gen_put(sk);
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_sk_release_proto = {
+	.func		= bpf_sk_release,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_SOCKET,
+};
+
 bool bpf_helper_changes_pkt_data(void *func)
 {
 	if (func == bpf_skb_vlan_push ||
@@ -5019,6 +5158,12 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	case BPF_FUNC_skb_ancestor_cgroup_id:
 		return &bpf_skb_ancestor_cgroup_id_proto;
 #endif
+	case BPF_FUNC_sk_lookup_tcp:
+		return &bpf_sk_lookup_tcp_proto;
+	case BPF_FUNC_sk_lookup_udp:
+		return &bpf_sk_lookup_udp_proto;
+	case BPF_FUNC_sk_release:
+		return &bpf_sk_release_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
@@ -5119,6 +5264,12 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_sk_redirect_hash_proto;
 	case BPF_FUNC_get_local_storage:
 		return &bpf_get_local_storage_proto;
+	case BPF_FUNC_sk_lookup_tcp:
+		return &bpf_sk_lookup_tcp_proto;
+	case BPF_FUNC_sk_lookup_udp:
+		return &bpf_sk_lookup_udp_proto;
+	case BPF_FUNC_sk_release:
+		return &bpf_sk_release_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
author	Joe Stringer <joe@wand.net.nz>	2018-10-02 13:35:36 -0700
committer	Daniel Borkmann <daniel@iogearbox.net>	2018-10-03 02:53:47 +0200
commit	6acc9b432e6714d72d7d77ec7c27f6f8358d0c71 (patch)
tree	1b5ac6b607e8d88a815c8b88d795e71a18260f31 /net
parent	bpf: Add reference tracking to verifier (diff)
download	linux-dev-6acc9b432e6714d72d7d77ec7c27f6f8358d0c71.tar.xz linux-dev-6acc9b432e6714d72d7d77ec7c27f6f8358d0c71.zip