diff options
author | 2022-09-09 10:40:45 -0700 | |
---|---|---|
committer | 2022-09-09 10:40:46 -0700 | |
commit | 2fae67716bb99a956a4c4a47c0e2ece52a2a15ca (patch) | |
tree | ffea21e0a4d42c7ddd9ddf8dc56c14a2dd52f306 | |
parent | libbpf: Remove gcc support for bpf_tail_call_static for now (diff) | |
parent | selftests/bpf: Ensure cgroup/connect{4,6} programs can bind unpriv ICMP ping (diff) | |
download | wireguard-linux-2fae67716bb99a956a4c4a47c0e2ece52a2a15ca.tar.xz wireguard-linux-2fae67716bb99a956a4c4a47c0e2ece52a2a15ca.zip |
Merge branch 'cgroup/connect{4,6} programs for unprivileged ICMP ping'
YiFei Zhu says:
====================
Usually when a TCP/UDP connection is initiated, we can bind the socket
to a specific IP attached to an interface in a cgroup/connect hook.
But for pings, this is impossible, as the hook is not being called.
This series adds the invocation for cgroup/connect{4,6} programs to
unprivileged ICMP ping (i.e. ping sockets created with SOCK_DGRAM
IPPROTO_ICMP(V6) as opposed to SOCK_RAW). This also adds a test to
verify that the hooks are being called and invoking bpf_bind() from
within the hook actually binds the socket.
Patch 1 adds the invocation of the hook.
Patch 2 deduplicates write_sysctl in BPF test_progs.
Patch 3 adds the tests for this hook.
v1 -> v2:
* Added static to bindaddr_v6 in prog_tests/connect_ping.c
* Deduplicated much of the test logic in prog_tests/connect_ping.c
* Deduplicated write_sysctl() to test_progs.c
v2 -> v3:
* Renamed variable "obj" to "skel" for the BPF skeleton object in
prog_tests/connect_ping.c
v3 -> v4:
* Fixed error path to destroy skel in prog_tests/connect_ping.c
====================
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
-rw-r--r-- | net/ipv4/ping.c | 15 | ||||
-rw-r--r-- | net/ipv6/ping.c | 16 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c | 20 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/prog_tests/connect_ping.c | 178 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c | 20 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/progs/connect_ping.c | 53 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/test_progs.c | 17 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/test_progs.h | 1 |
8 files changed, 280 insertions, 40 deletions
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index b83c2bd9d722..517042caf6dc 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -33,6 +33,7 @@ #include <linux/skbuff.h> #include <linux/proc_fs.h> #include <linux/export.h> +#include <linux/bpf-cgroup.h> #include <net/sock.h> #include <net/ping.h> #include <net/udp.h> @@ -295,6 +296,19 @@ void ping_close(struct sock *sk, long timeout) } EXPORT_SYMBOL_GPL(ping_close); +static int ping_pre_connect(struct sock *sk, struct sockaddr *uaddr, + int addr_len) +{ + /* This check is replicated from __ip4_datagram_connect() and + * intended to prevent BPF program called below from accessing bytes + * that are out of the bound specified by user in addr_len. + */ + if (addr_len < sizeof(struct sockaddr_in)) + return -EINVAL; + + return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr); +} + /* Checks the bind address and possibly modifies sk->sk_bound_dev_if. */ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, struct sockaddr *uaddr, int addr_len) @@ -1009,6 +1023,7 @@ struct proto ping_prot = { .owner = THIS_MODULE, .init = ping_init_sock, .close = ping_close, + .pre_connect = ping_pre_connect, .connect = ip4_datagram_connect, .disconnect = __udp_disconnect, .setsockopt = ip_setsockopt, diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 91b840514656..5f2ef8493714 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -20,6 +20,7 @@ #include <net/udp.h> #include <net/transp_v6.h> #include <linux/proc_fs.h> +#include <linux/bpf-cgroup.h> #include <net/ping.h> static void ping_v6_destroy(struct sock *sk) @@ -49,6 +50,20 @@ static int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr, return 0; } +static int ping_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr, + int addr_len) +{ + /* This check is replicated from __ip6_datagram_connect() and + * intended to prevent BPF program called below from accessing + * bytes that are out of the bound specified by user in addr_len. + */ + + if (addr_len < SIN6_LEN_RFC2133) + return -EINVAL; + + return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr); +} + static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct inet_sock *inet = inet_sk(sk); @@ -191,6 +206,7 @@ struct proto pingv6_prot = { .init = ping_init_sock, .close = ping_close, .destroy = ping_v6_destroy, + .pre_connect = ping_v6_pre_connect, .connect = ip6_datagram_connect_v6_only, .disconnect = __udp_disconnect, .setsockopt = ipv6_setsockopt, diff --git a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c index 664ffc0364f4..7a277035c275 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c @@ -22,26 +22,6 @@ static __u32 duration; #define PROG_PIN_FILE "/sys/fs/bpf/btf_skc_cls_ingress" -static int write_sysctl(const char *sysctl, const char *value) -{ - int fd, err, len; - - fd = open(sysctl, O_WRONLY); - if (CHECK(fd == -1, "open sysctl", "open(%s): %s (%d)\n", - sysctl, strerror(errno), errno)) - return -1; - - len = strlen(value); - err = write(fd, value, len); - close(fd); - if (CHECK(err != len, "write sysctl", - "write(%s, %s, %d): err:%d %s (%d)\n", - sysctl, value, len, err, strerror(errno), errno)) - return -1; - - return 0; -} - static int prepare_netns(void) { if (CHECK(unshare(CLONE_NEWNET), "create netns", diff --git a/tools/testing/selftests/bpf/prog_tests/connect_ping.c b/tools/testing/selftests/bpf/prog_tests/connect_ping.c new file mode 100644 index 000000000000..289218c2216c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/connect_ping.c @@ -0,0 +1,178 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Copyright 2022 Google LLC. + */ + +#define _GNU_SOURCE +#include <sys/mount.h> + +#include "test_progs.h" +#include "cgroup_helpers.h" +#include "network_helpers.h" + +#include "connect_ping.skel.h" + +/* 2001:db8::1 */ +#define BINDADDR_V6 { { { 0x20,0x01,0x0d,0xb8,0,0,0,0,0,0,0,0,0,0,0,1 } } } +static const struct in6_addr bindaddr_v6 = BINDADDR_V6; + +static void subtest(int cgroup_fd, struct connect_ping *skel, + int family, int do_bind) +{ + struct sockaddr_in sa4 = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_LOOPBACK), + }; + struct sockaddr_in6 sa6 = { + .sin6_family = AF_INET6, + .sin6_addr = IN6ADDR_LOOPBACK_INIT, + }; + struct sockaddr *sa; + socklen_t sa_len; + int protocol; + int sock_fd; + + switch (family) { + case AF_INET: + sa = (struct sockaddr *)&sa4; + sa_len = sizeof(sa4); + protocol = IPPROTO_ICMP; + break; + case AF_INET6: + sa = (struct sockaddr *)&sa6; + sa_len = sizeof(sa6); + protocol = IPPROTO_ICMPV6; + break; + } + + memset(skel->bss, 0, sizeof(*skel->bss)); + skel->bss->do_bind = do_bind; + + sock_fd = socket(family, SOCK_DGRAM, protocol); + if (!ASSERT_GE(sock_fd, 0, "sock-create")) + return; + + if (!ASSERT_OK(connect(sock_fd, sa, sa_len), "connect")) + goto close_sock; + + if (!ASSERT_EQ(skel->bss->invocations_v4, family == AF_INET ? 1 : 0, + "invocations_v4")) + goto close_sock; + if (!ASSERT_EQ(skel->bss->invocations_v6, family == AF_INET6 ? 1 : 0, + "invocations_v6")) + goto close_sock; + if (!ASSERT_EQ(skel->bss->has_error, 0, "has_error")) + goto close_sock; + + if (!ASSERT_OK(getsockname(sock_fd, sa, &sa_len), + "getsockname")) + goto close_sock; + + switch (family) { + case AF_INET: + if (!ASSERT_EQ(sa4.sin_family, family, "sin_family")) + goto close_sock; + if (!ASSERT_EQ(sa4.sin_addr.s_addr, + htonl(do_bind ? 0x01010101 : INADDR_LOOPBACK), + "sin_addr")) + goto close_sock; + break; + case AF_INET6: + if (!ASSERT_EQ(sa6.sin6_family, AF_INET6, "sin6_family")) + goto close_sock; + if (!ASSERT_EQ(memcmp(&sa6.sin6_addr, + do_bind ? &bindaddr_v6 : &in6addr_loopback, + sizeof(sa6.sin6_addr)), + 0, "sin6_addr")) + goto close_sock; + break; + } + +close_sock: + close(sock_fd); +} + +void test_connect_ping(void) +{ + struct connect_ping *skel; + int cgroup_fd; + + if (!ASSERT_OK(unshare(CLONE_NEWNET | CLONE_NEWNS), "unshare")) + return; + + /* overmount sysfs, and making original sysfs private so overmount + * does not propagate to other mntns. + */ + if (!ASSERT_OK(mount("none", "/sys", NULL, MS_PRIVATE, NULL), + "remount-private-sys")) + return; + if (!ASSERT_OK(mount("sysfs", "/sys", "sysfs", 0, NULL), + "mount-sys")) + return; + if (!ASSERT_OK(mount("bpffs", "/sys/fs/bpf", "bpf", 0, NULL), + "mount-bpf")) + goto clean_mount; + + if (!ASSERT_OK(system("ip link set dev lo up"), "lo-up")) + goto clean_mount; + if (!ASSERT_OK(system("ip addr add 1.1.1.1 dev lo"), "lo-addr-v4")) + goto clean_mount; + if (!ASSERT_OK(system("ip -6 addr add 2001:db8::1 dev lo"), "lo-addr-v6")) + goto clean_mount; + if (write_sysctl("/proc/sys/net/ipv4/ping_group_range", "0 0")) + goto clean_mount; + + cgroup_fd = test__join_cgroup("/connect_ping"); + if (!ASSERT_GE(cgroup_fd, 0, "cg-create")) + goto clean_mount; + + skel = connect_ping__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel-load")) + goto close_cgroup; + skel->links.connect_v4_prog = + bpf_program__attach_cgroup(skel->progs.connect_v4_prog, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.connect_v4_prog, "cg-attach-v4")) + goto skel_destroy; + skel->links.connect_v6_prog = + bpf_program__attach_cgroup(skel->progs.connect_v6_prog, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.connect_v6_prog, "cg-attach-v6")) + goto skel_destroy; + + /* Connect a v4 ping socket to localhost, assert that only v4 is called, + * and called exactly once, and that the socket's bound address is + * original loopback address. + */ + if (test__start_subtest("ipv4")) + subtest(cgroup_fd, skel, AF_INET, 0); + + /* Connect a v4 ping socket to localhost, assert that only v4 is called, + * and called exactly once, and that the socket's bound address is + * address we explicitly bound. + */ + if (test__start_subtest("ipv4-bind")) + subtest(cgroup_fd, skel, AF_INET, 1); + + /* Connect a v6 ping socket to localhost, assert that only v6 is called, + * and called exactly once, and that the socket's bound address is + * original loopback address. + */ + if (test__start_subtest("ipv6")) + subtest(cgroup_fd, skel, AF_INET6, 0); + + /* Connect a v6 ping socket to localhost, assert that only v6 is called, + * and called exactly once, and that the socket's bound address is + * address we explicitly bound. + */ + if (test__start_subtest("ipv6-bind")) + subtest(cgroup_fd, skel, AF_INET6, 1); + +skel_destroy: + connect_ping__destroy(skel); + +close_cgroup: + close(cgroup_fd); + +clean_mount: + umount2("/sys", MNT_DETACH); +} diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c index 1fa772079967..f24436d33cd6 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c @@ -54,26 +54,6 @@ static int create_netns(void) return 0; } -static int write_sysctl(const char *sysctl, const char *value) -{ - int fd, err, len; - - fd = open(sysctl, O_WRONLY); - if (CHECK(fd == -1, "open sysctl", "open(%s): %s (%d)\n", - sysctl, strerror(errno), errno)) - return -1; - - len = strlen(value); - err = write(fd, value, len); - close(fd); - if (CHECK(err != len, "write sysctl", - "write(%s, %s): err:%d %s (%d)\n", - sysctl, value, err, strerror(errno), errno)) - return -1; - - return 0; -} - static void print_hdr_stg(const struct hdr_stg *hdr_stg, const char *prefix) { fprintf(stderr, "%s{active:%u, resend_syn:%u, syncookie:%u, fastopen:%u}\n", diff --git a/tools/testing/selftests/bpf/progs/connect_ping.c b/tools/testing/selftests/bpf/progs/connect_ping.c new file mode 100644 index 000000000000..60178192b672 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/connect_ping.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Copyright 2022 Google LLC. + */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> +#include <netinet/in.h> +#include <sys/socket.h> + +/* 2001:db8::1 */ +#define BINDADDR_V6 { { { 0x20,0x01,0x0d,0xb8,0,0,0,0,0,0,0,0,0,0,0,1 } } } + +__u32 do_bind = 0; +__u32 has_error = 0; +__u32 invocations_v4 = 0; +__u32 invocations_v6 = 0; + +SEC("cgroup/connect4") +int connect_v4_prog(struct bpf_sock_addr *ctx) +{ + struct sockaddr_in sa = { + .sin_family = AF_INET, + .sin_addr.s_addr = bpf_htonl(0x01010101), + }; + + __sync_fetch_and_add(&invocations_v4, 1); + + if (do_bind && bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa))) + has_error = 1; + + return 1; +} + +SEC("cgroup/connect6") +int connect_v6_prog(struct bpf_sock_addr *ctx) +{ + struct sockaddr_in6 sa = { + .sin6_family = AF_INET6, + .sin6_addr = BINDADDR_V6, + }; + + __sync_fetch_and_add(&invocations_v6, 1); + + if (do_bind && bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa))) + has_error = 1; + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 3561c97701f2..0e9a47f97890 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -943,6 +943,23 @@ int trigger_module_test_write(int write_sz) return 0; } +int write_sysctl(const char *sysctl, const char *value) +{ + int fd, err, len; + + fd = open(sysctl, O_WRONLY); + if (!ASSERT_NEQ(fd, -1, "open sysctl")) + return -1; + + len = strlen(value); + err = write(fd, value, len); + close(fd); + if (!ASSERT_EQ(err, len, "write sysctl")) + return -1; + + return 0; +} + #define MAX_BACKTRACE_SZ 128 void crash_handler(int signum) { diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 5fe1365c2bb1..b090996daee5 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -384,6 +384,7 @@ int extract_build_id(char *build_id, size_t size); int kern_sync_rcu(void); int trigger_module_test_read(int read_sz); int trigger_module_test_write(int write_sz); +int write_sysctl(const char *sysctl, const char *value); #ifdef __x86_64__ #define SYS_NANOSLEEP_KPROBE_NAME "__x64_sys_nanosleep" |