diff options
Diffstat (limited to 'tools/testing/selftests/net')
-rw-r--r-- | tools/testing/selftests/net/.gitignore | 1 | ||||
-rw-r--r-- | tools/testing/selftests/net/Makefile | 16 | ||||
-rw-r--r-- | tools/testing/selftests/net/psock_lib.h | 39 | ||||
-rw-r--r-- | tools/testing/selftests/net/psock_tpacket.c | 97 | ||||
-rw-r--r-- | tools/testing/selftests/net/reuseport_bpf_numa.c | 255 | ||||
-rwxr-xr-x | tools/testing/selftests/net/run_netsocktests | 2 |
6 files changed, 372 insertions, 38 deletions
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 0840684deb7d..afe109e5508a 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -3,4 +3,5 @@ psock_fanout psock_tpacket reuseport_bpf reuseport_bpf_cpu +reuseport_bpf_numa reuseport_dualstack diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 0e5340742620..fbfe5d0d5c2e 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -1,19 +1,15 @@ # Makefile for net selftests -CFLAGS = -Wall -O2 -g - +CFLAGS = -Wall -Wl,--no-as-needed -O2 -g CFLAGS += -I../../../../usr/include/ -NET_PROGS = socket psock_fanout psock_tpacket reuseport_bpf reuseport_bpf_cpu reuseport_dualstack - -all: $(NET_PROGS) -%: %.c - $(CC) $(CFLAGS) -o $@ $^ +reuseport_bpf_numa: LDFLAGS += -lnuma TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh -TEST_FILES := $(NET_PROGS) +TEST_GEN_FILES = socket +TEST_GEN_FILES += psock_fanout psock_tpacket +TEST_GEN_FILES += reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa +TEST_GEN_FILES += reuseport_dualstack include ../lib.mk -clean: - $(RM) $(NET_PROGS) diff --git a/tools/testing/selftests/net/psock_lib.h b/tools/testing/selftests/net/psock_lib.h index 24bc7ec1be7d..a77da88bf946 100644 --- a/tools/testing/selftests/net/psock_lib.h +++ b/tools/testing/selftests/net/psock_lib.h @@ -40,14 +40,39 @@ static __maybe_unused void sock_setfilter(int fd, int lvl, int optnum) { + /* the filter below checks for all of the following conditions that + * are based on the contents of create_payload() + * ether type 0x800 and + * ip proto udp and + * skb->len == DATA_LEN and + * udp[38] == 'a' or udp[38] == 'b' + * It can be generated from the following bpf_asm input: + * ldh [12] + * jne #0x800, drop ; ETH_P_IP + * ldb [23] + * jneq #17, drop ; IPPROTO_UDP + * ld len ; ld skb->len + * jlt #100, drop ; DATA_LEN + * ldb [80] + * jeq #97, pass ; DATA_CHAR + * jne #98, drop ; DATA_CHAR_1 + * pass: + * ret #-1 + * drop: + * ret #0 + */ struct sock_filter bpf_filter[] = { - { 0x80, 0, 0, 0x00000000 }, /* LD pktlen */ - { 0x35, 0, 4, DATA_LEN }, /* JGE DATA_LEN [f goto nomatch]*/ - { 0x30, 0, 0, 0x00000050 }, /* LD ip[80] */ - { 0x15, 1, 0, DATA_CHAR }, /* JEQ DATA_CHAR [t goto match]*/ - { 0x15, 0, 1, DATA_CHAR_1}, /* JEQ DATA_CHAR_1 [t goto match]*/ - { 0x06, 0, 0, 0x00000060 }, /* RET match */ - { 0x06, 0, 0, 0x00000000 }, /* RET no match */ + { 0x28, 0, 0, 0x0000000c }, + { 0x15, 0, 8, 0x00000800 }, + { 0x30, 0, 0, 0x00000017 }, + { 0x15, 0, 6, 0x00000011 }, + { 0x80, 0, 0, 0000000000 }, + { 0x35, 0, 4, 0x00000064 }, + { 0x30, 0, 0, 0x00000050 }, + { 0x15, 1, 0, 0x00000061 }, + { 0x15, 0, 1, 0x00000062 }, + { 0x06, 0, 0, 0xffffffff }, + { 0x06, 0, 0, 0000000000 }, }; struct sock_fprog bpf_prog; diff --git a/tools/testing/selftests/net/psock_tpacket.c b/tools/testing/selftests/net/psock_tpacket.c index 24adf709bd9d..7f6cd9fdacf3 100644 --- a/tools/testing/selftests/net/psock_tpacket.c +++ b/tools/testing/selftests/net/psock_tpacket.c @@ -110,7 +110,7 @@ static unsigned int total_packets, total_bytes; static int pfsocket(int ver) { - int ret, sock = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); + int ret, sock = socket(PF_PACKET, SOCK_RAW, 0); if (sock == -1) { perror("socket"); exit(1); @@ -239,7 +239,6 @@ static void walk_v1_v2_rx(int sock, struct ring *ring) bug_on(ring->type != PACKET_RX_RING); pair_udp_open(udp_sock, PORT_BASE); - pair_udp_setfilter(sock); memset(&pfd, 0, sizeof(pfd)); pfd.fd = sock; @@ -311,20 +310,33 @@ static inline void __v2_tx_user_ready(struct tpacket2_hdr *hdr) __sync_synchronize(); } -static inline int __v1_v2_tx_kernel_ready(void *base, int version) +static inline int __v3_tx_kernel_ready(struct tpacket3_hdr *hdr) +{ + return !(hdr->tp_status & (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING)); +} + +static inline void __v3_tx_user_ready(struct tpacket3_hdr *hdr) +{ + hdr->tp_status = TP_STATUS_SEND_REQUEST; + __sync_synchronize(); +} + +static inline int __tx_kernel_ready(void *base, int version) { switch (version) { case TPACKET_V1: return __v1_tx_kernel_ready(base); case TPACKET_V2: return __v2_tx_kernel_ready(base); + case TPACKET_V3: + return __v3_tx_kernel_ready(base); default: bug_on(1); return 0; } } -static inline void __v1_v2_tx_user_ready(void *base, int version) +static inline void __tx_user_ready(void *base, int version) { switch (version) { case TPACKET_V1: @@ -333,6 +345,9 @@ static inline void __v1_v2_tx_user_ready(void *base, int version) case TPACKET_V2: __v2_tx_user_ready(base); break; + case TPACKET_V3: + __v3_tx_user_ready(base); + break; } } @@ -348,7 +363,22 @@ static void __v1_v2_set_packet_loss_discard(int sock) } } -static void walk_v1_v2_tx(int sock, struct ring *ring) +static inline void *get_next_frame(struct ring *ring, int n) +{ + uint8_t *f0 = ring->rd[0].iov_base; + + switch (ring->version) { + case TPACKET_V1: + case TPACKET_V2: + return ring->rd[n].iov_base; + case TPACKET_V3: + return f0 + (n * ring->req3.tp_frame_size); + default: + bug_on(1); + } +} + +static void walk_tx(int sock, struct ring *ring) { struct pollfd pfd; int rcv_sock, ret; @@ -360,9 +390,19 @@ static void walk_v1_v2_tx(int sock, struct ring *ring) .sll_family = PF_PACKET, .sll_halen = ETH_ALEN, }; + int nframes; + + /* TPACKET_V{1,2} sets up the ring->rd* related variables based + * on frames (e.g., rd_num is tp_frame_nr) whereas V3 sets these + * up based on blocks (e.g, rd_num is tp_block_nr) + */ + if (ring->version <= TPACKET_V2) + nframes = ring->rd_num; + else + nframes = ring->req3.tp_frame_nr; bug_on(ring->type != PACKET_TX_RING); - bug_on(ring->rd_num < NUM_PACKETS); + bug_on(nframes < NUM_PACKETS); rcv_sock = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); if (rcv_sock == -1) { @@ -388,10 +428,11 @@ static void walk_v1_v2_tx(int sock, struct ring *ring) create_payload(packet, &packet_len); while (total_packets > 0) { - while (__v1_v2_tx_kernel_ready(ring->rd[frame_num].iov_base, - ring->version) && + void *next = get_next_frame(ring, frame_num); + + while (__tx_kernel_ready(next, ring->version) && total_packets > 0) { - ppd.raw = ring->rd[frame_num].iov_base; + ppd.raw = next; switch (ring->version) { case TPACKET_V1: @@ -413,14 +454,27 @@ static void walk_v1_v2_tx(int sock, struct ring *ring) packet_len); total_bytes += ppd.v2->tp_h.tp_snaplen; break; + case TPACKET_V3: { + struct tpacket3_hdr *tx = next; + + tx->tp_snaplen = packet_len; + tx->tp_len = packet_len; + tx->tp_next_offset = 0; + + memcpy((uint8_t *)tx + TPACKET3_HDRLEN - + sizeof(struct sockaddr_ll), packet, + packet_len); + total_bytes += tx->tp_snaplen; + break; + } } status_bar_update(); total_packets--; - __v1_v2_tx_user_ready(ppd.raw, ring->version); + __tx_user_ready(next, ring->version); - frame_num = (frame_num + 1) % ring->rd_num; + frame_num = (frame_num + 1) % nframes; } poll(&pfd, 1, 1); @@ -460,7 +514,7 @@ static void walk_v1_v2(int sock, struct ring *ring) if (ring->type == PACKET_RX_RING) walk_v1_v2_rx(sock, ring); else - walk_v1_v2_tx(sock, ring); + walk_tx(sock, ring); } static uint64_t __v3_prev_block_seq_num = 0; @@ -546,7 +600,6 @@ static void walk_v3_rx(int sock, struct ring *ring) bug_on(ring->type != PACKET_RX_RING); pair_udp_open(udp_sock, PORT_BASE); - pair_udp_setfilter(sock); memset(&pfd, 0, sizeof(pfd)); pfd.fd = sock; @@ -583,7 +636,7 @@ static void walk_v3(int sock, struct ring *ring) if (ring->type == PACKET_RX_RING) walk_v3_rx(sock, ring); else - bug_on(1); + walk_tx(sock, ring); } static void __v1_v2_fill(struct ring *ring, unsigned int blocks) @@ -602,12 +655,13 @@ static void __v1_v2_fill(struct ring *ring, unsigned int blocks) ring->flen = ring->req.tp_frame_size; } -static void __v3_fill(struct ring *ring, unsigned int blocks) +static void __v3_fill(struct ring *ring, unsigned int blocks, int type) { - ring->req3.tp_retire_blk_tov = 64; - ring->req3.tp_sizeof_priv = 0; - ring->req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH; - + if (type == PACKET_RX_RING) { + ring->req3.tp_retire_blk_tov = 64; + ring->req3.tp_sizeof_priv = 0; + ring->req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH; + } ring->req3.tp_block_size = getpagesize() << 2; ring->req3.tp_frame_size = TPACKET_ALIGNMENT << 7; ring->req3.tp_block_nr = blocks; @@ -641,7 +695,7 @@ static void setup_ring(int sock, struct ring *ring, int version, int type) break; case TPACKET_V3: - __v3_fill(ring, blocks); + __v3_fill(ring, blocks, type); ret = setsockopt(sock, SOL_PACKET, type, &ring->req3, sizeof(ring->req3)); break; @@ -685,6 +739,8 @@ static void bind_ring(int sock, struct ring *ring) { int ret; + pair_udp_setfilter(sock); + ring->ll.sll_family = PF_PACKET; ring->ll.sll_protocol = htons(ETH_P_ALL); ring->ll.sll_ifindex = if_nametoindex("lo"); @@ -796,6 +852,7 @@ int main(void) ret |= test_tpacket(TPACKET_V2, PACKET_TX_RING); ret |= test_tpacket(TPACKET_V3, PACKET_RX_RING); + ret |= test_tpacket(TPACKET_V3, PACKET_TX_RING); if (ret) return 1; diff --git a/tools/testing/selftests/net/reuseport_bpf_numa.c b/tools/testing/selftests/net/reuseport_bpf_numa.c new file mode 100644 index 000000000000..6f20bc9ff627 --- /dev/null +++ b/tools/testing/selftests/net/reuseport_bpf_numa.c @@ -0,0 +1,255 @@ +/* + * Test functionality of BPF filters with SO_REUSEPORT. Same test as + * in reuseport_bpf_cpu, only as one socket per NUMA node. + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <linux/filter.h> +#include <linux/bpf.h> +#include <linux/in.h> +#include <linux/unistd.h> +#include <sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/epoll.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <unistd.h> +#include <numa.h> + +static const int PORT = 8888; + +static void build_rcv_group(int *rcv_fd, size_t len, int family, int proto) +{ + struct sockaddr_storage addr; + struct sockaddr_in *addr4; + struct sockaddr_in6 *addr6; + size_t i; + int opt; + + switch (family) { + case AF_INET: + addr4 = (struct sockaddr_in *)&addr; + addr4->sin_family = AF_INET; + addr4->sin_addr.s_addr = htonl(INADDR_ANY); + addr4->sin_port = htons(PORT); + break; + case AF_INET6: + addr6 = (struct sockaddr_in6 *)&addr; + addr6->sin6_family = AF_INET6; + addr6->sin6_addr = in6addr_any; + addr6->sin6_port = htons(PORT); + break; + default: + error(1, 0, "Unsupported family %d", family); + } + + for (i = 0; i < len; ++i) { + rcv_fd[i] = socket(family, proto, 0); + if (rcv_fd[i] < 0) + error(1, errno, "failed to create receive socket"); + + opt = 1; + if (setsockopt(rcv_fd[i], SOL_SOCKET, SO_REUSEPORT, &opt, + sizeof(opt))) + error(1, errno, "failed to set SO_REUSEPORT"); + + if (bind(rcv_fd[i], (struct sockaddr *)&addr, sizeof(addr))) + error(1, errno, "failed to bind receive socket"); + + if (proto == SOCK_STREAM && listen(rcv_fd[i], len * 10)) + error(1, errno, "failed to listen on receive port"); + } +} + +static void attach_bpf(int fd) +{ + static char bpf_log_buf[65536]; + static const char bpf_license[] = ""; + + int bpf_fd; + const struct bpf_insn prog[] = { + /* R0 = bpf_get_numa_node_id() */ + { BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_numa_node_id }, + /* return R0 */ + { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 } + }; + union bpf_attr attr; + + memset(&attr, 0, sizeof(attr)); + attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + attr.insn_cnt = sizeof(prog) / sizeof(prog[0]); + attr.insns = (unsigned long) &prog; + attr.license = (unsigned long) &bpf_license; + attr.log_buf = (unsigned long) &bpf_log_buf; + attr.log_size = sizeof(bpf_log_buf); + attr.log_level = 1; + + bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); + if (bpf_fd < 0) + error(1, errno, "ebpf error. log:\n%s\n", bpf_log_buf); + + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd, + sizeof(bpf_fd))) + error(1, errno, "failed to set SO_ATTACH_REUSEPORT_EBPF"); + + close(bpf_fd); +} + +static void send_from_node(int node_id, int family, int proto) +{ + struct sockaddr_storage saddr, daddr; + struct sockaddr_in *saddr4, *daddr4; + struct sockaddr_in6 *saddr6, *daddr6; + int fd; + + switch (family) { + case AF_INET: + saddr4 = (struct sockaddr_in *)&saddr; + saddr4->sin_family = AF_INET; + saddr4->sin_addr.s_addr = htonl(INADDR_ANY); + saddr4->sin_port = 0; + + daddr4 = (struct sockaddr_in *)&daddr; + daddr4->sin_family = AF_INET; + daddr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK); + daddr4->sin_port = htons(PORT); + break; + case AF_INET6: + saddr6 = (struct sockaddr_in6 *)&saddr; + saddr6->sin6_family = AF_INET6; + saddr6->sin6_addr = in6addr_any; + saddr6->sin6_port = 0; + + daddr6 = (struct sockaddr_in6 *)&daddr; + daddr6->sin6_family = AF_INET6; + daddr6->sin6_addr = in6addr_loopback; + daddr6->sin6_port = htons(PORT); + break; + default: + error(1, 0, "Unsupported family %d", family); + } + + if (numa_run_on_node(node_id) < 0) + error(1, errno, "failed to pin to node"); + + fd = socket(family, proto, 0); + if (fd < 0) + error(1, errno, "failed to create send socket"); + + if (bind(fd, (struct sockaddr *)&saddr, sizeof(saddr))) + error(1, errno, "failed to bind send socket"); + + if (connect(fd, (struct sockaddr *)&daddr, sizeof(daddr))) + error(1, errno, "failed to connect send socket"); + + if (send(fd, "a", 1, 0) < 0) + error(1, errno, "failed to send message"); + + close(fd); +} + +static +void receive_on_node(int *rcv_fd, int len, int epfd, int node_id, int proto) +{ + struct epoll_event ev; + int i, fd; + char buf[8]; + + i = epoll_wait(epfd, &ev, 1, -1); + if (i < 0) + error(1, errno, "epoll_wait failed"); + + if (proto == SOCK_STREAM) { + fd = accept(ev.data.fd, NULL, NULL); + if (fd < 0) + error(1, errno, "failed to accept"); + i = recv(fd, buf, sizeof(buf), 0); + close(fd); + } else { + i = recv(ev.data.fd, buf, sizeof(buf), 0); + } + + if (i < 0) + error(1, errno, "failed to recv"); + + for (i = 0; i < len; ++i) + if (ev.data.fd == rcv_fd[i]) + break; + if (i == len) + error(1, 0, "failed to find socket"); + fprintf(stderr, "send node %d, receive socket %d\n", node_id, i); + if (node_id != i) + error(1, 0, "node id/receive socket mismatch"); +} + +static void test(int *rcv_fd, int len, int family, int proto) +{ + struct epoll_event ev; + int epfd, node; + + build_rcv_group(rcv_fd, len, family, proto); + attach_bpf(rcv_fd[0]); + + epfd = epoll_create(1); + if (epfd < 0) + error(1, errno, "failed to create epoll"); + for (node = 0; node < len; ++node) { + ev.events = EPOLLIN; + ev.data.fd = rcv_fd[node]; + if (epoll_ctl(epfd, EPOLL_CTL_ADD, rcv_fd[node], &ev)) + error(1, errno, "failed to register sock epoll"); + } + + /* Forward iterate */ + for (node = 0; node < len; ++node) { + send_from_node(node, family, proto); + receive_on_node(rcv_fd, len, epfd, node, proto); + } + + /* Reverse iterate */ + for (node = len - 1; node >= 0; --node) { + send_from_node(node, family, proto); + receive_on_node(rcv_fd, len, epfd, node, proto); + } + + close(epfd); + for (node = 0; node < len; ++node) + close(rcv_fd[node]); +} + +int main(void) +{ + int *rcv_fd, nodes; + + if (numa_available() < 0) + error(1, errno, "no numa api support"); + + nodes = numa_max_node() + 1; + + rcv_fd = calloc(nodes, sizeof(int)); + if (!rcv_fd) + error(1, 0, "failed to allocate array"); + + fprintf(stderr, "---- IPv4 UDP ----\n"); + test(rcv_fd, nodes, AF_INET, SOCK_DGRAM); + + fprintf(stderr, "---- IPv6 UDP ----\n"); + test(rcv_fd, nodes, AF_INET6, SOCK_DGRAM); + + fprintf(stderr, "---- IPv4 TCP ----\n"); + test(rcv_fd, nodes, AF_INET, SOCK_STREAM); + + fprintf(stderr, "---- IPv6 TCP ----\n"); + test(rcv_fd, nodes, AF_INET6, SOCK_STREAM); + + free(rcv_fd); + + fprintf(stderr, "SUCCESS\n"); + return 0; +} diff --git a/tools/testing/selftests/net/run_netsocktests b/tools/testing/selftests/net/run_netsocktests index c09a682df56a..16058bbea7a8 100755 --- a/tools/testing/selftests/net/run_netsocktests +++ b/tools/testing/selftests/net/run_netsocktests @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/sh echo "--------------------" echo "running socket test" |